api: Remove dependency on exiftool

Use weasyprint to embed metadata
This commit is contained in:
TheOnlyWayUp
2025-10-28 03:50:37 +05:30
parent 35bbb54fc2
commit 5c1f3244b2
5 changed files with 541 additions and 581 deletions
+1 -1
View File
@@ -17,7 +17,6 @@ dependencies = [
"aiohttp-client-cache[all]", "aiohttp-client-cache[all]",
"bs4>=0.0.2", "bs4>=0.0.2",
"uvicorn>=0.32.1", "uvicorn>=0.32.1",
"pyexiftool>=0.5.6",
"weasyprint>=63.0", "weasyprint>=63.0",
"jinja2>=3.1.6", "jinja2>=3.1.6",
] ]
@@ -31,5 +30,6 @@ aiohttp-client-cache = { git = "https://github.com/TheOnlyWayUp/aiohttp-client-c
[dependency-groups] [dependency-groups]
dev = [ dev = [
"ipykernel>=6.29.5", "ipykernel>=6.29.5",
"ipynb>=0.5.1",
"ruff>=0.11.12", "ruff>=0.11.12",
] ]
+6 -35
View File
@@ -4,7 +4,6 @@ from pathlib import Path
from tempfile import NamedTemporaryFile, _TemporaryFileWrapper from tempfile import NamedTemporaryFile, _TemporaryFileWrapper
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from exiftool import ExifTool
from jinja2 import Template from jinja2 import Template
from weasyprint import CSS, HTML from weasyprint import CSS, HTML
from weasyprint.text.fonts import FontConfiguration from weasyprint.text.fonts import FontConfiguration
@@ -134,6 +133,12 @@ class PDFGenerator(AbstractGenerator):
"book_title": self.story["title"], "book_title": self.story["title"],
"cover": f"data:image/jpg;base64,{b64encode(self.cover).decode()}", "cover": f"data:image/jpg;base64,{b64encode(self.cover).decode()}",
"username": self.story["user"]["username"], "username": self.story["user"]["username"],
"author_bio": self.story["user"]["description"],
"tags": self.story["tags"],
"created": self.story["createDate"],
"modified": self.story["modifyDate"],
"is_completed": self.story["completed"],
"is_mature": self.story["mature"],
"description": self.story["description"], "description": self.story["description"],
"avatar": b64encode(self.author).decode(), "avatar": b64encode(self.author).decode(),
"copyright": { "copyright": {
@@ -160,44 +165,10 @@ class PDFGenerator(AbstractGenerator):
self.book.name, stylesheets=[stylesheet_obj], font_config=font_config self.book.name, stylesheets=[stylesheet_obj], font_config=font_config
) )
def add_metadata(self):
    """Embed story metadata into the generated PDF at self.book via ExifTool."""
    story = self.story

    # ExifTool does not parse a literal "\n" in tag values correctly; it
    # accepts "$/" for the same purpose instead ("&#xa;" is another option).
    subject = story["description"].strip().replace("\n", "$/")

    # Tag names as per https://exiftool.org/TagNames/PDF.html
    tags = {
        "Author": story["user"]["username"],
        "Title": story["title"],
        "Subject": subject,
        "CreationDate": story["createDate"],
        "ModDate": story["modifyDate"],
        "Keywords": ",".join(story["tags"]),
        "Language": story["language"]["name"],
        "Completed": story["completed"],
        "MatureContent": story["mature"],
        "Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader",
    }

    # The custom configuration file adds the Completed and MatureContent tags.
    # Note that the exiftool logger logs the executed command.
    with ExifTool(config_file=DATA_PATH / "exiftool.config") as et:
        arguments = [f"-{name}={content}" for name, content in tags.items()]
        # Rewrite the PDF in place instead of leaving a "_original" backup.
        arguments.append("-overwrite_original")
        arguments.append(self.book.file.name)
        et.execute(*arguments)
def compile(self): def compile(self):
parts = self.generate_chapters() parts = self.generate_chapters()
self.populate_template(parts) self.populate_template(parts)
self.generate_pdf() self.generate_pdf()
self.add_metadata()
return True return True
def dump(self) -> BytesIO: def dump(self) -> BytesIO:
@@ -1,16 +1,32 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="{{ langcode }}"> <html lang="{{ langcode }}">
<head>
<title>{{ book_title }}</title>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{ book_title }}</title> <meta name=Subject content="{{description}}">
<meta name=Author content="{{author}}">
<meta name=Keywords content="{{tags}}">
<meta name=Language content="{{langcode}}">
<meta name=CreationDate content="{{created}}">
<meta name=ModDate content="{{modified}}">
<meta name=Generator content="Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader">
<section class="fullpage"> <meta name=completed content="{{is_completed}}">
<meta name=maturecontent content="{{is_mature}}">
</head>
<section class="fullpage">
<img src="{{ cover }}" alt="Cover"> <img src="{{ cover }}" alt="Cover">
</section> </section>
<div id="copyright-container"> <div id="copyright-container">
<h1 id="copyright-notice">Copyright Notice</h1> <h1 id="copyright-notice">Copyright Notice</h1>
<h2 id="copyright-title">{{ book_title }}</h2> <h2 id="copyright-title">{{ book_title }}</h2>
@@ -23,11 +39,8 @@
<div id="copyright-separator"></div> <div id="copyright-separator"></div>
{% if copyright.data %} {% if copyright.data %}
<img src="data:image/jpg;base64,{{copyright.data}}" <img src="data:image/jpg;base64,{{copyright.data}}" alt="{{copyright.name}}" width="88" height="31"
alt="{{copyright.name}}" id="copyright-license-image">
width="88"
height="31"
id="copyright-license-image">
{% endif %} {% endif %}
<p id="copyright-copyright">{{ statement }}</p> <p id="copyright-copyright">{{ statement }}</p>
@@ -40,9 +53,9 @@ id="copyright-license-image">
ID: {{ book_id }}. ID: {{ book_id }}.
<a href="https://wattpad.com/story/{{ book_id }}" target="_blank" id="copyright-link">View this Book Online</a> <a href="https://wattpad.com/story/{{ book_id }}" target="_blank" id="copyright-link">View this Book Online</a>
</p> </p>
</div> </div>
<div id="book"> <div id="book">
<section id="contents" class="toc"> <section id="contents" class="toc">
<h1>Table of Contents</h1> <h1>Table of Contents</h1>
<ul> <ul>
@@ -55,10 +68,10 @@ id="copyright-license-image">
{{parts[part_id] | safe}} {{parts[part_id] | safe}}
{% endfor %} {% endfor %}
</div> </div>
<h1>About the Author</h1> <h1>About the Author</h1>
<div id="author-container"> <div id="author-container">
<div id="author-about"> <div id="author-about">
<img src="data:image/jpg;base64,{{avatar}}" alt="{{author}}'s profile picture" id="author-profile-picture"> <img src="data:image/jpg;base64,{{avatar}}" alt="{{author}}'s profile picture" id="author-profile-picture">
<h2 id="author-name"> <h2 id="author-name">
@@ -66,8 +79,9 @@ id="copyright-license-image">
</h2> </h2>
<hr id="author-divider"> <hr id="author-divider">
<p id="author-bio"> <p id="author-bio">
{{ description }} {{ author_bio }}
</p> </p>
</div> </div>
</div> </div>
</html> </html>
@@ -1,26 +0,0 @@
# ExifTool user-defined tag configuration.
# Declares two custom tags, Completed and MatureContent, once in the XMP "xmp"
# namespace table and once in the IPTC ApplicationRecord table.
%Image::ExifTool::UserDefined = (
# XMP variants: both tags are writable booleans with group family 2 set to 'Content'.
'Image::ExifTool::XMP::xmp' => {
Completed => {
Writable => 'boolean', # Can be a boolean (True/False)
Groups => { 2 => 'Content' },
},
MatureContent => {
Writable => 'boolean', # Can be a boolean (True/False)
Groups => { 2 => 'Content' },
},
},
# IPTC variants: record IDs 161 and 162, each stored as a string of 0 to 16 characters.
'Image::ExifTool::IPTC::ApplicationRecord' => {
161 => {
Name => 'Completed',
Format => 'string[0,16]', # Store as a string (e.g., "Yes"/"No")
},
162 => {
Name => 'MatureContent',
Format => 'string[0,16]', # Store as a string (e.g., "Yes"/"No")
},
},
);
# A Perl file loaded this way must finish by evaluating to a true value.
1; # End
+459 -458
View File
File diff suppressed because it is too large Load Diff