api: Remove dependency on exiftool

Use weasyprint to embed metadata
This commit is contained in:
TheOnlyWayUp
2025-10-28 03:50:37 +05:30
parent 35bbb54fc2
commit 5c1f3244b2
5 changed files with 541 additions and 581 deletions
+1 -1
View File
@@ -17,7 +17,6 @@ dependencies = [
"aiohttp-client-cache[all]",
"bs4>=0.0.2",
"uvicorn>=0.32.1",
"pyexiftool>=0.5.6",
"weasyprint>=63.0",
"jinja2>=3.1.6",
]
@@ -31,5 +30,6 @@ aiohttp-client-cache = { git = "https://github.com/TheOnlyWayUp/aiohttp-client-c
[dependency-groups]
dev = [
"ipykernel>=6.29.5",
"ipynb>=0.5.1",
"ruff>=0.11.12",
]
+6 -35
View File
@@ -4,7 +4,6 @@ from pathlib import Path
from tempfile import NamedTemporaryFile, _TemporaryFileWrapper
from bs4 import BeautifulSoup
from exiftool import ExifTool
from jinja2 import Template
from weasyprint import CSS, HTML
from weasyprint.text.fonts import FontConfiguration
@@ -134,6 +133,12 @@ class PDFGenerator(AbstractGenerator):
"book_title": self.story["title"],
"cover": f"data:image/jpg;base64,{b64encode(self.cover).decode()}",
"username": self.story["user"]["username"],
"author_bio": self.story["user"]["description"],
"tags": self.story["tags"],
"created": self.story["createDate"],
"modified": self.story["modifyDate"],
"is_completed": self.story["completed"],
"is_mature": self.story["mature"],
"description": self.story["description"],
"avatar": b64encode(self.author).decode(),
"copyright": {
@@ -160,44 +165,10 @@ class PDFGenerator(AbstractGenerator):
self.book.name, stylesheets=[stylesheet_obj], font_config=font_config
)
def add_metadata(self):
    """Stamp story metadata onto the PDF file behind ``self.book`` with ExifTool.

    Every entry of the tag mapping is turned into a ``-Tag=value`` argument
    and sent to ExifTool in a single ``execute`` call, together with
    ``-overwrite_original`` and the path of the generated file.
    """
    story = self.story

    # exiftool does not handle raw "\n" in values correctly; it supports
    # "$/" as the newline stand-in instead.
    subject = story["description"].strip().replace("\n", "$/")

    # Tag names as per https://exiftool.org/TagNames/PDF.html
    tags = {
        "Author": story["user"]["username"],
        "Title": story["title"],
        "Subject": subject,
        "CreationDate": story["createDate"],
        "ModDate": story["modifyDate"],
        "Keywords": ",".join(story["tags"]),
        "Language": story["language"]["name"],
        "Completed": story["completed"],
        "MatureContent": story["mature"],
        "Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader",
    }
    assignments = [f"-{name}={content}" for name, content in tags.items()]

    # The custom configuration adds the Completed and MatureContent tags.
    # The exiftool logger logs the executed command.
    with ExifTool(config_file=DATA_PATH / "exiftool.config") as tool:
        tool.execute(*assignments, "-overwrite_original", self.book.file.name)
def compile(self):
    """Run every generation stage in order and report success.

    The chapters returned by ``generate_chapters`` are handed straight to
    ``populate_template``; ``generate_pdf`` and ``add_metadata`` then run
    in that order.

    Returns:
        ``True`` once all stages have been called.
    """
    self.populate_template(self.generate_chapters())
    self.generate_pdf()
    self.add_metadata()
    return True
def dump(self) -> BytesIO:
@@ -1,16 +1,32 @@
<!DOCTYPE html>
<html lang="{{ langcode }}">
<head>
<title>{{ book_title }}</title>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{ book_title }}</title>
<meta name=Subject content="{{description}}">
<meta name=Author content="{{author}}">
{# `tags` is a list of strings: join it so the keywords metadata is a comma-separated string rather than the Python list repr. #}
<meta name=Keywords content="{{ tags | join(',') }}">
<meta name=Language content="{{langcode}}">
<meta name=CreationDate content="{{created}}">
<meta name=ModDate content="{{modified}}">
<meta name=Generator content="Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader">
<section class="fullpage">
<meta name=completed content="{{is_completed}}">
<meta name=maturecontent content="{{is_mature}}">
</head>
<section class="fullpage">
<img src="{{ cover }}" alt="Cover">
</section>
</section>
<div id="copyright-container">
<div id="copyright-container">
<h1 id="copyright-notice">Copyright Notice</h1>
<h2 id="copyright-title">{{ book_title }}</h2>
@@ -23,11 +39,8 @@
<div id="copyright-separator"></div>
{% if copyright.data %}
<img src="data:image/jpg;base64,{{copyright.data}}"
alt="{{copyright.name}}"
width="88"
height="31"
id="copyright-license-image">
<img src="data:image/jpg;base64,{{copyright.data}}" alt="{{copyright.name}}" width="88" height="31"
id="copyright-license-image">
{% endif %}
<p id="copyright-copyright">{{ statement }}</p>
@@ -40,9 +53,9 @@ id="copyright-license-image">
ID: {{ book_id }}.
<a href="https://wattpad.com/story/{{ book_id }}" target="_blank" id="copyright-link">View this Book Online</a>
</p>
</div>
</div>
<div id="book">
<div id="book">
<section id="contents" class="toc">
<h1>Table of Contents</h1>
<ul>
@@ -55,10 +68,10 @@ id="copyright-license-image">
{{parts[part_id] | safe}}
{% endfor %}
</div>
</div>
<h1>About the Author</h1>
<div id="author-container">
<h1>About the Author</h1>
<div id="author-container">
<div id="author-about">
<img src="data:image/jpg;base64,{{avatar}}" alt="{{author}}'s profile picture" id="author-profile-picture">
<h2 id="author-name">
@@ -66,8 +79,9 @@ id="copyright-license-image">
</h2>
<hr id="author-divider">
<p id="author-bio">
{{ description }}
{{ author_bio }}
</p>
</div>
</div>
</div>
</html>
@@ -1,26 +0,0 @@
# User-defined ExifTool tags: Completed and MatureContent, declared for
# both the XMP "xmp" table and the IPTC ApplicationRecord table.
%Image::ExifTool::UserDefined = (
    # XMP variants: writable as booleans (True/False).
    'Image::ExifTool::XMP::xmp' => {
        'Completed' => {
            'Writable' => 'boolean',
            'Groups'   => { 2 => 'Content' },
        },
        'MatureContent' => {
            'Writable' => 'boolean',
            'Groups'   => { 2 => 'Content' },
        },
    },
    # IPTC variants: stored as short strings (e.g., "Yes"/"No").
    'Image::ExifTool::IPTC::ApplicationRecord' => {
        161 => {
            'Name'   => 'Completed',
            'Format' => 'string[0,16]',
        },
        162 => {
            'Name'   => 'MatureContent',
            'Format' => 'string[0,16]',
        },
    },
);

1;    # End of config; evaluates to true.
+459 -458
View File
File diff suppressed because it is too large Load Diff