api: Remove dependency on exiftool
Use weasyprint to embed metadata
This commit is contained in:
@@ -17,7 +17,6 @@ dependencies = [
|
|||||||
"aiohttp-client-cache[all]",
|
"aiohttp-client-cache[all]",
|
||||||
"bs4>=0.0.2",
|
"bs4>=0.0.2",
|
||||||
"uvicorn>=0.32.1",
|
"uvicorn>=0.32.1",
|
||||||
"pyexiftool>=0.5.6",
|
|
||||||
"weasyprint>=63.0",
|
"weasyprint>=63.0",
|
||||||
"jinja2>=3.1.6",
|
"jinja2>=3.1.6",
|
||||||
]
|
]
|
||||||
@@ -31,5 +30,6 @@ aiohttp-client-cache = { git = "https://github.com/TheOnlyWayUp/aiohttp-client-c
|
|||||||
[dependency-groups]
|
[dependency-groups]
|
||||||
dev = [
|
dev = [
|
||||||
"ipykernel>=6.29.5",
|
"ipykernel>=6.29.5",
|
||||||
|
"ipynb>=0.5.1",
|
||||||
"ruff>=0.11.12",
|
"ruff>=0.11.12",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ from pathlib import Path
|
|||||||
from tempfile import NamedTemporaryFile, _TemporaryFileWrapper
|
from tempfile import NamedTemporaryFile, _TemporaryFileWrapper
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from exiftool import ExifTool
|
|
||||||
from jinja2 import Template
|
from jinja2 import Template
|
||||||
from weasyprint import CSS, HTML
|
from weasyprint import CSS, HTML
|
||||||
from weasyprint.text.fonts import FontConfiguration
|
from weasyprint.text.fonts import FontConfiguration
|
||||||
@@ -134,6 +133,12 @@ class PDFGenerator(AbstractGenerator):
|
|||||||
"book_title": self.story["title"],
|
"book_title": self.story["title"],
|
||||||
"cover": f"data:image/jpg;base64,{b64encode(self.cover).decode()}",
|
"cover": f"data:image/jpg;base64,{b64encode(self.cover).decode()}",
|
||||||
"username": self.story["user"]["username"],
|
"username": self.story["user"]["username"],
|
||||||
|
"author_bio": self.story["user"]["description"],
|
||||||
|
"tags": self.story["tags"],
|
||||||
|
"created": self.story["createDate"],
|
||||||
|
"modified": self.story["modifyDate"],
|
||||||
|
"is_completed": self.story["completed"],
|
||||||
|
"is_mature": self.story["mature"],
|
||||||
"description": self.story["description"],
|
"description": self.story["description"],
|
||||||
"avatar": b64encode(self.author).decode(),
|
"avatar": b64encode(self.author).decode(),
|
||||||
"copyright": {
|
"copyright": {
|
||||||
@@ -160,44 +165,10 @@ class PDFGenerator(AbstractGenerator):
|
|||||||
self.book.name, stylesheets=[stylesheet_obj], font_config=font_config
|
self.book.name, stylesheets=[stylesheet_obj], font_config=font_config
|
||||||
)
|
)
|
||||||
|
|
||||||
def add_metadata(self):
|
|
||||||
"""Write metadata to generated PDF file at self.book, using ExifTool."""
|
|
||||||
|
|
||||||
clean_description = (
|
|
||||||
self.story["description"].strip().replace("\n", "$/")
|
|
||||||
) # exiftool doesn't parse \ns correctly, they support $/ for the same instead. `&#xa;` (the HTML newline entity) is another option.
|
|
||||||
|
|
||||||
metadata = {
|
|
||||||
"Author": self.story["user"]["username"],
|
|
||||||
"Title": self.story["title"],
|
|
||||||
"Subject": clean_description,
|
|
||||||
"CreationDate": self.story["createDate"],
|
|
||||||
"ModDate": self.story["modifyDate"],
|
|
||||||
"Keywords": ",".join(self.story["tags"]),
|
|
||||||
"Language": self.story["language"]["name"],
|
|
||||||
"Completed": self.story["completed"],
|
|
||||||
"MatureContent": self.story["mature"],
|
|
||||||
"Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader",
|
|
||||||
} # As per https://exiftool.org/TagNames/PDF.html
|
|
||||||
|
|
||||||
with ExifTool(config_file=DATA_PATH / "exiftool.config") as et:
|
|
||||||
# Custom configuration adds Completed and MatureContent tags.
|
|
||||||
# exiftool logger logs executed command
|
|
||||||
et.execute(
|
|
||||||
*(
|
|
||||||
[f"-{key}={value}" for key, value in metadata.items()]
|
|
||||||
+ [
|
|
||||||
"-overwrite_original",
|
|
||||||
self.book.file.name,
|
|
||||||
]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
def compile(self):
|
def compile(self):
|
||||||
parts = self.generate_chapters()
|
parts = self.generate_chapters()
|
||||||
self.populate_template(parts)
|
self.populate_template(parts)
|
||||||
self.generate_pdf()
|
self.generate_pdf()
|
||||||
self.add_metadata()
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def dump(self) -> BytesIO:
|
def dump(self) -> BytesIO:
|
||||||
|
|||||||
@@ -1,73 +1,87 @@
|
|||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="{{ langcode }}">
|
<html lang="{{ langcode }}">
|
||||||
|
|
||||||
|
|
||||||
|
<head>
|
||||||
|
|
||||||
|
<title>{{ book_title }}</title>
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
|
||||||
<title>{{ book_title }}</title>
|
|
||||||
|
|
||||||
<section class="fullpage">
|
<meta name=Subject content="{{description}}">
|
||||||
<img src="{{ cover }}" alt="Cover">
|
<meta name=Author content="{{author}}">
|
||||||
|
<meta name=Keywords content="{{tags}}">
|
||||||
|
<meta name=Language content="{{langcode}}">
|
||||||
|
<meta name=CreationDate content="{{created}}">
|
||||||
|
<meta name=ModDate content="{{modified}}">
|
||||||
|
<meta name=Generator content="Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader">
|
||||||
|
|
||||||
|
<meta name=completed content="{{is_completed}}">
|
||||||
|
<meta name=maturecontent content="{{is_mature}}">
|
||||||
|
|
||||||
|
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<section class="fullpage">
|
||||||
|
<img src="{{ cover }}" alt="Cover">
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<div id="copyright-container">
|
||||||
|
<h1 id="copyright-notice">Copyright Notice</h1>
|
||||||
|
|
||||||
|
<h2 id="copyright-title">{{ book_title }}</h2>
|
||||||
|
<p id="copyright-author">By {{ author }}</p>
|
||||||
|
|
||||||
|
<div id="copyright-separator"></div>
|
||||||
|
|
||||||
|
<p id="copyright-ex-libris">Ex Libris Sapientiae</p>
|
||||||
|
|
||||||
|
<div id="copyright-separator"></div>
|
||||||
|
|
||||||
|
{% if copyright.data %}
|
||||||
|
<img src="data:image/jpg;base64,{{copyright.data}}" alt="{{copyright.name}}" width="88" height="31"
|
||||||
|
id="copyright-license-image">
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<p id="copyright-copyright">{{ statement }}</p>
|
||||||
|
|
||||||
|
<p id="copyright-rights">{{ freedoms }}</p>
|
||||||
|
|
||||||
|
<p id="copyright-printing">Printing: {{ printing }}</p>
|
||||||
|
|
||||||
|
<p id="book-link">
|
||||||
|
ID: {{ book_id }}.
|
||||||
|
<a href="https://wattpad.com/story/{{ book_id }}" target="_blank" id="copyright-link">View this Book Online</a>
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="book">
|
||||||
|
<section id="contents" class="toc">
|
||||||
|
<h1>Table of Contents</h1>
|
||||||
|
<ul>
|
||||||
|
{% for part_id in parts %}
|
||||||
|
<li><a href="#{{part_id}}"></a></li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
</section>
|
</section>
|
||||||
|
{% for part_id in parts %}
|
||||||
|
|
||||||
<div id="copyright-container">
|
{{parts[part_id] | safe}}
|
||||||
<h1 id="copyright-notice">Copyright Notice</h1>
|
{% endfor %}
|
||||||
|
</div>
|
||||||
<h2 id="copyright-title">{{ book_title }}</h2>
|
|
||||||
<p id="copyright-author">By {{ author }}</p>
|
|
||||||
|
|
||||||
<div id="copyright-separator"></div>
|
<h1>About the Author</h1>
|
||||||
|
<div id="author-container">
|
||||||
<p id="copyright-ex-libris">Ex Libris Sapientiae</p>
|
<div id="author-about">
|
||||||
|
<img src="data:image/jpg;base64,{{avatar}}" alt="{{author}}'s profile picture" id="author-profile-picture">
|
||||||
<div id="copyright-separator"></div>
|
<h2 id="author-name">
|
||||||
|
<a href="https://wattpad.com/user/{{ username }}" id="author-link">{{ username }}</a>
|
||||||
{% if copyright.data %}
|
</h2>
|
||||||
<img src="data:image/jpg;base64,{{copyright.data}}"
|
<hr id="author-divider">
|
||||||
alt="{{copyright.name}}"
|
<p id="author-bio">
|
||||||
width="88"
|
{{ author_bio }}
|
||||||
height="31"
|
|
||||||
id="copyright-license-image">
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
<p id="copyright-copyright">{{ statement }}</p>
|
|
||||||
|
|
||||||
<p id="copyright-rights">{{ freedoms }}</p>
|
|
||||||
|
|
||||||
<p id="copyright-printing">Printing: {{ printing }}</p>
|
|
||||||
|
|
||||||
<p id="book-link">
|
|
||||||
ID: {{ book_id }}.
|
|
||||||
<a href="https://wattpad.com/story/{{ book_id }}" target="_blank" id="copyright-link">View this Book Online</a>
|
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div id="book">
|
</html>
|
||||||
<section id="contents" class="toc">
|
|
||||||
<h1>Table of Contents</h1>
|
|
||||||
<ul>
|
|
||||||
{% for part_id in parts %}
|
|
||||||
<li><a href="#{{part_id}}"></a></li>
|
|
||||||
{% endfor %}
|
|
||||||
</ul>
|
|
||||||
</section>
|
|
||||||
{% for part_id in parts %}
|
|
||||||
|
|
||||||
{{parts[part_id] | safe}}
|
|
||||||
{% endfor %}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<h1>About the Author</h1>
|
|
||||||
<div id="author-container">
|
|
||||||
<div id="author-about">
|
|
||||||
<img src="data:image/jpg;base64,{{avatar}}" alt="{{author}}'s profile picture" id="author-profile-picture">
|
|
||||||
<h2 id="author-name">
|
|
||||||
<a href="https://wattpad.com/user/{{ username }}" id="author-link">{{ username }}</a>
|
|
||||||
</h2>
|
|
||||||
<hr id="author-divider">
|
|
||||||
<p id="author-bio">
|
|
||||||
{{ description }}
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</html>
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
|
|
||||||
%Image::ExifTool::UserDefined = (
|
|
||||||
'Image::ExifTool::XMP::xmp' => {
|
|
||||||
Completed => {
|
|
||||||
Writable => 'boolean', # Can be a boolean (True/False)
|
|
||||||
Groups => { 2 => 'Content' },
|
|
||||||
},
|
|
||||||
MatureContent => {
|
|
||||||
Writable => 'boolean', # Can be a boolean (True/False)
|
|
||||||
Groups => { 2 => 'Content' },
|
|
||||||
},
|
|
||||||
},
|
|
||||||
|
|
||||||
'Image::ExifTool::IPTC::ApplicationRecord' => {
|
|
||||||
161 => {
|
|
||||||
Name => 'Completed',
|
|
||||||
Format => 'string[0,16]', # Store as a string (e.g., "Yes"/"No")
|
|
||||||
},
|
|
||||||
162 => {
|
|
||||||
Name => 'MatureContent',
|
|
||||||
Format => 'string[0,16]', # Store as a string (e.g., "Yes"/"No")
|
|
||||||
},
|
|
||||||
},
|
|
||||||
);
|
|
||||||
|
|
||||||
1; # End
|
|
||||||
Generated
+459
-458
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user