refactor(api): Rewrite PDFGenerator
This commit is contained in:
@@ -0,0 +1,204 @@
|
|||||||
|
from base64 import b64encode
|
||||||
|
from io import BytesIO
|
||||||
|
from pathlib import Path
|
||||||
|
from tempfile import NamedTemporaryFile, _TemporaryFileWrapper
|
||||||
|
from typing import List, cast
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup, Tag
|
||||||
|
from exiftool import ExifTool
|
||||||
|
from jinja2 import Template
|
||||||
|
from weasyprint import CSS, HTML
|
||||||
|
from weasyprint.text.fonts import FontConfiguration
|
||||||
|
|
||||||
|
from ..models import Story
|
||||||
|
from .types import AbstractGenerator
|
||||||
|
|
||||||
|
DATA_PATH = Path(__file__).parent.joinpath("pdf")
ASSET_PATH = DATA_PATH.joinpath("assets")

# Shared opening of every statement that names the author as copyright holder.
_COPYRIGHT_PREFIX = "©️ {published_year} by {username}. "


def _cc_statement(variant: str) -> str:
    """Return the statement template for the Creative Commons 4.0 license *variant*."""
    return (
        _COPYRIGHT_PREFIX
        + "This work is licensed under a Creative Commons "
        + variant
        + " 4.0 International License."
    )


def _license(name, statement, freedoms, printing, asset):
    """Bundle one license's display data; *asset* is a file name under ASSET_PATH or None."""
    return {
        "name": name,
        "statement": statement,
        "freedoms": freedoms,
        "printing": printing,
        "asset": ASSET_PATH / asset if asset else None,
    }


# Maps Wattpad Copyright IDs to their corresponding data.
# The `statement` values are str.format templates expecting `published_year`
# and `username`.
COPYRIGHT_DATA = {
    1: _license(
        "All Rights Reserved",
        _COPYRIGHT_PREFIX + "All Rights Reserved.",
        "No reuse, redistribution, or modification without permission.",
        "Not allowed without explicit permission.",
        None,
    ),
    2: _license(
        "Public Domain",
        "This work is in the public domain. Originally published in {published_year} by {username}.",
        "Free to use for any purpose without permission.",
        "Allowed for personal or commercial purposes.",
        "cc-zero.png",
    ),
    3: _license(
        "Creative Commons Attribution (CC-BY)",
        _cc_statement("Attribution"),
        "Allows reuse, redistribution, and modification with credit to the author.",
        "Allowed with proper credit.",
        "by.png",
    ),
    4: _license(
        "CC Attribution NonCommercial (CC-BY-NC)",
        _cc_statement("Attribution-NonCommercial"),
        "Allows reuse and modification for non-commercial purposes with credit.",
        "Allowed for non-commercial purposes with proper credit.",
        "by-nc.png",
    ),
    5: _license(
        "CC Attribution NonCommercial NoDerivs (CC-BY-NC-ND)",
        _cc_statement("Attribution-NonCommercial-NoDerivs"),
        "Allows sharing in original form for non-commercial purposes with credit; no modifications allowed.",
        "Allowed for non-commercial purposes in original form with proper credit.",
        "by-nc-nd.png",
    ),
    6: _license(
        "CC Attribution NonCommercial ShareAlike (CC-BY-NC-SA)",
        _cc_statement("Attribution-NonCommercial-ShareAlike"),
        "Allows reuse and modification for non-commercial purposes under the same license, with credit.",
        "Allowed for non-commercial purposes with proper credit under the same license.",
        "by-nc-sa.png",
    ),
    7: _license(
        "CC Attribution ShareAlike (CC-BY-SA)",
        _cc_statement("Attribution-ShareAlike"),
        "Allows reuse and modification for any purpose under the same license, with credit.",
        "Allowed with proper credit under the same license.",
        "by-sa.png",
    ),
    8: _license(
        "CC Attribution NoDerivs (CC-BY-ND)",
        _cc_statement("Attribution-NoDerivs"),
        "Allows sharing in original form for any purpose with credit; no modifications allowed.",
        "Allowed in original form with proper credit.",
        "by-nd.png",
    ),
}
|
||||||
|
|
||||||
|
# Load the stylesheet and HTML template once, at import time.
# The encoding is explicit so decoding does not depend on the platform's
# locale default (assumes the bundled files are UTF-8 — TODO confirm).
STYLESHEET = (DATA_PATH / "stylesheet.css").read_text(encoding="utf-8")

TEMPLATE = (DATA_PATH / "book.html").read_text(encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
class PDFGenerator(AbstractGenerator):
    """Render a story to PDF with WeasyPrint and tag the result using ExifTool.

    The PDF is written to a named temporary file (``self.book``), which is
    closed when :meth:`dump` is called.
    """

    def __init__(
        self,
        metadata: Story,
        part_trees: List[BeautifulSoup],
        cover: bytes,
        images: List[List[bytes]] | None,
        author: bytes,
    ):
        """Store the story data and allocate the temporary output file.

        Args:
            metadata: Story metadata, accessed by key (``title``, ``parts``, ``user``, ...).
            part_trees: Parsed HTML trees, paired by position with ``metadata["parts"]``.
            cover: Raw cover image bytes.
            images: Per-part lists of raw image bytes, indexed like ``part_trees``,
                or ``None`` to leave ``<img>`` sources untouched.
            author: Raw bytes of the author's avatar image.
        """
        self.story = metadata
        self.parts = part_trees
        self.cover = cover
        self.images = images
        self.author = author

        self.book: _TemporaryFileWrapper = NamedTemporaryFile(suffix=".pdf")
        self.content: str = TEMPLATE

    def generate_chapters(self) -> dict[int, str]:
        """Return a dictionary of part_ids to content trees, with image URLs replaced with base64 encoded images if provided during initialization."""
        data: dict[int, str] = {}
        for idx, (part, tree) in enumerate(zip(self.story["parts"], self.parts)):
            if self.images:
                # Images are matched to <img> tags by position; zip stops at
                # the shorter of the two sequences.
                for img_data, img_tag in zip(self.images[idx], tree.find_all("img")):
                    img_tag["src"] = (
                        f"data:image/jpg;base64,{b64encode(img_data).decode()}"
                    )

            data[part["id"]] = tree.prettify()

        return data

    def populate_template(self, parts: dict[int, str]):
        """Populate HTML Template with Story data.

        Args:
            parts: Mapping of part id to rendered part HTML, as returned by
                :meth:`generate_chapters`.

        Raises:
            KeyError: If the story's copyright id is not in ``COPYRIGHT_DATA``.
        """
        # Named `license_info` so the site builtin `copyright` is not shadowed.
        license_info = COPYRIGHT_DATA[self.story["copyright"]]
        username = self.story["user"]["username"]
        badge = license_info["asset"]

        data = {
            "statement": license_info["statement"].format(
                username=username,
                # Takes the text before the first "-" of createDate as the year.
                published_year=self.story["createDate"].split("-", 2)[0],
            ),
            "author": username,
            "freedoms": license_info["freedoms"],
            "printing": license_info["printing"],
            "book_id": self.story["id"],
            "book_title": self.story["title"],
            "cover": f"data:image/jpg;base64,{b64encode(self.cover).decode()}",
            "username": username,
            "description": self.story["description"],
            "avatar": b64encode(self.author).decode(),
            "copyright": {
                # Licenses without a badge image get an empty string.
                "data": b64encode(badge.read_bytes()).decode() if badge else "",
                "name": license_info["name"],
            },
            "parts": parts,
        }

        self.content = Template(self.content).render(data)

    def generate_pdf(self):
        """Generate and write the PDF to a temporary file (self.book)."""
        font_config = FontConfiguration()

        stylesheet_obj = CSS(string=STYLESHEET, font_config=font_config)

        html_obj = HTML(string=self.content)
        html_obj.write_pdf(
            self.book.name, stylesheets=[stylesheet_obj], font_config=font_config
        )

    def add_metadata(self):
        """Write metadata to generated PDF file at self.book, using ExifTool."""

        clean_description = (
            self.story["description"].strip().replace("\n", "$/")
        )  # exiftool doesn't parse \ns correctly, they support $/ for the same instead. `&#xa;` is another option.

        metadata = {
            "Author": self.story["user"]["username"],
            "Title": self.story["title"],
            "Subject": clean_description,
            "CreationDate": self.story["createDate"],
            "ModDate": self.story["modifyDate"],
            "Keywords": ",".join(self.story["tags"]),
            "Language": self.story["language"]["name"],
            "Completed": self.story["completed"],
            "MatureContent": self.story["mature"],
            "Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader",
        }  # As per https://exiftool.org/TagNames/PDF.html

        tag_args = [f"-{key}={value}" for key, value in metadata.items()]

        with ExifTool(config_file=DATA_PATH / "exiftool.config") as et:
            # Custom configuration adds Completed and MatureContent tags.
            # exiftool logger logs executed command
            et.execute(
                *tag_args,
                "-overwrite_original",
                # Use the wrapper's documented `name` (the file path). The
                # inner `file.name` is an implementation detail and is not
                # the path on every Python version.
                self.book.name,
            )

    def compile(self):
        """Run the full pipeline: chapters, template, PDF, metadata. Returns True."""
        parts = self.generate_chapters()
        self.populate_template(parts)
        self.generate_pdf()
        self.add_metadata()
        return True

    def dump(self) -> BytesIO:
        """Return the PDF as an in-memory buffer and close the temporary file."""
        try:
            # Read by path rather than through the handle opened in __init__:
            # ExifTool's -overwrite_original replaces the file via a rename,
            # so that handle may still refer to the pre-metadata data.
            buffer = BytesIO(Path(self.book.name).read_bytes())
        finally:
            # Always release the temporary file, even if the read fails.
            self.book.close()

        return buffer
|
||||||
Reference in New Issue
Block a user