feat(api): Use new Parser and Generators

2025-06-09 14:43:18 +00:00
parent a790021057
commit 3853e0d586
6 changed files with 271 additions and 668 deletions
@@ -1,116 +1,108 @@
 from io import BytesIO
-from typing import List
+from typing import Generator, List
-import bs4
+from bs4 import BeautifulSoup
 from aiohttp_client_cache.session import CachedSession
 from ebooklib import epub
 from ..models import Story
-
+from .types import AbstractGenerator
 headers = {}
-class EPUBGenerator:
+class EPUBGenerator(AbstractGenerator):
-    """EPUB Generation utilities"""
+    def __init__(
-
+        self,
-    def __init__(self, data: Story, cover: bytes):
+        metadata: Story,
-        """Initialize EPUBGenerator. Create epub.EpubBook() and set metadata and cover."""
+        part_trees: List[BeautifulSoup],
-        self.epub = epub.EpubBook()
+        cover: bytes,
-        self.data = data
+        images: List[Generator[bytes]] | None,
    ):
        self.story = metadata
        self.parts = part_trees
        self.cover = cover
        self.images = images
-        # set metadata, defined in https://www.dublincore.org/specifications/dublin-core/dcmi-terms/#section-2
+        self.book: epub.EpubBook = epub.EpubBook()
        self.epub.add_author(data["user"]["username"])
-        self.epub.add_metadata("DC", "title", data["title"])
+    def add_metadata(self):
-        self.epub.add_metadata("DC", "description", data["description"])
+        """Add metadata to epub."""
-        self.epub.add_metadata("DC", "date", data["createDate"])
+        self.book.add_author(self.story["user"]["username"])
        self.epub.add_metadata("DC", "modified", data["modifyDate"])
        self.epub.add_metadata("DC", "language", data["language"]["name"])
-        self.epub.add_metadata(
+        self.book.add_metadata("DC", "title", self.story["title"])
-            None, "meta", "", {"name": "tags", "content": ", ".join(data["tags"])}
+        self.book.add_metadata("DC", "description", self.story["description"])
        self.book.add_metadata("DC", "date", self.story["createDate"])
        self.book.add_metadata("DC", "modified", self.story["modifyDate"])
        self.book.add_metadata("DC", "language", self.story["language"]["name"])
        self.book.add_metadata(
            None, "meta", "", {"name": "tags", "content": ", ".join(self.story["tags"])}
        )
-        self.epub.add_metadata(
+        self.book.add_metadata(
            None, "meta", "", {"name": "mature", "content": str(int(data["mature"]))}
        )
        self.epub.add_metadata(
            None,
            "meta",
            "",
-            {"name": "completed", "content": str(int(data["completed"]))},
+            {"name": "mature", "content": str(int(self.story["mature"]))},
        )
        self.book.add_metadata(
            None,
            "meta",
            "",
            {"name": "completed", "content": str(int(self.story["completed"]))},
        )
-        # Set cover
+    def add_cover(self):
-        self.epub.set_cover("cover.jpg", cover)
+        """Add cover to epub."""
        self.book.set_cover("cover.jpg", self.cover)
        cover_chapter = epub.EpubHtml(
            file_name="titlepage.xhtml",  # Standard for cover page
        )
        cover_chapter.set_content('<img src="cover.jpg">')
-        self.epub.add_item(cover_chapter)
+        self.book.add_item(cover_chapter)
-    async def add_chapters(
+    def add_chapters(self):
-        self, contents: List[bs4.Tag], download_images: bool = False
+        """Add chapters to epub, replacing references to image urls to static image paths if images are provided during initialization."""
-    ):
+        chapters = []
        """Add chapters to the Epub, downloading images if necessary. Sets the table of contents and spine."""
        chapters: List[epub.EpubHtml] = []
-        for cidx, (part, content) in enumerate(zip(self.data["parts"], contents)):
+        for idx, (part, tree) in enumerate(zip(self.story["parts"], self.parts)):
            title = part["title"]
            # Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1
            chapter = epub.EpubHtml(
-                title=title,
+                title=part["title"], file_name=f"{idx}_{part['id']}"
                file_name=f"{cidx}_{part['id']}.xhtml",  # See issue #30
                lang=self.data["language"]["name"],
                uid=str(part["id"]).encode(),
            )
-            str_content = content.prettify()
+            if self.images:
-            if download_images:  # ! TODO : Download images elsewhere
+                for img_idx, (img_data, img_tag) in enumerate(
-                soup = content
+                    zip(self.images[idx], tree.find_all("img"))
-
+                ):
-                async with CachedSession(
+                    path = f"static/{idx}_{part['id']}/{img_idx}.jpeg"
                    headers=headers, cache=None
                ) as session:  # Don't cache images.
                    for idx, image in enumerate(soup.find_all("img")):
                        if not image["src"]:
                            continue
                        # Find all image tags and filter for those with sources
                        async with session.get(image["src"]) as response:
                    img = epub.EpubImage(
-                                media_type="image/jpeg",
+                        media_type="image/jpeg", content=img_data, file_name=path
                                content=await response.read(),
                                file_name=f"static/{cidx}/{idx}.jpeg",
                    )
-                            self.epub.add_item(img)
+                    self.book.add_item(img)
                            # Fetch image and pack
-                            str_content = str_content.replace(
+                    img_tag["src"] = path
                                str(image["src"]), f"static/{cidx}/{idx}.jpeg"
                            )
            chapter.set_content(str_content)
            self.epub.add_item(chapter)
            chapter.set_content(tree.prettify())
            self.book.add_item(chapter)
            chapters.append(chapter)
-            yield title
+        # ! Review, are these needed? #11
-
+        self.book.toc = chapters
        self.epub.toc = chapters
        # Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
-        self.epub.add_item(epub.EpubNcx())
+        self.book.add_item(epub.EpubNcx())
-        self.epub.add_item(epub.EpubNav())
+        self.book.add_item(epub.EpubNav())
        # create spine
-        self.epub.spine = ["nav"] + chapters
+        self.book.spine = ["nav"] + chapters
    def compile(self):
        self.add_metadata()
        self.add_cover()
        self.add_chapters()
        return True
    def dump(self) -> BytesIO:
        # Thanks https://stackoverflow.com/a/75398222
        buffer = BytesIO()
-        epub.write_epub(buffer, self.epub)
+        epub.write_epub(buffer, self.book)
        buffer.seek(0)
@@ -1,109 +0,0 @@
 from io import BytesIO
 from typing import List
 from bs4 import BeautifulSoup
 from ebooklib import epub
 from ..models import Story
 from .types import AbstractGenerator
 class EPUBGenerator(AbstractGenerator):
    def __init__(
        self,
        metadata: Story,
        part_trees: List[BeautifulSoup],
        cover: bytes,
        images: List[List[bytes]] | None,
    ):
        """Store the story inputs and create the empty EPUB book.

        Args:
            metadata: Story data read by the ``add_*`` methods.
            part_trees: Parsed trees, paired positionally with
                ``metadata["parts"]`` when chapters are added.
            cover: Cover image bytes, stored in the book as ``cover.jpg``.
            images: Image bytes grouped per part; a falsy value leaves the
                ``<img>`` sources in the trees untouched.
        """
        # The book starts empty; the add_* methods fill it in.
        self.book: epub.EpubBook = epub.EpubBook()
        self.story, self.parts = metadata, part_trees
        self.cover, self.images = cover, images
    def add_metadata(self):
        """Add metadata to epub."""
        self.book.add_author(self.story["user"]["username"])
        self.book.add_metadata("DC", "title", self.story["title"])
        self.book.add_metadata("DC", "description", self.story["description"])
        self.book.add_metadata("DC", "date", self.story["createDate"])
        self.book.add_metadata("DC", "modified", self.story["modifyDate"])
        self.book.add_metadata("DC", "language", self.story["language"]["name"])
        self.book.add_metadata(
            None, "meta", "", {"name": "tags", "content": ", ".join(self.story["tags"])}
        )
        self.book.add_metadata(
            None,
            "meta",
            "",
            {"name": "mature", "content": str(int(self.story["mature"]))},
        )
        self.book.add_metadata(
            None,
            "meta",
            "",
            {"name": "completed", "content": str(int(self.story["completed"]))},
        )
    def add_cover(self):
        """Register the cover image and add a title page that displays it."""
        title_page = epub.EpubHtml(
            file_name="titlepage.xhtml",  # Standard for cover page
        )
        # The page is a single image pointing at the cover file set below.
        title_page.set_content('<img src="cover.jpg">')

        self.book.set_cover("cover.jpg", self.cover)
        self.book.add_item(title_page)
    def add_chapters(self):
        """Add one chapter per story part, then set the table of contents and spine.

        If images were provided during initialization, each part's images are
        embedded under ``static/`` and the ``<img>`` tags of that part's tree are
        pointed at the embedded files. Images and tags are paired positionally,
        so pairing stops at the shorter of the two sequences.
        """
        chapters = []
        for idx, (part, tree) in enumerate(zip(self.story["parts"], self.parts)):
            stem = f"{idx}_{part['id']}"
            # Chapter documents carry an explicit .xhtml extension.
            # NOTE(review): the previous implementation named chapters
            # "<idx>_<id>.xhtml" and tagged that line "See issue #30" — confirm
            # the extension-less name was not intentional.
            chapter = epub.EpubHtml(title=part["title"], file_name=f"{stem}.xhtml")

            if self.images:
                for img_idx, (img_data, img_tag) in enumerate(
                    zip(self.images[idx], tree.find_all("img"))
                ):
                    path = f"static/{stem}/{img_idx}.jpeg"
                    img = epub.EpubImage(
                        media_type="image/jpeg", content=img_data, file_name=path
                    )
                    self.book.add_item(img)
                    # Rewrite the tag so the chapter references the embedded copy.
                    img_tag["src"] = path

            chapter.set_content(tree.prettify())
            self.book.add_item(chapter)
            chapters.append(chapter)

        # ! Review, are these needed? #11
        self.book.toc = chapters

        # Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
        self.book.add_item(epub.EpubNcx())
        self.book.add_item(epub.EpubNav())

        # create spine
        self.book.spine = ["nav"] + chapters
    def compile(self):
        self.add_metadata()
        self.add_cover()
        self.add_chapters()
        return True
    def dump(self) -> BytesIO:
        """Write the book into an in-memory buffer and return it rewound to the start."""
        # Thanks https://stackoverflow.com/a/75398222
        out = BytesIO()
        epub.write_epub(out, self.book)
        out.seek(0)  # callers read from the beginning
        return out
@@ -1,204 +0,0 @@
 from base64 import b64encode
 from io import BytesIO
 from pathlib import Path
 from tempfile import NamedTemporaryFile, _TemporaryFileWrapper
 from typing import List, cast
 from bs4 import BeautifulSoup, Tag
 from exiftool import ExifTool
 from jinja2 import Template
 from weasyprint import CSS, HTML
 from weasyprint.text.fonts import FontConfiguration
 from ..models import Story
 from .types import AbstractGenerator
 # "pdf" directory next to this module; stylesheet.css, book.html and
 # exiftool.config are loaded from it.
 DATA_PATH = Path(__file__).parent / "pdf"
 # License badge images referenced by the "asset" entries below.
 ASSET_PATH = DATA_PATH / "assets"
 # Per-license entry:
 #   name      - license name
 #   statement - format string with {published_year} and {username} placeholders
 #   freedoms  - text describing permitted reuse
 #   printing  - text describing printing permissions
 #   asset     - Path to the badge image, or None when there is no badge
 COPYRIGHT_DATA = {
    1: {
        "name": "All Rights Reserved",
        "statement": "©️ {published_year} by {username}. All Rights Reserved.",
        "freedoms": "No reuse, redistribution, or modification without permission.",
        "printing": "Not allowed without explicit permission.",
        "asset": None,
    },
    2: {
        "name": "Public Domain",
        "statement": "This work is in the public domain. Originally published in {published_year} by {username}.",
        "freedoms": "Free to use for any purpose without permission.",
        "printing": "Allowed for personal or commercial purposes.",
        "asset": ASSET_PATH / "cc-zero.png",
    },
    3: {
        "name": "Creative Commons Attribution (CC-BY)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution 4.0 International License.",
        "freedoms": "Allows reuse, redistribution, and modification with credit to the author.",
        "printing": "Allowed with proper credit.",
        "asset": ASSET_PATH / "by.png",
    },
    4: {
        "name": "CC Attribution NonCommercial (CC-BY-NC)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.",
        "freedoms": "Allows reuse and modification for non-commercial purposes with credit.",
        "printing": "Allowed for non-commercial purposes with proper credit.",
        "asset": ASSET_PATH / "by-nc.png",
    },
    5: {
        "name": "CC Attribution NonCommercial NoDerivs (CC-BY-NC-ND)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivs 4.0 International License.",
        "freedoms": "Allows sharing in original form for non-commercial purposes with credit; no modifications allowed.",
        "printing": "Allowed for non-commercial purposes in original form with proper credit.",
        "asset": ASSET_PATH / "by-nc-nd.png",
    },
    6: {
        "name": "CC Attribution NonCommercial ShareAlike (CC-BY-NC-SA)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.",
        "freedoms": "Allows reuse and modification for non-commercial purposes under the same license, with credit.",
        "printing": "Allowed for non-commercial purposes with proper credit under the same license.",
        "asset": ASSET_PATH / "by-nc-sa.png",
    },
    7: {
        "name": "CC Attribution ShareAlike (CC-BY-SA)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.",
        "freedoms": "Allows reuse and modification for any purpose under the same license, with credit.",
        "printing": "Allowed with proper credit under the same license.",
        "asset": ASSET_PATH / "by-sa.png",
    },
    8: {
        "name": "CC Attribution NoDerivs (CC-BY-ND)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NoDerivs 4.0 International License.",
        "freedoms": "Allows sharing in original form for any purpose with credit; no modifications allowed.",
        "printing": "Allowed in original form with proper credit.",
        "asset": ASSET_PATH / "by-nd.png",
    },
 }  # Maps Wattpad Copyright IDs to their corresponding data.
 # Stylesheet and HTML template are read once, at import time. The encoding is
 # pinned so decoding does not depend on the platform's locale default.
 # NOTE(review): assumes both files are stored as UTF-8 — confirm.
 with open(DATA_PATH / "stylesheet.css", encoding="utf-8") as reader:
    STYLESHEET = reader.read()
 with open(DATA_PATH / "book.html", encoding="utf-8") as reader:
    TEMPLATE = reader.read()
 class PDFGenerator(AbstractGenerator):
    def __init__(
        self,
        metadata: Story,
        part_trees: List[BeautifulSoup],
        cover: bytes,
        images: List[List[bytes]] | None,
        author: bytes,
    ):
        """Store the story inputs and open the temporary file that receives the PDF.

        Args:
            metadata: Story data used for the template and the PDF metadata.
            part_trees: Parsed trees, paired positionally with
                ``metadata["parts"]``.
            cover: Cover image bytes, embedded base64-encoded in the template.
            images: Image bytes grouped per part; a falsy value leaves the
                ``<img>`` sources in the trees untouched.
            author: Author image bytes, passed base64-encoded to the template
                as ``avatar``.
        """
        self.story, self.parts = metadata, part_trees
        self.cover, self.images, self.author = cover, images, author

        # Starts as the raw template; populate_template() replaces it with the
        # rendered markup.
        self.content = TEMPLATE
        # Stays open until dump() closes it.
        self.book: _TemporaryFileWrapper = NamedTemporaryFile(suffix=".pdf")
    def generate_chapters(self) -> dict[int, str]:
        """Return a dictionary of part_ids to content trees, with image URLs replaced with base64 encoded images if provided during initialization."""
        data: dict[int, str] = {}
        for idx, (part, tree) in enumerate(zip(self.story["parts"], self.parts)):
            if self.images:
                for img_idx, (img_data, img_tag) in enumerate(
                    zip(self.images[idx], tree.find_all("img"))
                ):
                    img_tag[
                        "src"
                    ] = f"data:image/jpg;base64,{b64encode(img_data).decode()}"
            data[part["id"]] = tree.prettify()
        return data
    def populate_template(self, parts: dict[int, str]):
        """Render the HTML template with story, license and chapter data.

        The rendered markup replaces ``self.content``.

        Args:
            parts: Mapping of part id to chapter HTML, exposed to the template
                as ``parts``.
        """
        story = self.story
        license_info = COPYRIGHT_DATA[story["copyright"]]
        username = story["user"]["username"]
        # The text before the first "-" of createDate is used as the year.
        published_year = story["createDate"].split("-", 2)[0]

        context = {
            "statement": license_info["statement"].format(
                username=username, published_year=published_year
            ),
            "author": username,
            "freedoms": license_info["freedoms"],
            "printing": license_info["printing"],
            "book_id": story["id"],
            "book_title": story["title"],
            "cover": f"data:image/jpg;base64,{b64encode(self.cover).decode()}",
            "username": username,
            "description": story["description"],
            "avatar": b64encode(self.author).decode(),
        }

        badge = license_info["asset"]
        context["copyright"] = {
            # Licenses without a badge file get an empty string.
            "data": b64encode(badge.read_bytes()).decode() if badge else "",
            "name": license_info["name"],
        }
        context["parts"] = parts

        self.content: str = Template(self.content).render(context)
    def generate_pdf(self):
        """Render ``self.content`` with the module stylesheet into the temporary file at ``self.book``."""
        fonts = FontConfiguration()
        # The same font configuration is handed to the stylesheet and the writer.
        sheet = CSS(string=STYLESHEET, font_config=fonts)
        document = HTML(string=self.content)
        document.write_pdf(self.book.name, stylesheets=[sheet], font_config=fonts)
    def add_metadata(self):
        """Write story metadata into the generated PDF at ``self.book`` using ExifTool."""
        clean_description = (
            self.story["description"].strip().replace("\n", "$/")
        )  # exiftool doesn't parse \ns correctly, they support $/ for the same instead. `&#xa;` is another option.

        metadata = {
            "Author": self.story["user"]["username"],
            "Title": self.story["title"],
            "Subject": clean_description,
            "CreationDate": self.story["createDate"],
            "ModDate": self.story["modifyDate"],
            "Keywords": ",".join(self.story["tags"]),
            "Language": self.story["language"]["name"],
            "Completed": self.story["completed"],
            "MatureContent": self.story["mature"],
            "Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader",
        }  # As per https://exiftool.org/TagNames/PDF.html

        # One "-Tag=value" argument per entry, followed by the target file.
        arguments = [f"-{key}={value}" for key, value in metadata.items()]
        # The wrapper's own ``name`` is the file path and is what generate_pdf()
        # writes to.
        # NOTE(review): the inner ``file.name`` may be a descriptor number on
        # some interpreter versions — confirm.
        arguments += ["-overwrite_original", self.book.name]

        with ExifTool(config_file=DATA_PATH / "exiftool.config") as et:
            # Custom configuration adds Completed and MatureContent tags.
            # exiftool logger logs executed command
            et.execute(*arguments)
    def compile(self):
        parts = self.generate_chapters()
        self.populate_template(parts)
        self.generate_pdf()
        self.add_metadata()
        return True
    def dump(self) -> BytesIO:
        self.book.seek(0)
        buffer = BytesIO(self.book.read())
        self.book.close()
        return buffer
@@ -1,273 +1,182 @@
 import tempfile
 from base64 import b64encode
 from io import BytesIO
-from typing import List, cast
+from pathlib import Path
 from tempfile import NamedTemporaryFile, _TemporaryFileWrapper
 from typing import Generator, List, cast
-import bs4
+from bs4 import BeautifulSoup, Tag
 from bs4 import BeautifulSoup
 from eliot import start_action
 from exiftool import ExifTool
 from jinja2 import Template
 from weasyprint import CSS, HTML
 from weasyprint.text.fonts import FontConfiguration
 from ..logs import exiftool_logger
 from ..models import Story
-from ..utils import smart_trim
+from .types import AbstractGenerator
 DATA_PATH = Path(__file__).parent / "pdf"
 ASSET_PATH = DATA_PATH / "assets"
-async def fetch_image(*args, **kwargs):
+COPYRIGHT_DATA = {
    # TODO
    raise NotImplementedError()
 class PDFGenerator:
    """PDF Generation utilities"""
    def __init__(self, data: Story, cover: bytes):
        """Initialize PDGenerator, create PDF Temporary file."""
        self.data = data
        self.file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=True)
        self.cover = cover
        self.content: str = ""
        self.copyright = {
    1: {
        "name": "All Rights Reserved",
        "statement": "©️ {published_year} by {username}. All Rights Reserved.",
        "freedoms": "No reuse, redistribution, or modification without permission.",
        "printing": "Not allowed without explicit permission.",
-                "image_url": None,
+        "asset": None,
    },
    2: {
        "name": "Public Domain",
        "statement": "This work is in the public domain. Originally published in {published_year} by {username}.",
        "freedoms": "Free to use for any purpose without permission.",
        "printing": "Allowed for personal or commercial purposes.",
-                "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/cc-zero.png",
+        "asset": ASSET_PATH / "cc-zero.png",
    },
    3: {
        "name": "Creative Commons Attribution (CC-BY)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution 4.0 International License.",
        "freedoms": "Allows reuse, redistribution, and modification with credit to the author.",
        "printing": "Allowed with proper credit.",
-                "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by.png",
+        "asset": ASSET_PATH / "by.png",
    },
    4: {
        "name": "CC Attribution NonCommercial (CC-BY-NC)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.",
        "freedoms": "Allows reuse and modification for non-commercial purposes with credit.",
        "printing": "Allowed for non-commercial purposes with proper credit.",
-                "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc.png",
+        "asset": ASSET_PATH / "by-nc.png",
    },
    5: {
        "name": "CC Attribution NonCommercial NoDerivs (CC-BY-NC-ND)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivs 4.0 International License.",
        "freedoms": "Allows sharing in original form for non-commercial purposes with credit; no modifications allowed.",
        "printing": "Allowed for non-commercial purposes in original form with proper credit.",
-                "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-nd.png",
+        "asset": ASSET_PATH / "by-nc-nd.png",
    },
    6: {
        "name": "CC Attribution NonCommercial ShareAlike (CC-BY-NC-SA)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.",
        "freedoms": "Allows reuse and modification for non-commercial purposes under the same license, with credit.",
        "printing": "Allowed for non-commercial purposes with proper credit under the same license.",
-                "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-sa.png",
+        "asset": ASSET_PATH / "by-nc-sa.png",
    },
    7: {
        "name": "CC Attribution ShareAlike (CC-BY-SA)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.",
        "freedoms": "Allows reuse and modification for any purpose under the same license, with credit.",
        "printing": "Allowed with proper credit under the same license.",
-                "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-sa.png",
+        "asset": ASSET_PATH / "by-sa.png",
    },
    8: {
        "name": "CC Attribution NoDerivs (CC-BY-ND)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NoDerivs 4.0 International License.",
        "freedoms": "Allows sharing in original form for any purpose with credit; no modifications allowed.",
        "printing": "Allowed in original form with proper credit.",
-                "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nd.png",
+        "asset": ASSET_PATH / "by-nd.png",
    },
 }  # Maps Wattpad Copyright IDs to their corresponding data.
 # Stylesheet and HTML template are read once, at import time. The encoding is
 # pinned so decoding does not depend on the platform's locale default.
 # NOTE(review): assumes both files are stored as UTF-8 — confirm.
 with open(DATA_PATH / "stylesheet.css", encoding="utf-8") as reader:
    STYLESHEET = reader.read()
 with open(DATA_PATH / "book.html", encoding="utf-8") as reader:
    TEMPLATE = reader.read()
 class PDFGenerator(AbstractGenerator):
    def __init__(
        self,
        metadata: Story,
        part_trees: List[BeautifulSoup],
        cover: bytes,
        # All three Generator parameters are spelled out: the one-argument form
        # is only accepted on Python versions where the send/return types have
        # defaults.
        images: List[Generator[bytes, None, None]] | None,
        author_image: bytes,
    ):
        """Store the story inputs and open the temporary file that receives the PDF.

        Args:
            metadata: Story data used for the template and the PDF metadata.
            part_trees: Parsed trees, paired positionally with
                ``metadata["parts"]``.
            cover: Cover image bytes, embedded base64-encoded in the template.
            images: One generator of image bytes per part; a falsy value leaves
                the ``<img>`` sources in the trees untouched.
            author_image: Author image bytes, passed base64-encoded to the
                template as ``avatar``.
        """
        self.story = metadata
        self.parts = part_trees
        self.cover = cover
        self.images = images
        self.author = author_image
        # Stays open until dump() closes it.
        self.book: _TemporaryFileWrapper = NamedTemporaryFile(suffix=".pdf")
        # Starts as the raw template; populate_template() replaces it with the
        # rendered markup.
        self.content = TEMPLATE
    def generate_chapters(self) -> dict[int, str]:
        """Return a dictionary of part_ids to content trees, with image URLs replaced with base64 encoded images if provided during initialization."""
        data: dict[int, str] = {}
        for idx, (part, tree) in enumerate(zip(self.story["parts"], self.parts)):
            if self.images:
                for img_idx, (img_data, img_tag) in enumerate(
                    zip(self.images[idx], tree.find_all("img"))
                ):
                    img_tag["src"] = (
                        f"data:image/jpg;base64,{b64encode(img_data).decode()}"
                    )
            data[part["id"]] = tree.prettify()
        return data
    def populate_template(self, parts: dict[int, str]):
        """Populate HTML Template with Story data."""
        copyright = COPYRIGHT_DATA[self.story["copyright"]]
        data = {
            "statement": copyright["statement"].format(
                username=self.story["user"]["username"],
                published_year=self.story["createDate"].split("-", 2)[0],
            ),
            "author": self.story["user"]["username"],
            "freedoms": copyright["freedoms"],
            "printing": copyright["printing"],
            "book_id": self.story["id"],
            "book_title": self.story["title"],
            "cover": f"data:image/jpg;base64,{b64encode(self.cover).decode()}",
            "username": self.story["user"]["username"],
            "description": self.story["description"],
            "avatar": b64encode(self.author).decode(),
            "copyright": {
                "data": b64encode(copyright["asset"].read_bytes()).decode()
                if copyright["asset"]
                else "",
                "name": copyright["name"],
            },
            "parts": parts,
        }
-        with open("./pdf/stylesheet.css") as reader:
+        self.content: str = Template(self.content).render(data)
            self.stylesheet = reader.read()
        with open("./pdf/book.html") as reader:
            self.template = reader.read()
    async def generate_cover_and_copyright_html(
        self,
    ) -> str:
        """Generate Cover and Copyright file, fetch copyright image (cached), use self.cover for cover."""
        copyright_data = self.copyright[self.data["copyright"]]
        template = self.template
        about_copyright = (
            template.replace(
                "{statement}",
                copyright_data["statement"].format(
                    username=self.data["user"]["username"],
                    published_year=self.data["createDate"].split("-", 2)[0],
                ),
            )
            .replace("{author}", self.data["user"]["username"])
            .replace("{freedoms}", copyright_data["freedoms"])
            .replace(
                "{printing}",
                copyright_data["printing"],
            )
            .replace("{book_id}", self.data["id"])
            .replace("{book_title}", self.data["title"])
        )
        copyright_image = (
            await fetch_image(copyright_data["image_url"], should_cache=True)
            if copyright_data["image_url"]
            else None
        )
        image_block = (
            """<img src="{image_url}" 
 alt="{name}" 
 width="88" 
 height="31" 
 id="copyright-license-image">""".format(
                image_url=f"data:image/jpg;base64,{b64encode(copyright_image).decode()}",
                name=copyright_data["name"],
            )
            if copyright_image
            else ""
        )
        about_copyright = (
            about_copyright.replace(
                "{copyright_image}",
                image_block,
            )
            if image_block
            else about_copyright.replace("{copyright_image}", "")
        )
        about_copyright = about_copyright.replace(
            "{cover}", f"data:image/jpg;base64,{b64encode(self.cover).decode()}"
        )
        self.template = about_copyright
        return about_copyright
    async def generate_about_author_chapter(self) -> str:
        """Generate About the Author file, fetch avatar."""
        author_avatar = (
            await fetch_image(
                self.data["user"]["avatar"].replace("128", "512")
            )  # Increase image resolution
            if self.data["user"]["avatar"]
            else None
        )
        about_author = self.template.replace(
            "{username}", self.data["user"]["username"]
        ).replace("{description}", smart_trim(self.data["user"]["description"]))
        about_author = (
            about_author.replace(
                "{avatar}",
                f"""
                <img src="data:image/jpg;base64,{b64encode(author_avatar).decode()}" alt="Author's profile picture" id="author-profile-picture">""",
            )
            if author_avatar
            else about_author.replace("{avatar}", "")
        )
        self.template = about_author
        return about_author
    def generate_toc(self):
        ids = [part["id"] for part in self.data["parts"]]
        clean = BeautifulSoup(
            """
        <section id="contents" class="toc">
        <h1>Table of Contents</h1>
        <ul></ul>
        </section>
        """,
            "html.parser",
        )  # html.parser doesn't create <html>/<body> tags automatically
        ul = cast(bs4.Tag, clean.find("ul"))
        for part_id in ids:
            li = clean.new_tag("li")
            a = clean.new_tag("a")
            a["href"] = f"#{part_id}"
            li.append(a)
            ul.append(li)
        insert_point = cast(bs4.Tag, self.tree.find("div", {"id": "book"}))
        insert_point.append(clean)
        return str(clean)
    async def add_chapters(
        self, contents: List[bs4.Tag], download_images: bool = False
    ):
        """Add chapters to the PDF, downloading images if necessary. Also add Cover, Copyright, and About the Author pages."""
        # # Cover and Copyright Page
        await self.generate_cover_and_copyright_html()
        await self.generate_about_author_chapter()
        self.tree = BeautifulSoup(self.template, "lxml")
        self.generate_toc()
        for part, content in zip(self.data["parts"], contents):
            insert_point = cast(bs4.Tag, self.tree.find("div", {"id": "book"}))
            insert_point.append(content)
            yield part["title"]
        # # About the Author page
        # about_author_html = await self.generate_about_author_chapter()
        # chapters.insert(0, cover_and_copyright_html)
        # chapters.append(about_author_html)
        with start_action(
            action_type="generate_pdf",
            output_filename=self.file.name,
            title=self.data["title"],
        ):
            # PDF Generation with wkhtmltopdf, written to self.file
            # At this stage, we have a bunch of HTML Files representing all the chapters that need to be generated. PDFKit handles ToC generation, so that's not included.
    def generate_pdf(self):
        """Generate and write the PDF to a temporary file (self.book)."""
        font_config = FontConfiguration()
-            stylesheet_obj = CSS(string=self.stylesheet, font_config=font_config)
+        stylesheet_obj = CSS(string=STYLESHEET, font_config=font_config)
-            html_obj = HTML(string=str(self.tree))
+        html_obj = HTML(string=self.content)
        html_obj.write_pdf(
-                self.file.name, stylesheets=[stylesheet_obj], font_config=font_config
+            self.book.name, stylesheets=[stylesheet_obj], font_config=font_config
        )
-        with start_action(action_type="add_metadata") as action:
+    def add_metadata(self):
-            # Metadata generation with Exiftool
+        """Write metadata to generated PDF file at self.book, using ExifTool."""
        clean_description = (
-                self.data["description"].strip().replace("\n", "$/")
+            self.story["description"].strip().replace("\n", "$/")
        )  # exiftool doesn't parse \ns correctly, they support $/ for the same instead. `&#xa;` is another option.
            action.log(f"clean_description: {clean_description}")
        metadata = {
-                "Author": self.data["user"]["username"],
+            "Author": self.story["user"]["username"],
-                "Title": self.data["title"],
+            "Title": self.story["title"],
            "Subject": clean_description,
-                "CreationDate": self.data["createDate"],
+            "CreationDate": self.story["createDate"],
-                "ModDate": self.data["modifyDate"],
+            "ModDate": self.story["modifyDate"],
-                "Keywords": ",".join(self.data["tags"]),
+            "Keywords": ",".join(self.story["tags"]),
-                "Language": self.data["language"]["name"],
+            "Language": self.story["language"]["name"],
-                "Completed": self.data["completed"],
+            "Completed": self.story["completed"],
-                "MatureContent": self.data["mature"],
+            "MatureContent": self.story["mature"],
            "Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader",
        }  # As per https://exiftool.org/TagNames/PDF.html
-            action.log(f"options: {metadata}")
+        with ExifTool(config_file=DATA_PATH / "exiftool.config") as et:
            with ExifTool(
                config_file="../exiftool.config", logger=exiftool_logger
            ) as et:
            # Custom configuration adds Completed and MatureContent tags.
            # exiftool logger logs executed command
            et.execute(
@@ -275,14 +184,21 @@ id="copyright-license-image">""".format(
                    [f"-{key}={value}" for key, value in metadata.items()]
                    + [
                        "-overwrite_original",
-                            self.file.file.name,
+                        self.book.file.name,
                    ]
                )
            )
    def compile(self):
        """Run the generation steps in order and report completion.

        Chapters are generated first and passed straight to
        ``populate_template``; ``generate_pdf`` and then ``add_metadata``
        are called afterwards, in that order.

        Returns:
            Always ``True`` once every step has returned.
        """
        self.populate_template(self.generate_chapters())
        self.generate_pdf()
        self.add_metadata()
        return True
    def dump(self) -> BytesIO:
-        self.file.seek(0)
+        self.book.seek(0)
-        buffer = BytesIO(self.file.read())
+        buffer = BytesIO(self.book.read())
-        self.file.close()
+        self.book.close()
        return buffer
@@ -1,6 +1,6 @@
 from io import BytesIO
 from tempfile import _TemporaryFileWrapper
-from typing import List, Literal
+from typing import Generator, List, Literal
 from bs4 import BeautifulSoup
 from ebooklib.epub import EpubBook
@@ -23,7 +23,7 @@ class AbstractGenerator:
        metadata: Story,
        part_trees: List[BeautifulSoup],
        cover: bytes,
-        images: List[List[bytes]] | None,
+        images: List[Generator[bytes]] | None,
    ):
        self.story = metadata
        self.parts = part_trees
@@ -7,6 +7,7 @@ from typing import Optional
 from zipfile import ZipFile
 from aiohttp import ClientResponseError
 from bs4 import BeautifulSoup
 from eliot import start_action
 from fastapi import FastAPI, HTTPException, Request
 from fastapi.responses import (
@@ -19,6 +20,7 @@ from fastapi.staticfiles import StaticFiles
 from create_book import (
    EPUBGenerator,
    PDFGenerator,
    StoryNotFoundError,
    WattpadError,
    fetch_cookies,
@@ -26,10 +28,10 @@ from create_book import (
    fetch_story,
    fetch_story_content_zip,
    fetch_story_from_partId,
    generate_clean_part_html,
    logger,
    slugify,
 )
 from create_book.parser import clean_tree, download_tree_images
 app = FastAPI()
 BUILD_PATH = Path(__file__).parent / "build"
@@ -73,7 +75,7 @@ app.add_middleware(RequestCancelledMiddleware)
 class DownloadFormat(Enum):
-    # pdf = "pdf"
+    pdf = "pdf"
    epub = "epub"
@@ -169,30 +171,36 @@ async def handle_download(
        if not cover_data:
            raise HTTPException(status_code=422)
        match format:
            case DownloadFormat.epub:
                book = EPUBGenerator(metadata, cover_data)
                media_type = "application/epub+zip"
            # case DownloadFormat.pdf:
            #     book = PDFGenerator(metadata, cover_data)
            #     media_type = "application/pdf"
        logger.info(f"Retrieved story metadata and cover ({story_id=})")
        story_zip = await fetch_story_content_zip(story_id, cookies)
        archive = ZipFile(story_zip, "r")
-        part_contents = [
+        part_trees: list[BeautifulSoup] = [
-            generate_clean_part_html(
+            clean_tree(
-                part, archive.read(str(part["id"])).decode("utf-8")
+                part["title"], part["id"], archive.read(str(part["id"])).decode("utf-8")
            )
            for part in metadata["parts"]
        ]
-        async for title in book.add_chapters(
+        if download_images:
-            part_contents, download_images=download_images
+            images = [await download_tree_images(tree) for tree in part_trees]
-        ):
+
-            ...
+        match format:
            case DownloadFormat.epub:
                book = EPUBGenerator(metadata, part_trees, cover_data, images)
                media_type = "application/epub+zip"
            case DownloadFormat.pdf:
                author_image = await fetch_image(
                    metadata["user"]["avatar"].replace("-256-", "-512-")
                )
                if not author_image:
                    raise HTTPException(status_code=422)
                book = PDFGenerator(
                    metadata, part_trees, cover_data, images, author_image
                )
                media_type = "application/pdf"
        logger.info(f"Retrieved story metadata and cover ({story_id=})")
        book_buffer = book.dump()