feat(api): Add DownloadFormat type, restructure utils

2024-12-06 07:27:56 +00:00
parent 0f6cdd91a9
commit 0835992b23
2 changed files with 112 additions and 100 deletions
@@ -269,93 +269,97 @@ async def fetch_cover(url: str) -> bytes:
 # --- EPUB Generation --- #
-def set_metadata(book: EpubBook, data: Story) -> None:
+class EPUBGenerator:
-    """Set book metadata."""
+    def __init__(self, epub: EpubBook, data: Story):
-    book.add_author(data["user"]["username"])
+        self.epub = epub
        self.data = data
-    book.add_metadata("DC", "title", data["title"])
+        # set metadata
-    book.add_metadata("DC", "description", data["description"])
+        self.epub.add_author(data["user"]["username"])
    book.add_metadata("DC", "date", data["createDate"])
    book.add_metadata("DC", "modified", data["modifyDate"])
    book.add_metadata("DC", "language", data["language"]["name"])
-    book.add_metadata(
+        self.epub.add_metadata("DC", "title", data["title"])
-        None, "meta", "", {"name": "tags", "content": ", ".join(data["tags"])}
+        self.epub.add_metadata("DC", "description", data["description"])
-    )
+        self.epub.add_metadata("DC", "date", data["createDate"])
-    book.add_metadata(
+        self.epub.add_metadata("DC", "modified", data["modifyDate"])
-        None, "meta", "", {"name": "mature", "content": str(int(data["mature"]))}
+        self.epub.add_metadata("DC", "language", data["language"]["name"])
    )
    book.add_metadata(
        None, "meta", "", {"name": "completed", "content": str(int(data["completed"]))}
    )
-
+        self.epub.add_metadata(
-async def set_cover(book: EpubBook, data: Story) -> None:
+            None, "meta", "", {"name": "tags", "content": ", ".join(data["tags"])}
-    """Set book cover."""
+        )
-    book.set_cover("cover.jpg", await fetch_cover(data["cover"]))
+        self.epub.add_metadata(
-    chapter = epub.EpubHtml(
+            None, "meta", "", {"name": "mature", "content": str(int(data["mature"]))}
-        file_name="titlepage.xhtml",  # Standard for cover page
+        )
-    )
+        self.epub.add_metadata(
-    chapter.set_content('<img src="cover.jpg">')
+            None,
-
+            "meta",
-
+            "",
-async def add_chapters(
+            {"name": "completed", "content": str(int(data["completed"]))},
    book: EpubBook,
    data: Story,
    download_images: bool = False,
    cookies: Optional[dict] = None,
 ):
    chapters = []
    for cidx, part in enumerate(data["parts"]):
        content = await fetch_part_content(part["id"], cookies=cookies)
        title = part["title"]
        # Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1
        chapter = epub.EpubHtml(
            title=title,
            file_name=f"{cidx}.xhtml",  # Used to be clean_title.xhtml, but that broke Arabic support as slugify turns arabic strings into '', leading to multiple files with the same name, breaking those chapters.
            lang=data["language"]["name"],
        )
-        if download_images:
+    async def set_cover(self) -> None:
-            soup = BeautifulSoup(content, "lxml")
+        """Set book cover."""
        self.epub.set_cover("cover.jpg", await fetch_cover(self.data["cover"]))
        chapter = epub.EpubHtml(
            file_name="titlepage.xhtml",  # Standard for cover page
        )
        chapter.set_content('<img src="cover.jpg">')
-            async with CachedSession(
+    async def add_chapters(
-                headers=headers, cache=None
+        self,
-            ) as session:  # Don't cache images.
+        download_images: bool = False,
-                for idx, image in enumerate(soup.find_all("img")):
+        cookies: Optional[dict] = None,
-                    if not image["src"]:
+    ):
-                        continue
+        chapters = []
                    # Find all image tags and filter for those with sources
-                    async with session.get(image["src"]) as response:
+        for cidx, part in enumerate(self.data["parts"]):
-                        img = epub.EpubImage(
+            content = await fetch_part_content(part["id"], cookies=cookies)
-                            media_type="image/jpeg",
+            title = part["title"]
                            content=await response.read(),
                            file_name=f"static/{cidx}/{idx}.jpeg",
                        )
                        book.add_item(img)
                        # Fetch image and pack
-                        content = content.replace(
+            # Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1
-                            str(image["src"]), f"static/{cidx}/{idx}.jpeg"
+            chapter = epub.EpubHtml(
-                        )
+                title=title,
                file_name=f"{cidx}.xhtml",  # Used to be clean_title.xhtml, but that broke Arabic support as slugify turns arabic strings into '', leading to multiple files with the same name, breaking those chapters.
                lang=self.data["language"]["name"],
            )
-        chapter.set_content(f"<h1>{title}</h1>" + content)
+            if download_images:
                soup = BeautifulSoup(content, "lxml")
-        chapters.append(chapter)
+                async with CachedSession(
                    headers=headers, cache=None
                ) as session:  # Don't cache images.
                    for idx, image in enumerate(soup.find_all("img")):
                        if not image["src"]:
                            continue
                        # Find all image tags and filter for those with sources
-        yield title  # Yield the chapter's title upon insertion preceeded by retrieval.
+                        async with session.get(image["src"]) as response:
                            img = epub.EpubImage(
                                media_type="image/jpeg",
                                content=await response.read(),
                                file_name=f"static/{cidx}/{idx}.jpeg",
                            )
                            self.epub.add_item(img)
                            # Fetch image and pack
-    for chapter in chapters:
+                            content = content.replace(
-        book.add_item(chapter)
+                                str(image["src"]), f"static/{cidx}/{idx}.jpeg"
                            )
-    book.toc = chapters
+            chapter.set_content(f"<h1>{title}</h1>" + content)
-    # Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
+            chapters.append(chapter)
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
-    # create spine
+            yield title  # Yield the chapter's title upon insertion preceded by retrieval.
-    book.spine = ["nav"] + chapters
+
        for chapter in chapters:
            self.epub.add_item(chapter)
        self.epub.toc = chapters
        # Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
        self.epub.add_item(epub.EpubNcx())
        self.epub.add_item(epub.EpubNav())
        # create spine
        self.epub.spine = ["nav"] + chapters
@@ -14,9 +14,7 @@ from fastapi.staticfiles import StaticFiles
 from ebooklib import epub
 from create_book import (
    retrieve_story,
-    set_cover,
+    EPUBGenerator,
    set_metadata,
    add_chapters,
    slugify,
    wp_get_cookies,
    fetch_story_from_partId,
@@ -69,6 +67,11 @@ class RequestCancelledMiddleware:
 app.add_middleware(RequestCancelledMiddleware)
 class DownloadFormat(Enum):
    pdf = "pdf"
    epub = "epub"
 class DownloadMode(Enum):
    story = "story"
    part = "part"
@@ -106,6 +109,7 @@ async def handle_download(
    download_id: int,
    download_images: bool = False,
    mode: DownloadMode = DownloadMode.story,
    format: DownloadFormat = DownloadFormat.epub,
    username: Optional[str] = None,
    password: Optional[str] = None,
 ):
@@ -146,33 +150,37 @@ async def handle_download(
        logger.info(f"Retrieved story id ({story_id=})")
-        book = epub.EpubBook()
+        match format:
-        set_metadata(book, metadata)
+            case DownloadFormat.epub:
-        await set_cover(book, metadata)
+                book = EPUBGenerator(epub.EpubBook(), metadata)
                await book.set_cover()
-        async for title in add_chapters(
+                async for title in book.add_chapters(
-            book, metadata, download_images=download_images, cookies=cookies
+                    download_images=download_images, cookies=cookies
-        ):
+                ):
-            ...
+                    ...
-        # Book is compiled
+                # Book is compiled
-        temp_file = tempfile.NamedTemporaryFile(
+                temp_file = tempfile.NamedTemporaryFile(
-            suffix=".epub", delete=True
+                    suffix=".epub", delete=True
-        )  # Thanks https://stackoverflow.com/a/75398222
+                )  # Thanks https://stackoverflow.com/a/75398222
-        # create epub file
+                # create epub file
-        epub.write_epub(temp_file, book, {})
+                epub.write_epub(temp_file, book.epub, {})
-        temp_file.file.seek(0)
+                temp_file.file.seek(0)
-        book_data = temp_file.file.read()
+                book_data = temp_file.file.read()
-        return StreamingResponse(
+                return StreamingResponse(
-            BytesIO(book_data),
+                    BytesIO(book_data),
-            media_type="application/epub+zip",
+                    media_type="application/epub+zip",
-            headers={
+                    headers={
-                "Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.epub"'  # Thanks https://stackoverflow.com/a/72729058
+                        "Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.epub"'  # Thanks https://stackoverflow.com/a/72729058
-            },
+                    },
-        )
+                )
            case DownloadFormat.pdf:
                ...
 app.mount("/", StaticFiles(directory=BUILD_PATH), "static")