feat(api): Add DownloadFormat type, restructure utils

This commit is contained in:
TheOnlyWayUp
2024-12-06 07:27:56 +00:00
parent 0f6cdd91a9
commit 0835992b23
2 changed files with 112 additions and 100 deletions
+79 -75
View File
@@ -269,93 +269,97 @@ async def fetch_cover(url: str) -> bytes:
# --- EPUB Generation --- # # --- EPUB Generation --- #
def set_metadata(book: EpubBook, data: Story) -> None: class EPUBGenerator:
"""Set book metadata.""" def __init__(self, epub: EpubBook, data: Story):
book.add_author(data["user"]["username"]) self.epub = epub
self.data = data
book.add_metadata("DC", "title", data["title"]) # set metadata
book.add_metadata("DC", "description", data["description"]) self.epub.add_author(data["user"]["username"])
book.add_metadata("DC", "date", data["createDate"])
book.add_metadata("DC", "modified", data["modifyDate"])
book.add_metadata("DC", "language", data["language"]["name"])
book.add_metadata( self.epub.add_metadata("DC", "title", data["title"])
None, "meta", "", {"name": "tags", "content": ", ".join(data["tags"])} self.epub.add_metadata("DC", "description", data["description"])
) self.epub.add_metadata("DC", "date", data["createDate"])
book.add_metadata( self.epub.add_metadata("DC", "modified", data["modifyDate"])
None, "meta", "", {"name": "mature", "content": str(int(data["mature"]))} self.epub.add_metadata("DC", "language", data["language"]["name"])
)
book.add_metadata(
None, "meta", "", {"name": "completed", "content": str(int(data["completed"]))}
)
self.epub.add_metadata(
async def set_cover(book: EpubBook, data: Story) -> None: None, "meta", "", {"name": "tags", "content": ", ".join(data["tags"])}
"""Set book cover.""" )
book.set_cover("cover.jpg", await fetch_cover(data["cover"])) self.epub.add_metadata(
chapter = epub.EpubHtml( None, "meta", "", {"name": "mature", "content": str(int(data["mature"]))}
file_name="titlepage.xhtml", # Standard for cover page )
) self.epub.add_metadata(
chapter.set_content('<img src="cover.jpg">') None,
"meta",
"",
async def add_chapters( {"name": "completed", "content": str(int(data["completed"]))},
book: EpubBook,
data: Story,
download_images: bool = False,
cookies: Optional[dict] = None,
):
chapters = []
for cidx, part in enumerate(data["parts"]):
content = await fetch_part_content(part["id"], cookies=cookies)
title = part["title"]
# Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1
chapter = epub.EpubHtml(
title=title,
file_name=f"{cidx}.xhtml", # Used to be clean_title.xhtml, but that broke Arabic support as slugify turns Arabic strings into '', leading to multiple files with the same name, breaking those chapters.
lang=data["language"]["name"],
) )
if download_images: async def set_cover(self) -> None:
soup = BeautifulSoup(content, "lxml") """Set book cover."""
self.epub.set_cover("cover.jpg", await fetch_cover(self.data["cover"]))
chapter = epub.EpubHtml(
file_name="titlepage.xhtml", # Standard for cover page
)
chapter.set_content('<img src="cover.jpg">')
async with CachedSession( async def add_chapters(
headers=headers, cache=None self,
) as session: # Don't cache images. download_images: bool = False,
for idx, image in enumerate(soup.find_all("img")): cookies: Optional[dict] = None,
if not image["src"]: ):
continue chapters = []
# Find all image tags and filter for those with sources
async with session.get(image["src"]) as response: for cidx, part in enumerate(self.data["parts"]):
img = epub.EpubImage( content = await fetch_part_content(part["id"], cookies=cookies)
media_type="image/jpeg", title = part["title"]
content=await response.read(),
file_name=f"static/{cidx}/{idx}.jpeg",
)
book.add_item(img)
# Fetch image and pack
content = content.replace( # Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1
str(image["src"]), f"static/{cidx}/{idx}.jpeg" chapter = epub.EpubHtml(
) title=title,
file_name=f"{cidx}.xhtml", # Used to be clean_title.xhtml, but that broke Arabic support as slugify turns Arabic strings into '', leading to multiple files with the same name, breaking those chapters.
lang=self.data["language"]["name"],
)
chapter.set_content(f"<h1>{title}</h1>" + content) if download_images:
soup = BeautifulSoup(content, "lxml")
chapters.append(chapter) async with CachedSession(
headers=headers, cache=None
) as session: # Don't cache images.
for idx, image in enumerate(soup.find_all("img")):
if not image["src"]:
continue
# Find all image tags and filter for those with sources
yield title # Yield the chapter's title upon insertion preceded by retrieval. async with session.get(image["src"]) as response:
img = epub.EpubImage(
media_type="image/jpeg",
content=await response.read(),
file_name=f"static/{cidx}/{idx}.jpeg",
)
self.epub.add_item(img)
# Fetch image and pack
for chapter in chapters: content = content.replace(
book.add_item(chapter) str(image["src"]), f"static/{cidx}/{idx}.jpeg"
)
book.toc = chapters chapter.set_content(f"<h1>{title}</h1>" + content)
# Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py chapters.append(chapter)
book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())
# create spine yield title # Yield the chapter's title upon insertion preceded by retrieval.
book.spine = ["nav"] + chapters
for chapter in chapters:
self.epub.add_item(chapter)
self.epub.toc = chapters
# Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
self.epub.add_item(epub.EpubNcx())
self.epub.add_item(epub.EpubNav())
# create spine
self.epub.spine = ["nav"] + chapters
+33 -25
View File
@@ -14,9 +14,7 @@ from fastapi.staticfiles import StaticFiles
from ebooklib import epub from ebooklib import epub
from create_book import ( from create_book import (
retrieve_story, retrieve_story,
set_cover, EPUBGenerator,
set_metadata,
add_chapters,
slugify, slugify,
wp_get_cookies, wp_get_cookies,
fetch_story_from_partId, fetch_story_from_partId,
@@ -69,6 +67,11 @@ class RequestCancelledMiddleware:
app.add_middleware(RequestCancelledMiddleware) app.add_middleware(RequestCancelledMiddleware)
class DownloadFormat(Enum):
pdf = "pdf"
epub = "epub"
class DownloadMode(Enum): class DownloadMode(Enum):
story = "story" story = "story"
part = "part" part = "part"
@@ -106,6 +109,7 @@ async def handle_download(
download_id: int, download_id: int,
download_images: bool = False, download_images: bool = False,
mode: DownloadMode = DownloadMode.story, mode: DownloadMode = DownloadMode.story,
format: DownloadFormat = DownloadFormat.epub,
username: Optional[str] = None, username: Optional[str] = None,
password: Optional[str] = None, password: Optional[str] = None,
): ):
@@ -146,33 +150,37 @@ async def handle_download(
logger.info(f"Retrieved story id ({story_id=})") logger.info(f"Retrieved story id ({story_id=})")
book = epub.EpubBook() match format:
set_metadata(book, metadata) case DownloadFormat.epub:
await set_cover(book, metadata) book = EPUBGenerator(epub.EpubBook(), metadata)
await book.set_cover()
async for title in add_chapters( async for title in book.add_chapters(
book, metadata, download_images=download_images, cookies=cookies download_images=download_images, cookies=cookies
): ):
... ...
# Book is compiled # Book is compiled
temp_file = tempfile.NamedTemporaryFile( temp_file = tempfile.NamedTemporaryFile(
suffix=".epub", delete=True suffix=".epub", delete=True
) # Thanks https://stackoverflow.com/a/75398222 ) # Thanks https://stackoverflow.com/a/75398222
# create epub file # create epub file
epub.write_epub(temp_file, book, {}) epub.write_epub(temp_file, book.epub, {})
temp_file.file.seek(0) temp_file.file.seek(0)
book_data = temp_file.file.read() book_data = temp_file.file.read()
return StreamingResponse( return StreamingResponse(
BytesIO(book_data), BytesIO(book_data),
media_type="application/epub+zip", media_type="application/epub+zip",
headers={ headers={
"Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.epub"' # Thanks https://stackoverflow.com/a/72729058 "Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.epub"' # Thanks https://stackoverflow.com/a/72729058
}, },
) )
case DownloadFormat.pdf:
...
app.mount("/", StaticFiles(directory=BUILD_PATH), "static") app.mount("/", StaticFiles(directory=BUILD_PATH), "static")