fix(api): Add logging for PDF Generation

This commit is contained in:
TheOnlyWayUp
2024-12-08 12:04:39 +00:00
parent 7ef988ba42
commit 9747839ae9
2 changed files with 162 additions and 128 deletions
+116 -82
View File
@@ -245,7 +245,7 @@ story_ta = TypeAdapter(Story)
# --- PDF Dependencies --- # # --- PDF Dependencies --- #
wp_copyright: Dict[int, CopyrightData] = { wp_copyright_data: Dict[int, CopyrightData] = {
1: { 1: {
"name": "All Rights Reserved", "name": "All Rights Reserved",
"statement": "©️ {published_year} by {username}. All Rights Reserved.", "statement": "©️ {published_year} by {username}. All Rights Reserved.",
@@ -404,11 +404,11 @@ async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> st
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) @backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_cover(url: str) -> bytes: async def fetch_image(url: str, should_cache: bool = False) -> bytes:
"""Fetch cover image bytes.""" """Fetch image bytes."""
with start_action(action_type="api_fetch_cover", url=url): with start_action(action_type="api_fetch_image", url=url):
async with CachedSession( async with CachedSession(
headers=headers, cache=None headers=headers, cache=cache if should_cache else None
) as session: # Don't cache images. ) as session: # Don't cache images.
async with session.get(url) as response: async with session.get(url) as response:
response.raise_for_status() response.raise_for_status()
@@ -534,6 +534,94 @@ class PDFGenerator:
self.file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) self.file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=True)
self.cover = cover self.cover = cover
async def genernate_cover_and_copyright_file(
    self,
) -> tempfile._TemporaryFileWrapper:
    """Render the cover + copyright page to a temporary HTML file.

    The page is built by substituting placeholders in ``copyright_template``:
    ``{statement}``, ``{freedoms}``, ``{printing}``, ``{book_id}``,
    ``{book_title}``, ``{copyright_image}`` and ``{cover}``.  The licence
    entry is looked up in ``wp_copyright_data`` using ``self.data["copyright"]``;
    its badge image (if it has an ``image_url``) is fetched with caching
    enabled and inlined as a base64 data URI.  ``self.cover`` is inlined the
    same way.

    Returns:
        An open ``NamedTemporaryFile`` (``suffix=".html"``, ``delete=True``)
        containing the encoded page, rewound to offset 0.
    """
    license_info = wp_copyright_data[self.data["copyright"]]

    # The statement is itself a format string with username / year fields.
    statement = license_info["statement"].format(
        username=self.data["user"]["username"],
        published_year=self.data["createDate"].split("-", 2)[0],
    )

    page = copyright_template.replace("{statement}", statement)
    page = page.replace("{freedoms}", license_info["freedoms"])
    page = page.replace("{printing}", license_info["printing"])
    page = page.replace("{book_id}", self.data["id"])
    page = page.replace("{book_title}", self.data["title"])

    # Licence badge: only fetched when the licence entry has an image URL.
    badge_bytes = None
    if license_info["image_url"]:
        badge_bytes = await fetch_image(license_info["image_url"], should_cache=True)

    badge_html = ""
    if badge_bytes:
        badge_html = """<img src="{image_url}"
alt="{name}"
width="88"
height="31"
style="margin-bottom: 1rem;">""".format(
            image_url=f"data:image/jpg;base64,{b64encode(badge_bytes).decode()}",
            name=license_info["name"],
        )
    # An empty badge simply blanks the placeholder.
    page = page.replace("{copyright_image}", badge_html)

    cover_uri = f"data:image/jpg;base64,{b64encode(self.cover).decode()}"
    page = page.replace("{cover}", cover_uri)

    html_file = tempfile.NamedTemporaryFile(suffix=".html", delete=True)
    html_file.write(page.encode())
    html_file.seek(0)
    return html_file
async def generate_about_author_file(self) -> tempfile._TemporaryFileWrapper:
    """Render the "About the Author" page to a temporary HTML file.

    Fills the ``{username}``, ``{description}`` and ``{avatar}`` placeholders
    of ``author_template`` from ``self.data["user"]``.  The description is
    passed through ``smart_trim`` first.  When the user has an avatar URL, the
    image is fetched and inlined as a base64 data URI; otherwise the avatar
    placeholder is blanked.

    Returns:
        An open ``NamedTemporaryFile`` (``suffix=".html"``, ``delete=True``)
        containing the encoded page, rewound to offset 0.
    """
    user = self.data["user"]

    avatar_bytes = None
    if user["avatar"]:
        # Swap "128" for "512" in the URL to request a higher resolution.
        # NOTE(review): this replaces every "128" in the URL, not just a size
        # segment — confirm the avatar URL format cannot contain it elsewhere.
        avatar_bytes = await fetch_image(user["avatar"].replace("128", "512"))

    page = author_template.replace("{username}", user["username"])
    page = page.replace("{description}", smart_trim(user["description"]))

    if avatar_bytes:
        avatar_html = f"""
<img src="data:image/jpg;base64,{b64encode(avatar_bytes).decode()}" alt="Author's profile picture" id="author-profile-picture">"""
    else:
        avatar_html = ""
    page = page.replace("{avatar}", avatar_html)

    html_file = tempfile.NamedTemporaryFile(suffix=".html", delete=True)
    html_file.write(page.encode())
    html_file.seek(0)
    return html_file
async def add_chapters(self, contents: List[str], download_images: bool = False): async def add_chapters(self, contents: List[str], download_images: bool = False):
"""Add chapters to the PDF, downloading images if necessary. Also add Cover, Copyright, and About the Author pages.""" """Add chapters to the PDF, downloading images if necessary. Also add Cover, Copyright, and About the Author pages."""
@@ -577,88 +665,25 @@ class PDFGenerator:
yield part["title"] yield part["title"]
# Cover and Copyright Page # Cover and Copyright Page
copyright_data = wp_copyright[self.data["copyright"]] cover_and_copyright_file = await self.genernate_cover_and_copyright_file()
about_copyright = (
copyright_template.replace(
"{statement}",
copyright_data["statement"].format(
username=self.data["user"]["username"],
published_year=self.data["createDate"].split("-", 2)[0],
),
)
.replace("{freedoms}", copyright_data["freedoms"])
.replace(
"{printing}",
copyright_data["printing"],
)
.replace("{book_id}", self.data["id"])
.replace("{book_title}", self.data["title"])
)
copyright_image = (
await fetch_cover(copyright_data["image_url"])
if copyright_data["image_url"]
else None
)
image_block = (
"""<img src="{image_url}"
alt="{name}"
width="88"
height="31"
style="margin-bottom: 1rem;">""".format(
image_url=f"data:image/jpg;base64,{b64encode(copyright_image).decode()}",
name=copyright_data["name"],
)
if copyright_image
else ""
)
about_copyright = (
about_copyright.replace(
"{copyright_image}",
image_block,
)
if image_block
else about_copyright.replace("{copyright_image}", "")
)
about_copyright = about_copyright.replace(
"{cover}", f"data:image/jpg;base64,{b64encode(self.cover).decode()}"
)
cover_and_copyright_file = tempfile.NamedTemporaryFile(
suffix=".html", delete=True
)
cover_and_copyright_file.write(about_copyright.encode())
cover_and_copyright_file.seek(0)
# About the Author page # About the Author page
author_avatar = ( about_author_file = await self.generate_about_author_file()
await fetch_cover(
self.data["user"]["avatar"].replace("128", "512")
) # Increase image resolution
if self.data["user"]["avatar"]
else None
)
about_author = author_template.replace(
"{username}", self.data["user"]["username"]
).replace("{description}", smart_trim(self.data["user"]["description"]))
about_author = (
about_author.replace(
"{avatar}",
f"""
<img src="data:image/jpg;base64,{b64encode(author_avatar).decode()}" alt="Author's profile picture" id="author-profile-picture">""",
)
if author_avatar
else about_author.replace("{avatar}", "")
)
about_author_file = tempfile.NamedTemporaryFile(suffix=".html", delete=True)
about_author_file.write(about_author.encode())
chapters.append(about_author_file) chapters.append(about_author_file)
about_author_file.seek(0)
chapter_filenames = [chapter.file.name for chapter in chapters]
with start_action(
action_type="generate_pdf",
chapter_filenames=chapter_filenames,
output_filename=self.file.name,
cover_filename=cover_and_copyright_file.file.name,
title=self.data["title"],
):
# PDF Generation with wkhtmltopdf, written to self.file # PDF Generation with wkhtmltopdf, written to self.file
pdfkit.from_file( pdfkit.from_file(
[chapter.file.name for chapter in chapters], chapter_filenames,
self.file.name, self.file.name,
cover=cover_and_copyright_file.file.name, cover=cover_and_copyright_file.file.name,
toc={ toc={
@@ -677,10 +702,14 @@ style="margin-bottom: 1rem;">""".format(
cover_first=True, cover_first=True,
) )
with start_action(action_type="add_metadata") as action:
# Metadata generation with Exiftool # Metadata generation with Exiftool
clean_description = ( clean_description = (
self.data["description"].strip().replace("\n", "$/") self.data["description"].strip().replace("\n", "$/")
) # exiftool doesn't parse \ns correctly, they support $/ for the same instead. `&#xa;` is another option. ) # exiftool doesn't parse \ns correctly, they support $/ for the same instead. `&#xa;` is another option.
action.log(f"clean_description: {clean_description}")
metadata = { metadata = {
"Author": self.data["user"]["username"], "Author": self.data["user"]["username"],
"Title": self.data["title"], "Title": self.data["title"],
@@ -694,8 +723,13 @@ style="margin-bottom: 1rem;">""".format(
"Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader", "Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader",
} # As per https://exiftool.org/TagNames/PDF.html } # As per https://exiftool.org/TagNames/PDF.html
with ExifTool(config_file="../exiftool.config", logger=logger) as et: action.log(f"options: {metadata}")
with ExifTool(
config_file="../exiftool.config", logger=exiftool_logger
) as et:
# Custom configuration adds Completed and MatureContent tags. # Custom configuration adds Completed and MatureContent tags.
# exiftool logger logs executed command
et.execute( et.execute(
*( *(
[f"-{key}={value}" for key, value in metadata.items()] [f"-{key}={value}" for key, value in metadata.items()]
+2 -2
View File
@@ -21,7 +21,7 @@ from create_book import (
fetch_story, fetch_story,
fetch_story_from_partId, fetch_story_from_partId,
fetch_part_content, fetch_part_content,
fetch_cover, fetch_image,
fetch_cookies, fetch_cookies,
WattpadError, WattpadError,
StoryNotFoundError, StoryNotFoundError,
@@ -167,7 +167,7 @@ async def handle_download(
case DownloadMode.part: case DownloadMode.part:
story_id, metadata = await fetch_story_from_partId(download_id, cookies) story_id, metadata = await fetch_story_from_partId(download_id, cookies)
cover_data = await fetch_cover( cover_data = await fetch_image(
metadata["cover"].replace("-256-", "-512-") metadata["cover"].replace("-256-", "-512-")
) # Increase resolution ) # Increase resolution