fix(api): Add logging for PDF Generation

This commit is contained in:
TheOnlyWayUp
2024-12-08 12:04:39 +00:00
parent 7ef988ba42
commit 9747839ae9
2 changed files with 162 additions and 128 deletions
+160 -126
View File
@@ -245,7 +245,7 @@ story_ta = TypeAdapter(Story)
# --- PDF Dependencies --- # # --- PDF Dependencies --- #
wp_copyright: Dict[int, CopyrightData] = { wp_copyright_data: Dict[int, CopyrightData] = {
1: { 1: {
"name": "All Rights Reserved", "name": "All Rights Reserved",
"statement": "©️ {published_year} by {username}. All Rights Reserved.", "statement": "©️ {published_year} by {username}. All Rights Reserved.",
@@ -404,11 +404,11 @@ async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> st
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) @backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_cover(url: str) -> bytes: async def fetch_image(url: str, should_cache: bool = False) -> bytes:
"""Fetch cover image bytes.""" """Fetch image bytes."""
with start_action(action_type="api_fetch_cover", url=url): with start_action(action_type="api_fetch_image", url=url):
async with CachedSession( async with CachedSession(
headers=headers, cache=None headers=headers, cache=cache if should_cache else None
) as session: # Don't cache images. ) as session: # Don't cache images.
async with session.get(url) as response: async with session.get(url) as response:
response.raise_for_status() response.raise_for_status()
@@ -534,6 +534,94 @@ class PDFGenerator:
self.file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) self.file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=True)
self.cover = cover self.cover = cover
async def genernate_cover_and_copyright_file(
self,
) -> tempfile._TemporaryFileWrapper:
"""Generate Cover and Copyright file, fetch copyright image (cached), use self.cover for cover."""
copyright_data = wp_copyright_data[self.data["copyright"]]
about_copyright = (
copyright_template.replace(
"{statement}",
copyright_data["statement"].format(
username=self.data["user"]["username"],
published_year=self.data["createDate"].split("-", 2)[0],
),
)
.replace("{freedoms}", copyright_data["freedoms"])
.replace(
"{printing}",
copyright_data["printing"],
)
.replace("{book_id}", self.data["id"])
.replace("{book_title}", self.data["title"])
)
copyright_image = (
await fetch_image(copyright_data["image_url"], should_cache=True)
if copyright_data["image_url"]
else None
)
image_block = (
"""<img src="{image_url}"
alt="{name}"
width="88"
height="31"
style="margin-bottom: 1rem;">""".format(
image_url=f"data:image/jpg;base64,{b64encode(copyright_image).decode()}",
name=copyright_data["name"],
)
if copyright_image
else ""
)
about_copyright = (
about_copyright.replace(
"{copyright_image}",
image_block,
)
if image_block
else about_copyright.replace("{copyright_image}", "")
)
about_copyright = about_copyright.replace(
"{cover}", f"data:image/jpg;base64,{b64encode(self.cover).decode()}"
)
cover_and_copyright_file = tempfile.NamedTemporaryFile(
suffix=".html", delete=True
)
cover_and_copyright_file.write(about_copyright.encode())
cover_and_copyright_file.seek(0)
return cover_and_copyright_file
async def generate_about_author_file(self) -> tempfile._TemporaryFileWrapper:
"""Generate About the Author file, fetch avatar."""
author_avatar = (
await fetch_image(
self.data["user"]["avatar"].replace("128", "512")
) # Increase image resolution
if self.data["user"]["avatar"]
else None
)
about_author = author_template.replace(
"{username}", self.data["user"]["username"]
).replace("{description}", smart_trim(self.data["user"]["description"]))
about_author = (
about_author.replace(
"{avatar}",
f"""
<img src="data:image/jpg;base64,{b64encode(author_avatar).decode()}" alt="Author's profile picture" id="author-profile-picture">""",
)
if author_avatar
else about_author.replace("{avatar}", "")
)
about_author_file = tempfile.NamedTemporaryFile(suffix=".html", delete=True)
about_author_file.write(about_author.encode())
about_author_file.seek(0)
return about_author_file
async def add_chapters(self, contents: List[str], download_images: bool = False): async def add_chapters(self, contents: List[str], download_images: bool = False):
"""Add chapters to the PDF, downloading images if necessary. Also add Cover, Copyright, and About the Author pages.""" """Add chapters to the PDF, downloading images if necessary. Also add Cover, Copyright, and About the Author pages."""
@@ -577,135 +665,81 @@ class PDFGenerator:
yield part["title"] yield part["title"]
# Cover and Copyright Page # Cover and Copyright Page
copyright_data = wp_copyright[self.data["copyright"]] cover_and_copyright_file = await self.genernate_cover_and_copyright_file()
about_copyright = (
copyright_template.replace(
"{statement}",
copyright_data["statement"].format(
username=self.data["user"]["username"],
published_year=self.data["createDate"].split("-", 2)[0],
),
)
.replace("{freedoms}", copyright_data["freedoms"])
.replace(
"{printing}",
copyright_data["printing"],
)
.replace("{book_id}", self.data["id"])
.replace("{book_title}", self.data["title"])
)
copyright_image = (
await fetch_cover(copyright_data["image_url"])
if copyright_data["image_url"]
else None
)
image_block = (
"""<img src="{image_url}"
alt="{name}"
width="88"
height="31"
style="margin-bottom: 1rem;">""".format(
image_url=f"data:image/jpg;base64,{b64encode(copyright_image).decode()}",
name=copyright_data["name"],
)
if copyright_image
else ""
)
about_copyright = (
about_copyright.replace(
"{copyright_image}",
image_block,
)
if image_block
else about_copyright.replace("{copyright_image}", "")
)
about_copyright = about_copyright.replace(
"{cover}", f"data:image/jpg;base64,{b64encode(self.cover).decode()}"
)
cover_and_copyright_file = tempfile.NamedTemporaryFile(
suffix=".html", delete=True
)
cover_and_copyright_file.write(about_copyright.encode())
cover_and_copyright_file.seek(0)
# About the Author page # About the Author page
author_avatar = ( about_author_file = await self.generate_about_author_file()
await fetch_cover(
self.data["user"]["avatar"].replace("128", "512")
) # Increase image resolution
if self.data["user"]["avatar"]
else None
)
about_author = author_template.replace(
"{username}", self.data["user"]["username"]
).replace("{description}", smart_trim(self.data["user"]["description"]))
about_author = (
about_author.replace(
"{avatar}",
f"""
<img src="data:image/jpg;base64,{b64encode(author_avatar).decode()}" alt="Author's profile picture" id="author-profile-picture">""",
)
if author_avatar
else about_author.replace("{avatar}", "")
)
about_author_file = tempfile.NamedTemporaryFile(suffix=".html", delete=True)
about_author_file.write(about_author.encode())
chapters.append(about_author_file) chapters.append(about_author_file)
about_author_file.seek(0)
# PDF Generation with wkhtmltopdf, written to self.file chapter_filenames = [chapter.file.name for chapter in chapters]
pdfkit.from_file(
[chapter.file.name for chapter in chapters],
self.file.name,
cover=cover_and_copyright_file.file.name,
toc={
"toc-header-text": "Table of Contents",
"xsl-style-sheet": "./pdf/toc.xsl",
},
options={
"footer-html": "./pdf/footer.html",
"margin-top": "10mm",
"margin-bottom": "10mm",
"title": self.data["title"],
"encoding": "UTF-8",
"user-style-sheet": "./pdf/stylesheet.css",
"enable-local-file-access": "",
},
cover_first=True,
)
# Metadata generation with Exiftool with start_action(
clean_description = ( action_type="generate_pdf",
self.data["description"].strip().replace("\n", "$/") chapter_filenames=chapter_filenames,
) # exiftool doesn't parse \ns correctly, they support $/ for the same instead. `&#xa;` is another option. output_filename=self.file.name,
metadata = { cover_filename=cover_and_copyright_file.file.name,
"Author": self.data["user"]["username"], title=self.data["title"],
"Title": self.data["title"], ):
"Subject": clean_description, # PDF Generation with wkhtmltopdf, written to self.file
"CreationDate": self.data["createDate"],
"ModDate": self.data["modifyDate"],
"Keywords": ",".join(self.data["tags"]),
"Language": self.data["language"]["name"],
"Completed": self.data["completed"],
"MatureContent": self.data["mature"],
"Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader",
} # As per https://exiftool.org/TagNames/PDF.html
with ExifTool(config_file="../exiftool.config", logger=logger) as et: pdfkit.from_file(
# Custom configuration adds Completed and MatureContent tags. chapter_filenames,
et.execute( self.file.name,
*( cover=cover_and_copyright_file.file.name,
[f"-{key}={value}" for key, value in metadata.items()] toc={
+ [ "toc-header-text": "Table of Contents",
"-overwrite_original", "xsl-style-sheet": "./pdf/toc.xsl",
self.file.file.name, },
] options={
) "footer-html": "./pdf/footer.html",
"margin-top": "10mm",
"margin-bottom": "10mm",
"title": self.data["title"],
"encoding": "UTF-8",
"user-style-sheet": "./pdf/stylesheet.css",
"enable-local-file-access": "",
},
cover_first=True,
) )
with start_action(action_type="add_metadata") as action:
# Metadata generation with Exiftool
clean_description = (
self.data["description"].strip().replace("\n", "$/")
) # exiftool doesn't parse \ns correctly, they support $/ for the same instead. `&#xa;` is another option.
action.log(f"clean_description: {clean_description}")
metadata = {
"Author": self.data["user"]["username"],
"Title": self.data["title"],
"Subject": clean_description,
"CreationDate": self.data["createDate"],
"ModDate": self.data["modifyDate"],
"Keywords": ",".join(self.data["tags"]),
"Language": self.data["language"]["name"],
"Completed": self.data["completed"],
"MatureContent": self.data["mature"],
"Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader",
} # As per https://exiftool.org/TagNames/PDF.html
action.log(f"options: {metadata}")
with ExifTool(
config_file="../exiftool.config", logger=exiftool_logger
) as et:
# Custom configuration adds Completed and MatureContent tags.
# exiftool logger logs executed command
et.execute(
*(
[f"-{key}={value}" for key, value in metadata.items()]
+ [
"-overwrite_original",
self.file.file.name,
]
)
)
# Close files and delete them from tmp # Close files and delete them from tmp
for chapter in chapters: for chapter in chapters:
chapter.file.close() chapter.file.close()
+2 -2
View File
@@ -21,7 +21,7 @@ from create_book import (
fetch_story, fetch_story,
fetch_story_from_partId, fetch_story_from_partId,
fetch_part_content, fetch_part_content,
fetch_cover, fetch_image,
fetch_cookies, fetch_cookies,
WattpadError, WattpadError,
StoryNotFoundError, StoryNotFoundError,
@@ -167,7 +167,7 @@ async def handle_download(
case DownloadMode.part: case DownloadMode.part:
story_id, metadata = await fetch_story_from_partId(download_id, cookies) story_id, metadata = await fetch_story_from_partId(download_id, cookies)
cover_data = await fetch_cover( cover_data = await fetch_image(
metadata["cover"].replace("-256-", "-512-") metadata["cover"].replace("-256-", "-512-")
) # Increase resolution ) # Increase resolution