fix(api): Add comments and docstrings

This commit is contained in:
TheOnlyWayUp
2024-12-08 11:51:16 +00:00
parent c51f32654c
commit 7ef988ba42
2 changed files with 108 additions and 91 deletions
+102 -88
View File
@@ -109,13 +109,13 @@ logger.info(f"Using {cache=}")
# --- Utilities --- # # --- Utilities --- #
def smart_trim(text: str): def smart_trim(text: str, max_length: int = 400) -> str:
max_len = 400 """Truncate a string intelligently at newlines. Coherence and max-length adherence."""
chunks = [t for t in text.split("\n") if t] chunks = [t for t in text.split("\n") if t]
to_return = "" to_return = ""
for chunk in chunks: for chunk in chunks:
if len(to_return) + len(chunk) < max_len: if len(to_return) + len(chunk) < max_length:
to_return = chunk + "<br />" to_return = chunk + "<br />"
else: else:
to_return = to_return.rstrip("<br />") to_return = to_return.rstrip("<br />")
@@ -124,7 +124,7 @@ def smart_trim(text: str):
return to_return return to_return
def clean_part_text(text: str): def clean_part_text(text: str) -> str:
"""Remove unnecessary newlines from Text""" """Remove unnecessary newlines from Text"""
soup = BeautifulSoup(text) soup = BeautifulSoup(text)
@@ -243,6 +243,73 @@ class Story(TypedDict):
story_ta = TypeAdapter(Story) story_ta = TypeAdapter(Story)
# --- PDF Dependencies --- #
# Licence metadata keyed by the integer copyright code stored on a story
# (looked up as ``wp_copyright[self.data["copyright"]]`` by the PDF generator).
#
# Each entry provides:
#   name      - human-readable licence name.
#   statement - copyright line containing ``{published_year}`` and
#               ``{username}`` placeholders (presumably substituted when the
#               copyright page is rendered - confirm against the template code).
#   freedoms  - short summary of what readers may do with the work.
#   printing  - short summary of the printing terms.
#   image_url - licence badge URL, or None when the licence has no badge.
#               The PDF generator downloads this URL server-side, so every
#               badge uses HTTPS rather than a mix of HTTP and HTTPS.
wp_copyright: Dict[int, CopyrightData] = {
    1: {
        "name": "All Rights Reserved",
        "statement": "©️ {published_year} by {username}. All Rights Reserved.",
        "freedoms": "No reuse, redistribution, or modification without permission.",
        "printing": "Not allowed without explicit permission.",
        "image_url": None,
    },
    2: {
        "name": "Public Domain",
        "statement": "This work is in the public domain. Originally published in {published_year} by {username}.",
        "freedoms": "Free to use for any purpose without permission.",
        "printing": "Allowed for personal or commercial purposes.",
        "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/cc-zero.png",
    },
    3: {
        "name": "Creative Commons Attribution (CC-BY)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution 4.0 International License.",
        "freedoms": "Allows reuse, redistribution, and modification with credit to the author.",
        "printing": "Allowed with proper credit.",
        "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by.png",
    },
    4: {
        "name": "CC Attribution NonCommercial (CC-BY-NC)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.",
        "freedoms": "Allows reuse and modification for non-commercial purposes with credit.",
        "printing": "Allowed for non-commercial purposes with proper credit.",
        "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc.png",
    },
    5: {
        "name": "CC Attribution NonCommercial NoDerivs (CC-BY-NC-ND)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivs 4.0 International License.",
        "freedoms": "Allows sharing in original form for non-commercial purposes with credit; no modifications allowed.",
        "printing": "Allowed for non-commercial purposes in original form with proper credit.",
        "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-nd.png",
    },
    6: {
        "name": "CC Attribution NonCommercial ShareAlike (CC-BY-NC-SA)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.",
        "freedoms": "Allows reuse and modification for non-commercial purposes under the same license, with credit.",
        "printing": "Allowed for non-commercial purposes with proper credit under the same license.",
        "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-sa.png",
    },
    7: {
        "name": "CC Attribution ShareAlike (CC-BY-SA)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.",
        "freedoms": "Allows reuse and modification for any purpose under the same license, with credit.",
        "printing": "Allowed with proper credit under the same license.",
        "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-sa.png",
    },
    8: {
        "name": "CC Attribution NoDerivs (CC-BY-ND)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NoDerivs 4.0 International License.",
        "freedoms": "Allows sharing in original form for any purpose with credit; no modifications allowed.",
        "printing": "Allowed in original form with proper credit.",
        "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nd.png",
    },
}
# Load the HTML page templates once at import time. The paths are relative to
# the process working directory. An explicit UTF-8 encoding keeps decoding
# independent of the host locale.
with open("./pdf/cover_and_copyright.html", encoding="utf-8") as reader:
    copyright_template = reader.read()  # cover + copyright page markup
with open("./pdf/author.html", encoding="utf-8") as reader:
    author_template = reader.read()  # "about the author" page markup
# --- Exceptions --- # # --- Exceptions --- #
@@ -351,11 +418,14 @@ async def fetch_cover(url: str) -> bytes:
return body return body
# --- EPUB Generation --- # # --- Generation --- #
class EPUBGenerator: class EPUBGenerator:
"""EPUB Generation utilities"""
def __init__(self, data: Story, cover: bytes): def __init__(self, data: Story, cover: bytes):
"""Initialize EPUBGenerator. Create epub.EpubBook() and set metadata and cover."""
self.epub = epub.EpubBook() self.epub = epub.EpubBook()
self.data = data self.data = data
self.cover = cover self.cover = cover
@@ -382,7 +452,7 @@ class EPUBGenerator:
{"name": "completed", "content": str(int(data["completed"]))}, {"name": "completed", "content": str(int(data["completed"]))},
) )
# Set book cover # Set cover
self.epub.set_cover("cover.jpg", cover) self.epub.set_cover("cover.jpg", cover)
cover_chapter = epub.EpubHtml( cover_chapter = epub.EpubHtml(
file_name="titlepage.xhtml", # Standard for cover page file_name="titlepage.xhtml", # Standard for cover page
@@ -391,7 +461,8 @@ class EPUBGenerator:
self.epub.add_item(cover_chapter) self.epub.add_item(cover_chapter)
async def add_chapters(self, contents: List[str], download_images: bool = False): async def add_chapters(self, contents: List[str], download_images: bool = False):
chapters = [] """Add chapters to the Epub, downloading images if necessary. Sets the table of contents and spine."""
chapters: List[epub.EpubHtml] = []
for cidx, (part, content) in enumerate(zip(self.data["parts"], contents)): for cidx, (part, content) in enumerate(zip(self.data["parts"], contents)):
title = part["title"] title = part["title"]
@@ -399,8 +470,9 @@ class EPUBGenerator:
# Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1 # Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1
chapter = epub.EpubHtml( chapter = epub.EpubHtml(
title=title, title=title,
file_name=f"{cidx}.xhtml", # See issue #30 file_name=f"{cidx}_{part['id']}.xhtml", # See issue #30
lang=self.data["language"]["name"], lang=self.data["language"]["name"],
uid=part["id"],
) )
if download_images: if download_images:
@@ -428,13 +500,12 @@ class EPUBGenerator:
) )
chapter.set_content(content) chapter.set_content(content)
self.epub.add_item(chapter)
chapters.append(chapter) chapters.append(chapter)
yield title yield title
for chapter in chapters:
self.epub.add_item(chapter)
self.epub.toc = chapters self.epub.toc = chapters
# Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py # Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
@@ -454,82 +525,17 @@ class EPUBGenerator:
return temp_file return temp_file
wp_copyright: Dict[str, CopyrightData] = {
"1": {
"name": "All Rights Reserved",
"statement": "©️ {published_year} by {username}. All Rights Reserved.",
"freedoms": "No reuse, redistribution, or modification without permission.",
"printing": "Not allowed without explicit permission.",
"image_url": None,
},
"2": {
"name": "Public Domain",
"statement": "This work is in the public domain. Originally published in {published_year} by {username}.",
"freedoms": "Free to use for any purpose without permission.",
"printing": "Allowed for personal or commercial purposes.",
"image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/cc-zero.png",
},
"3": {
"name": "Creative Commons Attribution (CC-BY)",
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution 4.0 International License.",
"freedoms": "Allows reuse, redistribution, and modification with credit to the author.",
"printing": "Allowed with proper credit.",
"image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by.png",
},
"4": {
"name": "CC Attribution NonCommercial (CC-BY-NC)",
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.",
"freedoms": "Allows reuse and modification for non-commercial purposes with credit.",
"printing": "Allowed for non-commercial purposes with proper credit.",
"image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc.png",
},
"5": {
"name": "CC Attribution NonCommercial NoDerivs (CC-BY-NC-ND)",
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivs 4.0 International License.",
"freedoms": "Allows sharing in original form for non-commercial purposes with credit; no modifications allowed.",
"printing": "Allowed for non-commercial purposes in original form with proper credit.",
"image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-nd.png",
},
"6": {
"name": "CC Attribution NonCommercial ShareAlike (CC-BY-NC-SA)",
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.",
"freedoms": "Allows reuse and modification for non-commercial purposes under the same license, with credit.",
"printing": "Allowed for non-commercial purposes with proper credit under the same license.",
"image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-sa.png",
},
"7": {
"name": "CC Attribution ShareAlike (CC-BY-SA)",
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.",
"freedoms": "Allows reuse and modification for any purpose under the same license, with credit.",
"printing": "Allowed with proper credit under the same license.",
"image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-sa.png",
},
"8": {
"name": "CC Attribution NoDerivs (CC-BY-ND)",
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NoDerivs 4.0 International License.",
"freedoms": "Allows sharing in original form for any purpose with credit; no modifications allowed.",
"printing": "Allowed in original form with proper credit.",
"image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nd.png",
},
}
with open("./pdf/cover_and_copyright.html") as reader:
copyright_template = reader.read()
with open("./pdf/author.html") as reader:
author_template = reader.read()
class PDFGenerator: class PDFGenerator:
"""PDF Generation utilities""" """PDF Generation utilities"""
def __init__(self, data: Story, cover: bytes): def __init__(self, data: Story, cover: bytes):
"""Initialize PDFGenerator, create PDF Temporary file."""
self.data = data self.data = data
self.file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) self.file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=True)
self.cover = cover self.cover = cover
async def add_chapters(self, contents: List[str], download_images: bool = False): async def add_chapters(self, contents: List[str], download_images: bool = False):
"""Add chapters to the PDF""" """Add chapters to the PDF, downloading images if necessary. Also add Cover, Copyright, and About the Author pages."""
chapters: List[tempfile._TemporaryFileWrapper] = [] chapters: List[tempfile._TemporaryFileWrapper] = []
@@ -560,21 +566,18 @@ class PDFGenerator:
f"data:image/jpg;base64,{b64encode(image).decode()}", f"data:image/jpg;base64,{b64encode(image).decode()}",
) # Base64-encoded images are better than referencing NamedTemporaryFiles as it's less access to the local filesystem, the enable-local-file-access would be disabled if not for local fonts. ) # Base64-encoded images are better than referencing NamedTemporaryFiles as it's less access to the local filesystem, the enable-local-file-access would be disabled if not for local fonts.
tempie = tempfile.NamedTemporaryFile(suffix=".html", delete=True) tempie = tempfile.NamedTemporaryFile(
suffix=".html", delete=True
) # tempie 🫡
tempie.write(writable_html.encode()) tempie.write(writable_html.encode())
chapters.append(tempie)
tempie.file.seek(0) tempie.file.seek(0)
chapters.append(tempie)
yield part["title"] yield part["title"]
# Cover and Copyright Page # Cover and Copyright Page
copyright_data = wp_copyright[str(self.data["copyright"])] copyright_data = wp_copyright[self.data["copyright"]]
copyright_image = (
await fetch_cover(copyright_data["image_url"])
if copyright_data["image_url"]
else None
)
about_copyright = ( about_copyright = (
copyright_template.replace( copyright_template.replace(
"{statement}", "{statement}",
@@ -592,6 +595,11 @@ class PDFGenerator:
.replace("{book_title}", self.data["title"]) .replace("{book_title}", self.data["title"])
) )
copyright_image = (
await fetch_cover(copyright_data["image_url"])
if copyright_data["image_url"]
else None
)
image_block = ( image_block = (
"""<img src="{image_url}" """<img src="{image_url}"
alt="{name}" alt="{name}"
@@ -622,6 +630,7 @@ style="margin-bottom: 1rem;">""".format(
cover_and_copyright_file.write(about_copyright.encode()) cover_and_copyright_file.write(about_copyright.encode())
cover_and_copyright_file.seek(0) cover_and_copyright_file.seek(0)
# About the Author page
author_avatar = ( author_avatar = (
await fetch_cover( await fetch_cover(
self.data["user"]["avatar"].replace("128", "512") self.data["user"]["avatar"].replace("128", "512")
@@ -647,6 +656,7 @@ style="margin-bottom: 1rem;">""".format(
chapters.append(about_author_file) chapters.append(about_author_file)
about_author_file.seek(0) about_author_file.seek(0)
# PDF Generation with wkhtmltopdf, written to self.file
pdfkit.from_file( pdfkit.from_file(
[chapter.file.name for chapter in chapters], [chapter.file.name for chapter in chapters],
self.file.name, self.file.name,
@@ -665,9 +675,9 @@ style="margin-bottom: 1rem;">""".format(
"enable-local-file-access": "", "enable-local-file-access": "",
}, },
cover_first=True, cover_first=True,
verbose=True,
) )
# Metadata generation with Exiftool
clean_description = ( clean_description = (
self.data["description"].strip().replace("\n", "$/") self.data["description"].strip().replace("\n", "$/")
) # exiftool doesn't parse \ns correctly, they support $/ for the same instead. `&#xa;` is another option. ) # exiftool doesn't parse \ns correctly, they support $/ for the same instead. `&#xa;` is another option.
@@ -696,6 +706,7 @@ style="margin-bottom: 1rem;">""".format(
) )
) )
# Close files and delete them from tmp
for chapter in chapters: for chapter in chapters:
chapter.file.close() chapter.file.close()
@@ -703,3 +714,6 @@ style="margin-bottom: 1rem;">""".format(
self.file.seek(0) self.file.seek(0)
return self return self
# ------ #
+6 -3
View File
@@ -167,7 +167,9 @@ async def handle_download(
case DownloadMode.part: case DownloadMode.part:
story_id, metadata = await fetch_story_from_partId(download_id, cookies) story_id, metadata = await fetch_story_from_partId(download_id, cookies)
cover_data = await fetch_cover(metadata["cover"].replace("-256-", "-512-")) cover_data = await fetch_cover(
metadata["cover"].replace("-256-", "-512-")
) # Increase resolution
match format: match format:
case DownloadFormat.epub: case DownloadFormat.epub:
@@ -177,7 +179,7 @@ async def handle_download(
book = PDFGenerator(metadata, cover_data) book = PDFGenerator(metadata, cover_data)
media_type = "application/pdf" media_type = "application/pdf"
logger.info(f"Retrieved story id ({story_id=})") logger.info(f"Retrieved story metadata and cover ({story_id=})")
part_contents = [ part_contents = [
f"<h1>{part['title']}</h1>" f"<h1>{part['title']}</h1>"
@@ -192,7 +194,7 @@ async def handle_download(
book_file = book.dump().file book_file = book.dump().file
book_bytes = book_file.read() book_bytes = book_file.read()
book_file.close() book_file.close() # Deletes tempfile
return StreamingResponse( return StreamingResponse(
BytesIO(book_bytes), BytesIO(book_bytes),
@@ -205,6 +207,7 @@ async def handle_download(
@app.get("/donate") @app.get("/donate")
def donate(): def donate():
"""Redirect to donation URL."""
return RedirectResponse("https://buymeacoffee.com/theonlywayup") return RedirectResponse("https://buymeacoffee.com/theonlywayup")