diff --git a/src/api/src/create_book.py b/src/api/src/create_book.py index 753a566..98793dc 100644 --- a/src/api/src/create_book.py +++ b/src/api/src/create_book.py @@ -109,13 +109,13 @@ logger.info(f"Using {cache=}") # --- Utilities --- # -def smart_trim(text: str): - max_len = 400 +def smart_trim(text: str, max_length: int = 400) -> str: + """Truncate a string intelligently at newlines. Coherence and max-length adherence.""" chunks = [t for t in text.split("\n") if t] to_return = "" for chunk in chunks: - if len(to_return) + len(chunk) < max_len: + if len(to_return) + len(chunk) < max_length: to_return = chunk + "
" else: to_return = to_return.rstrip("
") @@ -124,7 +124,7 @@ def smart_trim(text: str): return to_return -def clean_part_text(text: str): +def clean_part_text(text: str) -> str: """Remove unnecessary newlines from Text""" soup = BeautifulSoup(text) @@ -243,6 +243,73 @@ class Story(TypedDict): story_ta = TypeAdapter(Story) +# --- PDF Dependencies --- # + +wp_copyright: Dict[int, CopyrightData] = { + 1: { + "name": "All Rights Reserved", + "statement": "©️ {published_year} by {username}. All Rights Reserved.", + "freedoms": "No reuse, redistribution, or modification without permission.", + "printing": "Not allowed without explicit permission.", + "image_url": None, + }, + 2: { + "name": "Public Domain", + "statement": "This work is in the public domain. Originally published in {published_year} by {username}.", + "freedoms": "Free to use for any purpose without permission.", + "printing": "Allowed for personal or commercial purposes.", + "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/cc-zero.png", + }, + 3: { + "name": "Creative Commons Attribution (CC-BY)", + "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution 4.0 International License.", + "freedoms": "Allows reuse, redistribution, and modification with credit to the author.", + "printing": "Allowed with proper credit.", + "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by.png", + }, + 4: { + "name": "CC Attribution NonCommercial (CC-BY-NC)", + "statement": "©️ {published_year} by {username}. 
This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.", + "freedoms": "Allows reuse and modification for non-commercial purposes with credit.", + "printing": "Allowed for non-commercial purposes with proper credit.", + "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc.png", + }, + 5: { + "name": "CC Attribution NonCommercial NoDerivs (CC-BY-NC-ND)", + "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivs 4.0 International License.", + "freedoms": "Allows sharing in original form for non-commercial purposes with credit; no modifications allowed.", + "printing": "Allowed for non-commercial purposes in original form with proper credit.", + "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-nd.png", + }, + 6: { + "name": "CC Attribution NonCommercial ShareAlike (CC-BY-NC-SA)", + "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.", + "freedoms": "Allows reuse and modification for non-commercial purposes under the same license, with credit.", + "printing": "Allowed for non-commercial purposes with proper credit under the same license.", + "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-sa.png", + }, + 7: { + "name": "CC Attribution ShareAlike (CC-BY-SA)", + "statement": "©️ {published_year} by {username}. 
This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.", + "freedoms": "Allows reuse and modification for any purpose under the same license, with credit.", + "printing": "Allowed with proper credit under the same license.", + "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-sa.png", + }, + 8: { + "name": "CC Attribution NoDerivs (CC-BY-ND)", + "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NoDerivs 4.0 International License.", + "freedoms": "Allows sharing in original form for any purpose with credit; no modifications allowed.", + "printing": "Allowed in original form with proper credit.", + "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nd.png", + }, +} + + +with open("./pdf/cover_and_copyright.html") as reader: + copyright_template = reader.read() +with open("./pdf/author.html") as reader: + author_template = reader.read() + # --- Exceptions --- # @@ -351,11 +418,14 @@ async def fetch_cover(url: str) -> bytes: return body -# --- EPUB Generation --- # +# --- Generation --- # class EPUBGenerator: + """EPUB Generation utilities""" + def __init__(self, data: Story, cover: bytes): + """Initialize EPUBGenerator. Create epub.EpubBook() and set metadata and cover.""" self.epub = epub.EpubBook() self.data = data self.cover = cover @@ -382,7 +452,7 @@ class EPUBGenerator: {"name": "completed", "content": str(int(data["completed"]))}, ) - # Set book cover + # Set cover self.epub.set_cover("cover.jpg", cover) cover_chapter = epub.EpubHtml( file_name="titlepage.xhtml", # Standard for cover page @@ -391,7 +461,8 @@ class EPUBGenerator: self.epub.add_item(cover_chapter) async def add_chapters(self, contents: List[str], download_images: bool = False): - chapters = [] + """Add chapters to the Epub, downloading images if necessary. 
Sets the table of contents and spine.""" + chapters: List[epub.EpubHtml] = [] for cidx, (part, content) in enumerate(zip(self.data["parts"], contents)): title = part["title"] @@ -399,8 +470,9 @@ class EPUBGenerator: # Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1 chapter = epub.EpubHtml( title=title, - file_name=f"{cidx}.xhtml", # See issue #30 + file_name=f"{cidx}_{part['id']}.xhtml", # See issue #30 lang=self.data["language"]["name"], + uid=part["id"], ) if download_images: @@ -428,13 +500,12 @@ class EPUBGenerator: ) chapter.set_content(content) + self.epub.add_item(chapter) + chapters.append(chapter) yield title - for chapter in chapters: - self.epub.add_item(chapter) - self.epub.toc = chapters # Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py @@ -454,82 +525,17 @@ class EPUBGenerator: return temp_file -wp_copyright: Dict[str, CopyrightData] = { - "1": { - "name": "All Rights Reserved", - "statement": "©️ {published_year} by {username}. All Rights Reserved.", - "freedoms": "No reuse, redistribution, or modification without permission.", - "printing": "Not allowed without explicit permission.", - "image_url": None, - }, - "2": { - "name": "Public Domain", - "statement": "This work is in the public domain. Originally published in {published_year} by {username}.", - "freedoms": "Free to use for any purpose without permission.", - "printing": "Allowed for personal or commercial purposes.", - "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/cc-zero.png", - }, - "3": { - "name": "Creative Commons Attribution (CC-BY)", - "statement": "©️ {published_year} by {username}. 
This work is licensed under a Creative Commons Attribution 4.0 International License.", - "freedoms": "Allows reuse, redistribution, and modification with credit to the author.", - "printing": "Allowed with proper credit.", - "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by.png", - }, - "4": { - "name": "CC Attribution NonCommercial (CC-BY-NC)", - "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.", - "freedoms": "Allows reuse and modification for non-commercial purposes with credit.", - "printing": "Allowed for non-commercial purposes with proper credit.", - "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc.png", - }, - "5": { - "name": "CC Attribution NonCommercial NoDerivs (CC-BY-NC-ND)", - "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivs 4.0 International License.", - "freedoms": "Allows sharing in original form for non-commercial purposes with credit; no modifications allowed.", - "printing": "Allowed for non-commercial purposes in original form with proper credit.", - "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-nd.png", - }, - "6": { - "name": "CC Attribution NonCommercial ShareAlike (CC-BY-NC-SA)", - "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.", - "freedoms": "Allows reuse and modification for non-commercial purposes under the same license, with credit.", - "printing": "Allowed for non-commercial purposes with proper credit under the same license.", - "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-sa.png", - }, - "7": { - "name": "CC Attribution ShareAlike (CC-BY-SA)", - "statement": "©️ {published_year} by {username}. 
This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.", - "freedoms": "Allows reuse and modification for any purpose under the same license, with credit.", - "printing": "Allowed with proper credit under the same license.", - "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-sa.png", - }, - "8": { - "name": "CC Attribution NoDerivs (CC-BY-ND)", - "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NoDerivs 4.0 International License.", - "freedoms": "Allows sharing in original form for any purpose with credit; no modifications allowed.", - "printing": "Allowed in original form with proper credit.", - "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nd.png", - }, -} - - -with open("./pdf/cover_and_copyright.html") as reader: - copyright_template = reader.read() -with open("./pdf/author.html") as reader: - author_template = reader.read() - - class PDFGenerator: """PDF Generation utilities""" def __init__(self, data: Story, cover: bytes): + """Initialize PDFGenerator, create PDF Temporary file.""" self.data = data self.file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) self.cover = cover async def add_chapters(self, contents: List[str], download_images: bool = False): - """Add chapters to the PDF""" + """Add chapters to the PDF, downloading images if necessary. Also add Cover, Copyright, and About the Author pages.""" chapters: List[tempfile._TemporaryFileWrapper] = [] @@ -560,21 +566,18 @@ class PDFGenerator: f"data:image/jpg;base64,{b64encode(image).decode()}", ) # Base64-encoded images are better than referencing NamedTemporaryFiles as it's less access to the local filesystem, the enable-local-file-access would be disabled if not for local fonts.
- tempie = tempfile.NamedTemporaryFile(suffix=".html", delete=True) + tempie = tempfile.NamedTemporaryFile( + suffix=".html", delete=True + ) # tempie 🫡 tempie.write(writable_html.encode()) - chapters.append(tempie) - tempie.file.seek(0) + chapters.append(tempie) + yield part["title"] # Cover and Copyright Page - copyright_data = wp_copyright[str(self.data["copyright"])] - copyright_image = ( - await fetch_cover(copyright_data["image_url"]) - if copyright_data["image_url"] - else None - ) + copyright_data = wp_copyright[self.data["copyright"]] about_copyright = ( copyright_template.replace( "{statement}", @@ -592,6 +595,11 @@ class PDFGenerator: .replace("{book_title}", self.data["title"]) ) + copyright_image = ( + await fetch_cover(copyright_data["image_url"]) + if copyright_data["image_url"] + else None + ) image_block = ( """{name}""".format( cover_and_copyright_file.write(about_copyright.encode()) cover_and_copyright_file.seek(0) + # About the Author page author_avatar = ( await fetch_cover( self.data["user"]["avatar"].replace("128", "512") @@ -647,6 +656,7 @@ style="margin-bottom: 1rem;">""".format( chapters.append(about_author_file) about_author_file.seek(0) + # PDF Generation with wkhtmltopdf, written to self.file pdfkit.from_file( [chapter.file.name for chapter in chapters], self.file.name, @@ -665,9 +675,9 @@ style="margin-bottom: 1rem;">""".format( "enable-local-file-access": "", }, cover_first=True, - verbose=True, ) + # Metadata generation with Exiftool clean_description = ( self.data["description"].strip().replace("\n", "$/") ) # exiftool doesn't parse \ns correctly, they support $/ for the same instead. ` ` is another option. 
@@ -696,6 +706,7 @@ style="margin-bottom: 1rem;">""".format( ) ) + # Close files and delete them from tmp for chapter in chapters: chapter.file.close() @@ -703,3 +714,6 @@ style="margin-bottom: 1rem;">""".format( self.file.seek(0) return self + + +# ------ # diff --git a/src/api/src/main.py b/src/api/src/main.py index aa6c98d..ea6bd07 100644 --- a/src/api/src/main.py +++ b/src/api/src/main.py @@ -167,7 +167,9 @@ async def handle_download( case DownloadMode.part: story_id, metadata = await fetch_story_from_partId(download_id, cookies) - cover_data = await fetch_cover(metadata["cover"].replace("-256-", "-512-")) + cover_data = await fetch_cover( + metadata["cover"].replace("-256-", "-512-") + ) # Increase resolution match format: case DownloadFormat.epub: @@ -177,7 +179,7 @@ async def handle_download( book = PDFGenerator(metadata, cover_data) media_type = "application/pdf" - logger.info(f"Retrieved story id ({story_id=})") + logger.info(f"Retrieved story metadata and cover ({story_id=})") part_contents = [ f"

{part['title']}

" @@ -192,7 +194,7 @@ async def handle_download( book_file = book.dump().file book_bytes = book_file.read() - book_file.close() + book_file.close() # Deletes tempfile return StreamingResponse( BytesIO(book_bytes), @@ -205,6 +207,7 @@ async def handle_download( @app.get("/donate") def donate(): + """Redirect to donation URL.""" return RedirectResponse("https://buymeacoffee.com/theonlywayup")