feat(api): Use new Parser and Generators
This commit is contained in:
@@ -1,116 +1,108 @@
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import List
|
from typing import Generator, List
|
||||||
|
|
||||||
import bs4
|
from bs4 import BeautifulSoup
|
||||||
from aiohttp_client_cache.session import CachedSession
|
|
||||||
from ebooklib import epub
|
from ebooklib import epub
|
||||||
|
|
||||||
from ..models import Story
|
from ..models import Story
|
||||||
|
from .types import AbstractGenerator
|
||||||
headers = {}
|
|
||||||
|
|
||||||
|
|
||||||
class EPUBGenerator:
|
class EPUBGenerator(AbstractGenerator):
|
||||||
"""EPUB Generation utilities"""
|
def __init__(
|
||||||
|
self,
|
||||||
def __init__(self, data: Story, cover: bytes):
|
metadata: Story,
|
||||||
"""Initialize EPUBGenerator. Create epub.EpubBook() and set metadata and cover."""
|
part_trees: List[BeautifulSoup],
|
||||||
self.epub = epub.EpubBook()
|
cover: bytes,
|
||||||
self.data = data
|
images: List[Generator[bytes]] | None,
|
||||||
|
):
|
||||||
|
self.story = metadata
|
||||||
|
self.parts = part_trees
|
||||||
self.cover = cover
|
self.cover = cover
|
||||||
|
self.images = images
|
||||||
|
|
||||||
# set metadata, defined in https://www.dublincore.org/specifications/dublin-core/dcmi-terms/#section-2
|
self.book: epub.EpubBook = epub.EpubBook()
|
||||||
self.epub.add_author(data["user"]["username"])
|
|
||||||
|
|
||||||
self.epub.add_metadata("DC", "title", data["title"])
|
def add_metadata(self):
|
||||||
self.epub.add_metadata("DC", "description", data["description"])
|
"""Add metadata to epub."""
|
||||||
self.epub.add_metadata("DC", "date", data["createDate"])
|
self.book.add_author(self.story["user"]["username"])
|
||||||
self.epub.add_metadata("DC", "modified", data["modifyDate"])
|
|
||||||
self.epub.add_metadata("DC", "language", data["language"]["name"])
|
|
||||||
|
|
||||||
self.epub.add_metadata(
|
self.book.add_metadata("DC", "title", self.story["title"])
|
||||||
None, "meta", "", {"name": "tags", "content": ", ".join(data["tags"])}
|
self.book.add_metadata("DC", "description", self.story["description"])
|
||||||
|
self.book.add_metadata("DC", "date", self.story["createDate"])
|
||||||
|
self.book.add_metadata("DC", "modified", self.story["modifyDate"])
|
||||||
|
self.book.add_metadata("DC", "language", self.story["language"]["name"])
|
||||||
|
|
||||||
|
self.book.add_metadata(
|
||||||
|
None, "meta", "", {"name": "tags", "content": ", ".join(self.story["tags"])}
|
||||||
)
|
)
|
||||||
self.epub.add_metadata(
|
self.book.add_metadata(
|
||||||
None, "meta", "", {"name": "mature", "content": str(int(data["mature"]))}
|
|
||||||
)
|
|
||||||
self.epub.add_metadata(
|
|
||||||
None,
|
None,
|
||||||
"meta",
|
"meta",
|
||||||
"",
|
"",
|
||||||
{"name": "completed", "content": str(int(data["completed"]))},
|
{"name": "mature", "content": str(int(self.story["mature"]))},
|
||||||
|
)
|
||||||
|
self.book.add_metadata(
|
||||||
|
None,
|
||||||
|
"meta",
|
||||||
|
"",
|
||||||
|
{"name": "completed", "content": str(int(self.story["completed"]))},
|
||||||
)
|
)
|
||||||
|
|
||||||
# Set cover
|
def add_cover(self):
|
||||||
self.epub.set_cover("cover.jpg", cover)
|
"""Add cover to epub."""
|
||||||
|
self.book.set_cover("cover.jpg", self.cover)
|
||||||
cover_chapter = epub.EpubHtml(
|
cover_chapter = epub.EpubHtml(
|
||||||
file_name="titlepage.xhtml", # Standard for cover page
|
file_name="titlepage.xhtml", # Standard for cover page
|
||||||
)
|
)
|
||||||
cover_chapter.set_content('<img src="cover.jpg">')
|
cover_chapter.set_content('<img src="cover.jpg">')
|
||||||
self.epub.add_item(cover_chapter)
|
self.book.add_item(cover_chapter)
|
||||||
|
|
||||||
async def add_chapters(
|
def add_chapters(self):
|
||||||
self, contents: List[bs4.Tag], download_images: bool = False
|
"""Add chapters to epub, replacing references to image urls to static image paths if images are provided during initialization."""
|
||||||
):
|
chapters = []
|
||||||
"""Add chapters to the Epub, downloading images if necessary. Sets the table of contents and spine."""
|
|
||||||
chapters: List[epub.EpubHtml] = []
|
|
||||||
|
|
||||||
for cidx, (part, content) in enumerate(zip(self.data["parts"], contents)):
|
for idx, (part, tree) in enumerate(zip(self.story["parts"], self.parts)):
|
||||||
title = part["title"]
|
|
||||||
|
|
||||||
# Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1
|
|
||||||
chapter = epub.EpubHtml(
|
chapter = epub.EpubHtml(
|
||||||
title=title,
|
title=part["title"], file_name=f"{idx}_{part['id']}"
|
||||||
file_name=f"{cidx}_{part['id']}.xhtml", # See issue #30
|
|
||||||
lang=self.data["language"]["name"],
|
|
||||||
uid=str(part["id"]).encode(),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
str_content = content.prettify()
|
if self.images:
|
||||||
if download_images: # ! TODO : Download images elsewhere
|
for img_idx, (img_data, img_tag) in enumerate(
|
||||||
soup = content
|
zip(self.images[idx], tree.find_all("img"))
|
||||||
|
):
|
||||||
async with CachedSession(
|
path = f"static/{idx}_{part['id']}/{img_idx}.jpeg"
|
||||||
headers=headers, cache=None
|
|
||||||
) as session: # Don't cache images.
|
|
||||||
for idx, image in enumerate(soup.find_all("img")):
|
|
||||||
if not image["src"]:
|
|
||||||
continue
|
|
||||||
# Find all image tags and filter for those with sources
|
|
||||||
|
|
||||||
async with session.get(image["src"]) as response:
|
|
||||||
img = epub.EpubImage(
|
img = epub.EpubImage(
|
||||||
media_type="image/jpeg",
|
media_type="image/jpeg", content=img_data, file_name=path
|
||||||
content=await response.read(),
|
|
||||||
file_name=f"static/{cidx}/{idx}.jpeg",
|
|
||||||
)
|
)
|
||||||
self.epub.add_item(img)
|
self.book.add_item(img)
|
||||||
# Fetch image and pack
|
|
||||||
|
|
||||||
str_content = str_content.replace(
|
img_tag["src"] = path
|
||||||
str(image["src"]), f"static/{cidx}/{idx}.jpeg"
|
|
||||||
)
|
|
||||||
|
|
||||||
chapter.set_content(str_content)
|
|
||||||
self.epub.add_item(chapter)
|
|
||||||
|
|
||||||
|
chapter.set_content(tree.prettify())
|
||||||
|
self.book.add_item(chapter)
|
||||||
chapters.append(chapter)
|
chapters.append(chapter)
|
||||||
|
|
||||||
yield title
|
# ! Review, are these needed? #11
|
||||||
|
self.book.toc = chapters
|
||||||
self.epub.toc = chapters
|
|
||||||
|
|
||||||
# Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
|
# Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
|
||||||
self.epub.add_item(epub.EpubNcx())
|
self.book.add_item(epub.EpubNcx())
|
||||||
self.epub.add_item(epub.EpubNav())
|
self.book.add_item(epub.EpubNav())
|
||||||
|
|
||||||
# create spine
|
# create spine
|
||||||
self.epub.spine = ["nav"] + chapters
|
self.book.spine = ["nav"] + chapters
|
||||||
|
|
||||||
|
def compile(self):
|
||||||
|
self.add_metadata()
|
||||||
|
self.add_cover()
|
||||||
|
self.add_chapters()
|
||||||
|
return True
|
||||||
|
|
||||||
def dump(self) -> BytesIO:
|
def dump(self) -> BytesIO:
|
||||||
# Thanks https://stackoverflow.com/a/75398222
|
# Thanks https://stackoverflow.com/a/75398222
|
||||||
buffer = BytesIO()
|
buffer = BytesIO()
|
||||||
epub.write_epub(buffer, self.epub)
|
epub.write_epub(buffer, self.book)
|
||||||
|
|
||||||
buffer.seek(0)
|
buffer.seek(0)
|
||||||
|
|
||||||
|
|||||||
@@ -1,109 +0,0 @@
|
|||||||
from io import BytesIO
|
|
||||||
from typing import List
|
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from ebooklib import epub
|
|
||||||
|
|
||||||
from ..models import Story
|
|
||||||
from .types import AbstractGenerator
|
|
||||||
|
|
||||||
|
|
||||||
class EPUBGenerator(AbstractGenerator):
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
metadata: Story,
|
|
||||||
part_trees: List[BeautifulSoup],
|
|
||||||
cover: bytes,
|
|
||||||
images: List[List[bytes]] | None,
|
|
||||||
):
|
|
||||||
self.story = metadata
|
|
||||||
self.parts = part_trees
|
|
||||||
self.cover = cover
|
|
||||||
self.images = images
|
|
||||||
|
|
||||||
self.book: epub.EpubBook = epub.EpubBook()
|
|
||||||
|
|
||||||
def add_metadata(self):
|
|
||||||
"""Add metadata to epub."""
|
|
||||||
self.book.add_author(self.story["user"]["username"])
|
|
||||||
|
|
||||||
self.book.add_metadata("DC", "title", self.story["title"])
|
|
||||||
self.book.add_metadata("DC", "description", self.story["description"])
|
|
||||||
self.book.add_metadata("DC", "date", self.story["createDate"])
|
|
||||||
self.book.add_metadata("DC", "modified", self.story["modifyDate"])
|
|
||||||
self.book.add_metadata("DC", "language", self.story["language"]["name"])
|
|
||||||
|
|
||||||
self.book.add_metadata(
|
|
||||||
None, "meta", "", {"name": "tags", "content": ", ".join(self.story["tags"])}
|
|
||||||
)
|
|
||||||
self.book.add_metadata(
|
|
||||||
None,
|
|
||||||
"meta",
|
|
||||||
"",
|
|
||||||
{"name": "mature", "content": str(int(self.story["mature"]))},
|
|
||||||
)
|
|
||||||
self.book.add_metadata(
|
|
||||||
None,
|
|
||||||
"meta",
|
|
||||||
"",
|
|
||||||
{"name": "completed", "content": str(int(self.story["completed"]))},
|
|
||||||
)
|
|
||||||
|
|
||||||
def add_cover(self):
|
|
||||||
"""Add cover to epub."""
|
|
||||||
self.book.set_cover("cover.jpg", self.cover)
|
|
||||||
cover_chapter = epub.EpubHtml(
|
|
||||||
file_name="titlepage.xhtml", # Standard for cover page
|
|
||||||
)
|
|
||||||
cover_chapter.set_content('<img src="cover.jpg">')
|
|
||||||
self.book.add_item(cover_chapter)
|
|
||||||
|
|
||||||
def add_chapters(self):
|
|
||||||
"""Add chapters to epub, replacing references to image urls to static image paths if images are provided during initialization."""
|
|
||||||
chapters = []
|
|
||||||
|
|
||||||
for idx, (part, tree) in enumerate(zip(self.story["parts"], self.parts)):
|
|
||||||
chapter = epub.EpubHtml(
|
|
||||||
title=part["title"], file_name=f"{idx}_{part['id']}"
|
|
||||||
)
|
|
||||||
|
|
||||||
if self.images:
|
|
||||||
for img_idx, (img_data, img_tag) in enumerate(
|
|
||||||
zip(self.images[idx], tree.find_all("img"))
|
|
||||||
):
|
|
||||||
path = f"static/{idx}_{part['id']}/{img_idx}.jpeg"
|
|
||||||
img = epub.EpubImage(
|
|
||||||
media_type="image/jpeg", content=img_data, file_name=path
|
|
||||||
)
|
|
||||||
self.book.add_item(img)
|
|
||||||
|
|
||||||
img_tag["src"] = path
|
|
||||||
|
|
||||||
chapter.set_content(tree.prettify())
|
|
||||||
self.book.add_item(chapter)
|
|
||||||
chapters.append(chapter)
|
|
||||||
|
|
||||||
# ! Review, are these needed? #11
|
|
||||||
self.book.toc = chapters
|
|
||||||
|
|
||||||
# Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
|
|
||||||
self.book.add_item(epub.EpubNcx())
|
|
||||||
self.book.add_item(epub.EpubNav())
|
|
||||||
|
|
||||||
# create spine
|
|
||||||
self.book.spine = ["nav"] + chapters
|
|
||||||
|
|
||||||
def compile(self):
|
|
||||||
self.add_metadata()
|
|
||||||
self.add_cover()
|
|
||||||
self.add_chapters()
|
|
||||||
return True
|
|
||||||
|
|
||||||
def dump(self) -> BytesIO:
|
|
||||||
# Thanks https://stackoverflow.com/a/75398222
|
|
||||||
buffer = BytesIO()
|
|
||||||
epub.write_epub(buffer, self.book)
|
|
||||||
|
|
||||||
buffer.seek(0)
|
|
||||||
|
|
||||||
return buffer
|
|
||||||
@@ -1,204 +0,0 @@
|
|||||||
from base64 import b64encode
|
|
||||||
from io import BytesIO
|
|
||||||
from pathlib import Path
|
|
||||||
from tempfile import NamedTemporaryFile, _TemporaryFileWrapper
|
|
||||||
from typing import List, cast
|
|
||||||
|
|
||||||
from bs4 import BeautifulSoup, Tag
|
|
||||||
from exiftool import ExifTool
|
|
||||||
from jinja2 import Template
|
|
||||||
from weasyprint import CSS, HTML
|
|
||||||
from weasyprint.text.fonts import FontConfiguration
|
|
||||||
|
|
||||||
from ..models import Story
|
|
||||||
from .types import AbstractGenerator
|
|
||||||
|
|
||||||
DATA_PATH = Path(__file__).parent / "pdf"
|
|
||||||
ASSET_PATH = DATA_PATH / "assets"
|
|
||||||
|
|
||||||
COPYRIGHT_DATA = {
|
|
||||||
1: {
|
|
||||||
"name": "All Rights Reserved",
|
|
||||||
"statement": "©️ {published_year} by {username}. All Rights Reserved.",
|
|
||||||
"freedoms": "No reuse, redistribution, or modification without permission.",
|
|
||||||
"printing": "Not allowed without explicit permission.",
|
|
||||||
"asset": None,
|
|
||||||
},
|
|
||||||
2: {
|
|
||||||
"name": "Public Domain",
|
|
||||||
"statement": "This work is in the public domain. Originally published in {published_year} by {username}.",
|
|
||||||
"freedoms": "Free to use for any purpose without permission.",
|
|
||||||
"printing": "Allowed for personal or commercial purposes.",
|
|
||||||
"asset": ASSET_PATH / "cc-zero.png",
|
|
||||||
},
|
|
||||||
3: {
|
|
||||||
"name": "Creative Commons Attribution (CC-BY)",
|
|
||||||
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution 4.0 International License.",
|
|
||||||
"freedoms": "Allows reuse, redistribution, and modification with credit to the author.",
|
|
||||||
"printing": "Allowed with proper credit.",
|
|
||||||
"asset": ASSET_PATH / "by.png",
|
|
||||||
},
|
|
||||||
4: {
|
|
||||||
"name": "CC Attribution NonCommercial (CC-BY-NC)",
|
|
||||||
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.",
|
|
||||||
"freedoms": "Allows reuse and modification for non-commercial purposes with credit.",
|
|
||||||
"printing": "Allowed for non-commercial purposes with proper credit.",
|
|
||||||
"asset": ASSET_PATH / "by-nc.png",
|
|
||||||
},
|
|
||||||
5: {
|
|
||||||
"name": "CC Attribution NonCommercial NoDerivs (CC-BY-NC-ND)",
|
|
||||||
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivs 4.0 International License.",
|
|
||||||
"freedoms": "Allows sharing in original form for non-commercial purposes with credit; no modifications allowed.",
|
|
||||||
"printing": "Allowed for non-commercial purposes in original form with proper credit.",
|
|
||||||
"asset": ASSET_PATH / "by-nc-nd.png",
|
|
||||||
},
|
|
||||||
6: {
|
|
||||||
"name": "CC Attribution NonCommercial ShareAlike (CC-BY-NC-SA)",
|
|
||||||
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.",
|
|
||||||
"freedoms": "Allows reuse and modification for non-commercial purposes under the same license, with credit.",
|
|
||||||
"printing": "Allowed for non-commercial purposes with proper credit under the same license.",
|
|
||||||
"asset": ASSET_PATH / "by-nc-sa.png",
|
|
||||||
},
|
|
||||||
7: {
|
|
||||||
"name": "CC Attribution ShareAlike (CC-BY-SA)",
|
|
||||||
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.",
|
|
||||||
"freedoms": "Allows reuse and modification for any purpose under the same license, with credit.",
|
|
||||||
"printing": "Allowed with proper credit under the same license.",
|
|
||||||
"asset": ASSET_PATH / "by-sa.png",
|
|
||||||
},
|
|
||||||
8: {
|
|
||||||
"name": "CC Attribution NoDerivs (CC-BY-ND)",
|
|
||||||
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NoDerivs 4.0 International License.",
|
|
||||||
"freedoms": "Allows sharing in original form for any purpose with credit; no modifications allowed.",
|
|
||||||
"printing": "Allowed in original form with proper credit.",
|
|
||||||
"asset": ASSET_PATH / "by-nd.png",
|
|
||||||
},
|
|
||||||
} # Maps Wattpad Copyright IDs to their corresponding data.
|
|
||||||
|
|
||||||
with open(DATA_PATH / "stylesheet.css") as reader:
|
|
||||||
STYLESHEET = reader.read()
|
|
||||||
|
|
||||||
|
|
||||||
with open(DATA_PATH / "book.html") as reader:
|
|
||||||
TEMPLATE = reader.read()
|
|
||||||
|
|
||||||
|
|
||||||
class PDFGenerator(AbstractGenerator):
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
metadata: Story,
|
|
||||||
part_trees: List[BeautifulSoup],
|
|
||||||
cover: bytes,
|
|
||||||
images: List[List[bytes]] | None,
|
|
||||||
author: bytes,
|
|
||||||
):
|
|
||||||
self.story = metadata
|
|
||||||
self.parts = part_trees
|
|
||||||
self.cover = cover
|
|
||||||
self.images = images
|
|
||||||
self.author = author
|
|
||||||
|
|
||||||
self.book: _TemporaryFileWrapper = NamedTemporaryFile(suffix=".pdf")
|
|
||||||
self.content = TEMPLATE
|
|
||||||
|
|
||||||
def generate_chapters(self) -> dict[int, str]:
|
|
||||||
"""Return a dictionary of part_ids to content trees, with image URLs replaced with base64 encoded images if provided during initialization."""
|
|
||||||
data: dict[int, str] = {}
|
|
||||||
for idx, (part, tree) in enumerate(zip(self.story["parts"], self.parts)):
|
|
||||||
if self.images:
|
|
||||||
for img_idx, (img_data, img_tag) in enumerate(
|
|
||||||
zip(self.images[idx], tree.find_all("img"))
|
|
||||||
):
|
|
||||||
img_tag[
|
|
||||||
"src"
|
|
||||||
] = f"data:image/jpg;base64,{b64encode(img_data).decode()}"
|
|
||||||
|
|
||||||
data[part["id"]] = tree.prettify()
|
|
||||||
|
|
||||||
return data
|
|
||||||
|
|
||||||
def populate_template(self, parts: dict[int, str]):
|
|
||||||
"""Populate HTML Template with Story data."""
|
|
||||||
copyright = COPYRIGHT_DATA[self.story["copyright"]]
|
|
||||||
data = {
|
|
||||||
"statement": copyright["statement"].format(
|
|
||||||
username=self.story["user"]["username"],
|
|
||||||
published_year=self.story["createDate"].split("-", 2)[0],
|
|
||||||
),
|
|
||||||
"author": self.story["user"]["username"],
|
|
||||||
"freedoms": copyright["freedoms"],
|
|
||||||
"printing": copyright["printing"],
|
|
||||||
"book_id": self.story["id"],
|
|
||||||
"book_title": self.story["title"],
|
|
||||||
"cover": f"data:image/jpg;base64,{b64encode(self.cover).decode()}",
|
|
||||||
"username": self.story["user"]["username"],
|
|
||||||
"description": self.story["description"],
|
|
||||||
"avatar": b64encode(self.author).decode(),
|
|
||||||
"copyright": {
|
|
||||||
"data": b64encode(copyright["asset"].read_bytes()).decode()
|
|
||||||
if copyright["asset"]
|
|
||||||
else "",
|
|
||||||
"name": copyright["name"],
|
|
||||||
},
|
|
||||||
"parts": parts,
|
|
||||||
}
|
|
||||||
|
|
||||||
self.content: str = Template(self.content).render(data)
|
|
||||||
|
|
||||||
def generate_pdf(self):
|
|
||||||
"""Generate and write the PDF to a temporary file (self.book)."""
|
|
||||||
font_config = FontConfiguration()
|
|
||||||
|
|
||||||
stylesheet_obj = CSS(string=STYLESHEET, font_config=font_config)
|
|
||||||
|
|
||||||
html_obj = HTML(string=self.content)
|
|
||||||
html_obj.write_pdf(
|
|
||||||
self.book.name, stylesheets=[stylesheet_obj], font_config=font_config
|
|
||||||
)
|
|
||||||
|
|
||||||
def add_metadata(self):
|
|
||||||
"""Write metadata to generated PDF file at self.book, using ExifTool."""
|
|
||||||
|
|
||||||
clean_description = (
|
|
||||||
self.story["description"].strip().replace("\n", "$/")
|
|
||||||
) # exiftool doesn't parse \ns correctly, they support $/ for the same instead. `
` is another option.
|
|
||||||
|
|
||||||
metadata = {
|
|
||||||
"Author": self.story["user"]["username"],
|
|
||||||
"Title": self.story["title"],
|
|
||||||
"Subject": clean_description,
|
|
||||||
"CreationDate": self.story["createDate"],
|
|
||||||
"ModDate": self.story["modifyDate"],
|
|
||||||
"Keywords": ",".join(self.story["tags"]),
|
|
||||||
"Language": self.story["language"]["name"],
|
|
||||||
"Completed": self.story["completed"],
|
|
||||||
"MatureContent": self.story["mature"],
|
|
||||||
"Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader",
|
|
||||||
} # As per https://exiftool.org/TagNames/PDF.html
|
|
||||||
|
|
||||||
with ExifTool(config_file=DATA_PATH / "exiftool.config") as et:
|
|
||||||
# Custom configuration adds Completed and MatureContent tags.
|
|
||||||
# exiftool logger logs executed command
|
|
||||||
et.execute(
|
|
||||||
*(
|
|
||||||
[f"-{key}={value}" for key, value in metadata.items()]
|
|
||||||
+ [
|
|
||||||
"-overwrite_original",
|
|
||||||
self.book.file.name,
|
|
||||||
]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
def compile(self):
|
|
||||||
parts = self.generate_chapters()
|
|
||||||
self.populate_template(parts)
|
|
||||||
self.generate_pdf()
|
|
||||||
self.add_metadata()
|
|
||||||
return True
|
|
||||||
|
|
||||||
def dump(self) -> BytesIO:
|
|
||||||
self.book.seek(0)
|
|
||||||
buffer = BytesIO(self.book.read())
|
|
||||||
self.book.close()
|
|
||||||
|
|
||||||
return buffer
|
|
||||||
@@ -1,273 +1,182 @@
|
|||||||
import tempfile
|
|
||||||
from base64 import b64encode
|
from base64 import b64encode
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import List, cast
|
from pathlib import Path
|
||||||
|
from tempfile import NamedTemporaryFile, _TemporaryFileWrapper
|
||||||
|
from typing import Generator, List, cast
|
||||||
|
|
||||||
import bs4
|
from bs4 import BeautifulSoup, Tag
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from eliot import start_action
|
|
||||||
from exiftool import ExifTool
|
from exiftool import ExifTool
|
||||||
|
from jinja2 import Template
|
||||||
from weasyprint import CSS, HTML
|
from weasyprint import CSS, HTML
|
||||||
from weasyprint.text.fonts import FontConfiguration
|
from weasyprint.text.fonts import FontConfiguration
|
||||||
|
|
||||||
from ..logs import exiftool_logger
|
|
||||||
from ..models import Story
|
from ..models import Story
|
||||||
from ..utils import smart_trim
|
from .types import AbstractGenerator
|
||||||
|
|
||||||
|
DATA_PATH = Path(__file__).parent / "pdf"
|
||||||
|
ASSET_PATH = DATA_PATH / "assets"
|
||||||
|
|
||||||
async def fetch_image(*args, **kwargs):
|
COPYRIGHT_DATA = {
|
||||||
# TODO
|
|
||||||
raise NotImplementedError()
|
|
||||||
|
|
||||||
|
|
||||||
class PDFGenerator:
|
|
||||||
"""PDF Generation utilities"""
|
|
||||||
|
|
||||||
def __init__(self, data: Story, cover: bytes):
|
|
||||||
"""Initialize PDGenerator, create PDF Temporary file."""
|
|
||||||
self.data = data
|
|
||||||
self.file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=True)
|
|
||||||
self.cover = cover
|
|
||||||
self.content: str = ""
|
|
||||||
self.copyright = {
|
|
||||||
1: {
|
1: {
|
||||||
"name": "All Rights Reserved",
|
"name": "All Rights Reserved",
|
||||||
"statement": "©️ {published_year} by {username}. All Rights Reserved.",
|
"statement": "©️ {published_year} by {username}. All Rights Reserved.",
|
||||||
"freedoms": "No reuse, redistribution, or modification without permission.",
|
"freedoms": "No reuse, redistribution, or modification without permission.",
|
||||||
"printing": "Not allowed without explicit permission.",
|
"printing": "Not allowed without explicit permission.",
|
||||||
"image_url": None,
|
"asset": None,
|
||||||
},
|
},
|
||||||
2: {
|
2: {
|
||||||
"name": "Public Domain",
|
"name": "Public Domain",
|
||||||
"statement": "This work is in the public domain. Originally published in {published_year} by {username}.",
|
"statement": "This work is in the public domain. Originally published in {published_year} by {username}.",
|
||||||
"freedoms": "Free to use for any purpose without permission.",
|
"freedoms": "Free to use for any purpose without permission.",
|
||||||
"printing": "Allowed for personal or commercial purposes.",
|
"printing": "Allowed for personal or commercial purposes.",
|
||||||
"image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/cc-zero.png",
|
"asset": ASSET_PATH / "cc-zero.png",
|
||||||
},
|
},
|
||||||
3: {
|
3: {
|
||||||
"name": "Creative Commons Attribution (CC-BY)",
|
"name": "Creative Commons Attribution (CC-BY)",
|
||||||
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution 4.0 International License.",
|
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution 4.0 International License.",
|
||||||
"freedoms": "Allows reuse, redistribution, and modification with credit to the author.",
|
"freedoms": "Allows reuse, redistribution, and modification with credit to the author.",
|
||||||
"printing": "Allowed with proper credit.",
|
"printing": "Allowed with proper credit.",
|
||||||
"image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by.png",
|
"asset": ASSET_PATH / "by.png",
|
||||||
},
|
},
|
||||||
4: {
|
4: {
|
||||||
"name": "CC Attribution NonCommercial (CC-BY-NC)",
|
"name": "CC Attribution NonCommercial (CC-BY-NC)",
|
||||||
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.",
|
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.",
|
||||||
"freedoms": "Allows reuse and modification for non-commercial purposes with credit.",
|
"freedoms": "Allows reuse and modification for non-commercial purposes with credit.",
|
||||||
"printing": "Allowed for non-commercial purposes with proper credit.",
|
"printing": "Allowed for non-commercial purposes with proper credit.",
|
||||||
"image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc.png",
|
"asset": ASSET_PATH / "by-nc.png",
|
||||||
},
|
},
|
||||||
5: {
|
5: {
|
||||||
"name": "CC Attribution NonCommercial NoDerivs (CC-BY-NC-ND)",
|
"name": "CC Attribution NonCommercial NoDerivs (CC-BY-NC-ND)",
|
||||||
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivs 4.0 International License.",
|
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivs 4.0 International License.",
|
||||||
"freedoms": "Allows sharing in original form for non-commercial purposes with credit; no modifications allowed.",
|
"freedoms": "Allows sharing in original form for non-commercial purposes with credit; no modifications allowed.",
|
||||||
"printing": "Allowed for non-commercial purposes in original form with proper credit.",
|
"printing": "Allowed for non-commercial purposes in original form with proper credit.",
|
||||||
"image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-nd.png",
|
"asset": ASSET_PATH / "by-nc-nd.png",
|
||||||
},
|
},
|
||||||
6: {
|
6: {
|
||||||
"name": "CC Attribution NonCommercial ShareAlike (CC-BY-NC-SA)",
|
"name": "CC Attribution NonCommercial ShareAlike (CC-BY-NC-SA)",
|
||||||
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.",
|
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.",
|
||||||
"freedoms": "Allows reuse and modification for non-commercial purposes under the same license, with credit.",
|
"freedoms": "Allows reuse and modification for non-commercial purposes under the same license, with credit.",
|
||||||
"printing": "Allowed for non-commercial purposes with proper credit under the same license.",
|
"printing": "Allowed for non-commercial purposes with proper credit under the same license.",
|
||||||
"image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-sa.png",
|
"asset": ASSET_PATH / "by-nc-sa.png",
|
||||||
},
|
},
|
||||||
7: {
|
7: {
|
||||||
"name": "CC Attribution ShareAlike (CC-BY-SA)",
|
"name": "CC Attribution ShareAlike (CC-BY-SA)",
|
||||||
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.",
|
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.",
|
||||||
"freedoms": "Allows reuse and modification for any purpose under the same license, with credit.",
|
"freedoms": "Allows reuse and modification for any purpose under the same license, with credit.",
|
||||||
"printing": "Allowed with proper credit under the same license.",
|
"printing": "Allowed with proper credit under the same license.",
|
||||||
"image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-sa.png",
|
"asset": ASSET_PATH / "by-sa.png",
|
||||||
},
|
},
|
||||||
8: {
|
8: {
|
||||||
"name": "CC Attribution NoDerivs (CC-BY-ND)",
|
"name": "CC Attribution NoDerivs (CC-BY-ND)",
|
||||||
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NoDerivs 4.0 International License.",
|
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NoDerivs 4.0 International License.",
|
||||||
"freedoms": "Allows sharing in original form for any purpose with credit; no modifications allowed.",
|
"freedoms": "Allows sharing in original form for any purpose with credit; no modifications allowed.",
|
||||||
"printing": "Allowed in original form with proper credit.",
|
"printing": "Allowed in original form with proper credit.",
|
||||||
"image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nd.png",
|
"asset": ASSET_PATH / "by-nd.png",
|
||||||
},
|
},
|
||||||
|
} # Maps Wattpad Copyright IDs to their corresponding data.
|
||||||
|
|
||||||
|
with open(DATA_PATH / "stylesheet.css") as reader:
|
||||||
|
STYLESHEET = reader.read()
|
||||||
|
|
||||||
|
|
||||||
|
with open(DATA_PATH / "book.html") as reader:
|
||||||
|
TEMPLATE = reader.read()
|
||||||
|
|
||||||
|
|
||||||
|
class PDFGenerator(AbstractGenerator):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
metadata: Story,
|
||||||
|
part_trees: List[BeautifulSoup],
|
||||||
|
cover: bytes,
|
||||||
|
images: List[Generator[bytes]] | None,
|
||||||
|
author_image: bytes,
|
||||||
|
):
|
||||||
|
self.story = metadata
|
||||||
|
self.parts = part_trees
|
||||||
|
self.cover = cover
|
||||||
|
self.images = images
|
||||||
|
self.author = author_image
|
||||||
|
|
||||||
|
self.book: _TemporaryFileWrapper = NamedTemporaryFile(suffix=".pdf")
|
||||||
|
self.content = TEMPLATE
|
||||||
|
|
||||||
|
def generate_chapters(self) -> dict[int, str]:
|
||||||
|
"""Return a dictionary of part_ids to content trees, with image URLs replaced with base64 encoded images if provided during initialization."""
|
||||||
|
data: dict[int, str] = {}
|
||||||
|
for idx, (part, tree) in enumerate(zip(self.story["parts"], self.parts)):
|
||||||
|
if self.images:
|
||||||
|
for img_idx, (img_data, img_tag) in enumerate(
|
||||||
|
zip(self.images[idx], tree.find_all("img"))
|
||||||
|
):
|
||||||
|
img_tag["src"] = (
|
||||||
|
f"data:image/jpg;base64,{b64encode(img_data).decode()}"
|
||||||
|
)
|
||||||
|
|
||||||
|
data[part["id"]] = tree.prettify()
|
||||||
|
|
||||||
|
return data
|
||||||
|
|
||||||
|
def populate_template(self, parts: dict[int, str]):
|
||||||
|
"""Populate HTML Template with Story data."""
|
||||||
|
copyright = COPYRIGHT_DATA[self.story["copyright"]]
|
||||||
|
data = {
|
||||||
|
"statement": copyright["statement"].format(
|
||||||
|
username=self.story["user"]["username"],
|
||||||
|
published_year=self.story["createDate"].split("-", 2)[0],
|
||||||
|
),
|
||||||
|
"author": self.story["user"]["username"],
|
||||||
|
"freedoms": copyright["freedoms"],
|
||||||
|
"printing": copyright["printing"],
|
||||||
|
"book_id": self.story["id"],
|
||||||
|
"book_title": self.story["title"],
|
||||||
|
"cover": f"data:image/jpg;base64,{b64encode(self.cover).decode()}",
|
||||||
|
"username": self.story["user"]["username"],
|
||||||
|
"description": self.story["description"],
|
||||||
|
"avatar": b64encode(self.author).decode(),
|
||||||
|
"copyright": {
|
||||||
|
"data": b64encode(copyright["asset"].read_bytes()).decode()
|
||||||
|
if copyright["asset"]
|
||||||
|
else "",
|
||||||
|
"name": copyright["name"],
|
||||||
|
},
|
||||||
|
"parts": parts,
|
||||||
}
|
}
|
||||||
|
|
||||||
with open("./pdf/stylesheet.css") as reader:
|
self.content: str = Template(self.content).render(data)
|
||||||
self.stylesheet = reader.read()
|
|
||||||
with open("./pdf/book.html") as reader:
|
|
||||||
self.template = reader.read()
|
|
||||||
|
|
||||||
async def generate_cover_and_copyright_html(
|
|
||||||
self,
|
|
||||||
) -> str:
|
|
||||||
"""Generate Cover and Copyright file, fetch copyright image (cached), use self.cover for cover."""
|
|
||||||
|
|
||||||
copyright_data = self.copyright[self.data["copyright"]]
|
|
||||||
|
|
||||||
template = self.template
|
|
||||||
about_copyright = (
|
|
||||||
template.replace(
|
|
||||||
"{statement}",
|
|
||||||
copyright_data["statement"].format(
|
|
||||||
username=self.data["user"]["username"],
|
|
||||||
published_year=self.data["createDate"].split("-", 2)[0],
|
|
||||||
),
|
|
||||||
)
|
|
||||||
.replace("{author}", self.data["user"]["username"])
|
|
||||||
.replace("{freedoms}", copyright_data["freedoms"])
|
|
||||||
.replace(
|
|
||||||
"{printing}",
|
|
||||||
copyright_data["printing"],
|
|
||||||
)
|
|
||||||
.replace("{book_id}", self.data["id"])
|
|
||||||
.replace("{book_title}", self.data["title"])
|
|
||||||
)
|
|
||||||
|
|
||||||
copyright_image = (
|
|
||||||
await fetch_image(copyright_data["image_url"], should_cache=True)
|
|
||||||
if copyright_data["image_url"]
|
|
||||||
else None
|
|
||||||
)
|
|
||||||
image_block = (
|
|
||||||
"""<img src="{image_url}"
|
|
||||||
alt="{name}"
|
|
||||||
width="88"
|
|
||||||
height="31"
|
|
||||||
id="copyright-license-image">""".format(
|
|
||||||
image_url=f"data:image/jpg;base64,{b64encode(copyright_image).decode()}",
|
|
||||||
name=copyright_data["name"],
|
|
||||||
)
|
|
||||||
if copyright_image
|
|
||||||
else ""
|
|
||||||
)
|
|
||||||
about_copyright = (
|
|
||||||
about_copyright.replace(
|
|
||||||
"{copyright_image}",
|
|
||||||
image_block,
|
|
||||||
)
|
|
||||||
if image_block
|
|
||||||
else about_copyright.replace("{copyright_image}", "")
|
|
||||||
)
|
|
||||||
about_copyright = about_copyright.replace(
|
|
||||||
"{cover}", f"data:image/jpg;base64,{b64encode(self.cover).decode()}"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.template = about_copyright
|
|
||||||
return about_copyright
|
|
||||||
|
|
||||||
async def generate_about_author_chapter(self) -> str:
|
|
||||||
"""Generate About the Author file, fetch avatar."""
|
|
||||||
author_avatar = (
|
|
||||||
await fetch_image(
|
|
||||||
self.data["user"]["avatar"].replace("128", "512")
|
|
||||||
) # Increase image resolution
|
|
||||||
if self.data["user"]["avatar"]
|
|
||||||
else None
|
|
||||||
)
|
|
||||||
about_author = self.template.replace(
|
|
||||||
"{username}", self.data["user"]["username"]
|
|
||||||
).replace("{description}", smart_trim(self.data["user"]["description"]))
|
|
||||||
|
|
||||||
about_author = (
|
|
||||||
about_author.replace(
|
|
||||||
"{avatar}",
|
|
||||||
f"""
|
|
||||||
<img src="data:image/jpg;base64,{b64encode(author_avatar).decode()}" alt="Author's profile picture" id="author-profile-picture">""",
|
|
||||||
)
|
|
||||||
if author_avatar
|
|
||||||
else about_author.replace("{avatar}", "")
|
|
||||||
)
|
|
||||||
|
|
||||||
self.template = about_author
|
|
||||||
return about_author
|
|
||||||
|
|
||||||
def generate_toc(self):
|
|
||||||
ids = [part["id"] for part in self.data["parts"]]
|
|
||||||
clean = BeautifulSoup(
|
|
||||||
"""
|
|
||||||
<section id="contents" class="toc">
|
|
||||||
<h1>Table of Contents</h1>
|
|
||||||
<ul></ul>
|
|
||||||
</section>
|
|
||||||
""",
|
|
||||||
"html.parser",
|
|
||||||
) # html.parser doesn't create <html>/<body> tags automatically
|
|
||||||
|
|
||||||
ul = cast(bs4.Tag, clean.find("ul"))
|
|
||||||
for part_id in ids:
|
|
||||||
li = clean.new_tag("li")
|
|
||||||
a = clean.new_tag("a")
|
|
||||||
a["href"] = f"#{part_id}"
|
|
||||||
li.append(a)
|
|
||||||
ul.append(li)
|
|
||||||
|
|
||||||
insert_point = cast(bs4.Tag, self.tree.find("div", {"id": "book"}))
|
|
||||||
insert_point.append(clean)
|
|
||||||
return str(clean)
|
|
||||||
|
|
||||||
async def add_chapters(
|
|
||||||
self, contents: List[bs4.Tag], download_images: bool = False
|
|
||||||
):
|
|
||||||
"""Add chapters to the PDF, downloading images if necessary. Also add Cover, Copyright, and About the Author pages."""
|
|
||||||
|
|
||||||
# # Cover and Copyright Page
|
|
||||||
await self.generate_cover_and_copyright_html()
|
|
||||||
await self.generate_about_author_chapter()
|
|
||||||
self.tree = BeautifulSoup(self.template, "lxml")
|
|
||||||
|
|
||||||
self.generate_toc()
|
|
||||||
for part, content in zip(self.data["parts"], contents):
|
|
||||||
insert_point = cast(bs4.Tag, self.tree.find("div", {"id": "book"}))
|
|
||||||
insert_point.append(content)
|
|
||||||
|
|
||||||
yield part["title"]
|
|
||||||
|
|
||||||
# # About the Author page
|
|
||||||
# about_author_html = await self.generate_about_author_chapter()
|
|
||||||
|
|
||||||
# chapters.insert(0, cover_and_copyright_html)
|
|
||||||
# chapters.append(about_author_html)
|
|
||||||
|
|
||||||
with start_action(
|
|
||||||
action_type="generate_pdf",
|
|
||||||
output_filename=self.file.name,
|
|
||||||
title=self.data["title"],
|
|
||||||
):
|
|
||||||
# PDF Generation with wkhtmltopdf, written to self.file
|
|
||||||
|
|
||||||
# At this stage, we have a bunch of HTML Files representing all the chapters that need to be generated. PDFKit handles ToC generation, so that's not included.
|
|
||||||
|
|
||||||
|
def generate_pdf(self):
|
||||||
|
"""Generate and write the PDF to a temporary file (self.book)."""
|
||||||
font_config = FontConfiguration()
|
font_config = FontConfiguration()
|
||||||
|
|
||||||
stylesheet_obj = CSS(string=self.stylesheet, font_config=font_config)
|
stylesheet_obj = CSS(string=STYLESHEET, font_config=font_config)
|
||||||
|
|
||||||
html_obj = HTML(string=str(self.tree))
|
html_obj = HTML(string=self.content)
|
||||||
html_obj.write_pdf(
|
html_obj.write_pdf(
|
||||||
self.file.name, stylesheets=[stylesheet_obj], font_config=font_config
|
self.book.name, stylesheets=[stylesheet_obj], font_config=font_config
|
||||||
)
|
)
|
||||||
|
|
||||||
with start_action(action_type="add_metadata") as action:
|
def add_metadata(self):
|
||||||
# Metadata generation with Exiftool
|
"""Write metadata to generated PDF file at self.book, using ExifTool."""
|
||||||
|
|
||||||
clean_description = (
|
clean_description = (
|
||||||
self.data["description"].strip().replace("\n", "$/")
|
self.story["description"].strip().replace("\n", "$/")
|
||||||
) # exiftool doesn't parse \ns correctly, they support $/ for the same instead. `&#xa;` is another option.
|
) # exiftool doesn't parse \ns correctly, they support $/ for the same instead. `&#xa;` is another option.
|
||||||
|
|
||||||
action.log(f"clean_description: {clean_description}")
|
|
||||||
|
|
||||||
metadata = {
|
metadata = {
|
||||||
"Author": self.data["user"]["username"],
|
"Author": self.story["user"]["username"],
|
||||||
"Title": self.data["title"],
|
"Title": self.story["title"],
|
||||||
"Subject": clean_description,
|
"Subject": clean_description,
|
||||||
"CreationDate": self.data["createDate"],
|
"CreationDate": self.story["createDate"],
|
||||||
"ModDate": self.data["modifyDate"],
|
"ModDate": self.story["modifyDate"],
|
||||||
"Keywords": ",".join(self.data["tags"]),
|
"Keywords": ",".join(self.story["tags"]),
|
||||||
"Language": self.data["language"]["name"],
|
"Language": self.story["language"]["name"],
|
||||||
"Completed": self.data["completed"],
|
"Completed": self.story["completed"],
|
||||||
"MatureContent": self.data["mature"],
|
"MatureContent": self.story["mature"],
|
||||||
"Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader",
|
"Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader",
|
||||||
} # As per https://exiftool.org/TagNames/PDF.html
|
} # As per https://exiftool.org/TagNames/PDF.html
|
||||||
|
|
||||||
action.log(f"options: {metadata}")
|
with ExifTool(config_file=DATA_PATH / "exiftool.config") as et:
|
||||||
|
|
||||||
with ExifTool(
|
|
||||||
config_file="../exiftool.config", logger=exiftool_logger
|
|
||||||
) as et:
|
|
||||||
# Custom configuration adds Completed and MatureContent tags.
|
# Custom configuration adds Completed and MatureContent tags.
|
||||||
# exiftool logger logs executed command
|
# exiftool logger logs executed command
|
||||||
et.execute(
|
et.execute(
|
||||||
@@ -275,14 +184,21 @@ id="copyright-license-image">""".format(
|
|||||||
[f"-{key}={value}" for key, value in metadata.items()]
|
[f"-{key}={value}" for key, value in metadata.items()]
|
||||||
+ [
|
+ [
|
||||||
"-overwrite_original",
|
"-overwrite_original",
|
||||||
self.file.file.name,
|
self.book.file.name,
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def compile(self):
    """Run every build step in order and return True.

    The per-part HTML from generate_chapters() is fed into
    populate_template(); the PDF is then written and its metadata added.
    """
    self.populate_template(self.generate_chapters())
    self.generate_pdf()
    self.add_metadata()
    return True
|
||||||
|
|
||||||
def dump(self) -> BytesIO:
|
def dump(self) -> BytesIO:
|
||||||
self.file.seek(0)
|
self.book.seek(0)
|
||||||
buffer = BytesIO(self.file.read())
|
buffer = BytesIO(self.book.read())
|
||||||
self.file.close()
|
self.book.close()
|
||||||
|
|
||||||
return buffer
|
return buffer
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from tempfile import _TemporaryFileWrapper
|
from tempfile import _TemporaryFileWrapper
|
||||||
from typing import List, Literal
|
from typing import Generator, List, Literal
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from ebooklib.epub import EpubBook
|
from ebooklib.epub import EpubBook
|
||||||
@@ -23,7 +23,7 @@ class AbstractGenerator:
|
|||||||
metadata: Story,
|
metadata: Story,
|
||||||
part_trees: List[BeautifulSoup],
|
part_trees: List[BeautifulSoup],
|
||||||
cover: bytes,
|
cover: bytes,
|
||||||
images: List[List[bytes]] | None,
|
images: List[Generator[bytes]] | None,
|
||||||
):
|
):
|
||||||
self.story = metadata
|
self.story = metadata
|
||||||
self.parts = part_trees
|
self.parts = part_trees
|
||||||
|
|||||||
+27
-19
@@ -7,6 +7,7 @@ from typing import Optional
|
|||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
|
|
||||||
from aiohttp import ClientResponseError
|
from aiohttp import ClientResponseError
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
from eliot import start_action
|
from eliot import start_action
|
||||||
from fastapi import FastAPI, HTTPException, Request
|
from fastapi import FastAPI, HTTPException, Request
|
||||||
from fastapi.responses import (
|
from fastapi.responses import (
|
||||||
@@ -19,6 +20,7 @@ from fastapi.staticfiles import StaticFiles
|
|||||||
|
|
||||||
from create_book import (
|
from create_book import (
|
||||||
EPUBGenerator,
|
EPUBGenerator,
|
||||||
|
PDFGenerator,
|
||||||
StoryNotFoundError,
|
StoryNotFoundError,
|
||||||
WattpadError,
|
WattpadError,
|
||||||
fetch_cookies,
|
fetch_cookies,
|
||||||
@@ -26,10 +28,10 @@ from create_book import (
|
|||||||
fetch_story,
|
fetch_story,
|
||||||
fetch_story_content_zip,
|
fetch_story_content_zip,
|
||||||
fetch_story_from_partId,
|
fetch_story_from_partId,
|
||||||
generate_clean_part_html,
|
|
||||||
logger,
|
logger,
|
||||||
slugify,
|
slugify,
|
||||||
)
|
)
|
||||||
|
from create_book.parser import clean_tree, download_tree_images
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
BUILD_PATH = Path(__file__).parent / "build"
|
BUILD_PATH = Path(__file__).parent / "build"
|
||||||
@@ -73,7 +75,7 @@ app.add_middleware(RequestCancelledMiddleware)
|
|||||||
|
|
||||||
|
|
||||||
class DownloadFormat(Enum):
|
class DownloadFormat(Enum):
|
||||||
# pdf = "pdf"
|
pdf = "pdf"
|
||||||
epub = "epub"
|
epub = "epub"
|
||||||
|
|
||||||
|
|
||||||
@@ -169,30 +171,36 @@ async def handle_download(
|
|||||||
if not cover_data:
|
if not cover_data:
|
||||||
raise HTTPException(status_code=422)
|
raise HTTPException(status_code=422)
|
||||||
|
|
||||||
match format:
|
|
||||||
case DownloadFormat.epub:
|
|
||||||
book = EPUBGenerator(metadata, cover_data)
|
|
||||||
media_type = "application/epub+zip"
|
|
||||||
# case DownloadFormat.pdf:
|
|
||||||
# book = PDFGenerator(metadata, cover_data)
|
|
||||||
# media_type = "application/pdf"
|
|
||||||
|
|
||||||
logger.info(f"Retrieved story metadata and cover ({story_id=})")
|
|
||||||
|
|
||||||
story_zip = await fetch_story_content_zip(story_id, cookies)
|
story_zip = await fetch_story_content_zip(story_id, cookies)
|
||||||
archive = ZipFile(story_zip, "r")
|
archive = ZipFile(story_zip, "r")
|
||||||
|
|
||||||
part_contents = [
|
part_trees: list[BeautifulSoup] = [
|
||||||
generate_clean_part_html(
|
clean_tree(
|
||||||
part, archive.read(str(part["id"])).decode("utf-8")
|
part["title"], part["id"], archive.read(str(part["id"])).decode("utf-8")
|
||||||
)
|
)
|
||||||
for part in metadata["parts"]
|
for part in metadata["parts"]
|
||||||
]
|
]
|
||||||
|
|
||||||
async for title in book.add_chapters(
|
if download_images:
|
||||||
part_contents, download_images=download_images
|
images = [await download_tree_images(tree) for tree in part_trees]
|
||||||
):
|
|
||||||
...
|
match format:
|
||||||
|
case DownloadFormat.epub:
|
||||||
|
book = EPUBGenerator(metadata, part_trees, cover_data, images)
|
||||||
|
media_type = "application/epub+zip"
|
||||||
|
case DownloadFormat.pdf:
|
||||||
|
author_image = await fetch_image(
|
||||||
|
metadata["user"]["avatar"].replace("-256-", "-512-")
|
||||||
|
)
|
||||||
|
if not author_image:
|
||||||
|
raise HTTPException(status_code=422)
|
||||||
|
|
||||||
|
book = PDFGenerator(
|
||||||
|
metadata, part_trees, cover_data, images, author_image
|
||||||
|
)
|
||||||
|
media_type = "application/pdf"
|
||||||
|
|
||||||
|
logger.info(f"Retrieved story metadata and cover ({story_id=})")
|
||||||
|
|
||||||
book_buffer = book.dump()
|
book_buffer = book.dump()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user