feat(api): Reconstruct tree from Content HTML, move PDF Template reads to Init
This commit is contained in:
+171
-85
@@ -1,5 +1,5 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from typing import Dict, List, Optional, Tuple
|
from typing import List, Optional, Tuple, cast
|
||||||
from typing_extensions import TypedDict
|
from typing_extensions import TypedDict
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
@@ -244,72 +244,6 @@ class Story(TypedDict):
|
|||||||
|
|
||||||
story_ta = TypeAdapter(Story)
|
story_ta = TypeAdapter(Story)
|
||||||
|
|
||||||
# --- PDF Dependencies --- #
|
|
||||||
|
|
||||||
# Licence metadata keyed by the story's numeric copyright code (1-8).
# Each entry provides:
#   name      - human-readable licence name
#   statement - template with {published_year} and {username} placeholders,
#               filled in with str.format()
#   freedoms  - short summary of what reuse the licence allows
#   printing  - short summary of the printing terms
#   image_url - licence badge URL, or None when the licence has no badge
# All badge URLs use HTTPS; the host already served three of them over HTTPS.
wp_copyright_data: Dict[int, CopyrightData] = {
    1: {
        "name": "All Rights Reserved",
        "statement": "©️ {published_year} by {username}. All Rights Reserved.",
        "freedoms": "No reuse, redistribution, or modification without permission.",
        "printing": "Not allowed without explicit permission.",
        "image_url": None,
    },
    2: {
        "name": "Public Domain",
        "statement": "This work is in the public domain. Originally published in {published_year} by {username}.",
        "freedoms": "Free to use for any purpose without permission.",
        "printing": "Allowed for personal or commercial purposes.",
        "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/cc-zero.png",
    },
    3: {
        "name": "Creative Commons Attribution (CC-BY)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution 4.0 International License.",
        "freedoms": "Allows reuse, redistribution, and modification with credit to the author.",
        "printing": "Allowed with proper credit.",
        "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by.png",
    },
    4: {
        "name": "CC Attribution NonCommercial (CC-BY-NC)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.",
        "freedoms": "Allows reuse and modification for non-commercial purposes with credit.",
        "printing": "Allowed for non-commercial purposes with proper credit.",
        "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc.png",
    },
    5: {
        "name": "CC Attribution NonCommercial NoDerivs (CC-BY-NC-ND)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivs 4.0 International License.",
        "freedoms": "Allows sharing in original form for non-commercial purposes with credit; no modifications allowed.",
        "printing": "Allowed for non-commercial purposes in original form with proper credit.",
        "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-nd.png",
    },
    6: {
        "name": "CC Attribution NonCommercial ShareAlike (CC-BY-NC-SA)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.",
        "freedoms": "Allows reuse and modification for non-commercial purposes under the same license, with credit.",
        "printing": "Allowed for non-commercial purposes with proper credit under the same license.",
        "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-sa.png",
    },
    7: {
        "name": "CC Attribution ShareAlike (CC-BY-SA)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.",
        "freedoms": "Allows reuse and modification for any purpose under the same license, with credit.",
        "printing": "Allowed with proper credit under the same license.",
        "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-sa.png",
    },
    8: {
        "name": "CC Attribution NoDerivs (CC-BY-ND)",
        "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NoDerivs 4.0 International License.",
        "freedoms": "Allows sharing in original form for any purpose with credit; no modifications allowed.",
        "printing": "Allowed in original form with proper credit.",
        "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nd.png",
    },
}
|
|
||||||
|
|
||||||
|
|
||||||
# Read the cover/copyright and about-the-author HTML templates once, at import
# time. Paths are relative to the process working directory.
# The encoding is pinned so decoding does not depend on the host locale.
# NOTE(review): assumes both templates are saved as UTF-8 — confirm.
with open("./pdf/cover_and_copyright.html", encoding="utf-8") as reader:
    copyright_template = reader.read()
with open("./pdf/author.html", encoding="utf-8") as reader:
    author_template = reader.read()
|
|
||||||
|
|
||||||
# --- Exceptions --- #
|
# --- Exceptions --- #
|
||||||
|
|
||||||
@@ -534,21 +468,88 @@ class PDFGenerator:
|
|||||||
self.data = data
|
self.data = data
|
||||||
self.file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=True)
|
self.file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=True)
|
||||||
self.cover = cover
|
self.cover = cover
|
||||||
|
self.content: str = ""
|
||||||
|
self.copyright = {
|
||||||
|
1: {
|
||||||
|
"name": "All Rights Reserved",
|
||||||
|
"statement": "©️ {published_year} by {username}. All Rights Reserved.",
|
||||||
|
"freedoms": "No reuse, redistribution, or modification without permission.",
|
||||||
|
"printing": "Not allowed without explicit permission.",
|
||||||
|
"image_url": None,
|
||||||
|
},
|
||||||
|
2: {
|
||||||
|
"name": "Public Domain",
|
||||||
|
"statement": "This work is in the public domain. Originally published in {published_year} by {username}.",
|
||||||
|
"freedoms": "Free to use for any purpose without permission.",
|
||||||
|
"printing": "Allowed for personal or commercial purposes.",
|
||||||
|
"image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/cc-zero.png",
|
||||||
|
},
|
||||||
|
3: {
|
||||||
|
"name": "Creative Commons Attribution (CC-BY)",
|
||||||
|
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution 4.0 International License.",
|
||||||
|
"freedoms": "Allows reuse, redistribution, and modification with credit to the author.",
|
||||||
|
"printing": "Allowed with proper credit.",
|
||||||
|
"image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by.png",
|
||||||
|
},
|
||||||
|
4: {
|
||||||
|
"name": "CC Attribution NonCommercial (CC-BY-NC)",
|
||||||
|
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.",
|
||||||
|
"freedoms": "Allows reuse and modification for non-commercial purposes with credit.",
|
||||||
|
"printing": "Allowed for non-commercial purposes with proper credit.",
|
||||||
|
"image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc.png",
|
||||||
|
},
|
||||||
|
5: {
|
||||||
|
"name": "CC Attribution NonCommercial NoDerivs (CC-BY-NC-ND)",
|
||||||
|
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivs 4.0 International License.",
|
||||||
|
"freedoms": "Allows sharing in original form for non-commercial purposes with credit; no modifications allowed.",
|
||||||
|
"printing": "Allowed for non-commercial purposes in original form with proper credit.",
|
||||||
|
"image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-nd.png",
|
||||||
|
},
|
||||||
|
6: {
|
||||||
|
"name": "CC Attribution NonCommercial ShareAlike (CC-BY-NC-SA)",
|
||||||
|
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.",
|
||||||
|
"freedoms": "Allows reuse and modification for non-commercial purposes under the same license, with credit.",
|
||||||
|
"printing": "Allowed for non-commercial purposes with proper credit under the same license.",
|
||||||
|
"image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-sa.png",
|
||||||
|
},
|
||||||
|
7: {
|
||||||
|
"name": "CC Attribution ShareAlike (CC-BY-SA)",
|
||||||
|
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.",
|
||||||
|
"freedoms": "Allows reuse and modification for any purpose under the same license, with credit.",
|
||||||
|
"printing": "Allowed with proper credit under the same license.",
|
||||||
|
"image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-sa.png",
|
||||||
|
},
|
||||||
|
8: {
|
||||||
|
"name": "CC Attribution NoDerivs (CC-BY-ND)",
|
||||||
|
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NoDerivs 4.0 International License.",
|
||||||
|
"freedoms": "Allows sharing in original form for any purpose with credit; no modifications allowed.",
|
||||||
|
"printing": "Allowed in original form with proper credit.",
|
||||||
|
"image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nd.png",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
async def genernate_cover_and_copyright_file(
|
with open("./pdf/stylesheet.css") as reader:
|
||||||
|
self.stylesheet = reader.read()
|
||||||
|
with open("./pdf/book.html") as reader:
|
||||||
|
self.template = reader.read()
|
||||||
|
|
||||||
|
async def genernate_cover_and_copyright_html(
|
||||||
self,
|
self,
|
||||||
) -> tempfile._TemporaryFileWrapper:
|
) -> str:
|
||||||
"""Generate Cover and Copyright file, fetch copyright image (cached), use self.cover for cover."""
|
"""Generate Cover and Copyright file, fetch copyright image (cached), use self.cover for cover."""
|
||||||
|
|
||||||
copyright_data = wp_copyright_data[self.data["copyright"]]
|
copyright_data = self.copyright[self.data["copyright"]]
|
||||||
|
|
||||||
|
template = self.template
|
||||||
about_copyright = (
|
about_copyright = (
|
||||||
copyright_template.replace(
|
template.replace(
|
||||||
"{statement}",
|
"{statement}",
|
||||||
copyright_data["statement"].format(
|
copyright_data["statement"].format(
|
||||||
username=self.data["user"]["username"],
|
username=self.data["user"]["username"],
|
||||||
published_year=self.data["createDate"].split("-", 2)[0],
|
published_year=self.data["createDate"].split("-", 2)[0],
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
.replace("{author}", self.data["user"]["username"])
|
||||||
.replace("{freedoms}", copyright_data["freedoms"])
|
.replace("{freedoms}", copyright_data["freedoms"])
|
||||||
.replace(
|
.replace(
|
||||||
"{printing}",
|
"{printing}",
|
||||||
@@ -568,7 +569,7 @@ class PDFGenerator:
|
|||||||
alt="{name}"
|
alt="{name}"
|
||||||
width="88"
|
width="88"
|
||||||
height="31"
|
height="31"
|
||||||
style="margin-bottom: 1rem;">""".format(
|
id="copyright-license-image">""".format(
|
||||||
image_url=f"data:image/jpg;base64,{b64encode(copyright_image).decode()}",
|
image_url=f"data:image/jpg;base64,{b64encode(copyright_image).decode()}",
|
||||||
name=copyright_data["name"],
|
name=copyright_data["name"],
|
||||||
)
|
)
|
||||||
@@ -587,15 +588,10 @@ style="margin-bottom: 1rem;">""".format(
|
|||||||
"{cover}", f"data:image/jpg;base64,{b64encode(self.cover).decode()}"
|
"{cover}", f"data:image/jpg;base64,{b64encode(self.cover).decode()}"
|
||||||
)
|
)
|
||||||
|
|
||||||
cover_and_copyright_file = tempfile.NamedTemporaryFile(
|
self.template = about_copyright
|
||||||
suffix=".html", delete=True
|
return about_copyright
|
||||||
)
|
|
||||||
cover_and_copyright_file.write(about_copyright.encode())
|
|
||||||
cover_and_copyright_file.seek(0)
|
|
||||||
|
|
||||||
return cover_and_copyright_file
|
async def generate_about_author_chapter(self) -> str:
|
||||||
|
|
||||||
async def generate_about_author_file(self) -> tempfile._TemporaryFileWrapper:
|
|
||||||
"""Generate About the Author file, fetch avatar."""
|
"""Generate About the Author file, fetch avatar."""
|
||||||
author_avatar = (
|
author_avatar = (
|
||||||
await fetch_image(
|
await fetch_image(
|
||||||
@@ -604,7 +600,7 @@ style="margin-bottom: 1rem;">""".format(
|
|||||||
if self.data["user"]["avatar"]
|
if self.data["user"]["avatar"]
|
||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
about_author = author_template.replace(
|
about_author = self.template.replace(
|
||||||
"{username}", self.data["user"]["username"]
|
"{username}", self.data["user"]["username"]
|
||||||
).replace("{description}", smart_trim(self.data["user"]["description"]))
|
).replace("{description}", smart_trim(self.data["user"]["description"]))
|
||||||
|
|
||||||
@@ -617,11 +613,101 @@ style="margin-bottom: 1rem;">""".format(
|
|||||||
if author_avatar
|
if author_avatar
|
||||||
else about_author.replace("{avatar}", "")
|
else about_author.replace("{avatar}", "")
|
||||||
)
|
)
|
||||||
about_author_file = tempfile.NamedTemporaryFile(suffix=".html", delete=True)
|
|
||||||
about_author_file.write(about_author.encode())
|
|
||||||
about_author_file.seek(0)
|
|
||||||
|
|
||||||
return about_author_file
|
return about_author
|
||||||
|
|
||||||
|
def generate_clean_part_html(self, part: Part, content: str) -> str:
    """Build a cleaned ``<section>`` for one chapter and add it to the book tree.

    ``content`` is parsed with lxml and the direct children of every ``<p>``
    are walked in document order:

    * ``<img>`` becomes a fresh ``<img src=...>`` followed by a ``<br>``;
    * ``<b>`` / ``<i>`` become their own paragraph holding only the tag's text;
    * non-empty text nodes become their own paragraph;
    * ``<br>`` and every other tag are dropped.

    The section is appended to the ``<div id="book">`` element of
    ``self.tree``.

    Args:
        part: Chapter metadata; ``part["id"]`` and ``part["title"]`` are used
            for the section/heading ids and the heading text.
        content: Raw chapter HTML.

    Returns:
        The serialized ``<section>`` element.
    """
    chapter_id = part["id"]

    # Build the skeleton from tag objects rather than parsing an f-string, so
    # a title containing "<" or "&" is escaped instead of parsed as markup.
    clean = BeautifulSoup("", "html.parser")
    section = clean.new_tag(
        "section", attrs={"id": f"section_{chapter_id}", "class": "chapitre"}
    )
    heading = clean.new_tag(
        "h1", attrs={"id": f"{chapter_id}", "class": "chapter-title"}
    )
    heading.append(clean.new_string(str(part["title"])))
    section.append(heading)

    source = BeautifulSoup(content, "lxml")
    for paragraph in source.find_all("p"):
        for node in paragraph.children:
            if isinstance(node, bs4.element.Tag):
                if node.name == "img":
                    src = node.get("src")
                    if src is None:
                        # An image without a source has nothing to render.
                        continue
                    image = clean.new_tag("img")
                    image["src"] = src
                    section.append(image)
                    section.append(clean.new_tag("br"))
                elif node.name in ("b", "i"):
                    # Keep the emphasis but flatten nested markup to text.
                    wrapper = clean.new_tag("p")
                    inline = clean.new_tag(node.name)
                    inline.append(clean.new_string(node.text))
                    wrapper.append(inline)
                    section.append(wrapper)
                # <br> and any other tag are intentionally not copied.
            elif isinstance(node, bs4.element.NavigableString):
                if not node:
                    continue
                wrapper = clean.new_tag("p")
                wrapper.append(clean.new_string(node.text))
                section.append(wrapper)

    # NOTE(review): assumes self.tree always contains <div id="book">;
    # a missing div surfaces as AttributeError on the append below.
    insert_point = cast(bs4.Tag, self.tree.find("div", {"id": "book"}))
    insert_point.append(section)
    # Serialize the section itself: appending moved it out of `clean`, so
    # str(clean) would no longer contain it.
    return str(section)
|
||||||
|
|
||||||
|
def generate_toc(self) -> str:
    """Build the table-of-contents ``<section>`` and add it to the book tree.

    One ``<li><a href="#<part id>"></a></li>`` entry is created per item in
    ``self.data["parts"]``, in order. The section is appended to the
    ``<div id="book">`` element of ``self.tree``.

    Returns:
        The serialized table-of-contents ``<section>`` element.
    """
    toc = BeautifulSoup(
        """
        <section id="contents" class="toc">
            <h2>Table of Contents</h2>
            <ul></ul>
        </section>
        """,
        "html.parser",
    )  # html.parser doesn't create <html>/<body> tags automatically

    section = cast(bs4.Tag, toc.find("section"))
    entries = cast(bs4.Tag, toc.find("ul"))
    for part in self.data["parts"]:
        item = toc.new_tag("li")
        link = toc.new_tag("a")
        # NOTE(review): the anchor carries no text; presumably the stylesheet
        # generates the label from the link target — confirm.
        link["href"] = f"#{part['id']}"
        item.append(link)
        entries.append(item)

    # Insert the <section> element, not the BeautifulSoup document object,
    # matching how chapter sections are added to the tree.
    insert_point = cast(bs4.Tag, self.tree.find("div", {"id": "book"}))
    insert_point.append(section)
    return str(section)
|
||||||
|
|
||||||
async def add_chapters(self, contents: List[str], download_images: bool = False):
|
async def add_chapters(self, contents: List[str], download_images: bool = False):
|
||||||
"""Add chapters to the PDF, downloading images if necessary. Also add Cover, Copyright, and About the Author pages."""
|
"""Add chapters to the PDF, downloading images if necessary. Also add Cover, Copyright, and About the Author pages."""
|
||||||
|
|||||||
Reference in New Issue
Block a user