feat(api): PDF Downloads functional!

Image downloads borked
This commit is contained in:
TheOnlyWayUp
2024-12-06 15:32:26 +00:00
parent 6c6c8f81b6
commit 40bad57eac
+68 -12
View File
@@ -16,6 +16,7 @@ from eliot.stdlib import EliotHandler
from dotenv import load_dotenv from dotenv import load_dotenv
from ebooklib import epub from ebooklib import epub
from ebooklib.epub import EpubBook from ebooklib.epub import EpubBook
from exiftool import ExifTool
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from pydantic import TypeAdapter, model_validator, field_validator from pydantic import TypeAdapter, model_validator, field_validator
from pydantic_settings import BaseSettings from pydantic_settings import BaseSettings
@@ -421,22 +422,77 @@ class PDFGenerator:
async def add_chapters(self, contents: List[str], download_images: bool = False):
    """Render every chapter into the output PDF and tag it with metadata.

    This is an async generator: it yields each chapter's title once that
    chapter's HTML has been staged in a temporary file. After the last
    chapter has been yielded, all staged files are converted into a single
    PDF at ``self.file.name`` and the story metadata is written with ExifTool.

    Args:
        contents: HTML of each chapter, paired positionally with
            ``self.data["parts"]``.
        download_images: When true, every inline image is downloaded to a
            temporary file and its ``<img src>`` is pointed at the local
            copy; when false, image loading is disabled for the PDF.

    Yields:
        The ``title`` of each part, in order.

    Raises:
        Whatever ``response.raise_for_status()`` raises when an image
        download returns an error status.
    """
    # Both lists keep the temporary files open (and therefore on disk) until
    # the PDF has been generated; they are closed, and thereby deleted, in
    # the ``finally`` below.
    chapters = []
    image_files = []

    try:
        for part, content in zip(self.data["parts"], contents):
            html = BeautifulSoup(content)

            # Unwrap each image paragraph so that only the bare <img> remains.
            image_tags = []
            for image_container in html.find_all("p", {"data-media-type": "image"}):
                img = image_container.findChild("img")
                if img is None:
                    # Container without an <img>: nothing to unwrap.
                    continue
                image_container.replace_with(img)
                if img.get("src"):
                    image_tags.append(img)

            if download_images and image_tags:
                local_paths = {}  # image URL -> local path, per chapter
                async with CachedSession(cache=None) as session:  # Don't cache images
                    for img in image_tags:
                        image_url = img["src"]
                        if image_url not in local_paths:
                            async with session.get(image_url) as response:
                                response.raise_for_status()
                                image = await response.read()

                            temp_img = tempfile.NamedTemporaryFile(
                                suffix=".jpg", delete=True
                            )
                            image_files.append(temp_img)
                            temp_img.write(image)
                            # The converter opens the file by path, so the
                            # bytes must be on disk, not in our buffer.
                            temp_img.flush()
                            local_paths[image_url] = temp_img.name

                        # Rewrite the attribute on the tag itself rather than
                        # string-replacing in the serialized HTML, where the
                        # URL may appear entity-escaped (``&`` -> ``&amp;``).
                        img["src"] = f"file://{local_paths[image_url]}"

            tempie = tempfile.NamedTemporaryFile(suffix=".html", delete=True)
            chapters.append(tempie)
            tempie.write(str(html).encode())
            tempie.flush()  # make the full chapter visible to the converter

            yield part["title"]

        pdfkit.from_file(
            [chapter.name for chapter in chapters],
            self.file.name,
            options={
                "enable-local-file-access": None,
                # A value of None emits the bare flag; pick the flag that
                # matches the request instead of passing a boolean value.
                ("images" if download_images else "no-images"): None,
                "encoding": "UTF-8",  # chapter files are written as UTF-8
                "title": self.data["title"],
            },
        )

        clean_description = self.data["description"].strip().replace("\n", "$/")
        metadata = {
            "Author": self.data["user"]["username"],
            "Title": self.data["title"],
            "Subject": clean_description,
            "CreationDate": self.data["createDate"],
            "ModDate": self.data["modifyDate"],
            "Keywords": ",".join(self.data["tags"]),
            "Language": self.data["language"]["name"],
            "Completed": self.data["completed"],
            "MatureContent": self.data["mature"],
            "Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader",
        }  # As per https://exiftool.org/TagNames/PDF.html

        with ExifTool(config_file="../exiftool.config", logger=logger) as et:
            et.execute(
                *[f"-{key}={value}" for key, value in metadata.items()],
                "-overwrite_original",
                self.file.name,
            )
    finally:
        # Closing a delete=True temporary file removes it from disk.
        for staged in (*image_files, *chapters):
            staged.close()
def dump(self) -> PDFGenerator:
    """Rewind ``self.file`` to its first byte and return this generator.

    Returns:
        ``self``, so the call can be chained.
    """
    output = self.file
    output.seek(0)
    return self