feat(api): PDF Image downloads are functional!

This commit is contained in:
TheOnlyWayUp
2024-12-07 04:17:01 +00:00
parent 40bad57eac
commit 8728b215ee
+10 -13
View File
@@ -9,6 +9,7 @@ import unicodedata
from io import BytesIO, StringIO from io import BytesIO, StringIO
from os import environ from os import environ
from enum import Enum from enum import Enum
from base64 import b64encode
import backoff import backoff
import pdfkit import pdfkit
from eliot import to_file, start_action from eliot import to_file, start_action
@@ -423,7 +424,7 @@ class PDFGenerator:
chapters = [] chapters = []
for part, content in zip(self.data["parts"], contents): for part, content in zip(self.data["parts"], contents):
html = BeautifulSoup(content) html = BeautifulSoup(content, features="lxml")
image_sources: List[str] = [] image_sources: List[str] = []
for image_container in html.find_all("p", {"data-media-type": "image"}): for image_container in html.find_all("p", {"data-media-type": "image"}):
@@ -440,19 +441,20 @@ class PDFGenerator:
response.raise_for_status() response.raise_for_status()
image = await response.read() image = await response.read()
temp_img = tempfile.NamedTemporaryFile( # temp_img = tempfile.NamedTemporaryFile(
suffix=".jpg", delete=False # suffix=".jpg", delete=False
) # )
temp_img.write(image) # temp_img.write(image)
writable_html = writable_html.replace( writable_html = writable_html.replace(
image_url, f"file://{temp_img.file.name}" image_url,
f"data:image/jpg;base64,{b64encode(image).decode()}",
) )
print("Replaced", image_url, "with", temp_img.file.name) print("Replaced", image_url, "with bytes")
tempie = tempfile.NamedTemporaryFile(suffix=".html", delete=True) tempie = tempfile.NamedTemporaryFile(suffix=".html", delete=True)
tempie.write(writable_html.encode()) tempie.write(writable_html.encode())
print(writable_html) # print(writable_html)
chapters.append(tempie) chapters.append(tempie)
@@ -461,11 +463,6 @@ class PDFGenerator:
pdfkit.from_file( pdfkit.from_file(
[chapter.file.name for chapter in chapters], [chapter.file.name for chapter in chapters],
self.file.name, self.file.name,
options={
"enable-local-file-access": None,
"images": download_images,
"title": self.data["title"],
},
) )
clean_description = self.data["description"].strip().replace("\n", "$/") clean_description = self.data["description"].strip().replace("\n", "$/")