feat(api): PDF Image downloads are functional!

2024-12-07 04:17:01 +00:00
parent 40bad57eac
commit 8728b215ee
1 changed files with 10 additions and 13 deletions
@@ -9,6 +9,7 @@ import unicodedata
 from io import BytesIO, StringIO
 from os import environ
 from enum import Enum
 from base64 import b64encode
 import backoff
 import pdfkit
 from eliot import to_file, start_action
@@ -423,7 +424,7 @@ class PDFGenerator:
        chapters = []
        for part, content in zip(self.data["parts"], contents):
-            html = BeautifulSoup(content)
+            html = BeautifulSoup(content, features="lxml")
            image_sources: List[str] = []
            for image_container in html.find_all("p", {"data-media-type": "image"}):
@@ -440,19 +441,20 @@ class PDFGenerator:
                            response.raise_for_status()
                            image = await response.read()
-                            temp_img = tempfile.NamedTemporaryFile(
+                            # temp_img = tempfile.NamedTemporaryFile(
-                                suffix=".jpg", delete=False
+                            #     suffix=".jpg", delete=False
-                            )
+                            # )
-                            temp_img.write(image)
+                            # temp_img.write(image)
                            writable_html = writable_html.replace(
-                                image_url, f"file://{temp_img.file.name}"
+                                image_url,
                                f"data:image/jpg;base64,{b64encode(image).decode()}",
                            )
-                            print("Replaced", image_url, "with", temp_img.file.name)
+                            print("Replaced", image_url, "with bytes")
            tempie = tempfile.NamedTemporaryFile(suffix=".html", delete=True)
            tempie.write(writable_html.encode())
-            print(writable_html)
+            # print(writable_html)
            chapters.append(tempie)
@@ -461,11 +463,6 @@ class PDFGenerator:
        pdfkit.from_file(
            [chapter.file.name for chapter in chapters],
            self.file.name,
            options={
                "enable-local-file-access": None,
                "images": download_images,
                "title": self.data["title"],
            },
        )
        clean_description = self.data["description"].strip().replace("\n", "$/")