diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..4d111d3 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +__pycache__ +*ipynb +build +.idea +.vscode +.venv +.env +*log +*.md +uv.lock diff --git a/Dockerfile b/Dockerfile index 8eb5054..776db64 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,23 +13,17 @@ FROM python:3.13-slim WORKDIR /app -# Install apt-fast, git, exiftool - COPY --from=nobodyxu/apt-fast:latest-debian-buster-slim /usr/local/ /usr/local/ RUN apt update RUN apt install -y aria2 -RUN apt-fast install -y git build-essential libpango-1.0-0 libpangoft2-1.0-0 wget +RUN apt-fast install -y git build-essential python3.13-dev libgobject-2.0 libpango-1.0 libpangoft2-1.0 +# aiohttp-client-cache depends on multipart, which requires python3.13-dev to build successfully on 3.13 +# weasyprint depends on libgobject, libpango, and libpangoft2 -ENV EXIFTOOL_VERSION="13.06" -RUN wget "https://exiftool.org/Image-ExifTool-${EXIFTOOL_VERSION}.tar.gz" -RUN gzip -dc "Image-ExifTool-${EXIFTOOL_VERSION}.tar.gz" | tar -xf - -WORKDIR /app/Image-ExifTool-${EXIFTOOL_VERSION} -RUN perl Makefile.PL -RUN make test -RUN make install +RUN rm -rf /var/lib/apt/lists/* +# https://github.com/TheOnlyWayUp/WattpadDownloader/pull/82#discussion_r2470358950 -RUN rm -rf /var/lib/apt/lists/* /app/Image-ExifTool-${EXIFTOOL_VERSION} WORKDIR /app @@ -37,17 +31,15 @@ WORKDIR /app COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ -COPY src/api/requirements.txt requirements.txt -COPY src/api/src/create_book/generators/pdf/exiftool.config exiftool.config -RUN uv pip install -r requirements.txt --system +COPY src/api/pyproject.toml /app +RUN uv sync +COPY src/api/ /app COPY --from=0 /build/build /app/src/build -COPY src/api/src src -# Is this still needed? 
RUN ln -s /app/src/pdf/fonts /tmp/fonts WORKDIR /app/src EXPOSE 80 -CMD [ "python3", "main.py"] +CMD [ "uv", "run", "main.py"] diff --git a/src/api/pyproject.toml b/src/api/pyproject.toml index ad6344c..25b2e47 100644 --- a/src/api/pyproject.toml +++ b/src/api/pyproject.toml @@ -17,7 +17,6 @@ dependencies = [ "aiohttp-client-cache[all]", "bs4>=0.0.2", "uvicorn>=0.32.1", - "pyexiftool>=0.5.6", "weasyprint>=63.0", "jinja2>=3.1.6", ] @@ -31,5 +30,6 @@ aiohttp-client-cache = { git = "https://github.com/TheOnlyWayUp/aiohttp-client-c [dependency-groups] dev = [ "ipykernel>=6.29.5", + "ipynb>=0.5.1", "ruff>=0.11.12", ] diff --git a/src/api/requirements.txt b/src/api/requirements.txt deleted file mode 100644 index ff11594..0000000 --- a/src/api/requirements.txt +++ /dev/null @@ -1,76 +0,0 @@ -aioboto3==13.2.0 -aiobotocore==2.15.2 -aiofiles==24.1.0 -aiohappyeyeballs==2.4.4 -aiohttp==3.11.9 -aiohttp-client-cache @ git+https://github.com/TheOnlyWayUp/aiohttp-client-cache.git@1f94f1d751e7320c0ea981d532ff02924782dae6 -aioitertools==0.12.0 -aiosignal==1.3.1 -aiosqlite==0.20.0 -annotated-types==0.7.0 -anyio==4.6.2.post1 -async-timeout==4.0.3 -attrs==23.1.0 -backoff==2.2.1 -beautifulsoup4==4.12.3 -boltons==24.1.0 -boto3==1.35.36 -botocore==1.35.36 -brotli==1.1.0 -bs4==0.0.2 -cffi==1.17.1 -click==8.1.7 -cssselect2==0.7.0 -dnspython==2.7.0 -ebooklib==0.18 -eliot==1.16.0 -exceptiongroup==1.2.2 -fastapi==0.115.5 -fonttools==4.55.2 -frozenlist==1.4.1 -h11==0.14.0 -idna==3.6 -itsdangerous==2.2.0 -jinja2==3.1.6 -jmespath==1.0.1 -lxml==5.3.0 -markdown-it-py==3.0.0 -mdurl==0.1.2 -motor==3.6.0 -multidict==6.0.4 -orjson==3.10.12 -pillow==10.4.0 -propcache==0.2.1 -pycparser==2.22 -pydantic==2.10.2 -pydantic-core==2.27.1 -pydantic-settings==2.6.1 -pydyf==0.11.0 -pyexiftool==0.5.6 -pygments==2.18.0 -pymongo==4.9.2 -pyphen==0.15.0 -pyrsistent==0.20.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -redis==5.2.0 -rich==13.9.4 -s3transfer==0.10.4 -setuptools==75.6.0 -six==1.16.0 
-sniffio==1.3.1 -soupsieve==2.6 -starlette==0.41.3 -tinycss2==1.4.0 -tinyhtml5==2.0.0 -type-extensions==0.1.2 -typing-extensions==4.12.2 -url-normalize==1.4.3 -urllib3==2.2.3 -uvicorn==0.32.1 -weasyprint==63.0 -webencodings==0.5.1 -wrapt==1.17.0 -yarl==1.18.3 -zope-interface==7.2 -zopfli==0.2.3.post1 diff --git a/src/api/src/create_book/generators/pdf.py b/src/api/src/create_book/generators/pdf.py index b6c2c08..c6924c8 100644 --- a/src/api/src/create_book/generators/pdf.py +++ b/src/api/src/create_book/generators/pdf.py @@ -3,10 +3,10 @@ from io import BytesIO from pathlib import Path from tempfile import NamedTemporaryFile, _TemporaryFileWrapper +import pydyf from bs4 import BeautifulSoup -from exiftool import ExifTool from jinja2 import Template -from weasyprint import CSS, HTML +from weasyprint import CSS, HTML, Document from weasyprint.text.fonts import FontConfiguration from ..models import Story @@ -97,7 +97,7 @@ class PDFGenerator(AbstractGenerator): self.images = images self.author = author_image - self.book: _TemporaryFileWrapper = NamedTemporaryFile(suffix=".pdf") + self.book: _TemporaryFileWrapper = NamedTemporaryFile(suffix=".pdf") # type: ignore self.content = TEMPLATE def generate_chapters(self) -> dict[int, str]: @@ -134,6 +134,12 @@ class PDFGenerator(AbstractGenerator): "book_title": self.story["title"], "cover": f"data:image/jpg;base64,{b64encode(self.cover).decode()}", "username": self.story["user"]["username"], + "author_bio": self.story["user"]["description"], + "clean_tags": ", ".join(self.story["tags"]), + "created": self.story["createDate"], + "modified": self.story["modifyDate"], + "is_completed": self.story["completed"], + "is_mature": self.story["mature"], "description": self.story["description"], "avatar": b64encode(self.author).decode(), "copyright": { @@ -149,6 +155,11 @@ class PDFGenerator(AbstractGenerator): self.content: str = Template(self.content).render(data) + def write_custom_metadata(self, document: Document, pdf: pydyf.PDF): 
+ """Write non-standard metadata fields to the PDF.""" + pdf.info["completed"] = pydyf.String(str(self.story["completed"])) + pdf.info["mature"] = pydyf.String(str(self.story["mature"])) + def generate_pdf(self): """Generate and write the PDF to a temporary file (self.book).""" font_config = FontConfiguration() @@ -157,47 +168,17 @@ class PDFGenerator(AbstractGenerator): html_obj = HTML(string=self.content) html_obj.write_pdf( - self.book.name, stylesheets=[stylesheet_obj], font_config=font_config + self.book.name, + stylesheets=[stylesheet_obj], + font_config=font_config, + finisher=self.write_custom_metadata, + options={"custom_metadata": True}, ) - def add_metadata(self): - """Write metadata to generated PDF file at self.book, using ExifTool.""" - - clean_description = ( - self.story["description"].strip().replace("\n", "$/") - ) # exiftool doesn't parse \ns correctly, they support $/ for the same instead. ` ` is another option. - - metadata = { - "Author": self.story["user"]["username"], - "Title": self.story["title"], - "Subject": clean_description, - "CreationDate": self.story["createDate"], - "ModDate": self.story["modifyDate"], - "Keywords": ",".join(self.story["tags"]), - "Language": self.story["language"]["name"], - "Completed": self.story["completed"], - "MatureContent": self.story["mature"], - "Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader", - } # As per https://exiftool.org/TagNames/PDF.html - - with ExifTool(config_file=DATA_PATH / "exiftool.config") as et: - # Custom configuration adds Completed and MatureContent tags. 
- # exiftool logger logs executed command - et.execute( - *( - [f"-{key}={value}" for key, value in metadata.items()] - + [ - "-overwrite_original", - self.book.file.name, - ] - ) - ) - def compile(self): parts = self.generate_chapters() self.populate_template(parts) self.generate_pdf() - self.add_metadata() return True def dump(self) -> BytesIO: diff --git a/src/api/src/create_book/generators/pdf/book.html b/src/api/src/create_book/generators/pdf/book.html index 413b10b..e76d411 100644 --- a/src/api/src/create_book/generators/pdf/book.html +++ b/src/api/src/create_book/generators/pdf/book.html @@ -1,73 +1,85 @@ + +
+ - + +Ex Libris Sapientiae
+ + + + {% if copyright.data %} +{{ statement }}
+ +{{ freedoms }}
+ +Printing: {{ printing }}
+ ++ ID: {{ book_id }}. + View this Book Online +
+Ex Libris Sapientiae
- - - - {% if copyright.data %} -{{ statement }}
- -{{ freedoms }}
- -Printing: {{ printing }}
- -- ID: {{ book_id }}. - View this Book Online +