feat(api): Remove dependency on exiftool (#82)
This commit is contained in:
@@ -0,0 +1,10 @@
|
|||||||
|
__pycache__
|
||||||
|
*ipynb
|
||||||
|
build
|
||||||
|
.idea
|
||||||
|
.vscode
|
||||||
|
.venv
|
||||||
|
.env
|
||||||
|
*log
|
||||||
|
*.md
|
||||||
|
uv.lock
|
||||||
+9
-17
@@ -13,23 +13,17 @@ FROM python:3.13-slim
|
|||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Install apt-fast, git, exiftool
|
|
||||||
|
|
||||||
COPY --from=nobodyxu/apt-fast:latest-debian-buster-slim /usr/local/ /usr/local/
|
COPY --from=nobodyxu/apt-fast:latest-debian-buster-slim /usr/local/ /usr/local/
|
||||||
|
|
||||||
RUN apt update
|
RUN apt update
|
||||||
RUN apt install -y aria2
|
RUN apt install -y aria2
|
||||||
RUN apt-fast install -y git build-essential libpango-1.0-0 libpangoft2-1.0-0 wget
|
RUN apt-fast install -y git build-essential python3.13-dev libgobject-2.0 libpango-1.0 libpangoft2-1.0
|
||||||
|
# aiohttp-client-cache depends on multipart, which requires python3.13-dev to build successfully on 3.13
|
||||||
|
# weasyprint depends on libgobject, libpango, and libpangoft2
|
||||||
|
|
||||||
ENV EXIFTOOL_VERSION="13.06"
|
RUN rm -rf /var/lib/apt/lists/*
|
||||||
RUN wget "https://exiftool.org/Image-ExifTool-${EXIFTOOL_VERSION}.tar.gz"
|
# https://github.com/TheOnlyWayUp/WattpadDownloader/pull/82#discussion_r2470358950
|
||||||
RUN gzip -dc "Image-ExifTool-${EXIFTOOL_VERSION}.tar.gz" | tar -xf -
|
|
||||||
WORKDIR /app/Image-ExifTool-${EXIFTOOL_VERSION}
|
|
||||||
RUN perl Makefile.PL
|
|
||||||
RUN make test
|
|
||||||
RUN make install
|
|
||||||
|
|
||||||
RUN rm -rf /var/lib/apt/lists/* /app/Image-ExifTool-${EXIFTOOL_VERSION}
|
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
@@ -37,17 +31,15 @@ WORKDIR /app
|
|||||||
|
|
||||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
||||||
|
|
||||||
COPY src/api/requirements.txt requirements.txt
|
COPY src/api/pyproject.toml /app
|
||||||
COPY src/api/src/create_book/generators/pdf/exiftool.config exiftool.config
|
RUN uv sync
|
||||||
RUN uv pip install -r requirements.txt --system
|
COPY src/api/ /app
|
||||||
COPY --from=0 /build/build /app/src/build
|
COPY --from=0 /build/build /app/src/build
|
||||||
COPY src/api/src src
|
|
||||||
|
|
||||||
# Is this still needed?
|
|
||||||
RUN ln -s /app/src/pdf/fonts /tmp/fonts
|
RUN ln -s /app/src/pdf/fonts /tmp/fonts
|
||||||
|
|
||||||
WORKDIR /app/src
|
WORKDIR /app/src
|
||||||
|
|
||||||
EXPOSE 80
|
EXPOSE 80
|
||||||
|
|
||||||
CMD [ "python3", "main.py"]
|
CMD [ "uv", "run", "main.py"]
|
||||||
|
|||||||
@@ -17,7 +17,6 @@ dependencies = [
|
|||||||
"aiohttp-client-cache[all]",
|
"aiohttp-client-cache[all]",
|
||||||
"bs4>=0.0.2",
|
"bs4>=0.0.2",
|
||||||
"uvicorn>=0.32.1",
|
"uvicorn>=0.32.1",
|
||||||
"pyexiftool>=0.5.6",
|
|
||||||
"weasyprint>=63.0",
|
"weasyprint>=63.0",
|
||||||
"jinja2>=3.1.6",
|
"jinja2>=3.1.6",
|
||||||
]
|
]
|
||||||
@@ -31,5 +30,6 @@ aiohttp-client-cache = { git = "https://github.com/TheOnlyWayUp/aiohttp-client-c
|
|||||||
[dependency-groups]
|
[dependency-groups]
|
||||||
dev = [
|
dev = [
|
||||||
"ipykernel>=6.29.5",
|
"ipykernel>=6.29.5",
|
||||||
|
"ipynb>=0.5.1",
|
||||||
"ruff>=0.11.12",
|
"ruff>=0.11.12",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -1,76 +0,0 @@
|
|||||||
aioboto3==13.2.0
|
|
||||||
aiobotocore==2.15.2
|
|
||||||
aiofiles==24.1.0
|
|
||||||
aiohappyeyeballs==2.4.4
|
|
||||||
aiohttp==3.11.9
|
|
||||||
aiohttp-client-cache @ git+https://github.com/TheOnlyWayUp/aiohttp-client-cache.git@1f94f1d751e7320c0ea981d532ff02924782dae6
|
|
||||||
aioitertools==0.12.0
|
|
||||||
aiosignal==1.3.1
|
|
||||||
aiosqlite==0.20.0
|
|
||||||
annotated-types==0.7.0
|
|
||||||
anyio==4.6.2.post1
|
|
||||||
async-timeout==4.0.3
|
|
||||||
attrs==23.1.0
|
|
||||||
backoff==2.2.1
|
|
||||||
beautifulsoup4==4.12.3
|
|
||||||
boltons==24.1.0
|
|
||||||
boto3==1.35.36
|
|
||||||
botocore==1.35.36
|
|
||||||
brotli==1.1.0
|
|
||||||
bs4==0.0.2
|
|
||||||
cffi==1.17.1
|
|
||||||
click==8.1.7
|
|
||||||
cssselect2==0.7.0
|
|
||||||
dnspython==2.7.0
|
|
||||||
ebooklib==0.18
|
|
||||||
eliot==1.16.0
|
|
||||||
exceptiongroup==1.2.2
|
|
||||||
fastapi==0.115.5
|
|
||||||
fonttools==4.55.2
|
|
||||||
frozenlist==1.4.1
|
|
||||||
h11==0.14.0
|
|
||||||
idna==3.6
|
|
||||||
itsdangerous==2.2.0
|
|
||||||
jinja2==3.1.6
|
|
||||||
jmespath==1.0.1
|
|
||||||
lxml==5.3.0
|
|
||||||
markdown-it-py==3.0.0
|
|
||||||
mdurl==0.1.2
|
|
||||||
motor==3.6.0
|
|
||||||
multidict==6.0.4
|
|
||||||
orjson==3.10.12
|
|
||||||
pillow==10.4.0
|
|
||||||
propcache==0.2.1
|
|
||||||
pycparser==2.22
|
|
||||||
pydantic==2.10.2
|
|
||||||
pydantic-core==2.27.1
|
|
||||||
pydantic-settings==2.6.1
|
|
||||||
pydyf==0.11.0
|
|
||||||
pyexiftool==0.5.6
|
|
||||||
pygments==2.18.0
|
|
||||||
pymongo==4.9.2
|
|
||||||
pyphen==0.15.0
|
|
||||||
pyrsistent==0.20.0
|
|
||||||
python-dateutil==2.9.0.post0
|
|
||||||
python-dotenv==1.0.1
|
|
||||||
redis==5.2.0
|
|
||||||
rich==13.9.4
|
|
||||||
s3transfer==0.10.4
|
|
||||||
setuptools==75.6.0
|
|
||||||
six==1.16.0
|
|
||||||
sniffio==1.3.1
|
|
||||||
soupsieve==2.6
|
|
||||||
starlette==0.41.3
|
|
||||||
tinycss2==1.4.0
|
|
||||||
tinyhtml5==2.0.0
|
|
||||||
type-extensions==0.1.2
|
|
||||||
typing-extensions==4.12.2
|
|
||||||
url-normalize==1.4.3
|
|
||||||
urllib3==2.2.3
|
|
||||||
uvicorn==0.32.1
|
|
||||||
weasyprint==63.0
|
|
||||||
webencodings==0.5.1
|
|
||||||
wrapt==1.17.0
|
|
||||||
yarl==1.18.3
|
|
||||||
zope-interface==7.2
|
|
||||||
zopfli==0.2.3.post1
|
|
||||||
@@ -3,10 +3,10 @@ from io import BytesIO
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from tempfile import NamedTemporaryFile, _TemporaryFileWrapper
|
from tempfile import NamedTemporaryFile, _TemporaryFileWrapper
|
||||||
|
|
||||||
|
import pydyf
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from exiftool import ExifTool
|
|
||||||
from jinja2 import Template
|
from jinja2 import Template
|
||||||
from weasyprint import CSS, HTML
|
from weasyprint import CSS, HTML, Document
|
||||||
from weasyprint.text.fonts import FontConfiguration
|
from weasyprint.text.fonts import FontConfiguration
|
||||||
|
|
||||||
from ..models import Story
|
from ..models import Story
|
||||||
@@ -97,7 +97,7 @@ class PDFGenerator(AbstractGenerator):
|
|||||||
self.images = images
|
self.images = images
|
||||||
self.author = author_image
|
self.author = author_image
|
||||||
|
|
||||||
self.book: _TemporaryFileWrapper = NamedTemporaryFile(suffix=".pdf")
|
self.book: _TemporaryFileWrapper = NamedTemporaryFile(suffix=".pdf") # type: ignore
|
||||||
self.content = TEMPLATE
|
self.content = TEMPLATE
|
||||||
|
|
||||||
def generate_chapters(self) -> dict[int, str]:
|
def generate_chapters(self) -> dict[int, str]:
|
||||||
@@ -134,6 +134,12 @@ class PDFGenerator(AbstractGenerator):
|
|||||||
"book_title": self.story["title"],
|
"book_title": self.story["title"],
|
||||||
"cover": f"data:image/jpg;base64,{b64encode(self.cover).decode()}",
|
"cover": f"data:image/jpg;base64,{b64encode(self.cover).decode()}",
|
||||||
"username": self.story["user"]["username"],
|
"username": self.story["user"]["username"],
|
||||||
|
"author_bio": self.story["user"]["description"],
|
||||||
|
"clean_tags": ", ".join(self.story["tags"]),
|
||||||
|
"created": self.story["createDate"],
|
||||||
|
"modified": self.story["modifyDate"],
|
||||||
|
"is_completed": self.story["completed"],
|
||||||
|
"is_mature": self.story["mature"],
|
||||||
"description": self.story["description"],
|
"description": self.story["description"],
|
||||||
"avatar": b64encode(self.author).decode(),
|
"avatar": b64encode(self.author).decode(),
|
||||||
"copyright": {
|
"copyright": {
|
||||||
@@ -149,6 +155,11 @@ class PDFGenerator(AbstractGenerator):
|
|||||||
|
|
||||||
self.content: str = Template(self.content).render(data)
|
self.content: str = Template(self.content).render(data)
|
||||||
|
|
||||||
|
def write_custom_metadata(self, document: Document, pdf: pydyf.PDF):
|
||||||
|
"""Write non-standard metadata fields to the PDF."""
|
||||||
|
pdf.info["completed"] = pydyf.String(str(self.story["completed"]))
|
||||||
|
pdf.info["mature"] = pydyf.String(str(self.story["mature"]))
|
||||||
|
|
||||||
def generate_pdf(self):
|
def generate_pdf(self):
|
||||||
"""Generate and write the PDF to a temporary file (self.book)."""
|
"""Generate and write the PDF to a temporary file (self.book)."""
|
||||||
font_config = FontConfiguration()
|
font_config = FontConfiguration()
|
||||||
@@ -157,47 +168,17 @@ class PDFGenerator(AbstractGenerator):
|
|||||||
|
|
||||||
html_obj = HTML(string=self.content)
|
html_obj = HTML(string=self.content)
|
||||||
html_obj.write_pdf(
|
html_obj.write_pdf(
|
||||||
self.book.name, stylesheets=[stylesheet_obj], font_config=font_config
|
self.book.name,
|
||||||
|
stylesheets=[stylesheet_obj],
|
||||||
|
font_config=font_config,
|
||||||
|
finisher=self.write_custom_metadata,
|
||||||
|
options={"custom_metadata": True},
|
||||||
)
|
)
|
||||||
|
|
||||||
def add_metadata(self):
|
|
||||||
"""Write metadata to generated PDF file at self.book, using ExifTool."""
|
|
||||||
|
|
||||||
clean_description = (
|
|
||||||
self.story["description"].strip().replace("\n", "$/")
|
|
||||||
) # exiftool doesn't parse \ns correctly; it supports $/ for the same purpose instead. `&#xa;` is another option.
|
|
||||||
|
|
||||||
metadata = {
|
|
||||||
"Author": self.story["user"]["username"],
|
|
||||||
"Title": self.story["title"],
|
|
||||||
"Subject": clean_description,
|
|
||||||
"CreationDate": self.story["createDate"],
|
|
||||||
"ModDate": self.story["modifyDate"],
|
|
||||||
"Keywords": ",".join(self.story["tags"]),
|
|
||||||
"Language": self.story["language"]["name"],
|
|
||||||
"Completed": self.story["completed"],
|
|
||||||
"MatureContent": self.story["mature"],
|
|
||||||
"Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader",
|
|
||||||
} # As per https://exiftool.org/TagNames/PDF.html
|
|
||||||
|
|
||||||
with ExifTool(config_file=DATA_PATH / "exiftool.config") as et:
|
|
||||||
# Custom configuration adds Completed and MatureContent tags.
|
|
||||||
# exiftool logger logs executed command
|
|
||||||
et.execute(
|
|
||||||
*(
|
|
||||||
[f"-{key}={value}" for key, value in metadata.items()]
|
|
||||||
+ [
|
|
||||||
"-overwrite_original",
|
|
||||||
self.book.file.name,
|
|
||||||
]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
def compile(self):
|
def compile(self):
|
||||||
parts = self.generate_chapters()
|
parts = self.generate_chapters()
|
||||||
self.populate_template(parts)
|
self.populate_template(parts)
|
||||||
self.generate_pdf()
|
self.generate_pdf()
|
||||||
self.add_metadata()
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def dump(self) -> BytesIO:
|
def dump(self) -> BytesIO:
|
||||||
|
|||||||
@@ -1,73 +1,85 @@
|
|||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="{{ langcode }}">
|
<html lang="{{ langcode }}">
|
||||||
|
|
||||||
|
|
||||||
|
<head>
|
||||||
|
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
|
||||||
|
<!-- https://doc.courtbouillon.org/weasyprint/stable/api_reference.html#weasyprint.document.DocumentMetadata -->
|
||||||
<title>{{ book_title }}</title>
|
<title>{{ book_title }}</title>
|
||||||
|
<meta name=description content="{{description}}">
|
||||||
|
<meta name=author content="{{author}}">
|
||||||
|
<meta name=keywords content="{{clean_tags}}">
|
||||||
|
<meta name=language content="{{langcode}}">
|
||||||
|
<meta name=dcterms.created content="{{created}}">
|
||||||
|
<meta name=dcterms.modified content="{{modified}}">
|
||||||
|
<meta name=generator content="Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader">
|
||||||
|
|
||||||
<section class="fullpage">
|
|
||||||
<img src="{{ cover }}" alt="Cover">
|
</head>
|
||||||
|
|
||||||
|
<section class="fullpage">
|
||||||
|
<img src="{{ cover }}" alt="Cover">
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<div id="copyright-container">
|
||||||
|
<h1 id="copyright-notice">Copyright Notice</h1>
|
||||||
|
|
||||||
|
<h2 id="copyright-title">{{ book_title }}</h2>
|
||||||
|
<p id="copyright-author">By {{ author }}</p>
|
||||||
|
|
||||||
|
<div id="copyright-separator"></div>
|
||||||
|
|
||||||
|
<p id="copyright-ex-libris">Ex Libris Sapientiae</p>
|
||||||
|
|
||||||
|
<div id="copyright-separator"></div>
|
||||||
|
|
||||||
|
{% if copyright.data %}
|
||||||
|
<img src="data:image/jpg;base64,{{copyright.data}}" alt="{{copyright.name}}" width="88" height="31"
|
||||||
|
id="copyright-license-image">
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<p id="copyright-copyright">{{ statement }}</p>
|
||||||
|
|
||||||
|
<p id="copyright-rights">{{ freedoms }}</p>
|
||||||
|
|
||||||
|
<p id="copyright-printing">Printing: {{ printing }}</p>
|
||||||
|
|
||||||
|
<p id="book-link">
|
||||||
|
ID: {{ book_id }}.
|
||||||
|
<a href="https://wattpad.com/story/{{ book_id }}" target="_blank" id="copyright-link">View this Book Online</a>
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="book">
|
||||||
|
<section id="contents" class="toc">
|
||||||
|
<h1>Table of Contents</h1>
|
||||||
|
<ul>
|
||||||
|
{% for part_id in parts %}
|
||||||
|
<li><a href="#{{part_id}}"></a></li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
</section>
|
</section>
|
||||||
|
{% for part_id in parts %}
|
||||||
|
|
||||||
<div id="copyright-container">
|
{{parts[part_id] | safe}}
|
||||||
<h1 id="copyright-notice">Copyright Notice</h1>
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
|
||||||
<h2 id="copyright-title">{{ book_title }}</h2>
|
<h1>About the Author</h1>
|
||||||
<p id="copyright-author">By {{ author }}</p>
|
<div id="author-container">
|
||||||
|
<div id="author-about">
|
||||||
<div id="copyright-separator"></div>
|
<img src="data:image/jpg;base64,{{avatar}}" alt="{{author}}'s profile picture" id="author-profile-picture">
|
||||||
|
<h2 id="author-name">
|
||||||
<p id="copyright-ex-libris">Ex Libris Sapientiae</p>
|
<a href="https://wattpad.com/user/{{ username }}" id="author-link">{{ username }}</a>
|
||||||
|
</h2>
|
||||||
<div id="copyright-separator"></div>
|
<hr id="author-divider">
|
||||||
|
<p id="author-bio">
|
||||||
{% if copyright.data %}
|
{{ author_bio }}
|
||||||
<img src="data:image/jpg;base64,{{copyright.data}}"
|
|
||||||
alt="{{copyright.name}}"
|
|
||||||
width="88"
|
|
||||||
height="31"
|
|
||||||
id="copyright-license-image">
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
<p id="copyright-copyright">{{ statement }}</p>
|
|
||||||
|
|
||||||
<p id="copyright-rights">{{ freedoms }}</p>
|
|
||||||
|
|
||||||
<p id="copyright-printing">Printing: {{ printing }}</p>
|
|
||||||
|
|
||||||
<p id="book-link">
|
|
||||||
ID: {{ book_id }}.
|
|
||||||
<a href="https://wattpad.com/story/{{ book_id }}" target="_blank" id="copyright-link">View this Book Online</a>
|
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div id="book">
|
|
||||||
<section id="contents" class="toc">
|
|
||||||
<h1>Table of Contents</h1>
|
|
||||||
<ul>
|
|
||||||
{% for part_id in parts %}
|
|
||||||
<li><a href="#{{part_id}}"></a></li>
|
|
||||||
{% endfor %}
|
|
||||||
</ul>
|
|
||||||
</section>
|
|
||||||
{% for part_id in parts %}
|
|
||||||
|
|
||||||
{{parts[part_id] | safe}}
|
|
||||||
{% endfor %}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<h1>About the Author</h1>
|
|
||||||
<div id="author-container">
|
|
||||||
<div id="author-about">
|
|
||||||
<img src="data:image/jpg;base64,{{avatar}}" alt="{{author}}'s profile picture" id="author-profile-picture">
|
|
||||||
<h2 id="author-name">
|
|
||||||
<a href="https://wattpad.com/user/{{ username }}" id="author-link">{{ username }}</a>
|
|
||||||
</h2>
|
|
||||||
<hr id="author-divider">
|
|
||||||
<p id="author-bio">
|
|
||||||
{{ description }}
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</html>
|
</html>
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
|
|
||||||
%Image::ExifTool::UserDefined = (
|
|
||||||
'Image::ExifTool::XMP::xmp' => {
|
|
||||||
Completed => {
|
|
||||||
Writable => 'boolean', # Can be a boolean (True/False)
|
|
||||||
Groups => { 2 => 'Content' },
|
|
||||||
},
|
|
||||||
MatureContent => {
|
|
||||||
Writable => 'boolean', # Can be a boolean (True/False)
|
|
||||||
Groups => { 2 => 'Content' },
|
|
||||||
},
|
|
||||||
},
|
|
||||||
|
|
||||||
'Image::ExifTool::IPTC::ApplicationRecord' => {
|
|
||||||
161 => {
|
|
||||||
Name => 'Completed',
|
|
||||||
Format => 'string[0,16]', # Store as a string (e.g., "Yes"/"No")
|
|
||||||
},
|
|
||||||
162 => {
|
|
||||||
Name => 'MatureContent',
|
|
||||||
Format => 'string[0,16]', # Store as a string (e.g., "Yes"/"No")
|
|
||||||
},
|
|
||||||
},
|
|
||||||
);
|
|
||||||
|
|
||||||
1; # End
|
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
The fonts need to be symlinked to `/tmp/fonts`; this allows them to be loaded both during development and at build time.
|
||||||
|
It's assumed the fonts will be present at `/tmp/fonts`. During development they're at `/src/api/src/create_book/generators/pdf`, and during deployment they're at `/app/src/api/src/create_book/generators/pdf`. This seems like a clean solution.
|
||||||
|
|
||||||
|
`Fontconfig error: Cannot load default config file: No such file: (null)`
|
||||||
|
If the fonts aren't found, this warning appears in the console. It won't cause downloads to fail, though.
|
||||||
Generated
+1068
-549
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user