1 Commits

Author SHA1 Message Date
TheOnlyWayUp 682fb4da89 frontend: Add new year, free premium text 2024-12-30 23:22:33 +00:00
60 changed files with 5495 additions and 4821 deletions
-10
View File
@@ -1,10 +0,0 @@
__pycache__
*ipynb
build
.idea
.vscode
.venv
.env
*log
*.md
src/api/uv.lock
+2 -3
View File
@@ -1,12 +1,11 @@
__pycache__ __pycache__
venv venv
*epub *epub
*.pdf *pdf
# *html *html
data data
*ipynb *ipynb
build build
.idea
.vscode .vscode
.venv .venv
.env .env
+18 -10
View File
@@ -9,21 +9,27 @@ COPY src/frontend/. .
RUN npm run build RUN npm run build
# Thanks https://stackoverflow.com/q/76988450 # Thanks https://stackoverflow.com/q/76988450
FROM python:3.13-slim FROM python:3.10-slim
WORKDIR /app WORKDIR /app
# Install apt-fast, git, exiftool
COPY --from=nobodyxu/apt-fast:latest-debian-buster-slim /usr/local/ /usr/local/ COPY --from=nobodyxu/apt-fast:latest-debian-buster-slim /usr/local/ /usr/local/
RUN apt update RUN apt update
RUN apt install -y aria2 RUN apt install -y aria2
RUN apt-fast install -y git build-essential python3.13-dev libgobject-2.0 libpango-1.0 libpangoft2-1.0 RUN apt-fast install -y git build-essential libpango-1.0-0 libpangoft2-1.0-0 wget
# aiohttp-client-cache depends on multipart, which requires python3.13-dev to build successfully on 3.13
# weasyprint depends on libgobject, libpango, and libpangoft2
RUN rm -rf /var/lib/apt/lists/* ENV EXIFTOOL_VERSION="13.06"
# https://github.com/TheOnlyWayUp/WattpadDownloader/pull/82#discussion_r2470358950 RUN wget "https://exiftool.org/Image-ExifTool-${EXIFTOOL_VERSION}.tar.gz"
RUN gzip -dc "Image-ExifTool-${EXIFTOOL_VERSION}.tar.gz" | tar -xf -
WORKDIR /app/Image-ExifTool-${EXIFTOOL_VERSION}
RUN perl Makefile.PL
RUN make test
RUN make install
RUN rm -rf /var/lib/apt/lists/* /app/Image-ExifTool-${EXIFTOOL_VERSION}
WORKDIR /app WORKDIR /app
@@ -31,15 +37,17 @@ WORKDIR /app
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
COPY src/api/pyproject.toml /app COPY src/api/requirements.txt requirements.txt
RUN uv sync COPY src/api/exiftool.config exiftool.config
COPY src/api/ /app RUN uv pip install -r requirements.txt --system
COPY --from=0 /build/build /app/src/build COPY --from=0 /build/build /app/src/build
COPY src/api/src src
# Is this still needed?
RUN ln -s /app/src/pdf/fonts /tmp/fonts RUN ln -s /app/src/pdf/fonts /tmp/fonts
WORKDIR /app/src WORKDIR /app/src
EXPOSE 80 EXPOSE 80
CMD [ "uv", "run", "main.py"] CMD [ "python3", "main.py"]
+2 -2
View File
@@ -1,4 +1,4 @@
WattpadDownloader ([Demo](https://wpd.my/)) WattpadDownloader ([Demo](https://wpd.rambhat.la))
--- ---
Straightforward, Extendable WebApp to download Wattpad Books as EPUB Files. Straightforward, Extendable WebApp to download Wattpad Books as EPUB Files.
@@ -53,5 +53,5 @@ My thanks to [aerkalov/ebooklib](https://github.com/aerkalov/ebooklib) for a fas
--- ---
<div align="center"> <div align="center">
<p>TheOnlyWayUp © 2026</p> <p>TheOnlyWayUp © 2024</p>
</div> </div>
+1 -1
View File
@@ -1 +1 @@
3.13 3.10
+26
View File
@@ -0,0 +1,26 @@
# ExifTool user configuration.
# Declares two custom tags, Completed and MatureContent, in both the XMP "xmp"
# table and the IPTC ApplicationRecord table so that they can be written.
%Image::ExifTool::UserDefined = (
    # XMP variants: writable booleans, placed in the family-2 'Content' group.
    'Image::ExifTool::XMP::xmp' => {
        Completed => {
            Writable => 'boolean', # Can be a boolean (True/False)
            Groups => { 2 => 'Content' },
        },
        MatureContent => {
            Writable => 'boolean', # Can be a boolean (True/False)
            Groups => { 2 => 'Content' },
        },
    },
    # IPTC variants: entries 161 and 162, each stored as a string of 0 to 16 characters.
    'Image::ExifTool::IPTC::ApplicationRecord' => {
        161 => {
            Name => 'Completed',
            Format => 'string[0,16]', # Store as a string (e.g., "Yes"/"No")
        },
        162 => {
            Name => 'MatureContent',
            Format => 'string[0,16]', # Store as a string (e.g., "Yes"/"No")
        },
    },
);
1; # End (a Perl file must finish with a true value)
+4 -11
View File
@@ -3,7 +3,7 @@ name = "api"
version = "0.1.0" version = "0.1.0"
description = "Wattpad Downloader API" description = "Wattpad Downloader API"
readme = "../../README.md" readme = "../../README.md"
requires-python = ">=3.13" requires-python = ">=3.10"
dependencies = [ dependencies = [
"aiohttp>=3.9.1", "aiohttp>=3.9.1",
"rich>=13.9.4", "rich>=13.9.4",
@@ -17,19 +17,12 @@ dependencies = [
"aiohttp-client-cache[all]", "aiohttp-client-cache[all]",
"bs4>=0.0.2", "bs4>=0.0.2",
"uvicorn>=0.32.1", "uvicorn>=0.32.1",
"pyexiftool>=0.5.6",
"weasyprint>=63.0", "weasyprint>=63.0",
"jinja2>=3.1.6",
] ]
[tool.ruff.lint] [tool.ruff.lint]
ignore = ['E402'] # module import not at top of file ignore = ['E402']
[tool.uv.sources] [tool.uv.sources]
aiohttp-client-cache = { git = "https://github.com/TheOnlyWayUp/aiohttp-client-cache.git", rev = "keydb-ttl" } # Fork which leverages keydb's EXPIREMEMBER feature for TTLs on Hash members. aiohttp-client-cache = { git = "https://github.com/TheOnlyWayUp/aiohttp-client-cache.git", rev = "keydb-ttl" }
[dependency-groups]
dev = [
"ipykernel>=6.29.5",
"ipynb>=0.5.1",
"ruff>=0.11.12",
]
+75
View File
@@ -0,0 +1,75 @@
aioboto3==13.2.0
aiobotocore==2.15.2
aiofiles==24.1.0
aiohappyeyeballs==2.4.4
aiohttp==3.11.9
aiohttp-client-cache @ git+https://github.com/TheOnlyWayUp/aiohttp-client-cache.git@1f94f1d751e7320c0ea981d532ff02924782dae6
aioitertools==0.12.0
aiosignal==1.3.1
aiosqlite==0.20.0
annotated-types==0.7.0
anyio==4.6.2.post1
async-timeout==4.0.3
attrs==23.1.0
backoff==2.2.1
beautifulsoup4==4.12.3
boltons==24.1.0
boto3==1.35.36
botocore==1.35.36
brotli==1.1.0
bs4==0.0.2
cffi==1.17.1
click==8.1.7
cssselect2==0.7.0
dnspython==2.7.0
ebooklib==0.18
eliot==1.16.0
exceptiongroup==1.2.2
fastapi==0.115.5
fonttools==4.55.2
frozenlist==1.4.1
h11==0.14.0
idna==3.6
itsdangerous==2.2.0
jmespath==1.0.1
lxml==5.3.0
markdown-it-py==3.0.0
mdurl==0.1.2
motor==3.6.0
multidict==6.0.4
orjson==3.10.12
pillow==10.4.0
propcache==0.2.1
pycparser==2.22
pydantic==2.10.2
pydantic-core==2.27.1
pydantic-settings==2.6.1
pydyf==0.11.0
pyexiftool==0.5.6
pygments==2.18.0
pymongo==4.9.2
pyphen==0.15.0
pyrsistent==0.20.0
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
redis==5.2.0
rich==13.9.4
s3transfer==0.10.4
setuptools==75.6.0
six==1.16.0
sniffio==1.3.1
soupsieve==2.6
starlette==0.41.3
tinycss2==1.4.0
tinyhtml5==2.0.0
type-extensions==0.1.2
typing-extensions==4.12.2
url-normalize==1.4.3
urllib3==2.2.3
uvicorn==0.32.1
weasyprint==63.0
webencodings==0.5.1
wrapt==1.17.0
yarl==1.18.3
zope-interface==7.2
zopfli==0.2.3.post1
+792
View File
@@ -0,0 +1,792 @@
from __future__ import annotations
from typing import List, Optional, Tuple, cast
from typing_extensions import TypedDict
import re
import logging
import tempfile
import unicodedata
from os import environ
from io import BytesIO
from enum import Enum
from base64 import b64encode
import bs4
import backoff
from weasyprint import HTML, CSS, default_url_fetcher
from weasyprint.text.fonts import FontConfiguration
from ebooklib import epub
from exiftool import ExifTool
from eliot import to_file, start_action
from eliot.stdlib import EliotHandler
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from pydantic import TypeAdapter, model_validator, field_validator
from pydantic_settings import BaseSettings
from aiohttp import ClientResponseError
from aiohttp_client_cache.session import CachedSession
from aiohttp_client_cache import FileBackend, RedisBackend
# Read settings from a .env file before anything looks at the environment.
load_dotenv(override=True)

# One Eliot handler is shared by every stdlib logger configured below.
handler = EliotHandler()
logging.getLogger("fastapi").setLevel(logging.INFO)
logging.getLogger("fastapi").addHandler(handler)
# Handed to ExifTool later so that the commands it runs are logged.
exiftool_logger = logging.getLogger("exiftool")
exiftool_logger.addHandler(handler)
# NOTE(review): this constructs a standalone Logger instead of calling
# logging.getLogger("wpd") — confirm that is intentional.
logger = logging.Logger("wpd")
logger.addHandler(handler)
# Eliot messages are only written to disk when DEBUG is set to a non-empty value.
if environ.get("DEBUG"):
    to_file(open("eliot.log", "wb"))
# --- #
class CacheTypes(Enum):
    """Cache backends selectable through the CACHE_TYPE setting."""

    file = "file"  # selects FileBackend when the cache is built
    redis = "redis"  # selects RedisBackend when the cache is built
class Config(BaseSettings):
    """Cache-related settings for the downloader.

    Empty strings for USE_CACHE and CACHE_TYPE are treated as "not set" and
    replaced by the defaults. After validation, the Redis URL must be present
    exactly when the Redis cache is selected.
    """

    USE_CACHE: bool = True
    CACHE_TYPE: CacheTypes = CacheTypes.file
    REDIS_CONNECTION_URL: str = ""

    @field_validator("USE_CACHE", mode="before")
    def validate_use_cache(cls, value):
        """Map an empty string to the USE_CACHE default (True)."""
        return True if value == "" else value

    @field_validator("CACHE_TYPE", mode="before")
    def validate_cache_type(cls, value):
        """Map an empty string to the file cache."""
        # Thanks https://stackoverflow.com/a/78157474
        return "file" if value == "" else value

    @model_validator(mode="after")
    def prevent_mismatched_redis_url(self):
        """Raise ValueError when the Redis URL and the selected cache type disagree."""
        url_given = bool(self.REDIS_CONNECTION_URL)

        if self.CACHE_TYPE == CacheTypes.file and url_given:
            raise ValueError(
                "REDIS_CONNECTION_URL provided when File cache selected. To use Redis as a cache, set CACHE_TYPE=redis."
            )

        if self.CACHE_TYPE == CacheTypes.redis and not url_given:
            raise ValueError(
                "REDIS_CONNECTION_URL not provided when Redis cache selected. To use File cache, set CACHE_TYPE=file."
            )

        return self
# Settings are resolved once, at import time.
config = Config()

# --- #

# Browser-like User-Agent sent with every outgoing request.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
}

# Pick the response cache backend. Entries expire after 12 hours (43200 seconds).
if not config.USE_CACHE:
    cache = None
elif config.CACHE_TYPE == CacheTypes.file:
    cache = FileBackend(use_temp=True, expire_after=43200)
elif config.CACHE_TYPE == CacheTypes.redis:
    cache = RedisBackend(
        cache_name="wpd-aiohttp-cache",
        address=config.REDIS_CONNECTION_URL,
        expire_after=43200,
    )

logger.info(f"Using {cache=}")
# --- Utilities --- #
def smart_trim(text: str, max_length: int = 400) -> str:
    """Truncate text at line boundaries and join the kept lines with "<br />".

    Empty lines are dropped. Lines are kept, in order, for as long as the
    output built so far (separators included) plus the next line stays below
    ``max_length``; the first line that does not fit ends the result, so a line
    is never cut in half.

    Args:
        text (str): Newline-separated text.
        max_length (int): Length budget for the result.

    Returns:
        str: The kept lines joined by "<br />", with no trailing separator.
            Empty when ``text`` has no non-empty line or the first line alone
            exceeds the budget.
    """
    separator = "<br />"
    kept: List[str] = []
    used = 0  # length of the output so far, counting one separator per kept line

    for chunk in text.split("\n"):
        if not chunk:
            continue
        if used + len(chunk) >= max_length:
            break
        kept.append(chunk)
        used += len(chunk) + len(separator)

    # Joining (rather than appending and stripping) never touches characters
    # that belong to the text itself, such as a trailing "b" or "r".
    return separator.join(kept)
def generate_clean_part_html(part: Part, content: str) -> bs4.Tag:
    """Rebuild HTML Structure for a Part.

    A fresh <section> holding the chapter heading is created, and the children
    of every <p> in ``content`` are re-emitted into it. Only text nodes and
    <img>, <b> and <i> tags are carried over; any other tag inside a <p> is not
    copied.

    Args:
        part (Part): Supplies the chapter id and title for the heading.
        content (str): HTML of the part's text.

    Returns:
        bs4.Tag: The rebuilt <section> element.

    Raises:
        Exception: If the <section> skeleton cannot be found after parsing.
    """
    chapter_title = part["title"]
    chapter_id = part["id"]

    # NOTE(review): the title is interpolated into markup without escaping —
    # confirm titles cannot contain HTML.
    clean = BeautifulSoup(
        f"""
<section id="section_{chapter_id}" class="chapitre">
<h1 id="{chapter_id}" class="chapter-title">{chapter_title}</h1>
</section>
""",
        "html.parser",
    )  # html.parser doesn't create <html>/<body> tags automatically

    html = BeautifulSoup(content, "lxml")
    for br in html.find_all("br"):
        # Check if no content after br
        # (the br is dropped when it has no next sibling, or when that sibling's name is "br" or None)
        if not br.next_sibling or br.next_sibling.name in ["br", None]:
            br.decompose()

    section = cast(bs4.Tag, clean.find("section"))
    if not section:
        raise Exception()

    for child in html.find_all("p"):
        # Iterate over a snapshot: children may be decomposed inside the loop.
        for p_child in list(child.children):
            if not p_child:
                continue
            if isinstance(p_child, bs4.element.Tag):
                if p_child.name == "br":
                    p_child.decompose()
                elif p_child.name == "img":
                    # Images are re-created with only their src, followed by a line break.
                    src = p_child["src"]
                    img_tag = clean.new_tag("img")
                    img_tag["src"] = src
                    break_tag = clean.new_tag("br")
                    section.append(img_tag)
                    section.append(break_tag)
                elif p_child.name == "b":
                    # Bold run becomes its own <p><b>text</b></p>.
                    content = p_child.text
                    p_tag = clean.new_tag("p")
                    bold_tag = clean.new_tag("b")
                    bold_content = clean.new_string(content)
                    bold_tag.append(bold_content)
                    p_tag.append(bold_tag)
                    section.append(p_tag)
                elif p_child.name == "i":
                    # Italic run becomes its own <p><i>text</i></p>.
                    content = p_child.text
                    p_tag = clean.new_tag("p")
                    italic_tag = clean.new_tag("i")
                    italic_content = clean.new_string(content)
                    italic_tag.append(italic_content)
                    p_tag.append(italic_tag)
                    section.append(p_tag)
            elif isinstance(p_child, bs4.element.NavigableString):
                # Plain text becomes its own <p>.
                content = p_child.text
                p_tag = clean.new_tag("p")
                p_content = clean.new_string(content)
                p_tag.append(p_content)
                section.append(p_tag)
        if not list(child.children):
            # Some p tags only contain brs, once brs are removed, they are empty and can be removed as well.
            child.decompose()

    return section
def slugify(value, allow_unicode=False) -> str:
    """Turn an arbitrary value into a lowercase, dash-separated slug.

    Adapted from Django's ``django.utils.text.slugify``
    (https://github.com/django/django/blob/master/django/utils/text.py),
    via https://stackoverflow.com/a/295466.

    The value is stringified and Unicode-normalized (reduced to ASCII unless
    ``allow_unicode`` is true), lowercased, stripped of everything that is not
    a word character, whitespace or hyphen, and runs of whitespace/hyphens are
    collapsed into a single dash. Leading and trailing dashes and underscores
    are removed.
    """
    text = str(value)

    if not allow_unicode:
        decomposed = unicodedata.normalize("NFKD", text)
        text = decomposed.encode("ascii", "ignore").decode("ascii")
    else:
        text = unicodedata.normalize("NFKC", text)

    lowered = text.lower()
    without_symbols = re.sub(r"[^\w\s-]", "", lowered)
    dashed = re.sub(r"[-\s]+", "-", without_symbols)
    return dashed.strip("-_")
async def fetch_cookies(username: str, password: str) -> dict:
    # source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58
    """Log in to Wattpad and return the cookies set by the login response.

    Args:
        username (str): Username (sent lowercased).
        password (str): Password.

    Raises:
        ValueError: The login response status was not 204.
        ValueError: The login response carried no cookies.

    Returns:
        dict: Cookie name to cookie value.
    """
    with start_action(action_type="api_fetch_cookies"):
        # The login request itself is never cached.
        async with CachedSession(headers=headers, cache=None) as session:
            credentials = {
                "username": username.lower(),
                "password": password,
            }  # the username.lower() is for caching
            async with session.post(
                "https://www.wattpad.com/auth/login?nextUrl=%2F&_data=routes%2Fauth.login",
                data=credentials,
            ) as response:
                if response.status != 204:
                    raise ValueError("Not a 204.")

                # Thanks https://stackoverflow.com/a/32281245
                cookies = {}
                for name, morsel in response.cookies.items():
                    cookies[name] = morsel.value

                if not cookies:
                    raise ValueError("No cookies.")

                return cookies
# --- Models --- #
class CopyrightData(TypedDict):
    """Description of one copyright/licence option shown on the PDF copyright page."""

    name: str  # licence name
    statement: str  # copyright statement; may contain {published_year} and {username} placeholders
    freedoms: str  # what readers may do with the work
    printing: str  # printing terms
    image_url: Optional[str]  # licence badge URL, or None when there is no badge
class Language(TypedDict):
    """Story language, as requested from the API via ``language(name)``."""

    name: str
class User(TypedDict):
    """Story author, as requested from the API via ``user(username,avatar,description)``."""

    username: str
    avatar: str  # avatar image URL
    description: str  # author's profile text
class Part(TypedDict):
    """One part (chapter) of a story, as requested from the API via ``parts(id,title)``."""

    id: int
    title: str
class Story(TypedDict):
    """Story metadata; the keys mirror the ``fields`` list requested from the Wattpad API."""

    id: str
    title: str
    createDate: str  # the year is taken from the text before the first "-"
    modifyDate: str
    language: Language
    user: User
    description: str
    cover: str
    completed: bool
    tags: List[str]
    mature: bool
    url: str
    parts: List[Part]
    isPaywalled: bool
    copyright: int  # key into PDFGenerator.copyright (1-8)
# Shared validator used to check API responses against the Story shape.
story_ta = TypeAdapter(Story)
# --- Exceptions --- #
class WattpadError(Exception):
    """Root of every Wattpad-related exception raised by this module."""


class StoryNotFoundError(WattpadError):
    """Raised when a story does not exist; the user is shown "This story was not found"."""


class PartNotFoundError(StoryNotFoundError):
    """Raised when a story part does not exist; handled like a missing story."""
# --- API Calls --- #
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_story_from_partId(
part_id: int, cookies: Optional[dict] = None
) -> Tuple[int, Story]:
"""Fetch Story metadata from a Part ID."""
with start_action(action_type="api_fetch_storyFromPartId"):
async with CachedSession(
headers=headers, cache=None if cookies else cache
) as session: # Don't cache requests with Cookies.
async with session.get(
f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright)"
) as response:
body = await response.json()
if response.status == 400:
match body.get("error_code"):
case 1020: # "Story part not found"
logger.info(f"{part_id=} not found on Wattpad, returning.")
raise PartNotFoundError()
response.raise_for_status()
return int(body["groupId"]), story_ta.validate_python(body["group"])
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_story(story_id: int, cookies: Optional[dict] = None) -> Story:
    """Look up a story's metadata by its Story ID.

    Retried with exponential backoff for up to 15 seconds on ClientResponseError.
    Raises StoryNotFoundError when Wattpad answers 400 with error code 1017.
    """
    with start_action(action_type="api_fetch_story", story_id=story_id):
        # Requests carrying cookies bypass the cache.
        session_cache = None if cookies else cache
        async with CachedSession(
            headers=headers, cookies=cookies, cache=session_cache
        ) as session:
            async with session.get(
                f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright"
            ) as response:
                body = await response.json()

                # 1017 is Wattpad's "Story not found" error code.
                story_missing = (
                    response.status == 400 and body.get("error_code") == 1017
                )
                if story_missing:
                    logger.info(f"{story_id=} not found on Wattpad, returning.")
                    raise StoryNotFoundError()

                response.raise_for_status()

        return story_ta.validate_python(body)
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_story_content_zip(
    story_id: int, cookies: Optional[dict] = None
) -> BytesIO:
    """Download the zip archive of a story's part contents into memory.

    Retried with exponential backoff for up to 15 seconds on ClientResponseError.
    Requests carrying cookies bypass the cache.
    """
    with start_action(action_type="api_fetch_storyZip", story_id=story_id):
        session_cache = None if cookies else cache
        async with CachedSession(
            headers=headers, cookies=cookies, cache=session_cache
        ) as session:
            async with session.get(
                f"https://www.wattpad.com/apiv2/?m=storytext&group_id={story_id}&output=zip"
            ) as response:
                response.raise_for_status()
                return BytesIO(await response.read())
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_image(url: str, should_cache: bool = False) -> bytes:
    """Download an image and return its raw bytes.

    Retried with exponential backoff for up to 15 seconds on ClientResponseError.
    The response cache is only used when ``should_cache`` is true.
    """
    with start_action(action_type="api_fetch_image", url=url):
        # Images are not cached unless the caller asks for it.
        session_cache = cache if should_cache else None
        async with CachedSession(headers=headers, cache=session_cache) as session:
            async with session.get(url) as response:
                response.raise_for_status()
                return await response.read()
# --- Generation --- #
class EPUBGenerator:
    """EPUB Generation utilities"""

    def __init__(self, data: Story, cover: bytes):
        """Initialize EPUBGenerator. Create epub.EpubBook() and set metadata and cover.

        Args:
            data (Story): Story metadata copied into the book's metadata.
            cover (bytes): Cover image bytes, stored in the book as "cover.jpg".
        """
        self.epub = epub.EpubBook()
        self.data = data
        self.cover = cover

        # set metadata, defined in https://www.dublincore.org/specifications/dublin-core/dcmi-terms/#section-2
        self.epub.add_author(data["user"]["username"])
        self.epub.add_metadata("DC", "title", data["title"])
        self.epub.add_metadata("DC", "description", data["description"])
        self.epub.add_metadata("DC", "date", data["createDate"])
        self.epub.add_metadata("DC", "modified", data["modifyDate"])
        self.epub.add_metadata("DC", "language", data["language"]["name"])

        # Non-Dublin-Core values go into generic <meta name=... content=...> entries.
        self.epub.add_metadata(
            None, "meta", "", {"name": "tags", "content": ", ".join(data["tags"])}
        )
        # The boolean flags are written as "0"/"1".
        self.epub.add_metadata(
            None, "meta", "", {"name": "mature", "content": str(int(data["mature"]))}
        )
        self.epub.add_metadata(
            None,
            "meta",
            "",
            {"name": "completed", "content": str(int(data["completed"]))},
        )

        # Set cover
        self.epub.set_cover("cover.jpg", cover)
        cover_chapter = epub.EpubHtml(
            file_name="titlepage.xhtml",  # Standard for cover page
        )
        cover_chapter.set_content('<img src="cover.jpg">')
        self.epub.add_item(cover_chapter)

    async def add_chapters(
        self, contents: List[bs4.Tag], download_images: bool = False
    ):
        """Add chapters to the Epub, downloading images if necessary. Sets the table of contents and spine.

        This is an async generator: it yields each part's title once that part
        has been added. The table of contents, NCX/Nav items and spine are set
        only after every part has been processed, so the generator must be
        consumed to the end.

        Args:
            contents (List[bs4.Tag]): Chapter markup, paired positionally with self.data["parts"].
            download_images (bool): When true, every <img> with a src is fetched,
                embedded in the book, and the chapter markup is pointed at the
                embedded copy.
        """
        chapters: List[epub.EpubHtml] = []

        for cidx, (part, content) in enumerate(zip(self.data["parts"], contents)):
            title = part["title"]

            # Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1
            chapter = epub.EpubHtml(
                title=title,
                file_name=f"{cidx}_{part['id']}.xhtml",  # See issue #30
                lang=self.data["language"]["name"],
                uid=str(part["id"]).encode(),
            )

            str_content = content.prettify()

            if download_images:
                soup = content
                async with CachedSession(
                    headers=headers, cache=None
                ) as session:  # Don't cache images.
                    for idx, image in enumerate(soup.find_all("img")):
                        if not image["src"]:
                            continue
                        # Find all image tags and filter for those with sources

                        # Every image is stored with a .jpeg name and the image/jpeg media type.
                        async with session.get(image["src"]) as response:
                            img = epub.EpubImage(
                                media_type="image/jpeg",
                                content=await response.read(),
                                file_name=f"static/{cidx}/{idx}.jpeg",
                            )
                            self.epub.add_item(img)
                            # Fetch image and pack

                        # Rewrite the serialized chapter so the src points at the embedded file.
                        str_content = str_content.replace(
                            str(image["src"]), f"static/{cidx}/{idx}.jpeg"
                        )

            chapter.set_content(str_content)
            self.epub.add_item(chapter)
            chapters.append(chapter)

            yield title

        self.epub.toc = chapters

        # Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
        self.epub.add_item(epub.EpubNcx())
        self.epub.add_item(epub.EpubNav())

        # create spine
        self.epub.spine = ["nav"] + chapters

    def dump(self) -> BytesIO:
        """Write the book into an in-memory buffer and return it rewound to the start."""
        # Thanks https://stackoverflow.com/a/75398222
        buffer = BytesIO()
        epub.write_epub(buffer, self.epub)
        buffer.seek(0)
        return buffer
class PDFGenerator:
    """PDF Generation utilities"""

    def __init__(self, data: Story, cover: bytes):
        """Initialize PDFGenerator, create PDF Temporary file.

        Also loads the stylesheet and the HTML book template from ./pdf/, which
        is resolved against the current working directory.

        Args:
            data (Story): Story metadata.
            cover (bytes): Cover image bytes, embedded into the template later.
        """
        self.data = data
        # The PDF is rendered into this temporary file and read back by dump().
        self.file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=True)
        self.cover = cover
        self.content: str = ""

        # Licence descriptions keyed by the story's numeric "copyright" value.
        self.copyright = {
            1: {
                "name": "All Rights Reserved",
                "statement": "©️ {published_year} by {username}. All Rights Reserved.",
                "freedoms": "No reuse, redistribution, or modification without permission.",
                "printing": "Not allowed without explicit permission.",
                "image_url": None,
            },
            2: {
                "name": "Public Domain",
                "statement": "This work is in the public domain. Originally published in {published_year} by {username}.",
                "freedoms": "Free to use for any purpose without permission.",
                "printing": "Allowed for personal or commercial purposes.",
                "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/cc-zero.png",
            },
            3: {
                "name": "Creative Commons Attribution (CC-BY)",
                "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution 4.0 International License.",
                "freedoms": "Allows reuse, redistribution, and modification with credit to the author.",
                "printing": "Allowed with proper credit.",
                "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by.png",
            },
            4: {
                "name": "CC Attribution NonCommercial (CC-BY-NC)",
                "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.",
                "freedoms": "Allows reuse and modification for non-commercial purposes with credit.",
                "printing": "Allowed for non-commercial purposes with proper credit.",
                "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc.png",
            },
            5: {
                "name": "CC Attribution NonCommercial NoDerivs (CC-BY-NC-ND)",
                "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivs 4.0 International License.",
                "freedoms": "Allows sharing in original form for non-commercial purposes with credit; no modifications allowed.",
                "printing": "Allowed for non-commercial purposes in original form with proper credit.",
                "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-nd.png",
            },
            6: {
                "name": "CC Attribution NonCommercial ShareAlike (CC-BY-NC-SA)",
                "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.",
                "freedoms": "Allows reuse and modification for non-commercial purposes under the same license, with credit.",
                "printing": "Allowed for non-commercial purposes with proper credit under the same license.",
                "image_url": "http://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc-sa.png",
            },
            7: {
                "name": "CC Attribution ShareAlike (CC-BY-SA)",
                "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.",
                "freedoms": "Allows reuse and modification for any purpose under the same license, with credit.",
                "printing": "Allowed with proper credit under the same license.",
                "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-sa.png",
            },
            8: {
                "name": "CC Attribution NoDerivs (CC-BY-ND)",
                "statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NoDerivs 4.0 International License.",
                "freedoms": "Allows sharing in original form for any purpose with credit; no modifications allowed.",
                "printing": "Allowed in original form with proper credit.",
                "image_url": "https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nd.png",
            },
        }

        with open("./pdf/stylesheet.css") as reader:
            self.stylesheet = reader.read()

        with open("./pdf/book.html") as reader:
            self.template = reader.read()

    async def generate_cover_and_copyright_html(
        self,
    ) -> str:
        """Generate Cover and Copyright file, fetch copyright image (cached), use self.cover for cover.

        Fills the {statement}, {author}, {freedoms}, {printing}, {book_id},
        {book_title}, {copyright_image} and {cover} placeholders of
        self.template, stores the result back into self.template and returns it.

        Raises:
            KeyError: If the story's copyright value is not a key of self.copyright.
        """
        copyright_data = self.copyright[self.data["copyright"]]
        template = self.template

        about_copyright = (
            template.replace(
                "{statement}",
                copyright_data["statement"].format(
                    username=self.data["user"]["username"],
                    # The year is the text before the first "-" of createDate.
                    published_year=self.data["createDate"].split("-", 2)[0],
                ),
            )
            .replace("{author}", self.data["user"]["username"])
            .replace("{freedoms}", copyright_data["freedoms"])
            .replace(
                "{printing}",
                copyright_data["printing"],
            )
            .replace("{book_id}", self.data["id"])
            .replace("{book_title}", self.data["title"])
        )

        # Licences without a badge (image_url is None) skip the download.
        copyright_image = (
            await fetch_image(copyright_data["image_url"], should_cache=True)
            if copyright_data["image_url"]
            else None
        )

        # The badge is inlined as a base64 data URI.
        image_block = (
            """<img src="{image_url}"
alt="{name}"
width="88"
height="31"
id="copyright-license-image">""".format(
                image_url=f"data:image/jpg;base64,{b64encode(copyright_image).decode()}",
                name=copyright_data["name"],
            )
            if copyright_image
            else ""
        )

        about_copyright = (
            about_copyright.replace(
                "{copyright_image}",
                image_block,
            )
            if image_block
            else about_copyright.replace("{copyright_image}", "")
        )

        about_copyright = about_copyright.replace(
            "{cover}", f"data:image/jpg;base64,{b64encode(self.cover).decode()}"
        )

        self.template = about_copyright
        return about_copyright

    async def generate_about_author_chapter(self) -> str:
        """Generate About the Author file, fetch avatar.

        Fills the {username}, {description} and {avatar} placeholders of
        self.template, stores the result back into self.template and returns it.
        The description is shortened with smart_trim.
        """
        author_avatar = (
            await fetch_image(
                self.data["user"]["avatar"].replace("128", "512")
            )  # Increase image resolution
            if self.data["user"]["avatar"]
            else None
        )

        about_author = self.template.replace(
            "{username}", self.data["user"]["username"]
        ).replace("{description}", smart_trim(self.data["user"]["description"]))

        # Without an avatar, the placeholder is simply removed.
        about_author = (
            about_author.replace(
                "{avatar}",
                f"""
<img src="data:image/jpg;base64,{b64encode(author_avatar).decode()}" alt="Author's profile picture" id="author-profile-picture">""",
            )
            if author_avatar
            else about_author.replace("{avatar}", "")
        )

        self.template = about_author
        return about_author

    def generate_toc(self):
        """Build the Table of Contents section and append it to the book.

        Creates one <li><a href="#part_id"></a></li> entry per part. Requires
        self.tree, which add_chapters sets before calling this method.

        Returns:
            str: The serialized Table of Contents markup.
        """
        ids = [part["id"] for part in self.data["parts"]]

        clean = BeautifulSoup(
            """
<section id="contents" class="toc">
<h1>Table of Contents</h1>
<ul></ul>
</section>
""",
            "html.parser",
        )  # html.parser doesn't create <html>/<body> tags automatically

        ul = cast(bs4.Tag, clean.find("ul"))
        for part_id in ids:
            # The anchors are left without text.
            # NOTE(review): presumably the stylesheet generates the visible label — confirm.
            li = clean.new_tag("li")
            a = clean.new_tag("a")
            a["href"] = f"#{part_id}"
            li.append(a)
            ul.append(li)

        insert_point = cast(bs4.Tag, self.tree.find("div", {"id": "book"}))
        insert_point.append(clean)

        return str(clean)

    async def add_chapters(
        self, contents: List[bs4.Tag], download_images: bool = False
    ):
        """Add chapters to the PDF, downloading images if necessary. Also add Cover, Copyright, and About the Author pages.

        This is an async generator: it yields each part's title as the part is
        appended. The PDF is rendered and its metadata written only after every
        part has been processed, so the generator must be consumed to the end.

        Args:
            contents (List[bs4.Tag]): Chapter markup, paired positionally with self.data["parts"].
            download_images (bool): Not used by this method.
        """
        # Cover, Copyright and About the Author placeholders are filled into
        # self.template before it is parsed.
        await self.generate_cover_and_copyright_html()
        await self.generate_about_author_chapter()

        self.tree = BeautifulSoup(self.template, "lxml")
        self.generate_toc()

        for part, content in zip(self.data["parts"], contents):
            insert_point = cast(bs4.Tag, self.tree.find("div", {"id": "book"}))
            insert_point.append(content)

            yield part["title"]

        with start_action(
            action_type="generate_pdf",
            output_filename=self.file.name,
            title=self.data["title"],
        ):
            # PDF generation with weasyprint, written to self.file.
            # At this stage self.tree holds the whole book, including the
            # Table of Contents added by generate_toc.
            font_config = FontConfiguration()
            stylesheet_obj = CSS(string=self.stylesheet, font_config=font_config)

            html_obj = HTML(string=str(self.tree))
            html_obj.write_pdf(
                self.file.name, stylesheets=[stylesheet_obj], font_config=font_config
            )

        with start_action(action_type="add_metadata") as action:
            # Metadata generation with Exiftool
            clean_description = (
                self.data["description"].strip().replace("\n", "$/")
            )  # exiftool doesn't parse \ns correctly, they support $/ for the same instead. `&#xa;` is another option.
            action.log(f"clean_description: {clean_description}")

            metadata = {
                "Author": self.data["user"]["username"],
                "Title": self.data["title"],
                "Subject": clean_description,
                "CreationDate": self.data["createDate"],
                "ModDate": self.data["modifyDate"],
                "Keywords": ",".join(self.data["tags"]),
                "Language": self.data["language"]["name"],
                "Completed": self.data["completed"],
                "MatureContent": self.data["mature"],
                "Producer": "Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader",
            }  # As per https://exiftool.org/TagNames/PDF.html
            action.log(f"options: {metadata}")

            with ExifTool(
                config_file="../exiftool.config", logger=exiftool_logger
            ) as et:
                # Custom configuration adds Completed and MatureContent tags.
                # exiftool logger logs executed command
                # Each entry becomes a "-Key=value" argument; the file is edited in place.
                et.execute(
                    *(
                        [f"-{key}={value}" for key, value in metadata.items()]
                        + [
                            "-overwrite_original",
                            self.file.file.name,
                        ]
                    )
                )

    def dump(self) -> BytesIO:
        """Read the rendered PDF into memory and close the temporary file.

        The temporary file was created with delete=True, so closing it removes
        it; call this once, after add_chapters has been fully consumed.
        """
        self.file.seek(0)
        buffer = BytesIO(self.file.read())
        self.file.close()
        return buffer
# ------ #
-13
View File
@@ -1,13 +0,0 @@
# ruff: noqa: F401
from .create_book import (
fetch_cookies,
fetch_story,
fetch_story_content_zip,
fetch_story_from_partId,
)
from .exceptions import PartNotFoundError, StoryNotFoundError, WattpadError
from .generators import EPUBGenerator, PDFGenerator
from .logs import logger
from .parser import fetch_image
from .utils import slugify
-46
View File
@@ -1,46 +0,0 @@
from enum import Enum
from pydantic import field_validator, model_validator
from pydantic_settings import BaseSettings
class CacheTypes(Enum):
    """Cache backends selectable through the CACHE_TYPE setting."""

    file = "file"
    redis = "redis"
class Config(BaseSettings):
    """Cache-related settings for the downloader.

    Empty strings for USE_CACHE and CACHE_TYPE are treated as "not set" and
    replaced by the defaults. After validation, the Redis URL must be present
    exactly when the Redis cache is selected.
    """

    # Values can be overridden by envvars.
    USE_CACHE: bool = True
    CACHE_TYPE: CacheTypes = CacheTypes.file
    REDIS_CONNECTION_URL: str = ""

    @field_validator("USE_CACHE", mode="before")
    def validate_use_cache(cls, value):
        """Map an empty string to the USE_CACHE default (True)."""
        return True if value == "" else value

    @field_validator("CACHE_TYPE", mode="before")
    def validate_cache_type(cls, value):
        """Map an empty string to the file cache."""
        # Thanks https://stackoverflow.com/a/78157474
        return "file" if value == "" else value

    @model_validator(mode="after")
    def prevent_mismatched_redis_url(self):
        """Raise ValueError when the Redis URL and the selected cache type disagree."""
        url_given = bool(self.REDIS_CONNECTION_URL)

        if self.CACHE_TYPE == CacheTypes.file and url_given:
            raise ValueError(
                "REDIS_CONNECTION_URL provided when File cache selected. To use Redis as a cache, set CACHE_TYPE=redis."
            )

        if self.CACHE_TYPE == CacheTypes.redis and not url_given:
            raise ValueError(
                "REDIS_CONNECTION_URL not provided when Redis cache selected. To use File cache, set CACHE_TYPE=file."
            )

        return self
-129
View File
@@ -1,129 +0,0 @@
from __future__ import annotations
from io import BytesIO
from typing import Optional
import backoff
from aiohttp import ClientResponseError
from aiohttp_client_cache.session import CachedSession
from eliot import start_action
from pydantic import TypeAdapter
from .exceptions import PartNotFoundError, StoryNotFoundError
from .logs import logger
from .models import Story
from .vars import cache, headers
# Shared validator used to check API responses against the Story shape.
story_ta = TypeAdapter(Story)
# --- #
async def fetch_cookies(username: str, password: str) -> dict:
    # source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58
    """Log in to Wattpad and return the cookies set by the login response.

    Args:
        username (str): Username (sent lowercased).
        password (str): Password.

    Raises:
        ValueError: The login response status was not 204.
        ValueError: The login response carried no cookies.

    Returns:
        dict: Cookie name to cookie value.
    """
    with start_action(action_type="api_fetch_cookies"):
        # The login request itself is never cached.
        async with CachedSession(headers=headers, cache=None) as session:
            credentials = {
                "username": username.lower(),
                "password": password,
            }  # the username.lower() is for caching
            async with session.post(
                "https://www.wattpad.com/auth/login?nextUrl=%2F&_data=routes%2Fauth.login",
                data=credentials,
            ) as response:
                if response.status != 204:
                    raise ValueError("Not a 204.")

                # Thanks https://stackoverflow.com/a/32281245
                cookies = {}
                for name, morsel in response.cookies.items():
                    cookies[name] = morsel.value

                if not cookies:
                    raise ValueError("No cookies.")

                return cookies
# --- API Calls --- #
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_story_from_partId(
part_id: int, cookies: Optional[dict] = None
) -> tuple[int, Story]:
"""Fetch Story metadata from a Part ID."""
with start_action(action_type="api_fetch_storyFromPartId"):
async with CachedSession(
headers=headers, cache=None if cookies else cache
) as session: # Don't cache requests with Cookies.
async with session.get(
f"https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=groupId,group(tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright)"
) as response:
body = await response.json()
if response.status == 400:
match body.get("error_code"):
case 1020: # "Story part not found"
logger.info(f"{part_id=} not found on Wattpad, returning.")
raise PartNotFoundError()
response.raise_for_status()
return int(body["groupId"]), story_ta.validate_python(body["group"])
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_story(story_id: int, cookies: Optional[dict] = None) -> Story:
    """Retrieve and validate a story's metadata by its Story ID.

    Raises ``StoryNotFoundError`` when Wattpad answers HTTP 400 with error
    code 1017; other non-success statuses raise ``ClientResponseError``.
    """
    # Responses to requests carrying cookies are never cached.
    session_cache = None if cookies else cache

    with start_action(action_type="api_fetch_story", story_id=story_id):
        async with CachedSession(
            headers=headers, cookies=cookies, cache=session_cache
        ) as session:
            async with session.get(
                f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username,avatar,description),parts(id,title),cover,copyright"
            ) as response:
                body = await response.json()

                # 1017 is Wattpad's "Story not found" error code.
                if response.status == 400 and body.get("error_code") == 1017:
                    logger.info(f"{story_id=} not found on Wattpad, returning.")
                    raise StoryNotFoundError()

                response.raise_for_status()
                return story_ta.validate_python(body)
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_story_content_zip(
    story_id: int, cookies: Optional[dict] = None
) -> BytesIO:
    """Download the zip archive of a story's part contents into memory.

    Returns a ``BytesIO`` wrapping the raw response body. Non-success
    statuses raise ``ClientResponseError``.
    """
    # Responses to requests carrying cookies are never cached.
    session_cache = None if cookies else cache

    with start_action(action_type="api_fetch_storyZip", story_id=story_id):
        async with CachedSession(
            headers=headers, cookies=cookies, cache=session_cache
        ) as session:
            async with session.get(
                f"https://www.wattpad.com/apiv2/?m=storytext&group_id={story_id}&output=zip"
            ) as response:
                response.raise_for_status()
                payload = await response.read()

    return BytesIO(payload)
-12
View File
@@ -1,12 +0,0 @@
class WattpadError(Exception):
    """Root of the exception hierarchy for Wattpad related failures."""


class StoryNotFoundError(WattpadError):
    """Signals that the "This story was not found" error should be shown to the user."""


class PartNotFoundError(StoryNotFoundError):
    """A story part could not be found; handled like a missing story."""
@@ -1,4 +0,0 @@
# ruff: noqa: F401
from .epub import EPUBGenerator
from .pdf import PDFGenerator
-109
View File
@@ -1,109 +0,0 @@
from io import BytesIO
from bs4 import BeautifulSoup
from ebooklib import epub
from re import sub
from ..models import Story
from .types import AbstractGenerator
class EPUBGenerator(AbstractGenerator):
    """Compile parsed part trees into an EPUB via ebooklib.

    Args:
        metadata (Story): Story metadata.
        part_trees (list[BeautifulSoup]): Parsed part trees, one per part.
        cover (bytes): Cover image.
        images (list[list[bytes | None]]): Per-chapter image data; an entry
            is ``None`` when that image is unavailable. Empty when images
            were not downloaded.
    """

    def __init__(
        self,
        metadata: Story,
        part_trees: list[BeautifulSoup],
        cover: bytes,
        images: list[list[bytes | None]],
    ):
        self.story = metadata
        self.parts = part_trees
        self.cover = cover
        self.images = images

        self.book: epub.EpubBook = epub.EpubBook()

    def add_metadata(self):
        """Add metadata to epub."""
        self.book.add_author(self.story["user"]["username"])

        self.book.add_metadata("DC", "title", self.story["title"])
        self.book.add_metadata("DC", "description", self.story["description"])
        self.book.add_metadata("DC", "date", self.story["createDate"])
        self.book.add_metadata("DC", "modified", self.story["modifyDate"])
        self.book.add_metadata("DC", "language", self.story["language"]["name"])

        self.book.add_metadata(
            None, "meta", "", {"name": "tags", "content": ", ".join(self.story["tags"])}
        )
        # Booleans are stored as "0" / "1".
        self.book.add_metadata(
            None,
            "meta",
            "",
            {"name": "mature", "content": str(int(self.story["mature"]))},
        )
        self.book.add_metadata(
            None,
            "meta",
            "",
            {"name": "completed", "content": str(int(self.story["completed"]))},
        )

    def add_cover(self):
        """Add cover to epub."""
        self.book.set_cover("cover.jpg", self.cover)

        cover_chapter = epub.EpubHtml(
            file_name="titlepage.xhtml",  # Standard for cover page
        )
        cover_chapter.set_content('<img src="cover.jpg">')
        self.book.add_item(cover_chapter)

    def add_chapters(self):
        """Add chapters to epub, replacing references to image urls to static image paths if images are provided during initialization.

        Images whose data is missing (``None`` or empty) are not embedded;
        their ``<img>`` tag keeps its original ``src``.
        """
        chapters = []

        for idx, (part, tree) in enumerate(zip(self.story["parts"], self.parts)):
            chapter = epub.EpubHtml(
                # Removes control characters from chapter title
                title=sub(r'[\x00-\x1F\x7F]', '', part["title"]),
                file_name=f"{idx}_{part['id']}.xhtml",
            )

            if self.images:
                for img_idx, (img_data, img_tag) in enumerate(
                    zip(self.images[idx], tree.find_all("img"))
                ):
                    if not img_data:
                        # No bytes for this image: embedding it would create
                        # an EpubImage with no content. Mirrors the check in
                        # PDFGenerator.generate_chapters.
                        continue

                    path = f"static/{idx}_{part['id']}/{img_idx}.jpeg"
                    img = epub.EpubImage(
                        media_type="image/jpeg", content=img_data, file_name=path
                    )
                    self.book.add_item(img)
                    img_tag["src"] = path

            chapter.set_content(tree.prettify())
            self.book.add_item(chapter)
            chapters.append(chapter)

        # ! Review, are these needed? #11
        self.book.toc = chapters

        # Thanks https://github.com/aerkalov/ebooklib/blob/master/samples/09_create_image/create.py
        self.book.add_item(epub.EpubNcx())
        self.book.add_item(epub.EpubNav())

        # create spine
        self.book.spine = ["nav"] + chapters

    def compile(self):
        """Populate the book with metadata, cover and chapters, in that order."""
        self.add_metadata()
        self.add_cover()
        self.add_chapters()
        return True

    def dump(self) -> BytesIO:
        """Write the EPUB into an in-memory buffer rewound to its start."""
        # Thanks https://stackoverflow.com/a/75398222
        buffer = BytesIO()
        epub.write_epub(buffer, self.book)
        buffer.seek(0)
        return buffer
-189
View File
@@ -1,189 +0,0 @@
from base64 import b64encode
from io import BytesIO
from pathlib import Path
from tempfile import NamedTemporaryFile, _TemporaryFileWrapper
import pydyf
from bs4 import BeautifulSoup
from jinja2 import Template
from weasyprint import CSS, HTML, Document
from weasyprint.text.fonts import FontConfiguration
from ..models import Story
from .types import AbstractGenerator
# Directory (next to this module) that holds the PDF stylesheet, HTML template and assets.
DATA_PATH = Path(__file__).parent / "pdf"
# Directory of license badge images referenced by COPYRIGHT_DATA below.
ASSET_PATH = DATA_PATH / "assets"
# Per-license display data, keyed by integer ID.
# - "statement" is a str.format template with {published_year} and {username} placeholders.
# - "asset" is the path of the license badge image, or None when there is no badge.
COPYRIGHT_DATA = {
1: {
"name": "All Rights Reserved",
"statement": "©️ {published_year} by {username}. All Rights Reserved.",
"freedoms": "No reuse, redistribution, or modification without permission.",
"printing": "Not allowed without explicit permission.",
"asset": None,
},
2: {
"name": "Public Domain",
"statement": "This work is in the public domain. Originally published in {published_year} by {username}.",
"freedoms": "Free to use for any purpose without permission.",
"printing": "Allowed for personal or commercial purposes.",
"asset": ASSET_PATH / "cc-zero.png",
},
3: {
"name": "Creative Commons Attribution (CC-BY)",
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution 4.0 International License.",
"freedoms": "Allows reuse, redistribution, and modification with credit to the author.",
"printing": "Allowed with proper credit.",
"asset": ASSET_PATH / "by.png",
},
4: {
"name": "CC Attribution NonCommercial (CC-BY-NC)",
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.",
"freedoms": "Allows reuse and modification for non-commercial purposes with credit.",
"printing": "Allowed for non-commercial purposes with proper credit.",
"asset": ASSET_PATH / "by-nc.png",
},
5: {
"name": "CC Attribution NonCommercial NoDerivs (CC-BY-NC-ND)",
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivs 4.0 International License.",
"freedoms": "Allows sharing in original form for non-commercial purposes with credit; no modifications allowed.",
"printing": "Allowed for non-commercial purposes in original form with proper credit.",
"asset": ASSET_PATH / "by-nc-nd.png",
},
6: {
"name": "CC Attribution NonCommercial ShareAlike (CC-BY-NC-SA)",
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.",
"freedoms": "Allows reuse and modification for non-commercial purposes under the same license, with credit.",
"printing": "Allowed for non-commercial purposes with proper credit under the same license.",
"asset": ASSET_PATH / "by-nc-sa.png",
},
7: {
"name": "CC Attribution ShareAlike (CC-BY-SA)",
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.",
"freedoms": "Allows reuse and modification for any purpose under the same license, with credit.",
"printing": "Allowed with proper credit under the same license.",
"asset": ASSET_PATH / "by-sa.png",
},
8: {
"name": "CC Attribution NoDerivs (CC-BY-ND)",
"statement": "©️ {published_year} by {username}. This work is licensed under a Creative Commons Attribution-NoDerivs 4.0 International License.",
"freedoms": "Allows sharing in original form for any purpose with credit; no modifications allowed.",
"printing": "Allowed in original form with proper credit.",
"asset": ASSET_PATH / "by-nd.png",
},
} # Maps Wattpad Copyright IDs to their corresponding data.
# Load the PDF stylesheet and the HTML book template once, at import time.
STYLESHEET = (DATA_PATH / "stylesheet.css").read_text()
TEMPLATE = (DATA_PATH / "book.html").read_text()
class PDFGenerator(AbstractGenerator):
    """Compile parsed part trees into a PDF via a Jinja2 template and WeasyPrint.

    Args:
        metadata (Story): Story metadata.
        part_trees (list[BeautifulSoup]): Parsed part trees, one per part.
        cover (bytes): Cover image.
        images (list[list[bytes | None]]): Per-chapter image data; an entry
            is ``None`` when that image is unavailable. Empty when images
            were not downloaded.
        author_image (bytes): The author's avatar image.
    """

    def __init__(
        self,
        metadata: Story,
        part_trees: list[BeautifulSoup],
        cover: bytes,
        images: list[list[bytes | None]],
        author_image: bytes,
    ):
        self.story = metadata
        self.parts = part_trees
        self.cover = cover
        self.images = images
        self.author = author_image

        # The PDF is written to this temporary file and read back in dump().
        self.book: _TemporaryFileWrapper = NamedTemporaryFile(suffix=".pdf")  # type: ignore
        self.content = TEMPLATE

    def generate_chapters(self) -> dict[int, str]:
        """Return a dictionary of part_ids to content trees, with image URLs replaced with base64 encoded images if provided during initialization."""
        data: dict[int, str] = {}

        for idx, (part, tree) in enumerate(zip(self.story["parts"], self.parts)):
            if self.images:
                for img_idx, (img_data, img_tag) in enumerate(
                    zip(self.images[idx], tree.find_all("img"))
                ):
                    if not img_data:
                        # Unavailable image: leave the original src untouched.
                        continue
                    img_tag["src"] = (
                        f"data:image/jpg;base64,{b64encode(img_data).decode()}"
                    )

            data[part["id"]] = tree.prettify()

        return data

    def populate_template(self, parts: dict[int, str]):
        """Populate HTML Template with Story data.

        The template is rendered with autoescaping enabled, so story-supplied
        text (title, description, author bio, tags, ...) is inserted as text
        rather than markup. Chapter HTML is exempt because the template pipes
        it through ``| safe``.

        Raises:
            KeyError: The story's copyright ID is not in ``COPYRIGHT_DATA``.
        """
        copyright = COPYRIGHT_DATA[self.story["copyright"]]

        data = {
            "statement": copyright["statement"].format(
                username=self.story["user"]["username"],
                # Text before the first "-" of createDate is used as the year.
                published_year=self.story["createDate"].split("-", 2)[0],
            ),
            "author": self.story["user"]["username"],
            "freedoms": copyright["freedoms"],
            "printing": copyright["printing"],
            "book_id": self.story["id"],
            "book_title": self.story["title"],
            "cover": f"data:image/jpg;base64,{b64encode(self.cover).decode()}",
            "username": self.story["user"]["username"],
            "author_bio": self.story["user"]["description"],
            "clean_tags": ", ".join(self.story["tags"]),
            "created": self.story["createDate"],
            "modified": self.story["modifyDate"],
            "is_completed": self.story["completed"],
            "is_mature": self.story["mature"],
            "description": self.story["description"],
            "avatar": b64encode(self.author).decode(),
            "copyright": {
                "data": (
                    b64encode(copyright["asset"].read_bytes()).decode()
                    if copyright["asset"]
                    else ""
                ),
                "name": copyright["name"],
            },
            "parts": parts,
        }

        # autoescape=True: without it, markup in user-controlled fields would
        # be interpreted by WeasyPrint as part of the document.
        self.content: str = Template(self.content, autoescape=True).render(data)

    def write_custom_metadata(self, document: Document, pdf: pydyf.PDF):
        """Write non-standard metadata fields to the PDF."""
        pdf.info["completed"] = pydyf.String(str(self.story["completed"]))
        pdf.info["mature"] = pydyf.String(str(self.story["mature"]))

    def generate_pdf(self):
        """Generate and write the PDF to a temporary file (self.book)."""
        font_config = FontConfiguration()
        stylesheet_obj = CSS(string=STYLESHEET, font_config=font_config)
        html_obj = HTML(string=self.content)

        html_obj.write_pdf(
            self.book.name,
            stylesheets=[stylesheet_obj],
            font_config=font_config,
            finisher=self.write_custom_metadata,
            options={"custom_metadata": True},
        )

    def compile(self):
        """Render chapters, fill in the template, and write the PDF."""
        parts = self.generate_chapters()
        self.populate_template(parts)
        self.generate_pdf()
        return True

    def dump(self) -> BytesIO:
        """Return the generated PDF as an in-memory buffer and close the temporary file."""
        self.book.seek(0)
        buffer = BytesIO(self.book.read())
        self.book.close()

        return buffer
Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.3 KiB

@@ -1,85 +0,0 @@
<!DOCTYPE html>
<html lang="{{ langcode }}">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- https://doc.courtbouillon.org/weasyprint/stable/api_reference.html#weasyprint.document.DocumentMetadata -->
<title>{{ book_title }}</title>
<meta name=description content="{{description}}">
<meta name=author content="{{author}}">
<meta name=keywords content="{{clean_tags}}">
<meta name=language content="{{langcode}}">
<meta name=dcterms.created content="{{created}}">
<meta name=dcterms.modified content="{{modified}}">
<meta name=generator content="Dhanush Rambhatla (TheOnlyWayUp - https://rambhat.la) and WattpadDownloader">
</head>
<section class="fullpage">
<img src="{{ cover }}" alt="Cover">
</section>
<div id="copyright-container">
<h1 id="copyright-notice">Copyright Notice</h1>
<h2 id="copyright-title">{{ book_title }}</h2>
<p id="copyright-author">By {{ author }}</p>
<div id="copyright-separator"></div>
<p id="copyright-ex-libris">Ex Libris Sapientiae</p>
<div id="copyright-separator"></div>
{% if copyright.data %}
<img src="data:image/jpg;base64,{{copyright.data}}" alt="{{copyright.name}}" width="88" height="31"
id="copyright-license-image">
{% endif %}
<p id="copyright-copyright">{{ statement }}</p>
<p id="copyright-rights">{{ freedoms }}</p>
<p id="copyright-printing">Printing: {{ printing }}</p>
<p id="book-link">
ID: {{ book_id }}.
<a href="https://wattpad.com/story/{{ book_id }}" target="_blank" id="copyright-link">View this Book Online</a>
</p>
</div>
<div id="book">
<section id="contents" class="toc">
<h1>Table of Contents</h1>
<ul>
{% for part_id in parts %}
<li><a href="#{{part_id}}"></a></li>
{% endfor %}
</ul>
</section>
{% for part_id in parts %}
{{parts[part_id] | safe}}
{% endfor %}
</div>
<h1>About the Author</h1>
<div id="author-container">
<div id="author-about">
<img src="data:image/jpg;base64,{{avatar}}" alt="{{author}}'s profile picture" id="author-profile-picture">
<h2 id="author-name">
<a href="https://wattpad.com/user/{{ username }}" id="author-link">{{ username }}</a>
</h2>
<hr id="author-divider">
<p id="author-bio">
{{ author_bio }}
</p>
</div>
</div>
</html>
@@ -1,5 +0,0 @@
The fonts need to be symlinked to `/tmp/fonts`; this allows them to be loaded both during development and at build time.
Fonts are assumed to be present at `/tmp/fonts`. During development they live at `/src/api/src/create_book/generators/pdf`, and during deployment at `/app/src/api/src/create_book/generators/pdf`. Symlinking both to a single path seems like a clean solution.
`Fontconfig error: Cannot load default config file: No such file: (null)`
If the fonts aren't found, this warning pops up in console. It won't cause downloads to fail, though.
@@ -1,47 +0,0 @@
from io import BytesIO
from tempfile import _TemporaryFileWrapper
from typing import Literal
from bs4 import BeautifulSoup
from ebooklib.epub import EpubBook
from ..models import Story
class AbstractGenerator:
    """Base class for generators that turn parsed part trees into a file.

    Args:
        metadata (Story): Story Metadata.
        part_trees (List[BeautifulSoup]): Parsed part trees.
        cover (bytes): Cover image.
        images (List[List[bytes | None]]): An array of images for each chapter, if images have been downloaded.
    """

    def __init__(
        self,
        metadata: Story,
        part_trees: list[BeautifulSoup],
        cover: bytes,
        images: list[list[bytes | None]],
    ):
        # Placeholder for the generator-specific output object.
        self.book: EpubBook | _TemporaryFileWrapper = None  # type: ignore

        self.story, self.parts = metadata, part_trees
        self.cover, self.images = cover, images

    def compile(self) -> Literal[True]:
        """Build the in-memory representation of the output format from the part trees.

        Returns:
            Literal[True]: Compiled successfully.
        """
        return True

    def dump(self) -> BytesIO:
        """Return a Buffer of the compiled file (empty in this base class)."""
        return BytesIO()
-19
View File
@@ -1,19 +0,0 @@
import logging
from os import environ
from eliot import to_file
from eliot.stdlib import EliotHandler
# One shared handler forwards stdlib logging records to Eliot.
handler = EliotHandler()

fastapi_logger = logging.getLogger("fastapi")
fastapi_logger.setLevel(logging.INFO)
fastapi_logger.addHandler(handler)

exiftool_logger = logging.getLogger("exiftool")
exiftool_logger.addHandler(handler)

# Instantiated directly, so this logger is not registered with the logging
# module's manager: logging.getLogger("wpd") returns a different object.
logger = logging.Logger("wpd")
logger.addHandler(handler)

# Eliot output is only written to disk when the DEBUG variable is set (any non-empty value).
if environ.get("DEBUG"):
    to_file(open("eliot.log", "wb"))
-42
View File
@@ -1,42 +0,0 @@
from typing import Optional, TypedDict
class CopyrightData(TypedDict):
    """Display data describing one copyright/license option."""

    name: str
    statement: str
    freedoms: str
    printing: str
    image_url: Optional[str]


class Language(TypedDict):
    """Language of a story."""

    name: str


class User(TypedDict):
    """Author of a story."""

    username: str
    avatar: str
    description: str


class Part(TypedDict):
    """A single part (chapter) of a story."""

    id: int
    title: str


class Story(TypedDict):
    """Story metadata, using the field names requested from the Wattpad API."""

    id: str
    title: str
    createDate: str
    modifyDate: str
    language: Language
    user: User
    description: str
    cover: str
    completed: bool
    tags: list[str]
    mature: bool
    url: str
    parts: list[Part]
    isPaywalled: bool
    copyright: int
-92
View File
@@ -1,92 +0,0 @@
import asyncio
from itertools import batched
from typing import cast
from aiohttp import ClientSession
from bs4 import BeautifulSoup, Tag
from eliot import start_action
from urllib.parse import urlparse
from .vars import headers
def clean_tree(title: str, id: int, body: str) -> BeautifulSoup:
    """Rebuild a part's HTML as a minimal tree: a title heading plus a body section.

    Only top-level ``<p>`` elements of ``body`` are considered. A paragraph
    holding text (optionally bold/italic/underlined) is moved over with all
    attributes except ``style`` removed; enclosed ``<img>`` and ``<br>``
    elements are re-created as fresh tags carrying only the attributes set
    here.

    Args:
        title (str): Part title, rendered as the ``<h1>`` text.
        id (int): Part ID, used as the ``<h1>`` element's ``id`` attribute.
        body (str): Raw HTML of the part.

    Returns:
        BeautifulSoup: The cleaned tree. Contains only the heading and an
        empty section when ``body`` has no parsable content.
    """
    from html import escape  # local import: only needed to build the heading

    original_soup = BeautifulSoup(body, features="lxml")
    # The title and id are escaped so that markup characters in them cannot
    # alter the structure of the generated heading.
    new_soup = BeautifulSoup(
        f"""
<h1 class="chapter-title" id="{escape(str(id))}">{escape(title)}</h1>
<section class="chapter-body"></section>
""",
        features="html.parser",  # head/body tags aren't generated
    )

    insert_at = cast(Tag, new_soup.find("section"))
    source_body = original_soup.find("body")
    if source_body is None:
        # Nothing parsable (e.g. an empty part): return the bare skeleton.
        return new_soup

    # Snapshot the children: appending a tag to new_soup removes it from original_soup.
    children = cast(Tag, source_body).children
    for tag in cast(list[Tag], list(children)):
        if tag.name != "p":  # Casted to lower
            continue

        style = tag.attrs.get("style")

        for child in cast(list[Tag], tag.children):
            # tag is a <p> enclosing either text, media, or a break
            if child.name in [None, "b", "i", "u", "strong", "em"]:
                # text is enclosed, can be italic, bold, underlined, or a mix
                tag.attrs = {}
                p_tag = tag
                if style:
                    p_tag["style"] = style

                insert_at.append(p_tag)
                break  # the whole paragraph has been moved; stop scanning it

            elif child.name == "img":
                # image is enclosed
                src = child.attrs.get("src")
                if not src:
                    # An image without a source cannot be rendered or fetched.
                    continue

                candidate_attrs = {
                    "height": child.attrs.get("data-original-height"),
                    "width": child.attrs.get("data-original-width"),
                    "src": src,
                }
                img_tag = Tag(name="img")
                # Drop absent dimensions instead of emitting valueless attributes.
                img_tag.attrs = {
                    key: value
                    for key, value in candidate_attrs.items()
                    if value is not None
                }
                if style:
                    img_tag["style"] = style

                insert_at.append(img_tag)

            elif child.name == "br":
                # br tag is enclosed
                br_tag = Tag(name="br", can_be_empty_element=True)
                if style:
                    br_tag["style"] = style

                insert_at.append(br_tag)

    return new_soup
async def fetch_image(url: str) -> bytes | None:
    """Download an image and return its bytes, or ``None`` if the response is not OK."""
    with start_action(action_type="api_fetch_image", url=url):
        # Don't cache images.
        async with ClientSession(headers=headers) as session:
            async with session.get(url) as response:
                return await response.read() if response.ok else None
async def fetch_tree_images(tree: BeautifulSoup):
    """Fetch the data of every image referenced in the tree.

    The returned list has exactly one entry per ``<img>`` tag, in document
    order, so it can be zipped with ``tree.find_all("img")``. An entry is
    ``None`` when the tag's ``src`` is missing or not an absolute URL, or
    when the download failed.

    Args:
        tree (BeautifulSoup): Parsed part tree.

    Returns:
        list[bytes | None]: Image bytes (or ``None``) for each ``<img>`` tag.
    """
    img_tags = tree.find_all("img")
    images: list[bytes | None] = [None] * len(img_tags)

    # (position, url) pairs for tags that reference a fetchable URL.
    fetchable: list[tuple[int, str]] = []
    for position, img in enumerate(img_tags):
        src = img.get("src")
        if not src:
            continue
        parsed = urlparse(src)
        if parsed.scheme and parsed.netloc:  # Test if valid URL
            fetchable.append((position, src))

    # Download at most three images concurrently.
    for chunk in batched(fetchable, 3):
        results = await asyncio.gather(*[fetch_image(url) for _, url in chunk])
        for (position, _), image_data in zip(chunk, results):
            images[position] = image_data

    return images
-25
View File
@@ -1,25 +0,0 @@
import re
import unicodedata
def slugify(value, allow_unicode=False) -> str:
    """
    Turn an arbitrary value into a lowercase, dash-separated slug.

    Adapted from https://github.com/django/django/blob/master/django/utils/text.py
    (thanks https://stackoverflow.com/a/295466).

    Unless 'allow_unicode' is true, the text is reduced to ASCII. Characters
    other than alphanumerics, underscores, hyphens and whitespace are dropped,
    runs of whitespace and dashes collapse into a single dash, and leading or
    trailing dashes and underscores are stripped.
    """
    text = str(value)

    if not allow_unicode:
        # Decompose accented characters, then discard anything non-ASCII.
        decomposed = unicodedata.normalize("NFKD", text)
        text = decomposed.encode("ascii", "ignore").decode("ascii")
    else:
        text = unicodedata.normalize("NFKC", text)

    without_symbols = re.sub(r"[^\w\s-]", "", text.lower())
    dashed = re.sub(r"[-\s]+", "-", without_symbols)
    return dashed.strip("-_")
-28
View File
@@ -1,28 +0,0 @@
from aiohttp_client_cache import FileBackend, RedisBackend
from dotenv import load_dotenv
from .config import CacheTypes, Config
from .logs import logger
# Default headers (a desktop browser user agent) used for outgoing requests.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
}

# Values from the .env file take precedence over existing environment variables.
load_dotenv(override=True)
config = Config()

# Select the HTTP response cache backend according to the configuration.
if not config.USE_CACHE:
    cache = None
else:
    cache_type = config.CACHE_TYPE
    if cache_type == CacheTypes.file:
        cache = FileBackend(use_temp=True, expire_after=43200)  # 12 hours
    elif cache_type == CacheTypes.redis:
        cache = RedisBackend(
            cache_name="wpd-aiohttp-cache",
            address=config.REDIS_CONNECTION_URL,
            expire_after=43200,  # 12 hours
        )

logger.info(f"Using {cache=}")
+36 -67
View File
@@ -1,15 +1,13 @@
"""WattpadDownloader API Server.""" """WattpadDownloader API Server."""
import asyncio
from enum import Enum
from pathlib import Path
from typing import Optional from typing import Optional
import asyncio
from pathlib import Path
from enum import Enum
from zipfile import ZipFile from zipfile import ZipFile
from aiohttp import ClientResponseError
from bs4 import BeautifulSoup
from eliot import start_action from eliot import start_action
from fastapi import FastAPI, HTTPException, Request from aiohttp import ClientResponseError
from fastapi import FastAPI, Request
from fastapi.responses import ( from fastapi.responses import (
FileResponse, FileResponse,
HTMLResponse, HTMLResponse,
@@ -17,25 +15,29 @@ from fastapi.responses import (
StreamingResponse, StreamingResponse,
) )
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from create_book import ( from create_book import (
EPUBGenerator, EPUBGenerator,
PDFGenerator, PDFGenerator,
StoryNotFoundError,
WattpadError,
fetch_cookies,
fetch_image,
fetch_story, fetch_story,
fetch_story_content_zip,
fetch_story_from_partId, fetch_story_from_partId,
logger, fetch_story_content_zip,
fetch_image,
fetch_cookies,
WattpadError,
StoryNotFoundError,
generate_clean_part_html,
slugify, slugify,
logger,
) )
from create_book.parser import clean_tree, fetch_tree_images
app = FastAPI() app = FastAPI()
BUILD_PATH = Path(__file__).parent / "build" BUILD_PATH = Path(__file__).parent / "build"
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
}
class RequestCancelledMiddleware: class RequestCancelledMiddleware:
# Thanks https://github.com/fastapi/fastapi/discussions/11360#discussion-6427734 # Thanks https://github.com/fastapi/fastapi/discussions/11360#discussion-6427734
@@ -168,72 +170,39 @@ async def handle_download(
cover_data = await fetch_image( cover_data = await fetch_image(
metadata["cover"].replace("-256-", "-512-") metadata["cover"].replace("-256-", "-512-")
) # Increase resolution ) # Increase resolution
if not cover_data:
raise HTTPException(status_code=422)
story_zip = await fetch_story_content_zip(story_id, cookies)
archive = ZipFile(story_zip, "r")
# Transform part metadata into an easily-indexable dictionary
part_id_title_dictionary = {
str(part["id"]): part["title"] for part in metadata["parts"]
}
part_trees: list[BeautifulSoup] = []
for id in archive.namelist():
if (
id not in part_id_title_dictionary
): # If a part is deleted and the old story_zip is cached, this is needed to avoid a KeyError exception
continue
part_trees.append(
clean_tree(
part_id_title_dictionary[id],
id,
archive.read(id).decode("utf-8"),
)
)
images = (
[await fetch_tree_images(tree) for tree in part_trees]
if download_images
else []
)
match format: match format:
case DownloadFormat.epub: case DownloadFormat.epub:
book = EPUBGenerator(metadata, part_trees, cover_data, images) book = EPUBGenerator(metadata, cover_data)
media_type = "application/epub+zip" media_type = "application/epub+zip"
case DownloadFormat.pdf: case DownloadFormat.pdf:
author_image = await fetch_image( book = PDFGenerator(metadata, cover_data)
metadata["user"]["avatar"].replace("-256-", "-512-")
)
if not author_image:
raise HTTPException(status_code=422)
book = PDFGenerator(
metadata, part_trees, cover_data, images, author_image
)
media_type = "application/pdf" media_type = "application/pdf"
logger.info(f"Retrieved story metadata and cover ({story_id=})") logger.info(f"Retrieved story metadata and cover ({story_id=})")
book.compile() story_zip = await fetch_story_content_zip(story_id, cookies)
archive = ZipFile(story_zip, "r")
part_contents = [
generate_clean_part_html(
part, archive.read(str(part["id"])).decode("utf-8")
)
for part in metadata["parts"]
]
async for title in book.add_chapters(
part_contents, download_images=download_images
):
...
book_buffer = book.dump() book_buffer = book.dump()
async def iterfile():
while chunk := book_buffer.read(512 * 4): # 4 kb/s
await asyncio.sleep(0.1) # throttle download speed
yield chunk
return StreamingResponse( return StreamingResponse(
iterfile(), book_buffer,
media_type=media_type, media_type=media_type,
headers={ headers={
"Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.{format.value}"', # Thanks https://stackoverflow.com/a/72729058 "Content-Disposition": f'attachment; filename="{slugify(metadata["title"])}_{story_id}{"_images" if download_images else ""}.{format.value}"' # Thanks https://stackoverflow.com/a/72729058
"Content-Length": str(book_buffer.getbuffer().nbytes),
}, },
) )
@@ -250,4 +219,4 @@ app.mount("/", StaticFiles(directory=BUILD_PATH), "static")
if __name__ == "__main__": if __name__ == "__main__":
import uvicorn import uvicorn
uvicorn.run("main:app", host="0.0.0.0", port=80) uvicorn.run("main:app", host="0.0.0.0", port=80, workers=16)
+54
View File
@@ -0,0 +1,54 @@
<!DOCTYPE html>
<html lang="{langcode}">
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{book_title}</title>
<section class="fullpage">
<img src="{cover}" alt="Cover">
</section>
<div id="copyright-container">
<h1 id="copyright-notice">Copyright Notice</h1>
<h2 id="copyright-title">{book_title}</h2>
<p id="copyright-author">By {author}</p>
<div id="copyright-separator"></div>
<p id="copyright-ex-libris">Ex Libris Sapientiae</p>
<div id="copyright-separator"></div>
{copyright_image}
<p id="copyright-copyright">{statement}</p>
<p id="copyright-rights">{freedoms}</p>
<p id="copyright-printing">Printing: {printing}</p>
<p id="copyright-printing">ID: {book_id}. <a href="https://wattpad.com/story/{book_id}" target="_blank" id="copyright-link">View this Book Online</a></p>
</div>
<div id="book">
</div>
<h1>About the Author</h1>
<div id="author-container">
<div id="author-about">
{avatar}
<h2 id="author-name"><a href="https://wattpad.com/user/{username}" id="author-link">{username}</a></h2>
<hr id="author-divider">
<p id="author-bio">
{description}
</p>
</div>
</div>
</html>
@@ -205,8 +205,6 @@ section {
#contents a { #contents a {
color: inherit; color: inherit;
text-decoration: none; text-decoration: none;
display: flex;
justify-content: space-between;
} }
#contents a::before { #contents a::before {
content: target-counter(attr(href), h2-counter) '. ' target-text(attr(href)); content: target-counter(attr(href), h2-counter) '. ' target-text(attr(href));
@@ -391,14 +389,6 @@ a:hover {
} }
#book-link {
font-size: 14px;
color: #666;
margin: 8px 0;
text-align: center;
}
#copyright-separator { #copyright-separator {
width: 100%; width: 100%;
max-width: 400px; max-width: 400px;
+120
View File
@@ -0,0 +1,120 @@
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:outline="http://wkhtmltopdf.org/outline"
xmlns="http://www.w3.org/1999/xhtml">
<xsl:output doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN"
doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
indent="yes" />
<xsl:template match="outline:outline">
<html>
<head>
<style>
@font-face {
font-family: 'PT Serif';
src: url('./fonts/PTSerif-Regular.ttf') format('truetype');
font-weight: 400;
font-style: normal;
}
@font-face {
font-family: 'PT Serif';
src: url('./fonts/PTSerif-Bold.ttf') format('truetype');
font-weight: 700;
font-style: normal;
}
@font-face {
font-family: 'PT Serif';
src: url('./fonts/PTSerif-Italic.ttf') format('truetype');
font-weight: 400;
font-style: italic;
}
@font-face {
font-family: 'PT Serif';
src: url('./fonts/PTSerif-BoldItalic.ttf') format('truetype');
font-weight: 700;
font-style: italic;
}
.pt-serif-regular {
font-family: "PT Serif", serif;
font-weight: 400;
font-style: normal;
}
.pt-serif-bold {
font-family: "PT Serif", serif;
font-weight: 700;
font-style: normal;
}
.pt-serif-regular-italic {
font-family: "PT Serif", serif;
font-weight: 400;
font-style: italic;
}
.pt-serif-bold-italic {
font-family: "PT Serif", serif;
font-weight: 700;
font-style: italic;
}
h1 {
text-align: center;
font-family: "PT Serif", serif !important;
font-weight: 700 !important;
font-style: normal !important;
font-size: 36px !important; /* Uniform size */
margin-bottom: 20px; /* Space below the heading */
border-bottom: 4px solid black; /* Black line */
padding-bottom: 10px; /* Space between text and line */
}
div {border-bottom: 1px dashed rgb(100,000,100);
padding-top: 5px;}
span {float: right;}
li {list-style: none;}
ul {
font-size: 22px;
font-family: arial;
}
ul ul {font-size: 80%; }
ul {padding-left: 0em;}
ul ul {padding-left: 1em;}
a {text-decoration:none; color: black;}
</style>
</head>
<body>
<h1>Table of Contents</h1>
<ul><xsl:apply-templates select="outline:item/outline:item"/></ul>
</body>
</html>
</xsl:template>
<xsl:template match="outline:item">
<li>
<xsl:if test="@title!=''">
<div>
<a class="pt-serif-regular">
<xsl:if test="@link">
<xsl:attribute name="href"><xsl:value-of select="@link"/></xsl:attribute>
</xsl:if>
<xsl:if test="@backLink">
<xsl:attribute name="name"><xsl:value-of select="@backLink"/></xsl:attribute>
</xsl:if>
<xsl:value-of select="@title" />
</a>
<span> <xsl:value-of select="@page" /> </span>
</div>
</xsl:if>
<ul>
<xsl:comment>added to prevent self-closing tags in QtXmlPatterns</xsl:comment>
<xsl:apply-templates select="outline:item"/>
</ul>
</li>
</xsl:template>
</xsl:stylesheet>
+1044 -1330
View File
File diff suppressed because it is too large Load Diff
+6 -17
View File
@@ -1,23 +1,12 @@
node_modules
# Output
.output
.vercel
.netlify
.wrangler
/.svelte-kit
/build
# OS
.DS_Store .DS_Store
Thumbs.db node_modules
/build
# Env /.svelte-kit
/package
.env .env
.env.* .env.*
!.env.example !.env.example
!.env.test .vercel
.output
# Vite
vite.config.js.timestamp-* vite.config.js.timestamp-*
vite.config.ts.timestamp-* vite.config.ts.timestamp-*
-6
View File
@@ -1,6 +0,0 @@
# Package Managers
package-lock.json
pnpm-lock.yaml
yarn.lock
bun.lock
bun.lockb
-18
View File
@@ -1,18 +0,0 @@
{
"useTabs": false,
"singleQuote": false,
"trailingComma": "none",
"printWidth": 100,
"plugins": [
"prettier-plugin-svelte",
"prettier-plugin-tailwindcss"
],
"overrides": [
{
"files": "*.svelte",
"options": {
"parser": "svelte"
}
}
]
}
+5 -5
View File
@@ -1,6 +1,6 @@
# sv # create-svelte
Everything you need to build a Svelte project, powered by [`sv`](https://github.com/sveltejs/cli). Everything you need to build a Svelte project, powered by [`create-svelte`](https://github.com/sveltejs/kit/tree/main/packages/create-svelte).
## Creating a project ## Creating a project
@@ -8,10 +8,10 @@ If you're seeing this, you've probably already done this step. Congrats!
```bash ```bash
# create a new project in the current directory # create a new project in the current directory
npx sv create npm create svelte@latest
# create a new project in my-app # create a new project in my-app
npx sv create my-app npm create svelte@latest my-app
``` ```
## Developing ## Developing
@@ -35,4 +35,4 @@ npm run build
You can preview the production build with `npm run preview`. You can preview the production build with `npm run preview`.
> To deploy your app, you may need to install an [adapter](https://svelte.dev/docs/kit/adapters) for your target environment. > To deploy your app, you may need to install an [adapter](https://kit.svelte.dev/docs/adapters) for your target environment.
-13
View File
@@ -1,13 +0,0 @@
{
"extends": "./.svelte-kit/tsconfig.json",
"compilerOptions": {
"allowJs": true,
"checkJs": false,
"moduleResolution": "bundler"
}
// Path aliases are handled by https://svelte.dev/docs/kit/configuration#alias
// except $lib which is handled by https://svelte.dev/docs/kit/configuration#files
//
// If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes
// from the referenced tsconfig.json - TypeScript does not merge them in
}
+2155 -1348
View File
File diff suppressed because it is too large Load Diff
+19 -17
View File
@@ -1,27 +1,29 @@
{ {
"name": "frontend", "name": "frontend",
"private": true,
"version": "0.0.1", "version": "0.0.1",
"type": "module",
"scripts": { "scripts": {
"dev": "vite dev", "dev": "vite dev",
"build": "vite build", "build": "vite build",
"preview": "vite preview", "preview": "vite preview"
"prepare": "svelte-kit sync || echo ''",
"format": "prettier --write .",
"lint": "prettier --check ."
}, },
"devDependencies": { "devDependencies": {
"@sveltejs/adapter-static": "^3.0.8", "@fontsource/fira-mono": "^4.5.10",
"@sveltejs/kit": "^2.16.0", "@neoconfetti/svelte": "^1.0.0",
"@sveltejs/vite-plugin-svelte": "^5.0.0", "@sveltejs/adapter-auto": "^3.0.0",
"@tailwindcss/vite": "^4.0.0", "@sveltejs/adapter-static": "^3.0.1",
"daisyui": "^5.0.38", "@sveltejs/kit": "^2.0.0",
"prettier": "^3.5.3", "@sveltejs/vite-plugin-svelte": "^3.0.0",
"prettier-plugin-svelte": "^3.4.0", "@tailwindcss/typography": "^0.5.10",
"prettier-plugin-tailwindcss": "^0.6.11", "autoprefixer": "^10.4.16",
"svelte": "^5.0.0", "daisyui": "^4.4.20",
"tailwindcss": "^4.0.0", "postcss": "^8.4.32",
"vite": "^6.2.6" "postcss-load-config": "^5.0.2",
"svelte": "^4.2.7",
"tailwindcss": "^3.3.6",
"vite": "^5.0.3"
},
"type": "module",
"dependencies": {
"svelte-preprocess": "^5.1.3"
} }
} }
+13
View File
@@ -0,0 +1,13 @@
const tailwindcss = require("tailwindcss");
const autoprefixer = require("autoprefixer");
// PostCSS plugin pipeline. Order is significant: a plugin that must see the
// source before Tailwind (for example tailwindcss/nesting) belongs ahead of
// tailwindcss(), while post-processors such as autoprefixer go after it.
const plugins = [tailwindcss(), autoprefixer];
module.exports = { plugins };
-4
View File
@@ -1,4 +0,0 @@
@import 'tailwindcss';
@plugin "daisyui" {
themes: bumblebee --default, abyss --prefersdark, cupcake, dracula;
}
+12 -22
View File
@@ -1,37 +1,27 @@
<!doctype html> <!doctype html>
<html lang="en" data-theme="bumblebee"> <html lang="en" data-theme="nord">
<head> <head>
<meta charset="utf-8" /> <meta charset="utf-8" />
<link rel="icon" href="%sveltekit.assets%/favicon.svg" /> <link rel="icon" href="%sveltekit.assets%/favicon.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1" /> <meta name="viewport" content="width=device-width, initial-scale=1" />
<script defer src="https://stats.towu.dev/script.js" data-website-id="28dd7588-390c-49ed-a42c-085ad369ed93"></script>
<title>WP Downloader</title> <title>Wattpad Downloader</title>
<meta name="title" content="WP Downloader" /> <meta name="title" content="Wattpad Downloader" />
<meta <meta name="description" content="Read your way, download Wattpad Books as PDFs or EPUBs in seconds. Have an Ad-Free experience with Unlimited Offline Reading. Try it now!" />
name="description"
content="Read your way, download WP Books as PDFs or EPUBs in seconds. Have an Ad-Free experience with Unlimited Offline Reading. Try it now!"
/>
<!-- Open Graph / Facebook --> <!-- Open Graph / Facebook -->
<meta property="og:type" content="website" /> <meta property="og:type" content="website" />
<meta property="og:url" content="https://wpd.my/"> <meta property="og:url" content="https://wpd.rambhat.la/" />
<meta property="og:title" content="WP Downloader" /> <meta property="og:title" content="Wattpad Downloader" />
<meta <meta property="og:description" content="Read your way, download Wattpad Books as PDFs or EPUBs in seconds. Have an Ad-Free experience with Unlimited Offline Reading. Try it now!" />
property="og:description" <meta property="og:image" content="https://wpd.rambhat.la/embed.png" />
content="Read your way, download WP Books as PDFs or EPUBs in seconds. Have an Ad-Free experience with Unlimited Offline Reading. Try it now!"
/>
<meta property="og:image" content="https://wpd.my/embed.png" />
<!-- Twitter --> <!-- Twitter -->
<meta property="twitter:card" content="summary_large_image" /> <meta property="twitter:card" content="summary_large_image" />
<meta property="twitter:url" content="https://wpd.my/" /> <meta property="twitter:url" content="https://wpd.rambhat.la/" />
<meta property="twitter:title" content="WP Downloader" /> <meta property="twitter:title" content="Wattpad Downloader" />
<meta <meta property="twitter:description" content="Read your way, download Wattpad Books as PDFs or EPUBs in seconds. Have an Ad-Free experience with Unlimited Offline Reading. Try it now!" />
property="twitter:description" <meta property="twitter:image" content="https://wpd.rambhat.la/embed.png" />
content="Read your way, download WP Books as PDFs or EPUBs in seconds. Have an Ad-Free experience with Unlimited Offline Reading. Try it now!"
/>
<meta property="twitter:image" content="https://wpd.my/embed.png" />
<!-- Meta Tags Generated with https://metatags.io --> <!-- Meta Tags Generated with https://metatags.io -->
+4
View File
@@ -0,0 +1,4 @@
/* Write your global styles here, in PostCSS syntax */
@tailwind base;
@tailwind components;
@tailwind utilities;
+4 -4
View File
@@ -1,9 +1,9 @@
<div class="flex"> <div class="flex">
<div class="hero min-h-screen"> <div class="hero min-h-screen">
<div class="hero-content text-center"> <div class="hero-content text-center">
<div class="max-w-lg rounded-md bg-base-200 p-16"> <div class="bg-base-200 p-16 max-w-lg rounded-md">
<h1 class="text-5xl font-bold">There was an error.</h1> <h1 class="text-5xl font-bold">There was an error.</h1>
<div class="join py-6"> <div class="py-6 join">
<a class="btn btn-primary btn-lg" href="/">Home</a> <a class="btn btn-primary btn-lg" href="/">Home</a>
</div> </div>
</div> </div>
@@ -11,7 +11,7 @@
</div> </div>
<footer <footer
class="footer footer-center fixed bottom-0 bg-base-300 p-4 text-base-content" class="footer footer-center p-4 bg-base-300 text-base-content bottom-0 fixed"
> >
<aside class="text-2xl"> <aside class="text-2xl">
<p> <p>
@@ -19,7 +19,7 @@
href="https://github.com/TheOnlyWayUp" href="https://github.com/TheOnlyWayUp"
class="underline" class="underline"
target="_blank">TheOnlyWayUp</a target="_blank">TheOnlyWayUp</a
> © 2026 > © 2024
</p> </p>
</aside> </aside>
</footer> </footer>
+6 -13
View File
@@ -1,12 +1,5 @@
<script> <script>
import "../app.css"; import "../app.pcss";
/**
* @typedef {Object} Props
* @property {import('svelte').Snippet} [children]
*/
/** @type {Props} */
let { children } = $props();
</script> </script>
<svelte:head> <svelte:head>
@@ -17,15 +10,15 @@
</style> </style>
</svelte:head> </svelte:head>
{@render children()} <slot />
<footer <footer
class="footer footer-center fixed bottom-0 bg-base-300 p-4 text-base-content" class="footer footer-center p-4 bg-base-300 text-base-content bottom-0 fixed"
> >
<aside> <aside>
<div class="flex w-full max-w-lg flex-row"> <div class="flex flex-row max-w-lg w-full">
<a <a
href="https://buymeacoffee.com/theonlywayup" href="/donate"
target="_blank" target="_blank"
class="link" class="link"
data-umami-event="Footer Donate">Buy me a Coffee!</a data-umami-event="Footer Donate">Buy me a Coffee!</a
@@ -44,7 +37,7 @@
> >
</div> </div>
<p> <p>
Copyright © 2026 - All rights reserved by <a Copyright © 2024 - All rights reserved by <a
href="https://rambhat.la" href="https://rambhat.la"
class="link" class="link"
target="_blank" target="_blank"
+136 -132
View File
@@ -1,116 +1,104 @@
<script> <script>
let downloadImages = $state(false); let download_images = false;
let downloadAsPdf = $state(false); // 0 = epub, 1 = pdf let download_as_pdf = false; // 0 = epub, 1 = pdf
let isPaidStory = $state(false); let is_paid_story = false;
let invalidUrl = $state(false); let invalid_url = false;
let afterDownloadPage = $state(false); let after_download_page = false;
let credentials = $state({ let credentials = {
username: "", username: "",
password: "" password: "",
}); };
let downloadId = $state(""); let download_id = "";
/** @type {"story" | "part" | ""} */ let mode = "";
let mode = $state(""); let input_url = "";
let inputUrl = $state("");
let buttonDisabled = $derived( let button_disabled = false;
!inputUrl || (isPaidStory && !(credentials.username && credentials.password)) $: button_disabled =
); !input_url ||
(is_paid_story && !(credentials.username && credentials.password));
let url = $derived( $: url =
`/download/` + `/download/` +
downloadId + download_id +
`?om=1` + `?om=1` +
(downloadImages ? "&download_images=true" : "") + (download_images ? "&download_images=true" : "") +
(isPaidStory (is_paid_story
? `&username=${encodeURIComponent(credentials.username)}&password=${encodeURIComponent(credentials.password)}` ? `&username=${encodeURIComponent(credentials.username)}&password=${encodeURIComponent(credentials.password)}`
: "") + : "") +
`&mode=${mode}` + `&mode=${mode}` +
(downloadAsPdf ? "&format=pdf" : "&format=epub") (download_as_pdf ? "&format=pdf" : "&format=epub");
);
/** @type {HTMLDialogElement} */ $: {
let storyURLTutorialModal; if (input_url.length) {
input_url = input_url.toLowerCase();
/** @param {string} input */ invalid_url = false;
const setInputAsValid = (input) => {
invalidUrl = false;
inputUrl = input;
downloadId = input;
};
/** @param {string} input */ if (/^\d+$/.test(input_url)) {
const setInputAsInvalid = (input) => {
invalidUrl = true;
inputUrl = input;
downloadId = input;
};
/** @param {string} input */
const setInputUrl = (input) => {
input = input.toLowerCase();
if (!input) {
setInputAsValid("");
return;
}
if (/^\d+$/.test(input)) {
// All numbers // All numbers
download_id = input_url;
mode = "story"; mode = "story";
setInputAsValid(input); } else if (input_url.includes("wattpad.com/")) {
return; // Is a string and contains wattpad.com/
}
if (!input.includes("wattpad.com/")) { if (input_url.includes("/story/")) {
setInputAsInvalid(input.match(/\d+/g)?.join("") ?? "");
return;
}
// Is a string and contains wattpad.com/
if (input.includes("/story/")) {
// https://wattpad.com/story/237369078-wattpad-books-presents // https://wattpad.com/story/237369078-wattpad-books-presents
input_url = input_url.split("-")[0].split("?")[0].split("/story/")[1]; // removes tracking fields and title
download_id = input_url;
mode = "story"; mode = "story";
setInputAsValid( } else if (input_url.includes("/stories/")) {
input.split("-", 1)[0].split("?", 1)[0].split("/story/")[1] // removes tracking fields and title
);
} else if (input.includes("/stories/")) {
// https://www.wattpad.com/api/v3/stories/237369078?fields=... // https://www.wattpad.com/api/v3/stories/237369078?fields=...
input_url = input_url.split("?")[0].split("/stories/")[1]; // removes params
download_id = input_url;
mode = "story"; mode = "story";
setInputAsValid(
input.split("?", 1)[0].split("/stories/")[1] // removes params
);
} else { } else {
// https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me // https://www.wattpad.com/939051741-wattpad-books-presents-the-qb-bad-boy-and-me
input = input.split("-", 1)[0].split("?", 1)[0].split("wattpad.com/")[1]; // removes tracking fields and title input_url = input_url
if (/^\d+$/.test(input)) { .split("-")[0]
// If "wattpad.com/{downloadId}" contains only numbers .split("?")[0]
.split("wattpad.com/")[1]; // removes tracking fields and title
download_id = input_url;
if (/^\d+$/.test(download_id)) {
// If "wattpad.com/{download_id}" contains only numbers
mode = "part"; mode = "part";
setInputAsValid(input);
} else { } else {
setInputAsInvalid(""); invalid_url = true;
input_url = "";
download_id = "";
} }
} }
} else {
invalid_url = true;
}
input_url = input_url.match(/\d+/g)?.join("") || "";
download_id = input_url;
// Originally, I was going to call the Wattpad API (wattpad.com/api/v3/stories/${story_id}), but Wattpad kept blocking those requests. I suspect it has something to do with the Origin header, I wasn't able to remove it. // Originally, I was going to call the Wattpad API (wattpad.com/api/v3/stories/${story_id}), but Wattpad kept blocking those requests. I suspect it has something to do with the Origin header, I wasn't able to remove it.
// In the future, if this is considered, it would be cool if we could derive the Story ID from a pasted Part URL. Refer to @AaronBenDaniel's https://github.com/AaronBenDaniel/WattpadDownloader/blob/49b29b245188149f2d24c0b1c59e4c7f90f289a9/src/api/src/create_book.py#L156 (https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=url). // In the future, if this is considered, it would be cool if we could derive the Story ID from a pasted Part URL. Refer to @AaronBenDaniel's https://github.com/AaronBenDaniel/WattpadDownloader/blob/49b29b245188149f2d24c0b1c59e4c7f90f289a9/src/api/src/create_book.py#L156 (https://www.wattpad.com/api/v3/story_parts/{part_id}?fields=url).
}; } else {
invalid_url = false;
download_id = "";
}
}
</script> </script>
<div> <div>
<div class="hero min-h-screen"> <div class="hero min-h-screen">
<div <div
class="hero-content bg-base-100/50 flex-col rounded py-32 shadow-sm lg:flex-row-reverse lg:p-16" class="hero-content flex-col lg:flex-row-reverse bg-base-100/50 lg:p-16 py-32 rounded shadow-sm"
> >
{#if !afterDownloadPage} {#if !after_download_page}
<div class="text-center lg:p-10 lg:text-left"> <div class="text-center lg:text-left lg:p-10">
<h1 <h1
class="bg-gradient-to-r from-red-700 via-yellow-600 to-pink-600 bg-clip-text text-5xl font-extrabold text-transparent" class="font-extrabold text-transparent text-5xl bg-clip-text bg-gradient-to-r to-pink-600 via-yellow-600 from-red-700"
> >
WP Downloader Wattpad Downloader
</h1> </h1>
<div role="alert" class="alert mt-10 max-w-md break-words bg-green-200"> <div
role="alert"
class="alert bg-amber-200 mt-10 break-words max-w-md"
>
<svg <svg
xmlns="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg"
fill="none" fill="none"
@@ -126,11 +114,9 @@
</svg> </svg>
<div> <div>
<p> <p>
Donators get access to <span class="font-semibold">high-speed PDF Downloads</span> Hey everyone, have a great new year! You're now on the Donator
version for a few days :)
</p> </p>
<a href="https://buymeacoffee.com/theonlywayup" class="link" target="_blank"
>Donate now</a
>
</div> </div>
</div> </div>
<!-- <div role="alert" class="alert bg-cyan-300 mt-5"> <!-- <div role="alert" class="alert bg-cyan-300 mt-5">
@@ -149,15 +135,14 @@
</svg> </svg>
<span class="text-lg">Please Donate</span> <span class="text-lg">Please Donate</span>
</div> --> </div> -->
<p class="max-w-md pt-6 text-lg"> <p class="pt-6 text-lg">
Download your favourite books with a single click. Have a great new year! Download your favourite books with a single click!
</p> </p>
<ul class="list list-inside pt-4 text-xl"> <ul class="pt-4 list list-inside text-xl">
<!-- TODO: 'max-lg: hidden' to hide on screen sizes smaller than lg. I'll do this when I figure out how to make this show up _below_ the card on smaller screen sizes. --> <!-- TODO: 'max-lg: hidden' to hide on screen sizes smaller than lg. I'll do this when I figure out how to make this show up _below_ the card on smaller screen sizes. -->
<!-- <li>12/24 - ⚡ Super-fast Downloads!</li> <li>12/24 - ⚡ Super-fast Downloads!</li>
<li>12/24 - 📑 PDF Downloads!</li> --> <li>12/24 - 📑 PDF Downloads!</li>
<li>05/25 - ⚖️ Legal Compliance</li> <li>12/24 - 📂 Improved Performance</li>
<li>12/24 - 📂 Less Errors, Throttled Downloads</li>
<li>11/24 - 🔗 Paste Links!</li> <li>11/24 - 🔗 Paste Links!</li>
<li>11/24 - 📨 Send to Kindle Support!</li> <li>11/24 - 📨 Send to Kindle Support!</li>
@@ -174,46 +159,49 @@
<li>06/24 - 🖼️ Image Downloading!</li> <li>06/24 - 🖼️ Image Downloading!</li>
</ul> </ul>
</div> </div>
<div class="card bg-base-100 w-full max-w-sm shrink-0 shadow-2xl"> <div class="card shrink-0 w-full max-w-sm shadow-2xl bg-base-100">
<form class="card-body"> <form class="card-body">
<div class="form-control"> <div class="form-control">
<input <input
type="text" type="text"
placeholder="Story URL" placeholder="Story URL"
class="input input-bordered" class="input input-bordered"
class:input-warning={invalidUrl} class:input-warning={invalid_url}
bind:value={() => inputUrl, setInputUrl} bind:value={input_url}
required required
name="input_url" name="input_url"
/> />
<label class="label" for="input_url"> <label class="label" for="input_url">
{#if invalidUrl} {#if invalid_url}
<p class=" text-red-500"> <p class=" text-red-500">
Refer to (<button Refer to (<button
class="link font-semibold" class="link font-semibold"
onclick={() => storyURLTutorialModal.showModal()} onclick="StoryURLTutorialModal.showModal()"
data-umami-event="Part StoryURLTutorialModal Open" data-umami-event="Part StoryURLTutorialModal Open"
>How to get a Story URL</button >How to get a Story URL</button
>). >).
</p> </p>
{:else} {:else}
<button <button
class="link label-text font-semibold text-gray-800" class="label-text link font-semibold"
onclick={() => storyURLTutorialModal.showModal()} onclick="StoryURLTutorialModal.showModal()"
data-umami-event="StoryURLTutorialModal Open">How to get a Story URL</button data-umami-event="StoryURLTutorialModal Open"
>How to get a Story URL</button
> >
{/if} {/if}
</label> </label>
<label class="label cursor-pointer text-gray-800"> <label class="cursor-pointer label">
<span class="label-text">This is a Paid Story, and I've purchased it</span> <span class="label-text"
>This is a Paid Story, and I've purchased it</span
>
<input <input
type="checkbox" type="checkbox"
class="checkbox-warning checkbox shadow-md" class="checkbox checkbox-warning shadow-md"
bind:checked={isPaidStory} bind:checked={is_paid_story}
/> />
</label> </label>
{#if isPaidStory} {#if is_paid_story}
<label class="input input-bordered flex items-center gap-2"> <label class="input input-bordered flex items-center gap-2">
Username Username
<input <input
@@ -242,53 +230,61 @@
<div class="form-control mt-6"> <div class="form-control mt-6">
<a <a
class="btn rounded-l-none" class="btn rounded-l-none"
class:btn-primary={!downloadAsPdf} class:btn-primary={!download_as_pdf}
class:btn-secondary={downloadAsPdf} class:btn-secondary={download_as_pdf}
class:btn-disabled={buttonDisabled} class:btn-disabled={button_disabled}
data-umami-event="Download" data-umami-event="Download"
href={url} href={url}
onclick={() => (afterDownloadPage = true)}>Download</a on:click={() => (after_download_page = true)}>Download</a
> >
<!-- <label class="swap w-fit label mt-2"> <label class="swap w-fit label mt-2">
<input type="checkbox" bind:checked={downloadAsPdf} /> <input type="checkbox" bind:checked={download_as_pdf} />
<div class="swap-on"> <div class="swap-on">
Downloading as <span class=" underline text-bold">PDF</span> (Click) Downloading as <span class=" underline text-bold">PDF</span> (Click)
</div> </div>
<div class="swap-off"> <div class="swap-off">
Downloading as <span class=" underline text-bold">EPUB</span> (Click) Downloading as <span class=" underline text-bold">EPUB</span> (Click)
</div> </div>
</label> --> </label>
<label class="label cursor-pointer"> <label class="cursor-pointer label">
<span class="label-text text-gray-800">Include Images (<strong>Slower Download</strong>)</span> <span class="label-text"
>Include Images (<strong>Slower Download</strong>)</span
>
<input <input
type="checkbox" type="checkbox"
class="checkbox-warning checkbox shadow-md" class="checkbox checkbox-warning shadow-md"
bind:checked={downloadImages} bind:checked={download_images}
/> />
</label> </label>
</div> </div>
</form> </form>
<button
data-feedback-fish
class="link pb-4"
data-umami-event="Feedback">Feedback</button
>
</div> </div>
{:else} {:else}
<div class="max-w-4xl text-center"> <div class="text-center max-w-4xl">
<h1 class="text-3xl font-bold"> <h1 class="font-bold text-3xl">
Your download has <span Your download has <span
class="bg-gradient-to-r from-red-700 via-yellow-600 to-pink-600 bg-clip-text text-transparent" class="text-transparent bg-clip-text bg-gradient-to-r to-pink-600 via-yellow-600 from-red-700"
>Started</span >Started</span
> >
</h1> </h1>
<div class="space-y-2 py-4"> <div class="py-4 space-y-2">
<p class="text-2xl"> <p class="text-2xl">
If you found this site useful, please consider <a If you found this site useful, please consider <a
href="https://github.com/TheOnlyWayUp/WattpadDownloader" href="https://github.com/TheOnlyWayUp/WattpadDownloader"
target="_blank" target="_blank"
class="link" class="link"
data-umami-event="Star">starring the project</a data-umami-event="Star">starring the project</a
> to support WPDownloader. > to support WattpadDownloader.
</p> </p>
<p class="pt-2 text-lg"> <p class="text-lg pt-2">
You can also join us on <a You can also join us on <a
href="https://discord.gg/P9RHC4KCwd" href="https://discord.gg/P9RHC4KCwd"
target="_blank" target="_blank"
@@ -297,16 +293,17 @@
>, where we release features early and discuss updates. >, where we release features early and discuss updates.
</p> </p>
</div> </div>
<div class="grid grid-rows-2 justify-center gap-y-10"> <div class="grid justify-center grid-rows-2 gap-y-10">
<a <a
href="https://buymeacoffee.com/theonlywayup" href="/donate"
target="_blank" target="_blank"
class="btn btn-lg mt-10 bg-cyan-200 hover:bg-green-200">Buy me a Coffee! 🍵</a class="btn bg-cyan-200 btn-lg mt-10 hover:bg-green-200"
>Buy me a Coffee! 🍵</a
> >
<button <button
onclick={() => { on:click={() => {
afterDownloadPage = false; after_download_page = false;
inputUrl = ""; input_url = "";
}} }}
class="btn btn-outline btn-lg">Download More</button class="btn btn-outline btn-lg">Download More</button
> >
@@ -317,24 +314,31 @@
</div> </div>
</div> </div>
<dialog class="modal" bind:this={storyURLTutorialModal}> <!-- Open the modal using ID.showModal() method -->
<dialog id="StoryURLTutorialModal" class="modal">
<div class="modal-box"> <div class="modal-box">
<form method="dialog"> <form method="dialog">
<button class="btn btn-circle btn-ghost btn-sm absolute right-2 top-2"></button> <button class="btn btn-sm btn-circle btn-ghost absolute right-2 top-2"
>✕</button
>
</form> </form>
<h3 class="text-lg font-bold">How to get a Story URL</h3> <h3 class="font-bold text-lg">Finding the Story URL</h3>
<ol class="list list-inside list-disc space-y-4 py-4"> <ol class="list list-disc list-inside py-4 space-y-4">
<li>Copy the URL from the Website, or hit share and copy the URL on the App.</li> <li>
Copy the URL from the Website, or hit share and copy the URL on the App.
</li>
<li> <li>
For example, For example,
<span class="bg-slate-100 p-1 font-mono" <span class="font-mono bg-slate-100 p-1"
>wattpad.com/<span class="rounded-sm bg-amber-200">story</span >wattpad.com/<span class="bg-amber-200 rounded-sm">story</span
>/237369078-wattpad-books-presents</span >/237369078-wattpad-books-presents</span
>. >.
</li> </li>
<li> <li>
<span class="bg-slate-100 p-1 font-mono">https://www.wattpad.com/939103774-given</span> is okay <span class="font-mono bg-slate-100 p-1"
too. >https://www.wattpad.com/939103774-given</span
> is okay too.
</li> </li>
<li>Paste the URL and hit Download!</li> <li>Paste the URL and hit Download!</li>
</ol> </ol>
+1 -1
View File
@@ -8,7 +8,7 @@
<url> <url>
<loc>https://wpd.my/</loc> <loc>https://wpd.rambhat.la/</loc>
<lastmod>2024-04-12T08:19:07+00:00</lastmod> <lastmod>2024-04-12T08:19:07+00:00</lastmod>
</url> </url>
+13 -2
View File
@@ -1,5 +1,16 @@
import adapter from '@sveltejs/adapter-static'; import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
import adapter from "@sveltejs/adapter-static";
const config = { kit: { adapter: adapter({ strict: false }) } }; /** @type {import('@sveltejs/kit').Config} */
const config = {
kit: {
// adapter-auto only supports some environments, see https://kit.svelte.dev/docs/adapter-auto for a list.
// If your environment is not supported or you settled on a specific environment, switch out the adapter.
// See https://kit.svelte.dev/docs/adapters for more information about adapters.
adapter: adapter({ strict: false }),
},
preprocess: [vitePreprocess({})],
};
export default config; export default config;
+21
View File
@@ -0,0 +1,21 @@
const daisyui = require("daisyui");
const typography = require("@tailwindcss/typography");
/** @type {import('tailwindcss').Config}*/
const config = {
content: ["./src/**/*.{html,js,svelte,ts}"],
theme: {
extend: {},
},
plugins: [typography, daisyui],
daisyui: {
themes: [
"bumblebee"
],
},
};
module.exports = config;
+1 -5
View File
@@ -1,10 +1,6 @@
import tailwindcss from '@tailwindcss/vite';
import { sveltekit } from '@sveltejs/kit/vite'; import { sveltekit } from '@sveltejs/kit/vite';
import { defineConfig } from 'vite'; import { defineConfig } from 'vite';
export default defineConfig({ export default defineConfig({
plugins: [tailwindcss(), sveltekit()], plugins: [sveltekit()]
server: {
allowedHosts: true
}
}); });