+112
-54
@@ -1,9 +1,10 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
|
from typing import Optional
|
||||||
from ebooklib import epub
|
from ebooklib import epub
|
||||||
import unicodedata
|
import unicodedata
|
||||||
import re
|
import re
|
||||||
import backoff
|
import backoff
|
||||||
from aiohttp import ClientResponseError
|
from aiohttp import ClientResponseError, ClientSession
|
||||||
from aiohttp_client_cache.session import CachedSession
|
from aiohttp_client_cache.session import CachedSession
|
||||||
from aiohttp_client_cache import FileBackend
|
from aiohttp_client_cache import FileBackend
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
@@ -13,59 +14,46 @@ headers = {
|
|||||||
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
|
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
|
||||||
}
|
}
|
||||||
|
|
||||||
cache = FileBackend(
|
cache = FileBackend(use_temp=True, expire_after=43200) # 12 hours
|
||||||
use_temp=True,
|
|
||||||
expire_after=43200, # 12 hours
|
# --- Utilities --- #
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
async def wp_get_cookies(username: str, password: str) -> dict:
|
||||||
async def retrieve_story(story_id: int, retry=True) -> dict:
|
# source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58
|
||||||
"""Taking a story_id, return its information from the Wattpad API."""
|
"""Retrieves authorization cookies from Wattpad by logging in with user creds.
|
||||||
async with CachedSession(headers=headers, cache=cache) as session:
|
|
||||||
async with session.get(
|
Args:
|
||||||
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover"
|
username (str): Username.
|
||||||
|
password (str): Password.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: Bad status code.
|
||||||
|
ValueError: No cookies returned.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: Authorization cookies.
|
||||||
|
"""
|
||||||
|
async with ClientSession(headers=headers) as session:
|
||||||
|
async with session.post(
|
||||||
|
"https://www.wattpad.com/auth/login?nextUrl=%2F&_data=routes%2Fauth%2Flogin",
|
||||||
|
data={
|
||||||
|
"username": username.lower(),
|
||||||
|
"password": password,
|
||||||
|
}, # the username.lower() is for caching
|
||||||
) as response:
|
) as response:
|
||||||
if not response.ok:
|
if response.status != 204:
|
||||||
if response.status in [404, 400]:
|
raise ValueError("Not a 204.")
|
||||||
return {}
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
body = await response.json()
|
cookies = {
|
||||||
|
k: v.value
|
||||||
|
for k, v in response.cookies.items() # Thanks https://stackoverflow.com/a/32281245
|
||||||
|
}
|
||||||
|
|
||||||
return body
|
if not cookies:
|
||||||
|
raise ValueError("No cookies.")
|
||||||
|
|
||||||
|
return cookies
|
||||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
|
||||||
async def fetch_part_content(part_id: int) -> str:
|
|
||||||
"""Return the HTML Content of a Part."""
|
|
||||||
async with CachedSession(headers=headers, cache=cache) as session:
|
|
||||||
async with session.get(
|
|
||||||
f"https://www.wattpad.com/apiv2/?m=storytext&id={part_id}"
|
|
||||||
) as response:
|
|
||||||
if not response.ok:
|
|
||||||
if response.status in [404, 400]:
|
|
||||||
return ""
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
body = await response.text()
|
|
||||||
|
|
||||||
return body
|
|
||||||
|
|
||||||
|
|
||||||
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
|
|
||||||
async def fetch_cover(url: str) -> bytes:
|
|
||||||
"""Fetch image bytes."""
|
|
||||||
async with CachedSession(headers=headers, cache=cache) as session:
|
|
||||||
async with session.get(url) as response:
|
|
||||||
if not response.ok:
|
|
||||||
if response.status in [404, 400]:
|
|
||||||
return bytes()
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
body = await response.read()
|
|
||||||
|
|
||||||
return body
|
|
||||||
|
|
||||||
|
|
||||||
def slugify(value, allow_unicode=False) -> str:
|
def slugify(value, allow_unicode=False) -> str:
|
||||||
@@ -91,7 +79,71 @@ def slugify(value, allow_unicode=False) -> str:
|
|||||||
return re.sub(r"[-\s]+", "-", value).strip("-_")
|
return re.sub(r"[-\s]+", "-", value).strip("-_")
|
||||||
|
|
||||||
|
|
||||||
# --- #
|
# --- API Calls --- #
|
||||||
|
|
||||||
|
|
||||||
|
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def retrieve_story(story_id: int, cookies: Optional[dict] = None) -> dict:
    """Fetch a story's metadata from the Wattpad v3 stories endpoint.

    Args:
        story_id: ID of the story to look up.
        cookies: Optional cookies to send with the request. When provided, a
            plain ClientSession is used so the response is not cached;
            otherwise a CachedSession backed by the module-level cache is used.

    Returns:
        The decoded JSON body, or an empty dict when the API answers 400/404.

    Raises:
        ClientResponseError: From ``response.raise_for_status()`` on any other
            non-OK status; the backoff decorator retries these for up to 15s.
    """
    endpoint = f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover"

    # Don't cache requests with Cookies.
    if cookies:
        client = ClientSession(headers=headers, cookies=cookies)
    else:
        client = CachedSession(headers=headers, cache=cache)

    async with client as session, session.get(endpoint) as response:
        # 400/404 are treated as "no such story" rather than as errors.
        if response.status in (400, 404):
            return {}
        if not response.ok:
            response.raise_for_status()

        return await response.json()
|
||||||
|
|
||||||
|
|
||||||
|
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> str:
    """Download the HTML text of a single story part.

    Args:
        part_id: ID of the part to fetch.
        cookies: Optional cookies to send with the request. When provided, a
            plain ClientSession is used so the response is not cached;
            otherwise a CachedSession backed by the module-level cache is used.

    Returns:
        The response body as text, or an empty string when the API answers
        400/404.

    Raises:
        ClientResponseError: From ``response.raise_for_status()`` on any other
            non-OK status; the backoff decorator retries these for up to 15s.
    """
    endpoint = f"https://www.wattpad.com/apiv2/?m=storytext&id={part_id}"

    # Don't cache requests with Cookies.
    if cookies:
        client = ClientSession(headers=headers, cookies=cookies)
    else:
        client = CachedSession(headers=headers, cache=cache)

    async with client as session, session.get(endpoint) as response:
        # 400/404 are treated as "no content" rather than as errors.
        if response.status in (400, 404):
            return ""
        if not response.ok:
            response.raise_for_status()

        return await response.text()
|
||||||
|
|
||||||
|
|
||||||
|
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_cover(url: str, cookies: Optional[dict] = None) -> bytes:
    """Download the raw bytes found at ``url`` (used for the cover image).

    Args:
        url: Address of the image to download.
        cookies: Optional cookies to send with the request. When provided, a
            plain ClientSession is used so the response is not cached;
            otherwise a CachedSession backed by the module-level cache is used.

    Returns:
        The response body as bytes, or empty bytes when the server answers
        400/404.

    Raises:
        ClientResponseError: From ``response.raise_for_status()`` on any other
            non-OK status; the backoff decorator retries these for up to 15s.
    """
    # Don't cache requests with Cookies.
    if cookies:
        client = ClientSession(headers=headers, cookies=cookies)
    else:
        client = CachedSession(headers=headers, cache=cache)

    async with client as session, session.get(url) as response:
        # 400/404 are treated as "no image" rather than as errors.
        if response.status in (400, 404):
            return bytes()
        if not response.ok:
            response.raise_for_status()

        return await response.read()
|
||||||
|
|
||||||
|
|
||||||
|
# --- EPUB Generation --- #
|
||||||
|
|
||||||
|
|
||||||
def set_metadata(book, data):
|
def set_metadata(book, data):
|
||||||
@@ -113,15 +165,17 @@ def set_metadata(book, data):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
async def set_cover(book, data):
|
async def set_cover(book, data, cookies: Optional[dict] = None):
|
||||||
book.set_cover("cover.jpg", await fetch_cover(data["cover"]))
|
book.set_cover("cover.jpg", await fetch_cover(data["cover"], cookies=cookies))
|
||||||
|
|
||||||
|
|
||||||
async def add_chapters(book, data, download_images: bool = False):
|
async def add_chapters(
|
||||||
|
book, data, download_images: bool = False, cookies: Optional[dict] = None
|
||||||
|
):
|
||||||
chapters = []
|
chapters = []
|
||||||
|
|
||||||
for part in data["parts"]:
|
for part in data["parts"]:
|
||||||
content = await fetch_part_content(part["id"])
|
content = await fetch_part_content(part["id"], cookies=cookies)
|
||||||
title = part["title"]
|
title = part["title"]
|
||||||
clean_title = slugify(title)
|
clean_title = slugify(title)
|
||||||
|
|
||||||
@@ -134,7 +188,11 @@ async def add_chapters(book, data, download_images: bool = False):
|
|||||||
|
|
||||||
if download_images:
|
if download_images:
|
||||||
soup = BeautifulSoup(content, "lxml")
|
soup = BeautifulSoup(content, "lxml")
|
||||||
async with CachedSession(cache=cache, headers=headers) as session:
|
async with (
|
||||||
|
CachedSession(headers=headers, cache=cache)
|
||||||
|
if not cookies
|
||||||
|
else ClientSession(headers=headers, cookies=cookies)
|
||||||
|
) as session: # Don't cache requests with Cookies.
|
||||||
for idx, image in enumerate(soup.find_all("img")):
|
for idx, image in enumerate(soup.find_all("img")):
|
||||||
if not image["src"]:
|
if not image["src"]:
|
||||||
continue
|
continue
|
||||||
|
|||||||
+37
-11
@@ -1,8 +1,16 @@
|
|||||||
|
from typing import Optional
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from fastapi import FastAPI, HTTPException
|
from fastapi import FastAPI, HTTPException
|
||||||
from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
|
from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
|
||||||
from ebooklib import epub
|
from ebooklib import epub
|
||||||
from create_book import retrieve_story, set_cover, set_metadata, add_chapters, slugify
|
from create_book import (
|
||||||
|
retrieve_story,
|
||||||
|
set_cover,
|
||||||
|
set_metadata,
|
||||||
|
add_chapters,
|
||||||
|
slugify,
|
||||||
|
wp_get_cookies,
|
||||||
|
)
|
||||||
import tempfile
|
import tempfile
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
@@ -17,29 +25,47 @@ def home():
|
|||||||
|
|
||||||
|
|
||||||
@app.get("/download/{story_id}")
|
@app.get("/download/{story_id}")
|
||||||
async def download_book(story_id: int, download_images: bool = False):
|
async def download_book(
|
||||||
data = await retrieve_story(story_id)
|
story_id: int,
|
||||||
|
download_images: bool = False,
|
||||||
|
username: Optional[str] = None,
|
||||||
|
password: Optional[str] = None,
|
||||||
|
):
|
||||||
|
if username and not password or password and not username:
|
||||||
|
return HTMLResponse(
|
||||||
|
status_code=422,
|
||||||
|
content='Include both the username _and_ password, or neither. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
||||||
|
)
|
||||||
|
|
||||||
|
if username and password:
|
||||||
|
try:
|
||||||
|
cookies = await wp_get_cookies(username=username, password=password)
|
||||||
|
except ValueError:
|
||||||
|
return HTMLResponse(
|
||||||
|
status_code=403,
|
||||||
|
content='Incorrect Username and/or Password. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
cookies = None
|
||||||
|
|
||||||
|
data = await retrieve_story(story_id, cookies=cookies)
|
||||||
book = epub.EpubBook()
|
book = epub.EpubBook()
|
||||||
|
|
||||||
# Metadata and Cover are updated
|
|
||||||
try:
|
try:
|
||||||
set_metadata(book, data)
|
set_metadata(book, data)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
# raise HTTPException(
|
|
||||||
# status_code=404,
|
|
||||||
# detail='Story not found. Check the ID - Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
|
||||||
# )
|
|
||||||
# return FileResponse(BUILD_PATH / "index.html", status_code=404)
|
|
||||||
return HTMLResponse(
|
return HTMLResponse(
|
||||||
status_code=404,
|
status_code=404,
|
||||||
content='Story not found. Check the ID - Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
content='Story not found. Check the ID - Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
|
||||||
)
|
)
|
||||||
|
|
||||||
await set_cover(book, data)
|
await set_cover(book, data, cookies=cookies)
|
||||||
# print("Metadata Downloaded")
|
# print("Metadata Downloaded")
|
||||||
|
|
||||||
# Chapters are downloaded
|
# Chapters are downloaded
|
||||||
async for title in add_chapters(book, data, download_images=download_images):
|
async for title in add_chapters(
|
||||||
|
book, data, download_images=download_images, cookies=cookies
|
||||||
|
):
|
||||||
# print(f"Part ({title}) downloaded")
|
# print(f"Part ({title}) downloaded")
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,32 @@
|
|||||||
<script>
|
<script>
|
||||||
let story_id = "";
|
let story_id = "";
|
||||||
let download_images = false;
|
let download_images = false;
|
||||||
|
let is_paid_story = false;
|
||||||
|
let credentials = {
|
||||||
|
username: "",
|
||||||
|
password: "",
|
||||||
|
};
|
||||||
|
|
||||||
let after_download_page = false;
|
let after_download_page = false;
|
||||||
|
let url = "";
|
||||||
|
|
||||||
|
let button_disabled = false;
|
||||||
|
$: button_disabled =
|
||||||
|
!story_id ||
|
||||||
|
(is_paid_story && !(credentials.username && credentials.password));
|
||||||
|
|
||||||
|
// Download link, recomputed whenever any of its inputs change.
// The credentials are percent-encoded: a raw `&`, `#`, `+`, `%` or space in a
// username/password would otherwise split or corrupt the query string and the
// backend would receive the wrong value.
// NOTE(review): the credentials still travel in a GET query string, so they can
// end up in browser history and server/proxy logs — consider moving them to a
// POST body or a header.
$: url =
  `/download/${story_id}?om=1` +
  (download_images ? "&download_images=true" : "") +
  (is_paid_story
    ? `&username=${encodeURIComponent(credentials.username)}` +
      `&password=${encodeURIComponent(credentials.password)}`
    : "");
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div>
|
<div>
|
||||||
<div class="hero min-h-screen">
|
<div class="hero min-h-screen">
|
||||||
<div
|
<div
|
||||||
class="hero-content flex-col lg:flex-row-reverse glass p-16 rounded shadow-sm"
|
class="hero-content flex-col lg:flex-row-reverse bg-base-100/50 p-16 rounded shadow-sm"
|
||||||
>
|
>
|
||||||
{#if !after_download_page}
|
{#if !after_download_page}
|
||||||
<div class="text-center lg:text-left lg:p-10">
|
<div class="text-center lg:text-left lg:p-10">
|
||||||
@@ -36,22 +55,57 @@
|
|||||||
/>
|
/>
|
||||||
<label class="label" for="story_id">
|
<label class="label" for="story_id">
|
||||||
<button
|
<button
|
||||||
class="label-text link"
|
class="label-text link font-semibold"
|
||||||
onclick="StoryIDTutorialModal.showModal()"
|
onclick="StoryIDTutorialModal.showModal()"
|
||||||
data-umami-event="StoryIDTutorialModal Open"
|
data-umami-event="StoryIDTutorialModal Open"
|
||||||
>How to get a Story ID</button
|
>How to get a Story ID</button
|
||||||
>
|
>
|
||||||
</label>
|
</label>
|
||||||
|
<label class="cursor-pointer label">
|
||||||
|
<span class="label-text"
|
||||||
|
>This is a Paid Story, and I've purchased it</span
|
||||||
|
>
|
||||||
|
<input
|
||||||
|
type="checkbox"
|
||||||
|
class="checkbox checkbox-warning shadow-md"
|
||||||
|
bind:checked={is_paid_story}
|
||||||
|
/>
|
||||||
|
</label>
|
||||||
|
{#if is_paid_story}
|
||||||
|
<label class="input input-bordered flex items-center gap-2">
|
||||||
|
Username
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
class="grow"
|
||||||
|
name="username"
|
||||||
|
placeholder="foxtail.chicken"
|
||||||
|
bind:value={credentials.username}
|
||||||
|
required
|
||||||
|
/>
|
||||||
|
</label>
|
||||||
|
<label class="input input-bordered flex items-center gap-2">
|
||||||
|
Password
|
||||||
|
<input
|
||||||
|
type="password"
|
||||||
|
class="grow"
|
||||||
|
placeholder="supersecretpassword"
|
||||||
|
name="password"
|
||||||
|
bind:value={credentials.password}
|
||||||
|
required
|
||||||
|
/>
|
||||||
|
</label>
|
||||||
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="form-control mt-6">
|
<div class="form-control mt-6">
|
||||||
<a
|
<a
|
||||||
class="btn btn-primary rounded-l-none"
|
class="btn btn-primary rounded-l-none"
|
||||||
class:btn-disabled={!story_id}
|
class:btn-disabled={button_disabled}
|
||||||
data-umami-event="Download"
|
data-umami-event="Download"
|
||||||
href={`/download/${story_id}${download_images ? "?download_images=true" : ""}`}
|
href={url}
|
||||||
on:click={() => (after_download_page = true)}>Download</a
|
on:click={() => (after_download_page = true)}>Download</a
|
||||||
>
|
>
|
||||||
|
|
||||||
<label class="cursor-pointer label">
|
<label class="cursor-pointer label">
|
||||||
<span class="label-text"
|
<span class="label-text"
|
||||||
>Include Images (<strong>Slower Download</strong>)</span
|
>Include Images (<strong>Slower Download</strong>)</span
|
||||||
|
|||||||
Reference in New Issue
Block a user