Merge pull request #6 from TheOnlyWayUp/pub-auth

Add authentication
This commit is contained in:
Dhanush R
2024-07-01 00:32:16 +05:30
committed by GitHub
3 changed files with 207 additions and 69 deletions
+112 -54
View File
@@ -1,9 +1,10 @@
import asyncio import asyncio
from typing import Optional
from ebooklib import epub from ebooklib import epub
import unicodedata import unicodedata
import re import re
import backoff import backoff
from aiohttp import ClientResponseError from aiohttp import ClientResponseError, ClientSession
from aiohttp_client_cache.session import CachedSession from aiohttp_client_cache.session import CachedSession
from aiohttp_client_cache import FileBackend from aiohttp_client_cache import FileBackend
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@@ -13,59 +14,46 @@ headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
} }
cache = FileBackend( cache = FileBackend(use_temp=True, expire_after=43200) # 12 hours
use_temp=True,
expire_after=43200, # 12 hours # --- Utilities --- #
)
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15) async def wp_get_cookies(username: str, password: str) -> dict:
async def retrieve_story(story_id: int, retry=True) -> dict: # source: https://github.com/TheOnlyWayUp/WP-DM-Export/blob/dd4c7c51cb43f2108e0f63fc10a66cd24a740e4e/src/API/src/main.py#L25-L58
"""Taking a story_id, return its information from the Wattpad API.""" """Retrieves authorization cookies from Wattpad by logging in with user creds.
async with CachedSession(headers=headers, cache=cache) as session:
async with session.get( Args:
f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover" username (str): Username.
password (str): Password.
Raises:
ValueError: Bad status code.
ValueError: No cookies returned.
Returns:
dict: Authorization cookies.
"""
async with ClientSession(headers=headers) as session:
async with session.post(
"https://www.wattpad.com/auth/login?nextUrl=%2F&_data=routes%2Fauth%2Flogin",
data={
"username": username.lower(),
"password": password,
}, # the username.lower() is for caching
) as response: ) as response:
if not response.ok: if response.status != 204:
if response.status in [404, 400]: raise ValueError("Not a 204.")
return {}
response.raise_for_status()
body = await response.json() cookies = {
k: v.value
for k, v in response.cookies.items() # Thanks https://stackoverflow.com/a/32281245
}
return body if not cookies:
raise ValueError("No cookies.")
return cookies
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_part_content(part_id: int) -> str:
    """Download the HTML text of a single story part.

    The request is made through a ``CachedSession`` backed by the
    module-level ``cache``. A 400 or 404 response yields an empty string;
    any other error status raises ``ClientResponseError``, which the
    ``backoff`` decorator retries (exponential wait, ``max_time=15``).

    Args:
        part_id: Identifier of the part to fetch.

    Returns:
        The response body as text, or ``""`` when the part is unavailable.
    """
    endpoint = f"https://www.wattpad.com/apiv2/?m=storytext&id={part_id}"

    async with CachedSession(headers=headers, cache=cache) as session:
        async with session.get(endpoint) as response:
            if response.status in (400, 404):
                # A missing or rejected part is reported as empty content.
                return ""
            if not response.ok:
                response.raise_for_status()
            return await response.text()
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_cover(url: str) -> bytes:
    """Download the raw bytes of an image.

    The request is made through a ``CachedSession`` backed by the
    module-level ``cache``. A 400 or 404 response yields empty bytes; any
    other error status raises ``ClientResponseError``, which the ``backoff``
    decorator retries (exponential wait, ``max_time=15``).

    Args:
        url: Location of the image.

    Returns:
        The response body, or ``b""`` when the image is unavailable.
    """
    async with CachedSession(headers=headers, cache=cache) as session:
        async with session.get(url) as response:
            if response.status in (400, 404):
                # A missing or rejected image is reported as empty bytes.
                return b""
            if not response.ok:
                response.raise_for_status()
            return await response.read()
def slugify(value, allow_unicode=False) -> str: def slugify(value, allow_unicode=False) -> str:
@@ -91,7 +79,71 @@ def slugify(value, allow_unicode=False) -> str:
return re.sub(r"[-\s]+", "-", value).strip("-_") return re.sub(r"[-\s]+", "-", value).strip("-_")
# --- # # --- API Calls --- #
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def retrieve_story(story_id: int, cookies: Optional[dict] = None) -> dict:
    """Fetch a story's metadata from the Wattpad v3 stories endpoint.

    Without cookies the request goes through a ``CachedSession`` backed by
    the module-level ``cache``; with cookies a plain ``ClientSession`` is
    used so the response is not cached. A 400 or 404 response yields an
    empty dict; any other error status raises ``ClientResponseError``,
    which the ``backoff`` decorator retries (exponential wait,
    ``max_time=15``).

    Args:
        story_id: Identifier of the story to look up.
        cookies: Optional cookies to send with the request.

    Returns:
        The decoded JSON body, or ``{}`` when the story is unavailable.
    """
    endpoint = f"https://www.wattpad.com/api/v3/stories/{story_id}?fields=tags,id,title,createDate,modifyDate,language(name),description,completed,mature,url,isPaywalled,user(username),parts(id,title),cover"

    if cookies:
        # Requests carrying cookies are deliberately not cached.
        client = ClientSession(headers=headers, cookies=cookies)
    else:
        client = CachedSession(headers=headers, cache=cache)

    async with client as session:
        async with session.get(endpoint) as response:
            if response.status in (400, 404):
                # A missing or rejected story is reported as empty metadata.
                return {}
            if not response.ok:
                response.raise_for_status()
            return await response.json()
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_part_content(part_id: int, cookies: Optional[dict] = None) -> str:
    """Download the HTML text of a single story part.

    Without cookies the request goes through a ``CachedSession`` backed by
    the module-level ``cache``; with cookies a plain ``ClientSession`` is
    used so the response is not cached. A 400 or 404 response yields an
    empty string; any other error status raises ``ClientResponseError``,
    which the ``backoff`` decorator retries (exponential wait,
    ``max_time=15``).

    Args:
        part_id: Identifier of the part to fetch.
        cookies: Optional cookies to send with the request.

    Returns:
        The response body as text, or ``""`` when the part is unavailable.
    """
    endpoint = f"https://www.wattpad.com/apiv2/?m=storytext&id={part_id}"

    if cookies:
        # Requests carrying cookies are deliberately not cached.
        client = ClientSession(headers=headers, cookies=cookies)
    else:
        client = CachedSession(headers=headers, cache=cache)

    async with client as session:
        async with session.get(endpoint) as response:
            if response.status in (400, 404):
                # A missing or rejected part is reported as empty content.
                return ""
            if not response.ok:
                response.raise_for_status()
            return await response.text()
@backoff.on_exception(backoff.expo, ClientResponseError, max_time=15)
async def fetch_cover(url: str, cookies: Optional[dict] = None) -> bytes:
    """Download the raw bytes of an image.

    Without cookies the request goes through a ``CachedSession`` backed by
    the module-level ``cache``; with cookies a plain ``ClientSession`` is
    used so the response is not cached. A 400 or 404 response yields empty
    bytes; any other error status raises ``ClientResponseError``, which the
    ``backoff`` decorator retries (exponential wait, ``max_time=15``).

    Args:
        url: Location of the image.
        cookies: Optional cookies to send with the request.

    Returns:
        The response body, or ``b""`` when the image is unavailable.
    """
    # NOTE(review): when cookies are supplied they accompany a request to
    # whatever host `url` names -- confirm callers only pass trusted URLs.
    if cookies:
        # Requests carrying cookies are deliberately not cached.
        client = ClientSession(headers=headers, cookies=cookies)
    else:
        client = CachedSession(headers=headers, cache=cache)

    async with client as session:
        async with session.get(url) as response:
            if response.status in (400, 404):
                # A missing or rejected image is reported as empty bytes.
                return b""
            if not response.ok:
                response.raise_for_status()
            return await response.read()
# --- EPUB Generation --- #
def set_metadata(book, data): def set_metadata(book, data):
@@ -113,15 +165,17 @@ def set_metadata(book, data):
) )
async def set_cover(book, data): async def set_cover(book, data, cookies: Optional[dict] = None):
book.set_cover("cover.jpg", await fetch_cover(data["cover"])) book.set_cover("cover.jpg", await fetch_cover(data["cover"], cookies=cookies))
async def add_chapters(book, data, download_images: bool = False): async def add_chapters(
book, data, download_images: bool = False, cookies: Optional[dict] = None
):
chapters = [] chapters = []
for part in data["parts"]: for part in data["parts"]:
content = await fetch_part_content(part["id"]) content = await fetch_part_content(part["id"], cookies=cookies)
title = part["title"] title = part["title"]
clean_title = slugify(title) clean_title = slugify(title)
@@ -134,7 +188,11 @@ async def add_chapters(book, data, download_images: bool = False):
if download_images: if download_images:
soup = BeautifulSoup(content, "lxml") soup = BeautifulSoup(content, "lxml")
async with CachedSession(cache=cache, headers=headers) as session: async with (
CachedSession(headers=headers, cache=cache)
if not cookies
else ClientSession(headers=headers, cookies=cookies)
) as session: # Don't cache requests with Cookies.
for idx, image in enumerate(soup.find_all("img")): for idx, image in enumerate(soup.find_all("img")):
if not image["src"]: if not image["src"]:
continue continue
+37 -11
View File
@@ -1,8 +1,16 @@
from typing import Optional
from pathlib import Path from pathlib import Path
from fastapi import FastAPI, HTTPException from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
from ebooklib import epub from ebooklib import epub
from create_book import retrieve_story, set_cover, set_metadata, add_chapters, slugify from create_book import (
retrieve_story,
set_cover,
set_metadata,
add_chapters,
slugify,
wp_get_cookies,
)
import tempfile import tempfile
from io import BytesIO from io import BytesIO
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
@@ -17,29 +25,47 @@ def home():
@app.get("/download/{story_id}") @app.get("/download/{story_id}")
async def download_book(story_id: int, download_images: bool = False): async def download_book(
data = await retrieve_story(story_id) story_id: int,
download_images: bool = False,
username: Optional[str] = None,
password: Optional[str] = None,
):
if username and not password or password and not username:
return HTMLResponse(
status_code=422,
content='Include both the username _and_ password, or neither. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
)
if username and password:
try:
cookies = await wp_get_cookies(username=username, password=password)
except ValueError:
return HTMLResponse(
status_code=403,
content='Incorrect Username and/or Password. Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
)
else:
cookies = None
data = await retrieve_story(story_id, cookies=cookies)
book = epub.EpubBook() book = epub.EpubBook()
# Metadata and Cover are updated
try: try:
set_metadata(book, data) set_metadata(book, data)
except KeyError: except KeyError:
# raise HTTPException(
# status_code=404,
# detail='Story not found. Check the ID - Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
# )
# return FileResponse(BUILD_PATH / "index.html", status_code=404)
return HTMLResponse( return HTMLResponse(
status_code=404, status_code=404,
content='Story not found. Check the ID - Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>', content='Story not found. Check the ID - Support is available on the <a href="https://discord.gg/P9RHC4KCwd" target="_blank">Discord</a>',
) )
await set_cover(book, data) await set_cover(book, data, cookies=cookies)
# print("Metadata Downloaded") # print("Metadata Downloaded")
# Chapters are downloaded # Chapters are downloaded
async for title in add_chapters(book, data, download_images=download_images): async for title in add_chapters(
book, data, download_images=download_images, cookies=cookies
):
# print(f"Part ({title}) downloaded") # print(f"Part ({title}) downloaded")
... ...
+58 -4
View File
@@ -1,13 +1,32 @@
<script> <script>
let story_id = ""; let story_id = "";
let download_images = false; let download_images = false;
let is_paid_story = false;
let credentials = {
username: "",
password: "",
};
let after_download_page = false; let after_download_page = false;
let url = "";
let button_disabled = false;
$: button_disabled =
!story_id ||
(is_paid_story && !(credentials.username && credentials.password));
// Download link, recomputed whenever any referenced value changes.
// The credentials are percent-encoded so that characters such as "&", "#",
// "+" or "%" in a username or password cannot split or corrupt the query
// string (an unencoded "+" would otherwise reach the server as a space).
$: url =
  `/download/${story_id}?om=1` +
  (download_images ? "&download_images=true" : "") +
  (is_paid_story
    ? `&username=${encodeURIComponent(credentials.username)}&password=${encodeURIComponent(credentials.password)}`
    : "");
</script> </script>
<div> <div>
<div class="hero min-h-screen"> <div class="hero min-h-screen">
<div <div
class="hero-content flex-col lg:flex-row-reverse glass p-16 rounded shadow-sm" class="hero-content flex-col lg:flex-row-reverse bg-base-100/50 p-16 rounded shadow-sm"
> >
{#if !after_download_page} {#if !after_download_page}
<div class="text-center lg:text-left lg:p-10"> <div class="text-center lg:text-left lg:p-10">
@@ -36,22 +55,57 @@
/> />
<label class="label" for="story_id"> <label class="label" for="story_id">
<button <button
class="label-text link" class="label-text link font-semibold"
onclick="StoryIDTutorialModal.showModal()" onclick="StoryIDTutorialModal.showModal()"
data-umami-event="StoryIDTutorialModal Open" data-umami-event="StoryIDTutorialModal Open"
>How to get a Story ID</button >How to get a Story ID</button
> >
</label> </label>
<label class="cursor-pointer label">
<span class="label-text"
>This is a Paid Story, and I've purchased it</span
>
<input
type="checkbox"
class="checkbox checkbox-warning shadow-md"
bind:checked={is_paid_story}
/>
</label>
{#if is_paid_story}
<label class="input input-bordered flex items-center gap-2">
Username
<input
type="text"
class="grow"
name="username"
placeholder="foxtail.chicken"
bind:value={credentials.username}
required
/>
</label>
<label class="input input-bordered flex items-center gap-2">
Password
<input
type="password"
class="grow"
placeholder="supersecretpassword"
name="password"
bind:value={credentials.password}
required
/>
</label>
{/if}
</div> </div>
<div class="form-control mt-6"> <div class="form-control mt-6">
<a <a
class="btn btn-primary rounded-l-none" class="btn btn-primary rounded-l-none"
class:btn-disabled={!story_id} class:btn-disabled={button_disabled}
data-umami-event="Download" data-umami-event="Download"
href={`/download/${story_id}${download_images ? "?download_images=true" : ""}`} href={url}
on:click={() => (after_download_page = true)}>Download</a on:click={() => (after_download_page = true)}>Download</a
> >
<label class="cursor-pointer label"> <label class="cursor-pointer label">
<span class="label-text" <span class="label-text"
>Include Images (<strong>Slower Download</strong>)</span >Include Images (<strong>Slower Download</strong>)</span