diff --git a/src/api/requirements.txt b/src/api/requirements.txt index 06164e5..356667e 100644 --- a/src/api/requirements.txt +++ b/src/api/requirements.txt @@ -9,6 +9,8 @@ asttokens==2.4.1 async-timeout==4.0.3 attrs==23.1.0 backoff==2.2.1 +beautifulsoup4==4.12.3 +bs4==0.0.2 click==8.1.7 comm==0.2.0 debugpy==1.8.0 @@ -48,6 +50,7 @@ pyzmq==25.1.2 rich==13.7.0 six==1.16.0 sniffio==1.3.0 +soupsieve==2.5 stack-data==0.6.3 starlette==0.32.0.post1 tornado==6.4 diff --git a/src/api/src/create_book.py b/src/api/src/create_book.py index a389c7f..85abf08 100644 --- a/src/api/src/create_book.py +++ b/src/api/src/create_book.py @@ -6,6 +6,7 @@ import backoff from aiohttp import ClientResponseError from aiohttp_client_cache.session import CachedSession from aiohttp_client_cache import FileBackend +from bs4 import BeautifulSoup headers = { @@ -116,19 +117,38 @@ async def set_cover(book, data): book.set_cover("cover.jpg", await fetch_cover(data["cover"])) -async def add_chapters(book, data): +async def add_chapters(book, data, download_images: bool = False): chapters = [] for part in data["parts"]: content = await fetch_part_content(part["id"]) title = part["title"] + clean_title = slugify(title) # Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1 chapter = epub.EpubHtml( title=title, - file_name=f"{slugify(title)}.xhtml", + file_name=f"{clean_title}.xhtml", lang=data["language"]["name"], ) + + if download_images: + soup = BeautifulSoup(content, "lxml") + async with CachedSession(cache=cache, headers=headers) as session: + for idx, image in enumerate(soup.find_all("img")): + if not image["src"]: + continue + async with session.get(image["src"]) as response: + img = epub.EpubImage( + media_type="image/jpeg", + content=await response.read(), + file_name=f"static/{clean_title}/{idx}.jpeg", + ) + book.add_item(img) + content = content.replace( + str(image), f'' + ) + 
chapter.set_content(f"<h1>{title}</h1>" + content) chapters.append(chapter) diff --git a/src/api/src/main.py b/src/api/src/main.py index f639dba..d49126c 100644 --- a/src/api/src/main.py +++ b/src/api/src/main.py @@ -17,7 +17,7 @@ def home(): @app.get("/download/{story_id}") -async def download_book(story_id: int): +async def download_book(story_id: int, download_images: bool = False): data = await retrieve_story(story_id) book = epub.EpubBook() @@ -27,7 +27,7 @@ async def download_book(story_id: int): # print("Metadata Downloaded") # Chapters are downloaded - async for title in add_chapters(book, data): + async for title in add_chapters(book, data, download_images=download_images): # print(f"Part ({title}) downloaded") ... @@ -57,4 +57,4 @@ app.mount("/", StaticFiles(directory=BUILD_PATH), "static") if __name__ == "__main__": import uvicorn - uvicorn.run(app, host="0.0.0.0", port=80) + uvicorn.run(app, host="0.0.0.0", port=1112)