diff --git a/src/api/requirements.txt b/src/api/requirements.txt
index 06164e5..356667e 100644
--- a/src/api/requirements.txt
+++ b/src/api/requirements.txt
@@ -9,6 +9,8 @@ asttokens==2.4.1
async-timeout==4.0.3
attrs==23.1.0
backoff==2.2.1
+beautifulsoup4==4.12.3
+bs4==0.0.2
click==8.1.7
comm==0.2.0
debugpy==1.8.0
@@ -48,6 +50,7 @@ pyzmq==25.1.2
rich==13.7.0
six==1.16.0
sniffio==1.3.0
+soupsieve==2.5
stack-data==0.6.3
starlette==0.32.0.post1
tornado==6.4
diff --git a/src/api/src/create_book.py b/src/api/src/create_book.py
index a389c7f..85abf08 100644
--- a/src/api/src/create_book.py
+++ b/src/api/src/create_book.py
@@ -6,6 +6,7 @@ import backoff
from aiohttp import ClientResponseError
from aiohttp_client_cache.session import CachedSession
from aiohttp_client_cache import FileBackend
+from bs4 import BeautifulSoup
headers = {
@@ -116,19 +117,38 @@ async def set_cover(book, data):
book.set_cover("cover.jpg", await fetch_cover(data["cover"]))
-async def add_chapters(book, data):
+async def add_chapters(book, data, download_images: bool = False):
chapters = []
for part in data["parts"]:
content = await fetch_part_content(part["id"])
title = part["title"]
+ clean_title = slugify(title)
# Thanks https://eu17.proxysite.com/process.php?d=5VyWYcoQl%2BVF0BYOuOavtvjOloFUZz2BJ%2Fepiusk6Nz7PV%2B9i8rs7cFviGftrBNll%2B0a3qO7UiDkTt4qwCa0fDES&b=1
chapter = epub.EpubHtml(
title=title,
- file_name=f"{slugify(title)}.xhtml",
+ file_name=f"{clean_title}.xhtml",
lang=data["language"]["name"],
)
+
+ if download_images:
+ soup = BeautifulSoup(content, "lxml")
+ async with CachedSession(cache=cache, headers=headers) as session:
+ for idx, image in enumerate(soup.find_all("img")):
+ if not image["src"]:
+ continue
+ async with session.get(image["src"]) as response:
+ img = epub.EpubImage(
+ media_type="image/jpeg",
+ content=await response.read(),
+ file_name=f"static/{clean_title}/{idx}.jpeg",
+ )
+ book.add_item(img)
+ content = content.replace(
+ str(image), f'
'
+ )
+
chapter.set_content(f"