Adapt for the rewrite of book 1
Some checks reported errors
continuous-integration/drone/push Build was killed

This commit is contained in:
Michel Roux 2023-03-05 16:18:01 +01:00
parent 2258ad48dd
commit 41cbe3640e
2 changed files with 18 additions and 10 deletions

View File

@@ -82,7 +82,7 @@ def async_wrap(func):
makedirs(name=OUTPUT_DIR, exist_ok=True) makedirs(name=OUTPUT_DIR, exist_ok=True)
cover_req = get( cover_req = get(
url="https://i0.wp.com/thefantasyinn.com/wp-content/uploads/2018/08/twi.jpg", url="https://i0.wp.com/wanderinginn.com/wp-content/uploads/2023/03/Wandering_Inn-Vol1-eCover.jpg", # noqa: E501
stream=True, stream=True,
) )
Image.open(cover_req.raw).save(f"{OUTPUT_DIR}/cover.png") Image.open(cover_req.raw).save(f"{OUTPUT_DIR}/cover.png")
@@ -94,17 +94,19 @@ def process_volume(epub: Epub, urls: List[str]):
page_html = BeautifulSoup(markup=page_req.text, features="lxml") page_html = BeautifulSoup(markup=page_req.text, features="lxml")
page_content = page_html.select_one("div.entry-content") page_content = page_html.select_one("div.entry-content")
page_title = page_html.select_one("h1.entry-title") page_title = page_html.select_one("h1.entry-title")
page_date = page_html.find(name="meta", property="article:modified_time")
if not page_content or not page_title or type(page_date) is not Tag: if not page_content or not page_title:
raise Exception() raise Exception("Missing title or content")
title = page_title.get_text().strip()
if not title:
continue
galleries = page_content.select("div.tiled-gallery") galleries = page_content.select("div.tiled-gallery")
for gallery in galleries: for gallery in galleries:
gallery.decompose() gallery.decompose()
title = page_title.get_text().strip()
chapter = create_chapter_from_string( chapter = create_chapter_from_string(
html=page_content.prettify(), html=page_content.prettify(),
title=title, title=title,
@@ -118,7 +120,9 @@ def process_volume(epub: Epub, urls: List[str]):
@async_wrap @async_wrap
def process_book(url: str, creator: str, publisher: str, language: str): def process_book(
url: str, creator: str, publisher: str, language: str, strip_first=False
):
toc_req = get(url) toc_req = get(url)
toc_html = BeautifulSoup(markup=toc_req.text, features="lxml") toc_html = BeautifulSoup(markup=toc_req.text, features="lxml")
toc_content = toc_html.select("div.entry-content > p") toc_content = toc_html.select("div.entry-content > p")
@@ -127,10 +131,13 @@ def process_book(url: str, creator: str, publisher: str, language: str):
if not toc_title or type(toc_date) is not Tag: if not toc_title or type(toc_date) is not Tag:
raise Exception("Missing title or date") raise Exception("Missing title or date")
title = toc_title.get_text().strip()
if strip_first:
toc_content.pop(0)
for i, toc_line in enumerate(toc_content): for i, toc_line in enumerate(toc_content):
if i % 2 == 0: if i % 2 == 0:
title = toc_title.get_text().strip()
volume = toc_line.get_text().strip() volume = toc_line.get_text().strip()
elif volume: elif volume:
epub = Epub( epub = Epub(
@@ -163,6 +170,7 @@ async def create_books():
creator="Pirateaba", creator="Pirateaba",
publisher="Xefir", publisher="Xefir",
language="en", language="en",
strip_first=True,
), ),
) )

6
poetry.lock generated
View File

@@ -630,14 +630,14 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa
[[package]] [[package]]
name = "platformdirs" name = "platformdirs"
version = "3.0.0" version = "3.1.0"
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
category = "dev" category = "dev"
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
files = [ files = [
{file = "platformdirs-3.0.0-py3-none-any.whl", hash = "sha256:b1d5eb14f221506f50d6604a561f4c5786d9e80355219694a1b244bcd96f4567"}, {file = "platformdirs-3.1.0-py3-none-any.whl", hash = "sha256:13b08a53ed71021350c9e300d4ea8668438fb0046ab3937ac9a29913a1a1350a"},
{file = "platformdirs-3.0.0.tar.gz", hash = "sha256:8a1228abb1ef82d788f74139988b137e78692984ec7b08eaa6c65f1723af28f9"}, {file = "platformdirs-3.1.0.tar.gz", hash = "sha256:accc3665857288317f32c7bebb5a8e482ba717b474f3fc1d18ca7f9214be0cef"},
] ]
[package.extras] [package.extras]