Michel Roux
d3a7ac0fc2
Some checks reported errors
continuous-integration/drone/push Build encountered an error
59 lines
1.7 KiB
Python
59 lines
1.7 KiB
Python
import asyncio
|
|
import executor
|
|
import bs4
|
|
import pypub
|
|
import requests
|
|
|
|
|
|
def fetchVolume(title, metadata, volume_title, output_dir, links):
    """Download every chapter of one volume and write the volume as an EPUB.

    Each chapter page is fetched, reduced to its ``div.entry-content``
    element, stripped of links, horizontal rules and tiled image galleries,
    and appended to the book under the page's ``h1.entry-title`` text.

    Args:
        title: Series title; combined with ``volume_title`` to name the book.
        metadata: Extra positional arguments forwarded to ``pypub.Epub``.
        volume_title: Title of the volume being built.
        output_dir: Directory handed to ``book.create_epub``.
        links: Iterable of items whose ``["href"]`` is a chapter URL
            (for example Beautiful Soup ``<a>`` tags).

    Raises:
        requests.HTTPError: If a chapter page answers with an HTTP error
            status.
    """
    book = pypub.Epub(f"{title} - {volume_title}", *metadata)

    for link in links:
        chapter_response = requests.get(link["href"])
        # Fail loudly on 4xx/5xx instead of scraping an error page as a chapter.
        chapter_response.raise_for_status()

        chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser")
        chapter_content = chapter_html.select_one("div.entry-content")
        chapter_title = chapter_html.select_one("h1.entry-title").get_text()

        # CSS selectors must go through select(): find_all("div.tiled-gallery")
        # would look for a tag literally named "div.tiled-gallery" and never
        # match. Each selector is queried only after the previous removals, so
        # an element already destroyed with its parent is not visited again.
        for selector in ("a", "hr", "div.tiled-gallery"):
            for removed in chapter_content.select(selector):
                removed.decompose()

        print(f"{title} - {chapter_title}")
        book.add_chapter(
            pypub.create_chapter_from_string(chapter_content.prettify(), chapter_title)
        )

    print(f"{title} - {volume_title}")
    book.create_epub(output_dir)
|
|
|
|
|
|
@executor.executor
def process(metadata, output_dir, url):
    """Scrape a table-of-contents page and build one EPUB per volume.

    The page's ``div.entry-content > p`` paragraphs are walked in order. A
    paragraph containing a ``<strong>`` element sets the current volume title;
    any other paragraph that contains links is treated as that volume's
    chapter list and handed to ``fetchVolume``.

    Args:
        metadata: Extra positional arguments forwarded to ``fetchVolume``.
        output_dir: Directory in which the EPUB files are created.
        url: Address of the table-of-contents page.

    Raises:
        requests.HTTPError: If the table-of-contents page answers with an
            HTTP error status.
    """
    response = requests.get(url)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()

    html = bs4.BeautifulSoup(response.content, "html.parser")
    content = html.select("div.entry-content > p")
    title = html.select_one("#site-title > span > a").get_text()

    volume_title = None

    for paragraph in content:
        if paragraph.strong is not None:
            volume_title = paragraph.strong.get_text()
            continue

        links = paragraph.find_all("a")
        if not links:
            # A paragraph without links has no chapters; building a book from
            # it would only produce an empty EPUB named after the previous
            # volume.
            continue

        # fetchVolume is a plain synchronous function that returns None, so
        # there is nothing to await afterwards: collecting its results and
        # passing them to asyncio.gather() raised once all the work was done.
        fetchVolume(title, metadata, volume_title, output_dir, links)
|