This commit is contained in:
parent
a69acf171c
commit
bd60f3c0c5
@@ -4,10 +4,8 @@ type: docker
|
||||
|
||||
steps:
|
||||
- name: epub
|
||||
image: python:slim
|
||||
image: python
|
||||
commands:
|
||||
- apt-get update
|
||||
- apt-get install -y git
|
||||
- pip install flake8 black
|
||||
- flake8 --ignore=E501
|
||||
- black --check .
|
||||
|
12
executor.py
12
executor.py
@@ -1,10 +1,12 @@
|
||||
import asyncio
|
||||
import functools
|
||||
|
||||
|
||||
def executor(func):
|
||||
async def wrapper(*args, **kwargs):
|
||||
await asyncio.get_event_loop().run_in_executor(
|
||||
None, lambda: func(*args, **kwargs)
|
||||
def executor(f):
|
||||
@functools.wraps(f)
|
||||
async def wrapped(*args, **kwargs):
|
||||
return await asyncio.get_running_loop().run_in_executor(
|
||||
None, lambda: f(*args, **kwargs)
|
||||
)
|
||||
|
||||
return wrapper
|
||||
return wrapped
|
||||
|
@@ -31,7 +31,7 @@ def process(metadata, output_dir):
|
||||
book.add_chapter(
|
||||
pypub.create_chapter_from_string(chapter_content, chapter_title)
|
||||
)
|
||||
chapter_title = paragraph.get_text()
|
||||
chapter_title = paragraph.get_text().strip()
|
||||
elif paragraph.name == "p":
|
||||
chapter_content += paragraph.prettify()
|
||||
|
||||
|
12
twi.py
12
twi.py
@@ -1,4 +1,3 @@
|
||||
import asyncio
|
||||
import bs4
|
||||
import executor
|
||||
import logging
|
||||
@@ -13,7 +12,7 @@ def fetchVolume(title, metadata, volume_title, output_dir, links):
|
||||
chapter_response = requests.get(link["href"])
|
||||
chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser")
|
||||
chapter_content = chapter_html.select_one("div.entry-content")
|
||||
chapter_title = chapter_html.select_one("h1.entry-title").get_text()
|
||||
chapter_title = chapter_html.select_one("h1.entry-title").get_text().strip()
|
||||
|
||||
a_remove = chapter_content.find_all("a")
|
||||
hr_remove = chapter_content.find_all("hr")
|
||||
@@ -41,19 +40,14 @@ def process(metadata, output_dir, url):
|
||||
|
||||
html = bs4.BeautifulSoup(response.content, "html.parser")
|
||||
content = html.select("div.entry-content > p")
|
||||
title = html.select_one("#site-title > span > a").get_text()
|
||||
title = html.select_one("#site-title > span > a").get_text().strip()
|
||||
|
||||
volume_title = None
|
||||
tasks = []
|
||||
|
||||
for paragraph in content:
|
||||
if paragraph.strong is not None:
|
||||
volume_title = paragraph.strong.get_text()
|
||||
volume_title = paragraph.strong.get_text().strip()
|
||||
else:
|
||||
tasks.append(
|
||||
fetchVolume(
|
||||
title, metadata, volume_title, output_dir, paragraph.find_all("a")
|
||||
)
|
||||
)
|
||||
|
||||
asyncio.run(asyncio.gather(*tasks))
|
||||
|
Reference in New Issue
Block a user