This commit is contained in:
parent
a69acf171c
commit
bd60f3c0c5
@ -4,10 +4,8 @@ type: docker
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: epub
|
- name: epub
|
||||||
image: python:slim
|
image: python
|
||||||
commands:
|
commands:
|
||||||
- apt-get update
|
|
||||||
- apt-get install -y git
|
|
||||||
- pip install flake8 black
|
- pip install flake8 black
|
||||||
- flake8 --ignore=E501
|
- flake8 --ignore=E501
|
||||||
- black --check .
|
- black --check .
|
||||||
|
12
executor.py
12
executor.py
@ -1,10 +1,12 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
|
import functools
|
||||||
|
|
||||||
|
|
||||||
def executor(func):
|
def executor(f):
|
||||||
async def wrapper(*args, **kwargs):
|
@functools.wraps(f)
|
||||||
await asyncio.get_event_loop().run_in_executor(
|
async def wrapped(*args, **kwargs):
|
||||||
None, lambda: func(*args, **kwargs)
|
return await asyncio.get_running_loop().run_in_executor(
|
||||||
|
None, lambda: f(*args, **kwargs)
|
||||||
)
|
)
|
||||||
|
|
||||||
return wrapper
|
return wrapped
|
||||||
|
@ -31,7 +31,7 @@ def process(metadata, output_dir):
|
|||||||
book.add_chapter(
|
book.add_chapter(
|
||||||
pypub.create_chapter_from_string(chapter_content, chapter_title)
|
pypub.create_chapter_from_string(chapter_content, chapter_title)
|
||||||
)
|
)
|
||||||
chapter_title = paragraph.get_text()
|
chapter_title = paragraph.get_text().strip()
|
||||||
elif paragraph.name == "p":
|
elif paragraph.name == "p":
|
||||||
chapter_content += paragraph.prettify()
|
chapter_content += paragraph.prettify()
|
||||||
|
|
||||||
|
16
twi.py
16
twi.py
@ -1,4 +1,3 @@
|
|||||||
import asyncio
|
|
||||||
import bs4
|
import bs4
|
||||||
import executor
|
import executor
|
||||||
import logging
|
import logging
|
||||||
@ -13,7 +12,7 @@ def fetchVolume(title, metadata, volume_title, output_dir, links):
|
|||||||
chapter_response = requests.get(link["href"])
|
chapter_response = requests.get(link["href"])
|
||||||
chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser")
|
chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser")
|
||||||
chapter_content = chapter_html.select_one("div.entry-content")
|
chapter_content = chapter_html.select_one("div.entry-content")
|
||||||
chapter_title = chapter_html.select_one("h1.entry-title").get_text()
|
chapter_title = chapter_html.select_one("h1.entry-title").get_text().strip()
|
||||||
|
|
||||||
a_remove = chapter_content.find_all("a")
|
a_remove = chapter_content.find_all("a")
|
||||||
hr_remove = chapter_content.find_all("hr")
|
hr_remove = chapter_content.find_all("hr")
|
||||||
@ -41,19 +40,14 @@ def process(metadata, output_dir, url):
|
|||||||
|
|
||||||
html = bs4.BeautifulSoup(response.content, "html.parser")
|
html = bs4.BeautifulSoup(response.content, "html.parser")
|
||||||
content = html.select("div.entry-content > p")
|
content = html.select("div.entry-content > p")
|
||||||
title = html.select_one("#site-title > span > a").get_text()
|
title = html.select_one("#site-title > span > a").get_text().strip()
|
||||||
|
|
||||||
volume_title = None
|
volume_title = None
|
||||||
tasks = []
|
|
||||||
|
|
||||||
for paragraph in content:
|
for paragraph in content:
|
||||||
if paragraph.strong is not None:
|
if paragraph.strong is not None:
|
||||||
volume_title = paragraph.strong.get_text()
|
volume_title = paragraph.strong.get_text().strip()
|
||||||
else:
|
else:
|
||||||
tasks.append(
|
fetchVolume(
|
||||||
fetchVolume(
|
title, metadata, volume_title, output_dir, paragraph.find_all("a")
|
||||||
title, metadata, volume_title, output_dir, paragraph.find_all("a")
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
asyncio.run(asyncio.gather(*tasks))
|
|
||||||
|
Reference in New Issue
Block a user