2022-01-18 20:43:45 +00:00
|
|
|
import bs4
|
|
|
|
import executor
|
2022-01-18 21:05:00 +00:00
|
|
|
import logging
|
2022-01-18 20:43:45 +00:00
|
|
|
import pypub
|
|
|
|
import requests
|
|
|
|
|
|
|
|
|
|
|
|
@executor.executor
def process(metadata, output_dir):
    """Fetch the password-protected "Gravesong" post and write it out as an EPUB.

    The post is unlocked by submitting the post password to the site's
    ``wp-pass.php`` endpoint. Inside the returned page's ``div.entry-content``
    every ``<h1>`` starts a new chapter and every ``<p>`` is appended to the
    chapter currently being collected. Paragraphs that appear before the first
    ``<h1>`` are collected under the title "Introduction". Images are removed
    before the content is split into chapters.

    Args:
        metadata: Mapping expanded as keyword arguments into ``pypub.Epub``.
        output_dir: Passed unchanged to ``book.create_epub``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the page has no ``div.entry-content`` element.
    """
    book = pypub.Epub("Gravesong", **metadata)

    response = requests.post(
        "https://wanderinginn.com/wp-pass.php",
        data={"post_password": "Iwalkedameadowweary", "Submit": "Enter"},
        headers={"Referer": "https://wanderinginn.com/2022/01/11/gravesong/"},
        # Without a timeout requests waits forever on an unresponsive server.
        timeout=60,
    )
    # Fail loudly on HTTP errors instead of parsing an error page.
    response.raise_for_status()

    html = bs4.BeautifulSoup(response.content, "html.parser")
    entry_content = html.select_one("div.entry-content")
    if entry_content is None:
        # select_one returns None when nothing matches; report that clearly
        # rather than failing later with an AttributeError on None.
        raise RuntimeError(
            "Gravesong: 'div.entry-content' not found in the response"
        )

    for content_img in entry_content.find_all("img"):
        content_img.decompose()

    def add_chapter(title, paragraphs):
        """Log the chapter title and append the collected chapter to the book."""
        logging.info("Gravesong - %s", title)
        book.add_chapter(
            pypub.create_chapter_from_string("".join(paragraphs), title)
        )

    chapter_title = "Introduction"
    chapter_paragraphs = []

    for node in entry_content.children:
        if node.name == "h1":
            # A heading closes the chapter collected so far and opens a new one.
            add_chapter(chapter_title, chapter_paragraphs)
            chapter_title = node.get_text().strip()
            # Start the new chapter empty; otherwise each chapter would also
            # contain the text of every chapter before it.
            chapter_paragraphs = []
        elif node.name == "p":
            chapter_paragraphs.append(node.prettify())

    # Flush the chapter that was still being collected when the content ended.
    add_chapter(chapter_title, chapter_paragraphs)

    logging.info("Gravesong - Book")
    book.create_epub(output_dir)
|