New way of generating epubs
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Michel Roux 2022-08-18 12:59:02 +02:00
parent 4d550b3806
commit 28b2091182
7 changed files with 90 additions and 30 deletions

3
.flake8 Normal file
View File

@ -0,0 +1,3 @@
[flake8]
exclude = .venv/
max-line-length = 100

View File

@ -2,6 +2,6 @@
for FILE in output/epubs/* for FILE in output/epubs/*
do do
ebook-polish -fpuiU "${FILE}" "${FILE}" ebook-polish -fjpuiHU "${FILE}" "${FILE}"
ebook-convert "${FILE}" "${FILE%.*}.pdf" ebook-convert "${FILE}" "${FILE%.*}.pdf"
done done

View File

@ -1,13 +1,23 @@
import bs4 import bs4
import executor
import logging import logging
import pypub
import requests import requests
import utils
from ebooklib import epub
@executor.executor @utils.executor
def process(metadata, output_dir): def process(metadatas, output_dir):
book = pypub.Epub("Gravesong", **metadata) book = epub.EpubBook()
book_items = []
book.set_identifier("gravesong")
book.set_title("Gravesong")
book.set_language(metadatas["language"])
book.set_cover("cover.jpg", open(metadatas["cover"], "rb").read())
book.add_metadata("DC", "description", "Advance Patreon Edition")
for author in metadatas["authors"]:
book.add_author(author)
response = requests.post( response = requests.post(
"https://wanderinginn.com/wp-pass.php", "https://wanderinginn.com/wp-pass.php",
@ -28,15 +38,34 @@ def process(metadata, output_dir):
for paragraph in entry_content.children: for paragraph in entry_content.children:
if paragraph.name == "h1": if paragraph.name == "h1":
logging.log(logging.INFO, f"Gravesong - {chapter_title}") logging.log(logging.INFO, f"Gravesong - {chapter_title}")
book.add_chapter( book_items.append(
pypub.create_chapter_from_string(chapter_content, chapter_title) epub.EpubHtml(
title=chapter_title,
content=utils.generate_title_html(chapter_title, chapter_content),
file_name=f"{chapter_title}.xhtml",
lang=metadatas["language"],
)
) )
chapter_title = paragraph.get_text().strip() chapter_title = paragraph.get_text().strip()
elif paragraph.name == "p": elif paragraph.name == "p":
chapter_content += paragraph.prettify() chapter_content += paragraph.prettify()
logging.log(logging.INFO, f"Gravesong - {chapter_title}") logging.log(logging.INFO, f"Gravesong - {chapter_title}")
book.add_chapter(pypub.create_chapter_from_string(chapter_content, chapter_title)) book_items.append(
epub.EpubHtml(
title=chapter_title,
content=utils.generate_title_html(chapter_title, chapter_content),
file_name=f"{chapter_title}.xhtml",
lang=metadatas["language"],
)
)
logging.log(logging.INFO, "Gravesong - Book") logging.log(logging.INFO, "Gravesong - Book")
book.create_epub(output_dir) for book_item in book_items:
book.add_item(book_item)
book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())
book.toc = book_items
book.spine = ["cover", "nav", *book_items]
epub.write_epub(f"{output_dir}/Gravesong.epub", book)

View File

@ -1,3 +1,3 @@
requests==2.28.1 requests==2.28.1
beautifulsoup4==4.11.1 beautifulsoup4==4.11.1
git+https://git.crystalyx.net/Xefir/pypub@fix/py3#pypub EbookLib==0.17.1

9
run.py
View File

@ -36,18 +36,16 @@ async def main():
await asyncio.gather( await asyncio.gather(
gravesong.process( gravesong.process(
{ {
"creator": "Pirateaba", "authors": ["Pirateaba"],
"language": "en", "language": "en",
"publisher": "Xefir",
"cover": f"{output_imgs}/gravesong-by-boboplushie.jpg", "cover": f"{output_imgs}/gravesong-by-boboplushie.jpg",
}, },
output_epubs, output_epubs,
), ),
twi.process( twi.process(
{ {
"creator": "Pirateaba", "authors": ["Pirateaba"],
"language": "en", "language": "en",
"publisher": "Xefir",
"cover": f"{output_imgs}/twi.jpg", "cover": f"{output_imgs}/twi.jpg",
}, },
output_epubs, output_epubs,
@ -55,9 +53,8 @@ async def main():
), ),
twi.process( twi.process(
{ {
"creator": "Pirateaba", "authors": ["Pirateaba", "ElliVia"],
"language": "fr", "language": "fr",
"publisher": "Maroti, ElliVia",
"cover": f"{output_imgs}/twi.jpg", "cover": f"{output_imgs}/twi.jpg",
}, },
output_epubs, output_epubs,

53
twi.py
View File

@ -1,12 +1,22 @@
import bs4 import bs4
import executor
import logging import logging
import pypub
import requests import requests
import utils
from ebooklib import epub
def fetchVolume(title, metadata, volume_title, output_dir, links): def fetchVolume(title, description, metadatas, volume_title, output_dir, links):
book = pypub.Epub(f"{title} - {volume_title}", **metadata) book = epub.EpubBook()
book_items = []
book.set_identifier(title.replace(" ", "").lower())
book.set_title(title)
book.set_language(metadatas["language"])
book.set_cover("cover.jpg", open(metadatas["cover"], "rb").read())
book.add_metadata("DC", "description", description)
for author in metadatas["authors"]:
book.add_author(author)
for link in links: for link in links:
chapter_response = requests.get(link["href"]) chapter_response = requests.get(link["href"])
@ -18,9 +28,6 @@ def fetchVolume(title, metadata, volume_title, output_dir, links):
chapter_content.find_all("a"), chapter_content.find_all("a"),
chapter_content.find_all("h3"), chapter_content.find_all("h3"),
chapter_content.find_all("hr"), chapter_content.find_all("hr"),
chapter_content.find_all("img"),
chapter_content.find_all("iframe"),
chapter_content.find_all("script"),
chapter_content.select("div.tiled-gallery"), chapter_content.select("div.tiled-gallery"),
] ]
@ -29,21 +36,36 @@ def fetchVolume(title, metadata, volume_title, output_dir, links):
removed.decompose() removed.decompose()
logging.log(logging.INFO, f"{title} - {chapter_title}") logging.log(logging.INFO, f"{title} - {chapter_title}")
book.add_chapter( book_items.append(
pypub.create_chapter_from_string(chapter_content.prettify(), chapter_title) epub.EpubHtml(
title=chapter_title,
content=utils.generate_title_html(
chapter_title, chapter_content.prettify()
),
file_name=f"{chapter_title}.xhtml",
lang=metadatas["language"],
)
) )
logging.log(logging.INFO, f"{title} - {volume_title}") logging.log(logging.INFO, f"{title} - {volume_title}")
book.create_epub(output_dir) for book_item in book_items:
book.add_item(book_item)
book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())
book.toc = book_items
book.spine = ["cover", "nav", *book_items]
epub.write_epub(f"{output_dir}/{title} - {volume_title}.epub", book)
@executor.executor @utils.executor
def process(metadata, output_dir, url): def process(metadatas, output_dir, url):
response = requests.get(url) response = requests.get(url)
html = bs4.BeautifulSoup(response.content, "html.parser") html = bs4.BeautifulSoup(response.content, "html.parser")
content = html.select("div.entry-content > p") content = html.select("div.entry-content > p")
title = html.select_one("#site-title > span > a").get_text().strip() title = html.select_one("#site-title > span > a").get_text().strip()
description = html.select_one("#site-description").get_text().strip()
volume_title = None volume_title = None
@ -52,5 +74,10 @@ def process(metadata, output_dir, url):
volume_title = paragraph.strong.get_text().strip() volume_title = paragraph.strong.get_text().strip()
else: else:
fetchVolume( fetchVolume(
title, metadata, volume_title, output_dir, paragraph.find_all("a") title,
description,
metadatas,
volume_title,
output_dir,
paragraph.find_all("a"),
) )

View File

@ -10,3 +10,7 @@ def executor(f):
) )
return wrapped return wrapped
def generate_title_html(title, content):
    """Return chapter markup: a centered ``<h1>`` heading followed by *content*.

    ``title`` is treated as plain text and HTML-escaped before interpolation, so a
    title containing ``&``, ``<`` or ``>`` cannot produce malformed markup.
    ``content`` is expected to already be markup and is appended unchanged.

    Args:
        title: Heading text; converted with ``str()`` before escaping.
        content: HTML fragment placed directly after the heading.

    Returns:
        The heading and the content concatenated into one HTML string.
    """
    # Imported locally so the helper does not depend on the module's import block.
    from html import escape

    # quote=False: the title lands in element content, not in an attribute value,
    # so quotes and apostrophes can stay readable.
    heading = escape(str(title), quote=False)
    return f"<h1 style='text-align:center;margin:4rem'>{heading}</h1>{content}"