New way of generating epubs
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
parent
4d550b3806
commit
28b2091182
@ -2,6 +2,6 @@
|
|||||||
|
|
||||||
for FILE in output/epubs/*
|
for FILE in output/epubs/*
|
||||||
do
|
do
|
||||||
ebook-polish -fpuiU "${FILE}" "${FILE}"
|
ebook-polish -fjpuiHU "${FILE}" "${FILE}"
|
||||||
ebook-convert "${FILE}" "${FILE%.*}.pdf"
|
ebook-convert "${FILE}" "${FILE%.*}.pdf"
|
||||||
done
|
done
|
||||||
|
47
gravesong.py
47
gravesong.py
@ -1,13 +1,23 @@
|
|||||||
import bs4
|
import bs4
|
||||||
import executor
|
|
||||||
import logging
|
import logging
|
||||||
import pypub
|
|
||||||
import requests
|
import requests
|
||||||
|
import utils
|
||||||
|
|
||||||
|
from ebooklib import epub
|
||||||
|
|
||||||
|
|
||||||
@executor.executor
|
@utils.executor
|
||||||
def process(metadata, output_dir):
|
def process(metadatas, output_dir):
|
||||||
book = pypub.Epub("Gravesong", **metadata)
|
book = epub.EpubBook()
|
||||||
|
book_items = []
|
||||||
|
book.set_identifier("gravesong")
|
||||||
|
book.set_title("Gravesong")
|
||||||
|
book.set_language(metadatas["language"])
|
||||||
|
book.set_cover("cover.jpg", open(metadatas["cover"], "rb").read())
|
||||||
|
book.add_metadata("DC", "description", "Advance Patreon Edition")
|
||||||
|
|
||||||
|
for author in metadatas["authors"]:
|
||||||
|
book.add_author(author)
|
||||||
|
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
"https://wanderinginn.com/wp-pass.php",
|
"https://wanderinginn.com/wp-pass.php",
|
||||||
@ -28,15 +38,34 @@ def process(metadata, output_dir):
|
|||||||
for paragraph in entry_content.children:
|
for paragraph in entry_content.children:
|
||||||
if paragraph.name == "h1":
|
if paragraph.name == "h1":
|
||||||
logging.log(logging.INFO, f"Gravesong - {chapter_title}")
|
logging.log(logging.INFO, f"Gravesong - {chapter_title}")
|
||||||
book.add_chapter(
|
book_items.append(
|
||||||
pypub.create_chapter_from_string(chapter_content, chapter_title)
|
epub.EpubHtml(
|
||||||
|
title=chapter_title,
|
||||||
|
content=utils.generate_title_html(chapter_title, chapter_content),
|
||||||
|
file_name=f"{chapter_title}.xhtml",
|
||||||
|
lang=metadatas["language"],
|
||||||
|
)
|
||||||
)
|
)
|
||||||
chapter_title = paragraph.get_text().strip()
|
chapter_title = paragraph.get_text().strip()
|
||||||
elif paragraph.name == "p":
|
elif paragraph.name == "p":
|
||||||
chapter_content += paragraph.prettify()
|
chapter_content += paragraph.prettify()
|
||||||
|
|
||||||
logging.log(logging.INFO, f"Gravesong - {chapter_title}")
|
logging.log(logging.INFO, f"Gravesong - {chapter_title}")
|
||||||
book.add_chapter(pypub.create_chapter_from_string(chapter_content, chapter_title))
|
book_items.append(
|
||||||
|
epub.EpubHtml(
|
||||||
|
title=chapter_title,
|
||||||
|
content=utils.generate_title_html(chapter_title, chapter_content),
|
||||||
|
file_name=f"{chapter_title}.xhtml",
|
||||||
|
lang=metadatas["language"],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
logging.log(logging.INFO, "Gravesong - Book")
|
logging.log(logging.INFO, "Gravesong - Book")
|
||||||
book.create_epub(output_dir)
|
for book_item in book_items:
|
||||||
|
book.add_item(book_item)
|
||||||
|
|
||||||
|
book.add_item(epub.EpubNcx())
|
||||||
|
book.add_item(epub.EpubNav())
|
||||||
|
book.toc = book_items
|
||||||
|
book.spine = ["cover", "nav", *book_items]
|
||||||
|
epub.write_epub(f"{output_dir}/Gravesong.epub", book)
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
requests==2.28.1
|
requests==2.28.1
|
||||||
beautifulsoup4==4.11.1
|
beautifulsoup4==4.11.1
|
||||||
git+https://git.crystalyx.net/Xefir/pypub@fix/py3#pypub
|
EbookLib==0.17.1
|
||||||
|
9
run.py
9
run.py
@ -36,18 +36,16 @@ async def main():
|
|||||||
await asyncio.gather(
|
await asyncio.gather(
|
||||||
gravesong.process(
|
gravesong.process(
|
||||||
{
|
{
|
||||||
"creator": "Pirateaba",
|
"authors": ["Pirateaba"],
|
||||||
"language": "en",
|
"language": "en",
|
||||||
"publisher": "Xefir",
|
|
||||||
"cover": f"{output_imgs}/gravesong-by-boboplushie.jpg",
|
"cover": f"{output_imgs}/gravesong-by-boboplushie.jpg",
|
||||||
},
|
},
|
||||||
output_epubs,
|
output_epubs,
|
||||||
),
|
),
|
||||||
twi.process(
|
twi.process(
|
||||||
{
|
{
|
||||||
"creator": "Pirateaba",
|
"authors": ["Pirateaba"],
|
||||||
"language": "en",
|
"language": "en",
|
||||||
"publisher": "Xefir",
|
|
||||||
"cover": f"{output_imgs}/twi.jpg",
|
"cover": f"{output_imgs}/twi.jpg",
|
||||||
},
|
},
|
||||||
output_epubs,
|
output_epubs,
|
||||||
@ -55,9 +53,8 @@ async def main():
|
|||||||
),
|
),
|
||||||
twi.process(
|
twi.process(
|
||||||
{
|
{
|
||||||
"creator": "Pirateaba",
|
"authors": ["Pirateaba", "ElliVia"],
|
||||||
"language": "fr",
|
"language": "fr",
|
||||||
"publisher": "Maroti, ElliVia",
|
|
||||||
"cover": f"{output_imgs}/twi.jpg",
|
"cover": f"{output_imgs}/twi.jpg",
|
||||||
},
|
},
|
||||||
output_epubs,
|
output_epubs,
|
||||||
|
53
twi.py
53
twi.py
@ -1,12 +1,22 @@
|
|||||||
import bs4
|
import bs4
|
||||||
import executor
|
|
||||||
import logging
|
import logging
|
||||||
import pypub
|
|
||||||
import requests
|
import requests
|
||||||
|
import utils
|
||||||
|
|
||||||
|
from ebooklib import epub
|
||||||
|
|
||||||
|
|
||||||
def fetchVolume(title, metadata, volume_title, output_dir, links):
|
def fetchVolume(title, description, metadatas, volume_title, output_dir, links):
|
||||||
book = pypub.Epub(f"{title} - {volume_title}", **metadata)
|
book = epub.EpubBook()
|
||||||
|
book_items = []
|
||||||
|
book.set_identifier(title.replace(" ", "").lower())
|
||||||
|
book.set_title(title)
|
||||||
|
book.set_language(metadatas["language"])
|
||||||
|
book.set_cover("cover.jpg", open(metadatas["cover"], "rb").read())
|
||||||
|
book.add_metadata("DC", "description", description)
|
||||||
|
|
||||||
|
for author in metadatas["authors"]:
|
||||||
|
book.add_author(author)
|
||||||
|
|
||||||
for link in links:
|
for link in links:
|
||||||
chapter_response = requests.get(link["href"])
|
chapter_response = requests.get(link["href"])
|
||||||
@ -18,9 +28,6 @@ def fetchVolume(title, metadata, volume_title, output_dir, links):
|
|||||||
chapter_content.find_all("a"),
|
chapter_content.find_all("a"),
|
||||||
chapter_content.find_all("h3"),
|
chapter_content.find_all("h3"),
|
||||||
chapter_content.find_all("hr"),
|
chapter_content.find_all("hr"),
|
||||||
chapter_content.find_all("img"),
|
|
||||||
chapter_content.find_all("iframe"),
|
|
||||||
chapter_content.find_all("script"),
|
|
||||||
chapter_content.select("div.tiled-gallery"),
|
chapter_content.select("div.tiled-gallery"),
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -29,21 +36,36 @@ def fetchVolume(title, metadata, volume_title, output_dir, links):
|
|||||||
removed.decompose()
|
removed.decompose()
|
||||||
|
|
||||||
logging.log(logging.INFO, f"{title} - {chapter_title}")
|
logging.log(logging.INFO, f"{title} - {chapter_title}")
|
||||||
book.add_chapter(
|
book_items.append(
|
||||||
pypub.create_chapter_from_string(chapter_content.prettify(), chapter_title)
|
epub.EpubHtml(
|
||||||
|
title=chapter_title,
|
||||||
|
content=utils.generate_title_html(
|
||||||
|
chapter_title, chapter_content.prettify()
|
||||||
|
),
|
||||||
|
file_name=f"{chapter_title}.xhtml",
|
||||||
|
lang=metadatas["language"],
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
logging.log(logging.INFO, f"{title} - {volume_title}")
|
logging.log(logging.INFO, f"{title} - {volume_title}")
|
||||||
book.create_epub(output_dir)
|
for book_item in book_items:
|
||||||
|
book.add_item(book_item)
|
||||||
|
|
||||||
|
book.add_item(epub.EpubNcx())
|
||||||
|
book.add_item(epub.EpubNav())
|
||||||
|
book.toc = book_items
|
||||||
|
book.spine = ["cover", "nav", *book_items]
|
||||||
|
epub.write_epub(f"{output_dir}/{title} - {volume_title}.epub", book)
|
||||||
|
|
||||||
|
|
||||||
@executor.executor
|
@utils.executor
|
||||||
def process(metadata, output_dir, url):
|
def process(metadatas, output_dir, url):
|
||||||
response = requests.get(url)
|
response = requests.get(url)
|
||||||
|
|
||||||
html = bs4.BeautifulSoup(response.content, "html.parser")
|
html = bs4.BeautifulSoup(response.content, "html.parser")
|
||||||
content = html.select("div.entry-content > p")
|
content = html.select("div.entry-content > p")
|
||||||
title = html.select_one("#site-title > span > a").get_text().strip()
|
title = html.select_one("#site-title > span > a").get_text().strip()
|
||||||
|
description = html.select_one("#site-description").get_text().strip()
|
||||||
|
|
||||||
volume_title = None
|
volume_title = None
|
||||||
|
|
||||||
@ -52,5 +74,10 @@ def process(metadata, output_dir, url):
|
|||||||
volume_title = paragraph.strong.get_text().strip()
|
volume_title = paragraph.strong.get_text().strip()
|
||||||
else:
|
else:
|
||||||
fetchVolume(
|
fetchVolume(
|
||||||
title, metadata, volume_title, output_dir, paragraph.find_all("a")
|
title,
|
||||||
|
description,
|
||||||
|
metadatas,
|
||||||
|
volume_title,
|
||||||
|
output_dir,
|
||||||
|
paragraph.find_all("a"),
|
||||||
)
|
)
|
||||||
|
@ -10,3 +10,7 @@ def executor(f):
|
|||||||
)
|
)
|
||||||
|
|
||||||
return wrapped
|
return wrapped
|
||||||
|
|
||||||
|
|
||||||
|
def generate_title_html(title, content):
|
||||||
|
return f"<h1 style='text-align:center;margin:4rem'>{title}</h1>{content}"
|
Reference in New Issue
Block a user