diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..3b3f762 --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +exclude = .venv/ +max-line-length = 100 diff --git a/convert.sh b/convert.sh index 6d686e1..9860f7a 100644 --- a/convert.sh +++ b/convert.sh @@ -2,6 +2,6 @@ for FILE in output/epubs/* do - ebook-polish -fpuiU "${FILE}" "${FILE}" + ebook-polish -fjpuiHU "${FILE}" "${FILE}" ebook-convert "${FILE}" "${FILE%.*}.pdf" done diff --git a/gravesong.py b/gravesong.py index 8ff04a0..747d942 100644 --- a/gravesong.py +++ b/gravesong.py @@ -1,13 +1,23 @@ import bs4 -import executor import logging -import pypub import requests +import utils + +from ebooklib import epub -@executor.executor -def process(metadata, output_dir): - book = pypub.Epub("Gravesong", **metadata) +@utils.executor +def process(metadatas, output_dir): + book = epub.EpubBook() + book_items = [] + book.set_identifier("gravesong") + book.set_title("Gravesong") + book.set_language(metadatas["language"]) + book.set_cover("cover.jpg", open(metadatas["cover"], "rb").read()) + book.add_metadata("DC", "description", "Advance Patreon Edition") + + for author in metadatas["authors"]: + book.add_author(author) response = requests.post( "https://wanderinginn.com/wp-pass.php", @@ -28,15 +38,34 @@ def process(metadata, output_dir): for paragraph in entry_content.children: if paragraph.name == "h1": logging.log(logging.INFO, f"Gravesong - {chapter_title}") - book.add_chapter( - pypub.create_chapter_from_string(chapter_content, chapter_title) + book_items.append( + epub.EpubHtml( + title=chapter_title, + content=utils.generate_title_html(chapter_title, chapter_content), + file_name=f"{chapter_title}.xhtml", + lang=metadatas["language"], + ) ) chapter_title = paragraph.get_text().strip() elif paragraph.name == "p": chapter_content += paragraph.prettify() logging.log(logging.INFO, f"Gravesong - {chapter_title}") - book.add_chapter(pypub.create_chapter_from_string(chapter_content, chapter_title)) + book_items.append( + epub.EpubHtml( + title=chapter_title, + content=utils.generate_title_html(chapter_title, chapter_content), + file_name=f"{chapter_title}.xhtml", + lang=metadatas["language"], + ) + ) logging.log(logging.INFO, "Gravesong - Book") - book.create_epub(output_dir) + for book_item in book_items: + book.add_item(book_item) + + book.add_item(epub.EpubNcx()) + book.add_item(epub.EpubNav()) + book.toc = book_items + book.spine = ["cover", "nav", *book_items] + epub.write_epub(f"{output_dir}/Gravesong.epub", book) diff --git a/requirements.txt b/requirements.txt index 553ae22..38dedd6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ requests==2.28.1 beautifulsoup4==4.11.1 -git+https://git.crystalyx.net/Xefir/pypub@fix/py3#pypub +EbookLib==0.17.1 diff --git a/run.py b/run.py index 3efa65b..113d036 100644 --- a/run.py +++ b/run.py @@ -36,18 +36,16 @@ async def main(): await asyncio.gather( gravesong.process( { - "creator": "Pirateaba", + "authors": ["Pirateaba"], "language": "en", - "publisher": "Xefir", "cover": f"{output_imgs}/gravesong-by-boboplushie.jpg", }, output_epubs, ), twi.process( { - "creator": "Pirateaba", + "authors": ["Pirateaba"], "language": "en", - "publisher": "Xefir", "cover": f"{output_imgs}/twi.jpg", }, output_epubs, @@ -55,9 +53,8 @@ async def main(): ), twi.process( { - "creator": "Pirateaba", + "authors": ["Pirateaba", "ElliVia"], "language": "fr", - "publisher": "Maroti, ElliVia", "cover": f"{output_imgs}/twi.jpg", }, output_epubs, diff --git a/twi.py b/twi.py index fdfd984..1d9d270 100644 --- a/twi.py +++ b/twi.py @@ -1,12 +1,22 @@ import bs4 -import executor import logging -import pypub import requests +import utils + +from ebooklib import epub -def fetchVolume(title, metadata, volume_title, output_dir, links): - book = pypub.Epub(f"{title} - {volume_title}", **metadata) +def fetchVolume(title, description, metadatas, volume_title, output_dir, links): + book = epub.EpubBook() + book_items = [] + book.set_identifier(title.replace(" ", "").lower()) + book.set_title(title) + book.set_language(metadatas["language"]) + book.set_cover("cover.jpg", open(metadatas["cover"], "rb").read()) + book.add_metadata("DC", "description", description) + + for author in metadatas["authors"]: + book.add_author(author) for link in links: chapter_response = requests.get(link["href"]) @@ -18,9 +28,6 @@ def fetchVolume(title, metadata, volume_title, output_dir, links): chapter_content.find_all("a"), chapter_content.find_all("h3"), chapter_content.find_all("hr"), - chapter_content.find_all("img"), - chapter_content.find_all("iframe"), - chapter_content.find_all("script"), chapter_content.select("div.tiled-gallery"), ] @@ -29,21 +36,36 @@ def fetchVolume(title, metadata, volume_title, output_dir, links): removed.decompose() logging.log(logging.INFO, f"{title} - {chapter_title}") - book.add_chapter( - pypub.create_chapter_from_string(chapter_content.prettify(), chapter_title) + book_items.append( + epub.EpubHtml( + title=chapter_title, + content=utils.generate_title_html( + chapter_title, chapter_content.prettify() + ), + file_name=f"{chapter_title}.xhtml", + lang=metadatas["language"], + ) ) logging.log(logging.INFO, f"{title} - {volume_title}") - book.create_epub(output_dir) + for book_item in book_items: + book.add_item(book_item) + + book.add_item(epub.EpubNcx()) + book.add_item(epub.EpubNav()) + book.toc = book_items + book.spine = ["cover", "nav", *book_items] + epub.write_epub(f"{output_dir}/{title} - {volume_title}.epub", book) -@executor.executor -def process(metadata, output_dir, url): +@utils.executor +def process(metadatas, output_dir, url): response = requests.get(url) html = bs4.BeautifulSoup(response.content, "html.parser") content = html.select("div.entry-content > p") title = html.select_one("#site-title > span > a").get_text().strip() + description = html.select_one("#site-description").get_text().strip() volume_title = None @@ -52,5 +74,10 @@ def process(metadata, output_dir, url): volume_title = paragraph.strong.get_text().strip() else: fetchVolume( - title, metadata, volume_title, output_dir, paragraph.find_all("a") + title, + description, + metadatas, + volume_title, + output_dir, + paragraph.find_all("a"), ) diff --git a/executor.py b/utils.py similarity index 67% rename from executor.py rename to utils.py index eb1768f..551b04a 100644 --- a/executor.py +++ b/utils.py @@ -10,3 +10,7 @@ def executor(f): ) return wrapped + + +def generate_title_html(title, content): + return f"