From 59c9f8710cfa76c24e4573d65e53e2524adc6f86 Mon Sep 17 00:00:00 2001 From: Michel Roux Date: Thu, 18 Aug 2022 13:43:30 +0200 Subject: [PATCH] Remove iframes and chapter index --- gravesong.py | 6 ++++-- twi.py | 6 +++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/gravesong.py b/gravesong.py index 747d942..d545222 100644 --- a/gravesong.py +++ b/gravesong.py @@ -34,6 +34,7 @@ def process(metadatas, output_dir): chapter_title = "Introduction" chapter_content = "" + chapter_index = 1 for paragraph in entry_content.children: if paragraph.name == "h1": @@ -42,11 +43,12 @@ def process(metadatas, output_dir): epub.EpubHtml( title=chapter_title, content=utils.generate_title_html(chapter_title, chapter_content), - file_name=f"{chapter_title}.xhtml", + file_name=f"{chapter_index}.xhtml", lang=metadatas["language"], ) ) chapter_title = paragraph.get_text().strip() + chapter_index += 1 elif paragraph.name == "p": chapter_content += paragraph.prettify() @@ -55,7 +57,7 @@ def process(metadatas, output_dir): epub.EpubHtml( title=chapter_title, content=utils.generate_title_html(chapter_title, chapter_content), - file_name=f"{chapter_title}.xhtml", + file_name=f"{chapter_index}.xhtml", lang=metadatas["language"], ) ) diff --git a/twi.py b/twi.py index 1d9d270..8b30383 100644 --- a/twi.py +++ b/twi.py @@ -18,6 +18,7 @@ def fetchVolume(title, description, metadatas, volume_title, output_dir, links): for author in metadatas["authors"]: book.add_author(author) + chapter_index = 1 for link in links: chapter_response = requests.get(link["href"]) chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser") @@ -28,6 +29,8 @@ def fetchVolume(title, description, metadatas, volume_title, output_dir, links): chapter_content.find_all("a"), chapter_content.find_all("h3"), chapter_content.find_all("hr"), + chapter_content.find_all("iframe"), + chapter_content.find_all("script"), chapter_content.select("div.tiled-gallery"), ] @@ -42,10 +45,11 @@ def fetchVolume(title, description, metadatas, volume_title, output_dir, links): content=utils.generate_title_html( chapter_title, chapter_content.prettify() ), - file_name=f"{chapter_title}.xhtml", + file_name=f"{chapter_index}.xhtml", lang=metadatas["language"], ) ) + chapter_index += 1 logging.log(logging.INFO, f"{title} - {volume_title}") for book_item in book_items: