Remove iframes and chapter index
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Michel Roux 2022-08-18 13:43:30 +02:00
parent 28b2091182
commit 59c9f8710c
2 changed files with 9 additions and 3 deletions

View File

@@ -34,6 +34,7 @@ def process(metadatas, output_dir):
chapter_title = "Introduction"
chapter_content = ""
+chapter_index = 1
for paragraph in entry_content.children:
if paragraph.name == "h1":
@@ -42,11 +43,12 @@ def process(metadatas, output_dir):
epub.EpubHtml(
title=chapter_title,
content=utils.generate_title_html(chapter_title, chapter_content),
-file_name=f"{chapter_title}.xhtml",
+file_name=f"{chapter_index}.xhtml",
lang=metadatas["language"],
)
)
chapter_title = paragraph.get_text().strip()
+chapter_index += 1
elif paragraph.name == "p":
chapter_content += paragraph.prettify()
@@ -55,7 +57,7 @@ def process(metadatas, output_dir):
epub.EpubHtml(
title=chapter_title,
content=utils.generate_title_html(chapter_title, chapter_content),
-file_name=f"{chapter_title}.xhtml",
+file_name=f"{chapter_index}.xhtml",
lang=metadatas["language"],
)
)

6
twi.py
View File

@@ -18,6 +18,7 @@ def fetchVolume(title, description, metadatas, volume_title, output_dir, links):
for author in metadatas["authors"]:
book.add_author(author)
+chapter_index = 1
for link in links:
chapter_response = requests.get(link["href"])
chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser")
@@ -28,6 +29,8 @@ def fetchVolume(title, description, metadatas, volume_title, output_dir, links):
chapter_content.find_all("a"),
chapter_content.find_all("h3"),
chapter_content.find_all("hr"),
+chapter_content.find_all("iframe"),
+chapter_content.find_all("script"),
chapter_content.select("div.tiled-gallery"),
]
@@ -42,10 +45,11 @@ def fetchVolume(title, description, metadatas, volume_title, output_dir, links):
content=utils.generate_title_html(
chapter_title, chapter_content.prettify()
),
-file_name=f"{chapter_title}.xhtml",
+file_name=f"{chapter_index}.xhtml",
lang=metadatas["language"],
)
)
+chapter_index += 1
logging.log(logging.INFO, f"{title} - {volume_title}")
for book_item in book_items: