Remove iframes and chapter index
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
parent
28b2091182
commit
59c9f8710c
@@ -34,6 +34,7 @@ def process(metadatas, output_dir):
|
|||||||
|
|
||||||
chapter_title = "Introduction"
|
chapter_title = "Introduction"
|
||||||
chapter_content = ""
|
chapter_content = ""
|
||||||
|
chapter_index = 1
|
||||||
|
|
||||||
for paragraph in entry_content.children:
|
for paragraph in entry_content.children:
|
||||||
if paragraph.name == "h1":
|
if paragraph.name == "h1":
|
||||||
@@ -42,11 +43,12 @@ def process(metadatas, output_dir):
|
|||||||
epub.EpubHtml(
|
epub.EpubHtml(
|
||||||
title=chapter_title,
|
title=chapter_title,
|
||||||
content=utils.generate_title_html(chapter_title, chapter_content),
|
content=utils.generate_title_html(chapter_title, chapter_content),
|
||||||
file_name=f"{chapter_title}.xhtml",
|
file_name=f"{chapter_index}.xhtml",
|
||||||
lang=metadatas["language"],
|
lang=metadatas["language"],
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
chapter_title = paragraph.get_text().strip()
|
chapter_title = paragraph.get_text().strip()
|
||||||
|
chapter_index += 1
|
||||||
elif paragraph.name == "p":
|
elif paragraph.name == "p":
|
||||||
chapter_content += paragraph.prettify()
|
chapter_content += paragraph.prettify()
|
||||||
|
|
||||||
@@ -55,7 +57,7 @@ def process(metadatas, output_dir):
|
|||||||
epub.EpubHtml(
|
epub.EpubHtml(
|
||||||
title=chapter_title,
|
title=chapter_title,
|
||||||
content=utils.generate_title_html(chapter_title, chapter_content),
|
content=utils.generate_title_html(chapter_title, chapter_content),
|
||||||
file_name=f"{chapter_title}.xhtml",
|
file_name=f"{chapter_index}.xhtml",
|
||||||
lang=metadatas["language"],
|
lang=metadatas["language"],
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
6
twi.py
6
twi.py
@@ -18,6 +18,7 @@ def fetchVolume(title, description, metadatas, volume_title, output_dir, links):
|
|||||||
for author in metadatas["authors"]:
|
for author in metadatas["authors"]:
|
||||||
book.add_author(author)
|
book.add_author(author)
|
||||||
|
|
||||||
|
chapter_index = 1
|
||||||
for link in links:
|
for link in links:
|
||||||
chapter_response = requests.get(link["href"])
|
chapter_response = requests.get(link["href"])
|
||||||
chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser")
|
chapter_html = bs4.BeautifulSoup(chapter_response.content, "html.parser")
|
||||||
@@ -28,6 +29,8 @@ def fetchVolume(title, description, metadatas, volume_title, output_dir, links):
|
|||||||
chapter_content.find_all("a"),
|
chapter_content.find_all("a"),
|
||||||
chapter_content.find_all("h3"),
|
chapter_content.find_all("h3"),
|
||||||
chapter_content.find_all("hr"),
|
chapter_content.find_all("hr"),
|
||||||
|
chapter_content.find_all("iframe"),
|
||||||
|
chapter_content.find_all("script"),
|
||||||
chapter_content.select("div.tiled-gallery"),
|
chapter_content.select("div.tiled-gallery"),
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -42,10 +45,11 @@ def fetchVolume(title, description, metadatas, volume_title, output_dir, links):
|
|||||||
content=utils.generate_title_html(
|
content=utils.generate_title_html(
|
||||||
chapter_title, chapter_content.prettify()
|
chapter_title, chapter_content.prettify()
|
||||||
),
|
),
|
||||||
file_name=f"{chapter_title}.xhtml",
|
file_name=f"{chapter_index}.xhtml",
|
||||||
lang=metadatas["language"],
|
lang=metadatas["language"],
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
chapter_index += 1
|
||||||
|
|
||||||
logging.log(logging.INFO, f"{title} - {volume_title}")
|
logging.log(logging.INFO, f"{title} - {volume_title}")
|
||||||
for book_item in book_items:
|
for book_item in book_items:
|
||||||
|
Reference in New Issue
Block a user