Michel Roux
2cc25e8cf1
All checks were successful
continuous-integration/drone/push Build is passing
62 lines
1.7 KiB
Python
62 lines
1.7 KiB
Python
import os
|
|
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
from pypub import Epub, create_chapter_from_string
|
|
|
|
# Build one EPUB per volume of "The Wandering Inn" from the site's table of
# contents and write the resulting books into the ./output directory.

# Make sure the output directory exists. exist_ok=True replaces the separate
# os.path.exists() check, so there is no gap between checking and creating.
os.makedirs('output', exist_ok=True)

# Download the table of contents and take the direct <p> children of the
# entry body; the loop below treats each one either as a volume heading or
# as a paragraph holding chapter links.
html = requests.get('https://wanderinginn.com/table-of-contents/')
soup = BeautifulSoup(html.text, 'html.parser')

tags = soup.select('div.entry-content > p')
proceed = True
volume = 0
# volumes[n] holds the Epub for volume n. The three None placeholders occupy
# indices 0-2, so headings numbered below 3 are skipped and the first book
# that gets appended lands at index 3.
volumes = [None, None, None]

for volume_tag in tags:
    if not proceed:
        # This is the paragraph right after a skipped volume heading.
        proceed = True
        continue

    heading = volume_tag.get_text()
    if 'Volume' in heading:
        # Parse the number once; int() raises ValueError if anything other
        # than the number remains after stripping the 'Volume ' prefix.
        number = int(heading.replace('Volume ', ''))

        if number < len(volumes):
            # A slot for this volume already exists: skip the heading and
            # flag the following paragraph to be skipped as well.
            proceed = False
            continue

        volume = number
        volumes.append(
            Epub("The Wandering Inn - Volume %d" % volume, creator='Pirateaba', rights='Pirateaba',
                 language='en', cover='cover.png')
        )
        continue

    book = volumes[volume]
    if book is None:
        # No book exists for the current volume number (links seen before the
        # first kept heading, or an extra paragraph under a skipped volume).
        # Previously this downloaded the chapter and then failed on
        # None.add_chapter(); skip before issuing any request instead.
        continue

    for link_tag in volume_tag:
        if link_tag.name != 'a':
            continue

        # Use dedicated names here: the original rebound the module-level
        # `soup` (the table of contents) and reused `chapter` for two
        # different kinds of object.
        response = requests.get(link_tag['href'])
        chapter_soup = BeautifulSoup(response.text, 'html.parser')

        title = chapter_soup.select_one('h1.entry-title').get_text()
        text = chapter_soup.select_one('div.entry-content')

        # Strip every link and horizontal rule from the chapter body.
        for pagination in text.find_all('a'):
            pagination.decompose()
        for cut in text.find_all('hr'):
            cut.decompose()

        # From volume 7 onwards images are removed as well (the reason is
        # not recorded in this file).
        if volume >= 7:
            for image in text.find_all('img'):
                image.decompose()

        # Progress output: one line per downloaded chapter.
        print(title)

        chapter = create_chapter_from_string(str(text), title)
        book.add_chapter(chapter)

# Write every book that was actually created; the None placeholders are skipped.
for ebook in volumes:
    if ebook is None:
        continue

    ebook.create_epub('output')