import os
import requests
from bs4 import BeautifulSoup
from pypub import Epub, create_chapter_from_string
# Volumes numbered below this are skipped entirely: their chapters are not
# downloaded and no EPUB is written for them.
FIRST_VOLUME = 3
TOC_URL = 'https://wanderinginn.com/table-of-contents/'
OUTPUT_DIR = 'output'
# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 30


def parse_volume_number(text):
    """Return the number from a "Volume N" heading, or None.

    Surrounding whitespace is ignored. Anything that is not the word
    "Volume" followed by a plain decimal number (for example a paragraph of
    chapter links) yields None instead of raising.
    """
    label = text.strip()
    if not label.startswith('Volume'):
        return None
    digits = label[len('Volume'):].strip()
    return int(digits) if digits.isdecimal() else None


def fetch_soup(url):
    """Download *url* and return its body parsed with ``html.parser``.

    Raises ``requests.HTTPError`` on a 4xx/5xx response so an error page is
    never mistaken for real content.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def build_chapter(url):
    """Download one chapter page and convert it into a pypub chapter.

    The title is read from ``h1.entry-title`` and the body from
    ``div.entry-content``; the title is printed as a progress indicator.
    """
    page = fetch_soup(url)
    title = page.select_one('h1.entry-title').get_text()
    body = page.select_one('div.entry-content')
    # Strip every link from the body; this removes the previous/next
    # navigation (and any other anchors along with it).
    for anchor in body.find_all('a'):
        anchor.decompose()
    # Images are dropped as well, so the chapter is text only.
    for image in body.find_all('img'):
        image.decompose()
    print(title)
    return create_chapter_from_string(str(body), title)


def main():
    """Scrape the table of contents and write one EPUB per volume.

    Paragraphs of the table of contents are walked in document order. A
    "Volume N" paragraph switches the current volume; links that are direct
    children of any paragraph are added as chapters of the current volume.
    Volumes below ``FIRST_VOLUME``, and links that appear before the first
    volume heading, are skipped.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    toc = fetch_soup(TOC_URL)

    # Keyed by volume number so lookups never depend on the order or the
    # starting number of the headings.
    volumes = {}
    current = None
    for paragraph in toc.select('div.entry-content > p'):
        number = parse_volume_number(paragraph.get_text())
        if number is not None:
            current = number
            if current >= FIRST_VOLUME and current not in volumes:
                volumes[current] = Epub(
                    "The Wandering Inn - Volume %d" % current,
                    cover='cover.png',
                )
        if current not in volumes:
            # Either no heading seen yet or the volume is being skipped.
            continue
        # recursive=False: only anchors that are direct children of the
        # paragraph count as chapter links.
        for link in paragraph.find_all('a', href=True, recursive=False):
            volumes[current].add_chapter(build_chapter(link['href']))

    for ebook in volumes.values():
        ebook.create_epub(OUTPUT_DIR)


if __name__ == '__main__':
    main()