"""Scrape The Wandering Inn and write one EPUB per volume into ``output``.

The table of contents page is read paragraph by paragraph.  A paragraph whose
whole text is "Volume N" starts a new volume; anchors that are direct children
of the following paragraphs are fetched as that volume's chapters.  Volumes
numbered below ``FIRST_VOLUME`` are skipped entirely.
"""

import os
import re

import requests
from bs4 import BeautifulSoup
from pypub import Epub, create_chapter_from_string

TOC_URL = 'https://wanderinginn.com/table-of-contents/'
OUTPUT_DIR = 'output'
AUTHOR = 'Pirateaba'
# Passed to Epub as ``cover``; presumably a path resolved by pypub relative to
# the working directory -- TODO confirm against the installed pypub version.
COVER_IMAGE = 'cover.png'
# Volumes with a lower number are not downloaded.
FIRST_VOLUME = 3
# Seconds to wait on a single HTTP request; without this a stalled connection
# would block the script forever.
REQUEST_TIMEOUT = 30

# A heading paragraph consists of nothing but "Volume <number>".
_VOLUME_HEADING = re.compile(r'Volume\s+(\d+)')


def _volume_number(text):
    """Return N if *text* is exactly a "Volume N" heading, otherwise None."""
    match = _VOLUME_HEADING.fullmatch(text.strip())
    if match is None:
        return None
    return int(match.group(1))


def _fetch_soup(session, url):
    """Download *url* and return it parsed with the ``html.parser`` backend.

    Raises ``requests.HTTPError`` on a 4xx/5xx response so that an error page
    is never mistaken for real content.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def _build_chapter(session, url):
    """Fetch the chapter at *url* and return it as a pypub chapter.

    Raises ``RuntimeError`` if the page lacks the expected title or content
    element, instead of failing later with an opaque ``AttributeError``.
    """
    page = _fetch_soup(session, url)
    heading = page.select_one('h1.entry-title')
    body = page.select_one('div.entry-content')
    if heading is None or body is None:
        raise RuntimeError('Unexpected chapter page layout at %s' % url)

    # Strip every link and every image from the chapter body.  These stay as
    # two separate passes: an <img> nested inside an <a> is already destroyed
    # by the first pass and must not be decomposed a second time.
    for anchor in body.find_all('a'):
        anchor.decompose()
    for image in body.find_all('img'):
        image.decompose()

    title = heading.get_text()
    print(title)  # progress output, one line per downloaded chapter
    return create_chapter_from_string(str(body), title)


def _new_book(number):
    """Create the empty Epub for volume *number*."""
    return Epub(
        "The Wandering Inn - Volume %d" % number,
        creator=AUTHOR,
        rights=AUTHOR,
        language='en',
        cover=COVER_IMAGE,
    )


def main():
    """Download every volume from ``FIRST_VOLUME`` on and write the EPUBs."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    books = {}      # volume number -> Epub, in the order headings are seen
    current = None  # Epub receiving chapters; None while skipping paragraphs

    with requests.Session() as session:
        toc = _fetch_soup(session, TOC_URL)
        for paragraph in toc.select('div.entry-content > p'):
            number = _volume_number(paragraph.get_text())
            if number is not None:
                if number < FIRST_VOLUME:
                    # Skip *all* paragraphs of this volume, not only the next.
                    current = None
                else:
                    if number not in books:
                        books[number] = _new_book(number)
                    current = books[number]
                continue

            if current is None:
                # Links before the first heading or inside a skipped volume.
                continue

            # Only anchors that are direct children of the paragraph and that
            # actually carry an href are treated as chapter links.
            for link in paragraph.find_all('a', href=True, recursive=False):
                current.add_chapter(_build_chapter(session, link['href']))

    for book in books.values():
        book.create_epub(OUTPUT_DIR)


if __name__ == '__main__':
    main()