import fs from 'fs'
import cheerio from 'cheerio'
import fetch from 'node-fetch'
import Epub from 'epub-gen'

// Generated EPUB files are written to ./output. `recursive: true` makes the
// call a no-op when the directory already exists, so no separate existence
// check (and no check-then-create race) is needed.
fs.mkdirSync('output', { recursive: true })

/**
 * Build the option object handed to epub-gen for a single volume.
 *
 * `content` and `links` start empty: `links` is filled while the table of
 * contents is parsed and `content` while the chapters are downloaded.
 *
 * NOTE(review): `lang` is hard-coded to 'fr' although one of the sources
 * configured at the bottom of this file looks like an English site — confirm
 * whether it should be configurable.
 *
 * @param {string} volume - Volume label taken from the table of contents.
 * @param {string} title - Series title, used in the book title and file name.
 * @param {string[]} author - Author names.
 * @param {string} tocTitle - Heading used for the generated table of contents.
 * @param {string} description - Book description.
 * @returns {object} Option object for `new Epub(...)`.
 */
const metadata = (volume, title, author, tocTitle, description) => ({
  title: `${title} - Volume ${volume}`,
  author,
  cover: 'https://i.pinimg.com/originals/0b/fd/cf/0bfdcfb42ba3ff0a22f4a7bc52928af4.png',
  output: `output/${title} - Volume ${volume}.epub`,
  version: 3,
  lang: 'fr',
  tocTitle,
  appendChapterTitles: true,
  content: [],
  links: [],
  verbose: true,
  description,
})

/**
 * Download a page and parse it with cheerio.
 *
 * @param {string} url - Page to download.
 * @returns {Promise<Function>} The cheerio query function for the page.
 * @throws {Error} When the server answers with a non-2xx status, so that an
 *   error page is never mistaken for real content.
 */
const fetchDocument = async (url) => {
  const response = await fetch(url)
  if (!response.ok) {
    throw new Error(`Request for ${url} failed with HTTP ${response.status}`)
  }
  return cheerio.load(await response.text())
}

/**
 * Download one chapter page and extract its title and body HTML.
 *
 * Links, horizontal rules and tiled galleries are removed from the body
 * before it is serialized. The chapter title is logged as a progress marker.
 *
 * @param {string} url - Chapter URL.
 * @returns {Promise<{title: string, data: (string|null)}>} Chapter entry in
 *   the shape stored in a book's `content` array.
 */
const fetchPage = async (url) => {
  const html = await fetchDocument(url)
  const title = html('h1.entry-title').text()
  const content = html('div.entry-content')
  content.find('a').remove()
  content.find('hr').remove()
  content.find('div.tiled-gallery').remove()
  const data = content.html()
  console.log(title)
  return {
    title,
    data,
  }
}

/**
 * Turn a parsed table-of-contents page into a list of book descriptors.
 *
 * The paragraphs of the page body are read as alternating pairs: an
 * even-indexed paragraph holds a volume heading, the following odd-indexed
 * paragraph holds the chapter links of that volume.
 *
 * @param {Function} html - cheerio query function for the contents page.
 * @param {string[]} authors - Author names applied to every book.
 * @returns {object[]} One epub-gen option object per volume, `links` filled.
 */
const collectBooks = (html, authors) => {
  const title = html('#site-title > span > a').text()
  const summary = html('h1.entry-title').text()
  const description = html('#site-description').text()

  const books = []
  // The book that link paragraphs are currently attached to. Holding the
  // object itself (instead of indexing `books` by volume number) keeps this
  // working for non-numeric, non-sequential or missing volume labels.
  let currentBook = null

  html('div.entry-content > p').each((index, paragraph) => {
    if (index % 2 === 0) {
      const volume = html(paragraph).text().replace(/Volume /, '').trim()
      if (volume === '') {
        // No heading: the links of the next paragraph belong to no volume.
        currentBook = null
        return
      }
      currentBook = metadata(volume, title, authors, summary, description)
      books.push(currentBook)
      return
    }

    if (currentBook === null) return

    html('a', paragraph).each((_, anchor) => {
      const href = html(anchor).attr('href')
      // Anchors without a target cannot be downloaded.
      if (href) {
        currentBook.links.push(href)
      }
    })
  })

  return books
}

/**
 * Download every chapter of a book, in order, then generate its EPUB file.
 *
 * @param {object} book - Descriptor produced by `metadata`, `links` filled.
 * @returns {Promise<void>} Settles once epub-gen has finished.
 */
const buildBook = async (book) => {
  // Chapters are fetched one after another so `content` keeps reading order.
  for (const link of book.links) {
    book.content.push(await fetchPage(link))
  }
  // epub-gen starts generating in the constructor and reports completion or
  // failure through the `promise` property.
  await new Epub(book).promise
}

/**
 * Scrape a table-of-contents page and generate one EPUB per volume.
 *
 * Volumes are processed concurrently. A failure in one volume is logged and
 * flagged through the process exit code without aborting the others.
 *
 * @param {string} url - URL of the table-of-contents page.
 * @param {string[]} authors - Author names applied to every generated book.
 * @returns {Promise<void>} Resolves once every volume has been attempted.
 */
const run = async (url, authors) => {
  const html = await fetchDocument(url)
  const books = collectBooks(html, authors)

  await Promise.all(
    books.map(async (book) => {
      try {
        await buildBook(book)
      } catch (error) {
        console.error(`Failed to generate "${book.title}":`, error)
        process.exitCode = 1
      }
    })
  )
}

// Last-resort handler so a failing source is reported instead of surfacing as
// an unhandled promise rejection.
const reportFailure = (error) => {
  console.error(error)
  process.exitCode = 1
}

run(
  'https://aubergevagabonde.wordpress.com/sommaire/',
  ['Maroti', 'ElliVia', 'Pirateaba']
).catch(reportFailure)

run(
  'https://wanderinginn.com/table-of-contents/',
  ['Pirateaba']
).catch(reportFailure)