import { EPub } from '@lesjoursfr/html-to-epub'
import fs from 'fs'
import { parse } from 'node-html-parser'

// Make sure the directory that receives the generated .epub files exists.
fs.mkdirSync('output', { recursive: true })

// Cover image used for every generated volume.
const cover =
  'https://i0.wp.com/thefantasyinn.com/wp-content/uploads/2018/08/twi.jpg'

// Stylesheet handed to the EPub generator; read as text rather than as a Buffer.
const css = fs.readFileSync('template.css', 'utf8')

// Elements removed from every chapter body before it is embedded in the book.
const STRIPPED_SELECTORS = [
  'a',
  'h3',
  'hr',
  'iframe',
  'script',
  'div.tiled-gallery',
]

/**
 * Downloads a URL and parses the response body as HTML.
 *
 * @param {string} url - Absolute URL to download.
 * @returns {Promise<object>} Root node produced by node-html-parser's `parse`.
 * @throws {Error} When the server answers with a non-2xx status.
 */
const fetchHtml = async (url) => {
  const response = await fetch(url)
  if (!response.ok) {
    // Fail here instead of parsing an error page as if it were real content.
    throw new Error(`Request for ${url} failed with status ${response.status}`)
  }
  return parse(await response.text())
}

/**
 * Downloads one chapter page and reduces it to its title and cleaned content.
 *
 * The content is the `div.entry-content` element with every element matching
 * STRIPPED_SELECTORS removed and every `<img>` attribute except `src` dropped.
 *
 * @param {string} url - Absolute URL of the chapter page.
 * @returns {Promise<{title: string, data: string}>} Chapter title and the
 *   serialized HTML of the cleaned content.
 * @throws {Error} When the page cannot be downloaded or lacks the expected
 *   `h1.entry-title` / `div.entry-content` elements.
 */
const fetchPage = async (url) => {
  const html = await fetchHtml(url)
  const titleNode = html.querySelector('h1.entry-title')
  const content = html.querySelector('div.entry-content')
  if (!titleNode || !content) {
    throw new Error(
      `Unexpected page layout at ${url}: missing title or content element`
    )
  }
  const title = titleNode.text

  for (const selector of STRIPPED_SELECTORS) {
    for (const node of content.querySelectorAll(selector)) {
      node.remove()
    }
  }

  for (const image of content.querySelectorAll('img')) {
    // Snapshot the attribute names so removal does not disturb the iteration.
    for (const name of Object.keys(image.attributes)) {
      if (name !== 'src') {
        image.removeAttribute(name)
      }
    }
  }

  const data = content.toString()
  console.log(title)
  return { title, data }
}

/**
 * Builds one EPUB per volume listed on a table-of-contents page.
 *
 * The `div.entry-content > p` paragraphs are treated as alternating pairs:
 * an even-indexed paragraph holds the volume heading ("Volume N") and the
 * following odd-indexed paragraph holds the links to that volume's chapters.
 * Each volume is written to `output/<site title> - Volume <N>.epub`.
 *
 * @param {string} url - Absolute URL of the table-of-contents page.
 * @param {string[]} author - Author names stored in the book metadata.
 * @param {string} lang - Language code stored in the book metadata.
 * @returns {Promise<void>}
 * @throws {Error} When a page cannot be downloaded or has an unexpected layout.
 */
const run = async (url, author, lang) => {
  const html = await fetchHtml(url)
  const content = html.querySelectorAll('div.entry-content > p')
  const titleNode = html.querySelector('#site-title > span > a')
  if (!titleNode) {
    throw new Error(`Unexpected page layout at ${url}: missing site title`)
  }
  const title = titleNode.text

  let volume = 0
  for (let i = 0; i < content.length; i++) {
    if (i % 2 === 0) {
      volume = Number.parseInt(
        content[i].text.replace('Volume', '').trim(),
        10
      )
      continue
    }
    // NaN (heading without a number) and 0 both mean "no usable volume".
    if (!volume) {
      continue
    }

    const chapters = []
    for (const anchor of content[i].querySelectorAll('a')) {
      const href = anchor.getAttribute('href')
      if (!href) {
        continue
      }
      // Resolve against the table of contents so relative links also work.
      const chapterUrl = new URL(href, url).href
      // Chapters are fetched one at a time, which keeps them in reading order.
      chapters.push(await fetchPage(chapterUrl))
    }

    const metadatas = {
      title,
      author,
      cover,
      version: 3,
      css,
      lang,
      verbose: true,
      content: chapters,
    }
    const epub = new EPub(
      metadatas,
      `output/${title} - Volume ${volume}.epub`
    )
    await epub.render()
  }
}

// Build both editions concurrently; report every failure instead of leaving
// rejected promises unhandled.
const results = await Promise.allSettled([
  run(
    'https://aubergevagabonde.wordpress.com/sommaire/',
    ['ElliVia', 'Pirateaba'],
    'fr'
  ),
  run('https://wanderinginn.com/table-of-contents/', ['Pirateaba'], 'en'),
])

for (const result of results) {
  if (result.status === 'rejected') {
    console.error(result.reason)
    process.exitCode = 1
  }
}