import { EPub } from '@lesjoursfr/html-to-epub'
import fs from 'fs'
import { parse } from 'node-html-parser'
// Make sure the directory that receives the generated .epub files exists.
// `recursive: true` turns the call into a no-op when the directory is already
// there, so no separate (and racy) existence check is needed.
fs.mkdirSync('output', { recursive: true })

// Cover image passed to the EPUB generator for every generated volume.
const cover =
  'https://i0.wp.com/thefantasyinn.com/wp-content/uploads/2018/08/twi.jpg'

// Custom stylesheet passed to the EPUB generator. Decoded as UTF-8 text so
// the `css` option receives a plain string instead of a raw Buffer.
const css = fs.readFileSync('template.css', 'utf8')
/**
 * Download a single chapter page and reduce it to EPUB-ready content.
 *
 * The page title is read from `h1.entry-title` and the body from
 * `div.entry-content`. Before the body is serialized, links, sub-headings,
 * separators, embeds, scripts and tiled galleries are removed, and every
 * image is stripped down to its `src` attribute.
 *
 * @param {string} url - Address of the chapter page.
 * @returns {Promise<{title: string, data: string}>} The chapter title and the
 *   serialized HTML of the cleaned-up content container.
 * @throws {Error} If the HTTP request is not successful, or if the title or
 *   content element cannot be found in the page.
 */
const fetchPage = async (url) => {
  const response = await fetch(url)
  if (!response.ok) {
    // Without this check an error page would be parsed as if it were a chapter.
    throw new Error(`Failed to fetch ${url}: HTTP ${response.status}`)
  }
  const html = parse(await response.text())

  const titleNode = html.querySelector('h1.entry-title')
  const content = html.querySelector('div.entry-content')
  if (!titleNode || !content) {
    throw new Error(`Unexpected page structure at ${url}`)
  }
  const title = titleNode.text

  // Elements that must not end up in the book, removed selector by selector.
  const unwantedSelectors = [
    'a',
    'h3',
    'hr',
    'iframe',
    'script',
    'div.tiled-gallery',
  ]
  for (const selector of unwantedSelectors) {
    for (const node of content.querySelectorAll(selector)) {
      node.remove()
    }
  }

  // Keep only `src` on images. The attribute names are snapshotted first so
  // that removing attributes does not interfere with the enumeration.
  for (const image of content.querySelectorAll('img')) {
    for (const name of Object.keys(image.attributes)) {
      if (name !== 'src') {
        image.removeAttribute(name)
      }
    }
  }

  const data = content.toString()
  console.log(title)
  return {
    title,
    data,
  }
}
/**
 * Build one EPUB file per volume listed on a table-of-contents page.
 *
 * The paragraphs directly under `div.entry-content` are processed in pairs:
 * an even-indexed paragraph is read as a "Volume N" heading, and the
 * odd-indexed paragraph that follows supplies the chapter links of that
 * volume. A pair whose heading does not yield a non-zero number is skipped.
 * Chapters are fetched one at a time, in document order, and each volume is
 * written to `output/<site title> - Volume <N>.epub`.
 *
 * @param {string} url - Address of the table-of-contents page.
 * @param {string|string[]} author - Author value forwarded to the EPUB
 *   generator.
 * @param {string} lang - Language code forwarded to the EPUB generator.
 * @returns {Promise<void>} Resolves once every volume has been rendered.
 * @throws {Error} If the table of contents cannot be fetched or the site
 *   title element is missing; errors from chapter fetching and rendering
 *   propagate as well.
 */
const run = async (url, author, lang) => {
  const response = await fetch(url)
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: HTTP ${response.status}`)
  }
  const html = parse(await response.text())
  const content = html.querySelectorAll('div.entry-content > p')
  const titleNode = html.querySelector('#site-title > span > a')
  if (!titleNode) {
    throw new Error(`Site title not found at ${url}`)
  }
  const title = titleNode.text

  let volume = 0
  for (let i = 0; i < content.length; i++) {
    if (i % 2 === 0) {
      // Heading paragraph: extract the volume number (NaN if there is none).
      const label = content[i].text.replace('Volume', '').trim()
      volume = Number.parseInt(label, 10)
      continue
    }
    if (!volume) {
      // The preceding heading was not a usable volume number.
      continue
    }

    const metadatas = {
      title,
      author,
      cover,
      version: 3,
      css,
      lang,
      verbose: true,
      content: [],
    }
    for (const node of content[i].querySelectorAll('a')) {
      const href = node.getAttribute('href')
      if (!href) {
        continue
      }
      // The href is already a URL, so it is used as-is (not run through the
      // HTML parser) and resolved against the page URL in case it is relative.
      const link = new URL(href, url).href
      const page = await fetchPage(link)
      metadatas.content.push(page)
    }

    const epub = new EPub(
      metadatas,
      `output/${title} - Volume ${volume}.epub`
    )
    await epub.render()
  }
}
// Report a failed build instead of leaving the promise rejection unhandled,
// and make the process exit with a non-zero status.
const reportFailure = (error) => {
  console.error(error)
  process.exitCode = 1
}

// Both editions are started right away and built concurrently; a failure in
// one does not abort the other.
run(
  'https://aubergevagabonde.wordpress.com/sommaire/',
  ['ElliVia', 'Pirateaba'],
  'fr'
).catch(reportFailure)
run('https://wanderinginn.com/table-of-contents/', ['Pirateaba'], 'en').catch(
  reportFailure
)