Michel Roux
27ad5667b8
All checks were successful
continuous-integration/drone/push Build is passing
87 lines
2.2 KiB
JavaScript
87 lines
2.2 KiB
JavaScript
import fs from 'fs'
|
|
import cheerio from 'cheerio'
|
|
import fetch from 'node-fetch'
|
|
import Epub from 'epub-gen-funstory'
|
|
|
|
// Make sure the directory that receives the generated .epub files exists.
// `recursive: true` makes the call a no-op when the directory is already
// there, so no separate existence check (and no check-then-create race) is needed.
fs.mkdirSync('output', { recursive: true })
|
|
|
|
/**
 * Build the option object describing one volume of a series.
 *
 * Every call returns a new object with its own empty `content` and `links`
 * arrays, so the result can be filled in by the caller without affecting
 * other volumes.
 *
 * @param {string|number} volume - Volume label, interpolated into the book title and the output file name.
 * @param {string} title - Series title, interpolated into the book title and the output file name.
 * @param {string|string[]} author - Stored unchanged as the `author` option.
 * @param {string} tocTitle - Stored unchanged as the `tocTitle` option.
 * @param {string} description - Stored unchanged as the `description` option.
 * @returns {object} Option object for a single volume.
 */
const metadata = (volume, title, author, tocTitle, description) => {
  // The same label names both the book and the file written under output/.
  const volumeLabel = `${title} - Volume ${volume}`

  return {
    title: volumeLabel,
    author: author,
    cover: 'https://i.pinimg.com/originals/0b/fd/cf/0bfdcfb42ba3ff0a22f4a7bc52928af4.png',
    output: `output/${volumeLabel}.epub`,
    version: 3,
    // NOTE(review): the language is fixed to French for every book — confirm
    // this is intended for all sources this script is run against.
    lang: 'fr',
    tocTitle: tocTitle,
    appendChapterTitles: true,
    content: [],
    links: [],
    verbose: true,
    description: description,
  }
}
|
|
|
|
/**
 * Download one chapter page and extract its title and cleaned-up body.
 *
 * The title is the text of `h1.entry-title`. The body is the inner HTML of
 * `div.entry-content` after every `a`, `h3`, `hr` and `div.tiled-gallery`
 * element inside it has been removed. The title is also logged to the console.
 *
 * @param {string} url - Address of the chapter page.
 * @returns {Promise<{title: string, data: string}>} Chapter title and body HTML.
 *   NOTE(review): `data` is whatever `.html()` yields when the page has no
 *   `div.entry-content` — presumably `null`; confirm against cheerio.
 * @throws {Error} When the server answers with a non-2xx status.
 */
const fetchPage = async (url) => {
  const response = await fetch(url)

  // A non-2xx answer is an error page, not a chapter: fail loudly instead of
  // silently putting its markup into the book.
  if (!response.ok) {
    throw new Error(`Request for ${url} failed with status ${response.status} ${response.statusText}`)
  }

  const html = cheerio.load(await response.text())

  const title = html('h1.entry-title').text()
  const content = html('div.entry-content')

  // Strip the elements that should not end up in the chapter body.
  for (const selector of ['a', 'h3', 'hr', 'div.tiled-gallery']) {
    content.find(selector).remove()
  }

  const data = content.html()

  console.log(title)

  return { title, data }
}
|
|
|
|
/**
 * Scrape a table-of-contents page and build one book per volume listed on it.
 *
 * The `div.entry-content > p` paragraphs of the page are read as alternating
 * pairs: an even-indexed paragraph holds a volume label ("Volume N") and the
 * odd-indexed paragraph that follows holds the links to that volume's
 * chapters. Each chapter is downloaded with `fetchPage`, in link order, and
 * the filled-in book is handed to `new Epub(...)`.
 *
 * @param {string} url - Address of the table-of-contents page.
 * @param {string|string[]} authors - Passed to `metadata` as the author of every volume.
 * @returns {Promise<void>} Settles once every chapter of every volume has been
 *   fetched and an `Epub` instance has been created for each volume; rejects
 *   if the table of contents or any chapter cannot be fetched.
 * @throws {Error} When the table-of-contents request answers with a non-2xx status.
 */
const run = async (url, authors) => {
  const response = await fetch(url)

  // Do not try to parse an error page as a table of contents.
  if (!response.ok) {
    throw new Error(`Request for ${url} failed with status ${response.status} ${response.statusText}`)
  }

  const responseHtml = await response.text()
  const html = cheerio.load(responseHtml)

  const content = html('div.entry-content > p')
  const title = html('#site-title > span > a').text()
  const summary = html('h1.entry-title').text()
  const description = html('#site-description').text()

  const books = []
  // Book that receives the links of the next odd-indexed paragraph. Tracking
  // the object itself (rather than indexing `books` by the label text) keeps
  // this working when labels are blank, non-numeric or do not start at 1.
  let currentBook

  content.each((i, el) => {
    if (i % 2 === 0) {
      const volume = html(el).text().replace(/Volume /, '').trim()

      if (volume === '') {
        // Blank label: the links in the following paragraph belong to no volume.
        currentBook = undefined
        return
      }

      currentBook = metadata(volume, title, authors, summary, description)
      books.push(currentBook)
      return
    }

    if (currentBook === undefined) return

    html('a', el).each((j, anchor) => {
      const href = html(anchor).attr('href')

      // Anchors without an href cannot be fetched; leave them out.
      if (href !== undefined) {
        currentBook.links.push(href)
      }
    })
  })

  // Volumes are processed concurrently, chapters within a volume in order.
  // Awaiting here makes `run` settle only when the work is done and lets
  // failures reach the caller instead of becoming unhandled rejections.
  await Promise.all(books.map(async (book) => {
    for (const link of book.links) {
      book.content.push(await fetchPage(link))
    }

    // NOTE(review): if the Epub instance exposes a completion promise it is
    // not awaited here — confirm against the library's API.
    new Epub(book)
  }))
}
|
|
|
|
// Build the books of both sites. The two runs are started back to back and
// proceed independently; a failure in one is reported and marks the process
// as failed without leaving a promise rejection unhandled.
const reportFailure = (error) => {
  console.error(error)
  process.exitCode = 1
}

run(
  'https://aubergevagabonde.wordpress.com/sommaire/',
  ['Maroti', 'ElliVia', 'Pirateaba']
).catch(reportFailure)

run(
  'https://wanderinginn.com/table-of-contents/',
  ['Pirateaba']
).catch(reportFailure)
|