// rssssing/index.js — 346 lines, 7.2 KiB, JavaScript (viewer metadata, not code)

const fetch = require('node-fetch')
const config = require('./config.js')
const Path = require('path')
// Persisted { nitter: { user: lastWorkingMirror } } map; rewritten by main().
let cache = require('./cache.json')
const { JSDOM } = require('jsdom')
// Per-hostname millisecond delay before the next request (see fetchRss).
let waitingList = new Map()
// Fetch and parse the Nitter RSS feed for `user`, trying each mirror until
// one yields parseable posts. The cached last-working mirror (if any) is
// tried first, then every host in config.sources.nitter. On success the
// working mirror is written back to the cache. Returns the post array, or
// undefined when every mirror failed.
const handleNitterUser = async user => {
  let data
  let index = 0
  // Prefer the mirror that worked last time, then fall back to the full list.
  const sources = cache.nitter[user]
    ? [cache.nitter[user]].concat(config.sources.nitter)
    : config.sources.nitter
  while (!data && index < sources.length) {
    const source = sources[index]
    const rss = await fetchRss(source, user + '/rss')
    try {
      data = processNitter(rss, user)
    } catch (err) {
      // NoMatchesError / XML DOMException mean this mirror is broken or rate
      // limited — try the next one. Anything else is unexpected and fatal.
      // (DOMException is compared by name because it may come from jsdom's realm.)
      if (err instanceof NoMatchesError || err.constructor.name === DOMException.name) {
        console.log(`Failed to fetch ${user} from ${source}`)
        index++
      } else {
        console.error(err)
        break
      }
    }
  }
  // Only report and cache a mirror when one actually worked; the original
  // logged "Found … at undefined" and cached undefined after total failure.
  if (data) {
    console.log(`Found ${user} at ${sources[index]}`)
    cache.nitter[user] = sources[index]
  }
  return data
}
const sleep = delay => new Promise(resolve => setTimeout(() => resolve(), delay) )
class NoMatchesError extends Error {}
// Parse an RSS document and reduce each <channel><item> to a post object.
// reducerCallback(item, descriptionDoc, dateString, link) returns either a
// partial post — which gets numeric .date and .link attached — or a falsy
// value to skip the item. `cdata` is accepted for call-site compatibility
// but is currently unused. Throws NoMatchesError when the feed has no items.
const processRss = (rss, reducerCallback, cdata) => {
  const { document } = new JSDOM(rss, {
    contentType: 'text/xml'
  }).window
  const items = document.querySelectorAll('channel item')
  if (items.length == 0) {
    throw new NoMatchesError('Got no matches')
  }
  const posts = []
  for (const item of items) {
    // Descriptions are HTML embedded in the XML, so parse them separately.
    const description = new JSDOM(item.querySelector('description').textContent).window.document
    const dateString = item.querySelector('pubDate').textContent
    const link = item.querySelector('link').textContent
    const post = reducerCallback(item, description, dateString, link)
    if (post) {
      // Invalid dates yield NaN (not null/undefined), so the original
      // `valueOf() ?? 0` never fell back; check NaN explicitly.
      const timestamp = new Date(dateString).valueOf()
      post.date = Number.isNaN(timestamp) ? 0 : timestamp
      post.link = link
      posts.push(post)
    }
  }
  return posts
}
// Fetch an RSS document from https://<hostname>/<path>, rate limiting per
// host: after each request, the next request to the same host waits
// config.courtesyWait milliseconds. Resolves to the response body text, or
// undefined when the request failed (the error is logged, not thrown).
const fetchRss = async (hostname, path) => {
  // First request to a host has no entry yet; the original passed undefined
  // to sleep() in that case — treat "no entry" as "no wait" instead.
  const waitFor = waitingList.get(hostname) ?? 0
  if (waitFor > 0) {
    await sleep(waitFor)
    waitingList.set(hostname, 0)
  }
  return await fetch(new URL(path, 'https://' + hostname))
    .then(response => {
      waitingList.set(hostname, config.courtesyWait)
      return response.text()
    })
    .catch(console.error) // best-effort: resolve undefined on network errors
}
// Extract image URLs from a post's parsed HTML description.
// When an <img> has no src, fall back to the last (typically largest) srcset
// candidate. Returns { images, user } when at least one URL was found,
// otherwise undefined so the caller skips the post.
const getImages = (user, description) => {
  const imageUrls = []
  // Note: querySelectorAll always returns a NodeList, so no null check is
  // needed (the original's `if(images)` guard was always true).
  for (const image of description.querySelectorAll('img')) {
    let { src } = image
    if (!src) {
      // srcset candidates look like "url width"; take the last one and strip
      // the width descriptor if present. (The original sliced to indexOf(' ')
      // unconditionally, chopping the final character when no descriptor.)
      const finalSrc = image.srcset.split(', ').pop()
      const spaceAt = finalSrc.indexOf(' ')
      src = spaceAt === -1 ? finalSrc : finalSrc.slice(0, spaceAt)
    }
    if (src) { // skip images with neither src nor a usable srcset
      imageUrls.push(src)
    }
  }
  if (imageUrls.length > 0) {
    return {
      images: imageUrls,
      user
    }
  }
}
// Build posts for a Nitter user's feed, keeping only items authored by the
// user themselves (dc:creator is "@name", so drop its first character).
const processNitter = (rss, user) =>
  processRss(rss, (item, description) => {
    const [ creator ] = item.getElementsByTagName('dc:creator')
    if (creator.innerHTML.slice(1) === user) {
      return getImages(user, description)
    }
  }, true)
// Fetch and parse a Tumblr user's RSS feed (https://<user>.tumblr.com/rss).
// Returns the parsed post array; parse failures propagate to the caller.
const handleTumblrUser = async (user) => {
  const rss = await fetchRss(user + '.tumblr.com', 'rss')
  const data = processTumblr(rss, user)
  // Log only after a successful parse — the original printed "Found" before
  // the feed was inspected, claiming success even on failures.
  console.log('Found ' + user)
  return data
}
// Build posts for a Tumblr user's feed, dropping reblogs of other blogs.
const processTumblr = (rss, user) => {
  const reducer = (item, description) => {
    // Reblogged posts start with a link back to the source blog; keep the
    // post only when there is no such link or it points at the user.
    const sourceBlog = description.querySelector('p > a.tumblr_blog')
    if (!sourceBlog || sourceBlog.innerHTML === user) {
      return getImages(user, description)
    }
  }
  return processRss(rss, reducer)
}
const oneDay = 1000 * 60 * 60 * 24
// Render a merged feed to paginated HTML files in `directory`.
// `sources` is an array of per-user post arrays; undefined entries are
// counted as missing and skipped. Posts older than config.tooLongAgo days
// (measured back from midnight today) are dropped, the rest sorted newest
// first and written as config.pageSize-sized pages: index.html, 1.html, …
const printFeed = async (sources, directory, header) => {
  // Collate
  let feed = []
  // Midnight today minus the configured number of days.
  const tooLongAgo = (Date.now() - (Date.now() % oneDay)) - oneDay * config.tooLongAgo
  let missingSources = 0
  for (const source of sources) {
    if (source == undefined) {
      missingSources++
      continue
    }
    for (const post of source) {
      // When config.tooLongAgo is unset the cutoff is NaN; treat that as
      // "no cutoff" — the original's `tooLongAgo && …` guard silently
      // dropped every post in that case.
      if (!tooLongAgo || post.date > tooLongAgo)
        feed.push(post)
    }
  }
  // Newest first. The original comparator returned a boolean, which
  // Array#sort does not honor; a numeric difference sorts correctly.
  feed = feed.sort((a, b) => b.date - a.date)
  if (missingSources) {
    console.log('Missing ' + missingSources + ' feeds!')
  }
  // Paginate
  const pages = []
  for (let i = 0; i < Math.ceil(feed.length / config.pageSize); i++) {
    pages.push(feed.slice(i * config.pageSize, (i + 1) * config.pageSize))
  }
  // Write
  for (let i = 0; i < pages.length; i++) {
    const nextPage = i + 1
    const link = nextPage === pages.length ?
      `<a href="data:text/html,">end</a>` :
      `<a href="${nextPage}.html">next</a>`
    // Await the write so the process cannot exit with files half-written
    // (the original floated the Bun.write promise).
    await Bun.write(
      Path.join(directory, (i == 0 ? 'index' : i) + '.html'),
      renderPage(`Page ${i + 1}`, pages[i], header, link)
    )
  }
}
// Render one page of the feed as a complete HTML document string.
// `posts` arrive pre-sorted from printFeed; `header` and `footer` are
// optional HTML fragments (feed nav / next-page link), skipped when falsy.
const renderPage = (title, posts, header, footer) => {
let html = `\
<html>
<head>
<title>${title}</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
body {
max-width: 640px;
float: right;
font-family: sans-serif;
}
p {
padding: 30px;
}
img {
margin: 10px auto;
max-width: 100%;
}
p a, footer a {
float: right
}
hr {
clear: both
}
</style>
</head>
<body>`
if(header) {
html += `
<header>
${header}
</header>
`
}
// One block per post: its images, then byline / formatted date / source link.
for(let post of posts) {
let date = new Date(post.date)
html += `
${post.images.map(renderImage).join('\n')}
<p><b>${post.user}</b> ${config.printDate(date)} <a href="${post.link}">open</a></p><hr>`
}
if(footer) {
html += `
<footer>
${footer}
</footer>`
}
html += `
</body>
</html>`
return html
}
// Render one image as a lazy-loaded thumbnail linking to the full image.
// <img> is a void element, so no end tag is emitted — the original's
// "</img>" was invalid HTML.
const renderImage = image => `\
<a href="${image}"><img src="${image}" loading="lazy"></a>`
// Entry point: for every feed in config.feeds, subscribe to its nitter and
// tumblr users, persist the nitter mirror cache, then write paginated HTML
// for each individual source and each combined feed under out/.
const main = async () => {
let feeds = []
let allSources = []
for(let feedName in config.feeds) {
let feed = config.feeds[feedName]
let sources = []
// Record the handler's posts in this feed's list and in the flat allSources
// list (used for per-user pages). Errors are logged and the source recorded
// as undefined, which printFeed counts as "missing".
const subscribe = (sourcePromise, type, name) =>
sourcePromise
.catch(error => console.error(error) )
.then(source => {
sources.push(source)
allSources.push({
type,
name,
link: Path.join(type, name),
source
})
})
if(feed.nitter) {
// Sequential on purpose: fetchRss rate limits requests per host.
for(let user of feed.nitter) {
await subscribe(handleNitterUser(user), 'nitter', user)
}
console.log('Caching sources...')
// NOTE(review): Bun.write returns a promise that is not awaited here, so
// the cache write could race with process exit — confirm intended.
Bun.write('cache.json', JSON.stringify(cache, null, 2))
}
if(feed.tumblr) {
for(let user of feed.tumblr) {
await subscribe(handleTumblrUser(user), 'tumblr', user)
}
}
// The "main" feed lives at the output root; others get a subdirectory.
let link = feed.main ? '' : feedName
feeds.push({
name: feedName,
main: feed.main,
sources,
link
})
}
// Build the <details> nav shown atop every page; `depth` is how many
// directory levels the page lives below out/.
const buildFeedNav = depth => {
const buildLink = (page, name = page.link) => {
let link = '../'.repeat(depth) + page.link
if(config.linkToIndex)
link += '/index.html'
return `<div><a href="${link}">${name}</a></div>`
}
return `\
<details>
<summary>Feeds</summary>
<section>
${buildLink({ link: '' }, 'main')}
${feeds.filter(feed => !feed.main).map(feed => buildLink(feed)).join('\n')}
<hr>
${allSources.map(source => buildLink(source)).join('\n')}
</section>
</details>
<hr>`
}
console.log('Writing...')
// One page tree per individual source (out/<type>/<name>/)…
for(let source of allSources) {
console.log(source)
await printFeed([ source.source ], Path.join('out', source.link), buildFeedNav(2))
}
// …then one per combined feed (out/ for the main feed, out/<feedName>/ otherwise).
for(let feed of feeds) {
await printFeed(feed.sources, Path.join('out', feed.link), buildFeedNav(feed.main ? 0 : 1))
}
console.log('Done!')
}
main()