// Image-feed aggregator: pulls RSS from Nitter mirrors and Tumblr blogs,
// keeps image posts from the last config.tooLongAgo days, and writes
// paginated static HTML pages with Bun.
// Depends on: node-fetch, jsdom, ./config.js, ./cache.json, the Bun runtime.
const fetch = require('node-fetch')
const config = require('./config.js')
const Path = require('path')
let cache = require('./cache.json')
const { JSDOM } = require('jsdom')

// Per-host courtesy-delay bookkeeping: hostname -> ms to wait before the
// next request to that host.
let waitingList = new Map()

// Thrown by processRss when a feed contains no <channel><item> entries.
class NoMatchesError extends Error {}

// Promise-based setTimeout.
const sleep = delay => new Promise(resolve =>
  setTimeout(() => resolve(), delay)
)

/**
 * Fetch a Nitter user's RSS, trying the mirror cached from the previous run
 * first, then every mirror in config.sources.nitter, until one parses.
 *
 * @param {string} user - Nitter username (without the leading "@").
 * @returns image posts from processNitter, or undefined if all mirrors fail.
 */
const handleNitterUser = async user => {
  let data
  let index = 0
  let sources = cache.nitter[user]
    ? [ cache.nitter[user] ].concat(config.sources.nitter)
    : config.sources.nitter
  while(!data && index < sources.length) {
    let source = sources[index]
    let rss = await fetchRss(source, user + '/rss')
    try {
      data = processNitter(rss, user)
    } catch(err) {
      // Recoverable failures — empty feed, or an XML parse error (jsdom's
      // DOMException comes from another realm, hence the name comparison):
      // move on to the next mirror.
      if(err instanceof NoMatchesError || err.constructor.name == DOMException.name) {
        console.log(`Failed to fetch ${user} from ${source}`)
        index++
      } else {
        console.error(err)
        break
      }
    }
  }
  // BUG FIX: only log success and cache the mirror when one actually worked.
  // The original ran this unconditionally, so after exhausting every mirror
  // it logged "Found ... at undefined" and cached `undefined`.
  if(data) {
    console.log(`Found ${user} at ${sources[index]}`)
    cache.nitter[user] = sources[index]
  }
  return data
}

/**
 * Parse an RSS document and map each <item> through reducerCallback.
 *
 * @param {string} rss - raw RSS/XML text.
 * @param {Function} reducerCallback - (item, descriptionDoc, dateString,
 *   link) => post object, or falsy to skip the item.
 * @param {boolean} [cdata] - currently unused; kept for call compatibility.
 * @returns {Array} posts, each stamped with .date (ms epoch) and .link.
 * @throws {NoMatchesError} when the channel has no items.
 */
const processRss = (rss, reducerCallback, cdata) => {
  let { document } = new JSDOM(rss, { contentType: 'text/xml' }).window
  let items = document.querySelectorAll('channel item')
  if(items.length == 0) {
    throw new NoMatchesError('Got no matches')
  }
  let posts = []
  for(let item of items) {
    // The description element holds encoded HTML; parse it into its own doc.
    let description =
      new JSDOM(item.querySelector('description').textContent).window.document
    let dateString = item.querySelector('pubDate').textContent
    let link = item.querySelector('link').textContent
    let post = reducerCallback(item, description, dateString, link)
    if(post) {
      // BUG FIX: an unparseable pubDate makes valueOf() return NaN, which
      // `??` does not catch (it only handles null/undefined). Check NaN
      // explicitly so bad dates fall back to 0 as intended.
      let timestamp = new Date(dateString).valueOf()
      post.date = Number.isNaN(timestamp) ? 0 : timestamp
      post.link = link
      posts.push(post)
    }
  }
  return posts
}

/**
 * GET https://<hostname>/<path>, honouring the per-host courtesy delay.
 *
 * @returns {Promise<string|undefined>} body text, or undefined on network
 *   error (logged — preserving the original best-effort behavior).
 */
const fetchRss = async (hostname, path) => {
  // `?? 0`: a host we have never contacted has no pending wait. (The
  // original awaited sleep(undefined), which only worked by accident.)
  let waitFor = waitingList.get(hostname) ?? 0
  if(waitFor !== 0) {
    await sleep(waitFor)
    waitingList.set(hostname, 0)
  }
  return await fetch(new URL(path, 'https://' + hostname))
    .then(response => {
      // The next request to this host must wait out the courtesy period.
      waitingList.set(hostname, config.courtesyWait)
      return response.text()
    })
    .catch(console.error)
}

/**
 * Collect image URLs from a post's description document.
 *
 * @returns {{ images: string[], user: string }|undefined} undefined when
 *   the description holds no images.
 */
const getImages = (user, description) => {
  // querySelectorAll always returns a NodeList, so the original's truthy
  // check on it was a no-op; the imageUrls.length guard below is what
  // actually filters image-less posts.
  let images = description.querySelectorAll('img')
  let imageUrls = []
  for(let image of images) {
    let { src } = image
    if(!src) {
      // No src attribute: use the last (typically largest) srcset entry,
      // trimming its width descriptor.
      let finalSrc = image.srcset.split(', ').pop()
      src = finalSrc.slice(0, finalSrc.indexOf(' ') )
    }
    imageUrls.push(src)
  }
  if(imageUrls.length > 0) {
    return { images: imageUrls, user }
  }
}

// Reduce Nitter RSS to image posts authored by `user` only — dc:creator is
// "@name", so slice(1) drops the "@" before comparing; retweets are skipped.
const processNitter = (rss, user) => {
  return processRss(rss, (item, description) => {
    let creator = item.getElementsByTagName('dc:creator')[0]
    if(creator.innerHTML.slice(1) === user)
      return getImages(user, description)
  }, true)
}

// Fetch and reduce one Tumblr blog's RSS feed.
const handleTumblrUser = async (user) => {
  let rss = await fetchRss(user + '.tumblr.com', 'rss')
  console.log('Found ' + user)
  return processTumblr(rss, user)
}

// Reduce Tumblr RSS to image posts, skipping reblogs from other blogs.
const processTumblr = (rss, user) => {
  return processRss(rss, (item, description) => {
    let reblog = description.querySelector('p > a.tumblr_blog')
    // If it's a reblog, skip it
    if(reblog && reblog.innerHTML !== user) {
      return
    }
    return getImages(user, description)
  })
}

const oneDay = 1000 * 60 * 60 * 24

/**
 * Merge sources into one feed, drop posts older than config.tooLongAgo days
 * (counted from midnight today), sort newest-first, paginate by
 * config.pageSize, and write the HTML pages into `directory`.
 */
const printFeed = async (sources, directory, header) => {
  // Coalate
  let feed = []
  // Midnight today minus config.tooLongAgo days.
  let tooLongAgo = (Date.now() - (Date.now() % oneDay)) - oneDay * config.tooLongAgo
  let missingSources = 0
  for(let source of sources) {
    if(source == undefined) {
      missingSources++
      continue
    }
    for(let post of source) {
      // NOTE(review): if config.tooLongAgo is unset, the cutoff is NaN and
      // no post is ever kept — confirm it is always configured.
      if(tooLongAgo && post.date > tooLongAgo)
        feed.push(post)
    }
  }
  // BUG FIX: a sort comparator must return a number; the original returned
  // a boolean (coerced to 0/1, never negative), leaving the feed
  // effectively unsorted. Newest posts come first.
  feed.sort((a, b) => b.date - a.date)
  if(missingSources) {
    console.log('Missing ' + missingSources + ' feeds!')
  }
  // Render
  let pages = []
  for(let i = 0; i < Math.ceil(feed.length / config.pageSize); i++) {
    pages.push(feed.slice(i * config.pageSize, (i + 1) * config.pageSize) )
  }
  // Write
  for(let i = 0; i < pages.length; i++) {
    let nextPage = i + 1
    let link = nextPage === pages.length ? `end` : `next`
    // BUG FIX: await the write so every page is on disk before the caller
    // proceeds (Bun.write returns a promise the original left floating).
    await Bun.write(
      Path.join(directory, (i == 0 ? 'index' : i) + '.html'),
      renderPage(`Page ${i + 1}`, pages[i], header, link)
    )
  }
}

// Render one page of posts as HTML.
// NOTE(review): the markup inside these template literals appears to have
// been stripped from this copy of the file — the literal contents below are
// preserved verbatim; confirm against the original before shipping.
const renderPage = (title, posts, header, footer) => {
  let html = `\ ${title} `
  if(header) {
    html += `
${header}
`
  }
  for(let post of posts) {
    let date = new Date(post.date)
    html += ` ${post.images.map(renderImage).join('\n')}

${post.user} ${config.printDate(date)} open


`
  }
  if(footer) {
    html += ` `
  }
  html += ` `
  return html
}

// Render a single image (markup preserved verbatim — see the NOTE above
// renderPage; `image` is presumably interpolated in the lost markup).
const renderImage = image => `\ `

/**
 * Entry point: resolve every configured feed, write per-source pages, then
 * combined per-feed pages, all under ./out.
 */
const main = async () => {
  let feeds = []
  let allSources = []
  for(let feedName in config.feeds) {
    let feed = config.feeds[feedName]
    let sources = []
    // Resolve a source promise, recording its result — or undefined on
    // error, which printFeed later counts as a missing feed.
    const subscribe = (sourcePromise, type, name) => sourcePromise
      .catch(error => console.error(error) )
      .then(source => {
        sources.push(source)
        allSources.push({ type, name, link: Path.join(type, name), source })
      })
    if(feed.nitter) {
      for(let user of feed.nitter) {
        await subscribe(handleNitterUser(user), 'nitter', user)
      }
      console.log('Caching sources...')
      // BUG FIX: await the cache write (the original left it floating).
      await Bun.write('cache.json', JSON.stringify(cache, null, 2))
    }
    if(feed.tumblr) {
      for(let user of feed.tumblr) {
        await subscribe(handleTumblrUser(user), 'tumblr', user)
      }
    }
    // The main feed lives at the site root; others under their own name.
    let link = feed.main ? '' : feedName
    feeds.push({ name: feedName, main: feed.main, sources, link })
  }
  // Build the feed-navigation block with links relative to a page `depth`
  // directories below ./out.
  // NOTE(review): anchor markup appears stripped here too — `link` is
  // computed but unused in the surviving template text.
  const buildFeedNav = depth => {
    const buildLink = (page, name = page.link) => {
      let link = '../'.repeat(depth) + page.link
      if(config.linkToIndex) link += '/index.html'
      return `
${name}
`
    }
    return `\
Feeds
${buildLink({ link: '' }, 'main')} ${feeds.filter(feed => !feed.main).map(feed => buildLink(feed)).join('\n')}
${allSources.map(source => buildLink(source)).join('\n')}

`
  }
  console.log('Writing...')
  // Per-source pages: out/<type>/<name>/
  for(let source of allSources) {
    console.log(source)
    await printFeed([ source.source ], Path.join('out', source.link), buildFeedNav(2))
  }
  // Combined pages: out/ for the main feed, out/<feedName>/ otherwise.
  for(let feed of feeds) {
    await printFeed(feed.sources, Path.join('out', feed.link),
      buildFeedNav(feed.main ? 0 : 1))
  }
  console.log('Done!')
}

main()