const fetch = require('node-fetch') const config = require('./config.js') const Path = require('path') let cache = require('./cache.json') const { JSDOM } = require('jsdom') let waitingList = new Map() const getMatches = regex => string => { let match let matches = [] while((match = regex.exec(string)) != null) { if (match.index === regex.lastIndex) { regex.lastIndex++; } matches.push(match) } return matches } const handleNitterUser = async user => { let data let index = 0 let sources = cache.nitter[user] ? [ cache.nitter[user] ].concat(config.sources.nitter) : config.sources.nitter while(!data && index < sources.length) { let source = sources[index] let rss = await fetchRss(source, user + '/rss') try { data = processNitter(rss, user) } catch(err) { if(err.constructor.name == NoMatchesError.name) { console.log(`Failed to fetch ${user} from ${source}`) index++ } else { console.error(err) break } } } console.log(`Found ${user} at ${sources[index]}`) cache.nitter[user] = sources[index] return data } const sleep = delay => new Promise(resolve => setTimeout(() => resolve(), delay) ) class NoMatchesError extends Error {} const processRss = (rss, reducerCallback, cdata) => { let { document } = new JSDOM(rss, { contentType: 'text/xml' }).window let items = document.querySelectorAll('channel item') if(items.length == 0) { throw new NoMatchesError('Got no matches') } let posts = [] for(let item of items) { let description = new JSDOM(item.querySelector('description').textContent).window.document // let description = item.querySelector('description') let dateString = item.querySelector('pubDate').textContent let link = item.querySelector('link').textContent let post = reducerCallback(item, description, dateString, link) if(post) { post.date = new Date(dateString).valueOf() ?? 0 post.link = link posts.push(post) } } return posts } const fetchRss = async (hostname, path) => { let waitFor = waitingList.get(hostname) if(waitFor !== 0) { await sleep(waitFor) waitingList.set(hostname, 0) } return await fetch(new URL(path, 'https://' + hostname)) .then(response => { waitingList.set(hostname, config.courtesyWait) return response.text() }) .catch(console.error) } const getImages = (user, description) => { let images = description.querySelectorAll('img') if(images) { let imageUrls = [] for(let image of images) { let { src } = image if(!src) { let finalSrc = image.srcset.split(', ').pop() src = finalSrc.slice(0, finalSrc.indexOf(' ') ) } imageUrls.push(src) } if(imageUrls.length > 0) { return { images: imageUrls, user } } } } const processNitter = (rss, user) => { return processRss(rss, (item, description) => { // if(dcCreatorRegex.test(item)) // return // let images = [] // for(let [, url] of getImageMatches(description) ) { // images.push(url) // } // if(images.length > 0) { // return { images, user } // } let creator = item.getElementsByTagName('dc:creator')[0] if(creator.innerHTML.slice(1) === user) return getImages(user, description) }, true) } const handleTumblrUser = async (user) => { let rss = await fetchRss(user + '.tumblr.com', 'rss') console.log('Found ' + user) return processTumblr(rss, user) } const processTumblr = (rss, user) => { // const unescapedRss = unescape(rss) return processRss(rss, (item, description) => { let reblog = description.querySelector('p > a.tumblr_blog') // If it's a reblog, skip it if(reblog && reblog.innerHTML !== user) { return } return getImages(user, description) }) } const oneDay = 1000 * 60 * 60 * 24 const print = async feeds => { // Coalate let masterFeed = [] let tooLongAgo = (Date.now() - (Date.now() % oneDay)) - oneDay * config.tooLongAgo for(let feed of feeds) { for(let post of feed) { if(tooLongAgo && post.date > tooLongAgo) masterFeed.push(post) } } masterFeed = masterFeed.sort((a, b) => a.date < b.date) // Render let pages = [] for(let i = 0; i < Math.ceil(masterFeed.length / config.pageSize); i++) { pages.push(masterFeed.slice(i * config.pageSize, (i + 1) * config.pageSize) ) } // Write console.log('Writing...') for(let i = 0; i < pages.length; i++) { Bun.write('out/' + (i == 0 ? 'index' : i) + '.html', renderPage(pages[i], i, pages.length) ) } } const renderPage = (posts, index, pageCount) => { let html = `\
${post.user} ${date.getMonth()}/${date.getDate()}/${date.getFullYear()} open