// Ascii font used is "Shimrod"

import Path from "path"
import FS from "fs/promises"
import { JSDOM } from "jsdom"

// | o |
// . . |- . | ,-.
// | | | | | `-.
// `-` `-' ' ' `-'

/**
 * Collects promises so that they can all be awaited together later.
 * Assigns its methods to `this`, so it has to be constructed with `new`.
 */
export function PromiseBatch() {
  let promises = []

  // Registers a promise; returns the new batch size (the result of Array#push)
  this.add = (promise) => promises.push(promise)

  // Settles with Promise.all semantics over every promise added so far
  this.complete = () => Promise.all(promises)
}

/**
 * Builds a pass-through handler for promise chains.
 *
 * @param {*} annotation - Value handed to console.log when the handler runs.
 * @returns {function(*): *} Handler that logs `annotation` and returns its
 *   argument unchanged, so the chained value is preserved.
 */
export const annotate = annotation => data => {
  console.log(annotation)
  return data
}

/**
 * Writes `content` to `path`, creating the parent directory when needed.
 *
 * @param {string} path - Destination file path.
 * @param {*} content - Data forwarded to FS.writeFile.
 * @returns {Promise<void>}
 */
export const write = async (path, content) => {
  // A recursive mkdir succeeds when the directory already exists, so no
  // separate existence check is needed.
  await FS.mkdir(Path.dirname(path), { recursive: true })

  return await FS.writeFile(path, content)
}

/**
 * Fetches `url` through delayedFetch and writes the response body to `path`.
 *
 * @param {string|URL} url - Resource to download.
 * @param {string} path - Destination file path.
 * @param {number} [courtesyWait] - Delay forwarded to delayedFetch.
 * @throws {Error} When the request itself fails (original error in `cause`)
 *   or when the response status is not ok.
 */
export const download = async (url, path, courtesyWait) => {
  let response

  try {
    response = await delayedFetch(url, {}, courtesyWait)
  } catch(err) {
    // Surface the failure instead of continuing with an undefined response
    throw new Error(`Failed download of ${url}`, { cause: err })
  }

  if(!response.ok) {
    throw createNetworkingError(response)
  }

  await write(path, response.body)
}

/**
 * Creates an Error describing a non-ok response.
 *
 * @param {{ status: number, statusText: string }} response
 * @returns {Error}
 */
export const createNetworkingError = response => {
  return new Error(`Request failed, ${response.status}: ${response.statusText}`)
}

/** Returns the file extension (including the dot) of a link's pathname. */
export const getLinkExtname = link =>
  Path.extname(new URL(link).pathname)

/**
 * Builds a callback, with the (element, index, array) signature, that maps an
 * image URL to a relative path of the form `images/<source.name>-<id>[-<i>]<ext>`.
 * The index suffix is only added when the array holds more than one URL.
 */
export const buildImagePathHandler = (source, id) => (url, i, array) => {
  let path = `images/${source.name}-${id}`

  if(array.length > 1)
    path += `-${i}`

  return path + getLinkExtname(url)
}

/**
 * Copies the stylesheet at `path` to `<directory>/style.css`, registering the
 * pending copy on the view's batch.
 */
export const writeStylesheet = (path, { directory, batch }) =>
  batch.add(
    FS.readFile(path)
      .then(content => write(Path.join(directory, 'style.css'), content))
  )

/** Returns the last path segment of `post.link`. */
export const postIdFromPathname = post => {
  let { pathname } = new URL(post.link)

  return pathname.slice(pathname.lastIndexOf('/') + 1)
}

/**
 * Returns true when at least one whitelisted tag is MISSING from `array`,
 * i.e. true means the whitelist check failed.
 */
export const testWhitelist = (array, whitelist) =>
  whitelist.some(tag => !array.includes(tag))

/** Returns true when at least one blacklisted tag is present in `array`. */
export const testBlacklist = (array, blacklist) =>
  blacklist.some(tag => array.includes(tag))

/**
 * Creates a view descriptor: a fresh PromiseBatch, the output directory, the
 * page size, and any extra properties (which override the defaults).
 */
export const createView = (directory, pageSize, extra = {}) => {
  return {
    batch: new PromiseBatch(),
    directory,
    pageSize,
    ...extra
  }
}

//
// ;-. ,-. ,-.
// | `-. `-.
// ' `-' `-'

/**
 * Fetches the feed at `https://<source.hostname>/<source.pathname>` and
 * records the outcome on `source`.
 *
 * On success `source.rss` holds the response text; on failure `source.rss`
 * is '' and `source.error` holds the thrown error or a networking error.
 * `source.errored` is set in both cases.
 *
 * @param {object} source - Reads hostname, pathname, courtesyWait and name.
 * @returns {Promise<object>} The same, mutated, source object.
 */
export async function fetchRss(source) {
  let { hostname } = source
  let error
  let response

  try {
    response = await delayedFetch(
      new URL(source.pathname, 'https://' + hostname),
      {},
      source.courtesyWait ?? 5 * 1000
    )
  } catch(err) {
    error = err
  }

  source.errored = error !== undefined || !response.ok
  if(source.errored) {
    source.error = error ?? createNetworkingError(response)
    source.rss = ''
  } else {
    source.rss = await response.text()
    console.log(`Found ${source.name} at ${hostname}`)
  }

  return source
}

// Accumulated delay (ms) per domain, consumed by delayedFetch
let waitingList = new Map()

/** Resolves (with undefined) after `delay` milliseconds. */
export const sleep = delay =>
  new Promise(resolve => setTimeout(() => resolve(), delay))

/**
 * fetch() wrapper that staggers requests to the same domain: every call adds
 * `courtesyWait` to the domain's accumulated delay and first sleeps for the
 * delay that was accumulated before it.
 *
 * @param {string|URL} url
 * @param {object} options - Forwarded to fetch.
 * @param {number} [courtesyWait=5000] - Milliseconds added per request.
 * @returns {Promise<Response>}
 */
export const delayedFetch = async (url, options, courtesyWait = 5 * 1000) => {
  let { hostname } = new URL(url)
  // Bucket by the last two hostname labels; hosts without that shape
  // (e.g. IPv6 literals) are bucketed by their full hostname.
  let domain = /[\w-]+\.[\w-]+$/.exec(hostname)?.[0] ?? hostname
  let waitFor = waitingList.get(domain) ?? 0

  waitingList.set(domain, waitFor + courtesyWait)
  if(waitFor !== 0) {
    await sleep(waitFor)
  }

  return await fetch(url, options)
}

/** Cache file name for a source: `<source.name>.xml`. */
export const getCacheFilename = (source) =>
  source.name + '.xml'

/** Cache file path for a source inside the cache's directory. */
export const getCachePath = (source, { directory }) =>
  Path.join(directory, getCacheFilename(source))

/** Writes the rendered cache document of one source to its cache path. */
export const cacheSource = (source, cache) =>
  write(getCachePath(source, cache), createCache(source, cache))

/** Writes the cache documents of all sources in parallel. */
export const cacheSources = (sources, cache) =>
  Promise.all(sources.map(source => cacheSource(source, cache)))

/**
 * Loads the cache file of `source`, if it is accessible, into `source.cache`
 * ({ channel, date, link }); otherwise sets up an empty cache dated at the
 * epoch. Also initialises `source.latestPostDate` from the cache date.
 *
 * @returns {Promise<object>} The same, mutated, source object.
 */
export const openCache = async (source, cache) => {
  let path = getCachePath(source, cache)
  let exists

  try {
    await FS.access(path)
    exists = true
  } catch(err) {
    exists = false
  }

  if(exists) {
    let rss = await FS.readFile(path, { encoding: 'utf8' })
    let channel = createChannel(rss)
    let date = readPubDate(channel.querySelector('pubDate'))
    let link = new URL(channel.querySelector('link').textContent)

    source.cache = { channel, date, link }
  } else {
    source.cache = {
      date: new Date(0),
    }

    if(source.hostname)
      source.cache.link = buildCacheLink(source)
  }

  source.latestPostDate = source.cache.date

  return source
}

/** URL of the host a source was last read from. */
export const buildCacheLink = source =>
  new URL('https://' + source.hostname)

/**
 * Renders the cache document for a source.
 * NOTE(review): the template text looks like it has lost its XML markup; it
 * is reproduced verbatim here - confirm against the intended output.
 */
export const createCache = (source, cache) => `\ ${source.displayName} ${source.description} ${buildCacheLink(source)} ${new Date(source.latestPostDate).toUTCString()} rssssing ${source.posts.map(post => post.item.outerHTML.replaceAll(/\n\s*/g, '')).join('\n')} `

/** Parses `rss` as XML and returns its first `channel` element (or null). */
export const createChannel = rss => {
  let { document } = new JSDOM(rss, { contentType: 'text/xml' }).window

  return document.querySelector('channel')
}

/** Epoch milliseconds of a pubDate element's text, or 0 when it is missing. */
export const readPubDate = (pubDate) =>
  pubDate ? new Date(pubDate.textContent).valueOf() : 0

class NoMatchesError extends Error {}

/**
 * Creates a post for every `item` in `channel` and appends to `source.posts`
 * those the reducer kept (truthy result) that are newer than `fromDate`.
 *
 * @returns {object} The same, mutated, source object.
 */
export const createPosts = (channel, source, fromDate, reducerCallback) => {
  for(let item of channel.querySelectorAll('item')) {
    let post = createPost(item, source, reducerCallback)

    if(post && post.date > fromDate) {
      source.posts.push(post)
    }
  }

  return source
}

/**
 * Builds a post object from an RSS `item` element and passes it through
 * `reducerCallback`, whose result is returned.
 */
export const createPost = (item, source, reducerCallback) => {
  // The description text is itself HTML, so it gets a document of its own
  let description = new JSDOM(item.querySelector('description').textContent).window.document
  let date = readPubDate(item.querySelector('pubDate'))
  let link = item.querySelector('link').textContent
  let guid = item.querySelector('guid')?.textContent
  let title = item.querySelector('title')?.textContent

  let post = {
    source,
    item,
    description,
    date,
    link,
    guid,
    title,
    occurances: []
  }

  return reducerCallback(post)
}

/** Stores the text of every `category` element of the item on `post.categories`. */
export const processCategories = (post) => {
  post.categories = Array.from(
    post.item.querySelectorAll('category'),
    category => category.textContent
  )

  return post
}

/**
 * Collects the absolute URL of every `img` in the post description. Images
 * without `src` fall back to the last candidate of their `srcset`; images
 * with neither are skipped.
 *
 * @param {object} post
 * @param {boolean} [cache=true] - Currently unused.
 * @returns {string[]}
 * @throws {TypeError} When an image URL is not a valid absolute URL.
 */
export const extractImages = (post, cache = true) => {
  let imageUrls = []

  for(let image of post.description.querySelectorAll('img')) {
    let { src } = image

    if(!src) {
      let finalSrc = (image.srcset ?? '').split(', ').pop().trim()
      let descriptorStart = finalSrc.indexOf(' ')

      // A candidate may come without a width/density descriptor
      src = descriptorStart === -1 ? finalSrc : finalSrc.slice(0, descriptorStart)
    }

    if(!src) continue

    // Sending through URL prevents potential XSS
    imageUrls.push(new URL(src).href)
  }

  return imageUrls
}

// o
// . , . ,-. , , ,
// |/ | |-' |/|/
// ' ' `-' ' '

/**
 * Splits the posts of a list (a feed or a source) into pages of `pageSize`.
 *
 * Sorts `list.posts` in place, newest first, then builds pages starting from
 * the oldest posts, so the page holding the newest posts is created last and
 * gets the "final" filename. Each page links to the page created before it
 * through `lastPageLink`, and every post records where it appears in
 * `post.occurances`.
 *
 * @returns {object[]} Pages in creation order (oldest posts first).
 */
export const createPages = (list, { pageSize }) => {
  let posts = []
  let pages = []
  let lastPageLink = 'about:blank'

  list.posts.sort((a, b) => b.date - a.date)

  for(let i = list.posts.length - 1; i >= 0; i--) {
    posts.push(list.posts[i])

    if(i % pageSize === 0) {
      let title = getPageTitle(list, pages.length)
      let filename = i < pageSize
        ? getFinalPageFilename(list)
        : getPageFilename(list, pages.length)
      // Posts were gathered oldest first; pages list them newest first
      let page = { filename, title, posts: posts.reverse(), lastPageLink }

      page.posts.forEach((post, index) => {
        post.occurances.push({ index, list, page })
      })

      pages.push(page)
      posts = []
      lastPageLink = filename
    }
  }

  return pages
}

/** Renders a page and registers the pending file write on the view's batch. */
export const writePage = (page, { header = '', directory, batch }) => {
  let html = renderPage(page.title, page.posts, header, renderNextPageLink(page.lastPageLink))
  let promise = write(Path.join(directory, page.filename), html)

  batch.add(promise.then(annotate(`Created "${page.title}" (${page.filename})`)))
}

/** Filename of the last-created page: `index.html` for the main list. */
export const getFinalPageFilename = list =>
  (list.main ? 'index' : list.name) + '.html'

/** Filename of the i-th created page of a list. */
export const getPageFilename = (list, i) =>
  list.name + '-' + i + '.html'

/** Title of the i-th created page of a list (1-based in the title). */
export const getPageTitle = (list, i) =>
  list.displayName + ' - ' + (i + 1)

// NOTE(review): the render* templates below look like they have lost their
// HTML markup (e.g. `footer` and renderPost's `details` are never used).
// They are reproduced verbatim - confirm against the intended output.

/** Renders a whole page from its title, posts and header. */
export const renderPage = (title, posts, header, footer) => `\ ${title}
${header}
${posts.map(renderPost).join('\n')}
`

/** Renders a single post; requires `post.images` to be set. */
export const renderPost = (post, index) => {
  let details = []

  if(post.title)
    details.push([ 'title', `"${post.title}"` ])

  if(post.categories && post.categories.length > 0)
    details.push([ 'categories', post.categories.map(name => `${name}`).join(', ') ])

  details.push([ 'source', `${post.source.hostname}` ])
  details.push([ 'lists', post.occurances.map(occ => `${occ.list.displayName}`).join(', ') ])

  return `\
${post.images.map(renderImage).join('\n')}
${post.source.displayName} ${renderDate(new Date(post.date))} open

`
}

/** Renders one name/value detail entry. */
export const renderPostDetail = (name, value) => `
  • ${name} ${value}
  • `

/** Renders one image. */
export const renderImage = href => {
  return `\ `
}

/** Formats a Date as `month.day.year` in local time. */
export const renderDate = date =>
  (date.getMonth() + 1) + '.' + date.getDate() + '.' + date.getFullYear()

/** Renders the link to the next page. */
export const renderNextPageLink = link => `\ next`

/** Renders the navigation header. */
export const renderNav = (feeds, sources) => `\
    Feeds


    `

/** Renders one navigation entry, marking errored or empty lists. */
export const renderNavEntry = (list) => {
  let extra = ''

  if(list.errored) {
    extra += ' (errored)'
  } else if (list.posts.length == 0) {
    extra += ' (empty)'
  }

  return `
  • ${list.displayName}${extra}
  • `
}

// | | | o
// ,-. ,-. | | ,-: |- . ,-. ;-.
// | | | | | | | | | | | | |
// `-' `-' ' ' `-` `-' ' `-' ' '

/**
 * Schedules a download for every image that is not yet accessible on disk
 * and returns the relative paths of all images.
 *
 * @param {string[]} images - Image URLs.
 * @param {function} getImagePath - (url, index, array) => relative path.
 * @param {number} courtesyWait - Forwarded to download.
 * @param {{ directory: string, batch: object }} view
 * @returns {string[]} Relative paths, in the order of `images`.
 */
export const downloadImages = (images, getImagePath, courtesyWait, { directory, batch }) => {
  let out = []

  for(let i = 0; i < images.length; i ++) {
    let url = images[i]
    let relativePath = getImagePath(url, i, images)
    let fullPath = Path.join(directory, relativePath)

    // FS.access rejects when the file is missing, which triggers the download
    let promise = FS.access(fullPath)
      .catch(() =>
        download(url, fullPath, courtesyWait)
          .then(annotate( `Downloaded ${relativePath}`))
      )

    out.push(relativePath)
    batch.add(promise)
  }

  return out
}

/**
 * Post reducer step: downloads the images of a post and stores their relative
 * paths on `post.images`.
 *
 * @returns {object|undefined} The post, or undefined when
 *   `discardPostIfNoImages` is set and the post has no images.
 */
export const pullImages = (post, view, discardPostIfNoImages = false, getPostId = postIdFromPathname) => {
  let images = extractImages(post)

  if(!discardPostIfNoImages || images.length > 0) {
    post.images = downloadImages(
      images,
      buildImagePathHandler(post.source, getPostId(post)),
      post.source.courtesyWait,
      view
    )
    return post
  }
}

/** Creates a feed: a named list holding the posts of all given sources. */
export const createFeed = (name, sources, main = false) => {
  return {
    name,
    displayName: name,
    main,
    posts: sources.flatMap(source => source.posts)
  }
}

/**
 * Tries each instance hostname in turn, starting with the cached one when
 * there is one, until fetchRss succeeds. Failures are logged with
 * console.error. `source.instances` itself is left untouched, because the
 * same array may be shared between sources.
 *
 * @returns {Promise<object>} The mutated source; still `errored` when every
 *   instance failed.
 */
export const fetchRssFromInstances = async (source) => {
  let index = 0
  let cachedLink = source.cache.link
  let instances = cachedLink
    ? [ ...new Set([ cachedLink.hostname, ...source.instances ]) ]
    : source.instances

  while(!source.rss && index != instances.length) {
    source.hostname = instances[index]
    source = await fetchRss(source)

    if(source.errored) {
      console.error(`Failed to fetch ${source.name} from ${source.hostname}: `, source.error)
      index++
    } else {
      break
    }
  }

  return source
}

/**
 * Fills `source.posts` from the cached channel (when `useCache` is set) and
 * from the freshly fetched `source.rss`, advancing `source.latestPostDate` to
 * the newest fetched post. Sources without fetched RSS (e.g. errored ones)
 * only get their cached posts.
 *
 * @returns {object} The same, mutated, source object.
 */
export const populateSource = (source, postReducerCallback, useCache = true) => {
  let fromDate = 0
  source.posts = []

  if(useCache) {
    // Fetched posts that are not newer than the cache are already in it
    fromDate = source.latestPostDate

    if(source.cache.channel)
      source = createPosts(source.cache.channel, source, 0, postReducerCallback)
  }

  let remoteReducerCallback = post => {
    if(post.date > source.latestPostDate)
      source.latestPostDate = post.date

    return postReducerCallback(post)
  }

  if(!source.rss)
    return source

  let channel = createChannel(source.rss)

  if(channel)
    source = createPosts(channel, source, fromDate, remoteReducerCallback)

  return source
}

/**
 * Renders the pages of every feed and source, plus the stylesheet. The file
 * writes are registered on `view.batch`, and `view.header` is set to the
 * rendered navigation.
 */
export const writeView = (sources, feeds, view) => {
  view.header = renderNav(feeds, sources)

  let pages = [ ...feeds, ...sources ].flatMap(list => createPages(list, view))

  for(let page of pages) {
    writePage(page, view)
  }

  writeStylesheet(Path.join(import.meta.dirname, 'assets/style.css'), view)
}

/** Opens the cache of a source, fetches its RSS via `getRss` and builds its posts. */
export const createSource = async (source, getRss, postReducerCallback, cache) => {
  source = await openCache(source, cache)
  source = await getRss(source)
  source = populateSource(source, postReducerCallback, cache.populate)

  return source
}

// | | ,-
// ;-. | ,-: |- | ,-. ;-. ;-.-. ,-.
// | | | | | | |- | | | | | | `-.
// |-' ' `-` `-' | `-' ' ' ' ' `-'
// ' -'

/** Helpers for tumblr blogs, read from `<user>.tumblr.com/rss`. */
export const tumblr = {
  createSource(user, courtesyWait, postReducerCallback, cache) {
    let lowercaseUser = user.toLowerCase()
    let source = {
      description: `Aggregate feed for @${lowercaseUser} on tumblr.com`,
      hostname: lowercaseUser + '.tumblr.com',
      pathname: 'rss',
      courtesyWait,
      name: `tumblr-${lowercaseUser}`,
      displayName: user,
      user: lowercaseUser,
    }

    return createSource(source, fetchRss, postReducerCallback, cache)
  },

  createSources(users, ...args) {
    return Promise.all(users.map(user => tumblr.createSource(user, ...args)))
  },

  // Truthy when the description has a reblog link naming someone other than the source's user
  isRepost(post) {
    let reblog = post.description.querySelector('p > a.tumblr_blog')

    return reblog && reblog.innerHTML !== post.source.user
  },

  // False when a whitelisted tag is missing or a blacklisted tag is present
  matchesTags(post, whitelist, blacklist) {
    if(whitelist && testWhitelist(post.categories, whitelist)) {
      return false
    }

    if(blacklist && testBlacklist(post.categories, blacklist)) {
      return false
    }

    return true
  },

  pullImages
}

/** Helpers for twitter users, read from nitter instances at `<instance>/<user>/rss`. */
export const nitter = {
  createSource(user, instances, courtesyWait, postReducerCallback, cache) {
    let source = {
      description: `Aggregate feed for @${user} on twitter.com`,
      instances,
      pathname: user + '/rss',
      courtesyWait,
      name: `nitter-${user}`,
      displayName: user,
      user
    }

    return createSource(source, fetchRssFromInstances, postReducerCallback, cache)
  },

  createSources(users, ...args) {
    return Promise.all(users.map(user => nitter.createSource(user, ...args)))
  },

  // NOTE(review): this is true when the creator IS the source's user, which
  // looks inverted compared to tumblr.isRepost - confirm the intent before
  // relying on it. Behaviour is left unchanged.
  isRepost(post) {
    let creator = post.item.getElementsByTagName('dc:creator')[0]

    // slice(1) drops the first character of the creator text (presumably a leading "@")
    return creator.innerHTML.slice(1) === post.source.user
  },

  pullImages
}

// TODO: Mastodon support
//
// "Turns out Mastodon has built-in RSS; your feed URL is [instance]/@[username].rss, so for example I'm
// https://mastodon.social/@brownpau.rss (note the "@")"
// - https://mastodon.social/@brownpau/100523448408374430
export const mastodon = {
  createSource(user, instances, courtesyWait, postReducerCallback, cache) {

  }
}

export const platforms = {
  tumblr,
  nitter
}