// Section banners in this file were originally ASCII art in the "Shimrod" font.
// The art was whitespace-mangled beyond repair, so plain section headers are used instead.

import Path from "path"
import FS from "fs/promises"
import { JSDOM } from "jsdom"

// NOTE(review): `cache` is loaded at import time but is not referenced anywhere in this
// module (extractImages' `cache` parameter shadows it). Importing this module fails when
// ./cache.json is missing or is not valid JSON. Kept as-is to preserve load-time behaviour.
let cache = await FS.readFile('./cache.json', { encoding: 'utf-8' })
	.then(json => JSON.parse(json) )

// ---------------------------------------------------------------------------
// utils
// ---------------------------------------------------------------------------

/**
 * Collects promises so that they can all be awaited at once. Use with `new`.
 * - `add(promise)` registers a promise and returns the new number of registered promises.
 * - `complete()` returns `Promise.all` over everything registered so far.
 */
export function PromiseBatch() {
	let promises = []

	this.add = (promise) => promises.push(promise)
	this.complete = () => Promise.all(promises)
}

/**
 * Builds a pass-through step for promise chains: logs `annotation` and hands the received
 * value on unchanged.
 * @param {string} annotation - message to log
 * @returns {(data: any) => any}
 */
export const annotate = annotation => data => {
	console.log(annotation)
	// The previous body was a comma expression inside a block and therefore returned undefined.
	return data
}

/**
 * Writes `content` to `path`, creating the parent directory (recursively) when it is missing.
 * @param {string} path
 * @param {*} content - anything accepted by `FS.writeFile`
 */
export const write = async (path, content) => {
	let dir = Path.dirname(path)

	try {
		await FS.access(dir)
	} catch(e) {
		await FS.mkdir(dir, { recursive: true })
	}

	return await FS.writeFile(path, content)
}

/**
 * Fetches `url` (rate limited through delayedFetch) and stores the response body at `path`.
 * @param {string|URL} url
 * @param {string} path
 * @param {number} [courtesyWait] - per-domain delay in milliseconds, see delayedFetch
 * @throws the fetch error when the request itself fails, or a networking error
 *         (see createNetworkingError) when the response status is not OK
 */
export const download = async (url, path, courtesyWait) => {
	let response

	try {
		response = await delayedFetch(url, {}, courtesyWait)
	} catch(err) {
		console.error(`Failed download of ${url}:`, err)
		// Rethrow the real failure instead of tripping over `undefined.ok` below.
		throw err
	}

	if(response.ok) {
		await write(path, response.body)
	} else {
		throw createNetworkingError(response)
	}
}

/** Builds an Error describing a response via its `status` and `statusText`. */
export const createNetworkingError = response => {
	return new Error(`Request failed, ${response.status}: ${response.statusText}`)
}

/** Returns the file extension (including the dot) of the path component of `link`. */
export const getLinkExtname = link =>
	Path.extname(new URL(link).pathname)

/**
 * Returns a `(url, index, array)` callback that produces the relative image path
 * `images/<source.name>-<id>[-<index>]<ext>`. The index suffix is only added when the
 * array holds more than one entry.
 */
export const buildImagePathHandler = (source, id) => (url, i, array) => {
	let path = `images/${source.name}-${id}`

	if(array.length > 1)
		path += `-${i}`

	return path + getLinkExtname(url)
}

/** Copies the stylesheet at `path` to `<directory>/style.css`, registering the work on `batch`. */
export const writeStylesheet = (path, { directory, batch }) =>
	batch.add(
		FS.readFile(path)
			.then(content => write(Path.join(directory, 'style.css'), content))
	)

/** Returns the last path segment of `post.link`. */
export const postIdFromPathname = post => {
	let { pathname } = new URL(post.link)
	return pathname.slice(pathname.lastIndexOf('/') + 1)
}

/**
 * Loads the lock file at `path`, layered over the defaults `{ sources: {}, lists: {} }`.
 * An inaccessible file yields just the defaults; read or parse errors of an accessible
 * file propagate.
 * @param {string} path
 */
export const createLock = async (path) => {
	let lock = {
		sources: {},
		lists: {}
	}

	try {
		await FS.access(path)
	} catch(err) {
		return lock
	}

	Object.assign(lock, JSON.parse(await FS.readFile(path, { encoding: 'utf8' })))
	return lock
}

/** Serialises `lock` as JSON and writes it to `path`. */
export const writeLock = (lock, path) =>
	write(path, JSON.stringify(lock) )

/** True when at least one whitelist tag is MISSING from `array`, i.e. the whitelist is violated. */
export const testWhitelist = (array, whitelist) =>
	whitelist.some(tag => !array.includes(tag))

/** True when at least one blacklist tag is present in `array`. */
export const testBlacklist = (array, blacklist) =>
	blacklist.some(tag => array.includes(tag))

/** Creates a view/renderer descriptor with a fresh PromiseBatch; `extra` keys are merged in last. */
export const createView = (directory, pageSize, extra = {}) => {
	return {
		batch: new PromiseBatch(),
		directory,
		pageSize,
		...extra
	}
}

// ---------------------------------------------------------------------------
// rss
// ---------------------------------------------------------------------------

/**
 * Fetches `https://<source.hostname>/<source.pathname>` and records the outcome on `source`:
 * `errored`, plus either `rss` (the response text) or `error` and an empty `rss`.
 * Never rejects because of a request failure; the failure is stored on the source instead.
 * @returns the same (mutated) source object
 */
export async function fetchRss(source) {
	let { hostname } = source
	let error
	let response

	try {
		response = await delayedFetch(
			new URL(source.pathname, 'https://' + hostname),
			{},
			source.courtesyWait ?? 5 * 1000
		)
	} catch(err) {
		error = err
	}

	source.errored = error !== undefined || !response.ok

	if(source.errored) {
		source.error = error ?? createNetworkingError(response)
		source.rss = ''
	} else {
		source.rss = await response.text()
	}

	return source
}

// Maps a domain to the earliest timestamp (ms since epoch) at which the next request may start.
let waitingList = new Map()

/** Resolves (with undefined) after `delay` milliseconds. */
export const sleep = delay => new Promise(resolve => setTimeout(resolve, delay) )

/**
 * `fetch` with a per-domain courtesy delay: consecutive requests to the same domain are
 * started at least `courtesyWait` milliseconds apart.
 * @param {string|URL} url
 * @param {object} options - passed to `fetch`
 * @param {number} [courtesyWait=5000] - minimum spacing in milliseconds
 */
export const delayedFetch = async (url, options, courtesyWait = 5 * 1000) => {
	// Callers pass both URL objects (fetchRss) and plain strings (download).
	let { hostname } = new URL(url)
	// Group by the last two labels ("a.tumblr.com" -> "tumblr.com"); fall back to the full
	// hostname when there is no dot (e.g. "localhost").
	let domain = /[\w-]+\.[\w-]+$/.exec(hostname)?.[0] ?? hostname

	let now = Date.now()
	let slot = Math.max(waitingList.get(domain) ?? 0, now)

	// Reserve our slot before sleeping so that concurrent callers queue up behind it.
	waitingList.set(domain, slot + courtesyWait)

	if(slot > now) {
		await sleep(slot - now)
	}

	return await fetch(url, options)
}

class NoMatchesError extends Error {}

/**
 * Parses `source.rss` as XML and appends every accepted post newer than `fromDate` to
 * `source.posts` (which must already exist).
 * @param {number} fromDate - timestamp in ms; only posts with a greater date are kept
 * @param {(post: object) => object|undefined} reducerCallback - see processRssItem
 * @throws {NoMatchesError} when the document contains no `channel item` elements
 */
export const processRss = (source, fromDate, reducerCallback) => {
	let { document } = new JSDOM(source.rss, { contentType: 'text/xml' }).window
	let items = document.querySelectorAll('channel item')

	if(items.length === 0) {
		throw new NoMatchesError('Got no matches')
	}

	for(let item of items) {
		let post = processRssItem(source, item, reducerCallback)

		if(post && post.date > fromDate) {
			source.posts.push(post)
		}
	}

	return source
}

/**
 * Builds a post object from one RSS `<item>` element and passes it through `reducerCallback`.
 * `description`, `pubDate` and `link` must be present on the item; `guid` and `title` are optional.
 * @returns whatever `reducerCallback` returns for the post
 */
export const processRssItem = (source, item, reducerCallback) => {
	let description = new JSDOM(item.querySelector('description').textContent).window.document
	let dateString = item.querySelector('pubDate').textContent
	let link = item.querySelector('link').textContent
	let guid = item.querySelector('guid')?.textContent
	let title = item.querySelector('title')?.textContent

	// An unparsable date yields NaN (not null/undefined), so `??` can never supply the fallback.
	let timestamp = new Date(dateString).valueOf()

	let post = {
		source,
		item,
		description,
		dateString,
		date: Number.isNaN(timestamp) ? 0 : timestamp,
		link,
		guid,
		title
	}

	return reducerCallback(post)
}

/** Sets `post.categories` to the text of every `<category>` element of the item; returns the post. */
export const processCategories = (post) => {
	post.categories = Array.from(
		post.item.querySelectorAll('category'),
		category => category.textContent
	)

	return post
}

/**
 * Collects the absolute URLs of all `<img>` elements in the post description. When an image
 * has no `src`, the last candidate of its `srcset` is used; images with neither are skipped.
 * @param {object} post
 * @param {boolean} [cache=true] - unused; kept for interface compatibility
 * @returns {string[]}
 * @throws {TypeError} when an image source is not a valid absolute URL
 */
export const extractImages = (post, cache = true) => {
	let imageUrls = []

	for(let image of post.description.querySelectorAll('img')) {
		let { src } = image

		if(!src) {
			// Candidates look like "<url> <descriptor>"; the descriptor is optional.
			let finalCandidate = (image.srcset ?? '').split(', ').pop().trim()
			src = finalCandidate.split(/\s+/)[0]
		}

		if(!src) continue

		// Sending through URL prevents potential XSS
		imageUrls.push(new URL(src).href)
	}

	return imageUrls
}

// ---------------------------------------------------------------------------
// view
// ---------------------------------------------------------------------------

/**
 * Sorts `list.posts` in place (newest first) and writes them out as pages of at most
 * `pageSize` posts. Pages are produced oldest-first; the page holding the newest posts gets
 * the final filename (see getFinalPageFilename) and each page receives a link to the page
 * written before it. Writes are registered on `batch`. Nothing is written for an empty list.
 */
export const writePages = (list, { pageSize, header = '', directory, batch }) => {
	let posts = []
	let lastPageLink = 'about:blank'
	let pageIndex = 0

	list.posts.sort((a, b) => b.date - a.date)

	for(let i = list.posts.length - 1; i >= 0; i--) {
		posts.push(list.posts[i])

		// Index i is the first (newest) post of a page: flush what has been collected.
		if(i % pageSize === 0) {
			let title = getPageTitle(list, pageIndex)
			let html = renderPage(title, posts.reverse(), header, renderNextPageLink(lastPageLink))
			let filename = i < pageSize ? getFinalPageFilename(list) : getPageFilename(list, pageIndex)
			let promise = write(Path.join(directory, filename), html)

			batch.add(promise.then(annotate(`Created "${title}" (${filename})`)))

			posts = []
			lastPageLink = filename
			pageIndex++
		}
	}

	// lock.lists[list.name] = {
	// 	pageIndex,
	// 	lastPostDate: posts[0]?.date ?? lastPostDate
	// }
}

/** Filename of the page holding the newest posts: `index.html` for the main list, else `<name>.html`. */
export const getFinalPageFilename = list =>
	(list.main ? 'index' : list.name) + '.html'

/** Filename of a non-final page: `<name>-<i>.html`. */
export const getPageFilename = (list, i) =>
	list.name + '-' + i + '.html'

/** Page title: `<displayName> - <i + 1>`. */
export const getPageTitle = (list, i) =>
	list.displayName + ' - ' + (i + 1)

// NOTE(review): the HTML markup of the render* templates below appears to have been stripped
// from this copy of the file (e.g. `footer`, `details`, `href`, `link`, `feeds` and `sources`
// are no longer referenced). The remaining template text is reproduced byte-for-byte;
// restore the markup from the original source before relying on the rendered output.

/** Renders a whole page from its title, posts, header and footer. */
export const renderPage = (title, posts, header, footer) => `\ ${title}
${header}
${posts.map(renderPost).join('\n')}
`

/** Renders a single post; `post.images` must be an array. */
export const renderPost = post => {
	let date = new Date(post.date)
	let details = [
		[ 'source', `${post.source.hostname}` ]
	]

	if(post.title)
		details.push([ 'title', `"${post.title}"` ])

	if(post.categories && post.categories.length > 0)
		details.push([ 'categories', post.categories.map(name => `${name}`).join(', ') ])

	return `\ ${post.images.map(renderImage).join('\n')}
${post.source.displayName} ${renderDate(date)} open

`
}

/** Renders one name/value detail entry of a post. */
export const renderPostDetail = (name, value) => `
  • ${name} ${value}
  • `

/** Renders one image of a post. */
export const renderImage = href => {
	return `\ `
}

/** Formats a Date as `M.D.YYYY` using local time. */
export const renderDate = date =>
	`${date.getMonth() + 1}.${date.getDate()}.${date.getFullYear()}`

/** Renders the link to the next page. */
export const renderNextPageLink = link => `\ next`

/** Renders the navigation block shown at the top of every page. */
export const renderNav = (feeds, sources) => `\
    Feeds


    `

/** Renders one navigation entry, marking lists that errored or have no posts. */
export const renderNavEntry = (list) => {
	let extra = ''

	if(list.errored) {
		extra += ' (errored)'
	} else if (list.posts.length === 0) {
		extra += ' (empty)'
	}

	return `
  • ${list.displayName}${extra}
  • `
}

// ---------------------------------------------------------------------------
// collation
// ---------------------------------------------------------------------------

/**
 * Schedules a download for every image that is not yet accessible on disk and returns the
 * relative paths of all images, in input order. Downloads are registered on `batch`.
 * @param {string[]} images - image URLs
 * @param {(url: string, index: number, array: string[]) => string} getImagePath
 * @param {number} [courtesyWait]
 * @returns {string[]} paths relative to `directory`
 */
export const downloadImages = (images, getImagePath, courtesyWait, { directory, batch }) => {
	let out = []

	for(let i = 0; i < images.length; i ++) {
		let url = images[i]
		let relativePath = getImagePath(url, i, images)
		let fullPath = Path.join(directory, relativePath)

		// FS.access rejects when the file is missing; only then is the image downloaded.
		let promise = FS.access(fullPath)
			.catch(() =>
				download(url, fullPath, courtesyWait)
					.then(annotate( `Downloaded ${relativePath}`))
			)

		out.push(relativePath)
		batch.add(promise)
	}

	return out
}

/**
 * Extracts the images of `post`, schedules their download and stores the local paths in
 * `post.images`.
 * @returns the post, or undefined when `discardPostIfNoImages` is set and there are no images
 */
export const pullImages = (post, renderer, discardPostIfNoImages = false, getPostId = postIdFromPathname) => {
	let images = extractImages(post)

	if(!discardPostIfNoImages || images.length > 0) {
		post.images = downloadImages(
			images,
			buildImagePathHandler(post.source, getPostId(post)),
			post.source.courtesyWait,
			renderer
		)
		return post
	}
}

/** Creates a feed (list) whose posts are the concatenated posts of all `sources`. */
export const createFeed = (name, sources, main = false) => {
	return {
		name,
		displayName: name,
		main,
		posts: sources.flatMap(source => source.posts)
	}
}

/**
 * Tries each instance hostname in turn until one serves the RSS feed. The hostname recorded
 * in the lock, if any, is tried first. Does nothing when `source.rss` is already set.
 * @returns the (mutated) source; `source.errored` stays set when every instance failed
 */
export const fetchRssFromInstances = async (source, lock) => {
	if(source.rss) return source

	let lockHostname = lock.sources[source.name]?.hostname

	// Build a fresh list: the instances array is shared between sources and must not be mutated.
	let instances = lockHostname
		? [ lockHostname, ...source.instances.filter(hostname => hostname !== lockHostname) ]
		: source.instances

	for(let hostname of instances) {
		source.hostname = hostname
		source = await fetchRss(source)

		if(!source.errored) break

		console.error(`Failed to fetch ${source.name} from ${source.hostname}: `, source.error)
	}

	return source
}

// const addPostsToLock = (source, renderer) => {
// 	(renderer.lock.sources[source.name] ??= {})
// 		.postData = source.posts.map(post => post.description)
// }

/**
 * Fills `source.posts` with the posts from the fetched RSS that are newer than the locked
 * timestamp, followed by the posts re-created from the items stored in the lock, then
 * refreshes the lock entry of the source.
 * NOTE(review): processRss throws for a source whose fetch errored (its `rss` is empty) —
 * confirm whether errored sources are meant to fall back to the locked items instead.
 */
export const populateSource = (source, postReducerCallback, lock) => {
	let sourceLock = lock.sources[source.name] ??= {}

	source.posts = []
	source = processRss(source, sourceLock.timestamp ?? 0, postReducerCallback)

	for(let itemText of sourceLock.items ?? []) {
		let item = new JSDOM(itemText, { contentType: 'text/xml' }).window.document.documentElement
		let post = processRssItem(source, item, postReducerCallback)

		// The reducer may reject a post by returning nothing, exactly as in processRss.
		if(post) {
			source.posts.push(post)
		}
	}

	lockSource(source, lock)

	return source
}

/** Stores hostname, newest post timestamp and the serialised items of `source` in the lock. */
export const lockSource = (source, lock) => {
	let date = 0
	let items = []

	for(let post of source.posts) {
		if(post.date > date)
			date = post.date

		items.push(post.item.outerHTML)
	}

	lock.sources[source.name] = {
		hostname: source.hostname,
		timestamp: date,
		items
	}
}

/** Applies lockSource to every source. */
export const lockSources = (sources, lock) => {
	sources.forEach(source => lockSource(source, lock))
}

/** Writes the pages of every feed and source, plus the stylesheet, using `renderer`. */
export const writeView = (sources, feeds, renderer) => {
	renderer.header = renderNav(feeds, sources)

	for(let feed of feeds) {
		writePages(feed, renderer)
	}

	for(let source of sources) {
		writePages(source, renderer)
	}

	writeStylesheet(Path.join(import.meta.dirname, 'assets/style.css'), renderer)
}

// ---------------------------------------------------------------------------
// platforms
// ---------------------------------------------------------------------------

export const tumblr = {
	/** Fetches and populates the source for one Tumblr blog (`<user>.tumblr.com/rss`). */
	async createSource(user, courtesyWait, postReducerCallback, lock) {
		let lowercaseUser = user.toLowerCase()
		let source = {
			hostname: lowercaseUser + '.tumblr.com',
			pathname: 'rss',
			courtesyWait,
			name: `tumblr-${lowercaseUser}`,
			displayName: user,
			user: lowercaseUser,
		}

		source = await fetchRss(source)
		source = populateSource(source, postReducerCallback, lock)
		return source
	},

	/** Creates the sources for all `users` in parallel; `args` are forwarded to createSource. */
	createSources(users, ...args) {
		return Promise.all(users.map(user => tumblr.createSource(user, ...args)))
	},

	/** Truthy when the description has a `p > a.tumblr_blog` link naming a blog other than the source. */
	isRepost(post) {
		let reblog = post.description.querySelector('p > a.tumblr_blog')
		return reblog && reblog.innerHTML !== post.source.user
	},

	/** False when the post violates the whitelist or hits the blacklist; both lists are optional. */
	matchesTags(post, whitelist, blacklist) {
		if(whitelist && testWhitelist(post.categories, whitelist)) {
			return false
		}

		if(blacklist && testBlacklist(post.categories, blacklist)) {
			return false
		}

		return true
	},

	pullImages
}

export const nitter = {
	/** Fetches (from the first working instance) and populates the source for one user. */
	async createSource(user, instances, courtesyWait, postReducerCallback, lock) {
		let source = {
			instances,
			pathname: user + '/rss',
			courtesyWait,
			name: `nitter-${user}`,
			displayName: user,
			user
		}

		source = await fetchRssFromInstances(source, lock)
		source = populateSource(source, postReducerCallback, lock)
		return source
	},

	/** Creates the sources for all `users` in parallel; `args` are forwarded to createSource. */
	createSources(users, ...args) {
		return Promise.all(users.map(user => nitter.createSource(user, ...args)))
	},

	/**
	 * True when the item's `dc:creator` (with its first character dropped; presumably a
	 * leading "@") differs from the source user, i.e. the post was authored by somebody else.
	 * This used to return true for the user's OWN posts, which contradicted both the name
	 * and tumblr.isRepost.
	 */
	isRepost(post) {
		let creator = post.item.getElementsByTagName('dc:creator')[0]
		return creator.textContent.slice(1) !== post.source.user
	},

	pullImages
}

// TODO: Mastodon support
//
// "Turns out Mastodon has built-in RSS; your feed URL is [instance]/@[username].rss, so for example I'm
// https://mastodon.social/@brownpau.rss (note the "@")"
// - https://mastodon.social/@brownpau/100523448408374430

export const platforms = {
	tumblr,
	nitter
}