// Ascii font used is "Shimrod"

import Path from "path"
import FS from "fs/promises"
import { JSDOM } from "jsdom"

// |   o |
// . . |- . | ,-.
// | | |  | | `-.
// `-` `-' ' ' `-'

// Collects promises from unrelated async work so they can all be awaited
// in one place at the end of a run.
export function PromiseBatch() {
  let promises = []
  this.add = (promise) => promises.push(promise)
  this.complete = () => Promise.all(promises)
}

// Curried pass-through logger: logs `annotation`, then forwards `data`
// unchanged, so it can sit in the middle of a promise chain.
// FIX: the old body was `{ console.log(annotation), data }` — a comma
// expression inside a block, which returned undefined and dropped the
// chained value.
export const annotate = annotation => data => {
  console.log(annotation)
  return data
}

// Writes `content` to `path`, creating parent directories as needed.
// `mkdir` with `recursive: true` is a no-op when the directory already
// exists, so the previous access()/catch-then-mkdir dance is unnecessary.
export const write = async (path, content) => {
  await FS.mkdir(Path.dirname(path), { recursive: true })
  return await FS.writeFile(path, content)
}

// Builds an Error describing a failed (non-ok) fetch Response.
export const createNetworkingError = response => {
  return new Error(`Request failed for ${response.url}, ${response.status}: ${response.statusText}`)
}

export const getLinkExtname = link =>
  Path.extname(new URL(link).pathname)

// Base filename (without extension) for a post's stored images.
export const getImageBasePath = (source, postId) =>
  `${source.name}-${postId}`

// Queues a copy of the stylesheet into the view's output directory.
export const writeStylesheet = (path, { directory, batch }) =>
  batch.add(
    FS.readFile(path)
      .then(content => write(Path.join(directory, 'style.css'), content))
  )

export const getPostIdFromPathname = post => {
  let { pathname } = new URL(post.link)
  return pathname.slice(pathname.lastIndexOf('/') + 1)
}

// True when `array` is missing at least one tag required by `whitelist`.
export const testWhitelist = (array, whitelist) =>
  whitelist.find(tag => !array.includes(tag)) !== undefined

// True when `array` contains at least one tag banned by `blacklist`.
export const testBlacklist = (array, blacklist) =>
  blacklist.find(tag => array.includes(tag)) !== undefined

// Resolves to whether `path` is accessible.
// FIX: the previous version assigned `exists` in both branches but never
// returned it, so every caller received undefined.
export const doesExist = async (path) => {
  try {
    await FS.access(path)
    return true
  } catch(err) {
    return false
  }
}

// Ensures `path` exists as a directory; resolves to whether it already
// existed beforehand (callers use this to skip scanning fresh directories).
// FIX: `doesExist(path)` was not awaited — `exists` was always a truthy
// Promise, so the mkdir branch never ran and the returned value was
// meaningless.
export const ensureDir = async (path) => {
  let exists = await doesExist(path)
  if(!exists) {
    await FS.mkdir(path, { recursive: true })
  }
  return exists
}

// True for undefined or null only (unlike plain falsiness checks).
export const isUnset = (value) =>
  typeof value === "undefined" || value === null

//
// ;-. ,-. ,-.
// |   `-. `-.
// '   `-' `-'

// Downloads and parses the RSS channel for `source`, tracking failure
// state on the source itself (source.errored / source.error). Resolves
// to the <channel> element, or undefined on failure.
export async function fetchChannel(source) {
  let { hostname } = source
  let error
  let response
  let channel

  try {
    response = await delayedFetch(
      new URL(source.pathname, 'https://' + hostname),
      {},
      source.courtesyWait
    )
  } catch(err) {
    error = err
  }

  // `error !== undefined` short-circuits before touching response.ok
  // when the fetch itself threw (response would be undefined then).
  source.errored = error !== undefined || !response.ok
  if(source.errored) {
    source.error = error ?? createNetworkingError(response)
    return
  }

  console.log(`Found ${source.name} at ${hostname}`)

  try {
    channel = createChannel(await response.text())
  } catch(err) {
    error = err
  }

  source.errored = error !== undefined
  if(source.errored) {
    source.error = error
    return
  }

  return channel
}

// Accumulated courtesy delays, keyed by registrable domain.
let waitingList = new Map()

export const sleep = delay =>
  new Promise(resolve => setTimeout(resolve, delay))

// Fetch that self-throttles per domain: each call pushes the next call to
// the same domain `courtesyWait` ms further into the future.
export const delayedFetch = async (url, options, courtesyWait = 5 * 1000) => {
  let { hostname } = new URL(url)
  // FIX: the dot was unescaped (`.` matched any character), and exec()
  // returns null for single-label hostnames such as "localhost", which
  // crashed the old array destructuring. Fall back to the full hostname.
  let match = /[\w-]+\.[\w-]+$/.exec(hostname)
  let domain = match ? match[0] : hostname

  let waitFor = waitingList.get(domain) ?? 0
  waitingList.set(domain, waitFor + courtesyWait)

  if(waitFor !== 0) {
    console.log(`Waiting ${waitFor}ms to download ${url}`)
    await sleep(waitFor)
  }

  return await fetch(url, options)
}

// delayedFetch with up to `retryAttempts` retries (default 3).
// FIX: fetch rejections previously escaped the loop on the first attempt,
// making the retry path unreachable; they are now caught so the loop can
// actually retry. Resolves to undefined when every attempt failed.
export const retryDelayedFetch = async (url, options, courtesyWait, retryAttempts) => {
  let attemptsTried = 0
  let response = undefined
  while(isUnset(response) && attemptsTried <= (retryAttempts ?? 3)) {
    if(attemptsTried > 0) console.error(`Failed to fetch ${url}, retrying...`)
    try {
      response = await delayedFetch(url, options, courtesyWait)
    } catch(err) {
      console.error(`Failed to fetch ${url}:`, err)
    }
    attemptsTried++
  }
  return response
}

export const getCacheFilename = (source) =>
  source.name + '.xml'

export const getCachePath = (source, cache) =>
  Path.join(cache.directory, getCacheFilename(source))

export const cacheSource = (source, cache) =>
  write(getCachePath(source, cache), renderCache(source, cache))

export const cacheSources = (sources, cache) =>
  Promise.all(sources.map(source => cacheSource(source, cache)))

// Loads a source's cached channel from disk (if present) and seeds
// source.cache / source.latestPostDate from it.
export const openCache = async (source, cache) => {
  let path = getCachePath(source, cache)
  let exists = await doesExist(path)
  let rss

  if(exists) rss = await FS.readFile(path, { encoding: 'utf8' })

  if(exists && rss) {
    let channel = createChannel(rss)
    source.cache = {
      channel,
      date: readPubDate(channel.querySelector('pubDate')),
      link: new URL(channel.querySelector('link').textContent),
    }
  } else {
    // NOTE: `date` is a Date here but a millisecond number in the branch
    // above; both coerce correctly in the `>` / `<=` comparisons used on
    // latestPostDate.
    source.cache = { date: new Date(0) }
    if(source.hostname) source.cache.link = buildCacheLink(source)
  }

  source.latestPostDate = source.cache.date
  return source
}

export const buildCacheLink = source =>
  new URL('https://' + source.hostname)

// NOTE(review): this template looks garbled — it reads like an RSS/XML
// document whose element tags were stripped. Content preserved as found;
// confirm against the original before relying on cache round-trips.
export const renderCache = (source, cache) => `\
${source.displayName} ${source.description} ${buildCacheLink(source)} ${new Date(source.latestPostDate).toUTCString()} rssssing ${source.items.map(item => item.outerHTML.replaceAll(/\n\s*/g, '')).join('\n')} `

// Parses an RSS document and returns its <channel> element.
export const createChannel = rss => {
  let { document } = new JSDOM(rss, { contentType: 'text/xml' }).window
  return document.querySelector('channel')
}

// Millisecond timestamp of a <pubDate> element, or 0 when absent.
export const readPubDate = (pubDate) => pubDate ?
  new Date(pubDate.textContent).valueOf() : 0

// Converts a channel's <item>s newer than `fromDate` into posts, passing
// each through `reducerCallback` (which may be async and may drop a post
// by returning a falsy value).
export const createPosts = async (channel, source, fromDate, reducerCallback) => {
  let items = channel.querySelectorAll('item')
  let promises = []

  for(let item of items) {
    let post = createPost(item, source)
    if(post.date <= fromDate) continue
    source.items.push(item)

    let postResolvable = reducerCallback(post)
    if(postResolvable instanceof Promise) {
      // FIX: await the chained promise (not the bare reducer result) so
      // the Promise.all below cannot settle before the push happens.
      promises.push(postResolvable.then(reduced => {
        if(reduced) {
          source.posts.push(reduced)
        }
      }))
    } else if(postResolvable) {
      source.posts.push(postResolvable)
    }
  }

  await Promise.all(promises)
  return source
}

// Builds the in-memory post record for one RSS <item>. The description is
// itself HTML, so it gets its own JSDOM parse.
export const createPost = (item, source) => {
  let description = new JSDOM(item.querySelector('description').textContent).window.document
  let date = readPubDate(item.querySelector('pubDate'))
  let link = item.querySelector('link').textContent
  let guid = item.querySelector('guid')?.textContent
  let title = item.querySelector('title')?.textContent

  let post = {
    source, item, description, date, link, guid, title,
    occurances: []
  }
  return post
}

// Copies the item's <category> texts onto post.categories.
export const processCategories = (post) => {
  let categoryMatches = post.item.querySelectorAll('category')
  post.categories = []
  for(let category of categoryMatches) {
    post.categories.push(category.textContent)
  }
  return post
}

// Collects absolute image URLs from a post's parsed description.
// FIX: querySelectorAll always returns a (truthy) NodeList, so the old
// `if(images)` guard was dead; and a missing src attribute reads as "",
// not null/undefined, so the srcset fallback never fired — it now covers
// the empty string too.
export const extractImages = (post) => {
  let imageUrls = []
  for(let image of post.description.querySelectorAll('img')) {
    let { src } = image
    if(isUnset(src) || src === '') {
      let finalSrc = image.srcset.split(', ').pop()
      src = finalSrc.slice(0, finalSrc.indexOf(' '))
    }
    // Sending through URL prevents potential XSS
    imageUrls.push(new URL(src).href)
  }
  return imageUrls
}

// o
// . , . ,-.
// , , ,
// |/ |  |-' |/|/
// '  '  `-' ' '

// Builds a "view": an output directory plus a PromiseBatch collecting all
// pending writes. `extra` is merged in (e.g. imageStoreDirectory).
export const createView = async (directory, pageSize, extra = {}) => {
  let view = {
    batch: new PromiseBatch(),
    directory,
    pageSize,
    ...extra
  }

  await ensureDir(view.directory)
  if(view.imageStoreDirectory) await openImageStore(view)
  return view
}

// Scans the on-disk image store and indexes basename -> relative path in
// view.imageStore, so previously downloaded images are reused.
export const openImageStore = async view => {
  let imageStorePath = Path.join(view.directory, view.imageStoreDirectory)
  view.imageStore = new Map()
  // ensureDir resolves to whether the directory already existed; a fresh
  // store has nothing to scan.
  if(!await ensureDir(imageStorePath)) {
    return view
  }
  let dirents = await FS.readdir(imageStorePath, { withFileTypes: true })
  for(let dirent of dirents) {
    if(dirent.isFile()) {
      let basename = dirent.name.slice(0, dirent.name.lastIndexOf('.'))
      view.imageStore.set(basename, Path.join(view.imageStoreDirectory, dirent.name))
    }
  }
  return view
}

// Renders every feed and source into pages and queues all writes on
// view.batch (nothing is awaited here).
export const writeView = (sources, feeds, view) => {
  view.header = renderNav(feeds, sources)
  let pages = []
  for(let feed of feeds) {
    pages = pages.concat(createPages(feed, view))
  }
  for(let source of sources) {
    pages = pages.concat(createPages(source, view))
  }
  for(let page of pages) {
    writePage(page, view)
  }
  writeStylesheet(Path.join(import.meta.dirname, 'assets/style.css'), view)
}

// Splits list.posts into pages of `pageSize` posts (NOTE: sorts the
// caller's posts array in place, newest first), chaining each page to the
// previously generated (older) one via lastPageLink, and records on every
// post where it appears (occurances).
export const createPages = (list, { pageSize }) => {
  let posts = []
  let pages = []
  let lastPageLink = 'about:blank'

  list.posts.sort((a, b) => b.date - a.date)

  // Walk from oldest to newest so lastPageLink can point at the page
  // rendered just before.
  for(let i = list.posts.length - 1; i >= 0; i--) {
    posts.push(list.posts[i])
    if(i % pageSize == 0) {
      let title = getPageTitle(list, pages.length)
      // The newest page (fewer than pageSize posts left) becomes the
      // list's landing page.
      let filename = i < pageSize ?
        getFinalPageFilename(list) :
        getPageFilename(list, pages.length)

      let page = { filename, title, posts: posts.reverse(), lastPageLink }
      for(let i = 0; i < page.posts.length; i++) {
        page.posts[i].occurances.push({ index: i, list, page })
      }
      pages.push(page)
      posts = []
      lastPageLink = filename
    }
  }

  return pages
}

// Renders a single page to HTML and queues its write on the view's batch.
export const writePage = (page, { header = '', directory, batch }) => {
  let html = renderPage(page.title, page.posts, header, renderNextPageLink(page.lastPageLink))
  let promise = write(Path.join(directory, page.filename), html)
  batch.add(promise.then(annotate(`Created "${page.title}" (${page.filename})`)))
}

// The newest page is index.html for the main feed, <name>.html otherwise.
export const getFinalPageFilename = list =>
  (list.main ? 'index' : list.name) + '.html'

export const getPageFilename = (list, i) =>
  list.name + '-' + i + '.html'

export const getPageTitle = (list, i) =>
  list.displayName + ' - ' + (i + 1)

// NOTE(review): the markup in this template (and in the render* helpers
// below) appears to have been stripped down to bare text at some point —
// e.g. `footer` is accepted but no longer referenced in the body.
// Preserved exactly as found; recover the original markup before relying
// on the rendered output.
export const renderPage = (title, posts, header, footer) => `\
${title}
${header}
${posts.map(renderPost).join('\n')}
`

// Renders one post. NOTE(review): `details` is assembled here but is not
// referenced by the (seemingly stripped) template below — presumably it
// once fed renderPostDetail; confirm against the original markup.
export const renderPost = (post, index) => {
  let details = []
  if(post.title) details.push([ 'title', `"${post.title}"` ])
  if(post.categories && post.categories.length > 0)
    details.push([ 'categories', post.categories.map(name => `${name}`).join(', ') ])
  details.push([ 'source', `${post.source.hostname}` ])
  details.push([ 'lists', post.occurances.map(occ => `${occ.list.displayName}`).join(', ') ])

  return `\
${post.images.map(renderImage).join('\n')}
${post.source.displayName} (${post.source.type}) ${renderDate(new Date(post.date))} open

`
}

// Renders one name/value detail row (see NOTE on renderPage about
// stripped markup).
export const renderPostDetail = (name, value) => `
  • ${name} ${value}
  • `

// NOTE(review): template appears stripped — `href` is unused here.
export const renderImage = href => {
  return `\
`
}

// M.D.YYYY
export const renderDate = date =>
  (date.getMonth() + 1) + '.' + date.getDate() + '.' + date.getFullYear()

// Link to the previous (older) page of a list.
export const renderNextPageLink = link => `\
next`

// Groups sources by type and renders the site navigation.
// NOTE(review): `out` is accumulated but not referenced by the returned
// template — more evidence of stripped markup (see renderPage).
export const renderNav = (feeds, sources) => {
  let sections = {}
  for(let source of sources) {
    let section = sections[source.type]
    if(section) {
      section.push(source)
    } else {
      sections[source.type] = [ source ]
    }
  }

  let out = ''
  for(let name in sections) {
    out += `
  • ${name}
    ${sections[name].map(renderNavEntry).join('\n')}
  • `
  }

  return `\
    Feeds


    `
}

// Renders one nav entry, flagging errored or empty lists.
export const renderNavEntry = (list) => {
  let extra = ''
  if(list.errored) {
    extra += ' (errored)'
  } else if (list.posts.length == 0) {
    extra += ' (empty)'
  }
  return `${list.displayName}${extra}`
}

//      |                    |              |
// ,-. ,-. | | ,-: |- . ,-. ;-.
// | | | | | | | | |  | | | | |
// `-' `-' ' ' `-` `-' ' `-' ' '

// Downloads one image into the view's image store, queuing the disk write
// on view.batch. Resolves to the store-relative path on success, or the
// original URL as a fallback reference on any failure.
export const downloadImage = async (url, basename, { courtesyWait, retryAttempts }, { batch, directory, imageStoreDirectory }) => {
  let response = await retryDelayedFetch(url, {}, courtesyWait, retryAttempts)
    .catch(err => console.error(`Failed download of ${url}:`, err, err.errors) )

  if(response == undefined) {
    console.error('Could not download image: ' + url)
    return url
  }

  if(response.ok) {
    let mimetype = response.headers.get('Content-Type').split(';')[0]
    let extension = imageExtensions[mimetype]

    if(typeof extension !== 'string') {
      console.error(`Unknown mimetype for ${url}: ${mimetype}. Cannot download`)
      return url
    }

    let relativePath = basename + extension
    let path = Path.join(directory, imageStoreDirectory, relativePath)

    // Write only when the file is not already present on disk.
    const download = () => write(path, response.body)
      .then(annotate( `Downloaded ${relativePath}`))

    // TODO: See if the image is downloaded before even trying to download it
    batch.add(FS.access(path).catch(download))

    return relativePath
  } else {
    console.error( createNetworkingError(response) )
    return url
  }
}

// Resolves store paths for all of a post's images, downloading the ones
// view.imageStore does not already have. Multi-image posts get a -<index>
// suffix per image.
export const downloadImages = (images, source, postId, view) => {
  let basePath = getImageBasePath(source, postId)
  let pathnames = []

  for(let i = 0; i < images.length; i++) {
    let basename = images.length > 1 ?
      basePath + '-' + i :
      basePath

    let pathname = view.imageStore.get(basename)
    if(isUnset(pathname)) {
      pathname = downloadImage(images[i], basename, source, view)
    }
    pathnames.push(pathname)
  }

  return Promise.all(pathnames)
}

// Content-Type -> stored file extension.
// NOTE(review): 'image/svg+xml' maps to '.xml' — looks like it should be
// '.svg'; confirm before changing, since existing stores were written
// with '.xml'.
export const imageExtensions = {
  'image/apng': '.apng',
  'image/avif': '.avif',
  'image/bmp': '.bmp',
  'image/gif': '.gif',
  'image/vnd.microsoft.icon': '.icon',
  'image/jpeg': '.jpg',
  'image/png': '.png',
  'image/svg+xml': '.xml',
  'image/tiff': '.tif',
  'image/webp': '.webp'
}

// Default image pipeline: extract image URLs from the post description
// and download them. Returns undefined (dropping the post) when
// discardPostIfNoImages is set and nothing was found.
export const pullImages = async (post, view, discardPostIfNoImages = false, getPostId = getPostIdFromPathname) => {
  let images = extractImages(post)

  if(!discardPostIfNoImages || images.length > 0) {
    post.images = await downloadImages(
      images,
      post.source,
      getPostId(post),
      view
    )
    return post
  }
}

// Aggregates the posts of several sources into one named feed.
export const createFeed = (name, sources, main = false) => {
  return {
    name,
    displayName: name,
    main,
    posts: sources.reduce((posts, source) => posts.concat(source.posts), [])
  }
}

// Tries each instance hostname in turn (preferring the cached one) until
// one serves the channel. NOTE: mutates source.instances via unshift.
export const fetchChannelFromInstances = async (source) => {
  let index = 0
  let instances = source.instances
  let cachedLink = source.cache.link
  let channel

  if(cachedLink) {
    instances.unshift(cachedLink.hostname)
  }

  while(!channel && index != instances.length) {
    source.hostname = instances[index]
    channel = await fetchChannel(source)
    if(source.errored) {
      console.error(`Failed to fetch ${source.name} from ${source.hostname}: `, source.error)
      index++
    } else {
      break
    }
  }

  return channel
}

// Fills source.items / source.posts from the cached channel (in full) and
// the remote channel (only posts newer than the cached pubDate), keeping
// source.latestPostDate up to date.
export const populateSource = async (channel, source, postReducerCallback, cache) => {
  let fromDate = 0
  source.items = []
  source.posts = []

  if(cache.enabled) {
    fromDate = source.latestPostDate
    if(source.cache.channel)
      source = await createPosts(source.cache.channel, source, 0, postReducerCallback)
  }

  let remoteReducerCallback = post => {
    if(post.date > source.latestPostDate)
      source.latestPostDate = post.date
    return postReducerCallback(post)
  }

  // `channel ?? false`: only process when a channel was actually fetched.
  if(channel ?? false)
    source = await createPosts(channel, source, fromDate, remoteReducerCallback)

  return source
}

// End-to-end construction of a source: open cache, fetch, populate, and
// queue the cache rewrite on cache.batch.
export const createSource = async (source, getChannel, postReducerCallback, cache) => {
  if(cache.enabled) source = await openCache(source, cache)
  source = await populateSource(await getChannel(source), source, postReducerCallback, cache)
  if(cache.enabled) cache.batch.add(cacheSource(source, cache))
  return source
}

// Applies default fetch options in place and returns `options`.
// NOTE(review): the `view` parameter is unused and callers pass only
// `options` — confirm before removing.
export const createSourceOptions = (options, view) => {
  if(isUnset(options.courtesyWait)) options.courtesyWait = 1000
  if(isUnset(options.retryAttempts)) options.retryAttempts = 3
  return options
}

//     |      |    ,-
// ;-. |  ,-: |- | ,-. ;-. ;-.-. ,-.
// | | | | | | |  |- | | | | | | `-.
// |-' ' `-` `-' |  `-' '  ' ' ' `-'
// '             -'

// tumblr.com adapter: one RSS feed per blog at <user>.tumblr.com/rss.
export const tumblr = {
  createSource(user, options, postReducerCallback, cache) {
    let lowercaseUser = user.toLowerCase()
    let source = {
      type: 'tumblr',
      description: `Aggregate feed for @${lowercaseUser} on tumblr.com`,
      hostname: lowercaseUser + '.tumblr.com',
      pathname: 'rss',
      name: `tumblr-${lowercaseUser}`,
      displayName: user,
      user: lowercaseUser,
      ...createSourceOptions(options)
    }
    return createSource(source, fetchChannel, postReducerCallback, cache)
  },
  createSources(users, ...args) {
    return Promise.all(users.map(user => tumblr.createSource(user, ...args)))
  },
  // A reblog's description starts with a link back to the reblogged blog.
  isRepost(post) {
    let reblog = post.description.querySelector('p > a.tumblr_blog')
    return reblog && reblog.innerHTML !== post.source.user
  },
  // True when the post passes both the required-tags whitelist and the
  // banned-tags blacklist (either may be omitted).
  matchesTags(post, whitelist, blacklist) {
    if(whitelist && testWhitelist(post.categories, whitelist)) {
      return false
    }
    if(blacklist && testBlacklist(post.categories, blacklist)) {
      return false
    }
    return true
  },
  pullImages
}

// nitter (twitter frontend) adapter; rotates across mirror instances.
export const nitter = {
  createSource(user, options, instances, postReducerCallback, cache) {
    let source = {
      type: 'nitter',
      description: `Aggregate feed for @${user} on twitter.com`,
      instances,
      pathname: user + '/rss',
      name: `nitter-${user}`,
      displayName: user,
      user,
      ...createSourceOptions(options)
    }
    return createSource(source, fetchChannelFromInstances, postReducerCallback, cache)
  },
  createSources(users, ...args) {
    return Promise.all(users.map(user => nitter.createSource(user, ...args)))
  },
  // dc:creator carries the original author; slice(1) drops the leading '@'.
  isRepost(post) {
    let creator = post.item.getElementsByTagName('dc:creator')[0]
    return creator.innerHTML.slice(1) !== post.source.user
  },
  // Like the shared pullImages, but rewrites image URLs through a mirror
  // domain before downloading.
  async pullImages (post, view, imageMirrorDomain, discardPostIfNoImages = false, getPostId = getPostIdFromPathname) {
    let images = extractImages(post)
    let mirroredImages = []
    const mirrorImage = nitter.createImageMirrorer(post, imageMirrorDomain)

    if(!discardPostIfNoImages || images.length > 0) {
      post.images = await downloadImages(
        images.map(mirrorImage),
        post.source,
        getPostId(post),
        view
      )
      return post
    }
  },
  // Maps the i-th image of a tweet to <mirror>/<tweet path>/photo/<i+1>.
  createImageMirrorer(post, imageMirrorDomain) {
    let mirrorUrl = new URL(imageMirrorDomain)
    let basePathname = new URL(post.guid).pathname
    return (image, index, images) => {
      mirrorUrl.pathname = Path.join(basePathname, 'photo', (index + 1).toString())
      return mirrorUrl.href
    }
  }
}

// mastodon adapter: usertag is "user@instance".
export const mastodon = {
  createSource(usertag, options, postReducerCallback, cache) {
    let [ user, hostname ] = usertag.toLowerCase().split('@')
    let source = {
      type: 'mastodon',
      description: `Aggregate feed for @${user} at ${hostname}`,
      hostname,
      pathname: '@' + user + ".rss",
      name: `${hostname}-${user}`,
      displayName: user,
      user,
      ...createSourceOptions(options)
    }
    return createSource(source, fetchChannel, postReducerCallback, cache)
  },
  isRepost(post) {
    // Mastodon's rss does not provide retweets/retoots
    return false
  },
  // Mastodon exposes attachments via media:content elements rather than
  // inline images in the description.
  async pullImages(post, view, discardPostIfNoImages) {
    let media = post.item.getElementsByTagName('media:content')
    let images = []

    for(let image of media) {
      images.push(image.getAttribute('url'))
    }

    if(!discardPostIfNoImages || media.length > 0) {
      post.images = await downloadImages(
        images,
        post.source,
        getPostIdFromPathname(post),
        view
      )
      return post
    }
  }
}