rssssing/lib.js

719 lines
17 KiB
JavaScript

// Ascii font used is "Shimrod"
import Path from "path"
import FS from "fs/promises"
import { JSDOM } from "jsdom"
import mime from "mime-types"
// | o |
// . . |- . | ,-.
// | | | | | `-.
// `-` `-' ' ' `-'
// Collects promises so that they can all be awaited together later.
// Intended to be called with `new`; `add` and `complete` are attached to the instance.
export function PromiseBatch() {
  const pending = []

  // Registers a promise. Returns the result of Array#push (the new count).
  this.add = (promise) => {
    return pending.push(promise)
  }

  // Resolves with every registered promise's value (Promise.all semantics).
  this.complete = () => {
    return Promise.all(pending)
  }
}
// Builds a pass-through logger for promise chains: the returned function
// prints `annotation` and hands back whatever value it received, so
// `.then(annotate('done'))` does not change the resolved value.
export const annotate = annotation =>
  data => {
    console.log(annotation)
    // The original body was the expression statement `console.log(...), data`,
    // which evaluated `data` but never returned it.
    return data
  }
// Writes `content` to `path`. When the parent directory is not accessible it
// is created first (recursively). Resolves with the result of FS.writeFile.
export const write = async (path, content) => {
  const parent = Path.dirname(path)
  await FS.access(parent)
    .catch(() => FS.mkdir(parent, { recursive: true }))
  return await FS.writeFile(path, content)
}
// Turns a response object into an Error describing its `status` and `statusText`.
export const createNetworkingError = response => {
  const { status, statusText } = response
  const message = `Request failed, ${status}: ${statusText}`
  return new Error(message)
}
// Returns the file extension (including the dot) of the path part of `link`.
// Query string and fragment are ignored because only `pathname` is inspected.
export const getLinkExtname = link => {
  const { pathname } = new URL(link)
  return Path.extname(pathname)
}
// Relative path (without extension) under which a post's images are stored:
// "images/<source name>-<post id>".
export const getImageBasePath = (source, postId) => {
  const { name } = source
  return `images/${name}-${postId}`
}
// Copies the stylesheet at `path` to "<directory>/style.css" and registers the
// pending copy on the view's batch. Returns whatever `batch.add` returns.
export const writeStylesheet = (path, { directory, batch }) => {
  const copy = async () => {
    const content = await FS.readFile(path)
    return write(Path.join(directory, 'style.css'), content)
  }
  return batch.add(copy())
}
// Uses the last path segment of `post.link` as the post's id.
// A link ending in "/" therefore yields an empty string.
export const getPostIdFromPathname = post => {
  const segments = new URL(post.link).pathname.split('/')
  return segments.pop()
}
// Returns true when at least one tag of `whitelist` is missing from `array`,
// i.e. when the whitelist is violated (callers reject the post on `true`).
// `some` is used instead of `find(...) !== undefined` so that the result does
// not depend on the value of the matching element.
export const testWhitelist = (array, whitelist) =>
  whitelist.some(tag => !array.includes(tag))
// Returns true when at least one tag of `blacklist` is present in `array`
// (callers reject the post on `true`).
// `some` is used instead of `find(...) !== undefined` so that the result does
// not depend on the value of the matching element.
export const testBlacklist = (array, blacklist) =>
  blacklist.some(tag => array.includes(tag))
// Creates a view description: a fresh PromiseBatch plus the output directory
// and page size. Properties of `extra` are copied last and override the defaults.
export const createView = (directory, pageSize, extra = {}) => {
  const base = { batch: new PromiseBatch(), directory, pageSize }
  return Object.assign(base, extra)
}
//
// ;-. ,-. ,-.
// | `-. `-.
// ' `-' `-'
// Fetches "https://<source.hostname>/<source.pathname>" through delayedFetch,
// waiting `source.courtesyWait` (default 5 seconds) between hits.
// Records the outcome on the source: `errored` is always set, `error` only on
// failure. Resolves with the response text, or undefined when the fetch failed.
export async function fetchRss(source) {
  const { hostname } = source
  let response
  let failure

  try {
    const feedUrl = new URL(source.pathname, 'https://' + hostname)
    const wait = source.courtesyWait ?? 5 * 1000
    response = await delayedFetch(feedUrl, {}, wait)
  } catch(err) {
    failure = err
  }

  source.errored = failure !== undefined || !response.ok
  if(source.errored) {
    // Prefer the thrown error; otherwise describe the non-ok response.
    source.error = failure ?? createNetworkingError(response)
    return undefined
  }

  const rss = await response.text()
  console.log(`Found ${source.name} at ${hostname}`)
  return rss
}
// Per-domain delay (in ms) that the next request to that domain has to wait.
// Every call adds its courtesy wait on top, staggering requests to one domain.
let waitingList = new Map()

// Resolves (with no value) after `delay` milliseconds.
export const sleep = delay => new Promise(resolve => setTimeout(() => resolve(), delay) )

// fetch() wrapper that spaces out requests to the same domain.
// url          - anything accepted by `new URL`
// options      - passed to fetch unchanged
// courtesyWait - milliseconds added to the domain's delay for later requests
// Resolves with the fetch response; rejections from fetch propagate.
export const delayedFetch = async (url, options, courtesyWait = 5 * 1000) => {
  let { hostname } = new URL(url)
  // Requests are grouped by the last two hostname labels ("a.b.example.com"
  // -> "example.com"). The dot is escaped so it only matches a literal dot,
  // and hostnames that do not fit the pattern (single label, IPv6 literal)
  // are used as-is instead of crashing on a null match.
  let domain = /[\w-]+\.[\w-]+$/.exec(hostname)?.[0] ?? hostname
  let waitFor = waitingList.get(domain) ?? 0

  waitingList.set(domain, waitFor + courtesyWait)
  if(waitFor !== 0) {
    await sleep(waitFor)
  }
  return await fetch(url, options)
}
// File name of a source's cached feed: "<source name>.xml".
export const getCacheFilename = (source) => {
  const { name } = source
  return `${name}.xml`
}
// Path of a source's cache file inside the cache's `directory`.
export const getCachePath = (source, { directory }) => {
  const filename = getCacheFilename(source)
  return Path.join(directory, filename)
}
// Renders the source as an RSS document and writes it to the source's cache path.
// Returns the promise produced by `write`.
export const cacheSource = (source, cache) => {
  const destination = getCachePath(source, cache)
  const xml = renderCache(source, cache)
  return write(destination, xml)
}
// Caches every source; resolves once all cache files have been written.
export const cacheSources = (sources, cache) => {
  const writes = sources.map(source => cacheSource(source, cache))
  return Promise.all(writes)
}
// Loads a source's cached feed (if any) and attaches it as `source.cache`.
//
// With a readable, non-empty cache file containing a <channel>:
//   source.cache = { channel, date, link }, where `date` comes from the
//   channel's pubDate and `link` from the channel's <link>.
// Otherwise:
//   source.cache = { date: new Date(0) }, plus `link` when the source
//   already has a hostname.
// In both cases `source.latestPostDate` is set to the cache date and the
// (mutated) source is returned.
export const openCache = async (source, cache) => {
  let path = getCachePath(source, cache)
  let exists

  try {
    await FS.access(path)
    exists = true
  } catch(err) {
    exists = false
  }

  let rss

  if(exists)
    rss = await FS.readFile(path, { encoding: 'utf8' })

  // The original tested `exists & rss`: a bitwise AND that coerces the XML
  // text to NaN and always yields 0, so the cache was never used.
  // A cache file without a <channel> element is treated like a missing cache.
  let channel = exists && rss ? createChannel(rss) : null

  if(channel) {
    let date = readPubDate(channel.querySelector('pubDate'))
    let link = new URL(channel.querySelector('link').textContent)

    source.cache = {
      channel,
      date,
      link
    }
  } else {
    source.cache = {
      date: new Date(0),
    }

    if(source.hostname)
      source.cache.link = buildCacheLink(source)
  }

  source.latestPostDate = source.cache.date

  return source
}
// URL object pointing at the https root of the source's current hostname.
export const buildCacheLink = source => {
  const origin = 'https://' + source.hostname
  return new URL(origin)
}
// Serialises a source as an RSS 2.0 document for the cache.
// Every collected item is written on one line: its outerHTML with each
// newline (and the whitespace following it) removed.
export const renderCache = (source, cache) => {
  const lines = [
    `<?xml version="1.0" encoding="UTF-8" ?>`,
    `<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">`,
    `<channel>`,
    `<title>${source.displayName}</title>`,
    `<description>${source.description}</description>`,
    `<link>${buildCacheLink(source)}</link>`,
    `<atom:link href="${new URL(getCacheFilename(source), cache.directoryUrl)}" rel="self" type="application/rss+xml" />`,
    `<pubDate>${new Date(source.latestPostDate).toUTCString()}</pubDate>`,
    `<generator>rssssing</generator>`,
    `${source.items.map(item => item.outerHTML.replaceAll(/\n\s*/g, '')).join('\n')}`,
    `</channel>`,
    `</rss>`
  ]
  return lines.join('\n')
}
// Parses `rss` as XML with JSDOM and returns the first <channel> element
// (null when the document has none).
export const createChannel = rss => {
  const dom = new JSDOM(rss, { contentType: 'text/xml' })
  return dom.window.document.querySelector('channel')
}
// Converts a <pubDate> element into a millisecond timestamp.
// A missing element yields 0; unparsable text yields NaN.
export const readPubDate = (pubDate) => {
  if(!pubDate) {
    return 0
  }
  return new Date(pubDate.textContent).valueOf()
}
// Builds a post for every <item> of `channel`.
// Each item is appended to `source.items`; a post is appended to
// `source.posts` only when the reducer returned one and it is newer than
// `fromDate`. Resolves with `source` after all posts have been processed.
export const createPosts = async (channel, source, fromDate, reducerCallback) => {
  const keepIfNew = post => {
    if(post && post.date > fromDate) {
      source.posts.push(post)
    }
    return post
  }

  const pending = Array.from(channel.querySelectorAll('item'), item => {
    source.items.push(item)
    return createPost(item, source, reducerCallback).then(keepIfNew)
  })

  await Promise.all(pending)
  return source
}
// Builds a post object from an RSS <item> and passes it through
// `reducerCallback`, whose (awaited) result is returned.
//
// The post carries: source, item, description (the item's description parsed
// into an HTML document), date (ms timestamp), link, guid, title and an empty
// `occurances` list.
export const createPost = async (item, source, reducerCallback) => {
  // RSS 2.0 makes every item element optional (only one of title/description
  // is required), so a missing <description> is parsed as an empty document
  // instead of throwing.
  let descriptionHtml = item.querySelector('description')?.textContent ?? ''
  let description = new JSDOM(descriptionHtml).window.document
  let date = readPubDate(item.querySelector('pubDate'))
  let link = item.querySelector('link').textContent
  let guid = item.querySelector('guid')?.textContent
  let title = item.querySelector('title')?.textContent

  let post = {
    source,
    item,
    description,
    date,
    link,
    guid,
    title,
    occurances: []
  }

  return await reducerCallback(post)
}
// Reads the text of every <category> element of the post's item into
// `post.categories` (replacing any previous value) and returns the post.
export const processCategories = (post) => {
  const matches = post.item.querySelectorAll('category')
  post.categories = Array.from(matches, category => category.textContent)
  return post
}
// Collects the absolute URLs of all <img> elements in the post's description.
// post  - needs `description.querySelectorAll`
// cache - unused; kept so existing call sites stay valid
// Always returns an array (possibly empty). Throws (from `new URL`) when an
// image source is not an absolute URL.
export const extractImages = (post, cache = true) => {
  let imageUrls = []

  for(let image of post.description.querySelectorAll('img')) {
    let { src } = image

    if(!src) {
      // Fall back to the last srcset candidate. The descriptor ("2x",
      // "640w") is optional, so take the text up to the first whitespace
      // rather than slicing at indexOf(' '), which removed the last
      // character of the URL whenever no descriptor was present.
      let finalSrc = (image.srcset ?? '').split(', ').pop().trim()
      src = finalSrc.split(/\s+/)[0]
    }

    // An <img> with neither src nor srcset has nothing to download.
    if(!src)
      continue

    // Sending through URL prevents potential XSS
    imageUrls.push(new URL(src).href)
  }

  return imageUrls
}
// o
// . , . ,-. , , ,
// |/ | |-' |/|/
// ' ' `-' ' '
// Splits a list's posts into pages of (at most) `pageSize` posts.
//
// `list.posts` is sorted in place, newest first, and then walked from the
// oldest post backwards. A page is closed whenever the current index is a
// multiple of `pageSize`, so pages are produced oldest-first and the page
// holding index 0 (the newest posts) gets the list's final filename.
// Each page's `lastPageLink` is the filename of the page built just before it
// ('about:blank' for the first one). Every post additionally records the page
// and position it landed on in its `occurances` list.
export const createPages = (list, { pageSize }) => {
  const pages = []
  let pending = []
  let previousFilename = 'about:blank'

  list.posts.sort((a, b) => b.date - a.date)

  for(let cursor = list.posts.length - 1; cursor >= 0; cursor--) {
    pending.push(list.posts[cursor])

    if(cursor % pageSize != 0)
      continue

    const pageNumber = pages.length
    const title = getPageTitle(list, pageNumber)
    const filename = cursor < pageSize
      ? getFinalPageFilename(list)
      : getPageFilename(list, pageNumber)
    const page = {
      filename,
      title,
      // Posts were gathered oldest-first; show them newest-first.
      posts: pending.reverse(),
      lastPageLink: previousFilename
    }

    page.posts.forEach((post, index) => {
      post.occurances.push({ index, list, page })
    })

    pages.push(page)
    pending = []
    previousFilename = filename
  }

  return pages
}
// Renders a page to HTML, starts writing it to "<directory>/<page.filename>"
// and registers the pending write (followed by a log line) on the view's batch.
export const writePage = (page, { header = '', directory, batch }) => {
  const footer = renderNextPageLink(page.lastPageLink)
  const html = renderPage(page.title, page.posts, header, footer)
  const destination = Path.join(directory, page.filename)
  const logged = write(destination, html)
    .then(annotate(`Created "${page.title}" (${page.filename})`))

  batch.add(logged)
}
// File name of a list's final (entry) page: "index.html" for the main list,
// "<list name>.html" for every other list.
export const getFinalPageFilename = list => {
  const stem = list.main ? 'index' : list.name
  return stem + '.html'
}
// File name of a list's numbered page: "<list name>-<i>.html".
export const getPageFilename = (list, i) => {
  const { name } = list
  return `${name}-${i}.html`
}
// Title of a list's page: the list's display name followed by i + 1.
export const getPageTitle = (list, i) => {
  const pageNumber = i + 1
  return `${list.displayName} - ${pageNumber}`
}
// Renders a complete HTML page: `header` and `footer` are inserted verbatim,
// and each post is rendered with renderPost (one per line) inside <main>.
export const renderPage = (title, posts, header, footer) => {
  const head = [
    `<head>`,
    `<title>${title}</title>`,
    `<meta name="viewport" content="width=device-width, initial-scale=1">`,
    `<link rel="stylesheet" href="./style.css">`,
    `</head>`
  ]
  const body = [
    `<body>`,
    `<header>`,
    `${header}`,
    `</header>`,
    `<main>`,
    `${posts.map(renderPost).join('\n')}`,
    `</main>`,
    `<footer>`,
    `${footer}`,
    `</footer>`,
    `</body>`
  ]
  return [ `<html>`, ...head, ...body, `</html>` ].join('\n')
}
// Renders one post as a <section>: its images followed by a collapsible
// details block (title, categories, source, lists the post appears in).
// post  - needs source, date, link, occurances; title, categories and images
//         are optional
// index - used as the section's id, which the "lists" links point at
export const renderPost = (post, index) => {
  // Title, categories and link come straight from the remote feed, so they
  // are escaped before being placed into markup.
  const escapeHtml = text => String(text)
    .replaceAll('&', '&amp;')
    .replaceAll('<', '&lt;')
    .replaceAll('>', '&gt;')
    .replaceAll('"', '&quot;')

  let { source } = post
  let details = []

  if(post.title)
    details.push([ 'title', `"${escapeHtml(post.title)}"` ])

  if(post.categories && post.categories.length > 0)
    details.push([ 'categories', post.categories.map(name => `<mark>${escapeHtml(name)}</mark>`).join(', ') ])

  // Without a scheme the hostname would be resolved as a relative link.
  details.push([ 'source', `<a href="https://${source.hostname}">${source.hostname}</a>` ])
  details.push([ 'lists', post.occurances.map(occ => `<a href="${occ.page.filename}#${occ.index}">${occ.list.displayName}</a>`).join(', ') ])

  return [
    `<section id="${index}">`,
    // Posts that never went through an image puller have no `images`.
    (post.images ?? []).map(renderImage).join('\n'),
    `<details>`,
    `<summary><b>${source.displayName}</b> ${renderDate(new Date(post.date))} <a href="${escapeHtml(post.link)}">open</a></summary>`,
    `<ul>`,
    details.map(args => renderPostDetail(...args)).join('\n'),
    // The original emitted a second opening <ul> here.
    `</ul>`,
    `</details>`,
    `<hr>`,
    `</section>`
  ].join('\n')
}
// One entry of a post's detail list: bold name followed by the value.
export const renderPostDetail = (name, value) => {
  const label = `<b>${name}</b>`
  return `<li>${label} ${value}</li>`
}
// Lazily loaded image wrapped in a download link pointing at the same href.
export const renderImage = href => {
  const image = `<img src="${href}" loading="lazy"></img>`
  return `<a href="${href}" download>${image}</a>`
}
// Formats a Date as "month.day.year" using local time, without zero padding.
export const renderDate = date => {
  const month = date.getMonth() + 1
  const day = date.getDate()
  const year = date.getFullYear()
  return [ month, day, year ].join('.')
}
// Anchor labelled "next" pointing at `link`.
export const renderNextPageLink = link => {
  return `<a href="${link}">next</a>`
}
// Collapsible navigation block: one list of feeds and one list of sources,
// each entry rendered by renderNavEntry, followed by a horizontal rule.
export const renderNav = (feeds, sources) => {
  const feedEntries = feeds.map(renderNavEntry).join('\n')
  const sourceEntries = sources.map(renderNavEntry).join('\n')

  return [
    `<details>`,
    `<summary>Feeds</summary>`,
    `<section>`,
    `<ul>`,
    feedEntries,
    `</ul>`,
    `<hr>`,
    `<ul>`,
    sourceEntries,
    `</ul>`,
    `</section>`,
    `</details>`,
    `<hr>`
  ].join('\n')
}
// Navigation entry linking to a list's final page. Lists that failed to load
// are marked "(errored)"; lists without posts are marked "(empty)".
export const renderNavEntry = (list) => {
  const status = list.errored
    ? ' (errored)'
    : (list.posts.length == 0 ? ' (empty)' : '')
  const href = getFinalPageFilename(list)

  return `<li><a href="${href}">${list.displayName}</a>${status}</li>`
}
// | | | o
// ,-. ,-. | | ,-: |- . ,-. ;-.
// | | | | | | | | | | | | |
// `-' `-' ' ' `-` `-' ' `-' ' '
export const downloadImage = async (url, basename, courtesyWait, { batch, directory }) => {
let response = await delayedFetch(url, {}, courtesyWait)
.catch(err => console.error(`Failed download of ${url}:`, err) )
if(response.ok) {
let relativePath = basename + imageExtensions[response.headers.get('Content-Type')]
let path = Path.join(directory, relativePath)
const download = () => write(path, response.body)
.then(annotate( `Downloaded ${relativePath}`))
batch.add(FS.access(path).catch(download))
return relativePath
} else {
throw createNetworkingError(response)
}
}
// Downloads all images of a post. With more than one image each file name
// gets a "-<index>" suffix; a single image uses the bare base path.
// Resolves with the relative paths in the order of `images`.
export const downloadImages = (images, source, postId, view) => {
  const basePath = getImageBasePath(source, postId)
  const numbered = images.length > 1

  const downloads = images.map((image, position) => {
    const basename = numbered ? basePath + '-' + position : basePath
    return downloadImage(image, basename, source.courtesyWait, view)
  })

  return Promise.all(downloads)
}
// File extension used for each supported image media type.
export const imageExtensions = {
  'image/apng': '.apng',
  'image/avif': '.avif',
  'image/bmp': '.bmp',
  'image/gif': '.gif',
  // Icon files use ".ico" (was ".icon").
  'image/vnd.microsoft.icon': '.ico',
  'image/jpeg': '.jpg',
  'image/png': '.png',
  // SVG files use ".svg" (was ".xml", which hides that the file is an image).
  'image/svg+xml': '.svg',
  'image/tiff': '.tif',
  'image/webp': '.webp'
}
// Extracts the post's image URLs, downloads them and stores the resulting
// relative paths in `post.images`. Resolves with the post, or with undefined
// when `discardPostIfNoImages` is set and the post has no images.
export const pullImages = async (post, view, discardPostIfNoImages = false, getPostId = getPostIdFromPathname) => {
  const images = extractImages(post)

  if(discardPostIfNoImages && !(images.length > 0)) {
    return undefined
  }

  const postId = getPostId(post)
  post.images = await downloadImages(images, post.source, postId, view)
  return post
}
// Creates a feed (a named list) holding the posts of all given sources,
// in source order. `main` marks the feed that becomes the index page.
export const createFeed = (name, sources, main = false) => {
  const posts = sources.flatMap(source => source.posts)
  return { name, displayName: name, main, posts }
}
// Tries to fetch the source's feed from each of its instances in turn,
// starting with the hostname stored in the cache (when there is one).
// `source.hostname` is set to the instance being tried, so after a success it
// names the instance that answered. Resolves with the feed text, or undefined
// when every instance failed.
export const fetchRssFromInstances = async (source) => {
  let cachedHostname = source.cache.link?.hostname

  // Build a private list instead of unshifting into `source.instances`: that
  // array is handed to every source created from the same instance list, so
  // mutating it grew it by one entry per source. The cached hostname is also
  // not tried a second time when it is one of the configured instances.
  let instances = cachedHostname
    ? [ cachedHostname, ...source.instances.filter(instance => instance !== cachedHostname) ]
    : [ ...source.instances ]

  for(let hostname of instances) {
    source.hostname = hostname
    let rss = await fetchRss(source)

    if(!source.errored) {
      return rss
    }

    console.error(`Failed to fetch ${source.name} from ${source.hostname}: `, source.error)
  }

  return undefined
}
// Fills `source.items` / `source.posts` from the cached channel (when
// `useCache` is set) and from the freshly fetched `rss` text.
// Cached items are all turned into posts; remote items only become posts when
// they are newer than the date the cache was at. While remote posts are
// processed `source.latestPostDate` is advanced to the newest post date seen.
export const populateSource = async (rss, source, postReducerCallback, useCache = true) => {
  source.items = []
  source.posts = []

  let fromDate = 0

  if(useCache) {
    fromDate = source.latestPostDate

    const cachedChannel = source.cache.channel
    if(cachedChannel) {
      source = await createPosts(cachedChannel, source, 0, postReducerCallback)
    }
  }

  const trackLatestThenReduce = post => {
    if(post.date > source.latestPostDate) {
      source.latestPostDate = post.date
    }
    return postReducerCallback(post)
  }

  // Nothing was fetched (or the feed was empty): keep what the cache provided.
  if(!rss) {
    return source
  }

  source = await createPosts(createChannel(rss), source, fromDate, trackLatestThenReduce)
  return source
}
// Writes a complete view: stores the navigation as the view's header, then
// paginates every feed followed by every source, writes each resulting page
// and finally copies the bundled stylesheet next to them.
export const writeView = (sources, feeds, view) => {
  view.header = renderNav(feeds, sources)

  const pages = [ ...feeds, ...sources ]
    .flatMap(list => createPages(list, view))

  pages.forEach(page => {
    writePage(page, view)
  })

  const stylesheet = Path.join(import.meta.dirname, 'assets/style.css')
  writeStylesheet(stylesheet, view)
}
// Full life cycle of one source: open its cache, fetch the feed through
// `getRss`, populate items and posts, and schedule the refreshed cache to be
// written on the cache's batch. Resolves with the populated source.
export const createSource = async (source, getRss, postReducerCallback, cache) => {
  const opened = await openCache(source, cache)
  const rss = await getRss(opened)
  const populated = await populateSource(rss, opened, postReducerCallback, cache.populate)

  cache.batch.add(cacheSource(populated, cache))
  return populated
}
// | | ,-
// ;-. | ,-: |- | ,-. ;-. ;-.-. ,-.
// | | | | | | |- | | | | | | `-.
// |-' ' `-` `-' | `-' ' ' ' ' `-'
// ' -'
// Platform helpers for tumblr blogs.
export const tumblr = {
  // Creates the source for one blog. The user name is lower-cased for the
  // hostname, name and `user` field; the display name keeps the given casing.
  createSource(user, courtesyWait, postReducerCallback, cache) {
    const handle = user.toLowerCase()

    return createSource({
      description: `Aggregate feed for @${handle} on tumblr.com`,
      hostname: `${handle}.tumblr.com`,
      pathname: 'rss',
      courtesyWait,
      name: `tumblr-${handle}`,
      displayName: `${user} (t)`,
      user: handle,
    }, fetchRss, postReducerCallback, cache)
  },

  // Creates one source per user; remaining arguments are forwarded to createSource.
  createSources(users, ...args) {
    const creations = users.map(user => tumblr.createSource(user, ...args))
    return Promise.all(creations)
  },

  // Truthy when the description contains a reblog link whose content is not
  // the source's own user name. Returns null when there is no reblog link.
  isRepost(post) {
    const reblog = post.description.querySelector('p > a.tumblr_blog')
    if(!reblog) {
      return reblog
    }
    return reblog.innerHTML !== post.source.user
  },

  // False when a whitelist is given and violated, or a blacklist is given and
  // hit; true otherwise.
  matchesTags(post, whitelist, blacklist) {
    const violatesWhitelist = whitelist && testWhitelist(post.categories, whitelist)
    if(violatesWhitelist) {
      return false
    }

    const hitsBlacklist = blacklist && testBlacklist(post.categories, blacklist)
    return !hitsBlacklist
  },

  pullImages
}
// Platform helpers for twitter accounts read through nitter instances.
export const nitter = {
  // Creates the source for one account. No hostname is set here; the feed is
  // fetched with fetchRssFromInstances, which picks one from `instances`.
  createSource(user, instances, courtesyWait, postReducerCallback, cache) {
    return createSource({
      description: `Aggregate feed for @${user} on twitter.com`,
      instances,
      pathname: `${user}/rss`,
      courtesyWait,
      name: `nitter-${user}`,
      displayName: `${user} (n)`,
      user
    }, fetchRssFromInstances, postReducerCallback, cache)
  },

  // Creates one source per user; remaining arguments are forwarded to createSource.
  createSources(users, ...args) {
    const creations = users.map(user => nitter.createSource(user, ...args))
    return Promise.all(creations)
  },

  // True when the item's first dc:creator (minus its first character) differs
  // from the source's user.
  isRepost(post) {
    const [ creator ] = post.item.getElementsByTagName('dc:creator')
    const author = creator.innerHTML.slice(1)
    return author !== post.source.user
  },

  pullImages
}
// TODO: Mastodon support
//
// "Turns out Mastodon has built-in RSS; your feed URL is [instance]/@[username].rss, so for example I'm
// https://mastodon.social/@brownpau.rss (note the "@")"
// - https://mastodon.social/@brownpau/100523448408374430
// Platform helpers for Mastodon accounts.
export const mastodon = {
  // Creates the source for one account given as "user@instance".
  // A leading "@" ("@user@instance") is accepted as well; without stripping
  // it the split would yield an empty user and the user name as hostname.
  createSource(usertag, courtesyWait, postReducerCallback, cache) {
    let [ user, hostname ] = usertag.toLowerCase().replace(/^@/, '').split('@')

    let source = {
      description: `Aggregate feed for @${user} at ${hostname}`,
      hostname,
      pathname: '@' + user + ".rss",
      courtesyWait,
      name: `${hostname}-${user}`,
      displayName: user + ' (m)',
      user,
    }

    return createSource(source, fetchRss, postReducerCallback, cache)
  },

  // Creates one source per user tag; remaining arguments are forwarded to
  // createSource. Mirrors the helper of the same name on the other platforms.
  createSources(users, ...args) {
    return Promise.all(users.map(user => mastodon.createSource(user, ...args)))
  },

  isRepost(post) {
    // Mastodon's rss does not provide retweets/retoots
    return false
  },

  // Downloads every media:content attachment of the item and stores the
  // resulting paths in `post.images`. Resolves with the post, or undefined
  // when `discardPostIfNoImages` is set and the item has no attachments.
  async pullImages(post, view, discardPostIfNoImages) {
    let media = post.item.getElementsByTagName('media:content')
    let images = []

    for(let image of media) {
      images.push(image.getAttribute('url'))
    }

    if(!discardPostIfNoImages || media.length > 0) {
      post.images = await downloadImages(
        images,
        post.source,
        getPostIdFromPathname(post),
        view
      )
      return post
    }
  }
}