// rssssing/lib.js
// Last modified: 2025-02-05 12:34:07 -07:00
// 724 lines, 16 KiB, JavaScript
import Path from "path"
import FS from "fs/promises"
import { JSDOM } from "jsdom"
import Mustache from "mustache"
// | o |
// . . |- . | ,-.
// | | | | | `-.
// `-` `-' ' ' `-'
// Collects promises so unrelated async work can be awaited in one place.
// add() queues a promise (non-promise values are tolerated by Promise.all);
// complete() resolves once everything queued so far has settled.
export function PromiseBatch() {
	const pending = []
	this.add = promise => pending.push(promise)
	this.complete = () => Promise.all(pending)
}
// Pass-through tap for promise chains: logs `annotation`, forwards the
// resolved value unchanged.
// Fix: the original arrow body was `{ console.log(annotation), data }` —
// a comma expression inside a block body, which returns undefined and
// silently dropped the chained value.
export const annotate = annotation =>
	data => {
		console.log(annotation)
		return data
	}
// Writes `content` to `path`, creating the parent directory when needed.
// Fix: the original probed with FS.access and only then mkdir'd — a
// check-then-act race. mkdir with { recursive: true } succeeds whether or
// not the directory already exists, so one call covers both cases.
export const write = async (path, content) => {
	await FS.mkdir(Path.dirname(path), { recursive: true })
	return await FS.writeFile(path, content)
}
// Error describing a non-OK HTTP response (url, status, status text).
export const createNetworkingError = response =>
	new Error(`Request failed for ${response.url}, ${response.status}: ${response.statusText}`)
// File extension (with leading dot) of a URL's pathname; '' when none.
export const getLinkExtname = link => {
	let { pathname } = new URL(link)
	return Path.extname(pathname)
}
// Unique basename for a post's downloaded images: "<source name>-<post id>".
export const getImageBasePath = (source, postId) => {
	return `${source.name}-${postId}`
}
// Queues a copy of the stylesheet at `path` into the view's output
// directory as style.css; the copy is tracked on the view's batch.
export const writeStylesheet = (path, view) => {
	let copying = FS.readFile(path)
		.then(content => write(Path.join(view.path, 'style.css'), content))
	return view.batch.add(copying)
}
// Final pathname segment of a post's link, used as its identifier.
export const getPostIdFromPathname = post =>
	new URL(post.link).pathname.split('/').pop()
// Resolves true when `path` is accessible on disk, false otherwise.
export const doesExist = async (path) => {
	try {
		await FS.access(path)
		return true
	} catch {
		return false
	}
}
// Creates `path` (recursively) when missing; resolves to whether it
// already existed.
// Fix: the original assigned `doesExist(path)` without awaiting it, so
// `exists` was always a truthy Promise — the directory was never created
// and the returned "existed" flag was meaningless.
export const ensureDir = async (path) => {
	let exists = await doesExist(path)
	if(!exists) {
		await FS.mkdir(path, { recursive: true })
	}
	return exists
}
// True when `value` is undefined or null. Loose equality against null
// matches exactly those two values and nothing else.
export const isUnset = (value) => value == null
// Per-domain accumulated wait (ms) used by delayedFetch to space out
// requests hitting the same second-level domain.
let waitingList = new Map()
// Resolves after `delay` milliseconds.
export const sleep = delay =>
	new Promise(resolve => setTimeout(resolve, delay))
// fetch() wrapper that throttles per second-level domain: each call adds
// `courtesyWait` ms to that domain's backlog and sleeps for the backlog
// accumulated before it.
// Fixes: the domain regex had an unescaped dot (`.` matched any char),
// and the exec result was destructured unconditionally — a hostname with
// no dot (e.g. "localhost") made exec return null and threw. Fall back
// to the full hostname in that case.
export const delayedFetch = async (url, options, courtesyWait = 5 * 1000) => {
	let { hostname } = new URL(url)
	let domain = /[\w-]+\.[\w-]+$/.exec(hostname)?.[0] ?? hostname
	let waitFor = waitingList.get(domain) ?? 0
	waitingList.set(domain, waitFor + courtesyWait)
	if(waitFor !== 0) {
		console.log(`Waiting ${waitFor}ms to download ${url}`)
		await sleep(waitFor)
	}
	return await fetch(url, options)
}
// delayedFetch with retries: attempts the request up to
// (retryAttempts ?? 3) + 1 times, logging each retry.
// Fix: the original never caught delayedFetch rejections, so a network
// error escaped on the first attempt and the loop never actually
// retried. Errors are now caught per attempt; if every attempt fails the
// last error is rethrown (callers' .catch handlers still apply).
export const retryDelayedFetch = async (url, options, courtesyWait, retryAttempts) => {
	let maxAttempts = (retryAttempts ?? 3) + 1
	let response
	let lastError
	for(let attempt = 0; isUnset(response) && attempt < maxAttempts; attempt++) {
		if(attempt > 0)
			console.error(`Failed to fetch ${url}, retrying...`)
		try {
			response = await delayedFetch(url, options, courtesyWait)
		} catch(err) {
			lastError = err
		}
	}
	if(isUnset(response) && lastError !== undefined)
		throw lastError
	return response
}
//
// ;-. ,-. ,-.
// | `-. `-.
// ' `-' `-'
// Downloads a source's RSS feed and parses it into a <channel> element.
// On any failure the source is flagged (`source.errored`, `source.error`)
// and the function returns undefined instead of throwing.
// Fix: removed the unused local `rss`.
export const fetchChannel = async (source) => {
	let { hostname } = source
	let error
	let response
	let channel
	try {
		response = await delayedFetch(
			new URL(source.pathname, 'https://' + hostname),
			{},
			source.courtesyWait
		)
	} catch(err) {
		error = err
	}
	// If the fetch threw, `response` is undefined — the || short-circuits
	// before touching `response.ok`.
	source.errored = error !== undefined || !response.ok
	if(source.errored) {
		source.error = error ?? createNetworkingError(response)
		return
	}
	console.log(`Found ${source.name} at ${hostname}`)
	try {
		channel = createChannel(await response.text())
	} catch(err) {
		error = err
	}
	source.errored = error !== undefined
	if(source.errored) {
		source.error = error
		return
	}
	return channel
}
// Parses an RSS string as XML and returns its <channel> element
// (null when absent).
export const createChannel = rss => {
	let dom = new JSDOM(rss, { contentType: 'text/xml' })
	return dom.window.document.querySelector('channel')
}
// Timestamp (ms since epoch) of a <pubDate> element; 0 when the element
// is missing.
export const readPubDate = (pubDate) => {
	if(!pubDate)
		return 0
	return new Date(pubDate.textContent).valueOf()
}
// Converts every <item> in `channel` newer than `fromDate` into a post,
// passes each through `reducerCallback`, and pushes the survivors onto
// `source.posts` (the raw items are kept on `source.items` so the cache
// can be rewritten later). The reducer may return the post, a Promise of
// it, or a falsy value to discard it. Resolves to `source` once all
// async reducers have settled.
export const createPosts = async (channel, source, fromDate, reducerCallback) => {
	let items = channel.querySelectorAll('item')
	let promises = []
	for(let item of items) {
		let post = createPost(item, source)
		// Skip posts at or before the cutoff (e.g. already cached).
		if(post.date <= fromDate)
			continue
		source.items.push(item)
		let postResolvable = reducerCallback(post)
		if(postResolvable instanceof Promise) {
			postResolvable
				.then(post => {
					if(post) {
						source.posts.push(post)
					}
				})
		} else {
			if(postResolvable) {
				source.posts.push(postResolvable)
			}
		}
		// Promise.all tolerates non-promise values, so sync and async
		// reducer results can be awaited uniformly below.
		promises.push(postResolvable)
	}
	await Promise.all(promises)
	return source
}
// Builds a post record from an RSS <item>: the description parsed into
// its own document, a numeric timestamp, link/guid/title text, and an
// empty occurrence list (filled in during pagination).
export const createPost = (item, source) => {
	let descriptionText = item.querySelector('description')?.textContent ?? ''
	let description = new JSDOM(descriptionText).window.document
	return {
		source,
		item,
		description,
		date: readPubDate(item.querySelector('pubDate')),
		link: item.querySelector('link')?.textContent,
		guid: item.querySelector('guid')?.textContent,
		title: item.querySelector('title')?.textContent,
		occurances: []
	}
}
// Collects the absolute URLs of every <img> in a post's description,
// preferring `src` and falling back to the last (highest-density)
// `srcset` candidate.
// Fixes: (1) the DOM reports an absent src attribute as '' — the old
// isUnset() check missed that and `new URL('')` threw; use a falsiness
// check. (2) A srcset candidate with no width/density descriptor has no
// space, and `slice(0, indexOf(' '))` === `slice(0, -1)` chopped its
// last character; handle indexOf returning -1. (3) Removed the
// always-true `if(images)` guard (querySelectorAll never returns a
// falsy value), so an empty array is always returned.
export const extractImages = (post) => {
	let imageUrls = []
	for(let image of post.description.querySelectorAll('img')) {
		let src = image.src
		if(!src) {
			let finalSrc = image.srcset.split(', ').pop()
			let spaceAt = finalSrc.indexOf(' ')
			src = spaceAt === -1 ? finalSrc : finalSrc.slice(0, spaceAt)
		}
		// Sending through URL prevents potential XSS
		imageUrls.push(new URL(src).href)
	}
	return imageUrls
}
// Attaches the text of every <category> element of the post's item as
// `post.categories`; returns the same post for chaining.
export const processCategories = (post) => {
	let matches = post.item.querySelectorAll('category')
	post.categories = Array.from(matches, category => category.textContent)
	return post
}
// .
// |
// ,-. ,-: ,-. |-. ,-.
// | | | | | | |-'
// `-' `-` `-' ' ' `-'
// Normalizes cache settings: defaults `enabled` to false, and when
// caching is on prepares the write batch and the cache directory.
// Fix: the original only returned early when `enabled` was unset; an
// explicit `enabled: false` still fell through to ensureDir(cache.path)
// (typically with an undefined path). Skip all setup whenever caching
// is disabled.
export const createCache = async (cache = {}) => {
	cache.enabled ??= false
	if(!cache.enabled)
		return cache
	cache.batch ??= new PromiseBatch()
	await ensureDir(cache.path)
	return cache
}
// Filesystem location of a source's cache file.
export const getCachePath = (source, cache) => {
	return Path.join(cache.path, source.cacheFilename)
}
// Serializes one source's cached RSS and writes it to its cache path.
export const cacheSource = (source, cache) => {
	let path = getCachePath(source, cache)
	return write(path, renderCache(source, cache))
}
// Writes the cache files of all sources in parallel.
export const cacheSources = (sources, cache) => {
	let pending = sources.map(source => cacheSource(source, cache))
	return Promise.all(pending)
}
// Loads a source's cached RSS (if present) and attaches `source.cache`
// with the parsed channel, its pubDate, and its link; otherwise attaches
// an empty cache stamped at the epoch. Also seeds `source.latestPostDate`
// from the cached date so later fetches can skip known posts.
export const openCache = async (source, cache) => {
	let path = getCachePath(source, cache)
	let exists = await doesExist(path)
	let rss
	if(exists)
		rss = await FS.readFile(path, { encoding: 'utf8' })
	if(exists && rss) {
		let channel = createChannel(rss)
		// NOTE(review): `date` is a numeric timestamp here (readPubDate)
		// but a Date in the fallback branch below; comparisons work via
		// valueOf, yet the types differ — confirm this is intentional.
		source.cache = {
			channel,
			date: readPubDate(channel.querySelector('pubDate')),
			link: new URL(channel.querySelector('link').textContent),
		}
	} else {
		source.cache = {
			date: new Date(0)
		}
		if(source.hostname)
			source.cache.link = buildCacheLink(source)
	}
	source.latestPostDate = source.cache.date
	return source
}
// Canonical https URL for a source's hostname.
export const buildCacheLink = source => {
	return new URL(`https://${source.hostname}`)
}
// Renders a source back into a minimal RSS 2.0 document for its cache
// file. Each cached item's markup is compacted (newlines and following
// indentation stripped) so the file stays one line per item.
export const renderCache = (source, cache) => `\
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>${source.displayName}</title>
<description>${source.description}</description>
<link>${buildCacheLink(source)}</link>
<atom:link href="${new URL(source.cacheFilename, cache.directoryUrl)}" rel="self" type="application/rss+xml" />
<pubDate>${new Date(source.latestPostDate).toUTCString()}</pubDate>
<generator>rssssing</generator>
${source.items.map(item => item.outerHTML.replaceAll(/\n\s*/g, '')).join('\n')}
</channel>
</rss>`
// | | | o
// ,-. ,-. | | ,-: |- . ,-. ;-.
// | | | | | | | | | | | | |
// `-' `-' ' ' `-` `-' ' `-' ' '
// Builds a feed (an aggregated list of posts) from several sources.
// `main` marks the feed whose final page becomes index.<ext>.
export const createFeed = (name, sources, main = false) => {
	return {
		name,
		displayName: name,
		main,
		posts: sources.flatMap(source => source.posts)
	}
}
// Fetches one image and schedules it for writing into the view's image
// store. Returns the local pathname on success, or the original URL when
// downloading is impossible (callers then fall back to hotlinking).
export const downloadImage = async (url, basename, source, view) => {
	let response = await retryDelayedFetch(url, {}, source.courtesyWait, source.retryAttempts)
		.catch(err => console.error(`Failed download of ${url}:`, err, err.errors) )
	if(response == undefined) {
		console.error('Could not download image: ' + url)
		return url
	}
	if(response.ok) {
		// NOTE(review): assumes a Content-Type header is always present; a
		// missing header would make this throw — confirm upstream guarantees.
		let mimetype = response.headers.get('Content-Type').split(';')[0]
		let extension = imageExtensions[mimetype]
		if(typeof extension !== 'string') {
			console.error(`Unknown mimetype for ${url}: ${mimetype}. Cannot download`)
			return url
		}
		let pathname = Path.join(view.imageStoreDirectory, basename + extension)
		let path = Path.join(view.path, pathname)
		// Only write when the file is not already on disk (access rejects
		// when missing, which triggers the download).
		const download = () => write(path, response.body)
			.then(annotate( `Downloaded ${pathname}`))
		view.batch.add(FS.access(path).catch(download))
		return pathname
	} else {
		console.error( createNetworkingError(response) )
		return url
	}
}
// Resolves a post's image URLs to local pathnames: already-stored images
// come straight from the image store, the rest are downloaded. Multiple
// images get "-<index>" suffixes on the shared base name.
export const downloadImages = (images, source, postId, view) => {
	let basePath = getImageBasePath(source, postId)
	let pathnames = images.map((url, index) => {
		let basename = images.length > 1 ? `${basePath}-${index}` : basePath
		let stored = view.imageStore.get(basename)
		return stored === undefined
			? downloadImage(url, basename, source, view)
			: stored
	})
	return Promise.all(pathnames)
}
// Maps image response MIME types to the file extension used in the
// image store. Unknown types abort the download (see downloadImage).
// NOTE(review): 'image/svg+xml' → '.xml' (not '.svg') and
// 'image/webm' → '.gifv' look suspicious — confirm they are intentional.
export const imageExtensions = {
	'image/apng': '.apng',
	'image/avif': '.avif',
	'image/bmp': '.bmp',
	'image/gif': '.gif',
	'image/gifv': '.gifv',
	'image/vnd.microsoft.icon': '.icon',
	'image/jpeg': '.jpg',
	'image/png': '.png',
	'image/svg+xml': '.xml',
	'image/tiff': '.tif',
	'image/webp': '.webp',
	'image/webm': '.gifv'
}
// Downloads a post's images and attaches the local pathnames as
// `post.images`. When `discardPostIfNoImages` is set and no image is
// found, returns undefined so the reducer drops the post.
export const pullImages = async (post, view, discardPostIfNoImages = false, getPostId = getPostIdFromPathname) => {
	let images = extractImages(post)
	if(discardPostIfNoImages && images.length === 0)
		return
	post.images = await downloadImages(
		images,
		post.source,
		getPostId(post),
		view
	)
	return post
}
// o
// . , . ,-. , , ,
// |/ | |-' |/|/
// ' ' `-' ' '
// Prepares a view for writing: a promise batch, an (optionally empty)
// header, the output directory, the image store when configured, default
// template and stylesheet locations, and queued template loading.
export const createView = async (view = {}) => {
	view.batch ??= new PromiseBatch()
	view.header ??= ''
	await ensureDir(view.path)
	if(view.imageStoreDirectory)
		await openImageStore(view)
	view.templatesPath ??= Path.join(import.meta.dirname, 'templates')
	view.stylesheetPath ??= Path.join(import.meta.dirname, 'assets/style.css')
	view.batch.add(openTemplates(view))
	return view
}
// Indexes already-downloaded images (basename → stored pathname) so they
// are not fetched again; creates the store directory when missing and
// skips scanning in that case.
export const openImageStore = async view => {
	let storePath = Path.join(view.path, view.imageStoreDirectory)
	view.imageStore = new Map()
	let existed = await ensureDir(storePath)
	if(!existed)
		return view
	let entries = await FS.readdir(storePath, { withFileTypes: true })
	for(let entry of entries) {
		if(!entry.isFile())
			continue
		// Strip the extension; a name without a dot loses its final
		// character (lastIndexOf yields -1).
		let basename = entry.name.slice(0, entry.name.lastIndexOf('.'))
		view.imageStore.set(basename, Path.join(view.imageStoreDirectory, entry.name))
	}
	return view
}
// Loads every file in `view.templatesPath` into `view.templates` keyed
// by basename (reads are queued on the view's batch). The extension of
// the "main" template becomes the view's default output extension.
// Throws when the directory is missing or empty.
export const openTemplates = async (view) => {
	view.templates = {}
	let exists = await doesExist(view.templatesPath)
	let dirents
	if(exists)
		dirents = await FS.readdir(view.templatesPath, { withFileTypes: true })
	if(!exists || dirents.length === 0)
		throw new Error('Assets directory must contain a "nav" and "main" file.')
	for(let dirent of dirents) {
		if(!dirent.isFile())
			continue
		let extensionStart = dirent.name.lastIndexOf('.')
		let basename = dirent.name.slice(0, extensionStart)
		let extension = dirent.name.slice(extensionStart + 1)
		if(basename == 'main' && isUnset(view.formatExtension))
			view.formatExtension = extension
		view.batch.add(
			FS.readFile(Path.join(view.templatesPath, dirent.name), { encoding: 'utf-8' })
			// NOTE(review): trailing newlines were said to need removal so
			// partials work cleanly, yet no trimming happens here — confirm
			// whether templates are stored pre-trimmed.
			.then(template => view.templates[basename] = template )
		)
	}
	return view
}
// Renders the whole site: labels every list, builds the shared nav
// header, paginates sources then feeds, writes each page, and copies
// the stylesheet.
export const writeView = (sources, feeds, view) => {
	let lists = [...sources, ...feeds]
	for(let list of lists)
		labelList(list, view)
	view.header = renderNav(sources, feeds, view)
	let pages = lists.flatMap(list => createPages(list, view))
	for(let page of pages) {
		console.log('Writing page', page)
		writePage(page, view)
	}
	writeStylesheet(view.stylesheetPath, view)
}
// Annotates a list with its final-page filename and an emptiness flag,
// both consumed by the nav template.
export const labelList = (list, view) => {
	list.indexFilename = nameFinalPage(list, view)
	list.empty = list.posts.length === 0
}
// Splits a list's posts (sorted newest-first) into pages of
// `view.pageSize`. Pages are built oldest-first so each page can link to
// the previous ("last") one; the newest page gets the list's index
// filename. Each post records where it appears via `occurances`.
// Fixes: removed the leftover debug `console.log(list)` (it dumped the
// entire list object on every call) and renamed the inner loop variable,
// which shadowed the outer `i`.
export const createPages = (list, view) => {
	let posts = []
	let pages = []
	let lastPageLink = 'about:blank'
	list.posts.sort((a, b) => b.date - a.date)
	for(let i = list.posts.length - 1; i >= 0; i--) {
		posts.push(list.posts[i])
		if(i % view.pageSize == 0) {
			// The page containing the newest posts becomes the index page.
			let filename = i < view.pageSize ? list.indexFilename : namePage(list, pages.length, view)
			let page = {
				filename,
				title: list.displayName,
				index: pages.length,
				posts: posts.reverse(),
				lastPageLink
			}
			for(let j = 0; j < page.posts.length; j++) {
				page.posts[j].occurances.push({
					index: j,
					list,
					page
				})
			}
			pages.push(page)
			posts = []
			lastPageLink = filename
		}
	}
	return pages
}
// Filename for a non-final page: "<list name>-<page number>.<ext>".
export const namePage = (list, number, view) =>
	`${list.name}-${number}.${view.formatExtension}`
// Filename of the newest page: "index.<ext>" for the main feed,
// "<list name>.<ext>" otherwise.
export const nameFinalPage = (list, view) => {
	let stem = list.main ? 'index' : list.name
	return `${stem}.${view.formatExtension}`
}
// Renders one page and queues the write on the view's batch, logging
// when the file lands on disk.
export const writePage = (page, view) => {
	let content = renderPage(page, view)
	let destination = Path.join(view.path, page.filename)
	let pending = write(destination, content).then(annotate(`Created ${page.filename}`))
	view.batch.add(pending)
}
// Renders a page through the "main" template, exposing its posts as
// pre-built sections plus the shared header and footer; the remaining
// templates act as Mustache partials.
export const renderPage = (page, view) => {
	let model = {
		...page,
		sections: page.posts.map(createSection),
		header: view.header,
		footer: view.footer
	}
	return Mustache.render(view.templates.main, model, view.templates)
}
// Splits an array into { entries, final } so a Mustache template can
// render a delimiter after every entry except the last. Returns false
// (renders nothing) for a missing or empty array.
export const createMustacheDelimitedArray = (array) => {
	if(array == null || array.length === 0)
		return false
	return {
		final: array.at(-1),
		entries: array.slice(0, -1)
	}
}
// Builds the template model for one post: its calendar date split into
// 1-based month / day / year, plus delimiter-friendly category and
// occurrence lists.
export const createSection = (post, index) => {
	let when = new Date(post.date)
	let date = {
		month: when.getMonth() + 1,
		day: when.getDate(),
		year: when.getFullYear()
	}
	return {
		post,
		index,
		date,
		categories: createMustacheDelimitedArray(post.categories),
		occurances: createMustacheDelimitedArray(post.occurances)
	}
}
// Renders the navigation header: sources grouped by their `type`, plus
// the list of feeds, each as delimiter-friendly arrays.
// Fix: the original used an array literal as a string-keyed dictionary
// (`let sourceTypes = []` indexed by source.type); use a plain object,
// which is what Object.values was already treating it as.
export const renderNav = (sources, feeds, view) => {
	let sourceTypes = {}
	for(let source of sources) {
		(sourceTypes[source.type] ??= []).push(source)
	}
	return Mustache.render(view.templates.nav, {
		sourceTypes: Object.values(sourceTypes).map(createMustacheDelimitedArray),
		feeds: createMustacheDelimitedArray(feeds),
	}, view.templates)
}
//
// ,-. ,-. . . ;-. ,-. ,-.
// `-. | | | | | | |-'
// `-' `-' `-` ' `-' `-'
// Fills `source.items` / `source.posts` from the cached channel (all of
// its posts) and the freshly fetched channel (only posts newer than the
// cached cutoff when caching is on). Tracks the newest remote post date
// so the next cache write advances the cutoff.
export const populateSource = async (channel, source, postReducerCallback, cache) => {
	let fromDate = 0
	source.items = []
	source.posts = []
	if(cache.enabled) {
		fromDate = source.latestPostDate
		if(source.cache.channel)
			source = await createPosts(source.cache.channel, source, 0, postReducerCallback)
	}
	// Wraps the caller's reducer to record the most recent post date seen.
	let remoteReducerCallback = post => {
		if(post.date > source.latestPostDate)
			source.latestPostDate = post.date
		return postReducerCallback(post)
	}
	// `channel` is undefined when the fetch failed; `?? false` guards both
	// undefined and null.
	if(channel ?? false)
		source = await createPosts(channel, source, fromDate, remoteReducerCallback)
	return source
}
// Builds a complete source: loads its cache, fetches the remote channel
// via `getChannel`, merges cached and fresh posts, and schedules a cache
// rewrite on the cache batch.
export const createSource = async (source, getChannel, postReducerCallback, cache) => {
	source.cacheFilename = `${source.name}.xml`
	if(cache.enabled)
		source = await openCache(source, cache)
	let channel = await getChannel(source)
	source = await populateSource(channel, source, postReducerCallback, cache)
	if(cache.enabled)
		cache.batch.add(cacheSource(source, cache))
	return source
}
// Fills in default network options (1s courtesy wait, 3 retries) when
// unset; `view` is accepted for interface symmetry but unused.
export const createSourceOptions = (options, view) => {
	options.courtesyWait ??= 1000
	options.retryAttempts ??= 3
	return options
}