v2 almost there

This commit is contained in:
dakedres 2024-02-10 01:04:17 -07:00
parent 2288085649
commit 9f952d8c3c
3 changed files with 201 additions and 568 deletions

418
index.js
View File

@ -1,418 +0,0 @@
import fetch from "node-fetch"
import Path from "path"
import FS from "fs/promises"
import { JSDOM } from "jsdom"
import config from "./config.js"
// Remembers, per Nitter user, the instance that last served a usable feed
// (read and written by handleNitterUser, persisted by main).
// A missing cache file is expected on the first run and is not an error;
// any other read or parse failure still aborts start-up.
let cache = await FS.readFile('./cache.json', { encoding: 'utf-8' })
  .then(json => JSON.parse(json) )
  .catch(err => {
    if(err.code !== 'ENOENT') {
      throw err
    }
    return {}
  })
// handleNitterUser indexes cache.nitter unconditionally, so it must exist.
if(cache.nitter == null) {
  cache.nitter = {}
}
// hostname -> milliseconds fetchRss should wait before its next request there
let waitingList = new Map()
/**
 * Writes `content` to `path`. When the parent directory is not accessible it
 * is created first, together with any missing ancestors.
 *
 * @param {string} path - destination file
 * @param {string|Buffer} content - data handed to FS.writeFile
 * @returns {Promise<void>}
 */
const write = async (path, content) => {
  const parent = Path.dirname(path)
  await FS.access(parent).catch(() => FS.mkdir(parent, { recursive: true }))
  return FS.writeFile(path, content)
}
/**
 * Fetches and parses the Nitter RSS feed of `user`. The instance remembered in
 * `cache.nitter[user]` (if any) is tried first, followed by every instance in
 * `config.sources.nitter`, in order. The first instance that yields a usable
 * feed is stored back into the cache.
 *
 * @param {string} user - Nitter user name
 * @returns {Promise<object[]>} posts produced by processNitter
 * @throws {NoMatchesError} when no instance yields a usable feed; previously
 *   this case logged "Found ... at undefined", cached `undefined` and resolved
 *   with undefined
 */
const handleNitterUser = async user => {
  const cached = cache.nitter[user]
  const sources = cached ?
    [ cached ].concat(config.sources.nitter) :
    config.sources.nitter

  for(const source of sources) {
    const rss = await fetchRss(source, user + '/rss')
    let data

    try {
      data = processNitter(rss, user)
    } catch(err) {
      // Compared by class name rather than instanceof — presumably because the
      // DOMException raised by the XML parser is not the global class.
      const name = err.constructor.name

      if(name != NoMatchesError.name && name != DOMException.name) {
        throw err
      }

      console.warn(`Failed to fetch ${user} from ${source}`)
      continue
    }

    console.log(`Found ${user} at ${source}`)
    cache.nitter[user] = source
    return data
  }

  throw new NoMatchesError(`Could not fetch ${user} from any Nitter instance`)
}
// Returns a promise that resolves (with no value) after `delay` milliseconds.
const sleep = delay => new Promise(done => {
  setTimeout(done, delay)
})
// Thrown by processRss when a feed parses but contains no <item> elements.
class NoMatchesError extends Error {
  constructor(message, options) {
    super(message, options)
    // Without an explicit name, logs and stack traces show a plain "Error".
    this.name = 'NoMatchesError'
  }
}
/**
 * Parses an RSS document and turns each <item> into a post.
 *
 * @param {string} rss - RSS/XML source text
 * @param {Function} reducerCallback - called as
 *   (item, description, dateString, link), where `description` is the item's
 *   description text parsed as an HTML document; returns a post object, or a
 *   falsy value to drop the item
 * @param {*} [cdata] - unused; kept so existing call sites stay valid
 * @returns {object[]} the accepted posts, each extended with `date`
 *   (epoch milliseconds, 0 when pubDate cannot be parsed) and `link`
 * @throws {NoMatchesError} when the document contains no `channel item`
 */
const processRss = (rss, reducerCallback, cdata) => {
  const { document } = new JSDOM(rss, {
    contentType: 'text/xml'
  }).window
  const items = document.querySelectorAll('channel item')

  if(items.length == 0) {
    throw new NoMatchesError('Got no matches')
  }

  const posts = []

  for(const item of items) {
    const description = new JSDOM(item.querySelector('description').textContent).window.document
    const dateString = item.querySelector('pubDate').textContent
    const link = item.querySelector('link').textContent
    const post = reducerCallback(item, description, dateString, link)

    if(post) {
      // An invalid date yields NaN, which `??` does not replace — test for it explicitly.
      const timestamp = new Date(dateString).valueOf()

      post.date = Number.isNaN(timestamp) ? 0 : timestamp
      post.link = link
      posts.push(post)
    }
  }

  return posts
}
/**
 * Downloads `https://<hostname>/<path>` as text, honouring the courtesy delay
 * recorded for that host by the previous request.
 *
 * @param {string} hostname
 * @param {string} path - resolved against the host root
 * @returns {Promise<string|undefined>} the response body; on a network error
 *   the error is logged and the promise resolves with undefined (callers rely
 *   on this instead of a rejection)
 */
const fetchRss = async (hostname, path) => {
  // A host that has not been contacted yet has no entry: treat that as "no wait"
  // instead of passing undefined to sleep().
  const waitFor = waitingList.get(hostname) ?? 0

  if(waitFor > 0) {
    await sleep(waitFor)
    waitingList.set(hostname, 0)
  }

  return await fetch(new URL(path, 'https://' + hostname) )
    .then(response => {
      // The next request to this host has to wait config.courtesyWait ms
      waitingList.set(hostname, config.courtesyWait)
      return response.text()
    })
    .catch(console.error)
}
/**
 * Collects the image URLs found in a post description.
 *
 * @param {string} user - copied onto the result
 * @param {Document} description - parsed description; only
 *   `querySelectorAll('img')` and each image's `src` / `srcset` are read
 * @returns {{ images: string[], user: string }|undefined} undefined when the
 *   description holds no image with a usable URL
 */
const getImages = (user, description) => {
  const imageUrls = []

  for(const image of description.querySelectorAll('img')) {
    let { src } = image

    if(!src && image.srcset) {
      // Fall back to the last srcset candidate
      const finalSrc = image.srcset.split(', ').pop().trim()
      const descriptorStart = finalSrc.search(/\s/)

      // A candidate may have no width/density descriptor; slicing at -1 would
      // then cut off the last character of the URL.
      src = descriptorStart === -1 ? finalSrc : finalSrc.slice(0, descriptorStart)
    }

    // Skip images without any URL: an empty string cannot be rendered later
    if(src) {
      imageUrls.push(src)
    }
  }

  if(imageUrls.length > 0) {
    return {
      images: imageUrls,
      user
    }
  }
}
/**
 * Parses a Nitter RSS feed and keeps only the items whose `dc:creator`
 * (minus its first character — presumably the leading "@") equals `user`.
 *
 * @param {string} rss - RSS source text
 * @param {string} user
 * @returns {object[]} posts, see processRss and getImages
 * @throws {NoMatchesError} propagated from processRss
 */
const processNitter = (rss, user) => {
  return processRss(rss, (item, description) => {
    const creator = item.getElementsByTagName('dc:creator')[0]

    // Items without a creator cannot be attributed to `user`: drop them rather
    // than raising a TypeError, which would abort the instance fallback.
    if(creator && creator.innerHTML.slice(1) === user)
      return getImages(user, description)
  })
}
/**
 * Fetches `<user>.tumblr.com/rss` and converts it into posts via processTumblr.
 *
 * @param {string} user - Tumblr blog name
 * @returns {Promise<object[]>}
 */
const handleTumblrUser = async (user) => {
  const hostname = `${user}.tumblr.com`
  const feedText = await fetchRss(hostname, 'rss')

  console.log(`Found ${user}`)

  return processTumblr(feedText, user)
}
/**
 * Parses a Tumblr RSS feed, skipping reblogs: an item whose description has a
 * `p > a.tumblr_blog` link naming somebody other than `user` is dropped.
 *
 * @param {string} rss - RSS source text
 * @param {string} user
 * @returns {object[]} posts, see processRss and getImages
 */
const processTumblr = (rss, user) => {
  const keepOwnPosts = (item, description) => {
    const attribution = description.querySelector('p > a.tumblr_blog')
    const isOwnPost = !attribution || attribution.innerHTML === user

    return isOwnPost ? getImages(user, description) : undefined
  }

  return processRss(rss, keepOwnPosts)
}
const oneDay = 1000 * 60 * 60 * 24

/**
 * Collates the posts of several sources into one feed, splits it into pages
 * of `viewOptions.pageSize` posts and writes every page as HTML into
 * `directory`. The first page is written as `index.html`, page i (i > 0) as
 * `<i>.html`, matching the "next" links and the nav links to each directory.
 * When there are no posts, a single placeholder `index.html` is written.
 *
 * @param {Array<object[]|undefined>} sources - one post list per source;
 *   undefined entries (failed sources) are skipped
 * @param {string} directory - output directory
 * @param {string} header - HTML handed to renderPage as page header
 * @param {{ pageSize: number, tooLongAgo?: number }} viewOptions - when
 *   `tooLongAgo` is set, only posts newer than that many days before the
 *   start of the current UTC day are kept
 * @param {Error} [error] - its stack is embedded in the placeholder page
 * @returns {Promise<Array>} settles once every page has been written
 */
const printFeed = async (sources, directory, header, viewOptions, error) => {
  // Collate
  const feed = []
  const tooLongAgo = viewOptions.tooLongAgo ?
    (Date.now() - (Date.now() % oneDay)) - oneDay * viewOptions.tooLongAgo :
    0

  for(const source of sources) {
    if(source == undefined) {
      continue
    }

    for(const post of source) {
      if(post.date > tooLongAgo)
        feed.push(post)
    }
  }

  // A comparator has to return a number; the previous boolean result never
  // signalled "a before b". Oldest first, as the original comparison implied.
  // NOTE(review): confirm oldest-first is the intended reading order.
  feed.sort((a, b) => a.date - b.date)

  // Render
  const pages = []

  for(let i = 0; i < Math.ceil(feed.length / viewOptions.pageSize); i++) {
    pages.push(feed.slice(i * viewOptions.pageSize, (i + 1) * viewOptions.pageSize) )
  }

  // Write
  const promises = []

  const writePage = (index, content) =>
    promises.push(
      write(Path.join(directory, (index === 0 ? 'index' : index.toString() ) + '.html'), content)
    )

  for(let i = 0; i < pages.length; i++) {
    const nextPage = i + 1
    const link = nextPage === pages.length ?
      `<a href="data:text/html,">end</a>` :
      `<a href="${nextPage}.html">next</a>`

    writePage(i, renderPage(`Page ${i + 1}`, pages[i], header, link) )
  }

  if(pages.length == 0) {
    let message = 'No posts available'

    if(error) {
      // Put in an iframe to prevent potential XSS through response body? Who knows.
      message += `<br><br>
<iframe src="data:text/plain,${encodeURIComponent(error.stack)}" style="width: 100%;"></iframe>`
    }

    writePage(0, renderPage('No posts', [], header, message) )
  }

  return Promise.all(promises)
}
/**
 * Builds the complete HTML document for one feed page.
 *
 * NOTE(review): `title`, `header`, `footer`, `post.user` and `post.link` are
 * interpolated into the markup without HTML escaping.
 *
 * @param {string} title - text of the <title> element
 * @param {object[]} posts - each post needs `images` (array of URLs), `user`,
 *   `date` (passed to `new Date`) and `link`
 * @param {string} [header] - HTML placed inside <header>; omitted when falsy
 * @param {string} [footer] - HTML placed inside <footer>; omitted when falsy
 * @returns {string} the page markup
 */
const renderPage = (title, posts, header, footer) => {
let html = `\
<html>
<head>
<title>${title}</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
body {
max-width: 640px;
margin: 0 0 0 auto;
padding: 8px;
font-family: sans-serif;
}
ul {
padding-inline-start: 30px;
list-style-type: none;
}
p {
padding: 30px;
}
img {
margin: 10px auto;
max-width: 100%;
}
p a, footer a {
float: right
}
hr {
clear: both
}
</style>
</head>
<body>`
if(header) {
html += `
<header>
${header}
</header>
`
}
// One block per post: its images, then a caption line with user, date and link
for(let post of posts) {
let date = new Date(post.date)
// Date formatting is delegated to config.printDate
html += `
${post.images.map(renderImage).join('\n')}
<p><b>${post.user}</b> ${config.printDate(date)} <a href="${post.link}">open</a></p><hr>`
}
if(footer) {
html += `
<footer>
${footer}
</footer>`
}
html += `
</body>
</html>`
return html
}
/**
 * Renders one image as a lazily loaded <img> wrapped in a link to itself.
 * The URL is normalised through `new URL`, which throws on an invalid URL.
 *
 * @param {string} image - absolute image URL
 * @returns {string} HTML fragment
 */
const renderImage = image => {
  const href = new URL(image).href

  return '<a href="' + href + '"><img src="' + href + '" loading="lazy"></img></a>'
}
/**
 * Entry point. For every feed in `config.feeds` it fetches the configured
 * Nitter and Tumblr users one after another, then writes one set of pages per
 * source (under `out/<type>/<name>`) and one per feed (under `out/<feedName>`,
 * or `out` itself for the feed marked `main`). The Nitter instance cache is
 * written back to `cache.json` after each feed that has Nitter users.
 */
const main = async () => {
  let promises = []
  let feeds = []
  let sources = []

  const wait = promise =>
    promises.push(promise)

  for(let feedName in config.feeds) {
    let feed = config.feeds[feedName]
    let feedSources = []

    // Records the outcome of one source fetch. The returned promise does not
    // reject on a fetch failure: the error is logged and stored on the source
    // entry, and `posts` is then undefined.
    const subscribe = (postPromise, type, name) => {
      let source = { type, name, link: Path.join(type, name) }

      return postPromise
        .catch(error => {
          source.error = error
          console.error(error)
        })
        .then(posts => {
          feedSources.push(posts)
          source.posts = posts
          sources.push(source)
        })
    }

    if(feed.nitter) {
      for(let user of feed.nitter) {
        await subscribe(handleNitterUser(user), 'nitter', user)
      }

      console.log('Caching sources...')
      wait(write('cache.json', JSON.stringify(cache, null, 2) ) )
    }

    if(feed.tumblr) {
      for(let user of feed.tumblr) {
        await subscribe(handleTumblrUser(user), 'tumblr', user)
      }
    }

    let link = feed.main ? '' : feedName

    feeds.push({
      name: feedName,
      main: feed.main,
      view: feed.view,
      sources: feedSources,
      link
    })
  }

  // Builds the navigation header for pages located `depth` directories below
  // the output root.
  const buildNav = depth => {
    const root = '../'.repeat(depth)

    const buildLink = link =>
      config.linkToIndex ? link + 'index.html' : link

    const renderEntry = (page, name = page.link) => {
      let link = buildLink(root + page.link + '/')
      let extra = ''

      if(page.error) {
        extra += ' (errored)'
      } else if (page.posts && page.posts.length == 0) {
        // Feed entries carry no `posts` list (only sources do), so guard the
        // lookup instead of throwing on `undefined.length`.
        extra += ' (empty)'
      }

      return `<li><a href="${link}">${name}</a>${extra}</li>`
    }

    return `\
<details>
<summary>Feeds</summary>
<section>
<ul>
<li><a href="${buildLink(root)}">main</a></li>
${feeds.filter(feed => !feed.main).map(feed => renderEntry(feed)).join('\n')}
</ul>
<hr>
<ul>
${sources.map(source => renderEntry(source)).join('\n')}
</ul>
</section>
</details>
<hr>`
  }

  // Index = directory depth of the page that embeds the nav
  let navs = [
    buildNav(0),
    buildNav(1),
    buildNav(2)
  ]

  console.log('Writing...')

  for(let source of sources) {
    wait(
      printFeed([ source.posts ], Path.join('out', source.link), navs[2], config.sourceView, source.error)
    )
  }

  for(let feed of feeds) {
    wait(
      printFeed(feed.sources, Path.join('out', feed.link), navs[feed.main ? 0 : 1], feed.view)
    )
  }

  await Promise.all(promises)
  console.log('Done!')
}
main()

308
lib.js
View File

@ -68,10 +68,10 @@ export const buildImagePathHandler = (source, id) => (url, i, array) => {
return path + getLinkExtname(url)
}
export const addStylesheet = (path, { viewDir, batch }) =>
export const writeStylesheet = (path, { directory, batch }) =>
batch.add(
FS.readFile(path)
.then(content => write(Path.join(viewDir, 'style.css'), content))
.then(content => write(Path.join(directory, 'style.css'), content))
)
export const postIdFromPathname = post => {
@ -79,92 +79,51 @@ export const postIdFromPathname = post => {
return pathname.slice(pathname.lastIndexOf('/') + 1)
}
export const createLock = async renderer => {
export const createLock = async (path) => {
let lockExists = false
try {
await FS.access(renderer.lockPath)
await FS.access(path)
lockExists = true
} catch(err) {
lockExists = false
}
renderer.lock = {
let lock = {
sources: {},
lists: {}
}
if(lockExists) {
let lock = JSON.parse(await FS.readFile(renderer.lockPath, { encoding: 'utf8' }))
Object.assign(renderer.lock, lock)
Object.assign(lock, JSON.parse(await FS.readFile(path, { encoding: 'utf8' })))
}
return lock
}
export const writeLock = renderer =>
write(renderer.lockPath, JSON.stringify(renderer.lock) )
export const writeLock = (lock, path) =>
write(path, JSON.stringify(lock) )
/**
 * Reports whether `array` FAILS the whitelist, i.e. whether at least one
 * whitelisted tag is absent from `array`. Note the polarity: true means
 * "rejected" (see the call in matchesTags).
 *
 * @param {Array} array - tags present on the post
 * @param {Array} whitelist - tags that must all be present
 * @returns {boolean}
 */
export const testWhitelist = (array, whitelist) =>
  // `some` instead of `find(...) !== undefined`: the latter misreports a
  // match whose value is itself undefined.
  whitelist.some(tag => !array.includes(tag))
/**
 * Reports whether `array` contains at least one blacklisted tag.
 *
 * @param {Array} array - tags present on the post
 * @param {Array} blacklist - tags that must not be present
 * @returns {boolean} true when the post is rejected by the blacklist
 */
export const testBlacklist = (array, blacklist) =>
  // `some` instead of `find(...) !== undefined`: the latter misreports a
  // match whose value is itself undefined.
  blacklist.some(tag => array.includes(tag))
/**
 * Creates a view/renderer descriptor: a fresh PromiseBatch plus the output
 * directory and page size. Properties of `extra` are applied last and can
 * therefore override any of those three.
 *
 * @param {string} directory
 * @param {number} pageSize
 * @param {object} [extra]
 * @returns {object}
 */
export const createView = (directory, pageSize, extra = {}) => ({
  batch: new PromiseBatch(),
  directory,
  pageSize,
  ...extra
})
//
// ;-. ,-. ,-.
// | `-. `-.
// ' `-' `-'
// Thrown by processRss when a feed parses but contains no <item> elements.
class NoMatchesError extends Error {}

/**
 * Parses `source.rss` and stores the resulting posts in `source.posts`.
 *
 * @param {object} source - needs `rss` (RSS source text); `posts` is replaced
 * @param {Function} reducerCallback - receives a post object
 *   ({ source, item, description, dateString, date, link, guid }) and returns
 *   the post to keep, or a falsy value to drop it
 * @returns {object} the same `source`
 * @throws {NoMatchesError} when the document contains no `channel item`
 */
export const processRss = (source, reducerCallback) => {
  const { document } = new JSDOM(source.rss, {
    contentType: 'text/xml'
  }).window
  const items = document.querySelectorAll('channel item')

  if(items.length == 0) {
    throw new NoMatchesError('Got no matches')
  }

  source.posts = []

  for(const item of items) {
    const description = new JSDOM(item.querySelector('description').textContent).window.document
    const dateString = item.querySelector('pubDate').textContent
    const link = item.querySelector('link').textContent
    // Do not fail the whole feed over an item without <guid>
    const guid = item.querySelector('guid')?.textContent
    // An invalid date yields NaN, which `??` does not replace
    const timestamp = new Date(dateString).valueOf()

    const post = reducerCallback({
      source,
      item,
      description,
      dateString,
      date: Number.isNaN(timestamp) ? 0 : timestamp,
      link,
      guid
    })

    if(post) {
      source.posts.push(post)
    }
  }

  return source
}
// domain -> earliest time (epoch ms) at which the next request may start
let waitingList = new Map()

// Resolves (with no value) after `delay` milliseconds.
export const sleep = delay => new Promise(resolve => setTimeout(() => resolve(), delay) )

/**
 * `fetch` with a per-domain courtesy delay: consecutive requests to the same
 * domain start at least `courtesyWait` milliseconds apart. Concurrent callers
 * each reserve their own slot, so they are staggered rather than released
 * together.
 *
 * @param {URL} url - must expose `hostname`
 * @param {object} [options] - forwarded to fetch
 * @param {number} [courtesyWait] - minimum gap between requests, in ms
 * @returns {Promise<Response>}
 */
export const delayedFetch = async (url, options, courtesyWait = 5 * 1000) => {
  // Throttle on the last two hostname labels; single-label hosts such as
  // "localhost" do not match and are used whole.
  const domain = /[\w-]+\.[\w-]+$/.exec(url.hostname)?.[0] ?? url.hostname
  const now = Date.now()
  // The slot is an absolute time: adding courtesyWait to the initial 0
  // produced a timestamp in 1970, so no request was ever delayed.
  const startAt = Math.max(now, waitingList.get(domain) ?? 0)

  waitingList.set(domain, startAt + courtesyWait)

  if(startAt > now) {
    await sleep(startAt - now)
  }

  return await fetch(url, options)
}
export async function fetchRss(source) {
let { hostname } = source
let error
@ -191,6 +150,72 @@ export async function fetchRss(source) {
return source
}
// domain -> earliest time (epoch ms) at which the next request may start
let waitingList = new Map()

// Resolves (with no value) after `delay` milliseconds.
export const sleep = delay => new Promise(resolve => setTimeout(() => resolve(), delay) )

/**
 * `fetch` with a per-domain courtesy delay: consecutive requests to the same
 * domain start at least `courtesyWait` milliseconds apart. Concurrent callers
 * each reserve their own slot, so they are staggered rather than released
 * together.
 *
 * @param {URL} url - must expose `hostname`
 * @param {object} [options] - forwarded to fetch
 * @param {number} [courtesyWait] - minimum gap between requests, in ms
 * @returns {Promise<Response>}
 */
export const delayedFetch = async (url, options, courtesyWait = 5 * 1000) => {
  // Throttle on the last two hostname labels; single-label hosts such as
  // "localhost" do not match and are used whole.
  const domain = /[\w-]+\.[\w-]+$/.exec(url.hostname)?.[0] ?? url.hostname
  const now = Date.now()
  // The slot is an absolute time: adding courtesyWait to the initial 0
  // produced a timestamp in 1970, so no request was ever delayed.
  const startAt = Math.max(now, waitingList.get(domain) ?? 0)

  waitingList.set(domain, startAt + courtesyWait)

  if(startAt > now) {
    await sleep(startAt - now)
  }

  return await fetch(url, options)
}
// Thrown by processRss when a feed parses but contains no <item> elements.
class NoMatchesError extends Error {}

/**
 * Parses `source.rss` and appends every accepted post that is newer than
 * `fromDate` to `source.posts` (which the caller must have initialised).
 *
 * @param {object} source - needs `rss` and an existing `posts` array
 * @param {number} fromDate - posts with `date` not greater than this are skipped
 * @param {Function} reducerCallback - forwarded to processRssItem
 * @returns {object} the same `source`
 * @throws {NoMatchesError} when the document contains no `channel item`
 */
export const processRss = (source, fromDate, reducerCallback) => {
  const dom = new JSDOM(source.rss, { contentType: 'text/xml' })
  const items = dom.window.document.querySelectorAll('channel item')

  if(items.length == 0) {
    throw new NoMatchesError('Got no matches')
  }

  items.forEach(item => {
    const post = processRssItem(source, item, reducerCallback)
    const isNew = Boolean(post) && post.date > fromDate

    if(isNew) {
      source.posts.push(post)
    }
  })

  return source
}
/**
 * Converts one RSS <item> element into a post object and passes it through
 * `reducerCallback`.
 *
 * @param {object} source - stored on the post as `source`
 * @param {Element} item - needs `description`, `pubDate` and `link` children;
 *   `guid` is optional
 * @param {Function} reducerCallback - receives the post and returns the value
 *   to use in its place (a falsy value means "drop", see processRss)
 * @returns {*} whatever `reducerCallback` returns
 */
export const processRssItem = (source, item, reducerCallback) => {
  const description = new JSDOM(item.querySelector('description').textContent).window.document
  const dateString = item.querySelector('pubDate').textContent
  const link = item.querySelector('link').textContent
  const guid = item.querySelector('guid')?.textContent
  // An invalid date yields NaN, which `??` does not replace — test for it explicitly.
  const timestamp = new Date(dateString).valueOf()

  const post = {
    source,
    item,
    description,
    dateString,
    date: Number.isNaN(timestamp) ? 0 : timestamp,
    link,
    guid
  }

  return reducerCallback(post)
}
/**
 * Stores the text of every <category> child of `post.item` in
 * `post.categories` (replacing any previous value).
 *
 * @param {object} post - needs `item` with `querySelectorAll`
 * @returns {object} the same post
 */
export const processCategories = (post) => {
  const matches = post.item.querySelectorAll('category')

  post.categories = Array.from(matches, category => category.textContent)

  return post
}
export const extractImages = (post, cache = true) => {
let images = post.description.querySelectorAll('img')
@ -220,36 +245,21 @@ export const extractImages = (post, cache = true) => {
// |/ | |-' |/|/
// ' ' `-' ' '
export const createPages = (list, { pageSize, header = '', viewDir, batch, getPageFilename, getPageTitle, lock }) => {
export const writePages = (list, { pageSize, header = '', directory, batch }) => {
let posts = []
let lastPageLink = 'about:blank'
let pageIndex = 0
// let pageIndex = Math.ceil(list.posts.length / pageSize)
// let {
// index: pageIndex = 0,
// lastPostDate
// } = lock.lists[list.name]?.lastPage ?? {}
// let sinceDate = posts[0]?.date ?? 0
// posts = list.posts
// .filter(post => post.date > sinceDate)
// .concat(posts)
// .sort((a, b) => b.date - a.date)
list.posts.sort((a, b) => b.date - a.date)
// let firstPageSize =
list.posts.sort((a, b) => a.date - b.date)
for(let i = 0; i < list.posts.length; i++) {
// for(let i = list.posts.length - 1; i >= 0; i--) {
for(let i = list.posts.length - 1; i >= 0; i--) {
posts.push(list.posts[i])
if(i % pageSize == 0) {
let isLastPage = list.main && i < pageSize
let title = getPageTitle(list, pageIndex)
let html = renderPage(title, posts.reverse(), header, renderNextPageLink(lastPageLink))
let filename = isLastPage ? 'index.html' : getPageFilename(list, pageIndex)
let promise = write(Path.join(viewDir, filename), html)
let filename = i < pageSize ? getFinalPageFilename(list) : getPageFilename(list, pageIndex)
let promise = write(Path.join(directory, filename), html)
batch.add(promise.then(annotate(`Created "${title}" (${filename})`)))
posts = []
@ -264,6 +274,15 @@ export const createPages = (list, { pageSize, header = '', viewDir, batch, getPa
// }
}
// File name of a list's final page: "index.html" for the main list, otherwise "<name>.html".
export const getFinalPageFilename = list => {
  const basename = list.main ? 'index' : list.name

  return `${basename}.html`
}
// File name of page `i` of a list: "<name>-<i>.html".
export const getPageFilename = (list, i) => `${list.name}-${i}.html`
// Title of page `i` (zero-based) of a list: "<displayName> - <i + 1>".
export const getPageTitle = (list, i) => `${list.displayName} - ${i + 1}`
export const renderPage = (title, posts, header, footer) => `\
<html>
<head>
@ -329,13 +348,13 @@ ${sources.map(renderNavEntry).join('\n')}
export const renderNavEntry = (list) => {
let extra = ''
if(list.error) {
if(list.errored) {
extra += ' (errored)'
} else if (list.posts.length == 0) {
extra += ' (empty)'
}
return `<li><a href="${list.link}">${list.displayName}</a>${extra}</li>`
return `<li><a href="${getFinalPageFilename(list)}">${list.displayName}</a>${extra}</li>`
}
@ -344,13 +363,13 @@ export const renderNavEntry = (list) => {
// | | | | | | | | | | | | |
// `-' `-' ' ' `-` `-' ' `-' ' '
export const downloadImages = (images, getImagePath, courtesyWait, { viewDir, batch }) => {
export const downloadImages = (images, getImagePath, courtesyWait, { directory, batch }) => {
let out = []
for(let i = 0; i < images.length; i ++) {
let url = images[i]
let relativePath = getImagePath(url, i, images)
let fullPath = Path.join(viewDir, relativePath)
let fullPath = Path.join(directory, relativePath)
let promise = FS.access(fullPath)
.catch(() =>
@ -379,17 +398,19 @@ export const pullImages = (post, renderer, discardPostIfNoImages = false, getPos
}
}
export const createFeed = (name, sources) => {
export const createFeed = (name, sources, main = false) => {
return {
name,
displayName: name,
main,
posts: sources.reduce((posts, source) => posts.concat(source.posts), [])
}
}
export const fetchRssFromInstances = async (source, renderer) => {
export const fetchRssFromInstances = async (source, lock) => {
let index = 0
let instances = source.instances
let lockHostname = renderer.lock.sources[source.name]?.hostname
let lockHostname = lock.sources[source.name]?.hostname
if(lockHostname) {
instances.unshift(lockHostname)
@ -407,13 +428,67 @@ export const fetchRssFromInstances = async (source, renderer) => {
}
}
(renderer.lock.sources[source.name] ??= {}).hostname = source.hostname
return source
}
const addPostsToLock = (source, renderer) => {
(renderer.lock.sources[source.name] ??= {}).postData = source.posts.map(post => post.description)
// const addPostsToLock = (source, renderer) => {
// (renderer.lock.sources[source.name] ??= {})
// .postData = source.posts.map(post => post.description)
// }
/**
 * Fills `source.posts` from two places: the freshly fetched `source.rss`
 * (only posts newer than the locked timestamp) and the serialized items
 * stored in the lock entry of this source.
 *
 * @param {object} source - needs `name` and `rss`; `posts` is replaced
 * @param {Function} postReducerCallback - forwarded to processRss / processRssItem
 * @param {object} lock - `lock.sources[source.name]` is created when missing
 * @returns {object} the source returned by processRss
 * @throws {NoMatchesError} propagated from processRss
 */
export const populateSource = (source, postReducerCallback, lock) => {
  const sourceLock = lock.sources[source.name] ??= {}

  source.posts = []
  source = processRss(source, sourceLock.timestamp ?? 0, postReducerCallback)

  if(sourceLock.items) {
    for(const itemText of sourceLock.items) {
      const item = new JSDOM(itemText, { contentType: 'text/xml' }).window.document.documentElement
      const post = processRssItem(source, item, postReducerCallback)

      // The reducer may reject an item by returning a falsy value; pushing that
      // into `posts` breaks every consumer that reads `post.date`.
      if(post) {
        source.posts.push(post)
      }
    }
  }

  lock.sources[source.name] = sourceLock

  return source
}
/**
 * Replaces the lock entry of `source` with its hostname, the newest post date
 * (0 when there are no posts) and the serialized markup of every post item.
 *
 * @param {object} source - needs `name`, `hostname` and `posts`
 * @param {object} lock - `lock.sources[source.name]` is overwritten
 */
export const lockSource = (source, lock) => {
  const items = source.posts.map(post => post.item.outerHTML)
  const timestamp = source.posts.reduce(
    (latest, post) => post.date > latest ? post.date : latest,
    0
  )

  lock.sources[source.name] = {
    hostname: source.hostname,
    timestamp,
    items
  }
}
// Records every source in `lock` (see lockSource).
export const lockSources = (sources, lock) => {
  for(const source of sources) {
    lockSource(source, lock)
  }
}
/**
 * Writes the whole view: sets `renderer.header` to the navigation markup,
 * writes the pages of every feed and then of every source, and finally the
 * bundled stylesheet.
 *
 * @param {object[]} sources
 * @param {object[]} feeds
 * @param {object} renderer - receives `header`; forwarded to writePages and
 *   writeStylesheet
 */
export const writeView = (sources, feeds, renderer) => {
  const writeAll = lists => {
    for(const list of lists) {
      writePages(list, renderer)
    }
  }

  renderer.header = renderNav(feeds, sources)

  writeAll(feeds)
  writeAll(sources)

  const stylesheetPath = Path.join(import.meta.dirname, 'assets/style.css')

  writeStylesheet(stylesheetPath, renderer)
}
@ -424,19 +499,19 @@ const addPostsToLock = (source, renderer) => {
// ' -'
export const tumblr = {
async createSource(user, courtesyWait, postReducerCallback, renderer) {
async createSource(user, courtesyWait, postReducerCallback, lock) {
let lowercaseUser = user.toLowerCase()
let source = {
hostname: user + '.tumblr.com',
hostname: lowercaseUser + '.tumblr.com',
pathname: 'rss',
courtesyWait,
name: `tumblr-${user}`,
name: `tumblr-${lowercaseUser}`,
displayName: user,
user
user: lowercaseUser,
}
source = await fetchRss(source)
source = processRss(source, postReducerCallback)
addPostsToLock(source, renderer)
source = populateSource(source, postReducerCallback, lock)
return source
},
@ -450,11 +525,25 @@ export const tumblr = {
return reblog && reblog.innerHTML !== post.source.user
},
matchesTags(post, whitelist, blacklist) {
post = processCategories(post)
if(whitelist && testWhitelist(post.categories, whitelist)) {
return false
}
if(blacklist && testBlacklist(post.categories, blacklist)) {
return false
}
return true
},
pullImages
}
export const nitter = {
async createSource(user, instances, courtesyWait, postReducerCallback, renderer) {
async createSource(user, instances, courtesyWait, postReducerCallback, lock) {
let source = {
instances,
pathname: user + '/rss',
@ -464,8 +553,8 @@ export const nitter = {
user
}
source = await fetchRssFromInstances(source, renderer)
source = processRss(source, postReducerCallback)
source = await fetchRssFromInstances(source, lock)
source = populateSource(source, postReducerCallback, lock)
return source
},
@ -486,4 +575,9 @@ export const nitter = {
//
// "Turns out Mastodon has built-in RSS; your feed URL is [instance]/@[username].rss, so for example I'm
// https://mastodon.social/@brownpau.rss (note the "@")"
// - https://mastodon.social/@brownpau/100523448408374430
// - https://mastodon.social/@brownpau/100523448408374430
// Platform handlers keyed by platform name.
export const platforms = { tumblr: tumblr, nitter: nitter }

View File

@ -1,43 +0,0 @@
<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>muses</title>
<link rel="stylesheet" href="style.css">
<style>
</style>
</head>
<body>
<main>
<article>
</article>
</main>
<header><a href="index.html">art regurgitor</a></header>
<nav>
<ul>
<li><a href="index.html">index</a></li>
<li><a href="meta.html">meta</a></li>
<li><a href="muses.html"><i>muses</i></a></li>
<li><a href="musings.html">musings</a></li>
</ul>
<ul>
<li><a href="computing.html">computing</a></li>
</ul>
<hr><p><a href="https://git.sys42.net/dakedres/website/raw/branch/v2/site/muses.md">[source]</a></p>
</nav>
</body>
</html>