diff --git a/index.js b/index.js deleted file mode 100644 index a9dd7ac..0000000 --- a/index.js +++ /dev/null @@ -1,418 +0,0 @@ -import fetch from "node-fetch" -import Path from "path" -import FS from "fs/promises" -import { JSDOM } from "jsdom" - -import config from "./config.js" - -let cache = await FS.readFile('./cache.json', { encoding: 'utf-8' }) - .then(json => JSON.parse(json) ) -let waitingList = new Map() - -const write = async (path, content) => { - let dir = Path.dirname(path) - - try { - await FS.access(dir) - } catch(e) { - await FS.mkdir(dir, { recursive: true }) - } - - return await FS.writeFile(path, content) -} - -const handleNitterUser = async user => { - let data - let index = 0 - let sources = cache.nitter[user] ? - [ cache.nitter[user] ].concat(config.sources.nitter) : - config.sources.nitter - - while(!data && index < sources.length) { - let source = sources[index] - let rss = await fetchRss(source, user + '/rss') - - try { - data = processNitter(rss, user) - } catch(err) { - if(err.constructor.name == NoMatchesError.name || err.constructor.name == DOMException.name) { - console.warn(`Failed to fetch ${user} from ${source}`) - index++ - } else { - throw err - } - } - } - - console.log(`Found ${user} at ${sources[index]}`) - cache.nitter[user] = sources[index] - return data -} - -const sleep = delay => new Promise(resolve => setTimeout(() => resolve(), delay) ) - -class NoMatchesError extends Error {} - -const processRss = (rss, reducerCallback, cdata) => { - let { document } = new JSDOM(rss, { - contentType: 'text/xml' - }).window - let items = document.querySelectorAll('channel item') - - if(items.length == 0) { - throw new NoMatchesError('Got no matches') - } - - let posts = [] - - for(let item of items) { - let description = new JSDOM(item.querySelector('description').textContent).window.document - let dateString = item.querySelector('pubDate').textContent - let link = item.querySelector('link').textContent - - let post = reducerCallback(item, description, dateString, link) - - if(post) { - post.date = new Date(dateString).valueOf() ?? 0 - post.link = link - - posts.push(post) - } - } - - return posts -} - -const fetchRss = async (hostname, path) => { - let waitFor = waitingList.get(hostname) - - if(waitFor !== 0) { - await sleep(waitFor) - waitingList.set(hostname, 0) - } - - return await fetch(new URL(path, 'https://' + hostname) ) - .then(response => { - waitingList.set(hostname, config.courtesyWait) - return response.text() - }) - .catch(console.error) -} - -const getImages = (user, description) => { - let images = description.querySelectorAll('img') - - if(images) { - let imageUrls = [] - - for(let image of images) { - let { src } = image - - if(!src) { - let finalSrc = image.srcset.split(', ').pop() - - src = finalSrc.slice(0, finalSrc.indexOf(' ') ) - } - - imageUrls.push(src) - } - - if(imageUrls.length > 0) { - return { - images: imageUrls, - user - } - } - } -} - -const processNitter = (rss, user) => { - return processRss(rss, (item, description) => { - let creator = item.getElementsByTagName('dc:creator')[0] - - if(creator.innerHTML.slice(1) === user) - return getImages(user, description) - }, true) -} - -const handleTumblrUser = async (user) => { - let rss = await fetchRss(user + '.tumblr.com', 'rss') - - console.log('Found ' + user) - return processTumblr(rss, user) -} - -const processTumblr = (rss, user) => { - return processRss(rss, (item, description) => { - let reblog = description.querySelector('p > a.tumblr_blog') - - // If it's a reblog, skip it - if(reblog && reblog.innerHTML !== user) { - return - } - - return getImages(user, description) - }) -} - -const oneDay = 1000 * 60 * 60 * 24 - -const printFeed = async (sources, directory, header, viewOptions, error) => { - // Coalate - let feed = [] - let tooLongAgo = viewOptions.tooLongAgo ? - (Date.now() - (Date.now() % oneDay)) - oneDay * viewOptions.tooLongAgo : - 0 - - for(let source of sources) { - if(source == undefined) { - continue - } - - for(let post of source) { - if(post.date > tooLongAgo) - feed.push(post) - } - } - - feed = feed.sort((a, b) => a.date > b.date) - - // Render - - let pages = [] - - for(let i = 0; i < Math.ceil(feed.length / viewOptions.pageSize); i++) { - pages.push(feed.slice(i * viewOptions.pageSize, (i + 1) * viewOptions.pageSize) ) - } - - // Write - - let lastIndex = getLastIndex() - let promises = [] - - const writePage = (index, content) => - promises.push( - write(Path.join(directory, index == (feed.length - 1) ? 'index' : index.toString() ) + '.html', content) - ) - - for(let i = 0; i < pages.length; i++) { - let nextPage = i + 1 - - let link = nextPage === pages.length ? - `end` : - `next` - - writePage(i, renderPage(`Page ${i + 1}`, pages[i], header, link) ) - } - - if(pages.length == 0) { - let message = 'No posts available' - - if(error) { - // Put in an iframe to prevent potential XSS through response body? Who knows. - message += `

-` - } - - writePage(0, renderPage('No posts', [], header, message) ) - } - - return Promise.all(promises) -} - -const renderPage = (title, posts, header, footer) => { - let html = `\ - - - -${title} - - - - -` - - if(header) { - html += ` -
-${header} -
-` - } - - for(let post of posts) { - let date = new Date(post.date) - - html += ` -${post.images.map(renderImage).join('\n')} -

${post.user} ${config.printDate(date)} open


` - } - - if(footer) { - html += ` - -` - } - - html += ` - -` - return html -} - -const renderImage = image => { - let { href } = new URL(image) - - return `\ -` -} - -const main = async () => { - let promises = [] - let feeds = [] - let sources = [] - - const wait = promise => - promises.push(promise) - - for(let feedName in config.feeds) { - let feed = config.feeds[feedName] - let feedSources = [] - - const subscribe = (postPromise, type, name) => { - let source = { type, name, link: Path.join(type, name) } - - return postPromise - .catch(error => { - source.error = error - console.error(error) - }) - .then(posts => { - feedSources.push(posts) - source.posts = posts - sources.push(source) - }) - } - - if(feed.nitter) { - for(let user of feed.nitter) { - await subscribe(handleNitterUser(user), 'nitter', user) - } - console.log('Caching sources...') - wait(write('cache.json', JSON.stringify(cache, null, 2) ) ) - } - - if(feed.tumblr) { - for(let user of feed.tumblr) { - await subscribe(handleTumblrUser(user), 'tumblr', user) - } - } - - let link = feed.main ? '' : feedName - - feeds.push({ - name: feedName, - main: feed.main, - view: feed.view, - sources: feedSources, - link - }) - } - - const buildNav = depth => { - const root = '../'.repeat(depth) - - const buildLink = link => - config.linkToIndex ? link + 'index.html' : link - - const renderEntry = (page, name = page.link) => { - let link = buildLink(root + page.link + '/') - let extra = '' - - if(page.error) { - extra += ' (errored)' - } else if (page.posts.length == 0) { - extra += ' (empty)' - } - - return `
  • ${name}${extra}
  • ` - } - - return `\ -
    - -Feeds -
    - -
    - -
    - -
    -
    ` - } - - let navs = [ - buildNav(0), - buildNav(1), - buildNav(2) - ] - - console.log('Writing...') - for(let source of sources) { - wait( - printFeed([ source.posts ], Path.join('out', source.link), navs[2], config.sourceView, source.error) - ) - } - for(let feed of feeds) { - wait( - printFeed(feed.sources, Path.join('out', feed.link), navs[feed.main ? 0 : 1], feed.view) - ) - } - - await Promise.all(promises) - - console.log('Done!') -} - -main() \ No newline at end of file diff --git a/lib.js b/lib.js index 846eeec..971fd10 100644 --- a/lib.js +++ b/lib.js @@ -68,10 +68,10 @@ export const buildImagePathHandler = (source, id) => (url, i, array) => { return path + getLinkExtname(url) } -export const addStylesheet = (path, { viewDir, batch }) => +export const writeStylesheet = (path, { directory, batch }) => batch.add( FS.readFile(path) - .then(content => write(Path.join(viewDir, 'style.css'), content)) + .then(content => write(Path.join(directory, 'style.css'), content)) ) export const postIdFromPathname = post => { @@ -79,92 +79,51 @@ export const postIdFromPathname = post => { return pathname.slice(pathname.lastIndexOf('/') + 1) } -export const createLock = async renderer => { +export const createLock = async (path) => { let lockExists = false try { - await FS.access(renderer.lockPath) + await FS.access(path) lockExists = true } catch(err) { lockExists = false } - renderer.lock = { + let lock = { sources: {}, lists: {} } if(lockExists) { - let lock = JSON.parse(await FS.readFile(renderer.lockPath, { encoding: 'utf8' })) - - Object.assign(renderer.lock, lock) + Object.assign(lock, JSON.parse(await FS.readFile(path, { encoding: 'utf8' }))) } + + return lock } -export const writeLock = renderer => - write(renderer.lockPath, JSON.stringify(renderer.lock) ) +export const writeLock = (lock, path) => + write(path, JSON.stringify(lock) ) +export const testWhitelist = (array, whitelist) => + whitelist.find(tag => !array.includes(tag)) !== undefined + +export const testBlacklist = (array, blacklist) => + blacklist.find(tag => array.includes(tag)) !== undefined + +export const createView = (directory, pageSize, extra = {}) => { + return { + batch: new PromiseBatch(), + directory, + pageSize, + ...extra + } +} // // ;-. ,-. ,-. // | `-. `-. // ' `-' `-' -class NoMatchesError extends Error {} -export const processRss = (source, reducerCallback) => { - let { document } = new JSDOM(source.rss, { - contentType: 'text/xml' - }).window - let items = document.querySelectorAll('channel item') - - if(items.length == 0) { - throw new NoMatchesError('Got no matches') - } - - source.posts = [] - - for(let item of items) { - let description = new JSDOM(item.querySelector('description').textContent).window.document - let dateString = item.querySelector('pubDate').textContent - let link = item.querySelector('link').textContent - let guid = item.querySelector('guid').textContent - - let post = { - source, - item, - description, - dateString, - date: new Date(dateString).valueOf() ?? 0, - link, - guid - } - - post = reducerCallback(post) - - if(post) { - source.posts.push(post) - } - } - - return source -} - -let waitingList = new Map() -export const sleep = delay => new Promise(resolve => setTimeout(() => resolve(), delay) ) -export const delayedFetch = async (url, options, courtesyWait = 5 * 1000) => { - let [ domain ] = /[\w-]+.[\w-]+$/.exec(url.hostname) - let timeout = waitingList.get(domain) ?? 0 - let now = Date.now() - - if(timeout == null || timeout <= now) { - waitingList.set(domain, timeout + courtesyWait) - } else { - await sleep(timeout - now) - } - - return await fetch(url, options) -} - export async function fetchRss(source) { let { hostname } = source let error @@ -191,6 +150,72 @@ export async function fetchRss(source) { return source } +let waitingList = new Map() +export const sleep = delay => new Promise(resolve => setTimeout(() => resolve(), delay) ) +export const delayedFetch = async (url, options, courtesyWait = 5 * 1000) => { + let [ domain ] = /[\w-]+.[\w-]+$/.exec(url.hostname) + let timeout = waitingList.get(domain) ?? 0 + let now = Date.now() + + if(timeout == null || timeout <= now) { + waitingList.set(domain, timeout + courtesyWait) + } else { + await sleep(timeout - now) + } + + return await fetch(url, options) +} + +class NoMatchesError extends Error {} +export const processRss = (source, fromDate, reducerCallback) => { + let { document } = new JSDOM(source.rss, { contentType: 'text/xml' }).window + let items = document.querySelectorAll('channel item') + + if(items.length == 0) { + throw new NoMatchesError('Got no matches') + } + + for(let item of items) { + let post = processRssItem(source, item, reducerCallback) + + if(post && post.date > fromDate) { + source.posts.push(post) + } + } + + return source +} + +export const processRssItem = (source, item, reducerCallback) => { + let description = new JSDOM(item.querySelector('description').textContent).window.document + let dateString = item.querySelector('pubDate').textContent + let link = item.querySelector('link').textContent + let guid = item.querySelector('guid')?.textContent + + let post = { + source, + item, + description, + dateString, + date: new Date(dateString).valueOf() ?? 0, + link, + guid + } + + return reducerCallback(post) +} + +export const processCategories = (post) => { + let categoryMatches = post.item.querySelectorAll('category') + post.categories = [] + + for(let category of categoryMatches) { + post.categories.push(category.textContent) + } + + return post +} + export const extractImages = (post, cache = true) => { let images = post.description.querySelectorAll('img') @@ -220,36 +245,21 @@ export const extractImages = (post, cache = true) => { // |/ | |-' |/|/ // ' ' `-' ' ' -export const createPages = (list, { pageSize, header = '', viewDir, batch, getPageFilename, getPageTitle, lock }) => { +export const writePages = (list, { pageSize, header = '', directory, batch }) => { let posts = [] let lastPageLink = 'about:blank' let pageIndex = 0 - // let pageIndex = Math.ceil(list.posts.length / pageSize) - // let { - // index: pageIndex = 0, - // lastPostDate - // } = lock.lists[list.name]?.lastPage ?? {} - // let sinceDate = posts[0]?.date ?? 0 - // posts = list.posts - // .filter(post => post.date > sinceDate) - // .concat(posts) - // .sort((a, b) => b.date - a.date) + list.posts.sort((a, b) => b.date - a.date) - // let firstPageSize = - - list.posts.sort((a, b) => a.date - b.date) - - for(let i = 0; i < list.posts.length; i++) { - // for(let i = list.posts.length - 1; i >= 0; i--) { + for(let i = list.posts.length - 1; i >= 0; i--) { posts.push(list.posts[i]) if(i % pageSize == 0) { - let isLastPage = list.main && i < pageSize let title = getPageTitle(list, pageIndex) let html = renderPage(title, posts.reverse(), header, renderNextPageLink(lastPageLink)) - let filename = isLastPage ? 'index.html' : getPageFilename(list, pageIndex) - let promise = write(Path.join(viewDir, filename), html) + let filename = i < pageSize ? getFinalPageFilename(list) : getPageFilename(list, pageIndex) + let promise = write(Path.join(directory, filename), html) batch.add(promise.then(annotate(`Created "${title}" (${filename})`))) posts = [] @@ -264,6 +274,15 @@ export const createPages = (list, { pageSize, header = '', viewDir, batch, getPa // } } +export const getFinalPageFilename = list => + (list.main ? 'index' : list.name) + '.html' + +export const getPageFilename = (list, i) => + list.name + '-' + i + '.html' + +export const getPageTitle = (list, i) => + list.displayName + ' - ' + (i + 1) + export const renderPage = (title, posts, header, footer) => `\ @@ -329,13 +348,13 @@ ${sources.map(renderNavEntry).join('\n')} export const renderNavEntry = (list) => { let extra = '' - if(list.error) { + if(list.errored) { extra += ' (errored)' } else if (list.posts.length == 0) { extra += ' (empty)' } - return `
  • ${list.displayName}${extra}
  • ` + return `
  • ${list.displayName}${extra}
  • ` } @@ -344,13 +363,13 @@ export const renderNavEntry = (list) => { // | | | | | | | | | | | | | // `-' `-' ' ' `-` `-' ' `-' ' ' -export const downloadImages = (images, getImagePath, courtesyWait, { viewDir, batch }) => { +export const downloadImages = (images, getImagePath, courtesyWait, { directory, batch }) => { let out = [] for(let i = 0; i < images.length; i ++) { let url = images[i] let relativePath = getImagePath(url, i, images) - let fullPath = Path.join(viewDir, relativePath) + let fullPath = Path.join(directory, relativePath) let promise = FS.access(fullPath) .catch(() => @@ -379,17 +398,19 @@ export const pullImages = (post, renderer, discardPostIfNoImages = false, getPos } } -export const createFeed = (name, sources) => { +export const createFeed = (name, sources, main = false) => { return { name, + displayName: name, + main, posts: sources.reduce((posts, source) => posts.concat(source.posts), []) } } -export const fetchRssFromInstances = async (source, renderer) => { +export const fetchRssFromInstances = async (source, lock) => { let index = 0 let instances = source.instances - let lockHostname = renderer.lock.sources[source.name]?.hostname + let lockHostname = lock.sources[source.name]?.hostname if(lockHostname) { instances.unshift(lockHostname) @@ -407,13 +428,67 @@ export const fetchRssFromInstances = async (source, renderer) => { } } - (renderer.lock.sources[source.name] ??= {}).hostname = source.hostname - return source } -const addPostsToLock = (source, renderer) => { - (renderer.lock.sources[source.name] ??= {}).postData = source.posts.map(post => post.description) +// const addPostsToLock = (source, renderer) => { +// (renderer.lock.sources[source.name] ??= {}) + +// .postData = source.posts.map(post => post.description) +// } + +export const populateSource = (source, postReducerCallback, lock) => { + let sourceLock = lock.sources[source.name] ??= {} + + source.posts = [] + source = processRss(source, sourceLock.timestamp ?? 0, postReducerCallback) + + if(sourceLock.items) { + for(let itemText of sourceLock.items) { + let item = new JSDOM(itemText, { contentType: 'text/xml' }).window.document.documentElement + + source.posts.push(processRssItem(source, item, postReducerCallback)) + } + } + + lock.sources[source.name] = sourceLock + return source +} + +export const lockSource = (source, lock) => { + let date = 0 + let items = [] + + for(let post of source.posts) { + if(post.date > date) + date = post.date + + items.push(post.item.outerHTML) + } + + lock.sources[source.name] = { + hostname: source.hostname, + timestamp: date, + items + } +} + +export const lockSources = (sources, lock) => { + sources.forEach(source => lockSource(source, lock)) +} + +export const writeView = (sources, feeds, renderer) => { + renderer.header = renderNav(feeds, sources) + + for(let feed of feeds) { + writePages(feed, renderer) + } + + for(let source of sources) { + writePages(source, renderer) + } + + writeStylesheet(Path.join(import.meta.dirname, 'assets/style.css'), renderer) } @@ -424,19 +499,19 @@ const addPostsToLock = (source, renderer) => { // ' -' export const tumblr = { - async createSource(user, courtesyWait, postReducerCallback, renderer) { + async createSource(user, courtesyWait, postReducerCallback, lock) { + let lowercaseUser = user.toLowerCase() let source = { - hostname: user + '.tumblr.com', + hostname: lowercaseUser + '.tumblr.com', pathname: 'rss', courtesyWait, - name: `tumblr-${user}`, + name: `tumblr-${lowercaseUser}`, displayName: user, - user + user: lowercaseUser, } - + source = await fetchRss(source) - source = processRss(source, postReducerCallback) - addPostsToLock(source, renderer) + source = populateSource(source, postReducerCallback, lock) return source }, @@ -450,11 +525,25 @@ export const tumblr = { return reblog && reblog.innerHTML !== post.source.user }, + matchesTags(post, whitelist, blacklist) { + post = processCategories(post) + + if(whitelist && testWhitelist(post.categories, whitelist)) { + return false + } + + if(blacklist && testBlacklist(post.categories, blacklist)) { + return false + } + + return true + }, + pullImages } export const nitter = { - async createSource(user, instances, courtesyWait, postReducerCallback, renderer) { + async createSource(user, instances, courtesyWait, postReducerCallback, lock) { let source = { instances, pathname: user + '/rss', @@ -464,8 +553,8 @@ export const nitter = { user } - source = await fetchRssFromInstances(source, renderer) - source = processRss(source, postReducerCallback) + source = await fetchRssFromInstances(source, lock) + source = populateSource(source, postReducerCallback, lock) return source }, @@ -486,4 +575,9 @@ export const nitter = { // // "Turns out Mastodon has built-in RSS; your feed URL is [instance]/@[username].rss, so for example I'm // https://mastodon.social/@brownpau.rss (note the "@")" -// - https://mastodon.social/@brownpau/100523448408374430 \ No newline at end of file +// - https://mastodon.social/@brownpau/100523448408374430 + +export const platforms = { + tumblr, + nitter +} \ No newline at end of file diff --git a/test.html b/test.html deleted file mode 100644 index a48f92b..0000000 --- a/test.html +++ /dev/null @@ -1,43 +0,0 @@ - - - - - -muses - - - - - - -
    - -
    -
    - -
    -
    art regurgitor
    - - - - - \ No newline at end of file