diff --git a/lib.js b/lib.js old mode 100644 new mode 100755 index 738df5b..a2e5782 --- a/lib.js +++ b/lib.js @@ -3,7 +3,6 @@ import Path from "path" import FS from "fs/promises" import { JSDOM } from "jsdom" -import mime from "mime-types" // | o | @@ -40,7 +39,7 @@ export const write = async (path, content) => { } export const createNetworkingError = response => { - return new Error(`Request failed, ${response.status}: ${response.statusText}`) + return new Error(`Request failed for ${response.url}, ${response.status}: ${response.statusText}`) } export const getLinkExtname = link => @@ -75,22 +74,27 @@ export const createView = (directory, pageSize, extra = {}) => { } } +export const isUnset = (value) => { + return typeof value === "undefined" || value === null +} + // // ;-. ,-. ,-. // | `-. `-. // ' `-' `-' -export async function fetchRss(source) { +export async function fetchChannel(source) { let { hostname } = source let error let response let rss + let channel try { response = await delayedFetch( new URL(source.pathname, 'https://' + hostname), {}, - source.courtesyWait ?? 5 * 1000 + source.courtesyWait ) } catch(err) { error = err @@ -99,12 +103,24 @@ export async function fetchRss(source) { source.errored = error !== undefined || !response.ok if(source.errored) { source.error = error ?? createNetworkingError(response) - } else { - rss = await response.text() - console.log(`Found ${source.name} at ${hostname}`) + return + } + + console.log(`Found ${source.name} at ${hostname}`) + + try { + channel = createChannel(await response.text()) + } catch(err) { + error = err } - return rss + source.errored = error !== undefined + if(source.errored) { + source.error = error + return + } + + return channel } let waitingList = new Map() @@ -121,6 +137,21 @@ export const delayedFetch = async (url, options, courtesyWait = 5 * 1000) => { return await fetch(url, options) } +export const retryDelayedFetch = async (url, options, courtesyWait, retryAttempts) => { + let attemptsTried = 0 + let response = undefined + + while(isUnset(response) && attemptsTried <= (retryAttempts ?? 3)) { + if(attemptsTried > 0) + console.error(`Failed to fetch ${url}, retrying...`) + + response = await delayedFetch(url, options, courtesyWait) + attemptsTried++ + } + + return response +} + export const getCacheFilename = (source) => source.name + '.xml' @@ -206,11 +237,6 @@ export const readPubDate = (pubDate) => export const createPosts = async (channel, source, fromDate, reducerCallback) => { let items = channel.querySelectorAll('item') - // if(items.length === 0) { - // // throw new NoMatchesError('Got no matches') - // return source - // } - let promises = [] for(let item of items) { @@ -272,7 +298,7 @@ export const extractImages = (post, cache = true) => { for(let image of images) { let { src } = image - if(!src) { + if(isUnset(src)) { let finalSrc = image.srcset.split(', ').pop() src = finalSrc.slice(0, finalSrc.indexOf(' ') ) @@ -403,7 +429,7 @@ export const renderPostDetail = (name, value) => export const renderImage = href => { return `\ -` +` } export const renderDate = date => @@ -479,21 +505,36 @@ export const renderNavEntry = (list) => { // | | | | | | | | | | | | | // `-' `-' ' ' `-` `-' ' `-' ' ' -export const downloadImage = async (url, basename, courtesyWait, { batch, directory }) => { - let response = await delayedFetch(url, {}, courtesyWait) +export const downloadImage = async (url, basename, { courtesyWait, retryAttempts }, { batch, directory }) => { + let response = await retryDelayedFetch(url, {}, courtesyWait, retryAttempts) .catch(err => console.error(`Failed download of ${url}:`, err, err.errors) ) + if(response == undefined) { + console.error('Could not download image: ' + url) + return url + } + if(response.ok) { - let relativePath = basename + imageExtensions[response.headers.get('Content-Type')] + let mimetype = response.headers.get('Content-Type').split(';')[0] + let extension = imageExtensions[mimetype] + + if(typeof extension !== 'string') { + console.error(`Unknown image mimetype for ${url}: ${mimetype}. Cannot download`) + return url + } + + let relativePath = basename + extension let path = Path.join(directory, relativePath) const download = () => write(path, response.body) .then(annotate( `Downloaded ${relativePath}`)) + // TODO: See if the image is downloaded before even trying to download it batch.add(FS.access(path).catch(download)) return relativePath } else { - throw createNetworkingError(response) + console.error( createNetworkingError(response) ) + return url } } @@ -504,7 +545,7 @@ export const downloadImages = (images, source, postId, view) => { for(let i = 0; i < images.length; i++) { let basename = images.length > 1 ? basePath + '-' + i : basePath - pathnames.push(downloadImage(images[i], basename, source.courtesyWait, view)) + pathnames.push(downloadImage(images[i], basename, source, view)) } return Promise.all(pathnames) @@ -546,19 +587,19 @@ export const createFeed = (name, sources, main = false) => { } } -export const fetchRssFromInstances = async (source) => { +export const fetchChannelFromInstances = async (source) => { let index = 0 let instances = source.instances let cachedLink = source.cache.link - let rss + let channel if(cachedLink) { instances.unshift(cachedLink.hostname) } - while(!rss && index != instances.length) { + while(!channel && index != instances.length) { source.hostname = instances[index] - rss = await fetchRss(source) + channel = await fetchChannel(source) if(source.errored) { console.error(`Failed to fetch ${source.name} from ${source.hostname}: `, source.error) @@ -568,10 +609,10 @@ export const fetchRssFromInstances = async (source) => { } } - return rss + return channel } -export const populateSource = async (rss, source, postReducerCallback, useCache = true) => { +export const populateSource = async (channel, source, postReducerCallback, useCache = true) => { let fromDate = 0 source.items = [] source.posts = [] @@ -590,8 +631,8 @@ export const populateSource = async (rss, source, postReducerCallback, useCache return postReducerCallback(post) } - if(rss ?? false) - source = await createPosts(createChannel(rss), source, fromDate, remoteReducerCallback) + if(channel ?? false) + source = await createPosts(channel, source, fromDate, remoteReducerCallback) return source } @@ -615,14 +656,24 @@ export const writeView = (sources, feeds, view) => { writeStylesheet(Path.join(import.meta.dirname, 'assets/style.css'), view) } -export const createSource = async (source, getRss, postReducerCallback, cache) => { +export const createSource = async (source, getChannel, postReducerCallback, cache) => { source = await openCache(source, cache) - source = await populateSource(await getRss(source), source, postReducerCallback, cache.populate) + source = await populateSource(await getChannel(source), source, postReducerCallback, cache.populate) cache.batch.add(cacheSource(source, cache)) return source } +export const createSourceOptions = (options) => { + if(isUnset(options.courtesyWait)) + options.courtesyWait = 1000 + + if(isUnset(options.retryAttempts)) + options.retryAttempts = 3 + + return options +} + // | | ,- // ;-. | ,-: |- | ,-. ;-. ;-.-. ,-. // | | | | | | |- | | | | | | `-. @@ -630,20 +681,20 @@ export const createSource = async (source, getRss, postReducerCallback, cache) = // ' -' export const tumblr = { - createSource(user, courtesyWait, postReducerCallback, cache) { + createSource(user, options, postReducerCallback, cache) { let lowercaseUser = user.toLowerCase() let source = { type: 'tumblr', description: `Aggregate feed for @${lowercaseUser} on tumblr.com`, hostname: lowercaseUser + '.tumblr.com', pathname: 'rss', - courtesyWait, name: `tumblr-${lowercaseUser}`, displayName: user, user: lowercaseUser, + ...createSourceOptions(options) } - return createSource(source, fetchRss, postReducerCallback, cache) + return createSource(source, fetchChannel, postReducerCallback, cache) }, createSources(users, ...args) { @@ -672,19 +723,19 @@ export const tumblr = { } export const nitter = { - createSource(user, instances, courtesyWait, postReducerCallback, cache) { + createSource(user, options, instances, postReducerCallback, cache) { let source = { type: 'nitter', description: `Aggregate feed for @${user} on twitter.com`, instances, pathname: user + '/rss', - courtesyWait, name: `nitter-${user}`, displayName: user, - user + user, + ...createSourceOptions(options) } - return createSource(source, fetchRssFromInstances, postReducerCallback, cache) + return createSource(source, fetchChannelFromInstances, postReducerCallback, cache) }, createSources(users, ...args) { @@ -697,17 +748,36 @@ export const nitter = { return creator.innerHTML.slice(1) !== post.source.user }, - pullImages + async pullImages (post, view, imageMirrorDomain, discardPostIfNoImages = false, getPostId = getPostIdFromPathname) { + let images = extractImages(post) + let mirroredImages = [] + const mirrorImage = nitter.createImageMirrorer(post, imageMirrorDomain) + + if(!discardPostIfNoImages || images.length > 0) { + post.images = await downloadImages( + images.map(mirrorImage), + post.source, + getPostId(post), + view + ) + return post + } + }, + + createImageMirrorer(post, imageMirrorDomain) { + let mirrorUrl = new URL(imageMirrorDomain) + let basePathname = new URL(post.guid).pathname + + return (image, index, images) => { + mirrorUrl.pathname = Path.join(basePathname, 'photo', (index + 1).toString()) + + return mirrorUrl.href + } + } } -// TODO: Mastodon support -// -// "Turns out Mastodon has built-in RSS; your feed URL is [instance]/@[username].rss, so for example I'm -// https://mastodon.social/@brownpau.rss (note the "@")" -// - https://mastodon.social/@brownpau/100523448408374430 - export const mastodon = { - createSource(usertag, courtesyWait, postReducerCallback, cache) { + createSource(usertag, options, postReducerCallback, cache) { let [ user, hostname ] = usertag.toLowerCase().split('@') let source = { @@ -715,13 +785,13 @@ export const mastodon = { description: `Aggregate feed for @${user} at ${hostname}`, hostname, pathname: '@' + user + ".rss", - courtesyWait, name: `${hostname}-${user}`, displayName: user, user, + ...createSourceOptions(options) } - return createSource(source, fetchRss, postReducerCallback, cache) + return createSource(source, fetchChannel, postReducerCallback, cache) }, isRepost(post) { @@ -747,4 +817,4 @@ export const mastodon = { return post } } -} \ No newline at end of file +} diff --git a/package.json b/package.json old mode 100644 new mode 100755 index 3601d3e..49f0e01 --- a/package.json +++ b/package.json @@ -10,8 +10,7 @@ "author": "", "license": "ISC", "dependencies": { - "jsdom": "^22.1.0", - "mime-types": "^2.1.35" + "jsdom": "^22.1.0" }, "type": "module" }