Fix delayed fetching, retry requests, image mirroring for nitter, better RSS error handling
This commit is contained in:
parent
21fe37c7ba
commit
c53044f4d1
166
lib.js
Normal file → Executable file
166
lib.js
Normal file → Executable file
@ -3,7 +3,6 @@
|
|||||||
import Path from "path"
|
import Path from "path"
|
||||||
import FS from "fs/promises"
|
import FS from "fs/promises"
|
||||||
import { JSDOM } from "jsdom"
|
import { JSDOM } from "jsdom"
|
||||||
import mime from "mime-types"
|
|
||||||
|
|
||||||
|
|
||||||
// | o |
|
// | o |
|
||||||
@ -40,7 +39,7 @@ export const write = async (path, content) => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export const createNetworkingError = response => {
|
export const createNetworkingError = response => {
|
||||||
return new Error(`Request failed, ${response.status}: ${response.statusText}`)
|
return new Error(`Request failed for ${response.url}, ${response.status}: ${response.statusText}`)
|
||||||
}
|
}
|
||||||
|
|
||||||
export const getLinkExtname = link =>
|
export const getLinkExtname = link =>
|
||||||
@ -75,22 +74,27 @@ export const createView = (directory, pageSize, extra = {}) => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export const isUnset = (value) => {
|
||||||
|
return typeof value === "undefined" || value === null
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// ;-. ,-. ,-.
|
// ;-. ,-. ,-.
|
||||||
// | `-. `-.
|
// | `-. `-.
|
||||||
// ' `-' `-'
|
// ' `-' `-'
|
||||||
|
|
||||||
export async function fetchRss(source) {
|
export async function fetchChannel(source) {
|
||||||
let { hostname } = source
|
let { hostname } = source
|
||||||
let error
|
let error
|
||||||
let response
|
let response
|
||||||
let rss
|
let rss
|
||||||
|
let channel
|
||||||
|
|
||||||
try {
|
try {
|
||||||
response = await delayedFetch(
|
response = await delayedFetch(
|
||||||
new URL(source.pathname, 'https://' + hostname),
|
new URL(source.pathname, 'https://' + hostname),
|
||||||
{},
|
{},
|
||||||
source.courtesyWait ?? 5 * 1000
|
source.courtesyWait
|
||||||
)
|
)
|
||||||
} catch(err) {
|
} catch(err) {
|
||||||
error = err
|
error = err
|
||||||
@ -99,12 +103,24 @@ export async function fetchRss(source) {
|
|||||||
source.errored = error !== undefined || !response.ok
|
source.errored = error !== undefined || !response.ok
|
||||||
if(source.errored) {
|
if(source.errored) {
|
||||||
source.error = error ?? createNetworkingError(response)
|
source.error = error ?? createNetworkingError(response)
|
||||||
} else {
|
return
|
||||||
rss = await response.text()
|
}
|
||||||
console.log(`Found ${source.name} at ${hostname}`)
|
|
||||||
|
console.log(`Found ${source.name} at ${hostname}`)
|
||||||
|
|
||||||
|
try {
|
||||||
|
channel = createChannel(await response.text())
|
||||||
|
} catch(err) {
|
||||||
|
error = err
|
||||||
}
|
}
|
||||||
|
|
||||||
return rss
|
source.errored = error !== undefined
|
||||||
|
if(source.errored) {
|
||||||
|
source.error = error
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
return channel
|
||||||
}
|
}
|
||||||
|
|
||||||
let waitingList = new Map()
|
let waitingList = new Map()
|
||||||
@ -121,6 +137,21 @@ export const delayedFetch = async (url, options, courtesyWait = 5 * 1000) => {
|
|||||||
return await fetch(url, options)
|
return await fetch(url, options)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export const retryDelayedFetch = async (url, options, courtesyWait, retryAttempts) => {
|
||||||
|
let attemptsTried = 0
|
||||||
|
let response = undefined
|
||||||
|
|
||||||
|
while(isUnset(response) && attemptsTried <= (retryAttempts ?? 3)) {
|
||||||
|
if(attemptsTried > 0)
|
||||||
|
console.error(`Failed to fetch ${url}, retrying...`)
|
||||||
|
|
||||||
|
response = await delayedFetch(url, options, courtesyWait)
|
||||||
|
attemptsTried++
|
||||||
|
}
|
||||||
|
|
||||||
|
return response
|
||||||
|
}
|
||||||
|
|
||||||
export const getCacheFilename = (source) =>
|
export const getCacheFilename = (source) =>
|
||||||
source.name + '.xml'
|
source.name + '.xml'
|
||||||
|
|
||||||
@ -206,11 +237,6 @@ export const readPubDate = (pubDate) =>
|
|||||||
export const createPosts = async (channel, source, fromDate, reducerCallback) => {
|
export const createPosts = async (channel, source, fromDate, reducerCallback) => {
|
||||||
let items = channel.querySelectorAll('item')
|
let items = channel.querySelectorAll('item')
|
||||||
|
|
||||||
// if(items.length === 0) {
|
|
||||||
// // throw new NoMatchesError('Got no matches')
|
|
||||||
// return source
|
|
||||||
// }
|
|
||||||
|
|
||||||
let promises = []
|
let promises = []
|
||||||
|
|
||||||
for(let item of items) {
|
for(let item of items) {
|
||||||
@ -272,7 +298,7 @@ export const extractImages = (post, cache = true) => {
|
|||||||
for(let image of images) {
|
for(let image of images) {
|
||||||
let { src } = image
|
let { src } = image
|
||||||
|
|
||||||
if(!src) {
|
if(isUnset(src)) {
|
||||||
let finalSrc = image.srcset.split(', ').pop()
|
let finalSrc = image.srcset.split(', ').pop()
|
||||||
|
|
||||||
src = finalSrc.slice(0, finalSrc.indexOf(' ') )
|
src = finalSrc.slice(0, finalSrc.indexOf(' ') )
|
||||||
@ -403,7 +429,7 @@ export const renderPostDetail = (name, value) =>
|
|||||||
|
|
||||||
export const renderImage = href => {
|
export const renderImage = href => {
|
||||||
return `\
|
return `\
|
||||||
<a href="${href}" download><img src="${href}" loading="lazy"></img></a>`
|
<a href="${href}"><img src="${href}" loading="lazy"></img></a>`
|
||||||
}
|
}
|
||||||
|
|
||||||
export const renderDate = date =>
|
export const renderDate = date =>
|
||||||
@ -479,21 +505,36 @@ export const renderNavEntry = (list) => {
|
|||||||
// | | | | | | | | | | | | |
|
// | | | | | | | | | | | | |
|
||||||
// `-' `-' ' ' `-` `-' ' `-' ' '
|
// `-' `-' ' ' `-` `-' ' `-' ' '
|
||||||
|
|
||||||
export const downloadImage = async (url, basename, courtesyWait, { batch, directory }) => {
|
export const downloadImage = async (url, basename, { courtesyWait, retryAttempts }, { batch, directory }) => {
|
||||||
let response = await delayedFetch(url, {}, courtesyWait)
|
let response = await retryDelayedFetch(url, {}, courtesyWait, retryAttempts)
|
||||||
.catch(err => console.error(`Failed download of ${url}:`, err, err.errors) )
|
.catch(err => console.error(`Failed download of ${url}:`, err, err.errors) )
|
||||||
|
|
||||||
|
if(response == undefined) {
|
||||||
|
console.error('Could not download image: ' + url)
|
||||||
|
return url
|
||||||
|
}
|
||||||
|
|
||||||
if(response.ok) {
|
if(response.ok) {
|
||||||
let relativePath = basename + imageExtensions[response.headers.get('Content-Type')]
|
let mimetype = response.headers.get('Content-Type').split(';')[0]
|
||||||
|
let extension = imageExtensions[mimetype]
|
||||||
|
|
||||||
|
if(typeof extension !== 'string') {
|
||||||
|
console.error(`Unknown image mimetype for ${url}: ${mimetype}. Cannot download`)
|
||||||
|
return url
|
||||||
|
}
|
||||||
|
|
||||||
|
let relativePath = basename + extension
|
||||||
let path = Path.join(directory, relativePath)
|
let path = Path.join(directory, relativePath)
|
||||||
|
|
||||||
const download = () => write(path, response.body)
|
const download = () => write(path, response.body)
|
||||||
.then(annotate( `Downloaded ${relativePath}`))
|
.then(annotate( `Downloaded ${relativePath}`))
|
||||||
|
|
||||||
|
// TODO: See if the image is downloaded before even trying to download it
|
||||||
batch.add(FS.access(path).catch(download))
|
batch.add(FS.access(path).catch(download))
|
||||||
return relativePath
|
return relativePath
|
||||||
} else {
|
} else {
|
||||||
throw createNetworkingError(response)
|
console.error( createNetworkingError(response) )
|
||||||
|
return url
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -504,7 +545,7 @@ export const downloadImages = (images, source, postId, view) => {
|
|||||||
for(let i = 0; i < images.length; i++) {
|
for(let i = 0; i < images.length; i++) {
|
||||||
let basename = images.length > 1 ? basePath + '-' + i : basePath
|
let basename = images.length > 1 ? basePath + '-' + i : basePath
|
||||||
|
|
||||||
pathnames.push(downloadImage(images[i], basename, source.courtesyWait, view))
|
pathnames.push(downloadImage(images[i], basename, source, view))
|
||||||
}
|
}
|
||||||
|
|
||||||
return Promise.all(pathnames)
|
return Promise.all(pathnames)
|
||||||
@ -546,19 +587,19 @@ export const createFeed = (name, sources, main = false) => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export const fetchRssFromInstances = async (source) => {
|
export const fetchChannelFromInstances = async (source) => {
|
||||||
let index = 0
|
let index = 0
|
||||||
let instances = source.instances
|
let instances = source.instances
|
||||||
let cachedLink = source.cache.link
|
let cachedLink = source.cache.link
|
||||||
let rss
|
let channel
|
||||||
|
|
||||||
if(cachedLink) {
|
if(cachedLink) {
|
||||||
instances.unshift(cachedLink.hostname)
|
instances.unshift(cachedLink.hostname)
|
||||||
}
|
}
|
||||||
|
|
||||||
while(!rss && index != instances.length) {
|
while(!channel && index != instances.length) {
|
||||||
source.hostname = instances[index]
|
source.hostname = instances[index]
|
||||||
rss = await fetchRss(source)
|
channel = await fetchChannel(source)
|
||||||
|
|
||||||
if(source.errored) {
|
if(source.errored) {
|
||||||
console.error(`Failed to fetch ${source.name} from ${source.hostname}: `, source.error)
|
console.error(`Failed to fetch ${source.name} from ${source.hostname}: `, source.error)
|
||||||
@ -568,10 +609,10 @@ export const fetchRssFromInstances = async (source) => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return rss
|
return channel
|
||||||
}
|
}
|
||||||
|
|
||||||
export const populateSource = async (rss, source, postReducerCallback, useCache = true) => {
|
export const populateSource = async (channel, source, postReducerCallback, useCache = true) => {
|
||||||
let fromDate = 0
|
let fromDate = 0
|
||||||
source.items = []
|
source.items = []
|
||||||
source.posts = []
|
source.posts = []
|
||||||
@ -590,8 +631,8 @@ export const populateSource = async (rss, source, postReducerCallback, useCache
|
|||||||
return postReducerCallback(post)
|
return postReducerCallback(post)
|
||||||
}
|
}
|
||||||
|
|
||||||
if(rss ?? false)
|
if(channel ?? false)
|
||||||
source = await createPosts(createChannel(rss), source, fromDate, remoteReducerCallback)
|
source = await createPosts(channel, source, fromDate, remoteReducerCallback)
|
||||||
|
|
||||||
return source
|
return source
|
||||||
}
|
}
|
||||||
@ -615,14 +656,24 @@ export const writeView = (sources, feeds, view) => {
|
|||||||
writeStylesheet(Path.join(import.meta.dirname, 'assets/style.css'), view)
|
writeStylesheet(Path.join(import.meta.dirname, 'assets/style.css'), view)
|
||||||
}
|
}
|
||||||
|
|
||||||
export const createSource = async (source, getRss, postReducerCallback, cache) => {
|
export const createSource = async (source, getChannel, postReducerCallback, cache) => {
|
||||||
source = await openCache(source, cache)
|
source = await openCache(source, cache)
|
||||||
source = await populateSource(await getRss(source), source, postReducerCallback, cache.populate)
|
source = await populateSource(await getChannel(source), source, postReducerCallback, cache.populate)
|
||||||
|
|
||||||
cache.batch.add(cacheSource(source, cache))
|
cache.batch.add(cacheSource(source, cache))
|
||||||
return source
|
return source
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export const createSourceOptions = (options) => {
|
||||||
|
if(isUnset(options.courtesyWait))
|
||||||
|
options.courtesyWait = 1000
|
||||||
|
|
||||||
|
if(isUnset(options.retryAttempts))
|
||||||
|
options.retryAttempts = 3
|
||||||
|
|
||||||
|
return options
|
||||||
|
}
|
||||||
|
|
||||||
// | | ,-
|
// | | ,-
|
||||||
// ;-. | ,-: |- | ,-. ;-. ;-.-. ,-.
|
// ;-. | ,-: |- | ,-. ;-. ;-.-. ,-.
|
||||||
// | | | | | | |- | | | | | | `-.
|
// | | | | | | |- | | | | | | `-.
|
||||||
@ -630,20 +681,20 @@ export const createSource = async (source, getRss, postReducerCallback, cache) =
|
|||||||
// ' -'
|
// ' -'
|
||||||
|
|
||||||
export const tumblr = {
|
export const tumblr = {
|
||||||
createSource(user, courtesyWait, postReducerCallback, cache) {
|
createSource(user, options, postReducerCallback, cache) {
|
||||||
let lowercaseUser = user.toLowerCase()
|
let lowercaseUser = user.toLowerCase()
|
||||||
let source = {
|
let source = {
|
||||||
type: 'tumblr',
|
type: 'tumblr',
|
||||||
description: `Aggregate feed for @${lowercaseUser} on tumblr.com`,
|
description: `Aggregate feed for @${lowercaseUser} on tumblr.com`,
|
||||||
hostname: lowercaseUser + '.tumblr.com',
|
hostname: lowercaseUser + '.tumblr.com',
|
||||||
pathname: 'rss',
|
pathname: 'rss',
|
||||||
courtesyWait,
|
|
||||||
name: `tumblr-${lowercaseUser}`,
|
name: `tumblr-${lowercaseUser}`,
|
||||||
displayName: user,
|
displayName: user,
|
||||||
user: lowercaseUser,
|
user: lowercaseUser,
|
||||||
|
...createSourceOptions(options)
|
||||||
}
|
}
|
||||||
|
|
||||||
return createSource(source, fetchRss, postReducerCallback, cache)
|
return createSource(source, fetchChannel, postReducerCallback, cache)
|
||||||
},
|
},
|
||||||
|
|
||||||
createSources(users, ...args) {
|
createSources(users, ...args) {
|
||||||
@ -672,19 +723,19 @@ export const tumblr = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export const nitter = {
|
export const nitter = {
|
||||||
createSource(user, instances, courtesyWait, postReducerCallback, cache) {
|
createSource(user, options, instances, postReducerCallback, cache) {
|
||||||
let source = {
|
let source = {
|
||||||
type: 'nitter',
|
type: 'nitter',
|
||||||
description: `Aggregate feed for @${user} on twitter.com`,
|
description: `Aggregate feed for @${user} on twitter.com`,
|
||||||
instances,
|
instances,
|
||||||
pathname: user + '/rss',
|
pathname: user + '/rss',
|
||||||
courtesyWait,
|
|
||||||
name: `nitter-${user}`,
|
name: `nitter-${user}`,
|
||||||
displayName: user,
|
displayName: user,
|
||||||
user
|
user,
|
||||||
|
...createSourceOptions(options)
|
||||||
}
|
}
|
||||||
|
|
||||||
return createSource(source, fetchRssFromInstances, postReducerCallback, cache)
|
return createSource(source, fetchChannelFromInstances, postReducerCallback, cache)
|
||||||
},
|
},
|
||||||
|
|
||||||
createSources(users, ...args) {
|
createSources(users, ...args) {
|
||||||
@ -697,17 +748,36 @@ export const nitter = {
|
|||||||
return creator.innerHTML.slice(1) !== post.source.user
|
return creator.innerHTML.slice(1) !== post.source.user
|
||||||
},
|
},
|
||||||
|
|
||||||
pullImages
|
async pullImages (post, view, imageMirrorDomain, discardPostIfNoImages = false, getPostId = getPostIdFromPathname) {
|
||||||
|
let images = extractImages(post)
|
||||||
|
let mirroredImages = []
|
||||||
|
const mirrorImage = nitter.createImageMirrorer(post, imageMirrorDomain)
|
||||||
|
|
||||||
|
if(!discardPostIfNoImages || images.length > 0) {
|
||||||
|
post.images = await downloadImages(
|
||||||
|
images.map(mirrorImage),
|
||||||
|
post.source,
|
||||||
|
getPostId(post),
|
||||||
|
view
|
||||||
|
)
|
||||||
|
return post
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
createImageMirrorer(post, imageMirrorDomain) {
|
||||||
|
let mirrorUrl = new URL(imageMirrorDomain)
|
||||||
|
let basePathname = new URL(post.guid).pathname
|
||||||
|
|
||||||
|
return (image, index, images) => {
|
||||||
|
mirrorUrl.pathname = Path.join(basePathname, 'photo', (index + 1).toString())
|
||||||
|
|
||||||
|
return mirrorUrl.href
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Mastodon support
|
|
||||||
//
|
|
||||||
// "Turns out Mastodon has built-in RSS; your feed URL is [instance]/@[username].rss, so for example I'm
|
|
||||||
// https://mastodon.social/@brownpau.rss (note the "@")"
|
|
||||||
// - https://mastodon.social/@brownpau/100523448408374430
|
|
||||||
|
|
||||||
export const mastodon = {
|
export const mastodon = {
|
||||||
createSource(usertag, courtesyWait, postReducerCallback, cache) {
|
createSource(usertag, options, postReducerCallback, cache) {
|
||||||
let [ user, hostname ] = usertag.toLowerCase().split('@')
|
let [ user, hostname ] = usertag.toLowerCase().split('@')
|
||||||
|
|
||||||
let source = {
|
let source = {
|
||||||
@ -715,13 +785,13 @@ export const mastodon = {
|
|||||||
description: `Aggregate feed for @${user} at ${hostname}`,
|
description: `Aggregate feed for @${user} at ${hostname}`,
|
||||||
hostname,
|
hostname,
|
||||||
pathname: '@' + user + ".rss",
|
pathname: '@' + user + ".rss",
|
||||||
courtesyWait,
|
|
||||||
name: `${hostname}-${user}`,
|
name: `${hostname}-${user}`,
|
||||||
displayName: user,
|
displayName: user,
|
||||||
user,
|
user,
|
||||||
|
...createSourceOptions(options)
|
||||||
}
|
}
|
||||||
|
|
||||||
return createSource(source, fetchRss, postReducerCallback, cache)
|
return createSource(source, fetchChannel, postReducerCallback, cache)
|
||||||
},
|
},
|
||||||
|
|
||||||
isRepost(post) {
|
isRepost(post) {
|
||||||
@ -747,4 +817,4 @@ export const mastodon = {
|
|||||||
return post
|
return post
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
3
package.json
Normal file → Executable file
3
package.json
Normal file → Executable file
@ -10,8 +10,7 @@
|
|||||||
"author": "",
|
"author": "",
|
||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"jsdom": "^22.1.0",
|
"jsdom": "^22.1.0"
|
||||||
"mime-types": "^2.1.35"
|
|
||||||
},
|
},
|
||||||
"type": "module"
|
"type": "module"
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user