From b58680d249c1942adc0b9d697732f93d967299f5 Mon Sep 17 00:00:00 2001 From: Dakedres Date: Thu, 11 Apr 2024 09:48:26 -0600 Subject: [PATCH] Reordering of codebase --- lib.js | 458 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 240 insertions(+), 218 deletions(-) mode change 100755 => 100644 lib.js diff --git a/lib.js b/lib.js old mode 100755 new mode 100644 index 4bb3747..2be6dcf --- a/lib.js +++ b/lib.js @@ -1,9 +1,9 @@ -// Ascii font used is "Shimrod" - import Path from "path" import FS from "fs/promises" import { JSDOM } from "jsdom" + + // | o | // . . |- . | ,-. // | | | | | `-. @@ -89,6 +89,39 @@ export const isUnset = (value) => { return typeof value === "undefined" || value === null } +let waitingList = new Map() +export const sleep = delay => new Promise(resolve => setTimeout(() => resolve(), delay) ) + +export const delayedFetch = async (url, options, courtesyWait = 5 * 1000) => { + let [ domain ] = /[\w-]+.[\w-]+$/.exec(new URL(url).hostname) + let waitFor = waitingList.get(domain) ?? 0 + + waitingList.set(domain, waitFor + courtesyWait) + if(waitFor !== 0) { + console.log(`Waiting ${waitFor}ms to download ${url}`) + await sleep(waitFor) + } + + return await fetch(url, options) +} + +export const retryDelayedFetch = async (url, options, courtesyWait, retryAttempts) => { + let attemptsTried = 0 + let response = undefined + + while(isUnset(response) && attemptsTried <= (retryAttempts ?? 3)) { + if(attemptsTried > 0) + console.error(`Failed to fetch ${url}, retrying...`) + + response = await delayedFetch(url, options, courtesyWait) + attemptsTried++ + } + + return response +} + + + // // ;-. ,-. ,-. // | `-. `-. @@ -134,36 +167,113 @@ export async function fetchChannel(source) { return channel } -let waitingList = new Map() -export const sleep = delay => new Promise(resolve => setTimeout(() => resolve(), delay) ) -export const delayedFetch = async (url, options, courtesyWait = 5 * 1000) => { - let [ domain ] = /[\w-]+.[\w-]+$/.exec(new URL(url).hostname) - let waitFor = waitingList.get(domain) ?? 0 +export const createChannel = rss => { + let { document } = new JSDOM(rss, { contentType: 'text/xml' }).window - waitingList.set(domain, waitFor + courtesyWait) - if(waitFor !== 0) { - console.log(`Waiting ${waitFor}ms to download ${url}`) - await sleep(waitFor) - } - - return await fetch(url, options) + return document.querySelector('channel') } -export const retryDelayedFetch = async (url, options, courtesyWait, retryAttempts) => { - let attemptsTried = 0 - let response = undefined +export const readPubDate = (pubDate) => + pubDate ? new Date(pubDate.textContent).valueOf() : 0 - while(isUnset(response) && attemptsTried <= (retryAttempts ?? 3)) { - if(attemptsTried > 0) - console.error(`Failed to fetch ${url}, retrying...`) +export const createPosts = async (channel, source, fromDate, reducerCallback) => { + let items = channel.querySelectorAll('item') - response = await delayedFetch(url, options, courtesyWait) - attemptsTried++ + let promises = [] + + for(let item of items) { + let post = createPost(item, source) + + if(post.date <= fromDate) + continue + + source.items.push(item) + + let postResolvable = reducerCallback(post) + + if(postResolvable instanceof Promise) { + postResolvable + .then(post => { + if(post) { + source.posts.push(post) + } + }) + } else { + if(postResolvable) { + source.posts.push(postResolvable) + } + } + + promises.push(postResolvable) } - return response + await Promise.all(promises) + return source } +export const createPost = (item, source) => { + let description = new JSDOM(item.querySelector('description').textContent).window.document + let date = readPubDate(item.querySelector('pubDate')) + let link = item.querySelector('link').textContent + let guid = item.querySelector('guid')?.textContent + let title = item.querySelector('title')?.textContent + + let post = { + source, + item, + description, + date, + link, + guid, + title, + occurances: [] + } + + return post +} + +export const extractImages = (post) => { + let images = post.description.querySelectorAll('img') + + if(images) { + let imageUrls = [] + + for(let image of images) { + let { src } = image + + if(isUnset(src)) { + let finalSrc = image.srcset.split(', ').pop() + + src = finalSrc.slice(0, finalSrc.indexOf(' ') ) + } + + // Sending through URL prevents potential XSS + imageUrls.push(new URL(src).href) + } + + return imageUrls + } +} + +export const processCategories = (post) => { + let categoryMatches = post.item.querySelectorAll('category') + post.categories = [] + + for(let category of categoryMatches) { + post.categories.push(category.textContent) + } + + return post +} + + + +// . +// | +// ,-. ,-: ,-. |-. ,-. +// | | | | | | |-' +// `-' `-` `-' ' ' `-' + export const createCache = async (cache = {}) => { if(isUnset(cache.enabled)) { cache.enabled = false @@ -247,106 +357,102 @@ ${source.items.map(item => item.outerHTML.replaceAll(/\n\s*/g, '')).join('\n')} ` -export const createChannel = rss => { - let { document } = new JSDOM(rss, { contentType: 'text/xml' }).window - return document.querySelector('channel') + +// | | | o +// ,-. ,-. | | ,-: |- . ,-. ;-. +// | | | | | | | | | | | | | +// `-' `-' ' ' `-` `-' ' `-' ' ' + +export const createFeed = (name, sources, main = false) => { + return { + name, + displayName: name, + main, + posts: sources.reduce((posts, source) => posts.concat(source.posts), []) + } } -export const readPubDate = (pubDate) => - pubDate ? new Date(pubDate.textContent).valueOf() : 0 +export const downloadImage = async (url, basename, source, view) => { + let response = await retryDelayedFetch(url, {}, source.courtesyWait, source.retryAttempts) + .catch(err => console.error(`Failed download of ${url}:`, err, err.errors) ) -export const createPosts = async (channel, source, fromDate, reducerCallback) => { - let items = channel.querySelectorAll('item') - - let promises = [] - - for(let item of items) { - let post = createPost(item, source) - - if(post.date <= fromDate) - continue - - source.items.push(item) - - let postResolvable = reducerCallback(post) - - if(postResolvable instanceof Promise) { - postResolvable - .then(post => { - if(post) { - source.posts.push(post) - } - }) - } else { - if(postResolvable) { - source.posts.push(postResolvable) - } - } - - promises.push(postResolvable) + if(response == undefined) { + console.error('Could not download image: ' + url) + return url } - await Promise.all(promises) - return source + if(response.ok) { + let mimetype = response.headers.get('Content-Type').split(';')[0] + let extension = imageExtensions[mimetype] + + if(typeof extension !== 'string') { + console.error(`Unknown mimetype for ${url}: ${mimetype}. Cannot download`) + return url + } + + let pathname = Path.join(view.imageStoreDirectory, basename + extension) + let path = Path.join(view.path, pathname) + + const download = () => write(path, response.body) + .then(annotate( `Downloaded ${pathname}`)) + + // TODO: See if the image is downloaded before even trying to download it + view.batch.add(FS.access(path).catch(download)) + return pathname + } else { + console.error( createNetworkingError(response) ) + return url + } } -export const createPost = (item, source) => { - let description = new JSDOM(item.querySelector('description').textContent).window.document - let date = readPubDate(item.querySelector('pubDate')) - let link = item.querySelector('link').textContent - let guid = item.querySelector('guid')?.textContent - let title = item.querySelector('title')?.textContent +export const downloadImages = (images, source, postId, view) => { + let basePath = getImageBasePath(source, postId) + let pathnames = [] - let post = { - source, - item, - description, - date, - link, - guid, - title, - occurances: [] + for(let i = 0; i < images.length; i++) { + let basename = images.length > 1 ? basePath + '-' + i : basePath + let pathname = view.imageStore.get(basename) + + if(isUnset(pathname)) { + pathname = downloadImage(images[i], basename, source, view) + } + + pathnames.push(pathname) } - return post + return Promise.all(pathnames) } -export const processCategories = (post) => { - let categoryMatches = post.item.querySelectorAll('category') - post.categories = [] - - for(let category of categoryMatches) { - post.categories.push(category.textContent) - } - - return post +export const imageExtensions = { + 'image/apng': '.apng', + 'image/avif': '.avif', + 'image/bmp': '.bmp', + 'image/gif': '.gif', + 'image/vnd.microsoft.icon': '.icon', + 'image/jpeg': '.jpg', + 'image/png': '.png', + 'image/svg+xml': '.xml', + 'image/tiff': '.tif', + 'image/webp': '.webp' } -export const extractImages = (post) => { - let images = post.description.querySelectorAll('img') +export const pullImages = async (post, view, discardPostIfNoImages = false, getPostId = getPostIdFromPathname) => { + let images = extractImages(post) - if(images) { - let imageUrls = [] - - for(let image of images) { - let { src } = image - - if(isUnset(src)) { - let finalSrc = image.srcset.split(', ').pop() - - src = finalSrc.slice(0, finalSrc.indexOf(' ') ) - } - - // Sending through URL prevents potential XSS - imageUrls.push(new URL(src).href) - } - - return imageUrls + if(!discardPostIfNoImages || images.length > 0) { + post.images = await downloadImages( + images, + post.source, + getPostId(post), + view + ) + return post } } + // o // . , . ,-. , , , // |/ | |-' |/|/ @@ -588,122 +694,11 @@ export const renderNavEntry = (list) => { } -// | | | o -// ,-. ,-. | | ,-: |- . ,-. ;-. -// | | | | | | | | | | | | | -// `-' `-' ' ' `-` `-' ' `-' ' ' -export const downloadImage = async (url, basename, source, view) => { - let response = await retryDelayedFetch(url, {}, source.courtesyWait, source.retryAttempts) - .catch(err => console.error(`Failed download of ${url}:`, err, err.errors) ) - - if(response == undefined) { - console.error('Could not download image: ' + url) - return url - } - - if(response.ok) { - let mimetype = response.headers.get('Content-Type').split(';')[0] - let extension = imageExtensions[mimetype] - - if(typeof extension !== 'string') { - console.error(`Unknown mimetype for ${url}: ${mimetype}. Cannot download`) - return url - } - - let pathname = Path.join(view.imageStoreDirectory, basename + extension) - let path = Path.join(view.path, pathname) - - const download = () => write(path, response.body) - .then(annotate( `Downloaded ${pathname}`)) - - // TODO: See if the image is downloaded before even trying to download it - view.batch.add(FS.access(path).catch(download)) - return pathname - } else { - console.error( createNetworkingError(response) ) - return url - } -} - -export const downloadImages = (images, source, postId, view) => { - let basePath = getImageBasePath(source, postId) - let pathnames = [] - - for(let i = 0; i < images.length; i++) { - let basename = images.length > 1 ? basePath + '-' + i : basePath - let pathname = view.imageStore.get(basename) - - if(isUnset(pathname)) { - pathname = downloadImage(images[i], basename, source, view) - } - - pathnames.push(pathname) - } - - return Promise.all(pathnames) -} - -export const imageExtensions = { - 'image/apng': '.apng', - 'image/avif': '.avif', - 'image/bmp': '.bmp', - 'image/gif': '.gif', - 'image/vnd.microsoft.icon': '.icon', - 'image/jpeg': '.jpg', - 'image/png': '.png', - 'image/svg+xml': '.xml', - 'image/tiff': '.tif', - 'image/webp': '.webp' -} - -export const pullImages = async (post, view, discardPostIfNoImages = false, getPostId = getPostIdFromPathname) => { - let images = extractImages(post) - - if(!discardPostIfNoImages || images.length > 0) { - post.images = await downloadImages( - images, - post.source, - getPostId(post), - view - ) - return post - } -} - -export const createFeed = (name, sources, main = false) => { - return { - name, - displayName: name, - main, - posts: sources.reduce((posts, source) => posts.concat(source.posts), []) - } -} - -export const fetchChannelFromInstances = async (source) => { - let index = 0 - let instances = source.instances - let cachedLink = source.cache.link - let channel - - if(cachedLink) { - instances.unshift(cachedLink.hostname) - } - - while(!channel && index != instances.length) { - source.hostname = instances[index] - channel = await fetchChannel(source) - - if(source.errored) { - console.error(`Failed to fetch ${source.name} from ${source.hostname}: `, source.error) - index++ - } else { - break - } - } - - return channel -} +// +// ,-. ,-. . . ;-. ,-. ,-. +// `-. | | | | | | |-' +// `-' `-' `-` ' `-' `-' export const populateSource = async (channel, source, postReducerCallback, cache) => { let fromDate = 0 @@ -750,6 +745,8 @@ export const createSourceOptions = (options, view) => { return options } + + // | | ,- // ;-. | ,-: |- | ,-. ;-. ;-.-. ,-. // | | | | | | |- | | | | | | `-. @@ -798,6 +795,31 @@ export const tumblr = { pullImages } +export const fetchChannelFromInstances = async (source) => { + let index = 0 + let instances = source.instances + let cachedLink = source.cache.link + let channel + + if(cachedLink) { + instances.unshift(cachedLink.hostname) + } + + while(!channel && index != instances.length) { + source.hostname = instances[index] + channel = await fetchChannel(source) + + if(source.errored) { + console.error(`Failed to fetch ${source.name} from ${source.hostname}: `, source.error) + index++ + } else { + break + } + } + + return channel +} + export const nitter = { createSource(user, options, instances, postReducerCallback, cache) { let source = {