From 5e5af26f17eac46364fc2e8822e36dbd8f84615f Mon Sep 17 00:00:00 2001 From: dakedres Date: Wed, 28 Feb 2024 19:25:46 -0700 Subject: [PATCH] Fix downloaded image extensions --- assets/style.css | 2 +- lib.js | 165 ++++++++++++++++++++++++++++++----------------- package.json | 3 +- yarn.lock | 7 ++ 4 files changed, 115 insertions(+), 62 deletions(-) diff --git a/assets/style.css b/assets/style.css index 200bbe3..9e72bbd 100644 --- a/assets/style.css +++ b/assets/style.css @@ -48,7 +48,7 @@ hr { } footer a { - padding-bottom: 10px; + padding-bottom: 30vh; } @media (prefers-color-scheme: dark) { diff --git a/lib.js b/lib.js index 8cf349d..feafeb9 100644 --- a/lib.js +++ b/lib.js @@ -3,6 +3,7 @@ import Path from "path" import FS from "fs/promises" import { JSDOM } from "jsdom" +import mime from "mime-types" // | o | @@ -38,17 +39,6 @@ export const write = async (path, content) => { return await FS.writeFile(path, content) } -export const download = async (url, path, courtesyWait) => { - let response = await delayedFetch(url, {}, courtesyWait) - .catch(err => console.error(`Failed download of ${url}:`, err) ) - - if(response.ok) { - await write(path, response.body) - } else { - throw createNetworkingError(response) - } -} - export const createNetworkingError = response => { return new Error(`Request failed, ${response.status}: ${response.statusText}`) } @@ -56,14 +46,8 @@ export const createNetworkingError = response => { export const getLinkExtname = link => Path.extname(new URL(link).pathname) -export const buildImagePathHandler = (source, id) => (url, i, array) => { - let path = `images/${source.name}-${id}` - - if(array.length > 1) - path += `-${i}` - - return path + getLinkExtname(url) -} +export const getImageBasePath = (source, postId) => + `images/${source.name}-${postId}` export const writeStylesheet = (path, { directory, batch }) => batch.add( @@ -71,7 +55,7 @@ export const writeStylesheet = (path, { directory, batch }) => .then(content => write(Path.join(directory, 'style.css'), content)) ) -export const postIdFromPathname = post => { +export const getPostIdFromPathname = post => { let { pathname } = new URL(post.link) return pathname.slice(pathname.lastIndexOf('/') + 1) } @@ -144,7 +128,7 @@ export const getCachePath = (source, { directory }) => Path.join(directory, getCacheFilename(source)) export const cacheSource = (source, cache) => - write(getCachePath(source, cache), createCache(source, cache)) + write(getCachePath(source, cache), renderCache(source, cache)) export const cacheSources = (sources, cache) => Promise.all(sources.map(source => cacheSource(source, cache))) @@ -192,7 +176,7 @@ export const openCache = async (source, cache) => { export const buildCacheLink = source => new URL('https://' + source.hostname) -export const createCache = (source, cache) => `\ +export const renderCache = (source, cache) => `\ @@ -219,8 +203,7 @@ export const createChannel = rss => { export const readPubDate = (pubDate) => pubDate ? new Date(pubDate.textContent).valueOf() : 0 -class NoMatchesError extends Error {} -export const createPosts = (channel, source, fromDate, reducerCallback) => { +export const createPosts = async (channel, source, fromDate, reducerCallback) => { let items = channel.querySelectorAll('item') // if(items.length === 0) { @@ -228,19 +211,27 @@ export const createPosts = (channel, source, fromDate, reducerCallback) => { // return source // } + let promises = [] + for(let item of items) { source.items.push(item) - let post = createPost(item, source, reducerCallback) + let promise = createPost(item, source, reducerCallback) + .then(post => { + if(post && post.date > fromDate) { + source.posts.push(post) + } - if(post && post.date > fromDate) { - source.posts.push(post) - } + return post + }) + + promises.push(promise) } + await Promise.all(promises) return source } -export const createPost = (item, source, reducerCallback) => { +export const createPost = async (item, source, reducerCallback) => { let description = new JSDOM(item.querySelector('description').textContent).window.document let date = readPubDate(item.querySelector('pubDate')) let link = item.querySelector('link').textContent @@ -258,7 +249,7 @@ export const createPost = (item, source, reducerCallback) => { occurances: [] } - return reducerCallback(post) + return await reducerCallback(post) } export const processCategories = (post) => { @@ -460,35 +451,58 @@ export const renderNavEntry = (list) => { // | | | | | | | | | | | | | // `-' `-' ' ' `-` `-' ' `-' ' ' -export const downloadImages = (images, getImagePath, courtesyWait, { directory, batch }) => { - let out = [] - - for(let i = 0; i < images.length; i ++) { - let url = images[i] - let relativePath = getImagePath(url, i, images) - let fullPath = Path.join(directory, relativePath) +export const downloadImage = async (url, basename, courtesyWait, { batch, directory }) => { + let response = await delayedFetch(url, {}, courtesyWait) + .catch(err => console.error(`Failed download of ${url}:`, err) ) - let promise = FS.access(fullPath) - .catch(() => - download(url, fullPath, courtesyWait) - .then(annotate( `Downloaded ${relativePath}`)) - ) + if(response.ok) { + let relativePath = basename + imageExtensions[response.headers.get('Content-Type')] + let path = Path.join(directory, relativePath) - out.push(relativePath) - batch.add(promise) + const download = () => write(path, response.body) + .then(annotate( `Downloaded ${relativePath}`)) + + batch.add(FS.access(path).catch(download)) + return relativePath + } else { + throw createNetworkingError(response) } - - return out } -export const pullImages = (post, view, discardPostIfNoImages = false, getPostId = postIdFromPathname) => { +export const downloadImages = (images, source, postId, view) => { + let basePath = getImageBasePath(source, postId) + let pathnames = [] + + for(let i = 0; i < images.length; i++) { + let basename = images.length > 1 ? basePath + '-' + i : basePath + + pathnames.push(downloadImage(images[i], basename, source.courtesyWait, view)) + } + + return Promise.all(pathnames) +} + +export const imageExtensions = { + 'image/apng': '.apng', + 'image/avif': '.avif', + 'image/bmp': '.bmp', + 'image/gif': '.gif', + 'image/vnd.microsoft.icon': '.icon', + 'image/jpeg': '.jpg', + 'image/png': '.png', + 'image/svg+xml': '.xml', + 'image/tiff': '.tif', + 'image/webp': '.webp' +} + +export const pullImages = async (post, view, discardPostIfNoImages = false, getPostId = getPostIdFromPathname) => { let images = extractImages(post) if(!discardPostIfNoImages || images.length > 0) { - post.images = downloadImages( + post.images = await downloadImages( images, - buildImagePathHandler(post.source, getPostId(post)), - post.source.courtesyWait, + post.source, + getPostId(post), view ) return post @@ -529,7 +543,7 @@ export const fetchRssFromInstances = async (source) => { return rss } -export const populateSource = (rss, source, postReducerCallback, useCache = true) => { +export const populateSource = async (rss, source, postReducerCallback, useCache = true) => { let fromDate = 0 source.items = [] source.posts = [] @@ -538,7 +552,7 @@ export const populateSource = (rss, source, postReducerCallback, useCache = true fromDate = source.latestPostDate if(source.cache.channel) - source = createPosts(source.cache.channel, source, 0, postReducerCallback) + source = await createPosts(source.cache.channel, source, 0, postReducerCallback) } let remoteReducerCallback = post => { @@ -549,7 +563,7 @@ export const populateSource = (rss, source, postReducerCallback, useCache = true } if(rss ?? false) - source = createPosts(createChannel(rss), source, fromDate, remoteReducerCallback) + source = await createPosts(createChannel(rss), source, fromDate, remoteReducerCallback) return source } @@ -575,7 +589,7 @@ export const writeView = (sources, feeds, view) => { export const createSource = async (source, getRss, postReducerCallback, cache) => { source = await openCache(source, cache) - source = populateSource(await getRss(source), source, postReducerCallback, cache.populate) + source = await populateSource(await getRss(source), source, postReducerCallback, cache.populate) cache.batch.add(cacheSource(source, cache)) return source @@ -663,12 +677,43 @@ export const nitter = { // - https://mastodon.social/@brownpau/100523448408374430 export const mastodon = { - createSource(user, instances, courtesyWait, postReducerCallback, cache) { + createSource(usertag, courtesyWait, postReducerCallback, cache) { + let [ user, hostname ] = usertag.toLowerCase().split('@') + + let source = { + description: `Aggregate feed for @${user} at ${hostname}`, + hostname, + pathname: '@' + user + ".rss", + courtesyWait, + name: `${hostname}-${user}`, + displayName: user + ' (m)', + user, + } + return createSource(source, fetchRss, postReducerCallback, cache) + }, + + isRepost(post) { + // Mastodon's rss does not provide retweets/retoots + return false + }, + + async pullImages(post, view, discardPostIfNoImages) { + let media = post.item.getElementsByTagName('media:content') + let images = [] + + for(let image of media) { + images.push(image.getAttribute('url')) + } + + if(!discardPostIfNoImages || media.length > 0) { + post.images = await downloadImages( + images, + post.source, + getPostIdFromPathname(post), + view + ) + return post + } } -} - -export const platforms = { - tumblr, - nitter } \ No newline at end of file diff --git a/package.json b/package.json index 49f0e01..3601d3e 100644 --- a/package.json +++ b/package.json @@ -10,7 +10,8 @@ "author": "", "license": "ISC", "dependencies": { - "jsdom": "^22.1.0" + "jsdom": "^22.1.0", + "mime-types": "^2.1.35" }, "type": "module" } diff --git a/yarn.lock b/yarn.lock index ba8bae8..8caed0b 100644 --- a/yarn.lock +++ b/yarn.lock @@ -162,6 +162,13 @@ mime-types@^2.1.12: dependencies: mime-db "1.52.0" +mime-types@^2.1.35: + version "2.1.35" + resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.35.tgz#381a871b62a734450660ae3deee44813f70d959a" + integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw== + dependencies: + mime-db "1.52.0" + ms@2.1.2: version "2.1.2" resolved "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz"