diff --git a/bun.lockb b/bun.lockb index 371f04e..b699e1c 100755 Binary files a/bun.lockb and b/bun.lockb differ diff --git a/default/config.js b/default/config.js index 56f874b..d7bd653 100644 --- a/default/config.js +++ b/default/config.js @@ -104,9 +104,23 @@ const sources = { ] } +const endPage = ` + + +
+ + +

You have reached the end

+
+ +
+ +` + module.exports = { feeds, sources, pageSize, - courtesyWait + courtesyWait, + endPage } \ No newline at end of file diff --git a/index.js b/index.js index a4dc72a..90a1633 100644 --- a/index.js +++ b/index.js @@ -1,7 +1,7 @@ -const { fetch } = require('node-fetch') +const fetch = require('node-fetch') const config = require('./config.js') - -let cache = require('./cache.json') +const Path = require('path') +const { JSDOM } = require('jsdom') let waitingList = new Map() @@ -29,24 +29,18 @@ const handleNitterUser = async user => { while(!data && index < sources.length) { let source = sources[index] - - if(waitingList.get(source)) { - console.log('Waiting...') - await sleep(config.courtesyWait) - waitingList.set(source, false) - } - - let rss = await fetch('https://' + source + '/' + user + "/rss") - .catch(console.error) - .then(r => r.text() ) - - waitingList.set(source, true) + let rss = await fetchRss(source, user + '/rss') try { - data = processNitter(user, rss) + data = processNitter(rss, user) } catch(err) { - console.log(`Failed to fetch ${user} from ${source}`) - index++ + if(err.constructor.name == NoMatchesError.name) { + console.log(`Failed to fetch ${user} from ${source}`) + index++ + } else { + console.error(err) + break + } } } @@ -57,51 +51,138 @@ const handleNitterUser = async user => { const sleep = delay => new Promise(resolve => setTimeout(() => resolve(), delay) ) -const processNitter = (user, rss) => { - const descriptionMatches = getMatches( - new RegExp(`\ -.*?\ -@${user}<\/dc:creator>.*?\ -(.+?)<\/description>.*?\ -(.+?).*?\ -(.*?)<\/link>`, 'sg') - )(rss) +class NoMatchesError extends Error {} - if(descriptionMatches.length == 0) { - throw new Error('Got no matches') - return +const processRss = (rss, reducerCallback, cdata) => { + let { document } = new JSDOM(rss, { + contentType: 'text/xml' + }).window + let items = document.querySelectorAll('channel item') + + if(items.length == 0) { + throw new NoMatchesError('Got no matches') } - const getImageMatches = getMatches(/ 0) { - posts.push({ - user, - images, - date: new Date(date).valueOf(), - link - }) + if(post) { + post.date = new Date(dateString).valueOf() ?? 0 + post.link = link + + posts.push(post) } } return posts } +const fetchRss = async (hostname, path) => { + let waitFor = waitingList.get(hostname) + + if(waitFor !== 0) { + await sleep(waitFor) + waitingList.set(hostname, 0) + } + + return await fetch(new URL(path, 'https://' + hostname)) + .then(response => { + waitingList.set(hostname, config.courtesyWait) + return response.text() + }) + .catch(console.error) +} + +const getImages = (user, description) => { + let images = description.querySelectorAll('img') + + if(images) { + let imageUrls = [] + + for(let image of images) { + let { src } = image + + if(!src) { + let finalSrc = image.srcset.split(', ').pop() + + src = finalSrc.slice(0, finalSrc.indexOf(' ') ) + } + + imageUrls.push(src) + } + + if(imageUrls.length > 0) { + return { + images: imageUrls, + user + } + } + } +} + +const processNitter = (rss, user) => { + return processRss(rss, (item, description) => { + // if(dcCreatorRegex.test(item)) + // return + + // let images = [] + + // for(let [, url] of getImageMatches(description) ) { + // images.push(url) + // } + + // if(images.length > 0) { + // return { images, user } + // } + + let creator = item.getElementsByTagName('dc:creator')[0] + + if(creator.innerHTML.slice(1) === user) + return getImages(user, description) + }, true) +} + +const handleTumblrUser = async (user) => { + let rss = await fetchRss(user + '.tumblr.com', 'rss') + + console.log('Found ' + user) + return processTumblr(rss, user) +} + +const processTumblr = (rss, user) => { + // const unescapedRss = unescape(rss) + + return processRss(rss, (item, description) => { + let reblog = description.querySelector('p > a.tumblr_blog') + + // If it's a reblog, skip it + if(reblog && reblog.innerHTML !== user) { + return + } + + return getImages(user, description) + }) +} + +const oneDay = 1000 * 60 * 60 * 24 + const print = async feeds => { // Coalate let masterFeed = [] + let tooLongAgo = (Date.now() - (Date.now() % oneDay)) - oneDay * config.tooLongAgo for(let feed of feeds) { - masterFeed = masterFeed.concat(feed) + for(let post of feed) { + if(tooLongAgo && post.date > tooLongAgo) + masterFeed.push(post) + } } masterFeed = masterFeed.sort((a, b) => a.date < b.date) @@ -118,12 +199,11 @@ const print = async feeds => { console.log('Writing...') for(let i = 0; i < pages.length; i++) { - Bun.write('out/' + (i == 0 ? 'index' : i) + '.html', renderPage(pages[i], i) ) + Bun.write('out/' + (i == 0 ? 'index' : i) + '.html', renderPage(pages[i], i, pages.length) ) } - Bun.write('cache.json', JSON.stringify(cache, null, 2)) } -const renderPage = (posts, index) => { +const renderPage = (posts, index, pageCount) => { let html = `\ @@ -133,6 +213,7 @@ const renderPage = (posts, index) => { body { max-width: 640px; float: right; + font-family: sans-serif; } p { @@ -161,12 +242,19 @@ const renderPage = (posts, index) => { html += `\ ${post.images.map(renderImage).join('\n')} -

${post.user} ${date.getMonth()}/${date.getDay()}/${date.getFullYear()} open


\n` +

${post.user} ${date.getMonth()}/${date.getDate()}/${date.getFullYear()} open


\n` } + let nextPage = index + 1 + + +let link = nextPage === pageCount ? + `end` : + `next` + html += ` ` @@ -183,6 +271,15 @@ const main = async () => { for(let user of config.feeds.nitter) { feeds.push(await handleNitterUser(user) ) } + console.log('Caching sources...') + Bun.write('cache.json', JSON.stringify(cache, null, 2)) + + for(let user of config.feeds.tumblr) { + feeds.push(await handleTumblrUser(user) ) + } + + await print(feeds) + console.log('Done!') } main() \ No newline at end of file diff --git a/package.json b/package.json index 2ceb96d..c84a632 100644 --- a/package.json +++ b/package.json @@ -9,6 +9,7 @@ "author": "", "license": "ISC", "dependencies": { + "jsdom": "^22.1.0", "node-fetch": "^3.3.1" } } diff --git a/yarn.lock b/yarn.lock new file mode 100644 index 0000000..8441479 --- /dev/null +++ b/yarn.lock @@ -0,0 +1,47 @@ +# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. +# yarn lockfile v1 + + +data-uri-to-buffer@^4.0.0: + version "4.0.1" + resolved "https://registry.yarnpkg.com/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz#d8feb2b2881e6a4f58c2e08acfd0e2834e26222e" + integrity sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A== + +fetch-blob@^3.1.2, fetch-blob@^3.1.4: + version "3.2.0" + resolved "https://registry.yarnpkg.com/fetch-blob/-/fetch-blob-3.2.0.tgz#f09b8d4bbd45adc6f0c20b7e787e793e309dcce9" + integrity sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ== + dependencies: + node-domexception "^1.0.0" + web-streams-polyfill "^3.0.3" + +formdata-polyfill@^4.0.10: + version "4.0.10" + resolved "https://registry.yarnpkg.com/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz#24807c31c9d402e002ab3d8c720144ceb8848423" + integrity sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g== + dependencies: + fetch-blob "^3.1.2" + +html-escaper@^3.0.3: + version "3.0.3" + resolved "https://registry.yarnpkg.com/html-escaper/-/html-escaper-3.0.3.tgz#4d336674652beb1dcbc29ef6b6ba7f6be6fdfed6" + integrity sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ== + +node-domexception@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/node-domexception/-/node-domexception-1.0.0.tgz#6888db46a1f71c0b76b3f7555016b63fe64766e5" + integrity sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ== + +node-fetch@^3.3.1: + version "3.3.2" + resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-3.3.2.tgz#d1e889bacdf733b4ff3b2b243eb7a12866a0b78b" + integrity sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA== + dependencies: + data-uri-to-buffer "^4.0.0" + fetch-blob "^3.1.4" + formdata-polyfill "^4.0.10" + +web-streams-polyfill@^3.0.3: + version "3.2.1" + resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-3.2.1.tgz#71c2718c52b45fd49dbeee88634b3a60ceab42a6" + integrity sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q==