diff --git a/bun.lockb b/bun.lockb
index 371f04e..b699e1c 100755
Binary files a/bun.lockb and b/bun.lockb differ
diff --git a/default/config.js b/default/config.js
index 56f874b..d7bd653 100644
--- a/default/config.js
+++ b/default/config.js
@@ -104,9 +104,23 @@ const sources = {
]
}
+const endPage = `
+
+
+
+
+
+You have reached the end
+
+
+
+
+`
+
module.exports = {
feeds,
sources,
pageSize,
- courtesyWait
+ courtesyWait,
+ endPage
}
\ No newline at end of file
diff --git a/index.js b/index.js
index a4dc72a..90a1633 100644
--- a/index.js
+++ b/index.js
@@ -1,7 +1,7 @@
-const { fetch } = require('node-fetch')
+const fetch = require('node-fetch')
const config = require('./config.js')
-
-let cache = require('./cache.json')
+const Path = require('path')
+const { JSDOM } = require('jsdom')
let waitingList = new Map()
@@ -29,24 +29,18 @@ const handleNitterUser = async user => {
while(!data && index < sources.length) {
let source = sources[index]
-
- if(waitingList.get(source)) {
- console.log('Waiting...')
- await sleep(config.courtesyWait)
- waitingList.set(source, false)
- }
-
- let rss = await fetch('https://' + source + '/' + user + "/rss")
- .catch(console.error)
- .then(r => r.text() )
-
- waitingList.set(source, true)
+ let rss = await fetchRss(source, user + '/rss')
try {
- data = processNitter(user, rss)
+ data = processNitter(rss, user)
} catch(err) {
- console.log(`Failed to fetch ${user} from ${source}`)
- index++
+ if(err.constructor.name == NoMatchesError.name) {
+ console.log(`Failed to fetch ${user} from ${source}`)
+ index++
+ } else {
+ console.error(err)
+ break
+ }
}
}
@@ -57,51 +51,138 @@ const handleNitterUser = async user => {
const sleep = delay => new Promise(resolve => setTimeout(() => resolve(), delay) )
-const processNitter = (user, rss) => {
- const descriptionMatches = getMatches(
- new RegExp(`\
-- .*?\
-@${user}<\/dc:creator>.*?\
-(.+?)<\/description>.*?\
-(.+?).*?\
-(.*?)<\/link>`, 'sg')
- )(rss)
+/**
+ * Raised by `processRss` when a feed contains no `channel item` elements.
+ * `handleNitterUser` recognises it by class name and moves on to the next
+ * source instead of aborting.
+ */
+class NoMatchesError extends Error {
+  /**
+   * @param {string} [message] - Human-readable description.
+   * @param {object} [options] - Standard `Error` options (e.g. `cause`).
+   */
+  constructor(message, options) {
+    super(message, options)
+    // Without this, logs and stack traces label the error as a plain "Error".
+    this.name = 'NoMatchesError'
+  }
+}
- if(descriptionMatches.length == 0) {
- throw new Error('Got no matches')
- return
+const processRss = (rss, reducerCallback, cdata) => {
+ let { document } = new JSDOM(rss, {
+ contentType: 'text/xml'
+ }).window
+ let items = document.querySelectorAll('channel item')
+
+ if(items.length == 0) {
+ throw new NoMatchesError('Got no matches')
}
- const getImageMatches = getMatches(/
0) {
- posts.push({
- user,
- images,
- date: new Date(date).valueOf(),
- link
- })
+ if(post) {
+ post.date = new Date(dateString).valueOf() ?? 0
+ post.link = link
+
+ posts.push(post)
}
}
return posts
}
+/**
+ * Fetch an RSS document from `hostname`, pausing first if a wait is owed
+ * to that host.
+ *
+ * `waitingList` maps a hostname to the delay (in ms) to serve before the
+ * next request. It is set to `config.courtesyWait` once a response arrives
+ * and reset to 0 after the delay has been served.
+ *
+ * @param {string} hostname - Host to query, without the scheme.
+ * @param {string} path - Feed path, resolved against `https://<hostname>`.
+ * @returns {Promise<string|undefined>} The response body, or `undefined`
+ *   when the request or body read fails (the error is logged, not rethrown).
+ */
+const fetchRss = async (hostname, path) => {
+  // A host that has not been contacted yet has no entry; treat that as
+  // "no wait owed" rather than handing `undefined` to sleep().
+  const waitFor = waitingList.get(hostname) ?? 0
+
+  if(waitFor > 0) {
+    await sleep(waitFor)
+    waitingList.set(hostname, 0)
+  }
+
+  try {
+    const response = await fetch(new URL(path, 'https://' + hostname))
+
+    waitingList.set(hostname, config.courtesyWait)
+    return await response.text()
+  } catch(err) {
+    // Best effort: the caller receives `undefined` and decides what to do.
+    console.error(err)
+  }
+}
+
+/**
+ * Collect the image URLs found inside a post's description.
+ *
+ * Each `img` contributes its `src`; when that is empty, the URL of the
+ * last candidate listed in `srcset` is used instead.
+ *
+ * @param {string} user - Feed owner, copied onto the result.
+ * @param {object} description - Node exposing `querySelectorAll`, searched
+ *   for `img` elements.
+ * @returns {{images: string[], user: string}|undefined} The post stub, or
+ *   `undefined` when no image URL was found.
+ */
+const getImages = (user, description) => {
+  const imageUrls = []
+
+  for(const image of description.querySelectorAll('img')) {
+    let { src } = image
+
+    if(!src) {
+      // Candidates are comma separated and each one is "url [descriptor]".
+      // Taking the first whitespace-delimited token keeps the URL intact
+      // even when the candidate carries no descriptor.
+      const lastCandidate = (image.srcset ?? '').split(/,\s+/).pop().trim()
+
+      src = lastCandidate.split(/\s+/)[0]
+    }
+
+    // An img with neither src nor srcset contributes nothing.
+    if(src) {
+      imageUrls.push(src)
+    }
+  }
+
+  if(imageUrls.length > 0) {
+    return {
+      images: imageUrls,
+      user
+    }
+  }
+}
+
+/**
+ * Extract image posts authored by `user` from a Nitter RSS document.
+ *
+ * Items whose `dc:creator` names a different account, or that have no
+ * `dc:creator` at all, are ignored.
+ *
+ * @param {string} rss - Raw RSS text handed to `processRss`.
+ * @param {string} user - Account name, without the one-character prefix
+ *   the feed puts in front of creator names.
+ * @returns {Array} The posts collected by `processRss`.
+ * @throws Propagates whatever `processRss` throws (e.g. `NoMatchesError`).
+ */
+const processNitter = (rss, user) => {
+  return processRss(rss, (item, description) => {
+    const creator = item.getElementsByTagName('dc:creator')[0]
+
+    // An item without a creator element cannot be attributed; skip it
+    // instead of throwing and aborting the whole feed.
+    if(!creator)
+      return
+
+    // Compare decoded text (not serialized markup), minus the prefix.
+    if(creator.textContent.slice(1) === user)
+      return getImages(user, description)
+  }, true)
+}
+
+/**
+ * Download a Tumblr blog's RSS feed and convert it into posts.
+ *
+ * @param {string} user - Blog name, used as the `*.tumblr.com` subdomain.
+ * @returns {Promise<*>} Whatever `processTumblr` returns for the feed.
+ */
+const handleTumblrUser = async (user) => {
+  const hostname = `${user}.tumblr.com`
+  const feedText = await fetchRss(hostname, 'rss')
+
+  console.log(`Found ${user}`)
+
+  return processTumblr(feedText, user)
+}
+
+/**
+ * Turn a Tumblr RSS document into image posts for `user`.
+ *
+ * A post is dropped when its description contains a `p > a.tumblr_blog`
+ * link whose content is not `user`, i.e. it credits another blog.
+ *
+ * @param {string} rss - Raw RSS text handed to `processRss`.
+ * @param {string} user - Blog name the posts must belong to.
+ * @returns {Array} The posts collected by `processRss`.
+ */
+const processTumblr = (rss, user) => {
+  const keepOwnPosts = (item, description) => {
+    const attribution = description.querySelector('p > a.tumblr_blog')
+    const isForeignReblog = Boolean(attribution) && attribution.innerHTML !== user
+
+    return isForeignReblog ? undefined : getImages(user, description)
+  }
+
+  return processRss(rss, keepOwnPosts)
+}
+
+const oneDay = 1000 * 60 * 60 * 24 // length of one day in milliseconds, the unit Date.now() uses
+
const print = async feeds => {
// Coalate
let masterFeed = []
+ let tooLongAgo = (Date.now() - (Date.now() % oneDay)) - oneDay * config.tooLongAgo
for(let feed of feeds) {
- masterFeed = masterFeed.concat(feed)
+ for(let post of feed) {
+ if(tooLongAgo && post.date > tooLongAgo)
+ masterFeed.push(post)
+ }
}
masterFeed = masterFeed.sort((a, b) => a.date < b.date)
@@ -118,12 +199,11 @@ const print = async feeds => {
console.log('Writing...')
for(let i = 0; i < pages.length; i++) {
- Bun.write('out/' + (i == 0 ? 'index' : i) + '.html', renderPage(pages[i], i) )
+ Bun.write('out/' + (i == 0 ? 'index' : i) + '.html', renderPage(pages[i], i, pages.length) )
}
- Bun.write('cache.json', JSON.stringify(cache, null, 2))
}
-const renderPage = (posts, index) => {
+const renderPage = (posts, index, pageCount) => {
let html = `\
@@ -133,6 +213,7 @@ const renderPage = (posts, index) => {
body {
max-width: 640px;
float: right;
+ font-family: sans-serif;
}
p {
@@ -161,12 +242,19 @@ const renderPage = (posts, index) => {
html += `\
${post.images.map(renderImage).join('\n')}
-${post.user} ${date.getMonth()}/${date.getDay()}/${date.getFullYear()} open
\n`
+${post.user} ${date.getMonth()}/${date.getDate()}/${date.getFullYear()} open
\n`
}
+ let nextPage = index + 1
+
+
+let link = nextPage === pageCount ?
+ `end` :
+ `next`
+
html += `
`
@@ -183,6 +271,15 @@ const main = async () => {
for(let user of config.feeds.nitter) {
feeds.push(await handleNitterUser(user) )
}
+ console.log('Caching sources...')
+ Bun.write('cache.json', JSON.stringify(cache, null, 2))
+
+ for(let user of config.feeds.tumblr) {
+ feeds.push(await handleTumblrUser(user) )
+ }
+
+ await print(feeds)
+ console.log('Done!')
}
main()
\ No newline at end of file
diff --git a/package.json b/package.json
index 2ceb96d..c84a632 100644
--- a/package.json
+++ b/package.json
@@ -9,6 +9,7 @@
"author": "",
"license": "ISC",
"dependencies": {
+ "jsdom": "^22.1.0",
"node-fetch": "^3.3.1"
}
}
diff --git a/yarn.lock b/yarn.lock
new file mode 100644
index 0000000..8441479
--- /dev/null
+++ b/yarn.lock
@@ -0,0 +1,47 @@
+# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
+# yarn lockfile v1
+
+
+data-uri-to-buffer@^4.0.0:
+ version "4.0.1"
+ resolved "https://registry.yarnpkg.com/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz#d8feb2b2881e6a4f58c2e08acfd0e2834e26222e"
+ integrity sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==
+
+fetch-blob@^3.1.2, fetch-blob@^3.1.4:
+ version "3.2.0"
+ resolved "https://registry.yarnpkg.com/fetch-blob/-/fetch-blob-3.2.0.tgz#f09b8d4bbd45adc6f0c20b7e787e793e309dcce9"
+ integrity sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==
+ dependencies:
+ node-domexception "^1.0.0"
+ web-streams-polyfill "^3.0.3"
+
+formdata-polyfill@^4.0.10:
+ version "4.0.10"
+ resolved "https://registry.yarnpkg.com/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz#24807c31c9d402e002ab3d8c720144ceb8848423"
+ integrity sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==
+ dependencies:
+ fetch-blob "^3.1.2"
+
+html-escaper@^3.0.3:
+ version "3.0.3"
+ resolved "https://registry.yarnpkg.com/html-escaper/-/html-escaper-3.0.3.tgz#4d336674652beb1dcbc29ef6b6ba7f6be6fdfed6"
+ integrity sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==
+
+node-domexception@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/node-domexception/-/node-domexception-1.0.0.tgz#6888db46a1f71c0b76b3f7555016b63fe64766e5"
+ integrity sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==
+
+node-fetch@^3.3.1:
+ version "3.3.2"
+ resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-3.3.2.tgz#d1e889bacdf733b4ff3b2b243eb7a12866a0b78b"
+ integrity sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==
+ dependencies:
+ data-uri-to-buffer "^4.0.0"
+ fetch-blob "^3.1.4"
+ formdata-polyfill "^4.0.10"
+
+web-streams-polyfill@^3.0.3:
+ version "3.2.1"
+ resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-3.2.1.tgz#71c2718c52b45fd49dbeee88634b3a60ceab42a6"
+ integrity sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q==