Tumblr support, proper xml parsing
This commit is contained in:
parent
c0e45d3d30
commit
aacf2483dd
@ -104,9 +104,23 @@ const sources = {
|
||||
]
|
||||
}
|
||||
|
||||
const endPage = `
|
||||
<html>
|
||||
<body>
|
||||
<center>
|
||||
|
||||
<img src="end.jpg">
|
||||
<h4>You have reached the end</h4>
|
||||
<hr>
|
||||
|
||||
</center>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
module.exports = {
|
||||
feeds,
|
||||
sources,
|
||||
pageSize,
|
||||
courtesyWait
|
||||
courtesyWait,
|
||||
endPage
|
||||
}
|
197
index.js
197
index.js
@ -1,7 +1,7 @@
|
||||
const { fetch } = require('node-fetch')
|
||||
const fetch = require('node-fetch')
|
||||
const config = require('./config.js')
|
||||
|
||||
let cache = require('./cache.json')
|
||||
const Path = require('path')
|
||||
const { JSDOM } = require('jsdom')
|
||||
|
||||
let waitingList = new Map()
|
||||
|
||||
@ -29,24 +29,18 @@ const handleNitterUser = async user => {
|
||||
|
||||
while(!data && index < sources.length) {
|
||||
let source = sources[index]
|
||||
|
||||
if(waitingList.get(source)) {
|
||||
console.log('Waiting...')
|
||||
await sleep(config.courtesyWait)
|
||||
waitingList.set(source, false)
|
||||
}
|
||||
|
||||
let rss = await fetch('https://' + source + '/' + user + "/rss")
|
||||
.catch(console.error)
|
||||
.then(r => r.text() )
|
||||
|
||||
waitingList.set(source, true)
|
||||
let rss = await fetchRss(source, user + '/rss')
|
||||
|
||||
try {
|
||||
data = processNitter(user, rss)
|
||||
data = processNitter(rss, user)
|
||||
} catch(err) {
|
||||
console.log(`Failed to fetch ${user} from ${source}`)
|
||||
index++
|
||||
if(err.constructor.name == NoMatchesError.name) {
|
||||
console.log(`Failed to fetch ${user} from ${source}`)
|
||||
index++
|
||||
} else {
|
||||
console.error(err)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -57,51 +51,138 @@ const handleNitterUser = async user => {
|
||||
|
||||
const sleep = delay => new Promise(resolve => setTimeout(() => resolve(), delay) )
|
||||
|
||||
const processNitter = (user, rss) => {
|
||||
const descriptionMatches = getMatches(
|
||||
new RegExp(`\
|
||||
<item>.*?\
|
||||
<dc:creator>@${user}<\/dc:creator>.*?\
|
||||
<description>(.+?)<\/description>.*?\
|
||||
<pubDate>(.+?)</pubDate>.*?\
|
||||
<link>(.*?)<\/link>`, 'sg')
|
||||
)(rss)
|
||||
/** Raised by processRss when a feed contains no `channel item` elements. */
class NoMatchesError extends Error {}

/**
 * Parse an RSS document and build a post list from its items.
 *
 * Each `<item>` is handed to `reducerCallback`; when the callback returns an
 * object, that object is stamped with `date` and `link` and appended to the
 * result. Items for which the callback returns a falsy value are skipped.
 *
 * @param {string} rss - Raw RSS/XML text.
 * @param {(item: Element, description: Document, dateString: string, link: string) => (object|undefined)} reducerCallback
 *        Receives the item element, its `<description>` text parsed as an HTML
 *        document, the raw `<pubDate>` text and the `<link>` text.
 * @param {*} [cdata] - Accepted for existing callers; not read by this function.
 * @returns {object[]} Posts returned by the callback, each with `date`
 *          (milliseconds since the epoch, 0 when the date is unparsable) and `link` set.
 * @throws {NoMatchesError} When the feed has no items.
 */
const processRss = (rss, reducerCallback, cdata) => {
    const { document } = new JSDOM(rss, {
        contentType: 'text/xml'
    }).window
    const items = document.querySelectorAll('channel item')

    if(items.length == 0) {
        throw new NoMatchesError('Got no matches')
    }

    // A missing child element is treated as empty text instead of crashing the whole feed
    const textOf = (item, selector) => item.querySelector(selector)?.textContent ?? ''

    const posts = []

    for(const item of items) {
        // The description holds embedded HTML, so it is parsed as its own document
        const description = new JSDOM(textOf(item, 'description')).window.document
        const dateString = textOf(item, 'pubDate')
        const link = textOf(item, 'link')

        const post = reducerCallback(item, description, dateString, link)

        if(post) {
            // An unparsable date yields NaN, which `??` would not catch
            const timestamp = new Date(dateString).valueOf()

            post.date = Number.isNaN(timestamp) ? 0 : timestamp
            post.link = link

            posts.push(post)
        }
    }

    return posts
}
|
||||
|
||||
/**
 * Fetch `path` from `hostname` over HTTPS and resolve with the body as text.
 *
 * `waitingList` maps a hostname to the number of milliseconds to pause before
 * the next request to it. After a response arrives the host is marked with
 * `config.courtesyWait`, so consecutive requests to the same host are spaced out.
 *
 * @param {string} hostname - Host to contact, without a scheme.
 * @param {string} path - Path resolved against `https://<hostname>`.
 * @returns {Promise<string|undefined>} The response body, or `undefined` when
 *          the request or the body read failed (the error is logged).
 */
const fetchRss = async (hostname, path) => {
    const waitFor = waitingList.get(hostname)

    // A host that has not been contacted yet has no entry (undefined); only
    // pause when there is an actual delay recorded for it
    if(waitFor) {
        await sleep(waitFor)
        waitingList.set(hostname, 0)
    }

    try {
        const response = await fetch(new URL(path, 'https://' + hostname))

        waitingList.set(hostname, config.courtesyWait)

        return await response.text()
    } catch(err) {
        // Best effort: callers receive undefined and decide how to proceed
        console.error(err)
    }
}
|
||||
|
||||
/**
 * Collect the image URLs found in a parsed post description.
 *
 * For each `<img>` the `src` is used; when it is empty, the URL of the last
 * candidate listed in `srcset` is used instead. Images that yield no URL at
 * all are ignored.
 *
 * @param {string} user - Name stored on the resulting post.
 * @param {Document|Element} description - Node queried for `img` elements.
 * @returns {{images: string[], user: string}|undefined} The post, or
 *          `undefined` when no image URL was found.
 */
const getImages = (user, description) => {
    const imageUrls = []

    for(const image of description.querySelectorAll('img')) {
        let { src } = image

        if(!src) {
            // Candidates are comma separated; each is "<url> [descriptor]".
            // Splitting on whitespace keeps the full URL even when the
            // candidate carries no descriptor.
            const lastCandidate = (image.srcset ?? '').split(/,\s+/).pop().trim()

            src = lastCandidate.split(/\s+/)[0]
        }

        // Skip images that have neither src nor srcset instead of emitting ''
        if(src) {
            imageUrls.push(src)
        }
    }

    if(imageUrls.length > 0) {
        return {
            images: imageUrls,
            user
        }
    }
}
|
||||
|
||||
/**
 * Extract the image posts authored by `user` from a Nitter RSS feed.
 *
 * Items whose `dc:creator` does not match `user` are skipped, as are items
 * that carry no `dc:creator` element at all.
 *
 * @param {string} rss - Raw RSS text of the user's feed.
 * @param {string} user - Handle without the leading '@'.
 * @returns {object[]} Posts as produced by processRss / getImages.
 */
const processNitter = (rss, user) => {
    return processRss(rss, (item, description) => {
        const creator = item.getElementsByTagName('dc:creator')[0]

        // The creator text is "@handle"; slice(1) drops the '@'. A missing
        // creator element simply fails the comparison instead of throwing.
        if(creator?.textContent.slice(1) === user)
            return getImages(user, description)
    }, true)
}
|
||||
|
||||
/**
 * Fetch and parse the RSS feed of one Tumblr blog.
 *
 * A failure for a single blog (request error, unparsable or empty feed) is
 * logged and produces an empty feed, so the remaining blogs are still processed.
 *
 * @param {string} user - Blog name; the feed is read from `<user>.tumblr.com/rss`.
 * @returns {Promise<object[]>} The blog's posts, or `[]` when it could not be read.
 */
const handleTumblrUser = async (user) => {
    try {
        const rss = await fetchRss(user + '.tumblr.com', 'rss')
        const posts = processTumblr(rss, user)

        // Only report success once the feed was actually parsed
        console.log('Found ' + user)
        return posts
    } catch(err) {
        console.log(`Failed to fetch ${user} from Tumblr`)
        console.error(err)
        return []
    }
}
|
||||
|
||||
/**
 * Extract the image posts of `user` from a Tumblr RSS feed, skipping reblogs.
 *
 * A post is treated as a reblog when its description contains a
 * `p > a.tumblr_blog` link whose text names a blog other than `user`.
 *
 * @param {string} rss - Raw RSS text of the blog's feed.
 * @param {string} user - Blog name the posts are attributed to.
 * @returns {object[]} Posts as produced by processRss / getImages.
 */
const processTumblr = (rss, user) => {
    // NOTE(review): names are compared ignoring case and surrounding
    // whitespace, assuming Tumblr blog names are case-insensitive — confirm
    const ownName = user.trim().toLowerCase()

    return processRss(rss, (item, description) => {
        const reblog = description.querySelector('p > a.tumblr_blog')

        // If it's a reblog of someone else's post, skip it
        if(reblog && reblog.textContent.trim().toLowerCase() !== ownName) {
            return
        }

        return getImages(user, description)
    })
}
|
||||
|
||||
const oneDay = 1000 * 60 * 60 * 24
|
||||
|
||||
const print = async feeds => {
|
||||
// Collate
|
||||
let masterFeed = []
|
||||
let tooLongAgo = (Date.now() - (Date.now() % oneDay)) - oneDay * config.tooLongAgo
|
||||
|
||||
for(let feed of feeds) {
|
||||
masterFeed = masterFeed.concat(feed)
|
||||
for(let post of feed) {
|
||||
if(tooLongAgo && post.date > tooLongAgo)
|
||||
masterFeed.push(post)
|
||||
}
|
||||
}
|
||||
|
||||
masterFeed = masterFeed.sort((a, b) => a.date < b.date)
|
||||
@ -118,12 +199,11 @@ const print = async feeds => {
|
||||
|
||||
console.log('Writing...')
|
||||
for(let i = 0; i < pages.length; i++) {
|
||||
Bun.write('out/' + (i == 0 ? 'index' : i) + '.html', renderPage(pages[i], i) )
|
||||
Bun.write('out/' + (i == 0 ? 'index' : i) + '.html', renderPage(pages[i], i, pages.length) )
|
||||
}
|
||||
Bun.write('cache.json', JSON.stringify(cache, null, 2))
|
||||
}
|
||||
|
||||
const renderPage = (posts, index) => {
|
||||
const renderPage = (posts, index, pageCount) => {
|
||||
let html = `\
|
||||
<html>
|
||||
<head>
|
||||
@ -133,6 +213,7 @@ const renderPage = (posts, index) => {
|
||||
body {
|
||||
max-width: 640px;
|
||||
float: right;
|
||||
font-family: sans-serif;
|
||||
}
|
||||
|
||||
p {
|
||||
@ -161,12 +242,19 @@ const renderPage = (posts, index) => {
|
||||
|
||||
html += `\
|
||||
${post.images.map(renderImage).join('\n')}
|
||||
<p><b>${post.user}</b> ${date.getMonth()}/${date.getDay()}/${date.getFullYear()} <a href="${post.link}">open</a></p><hr>\n`
|
||||
<p><b>${post.user}</b> ${date.getMonth()}/${date.getDate()}/${date.getFullYear()} <a href="${post.link}">open</a></p><hr>\n`
|
||||
}
|
||||
|
||||
let nextPage = index + 1
|
||||
|
||||
|
||||
let link = nextPage === pageCount ?
|
||||
`<a href="data:text/html,">end</a>` :
|
||||
`<a href="${nextPage}.html">next</a>`
|
||||
|
||||
html += `
|
||||
<footer>
|
||||
<a href="${index + 1}.html">next</a>
|
||||
${link}
|
||||
</footer>
|
||||
</body>
|
||||
</html>`
|
||||
@ -183,6 +271,15 @@ const main = async () => {
|
||||
for(let user of config.feeds.nitter) {
|
||||
feeds.push(await handleNitterUser(user) )
|
||||
}
|
||||
console.log('Caching sources...')
|
||||
Bun.write('cache.json', JSON.stringify(cache, null, 2))
|
||||
|
||||
for(let user of config.feeds.tumblr) {
|
||||
feeds.push(await handleTumblrUser(user) )
|
||||
}
|
||||
|
||||
await print(feeds)
|
||||
console.log('Done!')
|
||||
}
|
||||
|
||||
main()
|
@ -9,6 +9,7 @@
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"jsdom": "^22.1.0",
|
||||
"node-fetch": "^3.3.1"
|
||||
}
|
||||
}
|
||||
|
47
yarn.lock
Normal file
47
yarn.lock
Normal file
@ -0,0 +1,47 @@
|
||||
# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
|
||||
# yarn lockfile v1
|
||||
|
||||
|
||||
data-uri-to-buffer@^4.0.0:
|
||||
version "4.0.1"
|
||||
resolved "https://registry.yarnpkg.com/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz#d8feb2b2881e6a4f58c2e08acfd0e2834e26222e"
|
||||
integrity sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==
|
||||
|
||||
fetch-blob@^3.1.2, fetch-blob@^3.1.4:
|
||||
version "3.2.0"
|
||||
resolved "https://registry.yarnpkg.com/fetch-blob/-/fetch-blob-3.2.0.tgz#f09b8d4bbd45adc6f0c20b7e787e793e309dcce9"
|
||||
integrity sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==
|
||||
dependencies:
|
||||
node-domexception "^1.0.0"
|
||||
web-streams-polyfill "^3.0.3"
|
||||
|
||||
formdata-polyfill@^4.0.10:
|
||||
version "4.0.10"
|
||||
resolved "https://registry.yarnpkg.com/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz#24807c31c9d402e002ab3d8c720144ceb8848423"
|
||||
integrity sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==
|
||||
dependencies:
|
||||
fetch-blob "^3.1.2"
|
||||
|
||||
html-escaper@^3.0.3:
|
||||
version "3.0.3"
|
||||
resolved "https://registry.yarnpkg.com/html-escaper/-/html-escaper-3.0.3.tgz#4d336674652beb1dcbc29ef6b6ba7f6be6fdfed6"
|
||||
integrity sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==
|
||||
|
||||
node-domexception@^1.0.0:
|
||||
version "1.0.0"
|
||||
resolved "https://registry.yarnpkg.com/node-domexception/-/node-domexception-1.0.0.tgz#6888db46a1f71c0b76b3f7555016b63fe64766e5"
|
||||
integrity sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==
|
||||
|
||||
node-fetch@^3.3.1:
|
||||
version "3.3.2"
|
||||
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-3.3.2.tgz#d1e889bacdf733b4ff3b2b243eb7a12866a0b78b"
|
||||
integrity sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==
|
||||
dependencies:
|
||||
data-uri-to-buffer "^4.0.0"
|
||||
fetch-blob "^3.1.4"
|
||||
formdata-polyfill "^4.0.10"
|
||||
|
||||
web-streams-polyfill@^3.0.3:
|
||||
version "3.2.1"
|
||||
resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-3.2.1.tgz#71c2718c52b45fd49dbeee88634b3a60ceab42a6"
|
||||
integrity sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q==
|
Loading…
x
Reference in New Issue
Block a user