From aacf2483dd43e964c9ed0076553f2a64e52b19ed Mon Sep 17 00:00:00 2001 From: dakedres Date: Sat, 5 Aug 2023 14:38:05 -0400 Subject: [PATCH] Tumblr support, proper xml parsing --- bun.lockb | Bin 2813 -> 19368 bytes default/config.js | 16 +++- index.js | 197 ++++++++++++++++++++++++++++++++++------------ package.json | 1 + yarn.lock | 47 +++++++++++ 5 files changed, 210 insertions(+), 51 deletions(-) create mode 100644 yarn.lock diff --git a/bun.lockb b/bun.lockb index 371f04e076442a53720a94519ba6a8af5af0a598..b699e1c8dda254babb9623263677c95afc064990 100755 GIT binary patch literal 19368 zcmeHvd0dR$8~0Qr)I%XpmSmKjnf6Ue+C(K}$!@BthG`}<)20xS2uYSCO9>CMCVPpL zowAiRLPVt`NtXAz?&j3(;i$E~VjXwBP=_ZB zV7dDXmEd6uL>_az_yQi=OCmelWxm5WdhG<4O!hm^*|WAas+(mE{-= zHHd$frQ1W;5#qBYo+4lPt*uZW50pRzq$|Xcrv%i+a#FFOF{bCU(YactiT>t{jAz{?66%W zU-UJF)sz2ud(k%WTA+B;$@0Mk?0qKZqK<8_d3JN(qF2Xa?s*$+66HT~z^EZ!&k8oBEwLG3>Au{geXHrEJCb#`IDgp@I-Pki zND^}V@D0Paq|{wT!ejQ$T996 zUP7YkJaOt}>-#%%R(6RyKjFUS_4l7wx?P{cV=0##uE`sdnFiB=l1%w7+rwcjv;GzD zCW~`Ue~r;D_Q%| zb|f8sHYudsOTgp&foA-c5WEg3Z6(8F{Vla4!7l;4Ip7fss^DJMA|dz(fVY<6TLhF9 zB6tlLxVC`DI7w>>4=Fnf@K%6F3`xV^%?T;H9q=5P{;>bISP1?G;4J}&fo5PWOf>(g^HST{TPV$=z zNqH;4j{|&5*FC~Z@M{4-1n@1j`@iuoY5Jr6wMl_yLQ=mLZ2TrrKVo4Vw^#^%GT`lH zcm%_AYLO89e!$z(@GSyEA%d?2Jgz^8`yCq+`~WyKSpoicuvizt^8t_J2ZkY4e@n*! z!S4e6|4aWr1w5Xg2p^e8sAE$?%B#XlCEBm0deb?P10TfV{KGMX^tn~C%`D>iVDF6RU!Rx4iH}YI?fES zXy2{+&r_%O;jum^of8>hd(9woQ=Zg;AqFg~@k{iaB*V$=CC9VD-gN z4V4Ktx{+tvTX15pzohZv`bLELO@Gt&D}K2%6-TrU`dXA{T46G`M*Zla0mEz!IJ{0@ zLZaKQKhpY0m+JUyK`F-Owb@!ba$8I88r+N9JUlRemO3oi4f@j8Yi3?g_n5ywzdd)Z zj8W+_^~J82a>sU?Jzg1>c}+2R_$d3xf-iShq~BXmZO^u=+!e87s=e*4iLu=`_n)*P z#_p_l_b)VF`Z<*OJ-74JGtRGB6^vM= zOa=9=BhAie%-oQ4@1xqqnO`+hpVurBCD_n-$$E~qV6wD7hP*D-tk5oS-pLA`Qe2R4 zKQ6rQ>NWvY@7f0BTX{=H)~-#yD`xG`NsM{kF4|_8)7|_NkyY^)w?5cS%hwn|QFat9$h64sBPM>r_9iSedcYw_msu`$5I>v#Ayu&-L?0wR>{v8*9wz zr5W#LtXTW$yXSGOtDH_5xb~Crfcr8LW{1M72eyZ**4pP7W{f_qE?(b%%VV#l7dt-L z*o&iaE39Jo=mG26{8Q5_#=LmV;9_B7P>*%@4Kz~{$Innd(j$63z2D&2)ga7Ct*0xN zDBLKoPOQls-ZP}QRyo6eRS* zH1yc{d3UOm*O$y#nywqgt$h31^-IzC$s2VmF1;Mtwel>D7j-AX+~0Y@b;g&pvEo$W zPKM|F$&PD_`Y^W2kMp~^$xds3#th#9#R11Ryp!flH}sqMVM0uia)hQ^-@{w%OVxHg zWIj9rL_}XWJ~Rk(YvHQ=Hwbh0DxdIvE61HNS>@fQhi7j1 zWWyJR>!-bcG)c|XJ^M~8v4AO0Yh$?ier;`*v(# z1$_gb+VwOpQr1lE`sG8<-b3u$r)S+a`WRBw_sW#$m8%Y)+mRBIUX<-po9y*2VTqvs zf`g*>Bg)^r>3;Y_ny>Ih4z3A=w-cS0zwmwW`mr$|ycf*b`03Tuk$2x&lqUu4{N8sW^MavCpuBWV!k!2a#q1oofnoNuZ>_sBb~Z^Jr$e02NS?h(sh>6)pH$=hHj z-uhr!t8zPMuew32YphQ4oFam}U;Fu8$`3cu2@S`+lGp+7ZHX||R&0;vs-+vB9u}0tld>Deeb0V8zNGBa%VW+5c5OEPr))9n+Tz)!@pjsQB~`BRtsdv@9GK`}VSU8c zX>TGB5ngpd3L&%2mN8*_R)zZ=I-~r3%`>lKbKBkC_(7$nUv=BEu-j|a%gql^zUvfJ zbNpRfi-I?E7|UYQ7WP-!<~u6Y`|iD^SEAwlvVj-R&P149vkH`^^n@b*&hve_Z+-YefN{ZHAd?S zU%ryQRTDi`t!^{(dPQu(yTwMHwJtPX@*M-|nHSoaJCt0AIC=8gs*)4WS6pR}*30U{ z+Tdom!TiD@KSz_J<4j#R{@bFtOAAK1-ZD$|SsUq=c=d4d+Sw*GFOPRgpz(G`LYXl8 zY|83?P%ck3JnKd)quflb4WmA7UY_;VTxVFnwdc(pqSigOyWIEk_+#C*KEGTvHvG`g zX@8dqZ`vN6IcD$N)dktOmJ>T@5K;)4j4H!!k4Gi?Np5NNj>%mZQ2+k?)lY+5}zdHv44 z+P4*-8R!?gS*FjP7vQ!ads^oFZR3wreY8_Ac)h-NXZcZ5$#Y)Ou4|A^5!TWWFc68o~~ zyn?=(w(a&!?0bb*nDTJ{$uNG7dR<7nf#IpkTuKLCRhWEw<<8VM@q*k_Zi8b(Z4R_P z=5eK;+Z>bNs-d@*s_s}vUmtqWd8LyWtJmfJt)y(Adu&oGOXZ5qj;CXEMo!$UdTI3R zU8Opw)or%KTc&)R5x;)p5QcSr#L1^&*B1wSZq)GDGSiEl30y>9Ejq77=cSr2);)e& zW)=G`;huS)%bLgCJ{*rdFyx$M=$F!YR<-vxh3*-V&8pN&ZS_X0@?O`mHx?~C-M#L- zVZn@Lf6a`c@oLj~5AjtlzVy()AND%?V|kdVS_j*~V>Wp&IWc2*gqh#Lw^v<18`osG zA^o(`Q2S*MX}sk98R?mJv3^FE?cP1__&p~pJiS|d zyN`kWHg!#KZ~vmNcgHCC@!AK2axR@&@N`e_SJ~}LE!G6cIam$UIXP%khXGGYI-HtL z<0bFk6z?`4x5{Ncw$AS587ptA*l4%6Ulc3tE_A8VKX0cpXjO=-Ye=c#7>94wE52H5 zcKXLm&(5G^Uj?JOSSL~ZVc6H(G+umJUDu4-431m z9=cVGS!*g{?UbzZZ@8PE$vZ#dWr^>Gm6PSqrYmngnEJx~AnSR0Tl)F8A0dU1nKs;ow{Gejo^v{a}uU{VQ6!)*3F+NQ2-^-UD;y5p{= z`ccX2yRj42Rh86rVMaXfrlFJCt!>Sn-2sK`kH7qO@yf_$*H|<$_E!6hXFU#Rhih98eSgT$|7Q0M zI$L^O-@P%4#ygPCd#Kv<L$~X^*95z`VU9-1M}Dq}rME?}QnZ0ysPRdyiP zah1n~51aB&1x?7?ts88j2SmiaY(fel)9iJ+=>oQWk1l(hb82TY0dt2Zkb|CjxNX`7_(P^H}*X5IxK3a^?Y@1nmY0_s-xAv#MS~$;NJu|rb!@r828mQ0x^vN{J@2`?) z8{-WkYj4xvThP`-n9*L_M&(4RKD_Apcu|GN9`&uK(zi~WINB$<&C1c$+xg5d?d@;u zy-;{2S4VS+y{n*V*hQ73l$5#-w{49!RGdAF@9;!lyjLT_G>JOKQ0){Y9k9{l!&0xi zZ3~`k9dJf{o_2?M!~dGWRg9~eI@4I+z@D?a*v2;5p=M3(%ptrW&7L_MqxW>X{6u>( z5|uBUhks8Ick;=63|W0L=Mnxt;U5e9vA`b-{IS3v3;eOb9}E1kz#j|zvA`b-{IS6Q zj0JS@n+u4oEL0jQ5qog>LWz_s5O9dM3unBT$FpSV>#`*Lc|4KVV3r|^E8u$zMP5qv zc!2Smden-IXq4@0*d6nV8?|1mE4Zn-wH!S?# z)Bzs+j?)Pq{62#Bq4?bazY*YlKHk0KJvrWKW73N z&9%Ayj-s9ybt5|B_i@w{?*Xt5yl0^6hWBx(6W-IZ;KBQ4Y$vuC+m1Fs`=FiBj%Yiy zA=(w~i*}ZE7ZsqLu^if(*ci*8JuwY!i?&8PqrK6#Xw%;Cpk46(6+^TENy9qPE@(eu zOSBc%gY6>vpw8Q6_8O0d1yLG0Ro{5JzbTA5J>CDp zKxciHVTcb9U_ddfhj?ufFCnTPT@K6>;%7wsS|CRsFfb&E=MnLAkzs(1_$U#d7|4;e zXE+QaY!C68fgC+ujv-V;{G5niO(TYQOc4*A#++He27MkDA@kvZ9E1Ay5brDE?E^VR z^*O|Ei}(r2a-bK8XBY7#f*iCCz+}ypkZt1UegRP5j7+-xB12 z)zH?&!T4&lCv(ZbRs&Q~1Lz?J;{Yr+F#p1Gjnu|Ze)OTWnw+HtcAY-s4CLV0 z@sS+({#j)oA27%U{X={PiH{mpj~?dyw+)DYA@OGej6Tdp(13Ut5|1`{IqW!?3Jl_N zNPOJn<(9*8s0um}ug0HSO8gCpe;d>TnsE#a8ay0-(16mK_$m_LIKY4bP&eY;NWAGN zHX~Gy_&E~4I;uSs8}WoBo_7?6>TTllNPO%l3{?;DdL&+UkYfnb6f_|Ijl`c$UJiFB z6ygalb6F45hh6!m#>h zRM;j;0pw_ifX=vF0+wnNI79P-Ja~RmzDO9zb7w=td0bx!y9K)bsyI~jt7NKDy=D~t zNgrZsx{hpWur|Z=b8i6E&x!SRY=(K87-AMMg9E#b?I#um1+%%{u*Ol{NKfEn4@hzK;|kaw{t~Ikm(3IK zd?AG`#Bq&1`2p7K>*(q z$V5VzMH1+4NuUo`8tC1ioGgklc+@?`J=h1Lhy%Em~rKtQRPLOaRl%cA2KyMzaMr zj~0bt(L5fGv;c#DL1VLK2Q+P+1QabsZBB;G3=`^Q332$}bg-%Sq-GGX=28EV2Y|@W z7FmV1v;&kDqozu8)3*La5Po6iMX2%F+~P?MJNk|a#6P96v5gcHwF|S|xss-qFzUquc&HF8WB}*$rY+=)8yK9X zTCxx|ssNbA2sPH4Tg0$szi2EyiE1m=qgGOp2yZWiTq#dSB=l&i$v>3^-4FsTYJLm( z<->5LyMYd$BzYp^2Y>#yXRtGxczOMp|Y;ZWY9YVoxen+gFno{gKS UiCi=ShWuWEa`RdgPU7Xf7867_dVq z1`a3<<2OM0B5V^Y941RJS=cc`MKn*IKlmn;fdR%pz55fG50z%X&folwxruSI0t-tf zNFN9@12GGf%?hNMfEZ>wIt`Qp>mfX8eydqa+U z3XR0XQ7nmI1Nnr3A%|`9MBm|O~0hQjsHrdZuO9~VspilyZ4oKz!RQeBC zdUAn@1eXCcq@J-)t};~vDW1I3bUT*;G&&79fcA(?F0#?#N`MNba!p=gqX0E&^9dVP Jrpb<~GXS<#Y`Fjc diff --git a/default/config.js b/default/config.js index 56f874b..d7bd653 100644 --- a/default/config.js +++ b/default/config.js @@ -104,9 +104,23 @@ const sources = { ] } +const endPage = ` + + +
+ + +

You have reached the end

+
+ +
+ +` + module.exports = { feeds, sources, pageSize, - courtesyWait + courtesyWait, + endPage } \ No newline at end of file diff --git a/index.js b/index.js index a4dc72a..90a1633 100644 --- a/index.js +++ b/index.js @@ -1,7 +1,7 @@ -const { fetch } = require('node-fetch') +const fetch = require('node-fetch') const config = require('./config.js') - -let cache = require('./cache.json') +const Path = require('path') +const { JSDOM } = require('jsdom') let waitingList = new Map() @@ -29,24 +29,18 @@ const handleNitterUser = async user => { while(!data && index < sources.length) { let source = sources[index] - - if(waitingList.get(source)) { - console.log('Waiting...') - await sleep(config.courtesyWait) - waitingList.set(source, false) - } - - let rss = await fetch('https://' + source + '/' + user + "/rss") - .catch(console.error) - .then(r => r.text() ) - - waitingList.set(source, true) + let rss = await fetchRss(source, user + '/rss') try { - data = processNitter(user, rss) + data = processNitter(rss, user) } catch(err) { - console.log(`Failed to fetch ${user} from ${source}`) - index++ + if(err.constructor.name == NoMatchesError.name) { + console.log(`Failed to fetch ${user} from ${source}`) + index++ + } else { + console.error(err) + break + } } } @@ -57,51 +51,138 @@ const handleNitterUser = async user => { const sleep = delay => new Promise(resolve => setTimeout(() => resolve(), delay) ) -const processNitter = (user, rss) => { - const descriptionMatches = getMatches( - new RegExp(`\ -.*?\ -@${user}<\/dc:creator>.*?\ -(.+?)<\/description>.*?\ -(.+?).*?\ -(.*?)<\/link>`, 'sg') - )(rss) +class NoMatchesError extends Error {} - if(descriptionMatches.length == 0) { - throw new Error('Got no matches') - return +const processRss = (rss, reducerCallback, cdata) => { + let { document } = new JSDOM(rss, { + contentType: 'text/xml' + }).window + let items = document.querySelectorAll('channel item') + + if(items.length == 0) { + throw new NoMatchesError('Got no matches') } - const getImageMatches = getMatches(/ 0) { - posts.push({ - user, - images, - date: new Date(date).valueOf(), - link - }) + if(post) { + post.date = new Date(dateString).valueOf() ?? 0 + post.link = link + + posts.push(post) } } return posts } +const fetchRss = async (hostname, path) => { + let waitFor = waitingList.get(hostname) + + if(waitFor !== 0) { + await sleep(waitFor) + waitingList.set(hostname, 0) + } + + return await fetch(new URL(path, 'https://' + hostname)) + .then(response => { + waitingList.set(hostname, config.courtesyWait) + return response.text() + }) + .catch(console.error) +} + +const getImages = (user, description) => { + let images = description.querySelectorAll('img') + + if(images) { + let imageUrls = [] + + for(let image of images) { + let { src } = image + + if(!src) { + let finalSrc = image.srcset.split(', ').pop() + + src = finalSrc.slice(0, finalSrc.indexOf(' ') ) + } + + imageUrls.push(src) + } + + if(imageUrls.length > 0) { + return { + images: imageUrls, + user + } + } + } +} + +const processNitter = (rss, user) => { + return processRss(rss, (item, description) => { + // if(dcCreatorRegex.test(item)) + // return + + // let images = [] + + // for(let [, url] of getImageMatches(description) ) { + // images.push(url) + // } + + // if(images.length > 0) { + // return { images, user } + // } + + let creator = item.getElementsByTagName('dc:creator')[0] + + if(creator.innerHTML.slice(1) === user) + return getImages(user, description) + }, true) +} + +const handleTumblrUser = async (user) => { + let rss = await fetchRss(user + '.tumblr.com', 'rss') + + console.log('Found ' + user) + return processTumblr(rss, user) +} + +const processTumblr = (rss, user) => { + // const unescapedRss = unescape(rss) + + return processRss(rss, (item, description) => { + let reblog = description.querySelector('p > a.tumblr_blog') + + // If it's a reblog, skip it + if(reblog && reblog.innerHTML !== user) { + return + } + + return getImages(user, description) + }) +} + +const oneDay = 1000 * 60 * 60 * 24 + const print = async feeds => { // Coalate let masterFeed = [] + let tooLongAgo = (Date.now() - (Date.now() % oneDay)) - oneDay * config.tooLongAgo for(let feed of feeds) { - masterFeed = masterFeed.concat(feed) + for(let post of feed) { + if(tooLongAgo && post.date > tooLongAgo) + masterFeed.push(post) + } } masterFeed = masterFeed.sort((a, b) => a.date < b.date) @@ -118,12 +199,11 @@ const print = async feeds => { console.log('Writing...') for(let i = 0; i < pages.length; i++) { - Bun.write('out/' + (i == 0 ? 'index' : i) + '.html', renderPage(pages[i], i) ) + Bun.write('out/' + (i == 0 ? 'index' : i) + '.html', renderPage(pages[i], i, pages.length) ) } - Bun.write('cache.json', JSON.stringify(cache, null, 2)) } -const renderPage = (posts, index) => { +const renderPage = (posts, index, pageCount) => { let html = `\ @@ -133,6 +213,7 @@ const renderPage = (posts, index) => { body { max-width: 640px; float: right; + font-family: sans-serif; } p { @@ -161,12 +242,19 @@ const renderPage = (posts, index) => { html += `\ ${post.images.map(renderImage).join('\n')} -

${post.user} ${date.getMonth()}/${date.getDay()}/${date.getFullYear()} open


\n` +

${post.user} ${date.getMonth()}/${date.getDate()}/${date.getFullYear()} open


\n` } + let nextPage = index + 1 + + +let link = nextPage === pageCount ? + `end` : + `next` + html += ` ` @@ -183,6 +271,15 @@ const main = async () => { for(let user of config.feeds.nitter) { feeds.push(await handleNitterUser(user) ) } + console.log('Caching sources...') + Bun.write('cache.json', JSON.stringify(cache, null, 2)) + + for(let user of config.feeds.tumblr) { + feeds.push(await handleTumblrUser(user) ) + } + + await print(feeds) + console.log('Done!') } main() \ No newline at end of file diff --git a/package.json b/package.json index 2ceb96d..c84a632 100644 --- a/package.json +++ b/package.json @@ -9,6 +9,7 @@ "author": "", "license": "ISC", "dependencies": { + "jsdom": "^22.1.0", "node-fetch": "^3.3.1" } } diff --git a/yarn.lock b/yarn.lock new file mode 100644 index 0000000..8441479 --- /dev/null +++ b/yarn.lock @@ -0,0 +1,47 @@ +# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. +# yarn lockfile v1 + + +data-uri-to-buffer@^4.0.0: + version "4.0.1" + resolved "https://registry.yarnpkg.com/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz#d8feb2b2881e6a4f58c2e08acfd0e2834e26222e" + integrity sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A== + +fetch-blob@^3.1.2, fetch-blob@^3.1.4: + version "3.2.0" + resolved "https://registry.yarnpkg.com/fetch-blob/-/fetch-blob-3.2.0.tgz#f09b8d4bbd45adc6f0c20b7e787e793e309dcce9" + integrity sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ== + dependencies: + node-domexception "^1.0.0" + web-streams-polyfill "^3.0.3" + +formdata-polyfill@^4.0.10: + version "4.0.10" + resolved "https://registry.yarnpkg.com/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz#24807c31c9d402e002ab3d8c720144ceb8848423" + integrity sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g== + dependencies: + fetch-blob "^3.1.2" + +html-escaper@^3.0.3: + version "3.0.3" + resolved "https://registry.yarnpkg.com/html-escaper/-/html-escaper-3.0.3.tgz#4d336674652beb1dcbc29ef6b6ba7f6be6fdfed6" + integrity sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ== + +node-domexception@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/node-domexception/-/node-domexception-1.0.0.tgz#6888db46a1f71c0b76b3f7555016b63fe64766e5" + integrity sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ== + +node-fetch@^3.3.1: + version "3.3.2" + resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-3.3.2.tgz#d1e889bacdf733b4ff3b2b243eb7a12866a0b78b" + integrity sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA== + dependencies: + data-uri-to-buffer "^4.0.0" + fetch-blob "^3.1.4" + formdata-polyfill "^4.0.10" + +web-streams-polyfill@^3.0.3: + version "3.2.1" + resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-3.2.1.tgz#71c2718c52b45fd49dbeee88634b3a60ceab42a6" + integrity sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q==