parse and aggregate

This commit is contained in:
mr-boneman 2024-08-20 13:19:47 +02:00
parent d6f89af4f6
commit 60b16577ae
5 changed files with 154 additions and 0 deletions

80
src/rssmix.nim Normal file
View file

@ -0,0 +1,80 @@
import std/[algorithm, json, xmltree, times, options, sequtils]
import pkg/[mummy, rssatom, puppy, pretty]
const
  # Pattern for Atom timestamps such as `2024-08-20T13:19:47Z`. The `T` and
  # `Z` are quoted literals, so the pattern itself carries no zone information.
  # Can be overridden at compile time via `-d:atomTimeFormat=...`.
  atomTimeFormat {.strdefine.} = "yyyy-MM-dd'T'HH:mm:ss'Z'"

  # Pattern for RSS 2.0 `pubDate` values such as
  # `Tue, 20 Aug 2024 13:19:47 +0200` (`ZZZ` is a `+hhmm` UTC offset).
  # Can be overridden at compile time via `-d:rssv2TimeFormat=...`.
  rssv2TimeFormat {.strdefine.} = "ddd',' dd MMM yyyy HH:mm:ss ZZZ"
type
  RssFeed = object
    ## One feed source: where to fetch it from and which format it uses.
    # name: string  (field left disabled, as in the original declaration)
    url: string     # address handed to `fetch`
    kind: FeedType  # selects the parser (`Atom` or `RSSv2`)
proc parseAtomTime(s: string): DateTime =
  ## Parses `s` according to `atomTimeFormat` and returns it as a UTC
  ## `DateTime`.
  ##
  ## The zone is passed explicitly: `times.parse` defaults to the *local*
  ## zone, and the trailing `'Z'` in the default format is only a quoted
  ## literal, so without it a "...Z" timestamp would be read as local
  ## wall-clock time.
  ##
  ## Propagates the error raised by `times.parse` when `s` does not match
  ## the format.
  parse(s, atomTimeFormat, utc())
proc parseRSSv2Time(s: string): DateTime =
  ## Parses `s` according to `rssv2TimeFormat`.
  ##
  ## Propagates the error raised by `times.parse` when `s` does not match
  ## the format.
  result = s.parse(rssv2TimeFormat)
proc getrssv2Time(r: RssItem): DateTime =
  ## Returns the item's `pubDate` parsed with `parseRSSv2Time`, or the
  ## fallback `dateTime(0, Month.low, MonthdayRange.low)` when the item has
  ## no `pubDate`.
  if r.pubDate.isNone():
    dateTime(0, Month.low, MonthdayRange.low)
  else:
    parseRSSv2Time(r.pubDate.get())
proc getTime(r: RssItem): DateTime =
  ## Returns the timestamp used to order an item.
  ##
  ## `updated` wins over `pubDate`; both are parsed with `parseAtomTime`.
  ## When neither is set the fallback
  ## `dateTime(0, Month.low, MonthdayRange.low)` is returned.
  if r.updated.isSome():
    parseAtomTime(r.updated.get())
  elif r.pubDate.isSome():
    parseAtomTime(r.pubDate.get())
  else:
    dateTime(0, Month.low, MonthdayRange.low)
proc cmpRssItem(x, y: RssItem): int =
  ## Comparison callback for sorting: orders two items by their `getTime`
  ## timestamps, returning the result of `cmp` on those two values.
  let
    left = getTime(x)
    right = getTime(y)
  result = cmp(left, right)
proc mixRssFeeds(feeds: seq[RssFeed]): RSS =
  ## Fetches every feed in `feeds`, merges all of their items into one `RSS`
  ## value carrying fixed placeholder metadata, and sorts the merged items
  ## newest-first with `cmpRssItem`.
  ##
  ## Items of `RSSv2` feeds have their `pubDate` rewritten into
  ## `atomTimeFormat`, because `getTime` parses `pubDate` with
  ## `parseAtomTime`. Items of `Atom` feeds are taken over unchanged.
  ##
  ## Errors raised by `fetch`, the feed parsers or the date parsers are not
  ## handled here and propagate to the caller.
  result = RSS()
  result.id = "https://example.com/".some()
  result.title = "This is a test".some()
  result.link = "link.com".some()
  result.author.name = "Samuel R.".some()
  result.description = "this is a test for this and that".some()

  for feed in feeds:
    case feed.kind
    of Atom:
      let parsed = parseAtom(fetch(feed.url))
      result.items.add parsed.items
    of RSSv2:
      let parsed = parseRss(fetch(feed.url))
      for entry in parsed.items:
        var item = entry
        # Only rewrite a date that is actually present; an item without a
        # `pubDate` stays without one instead of getting a made-up
        # year-0 timestamp.
        if item.pubDate.isSome():
          let stamp = parseRSSv2Time(item.pubDate.get())
          # The Atom format ends in a literal "Z", so the value must be
          # expressed in UTC before formatting; `parse` hands back a
          # local-zone DateTime by default.
          item.pubDate = some(stamp.utc.format(atomTimeFormat))
        result.items.add item

  result.items.sort(cmp = cmpRssItem, order = Descending)
proc assembleAtom(r: RSS): XmlNode =
  ## Builds the XML tree for `r` with `buildAtom` and replaces the
  ## attributes of the returned root node with the namespace and language
  ## declarations listed below.
  let rootAttrs = toXmlAttributes({
    "xmlns": "http://www.w3.org/2005/Atom",
    "xmlns:thr": "http://purl.org/syndication/thread/1.0",
    "xml:lang": "en-EN"
  })
  result = buildAtom(r)
  result.attrs = rootAttrs
# Script entry point: fetch the feeds below, merge them, and print the
# resulting Atom document to stdout.
let sources = @[
  RssFeed(kind: Atom, url: "https://github.com/nim-lang/Nim/releases.atom"),
  RssFeed(kind: Atom, url: "https://github.com/fatedier/frp/releases.atom"),
  RssFeed(kind: RSSv2, url: "https://fedoramagazine.org/rss"),
  RssFeed(kind: RSSv2, url: "http://communityblog.fedoraproject.org/?feed=rss2"),
]

let merged = mixRssFeeds(sources)
echo assembleAtom(merged)