diff options
author | Arjun Satarkar <me@arjunsatarkar.net> | 2023-07-02 23:50:15 +0000 |
---|---|---|
committer | Arjun Satarkar <me@arjunsatarkar.net> | 2023-07-02 23:50:15 +0000 |
commit | ed597cc711919d75a48940b7ee21d76e9b261e13 (patch) | |
tree | ac77e938a687731d45d797f55af46d6d76789de9 | |
parent | 589df8813ea7732501fb551296d8fd45c4186217 (diff) | |
download | tagrss-ed597cc711919d75a48940b7ee21d76e9b261e13.tar tagrss-ed597cc711919d75a48940b7ee21d76e9b261e13.tar.gz tagrss-ed597cc711919d75a48940b7ee21d76e9b261e13.zip |
Separate server and core, add SQLite storage
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | __pycache__/tagrss.cpython-311.pyc | bin | 0 -> 7735 bytes | |||
-rwxr-xr-x | serve.py | 60 | ||||
-rw-r--r-- | static/styles/main.css | 4 | ||||
-rw-r--r-- | tagrss.py | 113 | ||||
-rw-r--r-- | views/index.tpl | 28 |
6 files changed, 158 insertions, 48 deletions
@@ -1 +1,2 @@ /venv/ +/ignore/
\ No newline at end of file diff --git a/__pycache__/tagrss.cpython-311.pyc b/__pycache__/tagrss.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..d906911 --- /dev/null +++ b/__pycache__/tagrss.cpython-311.pyc @@ -11,20 +11,22 @@ import os import pathlib import time +import tagrss + parser = argparse.ArgumentParser() parser.add_argument("--host", default="localhost") parser.add_argument("--port", default=8000, type=int) +parser.add_argument("--storage-path", required=True) args = parser.parse_args() -feeds_lock = gevent.lock.RLock() -feeds = {} +storage_path: pathlib.Path = pathlib.Path(args.storage_path) -feed_items_lock = gevent.lock.RLock() -feed_items = [] +tagrss_lock = gevent.lock.RLock() +tagrss_backend = tagrss.TagRss(storage_path=storage_path) def parse_space_separated_tags(inp: str) -> list[str]: - tags = [] + tags = set() tag = "" escaped = False for c in inp: @@ -35,20 +37,21 @@ def parse_space_separated_tags(inp: str) -> list[str]: continue case " ": if not escaped: - tags.append(tag) + tags.add(tag) tag = "" continue escaped = False tag += c if tag: - tags.append(tag) - return tags + tags.add(tag) + return tuple(sorted(tags)) @bottle.route("/") def index(): - with feed_items_lock: - return bottle.template("index", items=feed_items) + with tagrss_lock: + entries = tagrss_backend.get_entries(limit=100) + return bottle.template("index", entries=entries, tagrss_backend=tagrss_backend) @bottle.get("/add_feed") @@ -62,42 +65,17 @@ def add_feed_effect(): tags = parse_space_separated_tags(bottle.request.forms.get("tags")) already_present: bool = False - with feeds_lock: - if feed_source not in feeds: - feeds[feed_source] = {"tags": tags} - else: + with tagrss_lock: + try: + tagrss_backend.add_feed(feed_source=feed_source, tags=tags) + except tagrss.FeedAlreadyAddedError: already_present = True - - feed = feedparser.parse(feed_source) - with feed_items_lock: - for entry in reversed(feed.entries): - try: - date_published = time.strftime("%x %X", entry.published_parsed) - except AttributeError: - date_published = None - try: - date_updated = time.strftime("%x %X", entry.updated_parsed) - except AttributeError: - date_updated = None - if date_updated == date_published: - date_updated = None - feed_items.append( - { - "title": entry["title"], - "link": entry["link"], - "date_published": date_published, - "date_updated": date_updated, - "feed": { - "tags": tags, - }, - } - ) - + # TODO: handle FeedFetchError too return bottle.template( "add_feed", after_add=True, feed_source=feed_source, - already_present=already_present, + already_present=already_present ) diff --git a/static/styles/main.css b/static/styles/main.css index deea098..3523c5c 100644 --- a/static/styles/main.css +++ b/static/styles/main.css @@ -25,6 +25,10 @@ th, td { border: 1px solid black; } +span.tag { + background-color: palegoldenrod; +} + .hover-help { cursor: help; user-select: none; diff --git a/tagrss.py b/tagrss.py new file mode 100644 index 0000000..7b1d084 --- /dev/null +++ b/tagrss.py @@ -0,0 +1,113 @@ +import feedparser +import requests + +import calendar +import io +import pathlib +import sqlite3 +import time +import typing + + +class FeedAlreadyAddedError(Exception): + pass + + +class FeedFetchError(Exception): + def __init__(self, feed_source: str, status_code: int): + super().__init__(f"Get {feed_source} returned HTTP {status_code}") + + +class TagRss: + def __init__(self, *, storage_path: str | pathlib.Path): + self.connection: sqlite3.Connection = sqlite3.connect(storage_path) + with self.connection: + self.connection.executescript( + """ +CREATE TABLE IF NOT EXISTS feeds(id INTEGER PRIMARY KEY, source TEXT UNIQUE, title TEXT); +CREATE INDEX IF NOT EXISTS feed_source ON feeds(source); + +CREATE TABLE IF NOT EXISTS feed_tags(feed_id INTEGER, tag TEXT); +CREATE INDEX IF NOT EXISTS feed_tags_feed_id ON feed_tags(feed_id); + +CREATE TABLE IF NOT EXISTS entries(id INTEGER PRIMARY KEY, feed_id INTEGER, title TEXT, link TEXT, epoch_published INTEGER, epoch_updated INTEGER, epoch_downloaded INTEGER); +CREATE INDEX IF NOT EXISTS entry_epoch_downloaded ON entries(epoch_downloaded); + """ + ) + + def add_feed(self, *, feed_source: str, tags: tuple[str]): + response = requests.get(feed_source) + if response.status_code != requests.codes.ok: + raise FeedFetchError(feed_source, response.status_code) + try: + base: str = response.headers["Content-Location"] + except KeyError: + base: str = feed_source + parsed = feedparser.parse( + io.BytesIO(bytes(response.text, encoding="utf-8")), response_headers={"Content-Location": base} + ) + with self.connection: + feed_title: str = parsed.feed.get("title", "") + try: + self.connection.execute( + "INSERT INTO feeds(source, title) VALUES(?, ?);", + (feed_source, feed_title), + ) + except sqlite3.IntegrityError: + raise FeedAlreadyAddedError + feed_id: int = int( + self.connection.execute( + "SELECT id FROM feeds WHERE source = ?;", (feed_source,) + ).fetchone()[0] + ) + self.connection.executemany( + f"INSERT INTO feed_tags(feed_id, tag) VALUES({feed_id}, ?);", tuple(((tag,) for tag in tags)) + ) + for entry in reversed(parsed.entries): + link: str = entry.get("link", "") + try: + epoch_published: typing.Optional[int] = calendar.timegm( + entry.get("published_parsed", None) + ) + except ValueError: + epoch_published = None + try: + epoch_updated: typing.Optional[int] = calendar.timegm( + entry.get("updated_parsed", None) + ) + except ValueError: + epoch_updated = None + self.connection.execute( + "INSERT INTO entries(feed_id, title, link, epoch_published, epoch_updated, epoch_downloaded) VALUES(?, ?, ?, ?, ?, ?);", + ( + feed_id, + feed_title, + link, + epoch_published, + epoch_updated, + int(time.time()), + ), + ) + + def get_entries(self, *, limit: int): + with self.connection: + result = self.connection.execute( + "SELECT feed_id, title, link, epoch_published, epoch_updated FROM entries ORDER BY epoch_downloaded DESC LIMIT ?;", + (limit,), + ).fetchall() + + entries = [] + for entry in result: + entries.append( + { + "feed_id": entry[0], + "title": entry[1], + "link": entry[2], + "epoch_published": entry[3], + "epoch_updated": entry[4], + } + ) + return entries + def get_feed_tags(self, feed_id: int) -> tuple[str]: + with self.connection: + return tuple((t[0] for t in self.connection.execute("SELECT tag FROM feed_tags WHERE feed_id = ?;", (feed_id,)).fetchall()))
\ No newline at end of file diff --git a/views/index.tpl b/views/index.tpl index d521810..6ff4ab4 100644 --- a/views/index.tpl +++ b/views/index.tpl @@ -18,27 +18,41 @@ <th>Title</th> <th>Date</th> <th>Tags</th> + <th>Feed</th> </tr> </thead> <tbody> - % for i, item in enumerate(reversed(items)): + % for i, entry in enumerate(entries): <tr> <td>{{i + 1}}</td> - <td><a href="{{item["link"]}}">{{item["title"]}}</a></td> + <td><a href="{{entry["link"]}}">{{entry["title"]}}</a></td> <% + import time dates = [] - if item.get("date_published", None): - dates.append(item["date_published"]) + if entry.get("epoch_published", None): + dates.append(time.strftime("%x %X", time.localtime(entry["epoch_published"]))) end - if item.get("date_updated", None): - dates.append(item["date_updated"]) + if entry.get("epoch_updated", None): + date_updated = time.strftime("%x %X", time.localtime(entry["epoch_updated"])) + if not date_updated in dates: + dates.append(date_updated) + end end %> <td> {{", updated ".join(dates)}} </td> <td> - {{", ".join(item["feed"]["tags"])}} + % tags = tagrss_backend.get_feed_tags(entry["feed_id"]) + % for i, tag in enumerate(tags): + % if i > 0: + {{", "}} + % end + <span class="tag">{{tag}}</span> + % end + </td> + <td> + <a href="/manage_feeds?feed={{entry["feed_id"]}}">⚙</a> </td> </tr> % end |