From ed597cc711919d75a48940b7ee21d76e9b261e13 Mon Sep 17 00:00:00 2001 From: Arjun Satarkar Date: Mon, 3 Jul 2023 05:20:15 +0530 Subject: Separate server and core, add SQLite storage --- .gitignore | 1 + __pycache__/tagrss.cpython-311.pyc | Bin 0 -> 7735 bytes serve.py | 60 +++++++------------- static/styles/main.css | 4 ++ tagrss.py | 113 +++++++++++++++++++++++++++++++++++++ views/index.tpl | 28 ++++++--- 6 files changed, 158 insertions(+), 48 deletions(-) create mode 100644 __pycache__/tagrss.cpython-311.pyc create mode 100644 tagrss.py diff --git a/.gitignore b/.gitignore index 9f21b54..96e766e 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /venv/ +/ignore/ \ No newline at end of file diff --git a/__pycache__/tagrss.cpython-311.pyc b/__pycache__/tagrss.cpython-311.pyc new file mode 100644 index 0000000..d906911 Binary files /dev/null and b/__pycache__/tagrss.cpython-311.pyc differ diff --git a/serve.py b/serve.py index 2f45d3e..025510a 100755 --- a/serve.py +++ b/serve.py @@ -11,20 +11,22 @@ import os import pathlib import time +import tagrss + parser = argparse.ArgumentParser() parser.add_argument("--host", default="localhost") parser.add_argument("--port", default=8000, type=int) +parser.add_argument("--storage-path", required=True) args = parser.parse_args() -feeds_lock = gevent.lock.RLock() -feeds = {} +storage_path: pathlib.Path = pathlib.Path(args.storage_path) -feed_items_lock = gevent.lock.RLock() -feed_items = [] +tagrss_lock = gevent.lock.RLock() +tagrss_backend = tagrss.TagRss(storage_path=storage_path) def parse_space_separated_tags(inp: str) -> list[str]: - tags = [] + tags = set() tag = "" escaped = False for c in inp: @@ -35,20 +37,21 @@ def parse_space_separated_tags(inp: str) -> list[str]: continue case " ": if not escaped: - tags.append(tag) + tags.add(tag) tag = "" continue escaped = False tag += c if tag: - tags.append(tag) - return tags + tags.add(tag) + return tuple(sorted(tags)) @bottle.route("/") def index(): - with feed_items_lock: - return bottle.template("index", items=feed_items) + with tagrss_lock: + entries = tagrss_backend.get_entries(limit=100) + return bottle.template("index", entries=entries, tagrss_backend=tagrss_backend) @bottle.get("/add_feed") @@ -62,42 +65,17 @@ def add_feed_effect(): tags = parse_space_separated_tags(bottle.request.forms.get("tags")) already_present: bool = False - with feeds_lock: - if feed_source not in feeds: - feeds[feed_source] = {"tags": tags} - else: + with tagrss_lock: + try: + tagrss_backend.add_feed(feed_source=feed_source, tags=tags) + except tagrss.FeedAlreadyAddedError: already_present = True - - feed = feedparser.parse(feed_source) - with feed_items_lock: - for entry in reversed(feed.entries): - try: - date_published = time.strftime("%x %X", entry.published_parsed) - except AttributeError: - date_published = None - try: - date_updated = time.strftime("%x %X", entry.updated_parsed) - except AttributeError: - date_updated = None - if date_updated == date_published: - date_updated = None - feed_items.append( - { - "title": entry["title"], - "link": entry["link"], - "date_published": date_published, - "date_updated": date_updated, - "feed": { - "tags": tags, - }, - } - ) - + # TODO: handle FeedFetchError too return bottle.template( "add_feed", after_add=True, feed_source=feed_source, - already_present=already_present, + already_present=already_present ) diff --git a/static/styles/main.css b/static/styles/main.css index deea098..3523c5c 100644 --- a/static/styles/main.css +++ b/static/styles/main.css @@ -25,6 +25,10 @@ th, td { border: 1px solid black; } +span.tag { + background-color: palegoldenrod; +} + .hover-help { cursor: help; user-select: none; diff --git a/tagrss.py b/tagrss.py new file mode 100644 index 0000000..7b1d084 --- /dev/null +++ b/tagrss.py @@ -0,0 +1,113 @@ +import feedparser +import requests + +import calendar +import io +import pathlib +import sqlite3 +import time +import typing + + +class FeedAlreadyAddedError(Exception): + pass + + +class FeedFetchError(Exception): + def __init__(self, feed_source: str, status_code: int): + super().__init__(f"Get {feed_source} returned HTTP {status_code}") + + +class TagRss: + def __init__(self, *, storage_path: str | pathlib.Path): + self.connection: sqlite3.Connection = sqlite3.connect(storage_path) + with self.connection: + self.connection.executescript( + """ +CREATE TABLE IF NOT EXISTS feeds(id INTEGER PRIMARY KEY, source TEXT UNIQUE, title TEXT); +CREATE INDEX IF NOT EXISTS feed_source ON feeds(source); + +CREATE TABLE IF NOT EXISTS feed_tags(feed_id INTEGER, tag TEXT); +CREATE INDEX IF NOT EXISTS feed_tags_feed_id ON feed_tags(feed_id); + +CREATE TABLE IF NOT EXISTS entries(id INTEGER PRIMARY KEY, feed_id INTEGER, title TEXT, link TEXT, epoch_published INTEGER, epoch_updated INTEGER, epoch_downloaded INTEGER); +CREATE INDEX IF NOT EXISTS entry_epoch_downloaded ON entries(epoch_downloaded); + """ + ) + + def add_feed(self, *, feed_source: str, tags: tuple[str]): + response = requests.get(feed_source) + if response.status_code != requests.codes.ok: + raise FeedFetchError(feed_source, response.status_code) + try: + base: str = response.headers["Content-Location"] + except KeyError: + base: str = feed_source + parsed = feedparser.parse( + io.BytesIO(bytes(response.text, encoding="utf-8")), response_headers={"Content-Location": base} + ) + with self.connection: + feed_title: str = parsed.feed.get("title", "") + try: + self.connection.execute( + "INSERT INTO feeds(source, title) VALUES(?, ?);", + (feed_source, feed_title), + ) + except sqlite3.IntegrityError: + raise FeedAlreadyAddedError + feed_id: int = int( + self.connection.execute( + "SELECT id FROM feeds WHERE source = ?;", (feed_source,) + ).fetchone()[0] + ) + self.connection.executemany( + f"INSERT INTO feed_tags(feed_id, tag) VALUES({feed_id}, ?);", tuple(((tag,) for tag in tags)) + ) + for entry in reversed(parsed.entries): + link: str = entry.get("link", "") + try: + epoch_published: typing.Optional[int] = calendar.timegm( + entry.get("published_parsed", None) + ) + except ValueError: + epoch_published = None + try: + epoch_updated: typing.Optional[int] = calendar.timegm( + entry.get("updated_parsed", None) + ) + except ValueError: + epoch_updated = None + self.connection.execute( + "INSERT INTO entries(feed_id, title, link, epoch_published, epoch_updated, epoch_downloaded) VALUES(?, ?, ?, ?, ?, ?);", + ( + feed_id, + feed_title, + link, + epoch_published, + epoch_updated, + int(time.time()), + ), + ) + + def get_entries(self, *, limit: int): + with self.connection: + result = self.connection.execute( + "SELECT feed_id, title, link, epoch_published, epoch_updated FROM entries ORDER BY epoch_downloaded DESC LIMIT ?;", + (limit,), + ).fetchall() + + entries = [] + for entry in result: + entries.append( + { + "feed_id": entry[0], + "title": entry[1], + "link": entry[2], + "epoch_published": entry[3], + "epoch_updated": entry[4], + } + ) + return entries + def get_feed_tags(self, feed_id: int) -> tuple[str]: + with self.connection: + return tuple((t[0] for t in self.connection.execute("SELECT tag FROM feed_tags WHERE feed_id = ?;", (feed_id,)).fetchall())) \ No newline at end of file diff --git a/views/index.tpl b/views/index.tpl index d521810..6ff4ab4 100644 --- a/views/index.tpl +++ b/views/index.tpl @@ -18,27 +18,41 @@ Title Date Tags + Feed - % for i, item in enumerate(reversed(items)): + % for i, entry in enumerate(entries): {{i + 1}} - {{item["title"]}} + {{entry["title"]}} <% + import time dates = [] - if item.get("date_published", None): - dates.append(item["date_published"]) + if entry.get("epoch_published", None): + dates.append(time.strftime("%x %X", time.localtime(entry["epoch_published"]))) end - if item.get("date_updated", None): - dates.append(item["date_updated"]) + if entry.get("epoch_updated", None): + date_updated = time.strftime("%x %X", time.localtime(entry["epoch_updated"])) + if not date_updated in dates: + dates.append(date_updated) + end end %> {{", updated ".join(dates)}} - {{", ".join(item["feed"]["tags"])}} + % tags = tagrss_backend.get_feed_tags(entry["feed_id"]) + % for i, tag in enumerate(tags): + % if i > 0: + {{", "}} + % end + {{tag}} + % end + + + % end -- cgit v1.2.3-57-g22cb