From 73bd3c0840f706ee619eeae9c1ec0009f3a900d7 Mon Sep 17 00:00:00 2001 From: Arjun Satarkar Date: Wed, 5 Jul 2023 06:33:30 +0530 Subject: Add periodic updates, clean up code --- requirements.txt | 1 + serve.py | 83 +++++++++++++++++++++----------- setup.sql | 64 +++++++++++++++++++++++++ static/styles/main.css | 20 ++++++-- tagrss.py | 128 +++++++++++++++++++++++++------------------------ views/add_feed.tpl | 2 +- views/delete.html | 14 ------ views/delete_feed.html | 14 ++++++ views/index.tpl | 90 +++++++++++++++++++++++----------- views/manage_feed.tpl | 11 +++-- 10 files changed, 289 insertions(+), 138 deletions(-) create mode 100644 setup.sql delete mode 100644 views/delete.html create mode 100644 views/delete_feed.html diff --git a/requirements.txt b/requirements.txt index c32839f..268609c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ gevent==22.10.2 greenlet==2.0.2 idna==3.4 requests==2.31.0 +schedule==1.2.0 sgmllib3k==1.0.0 urllib3==2.0.3 zope.event==4.6 diff --git a/serve.py b/serve.py index d6e074b..1cd6f78 100755 --- a/serve.py +++ b/serve.py @@ -6,8 +6,10 @@ import bottle import gevent.lock import argparse -import os import pathlib +import schedule +import threading +import time import typing import tagrss @@ -16,11 +18,12 @@ parser = argparse.ArgumentParser() parser.add_argument("--host", default="localhost") parser.add_argument("--port", default=8000, type=int) parser.add_argument("--storage-path", required=True) +parser.add_argument("--update-seconds", default=3600, type=int) args = parser.parse_args() storage_path: pathlib.Path = pathlib.Path(args.storage_path) -tagrss_lock = gevent.lock.RLock() +core_lock = gevent.lock.RLock() core = tagrss.TagRss(storage_path=storage_path) @@ -57,7 +60,7 @@ def serialise_tags(tags: list[str]) -> str: @bottle.route("/") def index(): - with tagrss_lock: + with core_lock: entries = core.get_entries(limit=100) return bottle.template("index", entries=entries, core=core) @@ -73,7 +76,7 @@ def add_feed_effect(): tags = parse_space_separated_tags(bottle.request.forms.get("tags")) # type: ignore already_present: bool = False - with tagrss_lock: + with core_lock: try: core.add_feed(feed_source=feed_source, tags=tags) except tagrss.FeedAlreadyAddedError: @@ -96,39 +99,65 @@ def manage_feed_view(): raise bottle.HTTPError(400, "Feed ID not given.") feed: dict[str, typing.Any] = {} feed["id"] = feed_id - feed["source"] = core.get_feed_source(feed_id) - feed["title"] = core.get_feed_title(feed_id) - feed["tags"] = core.get_feed_tags(feed_id) + with core_lock: + feed["source"] = core.get_feed_source(feed_id) + feed["title"] = core.get_feed_title(feed_id) + feed["tags"] = core.get_feed_tags(feed_id) feed["serialised_tags"] = serialise_tags(feed["tags"]) return bottle.template("manage_feed", feed=feed) + @bottle.post("/manage_feed") def manage_feed_effect_update(): - feed_id: int = int(bottle.request.forms["id"]) # type: ignore - feed_source: str = bottle.request.forms["source"] # type: ignore - feed_title: str = bottle.request.forms["title"] # type: ignore - feed_tags: list[str] = parse_space_separated_tags(bottle.request.forms["tags"]) # type: ignore - core.set_feed_source(feed_id, feed_source) - core.set_feed_title(feed_id, feed_title) - core.set_feed_tags(feed_id, feed_tags) - return bottle.redirect(f"/manage_feed?feed={feed_id}") - -@bottle.get("/delete") -def delete_view(): - return bottle.static_file("delete.html", root="views") - - -@bottle.post("/delete") -def delete_effect(): + feed: dict[str, typing.Any] = {} + feed["id"] = int(bottle.request.forms["id"]) # type: ignore + feed["source"] = bottle.request.forms["source"] # type: ignore + feed["title"] = bottle.request.forms["title"] # type: ignore + feed["tags"] = parse_space_separated_tags(bottle.request.forms["tags"]) # type: ignore + feed["serialised_tags"] = bottle.request.forms["tags"] # type: ignore + with core_lock: + core.set_feed_source(feed["id"], feed["source"]) + core.set_feed_title(feed["id"], feed["title"]) + core.set_feed_tags(feed["id"], feed["tags"]) + return bottle.template("manage_feed", feed=feed, after_update=True) + + +@bottle.get("/delete_feed") +def delete_feed_view(): + return bottle.static_file("delete_feed.html", root="views") + + +@bottle.post("/delete_feed") +def delete_feed_effect(): feed_id: int = int(bottle.request.forms["id"]) # type: ignore - core.delete_feed(feed_id) - return bottle.redirect("/delete") + with core_lock: + core.delete_feed(feed_id) + return bottle.redirect("/delete_feed") @bottle.get("/static/") def serve_static(path): - return bottle.static_file(path, pathlib.Path(os.getcwd(), "static")) + return bottle.static_file(path, "static") + + +def update_feeds(run_event: threading.Event): + def inner_update(): + with core_lock: + core.fetch_all_new_feed_entries() + inner_update() + schedule.every(args.update_seconds).seconds.do(inner_update) + try: + while run_event.is_set(): + schedule.run_pending() + time.sleep(1) + except KeyboardInterrupt: + return +run_event = threading.Event() +run_event.set() +threading.Thread(target=update_feeds, args=(run_event,)).start() bottle.run(host=args.host, port=args.port, server="gevent") -core.close() +run_event.clear() +with core_lock: + core.close() diff --git a/setup.sql b/setup.sql new file mode 100644 index 0000000..02aac26 --- /dev/null +++ b/setup.sql @@ -0,0 +1,64 @@ +PRAGMA foreign_keys = ON; + +CREATE TABLE IF NOT EXISTS tagrss_info(info_key TEXT PRIMARY KEY, value TEXT) STRICT; + +INSERT + OR REPLACE INTO tagrss_info(info_key, value) +VALUES + ("version", "0.9.0"); + +CREATE TABLE IF NOT EXISTS feeds( + id INTEGER PRIMARY KEY, + source TEXT UNIQUE, + title TEXT +) STRICT; + +CREATE TABLE IF NOT EXISTS feed_tags( + feed_id INTEGER REFERENCES feeds(id) ON DELETE CASCADE, + tag TEXT +) STRICT; + +CREATE INDEX IF NOT EXISTS idx_feed_tags__feed_id__tag ON feed_tags(feed_id, tag); + +CREATE INDEX IF NOT EXISTS idx_feed_tags__tag__feed_id ON feed_tags(tag, feed_id); + +CREATE TABLE IF NOT EXISTS entries( + id INTEGER PRIMARY KEY, + feed_id INTEGER REFERENCES feeds(id) ON DELETE CASCADE, + title TEXT, + link TEXT, + epoch_published INTEGER, + epoch_updated INTEGER, + epoch_stored INTEGER +) STRICT; + +CREATE INDEX IF NOT EXISTS idx_entries__epoch_stored ON entries(epoch_stored); + +CREATE INDEX IF NOT EXISTS idx_entries__feed_id__title__link__epoch_published__epoch_updated ON entries( + feed_id, + title, + link, + epoch_published, + epoch_updated +); + +CREATE TRIGGER IF NOT EXISTS trig_entries__ensure_unique_with_identical_nulls_before_insert BEFORE +INSERT + ON entries BEGIN +SELECT + RAISE(IGNORE) +WHERE + EXISTS ( + SELECT + 1 + FROM + entries + WHERE + feed_id = NEW.feed_id + AND title IS NEW.title + AND link IS NEW.link + AND epoch_published IS NEW.epoch_published + AND epoch_updated IS NEW.epoch_updated + ); + +END; diff --git a/static/styles/main.css b/static/styles/main.css index 3523c5c..c7fdf59 100644 --- a/static/styles/main.css +++ b/static/styles/main.css @@ -15,18 +15,32 @@ font-family: "Open Sans", sans-serif; } +body { + background-color: black; + color: white; +} + +a:visited { + color:violet; +} + +a:link, a.no-visited-indication { + color: lightskyblue; +} + table { width: 100%; border-collapse: collapse; - border: 1px solid black; + border: 1px solid white; } th, td { - border: 1px solid black; + border: 1px solid white; } span.tag { - background-color: palegoldenrod; + background-color: lightgreen; + color: black; } .hover-help { diff --git a/tagrss.py b/tagrss.py index c1dfbf5..f6d125e 100644 --- a/tagrss.py +++ b/tagrss.py @@ -27,38 +27,8 @@ class TagRss: self.connection: sqlite3.Connection = sqlite3.connect(storage_path) with self.connection: - self.connection.executescript( - """ -PRAGMA foreign_keys = ON; - -CREATE TABLE IF NOT EXISTS - feeds( - id INTEGER PRIMARY KEY, - source TEXT UNIQUE, - title TEXT - ) STRICT; - -CREATE TABLE IF NOT EXISTS - feed_tags( - feed_id INTEGER REFERENCES feeds(id) ON DELETE CASCADE, - tag TEXT - ) STRICT; -CREATE INDEX IF NOT EXISTS idx_feed_tags__feed_id__tag ON feed_tags(feed_id, tag); -CREATE INDEX IF NOT EXISTS idx_feed_tags__tag__feed_id ON feed_tags(tag, feed_id); - -CREATE TABLE IF NOT EXISTS - entries( - id INTEGER PRIMARY KEY, - feed_id INTEGER REFERENCES feeds(id) ON DELETE CASCADE, - title TEXT, - link TEXT, - epoch_published INTEGER, - epoch_updated INTEGER, - epoch_downloaded INTEGER - ) STRICT; -CREATE INDEX IF NOT EXISTS idx_entries__epoch_downloaded ON entries(epoch_downloaded); - """ - ) + with open("setup.sql", "r") as setup_script: + self.connection.executescript(setup_script.read()) if (1,) not in self.connection.execute("PRAGMA foreign_keys;").fetchmany(1): raise SqliteMissingForeignKeySupportError @@ -90,39 +60,13 @@ CREATE INDEX IF NOT EXISTS idx_entries__epoch_downloaded ON entries(epoch_downlo "INSERT INTO feed_tags(feed_id, tag) VALUES(?, ?);", ((feed_id, tag) for tag in tags), ) - for entry in reversed(parsed.entries): - link: str = entry.get("link", None) - title: str = entry.get("title", None) - try: - epoch_published: typing.Optional[int] = calendar.timegm( - entry.get("published_parsed", None) - ) - except TypeError: - epoch_published = None - try: - epoch_updated: typing.Optional[int] = calendar.timegm( - entry.get("updated_parsed", None) - ) - except TypeError: - epoch_updated = None - self.connection.execute( - "INSERT INTO entries(feed_id, title, link, epoch_published, epoch_updated, epoch_downloaded) \ - VALUES(?, ?, ?, ?, ?, ?);", - ( - feed_id, - title, - link, - epoch_published, - epoch_updated, - int(time.time()), - ), - ) + self.store_feed_entries(feed_id, parsed) def get_entries(self, *, limit: int) -> list[dict[str, typing.Any]]: with self.connection: resp = self.connection.execute( "SELECT feed_id, title, link, epoch_published, epoch_updated FROM entries \ - ORDER BY epoch_downloaded DESC LIMIT ?;", + ORDER BY epoch_stored DESC LIMIT ?;", (limit,), ).fetchall() @@ -174,7 +118,9 @@ CREATE INDEX IF NOT EXISTS idx_entries__epoch_downloaded ON entries(epoch_downlo def set_feed_tags(self, feed_id: int, feed_tags: list[str]): with self.connection: - self.connection.execute("DELETE FROM feed_tags WHERE feed_id = ?;", (feed_id,)) + self.connection.execute( + "DELETE FROM feed_tags WHERE feed_id = ?;", (feed_id,) + ) self.connection.executemany( "INSERT INTO feed_tags(feed_id, tag) VALUES(?, ?);", ((feed_id, tag) for tag in feed_tags), @@ -184,7 +130,65 @@ CREATE INDEX IF NOT EXISTS idx_entries__epoch_downloaded ON entries(epoch_downlo with self.connection: self.connection.execute("DELETE FROM feeds WHERE id = ?;", (feed_id,)) + def fetch_all_new_feed_entries(self) -> None: + with self.connection: + resp = self.connection.execute("SELECT id, source FROM feeds;") + while True: + row = resp.fetchone() + if not row: + break + feed_id = row[0] + feed_source = row[1] + response = requests.get(feed_source) + if response.status_code != requests.codes.ok: + continue # TODO: log this somehow + try: + base: str = response.headers["Content-Location"] + except KeyError: + base: str = feed_source + parsed = feedparser.parse( + io.BytesIO(bytes(response.text, encoding="utf-8")), + response_headers={"Content-Location": base}, + ) + self.store_feed_entries(feed_id, parsed) + + def store_feed_entries(self, feed_id: int, parsed_feed): + for entry in reversed(parsed_feed.entries): + link: str = entry.get("link", None) + title: str = entry.get("title", None) + try: + epoch_published: typing.Optional[int] = calendar.timegm( + entry.get("published_parsed", None) + ) + except TypeError: + epoch_published = None + try: + epoch_updated: typing.Optional[int] = calendar.timegm( + entry.get("updated_parsed", None) + ) + except TypeError: + epoch_updated = None + with self.connection: + self.connection.execute( + "INSERT INTO entries(feed_id, title, link, epoch_published, epoch_updated, epoch_stored) \ + VALUES(?, ?, ?, ?, ?, ?);", + ( + feed_id, + title, + link, + epoch_published, + epoch_updated, + int(time.time()), + ), + ) + + def close(self) -> None: with self.connection: - self.connection.execute("PRAGMA optimize;") + self.connection.executescript( + """ +PRAGMA analysis_limit=1000; +PRAGMA optimize; + """ + ) self.connection.close() diff --git a/views/add_feed.tpl b/views/add_feed.tpl index 5fc88fb..2e3008d 100644 --- a/views/add_feed.tpl +++ b/views/add_feed.tpl @@ -7,7 +7,7 @@ - < home + < home % if not get("already_present", False): % if get("after_add", False):

Added feed {{feed_source}}

diff --git a/views/delete.html b/views/delete.html deleted file mode 100644 index 000b733..0000000 --- a/views/delete.html +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - Feed Deleted | TagRSS - - - - -

Feed successfully deleted. Redirecting...

- home - - diff --git a/views/delete_feed.html b/views/delete_feed.html new file mode 100644 index 0000000..da10b3c --- /dev/null +++ b/views/delete_feed.html @@ -0,0 +1,14 @@ + + + + + + Feed Deleted | TagRSS + + + + +

Feed successfully deleted. Redirecting...

+ home + + diff --git a/views/index.tpl b/views/index.tpl index 6ca1518..fe716f0 100644 --- a/views/index.tpl +++ b/views/index.tpl @@ -1,60 +1,96 @@ -<% - import time -%> +% import time - View Feeds | TagRSS + View Feed Entries | TagRSS +

TagRSS

- - - - - + + + + + % for i, entry in enumerate(entries): - + <% - dates = [] + date = "" if entry.get("epoch_published", None): - dates.append(time.strftime("%x %X", time.localtime(entry["epoch_published"]))) + date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(entry["epoch_published"])) end if entry.get("epoch_updated", None): - date_updated = time.strftime("%x %X", time.localtime(entry["epoch_updated"])) - if not date_updated in dates: - dates.append(date_updated) - end + date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(entry["epoch_updated"])) end %> - - % end diff --git a/views/manage_feed.tpl b/views/manage_feed.tpl index 86a9c5d..15d7dec 100644 --- a/views/manage_feed.tpl +++ b/views/manage_feed.tpl @@ -7,7 +7,10 @@ - < home + < home + % if get("after_update", False): +

Updated feed details.

+ % end

Manage feed

#TitleDateTagsFeed#TitleDate & Time ({{time.tzname[time.localtime().tm_isdst]}})TagsFeed
{{i + 1}}{{entry["title"]}}{{entry["title"]}} - {{", updated ".join(dates)}} + - % tags = core.get_feed_tags(entry["feed_id"]) - % for i, tag in enumerate(tags): - % if i > 0: - {{", "}} + +
+ % tags = core.get_feed_tags(entry["feed_id"]) + % for i, tag in enumerate(tags): + % if i > 0: + {{", "}} + % end + {{tag}} % end - {{tag}} - % end +
- + +
+ + {{core.get_feed_title(entry["feed_id"])}} +
@@ -16,7 +19,7 @@ - + @@ -48,9 +51,9 @@
- + - \ No newline at end of file + -- cgit v1.2.3-57-g22cb
Source{{feed["source"]}}{{feed["source"]}}
Tags