From 178b26030f56bd600c5efa33cffa2758f20a7639 Mon Sep 17 00:00:00 2001 From: Arjun Satarkar Date: Sun, 30 Jul 2023 15:17:52 +0530 Subject: Handle feed deletion after fetch but before inserting new entries We handle this by catching the resulting exception rather than checking first (EAFP: easier to ask forgiveness than permission). The alternative would be to check that the feed exists just before trying to insert; because of the lock, that check would not introduce a race condition. --- serve.py | 14 +++++++++++--- tagrss.py | 34 ++++++++++++++++++++++------------ 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/serve.py b/serve.py index 2f49fc0..7de1b52 100755 --- a/serve.py +++ b/serve.py @@ -226,7 +226,7 @@ def serve_static(path): def update_feeds(run_event: threading.Event): def inner_update(): - logging.info("Updating feeds...") + logging.info("Updating all feeds...") limit = 100 with core_lock: feed_count = core.get_feed_count() @@ -235,9 +235,17 @@ def update_feeds(run_event: threading.Event): feeds = core.get_feeds(limit=limit, offset=limit * i) for feed in feeds: parsed_feed, epoch_downloaded = tagrss.fetch_parsed_feed(feed["source"]) - logging.debug(f"Fetched feed {feed['id']} (source {feed['source']}).") + logging.info(f"Fetched feed {feed['id']} (source {feed['source']}).") with core_lock: - core.store_feed_entries(feed["id"], parsed_feed, epoch_downloaded) + try: + core.store_feed_entries( + feed["id"], parsed_feed, epoch_downloaded + ) + except tagrss.StorageConstraintViolationError: + logging.warning( + f"Failed to update feed {feed['id']} with source {feed['source']} " + "due to constraint violation (feed already deleted?)." 
+ ) logging.info("Finished updating feeds.") inner_update() diff --git a/tagrss.py b/tagrss.py index 25efdd1..c5ad368 100644 --- a/tagrss.py +++ b/tagrss.py @@ -31,6 +31,11 @@ class Sqlite3NotSerializedModeError(Exception): pass +class StorageConstraintViolationError(Exception): + def __init__(self, error): + super().__init__(error) + + def fetch_parsed_feed(feed_source: str) -> tuple[feedparser.FeedParserDict, int]: response = requests.get(feed_source) epoch_downloaded: int = int(time.time()) @@ -251,18 +256,23 @@ class TagRss: except TypeError: epoch_updated = None with self.connection: - self.connection.execute( - "INSERT INTO entries(feed_id, title, link, epoch_published, epoch_updated, epoch_downloaded) \ - VALUES(?, ?, ?, ?, ?, ?);", - ( - feed_id, - title, - link, - epoch_published, - epoch_updated, - epoch_downloaded, - ), - ) + try: + self.connection.execute( + "INSERT INTO entries(feed_id, title, link, epoch_published, epoch_updated, epoch_downloaded) \ + VALUES(?, ?, ?, ?, ?, ?);", + ( + feed_id, + title, + link, + epoch_published, + epoch_updated, + epoch_downloaded, + ), + ) + except sqlite3.IntegrityError as e: + # Probably feed deleted before we got here, so foreign key + # constraints would have been violated by the insert. + raise StorageConstraintViolationError(e) def close(self) -> None: self.connection.close() -- cgit v1.2.3-57-g22cb