diff options
author | Arjun Satarkar <me@arjunsatarkar.net> | 2023-07-30 09:47:52 +0000 |
---|---|---|
committer | Arjun Satarkar <me@arjunsatarkar.net> | 2023-07-30 09:47:52 +0000 |
commit | 178b26030f56bd600c5efa33cffa2758f20a7639 (patch) | |
tree | c3d9afec48a665e82fe5e96320c13da2098f32f2 | |
parent | 58932c2d984ce2ca767d48be76d75664c660cabe (diff) | |
download | tagrss-178b26030f56bd600c5efa33cffa2758f20a7639.tar tagrss-178b26030f56bd600c5efa33cffa2758f20a7639.tar.gz tagrss-178b26030f56bd600c5efa33cffa2758f20a7639.zip |
Handle feed deletion after fetch but before inserting new entries
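The approach we use is to catch the resulting exception, following the EAFP
principle (easier to ask forgiveness than permission). The alternative would
be to check whether the feed exists just before trying to insert, which would
not be a race condition because the lock is held at that point.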
-rwxr-xr-x | serve.py | 14 | ||||
-rw-r--r-- | tagrss.py | 34 |
2 files changed, 33 insertions, 15 deletions
@@ -226,7 +226,7 @@ def serve_static(path): def update_feeds(run_event: threading.Event): def inner_update(): - logging.info("Updating feeds...") + logging.info("Updating all feeds...") limit = 100 with core_lock: feed_count = core.get_feed_count() @@ -235,9 +235,17 @@ def update_feeds(run_event: threading.Event): feeds = core.get_feeds(limit=limit, offset=limit * i) for feed in feeds: parsed_feed, epoch_downloaded = tagrss.fetch_parsed_feed(feed["source"]) - logging.debug(f"Fetched feed {feed['id']} (source {feed['source']}).") + logging.info(f"Fetched feed {feed['id']} (source {feed['source']}).") with core_lock: - core.store_feed_entries(feed["id"], parsed_feed, epoch_downloaded) + try: + core.store_feed_entries( + feed["id"], parsed_feed, epoch_downloaded + ) + except tagrss.StorageConstraintViolationError: + logging.warning( + f"Failed to update feed {feed['id']} with source {feed['source']} " + "due to constraint violation (feed already deleted?)." + ) logging.info("Finished updating feeds.") inner_update() @@ -31,6 +31,11 @@ class Sqlite3NotSerializedModeError(Exception): pass +class StorageConstraintViolationError(Exception): + def __init__(self, error): + super().__init__(error) + + def fetch_parsed_feed(feed_source: str) -> tuple[feedparser.FeedParserDict, int]: response = requests.get(feed_source) epoch_downloaded: int = int(time.time()) @@ -251,18 +256,23 @@ class TagRss: except TypeError: epoch_updated = None with self.connection: - self.connection.execute( - "INSERT INTO entries(feed_id, title, link, epoch_published, epoch_updated, epoch_downloaded) \ - VALUES(?, ?, ?, ?, ?, ?);", - ( - feed_id, - title, - link, - epoch_published, - epoch_updated, - epoch_downloaded, - ), - ) + try: + self.connection.execute( + "INSERT INTO entries(feed_id, title, link, epoch_published, epoch_updated, epoch_downloaded) \ + VALUES(?, ?, ?, ?, ?, ?);", + ( + feed_id, + title, + link, + epoch_published, + epoch_updated, + epoch_downloaded, + ), + ) + except 
sqlite3.IntegrityError as e: + # Probably feed deleted before we got here, so foreign key + # constraints would have been violated by the insert. + raise StorageConstraintViolationError(e) def close(self) -> None: self.connection.close() |