From 0e120e5221478a3140e09de717ec8e0ff2d5178a Mon Sep 17 00:00:00 2001 From: Arjun Satarkar Date: Thu, 3 Aug 2023 19:03:36 +0530 Subject: Simplify process to update feeds (i.e. fetch & store new entries) --- serve.py | 25 +++++++++++++++---------- tagrss.py | 17 +++++++++++------ 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/serve.py b/serve.py index 87a30bb..6f9dea3 100755 --- a/serve.py +++ b/serve.py @@ -72,17 +72,24 @@ def serialise_tags(tags: list[str]) -> str: @bottle.get("/") def index(): - per_page: int = min(MAX_PER_PAGE_ENTRIES, int(bottle.request.query.get("per_page", DEFAULT_PER_PAGE_ENTRIES))) # type: ignore + per_page: int = min( + MAX_PER_PAGE_ENTRIES, + int(bottle.request.query.get("per_page", DEFAULT_PER_PAGE_ENTRIES)), # type: ignore + ) page_num = int(bottle.request.query.get("page_num", 1)) # type: ignore offset = (page_num - 1) * per_page - included_feeds_str: typing.Optional[str] = bottle.request.query.get("included_feeds", None) # type: ignore + included_feeds_str: typing.Optional[str] = bottle.request.query.get( # type: ignore + "included_feeds", None + ) included_feeds: typing.Optional[list[int]] = None if included_feeds_str: try: included_feeds = [int(feed_id) for feed_id in included_feeds_str.split(" ")] except ValueError: pass - included_tags_str: typing.Optional[str] = bottle.request.query.get("included_tags", None) # type: ignore + included_tags_str: typing.Optional[str] = bottle.request.query.get( # type: ignore + "included_tags", None + ) included_tags: typing.Optional[list[str]] = None if included_tags_str: included_tags = parse_space_separated_tags(included_tags_str) @@ -128,7 +135,10 @@ def index(): @bottle.get("/list_feeds") def list_feeds(): - per_page: int = min(MAX_PER_PAGE_ENTRIES, int(bottle.request.query.get("per_page", DEFAULT_PER_PAGE_ENTRIES))) # type: ignore + per_page: int = min( + MAX_PER_PAGE_ENTRIES, + int(bottle.request.query.get("per_page", DEFAULT_PER_PAGE_ENTRIES)), # type: ignore + ) page_num = int(bottle.request.query.get("page_num", 1)) # type: ignore offset = (page_num - 1) * per_page total_pages: int = max(1, math.ceil(core.get_feed_count() / per_page)) @@ -252,13 +262,8 @@ def update_feeds(run_event: threading.Event): for i in range(math.ceil(feed_count / limit)): feeds = core.get_feeds(limit=limit, offset=limit * i) for feed in feeds: - parsed, epoch_downloaded = core.fetch_and_parse_feed(feed.source) try: - core.store_feed_entries( - feed_id=feed.id, # type: ignore - parsed=parsed, - epoch_downloaded=epoch_downloaded, - ) + core.update_feed(feed.id) # type: ignore except tagrss.StorageConstraintViolationError: logging.warning( f"Failed to update feed {feed.id} with source {feed.source} due" diff --git a/tagrss.py b/tagrss.py index 5785fbb..d113a96 100644 --- a/tagrss.py +++ b/tagrss.py @@ -53,10 +53,6 @@ Epoch = int ParsedFeed = feedparser.FeedParserDict -class StorageProvider(abc.ABC): - pass - - @dataclasses.dataclass(kw_only=True) class PartialFeed: id: typing.Optional[FeedId] = None @@ -75,6 +71,10 @@ class Entry: epoch_updated: Epoch +class StorageProvider(abc.ABC): + pass + + class SqliteStorageProvider(StorageProvider): def __init__(self, storage_path: str | pathlib.Path): self.__raw_connection = sqlite3.connect(storage_path, check_same_thread=False) @@ -392,7 +392,7 @@ class TagRss: def __init__(self, *, storage_path: str | pathlib.Path): self.__storage = SqliteStorageProvider(storage_path) - def fetch_and_parse_feed(self, source) -> tuple[ParsedFeed, int]: + def __fetch_and_parse_feed(self, source) -> tuple[ParsedFeed, Epoch]: response = requests.get(source) epoch_downloaded: int = int(time.time()) if response.status_code != requests.codes.ok: @@ -412,7 +412,7 @@ class TagRss: source: str, tags: list[str], ) -> int: - parsed, epoch_downloaded = self.fetch_and_parse_feed(source) + parsed, epoch_downloaded = self.__fetch_and_parse_feed(source) title: str = parsed.feed.get("title", "") # type: ignore feed_id = self.__storage.store_feed(source=source, title=title, tags=tags) self.__storage.store_entries( @@ -493,6 +493,11 @@ class TagRss: included_feeds=included_feeds, included_tags=included_tags ) + def update_feed(self, feed_id: FeedId) -> None: + source = self.get_feed_source(feed_id) + parsed, epoch_downloaded = self.__fetch_and_parse_feed(source) + self.store_feed_entries(parsed, feed_id, epoch_downloaded) + def store_feed_entries( self, parsed: ParsedFeed, feed_id: FeedId, epoch_downloaded: int ): -- cgit v1.2.3-57-g22cb