From e59a283db5f850dd791df021e22b599ecc70a824 Mon Sep 17 00:00:00 2001 From: Arjun Satarkar Date: Mon, 31 Jul 2023 03:11:12 +0530 Subject: Get feed info for every displayed entry in one query, fix crash Replacing the old approach where for every entry the title & tags were queried separately. If 50 entries are displayed, this is 100 extra queries that are now one query. If the maximum 1000 are displayed, this is 2000 queries to one. Benchmarking this change with ab -n 500 -c 100 http://localhost:8000/?page_num=1&per_page=1000 with 1000+ entries present, we get: (PREVIOUS) Time taken for tests: 0.840 seconds (NEW) Time taken for tests: 0.476 seconds The crash occurred when get_feeds() was called with get_tags=True and the query returned at least one feed with no tags. This was not handled properly by later code. It was not noticed when testing the previous commit since all feeds in the test instance had one or more tags. --- serve.py | 36 ++++++++++++++++++++------------ tagrss.py | 65 ++++++++++++++++++++++++++++++++++++++++++++++++--------- views/index.tpl | 5 +++-- 3 files changed, 81 insertions(+), 25 deletions(-) diff --git a/serve.py b/serve.py index 92c79ff..f456f80 100755 --- a/serve.py +++ b/serve.py @@ -103,20 +103,30 @@ def index(): included_feeds=included_feeds, included_tags=included_tags, ) - return bottle.template( - "index", - entries=entries, - offset=offset, - page_num=page_num, - total_pages=total_pages, - per_page=per_page, - max_per_page=MAX_PER_PAGE_ENTRIES, - included_feeds=included_feeds, - included_tags=included_tags, - included_feeds_str=included_feeds_str, - included_tags_str=included_tags_str, - core=core, + referenced_feed_ids = list({entry["feed_id"] for entry in entries}) + with core_lock: + referenced_feeds_list = core.get_feeds( + limit=len(referenced_feed_ids), + included_feeds=referenced_feed_ids, + get_tags=True, ) + referenced_feeds = {} + for feed in referenced_feeds_list: + referenced_feeds[feed["id"]] = {k: feed[k] for k in feed if k != "id"} + return bottle.template( + "index", + entries=entries, + offset=offset, + page_num=page_num, + total_pages=total_pages, + per_page=per_page, + max_per_page=MAX_PER_PAGE_ENTRIES, + included_feeds=included_feeds, + included_tags=included_tags, + included_feeds_str=included_feeds_str, + included_tags_str=included_tags_str, + referenced_feeds=referenced_feeds + ) @bottle.get("/list_feeds") diff --git a/tagrss.py b/tagrss.py index 41c8a70..5d5ad01 100644 --- a/tagrss.py +++ b/tagrss.py @@ -185,13 +185,32 @@ class TagRss: self.connection.execute("DELETE FROM feeds WHERE id = ?;", (feed_id,)) def get_feeds( - self, *, limit: int, offset: int = 0, get_tags: bool = False + self, + *, + limit: int, + offset: int = 0, + included_feeds: typing.Optional[list[int]] = None, + included_tags: typing.Optional[list[str]] = None, + get_tags: bool = False, ) -> list[dict[str, typing.Any]]: + where_clause = "WHERE 1" + if included_feeds: + where_clause += f" AND id IN ({','.join('?' * len(included_feeds))})" + if included_tags: + where_clause += " AND id IN (SELECT id FROM feed_tags WHERE tag = ?)" * len( + included_tags + ) with self.connection: resp = self.connection.execute( - "SELECT id, source, title FROM feeds \ + f"SELECT id, source, title FROM feeds \ + {where_clause} \ ORDER BY id ASC LIMIT ? OFFSET ?;", - (limit, offset), + ( + *(included_feeds if included_feeds else ()), + *(included_tags if included_tags else ()), + limit, + offset, + ), ).fetchall() feeds: dict[int, dict[str, typing.Any]] = {} for row in resp: @@ -201,7 +220,7 @@ class TagRss: } if get_tags: feed_ids = feeds.keys() - placeholder_str = ",".join(["?"] * len(feed_ids)) + placeholder_str = ",".join("?" * len(feed_ids)) with self.connection: resp = self.connection.execute( f"SELECT feed_id, tag FROM feed_tags WHERE feed_id in ({placeholder_str});", @@ -220,7 +239,10 @@ class TagRss: "title": item[1]["title"], } if get_tags: - feed["tags"] = item[1]["tags"] + try: + feed["tags"] = item[1]["tags"] + except KeyError: + feed["tags"] = [] result.append(feed) return result @@ -255,11 +277,34 @@ class TagRss: ), ).fetchone()[0] - def get_feed_count(self) -> int: - with self.connection: - return self.connection.execute("SELECT count from feed_count;").fetchone()[ - 0 - ] + def get_feed_count( + self, + *, + included_feeds: typing.Optional[typing.Collection[int]] = None, + included_tags: typing.Optional[typing.Collection[str]] = None, + ) -> int: + if not (included_feeds or included_tags): + with self.connection: + return self.connection.execute( + "SELECT count from feed_count;" + ).fetchone()[0] + else: + where_clause: str = "WHERE 1" + if included_feeds: + where_clause += f" AND id IN ({','.join('?' * len(included_feeds))})" + if included_tags: + where_clause += ( + " AND id IN (SELECT id FROM feed_tags WHERE tag = ?)" + * len(included_tags) + ) + with self.connection: + return self.connection.execute( + f"SELECT COUNT(*) FROM feeds {where_clause}", + ( + *(included_feeds if included_feeds else ()), + *(included_tags if included_tags else ()), + ), + ).fetchone()[0] def store_feed_entries(self, feed_id: int, parsed_feed, epoch_downloaded: int): for entry in reversed(parsed_feed.entries): diff --git a/views/index.tpl b/views/index.tpl index 738adb6..0145a4e 100644 --- a/views/index.tpl +++ b/views/index.tpl @@ -116,7 +116,7 @@
- % tags = core.get_feed_tags(entry["feed_id"]) + % tags = referenced_feeds[entry["feed_id"]]["tags"] % for i, tag in enumerate(tags): % if i > 0: {{", "}} @@ -128,7 +128,8 @@
- {{core.get_feed_title(entry["feed_id"])}} ({{entry["feed_id"]}}) + {{referenced_feeds[entry["feed_id"]]["title"]}} + ({{entry["feed_id"]}})
-- cgit v1.2.3-57-g22cb