diff options
author | Arjun Satarkar <me@arjunsatarkar.net> | 2023-07-30 21:41:12 +0000 |
---|---|---|
committer | Arjun Satarkar <me@arjunsatarkar.net> | 2023-07-30 21:58:21 +0000 |
commit | e59a283db5f850dd791df021e22b599ecc70a824 (patch) | |
tree | c9a20c8e31b5cfd43abd13961d1ebd191d3a16aa | |
parent | 1910ead6204cb37e0a44b519e96eb1c526afffda (diff) | |
download | tagrss-e59a283db5f850dd791df021e22b599ecc70a824.tar tagrss-e59a283db5f850dd791df021e22b599ecc70a824.tar.gz tagrss-e59a283db5f850dd791df021e22b599ecc70a824.zip |
Get feed info for every displayed entry in one query, fix crash
Replacing the old approach where for every entry the title & tags were
queried separately. If 50 entries are displayed, this is 100 extra
queries that are now one query. If the maximum 1000 are displayed, this
is 2000 queries to one.
Benchmarking this change with
ab -n 500 -c 100 http://localhost:8000/?page_num=1&per_page=1000
with 1000+ entries present, we get:
(PREVIOUS) Time taken for tests: 0.840 seconds
(NEW) Time taken for tests: 0.476 seconds
The crash occurred when get_feeds() was called with get_tags=True and
the query returned at least one feed with no tags. This was not handled
properly by later code. It was not noticed when testing the previous
commit since all feeds in the test instance had one or more tags.
-rwxr-xr-x | serve.py | 36 | ||||
-rw-r--r-- | tagrss.py | 65 | ||||
-rw-r--r-- | views/index.tpl | 5 |
3 files changed, 81 insertions, 25 deletions
@@ -103,20 +103,30 @@ def index(): included_feeds=included_feeds, included_tags=included_tags, ) - return bottle.template( - "index", - entries=entries, - offset=offset, - page_num=page_num, - total_pages=total_pages, - per_page=per_page, - max_per_page=MAX_PER_PAGE_ENTRIES, - included_feeds=included_feeds, - included_tags=included_tags, - included_feeds_str=included_feeds_str, - included_tags_str=included_tags_str, - core=core, + referenced_feed_ids = list({entry["feed_id"] for entry in entries}) + with core_lock: + referenced_feeds_list = core.get_feeds( + limit=len(referenced_feed_ids), + included_feeds=referenced_feed_ids, + get_tags=True, ) + referenced_feeds = {} + for feed in referenced_feeds_list: + referenced_feeds[feed["id"]] = {k: feed[k] for k in feed if k != "id"} + return bottle.template( + "index", + entries=entries, + offset=offset, + page_num=page_num, + total_pages=total_pages, + per_page=per_page, + max_per_page=MAX_PER_PAGE_ENTRIES, + included_feeds=included_feeds, + included_tags=included_tags, + included_feeds_str=included_feeds_str, + included_tags_str=included_tags_str, + referenced_feeds=referenced_feeds + ) @bottle.get("/list_feeds") @@ -185,13 +185,32 @@ class TagRss: self.connection.execute("DELETE FROM feeds WHERE id = ?;", (feed_id,)) def get_feeds( - self, *, limit: int, offset: int = 0, get_tags: bool = False + self, + *, + limit: int, + offset: int = 0, + included_feeds: typing.Optional[list[int]] = None, + included_tags: typing.Optional[list[str]] = None, + get_tags: bool = False, ) -> list[dict[str, typing.Any]]: + where_clause = "WHERE 1" + if included_feeds: + where_clause += f" AND id IN ({','.join('?' * len(included_feeds))})" + if included_tags: + where_clause += " AND id IN (SELECT id FROM feed_tags WHERE tag = ?)" * len( + included_tags + ) with self.connection: resp = self.connection.execute( - "SELECT id, source, title FROM feeds \ + f"SELECT id, source, title FROM feeds \ + {where_clause} \ ORDER BY id ASC LIMIT ? OFFSET ?;", - (limit, offset), + ( + *(included_feeds if included_feeds else ()), + *(included_tags if included_tags else ()), + limit, + offset, + ), ).fetchall() feeds: dict[int, dict[str, typing.Any]] = {} for row in resp: @@ -201,7 +220,7 @@ class TagRss: } if get_tags: feed_ids = feeds.keys() - placeholder_str = ",".join(["?"] * len(feed_ids)) + placeholder_str = ",".join("?" * len(feed_ids)) with self.connection: resp = self.connection.execute( f"SELECT feed_id, tag FROM feed_tags WHERE feed_id in ({placeholder_str});", @@ -220,7 +239,10 @@ class TagRss: "title": item[1]["title"], } if get_tags: - feed["tags"] = item[1]["tags"] + try: + feed["tags"] = item[1]["tags"] + except KeyError: + feed["tags"] = [] result.append(feed) return result @@ -255,11 +277,34 @@ class TagRss: ), ).fetchone()[0] - def get_feed_count(self) -> int: - with self.connection: - return self.connection.execute("SELECT count from feed_count;").fetchone()[ - 0 - ] + def get_feed_count( + self, + *, + included_feeds: typing.Optional[typing.Collection[int]] = None, + included_tags: typing.Optional[typing.Collection[str]] = None, + ) -> int: + if not (included_feeds or included_tags): + with self.connection: + return self.connection.execute( + "SELECT count from feed_count;" + ).fetchone()[0] + else: + where_clause: str = "WHERE 1" + if included_feeds: + where_clause += f" AND id IN ({','.join('?' * len(included_feeds))})" + if included_tags: + where_clause += ( + " AND id IN (SELECT id FROM feed_tags WHERE tag = ?)" + * len(included_tags) + ) + with self.connection: + return self.connection.execute( + f"SELECT COUNT(*) FROM feeds {where_clause}", + ( + *(included_feeds if included_feeds else ()), + *(included_tags if included_tags else ()), + ), + ).fetchone()[0] def store_feed_entries(self, feed_id: int, parsed_feed, epoch_downloaded: int): for entry in reversed(parsed_feed.entries): diff --git a/views/index.tpl b/views/index.tpl index 738adb6..0145a4e 100644 --- a/views/index.tpl +++ b/views/index.tpl @@ -116,7 +116,7 @@ </td> <td class="td-tags"> <div> - % tags = core.get_feed_tags(entry["feed_id"]) + % tags = referenced_feeds[entry["feed_id"]]["tags"] % for i, tag in enumerate(tags): % if i > 0: {{", "}} @@ -128,7 +128,8 @@ <td class="td-feed"> <div> <a href="/manage_feed?feed={{entry['feed_id']}}" class="no-visited-indication">⚙</a> - {{core.get_feed_title(entry["feed_id"])}} <small>(</small>{{entry["feed_id"]}}<small>)</small> + {{referenced_feeds[entry["feed_id"]]["title"]}} + <small>(</small>{{entry["feed_id"]}}<small>)</small> </div> </td> </tr> |