author     Arjun Satarkar <me@arjunsatarkar.net>    2024-07-17 10:25:04 +0000
committer  Arjun Satarkar <me@arjunsatarkar.net>    2024-07-17 10:25:04 +0000
commit     65c586632ae421b45c084630f448e67f6c7a7a02 (patch)
tree       6c2579a76b90fc9582ebd1b0d9ef77423aedb4ce /wplink
parent     0053f23bf9b75f4401c5934b63e373b2f487bb88 (diff)
wplink: support multiple links per message, improve efficiency
Added some sanity checks and a cache, and used HEAD rather than GET
requests. Checked the response status code.
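
For context, here is a minimal standalone sketch of the lookup approach described above (a HEAD request that follows redirects, a status check, and an async-lru cache), adapted out of the Red cog into a plain script. The `main` driver and the example title are illustrative only, not part of the commit.

```python
import asyncio
import urllib.parse

import aiohttp
import async_lru


@async_lru.alru_cache(maxsize=512)
async def look_up_page(title: str) -> str | None:
    """Resolve a title via Wikipedia's Special:Search redirect, caching results."""
    query_url = (
        "https://en.wikipedia.org/wiki/Special:Search?"
        f"search={urllib.parse.quote(title)}&go=Go"
    )
    async with aiohttp.ClientSession() as session:
        # HEAD is enough here: we only need the final URL after redirects, not the body.
        async with session.head(query_url, allow_redirects=True) as response:
            if response.status != 200:
                return None
            result_url = str(response.url)
            # Still on Special:Search means no page matched the title.
            if result_url.startswith("https://en.wikipedia.org/wiki/Special:Search?"):
                return None
            return result_url


async def main() -> None:
    # The repeated title is served from the alru_cache without a second HTTP request.
    for title in ["Python (programming language)", "Python (programming language)"]:
        print(title, "->", await look_up_page(title))


if __name__ == "__main__":
    asyncio.run(main())
```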
Diffstat (limited to 'wplink')
-rw-r--r--  wplink/info.json |  2
-rw-r--r--  wplink/wplink.py | 47
2 files changed, 33 insertions, 16 deletions
diff --git a/wplink/info.json b/wplink/info.json
index bd066ba..297e4e0 100644
--- a/wplink/info.json
+++ b/wplink/info.json
@@ -1,4 +1,4 @@
 {
     "author": ["Arjun Satarkar"],
-    "requirements": ["aiohttp"]
+    "requirements": ["aiohttp", "async-lru"]
 }
diff --git a/wplink/wplink.py b/wplink/wplink.py
index 8c27166..0cdd003 100644
--- a/wplink/wplink.py
+++ b/wplink/wplink.py
@@ -1,6 +1,8 @@
 import aiohttp
+import async_lru
 import discord
 from redbot.core import commands
+import logging
 import re
 import urllib.parse
 
@@ -11,25 +13,40 @@ class WPLink(commands.Cog):
 
     @commands.Cog.listener()
     async def on_message(self, message: discord.Message):
-        wikilink_pattern = r"\[\[(.+)\]\]"
-        match = re.search(wikilink_pattern, message.content)
-        if match is not None:
-            title = match.group(1)
+        WIKILINK_PATTERN = r"\[\[(.+?)\]\]"
+        MAX_LINKS_PER_MESSAGE = 6
+        # Per https://www.mediawiki.org/wiki/Page_title_size_limitations
+        MAX_TITLE_LEN = 255
+
+        titles = re.findall(WIKILINK_PATTERN, message.content)
+        titles = titles[:MAX_LINKS_PER_MESSAGE]
+
+        formatted_page_urls = []
+        for title in titles:
+            if len(title) > MAX_TITLE_LEN:
+                continue
             page_url = await self.look_up_page(title)
             if page_url is not None:
-                await message.reply(
-                    f"<{page_url}>", allowed_mentions=discord.AllowedMentions.none()
-                )
+                formatted_page_urls.append(f"<{page_url}>")
+
+        if formatted_page_urls:
+            await message.reply(
+                ", ".join(formatted_page_urls),
+                allowed_mentions=discord.AllowedMentions.none(),
+            )
 
+    @async_lru.alru_cache(maxsize=512)
     async def look_up_page(self, title: str) -> str | None:
+        logging.info("Looking up page title %s", title)
+        MAX_URL_SIZE = 400
         query_url = f"https://en.wikipedia.org/wiki/Special:Search?search={urllib.parse.quote(title)}&go=Go"
         async with aiohttp.ClientSession() as session:
-            async with session.get(query_url) as response:
+            async with session.head(query_url, allow_redirects=True) as response:
+                if response.status != 200:
+                    return None
                 result_url = str(response.url)
-                return (
-                    result_url
-                    if not result_url.startswith(
-                        "https://en.wikipedia.org/wiki/Special:Search?"
-                    )
-                    else None
-                )
+                if len(result_url) > MAX_URL_SIZE or result_url.startswith(
+                    "https://en.wikipedia.org/wiki/Special:Search?"
+                ):
+                    return None
+                return result_url
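
As a quick illustration of the message-parsing half of the change, the sketch below applies the same non-greedy wikilink pattern, per-message cap, and title-length check to a plain string, outside Discord. The sample message text and the `extract_titles` helper name are made up for the example; no network calls are involved.

```python
import re

WIKILINK_PATTERN = r"\[\[(.+?)\]\]"
MAX_LINKS_PER_MESSAGE = 6
# Per https://www.mediawiki.org/wiki/Page_title_size_limitations
MAX_TITLE_LEN = 255


def extract_titles(content: str) -> list[str]:
    """Pull wikilink titles out of a message, mirroring the checks in on_message."""
    # Non-greedy group so "[[A]] and [[B]]" yields two titles rather than one.
    titles = re.findall(WIKILINK_PATTERN, content)
    # Cap how many lookups a single message can trigger.
    titles = titles[:MAX_LINKS_PER_MESSAGE]
    # Skip anything longer than MediaWiki's page title limit.
    return [title for title in titles if len(title) <= MAX_TITLE_LEN]


if __name__ == "__main__":
    message_content = "Compare [[Dijkstra's algorithm]] with [[A* search algorithm]]."
    print(extract_titles(message_content))
    # -> ["Dijkstra's algorithm", 'A* search algorithm']
```

The switch from `re.search` with a greedy group to `re.findall` with a non-greedy one is what makes multiple links per message work: the old pattern would have matched everything between the first `[[` and the last `]]` as a single title.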