From 65c586632ae421b45c084630f448e67f6c7a7a02 Mon Sep 17 00:00:00 2001 From: Arjun Satarkar Date: Wed, 17 Jul 2024 15:55:04 +0530 Subject: wplink: support multiple links per message, improve efficiency Added some sanity checks, a cache, and used HEAD rather than GET requests. Checked the response status code. --- wplink/info.json | 2 +- wplink/wplink.py | 47 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/wplink/info.json b/wplink/info.json index bd066ba..297e4e0 100644 --- a/wplink/info.json +++ b/wplink/info.json @@ -1,4 +1,4 @@ { "author": ["Arjun Satarkar"], - "requirements": ["aiohttp"] + "requirements": ["aiohttp", "async-lru"] } diff --git a/wplink/wplink.py b/wplink/wplink.py index 8c27166..0cdd003 100644 --- a/wplink/wplink.py +++ b/wplink/wplink.py @@ -1,6 +1,8 @@ import aiohttp +import async_lru import discord from redbot.core import commands +import logging import re import urllib.parse @@ -11,25 +13,40 @@ class WPLink(commands.Cog): @commands.Cog.listener() async def on_message(self, message: discord.Message): - wikilink_pattern = r"\[\[(.+)\]\]" - match = re.search(wikilink_pattern, message.content) - if match is not None: - title = match.group(1) + WIKILINK_PATTERN = r"\[\[(.+?)\]\]" + MAX_LINKS_PER_MESSAGE = 6 + # Per https://www.mediawiki.org/wiki/Page_title_size_limitations + MAX_TITLE_LEN = 255 + + titles = re.findall(WIKILINK_PATTERN, message.content) + titles = titles[:MAX_LINKS_PER_MESSAGE] + + formatted_page_urls = [] + for title in titles: + if len(title) > MAX_TITLE_LEN: + continue page_url = await self.look_up_page(title) if page_url is not None: - await message.reply( - f"<{page_url}>", allowed_mentions=discord.AllowedMentions.none() - ) + formatted_page_urls.append(f"<{page_url}>") + + if formatted_page_urls: + await message.reply( + ", ".join(formatted_page_urls), + allowed_mentions=discord.AllowedMentions.none(), + ) + @async_lru.alru_cache(maxsize=512) async def 
look_up_page(self, title: str) -> str | None: + logging.info("Looking up page title %s", title) + MAX_URL_SIZE = 400 query_url = f"https://en.wikipedia.org/wiki/Special:Search?search={urllib.parse.quote(title)}&go=Go" async with aiohttp.ClientSession() as session: - async with session.get(query_url) as response: + async with session.head(query_url, allow_redirects=True) as response: + if response.status != 200: + return None result_url = str(response.url) - return ( - result_url - if not result_url.startswith( - "https://en.wikipedia.org/wiki/Special:Search?" - ) - else None - ) + if len(result_url) > MAX_URL_SIZE or result_url.startswith( + "https://en.wikipedia.org/wiki/Special:Search?" + ): + return None + return result_url -- cgit v1.2.3-57-g22cb