aboutsummaryrefslogtreecommitdiff
path: root/wplink
diff options
context:
space:
mode:
author Arjun Satarkar <me@arjunsatarkar.net> 2024-07-17 10:25:04 +0000
committer Arjun Satarkar <me@arjunsatarkar.net> 2024-07-17 10:25:04 +0000
commit 65c586632ae421b45c084630f448e67f6c7a7a02 (patch)
tree 6c2579a76b90fc9582ebd1b0d9ef77423aedb4ce /wplink
parent 0053f23bf9b75f4401c5934b63e373b2f487bb88 (diff)
download aps-cogs-65c586632ae421b45c084630f448e67f6c7a7a02.tar
aps-cogs-65c586632ae421b45c084630f448e67f6c7a7a02.tar.gz
aps-cogs-65c586632ae421b45c084630f448e67f6c7a7a02.zip
wplink: support multiple links per message, improve efficiency
Added some sanity checks, a cache, and used HEAD rather than GET requests. Checked the response status code.
Diffstat (limited to 'wplink')
-rw-r--r-- wplink/info.json 2
-rw-r--r-- wplink/wplink.py 47
2 files changed, 33 insertions, 16 deletions
diff --git a/wplink/info.json b/wplink/info.json
index bd066ba..297e4e0 100644
--- a/wplink/info.json
+++ b/wplink/info.json
@@ -1,4 +1,4 @@
{
"author": ["Arjun Satarkar"],
- "requirements": ["aiohttp"]
+ "requirements": ["aiohttp", "async-lru"]
}
diff --git a/wplink/wplink.py b/wplink/wplink.py
index 8c27166..0cdd003 100644
--- a/wplink/wplink.py
+++ b/wplink/wplink.py
@@ -1,6 +1,8 @@
import aiohttp
+import async_lru
import discord
from redbot.core import commands
+import logging
import re
import urllib.parse
@@ -11,25 +13,40 @@ class WPLink(commands.Cog):
@commands.Cog.listener()
async def on_message(self, message: discord.Message):
- wikilink_pattern = r"\[\[(.+)\]\]"
- match = re.search(wikilink_pattern, message.content)
- if match is not None:
- title = match.group(1)
+ WIKILINK_PATTERN = r"\[\[(.+?)\]\]"
+ MAX_LINKS_PER_MESSAGE = 6
+ # Per https://www.mediawiki.org/wiki/Page_title_size_limitations
+ MAX_TITLE_LEN = 255
+
+ titles = re.findall(WIKILINK_PATTERN, message.content)
+ titles = titles[:MAX_LINKS_PER_MESSAGE]
+
+ formatted_page_urls = []
+ for title in titles:
+ if len(title) > MAX_TITLE_LEN:
+ continue
page_url = await self.look_up_page(title)
if page_url is not None:
- await message.reply(
- f"<{page_url}>", allowed_mentions=discord.AllowedMentions.none()
- )
+ formatted_page_urls.append(f"<{page_url}>")
+
+ if formatted_page_urls:
+ await message.reply(
+ ", ".join(formatted_page_urls),
+ allowed_mentions=discord.AllowedMentions.none(),
+ )
+ @async_lru.alru_cache(maxsize=512)
async def look_up_page(self, title: str) -> str | None:
+ logging.info("Looking up page title %s", title)
+ MAX_URL_SIZE = 400
query_url = f"https://en.wikipedia.org/wiki/Special:Search?search={urllib.parse.quote(title)}&go=Go"
async with aiohttp.ClientSession() as session:
- async with session.get(query_url) as response:
+ async with session.head(query_url, allow_redirects=True) as response:
+ if response.status != 200:
+ return None
result_url = str(response.url)
- return (
- result_url
- if not result_url.startswith(
- "https://en.wikipedia.org/wiki/Special:Search?"
- )
- else None
- )
+ if len(result_url) > MAX_URL_SIZE or result_url.startswith(
+ "https://en.wikipedia.org/wiki/Special:Search?"
+ ):
+ return None
+ return result_url