aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Arjun Satarkar <me@arjunsatarkar.net> 2024-03-07 00:51:30 +0000
committer: Arjun Satarkar <me@arjunsatarkar.net> 2024-03-07 00:51:30 +0000
commit: 6f4ea88e9570654d75153ab613ffae2176b495bd (patch)
tree: 8c566956469c933fc30cd8e596208b79bf2b4f62
parent: 80a31686aa0b3265c1d3299d4b5f6173a3bb7096 (diff)
download: aps-cogs-6f4ea88e9570654d75153ab613ffae2176b495bd.tar
aps-cogs-6f4ea88e9570654d75153ab613ffae2176b495bd.tar.gz
aps-cogs-6f4ea88e9570654d75153ab613ffae2176b495bd.zip
markov: normalize message content better
We now do NFKC normalization and replace U+2019 with the normal ASCII single quote.
-rw-r--r-- markov/markov.py 5
1 file changed, 5 insertions, 0 deletions
diff --git a/markov/markov.py b/markov/markov.py
index ec5acbd..a8b749c 100644
--- a/markov/markov.py
+++ b/markov/markov.py
@@ -6,6 +6,7 @@ from redbot.core import commands
import math
import random
import re
+import unicodedata
from .errors import *
MAX_BLACKLISTED_STRINGS_PER_GUILD = 50
@@ -50,6 +51,10 @@ class Markov(commands.Cog):
)
async def process_message(self, clean_content: str, guild_id: int, member_id: int):
+ # Normalize
+ clean_content = unicodedata.normalize("NFKC", clean_content)
+ clean_content = clean_content.replace("’", "'")
+
# Ignore messages with blacklisted strings
for blacklisted_string in await self.config.guild_from_id(
guild_id