diff options
author | Arjun Satarkar <me@arjunsatarkar.net> | 2024-03-07 00:51:30 +0000 |
---|---|---|
committer | Arjun Satarkar <me@arjunsatarkar.net> | 2024-03-07 00:51:30 +0000 |
commit | 6f4ea88e9570654d75153ab613ffae2176b495bd (patch) | |
tree | 8c566956469c933fc30cd8e596208b79bf2b4f62 /markov/markov.py | |
parent | 80a31686aa0b3265c1d3299d4b5f6173a3bb7096 (diff) | |
download | aps-cogs-6f4ea88e9570654d75153ab613ffae2176b495bd.tar aps-cogs-6f4ea88e9570654d75153ab613ffae2176b495bd.tar.gz aps-cogs-6f4ea88e9570654d75153ab613ffae2176b495bd.zip |
markov: normalize message content better
We now apply Unicode NFKC normalization to message content and replace
U+2019 (RIGHT SINGLE QUOTATION MARK) with the ASCII apostrophe (U+0027).
Diffstat (limited to 'markov/markov.py')
-rw-r--r-- | markov/markov.py | 5 |
1 file changed, 5 insertions, 0 deletions
diff --git a/markov/markov.py b/markov/markov.py
index ec5acbd..a8b749c 100644
--- a/markov/markov.py
+++ b/markov/markov.py
@@ -6,6 +6,7 @@ from redbot.core import commands
 import math
 import random
 import re
+import unicodedata
 from .errors import *
 
 MAX_BLACKLISTED_STRINGS_PER_GUILD = 50
@@ -50,6 +51,10 @@ class Markov(commands.Cog):
         )
 
     async def process_message(self, clean_content: str, guild_id: int, member_id: int):
+        # Normalize
+        clean_content = unicodedata.normalize("NFKC", clean_content)
+        clean_content = clean_content.replace("’", "'")
+
         # Ignore messages with blacklisted strings
         for blacklisted_string in await self.config.guild_from_id(
             guild_id