markov: support brackets, separate out append_token logic

author: Arjun Satarkar <me@arjunsatarkar.net> 2024-03-19 18:12:44 +0000
committer: Arjun Satarkar <me@arjunsatarkar.net> 2024-03-19 18:12:44 +0000
commit: 92a0d3eb4fd919ff9ed33b81d379ccb77af43026 (patch)
tree: c2fc790a99456783d0ca0f8e5ccf2c1ebaace3ae /markov/markov.py
parent: 314597b069c4fcd0f7d6c4f7796b0048809e0c8a (diff)
download: aps-cogs-92a0d3eb4fd919ff9ed33b81d379ccb77af43026.tar
aps-cogs-92a0d3eb4fd919ff9ed33b81d379ccb77af43026.tar.gz
aps-cogs-92a0d3eb4fd919ff9ed33b81d379ccb77af43026.zip
1 files changed, 16 insertions, 10 deletions
diff --git a/markov/markov.py b/markov/markov.py
index b750519..e206800 100644
--- a/markov/markov.py
+++ b/markov/markov.py
@@ -86,13 +86,13 @@ class Markov(commands.Cog):
 
         # Extract words, punctuation, custom emoji, and mentions as
         # individual tokens, then add a sentinel (empty string) on either end.
-        # NOTE: if changing the punctuation in the regex, also change PUNCTUATION in generate()
+        # NOTE: if changing the punctuation in the regex, also change PUNCTUATION in append_token()
         tokens = (
             [""]
             + [
                 token
                 for token in re.findall(
-                    r"[\w']+|[\.,!?\/;]|<a?:\w+:\d+>|<#\d+>|<@!?\d+>", content
+                    r"[\w']+|[\.,!?\/;\(\)]|<a?:\w+:\d+>|<#\d+>|<@!?\d+>", content
                 )
                 if len(token) <= MAX_TOKEN_LENGTH
             ]
@@ -385,6 +385,19 @@ class Markov(commands.Cog):
             await db.commit()
         await ctx.reply("All markov data for this guild has been deleted.")
 
+    def append_token(self, text, token):
+        # NOTE: if changing PUNCTUATION, also change the regex in process_message() with the corresponding note
+        PUNCTUATION = r".,!?/;()"
+        if token == "/":
+            text = text[:-1] + token
+        elif token == "(":
+            text += token
+        elif token in PUNCTUATION:
+            text = text[:-1] + token + " "
+        else:
+            text += token + " "
+        return text
+
     @markov.command()
     async def generate(self, ctx, member: discord.Member | None):
         if not await self.config.guild(ctx.guild).use_messages():
@@ -458,8 +471,6 @@ class Markov(commands.Cog):
             next_token, frequency = row
             return next_token, frequency
 
-        # NOTE: if changing PUNCTUATION, also change the regex in process_message() with the corresponding note
-        PUNCTUATION = r".,!?/;"
         member_id = member.id if member else None
         result = ""
         token = ""
@@ -483,12 +494,7 @@ class Markov(commands.Cog):
                     if next_token is None:
                         raise NoNextTokenError(ctx.guild.id, member_id, token, i)
                     if random.randint(1, completion_count) <= frequency:
-                        if next_token == "/":
-                            result = result[:-1] + next_token
-                        elif next_token in PUNCTUATION:
-                            result = result[:-1] + next_token + " "
-                        else:
-                            result += next_token + " "
+                        result = self.append_token(result, next_token)
                         token = next_token
                         break
author	Arjun Satarkar <me@arjunsatarkar.net>	2024-03-19 18:12:44 +0000
committer	Arjun Satarkar <me@arjunsatarkar.net>	2024-03-19 18:12:44 +0000
commit	92a0d3eb4fd919ff9ed33b81d379ccb77af43026 (patch)
tree	c2fc790a99456783d0ca0f8e5ccf2c1ebaace3ae /markov/markov.py
parent	314597b069c4fcd0f7d6c4f7796b0048809e0c8a (diff)
download	aps-cogs-92a0d3eb4fd919ff9ed33b81d379ccb77af43026.tar aps-cogs-92a0d3eb4fd919ff9ed33b81d379ccb77af43026.tar.gz aps-cogs-92a0d3eb4fd919ff9ed33b81d379ccb77af43026.zip