From 92a0d3eb4fd919ff9ed33b81d379ccb77af43026 Mon Sep 17 00:00:00 2001
From: Arjun Satarkar <me@arjunsatarkar.net>
Date: Tue, 19 Mar 2024 14:12:44 -0400
Subject: markov: support brackets, separate out append_token logic

---
 markov/info.json |  2 +-
 markov/markov.py | 26 ++++++++++++++++----------
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/markov/info.json b/markov/info.json
index 22ddb18..6002815 100644
--- a/markov/info.json
+++ b/markov/info.json
@@ -2,5 +2,5 @@
     "author": ["Arjun Satarkar"],
     "description": "Use Markov chains to mimic users or the server as a whole.",
     "short": "Markov chains based on message content.",
-    "requirements": ["aiosqlite"]
+    "requirements": ["aiosqlite", "more-itertools"]
 }
diff --git a/markov/markov.py b/markov/markov.py
index b750519..e206800 100644
--- a/markov/markov.py
+++ b/markov/markov.py
@@ -86,13 +86,13 @@ class Markov(commands.Cog):
 
         # Extract words, punctuation, custom emoji, and mentions as
         # individual tokens, then add a sentinel (empty string) on either end.
-        # NOTE: if changing the punctuation in the regex, also change PUNCTUATION in generate()
+        # NOTE: if changing the punctuation in the regex, also change PUNCTUATION in append_token()
         tokens = (
             [""]
             + [
                 token
                 for token in re.findall(
-                    r"[\w']+|[\.,!?\/;]|<a?:\w+:\d+>|<#\d+>|<@!?\d+>", content
+                    r"[\w']+|[\.,!?\/;\(\)]|<a?:\w+:\d+>|<#\d+>|<@!?\d+>", content
                 )
                 if len(token) <= MAX_TOKEN_LENGTH
             ]
@@ -385,6 +385,19 @@ class Markov(commands.Cog):
             await db.commit()
         await ctx.reply("All markov data for this guild has been deleted.")
 
+    def append_token(self, text, token):
+        """Return text with token appended, spaced so punctuation attaches to its neighbours."""
+        # NOTE: if changing PUNCTUATION, also change the regex in process_message() with the corresponding note
+        PUNCTUATION = r".,!?/;()"
+        if token == "(":
+            return text + token  # no trailing space: the next token follows the bracket directly
+        if token not in PUNCTUATION:
+            return text + token + " "
+        # Drop only a real separator space. After "(" or "/" none was added, and
+        # slicing text[:-1] unconditionally would delete that character instead.
+        text = text.removesuffix(" ")
+        return text + token if token == "/" else text + token + " "
+
     @markov.command()
     async def generate(self, ctx, member: discord.Member | None):
         if not await self.config.guild(ctx.guild).use_messages():
@@ -458,8 +471,6 @@ class Markov(commands.Cog):
             next_token, frequency = row
             return next_token, frequency
 
-        # NOTE: if changing PUNCTUATION, also change the regex in process_message() with the corresponding note
-        PUNCTUATION = r".,!?/;"
         member_id = member.id if member else None
         result = ""
         token = ""
@@ -483,12 +494,7 @@ class Markov(commands.Cog):
                     if next_token is None:
                         raise NoNextTokenError(ctx.guild.id, member_id, token, i)
                     if random.randint(1, completion_count) <= frequency:
-                        if next_token == "/":
-                            result = result[:-1] + next_token
-                        elif next_token in PUNCTUATION:
-                            result = result[:-1] + next_token + " "
-                        else:
-                            result += next_token + " "
+                        result = self.append_token(result, next_token)
                         token = next_token
                         break
 
-- 
cgit v1.2.3-70-g09d2