Initial commit

author: Arjun Satarkar <me@arjunsatarkar.net> 2024-12-21 09:43:42 +0000
committer: Arjun Satarkar <me@arjunsatarkar.net> 2024-12-21 09:43:42 +0000
commit: 60d0756a15b6c7fb739c6bbf06ef7f966822bbdc (patch)
tree: aadca3bb45ccbbad7ee88fe9896852566dfcbdc4 /rebreak_lines.py
download: srtfilter-60d0756a15b6c7fb739c6bbf06ef7f966822bbdc.tar
srtfilter-60d0756a15b6c7fb739c6bbf06ef7f966822bbdc.tar.gz
srtfilter-60d0756a15b6c7fb739c6bbf06ef7f966822bbdc.zip
1 files changed, 64 insertions, 0 deletions
diff --git a/rebreak_lines.py b/rebreak_lines.py
new file mode 100755
index 0000000..41594a3
--- /dev/null
+++ b/rebreak_lines.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+"""
+Given an SRT file, remove existing line breaks and reinsert them automatically
+at word boundaries so as not to exceed 42 characters per line, trying to keep
+lines within a single subtitle event at roughly the same length.
+
+NOTE: Using this script is *generally a bad idea*; like many aspects of
+subtitling, placing line breaks benefits from contextual judgement. However,
+if an existing subtitle file has no line breaks or far too many, as is the case
+sometimes, this is an easy way to improve readability.
+"""
+import click
+import parse_srt
+import math
+import sys
+from typing import List
+
+# Target upper bound on characters per subtitle line; rebreak() derives the
+# number of lines for an event from it.
+# May still be exceeded if there are no word boundaries to wrap at
+MAX_LINE_LENGTH = 42
+
+
+# Command-line entry point: reads the SRT file at the given path, re-wraps
+# every event, and writes the resulting SRT to standard output.
+# (Kept as a comment rather than a docstring, since click would display a
+# docstring as the command's --help text.)
+@click.command()
+@click.argument("in_file_path")
+def main(in_file_path: str):
+    with open(in_file_path) as srt_file:
+        raw_srt = srt_file.read()
+
+    subtitles = parse_srt.SRT.from_str(raw_srt)
+    for cue in subtitles.events:
+        # Line breaks are recomputed independently for each event.
+        cue.content = rebreak(cue.content)
+
+    sys.stdout.write(str(subtitles))
+
+
+def rebreak(text: str, max_line_length: "int | None" = None) -> str:
+    """
+    Re-wrap the text of one subtitle event into lines of similar length.
+
+    Existing newlines are replaced by spaces. The text is then broken at
+    whitespace, aiming for ceil(len(text) / max_line_length) lines of roughly
+    equal length. A line is only longer than max_line_length when there is no
+    earlier whitespace to break at.
+
+    Args:
+        text: Content of the event; a trailing newline is optional.
+        max_line_length: Target maximum number of characters per line. When
+            None, MAX_LINE_LENGTH is used.
+
+    Returns:
+        The re-wrapped text, terminated by a single newline. No word of the
+        input is dropped.
+
+    Raises:
+        ValueError: If max_line_length is less than 1.
+    """
+    if max_line_length is None:
+        max_line_length = MAX_LINE_LENGTH
+    if max_line_length < 1:
+        raise ValueError("max_line_length must be at least 1")
+
+    text = " ".join(text.split("\n"))
+
+    lines: List[str] = []
+    # Loop on the remaining text instead of a line count fixed up front:
+    # moving a break to a word boundary changes how much text is left, so a
+    # fixed count can run out of text early (division by zero) or stop while
+    # text remains (silently dropping the tail).
+    while text.strip():
+        target_line_num = math.ceil(len(text) / max_line_length)
+        if target_line_num == 1:
+            # The remainder fits on one line. Take it whole rather than
+            # searching for a break, which would split off the last word when
+            # the text does not end in whitespace.
+            lines.append(text.strip())
+            break
+
+        ideal = round(len(text) / target_line_num) - 1
+
+        # Prefer the closest whitespace at or before the ideal position.
+        # Index 0 is excluded because breaking there would yield an empty line.
+        partition_at = next(
+            (i for i in range(ideal, 0, -1) if text[i].isspace()), None
+        )
+        if partition_at is None:
+            # Nothing usable behind the ideal position: use the next
+            # whitespace ahead, or the whole remainder if there is none.
+            partition_at = next(
+                (i for i in range(ideal, len(text)) if text[i].isspace()),
+                len(text) - 1,
+            )
+
+        line = text[: partition_at + 1].strip()
+        if line:
+            # Whitespace-only chunks are skipped so no blank line is emitted.
+            lines.append(line)
+        text = text[partition_at + 1 :]
+
+    return "\n".join(lines) + "\n"
+
+
+# Invoke the click command only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
author	Arjun Satarkar <me@arjunsatarkar.net>	2024-12-21 09:43:42 +0000
committer	Arjun Satarkar <me@arjunsatarkar.net>	2024-12-21 09:43:42 +0000
commit	60d0756a15b6c7fb739c6bbf06ef7f966822bbdc (patch)
tree	aadca3bb45ccbbad7ee88fe9896852566dfcbdc4 /rebreak_lines.py
download	srtfilter-60d0756a15b6c7fb739c6bbf06ef7f966822bbdc.tar srtfilter-60d0756a15b6c7fb739c6bbf06ef7f966822bbdc.tar.gz srtfilter-60d0756a15b6c7fb739c6bbf06ef7f966822bbdc.zip