aboutsummaryrefslogtreecommitdiff
path: root/rebreak_lines.py
diff options
context:
space:
mode:
Diffstat (limited to 'rebreak_lines.py')
-rwxr-xr-xrebreak_lines.py70
1 files changed, 0 insertions, 70 deletions
diff --git a/rebreak_lines.py b/rebreak_lines.py
deleted file mode 100755
index 54473e2..0000000
--- a/rebreak_lines.py
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/env python3
-"""
-Given an SRT file, remove existing line breaks and reinsert them automatically
-at word boundaries so as not to exceed 42 characters per line, trying to keep
-lines within a single subtitle event at roughly the same length.
-
-NOTE: Using this script is *generally a bad idea*; like many aspects of
-subtitling, placing line breaks benefits from contextual judgement. However,
-if an existing subtitle file has no line breaks or far too many, as is the case
-sometimes, this is an easy way to improve readability.
-"""
-import click
-import parse_srt
-import math
-import sys
-import typing
-
-# May still be exceeded if there are no word boundaries to wrap at
-MAX_LINE_LENGTH = 42
-
-
-@click.command()
-@click.argument("in_file_path")
-def main(in_file_path: str):
- with open(in_file_path) as f:
- text = f.read()
- srt = parse_srt.SRT.from_str(text)
-
- for event in srt.events:
- event.content = rebreak(event.content)
-
- sys.stdout.write(str(srt))
-
-
-def rebreak(text: str) -> str:
- get_target_line_num: typing.Callable[[int], int] = lambda length: math.ceil(
- length / MAX_LINE_LENGTH
- )
- text = " ".join(text.split("\n"))
- target_line_num = get_target_line_num(len(text))
-
- lines: list[str] = []
- for _ in range(target_line_num):
- partition_at = round(len(text) / target_line_num) - 1
-
- # Move to a word boundary
- steps_backward = 0
- for steps_backward, c in enumerate(text[partition_at::-1]):
- if c.isspace():
- break
- if partition_at - steps_backward != 0:
- partition_at -= steps_backward
- else:
- # Moving the partition backward would give us an empty line, so
- # move forward instead to ensure we always make progress.
- steps_forward = 0
- for steps_forward, c in enumerate(text[partition_at:]):
- if c.isspace():
- break
- partition_at += steps_forward
-
- lines.append(text[: partition_at + 1].strip())
- text = text[partition_at + 1 :]
- target_line_num = get_target_line_num(len(text))
-
- return ("\n".join(lines) if lines else text) + "\n"
-
-
-if __name__ == "__main__":
- main()