diff options
author | Arjun Satarkar <me@arjunsatarkar.net> | 2024-12-21 09:43:42 +0000 |
---|---|---|
committer | Arjun Satarkar <me@arjunsatarkar.net> | 2024-12-21 09:43:42 +0000 |
commit | 60d0756a15b6c7fb739c6bbf06ef7f966822bbdc (patch) | |
tree | aadca3bb45ccbbad7ee88fe9896852566dfcbdc4 /rebreak_lines.py | |
download | srtfilter-60d0756a15b6c7fb739c6bbf06ef7f966822bbdc.tar srtfilter-60d0756a15b6c7fb739c6bbf06ef7f966822bbdc.tar.gz srtfilter-60d0756a15b6c7fb739c6bbf06ef7f966822bbdc.zip |
Initial commit
Diffstat (limited to 'rebreak_lines.py')
-rwxr-xr-x | rebreak_lines.py | 64 |
1 files changed, 64 insertions, 0 deletions
#!/usr/bin/env python3
"""
Given an SRT file, remove existing line breaks and reinsert them automatically
at word boundaries so as not to exceed 42 characters per line, trying to keep
lines within a single subtitle event at roughly the same length.

NOTE: Using this script is *generally a bad idea*; like many aspects of
subtitling, placing line breaks benefits from contextual judgement. However,
if an existing subtitle file has no line breaks or far too many, as is the case
sometimes, this is an easy way to improve readability.
"""
import click
import parse_srt
import math
import sys
from typing import List

# May still be exceeded if there are no word boundaries to wrap at
MAX_LINE_LENGTH = 42


@click.command()
@click.argument("in_file_path")
def main(in_file_path: str):
    """Rebreak the lines of every subtitle event in IN_FILE_PATH.

    The resulting SRT is written to standard output; the input file is not
    modified.
    """
    with open(in_file_path) as f:
        text = f.read()
    srt = parse_srt.SRT.from_str(text)

    for event in srt.events:
        event.content = rebreak(event.content)

    # NOTE(review): relies on str(srt) serialising back to SRT text -- confirm
    # against parse_srt.
    sys.stdout.write(str(srt))


def _find_break(text: str, start: int) -> int:
    """Return the index of the whitespace character at which to break *text*.

    The nearest whitespace at or before *start* is preferred. Index 0 is never
    chosen, since breaking there would produce an empty line. If there is no
    such boundary, the first whitespace after *start* is used instead, and if
    the text has no usable boundary at all the index of its last character is
    returned so that the whole text becomes a single (over-long) line.
    """
    for index in range(start, 0, -1):
        if text[index].isspace():
            return index
    for index in range(start + 1, len(text)):
        if text[index].isspace():
            return index
    return len(text) - 1


def rebreak(text: str) -> str:
    """Return *text* with its line breaks replaced by evenly balanced ones.

    Existing newlines are turned into spaces, then the text is cut at word
    boundaries into the smallest number of lines that keeps each line within
    MAX_LINE_LENGTH, aiming for lines of roughly equal length. A line exceeds
    the limit only when it contains no whitespace to break at.

    Leading and trailing whitespace is discarded. The result always ends with
    a single newline; empty or whitespace-only input yields just that newline.

    >>> rebreak("hello world")
    'hello world\\n'
    """
    # Strip so that surrounding whitespace neither counts toward the length
    # nor turns into an empty line.
    text = " ".join(text.split("\n")).strip()

    lines: List[str] = []
    # Loop until the text is used up rather than for a precomputed number of
    # lines: breaking early at a word boundary can leave more text than the
    # initial estimate allowed for, and that remainder must not be dropped.
    while text:
        target_line_num = math.ceil(len(text) / MAX_LINE_LENGTH)
        if target_line_num == 1:
            # What is left already fits on one line.
            lines.append(text)
            break

        # Aim for an even split of the remaining text, then move to a word
        # boundary.
        partition_at = _find_break(text, round(len(text) / target_line_num) - 1)

        lines.append(text[: partition_at + 1].strip())
        text = text[partition_at + 1 :].lstrip()

    return "\n".join(lines) + "\n"


if __name__ == "__main__":
    main()