From df5a7db84432f4f60698b2ddb97a9f8955dd97fb Mon Sep 17 00:00:00 2001 From: Arjun Satarkar Date: Sat, 21 Dec 2024 19:05:04 +0530 Subject: Refactor for modularity, package --- rebreak_lines.py | 70 -------------------------------------------------------- 1 file changed, 70 deletions(-) delete mode 100755 rebreak_lines.py (limited to 'rebreak_lines.py') diff --git a/rebreak_lines.py b/rebreak_lines.py deleted file mode 100755 index 54473e2..0000000 --- a/rebreak_lines.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python3 -""" -Given an SRT file, remove existing line breaks and reinsert them automatically -at word boundaries so as not to exceed 42 characters per line, trying to keep -lines within a single subtitle event at roughly the same length. - -NOTE: Using this script is *generally a bad idea*; like many aspects of -subtitling, placing line breaks benefits from contextual judgement. However, -if an existing subtitle file has no line breaks or far too many, as is the case -sometimes, this is an easy way to improve readability. -""" -import click -import parse_srt -import math -import sys -import typing - -# May still be exceeded if there are no word boundaries to wrap at -MAX_LINE_LENGTH = 42 - - -@click.command() -@click.argument("in_file_path") -def main(in_file_path: str): - with open(in_file_path) as f: - text = f.read() - srt = parse_srt.SRT.from_str(text) - - for event in srt.events: - event.content = rebreak(event.content) - - sys.stdout.write(str(srt)) - - -def rebreak(text: str) -> str: - get_target_line_num: typing.Callable[[int], int] = lambda length: math.ceil( - length / MAX_LINE_LENGTH - ) - text = " ".join(text.split("\n")) - target_line_num = get_target_line_num(len(text)) - - lines: list[str] = [] - for _ in range(target_line_num): - partition_at = round(len(text) / target_line_num) - 1 - - # Move to a word boundary - steps_backward = 0 - for steps_backward, c in enumerate(text[partition_at::-1]): - if c.isspace(): - break - if partition_at - steps_backward != 0: - partition_at -= steps_backward - else: - # Moving the partition backward would give us an empty line, so - # move forward instead to ensure we always make progress. - steps_forward = 0 - for steps_forward, c in enumerate(text[partition_at:]): - if c.isspace(): - break - partition_at += steps_forward - - lines.append(text[: partition_at + 1].strip()) - text = text[partition_at + 1 :] - target_line_num = get_target_line_num(len(text)) - - return ("\n".join(lines) if lines else text) + "\n" - - -if __name__ == "__main__": - main() -- cgit v1.2.3-57-g22cb