diff options
author | Arjun Satarkar <me@arjunsatarkar.net> | 2024-12-21 09:43:42 +0000 |
---|---|---|
committer | Arjun Satarkar <me@arjunsatarkar.net> | 2024-12-21 09:43:42 +0000 |
commit | 60d0756a15b6c7fb739c6bbf06ef7f966822bbdc (patch) | |
tree | aadca3bb45ccbbad7ee88fe9896852566dfcbdc4 /parse_srt.py | |
download | srtfilter-60d0756a15b6c7fb739c6bbf06ef7f966822bbdc.tar srtfilter-60d0756a15b6c7fb739c6bbf06ef7f966822bbdc.tar.gz srtfilter-60d0756a15b6c7fb739c6bbf06ef7f966822bbdc.zip |
Initial commit
Diffstat (limited to 'parse_srt.py')
-rw-r--r-- | parse_srt.py | 79 |
1 files changed, 79 insertions, 0 deletions
# Minimal SubRip (.srt) subtitle parser and serializer.
from __future__ import annotations

import dataclasses
import enum
import itertools
import re
from typing import List


@dataclasses.dataclass
class Event:
    """A single subtitle cue.

    ``start`` and ``end`` hold the raw ``HH:MM:SS,mmm`` strings captured from
    the timing line.  ``content`` holds the cue text with every line
    (including the last one) terminated by a newline, or ``None`` when the
    cue has no text.
    """

    start: str | None = None
    end: str | None = None
    content: str | None = None


class SRT:
    """An ordered list of subtitle events that round-trips through ``str``."""

    def __init__(self):
        self.events: List[Event] = []

    @staticmethod
    def from_str(text: str) -> SRT:
        """Parse SubRip text into an :class:`SRT`.

        Each cue is a counter line (1, 2, 3, ...), a timing line
        (``HH:MM:SS,mmm --> HH:MM:SS,mmm``) and zero or more content lines,
        terminated by a blank line or by the end of the input.  Extra blank
        lines between cues are ignored.  A leading byte-order mark and CRLF
        line endings are accepted.

        Raises:
            ParseError: if a counter is non-numeric or out of sequence, a
                timing line is malformed, a blank line appears where a
                timing line is expected, or the input ends right after a
                counter line.
        """

        class ParseState(enum.Enum):
            COUNTER = enum.auto()
            TIMING = enum.auto()
            CONTENT = enum.auto()

        # Endless COUNTER -> TIMING -> CONTENT -> COUNTER ... sequence.
        PARSE_STATES = itertools.cycle(iter(ParseState))
        TIMESTAMP_CAPTURE = r"(\d\d:\d\d:\d\d,\d\d\d)"
        TIMING_REGEX = re.compile(
            rf"{TIMESTAMP_CAPTURE} --> {TIMESTAMP_CAPTURE}"
        )

        srt = SRT()
        # Files saved on Windows commonly start with a BOM and use CRLF line
        # endings; without normalizing, the counter and timing checks below
        # would reject every such file.
        lines = text.removeprefix("\ufeff").replace("\r\n", "\n").split("\n")
        counter = 1
        state = next(PARSE_STATES)
        event = Event()
        for line_num, line in enumerate(lines, 1):
            if not line:
                if state is ParseState.CONTENT:
                    # A blank line terminates the current cue.
                    srt.events.append(event)
                    event = Event()
                    state = next(PARSE_STATES)
                elif state is ParseState.TIMING:
                    raise ParseError(f"Unexpected blank line (line {line_num})")
                # Blank lines between cues (COUNTER state) are skipped.
                continue

            if state is ParseState.COUNTER:
                try:
                    value = int(line)
                except ValueError:
                    # Report non-numeric counters as ParseError, like every
                    # other malformed-input case, not as a bare ValueError.
                    value = None
                if value != counter:
                    raise ParseError(
                        f"Invalid counter, expected {counter} (line {line_num})"
                    )
                counter += 1
                state = next(PARSE_STATES)
            elif state is ParseState.TIMING:
                timing = TIMING_REGEX.fullmatch(line)
                if timing is None:
                    raise ParseError(f"Invalid timing info (line {line_num})")
                event.start, event.end = timing[1], timing[2]
                state = next(PARSE_STATES)
            else:  # ParseState.CONTENT
                event.content = (
                    event.content if event.content is not None else ""
                ) + f"{line}\n"

        # Input that does not end with a newline leaves the last cue
        # unterminated; keep it instead of silently dropping it.
        if state is ParseState.CONTENT:
            srt.events.append(event)
        elif state is ParseState.TIMING:
            raise ParseError(
                f"Unexpected end of input, expected timing info (line {len(lines)})"
            )
        return srt

    def __str__(self):
        """Serialize the events as SubRip text, renumbering cues from 1."""
        chunks = []
        for counter, event in enumerate(self.events, 1):
            # A cue without text has content None; emit nothing for it rather
            # than the literal string "None".
            content = event.content or ""
            if content and not content.endswith("\n"):
                # Keep the blank separator line intact for hand-built events.
                content += "\n"
            chunks.append(
                f"{counter}\n{event.start} --> {event.end}\n{content}\n"
            )
        return "".join(chunks)


class ParseError(Exception):
    """Raised by :meth:`SRT.from_str` when the input is not valid SubRip text."""