From 4ffc19bbbb2395b8204b852df325fe4b3c07e273 Mon Sep 17 00:00:00 2001 From: Arjun Satarkar Date: Sat, 21 Dec 2024 16:19:13 +0530 Subject: Add type checking, rework parsing, fix bugs --- parse_srt.py | 75 ++++++++++++++++++++++-------------------------------------- 1 file changed, 27 insertions(+), 48 deletions(-) (limited to 'parse_srt.py') diff --git a/parse_srt.py b/parse_srt.py index acd72c0..2e710ea 100644 --- a/parse_srt.py +++ b/parse_srt.py @@ -1,69 +1,45 @@ from __future__ import annotations import dataclasses -import enum -import itertools import re -from typing import List @dataclasses.dataclass class Event: - start: str | None = None - end: str | None = None - content: str | None = None + start: str + end: str + content: str class SRT: def __init__(self): - self.events: List[Event] = [] + self.events: list[Event] = [] @staticmethod def from_str(text: str) -> SRT: - class ParseState(enum.Enum): - COUNTER = enum.auto() - TIMING = enum.auto() - CONTENT = enum.auto() - - PARSE_STATES = itertools.cycle(iter(ParseState)) TIMESTAMP_CAPTURE = r"(\d\d:\d\d:\d\d,\d\d\d)" TIMING_REGEX = rf"{TIMESTAMP_CAPTURE} --> {TIMESTAMP_CAPTURE}" srt = SRT() - lines = text.split("\n") counter = 1 - state = next(PARSE_STATES) - event = Event() - for line_num, line in enumerate(lines, 1): - if not line: - match state: - case ParseState.CONTENT: - srt.events.append(event) - event = Event() - state = next(PARSE_STATES) - case ParseState.COUNTER: - pass - case _: - raise ParseError(f"Unexpected blank line (line {line_num})") - continue - match state: - case ParseState.COUNTER: - if int(line) == counter: - counter += 1 - state = next(PARSE_STATES) - else: - raise ParseError( - f"Invalid counter, expected {counter} (line {line_num})" - ) - case ParseState.TIMING: - match = re.fullmatch(TIMING_REGEX, line) - if match is None: - raise ParseError(f"Invalid timing info (line {line_num})") - event.start, event.end = match[1], match[2] - state = next(PARSE_STATES) - case ParseState.CONTENT: - event.content = ( - event.content if event.content is not None else "" - ) + f"{line}\n" + events = [event for event in text.split("\n\n") if event.strip()] + for event_str in events: + lines = event_str.split("\n") + counter_str, timing_str, content_lines = lines[0], lines[1], lines[2:] + + if int(counter_str) != counter: + raise ParseError( + f"Invalid counter '{counter_str}'; expected {counter}", event_str + ) + counter += 1 + + match = re.fullmatch(TIMING_REGEX, timing_str) + if match is None: + raise ParseError(f"Invalid timing info '{timing_str}'", event_str) + + content = "\n".join(content_lines + [""]) + + srt.events.append(Event(match[1], match[2], content)) + return srt def __str__(self): @@ -76,4 +52,7 @@ class SRT: class ParseError(Exception): - pass + def __init__(self, reason: str, event_str: str): + super().__init__(f"{reason}\nwhile parsing event:\n{event_str}") + self.reason = reason + self.event_str = event_str -- cgit v1.2.3-57-g22cb