about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Arjun Satarkar <me@arjunsatarkar.net> 2024-12-21 14:45:02 +0000
committer Arjun Satarkar <me@arjunsatarkar.net> 2024-12-21 14:46:33 +0000
commit daee6a2921d42c9d9d87c86da3f1f8fa9e8d911e (patch)
tree d1f066f927bf4c7cedb6c741a1a1e72df323c79a
parent f1fb64f592913615d76324964a533202c9b324f1 (diff)
download srtfilter-daee6a2921d42c9d9d87c86da3f1f8fa9e8d911e.tar
srtfilter-daee6a2921d42c9d9d87c86da3f1f8fa9e8d911e.tar.gz
srtfilter-daee6a2921d42c9d9d87c86da3f1f8fa9e8d911e.zip
Parse timecodes into numeric components, improve Justfile
-rw-r--r-- Justfile 6
-rw-r--r-- README.md 13
-rw-r--r-- pyproject.toml 2
-rw-r--r-- src/srtfilter/parse.py 53
4 files changed, 54 insertions, 20 deletions
diff --git a/Justfile b/Justfile
index 76bebd4..3a33629 100644
--- a/Justfile
+++ b/Justfile
@@ -6,5 +6,11 @@ typecheck:
check_style:
black --check src
+build: check
+ pyproject-build
+
format:
black src
+
+clean:
+ rm -rf dist
diff --git a/README.md b/README.md
index 1c6e9e5..7b5cea3 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ More filters can be added in the `src/srtfilter/filters` directory.
### Library
-```
+```python
import srtfilter.parse as srtparse
import sys
@@ -28,10 +28,13 @@ with open("input.srt") as f:
srt = srtparse.SRT.from_str(f.read())
for event in srt.events:
- print(event.start, event.end, event.content)
- event.content = event.content.upper() # for example
+ # Make every event start and end a second later
+ event.start.second += 1
+ event.end.second += 1
+ # Capitalize all the displayed text
+ event.content = event.content.upper()
-# srt.__str__() produces a valid SRT file from the parsed representation
+# SRT.__str__() produces a valid SRT file from the parsed representation
sys.stdout.write(str(srt))
```
@@ -44,5 +47,5 @@ MIT License; see `LICENSE.txt`.
- [x] Parse SRT
- [x] Make CLI tool modular
- [x] Add filter for breaking lines
-- [ ] Parse timecodes and allow arithmetic with them
+- [x] Parse timecodes into their numeric components
- [ ] More filters? As and when use-cases emerge
diff --git a/pyproject.toml b/pyproject.toml
index ed89b87..4a69910 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "srtfilter"
-version = "0.1.1"
+version = "0.2.0"
authors = [
{ name="Arjun Satarkar", email="me@arjunsatarkar.net" },
]
diff --git a/src/srtfilter/parse.py b/src/srtfilter/parse.py
index 2e710ea..c5a0e24 100644
--- a/src/srtfilter/parse.py
+++ b/src/srtfilter/parse.py
@@ -3,21 +3,18 @@ import dataclasses
import re
-@dataclasses.dataclass
-class Event:
- start: str
- end: str
- content: str
-
-
class SRT:
+ # Can't reuse Timecode's pattern here: it has named groups, and a group name can't appear twice in one regex
+ timecode_pattern_str = r"\d\d:\d\d:\d\d,\d\d\d"
+ timing_line_pattern = re.compile(
+ rf"({timecode_pattern_str}) --> ({timecode_pattern_str})"
+ )
+
def __init__(self):
self.events: list[Event] = []
@staticmethod
def from_str(text: str) -> SRT:
- TIMESTAMP_CAPTURE = r"(\d\d:\d\d:\d\d,\d\d\d)"
- TIMING_REGEX = rf"{TIMESTAMP_CAPTURE} --> {TIMESTAMP_CAPTURE}"
srt = SRT()
counter = 1
@@ -32,13 +29,13 @@ class SRT:
)
counter += 1
- match = re.fullmatch(TIMING_REGEX, timing_str)
+ match = re.fullmatch(SRT.timing_line_pattern, timing_str)
if match is None:
raise ParseError(f"Invalid timing info '{timing_str}'", event_str)
content = "\n".join(content_lines + [""])
- srt.events.append(Event(match[1], match[2], content))
+ srt.events.append(Event(Timecode(match[1]), Timecode(match[2]), content))
return srt
@@ -51,8 +48,36 @@ class SRT:
return result
+@dataclasses.dataclass
+class Event:
+ start: Timecode
+ end: Timecode
+ content: str
+
+
+class Timecode:
+ timecode_pattern = re.compile(
+ r"(?P<hour>\d\d):(?P<minute>\d\d):(?P<second>\d\d),(?P<millisecond>\d\d\d)"
+ )
+
+ def __init__(self, timecode_str: str):
+ match = re.fullmatch(self.timecode_pattern, timecode_str)
+ if match is None:
+ raise ParseError(f"Invalid timecode '{timecode_str}'", timecode_str)
+ self.hour = int(match["hour"])
+ self.minute = int(match["minute"])
+ self.second = int(match["second"])
+ self.millisecond = int(match["millisecond"])
+
+ def __repr__(self):
+ return f"Timecode('{self.hour:02}:{self.minute:02}:{self.second:02},{self.millisecond:03}')"
+
+ def __str__(self):
+ return f"{self.hour:02}:{self.minute:02}:{self.second:02},{self.millisecond:03}"
+
+
class ParseError(Exception):
- def __init__(self, reason: str, event_str: str):
- super().__init__(f"{reason}\nwhile parsing event:\n{event_str}")
+ def __init__(self, reason: str, context_str: str):
+ super().__init__(f"{reason}\nwhile parsing the following:\n{context_str}")
self.reason = reason
- self.event_str = event_str
+ self.event_str = context_str