about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Arjun Satarkar <me@arjunsatarkar.net> 2025-03-01 19:29:30 +0000
committer Arjun Satarkar <me@arjunsatarkar.net> 2025-03-01 19:29:30 +0000
commit 9b6061ee8e408c1308c377a45c1cafe59b739436 (patch)
tree e4f57a2860f5d1c0b872fd37bc1ca109fa97e3d3
parent ea7e692dfb2057ef27fd755f236356ccf0df9872 (diff)
Add output format abstraction and diffable output format [v0.3.0]
-rw-r--r-- Justfile 2
-rw-r--r-- pyproject.toml 2
-rw-r--r-- src/srtfilter/parse.py 28
-rwxr-xr-x src/srtfilter/srtfilter_cli.py 13
4 files changed, 36 insertions, 9 deletions
diff --git a/Justfile b/Justfile
index 3a33629..48e18e2 100644
--- a/Justfile
+++ b/Justfile
@@ -7,7 +7,7 @@ check_style:
black --check src
build: check
- pyproject-build
+ python3 -m build
format:
black src
diff --git a/pyproject.toml b/pyproject.toml
index b175e02..b660215 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "srtfilter"
-version = "0.2.1"
+version = "0.3.0"
authors = [
{ name="Arjun Satarkar", email="me@arjunsatarkar.net" },
]
diff --git a/src/srtfilter/parse.py b/src/srtfilter/parse.py
index cbfb5ea..b53bb37 100644
--- a/src/srtfilter/parse.py
+++ b/src/srtfilter/parse.py
@@ -1,8 +1,15 @@
from __future__ import annotations
import dataclasses
+import enum
import re
+class OutputFormat(enum.Enum):
+ none = enum.auto()
+ srt = enum.auto()
+ diffable_srt = enum.auto()
+
+
class SRT:
# Can't use the one from Timecode because that has named groups, which can't be duplicates
timecode_pattern_str = r"\d\d:\d\d:\d\d,\d\d\d"
@@ -40,11 +47,24 @@ class SRT:
return srt
def __str__(self):
+ return self.to_output_format(OutputFormat.srt)
+
+ def to_output_format(self, format: OutputFormat) -> str:
result = ""
- for counter, event in enumerate(self.events, 1):
- result += f"{counter}\n"
- result += f"{event.start} --> {event.end}\n"
- result += f"{event.content}\n"
+ match format:
+ case OutputFormat.none:
+ pass
+ case OutputFormat.srt:
+ for counter, event in enumerate(self.events, 1):
+ result += f"{counter}\n"
+ result += f"{event.start} --> {event.end}\n"
+ result += f"{event.content}\n"
+ case OutputFormat.diffable_srt:
+ # Not an actual subtitle format, just drops the counter to
+ # make it easier to diff actual changes (in timing/content).
+ for event in self.events:
+ result += f"{event.start} --> {event.end}\n"
+ result += f"{event.content}\n"
return result
diff --git a/src/srtfilter/srtfilter_cli.py b/src/srtfilter/srtfilter_cli.py
index bc44409..30aa942 100755
--- a/src/srtfilter/srtfilter_cli.py
+++ b/src/srtfilter/srtfilter_cli.py
@@ -8,8 +8,15 @@ from .filters import rebreak_lines
@click.command()
@click.argument("in_file_path")
@click.option("--filter", "filter_arg", default="")
-def main(in_file_path: str, filter_arg: str):
- with open(in_file_path) as f:
+@click.option(
+ "--out-format",
+ type=click.Choice([v.name for v in parse.OutputFormat]),
+ default="srt",
+)
+def main(in_file_path: str, filter_arg: str, out_format: str):
+ # Encoding utf-8-sig to handle BOM, which Aegisub seems to generate
+ # when exporting to SRT. This works fine even if there is no BOM.
+ with open(in_file_path, encoding="utf-8-sig") as f:
text = f.read()
srt = parse.SRT.from_str(text)
@@ -21,7 +28,7 @@ def main(in_file_path: str, filter_arg: str):
raise InvalidFilterError(unknown)
srt.events = [filter_module.filter(event) for event in srt.events]
- sys.stdout.write(str(srt))
+ sys.stdout.write(srt.to_output_format(parse.OutputFormat[out_format]))
class InvalidFilterError(Exception):