From 5aa84d49d95535fd3db4f109ef94ba8238973e87 Mon Sep 17 00:00:00 2001
From: Matt Zhang <set.stun@gmail.com>
Date: Thu, 18 Jan 2024 21:35:15 -0500
Subject: [PATCH] Fix clip span splitting and Streams typing; pass tests and lint

---
 tests/test_subsamplers.py                        | 10 +++++-----
 video2dataset/subsamplers/__init__.py            |  2 +-
 video2dataset/subsamplers/clipping_subsampler.py | 10 +++++-----
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tests/test_subsamplers.py b/tests/test_subsamplers.py
index e6a5b5f0..28ace480 100644
--- a/tests/test_subsamplers.py
+++ b/tests/test_subsamplers.py
@@ -11,6 +11,7 @@
     ClippingSubsampler,
     _get_seconds,
     _split_time_frame,
+    Streams,
     FFProbeSubsampler,
     ResolutionSubsampler,
     FrameSubsampler,
@@ -45,8 +46,8 @@ def test_clipping_subsampler(clips):
     min_length = 5.0 if clips == MULTI else 2.0
     max_length = 999999.0 if clips == MULTI else 3.0
     subsampler = ClippingSubsampler(
-        3,
-        {"video": "mp4", "audio": "mp3"},
+        oom_clip_count=3,
+        encode_formats={"video": "mp4", "audio": "mp3"},
         min_length=min_length,
         max_length=max_length,
         max_length_strategy="all",
@@ -58,7 +59,7 @@ def test_clipping_subsampler(clips):
         "clips": clips,
     }
 
-    streams = {"video": [video_bytes], "audio": [audio_bytes]}
+    streams: Streams = {"video": [video_bytes], "audio": [audio_bytes]}
     stream_fragments, meta_fragments, error_message = subsampler(streams, metadata)
     video_fragments = stream_fragments["video"]
     audio_fragments = stream_fragments["audio"]
@@ -84,7 +85,7 @@ def test_clipping_subsampler(clips):
             s_target, e_target = clips[key_ind]
             s_target, e_target = _get_seconds(s_target), _get_seconds(e_target)
             expected_clips = _split_time_frame(s_target, e_target, min_length, max_length)
-            assert (_get_seconds(s), _get_seconds(e)) in expected_clips
+            assert [_get_seconds(s), _get_seconds(e)] in expected_clips
             assert _get_seconds(e) - _get_seconds(s) >= min_length
 
             s_s, e_s = _get_seconds(s), _get_seconds(e)
@@ -92,7 +93,6 @@ def test_clipping_subsampler(clips):
             video_stream = [stream for stream in probe["streams"] if stream["codec_type"] == "video"][0]
             frag_len = float(video_stream["duration"])
 
-            # currently some segments can be pretty innacurate
             assert abs(frag_len - (e_s - s_s)) < 5.0
 
 
diff --git a/video2dataset/subsamplers/__init__.py b/video2dataset/subsamplers/__init__.py
index 5d4741f8..90e4cd58 100644
--- a/video2dataset/subsamplers/__init__.py
+++ b/video2dataset/subsamplers/__init__.py
@@ -3,7 +3,7 @@
 """
 
 from .audio_rate_subsampler import AudioRateSubsampler
-from .clipping_subsampler import ClippingSubsampler, _get_seconds, _split_time_frame
+from .clipping_subsampler import ClippingSubsampler, _get_seconds, _split_time_frame, Streams
 from .frame_subsampler import FrameSubsampler
 from .ffprobe_subsampler import FFProbeSubsampler
 from .noop_subsampler import NoOpSubsampler
diff --git a/video2dataset/subsamplers/clipping_subsampler.py b/video2dataset/subsamplers/clipping_subsampler.py
index 3f18d703..b3ae717a 100644
--- a/video2dataset/subsamplers/clipping_subsampler.py
+++ b/video2dataset/subsamplers/clipping_subsampler.py
@@ -22,8 +22,8 @@ class EncodeFormats(TypedDict):
 
 
 class Streams(TypedDict):
-    video: bytes
-    audio: bytes
+    video: list[bytes]
+    audio: list[bytes]
 
 
 def _get_seconds(t: str | float) -> float:
@@ -50,7 +50,7 @@ def _split_time_frame(s: float, e: float, min_length: float, max_length: float)
     time_d = e - s
     n_full_clips = int(time_d // max_length)
     clip_spans = [[s + i * max_length, s + (i + 1) * max_length] for i in range(n_full_clips)] + (
-        [[s + (n_full_clips - 1) * max_length, e]] if time_d % max_length > min_length else []
+        [[s + (n_full_clips) * max_length, e]] if time_d % max_length > min_length else []
     )
     return clip_spans
 
@@ -94,7 +94,7 @@ def _adjust_clip_spans(
 
 def _collate_clip_spans(clip_spans: list[ClipSpans]) -> tuple[str, list[int]]:
     """Collates clip spans into a single string for ffmpeg and a list of clip idxs"""
-    clip_times = [0.0]
+    clip_times = []
     clip_idxs = []
     e_prev = 0.0
     clip_idx = 0
@@ -216,7 +216,7 @@ def _get_clips(
                 raise err
 
             clips[k] = []
-            for _, (_, clip_idx) in enumerate(zip(clip_spans, clip_idxs)):
+            for clip_idx in clip_idxs:
                 with open(stream_clips[clip_idx], "rb") as vid_f:
                     clip_bytes = vid_f.read()
                     clips[k].append(clip_bytes)