Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
459c402
add show_recording_off
Jan 7, 2026
b8a3876
fix_ruff
MaelleTtrt Jan 9, 2026
639c6d6
fix_ruff2
MaelleTtrt Jan 9, 2026
b2ebfd7
adapt RecordingPeriod class
MaelleTtrt Jan 9, 2026
f74587e
add test_recording_period
MaelleTtrt Jan 12, 2026
c1600bf
add test_shade_no_effort
MaelleTtrt Jan 12, 2026
0e22298
delete useless code in shade_no_effort
MaelleTtrt Jan 13, 2026
e889c29
delete test shading_no_effort
MaelleTtrt Jan 13, 2026
2349713
fix ruff
MaelleTtrt Jan 13, 2026
77f71d1
delete matlab boring files
MaelleTtrt Jan 13, 2026
e8c73e0
few modifs
mathieudpnt Jan 14, 2026
fd913cf
effort legend
mathieudpnt Jan 14, 2026
df40ec9
ruff fix
mathieudpnt Jan 14, 2026
3bed1db
Merge pull request #3 from mathieudpnt/test/effort
MaelleTtrt Jan 14, 2026
0c74f8c
better shade handling
mathieudpnt Jan 15, 2026
ca539cb
histo legend fix
mathieudpnt Jan 15, 2026
360804d
histo legend fix
mathieudpnt Jan 15, 2026
ec8deec
ruff fix
mathieudpnt Jan 15, 2026
b7beb53
shade effort on scatter plot
mathieudpnt Jan 16, 2026
92d0a95
better season mngmt on plots
mathieudpnt Jan 16, 2026
552c729
differentiate LT and ST data plots
mathieudpnt Jan 19, 2026
d0cda89
test plot utils import fix
mathieudpnt Jan 19, 2026
4b9a2f1
ruff simplification
mathieudpnt Jan 19, 2026
bc647a6
draw effort positional argument
mathieudpnt Jan 19, 2026
cb07568
syntax
mathieudpnt Jan 19, 2026
a7ba68c
Merge branch 'add_effort' of https://github.com/MaelleTtrt/post_proce…
MaelleTtrt Jan 20, 2026
9d3188a
hotfix partial/no data bars
mathieudpnt Jan 20, 2026
4c2fc0d
ruff import
mathieudpnt Jan 20, 2026
c0e968f
RecordingPeriod hotfix
mathieudpnt Jan 20, 2026
dcb70fb
syntax
mathieudpnt Jan 20, 2026
47b170d
legend histo shade
mathieudpnt Jan 20, 2026
adb0d90
Merge pull request #4 from mathieudpnt/test/effort
MaelleTtrt Jan 22, 2026
915342e
Merge branch 'add_effort' of https://github.com/MaelleTtrt/post_proce…
MaelleTtrt Jan 22, 2026
ef0e2b6
improve test_recording_period
MaelleTtrt Jan 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions src/post_processing/dataclass/data_aplose.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,6 @@ def plot(
color = kwargs.get("color")
season = kwargs.get("season")
effort = kwargs.get("effort")

if not bin_size:
msg = "'bin_size' missing for histogram plot."
raise ValueError(msg)
Expand All @@ -417,25 +416,28 @@ def plot(
season = kwargs.get("season", False)
bin_size = kwargs.get("bin_size")

return heatmap(df=df_filtered,
ax=ax,
bin_size=bin_size,
time_range=time,
show_rise_set=show_rise_set,
season=season,
coordinates=self.coordinates,
)
return heatmap(
df=df_filtered,
ax=ax,
bin_size=bin_size,
time_range=time,
show_rise_set=show_rise_set,
season=season,
coordinates=self.coordinates,
)

if mode == "scatter":
show_rise_set = kwargs.get("show_rise_set", True)
season = kwargs.get("season", False)
effort = kwargs.get("effort")

return scatter(df=df_filtered,
ax=ax,
time_range=time,
show_rise_set=show_rise_set,
season=season,
coordinates=self.coordinates,
effort=effort,
)

if mode == "agreement":
Expand Down
8 changes: 7 additions & 1 deletion src/post_processing/dataclass/detection_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from __future__ import annotations

from dataclasses import dataclass
from dataclasses import dataclass, fields
from pathlib import Path
from typing import TYPE_CHECKING, Literal

Expand Down Expand Up @@ -44,6 +44,12 @@ class DetectionFilter:
box: bool = False
filename_format: str = None

def __getitem__(self, key: str):
    """Return the value of the dataclass field named *key*.

    Parameters
    ----------
    key : str
        Name of one of this dataclass's declared fields.

    Raises
    ------
    KeyError
        If *key* does not match any declared field name.

    """
    # Only declared dataclass fields are exposed; arbitrary attributes
    # (methods, properties) must not be reachable through item access.
    for field in fields(self):
        if field.name == key:
            return getattr(self, key)
    raise KeyError(key)

@classmethod
def from_yaml(
cls,
Expand Down
155 changes: 122 additions & 33 deletions src/post_processing/dataclass/recording_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,16 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING

from osekit.config import TIMESTAMP_FORMATS_EXPORTED_FILES
from osekit.utils.timestamp_utils import strptime_from_text
from pandas import (
IntervalIndex,
Series,
Timedelta,
cut,
date_range,
read_csv,
to_datetime,
)

from post_processing.utils.core_utils import (
get_time_range_and_bin_size,
localize_timestamps,
)
from post_processing.utils.core_utils import round_begin_end_timestamps
from post_processing.utils.filtering_utils import (
find_delimiter,
)
Expand All @@ -33,7 +30,7 @@

@dataclass(frozen=True)
class RecordingPeriod:
"""A class to handle recording periods."""
"""Represents recording effort over time, aggregated into bins."""

counts: Series
timebin_origin: Timedelta
Expand All @@ -42,33 +39,125 @@ class RecordingPeriod:
def from_path(
cls,
config: DetectionFilter,
date_format: str = TIMESTAMP_FORMATS_EXPORTED_FILES,
*,
bin_size: Timedelta | BaseOffset,
) -> RecordingPeriod:
"""Return a list of Timestamps corresponding to recording periods."""
"""Vectorised creation of recording coverage from CSV with start/end datetimes.

This method reads a CSV with columns:
- "start_recording"
- "end_recording"
- "start_deployment"
- "end_deployment"

It computes the **effective recording interval** as the intersection between
recording and deployment periods, builds a fine-grained timeline at
`timebin_origin` resolution, and aggregates effort into `bin_size` bins.

Parameters
----------
config
Configuration object containing at least:
- `timestamp_file`: path to CSV
- `timebin_origin`: Timedelta resolution of detections
bin_size : Timedelta or BaseOffset
Size of the aggregation bin (e.g. Timedelta("1H") or "1D").

Returns
-------
RecordingPeriod
Object containing `counts` (Series indexed by IntervalIndex) and
`timebin_origin`.

"""
# Read CSV and parse datetime columns
timestamp_file = config.timestamp_file
delim = find_delimiter(timestamp_file)
timestamp_df = read_csv(timestamp_file, delimiter=delim)

if "timestamp" in timestamp_df.columns:
msg = "Parsing 'timestamp' column not implemented yet."
raise NotImplementedError(msg)

if "filename" in timestamp_df.columns:
timestamps = [
strptime_from_text(ts, date_format)
for ts in timestamp_df["filename"]
]
timestamps = localize_timestamps(timestamps, config.timezone)
time_vector, bin_size = get_time_range_and_bin_size(timestamps, bin_size)

binned = cut(timestamps, time_vector)
max_annot = bin_size / config.timebin_origin

return cls(counts=binned.value_counts().sort_index().clip(upper=max_annot),
timebin_origin=config.timebin_origin,
)

msg = "Could not parse timestamps."
raise ValueError(msg)
df = read_csv(
config.timestamp_file,
parse_dates=[
"start_recording",
"end_recording",
"start_deployment",
"end_deployment",
],
delimiter=delim,
)

if df.empty:
msg = "CSV is empty."
raise ValueError(msg)

# Ensure all required columns are present
required_columns = {
"start_recording",
"end_recording",
"start_deployment",
"end_deployment",
}

missing = required_columns - set(df.columns)

if missing:
msg = f"CSV is missing required columns: {', '.join(sorted(missing))}"
raise ValueError(msg)

# Normalise timezones: convert to UTC, then remove tz info (naive)
for col in [
"start_recording",
"end_recording",
"start_deployment",
"end_deployment",
]:
df[col] = to_datetime(df[col], utc=True).dt.tz_convert(None)

# Compute effective recording intervals (intersection)
df["effective_start_recording"] = df[
["start_recording", "start_deployment"]
].max(axis=1)

df["effective_end_recording"] = df[
["end_recording", "end_deployment"]
].min(axis=1)

# Remove rows with no actual recording interval
df = df.loc[
df["effective_start_recording"] < df["effective_end_recording"]
].copy()

if df.empty:
msg = "No valid recording intervals after deployment intersection."
raise ValueError(msg)

# Build fine-grained timeline at `timebin_origin` resolution
origin = config.timebin_origin
time_index = date_range(
start=df["effective_start_recording"].min(),
end=df["effective_end_recording"].max(),
freq=origin,
)

# Initialise effort vector (0 = no recording, 1 = recording)
# Compare each timestamp to all intervals in a vectorised manner
effort = Series(0, index=time_index)

# Vectorised interval coverage
t_vals = time_index.to_numpy()[:, None]
start_vals = df["effective_start_recording"].to_numpy()
end_vals = df["effective_end_recording"].to_numpy()

# Boolean matrix: True if the timestamp is within any recording interval
covered = (t_vals >= start_vals) & (t_vals < end_vals)
effort[:] = covered.any(axis=1).astype(int)

# Aggregate effort into user-defined bin_size
counts = effort.resample(bin_size, closed="left", label="left").sum()

counts.index = IntervalIndex.from_arrays(
counts.index,
counts.index +
round_begin_end_timestamps(list(counts.index), bin_size)[-1],
closed="left",
)

return cls(counts=counts, timebin_origin=origin)
Loading