Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
459c402
add show_recording_off
Jan 7, 2026
b8a3876
fix_ruff
MaelleTtrt Jan 9, 2026
639c6d6
fix_ruff2
MaelleTtrt Jan 9, 2026
b2ebfd7
adapt RecordingPeriod class
MaelleTtrt Jan 9, 2026
f74587e
add test_recording_period
MaelleTtrt Jan 12, 2026
c1600bf
add test_shade_no_effort
MaelleTtrt Jan 12, 2026
0e22298
delete useless code in shade_no_effort
MaelleTtrt Jan 13, 2026
e889c29
delete test shading_no_effort
MaelleTtrt Jan 13, 2026
2349713
fix ruff
MaelleTtrt Jan 13, 2026
77f71d1
delete matlab boring files
MaelleTtrt Jan 13, 2026
e8c73e0
few modifs
mathieudpnt Jan 14, 2026
fd913cf
effort legend
mathieudpnt Jan 14, 2026
df40ec9
ruff fix
mathieudpnt Jan 14, 2026
3bed1db
Merge pull request #3 from mathieudpnt/test/effort
MaelleTtrt Jan 14, 2026
0c74f8c
better shade handling
mathieudpnt Jan 15, 2026
ca539cb
histo legend fix
mathieudpnt Jan 15, 2026
360804d
histo legend fix
mathieudpnt Jan 15, 2026
ec8deec
ruff fix
mathieudpnt Jan 15, 2026
b7beb53
shade effort on scatter plot
mathieudpnt Jan 16, 2026
92d0a95
better season mngmt on plots
mathieudpnt Jan 16, 2026
552c729
differentiate LT and ST data plots
mathieudpnt Jan 19, 2026
d0cda89
test plot utils import fix
mathieudpnt Jan 19, 2026
4b9a2f1
ruff simplification
mathieudpnt Jan 19, 2026
bc647a6
draw effort positional argument
mathieudpnt Jan 19, 2026
cb07568
syntax
mathieudpnt Jan 19, 2026
a7ba68c
Merge branch 'add_effort' of https://github.com/MaelleTtrt/post_proce…
MaelleTtrt Jan 20, 2026
9d3188a
hotfix partial/no data bars
mathieudpnt Jan 20, 2026
4c2fc0d
ruff import
mathieudpnt Jan 20, 2026
c0e968f
RecordingPeriod hotfix
mathieudpnt Jan 20, 2026
dcb70fb
syntax
mathieudpnt Jan 20, 2026
47b170d
legend histo shade
mathieudpnt Jan 20, 2026
adb0d90
Merge pull request #4 from mathieudpnt/test/effort
MaelleTtrt Jan 22, 2026
915342e
Merge branch 'add_effort' of https://github.com/MaelleTtrt/post_proce…
MaelleTtrt Jan 22, 2026
ef0e2b6
improve test_recording_period
MaelleTtrt Jan 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions src/post_processing/dataclass/data_aplose.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,6 @@ def plot(
color = kwargs.get("color")
season = kwargs.get("season")
effort = kwargs.get("effort")

if not bin_size:
msg = "'bin_size' missing for histogram plot."
raise ValueError(msg)
Expand All @@ -417,25 +416,28 @@ def plot(
season = kwargs.get("season", False)
bin_size = kwargs.get("bin_size")

return heatmap(df=df_filtered,
ax=ax,
bin_size=bin_size,
time_range=time,
show_rise_set=show_rise_set,
season=season,
coordinates=self.coordinates,
)
return heatmap(
df=df_filtered,
ax=ax,
bin_size=bin_size,
time_range=time,
show_rise_set=show_rise_set,
season=season,
coordinates=self.coordinates,
)

if mode == "scatter":
show_rise_set = kwargs.get("show_rise_set", True)
season = kwargs.get("season", False)
effort = kwargs.get("effort")

return scatter(df=df_filtered,
ax=ax,
time_range=time,
show_rise_set=show_rise_set,
season=season,
coordinates=self.coordinates,
effort=effort,
)

if mode == "agreement":
Expand Down
8 changes: 7 additions & 1 deletion src/post_processing/dataclass/detection_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from __future__ import annotations

from dataclasses import dataclass
from dataclasses import dataclass, fields
from pathlib import Path
from typing import TYPE_CHECKING, Literal

Expand Down Expand Up @@ -44,6 +44,12 @@ class DetectionFilter:
box: bool = False
filename_format: str = None

def __getitem__(self, key: str):
    """Return the value of the dataclass field named *key*.

    Parameters
    ----------
    key : str
        Name of one of this dataclass's declared fields.

    Raises
    ------
    KeyError
        If *key* does not match any declared field name.

    """
    # Only declared dataclass fields are exposed; arbitrary attributes
    # (methods, properties) must not be reachable through item access.
    for field in fields(self):
        if field.name == key:
            return getattr(self, key)
    raise KeyError(key)

@classmethod
def from_yaml(
cls,
Expand Down
155 changes: 122 additions & 33 deletions src/post_processing/dataclass/recording_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,16 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING

from osekit.config import TIMESTAMP_FORMATS_EXPORTED_FILES
from osekit.utils.timestamp_utils import strptime_from_text
from pandas import (
IntervalIndex,
Series,
Timedelta,
cut,
date_range,
read_csv,
to_datetime,
)

from post_processing.utils.core_utils import (
get_time_range_and_bin_size,
localize_timestamps,
)
from post_processing.utils.core_utils import round_begin_end_timestamps
from post_processing.utils.filtering_utils import (
find_delimiter,
)
Expand All @@ -33,7 +30,7 @@

@dataclass(frozen=True)
class RecordingPeriod:
"""A class to handle recording periods."""
"""Represents recording effort over time, aggregated into bins."""

counts: Series
timebin_origin: Timedelta
Expand All @@ -42,33 +39,125 @@ class RecordingPeriod:
def from_path(
cls,
config: DetectionFilter,
date_format: str = TIMESTAMP_FORMATS_EXPORTED_FILES,
*,
bin_size: Timedelta | BaseOffset,
) -> RecordingPeriod:
"""Return a list of Timestamps corresponding to recording periods."""
"""Vectorised creation of recording coverage from CSV with start/end datetimes.

This method reads a CSV with columns:
- "start_recording"
- "end_recording"
- "start_deployment"
- "end_deployment"

It computes the **effective recording interval** as the intersection between
recording and deployment periods, builds a fine-grained timeline at
`timebin_origin` resolution, and aggregates effort into `bin_size` bins.

Parameters
----------
config
Configuration object containing at least:
- `timestamp_file`: path to CSV
- `timebin_origin`: Timedelta resolution of detections
bin_size : Timedelta or BaseOffset
Size of the aggregation bin (e.g. Timedelta("1H") or "1D").

Returns
-------
RecordingPeriod
Object containing `counts` (Series indexed by IntervalIndex) and
`timebin_origin`.

"""
# Read CSV and parse datetime columns
timestamp_file = config.timestamp_file
delim = find_delimiter(timestamp_file)
timestamp_df = read_csv(timestamp_file, delimiter=delim)

if "timestamp" in timestamp_df.columns:
msg = "Parsing 'timestamp' column not implemented yet."
raise NotImplementedError(msg)

if "filename" in timestamp_df.columns:
timestamps = [
strptime_from_text(ts, date_format)
for ts in timestamp_df["filename"]
]
timestamps = localize_timestamps(timestamps, config.timezone)
time_vector, bin_size = get_time_range_and_bin_size(timestamps, bin_size)

binned = cut(timestamps, time_vector)
max_annot = bin_size / config.timebin_origin

return cls(counts=binned.value_counts().sort_index().clip(upper=max_annot),
timebin_origin=config.timebin_origin,
)

msg = "Could not parse timestamps."
raise ValueError(msg)
df = read_csv(
config.timestamp_file,
parse_dates=[
"start_recording",
"end_recording",
"start_deployment",
"end_deployment",
],
delimiter=delim,
)

if df.empty:
msg = "CSV is empty."
raise ValueError(msg)

# Ensure all required columns are present
required_columns = {
"start_recording",
"end_recording",
"start_deployment",
"end_deployment",
}

missing = required_columns - set(df.columns)

if missing:
msg = f"CSV is missing required columns: {', '.join(sorted(missing))}"
raise ValueError(msg)

# Normalise timezones: convert to UTC, then remove tz info (naive)
for col in [
"start_recording",
"end_recording",
"start_deployment",
"end_deployment",
]:
df[col] = to_datetime(df[col], utc=True).dt.tz_convert(None)

# Compute effective recording intervals (intersection)
df["effective_start_recording"] = df[
["start_recording", "start_deployment"]
].max(axis=1)

df["effective_end_recording"] = df[
["end_recording", "end_deployment"]
].min(axis=1)

# Remove rows with no actual recording interval
df = df.loc[
df["effective_start_recording"] < df["effective_end_recording"]
].copy()

if df.empty:
msg = "No valid recording intervals after deployment intersection."
raise ValueError(msg)

# Build fine-grained timeline at `timebin_origin` resolution
origin = config.timebin_origin
time_index = date_range(
start=df["effective_start_recording"].min(),
end=df["effective_end_recording"].max(),
freq=origin,
)

# Initialise effort vector (0 = no recording, 1 = recording)
# Compare each timestamp to all intervals in a vectorised manner
effort = Series(0, index=time_index)

# Vectorised interval coverage
t_vals = time_index.to_numpy()[:, None]
start_vals = df["effective_start_recording"].to_numpy()
end_vals = df["effective_end_recording"].to_numpy()

# Boolean matrix: True if the timestamp is within any recording interval
covered = (t_vals >= start_vals) & (t_vals < end_vals)
effort[:] = covered.any(axis=1).astype(int)

# Aggregate effort into user-defined bin_size
counts = effort.resample(bin_size, closed="left", label="left").sum()

counts.index = IntervalIndex.from_arrays(
counts.index,
counts.index +
round_begin_end_timestamps(list(counts.index), bin_size)[-1],
closed="left",
)

return cls(counts=counts, timebin_origin=origin)
Loading