From 459c402c1ac34ced3fbe0024e24b71476ba724a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20TORTEROTOT?= Date: Wed, 7 Jan 2026 17:29:22 +0100 Subject: [PATCH 01/30] add show_recording_off --- src/post_processing/dataclass/data_aplose.py | 4 +- .../dataclass/recording_period.py | 126 ++++++++++++++---- src/post_processing/utils/filtering_utils.py | 4 +- .../PAMGuardMatlab-main/.DS_Store | Bin 0 -> 6148 bytes .../PAMGuardMatlab-main/.MATLABDriveTag | 1 + .../pgmatlab/.MATLABDriveTag | 1 + .../pgmatlab/Array/.MATLABDriveTag | 1 + src/post_processing/utils/plot_utils.py | 77 +++++++++-- 8 files changed, 169 insertions(+), 45 deletions(-) create mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.DS_Store create mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag create mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag create mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag diff --git a/src/post_processing/dataclass/data_aplose.py b/src/post_processing/dataclass/data_aplose.py index 98e6d9c..f604f80 100644 --- a/src/post_processing/dataclass/data_aplose.py +++ b/src/post_processing/dataclass/data_aplose.py @@ -393,7 +393,7 @@ def plot( color = kwargs.get("color") season = kwargs.get("season") effort = kwargs.get("effort") - + show_recording_OFF = kwargs.get("show_recording_OFF") if not bin_size: msg = "'bin_size' missing for histogram plot." raise ValueError(msg) @@ -409,7 +409,7 @@ def plot( color=color, season=season, effort=effort, - coordinates=(self.lat, self.lon), + coordinates=(self.lat, self.lon) ) if mode == "heatmap": diff --git a/src/post_processing/dataclass/recording_period.py b/src/post_processing/dataclass/recording_period.py index 4c09722..15def1e 100644 --- a/src/post_processing/dataclass/recording_period.py +++ b/src/post_processing/dataclass/recording_period.py @@ -16,6 +16,8 @@ cut, read_csv, ) +from pandas.tseries.offsets import BaseOffset +import pandas as pd from post_processing.utils.core_utils import ( get_time_range_and_bin_size, @@ -33,42 +35,108 @@ @dataclass(frozen=True) class RecordingPeriod: - """A class to handle recording periods.""" - counts: Series timebin_origin: Timedelta @classmethod def from_path( cls, - config: DetectionFilter, - date_format: str = TIMESTAMP_FORMATS_EXPORTED_FILES, + config, *, bin_size: Timedelta | BaseOffset, - ) -> RecordingPeriod: - """Return a list of Timestamps corresponding to recording periods.""" + ) -> "RecordingPeriod": + """Vectorized creation of recording coverage from CSV with start/end datetimes. + + CSV must have columns 'start_recording' and 'end_recording'. + bin_size can be a Timedelta (e.g., pd.Timedelta("1H")) or a pandas offset (e.g., "1D"). + """ + # 1. Read CSV and parse datetimes timestamp_file = config.timestamp_file delim = find_delimiter(timestamp_file) - timestamp_df = read_csv(timestamp_file, delimiter=delim) - - if "timestamp" in timestamp_df.columns: - msg = "Parsing 'timestamp' column not implemented yet." 
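# --- [editor's example: illustrative sketch, not part of the original patch] ---
# Minimal way to drive the vectorized `from_path` added above. It assumes only
# what this hunk shows: a CSV with `start_recording`/`end_recording` columns and
# a config exposing `timestamp_file` and `timebin_origin` (as `DetectionFilter`
# does elsewhere in this package). `PlanningConfig` and the CSV content are
# invented for the example.
from dataclasses import dataclass
from pathlib import Path

from pandas import Timedelta

from post_processing.dataclass.recording_period import RecordingPeriod


@dataclass(frozen=True)
class PlanningConfig:
    timestamp_file: Path
    timebin_origin: Timedelta


csv = Path("planning.csv")  # hypothetical file
csv.write_text(
    "start_recording,end_recording\n"
    "2024-01-01 00:00:00+0000,2024-01-02 00:00:00+0000\n",
)
config = PlanningConfig(timestamp_file=csv, timebin_origin=Timedelta("1min"))
period = RecordingPeriod.from_path(config=config, bin_size=Timedelta("1h"))
# period.counts: recorded minutes per hourly interval (60 for a fully-on hour)
# --- [end editor's example] ---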
- raise NotImplementedError(msg) - - if "filename" in timestamp_df.columns: - timestamps = [ - strptime_from_text(ts, date_format) - for ts in timestamp_df["filename"] - ] - timestamps = localize_timestamps(timestamps, config.timezone) - time_vector, bin_size = get_time_range_and_bin_size(timestamps, bin_size) - - binned = cut(timestamps, time_vector) - max_annot = bin_size / config.timebin_origin - - return cls(counts=binned.value_counts().sort_index().clip(upper=max_annot), - timebin_origin=config.timebin_origin, - ) - - msg = "Could not parse timestamps." - raise ValueError(msg) + df = pd.read_csv( + config.timestamp_file, + parse_dates=["start_recording", "end_recording"], + delimiter=delim + ) + + if df.empty: + raise ValueError("CSV is empty.") + + # 2. Normalize timezones if needed + df["start_recording"] = ( + pd.to_datetime(df["start_recording"], utc=True).dt.tz_convert(None) + ) + df["end_recording"] = ( + pd.to_datetime(df["end_recording"], utc=True).dt.tz_convert(None) + ) + + # Build fine-grained timeline (timebin_origin resolution) + origin = config.timebin_origin + time_index = pd.date_range( + start=df["start_recording"].min(), + end=df["end_recording"].max(), + freq=origin, + ) + + # Initialize effort vector + effort = pd.Series(0, index=time_index) + + # Vectorized interval coverage + tvals = time_index.values[:, None] + start_vals = df["start_recording"].values + end_vals = df["end_recording"].values + + covered = (tvals >= start_vals) & (tvals < end_vals) + effort[:] = covered.any(axis=1).astype(int) + + # Aggregate effort into bin_size + counts = effort.resample(bin_size).sum() + counts.index = pd.interval_range( + start=counts.index[0], + periods=len(counts), + freq=bin_size, + closed="left", + ) + return cls(counts=counts, timebin_origin=origin) + +# @dataclass(frozen=True) +# class RecordingPeriod: +# """A class to handle recording periods.""" +# +# counts: Series +# timebin_origin: Timedelta +# +# @classmethod +# def from_path( +# cls, +# config: DetectionFilter, +# date_format: str = TIMESTAMP_FORMATS_EXPORTED_FILES, +# *, +# bin_size: Timedelta | BaseOffset, +# ) -> RecordingPeriod: +# """Return a list of Timestamps corresponding to recording periods.""" +# timestamp_file = config.timestamp_file +# delim = find_delimiter(timestamp_file) +# timestamp_df = read_csv(timestamp_file, delimiter=delim) +# +# if "timestamp" in timestamp_df.columns: +# msg = "Parsing 'timestamp' column not implemented yet." +# raise NotImplementedError(msg) +# +# if "filename" in timestamp_df.columns: +# timestamps = [ +# strptime_from_text(ts, date_format) +# for ts in timestamp_df["filename"] +# ] +# timestamps = localize_timestamps(timestamps, config.timezone) +# time_vector, bin_size = get_time_range_and_bin_size(timestamps, bin_size) +# +# binned = cut(timestamps, time_vector) +# max_annot = bin_size / config.timebin_origin +# +# return cls(counts=binned.value_counts().sort_index().clip(upper=max_annot), +# timebin_origin=config.timebin_origin, +# ) +# +# msg = "Could not parse timestamps." +# raise ValueError(msg) diff --git a/src/post_processing/utils/filtering_utils.py b/src/post_processing/utils/filtering_utils.py index c391ff6..650b5a8 100644 --- a/src/post_processing/utils/filtering_utils.py +++ b/src/post_processing/utils/filtering_utils.py @@ -509,8 +509,8 @@ def reshape_timebin( timebin_new: Timedelta The size of the new time bin. timestamp_audio: list[Timestamp] - A list of Timestamp objects corresponding to the shape - in which the data should be reshaped. 
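# --- [editor's example: illustrative sketch, not part of the original patch] ---
# Building the `timestamp_audio` argument documented in this hunk: a Series of
# file-start Timestamps. The format string mirrors the one used by the test
# suite later in this series; the file names are invented.
import pytz
from pandas import Series, to_datetime

filenames = Series(["2025_01_25_06_20_00", "2025_01_25_06_20_10"])
timestamp_audio = to_datetime(
    filenames,
    format="%Y_%m_%d_%H_%M_%S",
).dt.tz_localize(pytz.UTC)
# Each entry marks the start of one wav file; reshape_timebin() uses these
# starts to re-bin the detections into `timebin_new`-sized bins.
# --- [end editor's example] ---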
+ A list of Timestamp objects corresponding to the start of each wav + that corresponds to a detection Returns ------- diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.DS_Store b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..8cbbca90b6e5166f54b08e62aaab675fe94f04f0 GIT binary patch literal 6148 zcmeHK%}T>S5Z-O8-BN@c6nb3nTCkQQl)(EZ>xnykjo@uf;KKT0MGoe+f+2zj`Rl2EONYMF$o&h?B#G(=p{2Emc4#&vuTKv)3b}~@k=~Q)SDrb1MganEEez%%1&Lc-Xe)rG6i3eSHu$% z1H=F^Kn&~;1NICM&HX8vDkTPpfgdt}`-6mr=vvGT>a7Dlczs5H4G{%&d`looi>}4o zAb3EyNd+{i+&(e5Ne8>MajwPOph;(3&kW<(nTy8@*Rz9N>Tt$ggY+c^h=FYes;2AU z`F{n!Oz9(kJB51005R~-7~r*$HyT1w_H5lL56@Z&?FkwR#^tDhfWCDJfDY~>E$uXZ bi8{o&7IT9*3fgr#AYBABA@m^zeu056q9RP< literal 0 HcmV?d00001 diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag new file mode 100644 index 0000000..84059a2 --- /dev/null +++ b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag @@ -0,0 +1 @@ +3496f669-9381-4974-bb7c-5cc1ddcb05d4 \ No newline at end of file diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag new file mode 100644 index 0000000..df9fcd4 --- /dev/null +++ b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag @@ -0,0 +1 @@ +ee53bc03-ef5e-44bc-aea4-8fae1e2a0b9f \ No newline at end of file diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag new file mode 100644 index 0000000..656b51a --- /dev/null +++ b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag @@ -0,0 +1 @@ +c4dd0a9d-e15d-496f-91ff-d9ff561a4fa0 \ No newline at end of file diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index f21e343..2746a85 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -13,7 +13,7 @@ from matplotlib.dates import num2date from matplotlib.ticker import PercentFormatter from numpy import ceil, histogram, polyfit -from pandas import DataFrame, DatetimeIndex, Index, Timedelta, Timestamp, date_range +from pandas import DataFrame, DatetimeIndex, Index, Timedelta, Timestamp, date_range, Series from pandas.tseries import frequencies from scipy.stats import pearsonr from seaborn import scatterplot @@ -107,8 +107,9 @@ def histo( else: legend_labels = None - if effort: - normalize_counts_by_effort(df, effort, time_bin) + # if effort: + # normalize_counts_by_effort(df, effort, time_bin) + n_groups = len(labels) if legend_labels else 1 bar_width = bin_size / n_groups @@ -128,6 +129,8 @@ def histo( bar_kwargs["label"] = legend_labels[i] ax.bar(bin_starts + offset, df.iloc[:, i], **bar_kwargs) + if kwargs.get("show_recording_OFF"): + ax.set_facecolor("lightgrey") if len(df.columns) > 1 and legend: ax.legend(labels=legend_labels, bbox_to_anchor=(1.01, 1), loc="upper left") @@ -138,7 +141,7 @@ def histo( f" - bin size: {bin_size_str})" ) 
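# --- [editor's example: illustrative sketch, not part of the original patch] ---
# General scheme of the grouped bars in `histo` above: each of the n plotted
# series gets a bar of width `bin_size / n`, offset inside its time bin. The
# exact offset arithmetic is truncated in the hunk, so the data and offsets
# below are assumptions, not the function's verbatim code.
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
from pandas import DataFrame, Timedelta, date_range

index = date_range("2025-01-25", periods=4, freq="1D")
df = DataFrame({"ann1": [3, 0, 5, 2], "ann2": [1, 4, 2, 0]}, index=index)

bin_size = Timedelta("1D")
n_groups = len(df.columns)
width_days = (bin_size / n_groups).total_seconds() / 86400  # matplotlib date units
bin_starts = mdates.date2num(df.index)

fig, ax = plt.subplots()
for i, col in enumerate(df.columns):
    # shift each series by one bar width so the groups sit side by side
    ax.bar(bin_starts + i * width_days, df[col], width=width_days,
           align="edge", label=col)
ax.legend()
# --- [end editor's example] ---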
ax.set_ylabel(y_label) - set_y_axis_to_percentage(ax) if effort else set_dynamic_ylim(ax, df) + #set_y_axis_to_percentage(ax) if effort else set_dynamic_ylim(ax, df) set_plot_title(ax, annotators, labels) ax.set_xlim(begin, end) @@ -659,20 +662,70 @@ def shade_no_effort( """ + """Shade areas of the plot where no observation effort was made.""" width_days = bar_width.total_seconds() / 86400 - no_effort_bins = bin_starts[observed.counts.reindex(bin_starts) == 0] - for ts in no_effort_bins: - start = mdates.date2num(ts) - ax.axvspan(start, start + width_days, color="grey", alpha=0.08, zorder=1.5) + # Convert effort IntervalIndex → DatetimeIndex (bin starts) + effort_by_start = Series( + observed.counts.values, + index=[i.left for i in observed.counts.index], + ).tz_localize("UTC") + + # Align effort to plotting bins + effort_aligned = effort_by_start.reindex(bin_starts) + max_effort = bar_width / observed.timebin_origin + effort_fraction = effort_aligned / max_effort + + no_effort = effort_fraction == 0 + partial_effort = (effort_fraction > 0) & (effort_fraction < 1) + # Draw partial effort first (lighter) + for ts in bin_starts[partial_effort]: + start = mdates.date2num(ts - bar_width) + ax.axvspan( + start, + start + width_days, + facecolor="0.65", + alpha=0.08, + linewidth=0, + zorder=0, + ) + + # Draw no effort on top (darker) + for ts in bin_starts[no_effort]: + start = mdates.date2num(ts - bar_width) + ax.axvspan( + start, + start + width_days, + facecolor="0.45", + alpha=0.15, + linewidth=0, + zorder=0, + ) + + # Outside data coverage x_min, x_max = ax.get_xlim() - data_min = mdates.date2num(bin_starts[0]) - data_max = mdates.date2num(bin_starts[-1]) + width_days + data_min = mdates.date2num(bin_starts[0] - bar_width) + data_max = mdates.date2num(bin_starts[-1] + bar_width) if x_min < data_min: - ax.axvspan(x_min, data_min, color="grey", alpha=0.08, zorder=1.5) + ax.axvspan( + x_min, + data_min, + facecolor="0.45", + alpha=0.15, + linewidth=0, + zorder=0, + ) if x_max > data_max: - ax.axvspan(data_max, x_max, color="grey", alpha=0.08, zorder=1.5) + ax.axvspan( + data_max, + x_max, + facecolor="0.45", + alpha=0.15, + linewidth=0, + zorder=0, + ) + ax.set_xlim(x_min, x_max) From b8a38764ce04b065dc67ae30523ae0c1fa6c1865 Mon Sep 17 00:00:00 2001 From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com> Date: Fri, 9 Jan 2026 10:37:01 +0100 Subject: [PATCH 02/30] fix_ruff --- src/post_processing/dataclass/data_aplose.py | 1 - .../dataclass/recording_period.py | 66 ++----------------- 2 files changed, 7 insertions(+), 60 deletions(-) diff --git a/src/post_processing/dataclass/data_aplose.py b/src/post_processing/dataclass/data_aplose.py index f604f80..76b6d98 100644 --- a/src/post_processing/dataclass/data_aplose.py +++ b/src/post_processing/dataclass/data_aplose.py @@ -393,7 +393,6 @@ def plot( color = kwargs.get("color") season = kwargs.get("season") effort = kwargs.get("effort") - show_recording_OFF = kwargs.get("show_recording_OFF") if not bin_size: msg = "'bin_size' missing for histogram plot." 
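# --- [editor's note: worked example, not part of the original patch] ---
# Arithmetic behind the effort fractions in `shade_no_effort` (added by the
# first patch above), using sizes that reappear in this series' tests: with a
# weekly bar and a 1-minute `timebin_origin`,
#     max_effort = bar_width / timebin_origin
#                = Timedelta("7D") / Timedelta("1min") = 10080
# so a bin with 0 recorded minutes is shaded as "no effort", one with
# 1560 minutes as "partial effort" (1560 / 10080 ~ 0.15), and one with
# 10080 minutes is left unshaded (full effort).
# --- [end editor's note] ---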
raise ValueError(msg) diff --git a/src/post_processing/dataclass/recording_period.py b/src/post_processing/dataclass/recording_period.py index 15def1e..901d932 100644 --- a/src/post_processing/dataclass/recording_period.py +++ b/src/post_processing/dataclass/recording_period.py @@ -8,21 +8,12 @@ from dataclasses import dataclass from typing import TYPE_CHECKING -from osekit.config import TIMESTAMP_FORMATS_EXPORTED_FILES -from osekit.utils.timestamp_utils import strptime_from_text +import pandas as pd from pandas import ( Series, Timedelta, - cut, - read_csv, ) -from pandas.tseries.offsets import BaseOffset -import pandas as pd -from post_processing.utils.core_utils import ( - get_time_range_and_bin_size, - localize_timestamps, -) from post_processing.utils.filtering_utils import ( find_delimiter, ) @@ -30,8 +21,6 @@ if TYPE_CHECKING: from pandas.tseries.offsets import BaseOffset - from post_processing.dataclass.detection_filter import DetectionFilter - @dataclass(frozen=True) class RecordingPeriod: @@ -44,19 +33,20 @@ def from_path( config, *, bin_size: Timedelta | BaseOffset, - ) -> "RecordingPeriod": - """Vectorized creation of recording coverage from CSV with start/end datetimes. + ) -> RecordingPeriod: + """Vectorized creation of recording coverage from CSV with start/end datetime. CSV must have columns 'start_recording' and 'end_recording'. - bin_size can be a Timedelta (e.g., pd.Timedelta("1H")) or a pandas offset (e.g., "1D"). + bin_size can be a Timedelta (e.g., pd.Timedelta("1H")) + or a pandas offset (e.g., "1D"). """ - # 1. Read CSV and parse datetimes + # 1. Read CSV and parse datetime timestamp_file = config.timestamp_file delim = find_delimiter(timestamp_file) df = pd.read_csv( config.timestamp_file, parse_dates=["start_recording", "end_recording"], - delimiter=delim + delimiter=delim, ) if df.empty: @@ -98,45 +88,3 @@ def from_path( closed="left", ) return cls(counts=counts, timebin_origin=origin) - -# @dataclass(frozen=True) -# class RecordingPeriod: -# """A class to handle recording periods.""" -# -# counts: Series -# timebin_origin: Timedelta -# -# @classmethod -# def from_path( -# cls, -# config: DetectionFilter, -# date_format: str = TIMESTAMP_FORMATS_EXPORTED_FILES, -# *, -# bin_size: Timedelta | BaseOffset, -# ) -> RecordingPeriod: -# """Return a list of Timestamps corresponding to recording periods.""" -# timestamp_file = config.timestamp_file -# delim = find_delimiter(timestamp_file) -# timestamp_df = read_csv(timestamp_file, delimiter=delim) -# -# if "timestamp" in timestamp_df.columns: -# msg = "Parsing 'timestamp' column not implemented yet." -# raise NotImplementedError(msg) -# -# if "filename" in timestamp_df.columns: -# timestamps = [ -# strptime_from_text(ts, date_format) -# for ts in timestamp_df["filename"] -# ] -# timestamps = localize_timestamps(timestamps, config.timezone) -# time_vector, bin_size = get_time_range_and_bin_size(timestamps, bin_size) -# -# binned = cut(timestamps, time_vector) -# max_annot = bin_size / config.timebin_origin -# -# return cls(counts=binned.value_counts().sort_index().clip(upper=max_annot), -# timebin_origin=config.timebin_origin, -# ) -# -# msg = "Could not parse timestamps." 
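# --- [editor's example: illustrative sketch, not part of the original patch] ---
# The broadcasted interval test used by `from_path` above, isolated: a (T, 1)
# column of timestamps compared against (N,) bound arrays yields a (T, N)
# boolean matrix, and its row-wise `any` flags timestamps covered by at least
# one recording interval. The values below are invented.
from pandas import date_range, to_datetime

times = date_range("2024-01-01 00:00", periods=6, freq="1h")
starts = to_datetime(["2024-01-01 01:00", "2024-01-01 04:00"]).values
ends = to_datetime(["2024-01-01 02:00", "2024-01-01 05:00"]).values

t_vals = times.values[:, None]                  # shape (6, 1)
covered = (t_vals >= starts) & (t_vals < ends)  # shape (6, 2)
mask = covered.any(axis=1)                      # [F, T, F, F, T, F]
# --- [end editor's example] ---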
-# raise ValueError(msg) From 639c6d6d8f6fc0f738b2a6ce37ee47fbd4407ccc Mon Sep 17 00:00:00 2001 From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com> Date: Fri, 9 Jan 2026 10:38:51 +0100 Subject: [PATCH 03/30] fix_ruff2 --- src/post_processing/dataclass/data_aplose.py | 2 +- src/post_processing/utils/core_utils.py | 10 ++++---- src/post_processing/utils/plot_utils.py | 16 +++++++++---- tests/conftest.py | 2 -- tests/test_DataAplose.py | 9 +++++--- tests/test_core_utils.py | 19 +++++++++------- tests/test_filtering_utils.py | 24 +++++++++++--------- tests/test_glider_utils.py | 2 +- tests/test_metric_utils.py | 3 ++- tests/test_plot_utils.py | 6 ++--- 10 files changed, 52 insertions(+), 41 deletions(-) diff --git a/src/post_processing/dataclass/data_aplose.py b/src/post_processing/dataclass/data_aplose.py index 76b6d98..3c8d1e4 100644 --- a/src/post_processing/dataclass/data_aplose.py +++ b/src/post_processing/dataclass/data_aplose.py @@ -408,7 +408,7 @@ def plot( color=color, season=season, effort=effort, - coordinates=(self.lat, self.lon) + coordinates=(self.lat, self.lon), ) if mode == "heatmap": diff --git a/src/post_processing/utils/core_utils.py b/src/post_processing/utils/core_utils.py index 5a831e1..9457ef2 100644 --- a/src/post_processing/utils/core_utils.py +++ b/src/post_processing/utils/core_utils.py @@ -11,7 +11,7 @@ from astral.sun import sunrise, sunset from matplotlib import pyplot as plt from osekit.config import TIMESTAMP_FORMAT_AUDIO_FILE -from osekit.utils.timestamp_utils import strptime_from_text, strftime_osmose_format +from osekit.utils.timestamp_utils import strftime_osmose_format, strptime_from_text from pandas import ( DataFrame, DatetimeIndex, @@ -255,7 +255,6 @@ def add_weak_detection( new_line.append(np.nan) df.loc[df.index.max() + 1] = new_line - return df.sort_values(by=["start_datetime", "annotator"]).reset_index(drop=True) @@ -509,11 +508,10 @@ def get_time_range_and_bin_size( if isinstance(bin_size, Timedelta): return timestamp_range, bin_size - elif isinstance(bin_size, BaseOffset): + if isinstance(bin_size, BaseOffset): return timestamp_range, timestamp_range[1] - timestamp_range[0] - else: - msg = "bin_size must be a Timedelta or BaseOffset." - raise TypeError(msg) + msg = "bin_size must be a Timedelta or BaseOffset." 
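# --- [editor's example: illustrative sketch, not part of the original patch] ---
# The two `bin_size` flavours distinguished just above: a `Timedelta` has a
# fixed length, while a pandas offset is calendar-aware and can produce bins of
# varying length, which is why the offset branch derives the bin size from two
# consecutive timestamps of the generated range instead.
from pandas import Timedelta
from pandas.tseries.frequencies import to_offset
from pandas.tseries.offsets import BaseOffset

fixed = Timedelta("1D")      # always exactly 24 hours
monthly = to_offset("MS")    # month-start offset: bins span 28-31 days
assert isinstance(monthly, BaseOffset)
# --- [end editor's example] ---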
+ raise TypeError(msg) def round_begin_end_timestamps( diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 2746a85..b6f566e 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -13,7 +13,15 @@ from matplotlib.dates import num2date from matplotlib.ticker import PercentFormatter from numpy import ceil, histogram, polyfit -from pandas import DataFrame, DatetimeIndex, Index, Timedelta, Timestamp, date_range, Series +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Series, + Timedelta, + Timestamp, + date_range, +) from pandas.tseries import frequencies from scipy.stats import pearsonr from seaborn import scatterplot @@ -28,11 +36,10 @@ timedelta_to_str, ) from post_processing.utils.filtering_utils import ( + filter_by_annotator, get_max_time, get_timezone, - filter_by_annotator, ) -from post_processing.utils.metrics_utils import normalize_counts_by_effort if TYPE_CHECKING: from datetime import tzinfo @@ -110,7 +117,6 @@ def histo( # if effort: # normalize_counts_by_effort(df, effort, time_bin) - n_groups = len(labels) if legend_labels else 1 bar_width = bin_size / n_groups bin_starts = mdates.date2num(df.index) @@ -141,7 +147,7 @@ def histo( f" - bin size: {bin_size_str})" ) ax.set_ylabel(y_label) - #set_y_axis_to_percentage(ax) if effort else set_dynamic_ylim(ax, df) + # set_y_axis_to_percentage(ax) if effort else set_dynamic_ylim(ax, df) set_plot_title(ax, annotators, labels) ax.set_xlim(begin, end) diff --git a/tests/conftest.py b/tests/conftest.py index e03bf43..99ff7ef 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -122,8 +122,6 @@ """ - - STATUS = """dataset,filename,ann1,ann2,ann3,ann4,ann5,ann6 sample_dataset,2025_01_25_06_20_00,FINISHED,FINISHED,FINISHED,FINISHED,FINISHED,FINISHED sample_dataset,2025_01_25_06_20_10,FINISHED,FINISHED,FINISHED,FINISHED,FINISHED,FINISHED diff --git a/tests/test_DataAplose.py b/tests/test_DataAplose.py index 5ad1b04..9b9516c 100644 --- a/tests/test_DataAplose.py +++ b/tests/test_DataAplose.py @@ -19,6 +19,7 @@ def test_data_aplose_init(sample_df: DataFrame) -> None: assert data.begin == sample_df["start_datetime"].min() assert data.end == sample_df["end_datetime"].max() + def test_filter_df_single_pair(sample_df: DataFrame) -> None: data = DataAplose(sample_df) filtered_data = data.filter_df(annotator="ann1", label="lbl1") @@ -30,17 +31,19 @@ def test_filter_df_single_pair(sample_df: DataFrame) -> None: ].reset_index(drop=True) assert filtered_data.equals(expected) + def test_change_tz(sample_df: DataFrame) -> None: data = DataAplose(sample_df) - new_tz = 'Etc/GMT-7' + new_tz = "Etc/GMT-7" data.change_tz(new_tz) - start_dt = data.df['start_datetime'] - end_dt = data.df['end_datetime'] + start_dt = data.df["start_datetime"] + end_dt = data.df["end_datetime"] assert all(ts.tz.zone == new_tz for ts in start_dt), f"The detection start timestamps have to be in {new_tz} timezone" assert all(ts.tz.zone == new_tz for ts in end_dt), f"The detection end timestamps have to be in {new_tz} timezone" assert data.begin.tz.zone == new_tz, f"The begin value of the DataAplose has to be in {new_tz} timezone" assert data.end.tz.zone == new_tz, f"The end value of the DataAplose has to be in {new_tz} timezone" + def test_filter_df_multiple_pairs(sample_df: DataFrame) -> None: data = DataAplose(sample_df) filtered_data = data.filter_df(annotator=["ann1", "ann2"], label=["lbl1", "lbl2"]) diff --git a/tests/test_core_utils.py b/tests/test_core_utils.py index 
a1a3d73..e72e482 100644 --- a/tests/test_core_utils.py +++ b/tests/test_core_utils.py @@ -8,6 +8,8 @@ from post_processing.dataclass.data_aplose import DataAplose from post_processing.utils.core_utils import ( + add_recording_period, + add_season_period, add_weak_detection, get_coordinates, get_count, @@ -15,13 +17,11 @@ get_season, get_sun_times, get_time_range_and_bin_size, + json2df, localize_timestamps, round_begin_end_timestamps, - timedelta_to_str, - add_season_period, - add_recording_period, set_bar_height, - json2df, + timedelta_to_str, ) @@ -409,10 +409,11 @@ def test_add_season_no_data() -> None: # %% add_recording_period + def test_add_recording_period_valid() -> None: fig, ax = plt.subplots() start = Timestamp("2025-01-01T00:00:00+00:00") - stop = Timestamp("2025-01-02T00:00:00+00:00") + stop = Timestamp("2025-01-02T00:00:00+00:00") ts = date_range(start=start, end=stop, freq="H", tz="UTC") values = list(range(len(ts))) @@ -423,7 +424,7 @@ def test_add_recording_period_valid() -> None: [ Timestamp("2025-01-01T00:00:00+00:00"), Timestamp("2025-01-02T00:00:00+00:00"), - ] + ], ], columns=["deployment_date", "recovery_date"], ) @@ -438,6 +439,7 @@ def test_add_recording_period_no_data() -> None: # %% set_bar_height + def test_set_bar_height_valid() -> None: fig, ax = plt.subplots() start = Timestamp("2025-01-01T00:00:00+00:00") @@ -457,6 +459,7 @@ def test_set_bar_height_no_data() -> None: # %% json2df + def test_json2df_valid(tmp_path): fake_json = { "deployment_date": "2025-01-01T00:00:00+00:00", @@ -474,9 +477,9 @@ def test_json2df_valid(tmp_path): [ Timestamp("2025-01-01T00:00:00+00:00"), Timestamp("2025-01-02T00:00:00+00:00"), - ] + ], ], columns=["deployment_date", "recovery_date"], ) - assert df.equals(expected) \ No newline at end of file + assert df.equals(expected) diff --git a/tests/test_filtering_utils.py b/tests/test_filtering_utils.py index 95fd987..3ec3760 100644 --- a/tests/test_filtering_utils.py +++ b/tests/test_filtering_utils.py @@ -77,7 +77,7 @@ def test_find_delimiter_unsupported_delimiter(tmp_path: Path) -> None: with pytest.raises( ValueError, - match=r"unsupported delimiter '&'" + match=r"unsupported delimiter '&'", ): find_delimiter(file) @@ -199,6 +199,7 @@ def test_filter_by_freq_valid(sample_df: DataFrame, f_min, f_max): if f_max is not None: assert (result["end_frequency"] <= f_max).all() + @pytest.mark.parametrize( "f_min, f_max, expected_msg", [ @@ -216,8 +217,6 @@ def test_filter_by_freq_valid(sample_df: DataFrame, f_min, f_max): ), ], ) - - def test_filter_by_freq_out_of_range(sample_df: DataFrame, f_min, f_max, expected_msg): with pytest.raises(ValueError, match=expected_msg): filter_by_freq(sample_df, f_min=f_min, f_max=f_max) @@ -331,7 +330,7 @@ def test_get_timezone_several(sample_df: DataFrame) -> None: } sample_df = concat( [sample_df, DataFrame([new_row])], - ignore_index=False + ignore_index=False, ) tz = get_timezone(sample_df) assert len(tz) == 2 @@ -340,6 +339,7 @@ def test_get_timezone_several(sample_df: DataFrame) -> None: # %% read DataFrame + def test_read_dataframe_comma_delimiter(tmp_path: Path) -> None: csv_file = tmp_path / "test.csv" csv_file.write_text( @@ -417,7 +417,7 @@ def test_no_timebin_several_tz(sample_df: DataFrame) -> None: } sample_df = concat( [sample_df, DataFrame([new_row])], - ignore_index=False + ignore_index=False, ) timestamp_wav = to_datetime(sample_df["filename"], format="%Y_%m_%d_%H_%M_%S").dt.tz_localize(pytz.UTC) @@ -429,7 +429,7 @@ def test_no_timebin_original_timebin(sample_df: DataFrame) -> None: tz = 
get_timezone(sample_df) timestamp_wav = to_datetime( sample_df["filename"], - format="%Y_%m_%d_%H_%M_%S" + format="%Y_%m_%d_%H_%M_%S", ).dt.tz_localize(tz) df_out = reshape_timebin( sample_df, @@ -520,7 +520,7 @@ def test_simple_reshape_hourly(sample_df: DataFrame) -> None: tz = get_timezone(sample_df) timestamp_wav = to_datetime( sample_df["filename"], - format="%Y_%m_%d_%H_%M_%S" + format="%Y_%m_%d_%H_%M_%S", ).dt.tz_localize(tz) df_out = reshape_timebin( sample_df, @@ -538,7 +538,7 @@ def test_reshape_daily_multiple_bins(sample_df: DataFrame) -> None: tz = get_timezone(sample_df) timestamp_wav = to_datetime( sample_df["filename"], - format="%Y_%m_%d_%H_%M_%S" + format="%Y_%m_%d_%H_%M_%S", ).dt.tz_localize(tz) df_out = reshape_timebin(sample_df, timestamp_audio=timestamp_wav, timebin_new=Timedelta(days=1)) assert not df_out.empty @@ -555,7 +555,7 @@ def test_with_manual_timestamps_vector(sample_df: DataFrame) -> None: df_out = reshape_timebin( sample_df, timestamp_audio=timestamp_wav, - timebin_new=Timedelta(hours=1) + timebin_new=Timedelta(hours=1), ) assert not df_out.empty @@ -589,6 +589,7 @@ def test_ensure_no_invalid_with_elements() -> None: assert "bar" in str(exc_info.value) assert "columns" in str(exc_info.value) + def test_ensure_no_invalid_single_element() -> None: invalid_items = ["baz"] with pytest.raises(ValueError) as exc_info: @@ -598,6 +599,7 @@ def test_ensure_no_invalid_single_element() -> None: # %% intersection / union + def test_intersection(sample_df) -> None: df_result = intersection_or_union(sample_df[sample_df["annotator"].isin(["ann1", "ann2"])], user_sel="intersection") @@ -628,7 +630,7 @@ def test_not_enough_annotators_raises() -> None: "annotation": ["cat"], "start_datetime": to_datetime(["2025-01-01 10:00"]), "end_datetime": to_datetime(["2025-01-01 10:01"]), - "annotator": ["A"] + "annotator": ["A"], }) with pytest.raises(ValueError, match="Not enough annotators detected"): - intersection_or_union(df_single_annotator, user_sel="intersection") \ No newline at end of file + intersection_or_union(df_single_annotator, user_sel="intersection") diff --git a/tests/test_glider_utils.py b/tests/test_glider_utils.py index 12d83df..d0247c5 100644 --- a/tests/test_glider_utils.py +++ b/tests/test_glider_utils.py @@ -56,7 +56,7 @@ def test_get_position_from_timestamp(nav_df: DataFrame) -> None: def test_plot_detections_with_nav_data( df_detections: DataFrame, - nav_df: DataFrame + nav_df: DataFrame, ) -> None: plot_detections_with_nav_data( df=df_detections, diff --git a/tests/test_metric_utils.py b/tests/test_metric_utils.py index 34ce769..35717e7 100644 --- a/tests/test_metric_utils.py +++ b/tests/test_metric_utils.py @@ -3,6 +3,7 @@ from post_processing.utils.metrics_utils import detection_perf + def test_detection_perf(sample_df: DataFrame) -> None: try: detection_perf(df=sample_df[sample_df["annotator"].isin(["ann1", "ann4"])], ref=("ann1", "lbl1")) @@ -12,4 +13,4 @@ def test_detection_perf(sample_df: DataFrame) -> None: def test_detection_perf_one_annotator(sample_df: DataFrame) -> None: with pytest.raises(ValueError, match="Two annotators needed"): - detection_perf(df=sample_df[sample_df["annotator"] == "ann1"], ref=("ann1", "lbl1")) \ No newline at end of file + detection_perf(df=sample_df[sample_df["annotator"] == "ann1"], ref=("ann1", "lbl1")) diff --git a/tests/test_plot_utils.py b/tests/test_plot_utils.py index d7392cf..e7a389c 100644 --- a/tests/test_plot_utils.py +++ b/tests/test_plot_utils.py @@ -4,10 +4,10 @@ from numpy import arange, testing from 
post_processing.utils.plot_utils import ( - overview, _wrap_xtick_labels, - set_y_axis_to_percentage, get_legend, + overview, + set_y_axis_to_percentage, ) @@ -103,4 +103,4 @@ def test_lists_and_strings_combined(): labels = ["Label1", "Label2"] result = get_legend(annotators, labels) expected = ["Alice\nLabel1", "Bob\nLabel2"] - assert result == expected \ No newline at end of file + assert result == expected From b2ebfd71485c864ea7f3f5fd5a7f526e1d43d5ce Mon Sep 17 00:00:00 2001 From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com> Date: Fri, 9 Jan 2026 13:50:54 +0100 Subject: [PATCH 04/30] adapt RecordingPeriod class --- .../dataclass/recording_period.py | 98 +++++++++++++++---- 1 file changed, 81 insertions(+), 17 deletions(-) diff --git a/src/post_processing/dataclass/recording_period.py b/src/post_processing/dataclass/recording_period.py index 901d932..11524e6 100644 --- a/src/post_processing/dataclass/recording_period.py +++ b/src/post_processing/dataclass/recording_period.py @@ -24,6 +24,8 @@ @dataclass(frozen=True) class RecordingPeriod: + """Represents recording effort over time, aggregated into bins.""" + counts: Series timebin_origin: Timedelta @@ -34,33 +36,91 @@ def from_path( *, bin_size: Timedelta | BaseOffset, ) -> RecordingPeriod: - """Vectorized creation of recording coverage from CSV with start/end datetime. + """Vectorized creation of recording coverage from CSV with start/end datetimes. + + This method reads a CSV with columns: + - 'start_recording' + - 'end_recording' + - 'start_deployment' + - 'end_deployment' + + It computes the **effective recording interval** as the intersection between + recording and deployment periods, builds a fine-grained timeline at + `timebin_origin` resolution, and aggregates effort into `bin_size` bins. + + Parameters + ---------- + config + Configuration object containing at least: + - `timestamp_file`: path to CSV + - `timebin_origin`: Timedelta resolution of detections + bin_size : Timedelta or BaseOffset + Size of the aggregation bin (e.g., pd.Timedelta("1H") or "1D"). + + Returns + ------- + RecordingPeriod + Object containing `counts` (Series indexed by IntervalIndex) and + `timebin_origin`. - CSV must have columns 'start_recording' and 'end_recording'. - bin_size can be a Timedelta (e.g., pd.Timedelta("1H")) - or a pandas offset (e.g., "1D"). """ - # 1. Read CSV and parse datetime + # 1. Read CSV and parse datetime columns timestamp_file = config.timestamp_file delim = find_delimiter(timestamp_file) df = pd.read_csv( config.timestamp_file, - parse_dates=["start_recording", "end_recording"], + parse_dates=[ + "start_recording", + "end_recording", + "start_deployment", + "end_deployment", + ], delimiter=delim, ) if df.empty: raise ValueError("CSV is empty.") - # 2. Normalize timezones if needed - df["start_recording"] = ( - pd.to_datetime(df["start_recording"], utc=True).dt.tz_convert(None) - ) - df["end_recording"] = ( - pd.to_datetime(df["end_recording"], utc=True).dt.tz_convert(None) - ) + # Ensure all required columns are present + required_columns = { + "start_recording", + "end_recording", + "start_deployment", + "end_deployment", + } + + missing = required_columns - set(df.columns) + + if missing: + raise ValueError( + f"CSV is missing required columns: {', '.join(sorted(missing))}", + ) + + # 2. 
Normalize timezones: convert to UTC, then remove tz info (naive)
+        for col in [
+            "start_recording",
+            "end_recording",
+            "start_deployment",
+            "end_deployment",
+        ]:
+            df[col] = pd.to_datetime(df[col], utc=True).dt.tz_convert(None)
+
+        # 3. Compute effective recording intervals (intersection)
+        df["start_recording"] = df[
+            ["start_recording", "start_deployment"]
+        ].max(axis=1)
+
+        df["end_recording"] = df[
+            ["end_recording", "end_deployment"]
+        ].min(axis=1)
+
+        # Remove rows with no actual recording interval
+        df = df.loc[df["start_recording"] < df["end_recording"]].copy()
 
-        # Build fine-grained timeline (timebin_origin resolution)
+        if df.empty:
+            raise ValueError("No valid recording intervals after deployment intersection.")
+
+        # 4. Build fine-grained timeline at `timebin_origin` resolution
         origin = config.timebin_origin
         time_index = pd.date_range(
             start=df["start_recording"].min(),
@@ -68,19 +128,23 @@ def from_path(
             freq=origin,
         )
 
-        # Initialize effort vector
+        # Initialize effort vector (0 = no recording, 1 = recording)
+        # Compare each timestamp to all intervals in a vectorized manner
         effort = pd.Series(0, index=time_index)
 
-        # Vectorized interval coverage
+        # 5. Vectorized interval coverage
         tvals = time_index.values[:, None]
         start_vals = df["start_recording"].values
         end_vals = df["end_recording"].values
 
+        # Boolean matrix: True if timestamp is within any recording interval
        covered = (tvals >= start_vals) & (tvals < end_vals)
         effort[:] = covered.any(axis=1).astype(int)
 
-        # Aggregate effort into bin_size
+        # 6. Aggregate effort into user-defined bin_size
         counts = effort.resample(bin_size).sum()
+
+        # Replace index with IntervalIndex for downstream compatibility
         counts.index = pd.interval_range(
             start=counts.index[0],
             periods=len(counts),

From f74587e880facfb01d71bb350d11b24b43bd5703 Mon Sep 17 00:00:00 2001
From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com>
Date: Mon, 12 Jan 2026 15:37:47 +0100
Subject: [PATCH 05/30] add test_recording_period

---
 tests/conftest.py              | 28 ++++++++++++++++++++-
 tests/test_recording_period.py | 63 ++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_recording_period.py

diff --git a/tests/conftest.py b/tests/conftest.py
index 99ff7ef..9a26f97 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -7,7 +7,7 @@
 import soundfile as sf
 import yaml
 from osekit.utils.timestamp_utils import strftime_osmose_format
-from pandas import DataFrame, read_csv
+from pandas import DataFrame, Timedelta, read_csv
 
 SAMPLE = """dataset,filename,start_time,end_time,start_frequency,end_frequency,annotation,annotator,start_datetime,end_datetime,type,score
 sample_dataset,2025_01_25_06_20_00,0.0,10.0,0.0,72000.0,lbl2,ann2,2025-01-25T06:20:00.000+00:00,2025-01-25T06:20:10.000+00:00,WEAK,0.11
@@ -132,6 +132,14 @@
 sample_dataset,2025_01_26_06_20_20,FINISHED,FINISHED,FINISHED,FINISHED,FINISHED,FINISHED
 """
 
+# ---------------------------------------------------------------------------
+# Fake recording planning CSV used for tests
+# ---------------------------------------------------------------------------
+RECORDING_PLANNING_CSV = """start_recording,end_recording,start_deployment,end_deployment
+2024-01-01 00:00:00+0000,2024-04-09 02:00:00+0000,2024-01-02 00:00:00+0000,2024-04-30 02:00:00+0000
+2024-04-30 01:00:00+0000,2024-07-03 06:00:00+0000,2024-04-09 04:00:00+0000,2024-07-14 14:00:00+0000
+"""
+
 
 @pytest.fixture
 def sample_df() -> DataFrame:
@@ -226,3 +234,21 @@ def create_file(path: Path, size: int = 
2048): create_file(nested / "file4.wav") (tmp_path / "ignore.txt").write_text("not audio") return tmp_path + + +@pytest.fixture +def recording_planning_csv(tmp_path) -> Path: + """Create a temporary CSV file simulating a recording planning.""" + path = tmp_path / "recording_planning.csv" + path.write_text(RECORDING_PLANNING_CSV) + return path + + +@pytest.fixture +def recording_planning_config(recording_planning_csv): + """Minimal config object compatible with RecordingPeriod.from_path.""" + class RecordingPlanningConfig: + timestamp_file: Path = recording_planning_csv + timebin_origin = Timedelta("1min") + + return RecordingPlanningConfig() diff --git a/tests/test_recording_period.py b/tests/test_recording_period.py new file mode 100644 index 0000000..12464e7 --- /dev/null +++ b/tests/test_recording_period.py @@ -0,0 +1,63 @@ +import pandas as pd + +from post_processing.dataclass.recording_period import RecordingPeriod + + +def test_recording_period_with_gaps(recording_planning_config): + """RecordingPeriod correctly represents long gaps with no recording effort. + + The planning contains two recording blocks separated by ~3 weeks with no + recording at all. Weekly aggregation must reflect: + - weeks with full effort, + - weeks with partial effort, + - weeks with zero effort. + """ + recording_period = RecordingPeriod.from_path( + config=recording_planning_config, + bin_size="1W", + ) + + counts = recording_period.counts + + # ------------------------------------------------------------------ + # Structural checks + # ------------------------------------------------------------------ + assert not counts.empty + assert counts.index.is_interval() + assert counts.min() >= 0 + + # One week = 7 * 24 hours (origin = 1min) + full_week_minutes = 7 * 24 * 60 + + # ------------------------------------------------------------------ + # Helper: find the bin covering a given timestamp + # ------------------------------------------------------------------ + def bin_covering(ts: pd.Timestamp) -> pd.Interval: + for interval in counts.index: + if interval.left <= ts < interval.right: + return interval + raise AssertionError(f"No bin covers timestamp {ts}") + + # ------------------------------------------------------------------ + # Week fully inside the long gap → zero effort + # ------------------------------------------------------------------ + gap_ts = pd.Timestamp("2024-04-21") + + gap_bin = bin_covering(gap_ts) + assert counts.loc[gap_bin] == 0 + + # ------------------------------------------------------------------ + # Week fully inside recording → full effort + # ------------------------------------------------------------------ + full_effort_ts = pd.Timestamp("2024-02-04") + + full_bin = bin_covering(full_effort_ts) + assert counts.loc[full_bin] == full_week_minutes + + # ------------------------------------------------------------------ + # Week overlapping recording stop → partial effort + # ------------------------------------------------------------------ + partial_ts = pd.Timestamp("2024-04-14") + + partial_bin = bin_covering(partial_ts) + assert counts.loc[partial_bin] == 1560 From c1600bf894333969ca8510417f304d8c9fa49437 Mon Sep 17 00:00:00 2001 From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com> Date: Mon, 12 Jan 2026 17:15:20 +0100 Subject: [PATCH 06/30] add test_shade_no_effort --- tests/conftest.py | 7 +++--- tests/test_plot_utils.py | 40 ++++++++++++++++++++++++++++++++++ tests/test_recording_period.py | 3 ++- 3 files changed, 46 insertions(+), 4 deletions(-) diff --git 
a/tests/conftest.py b/tests/conftest.py index 9a26f97..a6299e3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,7 +7,8 @@ import soundfile as sf import yaml from osekit.utils.timestamp_utils import strftime_osmose_format -from pandas import DataFrame, Timedelta, read_csv +from pandas import DataFrame, read_csv +from pandas.tseries import frequencies SAMPLE = """dataset,filename,start_time,end_time,start_frequency,end_frequency,annotation,annotator,start_datetime,end_datetime,type,score sample_dataset,2025_01_25_06_20_00,0.0,10.0,0.0,72000.0,lbl2,ann2,2025-01-25T06:20:00.000+00:00,2025-01-25T06:20:10.000+00:00,WEAK,0.11 @@ -137,7 +138,7 @@ # --------------------------------------------------------------------------- RECORDING_PLANNING_CSV = """start_recording,end_recording,start_deployment,end_deployment 2024-01-01 00:00:00+0000,2024-04-09 02:00:00+0000,2024-01-02 00:00:00+0000,2024-04-30 02:00:00+0000 -2024-04-30 01:00:00+0000,2024-07-03 06:00:00+0000,2024-04-09 04:00:00+0000,2024-07-14 14:00:00+0000 +2024-04-30 01:00:00+0000,2024-07-14 06:00:00+0000,2024-04-30 02:00:00+0000,2024-07-06 14:00:00+0000 """ @@ -249,6 +250,6 @@ def recording_planning_config(recording_planning_csv): """Minimal config object compatible with RecordingPeriod.from_path.""" class RecordingPlanningConfig: timestamp_file: Path = recording_planning_csv - timebin_origin = Timedelta("1min") + timebin_origin = frequencies.to_offset("1min") return RecordingPlanningConfig() diff --git a/tests/test_plot_utils.py b/tests/test_plot_utils.py index e7a389c..37c2243 100644 --- a/tests/test_plot_utils.py +++ b/tests/test_plot_utils.py @@ -1,13 +1,19 @@ +from unittest.mock import MagicMock + import matplotlib.pyplot as plt import pytest from matplotlib.ticker import PercentFormatter from numpy import arange, testing +from pandas import Series, Timedelta, to_datetime +from pandas.tseries import frequencies +from post_processing.dataclass.recording_period import RecordingPeriod from post_processing.utils.plot_utils import ( _wrap_xtick_labels, get_legend, overview, set_y_axis_to_percentage, + shade_no_effort, ) @@ -104,3 +110,37 @@ def test_lists_and_strings_combined(): result = get_legend(annotators, labels) expected = ["Alice\nLabel1", "Bob\nLabel2"] assert result == expected + + +def test_shade_no_effort_from_recording_planning(recording_planning_config): + """shade_no_effort shades contiguous zero-effort periods.""" + + def count_contiguous_zero_segments(effort: Series) -> int: + """Return number of contiguous zero-effort segments.""" + is_zero = effort == 0 + return ((is_zero != is_zero.shift(fill_value=False)) & is_zero).sum() + + recording_period = RecordingPeriod.from_path( + config=recording_planning_config, + bin_size=frequencies.to_offset("1W"), + ) + + counts = recording_period.counts + + bin_starts = to_datetime( + [interval.left for interval in counts.index], + ) + + fig, ax = plt.subplots() + ax.axvspan = MagicMock() + + shade_no_effort( + ax=ax, + bin_starts=bin_starts, + observed=recording_period, + bar_width=Timedelta("7D"), + ) + + expected_spans = count_contiguous_zero_segments(counts) + + assert ax.axvspan.call_count == expected_spans diff --git a/tests/test_recording_period.py b/tests/test_recording_period.py index 12464e7..41b9e59 100644 --- a/tests/test_recording_period.py +++ b/tests/test_recording_period.py @@ -1,4 +1,5 @@ import pandas as pd +from pandas.tseries import frequencies from post_processing.dataclass.recording_period import RecordingPeriod @@ -14,7 +15,7 @@ def 
test_recording_period_with_gaps(recording_planning_config):
     """
 
     recording_period = RecordingPeriod.from_path(
         config=recording_planning_config,
-        bin_size="1W",
+        bin_size=frequencies.to_offset("1W"),
     )
 

From 0e22298596aeca66821bc6bbca16df72231f40fa Mon Sep 17 00:00:00 2001
From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com>
Date: Tue, 13 Jan 2026 09:47:44 +0100
Subject: [PATCH 07/30] delete useless code in shade_no_effort

---
 src/post_processing/utils/plot_utils.py | 26 --------------------------
 1 file changed, 26 deletions(-)

diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py
index b6f566e..75c421f 100644
--- a/src/post_processing/utils/plot_utils.py
+++ b/src/post_processing/utils/plot_utils.py
@@ -708,32 +708,6 @@ def shade_no_effort(
             zorder=0,
         )
 
-    # Outside data coverage
-    x_min, x_max = ax.get_xlim()
-    data_min = mdates.date2num(bin_starts[0] - bar_width)
-    data_max = mdates.date2num(bin_starts[-1] + bar_width)
-
-    if x_min < data_min:
-        ax.axvspan(
-            x_min,
-            data_min,
-            facecolor="0.45",
-            alpha=0.15,
-            linewidth=0,
-            zorder=0,
-        )
-    if x_max > data_max:
-        ax.axvspan(
-            data_max,
-            x_max,
-            facecolor="0.45",
-            alpha=0.15,
-            linewidth=0,
-            zorder=0,
-        )
-
-    ax.set_xlim(x_min, x_max)
-
 
 def add_sunrise_sunset(ax: Axes, lat: float, lon: float, tz: tzinfo) -> None:
     """Display sunrise/sunset times on plot."""

From e889c29cc52a864f29363257128b9f9dd55e2a59 Mon Sep 17 00:00:00 2001
From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com>
Date: Tue, 13 Jan 2026 10:03:01 +0100
Subject: [PATCH 08/30] delete shade_no_effort test

---
 tests/test_plot_utils.py | 32 --------------------------------
 1 file changed, 32 deletions(-)

diff --git a/tests/test_plot_utils.py b/tests/test_plot_utils.py
index 37c2243..49aff37 100644
--- a/tests/test_plot_utils.py
+++ b/tests/test_plot_utils.py
@@ -112,35 +112,3 @@ def test_lists_and_strings_combined():
     assert result == expected
 
 
-def test_shade_no_effort_from_recording_planning(recording_planning_config):
-    """shade_no_effort shades contiguous zero-effort periods."""
-
-    def count_contiguous_zero_segments(effort: Series) -> int:
-        """Return number of contiguous zero-effort segments."""
-        is_zero = effort == 0
-        return ((is_zero != is_zero.shift(fill_value=False)) & is_zero).sum()
-
-    recording_period = RecordingPeriod.from_path(
-        config=recording_planning_config,
-        bin_size=frequencies.to_offset("1W"),
-    )
-
-    counts = recording_period.counts
-
-    bin_starts = to_datetime(
-        [interval.left for interval in counts.index],
-    )
-
-    fig, ax = plt.subplots()
-    ax.axvspan = MagicMock()
-
-    shade_no_effort(
-        ax=ax,
-        bin_starts=bin_starts,
-        observed=recording_period,
-        bar_width=Timedelta("7D"),
-    )
-
-    expected_spans = count_contiguous_zero_segments(counts)
-
-    assert ax.axvspan.call_count == expected_spans

From 2349713661ee44fd2bfa84e2a5a7ac7ff1d051d2 Mon Sep 17 00:00:00 2001
From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com>
Date: Tue, 13 Jan 2026 10:04:58 +0100
Subject: [PATCH 09/30] fix ruff

---
 src/post_processing/dataclass/recording_period.py | 15 ++++++++-------
 tests/test_plot_utils.py                          |  7 -------
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/src/post_processing/dataclass/recording_period.py b/src/post_processing/dataclass/recording_period.py
index 11524e6..32319fd 100644
--- a/src/post_processing/dataclass/recording_period.py
+++ b/src/post_processing/dataclass/recording_period.py
@@ -106,16 +106,16 @@ def from_path(
             df[col] = 
pd.to_datetime(df[col], utc=True).dt.tz_convert(None) # 3. Compute effective recording intervals (intersection) - df["start_recording"] = df[ + df["effective_start_recording"] = df[ ["start_recording", "start_deployment"] ].max(axis=1) - df["end_recording"] = df[ + df["effective_end_recording"] = df[ ["end_recording", "end_deployment"] ].min(axis=1) # Remove rows with no actual recording interval - df = df.loc[df["start_recording"] < df["end_recording"]].copy() + df = df.loc[df["effective_start_recording"] < df["effective_end_recording"]].copy() if df.empty: raise ValueError("No valid recording intervals after deployment intersection.") @@ -123,8 +123,8 @@ def from_path( # 4. Build fine-grained timeline at `timebin_origin` resolution origin = config.timebin_origin time_index = pd.date_range( - start=df["start_recording"].min(), - end=df["end_recording"].max(), + start=df["effective_start_recording"].min(), + end=df["effective_end_recording"].max(), freq=origin, ) @@ -134,8 +134,8 @@ def from_path( # 5. Vectorized interval coverage tvals = time_index.values[:, None] - start_vals = df["start_recording"].values - end_vals = df["end_recording"].values + start_vals = df["effective_start_recording"].values + end_vals = df["effective_end_recording"].values # Boolean matrix: True if timestamp is within any recording interval covered = (tvals >= start_vals) & (tvals < end_vals) @@ -151,4 +151,5 @@ def from_path( freq=bin_size, closed="left", ) + return cls(counts=counts, timebin_origin=origin) diff --git a/tests/test_plot_utils.py b/tests/test_plot_utils.py index 49aff37..ffabbbd 100644 --- a/tests/test_plot_utils.py +++ b/tests/test_plot_utils.py @@ -1,19 +1,14 @@ -from unittest.mock import MagicMock import matplotlib.pyplot as plt import pytest from matplotlib.ticker import PercentFormatter from numpy import arange, testing -from pandas import Series, Timedelta, to_datetime -from pandas.tseries import frequencies -from post_processing.dataclass.recording_period import RecordingPeriod from post_processing.utils.plot_utils import ( _wrap_xtick_labels, get_legend, overview, set_y_axis_to_percentage, - shade_no_effort, ) @@ -110,5 +105,3 @@ def test_lists_and_strings_combined(): result = get_legend(annotators, labels) expected = ["Alice\nLabel1", "Bob\nLabel2"] assert result == expected - - From 77f71d1872e056186a071652056d980fcfeb5739 Mon Sep 17 00:00:00 2001 From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com> Date: Tue, 13 Jan 2026 10:34:28 +0100 Subject: [PATCH 10/30] delete matlab boring files --- .../PAMGuardMatlab-main/.DS_Store | Bin 6148 -> 0 bytes .../PAMGuardMatlab-main/.MATLABDriveTag | 1 - .../PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag | 1 - .../pgmatlab/Array/.MATLABDriveTag | 1 - 4 files changed, 3 deletions(-) delete mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.DS_Store delete mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag delete mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag delete mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.DS_Store b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.DS_Store deleted file mode 100644 index 
8cbbca90b6e5166f54b08e62aaab675fe94f04f0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}T>S5Z-O8-BN@c6nb3nTCkQQl)(EZ>xnykjo@uf;KKT0MGoe+f+2zj`Rl2EONYMF$o&h?B#G(=p{2Emc4#&vuTKv)3b}~@k=~Q)SDrb1MganEEez%%1&Lc-Xe)rG6i3eSHu$% z1H=F^Kn&~;1NICM&HX8vDkTPpfgdt}`-6mr=vvGT>a7Dlczs5H4G{%&d`looi>}4o zAb3EyNd+{i+&(e5Ne8>MajwPOph;(3&kW<(nTy8@*Rz9N>Tt$ggY+c^h=FYes;2AU z`F{n!Oz9(kJB51005R~-7~r*$HyT1w_H5lL56@Z&?FkwR#^tDhfWCDJfDY~>E$uXZ bi8{o&7IT9*3fgr#AYBABA@m^zeu056q9RP< diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag deleted file mode 100644 index 84059a2..0000000 --- a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag +++ /dev/null @@ -1 +0,0 @@ -3496f669-9381-4974-bb7c-5cc1ddcb05d4 \ No newline at end of file diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag deleted file mode 100644 index df9fcd4..0000000 --- a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag +++ /dev/null @@ -1 +0,0 @@ -ee53bc03-ef5e-44bc-aea4-8fae1e2a0b9f \ No newline at end of file diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag deleted file mode 100644 index 656b51a..0000000 --- a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag +++ /dev/null @@ -1 +0,0 @@ -c4dd0a9d-e15d-496f-91ff-d9ff561a4fa0 \ No newline at end of file From e8c73e074eff7de6719cc3150c867865ee2ff775 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Wed, 14 Jan 2026 16:15:46 +0100 Subject: [PATCH 11/30] few modifs --- .../dataclass/detection_filter.py | 10 ++- .../dataclass/recording_period.py | 62 ++++++++++--------- src/post_processing/utils/filtering_utils.py | 22 ++++--- src/post_processing/utils/plot_utils.py | 2 +- 4 files changed, 55 insertions(+), 41 deletions(-) diff --git a/src/post_processing/dataclass/detection_filter.py b/src/post_processing/dataclass/detection_filter.py index d636c4c..b2d2599 100644 --- a/src/post_processing/dataclass/detection_filter.py +++ b/src/post_processing/dataclass/detection_filter.py @@ -7,9 +7,9 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass, fields from pathlib import Path -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Literal, Any import yaml from pandas import Timedelta, Timestamp @@ -44,6 +44,12 @@ class DetectionFilter: box: bool = False filename_format: str = None + def __getitem__(self, key: str): + """Return the value of the given key.""" + if key in {f.name for f in fields(self)}: + return getattr(self, key) + raise KeyError(key) + @classmethod def from_yaml( cls, diff --git a/src/post_processing/dataclass/recording_period.py b/src/post_processing/dataclass/recording_period.py index 32319fd..637733e 100644 --- a/src/post_processing/dataclass/recording_period.py +++ b/src/post_processing/dataclass/recording_period.py @@ -8,10 +8,13 @@ from dataclasses import 
dataclass from typing import TYPE_CHECKING -import pandas as pd from pandas import ( Series, Timedelta, + date_range, + interval_range, + read_csv, + to_datetime, ) from post_processing.utils.filtering_utils import ( @@ -21,6 +24,8 @@ if TYPE_CHECKING: from pandas.tseries.offsets import BaseOffset + from post_processing.dataclass.detection_filter import DetectionFilter + @dataclass(frozen=True) class RecordingPeriod: @@ -32,17 +37,17 @@ class RecordingPeriod: @classmethod def from_path( cls, - config, + config: DetectionFilter, *, bin_size: Timedelta | BaseOffset, ) -> RecordingPeriod: """Vectorized creation of recording coverage from CSV with start/end datetimes. This method reads a CSV with columns: - - 'start_recording' - - 'end_recording' - - 'start_deployment' - - 'end_deployment' + - "start_recording" + - "end_recording" + - "start_deployment" + - "end_deployment" It computes the **effective recording interval** as the intersection between recording and deployment periods, builds a fine-grained timeline at @@ -55,7 +60,7 @@ def from_path( - `timestamp_file`: path to CSV - `timebin_origin`: Timedelta resolution of detections bin_size : Timedelta or BaseOffset - Size of the aggregation bin (e.g., pd.Timedelta("1H") or "1D"). + Size of the aggregation bin (e.g., Timedelta("1H") or "1D"). Returns ------- @@ -64,10 +69,10 @@ def from_path( `timebin_origin`. """ - # 1. Read CSV and parse datetime columns + # Read CSV and parse datetime columns timestamp_file = config.timestamp_file delim = find_delimiter(timestamp_file) - df = pd.read_csv( + df = read_csv( config.timestamp_file, parse_dates=[ "start_recording", @@ -79,7 +84,8 @@ def from_path( ) if df.empty: - raise ValueError("CSV is empty.") + msg = "CSV is empty." + raise ValueError(msg) # Ensure all required columns are present required_columns = { @@ -92,20 +98,19 @@ def from_path( missing = required_columns - set(df.columns) if missing: - raise ValueError( - f"CSV is missing required columns: {', '.join(sorted(missing))}", - ) + msg = f"CSV is missing required columns: {', '.join(sorted(missing))}" + raise ValueError(msg) - # 2. Normalize timezones: convert to UTC, then remove tz info (naive) + # Normalize timezones: convert to UTC, then remove tz info (naive) for col in [ "start_recording", "end_recording", "start_deployment", "end_deployment", ]: - df[col] = pd.to_datetime(df[col], utc=True).dt.tz_convert(None) + df[col] = to_datetime(df[col], utc=True).dt.tz_convert(None) - # 3. Compute effective recording intervals (intersection) + # Compute effective recording intervals (intersection) df["effective_start_recording"] = df[ ["start_recording", "start_deployment"] ].max(axis=1) @@ -118,11 +123,12 @@ def from_path( df = df.loc[df["effective_start_recording"] < df["effective_end_recording"]].copy() if df.empty: - raise ValueError("No valid recording intervals after deployment intersection.") + msg = "No valid recording intervals after deployment intersection." + raise ValueError(msg) - # 4. 
Build fine-grained timeline at `timebin_origin` resolution + # Build fine-grained timeline at `timebin_origin` resolution origin = config.timebin_origin - time_index = pd.date_range( + time_index = date_range( start=df["effective_start_recording"].min(), end=df["effective_end_recording"].max(), freq=origin, @@ -130,22 +136,22 @@ def from_path( # Initialize effort vector (0 = no recording, 1 = recording) # Compare each timestamp to all intervals in a vectorized manner - effort = pd.Series(0, index=time_index) + effort = Series(0, index=time_index) - # 5. Vectorized interval coverage - tvals = time_index.values[:, None] - start_vals = df["effective_start_recording"].values - end_vals = df["effective_end_recording"].values + # Vectorized interval coverage + t_vals = time_index.to_numpy()[:, None] + start_vals = df["effective_start_recording"].to_numpy() + end_vals = df["effective_end_recording"].to_numpy() - # Boolean matrix: True if timestamp is within any recording interval - covered = (tvals >= start_vals) & (tvals < end_vals) + # Boolean matrix: True if the timestamp is within any recording interval + covered = (t_vals >= start_vals) & (t_vals < end_vals) effort[:] = covered.any(axis=1).astype(int) - # 6. Aggregate effort into user-defined bin_size + # Aggregate effort into user-defined bin_size counts = effort.resample(bin_size).sum() # Replace index with IntervalIndex for downstream compatibility - counts.index = pd.interval_range( + counts.index = interval_range( start=counts.index[0], periods=len(counts), freq=bin_size, diff --git a/src/post_processing/utils/filtering_utils.py b/src/post_processing/utils/filtering_utils.py index 650b5a8..faf2fd7 100644 --- a/src/post_processing/utils/filtering_utils.py +++ b/src/post_processing/utils/filtering_utils.py @@ -8,6 +8,7 @@ from typing import TYPE_CHECKING import pytz +from osekit.utils.timestamp_utils import strptime_from_text from pandas import ( DataFrame, Timedelta, @@ -570,16 +571,17 @@ def get_filename_timestamps(df: DataFrame, date_parser: str) -> list[Timestamp]: """ tz = get_timezone(df) - try: - return [ - to_datetime( - ts, - format=date_parser, - ).tz_localize(tz) for ts in df["filename"] - ] - except ValueError: - msg = """Could not parse timestamps from `df["filename"]`.""" - raise ValueError(msg) from None + timestamps = [ + strptime_from_text( + ts, + datetime_template=date_parser, + ) for ts in df["filename"] + ] + + if all(t.tz is None for t in timestamps): + timestamps = [t.tz_localize(tz) for t in timestamps] + + return timestamps def ensure_in_list(value: str, candidates: list[str], label: str) -> None: diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 75c421f..f1cd07f 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -691,7 +691,7 @@ def shade_no_effort( start, start + width_days, facecolor="0.65", - alpha=0.08, + alpha=0.1, linewidth=0, zorder=0, ) From fd913cfa38f284f4be6ebbb0f6265ba91434490d Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Wed, 14 Jan 2026 16:30:07 +0100 Subject: [PATCH 12/30] effort legend --- src/post_processing/utils/plot_utils.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index f1cd07f..f5ad0df 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -11,6 +11,7 @@ import 
numpy as np from matplotlib import dates as mdates from matplotlib.dates import num2date +from matplotlib.patches import Patch from matplotlib.ticker import PercentFormatter from numpy import ceil, histogram, polyfit from pandas import ( @@ -114,9 +115,6 @@ def histo( else: legend_labels = None - # if effort: - # normalize_counts_by_effort(df, effort, time_bin) - n_groups = len(labels) if legend_labels else 1 bar_width = bin_size / n_groups bin_starts = mdates.date2num(df.index) @@ -684,6 +682,7 @@ def shade_no_effort( no_effort = effort_fraction == 0 partial_effort = (effort_fraction > 0) & (effort_fraction < 1) + # Draw partial effort first (lighter) for ts in bin_starts[partial_effort]: start = mdates.date2num(ts - bar_width) @@ -694,6 +693,7 @@ def shade_no_effort( alpha=0.1, linewidth=0, zorder=0, + label="partial data", ) # Draw no effort on top (darker) @@ -706,8 +706,24 @@ def shade_no_effort( alpha=0.15, linewidth=0, zorder=0, + label="no data", + ) + + handles = [] + + if partial_effort.any(): + handles.append( + Patch(facecolor="0.65", alpha=0.1, label="partial data") + ) + + if no_effort.any(): + handles.append( + Patch(facecolor="0.45", alpha=0.15, label="no data") ) + if handles: + ax.legend(handles=handles) + def add_sunrise_sunset(ax: Axes, lat: float, lon: float, tz: tzinfo) -> None: """Display sunrise/sunset times on plot.""" From df40ec9ccf440425c60221f6d9fce35b21f3b74c Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Wed, 14 Jan 2026 16:36:38 +0100 Subject: [PATCH 13/30] ruff fix --- src/post_processing/dataclass/detection_filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/post_processing/dataclass/detection_filter.py b/src/post_processing/dataclass/detection_filter.py index b2d2599..b28c023 100644 --- a/src/post_processing/dataclass/detection_filter.py +++ b/src/post_processing/dataclass/detection_filter.py @@ -9,7 +9,7 @@ from dataclasses import dataclass, fields from pathlib import Path -from typing import TYPE_CHECKING, Literal, Any +from typing import TYPE_CHECKING, Literal import yaml from pandas import Timedelta, Timestamp From 0c74f8c40e373122fd6ed374581951fdbf17771a Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Thu, 15 Jan 2026 15:26:42 +0100 Subject: [PATCH 14/30] better shade handling --- src/post_processing/utils/core_utils.py | 13 ++++--- src/post_processing/utils/plot_utils.py | 50 ++++++++++++++----------- 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/src/post_processing/utils/core_utils.py b/src/post_processing/utils/core_utils.py index 9457ef2..32c5943 100644 --- a/src/post_processing/utils/core_utils.py +++ b/src/post_processing/utils/core_utils.py @@ -305,10 +305,10 @@ def add_season_period( ) season_colors = { - "winter": "#2ce5e3", - "spring": "#4fcf50", - "summer": "#ffcf50", - "autumn": "#fb9a67", + "winter": "#84eceb", + "spring": "#91de92", + "summer": "#fce097", + "autumn": "#f9c1a5", } bin_centers = [ @@ -329,8 +329,9 @@ def add_season_period( width=(bins[i + 1] - bins[i]), color=season_colors[season], align="center", - zorder=0, - alpha=0.6, + zorder=6, + alpha=1, + linewidth=0, ) ax.set_ylim(ax.dataLim.ymin, ax.dataLim.ymax) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index f5ad0df..2a22914 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -149,11 +149,6 @@ def histo( set_plot_title(ax, 
annotators, labels) ax.set_xlim(begin, end) - if season: - if lat is None or lon is None: - get_coordinates() - add_season_period(ax, northern=lat >= 0) - if effort: shade_no_effort( ax=ax, @@ -162,6 +157,11 @@ def histo( bar_width=bin_size, ) + if season: + if lat is None or lon is None: + get_coordinates() + add_season_period(ax, northern=lat >= 0) + def _prepare_timeline_plot( df: DataFrame, @@ -675,54 +675,62 @@ def shade_no_effort( index=[i.left for i in observed.counts.index], ).tz_localize("UTC") + effort_by_end = Series( + observed.counts.values, + index=[i.left for i in observed.counts.index], + ).tz_localize("UTC") + # Align effort to plotting bins - effort_aligned = effort_by_start.reindex(bin_starts) + effort_aligned_start = effort_by_start.reindex(bin_starts).fillna(0) + effort_aligned_end = effort_by_end.reindex(bin_starts + bar_width).fillna(0) + combined_effort = .5 * effort_aligned_start.add(effort_aligned_end, fill_value=0) + max_effort = bar_width / observed.timebin_origin - effort_fraction = effort_aligned / max_effort + effort_fraction = combined_effort / max_effort - no_effort = effort_fraction == 0 - partial_effort = (effort_fraction > 0) & (effort_fraction < 1) + no_effort = effort_fraction[effort_fraction == 0] + partial_effort = effort_fraction[(effort_fraction > 0) & (effort_fraction < 1)] # Draw partial effort first (lighter) - for ts in bin_starts[partial_effort]: + for ts in partial_effort.index: start = mdates.date2num(ts - bar_width) ax.axvspan( start, start + width_days, facecolor="0.65", - alpha=0.1, + alpha=.1, linewidth=0, - zorder=0, + zorder=3, label="partial data", ) # Draw no effort on top (darker) - for ts in bin_starts[no_effort]: + for ts in no_effort.index: start = mdates.date2num(ts - bar_width) ax.axvspan( start, start + width_days, facecolor="0.45", - alpha=0.15, + alpha=.15, linewidth=0, - zorder=0, + zorder=3, label="no data", ) handles = [] - - if partial_effort.any(): + if len(partial_effort) > 0: handles.append( Patch(facecolor="0.65", alpha=0.1, label="partial data") ) - - if no_effort.any(): + if len(no_effort) > 0: handles.append( Patch(facecolor="0.45", alpha=0.15, label="no data") ) - if handles: - ax.legend(handles=handles) + ax.legend( + handles=handles, + loc="best", + ) def add_sunrise_sunset(ax: Axes, lat: float, lon: float, tz: tzinfo) -> None: From ca539cb63478f265142f23a7e517ff9a3baadedf Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Thu, 15 Jan 2026 17:40:32 +0100 Subject: [PATCH 15/30] histo legend fix --- src/post_processing/utils/plot_utils.py | 60 ++++++++++++++----------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 2a22914..4b268ee 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -133,19 +133,17 @@ def histo( bar_kwargs["label"] = legend_labels[i] ax.bar(bin_starts + offset, df.iloc[:, i], **bar_kwargs) - if kwargs.get("show_recording_OFF"): - ax.set_facecolor("lightgrey") if len(df.columns) > 1 and legend: - ax.legend(labels=legend_labels, bbox_to_anchor=(1.01, 1), loc="upper left") + legend_histo = ax.legend( + labels=legend_labels, + bbox_to_anchor=(1.01, 1), + loc="upper left", + ) + ax.add_artist(legend_histo) - y_label = ( - f"Detections{(' normalized by effort' if effort else '')}" - f"\n(detections: {timedelta_to_str(time_bin)}" - f" - bin size: {bin_size_str})" - ) - ax.set_ylabel(y_label) - # 
set_y_axis_to_percentage(ax) if effort else set_dynamic_ylim(ax, df) + ax.set_ylabel(f"Detections ({timedelta_to_str(time_bin)})") + ax.set_xlabel(f"Bin size ({bin_size_str})") set_plot_title(ax, annotators, labels) ax.set_xlim(begin, end) @@ -613,12 +611,15 @@ def get_bin_size_str(bin_size: Timedelta | BaseOffset) -> str: return str(bin_size.n) + bin_size.freqstr -def set_y_axis_to_percentage( - ax: plt.Axes, -) -> None: +def set_y_axis_to_percentage(ax: plt.Axes, max_val: float) -> None: """Set y-axis to percentage.""" - ax.yaxis.set_major_formatter(PercentFormatter(xmax=1.0)) - ax.set_yticks(np.arange(0, 1.02, 0.2)) + ax.yaxis.set_major_formatter( + plt.FuncFormatter(lambda y, _: f"{(y / max_val) * 100:.0f}%") + ) + + current_label = ax.get_ylabel() + if current_label and "%" not in current_label: + ax.set_ylabel(f"{current_label} (%)") def set_dynamic_ylim(ax: plt.Axes, @@ -691,6 +692,9 @@ def shade_no_effort( no_effort = effort_fraction[effort_fraction == 0] partial_effort = effort_fraction[(effort_fraction > 0) & (effort_fraction < 1)] + # Get legend handle + handles1, labels1 = ax.get_legend_handles_labels() + # Draw partial effort first (lighter) for ts in partial_effort.index: start = mdates.date2num(ts - bar_width) @@ -698,7 +702,7 @@ def shade_no_effort( start, start + width_days, facecolor="0.65", - alpha=.1, + alpha=0.1, linewidth=0, zorder=3, label="partial data", @@ -711,25 +715,31 @@ def shade_no_effort( start, start + width_days, facecolor="0.45", - alpha=.15, + alpha=0.15, linewidth=0, zorder=3, label="no data", ) - handles = [] + handles_effort = [] if len(partial_effort) > 0: - handles.append( - Patch(facecolor="0.65", alpha=0.1, label="partial data") - ) + handles_effort.append(Patch(facecolor="0.65", alpha=0.1, label="partial data")) if len(no_effort) > 0: - handles.append( + handles_effort.append( Patch(facecolor="0.45", alpha=0.15, label="no data") ) - if handles: + if handles_effort: + + labels_effort = [h.get_label() for h in handles_effort] + + handles = handles1 + handles_effort + labels = labels1 + labels_effort + ax.legend( - handles=handles, - loc="best", + handles, + labels, + bbox_to_anchor=(1.01, 1), + loc="upper left", ) From 360804d798b71188953a91aa52fdd5368ab04aa7 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Thu, 15 Jan 2026 17:41:38 +0100 Subject: [PATCH 16/30] histo legend fix --- src/post_processing/utils/plot_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 4b268ee..eaf3301 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -721,6 +721,7 @@ def shade_no_effort( label="no data", ) + # Add effort legend to current plot legend handles_effort = [] if len(partial_effort) > 0: handles_effort.append(Patch(facecolor="0.65", alpha=0.1, label="partial data")) @@ -729,12 +730,9 @@ def shade_no_effort( Patch(facecolor="0.45", alpha=0.15, label="no data") ) if handles_effort: - labels_effort = [h.get_label() for h in handles_effort] - handles = handles1 + handles_effort labels = labels1 + labels_effort - ax.legend( handles, labels, From ec8deeccc2753c8484435cbdf8f08a688226e647 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Thu, 15 Jan 2026 17:45:32 +0100 Subject: [PATCH 17/30] ruff fix --- src/post_processing/utils/plot_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index eaf3301..1e83042 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -12,7 +12,6 @@ from matplotlib import dates as mdates from matplotlib.dates import num2date from matplotlib.patches import Patch -from matplotlib.ticker import PercentFormatter from numpy import ceil, histogram, polyfit from pandas import ( DataFrame, From b7beb53f10d5d7a5558d0a76e337724dddad309b Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Fri, 16 Jan 2026 11:59:43 +0100 Subject: [PATCH 18/30] shade effort on scatter plot --- src/post_processing/dataclass/data_aplose.py | 19 +++++++++++-------- src/post_processing/utils/core_utils.py | 15 +++++++++++---- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/post_processing/dataclass/data_aplose.py b/src/post_processing/dataclass/data_aplose.py index 3c8d1e4..e1d3cae 100644 --- a/src/post_processing/dataclass/data_aplose.py +++ b/src/post_processing/dataclass/data_aplose.py @@ -416,18 +416,20 @@ def plot( season = kwargs.get("season", False) bin_size = kwargs.get("bin_size") - return heatmap(df=df_filtered, - ax=ax, - bin_size=bin_size, - time_range=time, - show_rise_set=show_rise_set, - season=season, - coordinates=self.coordinates, - ) + return heatmap( + df=df_filtered, + ax=ax, + bin_size=bin_size, + time_range=time, + show_rise_set=show_rise_set, + season=season, + coordinates=self.coordinates, + ) if mode == "scatter": show_rise_set = kwargs.get("show_rise_set", True) season = kwargs.get("season", False) + effort = kwargs.get("effort") return scatter(df=df_filtered, ax=ax, @@ -435,6 +437,7 @@ def plot( show_rise_set=show_rise_set, season=season, coordinates=self.coordinates, + effort=effort, ) if mode == "agreement": diff --git a/src/post_processing/utils/core_utils.py b/src/post_processing/utils/core_utils.py index 32c5943..b1df6bd 100644 --- a/src/post_processing/utils/core_utils.py +++ b/src/post_processing/utils/core_utils.py @@ -303,6 +303,10 @@ def add_season_period( start=Timestamp(ax.get_xlim()[0], unit="D").floor("1D"), end=Timestamp(ax.get_xlim()[1], unit="D").ceil("1D"), ) + bins = date_range( + start=Timestamp(ax.get_xlim()[0], unit="D"), + end=Timestamp(ax.get_xlim()[1], unit="D"), + ) season_colors = { "winter": "#84eceb", @@ -329,7 +333,7 @@ def add_season_period( width=(bins[i + 1] - bins[i]), color=season_colors[season], align="center", - zorder=6, + zorder=2, alpha=1, linewidth=0, ) @@ -520,8 +524,9 @@ def round_begin_end_timestamps( bin_size: Timedelta | BaseOffset, ) -> tuple[Timestamp, Timestamp, Timedelta]: """Return time vector given a bin size.""" - if (not isinstance(timestamp_list, list) or - not all(isinstance(ts, Timestamp) for ts in timestamp_list)): + if not isinstance(timestamp_list, list) or not all( + isinstance(ts, Timestamp) for ts in timestamp_list + ): msg = "timestamp_list must be a list[Timestamp]" raise TypeError(msg) @@ -535,6 +540,7 @@ def round_begin_end_timestamps( return start, end, bin_size if isinstance(bin_size, BaseOffset): + bin_size = offsets.Week(n=bin_size.n, weekday=timestamp_list[0].weekday()) start = bin_size.rollback(min(timestamp_list)) end = bin_size.rollforward(max(timestamp_list)) if not isinstance(bin_size, (offsets.Hour, offsets.Minute, offsets.Second)): @@ -545,7 +551,8 @@ def round_begin_end_timestamps( timestamp_range = date_range(start=start, end=end, freq=bin_size) bin_size = 
timestamp_range[1] - timestamp_range[0] - return start.floor(bin_size), end.ceil(bin_size), bin_size + # return start.floor(bin_size), end.ceil(bin_size), bin_size + return start, end, bin_size msg = "Could not get start/end timestamps." raise ValueError(msg) From 92d0a95767e371e1f730359f647baad71117fc02 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Fri, 16 Jan 2026 12:00:54 +0100 Subject: [PATCH 19/30] better season mngmt on plots --- src/post_processing/utils/plot_utils.py | 46 +++++++++++-------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 1e83042..953d7db 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -20,6 +20,7 @@ Series, Timedelta, Timestamp, + concat, date_range, ) from pandas.tseries import frequencies @@ -134,12 +135,11 @@ def histo( ax.bar(bin_starts + offset, df.iloc[:, i], **bar_kwargs) if len(df.columns) > 1 and legend: - legend_histo = ax.legend( + ax.legend( labels=legend_labels, bbox_to_anchor=(1.01, 1), loc="upper left", ) - ax.add_artist(legend_histo) ax.set_ylabel(f"Detections ({timedelta_to_str(time_bin)})") ax.set_xlabel(f"Bin size ({bin_size_str})") @@ -149,9 +149,7 @@ def histo( if effort: shade_no_effort( ax=ax, - bin_starts=df.index, observed=effort, - bar_width=bin_size, ) if season: @@ -200,7 +198,6 @@ def _prepare_timeline_plot( ax.set_ylim(0, 24) ax.set_yticks(range(0, 25, 2)) ax.set_ylabel("Hour") - ax.set_xlabel("Date") ax.grid(color="k", linestyle="-", linewidth=0.2) set_plot_title(ax=ax, annotators=annotators, labels=labels) @@ -217,7 +214,7 @@ def scatter( df: DataFrame, ax: Axes, time_range: DatetimeIndex, - **kwargs: bool | tuple[float, float], + **kwargs: bool | tuple[float, float] | RecordingPeriod, ) -> None: """Scatter-plot of detections for a given annotator and label. @@ -241,6 +238,7 @@ def scatter( show_rise_set = kwargs.get("show_rise_set", False) season = kwargs.get("season", False) coordinates = kwargs.get("coordinates", False) + effort = kwargs.get("effort", False) _prepare_timeline_plot( df=df, @@ -280,6 +278,12 @@ def scatter( framealpha=0.6, ) + if effort: + shade_no_effort( + ax=ax, + observed=effort, + ) + def heatmap(df: DataFrame, ax: Axes, @@ -646,9 +650,7 @@ def set_plot_title(ax: plt.Axes, annotators: list[str], labels: list[str]) -> No def shade_no_effort( ax: plt.Axes, - bin_starts: Index, observed: RecordingPeriod, - bar_width: Timedelta, ) -> None: """Shade areas of the plot where no observation effort was made. @@ -656,38 +658,32 @@ def shade_no_effort( ---------- ax : plt.Axes The axes on which to draw the shaded regions. - bin_starts : Index - A datetime index representing the start times of each bin. observed : RecordingPeriod A Series with observation counts or flags, indexed by datetime. Should be aligned or re-indexable to `bin_starts`. - bar_width : Timedelta - Width of each time bin. Used to compute the span of the shaded areas. 
-    """
-    """Shade areas of the plot where no observation effort was made."""
-    width_days = bar_width.total_seconds() / 86400
 
+    """
     # Convert effort IntervalIndex → DatetimeIndex (bin starts)
     effort_by_start = Series(
         observed.counts.values,
         index=[i.left for i in observed.counts.index],
-    ).tz_localize("UTC")
-
+    )
     effort_by_end = Series(
         observed.counts.values,
         index=[i.left for i in observed.counts.index],
-    ).tz_localize("UTC")
+    )
+    combined_effort = .5 * effort_by_start.add(effort_by_end, fill_value=0)
 
-    # Align effort to plotting bins
-    effort_aligned_start = effort_by_start.reindex(bin_starts).fillna(0)
-    effort_aligned_end = effort_by_end.reindex(bin_starts + bar_width).fillna(0)
-    combined_effort = .5 * effort_aligned_start.add(effort_aligned_end, fill_value=0)
+    bar_width = effort_by_start.index[1] - effort_by_start.index[0]
+    width_days = bar_width.total_seconds() / 86400
 
     max_effort = bar_width / observed.timebin_origin
     effort_fraction = combined_effort / max_effort
 
+    first_elem = Series([0], index=[effort_fraction.index[0] - bar_width])
+    last_elem = Series([0], index=[effort_fraction.index[-1] + bar_width])
+    effort_fraction = concat([first_elem, effort_fraction, last_elem])
+
     no_effort = effort_fraction[effort_fraction == 0]
     partial_effort = effort_fraction[(effort_fraction > 0) & (effort_fraction < 1)]
 
@@ -703,7 +699,7 @@ def shade_no_effort(
             facecolor="0.65",
             alpha=0.1,
             linewidth=0,
-            zorder=3,
+            zorder=1,
             label="partial data",
         )
 
@@ -716,7 +712,7 @@ def shade_no_effort(
             facecolor="0.45",
             alpha=0.15,
             linewidth=0,
-            zorder=3,
+            zorder=1,
             label="no data",
         )
 

From 552c7297acbee8701662617922041c9040bdba92 Mon Sep 17 00:00:00 2001
From: mathieudpnt <108517594+mathieudpnt@users.noreply.github.com>
Date: Mon, 19 Jan 2026 11:58:29 +0100
Subject: [PATCH 20/30] differentiate LT and ST data plots

---
 src/post_processing/utils/core_utils.py |  8 ++------
 src/post_processing/utils/plot_utils.py |  1 -
 tests/test_plot_utils.py                | 17 ++++++++---------
 3 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/src/post_processing/utils/core_utils.py b/src/post_processing/utils/core_utils.py
index b1df6bd..a1ae91f 100644
--- a/src/post_processing/utils/core_utils.py
+++ b/src/post_processing/utils/core_utils.py
@@ -299,10 +299,6 @@ def add_season_period(
         msg = "Axes have no data"
         raise ValueError(msg)
 
-    bins = date_range(
-        start=Timestamp(ax.get_xlim()[0], unit="D").floor("1D"),
-        end=Timestamp(ax.get_xlim()[1], unit="D").ceil("1D"),
-    )
     bins = date_range(
         start=Timestamp(ax.get_xlim()[0], unit="D"),
         end=Timestamp(ax.get_xlim()[1], unit="D"),
@@ -540,7 +536,6 @@ def round_begin_end_timestamps(
         return start, end, bin_size
 
     if isinstance(bin_size, BaseOffset):
-        bin_size = offsets.Week(n=bin_size.n, weekday=timestamp_list[0].weekday())
         start = bin_size.rollback(min(timestamp_list))
         end = bin_size.rollforward(max(timestamp_list))
         if not isinstance(bin_size, (offsets.Hour, offsets.Minute, offsets.Second)):
@@ -551,7 +546,8 @@ def round_begin_end_timestamps(
         timestamp_range = date_range(start=start, end=end, freq=bin_size)
         bin_size = timestamp_range[1] - timestamp_range[0]
 
-    # return start.floor(bin_size), end.ceil(bin_size), bin_size
+    if bin_size.resolution_string in ["s", "min", "h"]:
+        return start.floor(bin_size), end.ceil(bin_size), bin_size
     return start, end, bin_size
 
     msg = "Could not get start/end timestamps."
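
As an aside on the hunk above: after this change, `round_begin_end_timestamps` snaps fixed-size `Timedelta` bins with `floor`/`ceil` (only for second/minute/hour resolutions), while calendar `BaseOffset` bins are anchored with `rollback`/`rollforward`. A minimal sketch of the two pandas mechanisms involved, separate from the patch itself and using made-up timestamps:

from pandas import Timedelta, Timestamp, date_range
from pandas.tseries import offsets

timestamps = [Timestamp("2024-02-03 10:30"), Timestamp("2024-04-18 22:05")]

# Fixed-size bin: snap begin/end to the bin edges with floor/ceil.
bin_td = Timedelta("1h")
start_td = min(timestamps).floor(bin_td)  # 2024-02-03 10:00
end_td = max(timestamps).ceil(bin_td)     # 2024-04-18 23:00

# Calendar offset bin: anchor on calendar boundaries instead.
bin_off = offsets.Week(weekday=0)               # weeks starting on Monday
start_off = bin_off.rollback(min(timestamps))   # Monday 2024-01-29
end_off = bin_off.rollforward(max(timestamps))  # Monday 2024-04-22
bins = date_range(start=start_off, end=end_off, freq=bin_off)

A weekly offset anchors on a weekday, so `rollback`/`rollforward` land on calendar boundaries that `floor`/`ceil` on a plain 7-day `Timedelta` cannot express, which is why the two bin types take different paths here.
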
diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 953d7db..9c863f4 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -16,7 +16,6 @@ from pandas import ( DataFrame, DatetimeIndex, - Index, Series, Timedelta, Timestamp, diff --git a/tests/test_plot_utils.py b/tests/test_plot_utils.py index ffabbbd..4a7e9cc 100644 --- a/tests/test_plot_utils.py +++ b/tests/test_plot_utils.py @@ -58,16 +58,15 @@ def test_wrap_xtick_labels_no_spaces(): assert wrapped_labels[0] == expected -def test_y_axis_formatter_and_ticks(): +def test_set_y_axis_to_percentage(): fig, ax = plt.subplots() - - set_y_axis_to_percentage(ax) - - assert isinstance(ax.yaxis.get_major_formatter(), PercentFormatter) - assert ax.yaxis.get_major_formatter().xmax == 1.0 - - expected_ticks = arange(0, 1.02, 0.2) - testing.assert_allclose(ax.get_yticks(), expected_ticks) + ax.set_ylabel("Accuracy") + set_y_axis_to_percentage(ax, max_val=200) + formatter = ax.yaxis.get_major_formatter() + assert formatter(100, None) == "50%" + assert formatter(200, None) == "100%" + assert ax.get_ylabel() == "Accuracy (%)" + plt.close(fig) def test_single_annotator_multiple_labels(): From d0cda89ec0f61857fef98fc515780ae3bd758f20 Mon Sep 17 00:00:00 2001 From: mathieudpnt <108517594+mathieudpnt@users.noreply.github.com> Date: Mon, 19 Jan 2026 12:00:21 +0100 Subject: [PATCH 21/30] test plot utils import fix --- tests/test_plot_utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_plot_utils.py b/tests/test_plot_utils.py index 4a7e9cc..4306c38 100644 --- a/tests/test_plot_utils.py +++ b/tests/test_plot_utils.py @@ -1,8 +1,5 @@ - import matplotlib.pyplot as plt import pytest -from matplotlib.ticker import PercentFormatter -from numpy import arange, testing from post_processing.utils.plot_utils import ( _wrap_xtick_labels, From 4b9a2f1b43bec08e65307fc6e71d0a18522da80f Mon Sep 17 00:00:00 2001 From: mathieudpnt <108517594+mathieudpnt@users.noreply.github.com> Date: Mon, 19 Jan 2026 15:51:42 +0100 Subject: [PATCH 22/30] ruff simplification --- src/post_processing/utils/plot_utils.py | 114 +++++++++++++----------- 1 file changed, 60 insertions(+), 54 deletions(-) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 9c863f4..eaee63c 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -366,7 +366,7 @@ def heatmap(df: DataFrame, ) if coordinates and season: - lat, lon = coordinates + lat, _ = coordinates add_season_period(ax, northern=lat >= 0) bin_size_str = get_bin_size_str(bin_size) @@ -494,41 +494,33 @@ def agreement( """ labels, annotators = get_labels_and_annotators(df) - datetimes1 = list( - df[(df["annotator"] == annotators[0]) & (df["annotation"] == labels[0])][ - "start_datetime" - ], - ) - datetimes2 = list( - df[(df["annotator"] == annotators[1]) & (df["annotation"] == labels[1])][ - "start_datetime" - ], - ) + datetimes = [ + list( + df[ + (df["annotator"] == annotators[i]) & (df["annotation"] == labels[i]) + ]["start_datetime"], + ) + for i in range(2) + ] # scatter plot n_annot_max = bin_size.total_seconds() / df["end_time"].iloc[0] - start = df["start_datetime"].min() - stop = df["start_datetime"].max() - freq = ( bin_size if isinstance(bin_size, Timedelta) else str(bin_size.n) + bin_size.name ) bins = date_range( - start=start.floor(bin_size), - end=stop.ceil(bin_size), + start=df["start_datetime"].min().floor(bin_size), + 
end=df["start_datetime"].max().ceil(bin_size), freq=freq, ) - hist1, _ = histogram(datetimes1, bins=bins) - hist2, _ = histogram(datetimes2, bins=bins) - df_hist = ( DataFrame( { - annotators[0]: hist1, - annotators[1]: hist2, + annotators[0]: histogram(datetimes[0], bins=bins)[0], + annotators[1]: histogram(datetimes[1], bins=bins)[0], }, ) / n_annot_max @@ -616,7 +608,7 @@ def get_bin_size_str(bin_size: Timedelta | BaseOffset) -> str: def set_y_axis_to_percentage(ax: plt.Axes, max_val: float) -> None: """Set y-axis to percentage.""" ax.yaxis.set_major_formatter( - plt.FuncFormatter(lambda y, _: f"{(y / max_val) * 100:.0f}%") + plt.FuncFormatter(lambda y, _: f"{(y / max_val) * 100:.0f}%"), ) current_label = ax.get_ylabel() @@ -667,17 +659,12 @@ def shade_no_effort( observed.counts.values, index=[i.left for i in observed.counts.index], ) - effort_by_end = Series( - observed.counts.values, - index=[i.left for i in observed.counts.index], - ) - combined_effort = .5 * effort_by_start.add(effort_by_end, fill_value=0) bar_width = effort_by_start.index[1] - effort_by_start.index[0] width_days = bar_width.total_seconds() / 86400 max_effort = bar_width / observed.timebin_origin - effort_fraction = combined_effort / max_effort + effort_fraction = effort_by_start / max_effort first_elem = Series([0], index=[effort_fraction.index[0] - bar_width]) last_elem = Series([0], index=[effort_fraction.index[-1] + bar_width]) @@ -689,39 +676,35 @@ def shade_no_effort( # Get legend handle handles1, labels1 = ax.get_legend_handles_labels() - # Draw partial effort first (lighter) - for ts in partial_effort.index: - start = mdates.date2num(ts - bar_width) - ax.axvspan( - start, - start + width_days, - facecolor="0.65", - alpha=0.1, - linewidth=0, - zorder=1, - label="partial data", - ) + _draw_effort_spans( + ax=ax, + effort_index=partial_effort.index, + bar_width=bar_width, + width_days=width_days, + facecolor="0.65", + alpha=0.1, + label="partial data", + ) - # Draw no effort on top (darker) - for ts in no_effort.index: - start = mdates.date2num(ts - bar_width) - ax.axvspan( - start, - start + width_days, - facecolor="0.45", - alpha=0.15, - linewidth=0, - zorder=1, - label="no data", - ) + _draw_effort_spans( + ax=ax, + effort_index=no_effort.index, + bar_width=bar_width, + width_days=width_days, + facecolor="0.45", + alpha=0.15, + label="no data", + ) # Add effort legend to current plot legend handles_effort = [] if len(partial_effort) > 0: - handles_effort.append(Patch(facecolor="0.65", alpha=0.1, label="partial data")) + handles_effort.append( + Patch(facecolor="0.65", alpha=0.1, label="partial data"), + ) if len(no_effort) > 0: handles_effort.append( - Patch(facecolor="0.45", alpha=0.15, label="no data") + Patch(facecolor="0.45", alpha=0.15, label="no data"), ) if handles_effort: labels_effort = [h.get_label() for h in handles_effort] @@ -735,6 +718,29 @@ def shade_no_effort( ) +def _draw_effort_spans( + ax: plt.Axes, + effort_index: DatetimeIndex, + bar_width: Timedelta, + width_days: float, + facecolor: str, + alpha: float, + label: str, +) -> None: + """Draw vertical lines for effort plot.""" + for ts in effort_index: + start = mdates.date2num(ts - bar_width) + ax.axvspan( + start, + start + width_days, + facecolor=facecolor, + alpha=alpha, + linewidth=0, + zorder=1, + label=label, + ) + + def add_sunrise_sunset(ax: Axes, lat: float, lon: float, tz: tzinfo) -> None: """Display sunrise/sunset times on plot.""" x_min, x_max = ax.get_xlim() From bc647a664945e1ff403134b90b1a1ad8feb959b5 Mon Sep 17 00:00:00 
2001 From: mathieudpnt <108517594+mathieudpnt@users.noreply.github.com> Date: Mon, 19 Jan 2026 15:56:50 +0100 Subject: [PATCH 23/30] draw effort positional argument --- src/post_processing/utils/plot_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index eaee63c..97733b1 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -723,6 +723,7 @@ def _draw_effort_spans( effort_index: DatetimeIndex, bar_width: Timedelta, width_days: float, + *, facecolor: str, alpha: float, label: str, From cb07568de99b52799987f5a6a7966095a1411dc9 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Mon, 19 Jan 2026 16:27:11 +0100 Subject: [PATCH 24/30] syntax --- src/post_processing/utils/core_utils.py | 42 ++++++++++++++++--------- src/post_processing/utils/plot_utils.py | 12 +++---- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/src/post_processing/utils/core_utils.py b/src/post_processing/utils/core_utils.py index a1ae91f..7951192 100644 --- a/src/post_processing/utils/core_utils.py +++ b/src/post_processing/utils/core_utils.py @@ -3,7 +3,7 @@ from __future__ import annotations import json -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import astral import easygui @@ -35,7 +35,7 @@ ) if TYPE_CHECKING: - from datetime import tzinfo + from datetime import datetime, timedelta, tzinfo from pathlib import Path import matplotlib.pyplot as plt @@ -44,10 +44,10 @@ def get_season(ts: Timestamp, *, northern: bool = True) -> tuple[str, int]: """Determine the meteorological season from a Timestamp. - In the Northern hemisphere + In the Northern Hemisphere Winter: Dec-Feb, Spring: Mar-May, Summer: Jun-Aug, Autumn: Sep-Nov - In the Southern hemisphere + In the Southern Hemisphere Winter: Jun-Aug, Spring: Sep-Nov, Summer: Dec-Feb, Autumn: Mar-May Parameters @@ -133,8 +133,18 @@ def get_sun_times( dt_sunset = Timestamp(sunset(gps.observer, date=date)).tz_convert(tz) # Convert sunrise and sunset to decimal hours - h_sunrise.append(dt_sunrise.hour + dt_sunrise.minute / 60 + dt_sunrise.second / 3600 + dt_sunrise.microsecond / 3_600_000_000) - h_sunset.append(dt_sunset.hour + dt_sunset.minute / 60 + dt_sunset.second / 3600 + dt_sunset.microsecond / 3_600_000_000) + h_sunrise.append( + dt_sunrise.hour + + dt_sunrise.minute / 60 + + dt_sunrise.second / 3600 + + dt_sunrise.microsecond / 3_600_000_000, + ) + h_sunset.append( + dt_sunset.hour + + dt_sunset.minute / 60 + + dt_sunset.second / 3600 + + dt_sunset.microsecond / 3_600_000_000, + ) return h_sunrise, h_sunset @@ -215,8 +225,12 @@ def add_weak_detection( if not max_time: max_time = Timedelta(get_max_time(df), "s") - df["start_datetime"] = [strftime_osmose_format(start) for start in df["start_datetime"]] - df["end_datetime"] = [strftime_osmose_format(stop) for stop in df["end_datetime"]] + df["start_datetime"] = [ + strftime_osmose_format(start) for start in df["start_datetime"] + ] + df["end_datetime"] = [ + strftime_osmose_format(stop) for stop in df["end_datetime"] + ] for ant in annotators: for lbl in labels: @@ -259,12 +273,12 @@ def add_weak_detection( def json2df(json_path: Path) -> DataFrame: - """Convert a metadatax json file into a DataFrame. + """Convert a metadatax JSON file into a DataFrame. 
Parameters ---------- json_path: Path - Json file path + JSON file path """ with json_path.open(encoding="utf-8") as f: @@ -480,7 +494,7 @@ def get_labels_and_annotators(df: DataFrame) -> tuple[list, list]: def localize_timestamps(timestamps: list[Timestamp], tz: tzinfo) -> list[Timestamp]: - """Localize timestamps if necessary.""" + """Localise timestamps if necessary.""" localized = [] for ts in timestamps: if ts.tzinfo is None or ts.tzinfo.utcoffset(ts) is None: @@ -518,7 +532,7 @@ def get_time_range_and_bin_size( def round_begin_end_timestamps( timestamp_list: list[Timestamp], bin_size: Timedelta | BaseOffset, -) -> tuple[Timestamp, Timestamp, Timedelta]: +) -> tuple[Any, Any, Any]: """Return time vector given a bin size.""" if not isinstance(timestamp_list, list) or not all( isinstance(ts, Timestamp) for ts in timestamp_list @@ -546,7 +560,7 @@ def round_begin_end_timestamps( timestamp_range = date_range(start=start, end=end, freq=bin_size) bin_size = timestamp_range[1] - timestamp_range[0] - if bin_size.resolution_string in ["s", "min", "h"]: + if bin_size.resolution_string in {"s", "min", "h"}: return start.floor(bin_size), end.ceil(bin_size), bin_size return start, end, bin_size @@ -555,7 +569,7 @@ def round_begin_end_timestamps( def timedelta_to_str(td: Timedelta) -> str: - """From a Timedelta to corresponding string.""" + """From a Timedelta to the corresponding string.""" seconds = int(td.total_seconds()) if seconds % 86400 == 0: diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 97733b1..3c3a5cc 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -75,15 +75,15 @@ def histo( - legend: bool Whether to show the legend. - color: str | list[str] - Color or list of colors for the histogram bars. - If not provided, default colors will be used. + Colour or list of colours for the histogram bars. + If not provided, default colours will be used. - season: bool Whether to show the season. - coordinates: tuple[float, float] The coordinates of the plotted detections. - effort: RecordingPeriod Object corresponding to the observation effort. - If provided, data will be normalized by observation effort. + If provided, data will be normalised by observation effort. """ labels, annotators = zip(*[col.rsplit("-", 1) for col in df.columns], strict=False) @@ -473,7 +473,7 @@ def agreement( bin_size: Timedelta | BaseOffset, ax: plt.Axes, ) -> None: - """Compute and visualize agreement between two annotators. + """Compute and visualise agreement between two annotators. This function compares annotation timestamps from two annotators over a time range. It also fits and plots a linear regression line and displays the coefficient @@ -556,8 +556,8 @@ def timeline( Matplotlib axes object where the scatterplot and regression line will be drawn. **kwargs: Additional keyword arguments depending on the mode. - color: str | list[str] - Color or list of colors for the histogram bars. - If not provided, default colors will be used. + Colour or list of colours for the histogram bars. + If not provided, default colours will be used. 
""" color = kwargs.get("color") From 9d3188a1e0655b9ebb44a8c485df88ffd0b85f7e Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Tue, 20 Jan 2026 14:35:58 +0100 Subject: [PATCH 25/30] hotfix partial/no data bars --- src/post_processing/utils/plot_utils.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 3c3a5cc..c4f79fb 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -660,11 +660,18 @@ def shade_no_effort( index=[i.left for i in observed.counts.index], ) - bar_width = effort_by_start.index[1] - effort_by_start.index[0] + effort_by_end = Series( + observed.counts.values, + index=[i.right for i in observed.counts.index], + ) + + combined_effort = 0.5 * effort_by_start.add(effort_by_end, fill_value=0) + + bar_width = combined_effort.index[1] - combined_effort.index[0] width_days = bar_width.total_seconds() / 86400 max_effort = bar_width / observed.timebin_origin - effort_fraction = effort_by_start / max_effort + effort_fraction = combined_effort / max_effort first_elem = Series([0], index=[effort_fraction.index[0] - bar_width]) last_elem = Series([0], index=[effort_fraction.index[-1] + bar_width]) @@ -679,7 +686,6 @@ def shade_no_effort( _draw_effort_spans( ax=ax, effort_index=partial_effort.index, - bar_width=bar_width, width_days=width_days, facecolor="0.65", alpha=0.1, @@ -689,7 +695,6 @@ def shade_no_effort( _draw_effort_spans( ax=ax, effort_index=no_effort.index, - bar_width=bar_width, width_days=width_days, facecolor="0.45", alpha=0.15, @@ -721,7 +726,6 @@ def shade_no_effort( def _draw_effort_spans( ax: plt.Axes, effort_index: DatetimeIndex, - bar_width: Timedelta, width_days: float, *, facecolor: str, @@ -730,7 +734,7 @@ def _draw_effort_spans( ) -> None: """Draw vertical lines for effort plot.""" for ts in effort_index: - start = mdates.date2num(ts - bar_width) + start = mdates.date2num(ts) ax.axvspan( start, start + width_days, From 4c2fc0d62f66693ff6247172bb26eda8a57848f7 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Tue, 20 Jan 2026 14:39:12 +0100 Subject: [PATCH 26/30] ruff import --- src/post_processing/utils/core_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/post_processing/utils/core_utils.py b/src/post_processing/utils/core_utils.py index 7951192..c149155 100644 --- a/src/post_processing/utils/core_utils.py +++ b/src/post_processing/utils/core_utils.py @@ -35,7 +35,7 @@ ) if TYPE_CHECKING: - from datetime import datetime, timedelta, tzinfo + from datetime import tzinfo from pathlib import Path import matplotlib.pyplot as plt From c0e968f178caba05e4b8205c8b82f0b265b727e8 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Tue, 20 Jan 2026 16:24:19 +0100 Subject: [PATCH 27/30] RecordingPeriod hotfix --- .../dataclass/recording_period.py | 20 +++++++++---------- src/post_processing/utils/plot_utils.py | 11 ++-------- tests/test_recording_period.py | 20 ++++++++++--------- 3 files changed, 23 insertions(+), 28 deletions(-) diff --git a/src/post_processing/dataclass/recording_period.py b/src/post_processing/dataclass/recording_period.py index 637733e..f19fab2 100644 --- a/src/post_processing/dataclass/recording_period.py +++ b/src/post_processing/dataclass/recording_period.py @@ -9,14 +9,15 @@ from typing import 
TYPE_CHECKING from pandas import ( + IntervalIndex, Series, Timedelta, date_range, - interval_range, read_csv, to_datetime, ) +from post_processing.utils.core_utils import round_begin_end_timestamps from post_processing.utils.filtering_utils import ( find_delimiter, ) @@ -101,7 +102,7 @@ def from_path( msg = f"CSV is missing required columns: {', '.join(sorted(missing))}" raise ValueError(msg) - # Normalize timezones: convert to UTC, then remove tz info (naive) + # Normalise timezones: convert to UTC, then remove tz info (naive) for col in [ "start_recording", "end_recording", @@ -134,11 +135,11 @@ def from_path( freq=origin, ) - # Initialize effort vector (0 = no recording, 1 = recording) + # Initialise effort vector (0 = no recording, 1 = recording) # Compare each timestamp to all intervals in a vectorized manner effort = Series(0, index=time_index) - # Vectorized interval coverage + # Vectorised interval coverage t_vals = time_index.to_numpy()[:, None] start_vals = df["effective_start_recording"].to_numpy() end_vals = df["effective_end_recording"].to_numpy() @@ -148,13 +149,12 @@ def from_path( effort[:] = covered.any(axis=1).astype(int) # Aggregate effort into user-defined bin_size - counts = effort.resample(bin_size).sum() + counts = effort.resample(bin_size, closed="left", label="left").sum() - # Replace index with IntervalIndex for downstream compatibility - counts.index = interval_range( - start=counts.index[0], - periods=len(counts), - freq=bin_size, + counts.index = IntervalIndex.from_arrays( + counts.index, + counts.index + + round_begin_end_timestamps(list(counts.index), bin_size)[-1], closed="left", ) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index c4f79fb..3a7f486 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -660,18 +660,11 @@ def shade_no_effort( index=[i.left for i in observed.counts.index], ) - effort_by_end = Series( - observed.counts.values, - index=[i.right for i in observed.counts.index], - ) - - combined_effort = 0.5 * effort_by_start.add(effort_by_end, fill_value=0) - - bar_width = combined_effort.index[1] - combined_effort.index[0] + bar_width = effort_by_start.index[1] - effort_by_start.index[0] width_days = bar_width.total_seconds() / 86400 max_effort = bar_width / observed.timebin_origin - effort_fraction = combined_effort / max_effort + effort_fraction = effort_by_start / max_effort first_elem = Series([0], index=[effort_fraction.index[0] - bar_width]) last_elem = Series([0], index=[effort_fraction.index[-1] + bar_width]) diff --git a/tests/test_recording_period.py b/tests/test_recording_period.py index 41b9e59..73dd55e 100644 --- a/tests/test_recording_period.py +++ b/tests/test_recording_period.py @@ -1,10 +1,11 @@ -import pandas as pd +from pandas import Interval, Timestamp from pandas.tseries import frequencies +from post_processing.dataclass.detection_filter import DetectionFilter from post_processing.dataclass.recording_period import RecordingPeriod -def test_recording_period_with_gaps(recording_planning_config): +def test_recording_period_with_gaps(recording_planning_config: DetectionFilter) -> None: """RecordingPeriod correctly represents long gaps with no recording effort. 
The planning contains two recording blocks separated by ~3 weeks with no @@ -27,22 +28,23 @@ def test_recording_period_with_gaps(recording_planning_config): assert counts.index.is_interval() assert counts.min() >= 0 - # One week = 7 * 24 hours (origin = 1min) + # One week = 7 * 24 hours (origin = 1 min) full_week_minutes = 7 * 24 * 60 # ------------------------------------------------------------------ # Helper: find the bin covering a given timestamp # ------------------------------------------------------------------ - def bin_covering(ts: pd.Timestamp) -> pd.Interval: + def bin_covering(ts: Timestamp) -> Interval: for interval in counts.index: if interval.left <= ts < interval.right: return interval - raise AssertionError(f"No bin covers timestamp {ts}") + msg = f"No bin covers timestamp {ts}" + raise AssertionError(msg) # ------------------------------------------------------------------ # Week fully inside the long gap → zero effort # ------------------------------------------------------------------ - gap_ts = pd.Timestamp("2024-04-21") + gap_ts = Timestamp("2024-04-21") gap_bin = bin_covering(gap_ts) assert counts.loc[gap_bin] == 0 @@ -50,7 +52,7 @@ def bin_covering(ts: pd.Timestamp) -> pd.Interval: # ------------------------------------------------------------------ # Week fully inside recording → full effort # ------------------------------------------------------------------ - full_effort_ts = pd.Timestamp("2024-02-04") + full_effort_ts = Timestamp("2024-02-04") full_bin = bin_covering(full_effort_ts) assert counts.loc[full_bin] == full_week_minutes @@ -58,7 +60,7 @@ def bin_covering(ts: pd.Timestamp) -> pd.Interval: # ------------------------------------------------------------------ # Week overlapping recording stop → partial effort # ------------------------------------------------------------------ - partial_ts = pd.Timestamp("2024-04-14") + partial_ts = Timestamp("2024-04-14") partial_bin = bin_covering(partial_ts) - assert counts.loc[partial_bin] == 1560 + assert counts.loc[partial_bin] == 0 From dcb70fb141760c4d75a1678c2e3fd8ad7257a972 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Tue, 20 Jan 2026 16:25:29 +0100 Subject: [PATCH 28/30] syntax --- src/post_processing/dataclass/recording_period.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/post_processing/dataclass/recording_period.py b/src/post_processing/dataclass/recording_period.py index f19fab2..d0d48b7 100644 --- a/src/post_processing/dataclass/recording_period.py +++ b/src/post_processing/dataclass/recording_period.py @@ -42,7 +42,7 @@ def from_path( *, bin_size: Timedelta | BaseOffset, ) -> RecordingPeriod: - """Vectorized creation of recording coverage from CSV with start/end datetimes. + """Vectorised creation of recording coverage from CSV with start/end datetimes. This method reads a CSV with columns: - "start_recording" @@ -61,7 +61,7 @@ def from_path( - `timestamp_file`: path to CSV - `timebin_origin`: Timedelta resolution of detections bin_size : Timedelta or BaseOffset - Size of the aggregation bin (e.g., Timedelta("1H") or "1D"). + Size of the aggregation bin (e.g. Timedelta("1H") or "1D"). 
         Returns
         -------
@@ -121,7 +121,9 @@ def from_path(
         ].min(axis=1)
 
         # Remove rows with no actual recording interval
-        df = df.loc[df["effective_start_recording"] < df["effective_end_recording"]].copy()
+        df = df.loc[
+            df["effective_start_recording"] < df["effective_end_recording"]
+        ].copy()
 
         if df.empty:
             msg = "No valid recording intervals after deployment intersection."
@@ -136,7 +138,7 @@ def from_path(
         )
 
         # Initialise effort vector (0 = no recording, 1 = recording)
-        # Compare each timestamp to all intervals in a vectorized manner
+        # Compare each timestamp to all intervals in a vectorised manner
         effort = Series(0, index=time_index)
 
         # Vectorised interval coverage

From 47b170d89ecc620618bfd0e24533f7a0776d1202 Mon Sep 17 00:00:00 2001
From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com>
Date: Tue, 20 Jan 2026 17:20:43 +0100
Subject: [PATCH 29/30] legend histo shade

---
 src/post_processing/utils/plot_utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py
index 3a7f486..8d12fa3 100644
--- a/src/post_processing/utils/plot_utils.py
+++ b/src/post_processing/utils/plot_utils.py
@@ -149,6 +149,7 @@ def histo(
         shade_no_effort(
             ax=ax,
             observed=effort,
+            legend=legend,
         )
 
     if season:
@@ -642,6 +643,7 @@ def set_plot_title(ax: plt.Axes, annotators: list[str], labels: list[str]) -> No
 def shade_no_effort(
     ax: plt.Axes,
     observed: RecordingPeriod,
+    legend: bool,
 ) -> None:
     """Shade areas of the plot where no observation effort was made.
 
@@ -652,6 +654,8 @@ def shade_no_effort(
     observed : RecordingPeriod
         A Series with observation counts or flags, indexed by datetime.
         Should be aligned or re-indexable to `bin_starts`.
+    legend : bool
+        Whether to add the legend entry for the shaded regions.
 
     """
     # Convert effort IntervalIndex → DatetimeIndex (bin starts)
@@ -704,7 +708,7 @@ def shade_no_effort(
             handles_effort.append(
                 Patch(facecolor="0.45", alpha=0.15, label="no data"),
             )
-    if handles_effort:
+    if handles_effort and legend:
         labels_effort = [h.get_label() for h in handles_effort]
         handles = handles1 + handles_effort
         labels = labels1 + labels_effort

From ef0e2b6474f8044de561f39a70372178892de9e5 Mon Sep 17 00:00:00 2001
From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com>
Date: Thu, 22 Jan 2026 11:52:18 +0100
Subject: [PATCH 30/30] improve test_recording_period

---
 tests/test_recording_period.py | 82 ++++++++++++++++++++--------------
 1 file changed, 48 insertions(+), 34 deletions(-)

diff --git a/tests/test_recording_period.py b/tests/test_recording_period.py
index 73dd55e..064c6b5 100644
--- a/tests/test_recording_period.py
+++ b/tests/test_recording_period.py
@@ -1,5 +1,4 @@
-from pandas import Interval, Timestamp
-from pandas.tseries import frequencies
+from pandas import Timedelta, read_csv, to_datetime
 
 from post_processing.dataclass.detection_filter import DetectionFilter
 from post_processing.dataclass.recording_period import RecordingPeriod
@@ -14,53 +13,68 @@ def test_recording_period_with_gaps(recording_planning_config: DetectionFilter)
     - weeks with partial effort,
     - weeks with zero effort.
""" + histo_x_bin_size = Timedelta("7D") recording_period = RecordingPeriod.from_path( config=recording_planning_config, - bin_size=frequencies.to_offset("1W"), + bin_size=histo_x_bin_size, ) counts = recording_period.counts + origin = recording_planning_config.timebin_origin + nb_timebin_origin_per_histo_x_bin_size = int(histo_x_bin_size / origin) + # Computes effective recording intervals from recording planning csv + df_planning = read_csv( + recording_planning_config.timestamp_file, + parse_dates=[ + "start_recording", + "end_recording", + "start_deployment", + "end_deployment", + ], + ) + for col in [ + "start_recording", + "end_recording", + "start_deployment", + "end_deployment", + ]: + df_planning[col] = ( + to_datetime(df_planning[col], utc=True) + .dt.tz_convert(None) + ) + + df_planning["start"] = df_planning[ + ["start_recording", "start_deployment"] + ].max(axis=1) + df_planning["end"] = df_planning[ + ["end_recording", "end_deployment"] + ].min(axis=1) + + planning = df_planning.loc[df_planning["start"] < df_planning["end"]] # ------------------------------------------------------------------ # Structural checks # ------------------------------------------------------------------ assert not counts.empty assert counts.index.is_interval() assert counts.min() >= 0 - - # One week = 7 * 24 hours (origin = 1 min) - full_week_minutes = 7 * 24 * 60 - - # ------------------------------------------------------------------ - # Helper: find the bin covering a given timestamp - # ------------------------------------------------------------------ - def bin_covering(ts: Timestamp) -> Interval: - for interval in counts.index: - if interval.left <= ts < interval.right: - return interval - msg = f"No bin covers timestamp {ts}" - raise AssertionError(msg) + assert counts.max() <= nb_timebin_origin_per_histo_x_bin_size # ------------------------------------------------------------------ - # Week fully inside the long gap → zero effort + # Find overlap (number of timebin_origin) within each effective recording period # ------------------------------------------------------------------ - gap_ts = Timestamp("2024-04-21") + for interval in counts.index: + bin_start = interval.left + bin_end = interval.right - gap_bin = bin_covering(gap_ts) - assert counts.loc[gap_bin] == 0 + # Compute overlap with all recording intervals + overlap_start = planning["start"].clip(lower=bin_start, upper=bin_end) + overlap_end = planning["end"].clip(lower=bin_start, upper=bin_end) - # ------------------------------------------------------------------ - # Week fully inside recording → full effort - # ------------------------------------------------------------------ - full_effort_ts = Timestamp("2024-02-04") - - full_bin = bin_covering(full_effort_ts) - assert counts.loc[full_bin] == full_week_minutes - - # ------------------------------------------------------------------ - # Week overlapping recording stop → partial effort - # ------------------------------------------------------------------ - partial_ts = Timestamp("2024-04-14") + overlap = (overlap_end - overlap_start).clip(lower=Timedelta(0)) + expected_minutes = int(overlap.sum() / recording_planning_config.timebin_origin) - partial_bin = bin_covering(partial_ts) - assert counts.loc[partial_bin] == 0 + assert counts.loc[interval] == expected_minutes, ( + f"Mismatch for bin {interval}: " + f"expected {expected_minutes}, got {counts.loc[interval]}" + )