From 735e4c31de0dfbadbbc0a6eb5336e0c64708d808 Mon Sep 17 00:00:00 2001 From: Niko Leskinen Date: Tue, 29 Jul 2025 15:51:31 +0300 Subject: [PATCH 1/2] Add monitoring --- scripts/cloudnet.py | 4 + src/monitoring/README.md | 15 + src/monitoring/__init__.py | 0 src/monitoring/__main__.py | 96 +++++ .../instruments/halo_doppler_lidar.py | 375 ++++++++++++++++++ src/monitoring/monitor.py | 36 ++ src/monitoring/monitoring_file.py | 116 ++++++ src/monitoring/period.py | 100 +++++ src/monitoring/plot_utils.py | 152 +++++++ src/monitoring/product.py | 41 ++ src/monitoring/py.typed | 0 src/monitoring/utils.py | 26 ++ src/processing/fetch.py | 4 + 13 files changed, 965 insertions(+) create mode 100644 src/monitoring/README.md create mode 100644 src/monitoring/__init__.py create mode 100644 src/monitoring/__main__.py create mode 100644 src/monitoring/instruments/halo_doppler_lidar.py create mode 100644 src/monitoring/monitor.py create mode 100644 src/monitoring/monitoring_file.py create mode 100644 src/monitoring/period.py create mode 100644 src/monitoring/plot_utils.py create mode 100644 src/monitoring/product.py create mode 100644 src/monitoring/py.typed create mode 100644 src/monitoring/utils.py diff --git a/scripts/cloudnet.py b/scripts/cloudnet.py index bb08b21c..5ee5ce93 100755 --- a/scripts/cloudnet.py +++ b/scripts/cloudnet.py @@ -162,6 +162,10 @@ def _parse_args(client: APIClient) -> Namespace: action="store_true", help="Fetch ALL raw data including .LV0. Only applicable if the command is 'fetch --raw'.", ) + group.add_argument( + "--include-pattern", + help="Regex pattern to filter raw files in the fetch command", + ) args = parser.parse_args() diff --git a/src/monitoring/README.md b/src/monitoring/README.md new file mode 100644 index 00000000..2f72f975 --- /dev/null +++ b/src/monitoring/README.md @@ -0,0 +1,15 @@ +# Monitoring + +## Usage + +Check available products from `/api/monitoring-products` + +Run monitoring: + +``` +python -m monitoring INSTRUMENT_UUID SITE all PRODUCT_ID +python -m monitoring INSTRUMENT_UUID SITE year:DATE PRODUCT_ID +python -m monitoring INSTRUMENT_UUID SITE month:DATE PRODUCT_ID +python -m monitoring INSTRUMENT_UUID SITE week:DATE PRODUCT_ID +python -m monitoring INSTRUMENT_UUID SITE day:DATE PRODUCT_ID +``` diff --git a/src/monitoring/__init__.py b/src/monitoring/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/monitoring/__main__.py b/src/monitoring/__main__.py new file mode 100644 index 00000000..4ca67171 --- /dev/null +++ b/src/monitoring/__main__.py @@ -0,0 +1,96 @@ +import argparse +from typing import Callable + +from cloudnet_api_client import APIClient +from cloudnet_api_client.containers import Instrument, Site + +from monitoring.monitor import monitor +from monitoring.period import Period, period_from_str +from monitoring.product import MonitoringProduct +from processing.config import Config + + +def main() -> None: + config = Config() + client = APIClient(f"{config.dataportal_url}/api/") + parser = argparse.ArgumentParser() + parser.add_argument("instrument", type=_instrument_from_client(client)) + parser.add_argument("site", type=_site_from_client(client)) + parser.add_argument("period", type=_period_from_str) + parser.add_argument("product", type=_product_from_client(client)) + args = parser.parse_args() + monitor( + site=args.site, + instrument=args.instrument, + period=args.period, + product=args.product, + ) + + +def _period_from_str(s: str) -> Period: + try: + return period_from_str(s) + except ValueError as err: + raise argparse.ArgumentTypeError(err) from err + + +def _site_from_client(client: APIClient) -> Callable[[str], Site]: + sites = client.sites() + site_dict = {site.id: site for site in sites} + + def validate_id(id_: str) -> Site: + if id_ not in site_dict: + print("Invalid site ID. Available sites:") + _print_sites(sites) + raise argparse.ArgumentTypeError("Invalid site ID.") + return site_dict[id_] + + return validate_id + + +def _product_from_client(client: APIClient) -> Callable[[str], MonitoringProduct]: + data = client.session.get(f"{client.base_url}monitoring-products/variables").json() + products = [MonitoringProduct.from_dict(d) for d in data] + product_dict = {p.id: p for p in products} + + def validate_product(id_: str) -> MonitoringProduct: + if id_ not in product_dict: + print("Invalid product ID. Available products:") + _print_products(products) + raise argparse.ArgumentTypeError(f"Invalid product ID: '{id_}'") + return product_dict[id_] + + return validate_product + + +def _instrument_from_client(client: APIClient) -> Callable[[str], Instrument]: + instruments = client.instruments() + instrument_dict = {str(inst.uuid): inst for inst in instruments} + + def validate_uuid(uuid: str) -> Instrument: + if uuid not in instrument_dict: + print("Invalid instrument UUID. Available instruments:") + _print_instruments(instruments) + raise argparse.ArgumentTypeError(f"Invalid instrument UUID: '{uuid}'") + return instrument_dict[uuid] + + return validate_uuid + + +def _print_products(products: list[MonitoringProduct]) -> None: + for p in products: + print(p.id) + + +def _print_sites(sites: list[Site]) -> None: + for s in sorted(sites, key=lambda x: x.id): + print(f"{s.id:<20} {s.human_readable_name}, {s.country}") + + +def _print_instruments(instruments: list[Instrument]) -> None: + for inst in sorted(instruments, key=lambda x: (x.type, x.name)): + print(f"{inst.type:<40} {inst.name:<40} {inst.uuid} {inst.pid}") + + +if __name__ == "__main__": + main() diff --git a/src/monitoring/instruments/halo_doppler_lidar.py b/src/monitoring/instruments/halo_doppler_lidar.py new file mode 100644 index 00000000..cf648b7d --- /dev/null +++ b/src/monitoring/instruments/halo_doppler_lidar.py @@ -0,0 +1,375 @@ +import datetime +from collections import Counter +from tempfile import TemporaryDirectory + +import matplotlib.pyplot as plt +import numpy as np +from cloudnet_api_client import APIClient +from cloudnet_api_client.containers import Instrument, Site +from doppy.raw import HaloBg, HaloHpl, HaloSysParams + +from monitoring.monitoring_file import ( + Dimensions, + MonitoringFile, + MonitoringVisualization, +) +from monitoring.period import Period, PeriodWithRange +from monitoring.plot_utils import ( + SCATTER_OPTS, + add_colorbar, + format_time_axis, + pretty_ax, + pretty_ax_2d, + save_fig, + scientific_cbar, + set_xlim_for_period, +) +from monitoring.product import MonitoringProduct, MonitoringVariable +from monitoring.utils import range_from_period + + +def monitor( + client: APIClient, + site: Site, + instrument: Instrument, + period: Period, + product: MonitoringProduct, +) -> None: + match product.id: + case "halo-doppler-lidar_housekeeping": + monitor_housekeeping(client, site, instrument, period, product) + case "halo-doppler-lidar_background": + monitor_background(client, site, instrument, period, product) + case "halo-doppler-lidar_signal": + monitor_signal(client, site, instrument, period, product) + + case _: + raise NotImplementedError( + f"Monitoring product '{product.id}' not implemented for '{instrument.instrument_id}'" + ) + + +def monitor_housekeeping( + client: APIClient, + site: Site, + instrument: Instrument, + period: Period, + product: MonitoringProduct, +) -> None: + start, end = range_from_period(period) + + start -= datetime.timedelta(days=32) + end += datetime.timedelta(days=32) + raw_files = client.raw_files( + site_id=site.id, + instrument_pid=instrument.pid, + date_from=start, + date_to=end, + filename_prefix="system_parameters_", + filename_suffix=".txt", + ) + if not raw_files: + raise ValueError( + f"Not raw files found for {period} {instrument.name} {product.id}" + ) + with TemporaryDirectory() as tempdir: + paths = client.download(raw_files, tempdir, progress=False) + sys_params_list = [HaloSysParams.from_src(p) for p in paths] + sys_params = ( + HaloSysParams.merge(sys_params_list) + .sorted_by_time() + .non_strictly_increasing_timesteps_removed() + ) + if isinstance(period, PeriodWithRange): + start_time = np.datetime64(period.start_date).astype(sys_params.time.dtype) + end_time = np.datetime64(period.end_date + datetime.timedelta(days=1)).astype( + sys_params.time.dtype + ) + select = (start_time <= sys_params.time) & (sys_params.time < end_time) + sys_params = sys_params[select] + + monitoring_file = MonitoringFile( + instrument, + site, + period, + product, + monitor_housekeeping_plots(sys_params, period, product), + ) + monitoring_file.upload() + + +def monitor_housekeeping_plots( + sys_params: HaloSysParams, period: Period, product: MonitoringProduct +) -> list[MonitoringVisualization]: + plots = [] + for variable in product.variables: + plots.append(plot_housekeeping_variable(sys_params, period, variable)) + return plots + + +def plot_housekeeping_variable( + sys_params: HaloSysParams, + period: Period, + variable: MonitoringVariable, +) -> MonitoringVisualization: + fig, ax = plt.subplots() + y = getattr(sys_params, variable.id.replace("-", "_")) + ax.scatter(sys_params.time, y, **SCATTER_OPTS) + set_xlim_for_period(ax, period, sys_params.time, pad=0.025) + format_time_axis(ax) + pretty_ax(ax, grid="y") + fig_ = save_fig(fig) + return MonitoringVisualization( + fig_.bytes, variable, Dimensions(fig_.width, fig_.height) + ) + + +def monitor_background( + client: APIClient, + site: Site, + instrument: Instrument, + period: Period, + product: MonitoringProduct, +) -> None: + start, end = range_from_period(period) + start -= datetime.timedelta(days=1) + end += datetime.timedelta(days=1) + raw_files = client.raw_files( + site_id=site.id, + instrument_pid=instrument.pid, + date_from=start, + date_to=end, + filename_prefix="Background_", + filename_suffix=".txt", + ) + raw_files = [r for r in raw_files if "cross" not in r.tags] + if not raw_files: + raise ValueError( + f"Not raw files found for {period} {instrument.name} {product.id}" + ) + with TemporaryDirectory() as tempdir: + paths = client.download(raw_files, tempdir, progress=False) + bgs = HaloBg.from_srcs(paths) + counter = Counter((bg.signal.shape[1] for bg in bgs)) + most_common_ngates = counter.most_common()[0][0] + bgs = [bg for bg in bgs if bg.signal.shape[1] == most_common_ngates] + bg = HaloBg.merge(bgs).sorted_by_time().non_strictly_increasing_timesteps_removed() + + if isinstance(period, PeriodWithRange): + start_time = np.datetime64(period.start_date).astype(bg.time.dtype) + end_time = np.datetime64(period.end_date + datetime.timedelta(days=1)).astype( + bg.time.dtype + ) + select = (start_time <= bg.time) & (bg.time < end_time) + bg = bg[select] + + monitoring_file = MonitoringFile( + instrument, + site, + period, + product, + [p for p in monitor_background_plots(bg, period, product) if p], + ) + monitoring_file.upload() + + +def monitor_background_plots( + bg: HaloBg, period: Period, product: MonitoringProduct +) -> list[MonitoringVisualization]: + plots = [] + for variable in product.variables: + plots.append(plot_background_variable(bg, period, variable)) + return plots + + +def plot_background_variable( + bg: HaloBg, period: Period, variable: MonitoringVariable +) -> MonitoringVisualization: + match variable.id: + case "background-profile": + return plot_background_profile(bg, period, variable) + case "background-profile-variance": + return plot_background_variance(bg, period, variable) + case "time-averaged-background-profile-range": + return plot_time_averaged_background_profile(bg, period, variable) + case _: + raise NotImplementedError( + f"Variable '{variable.id}' not implemented for halo-doppler-lidar" + ) + + +def plot_background_profile( + bg: HaloBg, period: Period, variable: MonitoringVariable +) -> MonitoringVisualization: + fig, ax = plt.subplots() + vmin, vmax = np.percentile(bg.signal.ravel(), [5, 95]) + cax = ax.pcolormesh( + bg.time, np.arange(bg.signal.shape[1]), bg.signal.T, vmin=vmin, vmax=vmax + ) + ax.set_ylabel("Range gate") + add_colorbar(fig, ax, cax, orientation="horizontal", shrink=0.5, pad=0.1, aspect=30) + set_xlim_for_period(ax, period, bg.time) + format_time_axis(ax) + pretty_ax_2d(ax) + fig_ = save_fig(fig) + return MonitoringVisualization( + fig_.bytes, variable, Dimensions(fig_.width, fig_.height) + ) + + +def plot_background_variance( + bg: HaloBg, period: Period, variable: MonitoringVariable +) -> MonitoringVisualization: + fig, ax = plt.subplots() + var = bg.signal.var(axis=1) + ax.scatter(bg.time, var, **SCATTER_OPTS) + set_xlim_for_period(ax, period, bg.time) + format_time_axis(ax) + pretty_ax(ax, grid="y") + fig_ = save_fig(fig) + return MonitoringVisualization( + fig_.bytes, variable, Dimensions(fig_.width, fig_.height) + ) + + +def plot_time_averaged_background_profile( + bg: HaloBg, _: Period, variable: MonitoringVariable +) -> MonitoringVisualization: + fig, ax = plt.subplots() + mean = bg.signal.mean(axis=0) + range_ = np.arange(bg.signal.shape[1]) + # Lowest ~3 range gates are often noisy, filter those out from the plot + # if they deviate from median too much + med_mean = np.median(mean) + abs_rel_dif = np.abs(np.abs(mean / med_mean) - 1) + select = (range_ > 3) | (abs_rel_dif < 0.001) + + ax.scatter(range_[select], mean[select], **SCATTER_OPTS) + ax.set_xlabel("Range gate") + + pretty_ax(ax, grid="y") + fig_ = save_fig(fig) + return MonitoringVisualization( + fig_.bytes, variable, Dimensions(fig_.width, fig_.height) + ) + + +def monitor_signal( + client: APIClient, + site: Site, + instrument: Instrument, + period: Period, + product: MonitoringProduct, +) -> None: + start, end = range_from_period(period) + + start -= datetime.timedelta(days=1) + end += datetime.timedelta(days=1) + raw_files = client.raw_files( + site_id=site.id, + instrument_pid=instrument.pid, + date_from=start, + date_to=end, + filename_suffix=".hpl", + ) + raw_files = [r for r in raw_files if "cross" not in r.tags] + if not raw_files: + raise ValueError( + f"No raw files found for {period} {instrument.name} {product.id}" + ) + with TemporaryDirectory() as tempdir: + paths = client.download(raw_files, tempdir, progress=False) + raws = HaloHpl.from_srcs(paths) + + def is_stare(raw: HaloHpl) -> bool: + elevations = set(np.round(raw.elevation)) + if len(elevations) != 1: + return False + elevation = elevations.pop() + if np.abs(elevation - 90) > 10: + return False + return True + + raws = [r for r in raws if is_stare(r)] + if not raws: + raise ValueError( + f"No raw stare files found for {period} {instrument.name} {product.id}" + ) + counter = Counter((raw.intensity.shape[1] for raw in raws)) + most_common_ngates = counter.most_common()[0][0] + raws = [raw for raw in raws if raw.intensity.shape[1] == most_common_ngates] + raw = ( + HaloHpl.merge(raws).sorted_by_time().non_strictly_increasing_timesteps_removed() + ) + if isinstance(period, PeriodWithRange): + start_time = np.datetime64(period.start_date).astype(raw.time.dtype) + end_time = np.datetime64(period.end_date + datetime.timedelta(days=1)).astype( + raw.time.dtype + ) + select = (start_time <= raw.time) & (raw.time < end_time) + raw = raw[select] + + monitoring_file = MonitoringFile( + instrument, + site, + period, + product, + monitor_signal_plots(raw, period, product), + ) + monitoring_file.upload() + + +def monitor_signal_plots( + raw: HaloHpl, period: Period, product: MonitoringProduct +) -> list[MonitoringVisualization]: + plots = [] + for variable in product.variables: + plots.append(plot_signal_variable(raw, period, variable)) + return plots + + +def plot_signal_variable( + raw: HaloHpl, period: Period, variable: MonitoringVariable +) -> MonitoringVisualization: + match variable.id: + case "radial-velocity-histogram": + return plot_radial_velocity_histogram(raw, period, variable) + case "signal-radial-velocity": + return plot_signal_radial_velocity(raw, period, variable) + case _: + raise NotImplementedError( + f"Variable '{variable.id}' not implemented for halo-doppler-lidar" + ) + + +def plot_radial_velocity_histogram( + raw: HaloHpl, _: Period, variable: MonitoringVariable +) -> MonitoringVisualization: + fig, ax = plt.subplots() + ax.hist(raw.radial_velocity.ravel(), bins=400) + ax.ticklabel_format(style="scientific", axis="y", scilimits=(0, 0)) + ax.set_xlabel("Radial velocity") + ax.set_ylabel("Count") + pretty_ax(ax, grid="both") + fig_ = save_fig(fig) + return MonitoringVisualization( + fig_.bytes, variable, Dimensions(fig_.width, fig_.height) + ) + + +def plot_signal_radial_velocity( + raw: HaloHpl, _: Period, variable: MonitoringVariable +) -> MonitoringVisualization: + fig, ax = plt.subplots() + vmin, vmax = np.percentile(raw.intensity.ravel(), [2, 95]) + select = (vmin < raw.intensity) & (raw.intensity < vmax) + cax = ax.hexbin(raw.intensity[select].ravel(), raw.radial_velocity[select].ravel()) + cbar = add_colorbar(fig, ax, cax) + scientific_cbar(cbar) + ax.set_xlabel("Intensity") + ax.set_ylabel("Radial velocity") + pretty_ax(ax) + fig_ = save_fig(fig) + return MonitoringVisualization( + fig_.bytes, variable, Dimensions(fig_.width, fig_.height) + ) diff --git a/src/monitoring/monitor.py b/src/monitoring/monitor.py new file mode 100644 index 00000000..3f930706 --- /dev/null +++ b/src/monitoring/monitor.py @@ -0,0 +1,36 @@ +import logging + +from cloudnet_api_client import APIClient +from cloudnet_api_client.containers import Instrument, Site + +from monitoring.instruments import halo_doppler_lidar +from monitoring.period import Period +from monitoring.product import MonitoringProduct +from processing.config import Config + + +class C: + RESET = "\033[0m" + RED = "\033[31m" + GREEN = "\033[32m" + BLUE = "\033[34m" + CYAN = "\033[36m" + MAGENTA = "\033[35m" + YELLOW = "\033[33m" + BOLD = "\033[1m" + + +def monitor( + site: Site, instrument: Instrument, period: Period, product: MonitoringProduct +) -> None: + config = Config() + client = APIClient(f"{config.dataportal_url}/api/") + msg = ( + f"Monitoring: {C.RED}{site.human_readable_name} " + f"{C.BLUE}{instrument.name}({instrument.uuid}) " + f"{C.YELLOW}{period} {C.MAGENTA}{product}{C.RESET}" + ) + logging.info(msg) + match instrument.instrument_id: + case "halo-doppler-lidar": + halo_doppler_lidar.monitor(client, site, instrument, period, product) diff --git a/src/monitoring/monitoring_file.py b/src/monitoring/monitoring_file.py new file mode 100644 index 00000000..428607f8 --- /dev/null +++ b/src/monitoring/monitoring_file.py @@ -0,0 +1,116 @@ +from dataclasses import dataclass +from json import JSONDecodeError +from pathlib import Path +from tempfile import NamedTemporaryFile + +from cloudnet_api_client.containers import Instrument, Site + +from monitoring.period import AllPeriod, Period, PeriodWithRange +from monitoring.product import MonitoringProduct, MonitoringVariable +from monitoring.utils import get_apis + + +@dataclass +class Dimensions: + width: int + height: int + margin_top: int | None = None + margin_right: int | None = None + margin_bottom: int | None = None + margin_left: int | None = None + + def as_payload_dict(self) -> dict[str, int | None]: + return { + "width": self.width, + "height": self.height, + "marginTop": self.margin_top, + "marginRight": self.margin_right, + "marginBottom": self.margin_bottom, + "marginLeft": self.margin_left, + } + + +@dataclass +class MonitoringVisualization: + fig: bytes + variable: MonitoringVariable + dimensions: Dimensions + + +@dataclass +class MonitoringFile: + instrument: Instrument + site: Site + period: Period + product: MonitoringProduct + visualisations: list[MonitoringVisualization] + + def upload(self) -> None: + if not self.visualisations: + raise ValueError( + f"No visualisations for product {self.product.id}, {self.site.id}, {self.instrument.name}, {self.period}" + ) + md_api, storage_api = get_apis() + payload = { + "periodType": self.period.period, + "siteId": self.site.id, + "productId": self.product.id, + "instrumentUuid": str(self.instrument.uuid), + } + if isinstance(self.period, PeriodWithRange): + payload["startDate"] = str(self.period.start_date) + res = md_api.post("monitoring-files", payload=payload) + if not res.ok: + raise RuntimeError(f"Could not post file: {payload}") + try: + data = res.json() + except (JSONDecodeError, ValueError) as err: + raise RuntimeError( + f"Failed to decode JSON from POST monitoring-files with payload {payload}" + ) from err + + file_uuid = data["uuid"] + for vis in self.visualisations: + s3key = generate_s3_key( + self.site, self.instrument, self.product, vis.variable, self.period + ) + with NamedTemporaryFile() as tempfile: + tempfile.write(vis.fig) + storage_api.upload_image(full_path=Path(tempfile.name), s3key=s3key) + payload = { + "s3key": s3key, + "sourceFileUuid": file_uuid, + "variableId": vis.variable.id, + } + res = md_api.post("monitoring-visualizations", payload=payload) + if not res.ok: + raise RuntimeError(f"Could not post visualisation: {payload}") + + +def generate_s3_key( + site: Site, + instrument: Instrument, + product: MonitoringProduct, + variable: MonitoringVariable, + period: Period, +) -> str: + instrument_uuid_short = str(instrument.uuid)[:8] + period_str = _period_for_s3key(period) + return f"monitoring/{period_str}_{site.id}_{product.id}_{variable.id}_{instrument_uuid_short}.png" + + +def _period_for_s3key(p: Period) -> str: + if isinstance(p, AllPeriod): + return "All" + period_str = p.period.capitalize() + match p.period: + case "year": + date_str = p.start_date.strftime("%Y") + case "month": + date_str = p.start_date.strftime("%Y-%m") + case "week": + date_str = f"{p.start_date.isocalendar().week:02d}" + case "day": + date_str = p.start_date.isoformat() + + return f"{period_str}{date_str}" diff --git a/src/monitoring/period.py b/src/monitoring/period.py new file mode 100644 index 00000000..68f768f1 --- /dev/null +++ b/src/monitoring/period.py @@ -0,0 +1,100 @@ +import calendar +import datetime +from dataclasses import dataclass +from typing import Literal, cast + + +class AllPeriod: + period = "all" + + def __repr__(self) -> str: + return "AllPeriod" + + def __str__(self) -> str: + return "all" + + +ALL_PERIOD = AllPeriod() + +PeriodWithRangeType = Literal["year", "month", "week", "day"] + + +@dataclass +class PeriodWithRange: + period: PeriodWithRangeType + start_date: datetime.date + + def __repr__(self) -> str: + return f"{self.period.capitalize()}({self.start_date})" + + @property + def end_date(self) -> datetime.date: + start = self.start_date + match self.period: + case "year": + return datetime.date(start.year, 12, 31) + case "month": + last_day = calendar.monthrange(start.year, start.month)[1] + return datetime.date(start.year, start.month, last_day) + case "week": + return start + datetime.timedelta(days=6) + case "day": + return start + + def as_interval(self) -> tuple[datetime.date, datetime.date]: + return (self.start_date, self.end_date) + + +Period = AllPeriod | PeriodWithRange + + +def period_from_str(s: str, normalise: bool = True) -> Period: + s = s.lower().strip() + + if s == "all": + return ALL_PERIOD + try: + period_type, start_str = s.split(":") + except ValueError as err: + raise ValueError( + f"Invalid period format '{s}'. Expected format: " + "'year|month|week|day:YYYY-MM-DD' or 'all'." + ) from err + if period_type not in ("year", "month", "week", "day"): + raise ValueError( + f"Invalid period type '{period_type}'. " + "Expected period types: all|month|week|day" + ) + period_literal = cast(PeriodWithRangeType, period_type) + try: + start_date = datetime.date.fromisoformat(start_str) + except ValueError as err: + raise ValueError( + f"Invalid start date format: '{start_str}'. Expected format: 'YYYY-MM-DD'" + ) from err + period = PeriodWithRange(period=period_literal, start_date=start_date) + if normalise: + return to_normalised_period(period) + return period + + +def to_normalised_period(period: PeriodWithRange) -> PeriodWithRange: + func = { + "year": normalise_year, + "month": normalise_month, + "week": normalise_week, + "day": lambda d: d, + } + return PeriodWithRange(period.period, func[period.period](period.start_date)) + + +def normalise_year(d: datetime.date) -> datetime.date: + return datetime.date(d.year, 1, 1) + + +def normalise_month(d: datetime.date) -> datetime.date: + return datetime.date(d.year, d.month, 1) + + +def normalise_week(d: datetime.date) -> datetime.date: + return d - datetime.timedelta(days=d.weekday()) diff --git a/src/monitoring/plot_utils.py b/src/monitoring/plot_utils.py new file mode 100644 index 00000000..8e5156c0 --- /dev/null +++ b/src/monitoring/plot_utils.py @@ -0,0 +1,152 @@ +import datetime +import io +from dataclasses import dataclass +from typing import Literal, TypedDict + +import matplotlib.dates +import matplotlib.ticker +import numpy as np +from matplotlib.axes import Axes +from matplotlib.collections import PolyCollection, QuadMesh +from matplotlib.colorbar import Colorbar +from matplotlib.figure import Figure +from numpy.typing import NDArray + +from monitoring.period import Period, PeriodWithRange + +FONT_SIZE = 30 +DPI = 400 +HEIGHT_INCH = 16 +ASPECT = 16 / 9 + + +class ScatterOpts(TypedDict): + s: int + c: str + + +SCATTER_OPTS: ScatterOpts = {"s": 50, "c": "#2D9AE0"} + + +@dataclass +class Fig: + bytes: bytes + width: int + height: int + + +def format_time_axis(ax: Axes) -> None: + locator = matplotlib.dates.AutoDateLocator() + ax.xaxis.set_major_locator(locator) + ax.xaxis.set_major_formatter(matplotlib.dates.ConciseDateFormatter(locator)) + + +def pretty_ax(ax: Axes, grid: Literal["x", "y", "both"] | None = None) -> None: + ax.set_facecolor("#f0f0f0") + if grid is not None: + ax.grid(True, axis=grid, color="white", linestyle="-", linewidth=4) + ax.set_axisbelow(True) + ax.tick_params( + axis="both", + length=12, + width=3, + direction="out", + pad=15, + ) + ax.xaxis.labelpad = 20 + ax.yaxis.labelpad = 20 + + ax.spines["left"].set_position(("outward", 10)) + ax.spines["bottom"].set_position(("outward", 10)) + ax.spines["left"].set_visible(False) + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + ax.spines["bottom"].set_visible(False) + + +def scientific_cbar(cbar: Colorbar) -> None: + formatter = matplotlib.ticker.ScalarFormatter() + formatter.set_scientific(True) + formatter.set_powerlimits((-1, 1)) + cbar.ax.yaxis.set_major_formatter(formatter) + offset_text = cbar.ax.yaxis.get_offset_text() + offset_text.set_verticalalignment("bottom") + offset_text.set_horizontalalignment("right") + + +def add_colorbar( + fig: Figure, + ax: Axes, + cax: PolyCollection | QuadMesh, + shrink: float = 1, + pad: float = 0.02, + aspect: float = 20, + orientation: Literal["vertical", "horizontal"] = "vertical", +) -> Colorbar: + cbar = fig.colorbar( + cax, ax=ax, orientation=orientation, shrink=shrink, pad=pad, aspect=aspect + ) + cbar.outline.set_visible(False) # type: ignore + return cbar + + +def pretty_ax_2d(ax: Axes) -> None: + ax.set_facecolor("#f0f0f0") + ax.grid(True, axis="y", color="white", linestyle="-", linewidth=4) + ax.set_axisbelow(True) + ax.tick_params( + axis="both", + length=12, + width=3, + direction="out", + pad=15, + ) + ax.spines["left"].set_position(("outward", 15)) + ax.spines["bottom"].set_position(("outward", 15)) + ax.spines["left"].set_visible(False) + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + ax.spines["bottom"].set_visible(False) + + +def set_xlim_for_period( + ax: Axes, period: Period, time: NDArray[np.datetime64], pad: float = 0 +) -> None: + if isinstance(period, PeriodWithRange): + start_lim = np.datetime64(period.start_date).astype(time.dtype) + end_lim = np.datetime64(period.end_date + datetime.timedelta(days=1)).astype( + time.dtype + ) + delta = (end_lim - start_lim) * pad + ax.set_xlim(start_lim - delta, end_lim + delta) # type: ignore[arg-type] + + +def save_fig(fig: Figure) -> Fig: + aspect = ASPECT + dpi = DPI + height_inch = HEIGHT_INCH + height_px = height_inch * dpi + width_px = aspect * height_px + fig.set_size_inches(width_px / dpi, height_px / dpi) + + font_size = FONT_SIZE + for ax in fig.axes: + ax.tick_params(axis="both", labelsize=font_size) + ax.xaxis.label.set_fontsize(font_size) + ax.yaxis.label.set_fontsize(font_size) + ax.title.set_fontsize(font_size + 2) + ax.xaxis.get_offset_text().set_fontsize(font_size) + ax.yaxis.get_offset_text().set_fontsize(font_size) + + buf = io.BytesIO() + bbox_extra_artists = [ax.yaxis.label for ax in fig.axes] + fig.savefig( + buf, + format="png", + dpi=dpi, + bbox_inches="tight", + pad_inches=0, + bbox_extra_artists=bbox_extra_artists, + ) + buf.seek(0) + return Fig(buf.read(), int(width_px), int(height_px)) diff --git a/src/monitoring/product.py b/src/monitoring/product.py new file mode 100644 index 00000000..0d0d7e2e --- /dev/null +++ b/src/monitoring/product.py @@ -0,0 +1,41 @@ +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class MonitoringVariable: + id: str + humanReadableName: str + order: int = field(default=0) + + def __repr__(self) -> str: + return f"Variable({self.id})" + + @staticmethod + def from_dict(data: dict[str, Any]) -> "MonitoringVariable": + return MonitoringVariable( + id=data["id"], + humanReadableName=data["humanReadableName"], + order=data.get("order", 0), + ) + + +@dataclass +class MonitoringProduct: + id: str + humanReadableName: str + variables: list[MonitoringVariable] + + def __repr__(self) -> str: + return f"Product({self.id})" + + @staticmethod + def from_dict(data: dict[str, Any]) -> "MonitoringProduct": + variables = [ + MonitoringVariable.from_dict(var) for var in data.get("variables", []) + ] + return MonitoringProduct( + id=data["id"], + humanReadableName=data["humanReadableName"], + variables=variables, + ) diff --git a/src/monitoring/py.typed b/src/monitoring/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/src/monitoring/utils.py b/src/monitoring/utils.py new file mode 100644 index 00000000..ab90c7f4 --- /dev/null +++ b/src/monitoring/utils.py @@ -0,0 +1,26 @@ +import datetime + +from monitoring.period import AllPeriod, Period, PeriodWithRange +from processing.config import Config +from processing.metadata_api import MetadataApi +from processing.storage_api import StorageApi +from processing.utils import make_session + + +def get_apis() -> tuple[MetadataApi, StorageApi]: + config = Config() + session = make_session() + md_api = MetadataApi(config, session) + storage_api = StorageApi(config, session) + return md_api, storage_api + + +def range_from_period(period: Period) -> tuple[datetime.date, datetime.date]: + match period: + case AllPeriod(): + start = datetime.date(1900, 1, 1) + end = datetime.date.today() + datetime.timedelta(days=1) + case PeriodWithRange(): + start = period.start_date + end = period.end_date + return start, end diff --git a/src/processing/fetch.py b/src/processing/fetch.py index 620dae42..94ea6c2b 100644 --- a/src/processing/fetch.py +++ b/src/processing/fetch.py @@ -3,6 +3,7 @@ import hashlib import logging import os +import re import sys from argparse import Namespace from base64 import b64encode @@ -71,6 +72,9 @@ def get_raw_instrument_metadata(self) -> list: res = requests.get(url=url, params=payload) res.raise_for_status() metadata = res.json() + if self.args.include_pattern: + include_re = re.compile(self.args.include_pattern) + metadata = [m for m in metadata if include_re.match(m["filename"])] if self.args.all: return metadata return [m for m in metadata if not m["filename"].lower().endswith(".lv0")] From 79e111726cb0863d1e55b59ee2f68f4f67e14edf Mon Sep 17 00:00:00 2001 From: Niko Leskinen Date: Tue, 16 Dec 2025 13:39:14 +0200 Subject: [PATCH 2/2] Compute monitoring vis dims --- .../instruments/halo_doppler_lidar.py | 41 +++++++++-------- src/monitoring/monitoring_file.py | 44 ++++++++----------- 2 files changed, 41 insertions(+), 44 deletions(-) diff --git a/src/monitoring/instruments/halo_doppler_lidar.py b/src/monitoring/instruments/halo_doppler_lidar.py index cf648b7d..31da5f46 100644 --- a/src/monitoring/instruments/halo_doppler_lidar.py +++ b/src/monitoring/instruments/halo_doppler_lidar.py @@ -7,6 +7,7 @@ from cloudnet_api_client import APIClient from cloudnet_api_client.containers import Instrument, Site from doppy.raw import HaloBg, HaloHpl, HaloSysParams +from numpy.typing import NDArray from monitoring.monitoring_file import ( Dimensions, @@ -119,9 +120,7 @@ def plot_housekeeping_variable( format_time_axis(ax) pretty_ax(ax, grid="y") fig_ = save_fig(fig) - return MonitoringVisualization( - fig_.bytes, variable, Dimensions(fig_.width, fig_.height) - ) + return MonitoringVisualization(fig_.bytes, variable, Dimensions(fig, [ax])) def monitor_background( @@ -212,9 +211,7 @@ def plot_background_profile( format_time_axis(ax) pretty_ax_2d(ax) fig_ = save_fig(fig) - return MonitoringVisualization( - fig_.bytes, variable, Dimensions(fig_.width, fig_.height) - ) + return MonitoringVisualization(fig_.bytes, variable, Dimensions(fig, [ax])) def plot_background_variance( @@ -227,9 +224,7 @@ def plot_background_variance( format_time_axis(ax) pretty_ax(ax, grid="y") fig_ = save_fig(fig) - return MonitoringVisualization( - fig_.bytes, variable, Dimensions(fig_.width, fig_.height) - ) + return MonitoringVisualization(fig_.bytes, variable, Dimensions(fig, [ax])) def plot_time_averaged_background_profile( @@ -249,9 +244,7 @@ def plot_time_averaged_background_profile( pretty_ax(ax, grid="y") fig_ = save_fig(fig) - return MonitoringVisualization( - fig_.bytes, variable, Dimensions(fig_.width, fig_.height) - ) + return MonitoringVisualization(fig_.bytes, variable, Dimensions(fig, [ax])) def monitor_signal( @@ -346,15 +339,27 @@ def plot_radial_velocity_histogram( raw: HaloHpl, _: Period, variable: MonitoringVariable ) -> MonitoringVisualization: fig, ax = plt.subplots() - ax.hist(raw.radial_velocity.ravel(), bins=400) + bins = _compute_radial_velocity_bins(raw) + ax.hist(raw.radial_velocity.ravel(), bins=bins) ax.ticklabel_format(style="scientific", axis="y", scilimits=(0, 0)) ax.set_xlabel("Radial velocity") ax.set_ylabel("Count") pretty_ax(ax, grid="both") fig_ = save_fig(fig) - return MonitoringVisualization( - fig_.bytes, variable, Dimensions(fig_.width, fig_.height) - ) + return MonitoringVisualization(fig_.bytes, variable, Dimensions(fig, [ax])) + + +def _compute_radial_velocity_bins(raw: HaloHpl) -> list[float] | int: + v_uniq = np.unique(np.round(raw.radial_velocity.ravel(), decimals=5)) + if len(v_uniq) < 100 or len(v_uniq) > 3000: + return 100 + midpoints = (v_uniq[:-1] + v_uniq[1:]) / 2 + left_gap = v_uniq[1] - v_uniq[0] + right_gap = v_uniq[-1] - v_uniq[-2] + left_edge = v_uniq[0] - left_gap / 2 + right_edge = v_uniq[-1] + right_gap / 2 + bins = np.concatenate(([left_edge], midpoints, [right_edge])) + return bins.tolist() def plot_signal_radial_velocity( @@ -370,6 +375,4 @@ def plot_signal_radial_velocity( ax.set_ylabel("Radial velocity") pretty_ax(ax) fig_ = save_fig(fig) - return MonitoringVisualization( - fig_.bytes, variable, Dimensions(fig_.width, fig_.height) - ) + return MonitoringVisualization(fig_.bytes, variable, Dimensions(fig, [ax])) diff --git a/src/monitoring/monitoring_file.py b/src/monitoring/monitoring_file.py index 428607f8..eb56dfc5 100644 --- a/src/monitoring/monitoring_file.py +++ b/src/monitoring/monitoring_file.py @@ -2,34 +2,16 @@ from json import JSONDecodeError from pathlib import Path from tempfile import NamedTemporaryFile +from typing import Any from cloudnet_api_client.containers import Instrument, Site +from cloudnetpy.plotting.plotting import Dimensions from monitoring.period import AllPeriod, Period, PeriodWithRange from monitoring.product import MonitoringProduct, MonitoringVariable from monitoring.utils import get_apis -@dataclass -class Dimensions: - width: int - height: int - margin_top: int | None = None - margin_right: int | None = None - margin_bottom: int | None = None - margin_left: int | None = None - - def as_payload_dict(self) -> dict[str, int | None]: - return { - "width": self.width, - "height": self.height, - "marginTop": self.margin_top, - "marginRight": self.margin_right, - "marginBottom": self.margin_bottom, - "marginLeft": self.margin_left, - } - - @dataclass class MonitoringVisualization: fig: bytes @@ -37,6 +19,17 @@ class MonitoringVisualization: dimensions: Dimensions +def _dimensions_as_payload(dim: Dimensions) -> dict[str, int]: + return { + "width": dim.width, + "height": dim.height, + "marginTop": dim.margin_top, + "marginRight": dim.margin_right, + "marginBottom": dim.margin_bottom, + "marginLeft": dim.margin_left, + } + + @dataclass class MonitoringFile: instrument: Instrument @@ -77,14 +70,15 @@ def upload(self) -> None: with NamedTemporaryFile() as tempfile: tempfile.write(vis.fig) storage_api.upload_image(full_path=Path(tempfile.name), s3key=s3key) - payload = { + payload_vis: dict[str, str | int] = { "s3key": s3key, "sourceFileUuid": file_uuid, "variableId": vis.variable.id, + **_dimensions_as_payload(vis.dimensions), } - res = md_api.post("monitoring-visualizations", payload=payload) + res = md_api.post("monitoring-visualizations", payload=payload_vis) if not res.ok: - raise RuntimeError(f"Could not post visualisation: {payload}") + raise RuntimeError(f"Could not post visualisation: {payload_vis}") def generate_s3_key( @@ -101,8 +95,8 @@ def generate_s3_key( def _period_for_s3key(p: Period) -> str: if isinstance(p, AllPeriod): - return "All" - period_str = p.period.capitalize() + return "all" + period_str = p.period match p.period: case "year": date_str = p.start_date.strftime("%Y")