From 4e5f4a1c0c0c727a034233dab51547638fefeb1d Mon Sep 17 00:00:00 2001 From: Joaquin Anton Guirao Date: Fri, 12 Dec 2025 14:19:53 +0100 Subject: [PATCH 1/2] Replace pydicom-seg with highdicom and upgrade pydicom to 3.0.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Major changes: - Replace unmaintained pydicom-seg library with well-established highdicom - Upgrade pydicom from 2.4.4 to 3.0.1, enabling HTJ2K support Refactoring in monailabel/datastore/utils/convert.py: - Split nifti_to_dicom_seg into dispatcher + two implementations: * _highdicom_nifti_to_dicom_seg: Highdicom-based conversion * _itk_nifti_to_dicom_seg: Dcmqi/itkimage2segimage conversion - Rename itk_image_to_dicom_seg → _dcmqi_nifti_to_dicom_seg for consistency Tests in tests/unit/datastore/test_convert.py: - Test both highdicom and ITK implementations via helper methods - Validate metadata preservation, geometry, pixel data, and segment remapping - Tests for non-sequential label remapping (1,5,10 → 1,2,3) - Round-trip conversion tests with 3% tolerance for both implementations - Handle ITK-specific behavior (stores only non-empty slices) Minor improvements: - Update pynrrd from 1.0.0 to 1.1.3 (former caused problem with numpy 2.x) - Add documentation for optional dcmqi dependency Signed-off-by: Joaquin Anton Guirao [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Code review fixes Signed-off-by: Joaquin Anton Guirao [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Lint fixes Signed-off-by: Joaquin Anton Guirao More code review fixes Signed-off-by: Joaquin Anton Guirao More fixes Signed-off-by: Joaquin Anton Guirao [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Fixes Signed-off-by: Joaquin Anton Guirao [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci 
Signed-off-by: Joaquin Anton Guirao --- monailabel/config.py | 2 +- monailabel/datastore/utils/convert.py | 580 ++++++++++++++++-- monailabel/endpoints/datastore.py | 7 +- requirements.txt | 11 +- setup.cfg | 6 +- tests/unit/datastore/test_convert.py | 849 +++++++++++++++++++++++++- 6 files changed, 1363 insertions(+), 92 deletions(-) diff --git a/monailabel/config.py b/monailabel/config.py index 4de6c896f..ea8d1c37e 100644 --- a/monailabel/config.py +++ b/monailabel/config.py @@ -18,7 +18,7 @@ def is_package_installed(name): - return name in (x.metadata.get("Name") for x in distributions()) + return name in (x.metadata.get("Name") for x in distributions() if x.metadata is not None) class Settings(BaseSettings): diff --git a/monailabel/datastore/utils/convert.py b/monailabel/datastore/utils/convert.py index f5429a1ef..b990ba86c 100644 --- a/monailabel/datastore/utils/convert.py +++ b/monailabel/datastore/utils/convert.py @@ -9,6 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import datetime import json import logging import os @@ -18,15 +19,24 @@ import numpy as np import pydicom -import pydicom_seg +import pydicom.errors import SimpleITK from monai.transforms import LoadImage from pydicom.filereader import dcmread +from pydicom.sr.codedict import codes +from pydicom.sr.coding import Code +try: + import highdicom as hd + + HIGHDICOM_AVAILABLE = True +except ImportError: + HIGHDICOM_AVAILABLE = False + +from monailabel import __version__ from monailabel.config import settings from monailabel.datastore.utils.colors import GENERIC_ANATOMY_COLORS from monailabel.transform.writer import write_itk -from monailabel.utils.others.generic import run_command logger = logging.getLogger(__name__) @@ -81,35 +91,355 @@ def binary_to_image(reference_image, label, dtype=np.uint8, file_ext=".nii.gz"): return output_file -def nifti_to_dicom_seg(series_dir, label, label_info, file_ext="*", use_itk=None) -> str: +def _extract_label_info(label, label_info): + """Extract unique labels and model info from label file. 
- # Only use config if no explicit override - if use_itk is None: - use_itk = settings.MONAI_LABEL_USE_ITK_FOR_DICOM_SEG + Args: + label: Path to NIfTI label file + label_info: List of dictionaries containing segment information, each with optional + "idx"/"labelID"/"label" field to map to actual label values - start = time.time() + Returns: + tuple: (unique_labels, info_by_label_id, model_name) or (None, None, None) if empty + """ + # Load label file using SimpleITK (consistent with conversion pipeline) + mask = SimpleITK.ReadImage(label) + label_array = SimpleITK.GetArrayFromImage(mask) - label_np, meta_dict = LoadImage(image_only=False)(label) - unique_labels = np.unique(label_np.flatten()).astype(np.int_) + # Extract unique non-zero labels + unique_labels = np.unique(label_array).astype(np.int_) unique_labels = unique_labels[unique_labels != 0] - info = label_info[0] if label_info and 0 < len(label_info) else {} - model_name = info.get("model_name", "AIName") + if not unique_labels.size: + logger.warning("No non-zero labels found in segmentation") + return None, None, None + + # Build mapping from label ID to metadata + # Look for explicit ID fields: "idx", "labelID", or "label" + info_by_label_id = {} + has_explicit_ids = False + + if label_info: + for entry in label_info: + # Find the label ID from various possible field names + label_id = entry.get("idx") or entry.get("labelID") or entry.get("label") + if label_id is not None: + info_by_label_id[int(label_id)] = entry + has_explicit_ids = True + + # If no explicit IDs found, fall back to positional mapping + # Assume label_info is ordered to match unique_labels + if not has_explicit_ids: + for i, label_id in enumerate(unique_labels): + if i < len(label_info): + info_by_label_id[int(label_id)] = label_info[i] + + # Extract model_name (can be in any entry, prefer first entry) + model_name = "MONAILabel" # Default + if label_info and len(label_info) > 0: + model_name = label_info[0].get("model_name", 
"MONAILabel") + + return unique_labels, info_by_label_id, model_name + + +def _highdicom_nifti_to_dicom_seg( + series_dir, label, label_info, file_ext="*", omit_empty_frames=False, custom_tags=None +) -> str: + """Convert NIfTI segmentation to DICOM SEG format using highdicom. + + Args: + series_dir: Directory containing source DICOM images + label: Path to NIfTI label file + label_info: List of dictionaries containing segment information (name, description, color, etc.) + file_ext: File extension pattern for DICOM files (default: "*") + omit_empty_frames: If True, omit frames with no segmented pixels (default: False) + custom_tags: Optional dictionary of custom DICOM tags to add (keyword: value) + + Returns: + Path to output DICOM SEG file, or empty string if conversion fails + """ + # Input validation + if label_info is None: + label_info = [] + + if not os.path.exists(label): + logger.error(f"Label file not found: {label}") + return "" + + if not os.path.exists(series_dir): + logger.error(f"Series directory not found: {series_dir}") + return "" + + # Extract label information + unique_labels, info_by_label_id, model_name = _extract_label_info(label, label_info) + if unique_labels is None: + return "" - segment_attributes = [] - for i, idx in enumerate(unique_labels): - info = label_info[i] if label_info and i < len(label_info) else {} - name = info.get("name", "unknown") - description = info.get("description", "Unknown") - rgb = list(info.get("color", GENERIC_ANATOMY_COLORS.get(name, (255, 0, 0))))[0:3] - rgb = [int(x) for x in rgb] + # Build highdicom segment descriptions + segment_descriptions = [] + for i, label_id in enumerate(unique_labels): + # Look up metadata by actual label ID, fall back to empty dict + info = info_by_label_id.get(int(label_id), {}) + name = info.get("name", f"Segment_{label_id}") + logger.info(f"Segment {i}: idx={label_id}, name={name}") + + # Get category code from label_info or use default + category_code_dict = 
info.get("SegmentedPropertyCategoryCodeSequence", {}) + if category_code_dict and isinstance(category_code_dict, dict): + category_code = Code( + value=category_code_dict.get("CodeValue", "123037004"), + scheme_designator=category_code_dict.get("CodingSchemeDesignator", "SCT"), + meaning=category_code_dict.get("CodeMeaning", "Anatomical Structure"), + ) + else: + category_code = codes.SCT.Organ # Default: Organ + + # Get type code from label_info or use default + type_code_dict = info.get("SegmentedPropertyTypeCodeSequence", {}) + if type_code_dict and isinstance(type_code_dict, dict): + type_code = Code( + value=type_code_dict.get("CodeValue", "78961009"), + scheme_designator=type_code_dict.get("CodingSchemeDesignator", "SCT"), + meaning=type_code_dict.get("CodeMeaning", name), + ) + else: + # Default type code + type_code = Code("78961009", "SCT", name) + + # Create highdicom segment description + # Use sequential segment numbers (1, 2, 3...) to match remapped pixel array + seg_desc = hd.seg.SegmentDescription( + segment_number=i + 1, # Sequential numbering: 1, 2, 3... 
+ segment_label=name, + segmented_property_category=category_code, + segmented_property_type=type_code, + algorithm_identification=hd.AlgorithmIdentificationSequence( + name="MONAILABEL", family=codes.DCM.ArtificialIntelligence, version=model_name + ), + algorithm_type="AUTOMATIC", + ) + segment_descriptions.append(seg_desc) - logger.info(f"{i} => {idx} => {name}") + if not segment_descriptions: + logger.error("Missing segment descriptions") + return "" + + # Read source DICOM images (headers only for memory efficiency) + series_dir = pathlib.Path(series_dir) + image_files = list(series_dir.glob(file_ext)) + + # Read DICOM files with error handling for non-DICOM files + image_datasets = [] + for f in image_files: + try: + ds = dcmread(str(f), stop_before_pixels=True) + image_datasets.append(ds) + except (pydicom.errors.InvalidDicomError, OSError, ValueError) as e: + logger.warning(f"Skipping non-DICOM or invalid file {f}: {e}") + continue + + if not image_datasets: + logger.warning(f"No DICOM images found in {series_dir} with pattern {file_ext}") + return "" + + # Spatially sort DICOM images for correct slice ordering + # Use ImageOrientationPatient and ImagePositionPatient for robust sorting + def spatial_sort_key(ds): + """Generate sort key based on spatial position.""" + try: + # Get image orientation (row and column direction cosines) + iop = ds.ImageOrientationPatient + row_dir = np.array(iop[0:3]) + col_dir = np.array(iop[3:6]) + + # Compute plane normal (perpendicular to image plane) + normal = np.cross(row_dir, col_dir) + + # Get image position (origin of slice) + position = np.array(ds.ImagePositionPatient) + + # Project position onto normal direction (gives slice location) + slice_location = np.dot(position, normal) + return slice_location + except (AttributeError, KeyError, TypeError): + # Fall back to InstanceNumber if spatial attributes missing + try: + return float(ds.InstanceNumber) + except (AttributeError, KeyError, TypeError): + # Final fallback: 
use 0 (will maintain original order) + return 0.0 + + image_datasets = sorted(image_datasets, key=spatial_sort_key) + logger.info(f"Total Source Images: {len(image_datasets)}") + + # Load label using SimpleITK for correct axis ordering (D, H, W) + # SimpleITK natively gives (D, H, W) which matches DICOM/highdicom expectations + mask = SimpleITK.ReadImage(label) + mask = SimpleITK.Cast(mask, SimpleITK.sitkUInt16) # Support up to 65,535 segments + seg_array = SimpleITK.GetArrayFromImage(mask) + + # Remap label values to sequential 1, 2, 3... as required by DICOM SEG + # Value 0 is reserved for background (no segment) + remapped_array = np.zeros_like(seg_array, dtype=np.uint16) + for new_idx, orig_idx in enumerate(unique_labels, start=1): + remapped_array[seg_array == orig_idx] = new_idx + seg_array = remapped_array + + # Get software version + try: + software_version = f"MONAI Label {__version__}" + except (AttributeError, NameError): + software_version = "MONAI Label" + + # Get consistent timestamp for all DICOM attributes + dt_now = datetime.datetime.now() + + # DICOM series number is 4 digits max (0-9999) + MAX_SERIES_NUMBER = 9999 + series_number = int(dt_now.strftime("%H%M%S")) % (MAX_SERIES_NUMBER + 1) + + # Create DICOM SEG using highdicom + # Use LABELMAP type for indexed labelmap (integer array with values 0..N) + # BINARY type requires 4D one-hot encoding (F, H, W, S) + seg = hd.seg.Segmentation( + source_images=image_datasets, + pixel_array=seg_array, + segmentation_type=hd.seg.SegmentationTypeValues.LABELMAP, + segment_descriptions=segment_descriptions, + series_instance_uid=hd.UID(), + series_number=series_number, + sop_instance_uid=hd.UID(), + instance_number=1, + manufacturer="MONAI Consortium", + manufacturer_model_name="MONAI Label", + software_versions=software_version, + device_serial_number="0000", + omit_empty_frames=omit_empty_frames, + ) + + # Add timestamp and timezone + seg.SeriesDate = dt_now.strftime("%Y%m%d") + seg.SeriesTime = 
dt_now.strftime("%H%M%S") + + # Compute timezone offset in ±HHMM format + def format_timezone_offset(dt): + """Compute timezone offset from UTC in ±HHMM format.""" + offset = dt.utcoffset() + if offset is None: + return "+0000" + + total_seconds = int(offset.total_seconds()) + sign = "+" if total_seconds >= 0 else "-" + abs_seconds = abs(total_seconds) + hours = abs_seconds // 3600 + minutes = (abs_seconds % 3600) // 60 + return f"{sign}{hours:02d}{minutes:02d}" + + seg.TimezoneOffsetFromUTC = format_timezone_offset(dt_now) + seg.SeriesDescription = model_name + + # Add Contributing Equipment Sequence + try: + from pydicom.dataset import Dataset + from pydicom.sequence import Sequence as PyDicomSequence + + # Create Purpose of Reference Code Sequence + seq_purpose_of_reference_code = PyDicomSequence() + seg_purpose_of_reference_code = Dataset() + seg_purpose_of_reference_code.CodeValue = "Newcode1" + seg_purpose_of_reference_code.CodingSchemeDesignator = "99IHE" + seg_purpose_of_reference_code.CodeMeaning = "Processing Algorithm" + seq_purpose_of_reference_code.append(seg_purpose_of_reference_code) + + # Create Contributing Equipment Sequence + seq_contributing_equipment = PyDicomSequence() + seg_contributing_equipment = Dataset() + seg_contributing_equipment.PurposeOfReferenceCodeSequence = seq_purpose_of_reference_code + seg_contributing_equipment.Manufacturer = "MONAI Consortium" + seg_contributing_equipment.ManufacturerModelName = model_name + seg_contributing_equipment.SoftwareVersions = software_version + seg_contributing_equipment.DeviceUID = hd.UID() + seq_contributing_equipment.append(seg_contributing_equipment) + seg.ContributingEquipmentSequence = seq_contributing_equipment + except (AttributeError, KeyError, TypeError) as e: + logger.warning(f"Could not add ContributingEquipmentSequence: {e}") + + # Add custom tags if provided + if custom_tags: + from pydicom.datadict import tag_for_keyword + + for keyword, value in custom_tags.items(): + if not 
isinstance(keyword, str): + logger.warning(f"Custom tag key must be a DICOM keyword string; got {type(keyword)}") + continue + try: + if tag_for_keyword(keyword) is None: + logger.warning(f"Unknown DICOM keyword: {keyword}; skipping") + continue + setattr(seg, keyword, value) + except (AttributeError, KeyError, TypeError, pydicom.errors.InvalidDicomError) as ex: + logger.exception(f"Custom tag {keyword} was not written") + continue + + # Save DICOM SEG + output_file = tempfile.NamedTemporaryFile(suffix=".dcm", delete=False).name + seg.save_as(output_file) + logger.info(f"DICOM SEG saved to: {output_file}") + + return output_file + + +def _itk_nifti_to_dicom_seg(series_dir, label, label_info) -> str: + """Convert NIfTI segmentation to DICOM SEG format using ITK/dcmqi. + + Args: + series_dir: Directory containing source DICOM images + label: Path to NIfTI label file + label_info: List of dictionaries containing segment information (name, description, color, etc.) + + Returns: + Path to output DICOM SEG file, or empty string if conversion fails + """ + # Input validation + if label_info is None: + label_info = [] + + if not os.path.exists(label): + logger.error(f"Label file not found: {label}") + return "" + + if not os.path.exists(series_dir): + logger.error(f"Series directory not found: {series_dir}") + return "" + + # Extract label information (reuse helper function) + unique_labels, info_by_label_id, model_name = _extract_label_info(label, label_info) + if unique_labels is None: + return "" - segment_attribute = info.get( - "segmentAttribute", - { - "labelID": int(idx), + # Build ITK segment descriptions + segment_descriptions = [] + for i, label_id in enumerate(unique_labels): + # Look up metadata by actual label ID, fall back to empty dict + info = info_by_label_id.get(int(label_id), {}) + name = info.get("name", f"Segment_{label_id}") + description = info.get("description", name) + + logger.info(f"Segment {i}: idx={label_id}, name={name}") + + # Check if custom 
segmentAttribute is provided + segment_attr = info.get("segmentAttribute") + + if segment_attr: + # Use custom attribute as-is + segment_descriptions.append(segment_attr) + else: + # Build default template for ITK method + rgb = list(info.get("color", GENERIC_ANATOMY_COLORS.get(name, (255, 0, 0))))[0:3] + rgb = [int(x) for x in rgb] + + segment_attr = { + "labelID": int(label_id), "SegmentLabel": name, "SegmentDescription": description, "SegmentAlgorithmType": "AUTOMATIC", @@ -125,64 +455,118 @@ def nifti_to_dicom_seg(series_dir, label, label_info, file_ext="*", use_itk=None "CodeMeaning": name, }, "recommendedDisplayRGBValue": rgb, - }, - ) - segment_attributes.append(segment_attribute) + } + segment_descriptions.append(segment_attr) + + if not segment_descriptions: + logger.error("Missing segment descriptions") + return "" + # Extract metadata from label_info (use first segment's metadata for study-level info) + first_info = label_info[0] if label_info and len(label_info) > 0 else {} + + # Get timestamp-based series number (consistent with highdicom implementation) + dt_now = datetime.datetime.now() + MAX_SERIES_NUMBER = 9999 + series_number = int(dt_now.strftime("%H%M%S")) % (MAX_SERIES_NUMBER + 1) + + # Build ITK template with extracted or sensible default values template = { - "ContentCreatorName": "Reader1", - "ClinicalTrialSeriesID": "Session1", - "ClinicalTrialTimePointID": "1", + "ContentCreatorName": first_info.get("creator", "MONAI Label"), + "ClinicalTrialSeriesID": first_info.get("session_id", "Session1"), + "ClinicalTrialTimePointID": first_info.get("timepoint_id", "1"), "SeriesDescription": model_name, - "SeriesNumber": "300", + "SeriesNumber": str(series_number), # Use timestamp-based number "InstanceNumber": "1", - "segmentAttributes": [segment_attributes], + "segmentAttributes": [segment_descriptions], "ContentLabel": "SEGMENTATION", "ContentDescription": "MONAI Label - Image segmentation", "ClinicalTrialCoordinatingCenterName": "MONAI", - 
"BodyPartExamined": "", + "BodyPartExamined": first_info.get("body_part", ""), } + logger.debug("dcmqi template: %s", json.dumps(template, indent=2)) - logger.info(json.dumps(template, indent=2)) - if not segment_attributes: - logger.error("Missing Attributes/Empty Label provided") + # Call dcmqi converter with error handling + try: + return _dcmqi_nifti_to_dicom_seg(label, series_dir, template) + except (RuntimeError, OSError, ValueError) as e: + logger.exception("ITK DICOM SEG conversion failed") return "" + +def nifti_to_dicom_seg( + series_dir, label, label_info, file_ext="*", use_itk=None, omit_empty_frames=False, custom_tags=None +) -> str: + """Convert NIfTI segmentation to DICOM SEG format. + + This dispatcher function selects between highdicom (default) or ITK/dcmqi implementations. + + Args: + series_dir: Directory containing source DICOM images + label: Path to NIfTI label file + label_info: List of dictionaries containing segment information + file_ext: File extension pattern for DICOM files (default: "*") + use_itk: If True, use ITK/dcmqi. If False/None, use highdicom (default). 
+ omit_empty_frames: If True, omit frames with no segmented pixels (highdicom only) + custom_tags: Optional dictionary of custom DICOM tags (highdicom only) + + Returns: + Path to output DICOM SEG file, or empty string if conversion fails + """ + start = time.time() + + # Determine which implementation to use + if use_itk is None: + use_itk = settings.MONAI_LABEL_USE_ITK_FOR_DICOM_SEG + + # Check if highdicom is available (unless using ITK) + if not use_itk and not HIGHDICOM_AVAILABLE: + raise ImportError("highdicom is not available") + + # Dispatch to appropriate implementation if use_itk: - output_file = itk_image_to_dicom_seg(label, series_dir, template) + output_file = _itk_nifti_to_dicom_seg(series_dir, label, label_info) else: - template = pydicom_seg.template.from_dcmqi_metainfo(template) - writer = pydicom_seg.MultiClassWriter( - template=template, - inplane_cropping=False, - skip_empty_slices=False, - skip_missing_segment=False, + output_file = _highdicom_nifti_to_dicom_seg( + series_dir, label, label_info, file_ext, omit_empty_frames, custom_tags ) - # Read source Images - series_dir = pathlib.Path(series_dir) - image_files = series_dir.glob(file_ext) - image_datasets = [dcmread(str(f), stop_before_pixels=True) for f in image_files] - logger.info(f"Total Source Images: {len(image_datasets)}") + logger.info(f"nifti_to_dicom_seg latency: {time.time() - start:.3f} sec") + return output_file - mask = SimpleITK.ReadImage(label) - mask = SimpleITK.Cast(mask, SimpleITK.sitkUInt16) - output_file = tempfile.NamedTemporaryFile(suffix=".dcm").name - dcm = writer.write(mask, image_datasets) - dcm.save_as(output_file) +def _dcmqi_nifti_to_dicom_seg(label, series_dir, template) -> str: + """Convert NIfTI to DICOM SEG using dcmqi's itkimage2segimage command-line tool. - logger.info(f"nifti_to_dicom_seg latency : {time.time() - start} (sec)") - return output_file + This is a low-level wrapper around the dcmqi itkimage2segimage tool. 
+ Called by _itk_nifti_to_dicom_seg() as the actual conversion implementation. + """ + import shutil + from monailabel.utils.others.generic import run_command -def itk_image_to_dicom_seg(label, series_dir, template) -> str: - output_file = tempfile.NamedTemporaryFile(suffix=".dcm").name - meta_data = tempfile.NamedTemporaryFile(suffix=".json").name + command = "itkimage2segimage" + if not shutil.which(command): + error_msg = ( + f"\n{'=' * 80}\n" + f"ERROR: {command} command-line tool not found\n" + f"{'=' * 80}\n\n" + f"The ITK-based DICOM SEG conversion requires the dcmqi package.\n\n" + f"Install dcmqi:\n" + f" pip install dcmqi\n\n" + f"For more information:\n" + f" https://github.com/QIICR/dcmqi\n\n" + f"Note: Consider using the default highdicom-based conversion (use_itk=False)\n" + f"which doesn't require dcmqi.\n" + f"{'=' * 80}\n" + ) + raise RuntimeError(error_msg) + + output_file = tempfile.NamedTemporaryFile(suffix=".dcm", delete=False).name + meta_data = tempfile.NamedTemporaryFile(suffix=".json", delete=False).name with open(meta_data, "w") as fp: json.dump(template, fp) - command = "itkimage2segimage" args = [ "--inputImageList", label, @@ -199,15 +583,83 @@ def itk_image_to_dicom_seg(label, series_dir, template) -> str: def dicom_seg_to_itk_image(label, output_ext=".seg.nrrd"): - filename = label if not os.path.isdir(label) else os.path.join(label, os.listdir(label)[0]) + """Convert DICOM SEG to NIfTI/NRRD format using highdicom. 
+ + Args: + label: Path to DICOM SEG file or directory containing it + output_ext: Output file extension (default: ".seg.nrrd", also supports ".nii.gz") + + Returns: + Path to output file, or None if conversion fails + """ + # Handle both file and directory inputs + if os.path.isdir(label): + # List and sort files for deterministic behavior + files = sorted(os.listdir(label)) + + # Filter for valid DICOM files + filename = None + for f in files: + filepath = os.path.join(label, f) + if not os.path.isfile(filepath): + continue + try: + # Attempt to read as DICOM + pydicom.dcmread(filepath, stop_before_pixels=True) + filename = filepath + break + except (pydicom.errors.InvalidDicomError, OSError, PermissionError) as e: + # Not a valid DICOM or inaccessible file, log and continue searching + logger.debug(f"Skipping file {f}: {type(e).__name__}: {e}") + continue + + if filename is None: + raise ValueError( + f"No valid DICOM files found in directory: {label}\n" + f"Searched {len(files)} file(s). Ensure the directory contains valid DICOM SEG files." 
+ ) + else: + filename = label - dcm = pydicom.dcmread(filename) - reader = pydicom_seg.MultiClassReader() - result = reader.read(dcm) - image = result.image + if not HIGHDICOM_AVAILABLE: + raise ImportError("highdicom is not available") - output_file = tempfile.NamedTemporaryFile(suffix=output_ext).name + # Use pydicom to read DICOM SEG + dcm = pydicom.dcmread(filename) + # Extract volume from DICOM SEG using highdicom + seg_dataset = hd.seg.Segmentation.from_dataset(dcm) + + # Use get_volume() to extract the segmentation as a 3D volume + # This automatically handles reconstruction, spacing, and geometry + volume = seg_dataset.get_volume(combine_segments=True, relabel=True) + + # Convert to SimpleITK image + image = SimpleITK.GetImageFromArray(volume.array) + + # Convert spacing from highdicom to SimpleITK order + # highdicom: (slice, row, column) for axes (0, 1, 2) + # SimpleITK: (x, y, z) = (column, row, slice) + # Therefore: reverse the spacing tuple + sitk_spacing = tuple(reversed(volume.spacing)) + image.SetSpacing(sitk_spacing) + + # Set origin and direction if available + if hasattr(volume, "position") and volume.position is not None: + # Origin (position) is in LPS physical coordinates for voxel (0,0,0) + # Both highdicom and SimpleITK use the same coordinate system, so no conversion needed + image.SetOrigin(volume.position) + + if hasattr(volume, "direction") and volume.direction is not None: + # Direction matrix columns need reordering + # highdicom: columns are [slice_dir, row_dir, col_dir] = [z, y, x] + # SimpleITK: columns must be [x_dir, y_dir, z_dir] + # Therefore: reorder columns from [z, y, x] to [x, y, z] + direction_reordered = volume.direction[:, [2, 1, 0]] # Swap columns: [0,1,2] → [2,1,0] + direction_flat = tuple(direction_reordered.flatten()) + image.SetDirection(direction_flat) + + output_file = tempfile.NamedTemporaryFile(suffix=output_ext, delete=False).name SimpleITK.WriteImage(image, output_file, True) if not 
os.path.exists(output_file): diff --git a/monailabel/endpoints/datastore.py b/monailabel/endpoints/datastore.py index 119f5f941..fdd63bb6e 100644 --- a/monailabel/endpoints/datastore.py +++ b/monailabel/endpoints/datastore.py @@ -133,8 +133,10 @@ def remove_label(id: str, tag: str, user: Optional[str] = None): def download_image(image: str, check_only=False, check_sum=None): instance: MONAILabelApp = app_instance() image = instance.datastore().get_image_uri(image) + if not os.path.isfile(image): - raise HTTPException(status_code=404, detail="Image NOT Found") + logger.error(f"Image NOT Found or is a directory: {image}") + raise HTTPException(status_code=404, detail="Image NOT Found or is a directory") if check_only: if check_sum: @@ -151,7 +153,8 @@ def download_label(label: str, tag: str, check_only=False): instance: MONAILabelApp = app_instance() label = instance.datastore().get_label_uri(label, tag) if not os.path.isfile(label): - raise HTTPException(status_code=404, detail="Label NOT Found") + logger.error(f"Label NOT Found or is a directory: {label}") + raise HTTPException(status_code=404, detail="Label NOT Found or is a directory") if check_only: return {} diff --git a/requirements.txt b/requirements.txt index 9a2873647..365a8420b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,10 +24,10 @@ expiringdict==1.2.2 expiring_dict==1.1.0 cachetools==5.3.3 watchdog==4.0.0 -pydicom==2.4.4 -pydicom-seg==0.4.1 +pydicom==3.0.1 +highdicom==0.26.1 pynetdicom==2.0.2 -pynrrd==1.0.0 +pynrrd==1.1.3 numpymaxflow==0.0.7 setuptools>=61 setuptools-scm<8.0.0 @@ -52,6 +52,11 @@ SAM-2 @ git+https://github.com/facebookresearch/sam2.git@c2ec8e14a185632b0a5d8b1 # scipy and scikit-learn latest packages are missing on python 3.8 # sudo apt-get install openslide-tools -y +# Optional dependencies: +# - dcmqi (provides itkimage2segimage command-line tool for legacy DICOM SEG conversion) +# Install with: pip install dcmqi +# More info: https://github.com/QIICR/dcmqi + # How to 
auto update versions? # pip install pur # pur -r requirements.txt diff --git a/setup.cfg b/setup.cfg index 83b3d77e0..99aa85373 100644 --- a/setup.cfg +++ b/setup.cfg @@ -50,10 +50,10 @@ install_requires = expiring_dict>=1.1.0 cachetools>=5.3.3 watchdog>=4.0.0 - pydicom>=2.4.4 - pydicom-seg>=0.4.1 + pydicom>=3.0.1 + highdicom>=0.26.1 pynetdicom>=2.0.2 - pynrrd>=1.0.0 + pynrrd>=1.1.3 numpymaxflow>=0.0.7 girder-client>=3.2.3 ninja>=1.11.1.1 diff --git a/tests/unit/datastore/test_convert.py b/tests/unit/datastore/test_convert.py index 9c190f162..81d19b128 100644 --- a/tests/unit/datastore/test_convert.py +++ b/tests/unit/datastore/test_convert.py @@ -14,9 +14,16 @@ import unittest import numpy as np +import pydicom +import SimpleITK as sitk from monai.transforms import LoadImage -from monailabel.datastore.utils.convert import binary_to_image, dicom_to_nifti, nifti_to_dicom_seg +from monailabel.datastore.utils.convert import ( + binary_to_image, + dicom_seg_to_itk_image, + dicom_to_nifti, + nifti_to_dicom_seg, +) class TestConvert(unittest.TestCase): @@ -24,49 +31,853 @@ class TestConvert(unittest.TestCase): local_dataset = os.path.join(base_dir, "data", "dataset", "local", "spleen") dicom_dataset = os.path.join(base_dir, "data", "dataset", "dicomweb", "e7567e0a064f0c334226a0658de23afd") + # Test data constants + TEST_SERIES_ID = "1.2.826.0.1.3680043.8.274.1.1.8323329.686549.1629744177.996087" + + # === Utility Methods === + + def _get_test_paths(self): + """Get standard DICOM series and label paths.""" + series_dir = os.path.join(self.dicom_dataset, self.TEST_SERIES_ID) + label = os.path.join( + self.dicom_dataset, + "labels", + "final", + f"{self.TEST_SERIES_ID}.nii.gz", + ) + return series_dir, label + + def _load_dicom_series(self, series_dir): + """Load DICOM series and return reference image and dimensions.""" + reader = sitk.ImageSeriesReader() + dicom_names = reader.GetGDCMSeriesFileNames(series_dir) + assert len(dicom_names) > 0, f"No DICOM series found in 
{series_dir}" + reader.SetFileNames(dicom_names) + reference_image = reader.Execute() + return reference_image, reference_image.GetSize() + + def _create_label_file(self, label_array, reference_image): + """Create a temporary NIfTI label file from array with proper geometry.""" + label_sitk = sitk.GetImageFromArray(label_array) + label_sitk.CopyInformation(reference_image) + label_file = tempfile.NamedTemporaryFile(suffix=".nii.gz", delete=False).name + sitk.WriteImage(label_sitk, label_file) + return label_file + + def _extract_pixels(self, dicom_seg_file, remove_background=True): + """Extract pixel values from DICOM SEG.""" + result_nifti = dicom_seg_to_itk_image(dicom_seg_file) + assert os.path.exists(result_nifti), "Failed to convert DICOM SEG back to image" + + try: + result_img = sitk.ReadImage(result_nifti) + pixel_array = sitk.GetArrayFromImage(result_img) + + unique_values = np.unique(pixel_array) + if remove_background: + unique_values = unique_values[unique_values != 0] + + return pixel_array, unique_values + finally: + os.unlink(result_nifti) # Cleanup temp file + + def _count_segments_in_label(self, label_path): + """Load label file and count unique segments (excluding background).""" + label_img = sitk.ReadImage(label_path) + label_array = sitk.GetArrayFromImage(label_img) + return len(np.unique(label_array)) - 1 + + def _validate_dicom_seg(self, dcm_file, expected_segments=None): + """Validate basic DICOM SEG attributes and return dataset.""" + dcm = pydicom.dcmread(dcm_file) + + # Accept both Segmentation Storage and Labelmap Segmentation Storage + valid_sop_classes = [ + "1.2.840.10008.5.1.4.1.1.66.4", # Segmentation Storage + "1.2.840.10008.5.1.4.1.1.66.7", # Labelmap Segmentation Storage + ] + assert dcm.SOPClassUID in valid_sop_classes, f"Not a valid DICOM SEG: got {dcm.SOPClassUID}" + assert len(dcm.SegmentSequence) > 0, "No segments in DICOM SEG" + assert dcm.NumberOfFrames > 0, "No frames in DICOM SEG" + assert hasattr(dcm, "PixelData"), 
"Missing PixelData" + + if expected_segments is not None: + assert ( + len(dcm.SegmentSequence) == expected_segments + ), f"Expected {expected_segments} segments, got {len(dcm.SegmentSequence)}" + + return dcm + + def _create_multi_segment_array(self, dims, segment_values, region_type="full_slice"): + """Create synthetic label array with multiple segments. + + Args: + dims: Tuple of (width, height, depth) + segment_values: List of label values (e.g., [1, 5, 10]) + region_type: 'full_slice' (entire slices) or 'regions' (spatial regions) + + Returns: + numpy array of shape (depth, height, width) + """ + label_array = np.zeros((dims[2], dims[1], dims[0]), dtype=np.uint8) + z_segment = max(1, dims[2] // len(segment_values)) + + for i, value in enumerate(segment_values): + z_start = i * z_segment + z_end = (i + 1) * z_segment if i < len(segment_values) - 1 else dims[2] + + if region_type == "full_slice": + label_array[z_start:z_end, :, :] = value + elif region_type == "regions": + # Create non-overlapping regions with safe bounds + # Enforce minimum sizes (at least 1 pixel) to handle very small images + box_w = max(1, min(80, dims[0] // (len(segment_values) + 1))) + box_h = max(1, min(80, dims[1] // 2)) + + # Position regions non-overlapping horizontally + # Enforce minimum spacing to prevent division by zero + x_spacing = max(1, dims[0] // (len(segment_values) + 1)) + x_center = (i + 1) * x_spacing + # Clamp x_center to valid range + x_center = max(0, min(x_center, dims[0] - 1)) + + x0 = max(0, x_center - box_w // 2) + x1 = min(dims[0], x0 + box_w) + # Ensure at least one column + x1 = max(x1, x0 + 1) + + # Center vertically + y_center = dims[1] // 2 + y0 = max(0, y_center - box_h // 2) + y1 = min(dims[1], y0 + box_h) + # Ensure at least one row + y1 = max(y1, y0 + 1) + + label_array[z_start:z_end, y0:y1, x0:x1] = value + elif region_type == "large_regions": + # Create large centered regions for robustness (target 100x100) + # Enforce minimum size to handle very small 
images + box_size = max(1, min(100, dims[0] // 2, dims[1] // 2)) + + # Center the box in X dimension + x0 = max(0, dims[0] // 2 - box_size // 2) + x1 = min(dims[0], x0 + box_size) + # Ensure at least one column + x1 = max(x1, x0 + 1) + + # Center the box in Y dimension + y0 = max(0, dims[1] // 2 - box_size // 2) + y1 = min(dims[1], y0 + box_size) + # Ensure at least one row + y1 = max(y1, y0 + 1) + + label_array[z_start:z_end, y0:y1, x0:x1] = value + + return label_array + + # === Test Methods === + def test_dicom_to_nifti(self): - series_dir = os.path.join(self.dicom_dataset, "1.2.826.0.1.3680043.8.274.1.1.8323329.686549.1629744177.996087") + series_dir = os.path.join(self.dicom_dataset, self.TEST_SERIES_ID) result = dicom_to_nifti(series_dir) assert os.path.exists(result) assert result.endswith(".nii.gz") + + # Verify the converted image is valid and has reasonable properties + result_img = sitk.ReadImage(result) + result_size = result_img.GetSize() + + # Verify 3D image with reasonable dimensions + assert len(result_size) == 3, "Should be 3D image" + assert result_size[0] > 0, "Width must be > 0" + assert result_size[1] > 0, "Height must be > 0" + assert result_size[2] > 0, "Depth must be > 0" + + # Verify pixel data is not all zeros + pixel_array = sitk.GetArrayFromImage(result_img) + assert not np.all(pixel_array == 0), "Image should not be all zeros" + + # Verify pixel values are in reasonable range for medical imaging (HU units) + assert pixel_array.min() >= -2048, "Pixel values too low (HU range)" + assert pixel_array.max() <= 4095, "Pixel values too high (HU range)" + + # Verify spacing is reasonable (not zero, not extreme) + spacing = result_img.GetSpacing() + for i, s in enumerate(spacing): + assert 0.1 < s < 100, f"Spacing[{i}] = {s} is unreasonable" + os.unlink(result) def test_binary_to_image(self): reference_image = os.path.join(self.local_dataset, "labels", "final", "spleen_3.nii.gz") + + # Load reference using both methods to get expected values + 
ref_img = sitk.ReadImage(reference_image) + # Geometry validated via spacing checks below; array not needed. label = LoadImage(image_only=True)(reference_image) label = label.astype(np.uint8) + original_unique_values = np.unique(label) + original_nonzero_count = np.count_nonzero(label) label = label.flatten(order="F") - label_bin = tempfile.NamedTemporaryFile(suffix=".bin").name + label_bin = tempfile.NamedTemporaryFile(suffix=".bin", delete=False).name label.tofile(label_bin) - result = binary_to_image(reference_image, label_bin) - os.unlink(label_bin) + try: + result = binary_to_image(reference_image, label_bin) + self.addCleanup(os.unlink, result) + finally: + os.unlink(label_bin) assert os.path.exists(result) assert result.endswith(".nii.gz") + + # Verify the result is valid and readable + result_img = sitk.ReadImage(result) + result_array = sitk.GetArrayFromImage(result_img) + + # Verify 3D structure exists + assert len(result_array.shape) == 3, "Should be 3D image" + assert result_array.shape[0] > 0, "Depth must be > 0" + assert result_array.shape[1] > 0, "Height must be > 0" + assert result_array.shape[2] > 0, "Width must be > 0" + + # Verify geometry matches reference (spacing is preserved) + result_spacing = result_img.GetSpacing() + ref_spacing = ref_img.GetSpacing() + for i in range(3): + spacing_diff = abs(result_spacing[i] - ref_spacing[i]) + assert spacing_diff < 0.01, f"Spacing mismatch in dim {i}: {ref_spacing[i]} vs {result_spacing[i]}" + + # Verify data content is reasonable (same unique values, similar nonzero count) + result_unique_values = np.unique(result_array) + result_nonzero_count = np.count_nonzero(result_array) + + assert set(result_unique_values) == set( + original_unique_values + ), f"Unique values changed: {set(original_unique_values)} vs {set(result_unique_values)}" + + # Allow 1% difference in nonzero count due to potential boundary effects + count_diff_ratio = abs(result_nonzero_count - original_nonzero_count) / 
max(original_nonzero_count, 1) + assert ( + count_diff_ratio < 0.01 + ), f"Nonzero count changed significantly: {original_nonzero_count} vs {result_nonzero_count}" + os.unlink(result) - def test_nifti_to_dicom_seg(self): - image = os.path.join(self.dicom_dataset, "1.2.826.0.1.3680043.8.274.1.1.8323329.686549.1629744177.996087") - label = os.path.join( - self.dicom_dataset, - "labels", - "final", - "1.2.826.0.1.3680043.8.274.1.1.8323329.686549.1629744177.996087.nii.gz", - ) - result = nifti_to_dicom_seg(image, label, None, use_itk=False) + def _test_nifti_to_dicom_seg_with_label_info_impl(self, use_itk): + """Helper: Test NIfTI to DICOM SEG conversion with custom label info.""" + series_dir, label = self._get_test_paths() + + label_info = [ + { + "name": "Spleen", + "description": "Spleen organ", + "color": [255, 0, 0], + "model_name": "TestModel", + } + ] + + result = nifti_to_dicom_seg(series_dir, label, label_info, use_itk=use_itk) + self.addCleanup(os.unlink, result) assert os.path.exists(result) - assert result.endswith(".dcm") + dcm = pydicom.dcmread(result) + + # Verify series description + assert dcm.SeriesDescription == "TestModel" + + # Verify segment metadata is properly set + # Note: LABELMAP type creates a Background segment with SegmentNumber=0 + assert len(dcm.SegmentSequence) >= 1 + + # Find the first real segment (skip Background if present) + real_segments = [s for s in dcm.SegmentSequence if s.SegmentNumber > 0] + assert len(real_segments) >= 1, "Should have at least one real segment" + seg = real_segments[0] + + # Verify segment label from label_info + assert seg.SegmentLabel == "Spleen", f"Expected 'Spleen', got '{seg.SegmentLabel}'" + + # Note: SegmentDescription is optional in DICOM, may not be present + # The description is typically in SegmentedPropertyTypeCodeSequence.CodeMeaning + + # Verify algorithm information + assert seg.SegmentAlgorithmType == "AUTOMATIC" + assert seg.SegmentAlgorithmName == "MONAILABEL" + + # Verify segment has 
required code sequences + assert hasattr(seg, "SegmentedPropertyCategoryCodeSequence") + assert len(seg.SegmentedPropertyCategoryCodeSequence) > 0 + assert hasattr(seg, "SegmentedPropertyTypeCodeSequence") + assert len(seg.SegmentedPropertyTypeCodeSequence) > 0 + + # Verify pixel data exists + assert hasattr(dcm, "PixelData") + assert len(dcm.PixelData) > 0 + + # Verify frame count + assert hasattr(dcm, "NumberOfFrames") + assert dcm.NumberOfFrames > 0 + + def test_nifti_to_dicom_seg_with_label_info_highdicom(self): + """Test label info conversion using highdicom implementation.""" + self._test_nifti_to_dicom_seg_with_label_info_impl(use_itk=False) + + def test_nifti_to_dicom_seg_with_label_info_itk(self): + """Test label info conversion using ITK implementation.""" + self._test_nifti_to_dicom_seg_with_label_info_impl(use_itk=True) + + def _test_segment_number_mapping_impl(self, use_itk): + """Helper: Test that non-sequential label values are correctly mapped to sequential segment numbers.""" + series_dir = os.path.join(self.dicom_dataset, self.TEST_SERIES_ID) + + # Load DICOM series to get proper dimensions + reference_image, dims = self._load_dicom_series(series_dir) + + # Create synthetic label with non-sequential values (1, 5, 10) + label_array = self._create_multi_segment_array(dims, [1, 5, 10], region_type="large_regions") + label_file = self._create_label_file(label_array, reference_image) + + # Define label info for all three segments + label_info = [ + {"name": "Segment1", "description": "First segment", "color": [255, 0, 0]}, + {"name": "Segment5", "description": "Second segment", "color": [0, 255, 0]}, + {"name": "Segment10", "description": "Third segment", "color": [0, 0, 255]}, + ] + + # Convert to DICOM SEG + result = nifti_to_dicom_seg(series_dir, label_file, label_info, use_itk=use_itk) + + assert os.path.exists(result) + + # Read back and verify + dcm = pydicom.dcmread(result) + + # Filter out Background segment (SegmentNumber=0) created by 
LABELMAP type + real_segments = [s for s in dcm.SegmentSequence if s.SegmentNumber > 0] + + # Verify we have 3 real segments + assert len(real_segments) == 3, f"Expected 3 real segments, got {len(real_segments)}" + + # Verify segment numbers are sequential (1, 2, 3) + # This is the main bug we fixed - non-sequential labels (1, 5, 10) + # should be remapped to sequential segment numbers (1, 2, 3) + segment_numbers = [seg_item.SegmentNumber for seg_item in real_segments] + assert segment_numbers == [1, 2, 3], f"Expected [1, 2, 3], got {segment_numbers}" + + # Verify segment labels match our input + segment_labels = [seg_item.SegmentLabel for seg_item in real_segments] + assert segment_labels == ["Segment1", "Segment5", "Segment10"], f"Expected correct labels, got {segment_labels}" + + # Verify it's a valid DICOM SEG (accept both Segmentation Storage and Labelmap Segmentation Storage) + assert dcm.SOPClassUID in ["1.2.840.10008.5.1.4.1.1.66.4", "1.2.840.10008.5.1.4.1.1.66.7"] + + # Verify pixel data + pixel_array, unique_values = self._extract_pixels(result) + + # Must have exactly 3 segments with values 1, 2, 3 + assert ( + len(unique_values) == 3 + ), f"Expected exactly 3 segments in pixel data, found {len(unique_values)}: {list(unique_values)}" + + # Verify segments are exactly {1, 2, 3}, not the original {1, 5, 10} + assert set(unique_values) == { + 1, + 2, + 3, + }, f"Pixel values must be {{1,2,3}} (remapped from {{1,5,10}}), got {set(unique_values)}" + + # SPATIAL VERIFICATION: Verify each segment has substantial voxels + # Compute expected voxels based on actual test geometry + # "large_regions" creates box_size x box_size x z_segment regions + num_segments = 3 + box_size = max(1, min(100, dims[0] // 2, dims[1] // 2)) + z_segment = max(1, dims[2] // num_segments) + + # For the last segment, it gets all remaining slices + z_slices_per_segment = [z_segment, z_segment, dims[2] - 2 * z_segment] + + # Compute expected voxels per segment (box_size * box_size * 
z_slices) + expected_voxels_per_segment = [box_size * box_size * z for z in z_slices_per_segment] + + # Use 80% tolerance to account for compression and round-trip losses + min_expected_per_segment = [int(expected * 0.8) for expected in expected_voxels_per_segment] + expected_total = int(sum(expected_voxels_per_segment) * 0.8) + + count_per_segment = {1: np.sum(pixel_array == 1), 2: np.sum(pixel_array == 2), 3: np.sum(pixel_array == 3)} + + # Verify each segment has expected voxels (with tolerance) + for i, (seg_num, count) in enumerate(count_per_segment.items()): + min_expected = min_expected_per_segment[i] + assert ( + count >= min_expected + ), f"Segment {seg_num} has too few voxels: {count} < {min_expected} (box_size={box_size}, z_slices={z_slices_per_segment[i]})" + + # Verify total matches expected + total_nonzero = sum(count_per_segment.values()) + assert ( + total_nonzero >= expected_total + ), f"Total segmentation voxels too low: {total_nonzero} (expected >= {expected_total}, box_size={box_size}, dims={dims})" + + # Verify frames exist in DICOM SEG + assert dcm.NumberOfFrames > 0, "No frames in segmentation" + + # Cleanup + os.unlink(label_file) os.unlink(result) - def test_itk_image_to_dicom_seg(self): - pass + def test_segment_number_mapping_highdicom(self): + """Test non-sequential label remapping using highdicom implementation.""" + self._test_segment_number_mapping_impl(use_itk=False) + + def test_segment_number_mapping_itk(self): + """Test non-sequential label remapping using ITK implementation.""" + self._test_segment_number_mapping_impl(use_itk=True) + + def _test_round_trip_impl(self, use_itk): + """Helper: Test NIfTI → DICOM SEG → NIfTI preserves data accurately.""" + series_dir, label = self._get_test_paths() + + label_info = [ + {"name": "Spleen", "color": [255, 0, 0]}, + ] + + # Load original for comparison + original_sitk = sitk.ReadImage(label) + original_array = sitk.GetArrayFromImage(original_sitk) + original_spacing = 
original_sitk.GetSpacing() + original_size = original_sitk.GetSize() + + # Convert to DICOM SEG + dicom_seg_file = nifti_to_dicom_seg(series_dir, label, label_info, use_itk=use_itk) + assert os.path.exists(dicom_seg_file) + + # Convert back to NIfTI + result_nifti = dicom_seg_to_itk_image(dicom_seg_file) + assert os.path.exists(result_nifti) + + # Load result + result_sitk = sitk.ReadImage(result_nifti) + result_array = sitk.GetArrayFromImage(result_sitk) + result_spacing = result_sitk.GetSpacing() + result_size = result_sitk.GetSize() + + # Verify dimensions are preserved + # NOTE: ITK/dcmqi only stores non-empty slices, so Z dimension may differ + if use_itk: + # For ITK, only check X/Y dimensions match + assert result_size[0] == original_size[0], f"X dimension changed: {original_size[0]} → {result_size[0]}" + assert result_size[1] == original_size[1], f"Y dimension changed: {original_size[1]} → {result_size[1]}" + # Z dimension will be smaller (only non-empty slices) + assert ( + result_size[2] <= original_size[2] + ), f"Z dimension should be ≤ original: {original_size[2]} → {result_size[2]}" + else: + # For highdicom, expect exact dimension match + assert result_size == original_size, f"Dimensions changed: {original_size} → {result_size}" - def test_itk_dicom_seg_to_image(self): - pass + # Verify geometry is preserved (with tolerance for floating point) + for i in range(3): + spacing_diff = abs(original_spacing[i] - result_spacing[i]) + assert spacing_diff < 0.01, f"Spacing changed in dimension {i}: {original_spacing[i]} → {result_spacing[i]}" + + # Note: Origin may not be preserved through DICOM SEG conversion + # This is expected due to DICOM coordinate system transformations + # We primarily care that dimensions, spacing, and data are correct + + # Verify unique values are preserved + original_unique = sorted(np.unique(original_array)) + result_unique = sorted(np.unique(result_array)) + assert original_unique == result_unique, f"Unique values mismatch: 
{original_unique} vs {result_unique}" + + # Verify label counts are similar (within 3% due to potential compression/resampling) + for label_value in original_unique: + if label_value == 0: + continue + orig_count = np.sum(original_array == label_value) + result_count = np.sum(result_array == label_value) + + if orig_count > 0: + ratio = result_count / orig_count + assert ( + 0.97 <= ratio <= 1.03 + ), f"Label {label_value} count changed by >3%: {orig_count} → {result_count} (ratio: {ratio:.2f})" + + # Verify result is not empty + assert np.any(result_array != 0), "Result segmentation is all zeros" + + # Cleanup + os.unlink(dicom_seg_file) + os.unlink(result_nifti) + + def test_round_trip_highdicom(self): + """Test round-trip conversion using highdicom implementation.""" + self._test_round_trip_impl(use_itk=False) + + def test_round_trip_itk(self): + """Test round-trip conversion using ITK implementation.""" + self._test_round_trip_impl(use_itk=True) + + def _test_empty_label_impl(self, use_itk): + """Helper: Test handling of empty label files.""" + series_dir = os.path.join(self.dicom_dataset, self.TEST_SERIES_ID) + + # Load DICOM series to get reference image + reference_image, _ = self._load_dicom_series(series_dir) + + # Create an empty label (all zeros) with matching dimensions + label_sitk = sitk.Image(reference_image.GetSize(), sitk.sitkUInt8) + label_sitk.CopyInformation(reference_image) + label_file = tempfile.NamedTemporaryFile(suffix=".nii.gz", delete=False).name + sitk.WriteImage(label_sitk, label_file) + + # Should return empty string for empty label + result = nifti_to_dicom_seg(series_dir, label_file, None, use_itk=use_itk) + assert result == "" + + # Cleanup + os.unlink(label_file) + + def test_empty_label_highdicom(self): + """Test empty label handling using highdicom implementation.""" + self._test_empty_label_impl(use_itk=False) + + def test_empty_label_itk(self): + """Test empty label handling using ITK implementation.""" + 
self._test_empty_label_impl(use_itk=True) + + def _test_missing_label_info_impl(self, use_itk): + """Helper: Test that conversion works with missing/incomplete label_info and applies defaults correctly.""" + series_dir, label = self._get_test_paths() + + # Count expected segments in label + expected_segments = self._count_segments_in_label(label) + + # Convert with None label_info + result = nifti_to_dicom_seg(series_dir, label, None, use_itk=use_itk) + assert os.path.exists(result) + + # Verify default values were used for all segments + dcm = pydicom.dcmread(result) + + # Filter out Background segment (SegmentNumber=0) for LABELMAP type + real_segments = [s for s in dcm.SegmentSequence if s.SegmentNumber > 0] + + assert ( + len(real_segments) == expected_segments + ), f"Expected {expected_segments} real segments, got {len(real_segments)}" + + # Verify each real segment has default naming + for i, seg in enumerate(real_segments, start=1): + assert seg.SegmentLabel == f"Segment_{i}", f"Expected default label 'Segment_{i}', got '{seg.SegmentLabel}'" + + # Verify default algorithm info + assert seg.SegmentAlgorithmType == "AUTOMATIC" + assert seg.SegmentAlgorithmName == "MONAILABEL" + + # Verify has default code sequences + assert hasattr(seg, "SegmentedPropertyCategoryCodeSequence") + assert len(seg.SegmentedPropertyCategoryCodeSequence) > 0 + + # Cleanup + os.unlink(result) + + def test_missing_label_info_highdicom(self): + """Test missing label_info handling using highdicom implementation.""" + self._test_missing_label_info_impl(use_itk=False) + + def test_missing_label_info_itk(self): + """Test missing label_info handling using ITK implementation.""" + self._test_missing_label_info_impl(use_itk=True) + + def test_dicom_seg_to_itk_image(self): + """Test DICOM SEG to NIfTI/NRRD conversion.""" + series_dir, label = self._get_test_paths() + + # Count expected segments in original label + expected_segments = self._count_segments_in_label(label) + + # First create a 
DICOM SEG + dicom_seg_file = nifti_to_dicom_seg(series_dir, label, None, use_itk=False) + self.addCleanup(os.unlink, dicom_seg_file) + assert os.path.exists(dicom_seg_file) + + # Convert to ITK image + result = dicom_seg_to_itk_image(dicom_seg_file) + self.addCleanup(os.unlink, result) + assert os.path.exists(result) + assert result.endswith(".seg.nrrd") + + # Verify it's readable and has correct structure + result_img = sitk.ReadImage(result) + result_array = sitk.GetArrayFromImage(result_img) + + # Verify 3D structure + assert result_img.GetSize()[0] > 0, "Width must be > 0" + assert result_img.GetSize()[1] > 0, "Height must be > 0" + assert result_img.GetSize()[2] > 0, "Depth must be > 0" + + # Verify pixel data contains segments + unique_values = np.unique(result_array) + unique_values = unique_values[unique_values != 0] + + assert ( + len(unique_values) == expected_segments + ), f"Expected {expected_segments} segments, found {len(unique_values)}" + + # Verify result is not empty + assert np.any(result_array != 0), "Result is all zeros" + + # Verify spacing is reasonable + spacing = result_img.GetSpacing() + for i, s in enumerate(spacing): + assert 0.1 < s < 100, f"Spacing[{i}] = {s} is unreasonable" + + def test_custom_tags(self): + """Test that custom DICOM tags are properly added.""" + series_dir, label = self._get_test_paths() + + custom_tags = {"ContentCreatorName": "TestUser", "ClinicalTrialSeriesID": "TRIAL123"} + + result = nifti_to_dicom_seg(series_dir, label, None, use_itk=False, custom_tags=custom_tags) + self.addCleanup(os.unlink, result) + assert os.path.exists(result) + + # Verify custom tags + dcm = pydicom.dcmread(result) + assert dcm.ContentCreatorName == "TestUser" + assert dcm.ClinicalTrialSeriesID == "TRIAL123" + + def _test_multiple_segments_with_different_properties_impl(self, use_itk): + """Helper: Test multiple segments each with unique names, colors, and descriptions.""" + series_dir = os.path.join(self.dicom_dataset, 
self.TEST_SERIES_ID) + + # Load DICOM series and create label with 3 different segments + reference_image, dims = self._load_dicom_series(series_dir) + label_array = self._create_multi_segment_array(dims, [1, 2, 3], region_type="regions") + label_file = self._create_label_file(label_array, reference_image) + self.addCleanup(os.unlink, label_file) + + # Define distinct properties for each segment + label_info = [ + { + "name": "Liver", + "description": "Liver structure", + "color": [255, 0, 0], + }, + { + "name": "Spleen", + "description": "Spleen structure", + "color": [0, 255, 0], + }, + { + "name": "Kidney", + "description": "Kidney structure", + "color": [0, 0, 255], + }, + ] + + # Convert to DICOM SEG + result = nifti_to_dicom_seg(series_dir, label_file, label_info, use_itk=use_itk) + self.addCleanup(os.unlink, result) + assert os.path.exists(result) + + # Verify all segments have correct properties + dcm = pydicom.dcmread(result) + + # Filter out Background segment (SegmentNumber=0) for LABELMAP type + real_segments = [s for s in dcm.SegmentSequence if s.SegmentNumber > 0] + assert len(real_segments) == 3 + + # Verify each real segment's properties + for i, expected_info in enumerate(label_info): + seg = real_segments[i] + assert seg.SegmentNumber == i + 1 + assert ( + seg.SegmentLabel == expected_info["name"] + ), f"Segment {i + 1}: expected '{expected_info['name']}', got '{seg.SegmentLabel}'" + + # Note: SegmentDescription is optional in DICOM + # Verify the segment has required code sequences instead + assert hasattr(seg, "SegmentedPropertyCategoryCodeSequence") + assert hasattr(seg, "SegmentedPropertyTypeCodeSequence") + + # Verify all 3 real segments are present in metadata + segments_in_metadata = [seg.SegmentNumber for seg in real_segments] + + assert ( + len(segments_in_metadata) == 3 + ), f"Expected 3 real segments in metadata, found {len(segments_in_metadata)}" + assert set(segments_in_metadata) == {1, 2, 3}, f"Expected segments {{1,2,3}}, found 
{set(segments_in_metadata)}" + + # Verify frames exist for the segments + assert dcm.NumberOfFrames > 0, "No frames in DICOM SEG" + + # Verify pixel data contains all 3 segments via round-trip + result_nifti = dicom_seg_to_itk_image(result) + self.addCleanup(os.unlink, result_nifti) + result_img = sitk.ReadImage(result_nifti) + pixel_array = sitk.GetArrayFromImage(result_img) + + unique_in_pixels = np.unique(pixel_array) + unique_in_pixels = unique_in_pixels[unique_in_pixels != 0] + + assert len(unique_in_pixels) == 3, f"Expected 3 segments in pixel data, found {len(unique_in_pixels)}" + assert set(unique_in_pixels) == {1, 2, 3}, f"Expected pixel values {{1,2,3}}, found {set(unique_in_pixels)}" + + def test_multiple_segments_with_different_properties_highdicom(self): + """Test multiple segments with unique properties using highdicom implementation.""" + self._test_multiple_segments_with_different_properties_impl(use_itk=False) + + def test_multiple_segments_with_different_properties_itk(self): + """Test multiple segments with unique properties using ITK implementation.""" + self._test_multiple_segments_with_different_properties_impl(use_itk=True) + + def _test_large_label_values_impl(self, use_itk): + """Helper: Test that large label values (100, 200, 255) are correctly remapped to sequential (1, 2, 3).""" + series_dir = os.path.join(self.dicom_dataset, self.TEST_SERIES_ID) + + # Load DICOM series and create label with large values: 100, 200, 255 + reference_image, dims = self._load_dicom_series(series_dir) + label_array = self._create_multi_segment_array(dims, [100, 200, 255], region_type="full_slice") + label_file = self._create_label_file(label_array, reference_image) + + label_info = [ + {"name": "Segment100"}, + {"name": "Segment200"}, + {"name": "Segment255"}, + ] + + # Convert to DICOM SEG + result = nifti_to_dicom_seg(series_dir, label_file, label_info, use_itk=use_itk) + assert os.path.exists(result) + + # Verify segment numbers are sequential 1, 2, 3 (not 
100, 200, 255) + dcm = pydicom.dcmread(result) + + # Filter out Background segment (SegmentNumber=0) for LABELMAP type + real_segments = [s for s in dcm.SegmentSequence if s.SegmentNumber > 0] + + assert len(real_segments) == 3 + segment_numbers = [seg.SegmentNumber for seg in real_segments] + assert segment_numbers == [1, 2, 3], f"Large values not remapped: expected [1, 2, 3], got {segment_numbers}" + + # Verify labels are preserved + segment_labels = [seg.SegmentLabel for seg in real_segments] + assert segment_labels == ["Segment100", "Segment200", "Segment255"] + + # Verify segment numbers are remapped to 1, 2, 3 (not 100, 200, 255) + segments_in_metadata = [seg.SegmentNumber for seg in real_segments] + + assert ( + len(segments_in_metadata) == 3 + ), f"Expected 3 real segments in metadata, found {len(segments_in_metadata)}" + assert set(segments_in_metadata) == { + 1, + 2, + 3, + }, f"Segment numbers must be {{1,2,3}} not {{100,200,255}}, got {set(segments_in_metadata)}" + + # Verify frames exist + assert dcm.NumberOfFrames > 0, "No frames in DICOM SEG" + + # Verify pixel data also contains remapped values via round-trip + # With full-slice regions, all segments should survive round-trip + result_nifti = dicom_seg_to_itk_image(result) + result_img = sitk.ReadImage(result_nifti) + pixel_array = sitk.GetArrayFromImage(result_img) + + unique_in_pixels = np.unique(pixel_array) + unique_in_pixels = unique_in_pixels[unique_in_pixels != 0] + + assert len(unique_in_pixels) == 3, f"Expected 3 segments in pixel data, found {len(unique_in_pixels)}" + assert set(unique_in_pixels) == { + 1, + 2, + 3, + }, f"Pixel values must be {{1,2,3}} not {{100,200,255}}, found {set(unique_in_pixels)}" + + os.unlink(result_nifti) + + # Cleanup + os.unlink(label_file) + os.unlink(result) + + def test_large_label_values_highdicom(self): + """Test large label value remapping using highdicom implementation.""" + self._test_large_label_values_impl(use_itk=False) + + def 
test_large_label_values_itk(self): + """Test large label value remapping using ITK implementation.""" + self._test_large_label_values_impl(use_itk=True) + + def test_invalid_series_directory(self): + """Test handling of non-existent DICOM directory.""" + invalid_dir = "/nonexistent/path/to/dicom" + label = tempfile.NamedTemporaryFile(suffix=".nii.gz", delete=False).name + + # Create a minimal valid label file + label_img = sitk.Image([10, 10, 10], sitk.sitkUInt8) + label_array = sitk.GetArrayFromImage(label_img) + label_array[5, 5, 5] = 1 + label_img = sitk.GetImageFromArray(label_array) + sitk.WriteImage(label_img, label) + + try: + result = nifti_to_dicom_seg(invalid_dir, label, None, use_itk=False) + assert result == "" + finally: + os.unlink(label) + + def test_dicom_seg_metadata_completeness(self): + """Test that generated DICOM SEG has all required metadata.""" + series_dir, label = self._get_test_paths() + + result = nifti_to_dicom_seg(series_dir, label, None, use_itk=False) + assert os.path.exists(result) + + dcm = pydicom.dcmread(result) + + # Verify required DICOM SEG attributes exist + required_attrs = [ + "SOPClassUID", + "SOPInstanceUID", + "SeriesInstanceUID", + "StudyInstanceUID", + "Modality", + "SeriesNumber", + "InstanceNumber", + "SegmentSequence", + "PixelData", + "Rows", + "Columns", + "NumberOfFrames", + ] + + for attr in required_attrs: + assert hasattr(dcm, attr), f"Missing required attribute: {attr}" + + # Verify SOP Class is Segmentation Storage (or Labelmap Segmentation Storage for LABELMAP type) + assert dcm.SOPClassUID in [ + "1.2.840.10008.5.1.4.1.1.66.4", # Segmentation Storage + "1.2.840.10008.5.1.4.1.1.66.7", # Labelmap Segmentation Storage + ] + + # Verify Modality is SEG + assert dcm.Modality == "SEG" + + # Verify SegmentSequence is not empty + assert len(dcm.SegmentSequence) > 0 + + # Verify each segment has required attributes + for seg in dcm.SegmentSequence: + seg_attrs = ["SegmentNumber", "SegmentLabel", 
"SegmentAlgorithmType"] + for attr in seg_attrs: + assert hasattr(seg, attr), f"Segment missing required attribute: {attr}" + + os.unlink(result) if __name__ == "__main__": From c62d40a53ee8068709e66eb3475b882815bbc6a5 Mon Sep 17 00:00:00 2001 From: Joaquin Anton Guirao Date: Fri, 19 Dec 2025 14:52:41 +0100 Subject: [PATCH 2/2] Remove Python 3.9 Signed-off-by: Joaquin Anton Guirao linter fixes Signed-off-by: Joaquin Anton Guirao [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Joaquin Anton Guirao --- .github/workflows/docker-plugin.yml | 4 ++-- .github/workflows/release.yml | 4 ++-- docs/source/installation.rst | 2 +- monailabel/interfaces/app.py | 13 ++++++++++++- monailabel/interfaces/tasks/infer_v2.py | 16 ++++++++-------- monailabel/interfaces/utils/app.py | 2 -- monailabel/tasks/infer/bundle.py | 6 +++--- plugins/dsa/Dockerfile | 2 +- setup.cfg | 2 +- 9 files changed, 30 insertions(+), 21 deletions(-) diff --git a/.github/workflows/docker-plugin.yml b/.github/workflows/docker-plugin.yml index 0d69f3bb2..4550260b4 100644 --- a/.github/workflows/docker-plugin.yml +++ b/.github/workflows/docker-plugin.yml @@ -26,10 +26,10 @@ jobs: MONAI_ZOO_AUTH_TOKEN: ${{ github.token }} steps: - uses: actions/checkout@v5 - - name: Set up Python 3.9 + - name: Set up Python 3.10 uses: actions/setup-python@v5 with: - python-version: 3.9 + python-version: "3.10" - name: clean up run: | sudo rm -rf /usr/share/dotnet diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ba4bd9d67..1fe43bf8e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -35,10 +35,10 @@ jobs: with: fetch-depth: 0 - uses: actions/setup-node@v4 - - name: Set up Python 3.9 + - name: Set up Python 3.10 uses: actions/setup-python@v5 with: - python-version: 3.9 + python-version: "3.10" - name: Cache for pip uses: actions/cache@v4 id: cache diff --git a/docs/source/installation.rst
b/docs/source/installation.rst index 671b0375b..7990b1993 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -52,7 +52,7 @@ Prerequisites -------------------------- MONAI Label supports both **Ubuntu** and **Windows** OS with GPU/CUDA enabled. -Make sure you have python 3.8/3.9 version environment with PyTorch and CUDA installed. MONAI Label features on other python version are not verified. +Make sure you have a Python 3.10+ environment with PyTorch and CUDA installed. MONAI Label features on other Python versions are not verified. - Install `Python `_ - Install the following Python libraries diff --git a/monailabel/interfaces/app.py b/monailabel/interfaces/app.py index 215f8c598..f9b405bde 100644 --- a/monailabel/interfaces/app.py +++ b/monailabel/interfaces/app.py @@ -102,7 +102,11 @@ def __init__( self._trainers = self.init_trainers() if settings.MONAI_LABEL_TASKS_TRAIN else {} self._strategies = self.init_strategies() if settings.MONAI_LABEL_TASKS_STRATEGY else {} self._scoring_methods = self.init_scoring_methods() if settings.MONAI_LABEL_TASKS_SCORING else {} - self._batch_infer = self.init_batch_infer() if settings.MONAI_LABEL_TASKS_BATCH_INFER else {} + self._batch_infer: Callable = ( + self.init_batch_infer() + if settings.MONAI_LABEL_TASKS_BATCH_INFER + else lambda *args, **kwargs: self._raise_batch_infer_disabled() + ) self._auto_update_scoring = settings.MONAI_LABEL_AUTO_UPDATE_SCORING self._sessions = self._load_sessions(load=settings.MONAI_LABEL_SESSIONS) @@ -319,6 +323,13 @@ def run_infer_in_thread(t, r): return {"label": label_id, "tag": DefaultLabelTag.ORIGINAL, "file": result_file_name, "params": result_json} + def _raise_batch_infer_disabled(self): + """Raise an exception when batch inference is disabled.""" + raise MONAILabelException( + MONAILabelError.INVALID_INPUT, + "Batch inference is disabled. 
Set MONAI_LABEL_TASKS_BATCH_INFER to true to enable it.", + ) + def batch_infer(self, request, datastore=None): """ Run batch inference for an existing pre-trained model. diff --git a/monailabel/interfaces/tasks/infer_v2.py b/monailabel/interfaces/tasks/infer_v2.py index f87b8db7b..35ca31e65 100644 --- a/monailabel/interfaces/tasks/infer_v2.py +++ b/monailabel/interfaces/tasks/infer_v2.py @@ -32,14 +32,14 @@ class InferType(str, Enum): OTHERS - Other Model Type """ - SEGMENTATION: str = "segmentation" - ANNOTATION: str = "annotation" - CLASSIFICATION: str = "classification" - DEEPGROW: str = "deepgrow" - DEEPEDIT: str = "deepedit" - SCRIBBLES: str = "scribbles" - DETECTION: str = "detection" - OTHERS: str = "others" + SEGMENTATION = "segmentation" + ANNOTATION = "annotation" + CLASSIFICATION = "classification" + DEEPGROW = "deepgrow" + DEEPEDIT = "deepedit" + SCRIBBLES = "scribbles" + DETECTION = "detection" + OTHERS = "others" class InferTask(metaclass=ABCMeta): diff --git a/monailabel/interfaces/utils/app.py b/monailabel/interfaces/utils/app.py index f6ad59da4..2127389b6 100644 --- a/monailabel/interfaces/utils/app.py +++ b/monailabel/interfaces/utils/app.py @@ -28,7 +28,6 @@ def app_instance(app_dir=None, studies=None, conf=None): studies = studies if studies else settings.MONAI_LABEL_STUDIES cache_key = f"{app_dir}{studies}" - global apps app = apps.get(cache_key) if app is not None: return app @@ -53,7 +52,6 @@ def app_instance(app_dir=None, studies=None, conf=None): def clear_cache(): - global apps apps.clear() diff --git a/monailabel/tasks/infer/bundle.py b/monailabel/tasks/infer/bundle.py index 3023f0fc5..adff175aa 100644 --- a/monailabel/tasks/infer/bundle.py +++ b/monailabel/tasks/infer/bundle.py @@ -194,7 +194,7 @@ def pre_transforms(self, data=None) -> Sequence[Callable]: unload_module("scripts") self._update_device(data) - pre = [] + pre: list[Callable[..., Any]] = [] for k in self.const.key_preprocessing(): if self.bundle_config.get(k): c = 
self.bundle_config.get_parsed_content(k, instantiate=True) @@ -207,7 +207,7 @@ def pre_transforms(self, data=None) -> Sequence[Callable]: t._loader.image_only = False if pre and self.extend_load_image: - res = [] + res: list[Callable[..., Any]] = [] for t in pre: if isinstance(t, LoadImaged): res.append(LoadImageTensord(keys=t.keys, load_image_d=t)) @@ -257,7 +257,7 @@ def post_transforms(self, data=None) -> Sequence[Callable]: unload_module("scripts") self._update_device(data) - post = [] + post: list[Callable[..., Any]] = [] for k in self.const.key_postprocessing(): if self.bundle_config.get(k): c = self.bundle_config.get_parsed_content(k, instantiate=True) diff --git a/plugins/dsa/Dockerfile b/plugins/dsa/Dockerfile index 547e63f51..01784e036 100644 --- a/plugins/dsa/Dockerfile +++ b/plugins/dsa/Dockerfile @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -FROM python:3.9-slim +FROM python:3.10-slim RUN python -m pip install histomicstk --find-links https://girder.github.io/large_image_wheels diff --git a/setup.cfg b/setup.cfg index 99aa85373..8f81210c1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,7 +27,7 @@ project_urls = Source Code=https://github.com/Project-MONAI/MONAILabel [options] -python_requires = >= 3.9 +python_requires = >= 3.10 # for compiling and develop setup only # no need to specify the versions so that we could # compile for multiple targeted versions.