From c9e8ee488a7b05b8e1139253a52d82379885ac5e Mon Sep 17 00:00:00 2001 From: Scott Date: Mon, 31 Mar 2025 14:33:33 -0400 Subject: [PATCH 01/14] remove doc extraction, review, and dataset classes --- examples/copy_teach_task.py | 20 - .../create_auto_classification_workflow.py | 27 -- examples/dataset_tasks.py | 25 -- examples/merge_snapshots.py | 20 - examples/pdf_highlighter.py | 35 -- examples/submitting_to_doc_extraction.py | 24 -- indico_toolkit/auto_populate/__init__.py | 3 - indico_toolkit/auto_populate/populator.py | 362 ------------------ indico_toolkit/auto_populate/types.py | 66 ---- indico_toolkit/indico_wrapper/__init__.py | 6 - indico_toolkit/indico_wrapper/dataset.py | 114 ------ .../indico_wrapper/doc_extraction.py | 79 ---- indico_toolkit/indico_wrapper/reviewer.py | 111 ------ indico_toolkit/ocr/__init__.py | 2 - indico_toolkit/ocr/customocr_object.py | 42 -- mypy.ini | 3 - tests/integration/conftest.py | 33 +- .../indico_wrapper/test_dataset.py | 40 -- .../indico_wrapper/test_doc_extraction.py | 127 ------ .../indico_wrapper/test_reviewer.py | 60 --- tests/integration/ocr/__init__.py | 0 .../integration/ocr/test_customocr_object.py | 47 --- tests/integration/ocr/test_ondoc_object.py | 63 --- tests/integration/ocr/test_standard_object.py | 33 -- tests/integration/test_populator.py | 80 ---- 25 files changed, 1 insertion(+), 1421 deletions(-) delete mode 100644 examples/copy_teach_task.py delete mode 100644 examples/create_auto_classification_workflow.py delete mode 100644 examples/dataset_tasks.py delete mode 100644 examples/pdf_highlighter.py delete mode 100644 examples/submitting_to_doc_extraction.py delete mode 100644 indico_toolkit/auto_populate/__init__.py delete mode 100644 indico_toolkit/auto_populate/populator.py delete mode 100644 indico_toolkit/auto_populate/types.py delete mode 100644 indico_toolkit/indico_wrapper/dataset.py delete mode 100644 indico_toolkit/indico_wrapper/doc_extraction.py delete mode 100644 indico_toolkit/indico_wrapper/reviewer.py delete mode 100644 indico_toolkit/ocr/customocr_object.py delete mode 100644 tests/integration/indico_wrapper/test_dataset.py delete mode 100644 tests/integration/indico_wrapper/test_doc_extraction.py delete mode 100644 tests/integration/indico_wrapper/test_reviewer.py delete mode 100644 tests/integration/ocr/__init__.py delete mode 100644 tests/integration/ocr/test_customocr_object.py delete mode 100644 tests/integration/ocr/test_ondoc_object.py delete mode 100644 tests/integration/ocr/test_standard_object.py delete mode 100644 tests/integration/test_populator.py diff --git a/examples/copy_teach_task.py b/examples/copy_teach_task.py deleted file mode 100644 index d0b7f459..00000000 --- a/examples/copy_teach_task.py +++ /dev/null @@ -1,20 +0,0 @@ -from indico_toolkit import create_client -from indico_toolkit.auto_populate import AutoPopulator - -""" -Create a new copied Workflow based on given Teach Task Id -and corresponding Dataset Id. 
-""" - -HOST = "app.indico.io" -API_TOKEN_PATH = "./indico_api_token.txt" -DATASET_ID = 0 -TEACH_TASK_ID = 0 - -client = create_client(HOST, API_TOKEN_PATH) -auto_populator = AutoPopulator(client) -new_workflow = auto_populator.copy_teach_task( - dataset_id=DATASET_ID, - teach_task_id=TEACH_TASK_ID, - workflow_name="Copied Workflow", -) diff --git a/examples/create_auto_classification_workflow.py b/examples/create_auto_classification_workflow.py deleted file mode 100644 index 355adeb4..00000000 --- a/examples/create_auto_classification_workflow.py +++ /dev/null @@ -1,27 +0,0 @@ -from indico_toolkit import create_client -from indico_toolkit.auto_populate import AutoPopulator - -""" -Create an Indico Classification Workflow without any labeling using an organized -directory/folder structure. Each folder/directory should contain only one file type. - -For example, you would target '/base_directory/' if you had your files organized like: - -/base_directory/ -/base_directory/invoices/ -> contains only invoice files -/base_directory/disclosures/ -> contains only disclosure files -""" - -HOST = "app.indico.io" -API_TOKEN_PATH = "./indico_api_token.txt" - -DIRECTORY_FILE_PATH = "./base_directory/" - -client = create_client(HOST, API_TOKEN_PATH) -auto_populator = AutoPopulator(client) -new_workflow = auto_populator.create_auto_classification_workflow( - DIRECTORY_FILE_PATH, - "My dataset", - "My workflow", - "My teach task", -) diff --git a/examples/dataset_tasks.py b/examples/dataset_tasks.py deleted file mode 100644 index a53a2196..00000000 --- a/examples/dataset_tasks.py +++ /dev/null @@ -1,25 +0,0 @@ -from indico_toolkit import create_client -from indico_toolkit.indico_wrapper import Datasets, Download -from indico_toolkit.pipelines import FileProcessing - -DATASET_ID = 1234 -HOST = "app.indico.io" -API_TOKEN_PATH = "./indico_api_token.txt" - -# Instantiate the datasets class -client = create_client(HOST, API_TOKEN_PATH) -datasets = Datasets(client, DATASET_ID) -downloader = Download(client) -""" -Example 1: - -Upload files to an existing dataset in batches -""" -# Collect files to upload -fp = FileProcessing() -fp.get_file_paths_from_dir("./datasets/disclosures/") - -# Upload files to dataset in batches -for paths in fp.batch_files(batch_size=2): - datasets.add_files_to_dataset(paths) - print(f"Uploaded {len(paths)} files") diff --git a/examples/merge_snapshots.py b/examples/merge_snapshots.py index 6b004d3f..d204f206 100644 --- a/examples/merge_snapshots.py +++ b/examples/merge_snapshots.py @@ -1,9 +1,5 @@ -from indico_toolkit import create_client -from indico_toolkit.indico_wrapper import Datasets from indico_toolkit.snapshots import Snapshot -HOST = "app.indico.io" -API_TOKEN_PATH = "./indico_api_token.txt" PATH_TO_SNAPSHOT = "./snapshot_1.csv" PATH_TO_SNAPSHOT_2 = "./snapshot_2.csv" OUTPUT_PATH = "./merged_snapshot_output.csv" @@ -34,19 +30,3 @@ # will now include all of the samples from snap_to_append as well print(main_snap.number_of_samples) main_snap.to_csv(OUTPUT_PATH) - -""" -With that merged snapshot, you can now use the toolkit to upload and train a model. 
-""" -client = create_client(HOST, API_TOKEN_PATH) -dataset = Datasets(client) -uploaded_dataset = dataset.create_dataset([OUTPUT_PATH], dataset_name="my_dataset") -print(f"My Dataset ID is {uploaded_dataset.id}") -model = dataset.train_model( - uploaded_dataset, - model_name="my_model", - source_col=main_snap.text_col, - target_col=main_snap.label_col, - wait=False, -) -print(f"My Model Group ID is {model.id}") diff --git a/examples/pdf_highlighter.py b/examples/pdf_highlighter.py deleted file mode 100644 index 3540205b..00000000 --- a/examples/pdf_highlighter.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -Highlight Indico Extraction Predictions on the source PDF -""" - -from indico_toolkit import create_client -from indico_toolkit.highlighter import Highlighter -from indico_toolkit.indico_wrapper import Workflow - -WORKFLOW_ID = 1418 -HOST = "app.indico.io" -API_TOKEN_PATH = "./indico_api_token.txt" -PATH_TO_DOCUMENT = "./mydocument.pdf" -# Instantiate the workflow class -client = create_client(HOST, API_TOKEN_PATH) -wflow = Workflow(client) - -# Get predictions and ondocument OCR object -submission_ids = wflow.submit_documents_to_workflow(WORKFLOW_ID, [PATH_TO_DOCUMENT]) -submission_result = wflow.get_submission_results_from_ids(submission_ids)[0] -ocr_object = wflow.get_ondoc_ocr_from_etl_url(submission_result.etl_url) - -# Highlight Predictions onto source document and write it to disc -highlighter = Highlighter(submission_result.predictions, PATH_TO_DOCUMENT) -highlighter.collect_tokens(ocr_object.token_objects) -highlighter.highlight_pdf("./highlighted_doc.pdf", ocr_object.page_heights_and_widths) - -# You can also have unique color highlights for each label group, write the label above -# the highlight, and add bookmarks of what labels appear on which pages -highlighter.highlight_pdf( - "./highlighted_doc.pdf", - ocr_object.page_heights_and_widths, - all_yellow_highlight=False, - add_label_annotations=True, - add_bookmarks=True, -) diff --git a/examples/submitting_to_doc_extraction.py b/examples/submitting_to_doc_extraction.py deleted file mode 100644 index 2bc20cc2..00000000 --- a/examples/submitting_to_doc_extraction.py +++ /dev/null @@ -1,24 +0,0 @@ -from indico_toolkit import create_client -from indico_toolkit.indico_wrapper import DocExtraction -from indico_toolkit.pipelines import FileProcessing - -""" -Retrieves a list of raw full document texts for all files in a folder -""" - -HOST = "app.indico.io" -API_TOKEN_PATH = "./indico_api_token.txt" - -# Instantiate the doc_extraction class -client = create_client(HOST, API_TOKEN_PATH) -doc_extraction = DocExtraction(client=client, preset_config="ondocument") - -# Collect files to submit -fp = FileProcessing() -fp.get_file_paths_from_dir("./datasets/disclosures/") - -# Submit documents with optional text setting and save results to variable -doc_texts = [] -for paths in fp.batch_files(batch_size=10): - doc_texts.append(doc_extraction.run_ocr(filepaths=paths, text_setting="full_text")) -print(doc_texts) diff --git a/indico_toolkit/auto_populate/__init__.py b/indico_toolkit/auto_populate/__init__.py deleted file mode 100644 index c3330c10..00000000 --- a/indico_toolkit/auto_populate/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .populator import AutoPopulator - -__all__ = ("AutoPopulator",) diff --git a/indico_toolkit/auto_populate/populator.py b/indico_toolkit/auto_populate/populator.py deleted file mode 100644 index 9b2e6396..00000000 --- a/indico_toolkit/auto_populate/populator.py +++ /dev/null @@ -1,362 +0,0 @@ -import dataclasses 
-import time -from json import loads -from os import PathLike -from pathlib import Path -from typing import Dict, List, Tuple, Union - -from indico import IndicoClient -from indico.queries import ( - CreateExport, - DownloadExport, - GetDataset, - GetModelGroup, -) -from indico.types import Workflow - -from ..errors import ToolkitPopulationError -from ..structure.create_structure import Structure -from .types import ( - Example, - ExampleList, - LabelInput, - LabelInst, - TokenSpanInput, -) - - -class AutoPopulator: - def __init__(self, client: IndicoClient): - """ - Module for manipulating and creating new workflows and teach tasks. - - Args: - client (IndicoClient): instantiated Indico Client - """ - self.client = client - self.structure = Structure(client) - self._exceptions = [] - - def create_auto_classification_workflow( - self, - directory_path: Union[str, PathLike[str]], - dataset_name: str, - workflow_name: str, - teach_task_name: str, - accepted_types: Tuple[str, ...] = ( - "csv", - "doc", - "docx", - "eml", - "jpeg", - "jpg", - "msg", - "pdf", - "png", - "pptx", - "rtf", - "svg", - "tif", - "tiff", - "txt", - "xls", - "xlsx", - ), - ) -> Workflow: - """ - Label and train a model based on a directory structure or existing teach task. - You should have a base directory containing sub directories where each - directory contains a unique file type and only that file type. - - Example: - base_directory/ - base_directory/invoices/ -> folder containing only invoices - base_directory/disclosures/ -> folder containing only disclosures - etc. etc. - Args: - directory_path (str): Path to a directory containing your filepath structure - dataset_name (str): Name of created dataset - worlflow_name (str): Name of created workflow - teach_task_name (str): Name of created teach task - accepted_types (Tuple[str], optional): List of accepted file types to search - Returns: - Workflow: a Workflow object representation of the newly created workflow - """ - - def valid_file(file: Path) -> bool: - return ( - file.is_file() and file.suffix.strip(".").casefold() in accepted_types - ) - - folder = Path(directory_path) - files = list(filter(valid_file, folder.glob("*/*"))) - classes = list(set(file.parent.name for file in files)) - labeled_files = {file.name: [{"label": file.parent.name}] for file in files} - - if len(classes) < 2: - raise ToolkitPopulationError( - "You must have documents in at least 2 directories, " - f"you only have {len(classes)}" - ) - - # Upload files to a new dataset. - dataset = self.structure.create_dataset( - dataset_name=dataset_name, - files_to_upload=files, - read_api=True, - single_column=False, - auto_rotate=False, - upscale_images=True, - languages=["ENG"], - ) - - # Create a new workflow with classification model. 
- workflow = self.structure.create_workflow(workflow_name, dataset.id) - workflow = self.structure.add_teach_task( - task_name=teach_task_name, - labelset_name=f"{teach_task_name}_labelset", - target_names=classes, - dataset_id=dataset.id, - workflow_id=workflow.id, - model_type="classification", - ) - teach_task_id = workflow.components[-1].model_group.questionnaire_id - labelset_id, model_group_id, label_map = self._get_teach_task_details( - teach_task_id - ) - - labels = self.get_labels_by_filename(model_group_id, labeled_files, label_map) - self.structure.label_teach_task( - label_set_id=labelset_id, - labels=list(map(dataclasses.asdict, labels)), - model_group_id=model_group_id, - ) - - return workflow - - def copy_teach_task( - self, - dataset_id: int, - teach_task_id: int, - workflow_name: str, - data_column: str = "document", - rename_labels: Dict[str, str] = None, - remove_labels: List[str] = None, - ) -> Workflow: - """ - Create duplicate teach task in same Indico platform. - - Note: Does not work with datasets created with a snapshot - - Args: - dataset_id (int): The dataset id of the dataset you wish to copy - teach_task_id (int): The teach task id of the corresponding teach task to - the dataset - workflow_name (string): The name of the newly created workflow - data_column_id (str, optional): The datacolumn id of the corresponding - dataset. Defaults to 'document' - rename_labels (dict, optional): Dictionary in format - {old_label_name : new_label_name} - remove_labels (list, optional): List of labels to remove from old teach task - - Returns: - Workflow: a Workflow object representation of the newly created workflow - """ - dataset = self.client.call(GetDataset(dataset_id)) - ( - old_labelset_id, - old_model_group_id, - old_target_name_map, - ) = self._get_teach_task_details(teach_task_id=teach_task_id) - # get dataset snapshot - export = self.client.call( - CreateExport(dataset_id=dataset.id, labelset_id=old_labelset_id, wait=True) - ) - csv = self.client.call(DownloadExport(export.id)) - print("Obtained snapshot") - - # create workflow - workflow = self.structure.create_workflow( - name=workflow_name, dataset_id=dataset_id - ) - time.sleep(2) - print("Created workflow") - old_model_group = self.client.call( - GetModelGroup(id=old_model_group_id, wait=True) - ) - model_type = old_model_group.task_type.lower() - # Create new teach task - workflow = self.structure.add_teach_task( - task_name=workflow_name, - labelset_name=workflow_name, - target_names=list(old_target_name_map.keys()), - dataset_id=dataset.id, - workflow_id=workflow.id, - model_type=model_type, - data_column=data_column, - ) - ( - new_labelset_id, - new_model_group_id, - new_target_name_map, - ) = self._get_teach_task_details( - workflow.components[-1].model_group.questionnaire_id - ) - # Get file_to_targets from export CSV - file_to_targets = {} - for _, row in csv.iterrows(): - # Check for NaN filled rows - if isinstance(row[2], float): - continue - old_example_id = row[0] - old_examples = self._get_example_list(old_model_group_id) - targets_list = loads(row[2])["targets"] - file_to_targets[old_examples.get_example(old_example_id).data_file_name] = ( - targets_list - ) - labels = self.get_labels_by_filename( - new_model_group_id, - file_to_targets, - new_target_name_map, - rename_labels, - remove_labels, - ) - # Label new teach task - result = self.structure.label_teach_task( - label_set_id=new_labelset_id, - labels=[dataclasses.asdict(label) for label in labels], - model_group_id=new_model_group_id, - ) - if 
not result["submitLabelsV2"]["success"]: - raise ToolkitPopulationError("Error: Failed to submit labels") - return workflow - - def inject_labels_into_teach_task( - self, - workflow_id: int, - teach_task_id: int, - file_to_targets: dict, - rename_labels: Dict[str, str] = None, - remove_labels: List[str] = None, - ): - """ - Add label data into existing teach task - - Args: - workflow_id (int): Id of the workflow you wish to add labels to - teach_task_id (int): Id of the corresponding teach task to the workflow - file_to_targets (dict): mapping of filenames to target label data - rename_labels (dict, optional): Dictionary in format - {old_label_name : new_label_name} - remove_labels (list, optional): List of labels to remove from old teach task - """ - ( - labelset_id, - model_group_id, - target_name_map, - ) = self._get_teach_task_details(teach_task_id) - labels = self.get_labels_by_filename( - model_group_id, - file_to_targets, - target_name_map, - rename_labels, - remove_labels, - ) - # Label new teach task - result = self.structure.label_teach_task( - label_set_id=labelset_id, - labels=[dataclasses.asdict(label) for label in labels], - model_group_id=model_group_id, - ) - if not result["submitLabelsV2"]["success"]: - raise ToolkitPopulationError("Error: Failed to submit labels") - - def get_labels_by_filename( - self, - model_group_id: int, - file_to_targets: dict, - target_name_map: dict, - rename_labels: Dict[str, str] = None, - remove_labels: List[str] = None, - ) -> List[LabelInput]: - """ - Args: - model_group_id (int): ID of the model group to be labeled - file_to_targets (dict): mapping in the format {filename : targets_list} - target_name_map (dict): mapping of field name to corresponding target ID - rename_labels (dict, optional): Dictionary in format - {old_label_name : new_label_name} - remove_labels (list, optional): List of labels to remove from old teach task - - Returns: - A list of LabelInput to be ingested by the platform via submitLabelsV2 - """ - labels = [] - # Retrieve examples and match against filename - examples = self._get_example_list(model_group_id) - - for filename, targets_list in file_to_targets.items(): - if rename_labels or remove_labels: - targets_list = self._edit_labels( - targets_list, rename_labels, remove_labels - ) - targets_list = self._convert_label(targets_list, target_name_map) - example_id = examples.get_example_id(filename) - if example_id: - labels.append(LabelInput(example_id, targets_list)) - return labels - - def _edit_labels( - self, - targets_list: List[dict], - rename_labels: Dict[str, str], - remove_labels: List[str], - ): - new_targets_list = [] - for target in targets_list: - if remove_labels and target["label"] not in remove_labels: - if rename_labels and rename_labels.get(target["label"]): - target["label"] = rename_labels[target["label"]] - new_targets_list.append(target) - return new_targets_list - - def _convert_label( - self, targets_list: List[dict], target_name_map: dict - ) -> List[LabelInst]: - updated_labels = [] - for target in targets_list: - updated_label = LabelInst(target_name_map[target["label"]]) - if target.get("spans"): - updated_spans = [ - TokenSpanInput(span["start"], span["end"], span["page_num"]) - for span in target["spans"] - ] - updated_label.spans = updated_spans - updated_labels.append(updated_label) - return updated_labels - - def _get_teach_task_details(self, teach_task_id: int): - teach_task_details = self.structure.get_teach_details( - teach_task_id=teach_task_id - ) - labelset_id = 
teach_task_details["questionnaire"]["question"]["labelset"]["id"] - model_group_id = teach_task_details["questionnaire"]["question"]["modelGroupId"] - target_names = teach_task_details["questionnaire"]["question"]["labelset"][ - "targetNames" - ] - target_name_map = {} - for target in target_names: - target_name_map[target["name"]] = target["id"] - return labelset_id, model_group_id, target_name_map - - def _get_example_list(self, model_group_id: int, limit=1000): - examples = self.structure.get_example_ids( - model_group_id=model_group_id, limit=limit - ) - examples = ExampleList( - examples=[ - Example(i["id"], i["datafile"]["name"]) - for i in examples["modelGroup"]["pagedExamples"]["examples"] - ] - ) - return examples diff --git a/indico_toolkit/auto_populate/types.py b/indico_toolkit/auto_populate/types.py deleted file mode 100644 index dae2460d..00000000 --- a/indico_toolkit/auto_populate/types.py +++ /dev/null @@ -1,66 +0,0 @@ -from dataclasses import dataclass -from typing import List - - -@dataclass -class Example: - id: int - data_file_name: str - - -class ExampleList: - def __init__(self, examples: List[Example]): - self.examples = examples - - def get_example(self, example_id: int) -> Example: - """ - Returns example with matching example_id. If no matching example id found, - return None. - """ - for example in self.examples: - if example.id == example_id: - return example - return None - - def get_example_id(self, example_data_file_name: str) -> int: - """ - Returns id for a specific example with the same name as example_data_file_name. - If no matching example found, return None. Assumes no duplicate filenames in - dataset - """ - for example in self.examples: - if example.data_file_name == example_data_file_name: - return example.id - return None - - -@dataclass -class TokenSpanInput: - start: int - end: int - pageNum: int - - -@dataclass -class SpatialSpanInput: - top: int - bottom: int - left: int - right: int - pageNum: int - - -@dataclass -class LabelInst: - clsId: int - spans: List[TokenSpanInput] = None - bounds: List[SpatialSpanInput] = None - - -@dataclass -class LabelInput: - exampleId: int - targets: List[LabelInst] - rejected: bool = None - override: bool = None - partial: bool = None diff --git a/indico_toolkit/indico_wrapper/__init__.py b/indico_toolkit/indico_wrapper/__init__.py index 40dd45aa..faa39014 100644 --- a/indico_toolkit/indico_wrapper/__init__.py +++ b/indico_toolkit/indico_wrapper/__init__.py @@ -1,15 +1,9 @@ -from .dataset import Datasets -from .doc_extraction import DocExtraction from .download import Download from .indico_wrapper import IndicoWrapper -from .reviewer import Reviewer from .workflow import Workflow __all__ = ( - "Datasets", - "DocExtraction", "Download", "IndicoWrapper", - "Reviewer", "Workflow", ) diff --git a/indico_toolkit/indico_wrapper/dataset.py b/indico_toolkit/indico_wrapper/dataset.py deleted file mode 100644 index a954d5ab..00000000 --- a/indico_toolkit/indico_wrapper/dataset.py +++ /dev/null @@ -1,114 +0,0 @@ -from typing import List - -from indico import IndicoClient -from indico.queries import ( - AddDataToWorkflow, - AddFiles, - CreateDataset, - CreateEmptyDataset, - DeleteDataset, - GetDataset, -) -from indico.types import Dataset, OcrEngine, Workflow - -from .indico_wrapper import IndicoWrapper - - -class Datasets(IndicoWrapper): - def __init__(self, client: IndicoClient): - self.client = client - - def get_dataset(self, dataset_id: int): - return self.client.call(GetDataset(dataset_id)) - - def 
add_files_to_dataset(self, dataset_id: int, filepaths: List[str]) -> Dataset: - """ - Upload documents to an existing dataset and wait for them to OCR - """ - dataset = self.client.call( - AddFiles( - dataset_id=dataset_id, files=filepaths, autoprocess=True, wait=True - ) - ) - return dataset - - def add_new_files_to_task(self, workflow_id: id, wait: bool = True) -> Workflow: - """ - Add newly uploaded documents to an existing teach task given the task's - associated workflow ID - - Args: - workflow_id (id): workflow ID associated with teach task - wait (bool, optional): wait for data to be added. Defaults to True. - """ - workflow = self.client.call(AddDataToWorkflow(workflow_id, wait)) - if wait: - print(f"Data added to all teach tasks associated with {workflow.id}") - return workflow - - def create_empty_dataset( - self, - dataset_name: str, - dataset_type: str = "DOCUMENT", - ocr_engine: OcrEngine = OcrEngine.READAPI, - ) -> Dataset: - """ - Create an empty dataset - Args: - name (str): Name of the dataset - dataset_type (str, optional): TEXT, IMAGE, or DOCUMENT. - Defaults to "DOCUMENT". - """ - return self.client.call( - CreateEmptyDataset(dataset_name, dataset_type, ocr_engine) - ) - - def create_dataset( - self, - filepaths: List[str], - dataset_name: str, - ocr_engine: OcrEngine = OcrEngine.READAPI, - ) -> Dataset: - dataset = self.client.call( - CreateDataset( - name=dataset_name, - files=filepaths, - ocr_engine=ocr_engine, - ) - ) - self.dataset_id = dataset.id - return dataset - - def delete_dataset(self, dataset_id: int) -> bool: - """ - Returns True if operation is succesful - """ - return self.client.call(DeleteDataset(id=dataset_id)) - - def get_dataset_metadata(self, dataset_id: int) -> List[dict]: - """ - Get list of dataset files with information like file name, status, and number of - pages - """ - query = """ - query GetDataset($id: Int) { - dataset(id: $id) { - id - name - files { - id - name - numPages - status - } - } - } - """ - dataset = self.graphQL_request( - graphql_query=query, variables={"id": dataset_id} - ) - return dataset["dataset"]["files"] - - def get_col_name_by_id(self, dataset_id: int, col_id: int) -> str: - dataset = self.get_dataset(dataset_id) - return next(c.name for c in dataset.datacolumns if c.id == col_id) diff --git a/indico_toolkit/indico_wrapper/doc_extraction.py b/indico_toolkit/indico_wrapper/doc_extraction.py deleted file mode 100644 index 3a10170d..00000000 --- a/indico_toolkit/indico_wrapper/doc_extraction.py +++ /dev/null @@ -1,79 +0,0 @@ -from typing import List, Union - -from indico import IndicoClient -from indico.queries import DocumentExtraction, Job - -from ..ocr import CustomOcr, OnDoc, StandardOcr -from .indico_wrapper import IndicoWrapper - - -class DocExtraction(IndicoWrapper): - """ - Class to support DocumentExtraction-related API calls - """ - - def __init__( - self, - client: IndicoClient, - preset_config: str = "standard", - custom_config: dict = None, - ): - """ - Args: - preset_config (str): Options are simple, legacy, detailed, ondocument, and - standard. 
- """ - self._preset_config = preset_config - self.client = client - self.json_config = {"preset_config": preset_config} - if custom_config: - self.json_config = custom_config - - def run_ocr( - self, filepaths: List[str], text_setting: str = None - ) -> List[Union[StandardOcr, OnDoc, CustomOcr, str]]: - """ - Args: - filepaths (List[str]): List of paths to local documents you would like to - submit for extraction - text_setting (str): Options are full_text and page_texts. - - Returns: - extracted_data (List[Union[StandardOcr, OnDoc, CustomOcr, str]]): data from - DocumentExtraction converted to OCR objects or string text - """ - jobs = self._submit_to_ocr(filepaths) - extracted_data = [] - for ind, job in enumerate(jobs): - status = self.get_job_status(job.id, True) - if status.status == "SUCCESS": - result = self.get_storage_object(status.result) - if text_setting == "full_text": - extracted_data.append(self._convert_ocr_objects(result).full_text) - elif text_setting == "page_texts": - extracted_data.append(self._convert_ocr_objects(result).page_texts) - else: - extracted_data.append(self._convert_ocr_objects(result)) - else: - raise RuntimeError( - f"{filepaths[ind]} {status.status}: {status.result}." - ) - return extracted_data - - def _submit_to_ocr(self, filepaths: List[str]) -> List[Job]: - return self.client.call( - DocumentExtraction(files=filepaths, json_config=self.json_config) - ) - - def _convert_ocr_objects( - self, extracted_data: Union[List[dict], dict] - ) -> Union[StandardOcr, OnDoc, CustomOcr]: - if self.json_config == {"preset_config": "ondocument"}: - return OnDoc(extracted_data) - elif ( - self.json_config == {"preset_config": "standard"} - or self.json_config is None - ): - return StandardOcr(extracted_data) - else: - return CustomOcr(extracted_data) diff --git a/indico_toolkit/indico_wrapper/reviewer.py b/indico_toolkit/indico_wrapper/reviewer.py deleted file mode 100644 index faf6e9a4..00000000 --- a/indico_toolkit/indico_wrapper/reviewer.py +++ /dev/null @@ -1,111 +0,0 @@ -import json - -from indico import IndicoClient - -from .indico_wrapper import Workflow - - -class Reviewer(Workflow): - """ - Class to simulate human reviewer - """ - - def __init__( - self, - client: IndicoClient, - workflow_id: int, - ): - self.client = client - self.workflow_id = workflow_id - - def accept_review(self, submission_id: int, changes: dict) -> None: - """ - Accept a submission in the review queue - Args: - submission_id (int): submission ID - changes (dict): accepted predictions with format like, - e.g. 
{"model_name": [{"label"...}]} - """ - self.graphQL_request( - SUBMIT_REVIEW, - { - "rejected": False, - "submissionId": submission_id, - "changes": json.dumps(changes), - }, - ) - - def get_random_review_id(self): - response = self.graphQL_request( - GET_RANDOM_REVIEW_ID, {"workflowId": self.workflow_id} - ) - try: - return response["randomSubmission"]["id"] - except Exception: - raise RuntimeError("The review queue is empty") - - def get_random_exception_id(self): - response = self.graphQL_request( - GET_RANDOM_EXCEPTION_ID, {"workflowId": self.workflow_id} - ) - try: - return response["randomSubmission"]["id"] - except Exception: - raise RuntimeError("The exception queue is empty") - - def reject_submission(self, submission_id): - return self.graphQL_request( - SUBMIT_REVIEW, {"rejected": True, "submissionId": submission_id} - ) - - -SUBMIT_REVIEW = """ -mutation submitStandardQueue( - $changes: JSONString, - $rejected: Boolean, - $submissionId: Int!, - $notes: String -) { - submitReview( - changes: $changes, - rejected: $rejected, - submissionId: $submissionId, - notes: $notes -) { - id - __typename - } -} -""" - -GET_RANDOM_EXCEPTION_ID = """ -query getExceptionsSubmission($workflowId: Int!) { - randomSubmission(adminReview: true, workflowId: $workflowId) { - id - resultFile - inputFilename - autoReview { - id - changes - __typename - } - __typename - } -} -""" - -GET_RANDOM_REVIEW_ID = """ -query getSubmission($workflowId: Int!) { - randomSubmission(adminReview: false, workflowId: $workflowId) { - id - resultFile - inputFilename - autoReview { - id - changes - __typename - } - __typename - } -} -""" diff --git a/indico_toolkit/ocr/__init__.py b/indico_toolkit/ocr/__init__.py index fbf15bf5..7d728eac 100644 --- a/indico_toolkit/ocr/__init__.py +++ b/indico_toolkit/ocr/__init__.py @@ -1,9 +1,7 @@ -from .customocr_object import CustomOcr from .ondoc_object import OnDoc from .standard_object import StandardOcr __all__ = ( - "CustomOcr", "OnDoc", "StandardOcr", ) diff --git a/indico_toolkit/ocr/customocr_object.py b/indico_toolkit/ocr/customocr_object.py deleted file mode 100644 index 78e1c79c..00000000 --- a/indico_toolkit/ocr/customocr_object.py +++ /dev/null @@ -1,42 +0,0 @@ -from typing import List, Union - - -class CustomOcr: - """ - CustomOcr is a helper class for the raw preset config OCR results. Enables easy - extraction of full text and page-level text. 
- """ - - def __init__(self, customocr: Union[List[dict], dict]): - """ - customocr Union[List[dict], dict]: result object from - indico.queries.DocumentExtraction - """ - self.customocr = customocr - - @property - def full_text(self) -> str: - """ - Return full document text as string - """ - if isinstance(self.customocr, dict) and "text" in self.customocr: - return self.customocr["text"] - elif isinstance(self.customocr, dict) and "pages" in self.customocr: - if "text" in self.customocr["pages"][0]: - return "\n".join(page["text"] for page in self.customocr["pages"]) - elif isinstance(self.customocr, list) and "pages" in self.customocr[0]: - if "text" in self.customocr[0]["pages"][0]: - return "\n".join(page["pages"][0]["text"] for page in self.customocr) - raise RuntimeError("JSON configuration setting does not have full text.") - - @property - def page_texts(self) -> List[str]: - """ - Return list of page-level text - """ - if isinstance(self.customocr, dict) and "pages" in self.customocr: - return [page["text"] for page in self.customocr["pages"]] - elif isinstance(self.customocr, list) and "pages" in self.customocr[0]: - if "text" in self.customocr[0]["pages"][0]: - return [page["pages"][0]["text"] for page in self.customocr] - raise RuntimeError("JSON configuration setting does not have page-level text.") diff --git a/mypy.ini b/mypy.ini index a205789e..fd98fdd6 100644 --- a/mypy.ini +++ b/mypy.ini @@ -18,9 +18,6 @@ ignore_missing_imports = True [mypy-indico_toolkit.association.*] ignore_errors = True -[mypy-indico_toolkit.auto_populate.*] -ignore_errors = True - [mypy-indico_toolkit.auto_review.*] ignore_errors = True diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index b0c3d356..ec91e109 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -6,14 +6,11 @@ AddModelGroupComponent, CreateDataset, CreateWorkflow, - DocumentExtraction, GetTrainingModelWithProgress, GraphQLRequest, - JobStatus, - RetrieveStorageObject, ) -from indico_toolkit.indico_wrapper import DocExtraction, Workflow +from indico_toolkit.indico_wrapper import Workflow def pytest_addoption(parser: pytest.Parser) -> None: @@ -62,11 +59,6 @@ def dataset_id(dataset): return dataset.id -@pytest.fixture(scope="session") -def doc_extraction_standard(indico_client): - return DocExtraction(indico_client) - - @pytest.fixture(scope="session") def extraction_model_group_id(workflow): return workflow.components[-1].model_group.id @@ -102,18 +94,6 @@ def module_submission_ids(workflow_id, indico_client, pdf_file): return sub_ids -@pytest.fixture(scope="session") -def ondoc_ocr_object(indico_client, pdf_file): - job = indico_client.call( - DocumentExtraction( - files=[pdf_file], json_config={"preset_config": "ondocument"} - ) - ) - job = indico_client.call(JobStatus(id=job[0].id, wait=True)) - extracted_data = indico_client.call(RetrieveStorageObject(job.result)) - return extracted_data - - @pytest.fixture(scope="session") def pdf_file(tests_folder: Path) -> Path: return tests_folder / "data/samples/fin_disc.pdf" @@ -124,17 +104,6 @@ def populator_snapshot_file(tests_folder: Path) -> Path: return tests_folder / "data/snapshots/populator_snapshot.csv" -@pytest.fixture(scope="session") -def standard_ocr_object(indico_client, pdf_file): - # TODO: this can be static-- probably should be "ondoc" as well - job = indico_client.call( - DocumentExtraction(files=[pdf_file], json_config={"preset_config": "standard"}) - ) - job = indico_client.call(JobStatus(id=job[0].id, wait=True)) - 
extracted_data = indico_client.call(RetrieveStorageObject(job.result)) - return extracted_data - - @pytest.fixture(scope="session") def teach_task_id(workflow): return workflow.components[-1].model_group.questionnaire_id diff --git a/tests/integration/indico_wrapper/test_dataset.py b/tests/integration/indico_wrapper/test_dataset.py deleted file mode 100644 index 2b785071..00000000 --- a/tests/integration/indico_wrapper/test_dataset.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -Test Datasets class methods -""" - -import pytest -from indico.types import Dataset - -from indico_toolkit.indico_wrapper import Datasets - - -@pytest.fixture(scope="module") -def dataset_wrapper(indico_client): - return Datasets(indico_client) - - -def test_get_dataset(dataset_wrapper, dataset_id): - dataset = dataset_wrapper.get_dataset(dataset_id) - assert isinstance(dataset, Dataset) - - -def test_add_to_dataset(dataset_wrapper, dataset_id, pdf_file): - dataset = dataset_wrapper.add_files_to_dataset(dataset_id, filepaths=[pdf_file]) - assert isinstance(dataset, Dataset) - for f in dataset.files: - assert f.status in ["PROCESSED", "FAILED"] - - -def test_get_dataset_files(dataset_wrapper, dataset_id): - files_list = dataset_wrapper.get_dataset_metadata(dataset_id) - assert isinstance(files_list, list) - assert len(files_list) > 0 - - -def test_create_delete_dataset(dataset_wrapper, pdf_file): - dataset = dataset_wrapper.create_dataset( - filepaths=[pdf_file], dataset_name="Toolkit Integration Tests" - ) - assert isinstance(dataset, Dataset) - status = dataset_wrapper.delete_dataset(dataset.id) - assert status diff --git a/tests/integration/indico_wrapper/test_doc_extraction.py b/tests/integration/indico_wrapper/test_doc_extraction.py deleted file mode 100644 index 7917f68d..00000000 --- a/tests/integration/indico_wrapper/test_doc_extraction.py +++ /dev/null @@ -1,127 +0,0 @@ -from indico_toolkit.indico_wrapper import DocExtraction -from indico_toolkit.ocr import OnDoc, StandardOcr - - -def test_run_ocr_ondoc(indico_client, pdf_file): - doc_extraction_ondoc = DocExtraction(indico_client, preset_config="ondocument") - extracted_data = doc_extraction_ondoc.run_ocr(filepaths=[pdf_file]) - for item in extracted_data: - assert isinstance(item, OnDoc) - - -def test_run_ocr_standard(doc_extraction_standard, pdf_file): - extracted_data = doc_extraction_standard.run_ocr(filepaths=[pdf_file]) - for item in extracted_data: - assert isinstance(item, StandardOcr) - - -def test_run_ocr_standard_full_text(doc_extraction_standard, pdf_file): - full_text_result = doc_extraction_standard.run_ocr( - filepaths=[pdf_file], text_setting="full_text" - ) - assert len(full_text_result[0]) == 2062 - - -def test_run_ocr_standard_page_texts(doc_extraction_standard, pdf_file): - page_texts_result = doc_extraction_standard.run_ocr( - filepaths=[pdf_file], text_setting="page_texts" - ) - assert len(page_texts_result[0][0]) == 1153 - - -def test_run_ocr_custom_full_text(indico_client, pdf_file): - doc_extraction_custom = DocExtraction( - indico_client, - custom_config={ - "top_level": "page", - "nest": False, - "reblocking": ["style", "list", "inline-header"], - "pages": [ - "text", - "size", - "dpi", - "doc_offset", - "page_num", - "image", - "thumbnail", - ], - "blocks": [ - "text", - "doc_offset", - "page_offset", - "position", - "block_type", - "page_num", - ], - "tokens": [ - "text", - "doc_offset", - "page_offset", - "block_offset", - "position", - "page_num", - "style", - ], - "chars": [ - "text", - "doc_index", - "block_index", - "page_index", 
- "page_num", - "position", - ], - }, - ) - full_text_result = doc_extraction_custom.run_ocr( - filepaths=[pdf_file], text_setting="full_text" - ) - assert len(full_text_result[0]) == 2067 - - -def test_run_ocr_custom_page_texts(indico_client, pdf_file): - doc_extraction_custom = DocExtraction( - indico_client, - custom_config={ - "top_level": "page", - "nest": False, - "reblocking": ["style", "list", "inline-header"], - "pages": [ - "text", - "size", - "dpi", - "doc_offset", - "page_num", - "image", - "thumbnail", - ], - "blocks": [ - "text", - "doc_offset", - "page_offset", - "position", - "block_type", - "page_num", - ], - "tokens": [ - "text", - "doc_offset", - "page_offset", - "block_offset", - "position", - "page_num", - "style", - ], - "chars": [ - "text", - "doc_index", - "block_index", - "page_index", - "page_num", - "position", - ], - }, - ) - page_texts_result = doc_extraction_custom.run_ocr( - filepaths=[pdf_file], text_setting="page_texts" - ) - assert len(page_texts_result[0][0]) == 1158 diff --git a/tests/integration/indico_wrapper/test_reviewer.py b/tests/integration/indico_wrapper/test_reviewer.py deleted file mode 100644 index 9b26a4ce..00000000 --- a/tests/integration/indico_wrapper/test_reviewer.py +++ /dev/null @@ -1,60 +0,0 @@ -import pytest - -from indico_toolkit.indico_wrapper import Reviewer, Workflow - - -@pytest.fixture(scope="module") -def submissions_awaiting_review(workflow_id, indico_client, pdf_file): - """ - Ensure that auto review is turned off and there are two submissions "PENDING_REVIEW" - """ - workflow_wrapper = Workflow(indico_client) - workflow_wrapper.update_workflow_settings( - workflow_id, enable_review=True, enable_auto_review=False - ) - sub_ids = workflow_wrapper.submit_documents_to_workflow( - workflow_id, files=[pdf_file, pdf_file] - ) - workflow_wrapper.wait_for_submissions_to_process(sub_ids) - - -def get_change_formatted_predictions(workflow_result): - """ - Helper function for get change format for accepted predictions in test_accept_review - """ - return {workflow_result.model_name: workflow_result.get_predictions.to_list()} - - -@pytest.mark.skip(reason="broken on indico-client>=6.1.0") -def test_accept_review(submissions_awaiting_review, indico_client, workflow_id): - reviewer_wrapper = Reviewer(indico_client, workflow_id) - id_in_review = reviewer_wrapper.get_random_review_id() - submission = reviewer_wrapper.get_submission_object(id_in_review) - assert submission.status == "PENDING_REVIEW" - predictions = reviewer_wrapper.get_submission_results_from_ids([id_in_review]) - changes = get_change_formatted_predictions(predictions[0]) - reviewer_wrapper.accept_review(id_in_review, changes) - submission = reviewer_wrapper.get_submission_object(id_in_review) - assert submission.status == "COMPLETE" - - -@pytest.mark.skip(reason="flaky, depends on submission processing time") -def test_reject_from_review(submissions_awaiting_review, indico_client, workflow_id): - reviewer_wrapper = Reviewer(indico_client, workflow_id) - id_in_review = reviewer_wrapper.get_random_review_id() - reviewer_wrapper.reject_submission(id_in_review) - submission = reviewer_wrapper.get_submission_object(id_in_review) - assert submission.status == "PENDING_ADMIN_REVIEW" - - -@pytest.mark.skip(reason="flaky, depends on submission processing time") -def test_reject_from_admin_review( - submissions_awaiting_review, indico_client, workflow_id -): - reviewer_wrapper = Reviewer(indico_client, workflow_id) - id_in_exception = reviewer_wrapper.get_random_exception_id() - 
submission = reviewer_wrapper.get_submission_object(id_in_exception) - assert submission.status == "PENDING_ADMIN_REVIEW" - reviewer_wrapper.reject_submission(id_in_exception) - submission = reviewer_wrapper.get_submission_object(id_in_exception) - assert submission.status == "COMPLETE" diff --git a/tests/integration/ocr/__init__.py b/tests/integration/ocr/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/integration/ocr/test_customocr_object.py b/tests/integration/ocr/test_customocr_object.py deleted file mode 100644 index 078ca7f2..00000000 --- a/tests/integration/ocr/test_customocr_object.py +++ /dev/null @@ -1,47 +0,0 @@ -import pytest - -from indico_toolkit.indico_wrapper import DocExtraction - - -def test_full_text(indico_client, pdf_file): - doc_extraction = DocExtraction(indico_client, preset_config="simple") - custom_ocr = doc_extraction.run_ocr(filepaths=[pdf_file]) - assert len(custom_ocr[0].full_text) == 2823 - - -def test_full_text_exception(indico_client, pdf_file): - doc_extraction = DocExtraction( - indico_client, - custom_config={ - "nest": True, - "top_level": "document", - "native_pdf": True, - "blocks": ["text", "position", "doc_offset", "page_offset"], - }, - ) - custom_ocr = doc_extraction.run_ocr(filepaths=[pdf_file]) - with pytest.raises(Exception): - custom_ocr[0].full_text - - -def test_page_texts(indico_client, pdf_file): - doc_extraction = DocExtraction( - indico_client, - custom_config={ - "nest": True, - "top_level": "document", - "native_pdf": True, - "pages": ["text", "size", "dpi", "doc_offset", "page_num", "image"], - "blocks": ["text", "position", "doc_offset", "page_offset"], - }, - ) - custom_ocr = doc_extraction.run_ocr(filepaths=[pdf_file]) - assert isinstance(custom_ocr[0].page_texts, list) - assert isinstance(custom_ocr[0].page_texts[0], str) - - -def test_page_texts_exception(indico_client, pdf_file): - doc_extraction = DocExtraction(indico_client, preset_config="legacy") - custom_ocr = doc_extraction.run_ocr(filepaths=[pdf_file]) - with pytest.raises(Exception): - custom_ocr.page_texts diff --git a/tests/integration/ocr/test_ondoc_object.py b/tests/integration/ocr/test_ondoc_object.py deleted file mode 100644 index e5f49c6b..00000000 --- a/tests/integration/ocr/test_ondoc_object.py +++ /dev/null @@ -1,63 +0,0 @@ -import pytest - -from indico_toolkit.ocr import OnDoc - - -def test_ondoc_full_text(ondoc_ocr_object): - ondoc_ocr = OnDoc(ondoc_ocr_object) - assert len(ondoc_ocr.full_text) == 2067 - - -def test_ondoc_page_texts(ondoc_ocr_object): - ondoc_ocr = OnDoc(ondoc_ocr_object) - assert len(ondoc_ocr.page_texts) == 2 - assert len(ondoc_ocr.page_texts[0]) == 1158 - - -def test_ondoc_page_results(ondoc_ocr_object): - ondoc_ocr = OnDoc(ondoc_ocr_object) - assert len(ondoc_ocr.page_results) == 2 - assert len(ondoc_ocr.page_results[0]) == 8 - - -def test_ondoc_block_texts(ondoc_ocr_object): - ondoc_ocr = OnDoc(ondoc_ocr_object) - assert len(ondoc_ocr.block_texts) == 41 - - -def test_ondoc_token_objects(ondoc_ocr_object): - ondoc_ocr = OnDoc(ondoc_ocr_object) - assert len(ondoc_ocr.token_objects) == 304 - - -def test_ondoc_total_pages(ondoc_ocr_object): - ondoc_ocr = OnDoc(ondoc_ocr_object) - assert ondoc_ocr.total_pages == 2 - - -def test_ondoc_total_characters(ondoc_ocr_object): - ondoc_ocr = OnDoc(ondoc_ocr_object) - assert ondoc_ocr.total_characters == 2067 - - -def test_ondoc_total_tokens(ondoc_ocr_object): - ondoc_ocr = OnDoc(ondoc_ocr_object) - assert ondoc_ocr.total_tokens == 304 - - -def 
test_ondoc_confidence(ondoc_ocr_object): - ondoc_ocr = OnDoc(ondoc_ocr_object) - assert isinstance(ondoc_ocr.ocr_confidence("mean"), float) - assert 1 <= ondoc_ocr.ocr_confidence("mean") <= 100 - - -def test_ondoc_confidence_metric_exception(ondoc_ocr_object): - ondoc_ocr = OnDoc(ondoc_ocr_object) - with pytest.raises(Exception): - ondoc_ocr.ocr_confidence("average") - - -def test_ondoc_excluded_confidence_exception(ondoc_ocr_object): - ondoc_ocr = OnDoc(ondoc_ocr_object[0]["chars"][0].pop("confidence")) - with pytest.raises(Exception): - ondoc_ocr.ocr_confidence("mean") diff --git a/tests/integration/ocr/test_standard_object.py b/tests/integration/ocr/test_standard_object.py deleted file mode 100644 index 4753b8da..00000000 --- a/tests/integration/ocr/test_standard_object.py +++ /dev/null @@ -1,33 +0,0 @@ -from indico_toolkit.ocr import StandardOcr - - -def test_standard_object_full_text(standard_ocr_object): - standard_ocr = StandardOcr(standard_ocr_object) - assert len(standard_ocr.full_text) == 2062 - - -def test_standard_object_page_texts(standard_ocr_object): - standard_ocr = StandardOcr(standard_ocr_object) - assert len(standard_ocr.page_texts) == 2 - assert len(standard_ocr.page_texts[0]) == 1153 - - -def test_standard_object_page_results(standard_ocr_object): - standard_ocr = StandardOcr(standard_ocr_object) - assert len(standard_ocr.page_results) == 2 - assert len(standard_ocr.page_results[0]) == 4 - - -def test_standard_object_block_texts(standard_ocr_object): - standard_ocr = StandardOcr(standard_ocr_object) - assert len(standard_ocr.block_texts) == 36 - - -def test_standard_object_total_pages(standard_ocr_object): - standard_ocr = StandardOcr(standard_ocr_object) - assert standard_ocr.total_pages == 2 - - -def test_standard_object_total_characters(standard_ocr_object): - standard_ocr = StandardOcr(standard_ocr_object) - assert standard_ocr.total_characters == 2062 diff --git a/tests/integration/test_populator.py b/tests/integration/test_populator.py deleted file mode 100644 index bd2981fd..00000000 --- a/tests/integration/test_populator.py +++ /dev/null @@ -1,80 +0,0 @@ -import json -import os - -import pytest -from indico.queries import GetWorkflow -from indico.types import Workflow - -from indico_toolkit.auto_populate import AutoPopulator -from indico_toolkit.auto_populate.types import LabelInput, LabelInst - -pd = pytest.importorskip("pandas") - - -@pytest.fixture(scope="function") -def static_file_to_targets(populator_snapshot_file): - df = pd.read_csv(populator_snapshot_file) - file_to_targets = {} - for file, target in zip( - df["file_name_1820"].to_list(), df["Toolkit Test Financial Model"].to_list() - ): - if not isinstance(target, float): - file_to_targets[file] = json.loads(target)["targets"] - return file_to_targets - - -def test_create_classification_workflow(indico_client, tests_folder): - auto_populator = AutoPopulator(indico_client) - new_workflow = auto_populator.create_auto_classification_workflow( - os.path.join(tests_folder, "data/auto_class"), - "My dataset", - "My workflow", - "My teach task", - ) - assert isinstance(new_workflow, Workflow) - - -def test_create_classification_workflow_too_few_classes(indico_client, tests_folder): - auto_populator = AutoPopulator(indico_client) - with pytest.raises(Exception): - auto_populator.create_auto_classification_workflow( - os.path.join(tests_folder, "data/auto_class/class_a/"), - "My dataset", - "My workflow", - "My teach task", - ) - - -def test_copy_teach_task(indico_client, dataset, workflow_id, teach_task_id): 
- auto_populator = AutoPopulator(indico_client) - original_workflow = indico_client.call(GetWorkflow(workflow_id)) - new_workflow = auto_populator.copy_teach_task( - dataset_id=dataset.id, - teach_task_id=teach_task_id, - workflow_name=f"{original_workflow.name}_Copied", - data_column="text", - ) - assert isinstance(new_workflow, Workflow) - - -def test_get_labels_by_filename( - indico_client, - extraction_model_group_id, - teach_task_id, - static_file_to_targets, -): - populator = AutoPopulator(indico_client) - ( - labelset_id, - model_group_id, - target_name_map, - ) = populator._get_teach_task_details(teach_task_id) - - labels = populator.get_labels_by_filename( - extraction_model_group_id, static_file_to_targets, target_name_map - ) - assert len(labels) != 0 - for label in labels: - assert isinstance(label, LabelInput) - for target in label.targets: - assert isinstance(target, LabelInst) From c283802eac959b53f57bb144f675d1a6dd34d2f1 Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Tue, 1 Apr 2025 08:33:41 -0500 Subject: [PATCH 02/14] Copy mypy config from pyproject.toml since mypy won't merge them --- mypy.ini | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mypy.ini b/mypy.ini index a205789e..582db9bb 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,7 +1,7 @@ -[mypy] # -# pyproject.toml contains the main configuration for mypy, which applies to existing -# modules that are type hinted and any new modules that are added. +# pyproject.toml contains the main configuration for mypy, which is copied here as mypy +# will not merge the configs. This config applies to existing modules that are type +# hinted and any new modules that are added. # # This file contains overrides to ignore errors in older modules and dependencies that # aren't type hinted, with the intention that these errors are fixed over time so that @@ -11,6 +11,11 @@ # address the errors it finds, repeating until the module passes. Once it passes, # remove the override from the list and commit the changes. 
# +[mypy] +strict = true +show_error_codes = true +warn_unreachable = true +disallow_any_unimported = true [mypy-indico.*] ignore_missing_imports = True From 6acf68c892b749516d53d2be6ffa18795f68688a Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Tue, 1 Apr 2025 08:34:28 -0500 Subject: [PATCH 03/14] Unignore type hinted modules --- mypy.ini | 9 --------- 1 file changed, 9 deletions(-) diff --git a/mypy.ini b/mypy.ini index 582db9bb..8ec91288 100644 --- a/mypy.ini +++ b/mypy.ini @@ -29,9 +29,6 @@ ignore_errors = True [mypy-indico_toolkit.auto_review.*] ignore_errors = True -[mypy-indico_toolkit.etloutput.*] -ignore_errors = True - [mypy-indico_toolkit.indico_wrapper.*] ignore_errors = True @@ -44,12 +41,6 @@ ignore_errors = True [mypy-indico_toolkit.pipelines.*] ignore_errors = True -[mypy-indico_toolkit.polling.*] -ignore_errors = True - -[mypy-indico_toolkit.results.*] -ignore_errors = True - [mypy-indico_toolkit.snapshots.*] ignore_errors = True From 8536eab4b9fad2821028e2fa6c35b81e4ebabeb8 Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Tue, 1 Apr 2025 08:37:12 -0500 Subject: [PATCH 04/14] Fix type ignores related to indico-client --- indico_toolkit/client.py | 2 +- indico_toolkit/polling/autoreview.py | 6 +++--- indico_toolkit/polling/downstream.py | 8 ++++---- indico_toolkit/polling/queries.py | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/indico_toolkit/client.py b/indico_toolkit/client.py index 1a4c4cd7..49f69468 100644 --- a/indico_toolkit/client.py +++ b/indico_toolkit/client.py @@ -8,7 +8,7 @@ @retry(IndicoRequestError, ConnectionError) -def create_client( +def create_client( # type: ignore[no-any-unimported] host: str, api_token_path: "str | None" = None, api_token_string: "str | None" = None, diff --git a/indico_toolkit/polling/autoreview.py b/indico_toolkit/polling/autoreview.py index 401a3128..460d077b 100644 --- a/indico_toolkit/polling/autoreview.py +++ b/indico_toolkit/polling/autoreview.py @@ -3,8 +3,8 @@ from dataclasses import dataclass from typing import TYPE_CHECKING -from indico import AsyncIndicoClient, IndicoConfig # type: ignore[import-untyped] -from indico.queries import ( # type: ignore[import-untyped] +from indico import AsyncIndicoClient, IndicoConfig +from indico.queries import ( GetSubmission, JobStatus, RetrieveStorageObject, @@ -41,7 +41,7 @@ class AutoReviewPoller: and submits the review results concurrently. """ - def __init__( + def __init__( # type: ignore[no-any-unimported] self, config: IndicoConfig, workflow_id: int, diff --git a/indico_toolkit/polling/downstream.py b/indico_toolkit/polling/downstream.py index b22b7f15..88506f98 100644 --- a/indico_toolkit/polling/downstream.py +++ b/indico_toolkit/polling/downstream.py @@ -2,12 +2,12 @@ import logging from typing import TYPE_CHECKING -from indico import AsyncIndicoClient, IndicoConfig # type: ignore[import-untyped] -from indico.queries import ( # type: ignore[import-untyped] +from indico import AsyncIndicoClient, IndicoConfig +from indico.queries import ( GetSubmission, UpdateSubmission, ) -from indico.types import Submission # type: ignore[import-untyped] +from indico.types import Submission from ..retry import retry from .queries import SubmissionIdsPendingDownstream @@ -29,7 +29,7 @@ class DownstreamPoller: them concurrently, and marks them as retrieved. 
""" - def __init__( + def __init__( # type: ignore[no-any-unimported] self, config: IndicoConfig, workflow_id: int, diff --git a/indico_toolkit/polling/queries.py b/indico_toolkit/polling/queries.py index bd629f5c..e67c257e 100644 --- a/indico_toolkit/polling/queries.py +++ b/indico_toolkit/polling/queries.py @@ -1,12 +1,12 @@ from typing import TYPE_CHECKING -from indico.queries import GraphQLRequest # type: ignore[import-untyped] +from indico.queries import GraphQLRequest if TYPE_CHECKING: from typing import Any -class SubmissionIdsPendingAutoReview(GraphQLRequest): # type: ignore[misc] +class SubmissionIdsPendingAutoReview(GraphQLRequest): # type: ignore[misc, no-any-unimported] QUERY = """ query SubmissionIdsPendingAutoReview($workflowIds: [Int]) { submissions( @@ -33,7 +33,7 @@ def process_response(self, response: "Any") -> set[int]: } -class SubmissionIdsPendingDownstream(GraphQLRequest): # type: ignore[misc] +class SubmissionIdsPendingDownstream(GraphQLRequest): # type: ignore[misc, no-any-unimported] QUERY = """ query SubmissionIdsPendingDownstream($workflowIds: [Int]) { submissions( From 2e771b059e638b43abd47699eb565db3f0d9f494 Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Tue, 1 Apr 2025 08:38:23 -0500 Subject: [PATCH 05/14] Fix result file test asserts that mypy thinks are unreachable --- tests/results/test_predictions.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/results/test_predictions.py b/tests/results/test_predictions.py index 114329d1..b3b43910 100644 --- a/tests/results/test_predictions.py +++ b/tests/results/test_predictions.py @@ -16,7 +16,7 @@ def test_confidence() -> None: assert prediction.confidence == 1.0 -def test_extractions() -> None: +def test_accepted() -> None: prediction = Extraction( document=None, # type: ignore[arg-type] model=None, # type: ignore[arg-type] @@ -35,6 +35,20 @@ def test_extractions() -> None: prediction.unaccept() assert not prediction.accepted + +def test_rejected() -> None: + prediction = Extraction( + document=None, # type: ignore[arg-type] + model=None, # type: ignore[arg-type] + review=None, + label="Label", + confidences={"Label": 0.5}, + extras=None, # type: ignore[arg-type] + text="Value", + accepted=False, + rejected=False, + ) + prediction.accept() prediction.reject() assert prediction.rejected From 0c726eb11af615ee2b04892185cccc3020f6f784 Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Fri, 25 Apr 2025 11:09:24 -0500 Subject: [PATCH 06/14] Parse static model results from `component_results` using IPA 7.2 `component_metadata` --- indico_toolkit/results/document.py | 18 ++++--- indico_toolkit/results/predictionlist.py | 29 ++++++++--- indico_toolkit/results/result.py | 65 +++++++++++++++++++++--- 3 files changed, 91 insertions(+), 21 deletions(-) diff --git a/indico_toolkit/results/document.py b/indico_toolkit/results/document.py index c620a218..2e695978 100644 --- a/indico_toolkit/results/document.py +++ b/indico_toolkit/results/document.py @@ -12,13 +12,14 @@ class Document: error: str traceback: str - # Auto review changes must reproduce all model sections that were present in the - # original result file. This may not be possible from the predictions alone--if a - # model had an empty section because it didn't produce predictions or if all of - # the predictions were removed to reject them. As such, the models seen when - # parsing result files are tracked per-document so that the empty sections can be - # reproduced later. 
+ # Auto review changes must reproduce all model and component sections that were + # present in the original result file. This may not be possible from the + # predictions alone--if a model or component had an empty section because it didn't + # produce predictions or if all of the predictions for that section were dropped. + # As such, the models and components seen when parsing a result file are tracked + # per-document so that the empty sections can be reproduced later. _model_sections: "frozenset[str]" + _component_sections: "frozenset[str]" @staticmethod def from_v1_dict(result: object) -> "Document": @@ -38,6 +39,7 @@ def from_v1_dict(result: object) -> "Document": error="", traceback="", _model_sections=model_names, + _component_sections=frozenset(), ) @staticmethod @@ -46,7 +48,9 @@ def from_v3_dict(document: object) -> "Document": Create a `Document` from a v3 document dictionary. """ model_results = get(document, dict, "model_results", "ORIGINAL") + component_results = get(document, dict, "component_results", "ORIGINAL") model_ids = frozenset(model_results.keys()) + component_ids = frozenset(component_results.keys()) etl_output_uri = get(document, str, "etl_output") return Document( @@ -57,6 +61,7 @@ def from_v3_dict(document: object) -> "Document": error="", traceback="", _model_sections=model_ids, + _component_sections=component_ids, ) @staticmethod @@ -75,4 +80,5 @@ def from_v3_errored_file(errored_file: object) -> "Document": error=error, traceback=traceback, _model_sections=frozenset(), + _component_sections=frozenset(), ) diff --git a/indico_toolkit/results/predictionlist.py b/indico_toolkit/results/predictionlist.py index 755c0c7a..cbea64f8 100644 --- a/indico_toolkit/results/predictionlist.py +++ b/indico_toolkit/results/predictionlist.py @@ -370,13 +370,8 @@ def to_v3_changes(self, documents: "Iterable[Document]") -> "list[dict[str, Any] continue model_results: "dict[str, Any]" = {} - changes.append( - { - "submissionfile_id": document.id, - "model_results": model_results, - "component_results": {}, - } - ) + component_results: "dict[str, Any]" = {} + predictions_by_model = self.where( document=document, ).groupby( @@ -384,12 +379,30 @@ def to_v3_changes(self, documents: "Iterable[Document]") -> "list[dict[str, Any] ) for model, predictions in predictions_by_model.items(): - model_results[str(model.id)] = [ + model_id = str(model.id) + prediction_dicts = [ prediction.to_v3_dict() for prediction in predictions ] + if model_id in document._model_sections: + model_results[model_id] = prediction_dicts + elif model_id in document._component_sections: + component_results[model_id] = prediction_dicts + for model_id in document._model_sections: if model_id not in model_results: model_results[model_id] = [] + for component_id in document._component_sections: + if component_id not in component_results: + component_results[component_id] = [] + + changes.append( + { + "submissionfile_id": document.id, + "model_results": model_results, + "component_results": component_results, + } + ) + return changes diff --git a/indico_toolkit/results/result.py b/indico_toolkit/results/result.py index 4a6c79d3..ceefb5cd 100644 --- a/indico_toolkit/results/result.py +++ b/indico_toolkit/results/result.py @@ -5,12 +5,13 @@ from . 
import predictions as prediction from .document import Document +from .errors import ResultError from .model import ModelGroup from .normalization import normalize_v1_result, normalize_v3_result from .predictionlist import PredictionList from .predictions import Prediction from .review import Review, ReviewType -from .utils import get +from .utils import get, has if TYPE_CHECKING: from typing import Any @@ -106,16 +107,33 @@ def from_v3_dict(result: object) -> "Result": submission_id = get(result, int, "submission_id") submission_results = get(result, list, "submission_results") modelgroup_metadata = get(result, dict, "modelgroup_metadata") + component_metadata = get(result, dict, "component_metadata") review_metadata = get(result, dict, "reviews") - - processed_documents = map(Document.from_v3_dict, submission_results) errored_files = get(result, dict, "errored_files").values() - failed_documents = map(Document.from_v3_errored_file, errored_files) - documents = sorted(chain(processed_documents, failed_documents)) - models = sorted(map(ModelGroup.from_v3_dict, modelgroup_metadata.values())) - predictions: "PredictionList[Prediction]" = PredictionList() + + static_model_components = filter( + lambda component: ( + get(component, str, "component_type").casefold() == "static_model" + ), + component_metadata.values(), + ) + + documents = sorted( + chain( + map(Document.from_v3_dict, submission_results), + map(Document.from_v3_errored_file, errored_files), + ) + ) + models = sorted( + chain( + map(ModelGroup.from_v3_dict, modelgroup_metadata.values()), + map(ModelGroup.from_v3_dict, static_model_components), + ) + ) reviews = sorted(map(Review.from_dict, review_metadata.values())) + predictions: "PredictionList[Prediction]" = PredictionList() + for document_dict in submission_results: document_id = get(document_dict, int, "submissionfile_id") document = next( @@ -124,11 +142,17 @@ def from_v3_dict(result: object) -> "Result": reviewed_model_predictions: "list[tuple[Review | None, Any]]" = [ (None, get(document_dict, dict, "model_results", "ORIGINAL")) ] + reviewed_component_predictions: "list[tuple[Review | None, Any]]" = [ + (None, get(document_dict, dict, "component_results", "ORIGINAL")) + ] if reviews: reviewed_model_predictions.append( (reviews[-1], get(document_dict, dict, "model_results", "FINAL")) ) + reviewed_component_predictions.append( + (reviews[-1], get(document_dict, dict, "component_results", "FINAL")) # fmt: skip # noqa: E501 + ) for review, model_section in reviewed_model_predictions: for model_id, model_predictions in model_section.items(): @@ -142,6 +166,33 @@ def from_v3_dict(result: object) -> "Result": ) ) + for review, component_section in reviewed_component_predictions: + for component_id, component_predictions in component_section.items(): + try: + model = next( + filter(lambda model: model.id == int(component_id), models) + ) + except StopIteration: + if has(component_metadata, str, component_id, "component_type"): + component_type = get( + component_metadata, str, component_id, "component_type" + ) + raise ResultError( + f"unsupported component type `{component_type!r}` " + f"for component {component_id}" + ) + else: + raise ResultError( + f"no component metadata for component {component_id}" + ) + + predictions.extend( + map( + partial(prediction.from_v3_dict, document, model, review), + component_predictions, + ) + ) + return Result( version=version, submission_id=submission_id, From 6a2d06cc8f0e380913a1043b043ffe66f3c5ced9 Mon Sep 17 00:00:00 2001 From: Michael 
Welborn Date: Fri, 25 Apr 2025 11:34:12 -0500 Subject: [PATCH 07/14] Include component results in normalization and unit tests --- indico_toolkit/results/normalization.py | 17 +- .../data/results/97211_v3_static_models.json | 1215 +++++++++++++++++ tests/results/test_document.py | 17 +- tests/results/test_predictionlist.py | 1 + 4 files changed, 1246 insertions(+), 4 deletions(-) create mode 100644 tests/data/results/97211_v3_static_models.json diff --git a/indico_toolkit/results/normalization.py b/indico_toolkit/results/normalization.py index 7924f347..b96dff0d 100644 --- a/indico_toolkit/results/normalization.py +++ b/indico_toolkit/results/normalization.py @@ -1,4 +1,5 @@ import re +from itertools import chain from typing import TYPE_CHECKING from .utils import get, has @@ -88,12 +89,18 @@ def normalize_v3_result(result: "Any") -> None: """ task_type_by_model_group_id = { model_group_id: model_group["task_type"] - for model_group_id, model_group in result["modelgroup_metadata"].items() + for model_group_id, model_group in chain( + result["modelgroup_metadata"].items(), + result.get("component_metadata", {}).items(), + ) } predictions_with_task_type: "Iterator[tuple[Any, str]]" = ( - (prediction, task_type_by_model_group_id[model_group_id]) + (prediction, task_type_by_model_group_id.get(model_group_id, "")) for submission_result in get(result, list, "submission_results") - for review_result in get(submission_result, dict, "model_results").values() + for review_result in chain( + get(submission_result, dict, "model_results").values(), + get(submission_result, dict, "component_results").values(), + ) for model_group_id, model_results in review_result.items() for prediction in model_results ) @@ -139,6 +146,10 @@ def normalize_v3_result(result: "Any") -> None: if task_type == "summarization" and "citations" not in prediction: prediction["citations"] = [] + # Prior to 7.2, v3 result files don't include a `component_metadata` section. + if not has(result, dict, "component_metadata"): + result["component_metadata"] = {} + # Prior to 6.8, v3 result files don't include a `reviews` section. 
if not has(result, dict, "reviews"): result["reviews"] = {} diff --git a/tests/data/results/97211_v3_static_models.json b/tests/data/results/97211_v3_static_models.json new file mode 100644 index 00000000..07c6782e --- /dev/null +++ b/tests/data/results/97211_v3_static_models.json @@ -0,0 +1,1215 @@ +{ + "file_version": 3, + "submission_id": 97211, + "modelgroup_metadata": {}, + "component_metadata": { + "19407": { + "id": 19407, + "name": null, + "component_type": "input_ocr_extraction", + "task_type": null + }, + "19408": { + "id": 19408, + "name": null, + "component_type": "output_json_formatter", + "task_type": null + }, + "19409": { + "id": 19409, + "name": "Accounting Classification", + "component_type": "static_model", + "task_type": "classification" + }, + "19410": { + "id": 19410, + "name": "Agent Link", + "component_type": "link_classification_model", + "task_type": null + }, + "19411": { + "id": 19411, + "name": "Invoice Extraction", + "component_type": "static_model", + "task_type": "annotation" + }, + "19412": { + "id": 19412, + "name": "Purchase Order Extraction", + "component_type": "static_model", + "task_type": "annotation" + }, + "19413": { + "id": 19413, + "name": "Invoice Line Items", + "component_type": "link_label", + "task_type": null + }, + "19414": { + "id": 19414, + "name": "Purchase Order Line Items", + "component_type": "link_label", + "task_type": null + }, + "19415": { + "id": 19415, + "name": "Review", + "component_type": "review", + "task_type": null + }, + "19416": { + "id": 19416, + "name": "Standard Output", + "component_type": "default_output", + "task_type": null + } + }, + "submission_results": [ + { + "submissionfile_id": 93479, + "etl_output": "indico-file:///storage/submission/5588/97211/93479/etl_output.json", + "input_filename": "invoice.pdf", + "input_filepath": "indico-file:///storage/submission/5588/97211/93479.pdf", + "input_filesize": 426157, + "model_results": { "ORIGINAL": {}, "FINAL": {} }, + "component_results": { + "ORIGINAL": { + "19409": [ + { + "field_id": 858117, + "confidence": { + "Invoice": 0.9999999853918985, + "Purchase Order": 1.4608101511772668e-8 + }, + "label": "Invoice" + } + ], + "19411": [ + { + "label": "Vendor Name", + "spans": [{ "start": 0, "end": 13, "page_num": 0 }], + "span_id": "93479:c:19411:idx:0", + "confidence": { + "Invoice Date": 3.3424697676309734e-8, + "Invoice Number": 3.447171437187535e-8, + "Invoice Subtotal": 2.993116865468437e-8, + "Invoice Tax": 3.6883669451981405e-8, + "Invoice Total": 2.7991509554681215e-8, + "Line Item Name": 8.883939806025865e-9, + "Line Item Quantity": 5.827023485949212e-8, + "Line Item Total": 5.176908146609094e-8, + "Vendor Name": 0.9999996423721313 + }, + "field_id": 858126, + "location_type": "exact", + "text": "HubSpot, Inc.", + "groupings": [], + "normalized": { + "text": "HubSpot, Inc.", + "start": 0, + "end": 13, + "structured": null, + "formatted": "HubSpot, Inc.", + "status": "SUCCESS", + "comparison_type": "string", + "comparison_value": "HubSpot, Inc.", + "validation": [ + { + "validation_type": "TYPE_CONVERSION", + "error_message": null, + "validation_status": "SUCCESS" + } + ] + } + }, + { + "label": "Invoice Date", + "spans": [ + { "start": 125, "end": 135, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:1", + "confidence": { + "Invoice Date": 0.9999996423721313, + "Invoice Number": 3.765953948686729e-8, + "Invoice Subtotal": 2.3938278914670263e-8, + "Invoice Tax": 5.890121812512916e-8, + "Invoice Total": 2.9429369163835872e-8, + "Line Item Name": 
1.0651284299001418e-7, + "Line Item Quantity": 1.2222901091263338e-7, + "Line Item Total": 4.1870002576160914e-8, + "Vendor Name": 1.4272615089794272e-8 + }, + "field_id": 858119, + "location_type": "exact", + "text": "06/21/2016", + "groupings": [], + "normalized": { + "text": "06/21/2016", + "start": 125, + "end": 135, + "structured": null, + "formatted": "06/21/2016", + "status": "SUCCESS", + "comparison_type": "string", + "comparison_value": "06/21/2016", + "validation": [ + { + "validation_type": "TYPE_CONVERSION", + "error_message": null, + "validation_status": "SUCCESS" + } + ] + } + }, + { + "label": "Invoice Number", + "spans": [ + { "start": 146, "end": 153, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:2", + "confidence": { + "Invoice Date": 1.8282889868714847e-8, + "Invoice Number": 0.9999997019767761, + "Invoice Subtotal": 5.3069218353130054e-8, + "Invoice Tax": 3.253961722293752e-8, + "Invoice Total": 1.3829179579261108e-7, + "Line Item Name": 4.505617923200589e-8, + "Line Item Quantity": 2.9700066406235237e-8, + "Line Item Total": 2.1834006602716727e-8, + "Vendor Name": 5.3663740118281567e-8 + }, + "field_id": 858120, + "location_type": "exact", + "text": "3927578", + "groupings": [], + "normalized": { + "text": "3927578", + "start": 146, + "end": 153, + "structured": null, + "formatted": "3927578", + "status": "SUCCESS", + "comparison_type": "string", + "comparison_value": "3927578", + "validation": [ + { + "validation_type": "TYPE_CONVERSION", + "error_message": null, + "validation_status": "SUCCESS" + } + ] + } + }, + { + "label": "Line Item Name", + "spans": [ + { "start": 340, "end": 407, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:3", + "confidence": { + "Invoice Date": 7.043927041650022e-8, + "Invoice Number": 1.85422432963378e-8, + "Invoice Subtotal": 9.213624529991193e-9, + "Invoice Tax": 8.453332611679798e-8, + "Invoice Total": 1.9014857244314953e-8, + "Line Item Name": 0.9999997019767761, + "Line Item Quantity": 8.187748257171279e-9, + "Line Item Total": 6.951040631975047e-9, + "Vendor Name": 7.4245527059702e-8 + }, + "field_id": 858121, + "location_type": "exact", + "text": "HubSpot Enterprise\nIncluded Contacts\nEnterprise Contacts - Per 1000", + "groupings": [ + { + "group_name": "Invoice Line Item", + "group_index": 4, + "group_id": "19411:Invoice Line Item" + } + ], + "normalized": { + "text": "HubSpot Enterprise\nIncluded Contacts\nEnterprise Contacts - Per 1000", + "start": 340, + "end": 407, + "structured": null, + "formatted": "HubSpot Enterprise\nIncluded Contacts\nEnterprise Contacts - Per 1000", + "status": "SUCCESS", + "comparison_type": "string", + "comparison_value": "HubSpot Enterprise\nIncluded Contacts\nEnterprise Contacts - Per 1000", + "validation": [ + { + "validation_type": "TYPE_CONVERSION", + "error_message": null, + "validation_status": "SUCCESS" + } + ] + } + }, + { + "label": "Line Item Quantity", + "spans": [ + { "start": 476, "end": 477, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:4", + "confidence": { + "Invoice Date": 9.375153098289957e-8, + "Invoice Number": 5.951939741066781e-8, + "Invoice Subtotal": 6.869062474379461e-8, + "Invoice Tax": 1.196322472196698e-7, + "Invoice Total": 7.469050444797176e-8, + "Line Item Name": 6.045668499154999e-8, + "Line Item Quantity": 0.9999992847442627, + "Line Item Total": 1.0597534583212109e-7, + "Vendor Name": 1.2245095604157541e-8 + }, + "field_id": 858122, + "location_type": "exact", + "text": "1", + "groupings": [ + { + "group_name": "Invoice Line Item", + 
"group_index": 4, + "group_id": "19411:Invoice Line Item" + } + ], + "normalized": { + "text": "1", + "start": 476, + "end": 477, + "structured": null, + "formatted": "1", + "status": "SUCCESS", + "comparison_type": "string", + "comparison_value": "1", + "validation": [ + { + "validation_type": "TYPE_CONVERSION", + "error_message": null, + "validation_status": "SUCCESS" + } + ] + } + }, + { + "label": "Line Item Total", + "spans": [ + { "start": 478, "end": 487, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:5", + "confidence": { + "Invoice Date": 2.9472216667159046e-8, + "Invoice Number": 6.045723477399179e-9, + "Invoice Subtotal": 3.819852167907811e-8, + "Invoice Tax": 6.359238025055447e-9, + "Invoice Total": 1.601269694617713e-8, + "Line Item Name": 4.507643325268873e-8, + "Line Item Quantity": 9.02977035366348e-8, + "Line Item Total": 0.9999998211860657, + "Vendor Name": 3.079120602933472e-8 + }, + "field_id": 858123, + "location_type": "exact", + "text": "$1,200.00", + "groupings": [ + { + "group_name": "Invoice Line Item", + "group_index": 4, + "group_id": "19411:Invoice Line Item" + } + ], + "normalized": { + "text": "$1,200.00", + "start": 478, + "end": 487, + "structured": null, + "formatted": "$1,200.00", + "status": "SUCCESS", + "comparison_type": "string", + "comparison_value": "$1,200.00", + "validation": [ + { + "validation_type": "TYPE_CONVERSION", + "error_message": null, + "validation_status": "SUCCESS" + } + ] + } + }, + { + "label": "Line Item Quantity", + "spans": [ + { "start": 499, "end": 501, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:6", + "confidence": { + "Invoice Date": 1.4731193687111954e-7, + "Invoice Number": 4.397265485067692e-8, + "Invoice Subtotal": 8.046448840559606e-8, + "Invoice Tax": 1.2959037576365517e-7, + "Invoice Total": 5.365328092921118e-8, + "Line Item Name": 4.536295961088399e-8, + "Line Item Quantity": 0.9999994039535522, + "Line Item Total": 6.116934514466266e-8, + "Vendor Name": 1.0360611923942997e-8 + }, + "field_id": 858122, + "location_type": "exact", + "text": "10", + "groupings": [ + { + "group_name": "Invoice Line Item", + "group_index": 5, + "group_id": "19411:Invoice Line Item" + } + ], + "normalized": { + "text": "10", + "start": 499, + "end": 501, + "structured": null, + "formatted": "10", + "status": "SUCCESS", + "comparison_type": "string", + "comparison_value": "10", + "validation": [ + { + "validation_type": "TYPE_CONVERSION", + "error_message": null, + "validation_status": "SUCCESS" + } + ] + } + }, + { + "label": "Line Item Total", + "spans": [ + { "start": 502, "end": 507, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:7", + "confidence": { + "Invoice Date": 2.487784023230688e-8, + "Invoice Number": 5.1960333813383386e-9, + "Invoice Subtotal": 2.7321593876195038e-8, + "Invoice Tax": 5.000234892804656e-9, + "Invoice Total": 1.1822152146123699e-8, + "Line Item Name": 3.007375326546935e-8, + "Line Item Quantity": 8.39153528886527e-8, + "Line Item Total": 0.9999997615814209, + "Vendor Name": 1.9458024524965367e-8 + }, + "field_id": 858123, + "location_type": "exact", + "text": "$0.00", + "groupings": [ + { + "group_name": "Invoice Line Item", + "group_index": 5, + "group_id": "19411:Invoice Line Item" + } + ], + "normalized": { + "text": "$0.00", + "start": 502, + "end": 507, + "structured": null, + "formatted": "$0.00", + "status": "SUCCESS", + "comparison_type": "string", + "comparison_value": "$0.00", + "validation": [ + { + "validation_type": "TYPE_CONVERSION", + "error_message": null, + 
"validation_status": "SUCCESS" + } + ] + } + }, + { + "label": "Line Item Quantity", + "spans": [ + { "start": 519, "end": 520, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:8", + "confidence": { + "Invoice Date": 1.3724279313009902e-7, + "Invoice Number": 4.43608350053637e-8, + "Invoice Subtotal": 9.689298252624212e-8, + "Invoice Tax": 1.3488433125985466e-7, + "Invoice Total": 5.79847068138406e-8, + "Line Item Name": 4.117256224844823e-8, + "Line Item Quantity": 0.9999994039535522, + "Line Item Total": 8.499782211401907e-8, + "Vendor Name": 9.074271112297083e-9 + }, + "field_id": 858122, + "location_type": "exact", + "text": "5", + "groupings": [ + { + "group_name": "Invoice Line Item", + "group_index": 6, + "group_id": "19411:Invoice Line Item" + } + ], + "normalized": { + "text": "5", + "start": 519, + "end": 520, + "structured": null, + "formatted": "5", + "status": "SUCCESS", + "comparison_type": "string", + "comparison_value": "5", + "validation": [ + { + "validation_type": "TYPE_CONVERSION", + "error_message": null, + "validation_status": "SUCCESS" + } + ] + } + }, + { + "label": "Line Item Total", + "spans": [ + { "start": 521, "end": 527, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:9", + "confidence": { + "Invoice Date": 3.019848193730468e-8, + "Invoice Number": 5.982286666039727e-9, + "Invoice Subtotal": 4.329503866529194e-8, + "Invoice Tax": 5.606381581202413e-9, + "Invoice Total": 1.3499708018116507e-8, + "Line Item Name": 3.2691968243625524e-8, + "Line Item Quantity": 8.082533042852447e-8, + "Line Item Total": 0.9999997615814209, + "Vendor Name": 2.1103359060248295e-8 + }, + "field_id": 858123, + "location_type": "exact", + "text": "$25.00", + "groupings": [ + { + "group_name": "Invoice Line Item", + "group_index": 6, + "group_id": "19411:Invoice Line Item" + } + ], + "normalized": { + "text": "$25.00", + "start": 521, + "end": 527, + "structured": null, + "formatted": "$25.00", + "status": "SUCCESS", + "comparison_type": "string", + "comparison_value": "$25.00", + "validation": [ + { + "validation_type": "TYPE_CONVERSION", + "error_message": null, + "validation_status": "SUCCESS" + } + ] + } + }, + { + "label": "Invoice Subtotal", + "spans": [ + { "start": 537, "end": 546, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:10", + "confidence": { + "Invoice Date": 9.50030809576674e-9, + "Invoice Number": 3.281632743323826e-8, + "Invoice Subtotal": 1.0, + "Invoice Tax": 3.089213507223576e-8, + "Invoice Total": 4.5628176792433806e-9, + "Line Item Name": 4.575837042608555e-9, + "Line Item Quantity": 1.3403760767971562e-8, + "Line Item Total": 3.6726991226032624e-8, + "Vendor Name": 9.200683770416163e-9 + }, + "field_id": 858118, + "location_type": "exact", + "text": "$1,225.00", + "groupings": [], + "normalized": { + "text": "$1,225.00", + "start": 537, + "end": 546, + "structured": null, + "formatted": "$1,225.00", + "status": "SUCCESS", + "comparison_type": "string", + "comparison_value": "$1,225.00", + "validation": [ + { + "validation_type": "TYPE_CONVERSION", + "error_message": null, + "validation_status": "SUCCESS" + } + ] + } + }, + { + "label": "Invoice Tax", + "spans": [ + { "start": 557, "end": 563, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:11", + "confidence": { + "Invoice Date": 6.549097975039331e-8, + "Invoice Number": 1.693945250735851e-8, + "Invoice Subtotal": 4.138750497872934e-8, + "Invoice Tax": 0.9999997615814209, + "Invoice Total": 1.009439642984944e-8, + "Line Item Name": 6.045966927104018e-8, + "Line Item Quantity": 
4.567436207025821e-8, + "Line Item Total": 2.1588691723195552e-8, + "Vendor Name": 7.856115757931548e-9 + }, + "field_id": 858125, + "location_type": "exact", + "text": "$76.56", + "groupings": [], + "normalized": { + "text": "$76.56", + "start": 557, + "end": 563, + "structured": null, + "formatted": "$76.56", + "status": "SUCCESS", + "comparison_type": "string", + "comparison_value": "$76.56", + "validation": [ + { + "validation_type": "TYPE_CONVERSION", + "error_message": null, + "validation_status": "SUCCESS" + } + ] + } + }, + { + "label": "Invoice Total", + "spans": [ + { "start": 570, "end": 579, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:12", + "confidence": { + "Invoice Date": 3.388734981513153e-8, + "Invoice Number": 4.117489993404888e-8, + "Invoice Subtotal": 2.353094430418423e-8, + "Invoice Tax": 1.6204319663870592e-8, + "Invoice Total": 0.9999997019767761, + "Line Item Name": 7.487341413536797e-9, + "Line Item Quantity": 4.3006924244082256e-8, + "Line Item Total": 7.838348636823866e-8, + "Vendor Name": 3.8579965888629886e-8 + }, + "field_id": 858124, + "location_type": "exact", + "text": "$1,301.56", + "groupings": [], + "normalized": { + "text": "$1,301.56", + "start": 570, + "end": 579, + "structured": null, + "formatted": "$1,301.56", + "status": "SUCCESS", + "comparison_type": "string", + "comparison_value": "$1,301.56", + "validation": [ + { + "validation_type": "TYPE_CONVERSION", + "error_message": null, + "validation_status": "SUCCESS" + } + ] + } + } + ] + }, + "FINAL": { + "19409": [ + { + "label": "Invoice", + "field_id": 858117, + "confidence": { + "Invoice": 0.9999999853918985, + "Purchase Order": 1.4608101511772668e-8 + } + } + ], + "19411": [ + { + "text": "Updated!", + "label": "Vendor Name", + "spans": [{ "end": 13, "start": 0, "page_num": 0 }], + "span_id": "93479:c:19411:idx:0", + "field_id": 858126, + "groupings": [], + "confidence": { + "Invoice Tax": 3.6883669451981405e-8, + "Vendor Name": 0.9999996423721313, + "Invoice Date": 3.3424697676309734e-8, + "Invoice Total": 2.7991509554681215e-8, + "Invoice Number": 3.447171437187535e-8, + "Line Item Name": 8.883939806025865e-9, + "Line Item Total": 5.176908146609094e-8, + "Invoice Subtotal": 2.993116865468437e-8, + "Line Item Quantity": 5.827023485949212e-8 + }, + "normalized": { + "end": 13, + "text": "Updated!", + "start": 0, + "status": "SUCCESS", + "formatted": "Updated!", + "structured": null, + "validation": [ + { + "error_message": null, + "validation_type": "TYPE_CONVERSION", + "validation_status": "SUCCESS" + } + ], + "comparison_type": "string", + "comparison_value": "HubSpot, Inc." 
+ }, + "location_type": "exact" + }, + { + "text": "Updated!", + "label": "Invoice Date", + "spans": [ + { "end": 135, "start": 125, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:1", + "field_id": 858119, + "groupings": [], + "confidence": { + "Invoice Tax": 5.890121812512916e-8, + "Vendor Name": 1.4272615089794272e-8, + "Invoice Date": 0.9999996423721313, + "Invoice Total": 2.9429369163835872e-8, + "Invoice Number": 3.765953948686729e-8, + "Line Item Name": 1.0651284299001418e-7, + "Line Item Total": 4.1870002576160914e-8, + "Invoice Subtotal": 2.3938278914670263e-8, + "Line Item Quantity": 1.2222901091263338e-7 + }, + "normalized": { + "end": 135, + "text": "Updated!", + "start": 125, + "status": "SUCCESS", + "formatted": "Updated!", + "structured": null, + "validation": [ + { + "error_message": null, + "validation_type": "TYPE_CONVERSION", + "validation_status": "SUCCESS" + } + ], + "comparison_type": "string", + "comparison_value": "06/21/2016" + }, + "location_type": "exact" + }, + { + "text": "Updated!", + "label": "Invoice Number", + "spans": [ + { "end": 153, "start": 146, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:2", + "field_id": 858120, + "groupings": [], + "confidence": { + "Invoice Tax": 3.253961722293752e-8, + "Vendor Name": 5.3663740118281567e-8, + "Invoice Date": 1.8282889868714847e-8, + "Invoice Total": 1.3829179579261108e-7, + "Invoice Number": 0.9999997019767761, + "Line Item Name": 4.505617923200589e-8, + "Line Item Total": 2.1834006602716727e-8, + "Invoice Subtotal": 5.3069218353130054e-8, + "Line Item Quantity": 2.9700066406235237e-8 + }, + "normalized": { + "end": 153, + "text": "Updated!", + "start": 146, + "status": "SUCCESS", + "formatted": "Updated!", + "structured": null, + "validation": [ + { + "error_message": null, + "validation_type": "TYPE_CONVERSION", + "validation_status": "SUCCESS" + } + ], + "comparison_type": "string", + "comparison_value": "3927578" + }, + "location_type": "exact" + }, + { + "text": "Updated!", + "label": "Line Item Name", + "spans": [ + { "end": 407, "start": 340, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:3", + "field_id": 858121, + "groupings": [ + { + "group_id": "19411:Invoice Line Item", + "group_name": "Invoice Line Item", + "group_index": 4 + } + ], + "confidence": { + "Invoice Tax": 8.453332611679798e-8, + "Vendor Name": 7.4245527059702e-8, + "Invoice Date": 7.043927041650022e-8, + "Invoice Total": 1.9014857244314953e-8, + "Invoice Number": 1.85422432963378e-8, + "Line Item Name": 0.9999997019767761, + "Line Item Total": 6.951040631975047e-9, + "Invoice Subtotal": 9.213624529991193e-9, + "Line Item Quantity": 8.187748257171279e-9 + }, + "normalized": { + "end": 407, + "text": "Updated!", + "start": 340, + "status": "SUCCESS", + "formatted": "Updated!", + "structured": null, + "validation": [ + { + "error_message": null, + "validation_type": "TYPE_CONVERSION", + "validation_status": "SUCCESS" + } + ], + "comparison_type": "string", + "comparison_value": "HubSpot Enterprise\nIncluded Contacts\nEnterprise Contacts - Per 1000" + }, + "location_type": "exact" + }, + { + "text": "Updated!", + "label": "Line Item Quantity", + "spans": [ + { "end": 477, "start": 476, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:4", + "field_id": 858122, + "groupings": [ + { + "group_id": "19411:Invoice Line Item", + "group_name": "Invoice Line Item", + "group_index": 4 + } + ], + "confidence": { + "Invoice Tax": 1.196322472196698e-7, + "Vendor Name": 1.2245095604157541e-8, + "Invoice Date": 9.375153098289957e-8, + 
"Invoice Total": 7.469050444797176e-8, + "Invoice Number": 5.951939741066781e-8, + "Line Item Name": 6.045668499154999e-8, + "Line Item Total": 1.0597534583212109e-7, + "Invoice Subtotal": 6.869062474379461e-8, + "Line Item Quantity": 0.9999992847442627 + }, + "normalized": { + "end": 477, + "text": "Updated!", + "start": 476, + "status": "SUCCESS", + "formatted": "Updated!", + "structured": null, + "validation": [ + { + "error_message": null, + "validation_type": "TYPE_CONVERSION", + "validation_status": "SUCCESS" + } + ], + "comparison_type": "string", + "comparison_value": "1" + }, + "location_type": "exact" + }, + { + "text": "Updated!", + "label": "Line Item Total", + "spans": [ + { "end": 487, "start": 478, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:5", + "field_id": 858123, + "groupings": [ + { + "group_id": "19411:Invoice Line Item", + "group_name": "Invoice Line Item", + "group_index": 4 + } + ], + "confidence": { + "Invoice Tax": 6.359238025055447e-9, + "Vendor Name": 3.079120602933472e-8, + "Invoice Date": 2.9472216667159046e-8, + "Invoice Total": 1.601269694617713e-8, + "Invoice Number": 6.045723477399179e-9, + "Line Item Name": 4.507643325268873e-8, + "Line Item Total": 0.9999998211860657, + "Invoice Subtotal": 3.819852167907811e-8, + "Line Item Quantity": 9.02977035366348e-8 + }, + "normalized": { + "end": 487, + "text": "Updated!", + "start": 478, + "status": "SUCCESS", + "formatted": "Updated!", + "structured": null, + "validation": [ + { + "error_message": null, + "validation_type": "TYPE_CONVERSION", + "validation_status": "SUCCESS" + } + ], + "comparison_type": "string", + "comparison_value": "$1,200.00" + }, + "location_type": "exact" + }, + { + "text": "Updated!", + "label": "Line Item Quantity", + "spans": [ + { "end": 501, "start": 499, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:6", + "field_id": 858122, + "groupings": [ + { + "group_id": "19411:Invoice Line Item", + "group_name": "Invoice Line Item", + "group_index": 5 + } + ], + "confidence": { + "Invoice Tax": 1.2959037576365517e-7, + "Vendor Name": 1.0360611923942997e-8, + "Invoice Date": 1.4731193687111954e-7, + "Invoice Total": 5.365328092921118e-8, + "Invoice Number": 4.397265485067692e-8, + "Line Item Name": 4.536295961088399e-8, + "Line Item Total": 6.116934514466266e-8, + "Invoice Subtotal": 8.046448840559606e-8, + "Line Item Quantity": 0.9999994039535522 + }, + "normalized": { + "end": 501, + "text": "Updated!", + "start": 499, + "status": "SUCCESS", + "formatted": "Updated!", + "structured": null, + "validation": [ + { + "error_message": null, + "validation_type": "TYPE_CONVERSION", + "validation_status": "SUCCESS" + } + ], + "comparison_type": "string", + "comparison_value": "10" + }, + "location_type": "exact" + }, + { + "text": "Updated!", + "label": "Line Item Total", + "spans": [ + { "end": 507, "start": 502, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:7", + "field_id": 858123, + "groupings": [ + { + "group_id": "19411:Invoice Line Item", + "group_name": "Invoice Line Item", + "group_index": 5 + } + ], + "confidence": { + "Invoice Tax": 5.000234892804656e-9, + "Vendor Name": 1.9458024524965367e-8, + "Invoice Date": 2.487784023230688e-8, + "Invoice Total": 1.1822152146123699e-8, + "Invoice Number": 5.1960333813383386e-9, + "Line Item Name": 3.007375326546935e-8, + "Line Item Total": 0.9999997615814209, + "Invoice Subtotal": 2.7321593876195038e-8, + "Line Item Quantity": 8.39153528886527e-8 + }, + "normalized": { + "end": 507, + "text": "Updated!", + "start": 502, + 
"status": "SUCCESS", + "formatted": "Updated!", + "structured": null, + "validation": [ + { + "error_message": null, + "validation_type": "TYPE_CONVERSION", + "validation_status": "SUCCESS" + } + ], + "comparison_type": "string", + "comparison_value": "$0.00" + }, + "location_type": "exact" + }, + { + "text": "Updated!", + "label": "Line Item Quantity", + "spans": [ + { "end": 520, "start": 519, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:8", + "field_id": 858122, + "groupings": [ + { + "group_id": "19411:Invoice Line Item", + "group_name": "Invoice Line Item", + "group_index": 6 + } + ], + "confidence": { + "Invoice Tax": 1.3488433125985466e-7, + "Vendor Name": 9.074271112297083e-9, + "Invoice Date": 1.3724279313009902e-7, + "Invoice Total": 5.79847068138406e-8, + "Invoice Number": 4.43608350053637e-8, + "Line Item Name": 4.117256224844823e-8, + "Line Item Total": 8.499782211401907e-8, + "Invoice Subtotal": 9.689298252624212e-8, + "Line Item Quantity": 0.9999994039535522 + }, + "normalized": { + "end": 520, + "text": "Updated!", + "start": 519, + "status": "SUCCESS", + "formatted": "Updated!", + "structured": null, + "validation": [ + { + "error_message": null, + "validation_type": "TYPE_CONVERSION", + "validation_status": "SUCCESS" + } + ], + "comparison_type": "string", + "comparison_value": "5" + }, + "location_type": "exact" + }, + { + "text": "Updated!", + "label": "Line Item Total", + "spans": [ + { "end": 527, "start": 521, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:9", + "field_id": 858123, + "groupings": [ + { + "group_id": "19411:Invoice Line Item", + "group_name": "Invoice Line Item", + "group_index": 6 + } + ], + "confidence": { + "Invoice Tax": 5.606381581202413e-9, + "Vendor Name": 2.1103359060248295e-8, + "Invoice Date": 3.019848193730468e-8, + "Invoice Total": 1.3499708018116507e-8, + "Invoice Number": 5.982286666039727e-9, + "Line Item Name": 3.2691968243625524e-8, + "Line Item Total": 0.9999997615814209, + "Invoice Subtotal": 4.329503866529194e-8, + "Line Item Quantity": 8.082533042852447e-8 + }, + "normalized": { + "end": 527, + "text": "Updated!", + "start": 521, + "status": "SUCCESS", + "formatted": "Updated!", + "structured": null, + "validation": [ + { + "error_message": null, + "validation_type": "TYPE_CONVERSION", + "validation_status": "SUCCESS" + } + ], + "comparison_type": "string", + "comparison_value": "$25.00" + }, + "location_type": "exact" + }, + { + "text": "Updated!", + "label": "Invoice Subtotal", + "spans": [ + { "end": 546, "start": 537, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:10", + "field_id": 858118, + "groupings": [], + "confidence": { + "Invoice Tax": 3.089213507223576e-8, + "Vendor Name": 9.200683770416163e-9, + "Invoice Date": 9.50030809576674e-9, + "Invoice Total": 4.5628176792433806e-9, + "Invoice Number": 3.281632743323826e-8, + "Line Item Name": 4.575837042608555e-9, + "Line Item Total": 3.6726991226032624e-8, + "Invoice Subtotal": 1.0, + "Line Item Quantity": 1.3403760767971562e-8 + }, + "normalized": { + "end": 546, + "text": "Updated!", + "start": 537, + "status": "SUCCESS", + "formatted": "Updated!", + "structured": null, + "validation": [ + { + "error_message": null, + "validation_type": "TYPE_CONVERSION", + "validation_status": "SUCCESS" + } + ], + "comparison_type": "string", + "comparison_value": "$1,225.00" + }, + "location_type": "exact" + }, + { + "text": "Updated!", + "label": "Invoice Tax", + "spans": [ + { "end": 563, "start": 557, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:11", + 
"field_id": 858125, + "groupings": [], + "confidence": { + "Invoice Tax": 0.9999997615814209, + "Vendor Name": 7.856115757931548e-9, + "Invoice Date": 6.549097975039331e-8, + "Invoice Total": 1.009439642984944e-8, + "Invoice Number": 1.693945250735851e-8, + "Line Item Name": 6.045966927104018e-8, + "Line Item Total": 2.1588691723195552e-8, + "Invoice Subtotal": 4.138750497872934e-8, + "Line Item Quantity": 4.567436207025821e-8 + }, + "normalized": { + "end": 563, + "text": "Updated!", + "start": 557, + "status": "SUCCESS", + "formatted": "Updated!", + "structured": null, + "validation": [ + { + "error_message": null, + "validation_type": "TYPE_CONVERSION", + "validation_status": "SUCCESS" + } + ], + "comparison_type": "string", + "comparison_value": "$76.56" + }, + "location_type": "exact" + }, + { + "text": "Updated!", + "label": "Invoice Total", + "spans": [ + { "end": 579, "start": 570, "page_num": 0 } + ], + "span_id": "93479:c:19411:idx:12", + "field_id": 858124, + "groupings": [], + "confidence": { + "Invoice Tax": 1.6204319663870592e-8, + "Vendor Name": 3.8579965888629886e-8, + "Invoice Date": 3.388734981513153e-8, + "Invoice Total": 0.9999997019767761, + "Invoice Number": 4.117489993404888e-8, + "Line Item Name": 7.487341413536797e-9, + "Line Item Total": 7.838348636823866e-8, + "Invoice Subtotal": 2.353094430418423e-8, + "Line Item Quantity": 4.3006924244082256e-8 + }, + "normalized": { + "end": 579, + "text": "Updated!", + "start": 570, + "status": "SUCCESS", + "formatted": "Updated!", + "structured": null, + "validation": [ + { + "error_message": null, + "validation_type": "TYPE_CONVERSION", + "validation_status": "SUCCESS" + } + ], + "comparison_type": "string", + "comparison_value": "$1,301.56" + }, + "location_type": "exact" + } + ] + } + }, + "rejected": { + "models": {}, + "components": { "19409": [], "19411": [] } + } + } + ], + "reviews": { + "69196": { + "review_id": 69196, + "reviewer_id": 422, + "review_notes": null, + "review_rejected": false, + "review_type": "auto" + } + }, + "errored_files": {} +} diff --git a/tests/results/test_document.py b/tests/results/test_document.py index df9862da..6e71855b 100644 --- a/tests/results/test_document.py +++ b/tests/results/test_document.py @@ -40,6 +40,14 @@ def test_empy_v3_sections() -> None: } } }, + "component_metadata": { + "456": { + "id": 456, + "component_type": "static_model", + "task_type": "annotation", + "name": "Empty Model Section" + } + }, "submission_results": [ { "submissionfile_id": 0, @@ -49,6 +57,11 @@ def test_empy_v3_sections() -> None: "ORIGINAL": { "123": [] } + }, + "component_results": { + "ORIGINAL": { + "456": [] + } } } ] @@ -61,6 +74,8 @@ def test_empy_v3_sections() -> None: "model_results": { "123": [], }, - "component_results": {}, + "component_results": { + "456": [], + }, } ] diff --git a/tests/results/test_predictionlist.py b/tests/results/test_predictionlist.py index 5df12291..4f15cfa4 100644 --- a/tests/results/test_predictionlist.py +++ b/tests/results/test_predictionlist.py @@ -27,6 +27,7 @@ def document() -> Document: error="", traceback="", _model_sections=frozenset({"124", "123", "122", "121"}), + _component_sections=frozenset(), ) From 3706eb93a7ddd2676be8acb7c315ed00deb9862f Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Fri, 25 Apr 2025 12:02:46 -0500 Subject: [PATCH 08/14] Improve the specificity of errors raised by `results.utils.get()` --- indico_toolkit/results/utils.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git 
a/indico_toolkit/results/utils.py b/indico_toolkit/results/utils.py index 3b016bb7..1c65d738 100644 --- a/indico_toolkit/results/utils.py +++ b/indico_toolkit/results/utils.py @@ -1,33 +1,35 @@ from collections.abc import Iterable, Iterator from typing import Callable, TypeVar -from .errors import ResultError - Value = TypeVar("Value") def get(result: object, value_type: "type[Value]", *keys: "str | int") -> Value: """ - Return the value obtained by traversing `result` using `keys` as indices if that - value has type `value_type`. Raise a `ResultError` otherwise. + Return the value of type `value_type` obtained by traversing `result` using `keys`. + Raise an error if a key doesn't exist or the value has the wrong type. """ for key in keys: - if isinstance(key, str) and isinstance(result, dict) and key in result: - result = result[key] - elif isinstance(key, int) and isinstance(result, list) and key < len(result): - result = result[key] + if isinstance(result, dict): + if key in result: + result = result[key] + else: + raise KeyError(f"`{result!r}` does not contain key `{key!r}`") + elif isinstance(result, list): + if isinstance(key, int): + if 0 >= key < len(result): + result = result[key] + else: + raise IndexError(f"`{result!r}` does not contain index `{key!r}`") + else: + TypeError(f"`{result!r}` can not be indexed with `{key!r}`") else: - raise ResultError( - f"result object `{type(result)!r}` does not contain key `{key!r}`" - ) + TypeError(f"`{result!r}` is not a container") if isinstance(result, value_type): return result else: - raise ResultError( - f"result object `{type(result)!r}` does not have a value for " - f"key `{key!r}` of type `{value_type}`" - ) + raise TypeError(f"`{result!r}` is not of type `{value_type}`") def has(result: object, value_type: "type[Value]", *keys: "str | int") -> bool: From cc981d6e443a627e6f6fb8ba59ccc3c3c60f71fe Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Fri, 25 Apr 2025 12:04:00 -0500 Subject: [PATCH 09/14] Parse and preserve full spans for classify + unbundle predictions --- .../results/predictions/unbundling.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/indico_toolkit/results/predictions/unbundling.py b/indico_toolkit/results/predictions/unbundling.py index c6d2b81e..c6512325 100644 --- a/indico_toolkit/results/predictions/unbundling.py +++ b/indico_toolkit/results/predictions/unbundling.py @@ -4,6 +4,7 @@ from ..review import Review from ..utils import get, omit from .prediction import Prediction +from .span import Span if TYPE_CHECKING: from typing import Any @@ -14,7 +15,14 @@ @dataclass class Unbundling(Prediction): - pages: "list[int]" + spans: "list[Span]" + + @property + def pages(self) -> "tuple[int, ...]": + """ + Return the pages covered by `self.spans`. 
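+
+        For example (illustrative), an unbundling whose spans fall on pages 0
+        and 1 yields `pages == (0, 1)`.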
+ """ + return tuple(span.page for span in self.spans) @staticmethod def from_v3_dict( @@ -32,10 +40,7 @@ def from_v3_dict( review=review, label=get(prediction, str, "label"), confidences=get(prediction, dict, "confidence"), - pages=[ - get(span, int, "page_num") - for span in get(prediction, list, "spans") # fmt: skip - ], + spans=sorted(map(Span.from_dict, get(prediction, list, "spans"))), extras=omit(prediction, "confidence", "label", "spans"), ) @@ -47,5 +52,5 @@ def to_v3_dict(self) -> "dict[str, Any]": **self.extras, "label": self.label, "confidence": self.confidences, - "spans": [{"page_num": page} for page in self.pages], + "spans": [span.to_dict() for span in self.spans], } From 1d9671bf78b9c0f2318101a3584fe4ead0f105b5 Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Fri, 25 Apr 2025 12:04:30 -0500 Subject: [PATCH 10/14] Support `group = next(group)` idiom for linked label groups --- indico_toolkit/results/predictions/group.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/indico_toolkit/results/predictions/group.py b/indico_toolkit/results/predictions/group.py index 3fb94960..e9e000d0 100644 --- a/indico_toolkit/results/predictions/group.py +++ b/indico_toolkit/results/predictions/group.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, replace from typing import TYPE_CHECKING from ..utils import get @@ -13,6 +13,14 @@ class Group: name: str index: int + def __next__(self) -> "Group": + """ + Return the `Group` with the next index. + + Supports `group = next(group)`. + """ + return replace(self, index=self.index + 1) + @staticmethod def from_dict(group: object) -> "Group": return Group( From 3a4bca78490c326b6fabecf163527c1d8c09d487 Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Mon, 5 May 2025 09:01:07 -0500 Subject: [PATCH 11/14] Improve the specificity of errors raised by `results.utils.get()` more Avoid including an entire nested JSON structure when possible: - Only include dict keys instead of the whole dict, - Only include list length instead of the whole list, - Only include the object type that can't be traversed, etc. 
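
For illustration only (not part of the diff below), a rough sketch of the
intended behavior, assuming the `get()` helper from
`indico_toolkit/results/utils.py` and a hypothetical prediction dict:

    from indico_toolkit.results.utils import get

    prediction = {"label": "Vendor Name", "confidence": {"Vendor Name": 0.99}}

    assert get(prediction, str, "label") == "Vendor Name"

    try:
        get(prediction, str, "spans")
    except KeyError as error:
        # Only the available keys are reported, roughly:
        # 'spans' not in dict_keys(['label', 'confidence'])
        print(error)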
--- indico_toolkit/results/utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/indico_toolkit/results/utils.py b/indico_toolkit/results/utils.py index 1c65d738..d16ddd27 100644 --- a/indico_toolkit/results/utils.py +++ b/indico_toolkit/results/utils.py @@ -14,22 +14,22 @@ def get(result: object, value_type: "type[Value]", *keys: "str | int") -> Value: if key in result: result = result[key] else: - raise KeyError(f"`{result!r}` does not contain key `{key!r}`") + raise KeyError(f"{key!r} not in {result.keys()!r}") elif isinstance(result, list): if isinstance(key, int): if 0 >= key < len(result): result = result[key] else: - raise IndexError(f"`{result!r}` does not contain index `{key!r}`") + raise IndexError(f"list index {key} out of range {len(result)}") else: - TypeError(f"`{result!r}` can not be indexed with `{key!r}`") + TypeError(f"list cannot be indexed with {key!r}") else: - TypeError(f"`{result!r}` is not a container") + TypeError(f"{type(result)} cannot be traversed") if isinstance(result, value_type): return result else: - raise TypeError(f"`{result!r}` is not of type `{value_type}`") + raise TypeError(f"value `{result!r}` does not have expected type {value_type}") def has(result: object, value_type: "type[Value]", *keys: "str | int") -> bool: From 5712724a74626c22ad93c907698f9227a954914b Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Mon, 5 May 2025 09:14:32 -0500 Subject: [PATCH 12/14] Clean up some error messages --- indico_toolkit/results/__init__.py | 2 +- indico_toolkit/results/predictionlist.py | 2 +- indico_toolkit/results/predictions/__init__.py | 4 ++-- indico_toolkit/results/result.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/indico_toolkit/results/__init__.py b/indico_toolkit/results/__init__.py index 97e6acc2..93872c50 100644 --- a/indico_toolkit/results/__init__.py +++ b/indico_toolkit/results/__init__.py @@ -101,4 +101,4 @@ def _load(result: object) -> Result: elif file_version == 3: return Result.from_v3_dict(result) else: - raise ResultError(f"unsupported file version `{file_version!r}`") + raise ResultError(f"unsupported file version `{file_version}`") diff --git a/indico_toolkit/results/predictionlist.py b/indico_toolkit/results/predictionlist.py index cbea64f8..34db0b62 100644 --- a/indico_toolkit/results/predictionlist.py +++ b/indico_toolkit/results/predictionlist.py @@ -337,7 +337,7 @@ def to_changes(self, result: "Result") -> "Any": elif result.version == 3: return self.to_v3_changes(result.documents) else: - raise ValueError(f"unsupported file version `{result.version!r}`") + raise ValueError(f"unsupported file version `{result.version}`") def to_v1_changes(self, document: "Document") -> "dict[str, Any]": """ diff --git a/indico_toolkit/results/predictions/__init__.py b/indico_toolkit/results/predictions/__init__.py index aa652770..b385c80f 100644 --- a/indico_toolkit/results/predictions/__init__.py +++ b/indico_toolkit/results/predictions/__init__.py @@ -62,7 +62,7 @@ def from_v1_dict( elif model.type == FORM_EXTRACTION: return FormExtraction.from_v1_dict(document, model, review, prediction) else: - raise ResultError(f"unsupported v1 model type `{model.type!r}`") + raise ResultError(f"unsupported v1 model type {model.type!r}") def from_v3_dict( @@ -85,4 +85,4 @@ def from_v3_dict( elif model.type == UNBUNDLING: return Unbundling.from_v3_dict(document, model, review, prediction) else: - raise ResultError(f"unsupported v3 model type `{model.type!r}`") + raise ResultError(f"unsupported v3 model 
type {model.type!r}") diff --git a/indico_toolkit/results/result.py b/indico_toolkit/results/result.py index ceefb5cd..6325c715 100644 --- a/indico_toolkit/results/result.py +++ b/indico_toolkit/results/result.py @@ -178,7 +178,7 @@ def from_v3_dict(result: object) -> "Result": component_metadata, str, component_id, "component_type" ) raise ResultError( - f"unsupported component type `{component_type!r}` " + f"unsupported component type {component_type!r} " f"for component {component_id}" ) else: From 2f7cc38f7de5e203a43355286cf97cec97a8923b Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Thu, 8 May 2025 11:46:59 -0500 Subject: [PATCH 13/14] Fix errors in `utils.get()` and `utils.has()` --- indico_toolkit/results/utils.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/indico_toolkit/results/utils.py b/indico_toolkit/results/utils.py index d16ddd27..97f70897 100644 --- a/indico_toolkit/results/utils.py +++ b/indico_toolkit/results/utils.py @@ -17,19 +17,19 @@ def get(result: object, value_type: "type[Value]", *keys: "str | int") -> Value: raise KeyError(f"{key!r} not in {result.keys()!r}") elif isinstance(result, list): if isinstance(key, int): - if 0 >= key < len(result): + if 0 <= key < len(result): result = result[key] else: - raise IndexError(f"list index {key} out of range {len(result)}") + raise IndexError(f"{key} out of range [0,{len(result)})") else: - TypeError(f"list cannot be indexed with {key!r}") + raise TypeError(f"list can't be indexed with {key!r}") else: - TypeError(f"{type(result)} cannot be traversed") + raise TypeError(f"{type(result)} can't be traversed") if isinstance(result, value_type): return result else: - raise TypeError(f"value `{result!r}` does not have expected type {value_type}") + raise TypeError(f"value `{result!r}` doesn't have type {value_type}") def has(result: object, value_type: "type[Value]", *keys: "str | int") -> bool: @@ -37,9 +37,9 @@ def has(result: object, value_type: "type[Value]", *keys: "str | int") -> bool: Check if `result` can be traversed using `keys` to a value of type `value_type`. """ for key in keys: - if isinstance(key, str) and isinstance(result, dict) and key in result: + if isinstance(result, dict) and key in result: result = result[key] - elif isinstance(key, int) and isinstance(result, list) and key < len(result): + elif isinstance(result, list) and isinstance(key, int) and 0 <= key < len(result): # fmt: skip # noqa: E501 result = result[key] else: return False From e7e53449b14535e70a055ecc03f71e30bc69b71b Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Thu, 8 May 2025 12:03:24 -0500 Subject: [PATCH 14/14] Bump version and update changelog --- CHANGELOG.md | 13 +++++++++++++ indico_toolkit/__init__.py | 2 +- pyproject.toml | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e268c7ff..2d89f631 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -153,3 +153,16 @@ This is the first major version release tested to work on Indico 6.X. * Added `etloutput` module. * Refactored `retry` decorator with asyncio support. * Switched to Poetry for packaging and dependency management. + +## 6.14.1 3/20/25 + +* Improved Poetry and Poe configuration. +* Update more attributes when prediction text changes to avoid TAK normalization issues. + +## 6.14.2 5/8/25 + +* Fixed Mypy configuration. +* Removed `AutoPopulator`, `CustomOcr`, `Datasets`, `DocExtraction`, `Reviewer` classes. +* Added support for imported models using IPA 7.2 `component_metadata` section. 
+* Parse and preserve full span information for `Unbundling` predictions.
+* Add `group = next(group)` idiom.
diff --git a/indico_toolkit/__init__.py b/indico_toolkit/__init__.py
index 6148a3cd..31d417c4 100644
--- a/indico_toolkit/__init__.py
+++ b/indico_toolkit/__init__.py
@@ -21,4 +21,4 @@
     "ToolkitStaggeredLoopError",
     "ToolkitStatusError",
 )
-__version__ = "6.14.1"
+__version__ = "6.14.2"
diff --git a/pyproject.toml b/pyproject.toml
index ba174db3..5bb3043a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ authors = [
 readme = "README.md"
 urls = { source = "https://github.com/IndicoDataSolutions/Indico-Solutions-Toolkit" }
 requires-python = ">=3.10"
-version = "6.14.1"
+version = "6.14.2"
 dependencies = ["indico-client (>=6.14.0,<7.0.0)"]
 
 [project.optional-dependencies]