Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
a96e2b5
feat(medcat-service): Update gradio version
alhendrickson Jan 8, 2026
476ebff
build(medcat-service): Update fastapi dependency
alhendrickson Jan 8, 2026
7a9e413
feat(medcat-servie): Fix gradio version root path bug
alhendrickson Jan 8, 2026
3f6e38c
build(medcat-service): Add hot module reloader. Update gradio demo
alhendrickson Jan 8, 2026
e452614
docs(medcat-service): Add dev readme. refactor gradio to extract the …
alhendrickson Jan 8, 2026
9e436f2
feat(medcat-service): Add anoncat demo text
alhendrickson Jan 8, 2026
e7c3a11
feat(medcat-service): Move out of main.py. Configure overflow scrollb…
alhendrickson Jan 8, 2026
eecf7b9
test(medcat-service): Create gradio logic tests. Split into its own file
alhendrickson Jan 8, 2026
c055092
refactor(medcat-service): Update start_service_debug.sh for clarity a…
alhendrickson Jan 8, 2026
d78f2e3
feat(medcat-service): Enhance Gradio demo layout and add logging for …
alhendrickson Jan 8, 2026
fea70a6
Merge branch 'main' into feat/medcat-service/gradio-uplift
alhendrickson Feb 2, 2026
5c443f1
fix(medct-service): fix syntax
alhendrickson Feb 2, 2026
760abd1
build(medct-service): Update gradio version
alhendrickson Feb 2, 2026
8e179fe
refactor(medcat-service): Move example files to txt files
alhendrickson Feb 2, 2026
f1daebf
feat(medcat-service): In demo Click on annotation to view details
alhendrickson Feb 2, 2026
a9e9227
feat(medcat-service): In demo Click on annotation to view details - text
alhendrickson Feb 2, 2026
6d0f7c7
feat(medcat-service): In demo Click on annotation to view details - text
alhendrickson Feb 2, 2026
3ebfdc1
feat(medcat-service): In demo Click on annotation to view details - ruff
alhendrickson Feb 2, 2026
5da1b9d
feat(medcat-service): In demo move resource txt files to subfolder
alhendrickson Feb 2, 2026
ec2ff6c
feat(medcat-service): In demo move resource txt files to subfolder - …
alhendrickson Feb 2, 2026
93d8095
feat(medcat-service): In demo move resource txt files to subfolder - …
alhendrickson Feb 2, 2026
cdc4a6a
feat(medcat-service): Support boolean redact flag in deid processor a…
alhendrickson Feb 2, 2026
b708c7c
feat(medcat-service): Fix mypy errors
alhendrickson Feb 3, 2026
2e9e5c9
feat(medcat-service): Fix mypy errors
alhendrickson Feb 3, 2026
bca7d60
feat(medcat-service): Fix unit tests
alhendrickson Feb 3, 2026
aeadb78
feat(medcat-service): Fix unit tests
alhendrickson Feb 3, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions medcat-service/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -389,3 +389,17 @@ The main settings that can be used to improve the performance when querying larg
MedCAT parameters are defined in selected `envs/medcat*` file.

For details on available MedCAT parameters please refer to [the official GitHub repository](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/).

## Local development

For local development, set up a Python virtual environment, install dependencies with pip, and make sure to also install the local MedCAT core library (the `medcat-v2` folder) in editable mode.

```bash
python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt -r requirements-dev.txt
SETUPTOOLS_SCM_PRETEND_VERSION="2.4.0-dev0" pip install -e "../medcat-v2[meta-cat,spacy]"
bash start_service_debug.sh

# Service will run on localhost:8000
```
2 changes: 1 addition & 1 deletion medcat-service/medcat_service/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class Settings(BaseSettings):
)

app_root_path: str = Field(
default="/",
default="",
description="The Root Path for the FastAPI App",
examples=["/medcat-service"],
)
Expand Down
16 changes: 16 additions & 0 deletions medcat-service/medcat_service/demo/demo_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import importlib.resources
from functools import cache


@cache
def _read_file(filename: str) -> str:
    """Return the UTF-8 text of ``resources/<filename>`` inside this package.

    The package is taken from ``__package__``, falling back to
    ``'medcat_service.demo'`` when that is empty. Results are memoised per
    filename by ``functools.cache``.
    """
    package_name = __package__ or 'medcat_service.demo'
    resources_dir = importlib.resources.files(package_name).joinpath('resources')
    return resources_dir.joinpath(filename).read_text(encoding='utf-8')


# Demo texts, read once at import time from this package's ``resources``
# folder via ``_read_file``.
short_example = _read_file('short_example.txt')
long_example = _read_file('long_example.txt')
anoncat_example = _read_file('anoncat_example.txt')
article_footer = _read_file('article_footer.txt')
anoncat_help_content = _read_file('anoncat_help_content.txt')
176 changes: 176 additions & 0 deletions medcat-service/medcat_service/demo/demo_logic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
"""
This module provides conversion utilities between the MedCAT output format
and the exact format expected by Gradio components, specifically aligning
with the output schema of Hugging Face Transformers pipelines (e.g., for
NER highlighting). Use these definitions and helper functions to bridge
MedCAT's annotation results and Gradio's interactive demo expectations.
"""

import logging

from pydantic import BaseModel

from medcat_service.dependencies import get_medcat_processor, get_settings
from medcat_service.types import ProcessAPIInputContent, ProcessErrorsResult, ProcessResult
from medcat_service.types_entities import Entity

logger = logging.getLogger(__name__)


class EntityAnnotation(BaseModel):
    """
    Expected data format for NER in gradio.

    Describes a single detected entity. Instances are built from MedCAT
    entities by ``convert_annotation_to_ner_model``.
    """

    entity: str  # filled from the MedCAT "cui" field ("UNKNOWN" when absent)
    score: float  # filled from the MedCAT "acc" field (0.0 when absent)
    index: int  # integer form of the entity's key in the annotations dict
    word: str  # filled from the MedCAT "detected_name" field ("" when absent)
    start: int  # filled from the MedCAT "start" field (-1 when absent)
    end: int  # filled from the MedCAT "end" field (-1 when absent)


# Column titles, in the same order as the fields of EntityAnnotationDisplay
# (the order in which convert_display_model_to_list_of_lists emits each row).
# Presumably used as the datatable header row - confirm against the UI code.
headers = ["Pretty Name", "Identifier", "Confidence Score", "Start Index", "End Index", "ID"]


class EntityAnnotationDisplay(BaseModel):
    """
    Display data format for use in a datatable.

    Field declaration order matters: rows are generated by iterating
    ``model_fields``, so reordering fields reorders the table columns.
    """

    pretty_name: str  # filled from the MedCAT "pretty_name" field ("" when absent)
    identifier: str  # filled from the MedCAT "cui" field ("UNKNOWN" when absent)
    score: float  # filled from the MedCAT "acc" field (0.0 when absent)
    start: int  # filled from the MedCAT "start" field (-1 when absent)
    end: int  # filled from the MedCAT "end" field (-1 when absent)
    id: int  # filled from the MedCAT "id" field (-1 when absent)
    # TODO: meta annotations (meta_anns) are missing from this model.


class EntityResponse(BaseModel):
    """
    Expected data format of gradio highlightedtext component.

    Pairs a text with the entity annotations detected in it.
    """

    entities: list[EntityAnnotation]  # one item per detected entity
    text: str  # the text the entities refer to


def convert_annotation_to_ner_model(entity: Entity, index: int) -> EntityAnnotation:
    """Map one MedCAT entity onto the Gradio NER annotation model.

    Keys missing from ``entity`` fall back to placeholders: ``"UNKNOWN"``
    for the CUI, ``0.0`` for the accuracy, an empty detected name and
    ``-1`` for both offsets.

    Args:
        entity: A single MedCAT entity.
        index: Value stored in the annotation's ``index`` field.

    Returns:
        The populated ``EntityAnnotation``.
    """
    cui = entity.get("cui", "UNKNOWN")
    accuracy = entity.get("acc", 0.0)
    detected_name = entity.get("detected_name", "")
    span_start = entity.get("start", -1)
    span_end = entity.get("end", -1)
    return EntityAnnotation(
        entity=cui,
        score=accuracy,
        index=index,
        word=detected_name,
        start=span_start,
        end=span_end,
    )


def convert_annotation_to_display_model(entity: Entity) -> EntityAnnotationDisplay:
    """Map one MedCAT entity onto the datatable display model.

    Keys missing from ``entity`` fall back to placeholders: an empty pretty
    name, ``"UNKNOWN"`` for the CUI, ``0.0`` for the accuracy and ``-1`` for
    the offsets and the id.

    Args:
        entity: A single MedCAT entity.

    Returns:
        The populated ``EntityAnnotationDisplay``.
    """
    pretty_name = entity.get("pretty_name", "")
    cui = entity.get("cui", "UNKNOWN")
    accuracy = entity.get("acc", 0.0)
    span_start = entity.get("start", -1)
    span_end = entity.get("end", -1)
    entity_id = entity.get("id", -1)
    # TODO: meta annotations are not carried over yet. The reference
    # implementation (medcat-demo-app/webapp/demo/views.py) copies
    # meta_anns[name]['value'] onto the row for every name in
    # entity.get("meta_anns", {}).
    return EntityAnnotationDisplay(
        pretty_name=pretty_name,
        identifier=cui,
        score=accuracy,
        start=span_start,
        end=span_end,
        id=entity_id,
    )


def convert_entity_dict_to_annotations(entity_dict_list: list[dict[str, Entity]]) -> list[EntityAnnotation]:
    """Flatten a list of MedCAT entity dicts into Gradio NER annotations.

    Every dict key is converted with ``int`` and used as the annotation's
    ``index``; the matching value is the entity that gets converted.

    Args:
        entity_dict_list: Dicts mapping an integer-like string key to an entity.

    Returns:
        One ``EntityAnnotation`` per entity, in iteration order.
    """
    return [
        convert_annotation_to_ner_model(entity, index=int(key))
        for entity_dict in entity_dict_list
        for key, entity in entity_dict.items()
    ]


def convert_entity_dict_to_display_model(entity_dict_list: list[dict[str, Entity]]) -> list[EntityAnnotationDisplay]:
    """Flatten a list of MedCAT entity dicts into datatable display models.

    Only the dict values are used; the keys are ignored.

    Args:
        entity_dict_list: Dicts whose values are MedCAT entities.

    Returns:
        One ``EntityAnnotationDisplay`` per entity, in iteration order.
    """
    logger.debug("Converting entity dict to display model")
    return [
        convert_annotation_to_display_model(entity)
        for entity_dict in entity_dict_list
        for entity in entity_dict.values()
    ]


def convert_display_model_to_list_of_lists(entity_display_model: list[EntityAnnotationDisplay]) -> list[list[str]]:
    """Turn display models into rows of strings, one row per entity.

    Each row holds ``str()`` of every field value, in the order the fields
    are declared on ``EntityAnnotationDisplay``.

    Args:
        entity_display_model: The display models to convert.

    Returns:
        A list of rows; empty when no models are given.
    """
    column_names = list(EntityAnnotationDisplay.model_fields)
    rows: list[list[str]] = []
    for display_entity in entity_display_model:
        row = [str(getattr(display_entity, column)) for column in column_names]
        rows.append(row)
    return rows


def perform_named_entity_resolution(
    input_text: str, redact: bool | None = None
) -> tuple[dict, list[list[str]], str] | tuple[None, None, None]:
    """
    Performs clinical coding by processing the input text with MedCAT to extract and
    annotate medical concepts (entities).

    This method is used as the main function for the Gradio MedCAT demo and MCP server,
    enabling users to input free text and receive automatic annotation and coding of clinical entities.

    Args:
        input_text (str): The input text to be processed and annotated for medical entities by MedCAT.
        redact (bool | None): Forwarded unchanged to the processor's ``process_content`` call.
            Defaults to None.

    Returns:
        Tuple:
            - dict: A dictionary following the NER response model (EntityResponse), containing the
              original text and the list of detected entities.
            - list[list[str]]: A datatable-compatible list of lists, where each sublist represents an
              entity annotation and its attributes for display purposes.
            - str: The ``text`` attribute of the processing result.

        When ``input_text`` is empty or contains only whitespace, ``(None, None, None)`` is
        returned and the processor is not called.

    Raises:
        ValueError: If the processor returns a ProcessErrorsResult. The message joins the
            reported errors with "; ", or says that an unknown error occurred when none are given.
    """
    logger.debug("Performing named entity resolution")
    if not input_text or not input_text.strip():
        # Nothing to annotate: one empty value per output slot.
        return None, None, None

    processor = get_medcat_processor(get_settings())
    # Named ``api_input`` (not ``input``) so the builtin is not shadowed.
    api_input = ProcessAPIInputContent(text=input_text)

    process_result = processor.process_content(api_input.model_dump(), redact=redact)

    if isinstance(process_result, ProcessErrorsResult):
        error_msg = (
            "; ".join(process_result.errors) if process_result.errors else "Unknown error occurred during processing"
        )
        raise ValueError(f"Processing failed: {error_msg}")
    # Past the error check the result can only be the success type.
    result: ProcessResult = process_result

    entity_ner_format: list[EntityAnnotation] = convert_entity_dict_to_annotations(result.annotations)

    logger.debug("Converting entity dict to display model")
    annotations_as_display_format = convert_entity_dict_to_display_model(result.annotations)
    response_datatable_format = convert_display_model_to_list_of_lists(annotations_as_display_format)

    # The highlighted-text payload pairs the entities with the text that was submitted.
    response = EntityResponse(entities=entity_ner_format, text=input_text)
    return response.model_dump(), response_datatable_format, result.text


def medcat_demo_perform_named_entity_resolution(
    input_text: str,
) -> tuple[dict | None, list[list[str]] | None]:
    """
    Performs named entity resolution for the MedCAT demo.

    Args:
        input_text (str): The text to annotate.

    Returns:
        Tuple:
            - The first element returned by ``perform_named_entity_resolution``
              (the NER response dictionary).
            - The second element returned by it (the datatable rows).

        Both are None when ``input_text`` is empty or only whitespace.

    Raises:
        ValueError: Propagated from ``perform_named_entity_resolution`` when processing fails.
    """
    # The third element (the processed text) is not needed by this demo.
    response, datatable, _ = perform_named_entity_resolution(input_text)
    return response, datatable


def anoncat_demo_perform_deidentification(
    input_text: str, redact: bool
) -> tuple[dict, list[list[str]], str] | tuple[None, None, None]:
    """
    Performs deidentification for the AnonCAT demo.

    Args:
        input_text (str): The text to process.
        redact (bool): Passed through as the ``redact`` argument of
            ``perform_named_entity_resolution``.

    Returns:
        The unchanged three-element tuple returned by ``perform_named_entity_resolution``:
        the NER response dictionary, the datatable rows and the processed text, or
        ``(None, None, None)`` when ``input_text`` is empty or only whitespace.

    Raises:
        ValueError: Propagated from ``perform_named_entity_resolution`` when processing fails.
    """
    return perform_named_entity_resolution(input_text, redact=redact)
Loading
Loading