diff --git a/medcat-trainer/webapp/api/api/permissions.py b/medcat-trainer/webapp/api/api/permissions.py index e8995c01f..2a2f04ff3 100644 --- a/medcat-trainer/webapp/api/api/permissions.py +++ b/medcat-trainer/webapp/api/api/permissions.py @@ -1,4 +1,7 @@ from rest_framework import permissions +from rest_framework.exceptions import PermissionDenied +from .models import ProjectAnnotateEntities, ProjectGroup + class IsReadOnly(permissions.BasePermission): """ @@ -9,3 +12,25 @@ def has_permission(self, request, view): # Read permissions are allowed to any request, # so we'll always allow GET, HEAD or OPTIONS requests. return request.method in permissions.SAFE_METHODS + + +def is_project_admin(user, project): + """ + Check if a user is an admin of a project. + A user is a project admin if: + 1. They are a member of the project, OR + 2. They are an administrator of the project's group (if the project has a group) + 3. They are a superuser/staff + """ + if user.is_superuser or user.is_staff: + return True + + # Check if user is a member of the project + if project.members.filter(id=user.id).exists(): + return True + + # Check if user is an administrator of the project's group + if project.group and project.group.administrators.filter(id=user.id).exists(): + return True + + return False diff --git a/medcat-trainer/webapp/api/api/serializers.py b/medcat-trainer/webapp/api/api/serializers.py index 89ff70fea..49fd2cd16 100644 --- a/medcat-trainer/webapp/api/api/serializers.py +++ b/medcat-trainer/webapp/api/api/serializers.py @@ -60,6 +60,26 @@ class Meta: class DatasetSerializer(serializers.ModelSerializer): + """ + Serializer for Dataset model. + + Schema Requirements: + - File format: .csv or .xlsx + - Required columns: + * name: A unique identifier for each document (string) + * text: The free-text content to annotate (string) + - Additional columns are allowed but will be ignored + + Example CSV structure: + name,text + doc001,"This is the first document to annotate." + doc002,"This is the second document with medical text." + """ + + original_file = serializers.FileField( + help_text="Upload a .csv or .xlsx file with two required columns: 'name' (unique document identifier) and 'text' (free-text to annotate)." + ) + class Meta: model = Dataset fields = '__all__' diff --git a/medcat-trainer/webapp/api/api/views.py b/medcat-trainer/webapp/api/api/views.py index 587c12cae..b9e6961eb 100644 --- a/medcat-trainer/webapp/api/api/views.py +++ b/medcat-trainer/webapp/api/api/views.py @@ -1,6 +1,5 @@ import logging import os -import traceback from smtplib import SMTPException from tempfile import NamedTemporaryFile from typing import Any @@ -15,7 +14,7 @@ from django_filters import rest_framework as drf from rest_framework import viewsets -from rest_framework.decorators import api_view +from rest_framework.decorators import api_view, permission_classes from rest_framework.response import Response from medcat.components.ner.trf.deid import DeIdModel from medcat.utils.cdb_utils import ch2pt_from_pt2ch, get_all_ch, snomed_ct_concept_path @@ -32,6 +31,8 @@ from .solr_utils import collections_available, search_collection, ensure_concept_searchable from .utils import add_annotations, remove_annotations, train_medcat, create_annotation, prep_docs +logger = logging.getLogger(__name__) + # For local testing, put envs """ from environs import Env @@ -201,8 +202,22 @@ class ModelPackViewSet(viewsets.ModelViewSet): class DatasetViewSet(viewsets.ModelViewSet): + """ + ViewSet for managing datasets. + + File Schema Requirements: + - Format: .csv or .xlsx file + - Required columns: + * name: A unique identifier for each document + * text: The free-text content to annotate + + Example CSV: + name,text + doc001,"First document text" + doc002,"Second document text" + """ permission_classes = [permissions.IsAuthenticated] - http_method_names = ['get', 'post'] + http_method_names = ['get', 'post', 'put', 'patch', 'delete'] queryset = Dataset.objects.all() serializer_class = DatasetSerializer @@ -322,10 +337,8 @@ def prepare_documents(request): except Exception as e: logger.warning('Error preparing documents for project %s', p_id, exc_info=e) - stack = traceback.format_exc() return Response({'message': e.args[0] if len(e.args) > 0 else 'Internal Server Error', - 'description': e.args[1] if len(e.args) > 1 else '', - 'stacktrace': stack}, status=500) + 'description': e.args[1] if len(e.args) > 1 else '',}, status=500) return Response({'message': 'Documents prepared successfully'}) @@ -372,7 +385,7 @@ def prepare_docs_bg_task(request, proj_id): ds_total_count = Document.objects.filter(dataset=ProjectAnnotateEntities.objects.get(id=proj_id).dataset.id).count() return Response({'proj_id': proj_id, 'dataset_len': ds_total_count, 'prepd_docs_len': prepd_docs_count}) except ObjectDoesNotExist: - return HttpResponseBadRequest('No Project found for ID: %s', proj_id) + return HttpResponseBadRequest('No Project found for the given ID') else: running_doc_prep_tasks = {json.loads(task.task_params)[0][0]: task.id for task in Task.objects.filter(queue='doc_prep')} @@ -536,8 +549,9 @@ def submit_document(request): try: _submit_document(project, document) - except Exception as e: - HttpResponseServerError(e.message) + except Exception: + logger.exception("Error while submitting document") + return HttpResponseServerError("An internal error occurred while submitting the document.") return Response({'message': 'Document submited successfully'}) @@ -764,7 +778,7 @@ def concept_search_index_available(request): except Exception as e: logger.error("Failed to search for concept_search_index. Solr Search Service not available", exc_info=e) return HttpResponseServerError("Solr Search Service not available check the service is up, running " - "and configured correctly", e) + "and configured correctly.") @api_view(http_method_names=['GET']) @@ -1076,3 +1090,214 @@ def project_progress(request): out[p] = {'validated_count': val_docs, 'dataset_count': ds_doc_count} return Response(out) + + +@api_view(http_method_names=['GET']) +@permission_classes([permissions.IsAuthenticated]) +def project_admin_projects(request): + """ + Get all projects where the user is a project admin. + """ + user = request.user + projects = ProjectAnnotateEntities.objects.filter(members=user.id) + + # Also include projects where user is admin of the project's group + group_admin_projects = ProjectAnnotateEntities.objects.filter( + group__administrators=user.id + ) + projects = (projects | group_admin_projects).distinct() + + serializer = ProjectAnnotateEntitiesSerializer(projects, many=True) + return Response(serializer.data) + + +@api_view(http_method_names=['GET', 'PUT', 'DELETE']) +@permission_classes([permissions.IsAuthenticated]) +def project_admin_detail(request, project_id): + """ + Get, update, or delete a project (only if user is project admin). + """ + try: + project = ProjectAnnotateEntities.objects.get(id=project_id) + except ProjectAnnotateEntities.DoesNotExist: + return Response({'error': 'Project not found'}, status=404) + + # Check if user is project admin + from .permissions import is_project_admin + if not is_project_admin(request.user, project): + return Response({'error': 'You do not have permission to access this project'}, status=403) + + if request.method == 'GET': + serializer = ProjectAnnotateEntitiesSerializer(project) + return Response(serializer.data) + + elif request.method == 'PUT': + # Handle both JSON and FormData + data = request.data.copy() if hasattr(request.data, 'copy') else dict(request.data) + + # Extract many-to-many fields before serializer validation + cdb_search_filter_ids = [] + if 'cdb_search_filter' in request.data: + if isinstance(request.data.get('cdb_search_filter'), list): + cdb_search_filter_ids = request.data['cdb_search_filter'] + else: + # FormData sends as multiple values with same key + cdb_search_filter_ids = request.data.getlist('cdb_search_filter') + # Remove from data dict so serializer doesn't try to validate it + data.pop('cdb_search_filter', None) + + members_ids = [] + if 'members' in request.data: + if isinstance(request.data.get('members'), list): + members_ids = request.data['members'] + else: + members_ids = request.data.getlist('members') + # Remove from data dict so serializer doesn't try to validate it + data.pop('members', None) + + # Convert string booleans to actual booleans + boolean_fields = ['project_locked', 'annotation_classification', 'require_entity_validation', + 'train_model_on_submit', 'add_new_entities', 'restrict_concept_lookup', + 'terminate_available', 'irrelevant_available', 'enable_entity_annotation_comments', + 'use_model_service'] + for field in boolean_fields: + if field in data: + if isinstance(data[field], str): + data[field] = data[field].lower() in ('true', '1', 'yes', 'on') + + serializer = ProjectAnnotateEntitiesSerializer(project, data=data, partial=True) + if serializer.is_valid(): + try: + project = serializer.save() + # Handle many-to-many fields manually after saving + project.cdb_search_filter.set(cdb_search_filter_ids) + project.members.set(members_ids) + return Response(ProjectAnnotateEntitiesSerializer(project).data) + except Exception as e: + logger.error(f"Error saving project {project_id}: {e}", exc_info=e) + return Response({'error': f'Failed to save project'}, status=400) + else: + logger.warning(f"Validation errors for project {project_id}: {serializer.errors}") + return Response(serializer.errors, status=400) + + elif request.method == 'DELETE': + project.delete() + return Response({'message': 'Project deleted successfully'}, status=200) + + +@api_view(http_method_names=['POST']) +@permission_classes([permissions.IsAuthenticated]) +def project_admin_create(request): + """ + Create a new project (user must be authenticated). + """ + # Handle both JSON and FormData + data = request.data.copy() if hasattr(request.data, 'copy') else dict(request.data) + + # Convert many-to-many fields from FormData format + if 'cdb_search_filter' in request.data: + if isinstance(request.data.get('cdb_search_filter'), list): + data['cdb_search_filter'] = request.data['cdb_search_filter'] + else: + cdb_filter_list = request.data.getlist('cdb_search_filter') + # Only include if list has items, otherwise set to empty list + data['cdb_search_filter'] = cdb_filter_list if cdb_filter_list else [] + else: + data['cdb_search_filter'] = [] + + if 'members' in request.data: + if isinstance(request.data.get('members'), list): + data['members'] = request.data['members'] + else: + members_list = request.data.getlist('members') + # Only include if list has items + data['members'] = members_list if members_list else [] + else: + data['members'] = [] + + serializer = ProjectAnnotateEntitiesSerializer(data=data) + if serializer.is_valid(): + project = serializer.save() + # Handle many-to-many fields manually + if 'cdb_search_filter' in data: + project.cdb_search_filter.set(data['cdb_search_filter']) + if 'members' in data: + project.members.set(data['members']) + # Add the creator as a member if not already included + if request.user not in project.members.all(): + project.members.add(request.user) + return Response(ProjectAnnotateEntitiesSerializer(project).data, status=201) + return Response(serializer.errors, status=400) + + +@api_view(http_method_names=['POST']) +@permission_classes([permissions.IsAuthenticated]) +def project_admin_clone(request, project_id): + """ + Clone a project (user must be authenticated and have permission). + """ + import copy + try: + project = ProjectAnnotateEntities.objects.get(id=project_id) + except ProjectAnnotateEntities.DoesNotExist: + return Response({'error': 'Project not found'}, status=404) + + # Check if user is project admin + from .permissions import is_project_admin + if not is_project_admin(request.user, project): + return Response({'error': 'You do not have permission to clone this project'}, status=403) + + try: + # Get custom name from request, or use default + custom_name = request.data.get('name', None) if hasattr(request.data, 'get') else None + if not custom_name: + custom_name = f'{project.name} (Clone)' + + # Create a copy of the project + project_copy = copy.copy(project) + project_copy.id = None + project_copy.pk = None + project_copy.name = custom_name + project_copy.save() + + # Copy many-to-many fields + for m in project.members.all(): + project_copy.members.add(m) + for c in project.cdb_search_filter.all(): + project_copy.cdb_search_filter.add(c) + for t in project.tasks.all(): + project_copy.tasks.add(t) + + project_copy.save() + serializer = ProjectAnnotateEntitiesSerializer(project_copy) + return Response(serializer.data, status=201) + except Exception as e: + logger.error(f"Failed to clone project: {e}", exc_info=e) + return Response({'error': f'Failed to clone project:'}, status=500) + + +@api_view(http_method_names=['POST']) +@permission_classes([permissions.IsAuthenticated]) +def project_admin_reset(request, project_id): + """ + Reset a project (clear all annotations) - only if user is project admin. + This is equivalent to the reset_project admin action. + """ + try: + project = ProjectAnnotateEntities.objects.get(id=project_id) + except ProjectAnnotateEntities.DoesNotExist: + return Response({'error': 'Project not found'}, status=404) + + # Check if user is project admin + from .permissions import is_project_admin + if not is_project_admin(request.user, project): + return Response({'error': 'You do not have permission to reset this project'}, status=403) + + # Remove all annotations and cascade to meta anns + AnnotatedEntity.objects.filter(project=project).delete() + + # Clear validated_documents and prepared_documents + project.validated_documents.clear() + project.prepared_documents.clear() + + return Response({'message': 'Project reset successfully'}, status=200) diff --git a/medcat-trainer/webapp/api/core/urls.py b/medcat-trainer/webapp/api/core/urls.py index f9ee2296b..2934895d9 100644 --- a/medcat-trainer/webapp/api/core/urls.py +++ b/medcat-trainer/webapp/api/core/urls.py @@ -61,6 +61,11 @@ path('api/generate-concept-filter-json/', api.views.generate_concept_filter_flat_json), path('api/generate-concept-filter/', api.views.generate_concept_filter), path('api/cuis-to-concepts/', api.views.cuis_to_concepts), + path('api/project-admin/projects/', api.views.project_admin_projects), + path('api/project-admin/projects//', api.views.project_admin_detail), + path('api/project-admin/projects//clone/', api.views.project_admin_clone), + path('api/project-admin/projects//reset/', api.views.project_admin_reset), + path('api/project-admin/projects/create/', api.views.project_admin_create), path('reset_password/', api.views.ResetPasswordView.as_view(), name='reset_password'), path('reset_password_sent/', pw_views.PasswordResetDoneView.as_view(), name='password_reset_done'), path('reset//', pw_views.PasswordResetConfirmView.as_view(), name='password_reset_confirm'), diff --git a/medcat-trainer/webapp/frontend/package-lock.json b/medcat-trainer/webapp/frontend/package-lock.json index 3990464d3..2689d9df5 100644 --- a/medcat-trainer/webapp/frontend/package-lock.json +++ b/medcat-trainer/webapp/frontend/package-lock.json @@ -4633,10 +4633,12 @@ } }, "node_modules/glob": { - "version": "10.4.5", - "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", - "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", + "version": "10.5.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz", + "integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==", + "deprecated": "Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", "dev": true, + "license": "ISC", "dependencies": { "foreground-child": "^3.1.0", "jackspeak": "^3.1.2", @@ -5160,9 +5162,9 @@ "license": "MIT" }, "node_modules/js-yaml": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", - "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz", + "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", "dev": true, "license": "MIT", "dependencies": { @@ -5339,10 +5341,11 @@ } }, "node_modules/lodash": { - "version": "4.17.21", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", - "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", - "dev": true + "version": "4.17.23", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz", + "integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==", + "dev": true, + "license": "MIT" }, "node_modules/lodash.merge": { "version": "4.6.2", @@ -6866,9 +6869,9 @@ } }, "node_modules/vite": { - "version": "6.3.6", - "resolved": "https://registry.npmjs.org/vite/-/vite-6.3.6.tgz", - "integrity": "sha512-0msEVHJEScQbhkbVTb/4iHZdJ6SXp/AvxL2sjwYQFfBqleHtnCqv1J3sa9zbWz/6kW1m9Tfzn92vW+kZ1WV6QA==", + "version": "6.4.1", + "resolved": "https://registry.npmjs.org/vite/-/vite-6.4.1.tgz", + "integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==", "dev": true, "license": "MIT", "dependencies": { diff --git a/medcat-trainer/webapp/frontend/package.json b/medcat-trainer/webapp/frontend/package.json index 33f948d02..6f1ba97cc 100644 --- a/medcat-trainer/webapp/frontend/package.json +++ b/medcat-trainer/webapp/frontend/package.json @@ -4,13 +4,13 @@ "private": true, "type": "module", "scripts": { - "dev": "vite", + "dev": "NODE_OPTIONS=--max-old-space-size=4096 vite", "build": "run-p type-check \"build-only {@}\" --", "preview": "vite preview", "test:unit": "vitest", "coverage": "vitest run --coverage", "test:ui": "vitest --ui", - "build-only": "vite build", + "build-only": "NODE_OPTIONS=--max-old-space-size=4096 vite build", "type-check": "vue-tsc --build --force", "lint": "eslint . --fix", "format": "prettier --write src/" @@ -63,4 +63,4 @@ "vitest": "^3.2.4", "vue-tsc": "^2.1.6" } -} +} \ No newline at end of file diff --git a/medcat-trainer/webapp/frontend/src/App.vue b/medcat-trainer/webapp/frontend/src/App.vue index 5b406daa5..042392f13 100644 --- a/medcat-trainer/webapp/frontend/src/App.vue +++ b/medcat-trainer/webapp/frontend/src/App.vue @@ -27,6 +27,9 @@
+ + +