diff --git a/.github/workflows/smoke-nifi-services.yml b/.github/workflows/smoke-nifi-services.yml index b05333e96..3949e8488 100644 --- a/.github/workflows/smoke-nifi-services.yml +++ b/.github/workflows/smoke-nifi-services.yml @@ -33,9 +33,7 @@ jobs: - name: Start NiFi services (build) run: | set -euo pipefail - source deploy/export_env_vars.sh - set -euo pipefail - docker compose -f deploy/services.dev.yml up -d --build nifi nifi-nginx nifi-registry-flow + make -C deploy start-nifi-dev-build - name: Smoke tests run: | @@ -44,7 +42,7 @@ jobs: retries=30 delay=15 for attempt in $(seq 1 $retries); do - if ./scripts/smoke_nifi_services.sh; then + if ./scripts/tests/smoke_nifi_services.sh; then exit 0 fi echo "Attempt ${attempt}/${retries} failed. Sleeping ${delay}s..." diff --git a/deploy/Makefile b/deploy/Makefile index 367f1fe43..4a9fd6fb7 100644 --- a/deploy/Makefile +++ b/deploy/Makefile @@ -25,17 +25,23 @@ load-env: show-env: ${WITH_ENV} >/dev/null 2>&1; printenv | sort + +fix-nifi-registry-perms: + $(WITH_ENV) SKIP_EXPORT_ENV=1 ../nifi/fix_nifi_registry_perms.sh $(COMPOSE_FILE) # start services start-nifi: - $(WITH_ENV) docker compose -f services.yml $(DC_START_CMD) nifi nifi-nginx nifi-registry-flow + $(WITH_ENV) SKIP_EXPORT_ENV=1 ../nifi/fix_nifi_registry_perms.sh services.yml && \ + docker compose -f services.yml $(DC_START_CMD) nifi nifi-nginx nifi-registry-flow start-nifi-dev: - $(WITH_ENV) docker compose -f services.dev.yml $(DC_START_CMD) nifi nifi-nginx nifi-registry-flow + $(WITH_ENV) SKIP_EXPORT_ENV=1 ../nifi/fix_nifi_registry_perms.sh services.dev.yml && \ + docker compose -f services.dev.yml $(DC_START_CMD) nifi nifi-nginx nifi-registry-flow start-nifi-dev-build: - $(WITH_ENV) docker compose -f services.dev.yml up -d --build nifi nifi-nginx nifi-registry-flow + $(WITH_ENV) SKIP_EXPORT_ENV=1 ../nifi/fix_nifi_registry_perms.sh services.dev.yml && \ + docker compose -f services.dev.yml up -d --build nifi nifi-nginx nifi-registry-flow start-elastic:
$(WITH_ENV) docker compose -f services.yml $(DC_START_CMD) elasticsearch-1 elasticsearch-2 kibana @@ -101,7 +107,7 @@ start-data-infra: start-nifi start-elastic start-samples start-all: start-data-infra start-jupyter start-medcat-service start-ocr-services -.PHONY: start-all start-data-infra start-nifi start-nifi-dev start-nifi-dev-build start-elastic start-samples start-jupyter +.PHONY: start-all start-data-infra start-nifi start-nifi-dev start-nifi-dev-build start-elastic start-samples start-jupyter fix-nifi-registry-perms # stop services diff --git a/deploy/elasticsearch.env b/deploy/elasticsearch.env index 9b3616f55..986e8d165 100644 --- a/deploy/elasticsearch.env +++ b/deploy/elasticsearch.env @@ -89,6 +89,8 @@ ELASTICSEARCH_SECURITY_DIR=../security/certificates/elastic/ # MEMORY CONFIG ELASTICSEARCH_JAVA_OPTS="-Xms512m -Xmx512m -Des.failure_store_feature_flag_enabled=true" +ES_JAVA_OPTS=$ELASTICSEARCH_JAVA_OPTS +OPENSEARCH_JAVA_OPTS=$ELASTICSEARCH_JAVA_OPTS ELASTICSEARCH_DOCKER_CPU_MIN=1 ELASTICSEARCH_DOCKER_CPU_MAX=1 diff --git a/deploy/export_env_vars.sh b/deploy/export_env_vars.sh index 2ee8a95cf..c68b4f1b3 100755 --- a/deploy/export_env_vars.sh +++ b/deploy/export_env_vars.sh @@ -41,22 +41,6 @@ env_files=( "$SERVICES_DIR/cogstack-nlp/medcat-service/env/medcat.env" ) -LINT_SCRIPT="$SCRIPT_DIR/../nifi/user_scripts/utils/lint_env.py" - -if [ -e "$LINT_SCRIPT" ]; then - chmod +x $LINT_SCRIPT -fi - -if [ -x "$LINT_SCRIPT" ]; then - echo "🔍 Validating env files..." - if ! python3 "$LINT_SCRIPT" "${env_files[@]}"; then - echo "❌ Env validation failed. Fix the errors above before continuing." - exit 1 - fi -else - echo "⚠️ Skipping env validation; $LINT_SCRIPT not found or not executable." 
-fi - for env_file in "${env_files[@]}"; do if [ -f "$env_file" ]; then echo "✅ Sourcing $env_file" diff --git a/deploy/network_settings.env b/deploy/network_settings.env index ee2357969..6b19bab5e 100644 --- a/deploy/network_settings.env +++ b/deploy/network_settings.env @@ -18,4 +18,4 @@ HTTP_PROXY="" NO_PROXY="" no_proxy="" http_proxy="" -https_proxy="" \ No newline at end of file +https_proxy="" diff --git a/deploy/nifi.env b/deploy/nifi.env index c9d007d7a..190f65bfd 100644 --- a/deploy/nifi.env +++ b/deploy/nifi.env @@ -41,6 +41,9 @@ NIFI_DATA_PATH="../data/" NIFI_TOOLKIT_VERSION=$NIFI_VERSION +# this is to mount medcat models (optional) +NIFI_MEDCAT_SERVICE_MODEL_PRODUCTION_PATH="../services/cogstack-nlp/medcat-service/models/" + #### Port and network settings NIFI_WEB_PROXY_CONTEXT_PATH="/nifi" diff --git a/deploy/services.dev.yml b/deploy/services.dev.yml index e1970b449..b87974180 100644 --- a/deploy/services.dev.yml +++ b/deploy/services.dev.yml @@ -63,11 +63,11 @@ x-nifi-common: &nifi-common x-nifi-volumes: &nifi-volumes # Drivers - - ../nifi/drivers:/opt/nifi/drivers + - ../nifi/drivers:/opt/nifi/drivers:ro # User overrides bundled in the image - ../nifi/user_scripts:/opt/nifi/user_scripts:rw - - ../nifi/user_schemas:/opt/nifi/user_schemas:rw + - ../nifi/user_schemas:/opt/nifi/user_schemas:ro # Python processors (NiFi 2.x) - ../nifi/user_python_extensions:/opt/nifi/nifi-current/python_extensions:rw @@ -84,9 +84,6 @@ x-nifi-volumes: &nifi-volumes # Ingest data directory - ./${NIFI_DATA_PATH:-../data/}:/data/:rw - # DB schemas - - ../services/cogstack-db/:/opt/cogstack-db/:rw - # MedCAT models - ./${RES_MEDCAT_SERVICE_MODEL_PRODUCTION_PATH:-../services/cogstack-nlp/medcat-service/models/}:/opt/models:rw @@ -127,21 +124,10 @@ services: build: context: .. 
dockerfile: nifi/Dockerfile - args: - HTTP_PROXY: $HTTP_PROXY - HTTPS_PROXY: $HTTPS_PROXY - no_proxy: $no_proxy container_name: cogstack-nifi hostname: nifi shm_size: ${NIFI_DOCKER_SHM_SIZE:-"1g"} environment: - - USER_ID=${NIFI_UID:-1000} - - GROUP_ID=${NIFI_GID:-1000} - - NIFI_WEB_PROXY_HOST=${NIFI_WEB_PROXY_HOST:-"localhost:8443"} - - NIFI_WEB_PROXY_CONTEXT_PATH=${NIFI_WEB_PROXY_CONTEXT_PATH:-"/nifi"} - - NIFI_INTERNAL_PORT=${NIFI_INTERNAL_PORT:-8443} - - NIFI_OUTPUT_PORT=${NIFI_OUTPUT_PORT:-8082} - - NIFI_INPUT_SOCKET_PORT=${NIFI_INPUT_SOCKET_PORT:-10000} - PYTHONPATH=${NIFI_PYTHONPATH:-/opt/nifi/nifi-current/python/framework} - JVM_OPTS="${NIFI_JVM_OPTS:--XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:+ParallelRefProcEnabled -Djava.security.egd=file:/dev/./urandom}" deploy: @@ -170,13 +156,7 @@ services: hostname: nifi-registry container_name: cogstack-nifi-registry-flow shm_size: ${NIFI_DOCKER_REGISTRY_SHM_SIZE:-1g} - user: root environment: - - http_proxy=$HTTP_PROXY - - https_proxy=$HTTPS_PROXY - - no_proxy=$no_proxy - - USER_ID=${NIFI_UID:-1000} - - GROUP_ID=${NIFI_GID:-1000} - KEYSTORE_PATH=${NIFI_REGISTRY_KEYSTORE_PATH:-/security/certificates/nifi/nifi-keystore.jks} - KEYSTORE_TYPE=${NIFI_KEYSTORE_TYPE:-jks} - KEYSTORE_PASSWORD=${NIFI_KEYSTORE_PASSWORD:-"cogstackNifi"} @@ -186,9 +166,6 @@ services: - TRUSTSTORE_TYPE=${NIFI_TRUSTSTORE_TYPE:-jks} - INITIAL_ADMIN_IDENTITY=${NIFI_INITIAL_ADMIN_IDENTITY:-"cogstack"} - AUTH=${NIFI_AUTH:-"tls"} - - NIFI_REGISTRY_DB_DIR=${NIFI_REGISTRY_DB_DIR:-/opt/nifi-registry/nifi-registry-current/database} - #- NIFI_REGISTRY_FLOW_PROVIDER=${NIFI_REGISTRY_FLOW_PROVIDER:-file} - - NIFI_REGISTRY_FLOW_STORAGE_DIR=${NIFI_REGISTRY_FLOW_STORAGE_DIR:-/opt/nifi-registry/nifi-registry-current/flow_storage} deploy: resources: limits: @@ -203,11 +180,6 @@ services: ports: - "${NIFI_REGISTRY_FLOW_OUTPUT_PORT:-8083}:${NIFI_REGISTRY_FLOW_INPUT_PORT:-18443}" - entrypoint: bash -c "chown -R nifi:nifi 
/opt/nifi-registry/nifi-registry-current/database && \ - chown -R nifi:nifi /opt/nifi-registry/nifi-registry-current/flow_storage && \ - chown -R nifi:nifi /opt/nifi-registry/nifi-registry-current/work && \ - chown -R nifi:nifi /opt/nifi-registry/nifi-registry-current/logs && \ - bash /opt/nifi-registry/scripts/start.sh" logging: *nifi-logging-common nifi-nginx: diff --git a/deploy/services.yml b/deploy/services.yml index a0901553c..d85f2b2ba 100644 --- a/deploy/services.yml +++ b/deploy/services.yml @@ -74,11 +74,11 @@ x-nifi-common: &nifi-common x-nifi-volumes: &nifi-volumes # Drivers - - ../nifi/drivers:/opt/nifi/drivers + - ../nifi/drivers:/opt/nifi/drivers:ro # User overrides bundled in the image - ../nifi/user_scripts:/opt/nifi/user_scripts:rw - - ../nifi/user_schemas:/opt/nifi/user_schemas:rw + - ../nifi/user_schemas:/opt/nifi/user_schemas:ro # Python processors (NiFi 2.x) - ../nifi/user_python_extensions:/opt/nifi/nifi-current/python_extensions:rw @@ -95,11 +95,8 @@ x-nifi-volumes: &nifi-volumes # Ingest data directory - ./${NIFI_DATA_PATH:-../data/}:/data/:rw - # DB schemas - - ../services/cogstack-db/:/opt/cogstack-db/:rw - # MedCAT models - - ./${RES_MEDCAT_SERVICE_MODEL_PRODUCTION_PATH:-../services/cogstack-nlp/medcat-service/models/}:/opt/models:rw + - ./${NIFI_MEDCAT_SERVICE_MODEL_PRODUCTION_PATH:-../services/cogstack-nlp/medcat-service/models/}:/opt/models:rw # NiFi repositories/state - nifi-vol-logs:/opt/nifi/nifi-current/logs @@ -122,8 +119,8 @@ x-nifi-registry-volumes: &nifi-registry-volumes # Registry persistence - nifi-registry-vol-database:/opt/nifi-registry/nifi-registry-current/database - nifi-registry-vol-flow-storage:/opt/nifi-registry/nifi-registry-current/flow_storage - - nifi-registry-vol-work:/opt/nifi-registry/nifi-registry-current/work - nifi-registry-vol-logs:/opt/nifi-registry/nifi-registry-current/logs + - nifi-registry-vol-work:/opt/nifi-registry/nifi-registry-current/work x-db-common: &db-common <<: *common-ulimits @@ -145,7 
+142,6 @@ x-es-common-volumes: &es-common-volumes - ../services/elasticsearch/config/log4j2_${ELASTICSEARCH_VERSION:-opensearch}.properties:/usr/share/${ELASTICSEARCH_VERSION:-opensearch}/config/log4j2.properties:ro # Shared root CA + admin certs - ../security/certificates/elastic/${ELASTICSEARCH_VERSION:-opensearch}/elastic-stack-ca.crt.pem:/usr/share/${ELASTICSEARCH_VERSION:-opensearch}/config/root-ca.crt:ro - - ../security/certificates/elastic/${ELASTICSEARCH_VERSION:-opensearch}/elastic-stack-ca.key.pem:/usr/share/${ELASTICSEARCH_VERSION:-opensearch}/config/root-ca.key:ro # OPENSEARCH specific (always mounted even if unused) - ../security/certificates/elastic/opensearch/admin.crt:/usr/share/${ELASTICSEARCH_VERSION:-opensearch}/config/admin.crt:ro - ../security/certificates/elastic/opensearch/admin.key.pem:/usr/share/${ELASTICSEARCH_VERSION:-opensearch}/config/admin.key.pem:ro @@ -171,9 +167,6 @@ x-es-common: &es-common networks: - cognet extra_hosts: *common-hosts - environment: - ES_JAVA_OPTS: ${ELASTICSEARCH_JAVA_OPTS:--Xms2048m -Xmx2048m -Des.failure_store_feature_flag_enabled=true} - OPENSEARCH_JAVA_OPTS: ${ELASTICSEARCH_JAVA_OPTS:--Xms2048m -Xmx2048m -Des.failure_store_feature_flag_enabled=true} logging: *es-logging-common deploy: resources: @@ -204,7 +197,6 @@ x-metricbeat-common: &metricbeat-common volumes: - ../services/metricbeat/metricbeat.yml:/usr/share/metricbeat/metricbeat.yml:ro - ../security/certificates/elastic/elasticsearch/elastic-stack-ca.crt.pem:/usr/share/metricbeat/root-ca.crt:ro - - ../security/certificates/elastic/elasticsearch/elastic-stack-ca.key.pem:/usr/share/metricbeat/root-ca.key:ro networks: - cognet extra_hosts: *common-hosts @@ -219,11 +211,6 @@ x-filebeat-common: &filebeat-common env_file: - ./elasticsearch.env - ../security/env/users_elasticsearch.env - environment: - - ELASTICSEARCH_HOSTS=${ELASTICSEARCH_HOSTS:-["https://elasticsearch-1:9200","https://elasticsearch-2:9200"]} - - FILEBEAT_USER=${FILEBEAT_USER:-elastic} - - 
FILEBEAT_PASSWORD=${FILEBEAT_PASSWORD:-kibanaserver} - - KIBANA_HOST=${KIBANA_HOST:-"https://kibana:5601"} deploy: resources: limits: @@ -233,9 +220,8 @@ x-filebeat-common: &filebeat-common cpus: "${FILEBEAT_DOCKER_CPU_MIN}" memory: "${FILEBEAT_DOCKER_RAM}" volumes: - - ../services/filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:rw + - ../services/filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:ro - ../security/certificates/elastic/elasticsearch/elastic-stack-ca.crt.pem:/etc/pki/root/root-ca.crt:ro - - ../security/certificates/elastic/elasticsearch/elastic-stack-ca.key.pem:/etc/pki/root/root-ca.key:ro networks: - cognet extra_hosts: *common-hosts @@ -251,7 +237,7 @@ services: #---------------------------------------------------------------------------# samples-db: <<: *db-common - image: postgres:17.7-alpine + image: postgres:18.1-trixie container_name: cogstack-samples-db platform: linux/amd64 environment: @@ -264,7 +250,7 @@ services: - ../services/pgsamples/schemas:/data/schemas:rw - ../services/pgsamples/init_db.sh:/docker-entrypoint-initdb.d/init_db.sh:ro # data persistence - - samples-vol:/var/lib/postgresql/data + - samples-vol:/var/lib/postgresql command: postgres -c "max_connections=${POSTGRES_DB_MAX_CONNECTIONS:-100}" ports: - 5554:5432 @@ -278,7 +264,7 @@ services: #---------------------------------------------------------------------------# cogstack-databank-db: <<: *db-common - image: postgres:17.7-alpine + image: postgres:18.1-trixie container_name: cogstack-production-databank-db platform: linux/amd64 environment: @@ -290,7 +276,7 @@ services: - ../services/cogstack-db/pgsql/schemas:/data/:ro - ../services/cogstack-db/pgsql/init_db.sh:/docker-entrypoint-initdb.d/init_db.sh:ro # data persistence - - databank-vol:/var/lib/postgresql/data + - databank-vol:/var/lib/postgresql command: postgres -c "max_connections=${POSTGRES_DB_MAX_CONNECTIONS:-100}" ports: - 5558:5432 @@ -298,15 +284,13 @@ services: - 5432 networks: - cognet - + 
cogstack-databank-db-mssql: <<: *db-common image: mcr.microsoft.com/mssql/server:2019-latest container_name: cogstack-production-databank-db-mssql environment: - ACCEPT_EULA=y - - MSSQL_SA_USER=${MSSQL_SA_USER:-sa} - - MSSQL_SA_PASSWORD=${MSSQL_SA_PASSWORD:-admin!COGSTACK2022} volumes: # mapping postgres data dump and initialization - ../services/cogstack-db/mssql/schemas:/data/:ro @@ -481,7 +465,6 @@ services: # Security certificates, general - ../security/certificates/elastic/${ELASTICSEARCH_VERSION:-opensearch}/elastic-stack-ca.crt.pem:/usr/share/${KIBANA_VERSION:-opensearch-dashboards}/config/root-ca.crt:ro - - ../security/certificates/elastic/${ELASTICSEARCH_VERSION:-opensearch}/elastic-stack-ca.key.pem:/usr/share/${KIBANA_VERSION:-opensearch-dashboards}/config/root-ca.key:ro - ../security/certificates/elastic/${ELASTICSEARCH_VERSION:-opensearch}/elastic-stack-ca.p12:/usr/share/${KIBANA_VERSION:-opensearch-dashboards}/config/root-ca.p12:ro - ../security/certificates/elastic/${ELASTICSEARCH_VERSION:-opensearch}/elasticsearch/${ES_INSTANCE_NAME_1:-elasticsearch-1}/${ES_INSTANCE_NAME_1:-elasticsearch-1}.crt:/usr/share/${KIBANA_VERSION:-opensearch-dashboards}/config/esnode1.crt:ro @@ -509,13 +492,6 @@ services: hostname: nifi shm_size: ${NIFI_DOCKER_SHM_SIZE:-"1g"} environment: - - USER_ID=${NIFI_UID:-1000} - - GROUP_ID=${NIFI_GID:-1000} - - NIFI_WEB_PROXY_HOST=${NIFI_WEB_PROXY_HOST:-"localhost:8443"} - - NIFI_WEB_PROXY_CONTEXT_PATH=${NIFI_WEB_PROXY_CONTEXT_PATH:-"/nifi"} - - NIFI_INTERNAL_PORT=${NIFI_INTERNAL_PORT:-8443} - - NIFI_OUTPUT_PORT=${NIFI_OUTPUT_PORT:-8082} - - NIFI_INPUT_SOCKET_PORT=${NIFI_INPUT_SOCKET_PORT:-10000} - PYTHONPATH=${NIFI_PYTHONPATH:-/opt/nifi/nifi-current/python/framework} - JVM_OPTS="${NIFI_JVM_OPTS:--XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:+ParallelRefProcEnabled -Djava.security.egd=file:/dev/./urandom}" deploy: @@ -544,25 +520,15 @@ services: hostname: nifi-registry container_name: cogstack-nifi-registry-flow shm_size: 
${NIFI_DOCKER_REGISTRY_SHM_SIZE:-1g} - user: root environment: - - http_proxy=$HTTP_PROXY - - https_proxy=$HTTPS_PROXY - - no_proxy=$no_proxy - - USER_ID=${NIFI_UID:-1000} - - GROUP_ID=${NIFI_GID:-1000} - KEYSTORE_PATH=${NIFI_REGISTRY_KEYSTORE_PATH:-/security/certificates/nifi/nifi-keystore.jks} - KEYSTORE_TYPE=${NIFI_KEYSTORE_TYPE:-jks} - KEYSTORE_PASSWORD=${NIFI_KEYSTORE_PASSWORD:-"cogstackNifi"} - TRUSTSTORE_PASSWORD=${NIFI_TRUSTSTORE_PASSWORD:-"cogstackNifi"} - TRUSTSTORE_PATH=${NIFI_REGISTRY_TRUSTSTORE_PATH:-/security/certificates/nifi/nifi-truststore.jks} - - TRUSTSTORE_TYPE=${NIFI_TRUSTSTORE_TYPE:-jks} - INITIAL_ADMIN_IDENTITY=${NIFI_INITIAL_ADMIN_IDENTITY:-"cogstack"} - AUTH=${NIFI_AUTH:-"tls"} - - NIFI_REGISTRY_DB_DIR=${NIFI_REGISTRY_DB_DIR:-/opt/nifi-registry/nifi-registry-current/database} - #- NIFI_REGISTRY_FLOW_PROVIDER=${NIFI_REGISTRY_FLOW_PROVIDER:-file} - - NIFI_REGISTRY_FLOW_STORAGE_DIR=${NIFI_REGISTRY_FLOW_STORAGE_DIR:-/opt/nifi-registry/nifi-registry-current/flow_storage} deploy: resources: limits: @@ -577,11 +543,6 @@ services: ports: - "${NIFI_REGISTRY_FLOW_OUTPUT_PORT:-8083}:${NIFI_REGISTRY_FLOW_INPUT_PORT:-18443}" - entrypoint: bash -c "chown -R nifi:nifi /opt/nifi-registry/nifi-registry-current/database && \ - chown -R nifi:nifi /opt/nifi-registry/nifi-registry-current/flow_storage && \ - chown -R nifi:nifi /opt/nifi-registry/nifi-registry-current/work && \ - chown -R nifi:nifi /opt/nifi-registry/nifi-registry-current/logs && \ - bash /opt/nifi-registry/scripts/start.sh" logging: *nifi-logging-common nifi-nginx: @@ -638,10 +599,6 @@ services: image: gitea/gitea:1.23-rootless shm_size: ${GITEA_DOCKER_SHM_SIZE:-"1g"} restart: always - environment: - - http_proxy=$HTTP_PROXY - - https_proxy=$HTTPS_PROXY - - no_proxy=$no_proxy deploy: resources: limits: @@ -728,10 +685,10 @@ volumes: driver: local nifi-registry-vol-flow-storage: driver: local - nifi-registry-vol-work: - driver: local nifi-registry-vol-logs: driver: local + 
nifi-registry-vol-work: + driver: local # Gitea gitea-lib-vol: diff --git a/docs/deploy/deployment.md b/docs/deploy/deployment.md index 1a0bd7440..65b5c5347 100644 --- a/docs/deploy/deployment.md +++ b/docs/deploy/deployment.md @@ -17,6 +17,14 @@ Make sure you have read the [Prerequisites](./main.md) section before proceeding These variables configure NiFi, Elasticsearch/OpenSearch, Kibana, Jupyter, Metricbeat, the sample DB, etc. +> **Important:** If you run `docker compose` directly (instead of `make`), first load the envs with: +> +> ```bash +> source ./deploy/export_env_vars.sh +> ``` +> +> The Makefile targets already do this for you. + ## 🧩 Modular service design (important) This repository follows a **modular deployment model**: diff --git a/docs/nifi/main.md b/docs/nifi/main.md index 82d899b85..9c0b8b90b 100644 --- a/docs/nifi/main.md +++ b/docs/nifi/main.md @@ -140,6 +140,16 @@ You should check if the env vars have been set after running the script: echo $NIFI_GID ``` +### NiFi Registry permissions helper + +If NiFi Registry fails to start due to permission issues on its persistent volumes, run the helper script once to fix ownership: + + ```bash + ./nifi/fix_nifi_registry_perms.sh + ``` + +This script runs the registry container as root only long enough to `chown` the registry `database`, `flow_storage`, `work`, and `logs` directories, then exits. Subsequent starts can run as the default non-root user. + If the above command prints some numbers then it means that the `export_env_vars.sh` script worked. 
Otherwise, if you don't see anything, or just blank lines, then you need to execute the following: ```bash @@ -172,7 +182,7 @@ Then execute the `recreate_nifi_docker_image.sh` script located in the `./nifi` bash recreate_nifi_docker_image.sh ``` -Remember that the above export script and/or command are only visible in the current shell, so every time you restart your shell terminal you must execute the `./deploy/export_env_vars.sh` so that the variables will be visible by docker at runtime, because it uses the GID/UID in the `services.yml` file , specifying in the service definition `user: "${USER_ID:-${NIFI_UID:-1000}}:${GROUP_ID:-${NIFI_GID:-1000}}"`. +Remember that the variables exported by the above script and/or command are only visible in the current shell, so every time you restart your shell terminal you must `source ./deploy/export_env_vars.sh` so the variables are visible to Docker at runtime. If you're using the `deploy/Makefile` targets, they handle this for you. ### `{bootstrap.conf}` diff --git a/nifi/fix_nifi_registry_perms.sh b/nifi/fix_nifi_registry_perms.sh new file mode 100755 index 000000000..7e7e8c27c --- /dev/null +++ b/nifi/fix_nifi_registry_perms.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Support being run from any directory. +SCRIPT_SOURCE="${BASH_SOURCE[0]-$0}" +SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_SOURCE")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +COMPOSE_FILE="${1:-services.yml}" +if [[ "$COMPOSE_FILE" != /* ]]; then + if [[ "$COMPOSE_FILE" == services*.yml ]]; then + COMPOSE_FILE="deploy/$COMPOSE_FILE" + fi + COMPOSE_PATH="$REPO_ROOT/$COMPOSE_FILE" +else + COMPOSE_PATH="$COMPOSE_FILE" +fi + +if [ ! 
-f "$COMPOSE_PATH" ]; then + echo "Compose file not found: $COMPOSE_PATH" >&2 + exit 1 +fi + +if [ "${SKIP_EXPORT_ENV:-}" != "1" ]; then + set -a + source "$REPO_ROOT/deploy/export_env_vars.sh" + set +a +fi + +docker compose -f "$COMPOSE_PATH" run --rm --no-deps --user root --entrypoint bash -T nifi-registry-flow \ + -c 'chown -R nifi:nifi /opt/nifi-registry/nifi-registry-current/{database,flow_storage,work,logs}' diff --git a/nifi/user_python_extensions/convert_json_to_attribute.py b/nifi/user_python_extensions/convert_json_to_attribute.py index 3dded5f2b..231663be8 100644 --- a/nifi/user_python_extensions/convert_json_to_attribute.py +++ b/nifi/user_python_extensions/convert_json_to_attribute.py @@ -26,11 +26,13 @@ class Java: class ProcessorDetails: version = '0.0.1' + description = "Build ids_csv attribute from merged JSON records (dedupe, numeric-only)" + tags = ["ids", "sql", "in-clause"] def __init__(self, jvm: JVMView): super().__init__(jvm) - self.field_name: str = "base64" + self.field_name: str = "id" # this is directly mirrored to the UI self._properties: list[PropertyDescriptor] = [ @@ -40,9 +42,6 @@ def __init__(self, jvm: JVMView): validators=[StandardValidators.NON_EMPTY_VALIDATOR]) ] - self.description = "Build ids_csv attribute from merged JSON records (dedupe, numeric-only)" - self.tags = ["ids", "sql", "in-clause"] - self.descriptors: list[PropertyDescriptor] = self._properties @overrides @@ -61,7 +60,6 @@ def transform(self, context: ProcessContext, flowFile: JavaObject) -> FlowFileTr except Exception: parsed = [] - records = parsed if isinstance(parsed, list) else parsed.get("records", []) if not isinstance(records, list): records = [] diff --git a/nifi/user_scripts/tests/nifi/test_service_health.py b/nifi/user_scripts/tests/nifi/test_service_health.py index 8799270e4..dbeefe4b1 100644 --- a/nifi/user_scripts/tests/nifi/test_service_health.py +++ b/nifi/user_scripts/tests/nifi/test_service_health.py @@ -7,8 +7,8 @@ from 
nifi.user_scripts.dto.nifi_api_config import NiFiAPIConfig from nifi.user_scripts.dto.service_health import DatabaseHealth, ElasticHealth, NiFiHealth from nifi.user_scripts.utils.generic import get_logger -from nifi.user_scripts.utils.nifi.nifi_api_client import NiFiClient, NiFiRegistryClient from nifi.user_scripts.utils.health.service import check_elasticsearch, check_kibana, check_postgres +from nifi.user_scripts.utils.nifi.nifi_api_client import NiFiClient, NiFiRegistryClient class TestServices(unittest.TestCase): @@ -51,4 +51,4 @@ def test_elastic_health(self): def test_kibana_health(self): elastic_health: ElasticHealth = check_kibana(self.elastic_config) self.assertTrue(elastic_health.connected) - self.assertEqual(elastic_health.status, "healthy") \ No newline at end of file + self.assertEqual(elastic_health.status, "healthy") diff --git a/nifi/user_templates/dt4h/annotate_dt4h_ann_manager.xml b/nifi/user_templates/legacy/dt4h/annotate_dt4h_ann_manager.xml similarity index 100% rename from nifi/user_templates/dt4h/annotate_dt4h_ann_manager.xml rename to nifi/user_templates/legacy/dt4h/annotate_dt4h_ann_manager.xml diff --git a/nifi/user_templates/dt4h/raw_ingest_dt4h.xml b/nifi/user_templates/legacy/dt4h/raw_ingest_dt4h.xml similarity index 100% rename from nifi/user_templates/dt4h/raw_ingest_dt4h.xml rename to nifi/user_templates/legacy/dt4h/raw_ingest_dt4h.xml diff --git a/scripts/smoke_nifi_services.sh b/scripts/tests/smoke_nifi_services.sh similarity index 95% rename from scripts/smoke_nifi_services.sh rename to scripts/tests/smoke_nifi_services.sh index 0d2c79853..ef185224d 100755 --- a/scripts/smoke_nifi_services.sh +++ b/scripts/tests/smoke_nifi_services.sh @@ -3,7 +3,7 @@ set -euo pipefail -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" ENV_LOADER="${ROOT_DIR}/deploy/export_env_vars.sh" if [[ -f "$ENV_LOADER" ]]; then