From 00bf0e519f2f32ab800ada6b1797ca368aa580aa Mon Sep 17 00:00:00 2001 From: Sarath1018 Date: Wed, 19 Nov 2025 20:57:28 +0530 Subject: [PATCH 1/2] Add null checks in jira connector --- backend/danswer/connectors/danswer_jira/connector.py | 4 ++-- backend/danswer/connectors/danswer_jira/utils.py | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/backend/danswer/connectors/danswer_jira/connector.py b/backend/danswer/connectors/danswer_jira/connector.py index a142ed4a193..164e5d95f0b 100644 --- a/backend/danswer/connectors/danswer_jira/connector.py +++ b/backend/danswer/connectors/danswer_jira/connector.py @@ -84,9 +84,9 @@ def fetch_jira_issues_batch( continue description = ( - issue.fields.description + issue.fields.description or "" if JIRA_API_VERSION == "2" - else extract_text_from_content(issue.raw["fields"]["description"]) + else extract_text_from_content(issue.raw["fields"].get("description")) ) comments = get_comment_strs( issue=issue, diff --git a/backend/danswer/connectors/danswer_jira/utils.py b/backend/danswer/connectors/danswer_jira/utils.py index e50600e9174..3acf32d3335 100644 --- a/backend/danswer/connectors/danswer_jira/utils.py +++ b/backend/danswer/connectors/danswer_jira/utils.py @@ -45,7 +45,10 @@ def best_effort_get_field_from_issue(jira_issue: Issue, field: str) -> Any: return None -def extract_text_from_content(content: dict) -> str: +def extract_text_from_content(content: dict | None) -> str: + if content is None: + return "" + texts = [] if "content" in content: for block in content["content"]: @@ -63,9 +66,9 @@ def get_comment_strs( for comment in issue.fields.comment.comments: try: body_text = ( - comment.body + comment.body or "" if JIRA_API_VERSION == "2" - else extract_text_from_content(comment.raw["body"]) + else extract_text_from_content(comment.raw.get("body")) ) if ( From 929186853604df4f208d35136241c4d66eb1adfb Mon Sep 17 00:00:00 2001 From: Sarath1018 Date: Wed, 19 Nov 2025 22:21:21 +0530 Subject: [PATCH 2/2] Fix Jira connector --- backend/Dockerfile | 71 ++++++++++--------- .../connectors/danswer_jira/connector.py | 35 ++++----- 2 files changed, 49 insertions(+), 57 deletions(-) diff --git a/backend/Dockerfile b/backend/Dockerfile index 7f9daad94a3..71b3f02856d 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -19,33 +19,35 @@ RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}" # zip for Vespa step futher down # ca-certificates for HTTPS RUN apt-get update && \ - apt-get install -y \ - cmake \ - curl \ - zip \ - ca-certificates \ - libgnutls30=3.7.9-2+deb12u3 \ - libblkid1=2.38.1-5+deb12u1 \ - libmount1=2.38.1-5+deb12u1 \ - libsmartcols1=2.38.1-5+deb12u1 \ - libuuid1=2.38.1-5+deb12u1 \ - libxmlsec1-dev \ - pkg-config \ - gcc && \ - rm -rf /var/lib/apt/lists/* && \ - apt-get clean + apt-get install -y \ + cmake \ + curl \ + zip \ + ca-certificates \ + libgnutls30 \ + libblkid1 \ + libmount1 \ + libsmartcols1 \ + libuuid1 \ + libxmlsec1-dev \ + pkg-config \ + gcc \ + nano \ + vim && \ + rm -rf /var/lib/apt/lists/* && \ + apt-get clean # Install Python dependencies # Remove py which is pulled in by retry, py is not needed and is a CVE COPY ./requirements/default.txt /tmp/requirements.txt COPY ./requirements/ee.txt /tmp/ee-requirements.txt RUN pip install --no-cache-dir --upgrade \ - -r /tmp/requirements.txt \ - -r /tmp/ee-requirements.txt && \ - pip uninstall -y py && \ - playwright install chromium && \ - playwright install-deps chromium && \ - ln -s /usr/local/bin/supervisord /usr/bin/supervisord + -r /tmp/requirements.txt \ + -r /tmp/ee-requirements.txt && \ + pip uninstall -y py && \ + playwright install chromium && \ + playwright install-deps chromium && \ + ln -s /usr/local/bin/supervisord /usr/bin/supervisord # Cleanup for CVEs and size reduction # https://github.com/tornadoweb/tornado/issues/3107 @@ -53,19 +55,18 @@ RUN pip install --no-cache-dir --upgrade \ # perl-base is part of the base Python Debian image but not needed for Danswer functionality # perl-base could only be removed with --allow-remove-essential RUN apt-get update && \ - apt-get remove -y --allow-remove-essential \ - perl-base \ - xserver-common \ - xvfb \ - cmake \ - libldap-2.5-0 \ - libxmlsec1-dev \ - pkg-config \ - gcc && \ - apt-get install -y libxmlsec1-openssl && \ - apt-get autoremove -y && \ - rm -rf /var/lib/apt/lists/* && \ - rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key + apt-get remove -y --allow-remove-essential \ + perl-base \ + xserver-common \ + xvfb \ + cmake \ + libldap-2.5-0 \ + libxmlsec1-dev \ + pkg-config && \ + apt-get install -y libxmlsec1-openssl && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* && \ + rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key # Pre-downloading models for setups with limited egress RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('intfloat/e5-base-v2')" @@ -100,4 +101,4 @@ ENV PYTHONPATH /app # Default command which does nothing # This container is used by api server and background which specify their own CMD -CMD ["tail", "-f", "/dev/null"] +CMD ["tail", "-f", "/dev/null"] \ No newline at end of file diff --git a/backend/danswer/connectors/danswer_jira/connector.py b/backend/danswer/connectors/danswer_jira/connector.py index 164e5d95f0b..06b5a132bc2 100644 --- a/backend/danswer/connectors/danswer_jira/connector.py +++ b/backend/danswer/connectors/danswer_jira/connector.py @@ -38,29 +38,20 @@ def _paginate_jql_search( max_results: int, fields: str | None = None, ) -> Iterable[Issue]: - start = 0 - while True: - logger.debug( - f"Fetching Jira issues with JQL: {jql}, " - f"starting at {start}, max results: {max_results}" - ) - issues = jira_client.search_issues( - jql_str=jql, - startAt=start, - maxResults=max_results, - fields=fields, - ) - - for issue in issues: - if isinstance(issue, Issue): - yield issue - else: - raise Exception(f"Found Jira object not of type Issue: {issue}") - - if len(issues) < max_results: - break + # Use enhanced_search_issues for Jira Cloud (API v3) + # It uses search tokens instead of startAt for pagination + logger.debug(f"Fetching Jira issues with JQL: {jql}, max results: {max_results}") + issues = jira_client.enhanced_search_issues( + jql_str=jql, + maxResults=max_results, + fields=fields or "*all", + ) - start += max_results + for issue in issues: + if isinstance(issue, Issue): + yield issue + else: + raise Exception(f"Found Jira object not of type Issue: {issue}") def fetch_jira_issues_batch(