Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 36 additions & 35 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,53 +19,54 @@ RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}"
# zip for Vespa step further down
# ca-certificates for HTTPS
RUN apt-get update && \
apt-get install -y \
cmake \
curl \
zip \
ca-certificates \
libgnutls30=3.7.9-2+deb12u3 \
libblkid1=2.38.1-5+deb12u1 \
libmount1=2.38.1-5+deb12u1 \
libsmartcols1=2.38.1-5+deb12u1 \
libuuid1=2.38.1-5+deb12u1 \
libxmlsec1-dev \
pkg-config \
gcc && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
apt-get install -y \
cmake \
curl \
zip \
ca-certificates \
libgnutls30 \
libblkid1 \
libmount1 \
libsmartcols1 \
libuuid1 \
libxmlsec1-dev \
pkg-config \
gcc \
nano \
vim && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean

# Install Python dependencies
# Remove py, which is pulled in by retry; py is not needed and has a known CVE
COPY ./requirements/default.txt /tmp/requirements.txt
COPY ./requirements/ee.txt /tmp/ee-requirements.txt
RUN pip install --no-cache-dir --upgrade \
-r /tmp/requirements.txt \
-r /tmp/ee-requirements.txt && \
pip uninstall -y py && \
playwright install chromium && \
playwright install-deps chromium && \
ln -s /usr/local/bin/supervisord /usr/bin/supervisord
-r /tmp/requirements.txt \
-r /tmp/ee-requirements.txt && \
pip uninstall -y py && \
playwright install chromium && \
playwright install-deps chromium && \
ln -s /usr/local/bin/supervisord /usr/bin/supervisord

# Cleanup for CVEs and size reduction
# https://github.com/tornadoweb/tornado/issues/3107
# xserver-common and xvfb included by playwright installation but not needed after
# perl-base is part of the base Python Debian image but not needed for Danswer functionality
# perl-base can only be removed with --allow-remove-essential
RUN apt-get update && \
apt-get remove -y --allow-remove-essential \
perl-base \
xserver-common \
xvfb \
cmake \
libldap-2.5-0 \
libxmlsec1-dev \
pkg-config \
gcc && \
apt-get install -y libxmlsec1-openssl && \
apt-get autoremove -y && \
rm -rf /var/lib/apt/lists/* && \
rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key
apt-get remove -y --allow-remove-essential \
perl-base \
xserver-common \
xvfb \
cmake \
libldap-2.5-0 \
libxmlsec1-dev \
pkg-config && \
apt-get install -y libxmlsec1-openssl && \
apt-get autoremove -y && \
rm -rf /var/lib/apt/lists/* && \
rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key

# Pre-downloading models for setups with limited egress
RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('intfloat/e5-base-v2')"
Expand Down Expand Up @@ -100,4 +101,4 @@ ENV PYTHONPATH /app

# Default command which does nothing
# This container is used by api server and background which specify their own CMD
CMD ["tail", "-f", "/dev/null"]
CMD ["tail", "-f", "/dev/null"]
39 changes: 15 additions & 24 deletions backend/danswer/connectors/danswer_jira/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,29 +38,20 @@ def _paginate_jql_search(
max_results: int,
fields: str | None = None,
) -> Iterable[Issue]:
start = 0
while True:
logger.debug(
f"Fetching Jira issues with JQL: {jql}, "
f"starting at {start}, max results: {max_results}"
)
issues = jira_client.search_issues(
jql_str=jql,
startAt=start,
maxResults=max_results,
fields=fields,
)

for issue in issues:
if isinstance(issue, Issue):
yield issue
else:
raise Exception(f"Found Jira object not of type Issue: {issue}")

if len(issues) < max_results:
break
# Use enhanced_search_issues for Jira Cloud (API v3)
# It uses search tokens instead of startAt for pagination
logger.debug(f"Fetching Jira issues with JQL: {jql}, max results: {max_results}")
issues = jira_client.enhanced_search_issues(
jql_str=jql,
maxResults=max_results,
fields=fields or "*all",
)

start += max_results
for issue in issues:
if isinstance(issue, Issue):
yield issue
else:
raise Exception(f"Found Jira object not of type Issue: {issue}")


def fetch_jira_issues_batch(
Expand All @@ -84,9 +75,9 @@ def fetch_jira_issues_batch(
continue

description = (
issue.fields.description
issue.fields.description or ""
if JIRA_API_VERSION == "2"
else extract_text_from_content(issue.raw["fields"]["description"])
else extract_text_from_content(issue.raw["fields"].get("description"))
)
comments = get_comment_strs(
issue=issue,
Expand Down
9 changes: 6 additions & 3 deletions backend/danswer/connectors/danswer_jira/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@ def best_effort_get_field_from_issue(jira_issue: Issue, field: str) -> Any:
return None


def extract_text_from_content(content: dict) -> str:
def extract_text_from_content(content: dict | None) -> str:
if content is None:
return ""

texts = []
if "content" in content:
for block in content["content"]:
Expand All @@ -63,9 +66,9 @@ def get_comment_strs(
for comment in issue.fields.comment.comments:
try:
body_text = (
comment.body
comment.body or ""
if JIRA_API_VERSION == "2"
else extract_text_from_content(comment.raw["body"])
else extract_text_from_content(comment.raw.get("body"))
)

if (
Expand Down
Loading