Compare commits

...

19 Commits

Author SHA1 Message Date
rkuo-danswer
a4f1bb9ac8 Merge pull request #2753 from danswer-ai/hotfix/v0.7-supervisor-logs
Merge hotfix/v0.7-supervisor-logs into release/v0.7
2024-10-09 18:10:43 -07:00
rkuo-danswer
5258e6ed67 update indexing and slack bot to use stdout options (#2752) 2024-10-10 00:55:56 +00:00
rkuo-danswer
7f9fdbdcfc Merge pull request #2750 from danswer-ai/hotfix/v0.7-bedrock
backport Update litellm to fix bedrock models (#2649)
2024-10-09 14:47:24 -07:00
Richard Kuo (Danswer)
fd8644d040 backport Update litellm to fix bedrock models (#2649) 2024-10-09 14:02:52 -07:00
rkuo-danswer
0ae9f6e5b6 Merge pull request #2746 from danswer-ai/hotfix/v0.7-docker-kv-deprecation
Merge hotfix/v0.7-docker-kv-deprecation into release/v0.7
2024-10-09 13:02:15 -07:00
rkuo-danswer
906d77e980 Merge pull request #2717 from danswer-ai/bugfix/docker-legacy-key-value-format
Fix all LegacyKeyValueFormat docker warnings
2024-10-09 19:41:39 +00:00
Chris Weaver
1f0af86454 Temp patch to remove multiple tool calls (#2720) 2024-10-08 13:22:16 -07:00
hagen-danswer
0e6524dd32 Added quotes to project name to handle reserved words (#2639) 2024-10-08 11:20:44 -07:00
rkuo-danswer
2be133d784 Merge pull request #2716 from danswer-ai/hotfix/v0.7-background-logs
backport: rely on stdout redirection for supervisord logging (#2711)
2024-10-07 15:48:43 -07:00
Richard Kuo (Danswer)
cb668bcff5 backport: rely on stdout redirection for supervisord logging (#2711) 2024-10-07 15:13:43 -07:00
rkuo-danswer
756385e3ac Hotfix/v0.7 harden redis (#2683)
* harden redis

* use blockingconnectionpool
2024-10-04 09:49:32 -07:00
Richard Kuo (Danswer)
1966127bd4 trivy workaround 2024-10-03 15:25:54 -07:00
rkuo-danswer
3ac84da698 Merge pull request #2676 from danswer-ai/hotfix/v0.7-vespa-delete-performance
hotfix for vespa delete performance
2024-10-03 10:59:32 -07:00
rkuo-danswer
7c7f5b37f5 Merge pull request #2675 from danswer-ai/hotfix/v0.7-bump-celery
bump celery
2024-10-03 10:59:14 -07:00
Richard Kuo (Danswer)
0bf9243891 Merge branch 'release/v0.7' of github.com:danswer-ai/danswer into hotfix/v0.7-bump-celery 2024-10-03 10:21:41 -07:00
Richard Kuo (Danswer)
cfe4bbe3c7 Merge branch 'release/v0.7' of github.com:danswer-ai/danswer into hotfix/v0.7-vespa-delete-performance 2024-10-03 10:21:23 -07:00
Richard Kuo (Danswer)
9d18b92b90 fix sync checks 2024-10-03 10:20:57 -07:00
Richard Kuo (Danswer)
74315e21b3 bump celery 2024-10-03 09:44:25 -07:00
Richard Kuo (Danswer)
f9a5b227a1 hotfix for vespa delete performance 2024-10-03 09:43:02 -07:00
26 changed files with 279 additions and 59 deletions

View File

@@ -46,8 +46,16 @@ jobs:
build-args: |
DANSWER_VERSION=${{ github.ref_name }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
# To run locally: trivy image --severity HIGH,CRITICAL danswer/danswer-backend
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}

View File

@@ -40,8 +40,16 @@ jobs:
build-args: |
DANSWER_VERSION=${{ github.ref_name }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
image-ref: docker.io/danswer/danswer-model-server:${{ github.ref_name }}
severity: 'CRITICAL,HIGH'

View File

@@ -113,8 +113,16 @@ jobs:
run: |
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: 'CRITICAL,HIGH'

View File

@@ -3,7 +3,9 @@ name: Python Checks
on:
merge_group:
pull_request:
branches: [ main ]
branches:
- main
- 'release/**'
jobs:
mypy-check:

View File

@@ -15,6 +15,9 @@ env:
CONFLUENCE_TEST_PAGE_ID: ${{ secrets.CONFLUENCE_TEST_PAGE_ID }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
# Jira
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
jobs:
connectors-check:

View File

@@ -3,7 +3,9 @@ name: Python Unit Tests
on:
merge_group:
pull_request:
branches: [ main ]
branches:
- main
- 'release/**'
jobs:
backend-check:

View File

@@ -6,7 +6,9 @@ concurrency:
on:
merge_group:
pull_request:
branches: [ main ]
branches:
- main
- 'release/**'
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

View File

@@ -101,7 +101,7 @@ COPY ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connect
# Put logo in assets
COPY ./assets /app/assets
ENV PYTHONPATH /app
ENV PYTHONPATH=/app
# Default command which does nothing
# This container is used by api server and background which specify their own CMD

View File

@@ -55,6 +55,6 @@ COPY ./shared_configs /app/shared_configs
# Model Server main code
COPY ./model_server /app/model_server
ENV PYTHONPATH /app
ENV PYTHONPATH=/app
CMD ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]

View File

@@ -1,7 +1,9 @@
# docs: https://docs.celeryq.dev/en/stable/userguide/configuration.html
from danswer.configs.app_configs import CELERY_BROKER_POOL_LIMIT
from danswer.configs.app_configs import CELERY_RESULT_EXPIRES
from danswer.configs.app_configs import REDIS_DB_NUMBER_CELERY
from danswer.configs.app_configs import REDIS_DB_NUMBER_CELERY_RESULT_BACKEND
from danswer.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL
from danswer.configs.app_configs import REDIS_HOST
from danswer.configs.app_configs import REDIS_PASSWORD
from danswer.configs.app_configs import REDIS_PORT
@@ -9,6 +11,7 @@ from danswer.configs.app_configs import REDIS_SSL
from danswer.configs.app_configs import REDIS_SSL_CA_CERTS
from danswer.configs.app_configs import REDIS_SSL_CERT_REQS
from danswer.configs.constants import DanswerCeleryPriority
from danswer.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS
CELERY_SEPARATOR = ":"
@@ -36,12 +39,30 @@ result_backend = f"{REDIS_SCHEME}://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PO
# can stall other tasks.
worker_prefetch_multiplier = 4
broker_connection_retry_on_startup = True
broker_pool_limit = CELERY_BROKER_POOL_LIMIT
# redis broker settings
# https://docs.celeryq.dev/projects/kombu/en/stable/reference/kombu.transport.redis.html
broker_transport_options = {
"priority_steps": list(range(len(DanswerCeleryPriority))),
"sep": CELERY_SEPARATOR,
"queue_order_strategy": "priority",
"retry_on_timeout": True,
"health_check_interval": REDIS_HEALTH_CHECK_INTERVAL,
"socket_keepalive": True,
"socket_keepalive_options": REDIS_SOCKET_KEEPALIVE_OPTIONS,
}
# redis backend settings
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings
# there doesn't appear to be a way to set socket_keepalive_options on the redis result backend
redis_socket_keepalive = True
redis_retry_on_timeout = True
redis_backend_health_check_interval = REDIS_HEALTH_CHECK_INTERVAL
task_default_priority = DanswerCeleryPriority.MEDIUM
task_acks_late = True
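The hunk above threads the new Redis hardening settings (blocking pool limit, keepalive, health checks) into both the Celery broker transport and the result backend. Below is a hedged sketch of how a config module like this gets consumed; the module path is an assumption based on the `celery -A danswer.background.celery.celery_run:celery_app` commands in the supervisord changes further down:

```python
# Sketch only: wiring a celeryconfig module into a Celery app.
from celery import Celery

celery_app = Celery(__name__)

# config_from_object picks up module-level lowercase settings such as
# broker_transport_options, task_acks_late, and broker_pool_limit.
celery_app.config_from_object("danswer.background.celery.celeryconfig")

# kombu forwards broker_transport_options to redis-py, so the keepalive
# and health_check_interval settings apply to the broker connection itself.
print(celery_app.conf.broker_transport_options["health_check_interval"])
```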

View File

@@ -148,7 +148,7 @@ def document_by_cc_pair_cleanup_task(
if count == 1:
# count == 1 means this is the only remaining cc_pair reference to the doc
# delete it from vespa and the db
document_index.delete(doc_ids=[document_id])
document_index.delete_single(doc_id=document_id)
delete_documents_complete__no_commit(
db_session=db_session,
document_ids=[document_id],

View File

@@ -164,6 +164,12 @@ REDIS_DB_NUMBER_CELERY_RESULT_BACKEND = int(
)
REDIS_DB_NUMBER_CELERY = int(os.environ.get("REDIS_DB_NUMBER_CELERY", 15)) # broker
# will propagate to both our redis client as well as celery's redis client
REDIS_HEALTH_CHECK_INTERVAL = int(os.environ.get("REDIS_HEALTH_CHECK_INTERVAL", 60))
# our redis client only, not celery's
REDIS_POOL_MAX_CONNECTIONS = int(os.environ.get("REDIS_POOL_MAX_CONNECTIONS", 128))
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings
# should be one of "required", "optional", or "none"
REDIS_SSL_CERT_REQS = os.getenv("REDIS_SSL_CERT_REQS", "none")
@@ -171,6 +177,16 @@ REDIS_SSL_CA_CERTS = os.getenv("REDIS_SSL_CA_CERTS", None)
CELERY_RESULT_EXPIRES = int(os.environ.get("CELERY_RESULT_EXPIRES", 86400)) # seconds
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#broker-pool-limit
# Setting to None may help when there is a proxy in the way closing idle connections
CELERY_BROKER_POOL_LIMIT_DEFAULT = 10
try:
CELERY_BROKER_POOL_LIMIT = int(
os.environ.get("CELERY_BROKER_POOL_LIMIT", CELERY_BROKER_POOL_LIMIT_DEFAULT)
)
except ValueError:
CELERY_BROKER_POOL_LIMIT = CELERY_BROKER_POOL_LIMIT_DEFAULT
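Because this module is imported on every worker start, the try/except keeps a malformed CELERY_BROKER_POOL_LIMIT from crashing startup. An illustration with a deliberately bad value (assumed, not from the repo):

```python
import os

# Deliberately malformed to show the fallback path.
os.environ["CELERY_BROKER_POOL_LIMIT"] = "ten"

default = 10
try:
    pool_limit = int(os.environ.get("CELERY_BROKER_POOL_LIMIT", default))
except ValueError:
    pool_limit = default  # fall back rather than raise at import time

print(pool_limit)  # -> 10
```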
#####
# Connector Configs
#####

View File

@@ -1,3 +1,5 @@
import platform
import socket
from enum import auto
from enum import Enum
@@ -203,3 +205,13 @@ class DanswerCeleryPriority(int, Enum):
MEDIUM = auto()
LOW = auto()
LOWEST = auto()
REDIS_SOCKET_KEEPALIVE_OPTIONS = {}
REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPINTVL] = 15
REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPCNT] = 3
if platform.system() == "Darwin":
REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPALIVE] = 60 # type: ignore
else:
REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPIDLE] = 60 # type: ignore
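The platform split exists because macOS does not expose TCP_KEEPIDLE; it uses TCP_KEEPALIVE for the idle threshold instead, which is also why both branches carry a `# type: ignore`. For context, redis-py applies these options via setsockopt on each connection it opens; the client below is an illustrative sketch, not the repo's code:

```python
import socket

import redis

# Linux-flavored sketch; on macOS, socket.TCP_KEEPALIVE replaces TCP_KEEPIDLE.
keepalive_options = {
    socket.TCP_KEEPINTVL: 15,  # seconds between individual keepalive probes
    socket.TCP_KEEPCNT: 3,     # failed probes before the connection is dropped
    socket.TCP_KEEPIDLE: 60,   # idle seconds before probing begins
}

r = redis.Redis(
    host="localhost",
    port=6379,
    socket_keepalive=True,
    socket_keepalive_options=keepalive_options,
)
```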

View File

@@ -245,10 +245,12 @@ class JiraConnector(LoadConnector, PollConnector):
if self.jira_client is None:
raise ConnectorMissingCredentialError("Jira")
# Quote the project name to handle reserved words
quoted_project = f'"{self.jira_project}"'
start_ind = 0
while True:
doc_batch, fetched_batch_size = fetch_jira_issues_batch(
jql=f"project = {self.jira_project}",
jql=f"project = {quoted_project}",
start_index=start_ind,
jira_client=self.jira_client,
batch_size=self.batch_size,
@@ -276,8 +278,10 @@ class JiraConnector(LoadConnector, PollConnector):
"%Y-%m-%d %H:%M"
)
# Quote the project name to handle reserved words
quoted_project = f'"{self.jira_project}"'
jql = (
f"project = {self.jira_project} AND "
f"project = {quoted_project} AND "
f"updated >= '{start_date_str}' AND "
f"updated <= '{end_date_str}'"
)
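The quoting matters because a Jira project name that collides with a JQL reserved word makes the unquoted query unparseable. A quick illustration with a made-up project name:

```python
# Hypothetical project name that happens to be a JQL keyword.
project = "AND"

unquoted = f"project = {project}"    # project = AND   -> JQL parse error
quoted = f'project = "{project}"'    # project = "AND" -> valid

start_date_str = "2024-10-01 00:00"
end_date_str = "2024-10-02 00:00"
jql = (
    f'project = "{project}" AND '
    f"updated >= '{start_date_str}' AND "
    f"updated <= '{end_date_str}'"
)
```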

View File

@@ -156,6 +156,16 @@ class Deletable(abc.ABC):
Class must implement the ability to delete documents by their unique document ids.
"""
@abc.abstractmethod
def delete_single(self, doc_id: str) -> None:
"""
Given a single document id, hard delete it from the document index
Parameters:
- doc_id: document id as specified by the connector
"""
raise NotImplementedError
@abc.abstractmethod
def delete(self, doc_ids: list[str]) -> None:
"""

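For context, a minimal class satisfying the Deletable contract might look like the toy sketch below; the in-memory store is purely illustrative and not how any real document index here works:

```python
import abc


class Deletable(abc.ABC):
    @abc.abstractmethod
    def delete_single(self, doc_id: str) -> None:
        raise NotImplementedError

    @abc.abstractmethod
    def delete(self, doc_ids: list[str]) -> None:
        raise NotImplementedError


class InMemoryIndex(Deletable):
    """Toy implementation for illustration only."""

    def __init__(self) -> None:
        self._docs: dict[str, str] = {}

    def delete_single(self, doc_id: str) -> None:
        # hard delete one document by its connector-specified id
        self._docs.pop(doc_id, None)

    def delete(self, doc_ids: list[str]) -> None:
        for doc_id in doc_ids:
            self.delete_single(doc_id)
```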
View File

@@ -13,6 +13,7 @@ from typing import cast
import httpx
import requests
from danswer.configs.app_configs import DOCUMENT_INDEX_NAME
from danswer.configs.chat_configs import DOC_TIME_DECAY
from danswer.configs.chat_configs import NUM_RETURNED_HITS
from danswer.configs.chat_configs import TITLE_CONTENT_RATIO
@@ -479,6 +480,66 @@ class VespaIndex(DocumentIndex):
document_ids=doc_ids, index_name=index_name, http_client=http_client
)
def delete_single(self, doc_id: str) -> None:
"""Possibly faster overall than the delete method due to using a single
delete call with a selection query."""
# Vespa deletion is poorly documented ... luckily we found this
# https://docs.vespa.ai/en/operations/batch-delete.html#example
doc_id = replace_invalid_doc_id_characters(doc_id)
# NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for
# indexing / updates / deletes since we have to make a large volume of requests.
index_names = [self.index_name]
if self.secondary_index_name:
index_names.append(self.secondary_index_name)
with httpx.Client(http2=True) as http_client:
for index_name in index_names:
params = httpx.QueryParams(
{
"selection": f"{index_name}.document_id=='{doc_id}'",
"cluster": DOCUMENT_INDEX_NAME,
}
)
total_chunks_deleted = 0
while True:
try:
resp = http_client.delete(
f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}",
params=params,
)
resp.raise_for_status()
except httpx.HTTPStatusError as e:
logger.error(
f"Failed to delete chunk, details: {e.response.text}"
)
raise
resp_data = resp.json()
if "documentCount" in resp_data:
chunks_deleted = resp_data["documentCount"]
total_chunks_deleted += chunks_deleted
# Check for continuation token to handle pagination
if "continuation" not in resp_data:
break # Exit loop if no continuation token
if not resp_data["continuation"]:
break # Exit loop if continuation token is empty
params = params.set("continuation", resp_data["continuation"])
logger.debug(
f"VespaIndex.delete_single: "
f"index={index_name} "
f"doc={doc_id} "
f"chunks_deleted={total_chunks_deleted}"
)
def id_based_retrieval(
self,
chunk_requests: list[VespaChunkRequest],

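The method follows Vespa's documented batch-delete pattern: issue DELETE against /document/v1 with a `selection` expression, then re-issue with the returned `continuation` token until none comes back. A stand-alone sketch of that loop, with the endpoint URL, document type, and cluster name as assumed placeholders:

```python
import httpx

# Assumed placeholders, not the repo's actual values.
ENDPOINT = "http://localhost:8080/document/v1/default/danswer_chunk/docid/"


def delete_doc_chunks(doc_id: str, index_name: str = "danswer_chunk") -> int:
    """Delete every chunk whose document_id matches, paging via continuation."""
    params = httpx.QueryParams(
        {
            "selection": f"{index_name}.document_id=='{doc_id}'",
            "cluster": "danswer_index",
        }
    )
    total_chunks_deleted = 0
    with httpx.Client(http2=True) as client:
        while True:
            resp = client.delete(ENDPOINT, params=params)
            resp.raise_for_status()
            data = resp.json()
            total_chunks_deleted += data.get("documentCount", 0)
            # Vespa returns a continuation token while matches remain.
            if not data.get("continuation"):
                break
            params = params.set("continuation", data["continuation"])
    return total_chunks_deleted
```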
View File

@@ -316,7 +316,9 @@ class Answer:
yield from self._process_llm_stream(
prompt=prompt,
tools=[tool.tool_definition() for tool in self.tools],
# as of now, we don't support multiple tool calls in sequence, which is why
# we don't need to pass this in here
# tools=[tool.tool_definition() for tool in self.tools],
)
return

View File

@@ -290,10 +290,12 @@ class DefaultMultiLLM(LLM):
return litellm.completion(
# model choice
model=f"{self.config.model_provider}/{self.config.model_name}",
api_key=self._api_key,
base_url=self._api_base,
api_version=self._api_version,
custom_llm_provider=self._custom_llm_provider,
# NOTE: have to pass in None instead of empty string for these
# otherwise litellm can have some issues with bedrock
api_key=self._api_key or None,
base_url=self._api_base or None,
api_version=self._api_version or None,
custom_llm_provider=self._custom_llm_provider or None,
# actual input
messages=prompt,
tools=tools,

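The `or None` coercion is the whole fix: optional provider fields tend to arrive as empty strings when unset, and per the commit, litellm mishandles `""` (versus `None`) for bedrock models. A two-line illustration:

```python
# `x or None` maps empty strings (and other falsy values) to None,
# so an unset optional field is passed as "not provided".
api_base = ""  # optional field left blank (assumed example)
print(api_base or None)                   # -> None
print("https://example.invalid" or None)  # -> https://example.invalid
```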
View File

@@ -3,23 +3,23 @@ from typing import Optional
import redis
from redis.client import Redis
from redis.connection import ConnectionPool
from danswer.configs.app_configs import REDIS_DB_NUMBER
from danswer.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL
from danswer.configs.app_configs import REDIS_HOST
from danswer.configs.app_configs import REDIS_PASSWORD
from danswer.configs.app_configs import REDIS_POOL_MAX_CONNECTIONS
from danswer.configs.app_configs import REDIS_PORT
from danswer.configs.app_configs import REDIS_SSL
from danswer.configs.app_configs import REDIS_SSL_CA_CERTS
from danswer.configs.app_configs import REDIS_SSL_CERT_REQS
REDIS_POOL_MAX_CONNECTIONS = 10
from danswer.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS
class RedisPool:
_instance: Optional["RedisPool"] = None
_lock: threading.Lock = threading.Lock()
_pool: ConnectionPool
_pool: redis.BlockingConnectionPool
def __new__(cls) -> "RedisPool":
if not cls._instance:
@@ -45,27 +45,39 @@ class RedisPool:
ssl_ca_certs: str | None = REDIS_SSL_CA_CERTS,
ssl_cert_reqs: str = REDIS_SSL_CERT_REQS,
ssl: bool = False,
) -> redis.ConnectionPool:
) -> redis.BlockingConnectionPool:
"""We use BlockingConnectionPool because it will block and wait for a connection
rather than error if max_connections is reached. This is far more deterministic
behavior and aligned with how we want to use Redis."""
# Using ConnectionPool is not well documented.
# Useful examples: https://github.com/redis/redis-py/issues/780
if ssl:
return redis.ConnectionPool(
return redis.BlockingConnectionPool(
host=host,
port=port,
db=db,
password=password,
max_connections=max_connections,
timeout=None,
health_check_interval=REDIS_HEALTH_CHECK_INTERVAL,
socket_keepalive=True,
socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS,
connection_class=redis.SSLConnection,
ssl_ca_certs=ssl_ca_certs,
ssl_cert_reqs=ssl_cert_reqs,
)
return redis.ConnectionPool(
return redis.BlockingConnectionPool(
host=host,
port=port,
db=db,
password=password,
max_connections=max_connections,
timeout=None,
health_check_interval=REDIS_HEALTH_CHECK_INTERVAL,
socket_keepalive=True,
socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS,
)
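The practical difference: `redis.ConnectionPool` raises `redis.ConnectionError` once `max_connections` connections are checked out, while `redis.BlockingConnectionPool` makes the caller wait for one to be released, with `timeout=None` meaning wait indefinitely, matching the hunk above. A minimal sketch, not the repo's code:

```python
import redis

# Errors under contention: checking out a third connection raises.
strict_pool = redis.ConnectionPool(max_connections=2)

# Blocks under contention: callers queue until a connection is released.
blocking_pool = redis.BlockingConnectionPool(max_connections=2, timeout=None)

client = redis.Redis(connection_pool=blocking_pool)
client.ping()  # assumes a local Redis on the default host/port
```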

View File

@@ -4,7 +4,7 @@ asyncpg==0.27.0
atlassian-python-api==3.37.0
beautifulsoup4==4.12.2
boto3==1.34.84
celery==5.3.4
celery==5.5.0b4
chardet==5.2.0
dask==2023.8.1
ddtrace==2.6.5
@@ -28,7 +28,7 @@ jsonref==1.1.0
langchain==0.1.17
langchain-core==0.1.50
langchain-text-splitters==0.0.1
litellm==1.47.1
litellm==1.48.7
llama-index==0.9.45
Mako==1.2.4
msal==1.28.0

View File

@@ -7,12 +7,13 @@ logfile=/var/log/supervisord.log
# Cannot place this in Celery for now because Celery must run as a single process (see note below)
# Indexing uses multi-processing to speed things up
[program:document_indexing]
environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true,LOG_FILE_NAME=document_indexing
environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true
command=python danswer/background/update.py
stdout_logfile=/var/log/document_indexing.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
autorestart=true
# Background jobs that must be run async due to long time to completion
# NOTE: due to an issue with Celery + SQLAlchemy
# (https://github.com/celery/celery/issues/7007#issuecomment-1740139367)
@@ -30,25 +31,25 @@ command=celery -A danswer.background.celery.celery_run:celery_app worker
--concurrency=4
--prefetch-multiplier=1
--loglevel=INFO
--logfile=/var/log/celery_worker_primary_supervisor.log
--hostname=primary@%%n
-Q celery
environment=LOG_FILE_NAME=celery_worker_primary
stdout_logfile=/var/log/celery_worker_primary.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
autorestart=true
startsecs=10
stopasgroup=true
[program:celery_worker_light]
command=celery -A danswer.background.celery.celery_run:celery_app worker
--pool=threads
--concurrency=16
--prefetch-multiplier=8
--loglevel=INFO
--logfile=/var/log/celery_worker_light_supervisor.log
--hostname=light@%%n
-Q vespa_metadata_sync,connector_deletion
environment=LOG_FILE_NAME=celery_worker_light
command=bash -c "celery -A danswer.background.celery.celery_run:celery_app worker \
--pool=threads \
--concurrency=${CELERY_WORKER_LIGHT_CONCURRENCY:-24} \
--prefetch-multiplier=${CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER:-8} \
--loglevel=INFO \
--hostname=light@%%n \
-Q vespa_metadata_sync,connector_deletion"
stdout_logfile=/var/log/celery_worker_light.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
autorestart=true
startsecs=10
@@ -60,10 +61,10 @@ command=celery -A danswer.background.celery.celery_run:celery_app worker
--concurrency=4
--prefetch-multiplier=1
--loglevel=INFO
--logfile=/var/log/celery_worker_heavy_supervisor.log
--hostname=heavy@%%n
-Q connector_pruning
environment=LOG_FILE_NAME=celery_worker_heavy
stdout_logfile=/var/log/celery_worker_heavy.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
autorestart=true
startsecs=10
@@ -71,9 +72,9 @@ stopasgroup=true
# Job scheduler for periodic tasks
[program:celery_beat]
command=celery -A danswer.background.celery.celery_run:celery_app beat
--logfile=/var/log/celery_beat_supervisor.log
environment=LOG_FILE_NAME=celery_beat
command=celery -A danswer.background.celery.celery_run:celery_app beat
stdout_logfile=/var/log/celery_beat.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
startsecs=10
stopasgroup=true
@@ -84,7 +85,8 @@ stopasgroup=true
# More details on setup here: https://docs.danswer.dev/slack_bot_setup
[program:slack_bot]
command=python danswer/danswerbot/slack/listener.py
environment=LOG_FILE_NAME=slack_bot
stdout_logfile=/var/log/slack_bot.log
stdout_logfile_maxbytes=16MB
redirect_stderr=true
autorestart=true
startretries=5
@@ -94,17 +96,12 @@ startsecs=60
# No log rotation here, since it's stdout it's handled by the Docker container logging
[program:log-redirect-handler]
command=tail -qF
/var/log/document_indexing_info.log
/var/log/celery_beat_supervisor.log
/var/log/celery_worker_primary_supervisor.log
/var/log/celery_worker_light_supervisor.log
/var/log/celery_worker_heavy_supervisor.log
/var/log/celery_beat_debug.log
/var/log/celery_worker_primary_debug.log
/var/log/celery_worker_light_debug.log
/var/log/celery_worker_heavy_debug.log
/var/log/slack_bot_debug.log
/var/log/celery_beat.log
/var/log/celery_worker_primary.log
/var/log/celery_worker_light.log
/var/log/celery_worker_heavy.log
/var/log/document_indexing.log
/var/log/slack_bot.log
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
redirect_stderr=true
autorestart=true
stdout_logfile_maxbytes = 0 # must be set to 0 when stdout_logfile=/dev/stdout
autorestart=true

View File

@@ -0,0 +1,48 @@
import os
import time
import pytest
from danswer.configs.constants import DocumentSource
from danswer.connectors.danswer_jira.connector import JiraConnector
@pytest.fixture
def jira_connector() -> JiraConnector:
connector = JiraConnector(
"https://danswerai.atlassian.net/jira/software/c/projects/AS/boards/6",
comment_email_blacklist=[],
)
connector.load_credentials(
{
"jira_user_email": os.environ["JIRA_USER_EMAIL"],
"jira_api_token": os.environ["JIRA_API_TOKEN"],
}
)
return connector
def test_jira_connector_basic(jira_connector: JiraConnector) -> None:
doc_batch_generator = jira_connector.poll_source(0, time.time())
doc_batch = next(doc_batch_generator)
with pytest.raises(StopIteration):
next(doc_batch_generator)
assert len(doc_batch) == 1
doc = doc_batch[0]
assert doc.id == "https://danswerai.atlassian.net/browse/AS-2"
assert doc.semantic_identifier == "test123small"
assert doc.source == DocumentSource.JIRA
assert doc.metadata == {"priority": "Medium", "status": "Backlog"}
assert doc.secondary_owners is None
assert doc.title is None
assert doc.from_ingestion_api is False
assert doc.additional_info is None
assert len(doc.sections) == 1
section = doc.sections[0]
assert section.text == "example_text\n"
assert section.link == "https://danswerai.atlassian.net/browse/AS-2"

View File

@@ -81,6 +81,6 @@ RUN pip install --no-cache-dir --upgrade \
-r /tmp/dev-requirements.txt
COPY ./tests/integration /app/tests/integration
ENV PYTHONPATH /app
ENV PYTHONPATH=/app
CMD ["pytest", "-s", "/app/tests/integration"]

View File

@@ -87,6 +87,7 @@ services:
- LOG_ENDPOINT_LATENCY=${LOG_ENDPOINT_LATENCY:-}
- LOG_POSTGRES_LATENCY=${LOG_POSTGRES_LATENCY:-}
- LOG_POSTGRES_CONN_COUNTS=${LOG_POSTGRES_CONN_COUNTS:-}
- CELERY_BROKER_POOL_LIMIT=${CELERY_BROKER_POOL_LIMIT:-}
# Chat Configs
- HARD_DELETE_CHATS=${HARD_DELETE_CHATS:-}

View File

@@ -80,7 +80,8 @@ services:
# If set to `true` will enable additional logs about Vespa query performance
# (time spent on finding the right docs + time spent fetching summaries from disk)
- LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
- CELERY_BROKER_POOL_LIMIT=${CELERY_BROKER_POOL_LIMIT:-}
# Chat Configs
- HARD_DELETE_CHATS=${HARD_DELETE_CHATS:-}

View File

@@ -25,10 +25,10 @@ COPY . .
RUN npm ci
# needed to get the `standalone` dir we expect later
ENV NEXT_PRIVATE_STANDALONE true
ENV NEXT_PRIVATE_STANDALONE=true
# Disable automatic telemetry collection
ENV NEXT_TELEMETRY_DISABLED 1
ENV NEXT_TELEMETRY_DISABLED=1
# Environment variables must be present at build time
# https://github.com/vercel/next.js/discussions/14030
@@ -77,7 +77,7 @@ RUN rm -rf /usr/local/lib/node_modules
# ENV NODE_ENV production
# Disable automatic telemetry collection
ENV NEXT_TELEMETRY_DISABLED 1
ENV NEXT_TELEMETRY_DISABLED=1
# Don't run production as root
RUN addgroup --system --gid 1001 nodejs