1
0
forked from github/onyx

Compare commits

...

6 Commits

Author SHA1 Message Date
pablodanswer
92be55c9d7 k 2024-12-01 17:54:22 -08:00
pablodanswer
dc8fa4c3cb update workflows 2024-12-01 17:50:52 -08:00
pablodanswer
c5aa64e3fb fix tests 2024-12-01 17:47:58 -08:00
pablodanswer
f4dea0821f k 2024-12-01 17:17:58 -08:00
pablodanswer
1ed4002902 nits 2024-12-01 17:11:37 -08:00
pablodanswer
952893d7f0 tests fixed 2024-12-01 17:10:05 -08:00
9 changed files with 210 additions and 263 deletions

111
.github/workflows/multi-tenant-tests.yml vendored Normal file
View File

@@ -0,0 +1,111 @@
name: Run Multi-Tenant Integration Tests
on:
workflow_dispatch:
pull_request:
branches:
- main
- "release/**"
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
jobs:
multi-tenant-integration-tests:
runs-on:
[runs-on, runner=8cpu-linux-x64, ram=16, "run-id=${{ github.run_id }}"]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Pull Required Docker Images
run: |
docker pull danswer/danswer-backend:latest
docker tag danswer/danswer-backend:latest danswer/danswer-backend:test
docker pull danswer/danswer-model-server:latest
docker tag danswer/danswer-model-server:latest danswer/danswer-model-server:test
docker pull danswer/danswer-web-server:latest
docker tag danswer/danswer-web-server:latest danswer/danswer-web-server:test
docker pull danswer/control-tenants-service:latest
docker tag danswer/control-tenants-service:latest danswer/control-tenants-service:test
- name: Build Integration Test Docker Image
uses: ./.github/actions/custom-build-and-push
with:
context: ./backend
file: ./backend/tests/integration/Dockerfile
platforms: linux/amd64
tags: danswer/danswer-integration:test
push: false
load: true
- name: Start Docker Containers for Multi-Tenant Tests
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
MULTI_TENANT=true \
INTEGRATION_TEST_MODE=true \
AUTH_TYPE=basic \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
CONTROL_TENANTS_SERVICE_IMAGE=danswer/control-tenants-service:test \
docker compose -f docker-compose.dev.yml -f docker-compose.multi-tenant.yml -p danswer-stack up -d
- name: Run Multi-Tenant Integration Tests
run: |
echo "Running multi-tenant integration tests..."
docker run --rm --network danswer-stack_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e VESPA_HOST=index \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-e TEST_WEB_HOSTNAME=test-runner \
-e AUTH_TYPE=cloud \
-e MULTI_TENANT=true \
danswer/danswer-integration:test \
/app/tests/integration/multitenant_tests
continue-on-error: true
id: run_multitenant_tests
- name: Check Multi-Tenant Test Results
run: |
if [ ${{ steps.run_multitenant_tests.outcome }} == 'failure' ]; then
echo "Integration tests failed. Exiting with error."
exit 1
else
echo "All integration tests passed successfully."
fi
- name: Stop Docker Containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
- name: Upload Logs
if: success() || failure()
uses: actions/upload-artifact@v4
with:
name: docker-logs
path: ${{ github.workspace }}/docker-compose.log

View File

@@ -8,7 +8,7 @@ on:
pull_request:
branches:
- main
- 'release/**'
- "release/**"
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -16,11 +16,12 @@ env:
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
jobs:
integration-tests:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,ram=16,"run-id=${{ github.run_id }}"]
runs-on:
[runs-on, runner=8cpu-linux-x64, ram=16, "run-id=${{ github.run_id }}"]
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -36,9 +37,9 @@ jobs:
# tag every docker image with "test" so that we can spin up the correct set
# of images during testing
# We don't need to build the Web Docker image since it's not yet used
# in the integration tests. We have a separate action to verify that it builds
# in the integration tests. We have a separate action to verify that it builds
# successfully.
- name: Pull Web Docker image
run: |
@@ -50,7 +51,7 @@ jobs:
# https://runs-on.com/caching/s3-cache-for-github-actions/
# https://runs-on.com/caching/docker/
# https://github.com/moby/buildkit#s3-cache-experimental
# images are built and run locally for testing purposes. Not pushed.
- name: Build Backend Docker image
uses: ./.github/actions/custom-build-and-push
@@ -75,7 +76,7 @@ jobs:
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build integration test Docker image
uses: ./.github/actions/custom-build-and-push
with:
@@ -88,58 +89,7 @@ jobs:
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# Start containers for multi-tenant tests
- name: Start Docker containers for multi-tenant tests
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
MULTI_TENANT=true \
AUTH_TYPE=basic \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
id: start_docker_multi_tenant
# In practice, `cloud` Auth type would require OAUTH credentials to be set.
- name: Run Multi-Tenant Integration Tests
run: |
echo "Running integration tests..."
docker run --rm --network danswer-stack_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e VESPA_HOST=index \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-e TEST_WEB_HOSTNAME=test-runner \
-e AUTH_TYPE=cloud \
-e MULTI_TENANT=true \
danswer/danswer-integration:test \
/app/tests/integration/multitenant_tests
continue-on-error: true
id: run_multitenant_tests
- name: Check multi-tenant test results
run: |
if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then
echo "Integration tests failed. Exiting with error."
exit 1
else
echo "All integration tests passed successfully."
fi
- name: Stop multi-tenant Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
- name: Start Docker containers
- name: Start Docker containers
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
@@ -153,12 +103,12 @@ jobs:
- name: Wait for service to be ready
run: |
echo "Starting wait-for-service script..."
docker logs -f danswer-stack-api_server-1 &
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
@@ -229,7 +179,7 @@ jobs:
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
- name: Upload logs
if: success() || failure()
uses: actions/upload-artifact@v4

View File

@@ -33,6 +33,7 @@ from danswer.server.documents.models import ConnectorBase
from danswer.utils.logger import setup_logger
from danswer.utils.retry_wrapper import retry_builder
from danswer.utils.variable_functionality import fetch_versioned_implementation
from ee.danswer.configs.app_configs import INTEGRATION_TEST_MODE
logger = setup_logger()
@@ -127,6 +128,9 @@ def seed_initial_documents(
- Indexing the documents into Vespa
- Create a fake index attempt with fake times
"""
if INTEGRATION_TEST_MODE:
return
logger.info("Seeding initial documents")
kv_store = get_kv_store()

View File

@@ -19,3 +19,5 @@ STRIPE_PRICE_ID = os.environ.get("STRIPE_PRICE")
OPENAI_DEFAULT_API_KEY = os.environ.get("OPENAI_DEFAULT_API_KEY")
ANTHROPIC_DEFAULT_API_KEY = os.environ.get("ANTHROPIC_DEFAULT_API_KEY")
COHERE_DEFAULT_API_KEY = os.environ.get("COHERE_DEFAULT_API_KEY")
INTEGRATION_TEST_MODE = os.environ.get("INTEGRATION_TEST_MODE")

View File

@@ -1,84 +0,0 @@
from datetime import datetime
from datetime import timedelta
import jwt
import requests
from danswer.server.manage.models import AllUsersResponse
from danswer.server.models import FullUserSnapshot
from danswer.server.models import InvitedUserSnapshot
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import GENERAL_HEADERS
from tests.integration.common_utils.test_models import DATestUser
def generate_auth_token() -> str:
payload = {
"iss": "control_plane",
"exp": datetime.utcnow() + timedelta(minutes=5),
"iat": datetime.utcnow(),
"scope": "tenant:create",
}
token = jwt.encode(payload, "", algorithm="HS256")
return token
class TenantManager:
@staticmethod
def create(
tenant_id: str | None = None,
initial_admin_email: str | None = None,
referral_source: str | None = None,
) -> dict[str, str]:
body = {
"tenant_id": tenant_id,
"initial_admin_email": initial_admin_email,
"referral_source": referral_source,
}
token = generate_auth_token()
headers = {
"Authorization": f"Bearer {token}",
"X-API-KEY": "",
"Content-Type": "application/json",
}
response = requests.post(
url=f"{API_SERVER_URL}/tenants/create",
json=body,
headers=headers,
)
response.raise_for_status()
return response.json()
@staticmethod
def get_all_users(
user_performing_action: DATestUser | None = None,
) -> AllUsersResponse:
response = requests.get(
url=f"{API_SERVER_URL}/manage/users",
headers=user_performing_action.headers
if user_performing_action
else GENERAL_HEADERS,
)
response.raise_for_status()
data = response.json()
return AllUsersResponse(
accepted=[FullUserSnapshot(**user) for user in data["accepted"]],
invited=[InvitedUserSnapshot(**user) for user in data["invited"]],
accepted_pages=data["accepted_pages"],
invited_pages=data["invited_pages"],
)
@staticmethod
def verify_user_in_tenant(
user: DATestUser, user_performing_action: DATestUser | None = None
) -> None:
all_users = TenantManager.get_all_users(user_performing_action)
for accepted_user in all_users.accepted:
if accepted_user.email == user.email and accepted_user.id == user.id:
return
raise ValueError(f"User {user.email} not found in tenant")

View File

@@ -211,7 +211,6 @@ def reset_postgres_multitenant() -> None:
for schema in tenant_schemas:
schema_name = schema[0]
cur.execute(f'DROP SCHEMA "{schema_name}" CASCADE')
cur.close()
conn.close()

View File

@@ -4,7 +4,6 @@ from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.tenant import TenantManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestAPIKey
from tests.integration.common_utils.test_models import DATestCCPair
@@ -13,66 +12,49 @@ from tests.integration.common_utils.test_models import DATestUser
def test_multi_tenant_access_control(reset_multitenant: None) -> None:
# Create Tenant 1 and its Admin User
TenantManager.create("tenant_dev1", "test1@test.com", "Data Plane Registration")
test_user1: DATestUser = UserManager.create(name="test1", email="test1@test.com")
assert UserManager.verify_role(test_user1, UserRole.ADMIN)
# Create Tenants and Admin Users
test_user1: DATestUser = UserManager.create(name="test1", email="test_1@test.com")
test_user2: DATestUser = UserManager.create(name="test2", email="test_2@test.com")
# Create Tenant 2 and its Admin User
TenantManager.create("tenant_dev2", "test2@test.com", "Data Plane Registration")
test_user2: DATestUser = UserManager.create(name="test2", email="test2@test.com")
assert UserManager.verify_role(test_user1, UserRole.ADMIN)
assert UserManager.verify_role(test_user2, UserRole.ADMIN)
# Create connectors for Tenant 1
# Create connectors and seed documents for Tenant 1
cc_pair_1: DATestCCPair = CCPairManager.create_from_scratch(
user_performing_action=test_user1,
)
api_key_1: DATestAPIKey = APIKeyManager.create(
user_performing_action=test_user1,
user_performing_action=test_user1
)
api_key_1: DATestAPIKey = APIKeyManager.create(user_performing_action=test_user1)
api_key_1.headers.update(test_user1.headers)
LLMProviderManager.create(user_performing_action=test_user1)
# Seed documents for Tenant 1
cc_pair_1.documents = []
doc1_tenant1 = DocumentManager.seed_doc_with_content(
cc_pair=cc_pair_1,
content="Tenant 1 Document Content",
api_key=api_key_1,
)
doc2_tenant1 = DocumentManager.seed_doc_with_content(
cc_pair=cc_pair_1,
content="Tenant 1 Document Content",
api_key=api_key_1,
)
cc_pair_1.documents.extend([doc1_tenant1, doc2_tenant1])
docs_tenant1 = [
DocumentManager.seed_doc_with_content(
cc_pair=cc_pair_1, content="Tenant 1 Document Content", api_key=api_key_1
)
for _ in range(2)
]
cc_pair_1.documents.extend(docs_tenant1)
# Create connectors for Tenant 2
# Create connectors and seed documents for Tenant 2
cc_pair_2: DATestCCPair = CCPairManager.create_from_scratch(
user_performing_action=test_user2,
)
api_key_2: DATestAPIKey = APIKeyManager.create(
user_performing_action=test_user2,
user_performing_action=test_user2
)
api_key_2: DATestAPIKey = APIKeyManager.create(user_performing_action=test_user2)
api_key_2.headers.update(test_user2.headers)
LLMProviderManager.create(user_performing_action=test_user2)
# Seed documents for Tenant 2
cc_pair_2.documents = []
doc1_tenant2 = DocumentManager.seed_doc_with_content(
cc_pair=cc_pair_2,
content="Tenant 2 Document Content",
api_key=api_key_2,
)
doc2_tenant2 = DocumentManager.seed_doc_with_content(
cc_pair=cc_pair_2,
content="Tenant 2 Document Content",
api_key=api_key_2,
)
cc_pair_2.documents.extend([doc1_tenant2, doc2_tenant2])
docs_tenant2 = [
DocumentManager.seed_doc_with_content(
cc_pair=cc_pair_2, content="Tenant 2 Document Content", api_key=api_key_2
)
for _ in range(2)
]
cc_pair_2.documents.extend(docs_tenant2)
tenant1_doc_ids = {doc1_tenant1.id, doc2_tenant1.id}
tenant2_doc_ids = {doc1_tenant2.id, doc2_tenant2.id}
tenant1_doc_ids = {doc.id for doc in docs_tenant1}
tenant2_doc_ids = {doc.id for doc in docs_tenant2}
# Create chat sessions for each user
chat_session1: DATestChatSession = ChatSessionManager.create(
@@ -82,69 +64,63 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None:
user_performing_action=test_user2
)
# User 1 sends a message and gets a response
# Test access for Tenant 1
response1 = ChatSessionManager.send_message(
chat_session_id=chat_session1.id,
message="What is in Tenant 1's documents?",
user_performing_action=test_user1,
)
# Assert that the search tool was used
assert response1.tool_name == "run_search"
response_doc_ids = {doc["document_id"] for doc in response1.tool_result or []}
response1_doc_ids = {doc["document_id"] for doc in response1.tool_result or []}
assert tenant1_doc_ids.issubset(
response_doc_ids
response1_doc_ids
), "Not all Tenant 1 document IDs are in the response"
assert not response_doc_ids.intersection(
assert not response1_doc_ids.intersection(
tenant2_doc_ids
), "Tenant 2 document IDs should not be in the response"
# Assert that the contents are correct
), "Tenant 2's document IDs should not be in the response"
for doc in response1.tool_result or []:
assert doc["content"] == "Tenant 1 Document Content"
# User 2 sends a message and gets a response
# Test access for Tenant 2
response2 = ChatSessionManager.send_message(
chat_session_id=chat_session2.id,
message="What is in Tenant 2's documents?",
user_performing_action=test_user2,
)
# Assert that the search tool was used
assert response2.tool_name == "run_search"
# Assert that the tool_result contains Tenant 2's documents
response_doc_ids = {doc["document_id"] for doc in response2.tool_result or []}
response2_doc_ids = {doc["document_id"] for doc in response2.tool_result or []}
assert tenant2_doc_ids.issubset(
response_doc_ids
response2_doc_ids
), "Not all Tenant 2 document IDs are in the response"
assert not response_doc_ids.intersection(
assert not response2_doc_ids.intersection(
tenant1_doc_ids
), "Tenant 1 document IDs should not be in the response"
# Assert that the contents are correct
), "Tenant 1's document IDs should not be in the response"
for doc in response2.tool_result or []:
assert doc["content"] == "Tenant 2 Document Content"
# User 1 tries to access Tenant 2's documents
response_cross = ChatSessionManager.send_message(
# Test cross-tenant access attempts
response_cross1 = ChatSessionManager.send_message(
chat_session_id=chat_session1.id,
message="What is in Tenant 2's documents?",
user_performing_action=test_user1,
)
# Assert that the search tool was used
assert response_cross.tool_name == "run_search"
# Assert that the tool_result is empty or does not contain Tenant 2's documents
response_doc_ids = {doc["document_id"] for doc in response_cross.tool_result or []}
# Ensure none of Tenant 2's document IDs are in the response
assert not response_doc_ids.intersection(tenant2_doc_ids)
assert response_cross1.tool_name == "run_search"
response_cross1_doc_ids = {
doc["document_id"] for doc in response_cross1.tool_result or []
}
assert not response_cross1_doc_ids.intersection(
tenant2_doc_ids
), "Tenant 2's document IDs should not be in the response"
# User 2 tries to access Tenant 1's documents
response_cross2 = ChatSessionManager.send_message(
chat_session_id=chat_session2.id,
message="What is in Tenant 1's documents?",
user_performing_action=test_user2,
)
# Assert that the search tool was used
assert response_cross2.tool_name == "run_search"
# Assert that the tool_result is empty or does not contain Tenant 1's documents
response_doc_ids = {doc["document_id"] for doc in response_cross2.tool_result or []}
# Ensure none of Tenant 1's document IDs are in the response
assert not response_doc_ids.intersection(tenant1_doc_ids)
response_cross2_doc_ids = {
doc["document_id"] for doc in response_cross2.tool_result or []
}
assert not response_cross2_doc_ids.intersection(
tenant1_doc_ids
), "Tenant 1's document IDs should not be in the response"

View File

@@ -1,41 +0,0 @@
from danswer.configs.constants import DocumentSource
from danswer.db.enums import AccessType
from danswer.db.models import UserRole
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.tenant import TenantManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser
# Test flow from creating tenant to registering as a user
def test_tenant_creation(reset_multitenant: None) -> None:
TenantManager.create("tenant_dev", "test@test.com", "Data Plane Registration")
test_user: DATestUser = UserManager.create(name="test", email="test@test.com")
assert UserManager.verify_role(test_user, UserRole.ADMIN)
test_credential = CredentialManager.create(
name="admin_test_credential",
source=DocumentSource.FILE,
curator_public=False,
user_performing_action=test_user,
)
test_connector = ConnectorManager.create(
name="admin_test_connector",
source=DocumentSource.FILE,
access_type=AccessType.PRIVATE,
user_performing_action=test_user,
)
test_cc_pair = CCPairManager.create(
connector_id=test_connector.id,
credential_id=test_credential.id,
name="admin_test_cc_pair",
access_type=AccessType.PRIVATE,
user_performing_action=test_user,
)
CCPairManager.verify(cc_pair=test_cc_pair, user_performing_action=test_user)

View File

@@ -0,0 +1,30 @@
version: "3"
services:
control-tenants-service:
image: danswer/control-tenants-service:test
environment:
- POSTGRES_HOST=relational_db
- POSTGRES_PORT=5432
ports:
- "8082:8082"
depends_on:
relational_db:
condition: service_healthy
relational_db:
image: postgres:15.2-alpine
restart: always
ports:
- "5434:5432"
volumes:
- db_volume:/var/lib/postgresql/data
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres -d postgres"]
interval: 10s
timeout: 5s
retries: 5
start_period: 10s
command: ["postgres", "-c", "log_statement=all"]
volumes:
db_volume: