Compare commits

..

60 Commits

Author SHA1 Message Date
Dane Urban
908d360011 . 2026-02-06 17:52:23 -08:00
Dane Urban
30578bdf9a n 2026-02-06 17:38:36 -08:00
Dane Urban
aebde89432 nits 2026-02-06 16:25:00 -08:00
Dane Urban
4a4b4bb378 t 2026-02-06 13:39:05 -08:00
Dane Urban
a8d231976a nit 2026-02-06 09:56:16 -08:00
Dane Urban
9c8ae5bb4b nit 2026-02-05 17:07:24 -08:00
Dane Urban
0fc1fa3d36 nits 2026-02-05 10:28:59 -08:00
Dane Urban
94633698c3 nit 2026-02-03 00:42:20 -08:00
Dane Urban
6ae15589cd nits 2026-02-02 18:56:22 -08:00
Dane Urban
c24a8bb228 Add change 2026-02-02 18:55:38 -08:00
Dane Urban
01945abd86 fix test 2026-02-02 16:49:31 -08:00
Dane Urban
658632195f nit 2026-02-02 16:47:21 -08:00
Dane Urban
ec6fd01ba4 Merge branch 'llm_provider_refactor_1' into llm_provider_refactor_2 2026-02-02 15:02:12 -08:00
Dane Urban
148e6fb97d nit 2026-02-02 15:01:57 -08:00
Dane Urban
6598c1a48d nit 2026-02-02 14:59:42 -08:00
Dane Urban
497ce43bd8 Fix some tests 2026-02-02 13:36:42 -08:00
Dane Urban
8634cb0446 Merge branch 'llm_provider_refactor_1' into llm_provider_refactor_2 2026-02-02 13:28:29 -08:00
Dane Urban
8d56fd3dc6 . 2026-02-02 13:27:08 -08:00
Dane Urban
a7579a99d0 Resolve merge conflicts 2026-02-02 12:01:44 -08:00
Dane Urban
3533c10da4 n 2026-02-02 11:48:28 -08:00
Dane Urban
7b0414bf0d fix migration 2026-02-02 11:48:08 -08:00
Dane Urban
b500ea537a nits 2026-02-02 11:46:52 -08:00
Dane Urban
abd6d55add Merge branch 'flow_mapping_table' into llm_provider_refactor_1 2026-02-02 11:44:27 -08:00
Dane Urban
f15b6b8034 Merge branch 'main' into llm_provider_refactor_1 2026-02-02 11:44:17 -08:00
Dane Urban
fb40485f25 Update this 2026-02-02 11:43:58 -08:00
Dane Urban
22e85f1f28 Merge branch 'main' into flow_mapping_table 2026-02-02 11:43:24 -08:00
Dane Urban
2ef7c3e6f3 rename 2026-02-02 11:40:21 -08:00
Dane Urban
92a471ed2b . 2026-02-02 11:35:09 -08:00
Dane Urban
d1b7e529a4 nit 2026-02-02 11:32:33 -08:00
Dane Urban
95c3579264 nits 2026-02-02 11:19:51 -08:00
Dane Urban
8802e5cad3 nit 2026-02-02 11:02:58 -08:00
Dane Urban
a41b4bbc82 fix tests 2026-02-01 22:59:15 -08:00
Dane Urban
c026c077b5 nit 2026-02-01 22:53:38 -08:00
Dane Urban
3eee539a86 Merge branch 'llm_provider_refactor_1' into llm_provider_refactor_2 2026-02-01 22:13:54 -08:00
Dane Urban
143e7a0d72 nits 2026-02-01 22:13:21 -08:00
Dane Urban
4572358038 nits 2026-02-01 22:10:37 -08:00
Dane Urban
1753f94c11 start fixes 2026-02-01 21:51:02 -08:00
Dane Urban
120ddf2ef6 Merge branch 'llm_provider_refactor_1' into llm_provider_refactor_2 2026-02-01 21:42:40 -08:00
Dane Urban
2cce5bc58f Merge branch 'main' into flow_mapping_table 2026-02-01 21:38:54 -08:00
Dane Urban
383a6001d2 nit 2026-02-01 21:37:35 -08:00
Dane Urban
3a6f45bfca Merge branch 'main' into llm_provider_refactor_1 2026-02-01 19:36:43 -08:00
Dane Urban
e06b5ef202 Merge branch 'flow_mapping_table' into llm_provider_refactor_1 2026-02-01 15:23:59 -08:00
Dane Urban
c13ce816fa fix revision id 2026-02-01 13:55:01 -08:00
Dane Urban
39f3e872ec Merge branch 'main' into flow_mapping_table 2026-02-01 13:53:53 -08:00
Dane Urban
b033c00217 . 2026-02-01 13:52:58 -08:00
Dane Urban
6d47c5f21a nit 2026-02-01 13:51:54 -08:00
Dane Urban
0645540e24 . 2026-01-31 23:44:17 -08:00
Dane Urban
a2c0fc4df0 . 2026-01-31 19:23:46 -08:00
Dane Urban
7dccc88b35 . 2026-01-31 18:24:42 -08:00
Dane Urban
ac617a51ce nits 2026-01-31 17:30:49 -08:00
Dane Urban
339a111a8f . 2026-01-30 18:19:03 -08:00
Dane Urban
09b7e6fc9b fix revision id 2026-01-30 17:39:02 -08:00
Dane Urban
135238014f Merge branch 'main' into flow_mapping_table 2026-01-30 17:38:20 -08:00
Dane Urban
303e37bf53 migrate 2026-01-30 17:38:15 -08:00
Dane Urban
6a888e9900 nit 2026-01-30 17:01:22 -08:00
Dane Urban
e90a7767c6 nit 2026-01-30 15:35:31 -08:00
Dane Urban
1ded3af63c nit 2026-01-30 14:22:27 -08:00
Dane Urban
c53546c000 nit 2026-01-30 13:03:05 -08:00
Dane Urban
9afa12edda nit 2026-01-30 13:02:48 -08:00
Dane Urban
32046de962 nit 2026-01-30 13:01:36 -08:00
1314 changed files with 23188 additions and 86237 deletions

View File

@@ -1,16 +0,0 @@
{
"mcpServers": {
"Playwright": {
"command": "npx",
"args": [
"@playwright/mcp"
]
},
"Linear": {
"url": "https://mcp.linear.app/mcp"
},
"Figma": {
"url": "https://mcp.figma.com/mcp"
}
}
}

4
.github/CODEOWNERS vendored
View File

@@ -6,5 +6,5 @@
/web/STANDARDS.md @raunakab @Weves
# Agent context files
/CLAUDE.md @Weves
/AGENTS.md @Weves
/CLAUDE.md.template @Weves
/AGENTS.md.template @Weves

View File

@@ -82,7 +82,7 @@ jobs:
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
IS_STABLE=true
fi
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta(\.[0-9]+)?$ ]]; then
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]]; then
IS_BETA=true
fi
@@ -91,8 +91,8 @@ jobs:
BUILD_WEB_CLOUD=true
else
BUILD_WEB=true
# Only build desktop for semver tags (excluding beta)
if [[ "$IS_VERSION_TAG" == "true" ]] && [[ "$IS_BETA" != "true" ]]; then
# Skip desktop builds on beta tags and nightly runs
if [[ "$IS_BETA" != "true" ]] && [[ "$IS_NIGHTLY" != "true" ]]; then
BUILD_DESKTOP=true
fi
fi
@@ -174,10 +174,23 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
MONITOR_DEPLOYMENTS_WEBHOOK, deploy/monitor-deployments-webhook
parse-json-secrets: true
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
webhook-url: ${{ env.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: "• check-version-tag"
title: "🚨 Version Tag Check Failed"
ref-name: ${{ github.ref_name }}
@@ -249,7 +262,7 @@ jobs:
xdg-utils
- name: setup node
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v6.2.0
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v6.1.0
with:
node-version: 24
package-manager-cache: false
@@ -409,7 +422,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -482,7 +495,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -542,7 +555,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -620,7 +633,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -640,7 +653,6 @@ jobs:
NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
NEXT_PUBLIC_RECAPTCHA_SITE_KEY=${{ vars.NEXT_PUBLIC_RECAPTCHA_SITE_KEY }}
NEXT_PUBLIC_GTM_ENABLED=true
NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
@@ -702,7 +714,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -722,7 +734,6 @@ jobs:
NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
NEXT_PUBLIC_RECAPTCHA_SITE_KEY=${{ vars.NEXT_PUBLIC_RECAPTCHA_SITE_KEY }}
NEXT_PUBLIC_GTM_ENABLED=true
NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
@@ -771,7 +782,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -846,7 +857,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -918,7 +929,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -977,7 +988,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1055,7 +1066,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1128,7 +1139,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1189,7 +1200,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1269,7 +1280,7 @@ jobs:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1348,7 +1359,7 @@ jobs:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1411,7 +1422,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1698,6 +1709,19 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
MONITOR_DEPLOYMENTS_WEBHOOK, deploy/monitor-deployments-webhook
parse-json-secrets: true
- name: Determine failed jobs
id: failed-jobs
shell: bash
@@ -1763,7 +1787,7 @@ jobs:
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
webhook-url: ${{ env.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: ${{ steps.failed-jobs.outputs.jobs }}
title: "🚨 Deployment Workflow Failed"
ref-name: ${{ github.ref_name }}

View File

@@ -24,7 +24,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -24,7 +24,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -33,7 +33,7 @@ jobs:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # ratchet:actions/setup-python@v6
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
cache: 'pip'
@@ -97,7 +97,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -40,16 +40,13 @@ jobs:
- name: Generate OpenAPI schema and Python client
shell: bash
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
env:
LICENSE_ENFORCEMENT_ENABLED: "false"
run: |
ods openapi all
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -45,12 +45,12 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
with:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020
with:
node-version: 24
cache: "npm" # zizmor: ignore[cache-poisoning]
@@ -63,7 +63,7 @@ jobs:
targets: ${{ matrix.target }}
- name: Cache Cargo registry and build
uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # zizmor: ignore[cache-poisoning]
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # zizmor: ignore[cache-poisoning]
with:
path: |
~/.cargo/bin/
@@ -105,7 +105,7 @@ jobs:
- name: Upload build artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
with:
name: desktop-build-${{ matrix.platform }}-${{ github.run_id }}
path: |

View File

@@ -110,7 +110,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -118,7 +118,6 @@ jobs:
- name: Create .env file for Docker Compose
run: |
cat <<EOF > deployment/docker_compose/.env
COMPOSE_PROFILES=s3-filestore
CODE_INTERPRETER_BETA_ENABLED=true
DISABLE_TELEMETRY=true
EOF

View File

@@ -46,7 +46,6 @@ jobs:
timeout-minutes: 45
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
editions: ${{ steps.set-editions.outputs.editions }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
@@ -57,7 +56,7 @@ jobs:
id: set-matrix
run: |
# Find all leaf-level directories in both test directories
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" ! -name "mcp" ! -name "no_vectordb" -exec basename {} \; | sort)
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" ! -name "mcp" -exec basename {} \; | sort)
connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
# Create JSON array with directory info
@@ -73,16 +72,6 @@ jobs:
all_dirs="[${all_dirs%,}]"
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
- name: Determine editions to test
id: set-editions
run: |
# On PRs, only run EE tests. On merge_group and tags, run both EE and MIT.
if [ "${{ github.event_name }}" = "pull_request" ]; then
echo 'editions=["ee"]' >> $GITHUB_OUTPUT
else
echo 'editions=["ee","mit"]' >> $GITHUB_OUTPUT
fi
build-backend-image:
runs-on:
[
@@ -120,7 +109,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -180,7 +169,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -225,7 +214,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -278,7 +267,7 @@ jobs:
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- ${{ format('run-id={0}-integration-tests-{1}-job-{2}', github.run_id, matrix.edition, strategy['job-index']) }}
- ${{ format('run-id={0}-integration-tests-job-{1}', github.run_id, strategy['job-index']) }}
- extras=ecr-cache
timeout-minutes: 45
@@ -286,7 +275,6 @@ jobs:
fail-fast: false
matrix:
test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
edition: ${{ fromJson(needs.discover-test-dirs.outputs.editions) }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -299,7 +287,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -310,11 +298,9 @@ jobs:
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
EDITION: ${{ matrix.edition }}
run: |
# Base config shared by both editions
cat <<EOF > deployment/docker_compose/.env
COMPOSE_PROFILES=s3-filestore
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true
@@ -323,20 +309,11 @@ jobs:
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
MCP_SERVER_ENABLED=true
AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
EOF
# EE-only config
if [ "$EDITION" = "ee" ]; then
cat <<EOF >> deployment/docker_compose/.env
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
LICENSE_ENFORCEMENT_ENABLED=false
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001
AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
MCP_SERVER_ENABLED=true
USE_LIGHTWEIGHT_BACKGROUND_WORKER=false
EOF
fi
- name: Start Docker containers
run: |
@@ -399,14 +376,14 @@ jobs:
docker compose -f docker-compose.mock-it-services.yml \
-p mock-it-services-stack up -d
- name: Run Integration Tests (${{ matrix.edition }}) for ${{ matrix.test-dir.name }}
- name: Run Integration Tests for ${{ matrix.test-dir.name }}
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 20
max_attempts: 3
retry_wait_seconds: 10
command: |
echo "Running ${{ matrix.edition }} integration tests for ${{ matrix.test-dir.path }}..."
echo "Running integration tests for ${{ matrix.test-dir.path }}..."
docker run --rm --network onyx_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
@@ -464,143 +441,10 @@ jobs:
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
with:
name: docker-all-logs-${{ matrix.edition }}-${{ matrix.test-dir.name }}
name: docker-all-logs-${{ matrix.test-dir.name }}
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
no-vectordb-tests:
needs: [build-backend-image, build-integration-image]
runs-on:
[
runs-on,
runner=4cpu-linux-arm64,
"run-id=${{ github.run_id }}-no-vectordb-tests",
"extras=ecr-cache",
]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create .env file for no-vectordb Docker Compose
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
COMPOSE_PROFILES=s3-filestore
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
LICENSE_ENFORCEMENT_ENABLED=false
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
DISABLE_VECTOR_DB=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
USE_LIGHTWEIGHT_BACKGROUND_WORKER=true
EOF
# Start only the services needed for no-vectordb mode (no Vespa, no model servers)
- name: Start Docker containers (no-vectordb)
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml up \
relational_db \
cache \
minio \
api_server \
background \
-d
id: start_docker_no_vectordb
- name: Wait for services to be ready
run: |
echo "Starting wait-for-service script (no-vectordb)..."
start_time=$(date +%s)
timeout=300
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in $timeout seconds."
exit 1
fi
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "API server is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error; retrying..."
else
echo "Service not ready yet (HTTP $response). Retrying in 5 seconds..."
fi
sleep 5
done
- name: Run No-VectorDB Integration Tests
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 20
max_attempts: 3
retry_wait_seconds: 10
command: |
echo "Running no-vectordb integration tests..."
docker run --rm --network onyx_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e DB_READONLY_USER=db_readonly_user \
-e DB_READONLY_PASSWORD=password \
-e POSTGRES_POOL_PRE_PING=true \
-e POSTGRES_USE_NULL_POOL=true \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e TEST_WEB_HOSTNAME=test-runner \
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
/app/tests/integration/tests/no_vectordb
- name: Dump API server logs (no-vectordb)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml \
logs --no-color api_server > $GITHUB_WORKSPACE/api_server_no_vectordb.log || true
- name: Dump all-container logs (no-vectordb)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml \
logs --no-color > $GITHUB_WORKSPACE/docker-compose-no-vectordb.log || true
- name: Upload logs (no-vectordb)
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
with:
name: docker-all-logs-no-vectordb
path: ${{ github.workspace }}/docker-compose-no-vectordb.log
- name: Stop Docker containers (no-vectordb)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml down -v
multitenant-tests:
needs:
[build-backend-image, build-model-server-image, build-integration-image]
@@ -621,7 +465,7 @@ jobs:
persist-credentials: false
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -633,7 +477,6 @@ jobs:
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
LICENSE_ENFORCEMENT_ENABLED=false \
MULTI_TENANT=true \
AUTH_TYPE=cloud \
REQUIRE_EMAIL_VERIFICATION=false \
@@ -740,7 +583,7 @@ jobs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
needs: [integration-tests, no-vectordb-tests, multitenant-tests]
needs: [integration-tests, multitenant-tests]
if: ${{ always() }}
steps:
- name: Check job status

View File

@@ -28,7 +28,7 @@ jobs:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"

View File

@@ -0,0 +1,442 @@
# Builds the backend, model-server and integration-test images, then runs each
# discovered integration test directory as its own matrix job.
name: Run MIT Integration Tests v2
# Runs that share a group key are deduplicated: a new run cancels the one in progress.
# The key uses head_ref, else the workflow_run head branch, else the run id.
concurrency:
group: Run-MIT-Integration-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
# Triggered by merge-queue check requests and by pushes of tags matching v*.*.*.
on:
merge_group:
types: [checks_requested]
push:
tags:
- "v*.*.*"
# The workflow token is limited to reading repository contents.
permissions:
contents: read
env:
# Test Environment Variables
# These are forwarded into the test-runner container by the integration test step.
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
PERM_SYNC_SHAREPOINT_CLIENT_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_CLIENT_ID }}
PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
jobs:
discover-test-dirs:
  # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
  runs-on: ubuntu-slim
  timeout-minutes: 45
  outputs:
    # JSON array of {"path": ..., "name": ...} objects, consumed as the test matrix.
    test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
  steps:
    - name: Checkout code
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
      with:
        persist-credentials: false
    - name: Discover test directories
      id: set-matrix
      run: |
        # List the immediate sub-directories (depth 1 only, not nested leaves) of both
        # test roots, skipping __pycache__ and, for tests/, the "mcp" directory.
        tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" ! -name "mcp" -exec basename {} \; | sort)
        connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)

        # Create JSON array with directory info.
        # The entries are concatenated by hand, which relies on directory names
        # containing no whitespace, quotes or backslashes.
        all_dirs=""
        for dir in $tests_dirs; do
          all_dirs="$all_dirs{\"path\":\"tests/$dir\",\"name\":\"tests-$dir\"},"
        done
        for dir in $connector_dirs; do
          all_dirs="$all_dirs{\"path\":\"connector_job_tests/$dir\",\"name\":\"connector-$dir\"},"
        done

        # Remove trailing comma and wrap in array
        all_dirs="[${all_dirs%,}]"
        echo "test-dirs=$all_dirs" >> "$GITHUB_OUTPUT"
# Builds the backend image from ./backend/Dockerfile and pushes it to the registry
# named by RUNS_ON_ECR_CACHE, tagged with this run's id.
# NOTE(review): RUNS_ON_ECR_CACHE is not defined in this file; presumably it is provided
# by the runs-on action together with "extras=ecr-cache" — confirm.
build-backend-image:
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-backend-image",
"extras=ecr-cache",
]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
# Cache suffix: the PR number when the event carries one, otherwise the ref name
# with every character outside [A-Za-z0-9._-] replaced by "-".
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# Cache sources, as listed: per-commit tag, per-branch/PR tag, shared tag, then the
# public onyxdotapp/onyx-backend:latest image. The three ECR cache tags are also written back.
# Setting the DOCKER_NO_CACHE variable to 'true' turns the build cache off.
- name: Build and push Backend Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
type=registry,ref=onyxdotapp/onyx-backend:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
# Builds the model-server image from ./backend/Dockerfile.model_server and pushes it
# to the registry named by RUNS_ON_ECR_CACHE, tagged with this run's id.
build-model-server-image:
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-model-server-image",
"extras=ecr-cache",
]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
# Cache suffix: the PR number when the event carries one, otherwise the ref name
# with every character outside [A-Za-z0-9._-] replaced by "-".
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# Cache sources, as listed: per-commit tag, per-branch/PR tag, shared tag, then the
# public onyxdotapp/onyx-model-server:latest image. The three ECR cache tags are also written back.
# NOTE(review): unlike the backend build, this step sets no `no-cache` input — confirm that is intended.
- name: Build and push Model Server Docker image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend
file: ./backend/Dockerfile.model_server
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
type=registry,ref=onyxdotapp/onyx-model-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
# Builds and pushes the integration test image by running the "integration"
# Docker Bake target with registry-backed caches.
build-integration-image:
runs-on:
[
runs-on,
runner=2cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-integration-image",
"extras=ecr-cache",
]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
# Cache suffix: the PR number when the event carries one, otherwise the ref name
# with every character outside [A-Za-z0-9._-] replaced by "-".
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# Each --set overrides the cache-from/cache-to settings of the "backend" and
# "integration" bake targets; the script reads HEAD_SHA and CACHE_SUFFIX from env.
# NOTE(review): INTEGRATION_REPOSITORY and TAG are not referenced in the script itself;
# presumably the bake file reads them to name the pushed image — confirm.
- name: Build and push integration test image with Docker Bake
env:
INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
TAG: integration-test-${{ github.run_id }}
CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
run: |
docker buildx bake --push \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
--set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
integration
# Runs one matrix job per discovered test directory against a Docker Compose stack
# that uses the backend and model-server images tagged with this run's id.
integration-tests-mit:
needs:
[
discover-test-dirs,
build-backend-image,
build-model-server-image,
build-integration-image,
]
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- ${{ format('run-id={0}-integration-tests-mit-job-{1}', github.run_id, strategy['job-index']) }}
- extras=ecr-cache
timeout-minutes: 45
strategy:
# fail-fast is off so one failing directory does not cancel the other matrix jobs.
fail-fast: false
matrix:
# Each entry is an object with "path" and "name" keys produced by discover-test-dirs.
test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
# NOTE: don't need web server for integration tests
# The heredoc delimiter is unquoted, so ${ECR_CACHE} and ${RUN_ID} are expanded by the
# shell when the .env file is written.
- name: Create .env file for Docker Compose
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
MCP_SERVER_ENABLED=true
AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
EOF
# Starts only the services named below, detached (-d).
- name: Start Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
relational_db \
index \
cache \
minio \
api_server \
inference_model_server \
indexing_model_server \
background \
-d
id: start_docker
- name: Wait for services to be ready
  run: |
    echo "Starting wait-for-service script..."

    # Poll a URL every 5 seconds until it answers HTTP 200.
    #   $1 - URL to poll
    #   $2 - human-readable label used in log messages
    #   $3 - timeout in seconds (optional, default 300)
    # Exits the whole script with status 1 when the timeout elapses.
    wait_for_service() {
      local url=$1
      local label=$2
      local timeout=${3:-300} # default 5 minutes
      local start_time
      start_time=$(date +%s)

      while true; do
        local current_time
        current_time=$(date +%s)
        local elapsed_time=$((current_time - start_time))

        if [ "$elapsed_time" -ge "$timeout" ]; then
          echo "Timeout reached. ${label} did not become ready in $timeout seconds."
          exit 1
        fi

        local response
        # When curl fails it still prints "000" for %{http_code}, so appending a marker
        # inside the substitution (`... || echo "curl_error"`) would yield
        # "000curl_error" and never match below. Assign the marker on failure instead.
        response=$(curl -s -o /dev/null -w "%{http_code}" "$url") || response="curl_error"

        if [ "$response" = "200" ]; then
          echo "${label} is ready!"
          break
        elif [ "$response" = "curl_error" ]; then
          echo "Curl encountered an error while checking ${label}. Retrying in 5 seconds..."
        else
          echo "${label} not ready yet (HTTP status $response). Retrying in 5 seconds..."
        fi

        sleep 5
      done
    }

    wait_for_service "http://localhost:8080/health" "API server"
    echo "Finished waiting for services."
# Brings up the mock services in the background as a separate Compose project
# named mock-it-services-stack.
- name: Start Mock Services
run: |
cd backend/tests/integration/mock_services
docker compose -f docker-compose.mock-it-services.yml \
-p mock-it-services-stack up -d
# NOTE: Use pre-ping/null to reduce flakiness due to dropped connections
# Runs the integration test image against the Compose network for one matrix
# directory; the whole command is retried up to 3 times, 20 minutes per attempt.
- name: Run Integration Tests for ${{ matrix.test-dir.name }}
  uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
  with:
    timeout_minutes: 20
    max_attempts: 3
    retry_wait_seconds: 10
    # Every forwarded value is double-quoted so that a secret or variable containing
    # whitespace or glob characters is passed to `docker run` as a single argument.
    command: |
      echo "Running integration tests for ${{ matrix.test-dir.path }}..."
      docker run --rm --network onyx_default \
        --name test-runner \
        -e POSTGRES_HOST=relational_db \
        -e POSTGRES_USER=postgres \
        -e POSTGRES_PASSWORD=password \
        -e POSTGRES_DB=postgres \
        -e DB_READONLY_USER=db_readonly_user \
        -e DB_READONLY_PASSWORD=password \
        -e POSTGRES_POOL_PRE_PING=true \
        -e POSTGRES_USE_NULL_POOL=true \
        -e VESPA_HOST=index \
        -e REDIS_HOST=cache \
        -e API_SERVER_HOST=api_server \
        -e OPENAI_API_KEY="${OPENAI_API_KEY}" \
        -e EXA_API_KEY="${EXA_API_KEY}" \
        -e SLACK_BOT_TOKEN="${SLACK_BOT_TOKEN}" \
        -e CONFLUENCE_TEST_SPACE_URL="${CONFLUENCE_TEST_SPACE_URL}" \
        -e CONFLUENCE_USER_NAME="${CONFLUENCE_USER_NAME}" \
        -e CONFLUENCE_ACCESS_TOKEN="${CONFLUENCE_ACCESS_TOKEN}" \
        -e CONFLUENCE_ACCESS_TOKEN_SCOPED="${CONFLUENCE_ACCESS_TOKEN_SCOPED}" \
        -e JIRA_BASE_URL="${JIRA_BASE_URL}" \
        -e JIRA_USER_EMAIL="${JIRA_USER_EMAIL}" \
        -e JIRA_API_TOKEN="${JIRA_API_TOKEN}" \
        -e JIRA_API_TOKEN_SCOPED="${JIRA_API_TOKEN_SCOPED}" \
        -e PERM_SYNC_SHAREPOINT_CLIENT_ID="${PERM_SYNC_SHAREPOINT_CLIENT_ID}" \
        -e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
        -e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD="${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD}" \
        -e PERM_SYNC_SHAREPOINT_DIRECTORY_ID="${PERM_SYNC_SHAREPOINT_DIRECTORY_ID}" \
        -e TEST_WEB_HOSTNAME=test-runner \
        -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
        -e MOCK_CONNECTOR_SERVER_PORT=8001 \
        ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
        /app/tests/integration/${{ matrix.test-dir.path }}
# ------------------------------------------------------------
# Always gather logs BEFORE "down":
# Both dump steps are best-effort (`|| true`) and run even when earlier steps failed.
- name: Dump API server logs
  if: always()
  run: |
    cd deployment/docker_compose
    docker compose logs --no-color api_server > "$GITHUB_WORKSPACE/api_server.log" || true
- name: Dump all-container logs (optional)
  if: always()
  run: |
    cd deployment/docker_compose
    docker compose logs --no-color > "$GITHUB_WORKSPACE/docker-compose.log" || true
- name: Upload logs
  if: always()
  uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
  with:
    name: docker-all-logs-${{ matrix.test-dir.name }}
    # Upload the API-server-only log as well; it was previously written but never
    # published. Both files sit in the workspace root, so docker-compose.log keeps
    # its place at the top of the artifact.
    path: |
      ${{ github.workspace }}/docker-compose.log
      ${{ github.workspace }}/api_server.log
# ------------------------------------------------------------
# Aggregate gate job: it runs even when upstream jobs did not succeed, and fails
# unless every job listed in `needs` finished without failure, cancellation or skip.
required:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
needs: [integration-tests-mit]
if: ${{ always() }}
steps:
- name: Check job status
# Runs (and fails the job) only when some needed job failed, was cancelled, or was skipped.
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
run: exit 1

View File

@@ -52,9 +52,6 @@ env:
MCP_SERVER_PUBLIC_HOST: host.docker.internal
MCP_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp
# Visual regression S3 bucket (shared across all jobs)
PLAYWRIGHT_S3_BUCKET: onyx-playwright-artifacts
jobs:
build-web-image:
runs-on:
@@ -93,7 +90,7 @@ jobs:
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -154,7 +151,7 @@ jobs:
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -215,7 +212,7 @@ jobs:
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -242,9 +239,6 @@ jobs:
playwright-tests:
needs: [build-web-image, build-backend-image, build-model-server-image]
name: Playwright Tests (${{ matrix.project }})
permissions:
id-token: write # Required for OIDC-based AWS credential exchange (S3 access)
contents: read
runs-on:
- runs-on
- runner=8cpu-linux-arm64
@@ -255,7 +249,7 @@ jobs:
strategy:
fail-fast: false
matrix:
project: [admin, exclusive]
project: [admin, no-auth, exclusive]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -265,7 +259,7 @@ jobs:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"
@@ -295,10 +289,7 @@ jobs:
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
COMPOSE_PROFILES=s3-filestore
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
LICENSE_ENFORCEMENT_ENABLED=false
AUTH_TYPE=basic
GEN_AI_API_KEY=${OPENAI_API_KEY_VALUE}
EXA_API_KEY=${EXA_API_KEY_VALUE}
@@ -308,12 +299,15 @@ jobs:
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:playwright-test-model-server-${RUN_ID}
ONYX_WEB_SERVER_IMAGE=${ECR_CACHE}:playwright-test-web-${RUN_ID}
EOF
if [ "${{ matrix.project }}" = "no-auth" ]; then
echo "PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true" >> deployment/docker_compose/.env
fi
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -434,6 +428,11 @@ jobs:
env:
PROJECT: ${{ matrix.project }}
run: |
# Create test-results directory to ensure it exists for artifact upload
mkdir -p test-results
if [ "${PROJECT}" = "no-auth" ]; then
export PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true
fi
npx playwright test --project ${PROJECT}
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
@@ -441,134 +440,9 @@ jobs:
with:
# Includes test results and trace.zip files
name: playwright-test-results-${{ matrix.project }}-${{ github.run_id }}
path: ./web/output/playwright/
path: ./web/test-results/
retention-days: 30
- name: Upload screenshots
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
if: always()
with:
name: playwright-screenshots-${{ matrix.project }}-${{ github.run_id }}
path: ./web/output/screenshots/
retention-days: 30
# --- Visual Regression Diff ---
- name: Configure AWS credentials
if: always()
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Install the latest version of uv
if: always()
uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
version: "0.9.9"
- name: Determine baseline revision
if: always()
id: baseline-rev
env:
EVENT_NAME: ${{ github.event_name }}
BASE_REF: ${{ github.event.pull_request.base.ref }}
MERGE_GROUP_BASE_REF: ${{ github.event.merge_group.base_ref }}
GH_REF: ${{ github.ref }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ "${EVENT_NAME}" = "pull_request" ]; then
# PRs compare against the base branch (e.g. main, release/2.5)
echo "rev=${BASE_REF}" >> "$GITHUB_OUTPUT"
elif [ "${EVENT_NAME}" = "merge_group" ]; then
# Merge queue compares against the target branch (e.g. refs/heads/main -> main)
echo "rev=${MERGE_GROUP_BASE_REF#refs/heads/}" >> "$GITHUB_OUTPUT"
elif [[ "${GH_REF}" == refs/tags/* ]]; then
# Tag builds compare against the tag name
echo "rev=${REF_NAME}" >> "$GITHUB_OUTPUT"
else
# Push builds (main, release/*) compare against the branch name
echo "rev=${REF_NAME}" >> "$GITHUB_OUTPUT"
fi
- name: Generate screenshot diff report
if: always()
env:
PROJECT: ${{ matrix.project }}
PLAYWRIGHT_S3_BUCKET: ${{ env.PLAYWRIGHT_S3_BUCKET }}
BASELINE_REV: ${{ steps.baseline-rev.outputs.rev }}
run: |
uv run --no-sync --with onyx-devtools ods screenshot-diff compare \
--project "${PROJECT}" \
--rev "${BASELINE_REV}"
- name: Upload visual diff report to S3
if: always()
env:
PROJECT: ${{ matrix.project }}
PR_NUMBER: ${{ github.event.pull_request.number }}
RUN_ID: ${{ github.run_id }}
run: |
SUMMARY_FILE="web/output/screenshot-diff/${PROJECT}/summary.json"
if [ ! -f "${SUMMARY_FILE}" ]; then
echo "No summary file found — skipping S3 upload."
exit 0
fi
HAS_DIFF=$(jq -r '.has_differences' "${SUMMARY_FILE}")
if [ "${HAS_DIFF}" != "true" ]; then
echo "No visual differences for ${PROJECT} — skipping S3 upload."
exit 0
fi
aws s3 sync "web/output/screenshot-diff/${PROJECT}/" \
"s3://${PLAYWRIGHT_S3_BUCKET}/reports/pr-${PR_NUMBER}/${RUN_ID}/${PROJECT}/"
- name: Upload visual diff summary
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
if: always()
with:
name: screenshot-diff-summary-${{ matrix.project }}
path: ./web/output/screenshot-diff/${{ matrix.project }}/summary.json
if-no-files-found: ignore
retention-days: 5
- name: Upload visual diff report artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
if: always()
with:
name: screenshot-diff-report-${{ matrix.project }}-${{ github.run_id }}
path: ./web/output/screenshot-diff/${{ matrix.project }}/
if-no-files-found: ignore
retention-days: 30
- name: Update S3 baselines
if: >-
success() && (
github.ref == 'refs/heads/main' ||
startsWith(github.ref, 'refs/heads/release/') ||
startsWith(github.ref, 'refs/tags/v') ||
(
github.event_name == 'merge_group' && (
github.event.merge_group.base_ref == 'refs/heads/main' ||
startsWith(github.event.merge_group.base_ref, 'refs/heads/release/')
)
)
)
env:
PROJECT: ${{ matrix.project }}
PLAYWRIGHT_S3_BUCKET: ${{ env.PLAYWRIGHT_S3_BUCKET }}
BASELINE_REV: ${{ steps.baseline-rev.outputs.rev }}
run: |
if [ -d "web/output/screenshots/" ] && [ "$(ls -A web/output/screenshots/)" ]; then
uv run --no-sync --with onyx-devtools ods screenshot-diff upload-baselines \
--project "${PROJECT}" \
--rev "${BASELINE_REV}" \
--delete
else
echo "No screenshots to upload for ${PROJECT} — skipping baseline update."
fi
# save before stopping the containers so the logs can be captured
- name: Save Docker logs
if: success() || failure()
@@ -586,95 +460,6 @@ jobs:
name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
path: ${{ github.workspace }}/docker-compose.log
# Post a single combined visual regression comment after all matrix jobs finish
visual-regression-comment:
needs: [playwright-tests]
if: always() && github.event_name == 'pull_request'
runs-on: ubuntu-slim
timeout-minutes: 5
permissions:
pull-requests: write
steps:
- name: Download visual diff summaries
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # ratchet:actions/download-artifact@v4
with:
pattern: screenshot-diff-summary-*
path: summaries/
- name: Post combined PR comment
env:
GH_TOKEN: ${{ github.token }}
PR_NUMBER: ${{ github.event.pull_request.number }}
RUN_ID: ${{ github.run_id }}
REPO: ${{ github.repository }}
S3_BUCKET: ${{ env.PLAYWRIGHT_S3_BUCKET }}
run: |
MARKER="<!-- visual-regression-report -->"
# Build the markdown table from all summary files
TABLE_HEADER="| Project | Changed | Added | Removed | Unchanged | Report |"
TABLE_DIVIDER="|---------|---------|-------|---------|-----------|--------|"
TABLE_ROWS=""
HAS_ANY_SUMMARY=false
for SUMMARY_DIR in summaries/screenshot-diff-summary-*/; do
SUMMARY_FILE="${SUMMARY_DIR}summary.json"
if [ ! -f "${SUMMARY_FILE}" ]; then
continue
fi
HAS_ANY_SUMMARY=true
PROJECT=$(jq -r '.project' "${SUMMARY_FILE}")
CHANGED=$(jq -r '.changed' "${SUMMARY_FILE}")
ADDED=$(jq -r '.added' "${SUMMARY_FILE}")
REMOVED=$(jq -r '.removed' "${SUMMARY_FILE}")
UNCHANGED=$(jq -r '.unchanged' "${SUMMARY_FILE}")
TOTAL=$(jq -r '.total' "${SUMMARY_FILE}")
HAS_DIFF=$(jq -r '.has_differences' "${SUMMARY_FILE}")
if [ "${TOTAL}" = "0" ]; then
REPORT_LINK="_No screenshots_"
elif [ "${HAS_DIFF}" = "true" ]; then
REPORT_URL="https://${S3_BUCKET}.s3.us-east-2.amazonaws.com/reports/pr-${PR_NUMBER}/${RUN_ID}/${PROJECT}/index.html"
REPORT_LINK="[View Report](${REPORT_URL})"
else
REPORT_LINK="✅ No changes"
fi
TABLE_ROWS="${TABLE_ROWS}| \`${PROJECT}\` | ${CHANGED} | ${ADDED} | ${REMOVED} | ${UNCHANGED} | ${REPORT_LINK} |\n"
done
if [ "${HAS_ANY_SUMMARY}" = "false" ]; then
echo "No visual diff summaries found — skipping PR comment."
exit 0
fi
BODY=$(printf '%s\n' \
"${MARKER}" \
"### 🖼️ Visual Regression Report" \
"" \
"${TABLE_HEADER}" \
"${TABLE_DIVIDER}" \
"$(printf '%b' "${TABLE_ROWS}")")
# Upsert: find existing comment with the marker, or create a new one
EXISTING_COMMENT_ID=$(gh api \
"repos/${REPO}/issues/${PR_NUMBER}/comments" \
--jq ".[] | select(.body | startswith(\"${MARKER}\")) | .id" \
2>/dev/null | head -1)
if [ -n "${EXISTING_COMMENT_ID}" ]; then
gh api \
--method PATCH \
"repos/${REPO}/issues/comments/${EXISTING_COMMENT_ID}" \
-f body="${BODY}"
else
gh api \
--method POST \
"repos/${REPO}/issues/${PR_NUMBER}/comments" \
-f body="${BODY}"
fi
playwright-required:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
@@ -685,3 +470,48 @@ jobs:
- name: Check job status
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
run: exit 1
# NOTE: Chromatic UI diff testing is currently disabled.
# We are using Playwright for local and CI testing without visual regression checks.
# Chromatic may be reintroduced in the future for UI diff testing if needed.
# chromatic-tests:
# name: Chromatic Tests
# needs: playwright-tests
# runs-on:
# [
# runs-on,
# runner=32cpu-linux-x64,
# disk=large,
# "run-id=${{ github.run_id }}",
# ]
# steps:
# - name: Checkout code
# uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
# with:
# fetch-depth: 0
# - name: Setup node
# uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
# with:
# node-version: 22
# - name: Install node dependencies
# working-directory: ./web
# run: npm ci
# - name: Download Playwright test results
# uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # ratchet:actions/download-artifact@v4
# with:
# name: test-results
# path: ./web/test-results
# - name: Run Chromatic
# uses: chromaui/action@latest
# with:
# playwright: true
# projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
# workingDir: ./web
# env:
# CHROMATIC_ARCHIVE_LOCATION: ./test-results

View File

@@ -42,9 +42,6 @@ jobs:
- name: Generate OpenAPI schema and Python client
shell: bash
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
env:
LICENSE_ENFORCEMENT_ENABLED: "false"
run: |
ods openapi all

View File

@@ -64,7 +64,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -27,8 +27,6 @@ jobs:
PYTHONPATH: ./backend
REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}
DISABLE_TELEMETRY: "true"
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
LICENSE_ENFORCEMENT_ENABLED: "false"
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

View File

@@ -24,13 +24,13 @@ jobs:
with:
fetch-depth: 0
persist-credentials: false
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # ratchet:actions/setup-python@v6
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
with:
python-version: "3.11"
- name: Setup Terraform
uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # ratchet:hashicorp/setup-terraform@v3
- name: Setup node
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v6
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v6
with: # zizmor: ignore[cache-poisoning]
node-version: 22
cache: "npm"

View File

@@ -1,73 +0,0 @@
name: Preview Deployment
env:
VERCEL_ORG_ID: ${{ secrets.VERCEL_ORG_ID }}
VERCEL_PROJECT_ID: ${{ secrets.VERCEL_PROJECT_ID }}
VERCEL_CLI: vercel@50.14.1
on:
push:
branches-ignore:
- main
paths:
- "web/**"
permissions:
contents: read
pull-requests: write
jobs:
Deploy-Preview:
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"
cache-dependency-path: ./web/package-lock.json
- name: Pull Vercel Environment Information
run: npx --yes ${{ env.VERCEL_CLI }} pull --yes --environment=preview --token=${{ secrets.VERCEL_TOKEN }}
- name: Build Project Artifacts
run: npx --yes ${{ env.VERCEL_CLI }} build --token=${{ secrets.VERCEL_TOKEN }}
- name: Deploy Project Artifacts to Vercel
id: deploy
run: |
DEPLOYMENT_URL=$(npx --yes ${{ env.VERCEL_CLI }} deploy --prebuilt --token=${{ secrets.VERCEL_TOKEN }})
echo "url=$DEPLOYMENT_URL" >> "$GITHUB_OUTPUT"
- name: Update PR comment with deployment URL
if: always() && steps.deploy.outputs.url
env:
GH_TOKEN: ${{ github.token }}
DEPLOYMENT_URL: ${{ steps.deploy.outputs.url }}
run: |
# Find the PR for this branch
PR_NUMBER=$(gh pr list --head "$GITHUB_REF_NAME" --json number --jq '.[0].number')
if [ -z "$PR_NUMBER" ]; then
echo "No open PR found for branch $GITHUB_REF_NAME, skipping comment."
exit 0
fi
COMMENT_MARKER="<!-- preview-deployment -->"
COMMENT_BODY="$COMMENT_MARKER
**Preview Deployment**
| Status | Preview | Commit | Updated |
| --- | --- | --- | --- |
| ✅ | $DEPLOYMENT_URL | \`${GITHUB_SHA::7}\` | $(date -u '+%Y-%m-%d %H:%M:%S UTC') |"
# Find existing comment by marker
EXISTING_COMMENT_ID=$(gh api "repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments" \
--jq ".[] | select(.body | startswith(\"$COMMENT_MARKER\")) | .id" | head -1)
if [ -n "$EXISTING_COMMENT_ID" ]; then
gh api "repos/$GITHUB_REPOSITORY/issues/comments/$EXISTING_COMMENT_ID" \
--method PATCH --field body="$COMMENT_BODY"
else
gh pr comment "$PR_NUMBER" --body "$COMMENT_BODY"
fi

View File

@@ -1,290 +0,0 @@
name: Build and Push Sandbox Image on Tag
on:
push:
tags:
- "experimental-cc4a.*"
# Restrictive defaults; jobs declare what they need.
permissions: {}
jobs:
check-sandbox-changes:
runs-on: ubuntu-slim
timeout-minutes: 10
permissions:
contents: read
outputs:
sandbox-changed: ${{ steps.check.outputs.sandbox-changed }}
new-version: ${{ steps.version.outputs.new-version }}
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0
- name: Check for sandbox-relevant file changes
id: check
run: |
# Get the previous tag to diff against
CURRENT_TAG="${GITHUB_REF_NAME}"
PREVIOUS_TAG=$(git tag --sort=-creatordate | grep '^experimental-cc4a\.' | grep -v "^${CURRENT_TAG}$" | head -n 1)
if [ -z "$PREVIOUS_TAG" ]; then
echo "No previous experimental-cc4a tag found, building unconditionally"
echo "sandbox-changed=true" >> "$GITHUB_OUTPUT"
exit 0
fi
echo "Comparing ${PREVIOUS_TAG}..${CURRENT_TAG}"
# Check if any sandbox-relevant files changed
SANDBOX_PATHS=(
"backend/onyx/server/features/build/sandbox/"
)
CHANGED=false
for path in "${SANDBOX_PATHS[@]}"; do
if git diff --name-only "${PREVIOUS_TAG}..${CURRENT_TAG}" -- "$path" | grep -q .; then
echo "Changes detected in: $path"
CHANGED=true
break
fi
done
echo "sandbox-changed=$CHANGED" >> "$GITHUB_OUTPUT"
- name: Determine new sandbox version
id: version
if: steps.check.outputs.sandbox-changed == 'true'
run: |
# Query Docker Hub for the latest versioned tag
LATEST_TAG=$(curl -s "https://hub.docker.com/v2/repositories/onyxdotapp/sandbox/tags?page_size=100" \
| jq -r '.results[].name' \
| grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$' \
| sort -V \
| tail -n 1)
if [ -z "$LATEST_TAG" ]; then
echo "No existing version tags found on Docker Hub, starting at 0.1.1"
NEW_VERSION="0.1.1"
else
CURRENT_VERSION="${LATEST_TAG#v}"
echo "Latest version on Docker Hub: $CURRENT_VERSION"
# Increment patch version
MAJOR=$(echo "$CURRENT_VERSION" | cut -d. -f1)
MINOR=$(echo "$CURRENT_VERSION" | cut -d. -f2)
PATCH=$(echo "$CURRENT_VERSION" | cut -d. -f3)
NEW_PATCH=$((PATCH + 1))
NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
fi
echo "New version: $NEW_VERSION"
echo "new-version=$NEW_VERSION" >> "$GITHUB_OUTPUT"
build-sandbox-amd64:
needs: check-sandbox-changes
if: needs.check-sandbox-changes.outputs.sandbox-changed == 'true'
runs-on:
- runs-on
- runner=4cpu-linux-x64
- run-id=${{ github.run_id }}-sandbox-amd64
- extras=ecr-cache
timeout-minutes: 90
environment: release
permissions:
contents: read
id-token: write
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
REGISTRY_IMAGE: onyxdotapp/sandbox
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push AMD64
id: build
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend/onyx/server/features/build/sandbox/kubernetes/docker
file: ./backend/onyx/server/features/build/sandbox/kubernetes/docker/Dockerfile
platforms: linux/amd64
labels: ${{ steps.meta.outputs.labels }}
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
cache-to: |
type=inline
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
build-sandbox-arm64:
needs: check-sandbox-changes
if: needs.check-sandbox-changes.outputs.sandbox-changed == 'true'
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- run-id=${{ github.run_id }}-sandbox-arm64
- extras=ecr-cache
timeout-minutes: 90
environment: release
permissions:
contents: read
id-token: write
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
REGISTRY_IMAGE: onyxdotapp/sandbox
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push ARM64
id: build
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ./backend/onyx/server/features/build/sandbox/kubernetes/docker
file: ./backend/onyx/server/features/build/sandbox/kubernetes/docker/Dockerfile
platforms: linux/arm64
labels: ${{ steps.meta.outputs.labels }}
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
cache-to: |
type=inline
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
merge-sandbox:
needs:
- check-sandbox-changes
- build-sandbox-amd64
- build-sandbox-arm64
runs-on:
- runs-on
- runner=2cpu-linux-x64
- run-id=${{ github.run_id }}-merge-sandbox
- extras=ecr-cache
timeout-minutes: 30
environment: release
permissions:
id-token: write
env:
REGISTRY_IMAGE: onyxdotapp/sandbox
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=v${{ needs.check-sandbox-changes.outputs.new-version }}
type=raw,value=latest
- name: Create and push manifest
env:
IMAGE_REPO: ${{ env.REGISTRY_IMAGE }}
AMD64_DIGEST: ${{ needs.build-sandbox-amd64.outputs.digest }}
ARM64_DIGEST: ${{ needs.build-sandbox-arm64.outputs.digest }}
META_TAGS: ${{ steps.meta.outputs.tags }}
run: |
IMAGES="${IMAGE_REPO}@${AMD64_DIGEST} ${IMAGE_REPO}@${ARM64_DIGEST}"
docker buildx imagetools create \
$(printf '%s\n' "${META_TAGS}" | xargs -I {} echo -t {}) \
$IMAGES

5
.gitignore vendored
View File

@@ -6,7 +6,6 @@
!/.vscode/tasks.template.jsonc
.zed
.cursor
!/.cursor/mcp.json
# macos
.DS_store
@@ -41,6 +40,10 @@ settings.json
/backend/tests/regression/answer_quality/search_test_config.yaml
*.egg-info
# Claude
AGENTS.md
CLAUDE.md
# Local .terraform directories
**/.terraform/*

6
.vscode/launch.json vendored
View File

@@ -246,7 +246,7 @@
"--loglevel=INFO",
"--hostname=light@%n",
"-Q",
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,index_attempt_cleanup,opensearch_migration"
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,index_attempt_cleanup"
],
"presentation": {
"group": "2"
@@ -275,7 +275,7 @@
"--loglevel=INFO",
"--hostname=background@%n",
"-Q",
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup,index_attempt_cleanup,docprocessing,connector_doc_fetching,connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,kg_processing,monitoring,user_file_processing,user_file_project_sync,user_file_delete,opensearch_migration"
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup,index_attempt_cleanup,docprocessing,connector_doc_fetching,user_files_indexing,connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,kg_processing,monitoring,user_file_processing,user_file_project_sync,user_file_delete"
],
"presentation": {
"group": "2"
@@ -419,7 +419,7 @@
"--loglevel=INFO",
"--hostname=docfetching@%n",
"-Q",
"connector_doc_fetching"
"connector_doc_fetching,user_files_indexing"
],
"presentation": {
"group": "2"

599
AGENTS.md.template Normal file
View File

@@ -0,0 +1,599 @@
# AGENTS.md
This file provides guidance to AI agents when working with code in this repository.
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
to activate the Python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
outside of those directories.
## Project Overview
**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
### Background Workers (Celery)
Onyx uses Celery for asynchronous task processing with multiple specialized workers:
#### Worker Types
1. **Primary Worker** (`celery_app.py`)
- Coordinates core background tasks and system-wide operations
- Handles connector management, document sync, pruning, and periodic checks
- Runs with 4 threads concurrency
- Tasks: connector deletion, vespa sync, pruning, LLM model updates, user file sync
2. **Docfetching Worker** (`docfetching`)
- Fetches documents from external data sources (connectors)
- Spawns docprocessing tasks for each document batch
- Implements watchdog monitoring for stuck connectors
- Configurable concurrency (default from env)
3. **Docprocessing Worker** (`docprocessing`)
- Processes fetched documents through the indexing pipeline:
- Upserts documents to PostgreSQL
- Chunks documents and adds contextual information
- Embeds chunks via model server
- Writes chunks to Vespa vector database
- Updates document metadata
- Configurable concurrency (default from env)
4. **Light Worker** (`light`)
- Handles lightweight, fast operations
- Tasks: vespa operations, document permissions sync, external group sync
- Higher concurrency for quick tasks
5. **Heavy Worker** (`heavy`)
- Handles resource-intensive operations
- Primary task: document pruning operations
- Runs with 4 threads concurrency
6. **KG Processing Worker** (`kg_processing`)
- Handles Knowledge Graph processing and clustering
- Builds relationships between documents
- Runs clustering algorithms
- Configurable concurrency
7. **Monitoring Worker** (`monitoring`)
- System health monitoring and metrics collection
- Monitors Celery queues, process memory, and system status
- Single thread (monitoring doesn't need parallelism)
- Cloud-specific monitoring tasks
8. **User File Processing Worker** (`user_file_processing`)
- Processes user-uploaded files
- Handles user file indexing and project synchronization
- Configurable concurrency
9. **Beat Worker** (`beat`)
- Celery's scheduler for periodic tasks
- Uses DynamicTenantScheduler for multi-tenant support
- Schedules tasks like:
- Indexing checks (every 15 seconds)
- Connector deletion checks (every 20 seconds)
- Vespa sync checks (every 20 seconds)
- Pruning checks (every 20 seconds)
- KG processing (every 60 seconds)
- Monitoring tasks (every 5 minutes)
- Cleanup tasks (hourly)
#### Worker Deployment Modes
Onyx supports two deployment modes for background workers, controlled by the `USE_LIGHTWEIGHT_BACKGROUND_WORKER` environment variable:
**Lightweight Mode** (default, `USE_LIGHTWEIGHT_BACKGROUND_WORKER=true`):
- Runs a single consolidated `background` worker that handles all background tasks:
- Pruning operations (from `heavy` worker)
- Knowledge graph processing (from `kg_processing` worker)
- Monitoring tasks (from `monitoring` worker)
- User file processing (from `user_file_processing` worker)
- Lower resource footprint (single worker process)
- Suitable for smaller deployments or development environments
- Default concurrency: 6 threads
**Standard Mode** (`USE_LIGHTWEIGHT_BACKGROUND_WORKER=false`):
- Runs separate specialized workers as documented above (heavy, kg_processing, monitoring, user_file_processing)
- Better isolation and scalability
- Can scale individual workers independently based on workload
- Suitable for production deployments with higher load
The deployment mode affects:
- **Backend**: Worker processes spawned by supervisord or dev scripts
- **Helm**: Which Kubernetes deployments are created
- **Dev Environment**: Which workers `dev_run_background_jobs.py` spawns
#### Key Features
- **Thread-based Workers**: All workers use thread pools (not processes) for stability
- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a
middleware layer that automatically finds the appropriate tenant ID when sending tasks
via Celery Beat.
- **Task Prioritization**: High, Medium, Low priority queues
- **Monitoring**: Built-in heartbeat and liveness checking
- **Failure Handling**: Automatic retry and failure recovery mechanisms
- **Redis Coordination**: Inter-process communication via Redis
- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
#### Important Notes
**Defining Tasks**:
- Always use `@shared_task` rather than `@celery_app`
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
**Defining APIs**:
When creating new FastAPI APIs, do NOT use the `response_model` field. Instead, just type the
function.
**Testing Updates**:
If you make any updates to a celery worker and you want to test these changes, you will need
to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.
### Code Quality
```bash
# Install and run pre-commit hooks
pre-commit install
pre-commit run --all-files
```
NOTE: Always make sure everything is strictly typed (both in Python and Typescript).
## Architecture Overview
### Technology Stack
- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
- **Database**: PostgreSQL with Redis caching
- **Search**: Vespa vector database
- **Auth**: OAuth2, SAML, multi-provider support
- **AI/ML**: LangChain, LiteLLM, multiple embedding models
### Directory Structure
```
backend/
├── onyx/
│ ├── auth/ # Authentication & authorization
│ ├── chat/ # Chat functionality & LLM interactions
│ ├── connectors/ # Data source connectors
│ ├── db/ # Database models & operations
│ ├── document_index/ # Vespa integration
│ ├── federated_connectors/ # External search connectors
│ ├── llm/ # LLM provider integrations
│ └── server/ # API endpoints & routers
├── ee/ # Enterprise Edition features
├── alembic/ # Database migrations
└── tests/ # Test suites
web/
├── src/app/ # Next.js app router pages
├── src/components/ # Reusable React components
└── src/lib/ # Utilities & business logic
```
## Frontend Standards
### 1. Import Standards
**Always use absolute imports with the `@` prefix.**
**Reason:** Moving files around becomes easier since you don't also have to update those import statements. This makes modifications to the codebase much nicer.
```typescript
// ✅ Good
import { Button } from "@/components/ui/button";
import { useAuth } from "@/hooks/useAuth";
import { Text } from "@/refresh-components/texts/Text";
// ❌ Bad
import { Button } from "../../../components/ui/button";
import { useAuth } from "./hooks/useAuth";
```
### 2. React Component Functions
**Prefer regular functions over arrow functions for React components.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
function UserProfile({ userId }: UserProfileProps) {
return <div>User Profile</div>
}
// ❌ Bad
const UserProfile = ({ userId }: UserProfileProps) => {
return <div>User Profile</div>
}
```
### 3. Props Interface Extraction
**Extract prop types into their own interface definitions.**
**Reason:** Function signatures become easier to read when the prop types live in a named interface rather than an inline object type.
```typescript
// ✅ Good
interface UserCardProps {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}
function UserCard({ user, showActions = false, onEdit }: UserCardProps) {
return <div>User Card</div>
}
// ❌ Bad
function UserCard({
user,
showActions = false,
onEdit
}: {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}) {
return <div>User Card</div>
}
```
### 4. Spacing Guidelines
**Prefer padding over margins for spacing.**
**Reason:** We want to consolidate usage to paddings instead of margins.
```typescript
// ✅ Good
<div className="p-4 space-y-2">
<div className="p-2">Content</div>
</div>
// ❌ Bad
<div className="m-4 space-y-2">
<div className="m-2">Content</div>
</div>
```
### 5. Tailwind Dark Mode
**Strictly forbid using the `dark:` modifier in Tailwind classes, except for logo icon handling.**
**Reason:** The `colors.css` file already, VERY CAREFULLY, defines what the exact opposite colour of each light-mode colour is. Overriding this behaviour is VERY bad and will lead to horrible UI breakages.
**Exception:** The `createLogoIcon` helper in `web/src/components/icons/icons.tsx` uses `dark:` modifiers (`dark:invert`, `dark:hidden`, `dark:block`) to handle third-party logo icons that cannot automatically adapt through `colors.css`. This is the ONLY acceptable use of dark mode modifiers.
```typescript
// ✅ Good - Standard components use `web/tailwind-themes/tailwind.config.js` / `web/src/app/css/colors.css`
<div className="bg-background-neutral-03 text-text-02">
Content
</div>
// ✅ Good - Logo icons with dark mode handling via createLogoIcon
export const GithubIcon = createLogoIcon(githubLightIcon, {
monochromatic: true, // Will apply dark:invert internally
});
export const GitbookIcon = createLogoIcon(gitbookLightIcon, {
darkSrc: gitbookDarkIcon, // Will use dark:hidden/dark:block internally
});
// ❌ Bad - Manual dark mode overrides
<div className="bg-white dark:bg-black text-black dark:text-white">
Content
</div>
```
### 6. Class Name Utilities
**Use the `cn` utility instead of raw string formatting for classNames.**
**Reason:** `cn`s are easier to read. They also allow for more complex types (i.e., string-arrays) to get formatted properly (it flattens each element in that string array down). As a result, it can allow things such as conditionals (i.e., `myCondition && "some-tailwind-class"`, which evaluates to `false` when `myCondition` is `false`) to get filtered out.
```typescript
import { cn } from '@/lib/utils'
// ✅ Good
<div className={cn(
'base-class',
isActive && 'active-class',
className
)}>
Content
</div>
// ❌ Bad
<div className={`base-class ${isActive ? 'active-class' : ''} ${className}`}>
Content
</div>
```
### 7. Custom Hooks Organization
**Follow a "hook-per-file" layout. Each hook should live in its own file within `web/src/hooks`.**
**Reason:** This is just a layout preference. Keeps code clean.
```typescript
// web/src/hooks/useUserData.ts
export function useUserData(userId: string) {
// hook implementation
}
// web/src/hooks/useLocalStorage.ts
export function useLocalStorage<T>(key: string, initialValue: T) {
// hook implementation
}
```
### 8. Icon Usage
**ONLY use icons from the `web/src/icons` directory. Do NOT use icons from `react-icons`, `lucide`, or other external libraries.**
**Reason:** We have a very carefully curated selection of icons that match our Onyx guidelines. We do NOT want to muddy those up with different aesthetic stylings.
```typescript
// ✅ Good
import SvgX from "@/icons/x";
import SvgMoreHorizontal from "@/icons/more-horizontal";
// ❌ Bad
import { User } from "lucide-react";
import { FiSearch } from "react-icons/fi";
```
**Missing Icons**: If an icon is needed but doesn't exist in the `web/src/icons` directory, import it from Figma using the Figma MCP tool and add it to the icons directory.
If you need help with this step, reach out to `raunak@onyx.app`.
### 9. Text Rendering
**Prefer using the `refresh-components/texts/Text` component for all text rendering. Avoid "naked" text nodes.**
**Reason:** The `Text` component is fully compliant with the stylings provided in Figma. It provides easy utilities to specify the text-colour and font-size in the form of flags. Super duper easy.
```typescript
// ✅ Good
import { Text } from '@/refresh-components/texts/Text'
function UserCard({ name }: { name: string }) {
return (
<Text
{/* The `text03` flag makes the text it renders to be coloured the 3rd-scale grey */}
text03
{/* The `mainAction` flag makes the text it renders to be "main-action" font + line-height + weightage, as described in the Figma */}
mainAction
>
{name}
</Text>
)
}
// ❌ Bad
function UserCard({ name }: { name: string }) {
return (
<div>
<h2>{name}</h2>
<p>User details</p>
</div>
)
}
```
### 10. Component Usage
**Heavily avoid raw HTML input components. Always use components from the `web/src/refresh-components` or `web/lib/opal/src` directory.**
**Reason:** We've put in a lot of effort to unify the components that are rendered in the Onyx app. Using raw components breaks the entire UI of the application, and leaves it in a muddier state than before.
```typescript
// ✅ Good
import Button from '@/refresh-components/buttons/Button'
import InputTypeIn from '@/refresh-components/inputs/InputTypeIn'
import SvgPlusCircle from '@/icons/plus-circle'
function ContactForm() {
return (
<form>
<InputTypeIn placeholder="Search..." />
<Button type="submit" leftIcon={SvgPlusCircle}>Submit</Button>
</form>
)
}
// ❌ Bad
function ContactForm() {
return (
<form>
<input placeholder="Name" />
<textarea placeholder="Message" />
<button type="submit">Submit</button>
</form>
)
}
```
### 11. Colors
**Always use custom overrides for colors and borders rather than built in Tailwind CSS colors. These overrides live in `web/tailwind-themes/tailwind.config.js`.**
**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.
**Available color categories:**
- **Text:** `text-01` through `text-05`, `text-inverted-XX`
- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
- **Actions:** `action-link-XX`, `action-danger-XX`
- **Status:** `status-info-XX`, `status-success-XX`, `status-warning-XX`, `status-error-XX`
- **Theme:** `theme-primary-XX`, `theme-red-XX`, `theme-blue-XX`, etc.
```typescript
// ✅ Good - Use custom Onyx color classes
<div className="bg-background-neutral-01 border border-border-02" />
<div className="bg-background-tint-02 border border-border-01" />
<div className="bg-status-success-01" />
<div className="bg-action-link-01" />
<div className="bg-theme-primary-05" />
// ❌ Bad - Do NOT use standard Tailwind colors
<div className="bg-gray-100 border border-gray-300 text-gray-600" />
<div className="bg-white border border-slate-200" />
<div className="bg-green-100 text-green-700" />
<div className="bg-blue-100 text-blue-600" />
<div className="bg-indigo-500" />
```
### 12. Data Fetching
**Prefer using `useSWR` for data fetching. Data should generally be fetched on the client side. Components that need data should display a loader / placeholder while waiting for that data. Prefer loading data within the component that needs it rather than at the top level and passing it down.**
**Reason:** Client side fetching allows us to load the skeleton of the page without waiting for data to load, leading to a snappier UX. Loading data where needed reduces dependencies between a component and its parent component(s).
## Database & Migrations
### Running Migrations
```bash
# Standard migrations
alembic upgrade head
# Multi-tenant (Enterprise)
alembic -n schema_private upgrade head
```
### Creating Migrations
```bash
# Create migration
alembic revision -m "description"
# Multi-tenant migration
alembic -n schema_private revision -m "description"
```
Write the migration manually and place it in the file that alembic creates when running the above command.
## Testing Strategy
There are 4 main types of tests within Onyx:
### Unit Tests
These should not assume any Onyx/external services are available to be called.
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
write these for complex, isolated modules e.g. `citation_processing.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest -xv backend/tests/unit
```
### External Dependency Unit Tests
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
MinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).
However, the actual Onyx containers are not running and with these tests we call the function to test directly.
We can also mock components/calls at will.
The goal of these tests is to minimize mocking while giving some flexibility to mock things that are flaky,
need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
with certain args, something that would be impossible with proper integration tests).
A great example of this type of test is `backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit
```
### Integration Tests
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
verification is necessary) over any other type of test.
Tests are parallelized at a directory level.
When writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. If one exists, prefer calling the appropriate Manager
class in the utils over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
calling the utilities directly (e.g. do NOT create admin users with
`admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).
A great example of this type of test is `backend/tests/integration/dev_apis/test_simple_chat_api.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
```
### Playwright (E2E) Tests
These tests are an even more complete version of the Integration Tests mentioned above. They have all services of Onyx
running, *including* the Web Server.
Use these tests for anything that requires significant frontend <-> backend coordination.
Tests are located at `web/tests/e2e`. Tests are written in TypeScript.
To run them:
```bash
npx playwright test <TEST_NAME>
```
## Logs
When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access
to logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)
will be tailing their logs to this file.
## Security Considerations
- Never commit API keys or secrets to repository
- Use encrypted credential storage for connector credentials
- Follow RBAC patterns for new features
- Implement proper input validation with Pydantic models
- Use parameterized queries to prevent SQL injection
## AI/LLM Integration
- Multiple LLM providers supported via LiteLLM
- Configurable models per feature (chat, search, embeddings)
- Streaming support for real-time responses
- Token management and rate limiting
- Custom prompts and agent actions
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:
**Issues to Address**
What the change is meant to do.
**Important Notes**
Things you come across in your research that are important to the implementation.
**Implementation strategy**
How you are going to make the changes happen. High level approach.
**Tests**
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.
Do NOT include these: *Timeline*, *Rollback plan*
This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
Keep it high level. You can reference certain files or functions though.
Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.

View File

@@ -1 +0,0 @@
AGENTS.md

View File

@@ -1,25 +1,26 @@
# PROJECT KNOWLEDGE BASE
# CLAUDE.md
This file provides guidance to AI agents when working with code in this repository.
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
to assume the python venv.
to assume the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
make sure we see logs coming out from the relevant service.
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
outside of those directories.
outside of those directories.
## Project Overview
**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
### Background Workers (Celery)
Onyx uses Celery for asynchronous task processing with multiple specialized workers:
@@ -91,7 +92,6 @@ Onyx uses Celery for asynchronous task processing with multiple specialized work
Onyx supports two deployment modes for background workers, controlled by the `USE_LIGHTWEIGHT_BACKGROUND_WORKER` environment variable:
**Lightweight Mode** (default, `USE_LIGHTWEIGHT_BACKGROUND_WORKER=true`):
- Runs a single consolidated `background` worker that handles all background tasks:
- Light worker tasks (Vespa operations, permissions sync, deletion)
- Document processing (indexing pipeline)
@@ -105,14 +105,12 @@ Onyx supports two deployment modes for background workers, controlled by the `US
- Default concurrency: 20 threads (increased to handle combined workload)
**Standard Mode** (`USE_LIGHTWEIGHT_BACKGROUND_WORKER=false`):
- Runs separate specialized workers as documented above (light, docprocessing, docfetching, heavy, kg_processing, monitoring, user_file_processing)
- Better isolation and scalability
- Can scale individual workers independently based on workload
- Suitable for production deployments with higher load
The deployment mode affects:
- **Backend**: Worker processes spawned by supervisord or dev scripts
- **Helm**: Which Kubernetes deployments are created
- **Dev Environment**: Which workers `dev_run_background_jobs.py` spawns
@@ -121,18 +119,18 @@ The deployment mode affects:
- **Thread-based Workers**: All workers use thread pools (not processes) for stability
- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a
middleware layer that automatically finds the appropriate tenant ID when sending tasks
via Celery Beat.
middleware layer that automatically finds the appropriate tenant ID when sending tasks
via Celery Beat.
- **Task Prioritization**: High, Medium, Low priority queues
- **Monitoring**: Built-in heartbeat and liveness checking
- **Failure Handling**: Automatic retry and failure recovery mechanisms
- **Redis Coordination**: Inter-process communication via Redis
- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
#### Important Notes
**Defining Tasks**:
**Defining Tasks**:
- Always use `@shared_task` rather than `@celery_app`
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
@@ -144,12 +142,7 @@ function.
If you make any updates to a celery worker and you want to test these changes, you will need
to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.
**Task Time Limits**:
Since all tasks are executed in thread pools, the time limit features of Celery are silently
disabled and won't work. Timeout logic must be implemented within the task itself.
### Code Quality
```bash
# Install and run pre-commit hooks
pre-commit install
@@ -161,7 +154,6 @@ NOTE: Always make sure everything is strictly typed (both in Python and Typescri
## Architecture Overview
### Technology Stack
- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
- **Database**: PostgreSQL with Redis caching
@@ -443,7 +435,6 @@ function ContactForm() {
**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.
**Available color categories:**
- **Text:** `text-01` through `text-05`, `text-inverted-XX`
- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
@@ -476,7 +467,6 @@ function ContactForm() {
## Database & Migrations
### Running Migrations
```bash
# Standard migrations
alembic upgrade head
@@ -486,7 +476,6 @@ alembic -n schema_private upgrade head
```
### Creating Migrations
```bash
# Create migration
alembic revision -m "description"
@@ -499,14 +488,13 @@ Write the migration manually and place it in the file that alembic creates when
## Testing Strategy
First, you must activate the virtual environment with `source .venv/bin/activate`.
First, you must activate the virtual environment with `source .venv/bin/activate`.
There are 4 main types of tests within Onyx:
### Unit Tests
These should not assume any Onyx/external services are available to be called.
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
write these for complex, isolated modules e.g. `citation_processing.py`.
To run them:
@@ -516,14 +504,13 @@ pytest -xv backend/tests/unit
```
### External Dependency Unit Tests
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
MinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).
However, the actual Onyx containers are not running and with these tests we call the function to test directly.
We can also mock components/calls at will.
We can also mock components/calls at will.
The goal with these tests is to minimize mocking while giving some flexibility to mock things that are flaky,
The goal with these tests is to minimize mocking while giving some flexibility to mock things that are flaky,
need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
with certain args, something that would be impossible with proper integration tests).
@@ -536,16 +523,15 @@ python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency
```
### Integration Tests
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
verification is necessary) over any other type of test.
Tests are parallelized at a directory level.
When writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. Prefer (if one exists), calling the appropriate Manager
When writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. Prefer (if one exists), calling the appropriate Manager
class in the utils over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
calling the utilities directly (e.g. do NOT create admin users with
calling the utilities directly (e.g. do NOT create admin users with
`admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).
A great example of this type of test is `backend/tests/integration/dev_apis/test_simple_chat_api.py`.
@@ -557,9 +543,8 @@ python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
```
### Playwright (E2E) Tests
These tests are an even more complete version of the Integration Tests mentioned above. They have all services of Onyx
running, _including_ the Web Server.
These tests are an even more complete version of the Integration Tests mentioned above. They have all services of Onyx
running, *including* the Web Server.
Use these tests for anything that requires significant frontend <-> backend coordination.
@@ -571,11 +556,13 @@ To run them:
npx playwright test <TEST_NAME>
```
## Logs
When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access
to logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)
will be tailing their logs to this file.
will be tailing their logs to this file.
## Security Considerations
@@ -594,7 +581,6 @@ will be tailing their logs to this file.
- Custom prompts and agent actions
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:
**Issues to Address**
@@ -607,10 +593,10 @@ Things you come across in your research that are important to the implementation
How you are going to make the changes happen. High level approach.
**Tests**
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.
Do NOT include these: _Timeline_, _Rollback plan_
Do NOT include these: *Timeline*, *Rollback plan*
This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
Keep it high level. You can reference certain files or functions though.

View File

@@ -2,10 +2,7 @@ Copyright (c) 2023-present DanswerAI, Inc.
Portions of this software are licensed as follows:
- All content that resides under "ee" directories of this repository is licensed under the Onyx Enterprise License. Each ee directory contains an identical copy of this license at its root:
- backend/ee/LICENSE
- web/src/app/ee/LICENSE
- web/src/ee/LICENSE
- All content that resides under "ee" directories of this repository, if that directory exists, is licensed under the license defined in "backend/ee/LICENSE". Specifically all content under "backend/ee" and "web/src/app/ee" is licensed under the license defined in "backend/ee/LICENSE".
- All third party components incorporated into the Onyx Software are licensed under the original license provided by the owner of the applicable component.
- Content outside of the above mentioned directories or restrictions above is available under the "MIT Expat" license as defined below.

View File

@@ -134,7 +134,6 @@ COPY --chown=onyx:onyx ./alembic_tenants /app/alembic_tenants
COPY --chown=onyx:onyx ./alembic.ini /app/alembic.ini
COPY supervisord.conf /usr/etc/supervisord.conf
COPY --chown=onyx:onyx ./static /app/static
COPY --chown=onyx:onyx ./keys /app/keys
# Escape hatch scripts
COPY --chown=onyx:onyx ./scripts/debugging /app/scripts/debugging
@@ -150,11 +149,6 @@ RUN if [ "$ENABLE_CRAFT" = "true" ]; then \
ENABLE_CRAFT=true /app/scripts/setup_craft_templates.sh; \
fi
# Set Craft template paths to the in-image locations
# These match the paths where setup_craft_templates.sh creates the templates
ENV OUTPUTS_TEMPLATE_PATH=/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs
ENV VENV_TEMPLATE_PATH=/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/venv
# Put logo in assets
COPY --chown=onyx:onyx ./assets /app/assets

View File

@@ -48,7 +48,6 @@ WORKDIR /app
# Utils used by model server
COPY ./onyx/utils/logger.py /app/onyx/utils/logger.py
COPY ./onyx/utils/middleware.py /app/onyx/utils/middleware.py
COPY ./onyx/utils/tenant.py /app/onyx/utils/tenant.py
# Place to fetch version information
COPY ./onyx/__init__.py /app/onyx/__init__.py

View File

@@ -57,7 +57,7 @@ if USE_IAM_AUTH:
def include_object(
object: SchemaItem, # noqa: ARG001
object: SchemaItem,
name: str | None,
type_: Literal[
"schema",
@@ -67,8 +67,8 @@ def include_object(
"unique_constraint",
"foreign_key_constraint",
],
reflected: bool, # noqa: ARG001
compare_to: SchemaItem | None, # noqa: ARG001
reflected: bool,
compare_to: SchemaItem | None,
) -> bool:
if type_ == "table" and name in EXCLUDE_TABLES:
return False
@@ -244,7 +244,7 @@ def do_run_migrations(
def provide_iam_token_for_alembic(
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any # noqa: ARG001
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
) -> None:
if USE_IAM_AUTH:
# Database connection settings
@@ -474,7 +474,7 @@ def run_migrations_online() -> None:
if connectable is not None:
# pytest-alembic is providing an engine - use it directly
logger.debug("run_migrations_online starting (pytest-alembic mode).")
logger.info("run_migrations_online starting (pytest-alembic mode).")
# For pytest-alembic, we use the default schema (public)
schema_name = context.config.attributes.get(

View File

@@ -1,343 +0,0 @@
#!/usr/bin/env python3
"""Parallel Alembic Migration Runner
Upgrades tenant schemas to head in batched, parallel alembic subprocesses.
Each subprocess handles a batch of schemas (via ``-x schemas=a,b,c``),
reducing per-process overhead compared to one-schema-per-process.
Usage examples::
# defaults: 6 workers, 50 schemas/batch
python alembic/run_multitenant_migrations.py
# custom settings
python alembic/run_multitenant_migrations.py -j 8 -b 100
"""
from __future__ import annotations
import argparse
import subprocess
import sys
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, NamedTuple
from alembic.config import Config
from alembic.script import ScriptDirectory
from sqlalchemy import text
from onyx.db.engine.sql_engine import is_valid_schema_name
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from shared_configs.configs import TENANT_ID_PREFIX
# ---------------------------------------------------------------------------
# Data types
# ---------------------------------------------------------------------------
class Args(NamedTuple):
    """Validated command-line options, as returned by ``parse_args``."""

    # Value of -j/--jobs; main() passes it on as the worker count.
    jobs: int
    # Value of -b/--batch-size; number of schemas given to one alembic process.
    batch_size: int
class BatchResult(NamedTuple):
    """Outcome of migrating one batch of schemas in an alembic subprocess."""

    # The schemas that were handed to the subprocess.
    schemas: list[str]
    # True only when the first ``alembic upgrade head`` attempt exited with 0.
    success: bool
    # Combined stdout/stderr: of the first run on success, of the retry on failure.
    output: str
    # Wall-clock seconds spent on the batch (includes the retry, if any).
    elapsed_sec: float
# ---------------------------------------------------------------------------
# Core functions
# ---------------------------------------------------------------------------
def run_alembic_for_batch(schemas: list[str]) -> BatchResult:
    """Upgrade a batch of schemas to head inside a single alembic subprocess.

    The schemas are passed to alembic as ``-x schemas=a,b,c``. When the first
    attempt exits non-zero, its output is echoed to stderr and the command is
    run a second time with ``-x continue=true`` so the other schemas in the
    batch still get migrated.

    Args:
        schemas: Schema names to migrate together.

    Returns:
        A ``BatchResult``. ``success`` is True only if the first attempt
        exited with 0; on failure ``output`` holds the retry's combined
        stdout/stderr (alembic's per-schema errors) for diagnosis.
    """
    alembic_prefix = ["alembic", "-x", "schemas=" + ",".join(schemas)]

    def _upgrade_head(extra_args: list[str]) -> subprocess.CompletedProcess[str]:
        # stderr is folded into stdout so one string carries all diagnostics.
        return subprocess.run(
            alembic_prefix + extra_args + ["upgrade", "head"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )

    started_at = time.monotonic()
    first_attempt = _upgrade_head([])

    if first_attempt.returncode == 0:
        return BatchResult(
            schemas,
            True,
            first_attempt.stdout or "",
            time.monotonic() - started_at,
        )

    # At least one schema failed: surface the original error, then retry in
    # "continue" mode so the remaining schemas are not left behind.
    if first_attempt.stdout:
        print(
            f"Initial error output:\n{first_attempt.stdout}",
            file=sys.stderr,
            flush=True,
        )
    print(
        f"Batch failed (exit {first_attempt.returncode}), retrying with 'continue=true'...",
        file=sys.stderr,
        flush=True,
    )

    second_attempt = _upgrade_head(["-x", "continue=true"])
    # The batch is still reported as failed even if the retry exits cleanly.
    return BatchResult(
        schemas,
        False,
        second_attempt.stdout or "",
        time.monotonic() - started_at,
    )
def get_head_revision() -> str | None:
    """Return the current head revision of the alembic script directory.

    The configuration is read from ``alembic.ini``, given as a relative path.

    Returns:
        Whatever ``ScriptDirectory.get_current_head()`` reports.
    """
    return ScriptDirectory.from_config(Config("alembic.ini")).get_current_head()
def get_schemas_needing_migration(
    tenant_schemas: List[str], head_rev: str
) -> List[str]:
    """Filter *tenant_schemas* down to those not yet at *head_rev*.

    A schema needs migration when it has no ``alembic_version`` table, or
    when the revision stored in that table differs from *head_rev*.

    Args:
        tenant_schemas: Candidate schema names.
        head_rev: The revision every schema is expected to be at.

    Returns:
        Schemas lacking an ``alembic_version`` table first (in input order),
        followed by schemas whose stored revision is not *head_rev*.

    Raises:
        ValueError: If a schema name reported by the database fails
            ``is_valid_schema_name``.
    """
    if not tenant_schemas:
        return []

    table_lookup = text(
        "SELECT table_schema FROM information_schema.tables "
        "WHERE table_name = 'alembic_version' "
        "AND table_schema = ANY(:schemas)"
    )

    with SqlEngine.get_engine().connect() as conn:
        # Which of the candidate schemas actually own an alembic_version table?
        versioned = {
            row[0] for row in conn.execute(table_lookup, {"schemas": tenant_schemas})
        }

        # No version table at all means the schema has never been migrated.
        pending = [name for name in tenant_schemas if name not in versioned]
        if not versioned:
            return pending

        # Schema names are interpolated into SQL below, so vet them first.
        for name in versioned:
            if not is_valid_schema_name(name):
                raise ValueError(f"Invalid schema name: {name}")

        # One UNION ALL query fetches every schema's revision. Rows are tagged
        # with the schema's position rather than its name, which avoids having
        # to quote schema names inside string literals.
        ordered = list(versioned)
        selects = []
        for position, name in enumerate(ordered):
            selects.append(
                f'SELECT {position} AS idx, version_num FROM "{name}".alembic_version'
            )

        revision_of = {}
        for row in conn.execute(text(" UNION ALL ".join(selects))):
            revision_of[ordered[row[0]]] = row[1]

        # A schema with an empty version table has no entry and thus also
        # compares unequal to head_rev.
        pending.extend(name for name in ordered if revision_of.get(name) != head_rev)

    return pending
def run_migrations_parallel(
    schemas: list[str],
    max_workers: int,
    batch_size: int,
) -> bool:
    """Chunk *schemas* into batches and migrate the batches in parallel.

    Each batch is handed to ``run_alembic_for_batch`` on a thread pool. A
    background monitor thread wakes every 60 s and prints the batches that
    were in-flight on two consecutive ticks, making it easy to spot hung
    tenants.

    Args:
        schemas: Schema names to migrate.
        max_workers: Size of the thread pool (one alembic subprocess each).
        batch_size: Maximum number of schemas per batch.

    Returns:
        True when every batch succeeded; False if any batch reported failure
        or its worker raised.
    """
    batches = [schemas[i : i + batch_size] for i in range(0, len(schemas), batch_size)]
    total_batches = len(batches)
    print(
        f"{len(schemas)} schemas in {total_batches} batch(es) "
        f"with {max_workers} workers (batch size: {batch_size})...",
        flush=True,
    )
    all_success = True

    # State shared between the workers and the monitor; guarded by ``lock``.
    in_flight: dict[int, list[str]] = {}
    prev_in_flight: set[int] = set()
    lock = threading.Lock()
    stop_event = threading.Event()

    def _monitor() -> None:
        """Print a status line every 60 s listing batches still in-flight.

        Only batches that were also in-flight on the previous tick are
        printed, so short-lived batches never show up.
        """
        nonlocal prev_in_flight
        # Event.wait returns False on timeout and True once stop is requested.
        while not stop_event.wait(60):
            with lock:
                if not in_flight:
                    prev_in_flight = set()
                    continue
                current = set(in_flight)
                stuck = current & prev_in_flight
                prev_in_flight = current
                if not stuck:
                    continue
                stuck_ids = sorted(stuck)
                # Named differently from the outer ``schemas`` parameter on purpose.
                stuck_schemas = [s for idx in stuck_ids for s in in_flight[idx]]
            print(
                f"⏳ batch(es) still running since last check "
                f"({', '.join(str(i + 1) for i in stuck_ids)}): "
                + ", ".join(stuck_schemas),
                flush=True,
            )

    def _run(batch_idx: int, batch: list[str]) -> BatchResult:
        """Migrate one batch while keeping ``in_flight`` up to date."""
        with lock:
            in_flight[batch_idx] = batch
        print(
            f"Batch {batch_idx + 1}/{total_batches} started "
            f"({len(batch)} schemas): {', '.join(batch)}",
            flush=True,
        )
        try:
            return run_alembic_for_batch(batch)
        finally:
            # Always deregister, even when the worker raises; otherwise the
            # monitor would keep reporting a dead batch as still running.
            with lock:
                in_flight.pop(batch_idx, None)

    monitor_thread = threading.Thread(target=_monitor, daemon=True)
    monitor_thread.start()

    try:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_idx = {
                executor.submit(_run, i, b): i for i, b in enumerate(batches)
            }

            for future in as_completed(future_to_idx):
                batch_idx = future_to_idx[future]
                try:
                    result = future.result()
                    # Distinct markers for success and failure; the failure
                    # marker matches the one used in the exception branch.
                    status = "✓" if result.success else "✗"
                    print(
                        f"Batch {batch_idx + 1}/{total_batches} "
                        f"{status} {len(result.schemas)} schemas "
                        f"in {result.elapsed_sec:.1f}s",
                        flush=True,
                    )
                    if not result.success:
                        # Print last 20 lines of retry output for diagnosis
                        tail = result.output.strip().splitlines()[-20:]
                        for line in tail:
                            print(f" {line}", flush=True)
                        all_success = False
                except Exception as e:
                    # A crashed worker counts as a failed batch, but the
                    # remaining batches are still collected.
                    print(
                        f"Batch {batch_idx + 1}/{total_batches} ✗ exception: {e}",
                        flush=True,
                    )
                    all_success = False
    finally:
        # Wake the monitor so it exits promptly instead of sleeping out its tick.
        stop_event.set()
        monitor_thread.join(timeout=2)

    return all_success
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def parse_args() -> Args:
    """Parse and validate the command-line options.

    Returns:
        An ``Args`` tuple holding the ``--jobs`` and ``--batch-size`` values.

    Both options must be at least 1; otherwise ``parser.error`` is invoked
    with the corresponding message.
    """
    parser = argparse.ArgumentParser(
        description="Run alembic migrations for all tenant schemas in parallel"
    )

    # Both options are positive integers that differ only in name, default
    # and help text, so they are declared from a small table.
    int_options = (
        ("-j", "--jobs", 6, "Number of parallel alembic processes (default: 6)"),
        ("-b", "--batch-size", 50, "Schemas per alembic process (default: 50)"),
    )
    for short_flag, long_flag, default_value, help_text in int_options:
        parser.add_argument(
            short_flag,
            long_flag,
            type=int,
            default=default_value,
            metavar="N",
            help=help_text,
        )

    parsed = parser.parse_args()
    jobs, batch_size = parsed.jobs, parsed.batch_size

    if jobs < 1:
        parser.error("--jobs must be >= 1")
    if batch_size < 1:
        parser.error("--batch-size must be >= 1")

    return Args(jobs=jobs, batch_size=batch_size)
def main() -> int:
    """Migrate every tenant schema that is not yet at the alembic head.

    Returns:
        Exit code: 0 when nothing needed migrating or every batch succeeded;
        1 when the head revision cannot be determined, no tenant schemas are
        found, or at least one batch failed.
    """
    args = parse_args()

    head_rev = get_head_revision()
    if head_rev is None:
        print("Could not determine head revision.", file=sys.stderr)
        return 1

    # NOTE(review): the extent of this ``with`` block is inferred (the scrape
    # lost indentation). It is scoped to the statements that query the
    # database directly -- confirm against the real file.
    with SqlEngine.scoped_engine(pool_size=5, max_overflow=2):
        tenant_ids = get_all_tenant_ids()
        # Only ids carrying the tenant prefix are treated as tenant schemas.
        tenant_schemas = [tid for tid in tenant_ids if tid.startswith(TENANT_ID_PREFIX)]

        if not tenant_schemas:
            print(
                "No tenant schemas found. Is MULTI_TENANT=true set?",
                file=sys.stderr,
            )
            return 1

        schemas_to_migrate = get_schemas_needing_migration(tenant_schemas, head_rev)

    if not schemas_to_migrate:
        print(
            f"All {len(tenant_schemas)} tenants are already at head "
            f"revision ({head_rev})."
        )
        return 0

    print(
        f"{len(schemas_to_migrate)}/{len(tenant_schemas)} tenants need "
        f"migration (head: {head_rev})."
    )

    success = run_migrations_parallel(
        schemas_to_migrate,
        max_workers=args.jobs,
        batch_size=args.batch_size,
    )

    print(f"\n{'All migrations successful' if success else 'Some migrations failed'}")
    return 0 if success else 1
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,58 @@
"""LLMProvider deprecated fields are nullable
Revision ID: 001984c88745
Revises: 01f8e6d95a33
Create Date: 2026-02-01 22:24:34.171100
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "001984c88745"
down_revision = "01f8e6d95a33"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Relax constraints on two ``llm_provider`` columns.

    ``default_model_name`` becomes nullable and ``is_default_vision_provider``
    loses its server default.
    """
    # Make default_model_name nullable (was NOT NULL)
    op.alter_column(
        "llm_provider",
        "default_model_name",
        existing_type=sa.String(),
        nullable=True,
    )
    # Remove server_default from is_default_vision_provider (was server_default=false())
    op.alter_column(
        "llm_provider",
        "is_default_vision_provider",
        existing_type=sa.Boolean(),
        server_default=None,
    )
    # is_default_provider and default_vision_model are already nullable with no server_default
def downgrade() -> None:
    """Restore NOT NULL on ``default_model_name`` and the vision server default.

    NULL model names are replaced with the empty string first, because the
    NOT NULL constraint could not be re-added while NULLs remain.
    """
    # Restore default_model_name to NOT NULL (set empty string for any NULLs first)
    op.execute(
        "UPDATE llm_provider SET default_model_name = '' WHERE default_model_name IS NULL"
    )
    op.alter_column(
        "llm_provider",
        "default_model_name",
        existing_type=sa.String(),
        nullable=False,
    )
    # Restore server_default for is_default_vision_provider
    op.alter_column(
        "llm_provider",
        "is_default_vision_provider",
        existing_type=sa.Boolean(),
        server_default=sa.false(),
    )

View File

@@ -1,7 +1,7 @@
"""Populate flow mapping data
Revision ID: 01f8e6d95a33
Revises: d5c86e2c6dc6
Revises: f220515df7b4
Create Date: 2026-01-31 17:37:10.485558
"""
@@ -11,7 +11,7 @@ from alembic import op
# revision identifiers, used by Alembic.
revision = "01f8e6d95a33"
down_revision = "d5c86e2c6dc6"
down_revision = "f220515df7b4"
branch_labels = None
depends_on = None
@@ -23,7 +23,7 @@ def upgrade() -> None:
"""
INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)
SELECT
'CHAT' AS llm_model_flow_type,
'chat' AS llm_model_flow_type,
COALESCE(
(lp.is_default_provider IS TRUE AND lp.default_model_name = mc.name),
FALSE
@@ -44,7 +44,7 @@ def upgrade() -> None:
"""
INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)
SELECT
'VISION' AS llm_model_flow_type,
'vision' AS llm_model_flow_type,
COALESCE(
(lp.is_default_vision_provider IS TRUE AND lp.default_vision_model = mc.name),
FALSE
@@ -68,7 +68,7 @@ def downgrade() -> None:
default_vision_model = mc.name
FROM llm_model_flow mf
JOIN model_configuration mc ON mc.id = mf.model_configuration_id
WHERE mf.llm_model_flow_type = 'VISION'
WHERE mf.llm_model_flow_type = 'vision'
AND mf.is_default = TRUE
AND mc.llm_provider_id = lp.id;
"""
@@ -83,7 +83,7 @@ def downgrade() -> None:
default_model_name = mc.name
FROM llm_model_flow mf
JOIN model_configuration mc ON mc.id = mf.model_configuration_id
WHERE mf.llm_model_flow_type = 'CHAT'
WHERE mf.llm_model_flow_type = 'chat'
AND mf.is_default = TRUE
AND mc.llm_provider_id = lp.id;
"""
@@ -100,7 +100,7 @@ def downgrade() -> None:
FROM model_configuration mc
JOIN llm_model_flow mf ON mf.model_configuration_id = mc.id
WHERE mc.llm_provider_id = lp.id
AND mf.llm_model_flow_type = 'CHAT'
AND mf.llm_model_flow_type = 'chat'
ORDER BY mc.is_visible DESC, mc.id ASC
LIMIT 1
)

View File

@@ -1,33 +0,0 @@
"""add default_app_mode to user
Revision ID: 114a638452db
Revises: feead2911109
Create Date: 2026-02-09 18:57:08.274640
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "114a638452db"
down_revision = "feead2911109"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add the NOT NULL ``default_app_mode`` column to the ``user`` table."""
    op.add_column(
        "user",
        sa.Column(
            "default_app_mode",
            sa.String(),
            nullable=False,
            # The server default lets the NOT NULL column be added to a table
            # that already contains rows.
            server_default="CHAT",
        ),
    )
def downgrade() -> None:
    """Drop the ``default_app_mode`` column from the ``user`` table."""
    op.drop_column("user", "default_app_mode")

View File

@@ -11,6 +11,7 @@ import sqlalchemy as sa
from urllib.parse import urlparse, urlunparse
from httpx import HTTPStatusError
import httpx
from onyx.document_index.factory import get_default_document_index
from onyx.db.search_settings import SearchSettings
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
from onyx.document_index.vespa.shared_utils.utils import (
@@ -518,11 +519,15 @@ def delete_document_from_db(current_doc_id: str, index_name: str) -> None:
def upgrade() -> None:
if SKIP_CANON_DRIVE_IDS:
return
current_search_settings, _ = active_search_settings()
current_search_settings, future_search_settings = active_search_settings()
document_index = get_default_document_index(
current_search_settings,
future_search_settings,
)
# Get the index name
if hasattr(current_search_settings, "index_name"):
index_name = current_search_settings.index_name
if hasattr(document_index, "index_name"):
index_name = document_index.index_name
else:
# Default index name if we can't get it from the document_index
index_name = "danswer_index"

View File

@@ -1,27 +0,0 @@
"""add_user_preferences
Revision ID: 175ea04c7087
Revises: d56ffa94ca32
Create Date: 2026-02-04 18:16:24.830873
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "175ea04c7087"
down_revision = "d56ffa94ca32"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add the nullable text column ``user_preferences`` to the ``user`` table."""
    op.add_column(
        "user",
        sa.Column("user_preferences", sa.Text(), nullable=True),
    )
def downgrade() -> None:
    """Drop the ``user_preferences`` column from the ``user`` table."""
    op.drop_column("user", "user_preferences")

View File

@@ -1,71 +0,0 @@
"""Migrate to contextual rag model
Revision ID: 19c0ccb01687
Revises: 9c54986124c6
Create Date: 2026-02-12 11:21:41.798037
"""
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision = "19c0ccb01687"
down_revision = "9c54986124c6"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Widen ``llm_model_flow_type`` and backfill CONTEXTUAL_RAG flow rows."""
    # Widen the column to fit 'CONTEXTUAL_RAG' (14 chars); was varchar(10)
    # when the table was created with only CHAT/VISION values.
    op.alter_column(
        "llm_model_flow",
        "llm_model_flow_type",
        type_=sa.String(length=20),
        existing_type=sa.String(length=10),
        existing_nullable=False,
    )
    # For every search_settings row that has contextual rag configured,
    # create an llm_model_flow entry. is_default is TRUE if the row
    # belongs to the PRESENT search settings, FALSE otherwise.
    # On conflict an existing row is only ever promoted to default (the
    # update applies solely when the incoming is_default is TRUE), never demoted.
    op.execute(
        """
INSERT INTO llm_model_flow (llm_model_flow_type, model_configuration_id, is_default)
SELECT DISTINCT
'CONTEXTUAL_RAG',
mc.id,
(ss.status = 'PRESENT')
FROM search_settings ss
JOIN llm_provider lp
ON lp.name = ss.contextual_rag_llm_provider
JOIN model_configuration mc
ON mc.llm_provider_id = lp.id
AND mc.name = ss.contextual_rag_llm_name
WHERE ss.enable_contextual_rag = TRUE
AND ss.contextual_rag_llm_name IS NOT NULL
AND ss.contextual_rag_llm_provider IS NOT NULL
ON CONFLICT (llm_model_flow_type, model_configuration_id)
DO UPDATE SET is_default = EXCLUDED.is_default
WHERE EXCLUDED.is_default = TRUE
"""
    )
def downgrade() -> None:
    """Delete CONTEXTUAL_RAG flow rows and narrow the type column again."""
    # The rows must go first: 'CONTEXTUAL_RAG' does not fit in the
    # 10-character column restored below.
    op.execute(
        """
DELETE FROM llm_model_flow
WHERE llm_model_flow_type = 'CONTEXTUAL_RAG'
"""
    )
    op.alter_column(
        "llm_model_flow",
        "llm_model_flow_type",
        type_=sa.String(length=10),
        existing_type=sa.String(length=20),
        existing_nullable=False,
    )

View File

@@ -1,36 +0,0 @@
"""add_chat_compression_fields
Revision ID: 90b409d06e50
Revises: f220515df7b4
Create Date: 2026-01-26 09:13:09.635427
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "90b409d06e50"
down_revision = "f220515df7b4"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add the self-referencing ``last_summarized_message_id`` to ``chat_message``."""
    # Add last_summarized_message_id to chat_message
    # This field marks a message as a summary and indicates the last message it covers.
    # Summaries are branch-aware via their parent_message_id pointing to the branch.
    op.add_column(
        "chat_message",
        sa.Column(
            "last_summarized_message_id",
            sa.Integer(),
            # Deleting the referenced message nulls the pointer rather than
            # deleting the row that holds it.
            sa.ForeignKey("chat_message.id", ondelete="SET NULL"),
            nullable=True,
        ),
    )
def downgrade() -> None:
    """Drop the ``last_summarized_message_id`` column from ``chat_message``."""
    op.drop_column("chat_message", "last_summarized_message_id")

View File

@@ -16,6 +16,7 @@ from typing import Generator
from alembic import op
import sqlalchemy as sa
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.db.search_settings import SearchSettings
from onyx.configs.app_configs import AUTH_TYPE
@@ -125,11 +126,14 @@ def remove_old_tags() -> None:
the document got reindexed, the old tag would not be removed.
This function removes those old tags by comparing it against the tags in vespa.
"""
current_search_settings, _ = active_search_settings()
current_search_settings, future_search_settings = active_search_settings()
document_index = get_default_document_index(
current_search_settings, future_search_settings
)
# Get the index name
if hasattr(current_search_settings, "index_name"):
index_name = current_search_settings.index_name
if hasattr(document_index, "index_name"):
index_name = document_index.index_name
else:
# Default index name if we can't get it from the document_index
index_name = "danswer_index"

View File

@@ -1,43 +0,0 @@
"""add chunk error and vespa count columns to opensearch tenant migration
Revision ID: 93c15d6a6fbb
Revises: d3fd499c829c
Create Date: 2026-02-11 23:07:34.576725
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "93c15d6a6fbb"
down_revision = "d3fd499c829c"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add two chunk counters to ``opensearch_tenant_migration_record``.

    Both columns are NOT NULL integers with a server default of ``0``, so
    existing rows receive a value when the columns are added.
    """
    op.add_column(
        "opensearch_tenant_migration_record",
        sa.Column(
            "total_chunks_errored",
            sa.Integer(),
            nullable=False,
            server_default="0",
        ),
    )
    op.add_column(
        "opensearch_tenant_migration_record",
        sa.Column(
            "total_chunks_in_vespa",
            sa.Integer(),
            nullable=False,
            server_default="0",
        ),
    )
def downgrade() -> None:
    """Drop ``total_chunks_in_vespa`` and ``total_chunks_errored`` again."""
    op.drop_column("opensearch_tenant_migration_record", "total_chunks_in_vespa")
    op.drop_column("opensearch_tenant_migration_record", "total_chunks_errored")

View File

@@ -1,124 +0,0 @@
"""add_scim_tables
Revision ID: 9c54986124c6
Revises: b51c6844d1df
Create Date: 2026-02-12 20:29:47.448614
"""
from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "9c54986124c6"
down_revision = "b51c6844d1df"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create ``scim_token``, ``scim_group_mapping`` and ``scim_user_mapping``.

    Each mapping table additionally gets a unique index on ``external_id``.
    """

    def _id_column() -> sa.Column:
        # Integer surrogate key; the primary-key constraint is declared per table.
        return sa.Column("id", sa.Integer(), nullable=False)

    def _timestamp(column_name: str, **column_kwargs) -> sa.Column:
        # Built fresh on every call: a Column instance can belong to one table only.
        return sa.Column(
            column_name,
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
            **column_kwargs,
        )

    def _unique_external_id_index(index_name: str, table_name: str) -> None:
        op.create_index(
            op.f(index_name),
            table_name,
            ["external_id"],
            unique=True,
        )

    # --- scim_token -------------------------------------------------------
    op.create_table(
        "scim_token",
        _id_column(),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("hashed_token", sa.String(length=64), nullable=False),
        sa.Column("token_display", sa.String(), nullable=False),
        sa.Column(
            "created_by_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=False,
        ),
        sa.Column(
            "is_active",
            sa.Boolean(),
            server_default=sa.text("true"),
            nullable=False,
        ),
        _timestamp("created_at"),
        sa.Column("last_used_at", sa.DateTime(timezone=True), nullable=True),
        sa.ForeignKeyConstraint(["created_by_id"], ["user.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("hashed_token"),
    )

    # --- scim_group_mapping -----------------------------------------------
    op.create_table(
        "scim_group_mapping",
        _id_column(),
        sa.Column("external_id", sa.String(), nullable=False),
        sa.Column("user_group_id", sa.Integer(), nullable=False),
        _timestamp("created_at"),
        _timestamp("updated_at", onupdate=sa.text("now()")),
        sa.ForeignKeyConstraint(
            ["user_group_id"], ["user_group.id"], ondelete="CASCADE"
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("user_group_id"),
    )
    _unique_external_id_index(
        "ix_scim_group_mapping_external_id", "scim_group_mapping"
    )

    # --- scim_user_mapping ------------------------------------------------
    op.create_table(
        "scim_user_mapping",
        _id_column(),
        sa.Column("external_id", sa.String(), nullable=False),
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=False,
        ),
        _timestamp("created_at"),
        _timestamp("updated_at", onupdate=sa.text("now()")),
        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("user_id"),
    )
    _unique_external_id_index("ix_scim_user_mapping_external_id", "scim_user_mapping")
def downgrade() -> None:
    """Remove the SCIM tables and their ``external_id`` indexes, newest first."""
    # (index name, owning table) pairs, in reverse order of creation.
    indexed_mapping_tables = (
        ("ix_scim_user_mapping_external_id", "scim_user_mapping"),
        ("ix_scim_group_mapping_external_id", "scim_group_mapping"),
    )
    for index_name, mapping_table in indexed_mapping_tables:
        op.drop_index(op.f(index_name), table_name=mapping_table)
        op.drop_table(mapping_table)

    op.drop_table("scim_token")

View File

@@ -1,81 +0,0 @@
"""seed_memory_tool and add enable_memory_tool to user
Revision ID: b51c6844d1df
Revises: 93c15d6a6fbb
Create Date: 2026-02-11 00:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "b51c6844d1df"
down_revision = "93c15d6a6fbb"
branch_labels = None
depends_on = None
MEMORY_TOOL = {
"name": "MemoryTool",
"display_name": "Add Memory",
"description": "Save memories about the user for future conversations.",
"in_code_tool_id": "MemoryTool",
"enabled": True,
}
def upgrade() -> None:
    """Seed or refresh the MemoryTool row, then add ``user.enable_memory_tool``."""
    bind = op.get_bind()

    lookup_stmt = sa.text(
        "SELECT in_code_tool_id FROM tool WHERE in_code_tool_id = :in_code_tool_id"
    )
    lookup_params = {"in_code_tool_id": MEMORY_TOOL["in_code_tool_id"]}
    matching_row = bind.execute(lookup_stmt, lookup_params).fetchone()

    # Refresh the descriptive fields when the tool is already present,
    # otherwise insert a brand new row.
    if matching_row:
        seed_stmt = sa.text(
            """
            UPDATE tool
            SET name = :name,
                display_name = :display_name,
                description = :description
            WHERE in_code_tool_id = :in_code_tool_id
            """
        )
    else:
        seed_stmt = sa.text(
            """
            INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
            VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)
            """
        )
    bind.execute(seed_stmt, MEMORY_TOOL)

    memory_flag = sa.Column(
        "enable_memory_tool",
        sa.Boolean(),
        nullable=False,
        server_default=sa.true(),
    )
    op.add_column("user", memory_flag)
def downgrade() -> None:
    """Drop ``user.enable_memory_tool`` and remove the seeded MemoryTool row.

    Any ``persona__tool`` rows pointing at the tool are deleted before the tool
    itself, mirroring the file-reader tool migration's downgrade.
    """
    op.drop_column("user", "enable_memory_tool")

    conn = op.get_bind()
    params = {"in_code_tool_id": MEMORY_TOOL["in_code_tool_id"]}

    # The tool may have been attached to personas after it was seeded; clear
    # those associations first so the delete below cannot trip the foreign key.
    # NOTE(review): redundant but harmless if the FK already cascades - confirm.
    conn.execute(
        sa.text(
            """
            DELETE FROM persona__tool
            WHERE tool_id IN (
                SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id
            )
            """
        ),
        params,
    )
    conn.execute(
        sa.text("DELETE FROM tool WHERE in_code_tool_id = :in_code_tool_id"),
        params,
    )

View File

@@ -1,102 +0,0 @@
"""add_file_reader_tool
Revision ID: d3fd499c829c
Revises: 114a638452db
Create Date: 2026-02-07 19:28:22.452337
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "d3fd499c829c"
down_revision = "114a638452db"
branch_labels = None
depends_on = None
FILE_READER_TOOL = {
"name": "read_file",
"display_name": "File Reader",
"description": (
"Read sections of user-uploaded files by character offset. "
"Useful for inspecting large files that cannot fit entirely in context."
),
"in_code_tool_id": "FileReaderTool",
"enabled": True,
}
def upgrade() -> None:
    """Seed or refresh the FileReaderTool row and attach it to persona 0."""
    bind = op.get_bind()

    # Look for a previously seeded row for this in-code tool.
    found = bind.execute(
        sa.text("SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id"),
        {"in_code_tool_id": FILE_READER_TOOL["in_code_tool_id"]},
    ).fetchone()

    if not found:
        # No row yet: insert one and capture the generated id.
        inserted = bind.execute(
            sa.text(
                """
                INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
                VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)
                RETURNING id
                """
            ),
            FILE_READER_TOOL,
        )
        tool_id = inserted.scalar_one()
    else:
        # Row exists: bring its descriptive fields up to date and reuse its id.
        bind.execute(
            sa.text(
                """
                UPDATE tool
                SET name = :name,
                    display_name = :display_name,
                    description = :description
                WHERE in_code_tool_id = :in_code_tool_id
                """
            ),
            FILE_READER_TOOL,
        )
        tool_id = found[0]

    # Attach to the default persona (id=0); an existing link is left untouched.
    attach_stmt = sa.text(
        """
        INSERT INTO persona__tool (persona_id, tool_id)
        VALUES (0, :tool_id)
        ON CONFLICT DO NOTHING
        """
    )
    bind.execute(attach_stmt, {"tool_id": tool_id})
def downgrade() -> None:
    """Detach the FileReaderTool from all personas, then delete the tool row."""
    bind = op.get_bind()
    params = {"in_code_tool_id": FILE_READER_TOOL["in_code_tool_id"]}

    # Associations go first (FK constraint), the tool row second.
    detach_stmt = sa.text(
        """
        DELETE FROM persona__tool
        WHERE tool_id IN (
            SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id
        )
        """
    )
    delete_tool_stmt = sa.text(
        "DELETE FROM tool WHERE in_code_tool_id = :in_code_tool_id"
    )
    for stmt in (detach_stmt, delete_tool_stmt):
        bind.execute(stmt, params)

View File

@@ -1,35 +0,0 @@
"""add_file_content
Revision ID: d56ffa94ca32
Revises: 01f8e6d95a33
Create Date: 2026-02-06 15:29:34.192960
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "d56ffa94ca32"
down_revision = "01f8e6d95a33"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create ``file_content``, keyed by ``file_id`` (FK to ``file_record``, cascading delete)."""
    file_id_column = sa.Column(
        "file_id",
        sa.String(),
        sa.ForeignKey("file_record.file_id", ondelete="CASCADE"),
        primary_key=True,
    )
    lobj_oid_column = sa.Column("lobj_oid", sa.BigInteger(), nullable=False)
    file_size_column = sa.Column(
        "file_size", sa.BigInteger(), nullable=False, server_default="0"
    )
    op.create_table(
        "file_content",
        file_id_column,
        lobj_oid_column,
        file_size_column,
    )
def downgrade() -> None:
    """Drop the ``file_content`` table created by ``upgrade``."""
    created_table = "file_content"
    op.drop_table(created_table)

View File

@@ -1,35 +0,0 @@
"""add_cascade_delete_to_search_query_user_id
Revision ID: d5c86e2c6dc6
Revises: 90b409d06e50
Create Date: 2026-02-04 16:05:04.749804
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "d5c86e2c6dc6"
down_revision = "90b409d06e50"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Recreate ``search_query.user_id``'s foreign key with ``ON DELETE CASCADE``."""
    fk_name = "search_query_user_id_fkey"
    source_table = "search_query"

    # A constraint's delete rule cannot be altered in place: drop and re-add.
    op.drop_constraint(fk_name, source_table, type_="foreignkey")
    op.create_foreign_key(
        fk_name,
        source_table,
        "user",
        ["user_id"],
        ["id"],
        ondelete="CASCADE",
    )
def downgrade() -> None:
    """Recreate ``search_query.user_id``'s foreign key without a delete rule."""
    fk_name = "search_query_user_id_fkey"
    source_table = "search_query"

    op.drop_constraint(fk_name, source_table, type_="foreignkey")
    op.create_foreign_key(fk_name, source_table, "user", ["user_id"], ["id"])

View File

@@ -1,69 +0,0 @@
"""add_opensearch_tenant_migration_columns
Revision ID: feead2911109
Revises: 175ea04c7087
Create Date: 2026-02-10 17:46:34.029937
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "feead2911109"
down_revision = "175ea04c7087"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add progress-tracking and retrieval-toggle columns to the migration record table."""
    table_name = "opensearch_tenant_migration_record"

    # Listed in the order they are added to the table.
    new_columns = [
        sa.Column("vespa_visit_continuation_token", sa.Text(), nullable=True),
        sa.Column(
            "total_chunks_migrated",
            sa.Integer(),
            nullable=False,
            server_default="0",
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
        sa.Column(
            "migration_completed_at",
            sa.DateTime(timezone=True),
            nullable=True,
        ),
        sa.Column(
            "enable_opensearch_retrieval",
            sa.Boolean(),
            nullable=False,
            server_default="false",
        ),
    ]
    for new_column in new_columns:
        op.add_column(table_name, new_column)
def downgrade() -> None:
    """Drop the columns added by ``upgrade``, in reverse order of creation."""
    table_name = "opensearch_tenant_migration_record"
    columns_newest_first = (
        "enable_opensearch_retrieval",
        "migration_completed_at",
        "created_at",
        "total_chunks_migrated",
        "vespa_visit_continuation_token",
    )
    for column_name in columns_newest_first:
        op.drop_column(table_name, column_name)

View File

@@ -39,7 +39,7 @@ EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}
def include_object(
object: SchemaItem, # noqa: ARG001
object: SchemaItem,
name: str | None,
type_: Literal[
"schema",
@@ -49,8 +49,8 @@ def include_object(
"unique_constraint",
"foreign_key_constraint",
],
reflected: bool, # noqa: ARG001
compare_to: SchemaItem | None, # noqa: ARG001
reflected: bool,
compare_to: SchemaItem | None,
) -> bool:
if type_ == "table" and name in EXCLUDE_TABLES:
return False

View File

@@ -1,20 +1,20 @@
The Onyx Enterprise License (the "Enterprise License")
The DanswerAI Enterprise license (the Enterprise License)
Copyright (c) 2023-present DanswerAI, Inc.
With regard to the Onyx Software:
This software and associated documentation files (the "Software") may only be
used in production, if you (and any entity that you represent) have agreed to,
and are in compliance with, the Onyx Subscription Terms of Service, available
at https://www.onyx.app/legal/self-host (the "Enterprise Terms"), or other
and are in compliance with, the DanswerAI Subscription Terms of Service, available
at https://onyx.app/terms (the Enterprise Terms), or other
agreement governing the use of the Software, as agreed by you and DanswerAI,
and otherwise have a valid Onyx Enterprise License for the
and otherwise have a valid Onyx Enterprise license for the
correct number of user seats. Subject to the foregoing sentence, you are free to
modify this Software and publish patches to the Software. You agree that DanswerAI
and/or its licensors (as applicable) retain all right, title and interest in and
to all such modifications and/or patches, and all such modifications and/or
patches may only be used, copied, modified, displayed, distributed, or otherwise
exploited with a valid Onyx Enterprise License for the correct
exploited with a valid Onyx Enterprise license for the correct
number of user seats. Notwithstanding the foregoing, you may copy and modify
the Software for development and testing purposes, without requiring a
subscription. You agree that DanswerAI and/or its licensors (as applicable) retain

View File

@@ -1,15 +1,12 @@
from onyx.background.celery.apps import app_base
from onyx.background.celery.apps.background import celery_app
celery_app.autodiscover_tasks(
app_base.filter_task_modules(
[
"ee.onyx.background.celery.tasks.doc_permission_syncing",
"ee.onyx.background.celery.tasks.external_group_syncing",
"ee.onyx.background.celery.tasks.cleanup",
"ee.onyx.background.celery.tasks.tenant_provisioning",
"ee.onyx.background.celery.tasks.query_history",
]
)
[
"ee.onyx.background.celery.tasks.doc_permission_syncing",
"ee.onyx.background.celery.tasks.external_group_syncing",
"ee.onyx.background.celery.tasks.cleanup",
"ee.onyx.background.celery.tasks.tenant_provisioning",
"ee.onyx.background.celery.tasks.query_history",
]
)

View File

@@ -1,14 +1,11 @@
from onyx.background.celery.apps import app_base
from onyx.background.celery.apps.heavy import celery_app
celery_app.autodiscover_tasks(
app_base.filter_task_modules(
[
"ee.onyx.background.celery.tasks.doc_permission_syncing",
"ee.onyx.background.celery.tasks.external_group_syncing",
"ee.onyx.background.celery.tasks.cleanup",
"ee.onyx.background.celery.tasks.query_history",
]
)
[
"ee.onyx.background.celery.tasks.doc_permission_syncing",
"ee.onyx.background.celery.tasks.external_group_syncing",
"ee.onyx.background.celery.tasks.cleanup",
"ee.onyx.background.celery.tasks.query_history",
]
)

View File

@@ -1,11 +1,8 @@
from onyx.background.celery.apps import app_base
from onyx.background.celery.apps.light import celery_app
celery_app.autodiscover_tasks(
app_base.filter_task_modules(
[
"ee.onyx.background.celery.tasks.doc_permission_syncing",
"ee.onyx.background.celery.tasks.external_group_syncing",
]
)
[
"ee.onyx.background.celery.tasks.doc_permission_syncing",
"ee.onyx.background.celery.tasks.external_group_syncing",
]
)

View File

@@ -1,10 +1,7 @@
from onyx.background.celery.apps import app_base
from onyx.background.celery.apps.monitoring import celery_app
celery_app.autodiscover_tasks(
app_base.filter_task_modules(
[
"ee.onyx.background.celery.tasks.tenant_provisioning",
]
)
[
"ee.onyx.background.celery.tasks.tenant_provisioning",
]
)

View File

@@ -1,15 +1,12 @@
from onyx.background.celery.apps import app_base
from onyx.background.celery.apps.primary import celery_app
celery_app.autodiscover_tasks(
app_base.filter_task_modules(
[
"ee.onyx.background.celery.tasks.doc_permission_syncing",
"ee.onyx.background.celery.tasks.external_group_syncing",
"ee.onyx.background.celery.tasks.cloud",
"ee.onyx.background.celery.tasks.ttl_management",
"ee.onyx.background.celery.tasks.usage_reporting",
]
)
[
"ee.onyx.background.celery.tasks.doc_permission_syncing",
"ee.onyx.background.celery.tasks.external_group_syncing",
"ee.onyx.background.celery.tasks.cloud",
"ee.onyx.background.celery.tasks.ttl_management",
"ee.onyx.background.celery.tasks.usage_reporting",
]
)

View File

@@ -536,9 +536,7 @@ def connector_permission_sync_generator_task(
)
redis_connector.permissions.set_fence(new_payload)
callback = PermissionSyncCallback(
redis_connector, lock, r, timeout_seconds=JOB_TIMEOUT
)
callback = PermissionSyncCallback(redis_connector, lock, r)
# pass in the capability to fetch all existing docs for the cc_pair
# this is can be used to determine documents that are "missing" and thus
@@ -578,13 +576,6 @@ def connector_permission_sync_generator_task(
tasks_generated = 0
docs_with_errors = 0
for doc_external_access in document_external_accesses:
if callback.should_stop():
raise RuntimeError(
f"Permission sync task timed out or stop signal detected: "
f"cc_pair={cc_pair_id} "
f"tasks_generated={tasks_generated}"
)
result = redis_connector.permissions.update_db(
lock=lock,
new_permissions=[doc_external_access],
@@ -941,7 +932,6 @@ class PermissionSyncCallback(IndexingHeartbeatInterface):
redis_connector: RedisConnector,
redis_lock: RedisLock,
redis_client: Redis,
timeout_seconds: int | None = None,
):
super().__init__()
self.redis_connector: RedisConnector = redis_connector
@@ -954,29 +944,14 @@ class PermissionSyncCallback(IndexingHeartbeatInterface):
self.last_tag: str = "PermissionSyncCallback.__init__"
self.last_lock_reacquire: datetime = datetime.now(timezone.utc)
self.last_lock_monotonic = time.monotonic()
self.start_monotonic = time.monotonic()
self.timeout_seconds = timeout_seconds
def should_stop(self) -> bool:
if self.redis_connector.stop.fenced:
return True
# Check if the task has exceeded its timeout
# NOTE: Celery's soft_time_limit does not work with thread pools,
# so we must enforce timeouts internally.
if self.timeout_seconds is not None:
elapsed = time.monotonic() - self.start_monotonic
if elapsed > self.timeout_seconds:
logger.warning(
f"PermissionSyncCallback - task timeout exceeded: "
f"elapsed={elapsed:.0f}s timeout={self.timeout_seconds}s "
f"cc_pair={self.redis_connector.cc_pair_id}"
)
return True
return False
def progress(self, tag: str, amount: int) -> None: # noqa: ARG002
def progress(self, tag: str, amount: int) -> None:
try:
self.redis_connector.permissions.set_active()
@@ -1007,7 +982,7 @@ class PermissionSyncCallback(IndexingHeartbeatInterface):
def monitor_ccpair_permissions_taskset(
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session # noqa: ARG001
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)

View File

@@ -259,7 +259,7 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str) -> bool | None:
def try_creating_external_group_sync_task(
app: Celery,
cc_pair_id: int,
r: Redis, # noqa: ARG001
r: Redis,
tenant_id: str,
) -> str | None:
"""Returns an int if syncing is needed. The int represents the number of sync tasks generated.
@@ -344,7 +344,7 @@ def try_creating_external_group_sync_task(
bind=True,
)
def connector_external_group_sync_generator_task(
self: Task, # noqa: ARG001
self: Task,
cc_pair_id: int,
tenant_id: str,
) -> None:
@@ -466,7 +466,6 @@ def connector_external_group_sync_generator_task(
def _perform_external_group_sync(
cc_pair_id: int,
tenant_id: str,
timeout_seconds: int = JOB_TIMEOUT,
) -> None:
# Create attempt record at the start
with get_session_with_current_tenant() as db_session:
@@ -519,23 +518,9 @@ def _perform_external_group_sync(
seen_users: set[str] = set() # Track unique users across all groups
total_groups_processed = 0
total_group_memberships_synced = 0
start_time = time.monotonic()
try:
external_user_group_generator = ext_group_sync_func(tenant_id, cc_pair)
for external_user_group in external_user_group_generator:
# Check if the task has exceeded its timeout
# NOTE: Celery's soft_time_limit does not work with thread pools,
# so we must enforce timeouts internally.
elapsed = time.monotonic() - start_time
if elapsed > timeout_seconds:
raise RuntimeError(
f"External group sync task timed out: "
f"cc_pair={cc_pair_id} "
f"elapsed={elapsed:.0f}s "
f"timeout={timeout_seconds}s "
f"groups_processed={total_groups_processed}"
)
external_user_group_batch.append(external_user_group)
# Track progress
@@ -605,8 +590,8 @@ def _perform_external_group_sync(
def validate_external_group_sync_fences(
tenant_id: str,
celery_app: Celery, # noqa: ARG001
r: Redis, # noqa: ARG001
celery_app: Celery,
r: Redis,
r_replica: Redis,
r_celery: Redis,
lock_beat: RedisLock,

View File

@@ -40,7 +40,7 @@ def export_query_history_task(
end: datetime,
start_time: datetime,
# Need to include the tenant_id since the TenantAwareTask needs this
tenant_id: str, # noqa: ARG001
tenant_id: str,
) -> None:
if not self.request.id:
raise RuntimeError("No task id defined for this task; cannot identify it")

View File

@@ -43,7 +43,7 @@ _TENANT_PROVISIONING_TIME_LIMIT = 60 * 10 # 10 minutes
trail=False,
bind=True,
)
def check_available_tenants(self: Task) -> None: # noqa: ARG001
def check_available_tenants(self: Task) -> None:
"""
Check if we have enough pre-provisioned tenants available.
If not, trigger the pre-provisioning of new tenants.

View File

@@ -21,9 +21,9 @@ logger = setup_logger()
trail=False,
)
def generate_usage_report_task(
self: Task, # noqa: ARG001
self: Task,
*,
tenant_id: str, # noqa: ARG001
tenant_id: str,
user_id: str | None = None,
period_from: str | None = None,
period_to: str | None = None,

View File

@@ -7,7 +7,7 @@ QUERY_HISTORY_TASK_NAME_PREFIX = OnyxCeleryTask.EXPORT_QUERY_HISTORY_TASK
def name_chat_ttl_task(
retention_limit_days: float, tenant_id: str | None = None # noqa: ARG001
retention_limit_days: float, tenant_id: str | None = None
) -> str:
return f"chat_ttl_{retention_limit_days}_days"

View File

@@ -134,7 +134,7 @@ GATED_TENANTS_KEY = "gated_tenants"
# License enforcement - when True, blocks API access for gated/expired licenses
LICENSE_ENFORCEMENT_ENABLED = (
os.environ.get("LICENSE_ENFORCEMENT_ENABLED", "true").lower() == "true"
os.environ.get("LICENSE_ENFORCEMENT_ENABLED", "").lower() == "true"
)
# Cloud data plane URL - self-hosted instances call this to reach cloud proxy endpoints

View File

@@ -54,7 +54,7 @@ def delete_document_set_privacy__no_commit(
def fetch_document_sets(
user_id: UUID | None,
db_session: Session,
include_outdated: bool = True, # Parameter only for versioned implementation, unused # noqa: ARG001
include_outdated: bool = True, # Parameter only for versioned implementation, unused
) -> list[tuple[DocumentSet, list[ConnectorCredentialPair]]]:
assert user_id is not None

View File

@@ -5,10 +5,8 @@ It filters hierarchy nodes based on user email and external group membership.
"""
from sqlalchemy import any_
from sqlalchemy import cast
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy import String
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import Session
from sqlalchemy.sql.elements import ColumnElement
@@ -34,7 +32,7 @@ def _build_hierarchy_access_filter(
if external_group_ids:
access_filters.append(
HierarchyNode.external_user_group_ids.overlap(
cast(postgresql.array(external_group_ids), postgresql.ARRAY(String))
postgresql.array(external_group_ids)
)
)
return or_(*access_filters)

View File

@@ -11,7 +11,6 @@ from ee.onyx.server.license.models import LicenseMetadata
from ee.onyx.server.license.models import LicensePayload
from ee.onyx.server.license.models import LicenseSource
from onyx.auth.schemas import UserRole
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.db.models import License
from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
@@ -108,8 +107,7 @@ def get_used_seats(tenant_id: str | None = None) -> int:
Get current seat usage directly from database.
For multi-tenant: counts users in UserTenantMapping for this tenant.
For self-hosted: counts all active users (excludes EXT_PERM_USER role
and the anonymous system user).
For self-hosted: counts all active users (excludes EXT_PERM_USER role).
TODO: Exclude API key dummy users from seat counting. API keys create
users with emails like `__DANSWER_API_KEY_*` that should not count toward
@@ -129,7 +127,6 @@ def get_used_seats(tenant_id: str | None = None) -> int:
.where(
User.is_active == True, # type: ignore # noqa: E712
User.role != UserRole.EXT_PERM_USER,
User.email != ANONYMOUS_USER_EMAIL, # type: ignore
)
)
return result.scalar() or 0

View File

@@ -643,7 +643,7 @@ def add_users_to_user_group(
def update_user_group(
db_session: Session,
user: User, # noqa: ARG001
user: User,
user_group_id: int,
user_group_update: UserGroupUpdate,
) -> UserGroup:

View File

@@ -25,7 +25,7 @@ CONFLUENCE_DOC_SYNC_LABEL = "confluence_doc_sync"
def confluence_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:

View File

@@ -1,8 +1,6 @@
from typing import Any
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.onyx_confluence import (
get_user_email_from_username__server,
)
@@ -74,7 +72,6 @@ def get_page_restrictions(
page_id: str,
page_restrictions: dict[str, Any],
ancestors: list[dict[str, Any]],
add_prefix: bool = False,
) -> ExternalAccess | None:
"""
This function gets the restrictions for a page. In Confluence, a child can have
@@ -82,9 +79,6 @@ def get_page_restrictions(
If no restrictions are found anywhere, then return None, indicating that the page
should inherit the space's restrictions.
add_prefix: When True, prefix group IDs with source type (for indexing path).
When False (default), leave unprefixed (for permission sync path).
"""
found_user_emails: set[str] = set()
found_group_names: set[str] = set()
@@ -98,22 +92,13 @@ def get_page_restrictions(
restrictions=page_restrictions,
)
)
def _maybe_prefix_groups(group_names: set[str]) -> set[str]:
if add_prefix:
return {
build_ext_group_name_for_onyx(g, DocumentSource.CONFLUENCE)
for g in group_names
}
return group_names
# if there are individual page-level restrictions, then this is the accurate
# restriction for the page. You cannot both have page-level restrictions AND
# inherit restrictions from the parent.
if found_any_page_level_restriction:
return ExternalAccess(
external_user_emails=found_user_emails,
external_user_group_ids=_maybe_prefix_groups(found_group_names),
external_user_group_ids=found_group_names,
is_public=False,
)
@@ -140,7 +125,7 @@ def get_page_restrictions(
)
return ExternalAccess(
external_user_emails=ancestor_user_emails,
external_user_group_ids=_maybe_prefix_groups(ancestor_group_names),
external_user_group_ids=ancestor_group_names,
is_public=False,
)

View File

@@ -3,8 +3,6 @@ from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GR
from ee.onyx.external_permissions.confluence.constants import REQUEST_PAGINATION_LIMIT
from ee.onyx.external_permissions.confluence.constants import VIEWSPACE_PERMISSION_TYPE
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.onyx_confluence import (
get_user_email_from_username__server,
)
@@ -114,7 +112,6 @@ def get_space_permission(
confluence_client: OnyxConfluence,
space_key: str,
is_cloud: bool,
add_prefix: bool = False,
) -> ExternalAccess:
if is_cloud:
space_permissions = _get_cloud_space_permissions(confluence_client, space_key)
@@ -133,32 +130,13 @@ def get_space_permission(
f"permissions for space '{space_key}'"
)
# Prefix group IDs with source type if requested (for indexing path)
if add_prefix and space_permissions.external_user_group_ids:
prefixed_groups = {
build_ext_group_name_for_onyx(g, DocumentSource.CONFLUENCE)
for g in space_permissions.external_user_group_ids
}
return ExternalAccess(
external_user_emails=space_permissions.external_user_emails,
external_user_group_ids=prefixed_groups,
is_public=space_permissions.is_public,
)
return space_permissions
def get_all_space_permissions(
confluence_client: OnyxConfluence,
is_cloud: bool,
add_prefix: bool = False,
) -> dict[str, ExternalAccess]:
"""
Get access permissions for all spaces in Confluence.
add_prefix: When True, prefix group IDs with source type (for indexing path).
When False (default), leave unprefixed (for permission sync path).
"""
logger.debug("Getting space permissions")
# Gets all the spaces in the Confluence instance
all_space_keys = [
@@ -173,9 +151,7 @@ def get_all_space_permissions(
logger.debug(f"Got {len(all_space_keys)} spaces from confluence")
space_permissions_by_space_key: dict[str, ExternalAccess] = {}
for space_key in all_space_keys:
space_permissions = get_space_permission(
confluence_client, space_key, is_cloud, add_prefix
)
space_permissions = get_space_permission(confluence_client, space_key, is_cloud)
# Stores the permissions for each space
space_permissions_by_space_key[space_key] = space_permissions

View File

@@ -34,7 +34,7 @@ GITHUB_DOC_SYNC_LABEL = "github_doc_sync"
def github_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[DocExternalAccess, None, None]:
"""
@@ -50,12 +50,7 @@ def github_doc_sync(
**cc_pair.connector.connector_specific_config
)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
github_connector.load_credentials(credential_json)
github_connector.load_credentials(cc_pair.credential.credential_json)
logger.info("GitHub connector credentials loaded successfully")
if not github_connector.github_client:
@@ -65,7 +60,21 @@ def github_doc_sync(
# Get all repositories from GitHub API
logger.info("Fetching all repositories from GitHub API")
try:
repos = github_connector.fetch_configured_repos()
repos = []
if github_connector.repositories:
if "," in github_connector.repositories:
# Multiple repositories specified
repos = github_connector.get_github_repos(
github_connector.github_client
)
else:
# Single repository
repos = [
github_connector.get_github_repo(github_connector.github_client)
]
else:
# All repositories
repos = github_connector.get_all_repos(github_connector.github_client)
logger.info(f"Found {len(repos)} repositories to check")
except Exception as e:

View File

@@ -12,18 +12,13 @@ logger = setup_logger()
def github_group_sync(
tenant_id: str, # noqa: ARG001
tenant_id: str,
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
github_connector: GithubConnector = GithubConnector(
**cc_pair.connector.connector_specific_config
)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
github_connector.load_credentials(credential_json)
github_connector.load_credentials(cc_pair.credential.credential_json)
if not github_connector.github_client:
raise ValueError("github_client is required")

View File

@@ -91,7 +91,7 @@ class TeamInfo(BaseModel):
def _fetch_organization_members(
github_client: Github, org_name: str, retry_count: int = 0 # noqa: ARG001
github_client: Github, org_name: str, retry_count: int = 0
) -> List[UserInfo]:
"""Fetch all organization members including owners and regular members."""
org_members: List[UserInfo] = []
@@ -124,7 +124,7 @@ def _fetch_organization_members(
def _fetch_repository_teams_detailed(
repo: Repository, github_client: Github, retry_count: int = 0 # noqa: ARG001
repo: Repository, github_client: Github, retry_count: int = 0
) -> List[TeamInfo]:
"""Fetch teams with access to the repository and their members."""
teams_data: List[TeamInfo] = []
@@ -167,7 +167,7 @@ def _fetch_repository_teams_detailed(
def fetch_repository_team_slugs(
repo: Repository, github_client: Github, retry_count: int = 0 # noqa: ARG001
repo: Repository, github_client: Github, retry_count: int = 0
) -> List[str]:
"""Fetch team slugs with access to the repository."""
logger.info(f"Fetching team slugs for repository {repo.full_name}")

View File

@@ -39,8 +39,8 @@ def _get_slim_doc_generator(
def gmail_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
"""
@@ -50,12 +50,7 @@ def gmail_doc_sync(
already populated.
"""
gmail_connector = GmailConnector(**cc_pair.connector.connector_specific_config)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
gmail_connector.load_credentials(credential_json)
gmail_connector.load_credentials(cc_pair.credential.credential_json)
slim_doc_generator = _get_slim_doc_generator(
cc_pair, gmail_connector, callback=callback

View File

@@ -13,7 +13,6 @@ from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.access.models import ExternalAccess
from onyx.access.models import NodeExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_drive.models import GoogleDriveFileType
@@ -68,17 +67,11 @@ def get_external_access_for_raw_gdrive_file(
company_domain: str,
retriever_drive_service: GoogleDriveService | None,
admin_drive_service: GoogleDriveService,
add_prefix: bool = False,
) -> ExternalAccess:
"""
Get the external access for a raw Google Drive file.
Assumes the file we retrieved has EITHER `permissions` or `permission_ids`
add_prefix: When this method is called during the initial indexing via the connector,
set add_prefix to True so group IDs are prefixed with the source type.
When invoked from doc_sync (permission sync), use the default (False)
since upsert_document_external_perms handles prefixing.
"""
doc_id = file.get("id")
if not doc_id:
@@ -171,13 +164,6 @@ def get_external_access_for_raw_gdrive_file(
| ({drive_id} if drive_id is not None else set())
)
# Prefix group IDs with source type if requested (for indexing path)
if add_prefix:
group_ids = {
build_ext_group_name_for_onyx(group_id, DocumentSource.GOOGLE_DRIVE)
for group_id in group_ids
}
return ExternalAccess(
external_user_emails=user_emails,
external_user_group_ids=group_ids,
@@ -189,7 +175,6 @@ def get_external_access_for_folder(
folder: GoogleDriveFileType,
google_domain: str,
drive_service: GoogleDriveService,
add_prefix: bool = False,
) -> ExternalAccess:
"""
Extract ExternalAccess from a folder's permissions.
@@ -201,8 +186,6 @@ def get_external_access_for_folder(
folder: The folder metadata from Google Drive API (must include permissionIds field)
google_domain: The company's Google Workspace domain (e.g., "company.com")
drive_service: Google Drive service for fetching permission details
add_prefix: When True, prefix group IDs with source type (for indexing path).
When False (default), leave unprefixed (for permission sync path).
Returns:
ExternalAccess with extracted permission info
@@ -265,25 +248,17 @@ def get_external_access_for_folder(
# If allowFileDiscovery is False, it's "link only" access
is_public = permission.allow_file_discovery is not False
# Prefix group IDs with source type if requested (for indexing path)
group_ids: set[str] = group_emails
if add_prefix:
group_ids = {
build_ext_group_name_for_onyx(group_id, DocumentSource.GOOGLE_DRIVE)
for group_id in group_emails
}
return ExternalAccess(
external_user_emails=user_emails,
external_user_group_ids=group_ids,
external_user_group_ids=group_emails,
is_public=is_public,
)
def gdrive_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
"""
@@ -295,12 +270,7 @@ def gdrive_doc_sync(
google_drive_connector = GoogleDriveConnector(
**cc_pair.connector.connector_specific_config
)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
google_drive_connector.load_credentials(credential_json)
google_drive_connector.load_credentials(cc_pair.credential.credential_json)
slim_doc_generator = _get_slim_doc_generator(cc_pair, google_drive_connector)

View File

@@ -384,19 +384,14 @@ def _build_onyx_groups(
def gdrive_group_sync(
tenant_id: str, # noqa: ARG001
tenant_id: str,
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
# Initialize connector and build credential/service objects
google_drive_connector = GoogleDriveConnector(
**cc_pair.connector.connector_specific_config
)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
google_drive_connector.load_credentials(credential_json)
google_drive_connector.load_credentials(cc_pair.credential.credential_json)
admin_service = get_admin_service(
google_drive_connector.creds, google_drive_connector.primary_admin_email
)

View File

@@ -17,19 +17,14 @@ JIRA_DOC_SYNC_TAG = "jira_doc_sync"
def jira_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[ElementExternalAccess, None, None]:
jira_connector = JiraConnector(
**cc_pair.connector.connector_specific_config,
)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
jira_connector.load_credentials(credential_json)
jira_connector.load_credentials(cc_pair.credential.credential_json)
yield from generic_doc_sync(
cc_pair=cc_pair,

View File

@@ -102,7 +102,7 @@ def _build_group_member_email_map(
def jira_group_sync(
tenant_id: str, # noqa: ARG001
tenant_id: str,
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
"""
@@ -119,13 +119,8 @@ def jira_group_sync(
if not jira_base_url:
raise ValueError("No jira_base_url found in connector config")
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
jira_client = build_jira_client(
credentials=credential_json,
credentials=cc_pair.credential.credential_json,
jira_base=jira_base_url,
scoped_token=scoped_token,
)

View File

@@ -8,8 +8,6 @@ from ee.onyx.external_permissions.jira.models import Holder
from ee.onyx.external_permissions.jira.models import Permission
from ee.onyx.external_permissions.jira.models import User
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.utils.logger import setup_logger
HolderMap = dict[str, list[Holder]]
@@ -254,14 +252,7 @@ def _build_external_access_from_holder_map(
def get_project_permissions(
jira_client: JIRA,
jira_project: str,
add_prefix: bool = False,
) -> ExternalAccess | None:
"""
Get project permissions from Jira.
add_prefix: When True, prefix group IDs with source type (for indexing path).
When False (default), leave unprefixed (for permission sync path).
"""
project_permissions: PermissionScheme = jira_client.project_permissionscheme(
project=jira_project
)
@@ -276,20 +267,6 @@ def get_project_permissions(
holder_map = _build_holder_map(permissions=project_permissions.permissions)
external_access = _build_external_access_from_holder_map(
return _build_external_access_from_holder_map(
jira_client=jira_client, jira_project=jira_project, holder_map=holder_map
)
# Prefix group IDs with source type if requested (for indexing path)
if add_prefix and external_access and external_access.external_user_group_ids:
prefixed_groups = {
build_ext_group_name_for_onyx(g, DocumentSource.JIRA)
for g in external_access.external_user_group_ids
}
return ExternalAccess(
external_user_emails=external_access.external_user_emails,
external_user_group_ids=prefixed_groups,
is_public=external_access.is_public,
)
return external_access

View File

@@ -23,7 +23,7 @@ ContentRange = tuple[int, int | None] # (start_index, end_index) None means to
# NOTE: Used for testing timing
def _get_dummy_object_access_map(
object_ids: set[str], user_email: str, chunks: list[InferenceChunk] # noqa: ARG001
object_ids: set[str], user_email: str, chunks: list[InferenceChunk]
) -> dict[str, bool]:
time.sleep(0.15)
# return {object_id: True for object_id in object_ids}

View File

@@ -30,11 +30,7 @@ def get_any_salesforce_client_for_doc_id(
if _ANY_SALESFORCE_CLIENT is None:
cc_pairs = get_cc_pairs_for_document(db_session, doc_id)
first_cc_pair = cc_pairs[0]
credential_json = (
first_cc_pair.credential.credential_json.get_value(apply_mask=False)
if first_cc_pair.credential.credential_json
else {}
)
credential_json = first_cc_pair.credential.credential_json
_ANY_SALESFORCE_CLIENT = Salesforce(
username=credential_json["sf_username"],
password=credential_json["sf_password"],
@@ -162,11 +158,7 @@ def _get_salesforce_client_for_doc_id(db_session: Session, doc_id: str) -> Sales
)
if cc_pair is None:
raise ValueError(f"CC pair {cc_pair_id} not found")
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
credential_json = cc_pair.credential.credential_json
_CC_PAIR_ID_SALESFORCE_CLIENT_MAP[cc_pair_id] = Salesforce(
username=credential_json["sf_username"],
password=credential_json["sf_password"],

View File

@@ -17,19 +17,14 @@ SHAREPOINT_DOC_SYNC_TAG = "sharepoint_doc_sync"
def sharepoint_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[ElementExternalAccess, None, None]:
sharepoint_connector = SharepointConnector(
**cc_pair.connector.connector_specific_config,
)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
sharepoint_connector.load_credentials(credential_json)
sharepoint_connector.load_credentials(cc_pair.credential.credential_json)
yield from generic_doc_sync(
cc_pair=cc_pair,

View File

@@ -1,9 +1,12 @@
from collections.abc import Generator
from office365.sharepoint.client_context import ClientContext # type: ignore[import-untyped]
from ee.onyx.db.external_perm import ExternalUserGroup
from ee.onyx.external_permissions.sharepoint.permission_utils import (
get_sharepoint_external_groups,
)
from onyx.connectors.sharepoint.connector import acquire_token_for_rest
from onyx.connectors.sharepoint.connector import SharepointConnector
from onyx.db.models import ConnectorCredentialPair
from onyx.utils.logger import setup_logger
@@ -12,7 +15,7 @@ logger = setup_logger()
def sharepoint_group_sync(
tenant_id: str, # noqa: ARG001
tenant_id: str,
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
"""Sync SharePoint groups and their members"""
@@ -22,12 +25,7 @@ def sharepoint_group_sync(
# Create SharePoint connector instance and load credentials
connector = SharepointConnector(**connector_config)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
connector.load_credentials(credential_json)
connector.load_credentials(cc_pair.credential.credential_json)
if not connector.msal_app:
raise RuntimeError("MSAL app not initialized in connector")
@@ -43,11 +41,16 @@ def sharepoint_group_sync(
logger.info(f"Processing {len(site_descriptors)} sites for group sync")
msal_app = connector.msal_app
sp_tenant_domain = connector.sp_tenant_domain
# Process each site
for site_descriptor in site_descriptors:
logger.debug(f"Processing site: {site_descriptor.url}")
ctx = connector._create_rest_client_context(site_descriptor.url)
# Create client context for the site using connector's MSAL app
ctx = ClientContext(site_descriptor.url).with_access_token(
lambda: acquire_token_for_rest(msal_app, sp_tenant_domain)
)
# Get external groups for this site
external_groups = get_sharepoint_external_groups(ctx, connector.graph_client)

View File

@@ -103,7 +103,7 @@ def _fetch_channel_permissions(
def _get_slack_document_access(
slack_connector: SlackConnector,
channel_permissions: dict[str, ExternalAccess], # noqa: ARG001
channel_permissions: dict[str, ExternalAccess],
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
slim_doc_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
@@ -136,8 +136,8 @@ def _get_slack_document_access(
def slack_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
"""
@@ -151,14 +151,9 @@ def slack_doc_sync(
tenant_id = get_current_tenant_id()
provider = OnyxDBCredentialsProvider(tenant_id, "slack", cc_pair.credential.id)
r = get_redis_client(tenant_id=tenant_id)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
slack_client = SlackConnector.make_slack_web_client(
provider.get_provider_key(),
credential_json["slack_bot_token"],
cc_pair.credential.credential_json["slack_bot_token"],
SlackConnector.MAX_RETRIES,
r,
)

View File

@@ -63,14 +63,9 @@ def slack_group_sync(
provider = OnyxDBCredentialsProvider(tenant_id, "slack", cc_pair.credential.id)
r = get_redis_client(tenant_id=tenant_id)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
slack_client = SlackConnector.make_slack_web_client(
provider.get_provider_key(),
credential_json["slack_bot_token"],
cc_pair.credential.credential_json["slack_bot_token"],
SlackConnector.MAX_RETRIES,
r,
)

View File

@@ -72,10 +72,10 @@ class SyncConfig(BaseModel):
# Mock doc sync function for testing (no-op)
def mock_doc_sync(
cc_pair: "ConnectorCredentialPair", # noqa: ARG001
fetch_all_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
callback: Optional["IndexingHeartbeatInterface"], # noqa: ARG001
cc_pair: "ConnectorCredentialPair",
fetch_all_docs_fn: FetchAllDocumentsFunction,
fetch_all_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: Optional["IndexingHeartbeatInterface"],
) -> Generator["DocExternalAccess", None, None]:
"""Mock doc sync function for testing - returns empty list since permissions are fetched during indexing"""
yield from []

View File

@@ -18,19 +18,14 @@ TEAMS_DOC_SYNC_LABEL = "teams_doc_sync"
def teams_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
teams_connector = TeamsConnector(
**cc_pair.connector.connector_specific_config,
)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
teams_connector.load_credentials(credential_json)
teams_connector.load_credentials(cc_pair.credential.credential_json)
yield from generic_doc_sync(
cc_pair=cc_pair,

View File

@@ -77,7 +77,7 @@ def stream_search_query(
# Get document index
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None, db_session)
document_index = get_default_document_index(search_settings, None)
# Determine queries to execute
original_query = request.search_query

View File

@@ -32,7 +32,6 @@ from sqlalchemy.orm import Session
from ee.onyx.auth.users import current_admin_user
from ee.onyx.db.license import get_license
from ee.onyx.db.license import get_used_seats
from ee.onyx.server.billing.models import BillingInformationResponse
from ee.onyx.server.billing.models import CreateCheckoutSessionRequest
from ee.onyx.server.billing.models import CreateCheckoutSessionResponse
@@ -165,16 +164,6 @@ async def create_checkout_session(
seats = request.seats if request else None
email = request.email if request else None
# Validate that requested seats is not less than current used seats
if seats is not None:
used_seats = get_used_seats(tenant_id)
if seats < used_seats:
raise HTTPException(
status_code=400,
detail=f"Cannot subscribe with fewer seats than current usage. "
f"You have {used_seats} active users/integrations but requested {seats} seats.",
)
# Build redirect URL for after checkout completion
redirect_url = f"{WEB_DOMAIN}/admin/billing?checkout=success"
@@ -276,15 +265,6 @@ async def update_seats(
if not MULTI_TENANT and not license_data:
raise HTTPException(status_code=400, detail="No license found")
# Validate that new seat count is not less than current used seats
used_seats = get_used_seats(tenant_id)
if request.new_seat_count < used_seats:
raise HTTPException(
status_code=400,
detail=f"Cannot reduce seats below current usage. "
f"You have {used_seats} active users/integrations but requested {request.new_seat_count} seats.",
)
try:
result = await update_seat_service(
new_seat_count=request.new_seat_count,

View File

@@ -109,9 +109,7 @@ async def _make_billing_request(
headers = _get_headers(license_data)
try:
async with httpx.AsyncClient(
timeout=_REQUEST_TIMEOUT, follow_redirects=True
) as client:
async with httpx.AsyncClient(timeout=_REQUEST_TIMEOUT) as client:
if method == "GET":
response = await client.get(url, headers=headers, params=params)
else:

View File

@@ -139,7 +139,7 @@ def put_logo(
upload_logo(file=file, is_logotype=is_logotype)
def fetch_logo_helper(db_session: Session) -> Response: # noqa: ARG001
def fetch_logo_helper(db_session: Session) -> Response:
try:
file_store = get_default_file_store()
onyx_file = file_store.get_file_with_mime_type(get_logo_filename())
@@ -155,7 +155,7 @@ def fetch_logo_helper(db_session: Session) -> Response: # noqa: ARG001
return Response(content=onyx_file.data, media_type=onyx_file.mime_type)
def fetch_logotype_helper(db_session: Session) -> Response: # noqa: ARG001
def fetch_logotype_helper(db_session: Session) -> Response:
try:
file_store = get_default_file_store()
onyx_file = file_store.get_file_with_mime_type(get_logotype_filename())

View File

@@ -17,7 +17,7 @@ router = APIRouter(prefix="/evals")
@router.post("/eval_run", response_model=EvalRunAck)
def eval_run(
request: EvalConfigurationOptions,
user: User = Depends(current_cloud_superuser), # noqa: ARG001
user: User = Depends(current_cloud_superuser),
) -> EvalRunAck:
"""
Run an evaluation with the given message and optional dataset.

View File

@@ -42,20 +42,6 @@ logger = setup_logger()
router = APIRouter(prefix="/license")
# PEM-style delimiters used in license file format
_PEM_BEGIN = "-----BEGIN ONYX LICENSE-----"
_PEM_END = "-----END ONYX LICENSE-----"
def _strip_pem_delimiters(content: str) -> str:
"""Strip PEM-style delimiters from license content if present."""
content = content.strip()
if content.startswith(_PEM_BEGIN) and content.endswith(_PEM_END):
# Remove first and last lines (the delimiters)
lines = content.split("\n")
return "\n".join(lines[1:-1]).strip()
return content
@router.get("")
async def get_license_status(
@@ -120,11 +106,6 @@ async def claim_license(
- Updating seats via the billing API
- Returning from the Stripe customer portal
- Any operation that regenerates the license on control plane
Claim a license from the control plane (self-hosted only).
Two modes:
1. With session_id: After Stripe checkout, exchange session_id for license
2. Without session_id: Re-claim using existing license for auth
"""
if MULTI_TENANT:
raise HTTPException(
@@ -229,10 +210,6 @@ async def upload_license(
try:
content = await license_file.read()
license_data = content.decode("utf-8").strip()
# Strip PEM-style delimiters if present (used in .lic file format)
license_data = _strip_pem_delimiters(license_data)
# Remove any stray whitespace/newlines from user input
license_data = license_data.strip()
except UnicodeDecodeError:
raise HTTPException(status_code=400, detail="Invalid license file format")

View File

@@ -260,7 +260,7 @@ def confluence_oauth_accessible_resources(
credential_id: int,
user: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str | None = Depends(get_current_tenant_id), # noqa: ARG001
tenant_id: str | None = Depends(get_current_tenant_id),
) -> JSONResponse:
"""Atlassian's API is weird and does not supply us with enough info to be in a
usable state after authorizing. All API's require a cloud id. We have to list
@@ -270,11 +270,7 @@ def confluence_oauth_accessible_resources(
if not credential:
raise HTTPException(400, f"Credential {credential_id} not found.")
credential_dict = (
credential.credential_json.get_value(apply_mask=False)
if credential.credential_json
else {}
)
credential_dict = credential.credential_json
access_token = credential_dict["confluence_access_token"]
try:
@@ -327,7 +323,7 @@ def confluence_oauth_finalize(
cloud_url: str,
user: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str | None = Depends(get_current_tenant_id), # noqa: ARG001
tenant_id: str | None = Depends(get_current_tenant_id),
) -> JSONResponse:
"""Saves the info for the selected cloud site to the credential.
This is the final step in the confluence oauth flow where after the traditional
@@ -341,12 +337,7 @@ def confluence_oauth_finalize(
detail=f"Confluence Cloud OAuth failed - credential {credential_id} not found.",
)
existing_credential_json = (
credential.credential_json.get_value(apply_mask=False)
if credential.credential_json
else {}
)
new_credential_json: dict[str, Any] = dict(existing_credential_json)
new_credential_json: dict[str, Any] = dict(credential.credential_json)
new_credential_json["cloud_id"] = cloud_id
new_credential_json["cloud_name"] = cloud_name
new_credential_json["wiki_base"] = cloud_url

View File

@@ -27,8 +27,6 @@ class SearchFlowClassificationResponse(BaseModel):
is_search_flow: bool
# NOTE: This model is used for the core flow of the Onyx application, any changes to it should be reviewed and approved by an
# experienced team member. It is very important to 1. avoid bloat and 2. that this remains backwards compatible across versions.
class SendSearchQueryRequest(BaseModel):
search_query: str
filters: BaseFilters | None = None

View File

@@ -26,7 +26,6 @@ from onyx.db.models import User
from onyx.llm.factory import get_default_llm
from onyx.server.usage_limits import check_llm_cost_limit_for_provider
from onyx.server.utils import get_json_line
from onyx.server.utils_vector_db import require_vector_db
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
@@ -67,13 +66,7 @@ def search_flow_classification(
return SearchFlowClassificationResponse(is_search_flow=is_search_flow)
# NOTE: This endpoint is used for the core flow of the Onyx application, any changes to it should be reviewed and approved by an
# experienced team member. It is very important to 1. avoid bloat and 2. that this remains backwards compatible across versions.
@router.post(
"/send-search-message",
response_model=None,
dependencies=[Depends(require_vector_db)],
)
@router.post("/send-search-message", response_model=None)
def handle_send_search_message(
request: SendSearchQueryRequest,
user: User = Depends(current_user),

View File

@@ -78,7 +78,7 @@ def fetch_and_process_chat_session_history(
db_session: Session,
start: datetime,
end: datetime,
limit: int | None = 500, # noqa: ARG001
limit: int | None = 500,
) -> Generator[ChatSessionSnapshot]:
PAGE_SIZE = 100

View File

@@ -59,7 +59,7 @@ def generate_report(
def read_usage_report(
report_name: str,
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session), # noqa: ARG001
db_session: Session = Depends(get_session),
) -> Response:
try:
file = get_usage_report_data(report_name)

View File

@@ -1,96 +0,0 @@
"""SCIM filter expression parser (RFC 7644 §3.4.2.2).
Identity providers (Okta, Azure AD, OneLogin, etc.) use filters to look up
resources before deciding whether to create or update them. For example, when
an admin assigns a user to the Onyx app, the IdP first checks whether that
user already exists::
GET /scim/v2/Users?filter=userName eq "john@example.com"
If zero results come back the IdP creates the user (``POST``); if a match is
found it links to the existing record and uses ``PUT``/``PATCH`` going forward.
The same pattern applies to groups (``displayName eq "Engineering"``).
This module parses the subset of the SCIM filter grammar that identity
providers actually send in practice:
attribute SP operator SP value
Supported operators: ``eq``, ``co`` (contains), ``sw`` (starts with).
Compound filters (``and`` / ``or``) are not supported; if an IdP sends one
the parser raises ``ValueError`` and the caller must decide how to respond.
"""
from __future__ import annotations
import re
from dataclasses import dataclass
from enum import Enum
class ScimFilterOperator(str, Enum):
    """Comparison operators accepted in a SCIM filter expression.

    Members also subclass ``str``, so each one compares equal to its
    lowercase wire keyword (e.g. ``ScimFilterOperator.EQUAL == "eq"``).
    """

    EQUAL = "eq"
    CONTAINS = "co"  # "contains"
    STARTS_WITH = "sw"  # "starts with"
@dataclass(frozen=True, slots=True)
class ScimFilter:
"""Parsed SCIM filter expression."""
attribute: str
operator: ScimFilterOperator
value: str
# Matches: attribute operator "value" (with or without quotes around value)
# Groups: (attribute) (operator) ("quoted value" | unquoted_value)
_FILTER_RE = re.compile(
r"^(\S+)\s+(eq|co|sw)\s+" # attribute + operator
r'(?:"([^"]*)"' # quoted value
r"|'([^']*)')" # or single-quoted value
r"$",
re.IGNORECASE,
)
def parse_scim_filter(filter_string: str | None) -> ScimFilter | None:
    """Parse a simple ``attribute operator "value"`` SCIM filter expression.

    Args:
        filter_string: Raw filter query parameter value, e.g.
            ``'userName eq "john@example.com"'``. Leading and trailing
            whitespace is ignored.

    Returns:
        A ``ScimFilter`` when the expression is well formed, or ``None`` when
        the input is ``None``, empty, or whitespace-only.

    Raises:
        ValueError: If the filter string is present but does not match the
            supported grammar. This covers unsupported operators, unquoted
            values, and compound ``and`` / ``or`` expressions.
    """
    # Normalize once; ``None`` and blank input both mean "no filter supplied".
    stripped = filter_string.strip() if filter_string else ""
    if not stripped:
        return None

    match = _FILTER_RE.match(stripped)
    if match is None:
        # Report the caller's original (unstripped) input.
        raise ValueError(f"Unsupported or malformed SCIM filter: {filter_string}")

    return _build_filter(match, filter_string)
def _build_filter(match: re.Match[str], raw: str) -> ScimFilter:
    """Extract fields from a regex match and construct a ``ScimFilter``.

    Args:
        match: A match object whose group 1 is the attribute, group 2 the
            operator keyword, and group 3 or 4 the value body (double- or
            single-quoted respectively).
        raw: The original filter string; used only in the error message.

    Returns:
        The ``ScimFilter`` described by ``match``.

    Raises:
        ValueError: If neither value group participated in the match.
    """
    attribute = match.group(1)
    # Lowercase the keyword so it maps onto the enum's lowercase values.
    op_str = match.group(2).lower()

    # Compare against None rather than truthiness: an empty string ("") is a
    # legitimate filter value.
    value = match.group(3)
    if value is None:
        value = match.group(4)
    if value is None:
        raise ValueError(f"Unsupported or malformed SCIM filter: {raw}")

    operator = ScimFilterOperator(op_str)
    return ScimFilter(attribute=attribute, operator=operator, value=value)

View File

@@ -1,255 +0,0 @@
"""Pydantic schemas for SCIM 2.0 provisioning (RFC 7643 / RFC 7644).
SCIM protocol schemas follow the wire format defined in:
- Core Schema: https://datatracker.ietf.org/doc/html/rfc7643
- Protocol: https://datatracker.ietf.org/doc/html/rfc7644
Admin API schemas are internal to Onyx and used for SCIM token management.
"""
from datetime import datetime
from enum import Enum
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
# ---------------------------------------------------------------------------
# SCIM Schema URIs (RFC 7643 §8)
#
# Each SCIM JSON payload carries a "schemas" array naming its type; these are
# the URIs placed in that array. IdPs such as Okta / Azure AD rely on them to
# decide how a response should be parsed.
# ---------------------------------------------------------------------------

# Core resource schemas
SCIM_USER_SCHEMA = "urn:ietf:params:scim:schemas:core:2.0:User"
SCIM_GROUP_SCHEMA = "urn:ietf:params:scim:schemas:core:2.0:Group"

# Protocol message schemas
SCIM_LIST_RESPONSE_SCHEMA = "urn:ietf:params:scim:api:messages:2.0:ListResponse"
SCIM_PATCH_OP_SCHEMA = "urn:ietf:params:scim:api:messages:2.0:PatchOp"
SCIM_ERROR_SCHEMA = "urn:ietf:params:scim:api:messages:2.0:Error"

# Discovery resource schemas
SCIM_SERVICE_PROVIDER_CONFIG_SCHEMA = (
    "urn:ietf:params:scim:schemas:core:2.0:ServiceProviderConfig"
)
SCIM_RESOURCE_TYPE_SCHEMA = "urn:ietf:params:scim:schemas:core:2.0:ResourceType"
# ---------------------------------------------------------------------------
# SCIM Protocol Schemas
# ---------------------------------------------------------------------------
class ScimName(BaseModel):
    """User name components (RFC 7643 §4.1.1).

    Every component is optional and defaults to ``None``. Field names use
    the camelCase spelling of the SCIM wire format.
    """

    givenName: str | None = None
    familyName: str | None = None
    formatted: str | None = None
class ScimEmail(BaseModel):
    """Email sub-attribute (RFC 7643 §4.1.2)."""

    value: str  # Required: the only field without a default
    type: str | None = None
    primary: bool = False
class ScimMeta(BaseModel):
    """Resource metadata (RFC 7643 §3.1).

    All fields are optional and default to ``None``.
    """

    resourceType: str | None = None
    created: datetime | None = None
    lastModified: datetime | None = None
    location: str | None = None
class ScimUserResource(BaseModel):
    """SCIM User resource representation (RFC 7643 §4.1).

    This is the JSON shape that IdPs send when creating/updating a user via
    SCIM, and the shape we return in GET responses. Field names use camelCase
    to match the SCIM wire format (not Python convention).

    ``userName`` is the only required field; everything else has a default.
    """

    # default_factory gives every instance its own list.
    schemas: list[str] = Field(default_factory=lambda: [SCIM_USER_SCHEMA])
    id: str | None = None  # Onyx's internal user ID, set on responses
    externalId: str | None = None  # IdP's identifier for this user
    userName: str  # Typically the user's email address
    name: ScimName | None = None
    emails: list[ScimEmail] = Field(default_factory=list)
    active: bool = True
    meta: ScimMeta | None = None
class ScimGroupMember(BaseModel):
    """Group member reference (RFC 7643 §4.2).

    Represents a user within a SCIM group. The IdP sends these when adding
    or removing users from groups. ``value`` is the Onyx user ID.
    """

    value: str  # User ID of the group member
    display: str | None = None
class ScimGroupResource(BaseModel):
    """SCIM Group resource representation (RFC 7643 §4.2).

    ``displayName`` is the only required field; ``members`` defaults to an
    empty list.
    """

    # default_factory gives every instance its own list.
    schemas: list[str] = Field(default_factory=lambda: [SCIM_GROUP_SCHEMA])
    id: str | None = None
    externalId: str | None = None
    displayName: str
    members: list[ScimGroupMember] = Field(default_factory=list)
    meta: ScimMeta | None = None
class ScimListResponse(BaseModel):
    """Paginated list response (RFC 7644 §3.4.2).

    ``totalResults`` is required. ``startIndex`` defaults to 1 and
    ``itemsPerPage`` to 100.
    """

    schemas: list[str] = Field(default_factory=lambda: [SCIM_LIST_RESPONSE_SCHEMA])
    totalResults: int
    startIndex: int = 1
    itemsPerPage: int = 100
    # Capitalized to match the SCIM wire-format key; may hold users or groups.
    Resources: list[ScimUserResource | ScimGroupResource] = Field(default_factory=list)
class ScimPatchOperationType(str, Enum):
    """Supported PATCH operations (RFC 7644 §3.5.2).

    Members also subclass ``str``, so each one compares equal to its
    lowercase wire keyword.
    """

    ADD = "add"
    REPLACE = "replace"
    REMOVE = "remove"
class ScimPatchOperation(BaseModel):
    """Single PATCH operation (RFC 7644 §3.5.2).

    ``op`` is required; ``path`` and ``value`` both default to ``None``.
    """

    op: ScimPatchOperationType
    path: str | None = None
    # Accepts a plain string, a list of string-to-string dicts, a dict of
    # string/bool values, or a bare bool.
    value: str | list[dict[str, str]] | dict[str, str | bool] | bool | None = None
class ScimPatchRequest(BaseModel):
    """PATCH request body (RFC 7644 §3.5.2).

    IdPs use PATCH to make incremental changes — e.g. deactivating a user
    (replace active=false) or adding/removing group members — instead of
    replacing the entire resource with PUT.
    """

    schemas: list[str] = Field(default_factory=lambda: [SCIM_PATCH_OP_SCHEMA])
    # Capitalized to match the SCIM wire-format key; required.
    Operations: list[ScimPatchOperation]
class ScimError(BaseModel):
    """SCIM error response (RFC 7644 §3.12)."""

    schemas: list[str] = Field(default_factory=lambda: [SCIM_ERROR_SCHEMA])
    status: str  # Typed as a string, not an int
    detail: str | None = None
    scimType: str | None = None
# ---------------------------------------------------------------------------
# Service Provider Configuration (RFC 7643 §5)
# ---------------------------------------------------------------------------
class ScimSupported(BaseModel):
    """Generic supported/not-supported flag used in ServiceProviderConfig."""

    supported: bool
class ScimFilterConfig(BaseModel):
    """Filter configuration within ServiceProviderConfig (RFC 7643 §5)."""

    supported: bool
    maxResults: int = 100
class ScimServiceProviderConfig(BaseModel):
    """SCIM ServiceProviderConfig resource (RFC 7643 §5).

    Served at GET /scim/v2/ServiceProviderConfig. IdPs fetch this during
    initial setup to discover which SCIM features our server supports
    (e.g. PATCH yes, bulk no, filtering yes).

    Every field has a default, so the model can be built with no arguments.
    """

    # Defaults to the ServiceProviderConfig schema URN constant.
    schemas: list[str] = Field(
        default_factory=lambda: [SCIM_SERVICE_PROVIDER_CONFIG_SCHEMA]
    )
    # Feature flags advertised by default: PATCH and filtering are on;
    # bulk, changePassword, sort and etag are off.
    patch: ScimSupported = ScimSupported(supported=True)
    bulk: ScimSupported = ScimSupported(supported=False)
    # maxResults is left at the ScimFilterConfig default.
    filter: ScimFilterConfig = ScimFilterConfig(supported=True)
    changePassword: ScimSupported = ScimSupported(supported=False)
    sort: ScimSupported = ScimSupported(supported=False)
    etag: ScimSupported = ScimSupported(supported=False)
    # A single bearer-token scheme is advertised by default.
    authenticationSchemes: list[dict[str, str]] = Field(
        default_factory=lambda: [
            {
                "type": "oauthbearertoken",
                "name": "OAuth Bearer Token",
                "description": "Authentication scheme using a SCIM bearer token",
            }
        ]
    )
class ScimSchemaExtension(BaseModel):
    """Schema extension reference within ResourceType (RFC 7643 §6)."""

    # Allow populating ``schema_`` by its field name as well as by its alias.
    model_config = ConfigDict(populate_by_name=True)
    # Aliased to "schema"; the trailing underscore presumably avoids a
    # clash with an attribute on the pydantic base class — TODO confirm.
    schema_: str = Field(alias="schema")
    # Required flag for this extension.
    required: bool
class ScimResourceType(BaseModel):
    """SCIM ResourceType resource (RFC 7643 §6).

    Served at GET /scim/v2/ResourceTypes. Tells the IdP which resource
    types are available (Users, Groups) and their respective endpoints.
    """

    # Allow populating ``schema_`` by its field name as well as by its alias.
    model_config = ConfigDict(populate_by_name=True)
    # Defaults to the ResourceType schema URN constant.
    schemas: list[str] = Field(default_factory=lambda: [SCIM_RESOURCE_TYPE_SCHEMA])
    # Required identifying fields.
    id: str
    name: str
    endpoint: str
    # Optional free-text description.
    description: str | None = None
    # Aliased to "schema".
    schema_: str = Field(alias="schema")
    # Extensions attached to this resource type; empty by default.
    schemaExtensions: list[ScimSchemaExtension] = Field(default_factory=list)
# ---------------------------------------------------------------------------
# Admin API Schemas (Onyx-internal, for SCIM token management)
# These are NOT part of the SCIM protocol. They power the Onyx admin UI
# where admins create/revoke the bearer tokens that IdPs use to authenticate.
# ---------------------------------------------------------------------------
class ScimTokenCreate(BaseModel):
    """Request to create a new SCIM bearer token."""

    # Required name for the new token.
    name: str
class ScimTokenResponse(BaseModel):
    """SCIM token metadata returned in list/get responses."""

    id: int
    name: str
    # Display form of the token (presumably masked/truncated — confirm
    # against the code that populates it).
    token_display: str
    is_active: bool
    created_at: datetime
    # None when no last-use time is recorded.
    last_used_at: datetime | None = None
class ScimTokenCreatedResponse(ScimTokenResponse):
    """Response returned when a new SCIM token is created.

    Includes the raw token value which is only available at creation time.
    Inherits all metadata fields from ``ScimTokenResponse``.
    """

    # The raw token value.
    raw_token: str

View File

@@ -1,256 +0,0 @@
"""SCIM PATCH operation handler (RFC 7644 §3.5.2).
Identity providers use PATCH to make incremental changes to SCIM resources
instead of replacing the entire resource with PUT. Common operations include:
- Deactivating a user: ``replace`` ``active`` with ``false``
- Adding group members: ``add`` to ``members``
- Removing group members: ``remove`` from ``members[value eq "..."]``
This module applies PATCH operations to Pydantic SCIM resource objects and
returns the modified result. It does NOT touch the database — the caller is
responsible for persisting changes.
"""
from __future__ import annotations
import re
from ee.onyx.server.scim.models import ScimGroupResource
from ee.onyx.server.scim.models import ScimPatchOperation
from ee.onyx.server.scim.models import ScimPatchOperationType
from ee.onyx.server.scim.models import ScimUserResource
class ScimPatchError(Exception):
    """Signals that a PATCH operation could not be applied.

    Attributes:
        detail: Human-readable explanation; also used as the exception message.
        status: Status code associated with the failure (400 unless overridden).
    """

    def __init__(self, detail: str, status: int = 400) -> None:
        super().__init__(detail)
        self.status = status
        self.detail = detail
# Pattern for member removal path: members[value eq "user-id"]
# The match is anchored at both ends and case-insensitive. Capture group 1
# is the quoted member ID (one or more characters other than a double quote).
_MEMBER_FILTER_RE = re.compile(
    r'^members\[value\s+eq\s+"([^"]+)"\]$',
    re.IGNORECASE,
)
def apply_user_patch(
    operations: list[ScimPatchOperation],
    current: ScimUserResource,
) -> ScimUserResource:
    """Produce a patched copy of a user resource.

    Works on a ``model_dump()`` of ``current`` and re-validates the result
    into a new ``ScimUserResource``; ``current`` itself is left untouched.
    ``add`` and ``replace`` are handled identically; any other operation
    is rejected.

    Raises:
        ScimPatchError: For an unsupported operation or an unsupported path.
    """
    payload = current.model_dump()
    # Name sub-attributes are collected separately and written back at the end.
    name_fields = payload.get("name") or {}
    settable_ops = (ScimPatchOperationType.REPLACE, ScimPatchOperationType.ADD)

    for op in operations:
        if op.op not in settable_ops:
            raise ScimPatchError(
                f"Unsupported operation '{op.op.value}' on User resource"
            )
        _apply_user_replace(op, payload, name_fields)

    # NOTE(review): this writes ``name`` back even when it was empty/None and
    # no operation touched it, so the result then carries ``{}`` instead —
    # confirm that is intended.
    payload["name"] = name_fields
    return ScimUserResource.model_validate(payload)
def _apply_user_replace(
    op: ScimPatchOperation,
    data: dict,
    name_data: dict,
) -> None:
    """Apply one replace/add operation to the user dicts in place.

    With a path, the operation sets that single (lowercased) path to
    ``op.value``. Without a path, ``op.value`` must be a dict whose keys are
    treated as paths and set one by one.

    Raises:
        ScimPatchError: If there is no path and the value is not a dict, or
            if a path is not supported.
    """
    path = (op.path or "").lower()

    # Path given: a single field assignment.
    if path:
        _set_user_field(path, op.value, data, name_data)
        return

    # No path: the value itself maps attribute names to new values.
    if not isinstance(op.value, dict):
        raise ScimPatchError("Replace without path requires a dict value")

    for attribute, new_value in op.value.items():
        _set_user_field(attribute.lower(), new_value, data, name_data)
def _set_user_field(
path: str,
value: str | bool | dict | list | None,
data: dict,
name_data: dict,
) -> None:
"""Set a single field on user data by SCIM path."""
if path == "active":
data["active"] = value
elif path == "username":
data["userName"] = value
elif path == "externalid":
data["externalId"] = value
elif path == "name.givenname":
name_data["givenName"] = value
elif path == "name.familyname":
name_data["familyName"] = value
elif path == "name.formatted":
name_data["formatted"] = value
elif path == "displayname":
# Some IdPs send displayName on users; map to formatted name
name_data["formatted"] = value
else:
raise ScimPatchError(f"Unsupported path '{path}' for User PATCH")
def apply_group_patch(
    operations: list[ScimPatchOperation],
    current: ScimGroupResource,
) -> tuple[ScimGroupResource, list[str], list[str]]:
    """Produce a patched copy of a group resource plus membership deltas.

    Works on a ``model_dump()`` of ``current`` and re-validates the result
    into a new ``ScimGroupResource``.

    Returns:
        ``(group, added_ids, removed_ids)`` — the patched group, then the
        member IDs recorded as added and as removed while applying the
        operations. The caller uses the ID lists to update the database.

    Raises:
        ScimPatchError: For an unsupported operation or path.
    """
    snapshot = current.model_dump()
    # Work on a separate list; it is written back once all operations ran.
    member_list: list[dict] = list(snapshot.get("members") or [])
    added: list[str] = []
    removed: list[str] = []

    for op in operations:
        kind = op.op
        if kind == ScimPatchOperationType.ADD:
            _apply_group_add(op, member_list, added)
        elif kind == ScimPatchOperationType.REMOVE:
            _apply_group_remove(op, member_list, removed)
        elif kind == ScimPatchOperationType.REPLACE:
            _apply_group_replace(op, snapshot, member_list, added, removed)
        else:
            raise ScimPatchError(
                f"Unsupported operation '{op.op.value}' on Group resource"
            )

    snapshot["members"] = member_list
    return ScimGroupResource.model_validate(snapshot), added, removed
def _apply_group_replace(
    op: ScimPatchOperation,
    data: dict,
    current_members: list[dict],
    added_ids: list[str],
    removed_ids: list[str],
) -> None:
    """Apply one replace operation to the group dicts in place.

    A ``members`` target replaces the whole member list; any other target
    sets a scalar group field. Without a path, ``op.value`` must be a dict
    whose keys are treated as targets.

    Raises:
        ScimPatchError: If there is no path and the value is not a dict, or
            if a target is not supported.
    """
    path = (op.path or "").lower()

    if path == "members":
        _replace_members(op.value, current_members, added_ids, removed_ids)
        return
    if path:
        _set_group_field(path, op.value, data)
        return

    # No path: the value itself maps attribute names to new values.
    if not isinstance(op.value, dict):
        raise ScimPatchError("Replace without path requires a dict value")

    for attribute, new_value in op.value.items():
        target = attribute.lower()
        if target == "members":
            _replace_members(new_value, current_members, added_ids, removed_ids)
        else:
            _set_group_field(target, new_value, data)
def _replace_members(
value: str | list | dict | bool | None,
current_members: list[dict],
added_ids: list[str],
removed_ids: list[str],
) -> None:
"""Replace the entire group member list."""
if not isinstance(value, list):
raise ScimPatchError("Replace members requires a list value")
old_ids = {m["value"] for m in current_members}
new_ids = {m.get("value", "") for m in value}
removed_ids.extend(old_ids - new_ids)
added_ids.extend(new_ids - old_ids)
current_members[:] = value
def _set_group_field(
path: str,
value: str | bool | dict | list | None,
data: dict,
) -> None:
"""Set a single field on group data by SCIM path."""
if path == "displayname":
data["displayName"] = value
elif path == "externalid":
data["externalId"] = value
else:
raise ScimPatchError(f"Unsupported path '{path}' for Group PATCH")
def _apply_group_add(
op: ScimPatchOperation,
members: list[dict],
added_ids: list[str],
) -> None:
"""Add members to a group."""
path = (op.path or "").lower()
if path and path != "members":
raise ScimPatchError(f"Unsupported add path '{op.path}' for Group")
if not isinstance(op.value, list):
raise ScimPatchError("Add members requires a list value")
existing_ids = {m["value"] for m in members}
for member_data in op.value:
member_id = member_data.get("value", "")
if member_id and member_id not in existing_ids:
members.append(member_data)
added_ids.append(member_id)
existing_ids.add(member_id)
def _apply_group_remove(
op: ScimPatchOperation,
members: list[dict],
removed_ids: list[str],
) -> None:
"""Remove members from a group."""
if not op.path:
raise ScimPatchError("Remove operation requires a path")
match = _MEMBER_FILTER_RE.match(op.path)
if not match:
raise ScimPatchError(
f"Unsupported remove path '{op.path}'. "
'Expected: members[value eq "user-id"]'
)
target_id = match.group(1)
original_len = len(members)
members[:] = [m for m in members if m.get("value") != target_id]
if len(members) < original_len:
removed_ids.append(target_id)

View File

@@ -123,9 +123,14 @@ def _seed_llms(
upsert_llm_provider(llm_upsert_request, db_session)
for llm_upsert_request in llm_upsert_requests
]
update_default_provider(
provider_id=seeded_providers[0].id, db_session=db_session
)
if len(seeded_providers[0].model_configurations) > 0:
default_model = seeded_providers[0].model_configurations[0].name
update_default_provider(
provider_id=seeded_providers[0].id,
model_name=default_model,
db_session=db_session,
)
def _seed_personas(db_session: Session, personas: list[PersonaUpsertRequest]) -> None:

View File

@@ -4,7 +4,6 @@ from redis.exceptions import RedisError
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
from ee.onyx.db.license import get_cached_license_metadata
from onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED
from onyx.server.settings.models import ApplicationStatus
from onyx.server.settings.models import Settings
from onyx.utils.logger import setup_logger
@@ -59,46 +58,26 @@ def apply_license_status_to_settings(settings: Settings) -> Settings:
For self-hosted, looks up license metadata and overrides application_status
if the license indicates GATED_ACCESS (fully expired).
Also sets ee_features_enabled based on license status to control
visibility of EE features in the UI.
For multi-tenant (cloud), the settings already have the correct status
from the control plane, so no override is needed.
If LICENSE_ENFORCEMENT_ENABLED is false, ee_features_enabled is set to True
(since EE code was loaded via ENABLE_PAID_ENTERPRISE_EDITION_FEATURES).
If LICENSE_ENFORCEMENT_ENABLED is false, settings are returned unchanged,
allowing the product to function normally without license checks.
"""
if not LICENSE_ENFORCEMENT_ENABLED:
# License enforcement disabled - EE code is loaded via
# ENABLE_PAID_ENTERPRISE_EDITION_FEATURES, so EE features are on
settings.ee_features_enabled = True
return settings
if MULTI_TENANT:
# Cloud mode - EE features always available (gating handled by is_tenant_gated)
settings.ee_features_enabled = True
return settings
tenant_id = get_current_tenant_id()
try:
metadata = get_cached_license_metadata(tenant_id)
if metadata:
if metadata.status == _BLOCKING_STATUS:
settings.application_status = metadata.status
settings.ee_features_enabled = False
else:
# Has a valid license (GRACE_PERIOD/PAYMENT_REMINDER still allow EE features)
settings.ee_features_enabled = True
else:
# No license found.
if ENTERPRISE_EDITION_ENABLED:
# Legacy EE flag is set → prior EE usage (e.g. permission
# syncing) means indexed data may need protection.
settings.application_status = _BLOCKING_STATUS
settings.ee_features_enabled = False
if metadata and metadata.status == _BLOCKING_STATUS:
settings.application_status = metadata.status
# No license = user hasn't purchased yet, allow access for upgrade flow
# GRACE_PERIOD/PAYMENT_REMINDER don't block - they're for notifications
except RedisError as e:
logger.warning(f"Failed to check license metadata for settings: {e}")
# Fail closed - disable EE features if we can't verify license
settings.ee_features_enabled = False
return settings

View File

@@ -19,7 +19,6 @@ logger = setup_logger()
def fetch_stripe_checkout_session(
tenant_id: str,
billing_period: Literal["monthly", "annual"] = "monthly",
seats: int | None = None,
) -> str:
token = generate_data_plane_token()
headers = {
@@ -30,23 +29,10 @@ def fetch_stripe_checkout_session(
payload = {
"tenant_id": tenant_id,
"billing_period": billing_period,
"seats": seats,
}
response = requests.post(url, headers=headers, json=payload)
if not response.ok:
try:
data = response.json()
error_msg = (
data.get("error")
or f"Request failed with status {response.status_code}"
)
except (ValueError, requests.exceptions.JSONDecodeError):
error_msg = f"Request failed with status {response.status_code}: {response.text[:200]}"
raise Exception(error_msg)
data = response.json()
if data.get("error"):
raise Exception(data["error"])
return data["sessionId"]
response.raise_for_status()
return response.json()["sessionId"]
def fetch_tenant_stripe_information(tenant_id: str) -> dict:
@@ -65,6 +51,7 @@ def fetch_tenant_stripe_information(tenant_id: str) -> dict:
def fetch_billing_information(
tenant_id: str,
) -> BillingInformation | SubscriptionStatusResponse:
logger.info("Fetching billing information")
token = generate_data_plane_token()
headers = {
"Authorization": f"Bearer {token}",

Some files were not shown because too many files have changed in this diff Show More