Compare commits

..

126 Commits

Author SHA1 Message Date
Nik
77a3243b80 refactor: replace HTTPException with OnyxError in auth, chat, and federated
Migrate auth/users.py, query_and_chat/chat_backend.py,
query_and_chat/token_limit.py, and federated/api.py to use
OnyxError with standardized error codes instead of raw HTTPException.
2026-03-05 17:03:49 -08:00
dependabot[bot]
45d77be4eb chore(deps): bump ajv in /backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web (#8655)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-05 15:14:50 -08:00
dependabot[bot]
413fa85134 chore(deps): bump minimatch in /backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web (#8828)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-05 15:13:57 -08:00
dependabot[bot]
108cde4f55 chore(deps): bump j178/prek-action from 1.0.12 to 1.1.1 (#8477)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-05 15:13:00 -08:00
dependabot[bot]
f88ce32bd4 chore(deps): bump @hono/node-server from 1.19.9 to 1.19.10 in /backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web (#9048)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-05 14:51:22 -08:00
dependabot[bot]
911f3439ea chore(deps): bump helm/kind-action from 1.13.0 to 1.14.0 (#8917)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-05 14:50:06 -08:00
dependabot[bot]
b02590d2b2 chore(deps): bump aws-actions/configure-aws-credentials from 5.1.1 to 6.0.0 (#8478)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-05 14:49:29 -08:00
dependabot[bot]
2d75b4b1f8 chore(deps): bump dompurify from 3.3.1 to 3.3.2 in /widget (#9106)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-05 14:45:53 -08:00
dependabot[bot]
7e3f7d01c2 chore(deps): bump authlib from 1.6.6 to 1.6.7 (#9049)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-05 22:14:44 +00:00
Jamison Lahman
9d6ce26ea3 fix(fe): show modal body on Safari/desktop (#9035) 2026-03-05 21:35:43 +00:00
roshan
41713d42a2 chore: upgrade golangci-lint to v2.10.1 for Go 1.26 support (#9107)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 21:22:56 +00:00
roshan
8afc283410 fix(chrome-extension): open login in new tab when session expires (#9091)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-05 21:18:21 +00:00
Jamison Lahman
b5c873077e chore(devtools): upgrade ods: 0.6.2->0.6.3 (#9105) 2026-03-05 21:04:51 +00:00
Jamison Lahman
20a4dd32eb chore(devtools): pull release branch and support PR # args (#9102)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-05 12:37:51 -08:00
Jamison Lahman
fde0d44bc1 chore(devtools): upgrade ods to go 1.26 (#9103) 2026-03-05 20:24:57 +00:00
Jamison Lahman
8fd91b6e83 chore(devtools): ods desktop (#9100) 2026-03-05 19:38:02 +00:00
Justin Tahara
8247fdd45b fix(llm): Handle Bedrock tool content in message history without toolConfig (#9063) 2026-03-05 19:06:35 +00:00
Jamison Lahman
8c5859ba4d fix(fe): disable projects modal button unless project is named (#9093) 2026-03-05 10:29:15 -08:00
Jamison Lahman
62ef6f59bb chore(playwright): screenshot tests for user settings pages (#9078) 2026-03-05 08:35:46 -08:00
Jamison Lahman
7eabfa125c fix(fe): properly wrap copy and edit buttons on mobile (#9073) 2026-03-05 04:36:11 +00:00
SubashMohan
ee18114739 feat(table): add DataTable config-driven wrapper component (#9020)
Co-authored-by: Nik <nikolas.garza5@gmail.com>
2026-03-05 04:21:38 +00:00
Nikolas Garza
f7630f5648 fix: EE route gating for upgrading CE users (#9026) 2026-03-05 03:44:16 +00:00
Jamison Lahman
e0d91b9ea7 chore(fe): rm unreachable code (#9069) 2026-03-05 03:26:50 +00:00
Raunak Bhagat
2c0a4a60a5 refactor: consolidate AppInputBar search/chat rendering with animated transitions (#9054) 2026-03-05 03:16:36 +00:00
Justin Tahara
3a7d4dad56 fix(ui): Improve text truncation and overflow handling in FileCard layout (#9061) 2026-03-05 03:11:53 +00:00
acaprau
c5c236d098 chore(opensearch): Fix and consolidate the dev script used to start OpenSearch locally (#9036) 2026-03-05 01:54:02 +00:00
Danelegend
b18baff4d0 fix: Correct file_id for docs (#9058) 2026-03-05 01:43:58 +00:00
SubashMohan
eb3e15c195 feat(table): add ColumnVisibilityPopover, Footer, Pagination, and SortingPopover components (#9019)
Co-authored-by: Nik <nikolas.garza5@gmail.com>
2026-03-05 01:43:37 +00:00
acaprau
47d9a9e1ac feat(document index): Re-enable search settings swap (#9005) 2026-03-05 01:41:03 +00:00
Evan Lohn
aca466b35d fix: doc to hierarchynode connection in pruning (#9046) 2026-03-05 01:30:36 +00:00
Justin Tahara
5176fd7386 fix(llm): Final LLM Cleanup for Nightly Tests (#9055) 2026-03-05 01:00:45 +00:00
SubashMohan
92538084e9 feat(table): add useColumnWidths, useDataTable, and useDraggableRows hooks (#9018)
Co-authored-by: Nik <nikolas.garza5@gmail.com>
2026-03-05 00:00:06 +00:00
Bo-Onyx
2d996e05a4 chore(fe): opal button migration (#8864) 2026-03-04 22:52:49 +00:00
Nikolas Garza
b2956f795b refactor: migrate LLM & embedding management to OnyxError (#9025) 2026-03-04 22:09:25 +00:00
Danelegend
b272085543 fix: Code Interpreter Client session clean up (#9028)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 21:58:00 +00:00
Justin Tahara
8193aa4fd0 fix(ui): Persist agent sharing changes immediately for existing agents (#9024) 2026-03-04 21:34:50 +00:00
dependabot[bot]
52db41a00b chore(deps): bump nltk from 3.9.1 to 3.9.3 (#9045)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-04 21:21:37 +00:00
SubashMohan
f1cf3c4589 feat(table): add table primitive components and styles (#9017) 2026-03-04 21:06:53 +00:00
dependabot[bot]
5322aeed90 chore(deps): bump hono from 4.11.7 to 4.12.5 in /backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web (#9044)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-04 12:51:05 -08:00
Evan Lohn
5da8870fd2 fix: stop calling unsupported endpoints no vectordb (#9012) 2026-03-04 20:18:09 +00:00
Nikolas Garza
57d3ab3b40 feat: add SCIM token management page (#9001) 2026-03-04 19:48:37 +00:00
Nikolas Garza
649c7fe8b9 feat(slack): convert markdown tables to Slack-friendly format (#8999) 2026-03-04 19:16:50 +00:00
Jamison Lahman
e5e2bc6149 chore(fe): "Share Chat"->"Share" (#9022) 2026-03-04 11:08:14 -08:00
Jamison Lahman
b148065e1d chore(devtools): --debug mode for desktop (#9027) 2026-03-04 11:07:52 -08:00
Evan Lohn
367808951c chore: remove lightweight mode (#9014) 2026-03-04 18:26:05 +00:00
Jamison Lahman
0f74da3302 fix(fe): dont align center modals on small screens (#8988) 2026-03-04 17:46:35 +00:00
Raunak Bhagat
96f7cbd25a fix: Use IllustrationContent for empty search results (#9013) 2026-03-04 16:54:23 +00:00
Raunak Bhagat
c627cea17d feat(opal): add sidebar variant to Interactive + refactor SidebarTab (#9016) 2026-03-04 15:52:56 +00:00
Raunak Bhagat
a8cdc3965d refactor(fe): move onboarding to sections/, consolidate hooks, move types (#8985) 2026-03-04 10:51:20 +00:00
Raunak Bhagat
60891b2f44 feat: Add IllustrationContent layout component to opal (#9011) 2026-03-04 06:14:21 +00:00
Danelegend
d2f35e1fae feat: Align action tool tips (#8997) 2026-03-04 04:32:30 +00:00
Danelegend
7a7350f387 fix: Markdown does not show all texts (#9009) 2026-03-04 02:16:00 +00:00
Nikolas Garza
8ef504acd5 refactor: add OnyxErrorCode enum and migrate billing/license routers (#8975) 2026-03-04 02:03:38 +00:00
Danelegend
0dbabfe445 feat: Support intermediate code interpreter file generation (#9006) 2026-03-04 01:55:44 +00:00
Justin Tahara
50575d0f6b chore(ui): Rename from LLM Models to Language Models (#9007) 2026-03-04 01:40:34 +00:00
Evan Lohn
9862fbd4a6 chore: deploying onyx lite (#9004) 2026-03-04 01:38:02 +00:00
Jamison Lahman
003d94546a fix(a11y): prevent show password button losing focus on tab (#9000) 2026-03-04 01:13:31 +00:00
Nikolas Garza
01d3473974 chore: port Greptile custom context rules to greptile.json (#9003) 2026-03-04 01:04:15 +00:00
Raunak Bhagat
19c7809a43 feat: Add illustrations to opal (#8993) 2026-03-04 00:59:47 +00:00
Bo-Onyx
98e6346152 chore: [Running GitHub actions for #8972] (#8996)
Co-authored-by: Jean Caillé <jean.caille@helsing.ai>
2026-03-03 23:36:18 +00:00
acaprau
c63fdf1c13 fix(opensearch): Increase the Vespa http client timeout to 120s for the OpenSearch migration (#8966) 2026-03-03 22:40:50 +00:00
Justin Tahara
49b509a0a7 fix(permissions): Add file connector access control for global curators (#8990) 2026-03-03 22:13:11 +00:00
Wenxi
2b1f1fe311 chore: use abort controller to properly manage oauth requests (#8994) 2026-03-03 21:46:17 +00:00
Danelegend
3e67ea9df7 feat: Code Interpreter responsive in actions dropdown (#8982) 2026-03-03 21:22:37 +00:00
Wenxi
98e3602dd6 fix: google connectors redirect to connector page instead of auth error (#8989) 2026-03-03 21:18:30 +00:00
Wenxi
4fded5b0a1 chore: remove dead code from expandable content component (#8981) 2026-03-03 21:11:07 +00:00
Wenxi
328c305d26 chore: remove dead code from admin theming (#8979) 2026-03-03 21:06:28 +00:00
Jamison Lahman
f902727215 chore(devtools): npm run test:diff on changed files (#8991) 2026-03-03 13:10:35 -08:00
Justin Tahara
69c8aa08b3 fix(ci): Add secrets inheritance to nightly LLM provider chat workflow (#8984) 2026-03-03 20:49:12 +00:00
Raunak Bhagat
c98aa486e4 refactor(fe): migrate onboarding components to Content/ContentAction (#8983) 2026-03-03 20:46:40 +00:00
Wenxi
03553114c5 fix(ollama): debounce API url input and properly handle model fetch request with abort signal (#8986) 2026-03-03 20:08:57 +00:00
Justin Tahara
6532c94230 chore: Add greptile.json configuration file (#8978) 2026-03-03 19:58:05 +00:00
Danelegend
1b32a7d94e fix: Default code interpreter base url (#8969) 2026-03-03 18:35:20 +00:00
Danelegend
5fd0fe192b fix: Tokeniser does not rely on llm (#8967) 2026-03-03 18:35:15 +00:00
Wenxi
1de522f9ae fix: sandbox rollback db on pod deletion failure (#8965) 2026-03-03 17:09:50 +00:00
Raunak Bhagat
60fe3e9ad6 refactor(fe): migrate admin pages from AdminPageTitle to SettingsLayouts (#8930) 2026-03-03 08:34:58 +00:00
Evan Lohn
6aa56821d6 feat: use new cache backend where appropriate (#8889) 2026-03-03 07:14:39 +00:00
Danelegend
eda436de01 fix: Block deleting default provider (#8962) 2026-03-03 06:36:29 +00:00
Danelegend
07915a6c01 fix: Update frontend route calls to use new endpoints (#8968) 2026-03-03 06:19:47 +00:00
Nikolas Garza
2c3e9aecd1 fix(scim): only list SCIM-managed users and link pre-existing users (#8959) 2026-03-03 05:36:43 +00:00
Evan Lohn
fa29cc3849 feat: postgres cache backend (#8879) 2026-03-03 04:33:47 +00:00
Raunak Bhagat
24ac8b37d3 refactor(fe): define settings layout width presets as CSS variables (#8936) 2026-03-03 03:11:18 +00:00
Jessica Singh
be8b108ae4 chore(auth): ecs fargate deployment cleanup (#8589) 2026-03-03 02:34:04 +00:00
Danelegend
f380a75df3 fix: Non-intuitive llm auth exceptions (#8960) 2026-03-03 01:58:45 +00:00
Wenxi
21ec93663b chore: proxy cloud ph (#8961) 2026-03-03 01:43:15 +00:00
Raunak Bhagat
d789c74024 chore(icons): add SvgBookmark to @opal/icons (#8933) 2026-03-02 17:07:24 -08:00
Danelegend
fe014776f7 feat: embed code interpreter images in chat (#8875) 2026-03-03 00:57:56 +00:00
Danelegend
700ca0e0fc fix: Sticky background in CSV Preview Variant (#8939) 2026-03-03 00:08:17 +00:00
Jamison Lahman
a84f8238ec chore(fe): space between Manage All connectors button (#8938) 2026-03-02 23:56:08 +00:00
dependabot[bot]
4fc802e19d chore(deps): bump pypdf from 6.7.4 to 6.7.5 (#8932)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-02 23:55:34 +00:00
Danelegend
6cfd49439a chore: Bump code interpreter to 0.3.1 (#8937) 2026-03-02 23:49:58 +00:00
Jamison Lahman
71a1faa47e fix(fe): break long words in human messages (#8929)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-03-02 15:47:35 -08:00
Nikolas Garza
1a65217baf fix(scim): pass Okta Runscope spec test for OIN submission (#8925) 2026-03-02 23:03:38 +00:00
dependabot[bot]
30fa43b5fc chore(deps): bump pypdf from 6.7.3 to 6.7.4 (#8905)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-02 22:48:54 +00:00
Justin Tahara
28332fa24b fix(ui): InputComboBox search for users/groups (#8928) 2026-03-02 22:44:09 +00:00
Raunak Bhagat
1f5050f9f6 refactor(admin): update admin-page HealthCheckBanner (#8922) 2026-03-02 22:21:47 +00:00
acaprau
3c1d29d3cf chore(opensearch): Configure index settings for multitenant cloud (#8921) 2026-03-02 22:16:05 +00:00
Raunak Bhagat
709e3f4ca7 chore(icons): add SvgCreditCard and SvgNetworkGraph to @opal/icons (#8927) 2026-03-02 22:04:36 +00:00
Jamison Lahman
dfa27c08ef chore(deployment): optimize layer caching (#8924) 2026-03-02 20:58:46 +00:00
Nikolas Garza
13d60dcb0e test(scim): add integration tests for SCIM group CRUD (#8830) 2026-03-02 20:51:59 +00:00
Evan Lohn
30704f427f refactor: add abstraction for cache backend (#8870) 2026-03-02 20:50:13 +00:00
Jamison Lahman
4f3c54f282 chore(playwright): hide actions toolbar buttons in screenshots (#8914) 2026-03-02 20:10:21 +00:00
Jamison Lahman
580d41dc23 chore(mypy): run from repro root in CI (#6995) 2026-03-02 20:04:54 +00:00
Raunak Bhagat
897e181d67 refactor(opal): update ModalHeader to use Content (#8885) 2026-03-02 20:04:35 +00:00
dependabot[bot]
fd322a8a10 chore(deps): bump lxml-html-clean from 0.4.3 to 0.4.4 (#8919)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-02 20:03:36 +00:00
Evan Lohn
11c54bafb5 chore: no vector db deployment (#8867) 2026-03-02 20:01:26 +00:00
Nikolas Garza
c93617df5d test(scim): add integration tests for SCIM user CRUD (#8825) 2026-03-02 19:38:33 +00:00
Justin Tahara
0cdd438f46 chore(ui): Update the Share Agent Modal (#8915) 2026-03-02 19:28:49 +00:00
Justin Tahara
31aef36f78 chore(llm): Use AWS Secrrets Manager (#8913) 2026-03-02 19:28:43 +00:00
Jamison Lahman
0c35dfc0e4 fix(search): re-sync search/chat preference on user data load (#8868) 2026-03-02 18:59:02 +00:00
Nikolas Garza
a9769757fe fix(llm): enforce persona restrictions on public LLM providers (#8846)
Co-authored-by: Dane <dane@onyx.app>
2026-03-02 18:20:03 +00:00
Nikolas Garza
15d8946f40 refactor(fe): rename assistant → agent identifiers (#8869) 2026-03-02 18:19:23 +00:00
Nikolas Garza
ba79539d6d feat(slack): add Slack user deactivation and seat-aware reactivation (#8887) 2026-03-02 18:10:13 +00:00
Jamison Lahman
59d3725fc6 chore(gha): rm docker-compose.opensearch.yml ref (#8912) 2026-03-02 17:34:22 +00:00
Jamison Lahman
9c05bd215d fix(a11y): settings popover buttons prefer href (#8880)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-03-02 17:22:01 +00:00
Wenxi
4d2aa09654 feat: infinite chat session sidebar scroll (#8874) 2026-03-02 17:20:23 +00:00
Jamison Lahman
16c07c8756 feat(desktop): option to hide alt-menu (#8882)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-03-02 17:03:24 +00:00
Raunak Bhagat
3fb4f5d6e6 refactor(opal): split ContentLg into ContentXl + ContentLg (#8904) 2026-03-02 15:11:32 +00:00
Evan Lohn
14fab7fcdf feat: no vector db beat tasks (#8865) 2026-03-02 03:51:18 +00:00
Evan Lohn
22a335fffa feat: bg tasks via fastapi (#8861) 2026-03-02 02:35:31 +00:00
Justin Tahara
b0f7466eba chore(llm): Fixing test image selection (#8902) 2026-03-01 18:32:20 +00:00
Evan Lohn
b1d42726b1 test: file reader tool (#8856) 2026-02-28 23:09:43 +00:00
Yuhong Sun
7d922bffc1 chore: Persona cleanup (#8810) 2026-02-28 21:34:45 +00:00
Evan Lohn
de7fc36fc5 test: no vector db user file processing (#8854) 2026-02-28 04:19:59 +00:00
Evan Lohn
7f9e37450d fix: non vector db tasks (#8849) 2026-02-28 03:51:57 +00:00
Evan Lohn
c7ef85b733 chore: narrow no_vector_db supported scope (#8847) 2026-02-28 02:54:15 +00:00
720 changed files with 22528 additions and 6940 deletions

View File

@@ -54,6 +54,7 @@ runs:
shell: bash
env:
RUNS_ON_ECR_CACHE: ${{ inputs.runs-on-ecr-cache }}
INTEGRATION_REPOSITORY: ${{ inputs.runs-on-ecr-cache }}
TAG: nightly-llm-it-${{ inputs.run-id }}
CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
HEAD_SHA: ${{ inputs.github-sha }}

View File

@@ -213,7 +213,7 @@ jobs:
- name: Configure AWS credentials
if: startsWith(matrix.platform, 'macos-')
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -384,7 +384,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -426,8 +426,9 @@ jobs:
ONYX_VERSION=${{ github.ref_name }}
NODE_OPTIONS=--max-old-space-size=8192
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-amd64
type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-amd64,mode=max
@@ -457,7 +458,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -499,8 +500,9 @@ jobs:
ONYX_VERSION=${{ github.ref_name }}
NODE_OPTIONS=--max-old-space-size=8192
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-arm64
type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-arm64,mode=max
@@ -525,7 +527,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -595,7 +597,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -646,8 +648,8 @@ jobs:
NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
NODE_OPTIONS=--max-old-space-size=8192
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-amd64
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-amd64,mode=max
@@ -677,7 +679,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -728,8 +730,8 @@ jobs:
NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
NODE_OPTIONS=--max-old-space-size=8192
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-arm64
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-arm64,mode=max
@@ -754,7 +756,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -821,7 +823,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -862,8 +864,9 @@ jobs:
build-args: |
ONYX_VERSION=${{ github.ref_name }}
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-amd64
type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-amd64,mode=max
@@ -893,7 +896,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -934,8 +937,9 @@ jobs:
build-args: |
ONYX_VERSION=${{ github.ref_name }}
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-arm64
type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-arm64,mode=max
@@ -960,7 +964,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1030,7 +1034,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1072,8 +1076,8 @@ jobs:
ONYX_VERSION=${{ github.ref_name }}
ENABLE_CRAFT=true
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-craft-cache-amd64
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-craft-cache-amd64,mode=max
@@ -1103,7 +1107,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1145,8 +1149,8 @@ jobs:
ONYX_VERSION=${{ github.ref_name }}
ENABLE_CRAFT=true
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-craft-cache-arm64
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-craft-cache-arm64,mode=max
@@ -1172,7 +1176,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1242,7 +1246,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1287,8 +1291,9 @@ jobs:
build-args: |
ONYX_VERSION=${{ github.ref_name }}
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-amd64
type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-amd64,mode=max
@@ -1321,7 +1326,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1366,8 +1371,9 @@ jobs:
build-args: |
ONYX_VERSION=${{ github.ref_name }}
cache-from: |
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-arm64
type=registry,ref=${{ env.REGISTRY_IMAGE }}:edge
type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
cache-to: |
type=inline
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-arm64,mode=max
@@ -1394,7 +1400,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1459,7 +1465,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1514,7 +1520,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1574,7 +1580,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1631,7 +1637,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2

View File

@@ -15,6 +15,10 @@ permissions:
jobs:
provider-chat-test:
uses: ./.github/workflows/reusable-nightly-llm-provider-chat.yml
secrets: inherit
permissions:
contents: read
id-token: write
with:
openai_models: ${{ vars.NIGHTLY_LLM_OPENAI_MODELS }}
anthropic_models: ${{ vars.NIGHTLY_LLM_ANTHROPIC_MODELS }}
@@ -25,16 +29,6 @@ jobs:
ollama_models: ${{ vars.NIGHTLY_LLM_OLLAMA_MODELS }}
openrouter_models: ${{ vars.NIGHTLY_LLM_OPENROUTER_MODELS }}
strict: true
secrets:
openai_api_key: ${{ secrets.OPENAI_API_KEY }}
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
bedrock_api_key: ${{ secrets.BEDROCK_API_KEY }}
vertex_ai_custom_config_json: ${{ secrets.NIGHTLY_LLM_VERTEX_AI_CUSTOM_CONFIG_JSON }}
azure_api_key: ${{ secrets.AZURE_API_KEY }}
ollama_api_key: ${{ secrets.OLLAMA_API_KEY }}
openrouter_api_key: ${{ secrets.OPENROUTER_API_KEY }}
DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
notify-slack-on-failure:
needs: [provider-chat-test]

View File

@@ -160,7 +160,7 @@ jobs:
cd deployment/docker_compose
# Get list of running containers
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.opensearch.yml ps -q)
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml ps -q)
# Collect logs from each container
for container in $containers; do

View File

@@ -71,7 +71,7 @@ jobs:
- name: Create kind cluster
if: steps.list-changed.outputs.changed == 'true'
uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # ratchet:helm/kind-action@v1.13.0
uses: helm/kind-action@ef37e7f390d99f746eb8b610417061a60e82a6cc # ratchet:helm/kind-action@v1.14.0
- name: Pre-install cluster status check
if: steps.list-changed.outputs.changed == 'true'

View File

@@ -335,7 +335,6 @@ jobs:
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
LICENSE_ENFORCEMENT_ENABLED=false
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001
USE_LIGHTWEIGHT_BACKGROUND_WORKER=false
EOF
fi
@@ -471,13 +470,13 @@ jobs:
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
no-vectordb-tests:
onyx-lite-tests:
needs: [build-backend-image, build-integration-image]
runs-on:
[
runs-on,
runner=4cpu-linux-arm64,
"run-id=${{ github.run_id }}-no-vectordb-tests",
"run-id=${{ github.run_id }}-onyx-lite-tests",
"extras=ecr-cache",
]
timeout-minutes: 45
@@ -495,13 +494,12 @@ jobs:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create .env file for no-vectordb Docker Compose
- name: Create .env file for Onyx Lite Docker Compose
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
COMPOSE_PROFILES=s3-filestore
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
LICENSE_ENFORCEMENT_ENABLED=false
AUTH_TYPE=basic
@@ -509,28 +507,23 @@ jobs:
POSTGRES_USE_NULL_POOL=true
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
DISABLE_VECTOR_DB=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
USE_LIGHTWEIGHT_BACKGROUND_WORKER=true
EOF
# Start only the services needed for no-vectordb mode (no Vespa, no model servers)
- name: Start Docker containers (no-vectordb)
# Start only the services needed for Onyx Lite (Postgres + API server)
- name: Start Docker containers (onyx-lite)
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml up \
docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml up \
relational_db \
cache \
minio \
api_server \
background \
-d
id: start_docker_no_vectordb
id: start_docker_onyx_lite
- name: Wait for services to be ready
run: |
echo "Starting wait-for-service script (no-vectordb)..."
echo "Starting wait-for-service script (onyx-lite)..."
start_time=$(date +%s)
timeout=300
while true; do
@@ -552,14 +545,14 @@ jobs:
sleep 5
done
- name: Run No-VectorDB Integration Tests
- name: Run Onyx Lite Integration Tests
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 20
max_attempts: 3
retry_wait_seconds: 10
command: |
echo "Running no-vectordb integration tests..."
echo "Running onyx-lite integration tests..."
docker run --rm --network onyx_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
@@ -570,39 +563,38 @@ jobs:
-e DB_READONLY_PASSWORD=password \
-e POSTGRES_POOL_PRE_PING=true \
-e POSTGRES_USE_NULL_POOL=true \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e TEST_WEB_HOSTNAME=test-runner \
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
/app/tests/integration/tests/no_vectordb
- name: Dump API server logs (no-vectordb)
- name: Dump API server logs (onyx-lite)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml \
logs --no-color api_server > $GITHUB_WORKSPACE/api_server_no_vectordb.log || true
docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml \
logs --no-color api_server > $GITHUB_WORKSPACE/api_server_onyx_lite.log || true
- name: Dump all-container logs (no-vectordb)
- name: Dump all-container logs (onyx-lite)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml \
logs --no-color > $GITHUB_WORKSPACE/docker-compose-no-vectordb.log || true
docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml \
logs --no-color > $GITHUB_WORKSPACE/docker-compose-onyx-lite.log || true
- name: Upload logs (no-vectordb)
- name: Upload logs (onyx-lite)
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
with:
name: docker-all-logs-no-vectordb
path: ${{ github.workspace }}/docker-compose-no-vectordb.log
name: docker-all-logs-onyx-lite
path: ${{ github.workspace }}/docker-compose-onyx-lite.log
- name: Stop Docker containers (no-vectordb)
- name: Stop Docker containers (onyx-lite)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml down -v
docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml down -v
multitenant-tests:
needs:
@@ -744,7 +736,7 @@ jobs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
needs: [integration-tests, no-vectordb-tests, multitenant-tests]
needs: [integration-tests, onyx-lite-tests, multitenant-tests]
if: ${{ always() }}
steps:
- name: Check job status

View File

@@ -268,10 +268,11 @@ jobs:
persist-credentials: false
- name: Setup node
# zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"
cache: "npm" # zizmor: ignore[cache-poisoning]
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
@@ -279,6 +280,7 @@ jobs:
run: npm ci
- name: Cache playwright cache
# zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/ms-playwright
@@ -459,7 +461,7 @@ jobs:
# --- Visual Regression Diff ---
- name: Configure AWS credentials
if: always()
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -590,6 +592,108 @@ jobs:
name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
path: ${{ github.workspace }}/docker-compose.log
playwright-tests-lite:
needs: [build-web-image, build-backend-image]
name: Playwright Tests (lite)
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- "run-id=${{ github.run_id }}-playwright-tests-lite"
- "extras=ecr-cache"
timeout-minutes: 30
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup node
# zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm" # zizmor: ignore[cache-poisoning]
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
working-directory: ./web
run: npm ci
- name: Cache playwright cache
# zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/ms-playwright
key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
restore-keys: |
${{ runner.os }}-playwright-npm-
- name: Install playwright browsers
working-directory: ./web
run: npx playwright install --with-deps
- name: Create .env file for Docker Compose
env:
OPENAI_API_KEY_VALUE: ${{ env.OPENAI_API_KEY }}
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
LICENSE_ENFORCEMENT_ENABLED=false
AUTH_TYPE=basic
INTEGRATION_TESTS_MODE=true
GEN_AI_API_KEY=${OPENAI_API_KEY_VALUE}
MOCK_LLM_RESPONSE=true
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:playwright-test-backend-${RUN_ID}
ONYX_WEB_SERVER_IMAGE=${ECR_CACHE}:playwright-test-web-${RUN_ID}
EOF
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Start Docker containers (lite)
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml up -d
id: start_docker
- name: Run Playwright tests (lite)
working-directory: ./web
run: npx playwright test --project lite
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
if: always()
with:
name: playwright-test-results-lite-${{ github.run_id }}
path: ./web/output/playwright/
retention-days: 30
- name: Save Docker logs
if: success() || failure()
env:
WORKSPACE: ${{ github.workspace }}
run: |
cd deployment/docker_compose
docker compose logs > docker-compose.log
mv docker-compose.log ${WORKSPACE}/docker-compose.log
- name: Upload logs
if: success() || failure()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
with:
name: docker-logs-lite-${{ github.run_id }}
path: ${{ github.workspace }}/docker-compose.log
# Post a single combined visual regression comment after all matrix jobs finish
visual-regression-comment:
needs: [playwright-tests]
@@ -686,7 +790,7 @@ jobs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
needs: [playwright-tests]
needs: [playwright-tests, playwright-tests-lite]
if: ${{ always() }}
steps:
- name: Check job status

View File

@@ -8,7 +8,7 @@ on:
pull_request:
branches:
- main
- 'release/**'
- "release/**"
push:
tags:
- "v*.*.*"
@@ -21,7 +21,13 @@ jobs:
# See https://runs-on.com/runners/linux/
# Note: Mypy seems quite optimized for x64 compared to arm64.
# Similarly, mypy is single-threaded and incremental, so 2cpu is sufficient.
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-mypy-check", "extras=s3-cache"]
runs-on:
[
runs-on,
runner=2cpu-linux-x64,
"run-id=${{ github.run_id }}-mypy-check",
"extras=s3-cache",
]
timeout-minutes: 45
steps:
@@ -52,21 +58,14 @@ jobs:
if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: backend/.mypy_cache
key: mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-${{ hashFiles('**/*.py', '**/*.pyi', 'backend/pyproject.toml') }}
path: .mypy_cache
key: mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-${{ hashFiles('**/*.py', '**/*.pyi', 'pyproject.toml') }}
restore-keys: |
mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-
mypy-${{ runner.os }}-
- name: Run MyPy
working-directory: ./backend
env:
MYPY_FORCE_COLOR: 1
TERM: xterm-256color
run: mypy .
- name: Run MyPy (tools/)
env:
MYPY_FORCE_COLOR: 1
TERM: xterm-256color
run: mypy tools/

View File

@@ -38,9 +38,9 @@ jobs:
- name: Install node dependencies
working-directory: ./web
run: npm ci
- uses: j178/prek-action@9d6a3097e0c1865ecce00cfb89fe80f2ee91b547 # ratchet:j178/prek-action@v1
- uses: j178/prek-action@0bb87d7f00b0c99306c8bcb8b8beba1eb581c037 # ratchet:j178/prek-action@v1
with:
prek-version: '0.2.21'
prek-version: '0.3.4'
extra-args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || github.event_name == 'merge_group' && format('--from-ref {0} --to-ref {1}', github.event.merge_group.base_sha, github.event.merge_group.head_sha) || github.ref_name == 'main' && '--all-files' || '' }}
- name: Check Actions
uses: giner/check-actions@28d366c7cbbe235f9624a88aa31a628167eee28c # ratchet:giner/check-actions@v1.0.1

View File

@@ -48,28 +48,10 @@ on:
required: false
default: true
type: boolean
secrets:
openai_api_key:
required: false
anthropic_api_key:
required: false
bedrock_api_key:
required: false
vertex_ai_custom_config_json:
required: false
azure_api_key:
required: false
ollama_api_key:
required: false
openrouter_api_key:
required: false
DOCKER_USERNAME:
required: true
DOCKER_TOKEN:
required: true
permissions:
contents: read
id-token: write
jobs:
build-backend-image:
@@ -81,6 +63,7 @@ jobs:
"extras=ecr-cache",
]
timeout-minutes: 45
environment: ci-protected
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -89,6 +72,19 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, test/docker-username
DOCKER_TOKEN, test/docker-token
- name: Build backend image
uses: ./.github/actions/build-backend-image
with:
@@ -97,8 +93,8 @@ jobs:
pr-number: ${{ github.event.pull_request.number }}
github-sha: ${{ github.sha }}
run-id: ${{ github.run_id }}
docker-username: ${{ secrets.DOCKER_USERNAME }}
docker-token: ${{ secrets.DOCKER_TOKEN }}
docker-username: ${{ env.DOCKER_USERNAME }}
docker-token: ${{ env.DOCKER_TOKEN }}
docker-no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' && 'true' || 'false' }}
build-model-server-image:
@@ -110,6 +106,7 @@ jobs:
"extras=ecr-cache",
]
timeout-minutes: 45
environment: ci-protected
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -118,6 +115,19 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, test/docker-username
DOCKER_TOKEN, test/docker-token
- name: Build model server image
uses: ./.github/actions/build-model-server-image
with:
@@ -126,8 +136,8 @@ jobs:
pr-number: ${{ github.event.pull_request.number }}
github-sha: ${{ github.sha }}
run-id: ${{ github.run_id }}
docker-username: ${{ secrets.DOCKER_USERNAME }}
docker-token: ${{ secrets.DOCKER_TOKEN }}
docker-username: ${{ env.DOCKER_USERNAME }}
docker-token: ${{ env.DOCKER_TOKEN }}
build-integration-image:
runs-on:
@@ -138,6 +148,7 @@ jobs:
"extras=ecr-cache",
]
timeout-minutes: 45
environment: ci-protected
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -146,6 +157,19 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, test/docker-username
DOCKER_TOKEN, test/docker-token
- name: Build integration image
uses: ./.github/actions/build-integration-image
with:
@@ -154,8 +178,8 @@ jobs:
pr-number: ${{ github.event.pull_request.number }}
github-sha: ${{ github.sha }}
run-id: ${{ github.run_id }}
docker-username: ${{ secrets.DOCKER_USERNAME }}
docker-token: ${{ secrets.DOCKER_TOKEN }}
docker-username: ${{ env.DOCKER_USERNAME }}
docker-token: ${{ env.DOCKER_TOKEN }}
provider-chat-test:
needs:
@@ -170,56 +194,56 @@ jobs:
include:
- provider: openai
models: ${{ inputs.openai_models }}
api_key_secret: openai_api_key
custom_config_secret: ""
api_key_env: OPENAI_API_KEY
custom_config_env: ""
api_base: ""
api_version: ""
deployment_name: ""
required: true
- provider: anthropic
models: ${{ inputs.anthropic_models }}
api_key_secret: anthropic_api_key
custom_config_secret: ""
api_key_env: ANTHROPIC_API_KEY
custom_config_env: ""
api_base: ""
api_version: ""
deployment_name: ""
required: true
- provider: bedrock
models: ${{ inputs.bedrock_models }}
api_key_secret: bedrock_api_key
custom_config_secret: ""
api_key_env: BEDROCK_API_KEY
custom_config_env: ""
api_base: ""
api_version: ""
deployment_name: ""
required: false
- provider: vertex_ai
models: ${{ inputs.vertex_ai_models }}
api_key_secret: ""
custom_config_secret: vertex_ai_custom_config_json
api_key_env: ""
custom_config_env: NIGHTLY_LLM_VERTEX_AI_CUSTOM_CONFIG_JSON
api_base: ""
api_version: ""
deployment_name: ""
required: false
- provider: azure
models: ${{ inputs.azure_models }}
api_key_secret: azure_api_key
custom_config_secret: ""
api_key_env: AZURE_API_KEY
custom_config_env: ""
api_base: ${{ inputs.azure_api_base }}
api_version: "2025-04-01-preview"
deployment_name: ""
required: false
- provider: ollama_chat
models: ${{ inputs.ollama_models }}
api_key_secret: ollama_api_key
custom_config_secret: ""
api_key_env: OLLAMA_API_KEY
custom_config_env: ""
api_base: "https://ollama.com"
api_version: ""
deployment_name: ""
required: false
- provider: openrouter
models: ${{ inputs.openrouter_models }}
api_key_secret: openrouter_api_key
custom_config_secret: ""
api_key_env: OPENROUTER_API_KEY
custom_config_env: ""
api_base: "https://openrouter.ai/api/v1"
api_version: ""
deployment_name: ""
@@ -230,6 +254,7 @@ jobs:
- "run-id=${{ github.run_id }}-nightly-${{ matrix.provider }}-provider-chat-test"
- extras=ecr-cache
timeout-minutes: 45
environment: ci-protected
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -238,21 +263,43 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
# Keep JSON values unparsed so vertex custom config is passed as raw JSON.
parse-json-secrets: false
secret-ids: |
DOCKER_USERNAME, test/docker-username
DOCKER_TOKEN, test/docker-token
OPENAI_API_KEY, test/openai-api-key
ANTHROPIC_API_KEY, test/anthropic-api-key
BEDROCK_API_KEY, test/bedrock-api-key
NIGHTLY_LLM_VERTEX_AI_CUSTOM_CONFIG_JSON, test/nightly-llm-vertex-ai-custom-config-json
AZURE_API_KEY, test/azure-api-key
OLLAMA_API_KEY, test/ollama-api-key
OPENROUTER_API_KEY, test/openrouter-api-key
- name: Run nightly provider chat test
uses: ./.github/actions/run-nightly-provider-chat-test
with:
provider: ${{ matrix.provider }}
models: ${{ matrix.models }}
provider-api-key: ${{ matrix.api_key_secret && secrets[matrix.api_key_secret] || '' }}
provider-api-key: ${{ matrix.api_key_env && env[matrix.api_key_env] || '' }}
strict: ${{ inputs.strict && 'true' || 'false' }}
api-base: ${{ matrix.api_base }}
api-version: ${{ matrix.api_version }}
deployment-name: ${{ matrix.deployment_name }}
custom-config-json: ${{ matrix.custom_config_secret && secrets[matrix.custom_config_secret] || '' }}
custom-config-json: ${{ matrix.custom_config_env && env[matrix.custom_config_env] || '' }}
runs-on-ecr-cache: ${{ env.RUNS_ON_ECR_CACHE }}
run-id: ${{ github.run_id }}
docker-username: ${{ secrets.DOCKER_USERNAME }}
docker-token: ${{ secrets.DOCKER_TOKEN }}
docker-username: ${{ env.DOCKER_USERNAME }}
docker-token: ${{ env.DOCKER_TOKEN }}
- name: Dump API server logs
if: always()

View File

@@ -110,7 +110,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -180,7 +180,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -244,7 +244,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2

View File

@@ -119,9 +119,10 @@ repos:
]
- repo: https://github.com/golangci/golangci-lint
rev: 9f61b0f53f80672872fced07b6874397c3ed197b # frozen: v2.7.2
rev: 5d1e709b7be35cb2025444e19de266b056b7b7ee # frozen: v2.10.1
hooks:
- id: golangci-lint
language_version: "1.26.0"
entry: bash -c "find tools/ -name go.mod -print0 | xargs -0 -I{} bash -c 'cd \"$(dirname {})\" && golangci-lint run ./...'"
- repo: https://github.com/astral-sh/ruff-pre-commit

58
.vscode/launch.json vendored
View File

@@ -40,19 +40,7 @@
}
},
{
"name": "Celery (lightweight mode)",
"configurations": [
"Celery primary",
"Celery background",
"Celery beat"
],
"presentation": {
"group": "1"
},
"stopAll": true
},
{
"name": "Celery (standard mode)",
"name": "Celery",
"configurations": [
"Celery primary",
"Celery light",
@@ -253,35 +241,6 @@
},
"consoleTitle": "Celery light Console"
},
{
"name": "Celery background",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.background",
"worker",
"--pool=threads",
"--concurrency=20",
"--prefetch-multiplier=4",
"--loglevel=INFO",
"--hostname=background@%n",
"-Q",
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup,index_attempt_cleanup,docprocessing,connector_doc_fetching,connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,kg_processing,monitoring,user_file_processing,user_file_project_sync,user_file_delete,opensearch_migration"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery background Console"
},
{
"name": "Celery heavy",
"type": "debugpy",
@@ -526,21 +485,6 @@
"group": "3"
}
},
{
"name": "Clear and Restart OpenSearch Container",
// Generic debugger type, required arg but has no bearing on bash.
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": [
"${workspaceFolder}/backend/scripts/restart_opensearch_container.sh"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "3"
}
},
{
"name": "Eval CLI",
"type": "debugpy",

View File

@@ -86,37 +86,6 @@ Onyx uses Celery for asynchronous task processing with multiple specialized work
- Monitoring tasks (every 5 minutes)
- Cleanup tasks (hourly)
#### Worker Deployment Modes
Onyx supports two deployment modes for background workers, controlled by the `USE_LIGHTWEIGHT_BACKGROUND_WORKER` environment variable:
**Lightweight Mode** (default, `USE_LIGHTWEIGHT_BACKGROUND_WORKER=true`):
- Runs a single consolidated `background` worker that handles all background tasks:
- Light worker tasks (Vespa operations, permissions sync, deletion)
- Document processing (indexing pipeline)
- Document fetching (connector data retrieval)
- Pruning operations (from `heavy` worker)
- Knowledge graph processing (from `kg_processing` worker)
- Monitoring tasks (from `monitoring` worker)
- User file processing (from `user_file_processing` worker)
- Lower resource footprint (fewer worker processes)
- Suitable for smaller deployments or development environments
- Default concurrency: 20 threads (increased to handle combined workload)
**Standard Mode** (`USE_LIGHTWEIGHT_BACKGROUND_WORKER=false`):
- Runs separate specialized workers as documented above (light, docprocessing, docfetching, heavy, kg_processing, monitoring, user_file_processing)
- Better isolation and scalability
- Can scale individual workers independently based on workload
- Suitable for production deployments with higher load
The deployment mode affects:
- **Backend**: Worker processes spawned by supervisord or dev scripts
- **Helm**: Which Kubernetes deployments are created
- **Dev Environment**: Which workers `dev_run_background_jobs.py` spawns
#### Key Features
- **Thread-based Workers**: All workers use thread pools (not processes) for stability
@@ -617,6 +586,45 @@ Keep it high level. You can reference certain files or functions though.
Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.
## Error Handling
**Always raise `OnyxError` from `onyx.error_handling.exceptions` instead of `HTTPException`.
Never hardcode status codes or use `starlette.status` / `fastapi.status` constants directly.**
A global FastAPI exception handler converts `OnyxError` into a JSON response with the standard
`{"error_code": "...", "message": "..."}` shape. This eliminates boilerplate and keeps error
handling consistent across the entire backend.
```python
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
# ✅ Good
raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
# ✅ Good — no extra message needed
raise OnyxError(OnyxErrorCode.UNAUTHENTICATED)
# ✅ Good — upstream service with dynamic status code
raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)
# ❌ Bad — using HTTPException directly
raise HTTPException(status_code=404, detail="Session not found")
# ❌ Bad — starlette constant
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Access denied")
```
Available error codes are defined in `backend/onyx/error_handling/error_codes.py`. If a new error
category is needed, add it there first — do not invent ad-hoc codes.
**Upstream service errors:** When forwarding errors from an upstream service where the HTTP
status code is dynamic (comes from the upstream response), use `status_code_override`:
```python
raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=e.response.status_code)
```
## Best Practices
In addition to the other content in this file, best practices for contributing

View File

@@ -0,0 +1,37 @@
"""add cache_store table
Revision ID: 2664261bfaab
Revises: 4a1e4b1c89d2
Create Date: 2026-02-27 00:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "2664261bfaab"
down_revision = "4a1e4b1c89d2"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.create_table(
"cache_store",
sa.Column("key", sa.String(), nullable=False),
sa.Column("value", sa.LargeBinary(), nullable=True),
sa.Column("expires_at", sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint("key"),
)
op.create_index(
"ix_cache_store_expires",
"cache_store",
["expires_at"],
postgresql_where=sa.text("expires_at IS NOT NULL"),
)
def downgrade() -> None:
op.drop_index("ix_cache_store_expires", table_name="cache_store")
op.drop_table("cache_store")

View File

@@ -0,0 +1,51 @@
"""Add INDEXING to UserFileStatus
Revision ID: 4a1e4b1c89d2
Revises: 6b3b4083c5aa
Create Date: 2026-02-28 00:00:00.000000
"""
import sqlalchemy as sa
from alembic import op
revision = "4a1e4b1c89d2"
down_revision = "6b3b4083c5aa"
branch_labels = None
depends_on = None
TABLE = "user_file"
COLUMN = "status"
CONSTRAINT_NAME = "ck_user_file_status"
OLD_VALUES = ("PROCESSING", "COMPLETED", "FAILED", "CANCELED", "DELETING")
NEW_VALUES = ("PROCESSING", "INDEXING", "COMPLETED", "FAILED", "CANCELED", "DELETING")
def _drop_status_check_constraint() -> None:
"""Drop the existing CHECK constraint on user_file.status.
The constraint name is auto-generated by SQLAlchemy and unknown,
so we look it up via the inspector.
"""
inspector = sa.inspect(op.get_bind())
for constraint in inspector.get_check_constraints(TABLE):
if COLUMN in constraint.get("sqltext", ""):
constraint_name = constraint["name"]
if constraint_name is not None:
op.drop_constraint(constraint_name, TABLE, type_="check")
def upgrade() -> None:
_drop_status_check_constraint()
in_clause = ", ".join(f"'{v}'" for v in NEW_VALUES)
op.create_check_constraint(CONSTRAINT_NAME, TABLE, f"{COLUMN} IN ({in_clause})")
def downgrade() -> None:
op.execute(
f"UPDATE {TABLE} SET {COLUMN} = 'PROCESSING' WHERE {COLUMN} = 'INDEXING'"
)
op.drop_constraint(CONSTRAINT_NAME, TABLE, type_="check")
in_clause = ", ".join(f"'{v}'" for v in OLD_VALUES)
op.create_check_constraint(CONSTRAINT_NAME, TABLE, f"{COLUMN} IN ({in_clause})")

View File

@@ -0,0 +1,112 @@
"""persona cleanup and featured
Revision ID: 6b3b4083c5aa
Revises: 57122d037335
Create Date: 2026-02-26 12:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "6b3b4083c5aa"
down_revision = "57122d037335"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add featured column with nullable=True first
op.add_column("persona", sa.Column("featured", sa.Boolean(), nullable=True))
# Migrate data from is_default_persona to featured
op.execute("UPDATE persona SET featured = is_default_persona")
# Make featured non-nullable with default=False
op.alter_column(
"persona",
"featured",
existing_type=sa.Boolean(),
nullable=False,
server_default=sa.false(),
)
# Drop is_default_persona column
op.drop_column("persona", "is_default_persona")
# Drop unused columns
op.drop_column("persona", "num_chunks")
op.drop_column("persona", "chunks_above")
op.drop_column("persona", "chunks_below")
op.drop_column("persona", "llm_relevance_filter")
op.drop_column("persona", "llm_filter_extraction")
op.drop_column("persona", "recency_bias")
def downgrade() -> None:
# Add back recency_bias column
op.add_column(
"persona",
sa.Column(
"recency_bias",
sa.VARCHAR(),
nullable=False,
server_default="base_decay",
),
)
# Add back llm_filter_extraction column
op.add_column(
"persona",
sa.Column(
"llm_filter_extraction",
sa.Boolean(),
nullable=False,
server_default=sa.false(),
),
)
# Add back llm_relevance_filter column
op.add_column(
"persona",
sa.Column(
"llm_relevance_filter",
sa.Boolean(),
nullable=False,
server_default=sa.false(),
),
)
# Add back chunks_below column
op.add_column(
"persona",
sa.Column("chunks_below", sa.Integer(), nullable=False, server_default="0"),
)
# Add back chunks_above column
op.add_column(
"persona",
sa.Column("chunks_above", sa.Integer(), nullable=False, server_default="0"),
)
# Add back num_chunks column
op.add_column("persona", sa.Column("num_chunks", sa.Float(), nullable=True))
# Add back is_default_persona column
op.add_column(
"persona",
sa.Column(
"is_default_persona",
sa.Boolean(),
nullable=False,
server_default=sa.false(),
),
)
# Migrate data from featured to is_default_persona
op.execute("UPDATE persona SET is_default_persona = featured")
# Drop featured column
op.drop_column("persona", "featured")

View File

@@ -0,0 +1,34 @@
"""make scim_user_mapping.external_id nullable
Revision ID: a3b8d9e2f1c4
Revises: 2664261bfaab
Create Date: 2026-03-02
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "a3b8d9e2f1c4"
down_revision = "2664261bfaab"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.alter_column(
"scim_user_mapping",
"external_id",
nullable=True,
)
def downgrade() -> None:
# Delete any rows where external_id is NULL before re-applying NOT NULL
op.execute("DELETE FROM scim_user_mapping WHERE external_id IS NULL")
op.alter_column(
"scim_user_mapping",
"external_id",
nullable=False,
)

View File

@@ -1,15 +0,0 @@
from onyx.background.celery.apps import app_base
from onyx.background.celery.apps.background import celery_app
celery_app.autodiscover_tasks(
app_base.filter_task_modules(
[
"ee.onyx.background.celery.tasks.doc_permission_syncing",
"ee.onyx.background.celery.tasks.external_group_syncing",
"ee.onyx.background.celery.tasks.cleanup",
"ee.onyx.background.celery.tasks.tenant_provisioning",
"ee.onyx.background.celery.tasks.query_history",
]
)
)

View File

@@ -11,11 +11,10 @@ from ee.onyx.server.license.models import LicenseMetadata
from ee.onyx.server.license.models import LicensePayload
from ee.onyx.server.license.models import LicenseSource
from onyx.auth.schemas import UserRole
from onyx.cache.factory import get_cache_backend
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.db.models import License
from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
@@ -142,7 +141,7 @@ def get_used_seats(tenant_id: str | None = None) -> int:
def get_cached_license_metadata(tenant_id: str | None = None) -> LicenseMetadata | None:
"""
Get license metadata from Redis cache.
Get license metadata from cache.
Args:
tenant_id: Tenant ID (for multi-tenant deployments)
@@ -150,38 +149,34 @@ def get_cached_license_metadata(tenant_id: str | None = None) -> LicenseMetadata
Returns:
LicenseMetadata if cached, None otherwise
"""
tenant = tenant_id or get_current_tenant_id()
redis_client = get_redis_replica_client(tenant_id=tenant)
cache = get_cache_backend(tenant_id=tenant_id)
cached = cache.get(LICENSE_METADATA_KEY)
if not cached:
return None
cached = redis_client.get(LICENSE_METADATA_KEY)
if cached:
try:
cached_str: str
if isinstance(cached, bytes):
cached_str = cached.decode("utf-8")
else:
cached_str = str(cached)
return LicenseMetadata.model_validate_json(cached_str)
except Exception as e:
logger.warning(f"Failed to parse cached license metadata: {e}")
return None
return None
try:
cached_str = (
cached.decode("utf-8") if isinstance(cached, bytes) else str(cached)
)
return LicenseMetadata.model_validate_json(cached_str)
except Exception as e:
logger.warning(f"Failed to parse cached license metadata: {e}")
return None
def invalidate_license_cache(tenant_id: str | None = None) -> None:
"""
Invalidate the license metadata cache (not the license itself).
This deletes the cached LicenseMetadata from Redis. The actual license
in the database is not affected. Redis delete is idempotent - if the
key doesn't exist, this is a no-op.
Deletes the cached LicenseMetadata. The actual license in the database
is not affected. Delete is idempotent if the key doesn't exist, this
is a no-op.
Args:
tenant_id: Tenant ID (for multi-tenant deployments)
"""
tenant = tenant_id or get_current_tenant_id()
redis_client = get_redis_client(tenant_id=tenant)
redis_client.delete(LICENSE_METADATA_KEY)
cache = get_cache_backend(tenant_id=tenant_id)
cache.delete(LICENSE_METADATA_KEY)
logger.info("License cache invalidated")
@@ -192,7 +187,7 @@ def update_license_cache(
tenant_id: str | None = None,
) -> LicenseMetadata:
"""
Update the Redis cache with license metadata.
Update the cache with license metadata.
We cache all license statuses (ACTIVE, GRACE_PERIOD, GATED_ACCESS) because:
1. Frontend needs status to show appropriate UI/banners
@@ -211,7 +206,7 @@ def update_license_cache(
from ee.onyx.utils.license import get_license_status
tenant = tenant_id or get_current_tenant_id()
redis_client = get_redis_client(tenant_id=tenant)
cache = get_cache_backend(tenant_id=tenant_id)
used_seats = get_used_seats(tenant)
status = get_license_status(payload, grace_period_end)
@@ -230,7 +225,7 @@ def update_license_cache(
stripe_subscription_id=payload.stripe_subscription_id,
)
redis_client.set(
cache.set(
LICENSE_METADATA_KEY,
metadata.model_dump_json(),
ex=LICENSE_CACHE_TTL_SECONDS,

View File

@@ -126,12 +126,16 @@ class ScimDAL(DAL):
def create_user_mapping(
self,
external_id: str,
external_id: str | None,
user_id: UUID,
scim_username: str | None = None,
fields: ScimMappingFields | None = None,
) -> ScimUserMapping:
"""Create a mapping between a SCIM externalId and an Onyx user."""
"""Create a SCIM mapping for a user.
``external_id`` may be ``None`` when the IdP omits it (RFC 7643
allows this). The mapping still marks the user as SCIM-managed.
"""
f = fields or ScimMappingFields()
mapping = ScimUserMapping(
external_id=external_id,
@@ -270,8 +274,13 @@ class ScimDAL(DAL):
Raises:
ValueError: If the filter uses an unsupported attribute.
"""
query = select(User).where(
User.role.notin_([UserRole.SLACK_USER, UserRole.EXT_PERM_USER])
# Inner-join with ScimUserMapping so only SCIM-managed users appear.
# Pre-existing system accounts (anonymous, admin, etc.) are excluded
# unless they were explicitly linked via SCIM provisioning.
query = (
select(User)
.join(ScimUserMapping, ScimUserMapping.user_id == User.id)
.where(User.role.notin_([UserRole.SLACK_USER, UserRole.EXT_PERM_USER]))
)
if scim_filter:
@@ -321,34 +330,37 @@ class ScimDAL(DAL):
scim_username: str | None = None,
fields: ScimMappingFields | None = None,
) -> None:
"""Create, update, or delete the external ID mapping for a user.
"""Sync the SCIM mapping for a user.
If a mapping already exists, its fields are updated (including
setting ``external_id`` to ``None`` when the IdP omits it).
If no mapping exists and ``new_external_id`` is provided, a new
mapping is created. A mapping is never deleted here — SCIM-managed
users must retain their mapping to remain visible in ``GET /Users``.
When *fields* is provided, all mapping fields are written
unconditionally — including ``None`` values — so that a caller can
clear a previously-set field (e.g. removing a department).
"""
mapping = self.get_user_mapping_by_user_id(user_id)
if new_external_id:
if mapping:
if mapping.external_id != new_external_id:
mapping.external_id = new_external_id
if scim_username is not None:
mapping.scim_username = scim_username
if fields is not None:
mapping.department = fields.department
mapping.manager = fields.manager
mapping.given_name = fields.given_name
mapping.family_name = fields.family_name
mapping.scim_emails_json = fields.scim_emails_json
else:
self.create_user_mapping(
external_id=new_external_id,
user_id=user_id,
scim_username=scim_username,
fields=fields,
)
elif mapping:
self.delete_user_mapping(mapping.id)
if mapping:
if mapping.external_id != new_external_id:
mapping.external_id = new_external_id
if scim_username is not None:
mapping.scim_username = scim_username
if fields is not None:
mapping.department = fields.department
mapping.manager = fields.manager
mapping.given_name = fields.given_name
mapping.family_name = fields.family_name
mapping.scim_emails_json = fields.scim_emails_json
elif new_external_id:
self.create_user_mapping(
external_id=new_external_id,
user_id=user_id,
scim_username=scim_username,
fields=fields,
)
def _get_user_mappings_batch(
self, user_ids: list[UUID]

View File

@@ -15,6 +15,7 @@ from sqlalchemy.orm import Session
from ee.onyx.server.user_group.models import SetCuratorRequest
from ee.onyx.server.user_group.models import UserGroupCreate
from ee.onyx.server.user_group.models import UserGroupUpdate
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
@@ -471,7 +472,9 @@ def _add_user_group__cc_pair_relationships__no_commit(
def insert_user_group(db_session: Session, user_group: UserGroupCreate) -> UserGroup:
db_user_group = UserGroup(
name=user_group.name, time_last_modified_by_user=func.now()
name=user_group.name,
time_last_modified_by_user=func.now(),
is_up_to_date=DISABLE_VECTOR_DB,
)
db_session.add(db_user_group)
db_session.flush() # give the group an ID
@@ -774,8 +777,7 @@ def update_user_group(
cc_pair_ids=user_group_update.cc_pair_ids,
)
# only needs to sync with Vespa if the cc_pairs have been updated
if cc_pairs_updated:
if cc_pairs_updated and not DISABLE_VECTOR_DB:
db_user_group.is_up_to_date = False
removed_users = db_session.scalars(

View File

@@ -4,7 +4,6 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI
from httpx_oauth.clients.google import GoogleOAuth2
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
from ee.onyx.server.analytics.api import router as analytics_router
from ee.onyx.server.auth_check import check_ee_router_auth
from ee.onyx.server.billing.api import router as billing_router
@@ -31,6 +30,7 @@ from ee.onyx.server.query_and_chat.query_backend import (
from ee.onyx.server.query_and_chat.search_backend import router as search_router
from ee.onyx.server.query_history.api import router as query_history_router
from ee.onyx.server.reporting.usage_export_api import router as usage_export_router
from ee.onyx.server.scim.api import register_scim_exception_handlers
from ee.onyx.server.scim.api import scim_router
from ee.onyx.server.seeding import seed_db
from ee.onyx.server.tenants.api import router as tenants_router
@@ -152,12 +152,9 @@ def get_application() -> FastAPI:
# License management
include_router_with_global_prefix_prepended(application, license_router)
# Unified billing API - available when license system is enabled
# Works for both self-hosted and cloud deployments
# TODO(ENG-3533): Once frontend migrates to /admin/billing/*, this becomes the
# primary billing API and /tenants/* billing endpoints can be removed
if LICENSE_ENFORCEMENT_ENABLED:
include_router_with_global_prefix_prepended(application, billing_router)
# Unified billing API - always registered in EE.
# Each endpoint is protected by the `current_admin_user` dependency (admin auth).
include_router_with_global_prefix_prepended(application, billing_router)
if MULTI_TENANT:
# Tenant management
@@ -167,6 +164,7 @@ def get_application() -> FastAPI:
# they use their own SCIM bearer token auth).
# Not behind APP_API_PREFIX because IdPs expect /scim/v2/... directly.
application.include_router(scim_router)
register_scim_exception_handlers(application)
# Ensure all routes have auth enabled or are explicitly marked as public
check_ee_router_auth(application)

View File

@@ -26,7 +26,6 @@ import asyncio
import httpx
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from pydantic import BaseModel
from sqlalchemy.orm import Session
@@ -42,7 +41,6 @@ from ee.onyx.server.billing.models import SeatUpdateRequest
from ee.onyx.server.billing.models import SeatUpdateResponse
from ee.onyx.server.billing.models import StripePublishableKeyResponse
from ee.onyx.server.billing.models import SubscriptionStatusResponse
from ee.onyx.server.billing.service import BillingServiceError
from ee.onyx.server.billing.service import (
create_checkout_session as create_checkout_service,
)
@@ -58,6 +56,8 @@ from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.db.engine.sql_engine import get_session
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.redis.redis_pool import get_shared_redis_client
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
@@ -169,26 +169,23 @@ async def create_checkout_session(
if seats is not None:
used_seats = get_used_seats(tenant_id)
if seats < used_seats:
raise HTTPException(
status_code=400,
detail=f"Cannot subscribe with fewer seats than current usage. "
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
f"Cannot subscribe with fewer seats than current usage. "
f"You have {used_seats} active users/integrations but requested {seats} seats.",
)
# Build redirect URL for after checkout completion
redirect_url = f"{WEB_DOMAIN}/admin/billing?checkout=success"
try:
return await create_checkout_service(
billing_period=billing_period,
seats=seats,
email=email,
license_data=license_data,
redirect_url=redirect_url,
tenant_id=tenant_id,
)
except BillingServiceError as e:
raise HTTPException(status_code=e.status_code, detail=e.message)
return await create_checkout_service(
billing_period=billing_period,
seats=seats,
email=email,
license_data=license_data,
redirect_url=redirect_url,
tenant_id=tenant_id,
)
@router.post("/create-customer-portal-session")
@@ -206,18 +203,15 @@ async def create_customer_portal_session(
# Self-hosted requires license
if not MULTI_TENANT and not license_data:
raise HTTPException(status_code=400, detail="No license found")
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "No license found")
return_url = request.return_url if request else f"{WEB_DOMAIN}/admin/billing"
try:
return await create_portal_service(
license_data=license_data,
return_url=return_url,
tenant_id=tenant_id,
)
except BillingServiceError as e:
raise HTTPException(status_code=e.status_code, detail=e.message)
return await create_portal_service(
license_data=license_data,
return_url=return_url,
tenant_id=tenant_id,
)
@router.get("/billing-information")
@@ -240,9 +234,9 @@ async def get_billing_information(
# Check circuit breaker (self-hosted only)
if _is_billing_circuit_open():
raise HTTPException(
status_code=503,
detail="Stripe connection temporarily disabled. Click 'Connect to Stripe' to retry.",
raise OnyxError(
OnyxErrorCode.SERVICE_UNAVAILABLE,
"Stripe connection temporarily disabled. Click 'Connect to Stripe' to retry.",
)
try:
@@ -250,11 +244,11 @@ async def get_billing_information(
license_data=license_data,
tenant_id=tenant_id,
)
except BillingServiceError as e:
except OnyxError as e:
# Open circuit breaker on connection failures (self-hosted only)
if e.status_code in (502, 503, 504):
_open_billing_circuit()
raise HTTPException(status_code=e.status_code, detail=e.message)
raise
@router.post("/seats/update")
@@ -274,31 +268,25 @@ async def update_seats(
# Self-hosted requires license
if not MULTI_TENANT and not license_data:
raise HTTPException(status_code=400, detail="No license found")
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "No license found")
# Validate that new seat count is not less than current used seats
used_seats = get_used_seats(tenant_id)
if request.new_seat_count < used_seats:
raise HTTPException(
status_code=400,
detail=f"Cannot reduce seats below current usage. "
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
f"Cannot reduce seats below current usage. "
f"You have {used_seats} active users/integrations but requested {request.new_seat_count} seats.",
)
try:
result = await update_seat_service(
new_seat_count=request.new_seat_count,
license_data=license_data,
tenant_id=tenant_id,
)
# Note: Don't store license here - the control plane may still be processing
# the subscription update. The frontend should call /license/claim after a
# short delay to get the freshly generated license.
return result
except BillingServiceError as e:
raise HTTPException(status_code=e.status_code, detail=e.message)
# Note: Don't store license here - the control plane may still be processing
# the subscription update. The frontend should call /license/claim after a
# short delay to get the freshly generated license.
return await update_seat_service(
new_seat_count=request.new_seat_count,
license_data=license_data,
tenant_id=tenant_id,
)
@router.get("/stripe-publishable-key")
@@ -329,18 +317,18 @@ async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
# Fall back to S3 bucket
if not STRIPE_PUBLISHABLE_KEY_URL:
raise HTTPException(
status_code=500,
detail="Stripe publishable key is not configured",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Stripe publishable key is not configured",
)
try:
@@ -351,17 +339,17 @@ async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
# Validate key format
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
except httpx.HTTPError:
raise HTTPException(
status_code=500,
detail="Failed to fetch Stripe publishable key",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Failed to fetch Stripe publishable key",
)

View File

@@ -22,6 +22,8 @@ from ee.onyx.server.billing.models import SeatUpdateResponse
from ee.onyx.server.billing.models import SubscriptionStatusResponse
from ee.onyx.server.tenants.access import generate_data_plane_token
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
@@ -31,15 +33,6 @@ logger = setup_logger()
_REQUEST_TIMEOUT = 30.0
class BillingServiceError(Exception):
"""Exception raised for billing service errors."""
def __init__(self, message: str, status_code: int = 500):
self.message = message
self.status_code = status_code
super().__init__(self.message)
def _get_proxy_headers(license_data: str | None) -> dict[str, str]:
"""Build headers for proxy requests (self-hosted).
@@ -101,7 +94,7 @@ async def _make_billing_request(
Response JSON as dict
Raises:
BillingServiceError: If request fails
OnyxError: If request fails
"""
base_url = _get_base_url()
@@ -128,11 +121,17 @@ async def _make_billing_request(
except Exception:
pass
logger.error(f"{error_message}: {e.response.status_code} - {detail}")
raise BillingServiceError(detail, e.response.status_code)
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY,
detail,
status_code_override=e.response.status_code,
)
except httpx.RequestError:
logger.exception("Failed to connect to billing service")
raise BillingServiceError("Failed to connect to billing service", 502)
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY, "Failed to connect to billing service"
)
async def create_checkout_session(

View File

@@ -223,6 +223,15 @@ def get_active_scim_token(
token = dal.get_active_token()
if not token:
raise HTTPException(status_code=404, detail="No active SCIM token")
# Derive the IdP domain from the first synced user as a heuristic.
idp_domain: str | None = None
mappings, _total = dal.list_user_mappings(start_index=1, count=1)
if mappings:
user = dal.get_user(mappings[0].user_id)
if user and "@" in user.email:
idp_domain = user.email.rsplit("@", 1)[1]
return ScimTokenResponse(
id=token.id,
name=token.name,
@@ -230,6 +239,7 @@ def get_active_scim_token(
is_active=token.is_active,
created_at=token.created_at,
last_used_at=token.last_used_at,
idp_domain=idp_domain,
)

View File

@@ -14,7 +14,6 @@ import requests
from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import HTTPException
from fastapi import UploadFile
from sqlalchemy.orm import Session
@@ -35,6 +34,8 @@ from ee.onyx.server.license.models import SeatUsageResponse
from ee.onyx.utils.license import verify_license_signature
from onyx.auth.users import User
from onyx.db.engine.sql_engine import get_session
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
@@ -127,9 +128,9 @@ async def claim_license(
2. Without session_id: Re-claim using existing license for auth
"""
if MULTI_TENANT:
raise HTTPException(
status_code=400,
detail="License claiming is only available for self-hosted deployments",
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"License claiming is only available for self-hosted deployments",
)
try:
@@ -146,15 +147,16 @@ async def claim_license(
# Re-claim using existing license for auth
metadata = get_license_metadata(db_session)
if not metadata or not metadata.tenant_id:
raise HTTPException(
status_code=400,
detail="No license found. Provide session_id after checkout.",
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"No license found. Provide session_id after checkout.",
)
license_row = get_license(db_session)
if not license_row or not license_row.license_data:
raise HTTPException(
status_code=400, detail="No license found in database"
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"No license found in database",
)
url = f"{CLOUD_DATA_PLANE_URL}/proxy/license/{metadata.tenant_id}"
@@ -173,7 +175,7 @@ async def claim_license(
license_data = data.get("license")
if not license_data:
raise HTTPException(status_code=404, detail="No license in response")
raise OnyxError(OnyxErrorCode.NOT_FOUND, "No license in response")
# Verify signature before persisting
payload = verify_license_signature(license_data)
@@ -199,12 +201,14 @@ async def claim_license(
detail = error_data.get("detail", detail)
except Exception:
pass
raise HTTPException(status_code=status_code, detail=detail)
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=status_code
)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(e))
except requests.RequestException:
raise HTTPException(
status_code=502, detail="Failed to connect to license server"
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY, "Failed to connect to license server"
)
@@ -221,9 +225,9 @@ async def upload_license(
The license file must be cryptographically signed by Onyx.
"""
if MULTI_TENANT:
raise HTTPException(
status_code=400,
detail="License upload is only available for self-hosted deployments",
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"License upload is only available for self-hosted deployments",
)
try:
@@ -234,14 +238,14 @@ async def upload_license(
# Remove any stray whitespace/newlines from user input
license_data = license_data.strip()
except UnicodeDecodeError:
raise HTTPException(status_code=400, detail="Invalid license file format")
raise OnyxError(OnyxErrorCode.INVALID_INPUT, "Invalid license file format")
# Verify cryptographic signature - this is the only validation needed
# The license's tenant_id identifies the customer in control plane, not locally
try:
payload = verify_license_signature(license_data)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(e))
# Persist to DB and update cache
upsert_license(db_session, license_data)
@@ -297,9 +301,9 @@ async def delete_license(
Admin only - removes license from database and invalidates cache.
"""
if MULTI_TENANT:
raise HTTPException(
status_code=400,
detail="License deletion is only available for self-hosted deployments",
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"License deletion is only available for self-hosted deployments",
)
try:

View File

@@ -46,7 +46,6 @@ from fastapi import FastAPI
from fastapi import Request
from fastapi import Response
from fastapi.responses import JSONResponse
from redis.exceptions import RedisError
from sqlalchemy.exc import SQLAlchemyError
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
@@ -56,6 +55,7 @@ from ee.onyx.configs.license_enforcement_config import (
)
from ee.onyx.db.license import get_cached_license_metadata
from ee.onyx.db.license import refresh_license_cache
from onyx.cache.interface import CACHE_TRANSIENT_ERRORS
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.server.settings.models import ApplicationStatus
from shared_configs.contextvars import get_current_tenant_id
@@ -164,9 +164,9 @@ def add_license_enforcement_middleware(
"[license_enforcement] No license, allowing community features"
)
is_gated = False
except RedisError as e:
except CACHE_TRANSIENT_ERRORS as e:
logger.warning(f"Failed to check license metadata: {e}")
# Fail open - don't block users due to Redis connectivity issues
# Fail open - don't block users due to cache connectivity issues
is_gated = False
if is_gated:

View File

@@ -15,7 +15,9 @@ from uuid import UUID
from fastapi import APIRouter
from fastapi import Depends
from fastapi import FastAPI
from fastapi import Query
from fastapi import Request
from fastapi import Response
from fastapi.responses import JSONResponse
from fastapi_users.password import PasswordHelper
@@ -24,6 +26,7 @@ from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session
from ee.onyx.db.scim import ScimDAL
from ee.onyx.server.scim.auth import ScimAuthError
from ee.onyx.server.scim.auth import verify_scim_token
from ee.onyx.server.scim.filtering import parse_scim_filter
from ee.onyx.server.scim.models import SCIM_LIST_RESPONSE_SCHEMA
@@ -77,6 +80,22 @@ scim_router = APIRouter(prefix="/scim/v2", tags=["SCIM"])
_pw_helper = PasswordHelper()
def register_scim_exception_handlers(app: FastAPI) -> None:
"""Register SCIM-specific exception handlers on the FastAPI app.
Call this after ``app.include_router(scim_router)`` so that auth
failures from ``verify_scim_token`` return RFC 7644 §3.12 error
envelopes (with ``schemas`` and ``status`` fields) instead of
FastAPI's default ``{"detail": "..."}`` format.
"""
@app.exception_handler(ScimAuthError)
async def _handle_scim_auth_error(
_request: Request, exc: ScimAuthError
) -> ScimJSONResponse:
return _scim_error_response(exc.status_code, exc.detail)
def _get_provider(
_token: ScimToken = Depends(verify_scim_token),
) -> ScimProvider:
@@ -404,21 +423,63 @@ def create_user(
email = user_resource.userName.strip()
# externalId is how the IdP correlates this user on subsequent requests.
# Without it, the IdP can't find the user and will try to re-create,
# hitting a 409 conflict — so we require it up front.
if not user_resource.externalId:
return _scim_error_response(400, "externalId is required")
# Check for existing user — if they exist but aren't SCIM-managed yet,
# link them to the IdP rather than rejecting with 409.
external_id: str | None = user_resource.externalId
scim_username: str = user_resource.userName.strip()
fields: ScimMappingFields = _fields_from_resource(user_resource)
# Enforce seat limit
existing_user = dal.get_user_by_email(email)
if existing_user:
existing_mapping = dal.get_user_mapping_by_user_id(existing_user.id)
if existing_mapping:
return _scim_error_response(409, f"User with email {email} already exists")
# Adopt pre-existing user into SCIM management.
# Reactivating a deactivated user consumes a seat, so enforce the
# seat limit the same way replace_user does.
if user_resource.active and not existing_user.is_active:
seat_error = _check_seat_availability(dal)
if seat_error:
return _scim_error_response(403, seat_error)
personal_name = _scim_name_to_str(user_resource.name)
dal.update_user(
existing_user,
is_active=user_resource.active,
**({"personal_name": personal_name} if personal_name else {}),
)
try:
dal.create_user_mapping(
external_id=external_id,
user_id=existing_user.id,
scim_username=scim_username,
fields=fields,
)
dal.commit()
except IntegrityError:
dal.rollback()
return _scim_error_response(
409, f"User with email {email} already has a SCIM mapping"
)
return _scim_resource_response(
provider.build_user_resource(
existing_user,
external_id,
scim_username=scim_username,
fields=fields,
),
status_code=201,
)
# Only enforce seat limit for net-new users — adopting a pre-existing
# user doesn't consume a new seat.
seat_error = _check_seat_availability(dal)
if seat_error:
return _scim_error_response(403, seat_error)
# Check for existing user
if dal.get_user_by_email(email):
return _scim_error_response(409, f"User with email {email} already exists")
# Create user with a random password (SCIM users authenticate via IdP)
personal_name = _scim_name_to_str(user_resource.name)
user = User(
@@ -436,18 +497,21 @@ def create_user(
dal.rollback()
return _scim_error_response(409, f"User with email {email} already exists")
# Create SCIM mapping (externalId is validated above, always present)
external_id = user_resource.externalId
scim_username = user_resource.userName.strip()
fields = _fields_from_resource(user_resource)
dal.create_user_mapping(
external_id=external_id,
user_id=user.id,
scim_username=scim_username,
fields=fields,
)
dal.commit()
# Always create a SCIM mapping so that the user is marked as
# SCIM-managed. externalId may be None (RFC 7643 says it's optional).
try:
dal.create_user_mapping(
external_id=external_id,
user_id=user.id,
scim_username=scim_username,
fields=fields,
)
dal.commit()
except IntegrityError:
dal.rollback()
return _scim_error_response(
409, f"User with email {email} already has a SCIM mapping"
)
return _scim_resource_response(
provider.build_user_resource(

View File

@@ -19,7 +19,6 @@ import hashlib
import secrets
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Request
from sqlalchemy.orm import Session
@@ -28,6 +27,21 @@ from onyx.auth.utils import get_hashed_bearer_token_from_request
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import ScimToken
class ScimAuthError(Exception):
"""Raised when SCIM bearer token authentication fails.
Unlike HTTPException, this carries the status and detail so the SCIM
exception handler can wrap them in an RFC 7644 §3.12 error envelope
with ``schemas`` and ``status`` fields.
"""
def __init__(self, status_code: int, detail: str) -> None:
self.status_code = status_code
self.detail = detail
super().__init__(detail)
SCIM_TOKEN_PREFIX = "onyx_scim_"
SCIM_TOKEN_LENGTH = 48
@@ -82,23 +96,14 @@ def verify_scim_token(
"""
hashed = _get_hashed_scim_token_from_request(request)
if not hashed:
raise HTTPException(
status_code=401,
detail="Missing or invalid SCIM bearer token",
)
raise ScimAuthError(401, "Missing or invalid SCIM bearer token")
token = dal.get_token_by_hash(hashed)
if not token:
raise HTTPException(
status_code=401,
detail="Invalid SCIM bearer token",
)
raise ScimAuthError(401, "Invalid SCIM bearer token")
if not token.is_active:
raise HTTPException(
status_code=401,
detail="SCIM token has been revoked",
)
raise ScimAuthError(401, "SCIM token has been revoked")
return token

View File

@@ -365,6 +365,7 @@ class ScimTokenResponse(BaseModel):
is_active: bool
created_at: datetime
last_used_at: datetime | None = None
idp_domain: str | None = None
class ScimTokenCreatedResponse(ScimTokenResponse):

View File

@@ -153,26 +153,31 @@ class ScimProvider(ABC):
self,
user: User,
fields: ScimMappingFields,
) -> ScimName | None:
) -> ScimName:
"""Build SCIM name components for the response.
Round-trips stored ``given_name``/``family_name`` when available (so
the IdP gets back what it sent). Falls back to splitting
``personal_name`` for users provisioned before we stored components.
Always returns a ScimName — Okta's spec tests expect ``name``
(with ``givenName``/``familyName``) on every user resource.
Providers may override for custom behavior.
"""
if fields.given_name is not None or fields.family_name is not None:
return ScimName(
givenName=fields.given_name,
familyName=fields.family_name,
formatted=user.personal_name,
givenName=fields.given_name or "",
familyName=fields.family_name or "",
formatted=user.personal_name or "",
)
if not user.personal_name:
return None
# Derive a reasonable name from the email so that SCIM spec tests
# see non-empty givenName / familyName for every user resource.
local = user.email.split("@")[0] if user.email else ""
return ScimName(givenName=local, familyName="", formatted=local)
parts = user.personal_name.split(" ", 1)
return ScimName(
givenName=parts[0],
familyName=parts[1] if len(parts) > 1 else None,
familyName=parts[1] if len(parts) > 1 else "",
formatted=user.personal_name,
)

View File

@@ -18,7 +18,6 @@ from ee.onyx.server.enterprise_settings.store import (
store_settings as store_ee_settings,
)
from ee.onyx.server.enterprise_settings.store import upload_logo
from onyx.context.search.enums import RecencyBiasSetting
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import update_default_provider
@@ -161,12 +160,6 @@ def _seed_personas(db_session: Session, personas: list[PersonaUpsertRequest]) ->
user=None, # Seeding is done as admin
name=persona.name,
description=persona.description,
num_chunks=(
persona.num_chunks if persona.num_chunks is not None else 0.0
),
llm_relevance_filter=persona.llm_relevance_filter,
llm_filter_extraction=persona.llm_filter_extraction,
recency_bias=RecencyBiasSetting.AUTO,
document_set_ids=persona.document_set_ids,
llm_model_provider_override=persona.llm_model_provider_override,
llm_model_version_override=persona.llm_model_version_override,
@@ -178,6 +171,7 @@ def _seed_personas(db_session: Session, personas: list[PersonaUpsertRequest]) ->
system_prompt=persona.system_prompt,
task_prompt=persona.task_prompt,
datetime_aware=persona.datetime_aware,
featured=persona.featured,
commit=False,
)
db_session.commit()

View File

@@ -6,6 +6,7 @@ from sqlalchemy.exc import SQLAlchemyError
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
from ee.onyx.db.license import get_cached_license_metadata
from ee.onyx.db.license import refresh_license_cache
from onyx.cache.interface import CACHE_TRANSIENT_ERRORS
from onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.server.settings.models import ApplicationStatus
@@ -125,7 +126,7 @@ def apply_license_status_to_settings(settings: Settings) -> Settings:
# syncing) means indexed data may need protection.
settings.application_status = _BLOCKING_STATUS
settings.ee_features_enabled = False
except RedisError as e:
except CACHE_TRANSIENT_ERRORS as e:
logger.warning(f"Failed to check license metadata for settings: {e}")
# Fail closed - disable EE features if we can't verify license
settings.ee_features_enabled = False

View File

@@ -21,7 +21,6 @@ import asyncio
import httpx
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from ee.onyx.auth.users import current_admin_user
from ee.onyx.server.tenants.access import control_plane_dep
@@ -43,6 +42,8 @@ from onyx.auth.users import User
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id
@@ -116,9 +117,14 @@ async def create_customer_portal_session(
try:
portal_url = fetch_customer_portal_session(tenant_id, return_url)
return {"stripe_customer_portal_url": portal_url}
except Exception as e:
except OnyxError:
raise
except Exception:
logger.exception("Failed to create customer portal session")
raise HTTPException(status_code=500, detail=str(e))
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Failed to create customer portal session",
)
@router.post("/create-checkout-session")
@@ -134,9 +140,14 @@ async def create_checkout_session(
try:
checkout_url = fetch_stripe_checkout_session(tenant_id, billing_period, seats)
return {"stripe_checkout_url": checkout_url}
except Exception as e:
except OnyxError:
raise
except Exception:
logger.exception("Failed to create checkout session")
raise HTTPException(status_code=500, detail=str(e))
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Failed to create checkout session",
)
@router.post("/create-subscription-session")
@@ -147,15 +158,20 @@ async def create_subscription_session(
try:
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
if not tenant_id:
raise HTTPException(status_code=400, detail="Tenant ID not found")
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "Tenant ID not found")
billing_period = request.billing_period if request else "monthly"
session_id = fetch_stripe_checkout_session(tenant_id, billing_period)
return SubscriptionSessionResponse(sessionId=session_id)
except Exception as e:
except OnyxError:
raise
except Exception:
logger.exception("Failed to create subscription session")
raise HTTPException(status_code=500, detail=str(e))
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Failed to create subscription session",
)
@router.get("/stripe-publishable-key")
@@ -186,18 +202,18 @@ async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
# Fall back to S3 bucket
if not STRIPE_PUBLISHABLE_KEY_URL:
raise HTTPException(
status_code=500,
detail="Stripe publishable key is not configured",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Stripe publishable key is not configured",
)
try:
@@ -208,15 +224,15 @@ async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
# Validate key format
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
except httpx.HTTPError:
raise HTTPException(
status_code=500,
detail="Failed to fetch Stripe publishable key",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Failed to fetch Stripe publishable key",
)

View File

@@ -5,6 +5,8 @@ from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session
from ee.onyx.db.user_group import add_users_to_user_group
from ee.onyx.db.user_group import delete_user_group as db_delete_user_group
from ee.onyx.db.user_group import fetch_user_group
from ee.onyx.db.user_group import fetch_user_groups
from ee.onyx.db.user_group import fetch_user_groups_for_user
from ee.onyx.db.user_group import insert_user_group
@@ -20,6 +22,7 @@ from ee.onyx.server.user_group.models import UserGroupUpdate
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
@@ -153,3 +156,8 @@ def delete_user_group(
prepare_user_group_for_deletion(db_session, user_group_id)
except ValueError as e:
raise HTTPException(status_code=404, detail=str(e))
if DISABLE_VECTOR_DB:
user_group = fetch_user_group(db_session, user_group_id)
if user_group:
db_delete_user_group(db_session, user_group)

View File

@@ -23,11 +23,9 @@ from email_validator import EmailUndeliverableError
from email_validator import validate_email
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Query
from fastapi import Request
from fastapi import Response
from fastapi import status
from fastapi.responses import RedirectResponse
from fastapi.security import OAuth2PasswordRequestForm
from fastapi_users import BaseUserManager
@@ -119,8 +117,9 @@ from onyx.db.models import Persona
from onyx.db.models import User
from onyx.db.pat import fetch_user_for_pat
from onyx.db.users import get_user_by_email
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.redis.redis_pool import get_async_redis_connection
from onyx.redis.redis_pool import get_redis_client
from onyx.server.settings.store import load_settings
from onyx.server.utils import BasicAuthenticationError
from onyx.utils.logger import setup_logger
@@ -138,8 +137,6 @@ from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
REGISTER_INVITE_ONLY_CODE = "REGISTER_INVITE_ONLY"
def is_user_admin(user: User) -> bool:
return user.role == UserRole.ADMIN
@@ -201,13 +198,14 @@ def user_needs_to_be_verified() -> bool:
def anonymous_user_enabled(*, tenant_id: str | None = None) -> bool:
redis_client = get_redis_client(tenant_id=tenant_id)
value = redis_client.get(OnyxRedisLocks.ANONYMOUS_USER_ENABLED)
from onyx.cache.factory import get_cache_backend
cache = get_cache_backend(tenant_id=tenant_id)
value = cache.get(OnyxRedisLocks.ANONYMOUS_USER_ENABLED)
if value is None:
return False
assert isinstance(value, bytes)
return int(value.decode("utf-8")) == 1
@@ -227,17 +225,17 @@ def verify_email_is_invited(email: str) -> None:
whitelist = get_invited_users()
if not email:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={"reason": "Email must be specified"},
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"Email must be specified",
)
try:
email_info = validate_email(email, check_deliverability=False)
except EmailUndeliverableError:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={"reason": "Email is not valid"},
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"Email is not valid",
)
for email_whitelist in whitelist:
@@ -255,12 +253,9 @@ def verify_email_is_invited(email: str) -> None:
if email_info.normalized.lower() == email_info_whitelist.normalized.lower():
return
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail={
"code": REGISTER_INVITE_ONLY_CODE,
"reason": "This workspace is invite-only. Please ask your admin to invite you.",
},
raise OnyxError(
OnyxErrorCode.UNAUTHORIZED,
"This workspace is invite-only. Please ask your admin to invite you.",
)
@@ -272,9 +267,9 @@ def verify_email_in_whitelist(email: str, tenant_id: str) -> None:
def verify_email_domain(email: str) -> None:
if email.count("@") != 1:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Email is not valid",
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"Email is not valid",
)
local_part, domain = email.split("@")
@@ -283,39 +278,35 @@ def verify_email_domain(email: str) -> None:
if AUTH_TYPE == AuthType.CLOUD:
# Normalize googlemail.com to gmail.com (they deliver to the same inbox)
if domain == "googlemail.com":
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={"reason": "Please use @gmail.com instead of @googlemail.com."},
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"Please use @gmail.com instead of @googlemail.com.",
)
if "+" in local_part and domain != "onyx.app":
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={
"reason": "Email addresses with '+' are not allowed. Please use your base email address."
},
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"Email addresses with '+' are not allowed. Please use your base email address.",
)
# Check if email uses a disposable/temporary domain
if is_disposable_email(email):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={
"reason": "Disposable email addresses are not allowed. Please use a permanent email address."
},
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"Disposable email addresses are not allowed. Please use a permanent email address.",
)
# Check domain whitelist if configured
if VALID_EMAIL_DOMAINS:
if domain not in VALID_EMAIL_DOMAINS:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Email domain is not valid",
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"Email domain is not valid",
)
def enforce_seat_limit(db_session: Session, seats_needed: int = 1) -> None:
"""Raise HTTPException(402) if adding users would exceed the seat limit.
"""Raise OnyxError if adding users would exceed the seat limit.
No-op for multi-tenant or CE deployments.
"""
@@ -327,7 +318,7 @@ def enforce_seat_limit(db_session: Session, seats_needed: int = 1) -> None:
)(db_session, seats_needed=seats_needed)
if result is not None and not result.available:
raise HTTPException(status_code=402, detail=result.error_message)
raise OnyxError(OnyxErrorCode.SEAT_LIMIT_EXCEEDED, result.error_message)
class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
@@ -380,9 +371,9 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
captcha_token or "", expected_action="signup"
)
except CaptchaVerificationError as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={"reason": str(e)},
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
str(e),
)
# We verify the password here to make sure it's valid before we proceed
@@ -394,12 +385,9 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
# This prevents creating tenants for throwaway email addresses
try:
verify_email_domain(user_create.email)
except HTTPException as e:
except OnyxError as e:
# Log blocked disposable email attempts
if (
e.status_code == status.HTTP_400_BAD_REQUEST
and "Disposable email" in str(e.detail)
):
if e.status_code == 400 and "Disposable email" in str(e.message):
domain = (
user_create.email.split("@")[-1]
if "@" in user_create.email
@@ -543,7 +531,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
result = await db_session.execute(
select(Persona.id)
.where(
Persona.is_default_persona.is_(True),
Persona.featured.is_(True),
Persona.is_public.is_(True),
Persona.is_visible.is_(True),
Persona.deleted.is_(False),
@@ -623,7 +611,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
)
if not tenant_id:
raise HTTPException(status_code=401, detail="User not found")
raise OnyxError(OnyxErrorCode.UNAUTHENTICATED, "User not found")
# Proceed with the tenant context
token = None
@@ -725,11 +713,19 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
if user_by_session:
user = user_by_session
# If the user is inactive, check seat availability before
# upgrading role — otherwise they'd become an inactive BASIC
# user who still can't log in.
if not user.is_active:
with get_session_with_current_tenant() as sync_db:
enforce_seat_limit(sync_db)
await self.user_db.update(
user,
{
"is_verified": is_verified_by_default,
"role": UserRole.BASIC,
**({"is_active": True} if not user.is_active else {}),
},
)
@@ -855,8 +851,8 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
logger.error(
"Email is not configured. Please configure email in the admin panel"
)
raise HTTPException(
status.HTTP_500_INTERNAL_SERVER_ERROR,
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Your admin has not enabled this feature.",
)
tenant_id = await fetch_ee_implementation_or_noop(
@@ -956,12 +952,9 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
old_password, user.hashed_password
)
if not verified:
# Raise some HTTPException (or your custom exception) if old password is invalid:
from fastapi import HTTPException, status
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Invalid current password",
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"Invalid current password",
)
# If the hash was upgraded behind the scenes, we can keep it before setting the new password:
@@ -1231,11 +1224,7 @@ class FastAPIUserWithLogoutRouter(FastAPIUsers[models.UP, models.ID]):
)
logout_responses: OpenAPIResponseType = {
**{
status.HTTP_401_UNAUTHORIZED: {
"description": "Missing token or inactive user."
}
},
**{401: {"description": "Missing token or inactive user."}},
**backend.transport.get_openapi_logout_responses_success(),
}
@@ -1269,11 +1258,7 @@ class FastAPIUserWithLogoutRouter(FastAPIUsers[models.UP, models.ID]):
)
refresh_responses: OpenAPIResponseType = {
**{
status.HTTP_401_UNAUTHORIZED: {
"description": "Missing token or inactive user."
}
},
**{401: {"description": "Missing token or inactive user."}},
**backend.transport.get_openapi_login_responses_success(),
}
@@ -1326,9 +1311,9 @@ class FastAPIUserWithLogoutRouter(FastAPIUsers[models.UP, models.ID]):
return await backend.login(strategy, user)
except Exception as e:
logger.error(f"Unexpected error in refresh endpoint: {str(e)}")
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Token refresh failed: {str(e)}",
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
f"Token refresh failed: {str(e)}",
)
return router
@@ -1751,7 +1736,7 @@ def get_oauth_router(
name=callback_route_name,
description="The response varies based on the authentication backend used.",
responses={
status.HTTP_400_BAD_REQUEST: {
400: {
"model": ErrorModel,
"content": {
"application/json": {
@@ -1784,24 +1769,24 @@ def get_oauth_router(
)
if account_email is None:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ErrorCode.OAUTH_NOT_AVAILABLE_EMAIL,
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
ErrorCode.OAUTH_NOT_AVAILABLE_EMAIL,
)
try:
state_data = decode_jwt(state, state_secret, [STATE_TOKEN_AUDIENCE])
except jwt.DecodeError:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(
raise OnyxError(
OnyxErrorCode.INVALID_TOKEN,
getattr(
ErrorCode, "ACCESS_TOKEN_DECODE_ERROR", "ACCESS_TOKEN_DECODE_ERROR"
),
)
except jwt.ExpiredSignatureError:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(
raise OnyxError(
OnyxErrorCode.TOKEN_EXPIRED,
getattr(
ErrorCode,
"ACCESS_TOKEN_ALREADY_EXPIRED",
"ACCESS_TOKEN_ALREADY_EXPIRED",
@@ -1815,9 +1800,9 @@ def get_oauth_router(
or not state_csrf_token
or not secrets.compare_digest(cookie_csrf_token, state_csrf_token)
):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(ErrorCode, "OAUTH_INVALID_STATE", "OAUTH_INVALID_STATE"),
raise OnyxError(
OnyxErrorCode.CSRF_FAILURE,
getattr(ErrorCode, "OAUTH_INVALID_STATE", "OAUTH_INVALID_STATE"),
)
next_url = state_data.get("next_url", "/")
@@ -1845,15 +1830,15 @@ def get_oauth_router(
is_verified_by_default=is_verified_by_default,
)
except UserAlreadyExists:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ErrorCode.OAUTH_USER_ALREADY_EXISTS,
raise OnyxError(
OnyxErrorCode.CONFLICT,
ErrorCode.OAUTH_USER_ALREADY_EXISTS,
)
if not user.is_active:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ErrorCode.LOGIN_BAD_CREDENTIALS,
raise OnyxError(
OnyxErrorCode.UNAUTHENTICATED,
ErrorCode.LOGIN_BAD_CREDENTIALS,
)
# Login user

View File

@@ -1,142 +0,0 @@
from typing import Any
from typing import cast
from celery import Celery
from celery import signals
from celery import Task
from celery.apps.worker import Worker
from celery.signals import celeryd_init
from celery.signals import worker_init
from celery.signals import worker_process_init
from celery.signals import worker_ready
from celery.signals import worker_shutdown
import onyx.background.celery.apps.app_base as app_base
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
from onyx.configs.constants import POSTGRES_CELERY_WORKER_BACKGROUND_APP_NAME
from onyx.db.engine.sql_engine import SqlEngine
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
logger = setup_logger()
celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.background")
celery_app.Task = app_base.TenantAwareTask # type: ignore [misc]
@signals.task_prerun.connect
def on_task_prerun(
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple | None = None,
kwargs: dict | None = None,
**kwds: Any,
) -> None:
app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)
@signals.task_postrun.connect
def on_task_postrun(
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple | None = None,
kwargs: dict | None = None,
retval: Any | None = None,
state: str | None = None,
**kwds: Any,
) -> None:
app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)
@celeryd_init.connect
def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
app_base.on_celeryd_init(sender, conf, **kwargs)
@worker_init.connect
def on_worker_init(sender: Worker, **kwargs: Any) -> None:
EXTRA_CONCURRENCY = 8 # small extra fudge factor for connection limits
logger.info("worker_init signal received for consolidated background worker.")
SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_BACKGROUND_APP_NAME)
pool_size = cast(int, sender.concurrency) # type: ignore
SqlEngine.init_engine(pool_size=pool_size, max_overflow=EXTRA_CONCURRENCY)
# Initialize Vespa httpx pool (needed for light worker tasks)
if MANAGED_VESPA:
httpx_init_vespa_pool(
sender.concurrency + EXTRA_CONCURRENCY, # type: ignore
ssl_cert=VESPA_CLOUD_CERT_PATH,
ssl_key=VESPA_CLOUD_KEY_PATH,
)
else:
httpx_init_vespa_pool(sender.concurrency + EXTRA_CONCURRENCY) # type: ignore
app_base.wait_for_redis(sender, **kwargs)
app_base.wait_for_db(sender, **kwargs)
app_base.wait_for_vespa_or_shutdown(sender, **kwargs)
# Less startup checks in multi-tenant case
if MULTI_TENANT:
return
app_base.on_secondary_worker_init(sender, **kwargs)
@worker_ready.connect
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
app_base.on_worker_ready(sender, **kwargs)
@worker_shutdown.connect
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
app_base.on_worker_shutdown(sender, **kwargs)
@worker_process_init.connect
def init_worker(**kwargs: Any) -> None: # noqa: ARG001
SqlEngine.reset_engine()
@signals.setup_logging.connect
def on_setup_logging(
loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
) -> None:
app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)
base_bootsteps = app_base.get_bootsteps()
for bootstep in base_bootsteps:
celery_app.steps["worker"].add(bootstep)
celery_app.autodiscover_tasks(
app_base.filter_task_modules(
[
# Original background worker tasks
"onyx.background.celery.tasks.pruning",
"onyx.background.celery.tasks.monitoring",
"onyx.background.celery.tasks.user_file_processing",
"onyx.background.celery.tasks.llm_model_update",
# Light worker tasks
"onyx.background.celery.tasks.shared",
"onyx.background.celery.tasks.vespa",
"onyx.background.celery.tasks.connector_deletion",
"onyx.background.celery.tasks.doc_permission_syncing",
"onyx.background.celery.tasks.opensearch_migration",
# Docprocessing worker tasks
"onyx.background.celery.tasks.docprocessing",
# Docfetching worker tasks
"onyx.background.celery.tasks.docfetching",
# Sandbox cleanup tasks (isolated in build feature)
"onyx.server.features.build.sandbox.tasks",
]
)
)

View File

@@ -39,9 +39,13 @@ CT = TypeVar("CT", bound=ConnectorCheckpoint)
class SlimConnectorExtractionResult(BaseModel):
"""Result of extracting document IDs and hierarchy nodes from a connector."""
"""Result of extracting document IDs and hierarchy nodes from a connector.
doc_ids: set[str]
raw_id_to_parent maps document ID → parent_hierarchy_raw_node_id (or None).
Use raw_id_to_parent.keys() wherever the old set of IDs was needed.
"""
raw_id_to_parent: dict[str, str | None]
hierarchy_nodes: list[HierarchyNode]
@@ -93,30 +97,37 @@ def _get_failure_id(failure: ConnectorFailure) -> str | None:
return None
class BatchResult(BaseModel):
raw_id_to_parent: dict[str, str | None]
hierarchy_nodes: list[HierarchyNode]
def _extract_from_batch(
doc_list: Sequence[Document | SlimDocument | HierarchyNode | ConnectorFailure],
) -> tuple[set[str], list[HierarchyNode]]:
"""Separate a batch into document IDs and hierarchy nodes.
) -> BatchResult:
"""Separate a batch into document IDs (with parent mapping) and hierarchy nodes.
ConnectorFailure items have their failed document/entity IDs added to the
ID set so that failed-to-retrieve documents are not accidentally pruned.
ID dict so that failed-to-retrieve documents are not accidentally pruned.
"""
ids: set[str] = set()
ids: dict[str, str | None] = {}
hierarchy_nodes: list[HierarchyNode] = []
for item in doc_list:
if isinstance(item, HierarchyNode):
hierarchy_nodes.append(item)
ids.add(item.raw_node_id)
if item.raw_node_id not in ids:
ids[item.raw_node_id] = None
elif isinstance(item, ConnectorFailure):
failed_id = _get_failure_id(item)
if failed_id:
ids.add(failed_id)
ids[failed_id] = None
logger.warning(
f"Failed to retrieve document {failed_id}: " f"{item.failure_message}"
)
else:
ids.add(item.id)
return ids, hierarchy_nodes
parent_raw = getattr(item, "parent_hierarchy_raw_node_id", None)
ids[item.id] = parent_raw
return BatchResult(raw_id_to_parent=ids, hierarchy_nodes=hierarchy_nodes)
def extract_ids_from_runnable_connector(
@@ -132,7 +143,7 @@ def extract_ids_from_runnable_connector(
Optionally, a callback can be passed to handle the length of each document batch.
"""
all_connector_doc_ids: set[str] = set()
all_raw_id_to_parent: dict[str, str | None] = {}
all_hierarchy_nodes: list[HierarchyNode] = []
# Sequence (covariant) lets all the specific list[...] iterator types unify here
@@ -177,15 +188,20 @@ def extract_ids_from_runnable_connector(
"extract_ids_from_runnable_connector: Stop signal detected"
)
batch_ids, batch_nodes = _extract_from_batch(doc_list)
all_connector_doc_ids.update(doc_batch_processing_func(batch_ids))
batch_result = _extract_from_batch(doc_list)
batch_ids = batch_result.raw_id_to_parent
batch_nodes = batch_result.hierarchy_nodes
doc_batch_processing_func(batch_ids)
for k, v in batch_ids.items():
if v is not None or k not in all_raw_id_to_parent:
all_raw_id_to_parent[k] = v
all_hierarchy_nodes.extend(batch_nodes)
if callback:
callback.progress("extract_ids_from_runnable_connector", len(batch_ids))
return SlimConnectorExtractionResult(
doc_ids=all_connector_doc_ids,
raw_id_to_parent=all_raw_id_to_parent,
hierarchy_nodes=all_hierarchy_nodes,
)

View File

@@ -1,23 +0,0 @@
import onyx.background.celery.configs.base as shared_config
from onyx.configs.app_configs import CELERY_WORKER_BACKGROUND_CONCURRENCY
broker_url = shared_config.broker_url
broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
broker_pool_limit = shared_config.broker_pool_limit
broker_transport_options = shared_config.broker_transport_options
redis_socket_keepalive = shared_config.redis_socket_keepalive
redis_retry_on_timeout = shared_config.redis_retry_on_timeout
redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval
result_backend = shared_config.result_backend
result_expires = shared_config.result_expires # 86400 seconds is the default
task_default_priority = shared_config.task_default_priority
task_acks_late = shared_config.task_acks_late
worker_concurrency = CELERY_WORKER_BACKGROUND_CONCURRENCY
worker_pool = "threads"
# Increased from 1 to 4 to handle fast light worker tasks more efficiently
# This allows the worker to prefetch multiple tasks per thread
worker_prefetch_multiplier = 4

View File

@@ -241,8 +241,7 @@ _VECTOR_DB_BEAT_TASK_NAMES: set[str] = {
"check-for-index-attempt-cleanup",
"check-for-doc-permissions-sync",
"check-for-external-group-sync",
"check-for-documents-for-opensearch-migration",
"migrate-documents-from-vespa-to-opensearch",
"migrate-chunks-from-vespa-to-opensearch",
}
if DISABLE_VECTOR_DB:

View File

@@ -30,6 +30,7 @@ from onyx.background.celery.tasks.opensearch_migration.transformer import (
transform_vespa_chunks_to_opensearch_chunks,
)
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import VESPA_MIGRATION_REQUEST_TIMEOUT_S
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_session_with_current_tenant
@@ -47,6 +48,7 @@ from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.opensearch_document_index import (
OpenSearchDocumentIndex,
)
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
from onyx.indexing.models import IndexingSetting
from onyx.redis.redis_pool import get_redis_client
@@ -146,7 +148,12 @@ def migrate_chunks_from_vespa_to_opensearch_task(
task_logger.error(err_str)
return False
with get_session_with_current_tenant() as db_session:
with (
get_session_with_current_tenant() as db_session,
get_vespa_http_client(
timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S
) as vespa_client,
):
try_insert_opensearch_tenant_migration_record_with_commit(db_session)
search_settings = get_current_search_settings(db_session)
tenant_state = TenantState(tenant_id=tenant_id, multitenant=MULTI_TENANT)
@@ -161,6 +168,7 @@ def migrate_chunks_from_vespa_to_opensearch_task(
index_name=search_settings.index_name,
tenant_state=tenant_state,
large_chunks_enabled=False,
httpx_client=vespa_client,
)
sanitized_doc_start_time = time.monotonic()

View File

@@ -29,6 +29,7 @@ from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_PRUNING_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT
from onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
@@ -47,6 +48,8 @@ from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.hierarchy import link_hierarchy_nodes_to_documents
from onyx.db.hierarchy import update_document_parent_hierarchy_nodes
from onyx.db.hierarchy import upsert_hierarchy_nodes_batch
from onyx.db.models import ConnectorCredentialPair
from onyx.db.sync_record import insert_sync_record
@@ -57,6 +60,8 @@ from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_connector_prune import RedisConnectorPrunePayload
from onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch
from onyx.redis.redis_hierarchy import ensure_source_node_exists
from onyx.redis.redis_hierarchy import get_node_id_from_raw_id
from onyx.redis.redis_hierarchy import get_source_node_id_from_cache
from onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
@@ -113,6 +118,38 @@ class PruneCallback(IndexingCallbackBase):
super().progress(tag, amount)
def _resolve_and_update_document_parents(
db_session: Session,
redis_client: Redis,
source: DocumentSource,
raw_id_to_parent: dict[str, str | None],
) -> None:
"""Resolve parent_hierarchy_raw_node_id → parent_hierarchy_node_id for
each document and bulk-update the DB. Mirrors the resolution logic in
run_docfetching.py."""
source_node_id = get_source_node_id_from_cache(redis_client, db_session, source)
resolved: dict[str, int | None] = {}
for doc_id, raw_parent_id in raw_id_to_parent.items():
if raw_parent_id is None:
continue
node_id, found = get_node_id_from_raw_id(redis_client, source, raw_parent_id)
resolved[doc_id] = node_id if found else source_node_id
if not resolved:
return
update_document_parent_hierarchy_nodes(
db_session=db_session,
doc_parent_map=resolved,
commit=True,
)
task_logger.info(
f"Pruning: resolved and updated parent hierarchy for "
f"{len(resolved)} documents (source={source.value})"
)
"""Jobs / utils for kicking off pruning tasks."""
@@ -535,22 +572,22 @@ def connector_pruning_generator_task(
extraction_result = extract_ids_from_runnable_connector(
runnable_connector, callback
)
all_connector_doc_ids = extraction_result.doc_ids
all_connector_doc_ids = extraction_result.raw_id_to_parent
# Process hierarchy nodes (same as docfetching):
# upsert to Postgres and cache in Redis
source = cc_pair.connector.source
redis_client = get_redis_client(tenant_id=tenant_id)
if extraction_result.hierarchy_nodes:
is_connector_public = cc_pair.access_type == AccessType.PUBLIC
redis_client = get_redis_client(tenant_id=tenant_id)
ensure_source_node_exists(
redis_client, db_session, cc_pair.connector.source
)
ensure_source_node_exists(redis_client, db_session, source)
upserted_nodes = upsert_hierarchy_nodes_batch(
db_session=db_session,
nodes=extraction_result.hierarchy_nodes,
source=cc_pair.connector.source,
source=source,
commit=True,
is_connector_public=is_connector_public,
)
@@ -561,7 +598,7 @@ def connector_pruning_generator_task(
]
cache_hierarchy_nodes_batch(
redis_client=redis_client,
source=cc_pair.connector.source,
source=source,
entries=cache_entries,
)
@@ -570,6 +607,26 @@ def connector_pruning_generator_task(
f"hierarchy nodes for cc_pair={cc_pair_id}"
)
ensure_source_node_exists(redis_client, db_session, source)
# Resolve parent_hierarchy_raw_node_id → parent_hierarchy_node_id
# and bulk-update documents, mirroring the docfetching resolution
_resolve_and_update_document_parents(
db_session=db_session,
redis_client=redis_client,
source=source,
raw_id_to_parent=all_connector_doc_ids,
)
# Link hierarchy nodes to documents for sources where pages can be
# both hierarchy nodes AND documents (e.g. Notion, Confluence)
all_doc_id_list = list(all_connector_doc_ids.keys())
link_hierarchy_nodes_to_documents(
db_session=db_session,
document_ids=all_doc_id_list,
source=source,
commit=True,
)
# a list of docs in our local index
all_indexed_document_ids = {
doc.id
@@ -581,7 +638,9 @@ def connector_pruning_generator_task(
}
# generate list of docs to remove (no longer in the source)
doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids)
doc_ids_to_remove = list(
all_indexed_document_ids - all_connector_doc_ids.keys()
)
task_logger.info(
"Pruning set collected: "

View File

@@ -414,34 +414,31 @@ def _process_user_file_with_indexing(
raise RuntimeError(f"Indexing pipeline failed for user file {user_file_id}")
@shared_task(
name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
bind=True,
ignore_result=True,
)
def process_single_user_file(
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
def process_user_file_impl(
*, user_file_id: str, tenant_id: str, redis_locking: bool
) -> None:
task_logger.info(f"process_single_user_file - Starting id={user_file_id}")
"""Core implementation for processing a single user file.
When redis_locking=True, acquires a per-file Redis lock and clears the
queued-key guard (Celery path). When redis_locking=False, skips all Redis
operations (BackgroundTask path).
"""
task_logger.info(f"process_user_file_impl - Starting id={user_file_id}")
start = time.monotonic()
redis_client = get_redis_client(tenant_id=tenant_id)
# Clear the "queued" guard set by the beat generator so that the next beat
# cycle can re-enqueue this file if it is still in PROCESSING state after
# this task completes or fails.
redis_client.delete(_user_file_queued_key(user_file_id))
file_lock: RedisLock = redis_client.lock(
_user_file_lock_key(user_file_id),
timeout=CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT,
)
if not file_lock.acquire(blocking=False):
task_logger.info(
f"process_single_user_file - Lock held, skipping user_file_id={user_file_id}"
file_lock: RedisLock | None = None
if redis_locking:
redis_client = get_redis_client(tenant_id=tenant_id)
redis_client.delete(_user_file_queued_key(user_file_id))
file_lock = redis_client.lock(
_user_file_lock_key(user_file_id),
timeout=CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT,
)
return None
if file_lock is not None and not file_lock.acquire(blocking=False):
task_logger.info(
f"process_user_file_impl - Lock held, skipping user_file_id={user_file_id}"
)
return
documents: list[Document] = []
try:
@@ -449,15 +446,18 @@ def process_single_user_file(
uf = db_session.get(UserFile, _as_uuid(user_file_id))
if not uf:
task_logger.warning(
f"process_single_user_file - UserFile not found id={user_file_id}"
f"process_user_file_impl - UserFile not found id={user_file_id}"
)
return None
return
if uf.status != UserFileStatus.PROCESSING:
if uf.status not in (
UserFileStatus.PROCESSING,
UserFileStatus.INDEXING,
):
task_logger.info(
f"process_single_user_file - Skipping id={user_file_id} status={uf.status}"
f"process_user_file_impl - Skipping id={user_file_id} status={uf.status}"
)
return None
return
connector = LocalFileConnector(
file_locations=[uf.file_id],
@@ -471,7 +471,6 @@ def process_single_user_file(
[doc for doc in batch if not isinstance(doc, HierarchyNode)]
)
# update the document id to userfile id in the documents
for document in documents:
document.id = str(user_file_id)
document.source = DocumentSource.USER_FILE
@@ -493,9 +492,8 @@ def process_single_user_file(
except Exception as e:
task_logger.exception(
f"process_single_user_file - Error processing file id={user_file_id} - {e.__class__.__name__}"
f"process_user_file_impl - Error processing file id={user_file_id} - {e.__class__.__name__}"
)
# don't update the status if the user file is being deleted
current_user_file = db_session.get(UserFile, _as_uuid(user_file_id))
if (
current_user_file
@@ -504,33 +502,43 @@ def process_single_user_file(
uf.status = UserFileStatus.FAILED
db_session.add(uf)
db_session.commit()
return None
return
elapsed = time.monotonic() - start
task_logger.info(
f"process_single_user_file - Finished id={user_file_id} docs={len(documents)} elapsed={elapsed:.2f}s"
f"process_user_file_impl - Finished id={user_file_id} docs={len(documents)} elapsed={elapsed:.2f}s"
)
return None
except Exception as e:
# Attempt to mark the file as failed
with get_session_with_current_tenant() as db_session:
uf = db_session.get(UserFile, _as_uuid(user_file_id))
if uf:
# don't update the status if the user file is being deleted
if uf.status != UserFileStatus.DELETING:
uf.status = UserFileStatus.FAILED
db_session.add(uf)
db_session.commit()
task_logger.exception(
f"process_single_user_file - Error processing file id={user_file_id} - {e.__class__.__name__}"
f"process_user_file_impl - Error processing file id={user_file_id} - {e.__class__.__name__}"
)
return None
raise
finally:
if file_lock.owned():
if file_lock is not None and file_lock.owned():
file_lock.release()
@shared_task(
name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
bind=True,
ignore_result=True,
)
def process_single_user_file(
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
) -> None:
process_user_file_impl(
user_file_id=user_file_id, tenant_id=tenant_id, redis_locking=True
)
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_USER_FILE_DELETE,
soft_time_limit=300,
@@ -581,36 +589,38 @@ def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
return None
@shared_task(
name=OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
bind=True,
ignore_result=True,
)
def process_single_user_file_delete(
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
def delete_user_file_impl(
*, user_file_id: str, tenant_id: str, redis_locking: bool
) -> None:
"""Process a single user file delete."""
task_logger.info(f"process_single_user_file_delete - Starting id={user_file_id}")
redis_client = get_redis_client(tenant_id=tenant_id)
file_lock: RedisLock = redis_client.lock(
_user_file_delete_lock_key(user_file_id),
timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
)
if not file_lock.acquire(blocking=False):
task_logger.info(
f"process_single_user_file_delete - Lock held, skipping user_file_id={user_file_id}"
"""Core implementation for deleting a single user file.
When redis_locking=True, acquires a per-file Redis lock (Celery path).
When redis_locking=False, skips Redis operations (BackgroundTask path).
"""
task_logger.info(f"delete_user_file_impl - Starting id={user_file_id}")
file_lock: RedisLock | None = None
if redis_locking:
redis_client = get_redis_client(tenant_id=tenant_id)
file_lock = redis_client.lock(
_user_file_delete_lock_key(user_file_id),
timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
)
return None
if file_lock is not None and not file_lock.acquire(blocking=False):
task_logger.info(
f"delete_user_file_impl - Lock held, skipping user_file_id={user_file_id}"
)
return
try:
with get_session_with_current_tenant() as db_session:
user_file = db_session.get(UserFile, _as_uuid(user_file_id))
if not user_file:
task_logger.info(
f"process_single_user_file_delete - User file not found id={user_file_id}"
f"delete_user_file_impl - User file not found id={user_file_id}"
)
return None
return
# 1) Delete vector DB chunks (skip when disabled)
if not DISABLE_VECTOR_DB:
if MANAGED_VESPA:
httpx_init_vespa_pool(
@@ -648,7 +658,6 @@ def process_single_user_file_delete(
chunk_count=chunk_count,
)
# 2) Delete the user-uploaded file content from filestore (blob + metadata)
file_store = get_default_file_store()
try:
file_store.delete_file(user_file.file_id)
@@ -656,26 +665,34 @@ def process_single_user_file_delete(
user_file_id_to_plaintext_file_name(user_file.id)
)
except Exception as e:
# This block executed only if the file is not found in the filestore
task_logger.exception(
f"process_single_user_file_delete - Error deleting file id={user_file.id} - {e.__class__.__name__}"
f"delete_user_file_impl - Error deleting file id={user_file.id} - {e.__class__.__name__}"
)
# 3) Finally, delete the UserFile row
db_session.delete(user_file)
db_session.commit()
task_logger.info(
f"process_single_user_file_delete - Completed id={user_file_id}"
)
task_logger.info(f"delete_user_file_impl - Completed id={user_file_id}")
except Exception as e:
task_logger.exception(
f"process_single_user_file_delete - Error processing file id={user_file_id} - {e.__class__.__name__}"
f"delete_user_file_impl - Error processing file id={user_file_id} - {e.__class__.__name__}"
)
return None
raise
finally:
if file_lock.owned():
if file_lock is not None and file_lock.owned():
file_lock.release()
return None
@shared_task(
name=OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
bind=True,
ignore_result=True,
)
def process_single_user_file_delete(
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
) -> None:
delete_user_file_impl(
user_file_id=user_file_id, tenant_id=tenant_id, redis_locking=True
)
@shared_task(
@@ -747,32 +764,30 @@ def check_for_user_file_project_sync(self: Task, *, tenant_id: str) -> None:
return None
@shared_task(
name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
bind=True,
ignore_result=True,
)
def process_single_user_file_project_sync(
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
def project_sync_user_file_impl(
*, user_file_id: str, tenant_id: str, redis_locking: bool
) -> None:
"""Process a single user file project sync."""
task_logger.info(
f"process_single_user_file_project_sync - Starting id={user_file_id}"
)
"""Core implementation for syncing a user file's project/persona metadata.
redis_client = get_redis_client(tenant_id=tenant_id)
redis_client.delete(_user_file_project_sync_queued_key(user_file_id))
When redis_locking=True, acquires a per-file Redis lock and clears the
queued-key guard (Celery path). When redis_locking=False, skips Redis
operations (BackgroundTask path).
"""
task_logger.info(f"project_sync_user_file_impl - Starting id={user_file_id}")
file_lock: RedisLock = redis_client.lock(
user_file_project_sync_lock_key(user_file_id),
timeout=CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT,
)
if not file_lock.acquire(blocking=False):
task_logger.info(
f"process_single_user_file_project_sync - Lock held, skipping user_file_id={user_file_id}"
file_lock: RedisLock | None = None
if redis_locking:
redis_client = get_redis_client(tenant_id=tenant_id)
redis_client.delete(_user_file_project_sync_queued_key(user_file_id))
file_lock = redis_client.lock(
user_file_project_sync_lock_key(user_file_id),
timeout=CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT,
)
return None
if file_lock is not None and not file_lock.acquire(blocking=False):
task_logger.info(
f"project_sync_user_file_impl - Lock held, skipping user_file_id={user_file_id}"
)
return
try:
with get_session_with_current_tenant() as db_session:
@@ -783,11 +798,10 @@ def process_single_user_file_project_sync(
).scalar_one_or_none()
if not user_file:
task_logger.info(
f"process_single_user_file_project_sync - User file not found id={user_file_id}"
f"project_sync_user_file_impl - User file not found id={user_file_id}"
)
return None
return
# Sync project metadata to vector DB (skip when disabled)
if not DISABLE_VECTOR_DB:
if MANAGED_VESPA:
httpx_init_vespa_pool(
@@ -822,7 +836,7 @@ def process_single_user_file_project_sync(
)
task_logger.info(
f"process_single_user_file_project_sync - User file id={user_file_id}"
f"project_sync_user_file_impl - User file id={user_file_id}"
)
user_file.needs_project_sync = False
@@ -835,11 +849,22 @@ def process_single_user_file_project_sync(
except Exception as e:
task_logger.exception(
f"process_single_user_file_project_sync - Error syncing project for file id={user_file_id} - {e.__class__.__name__}"
f"project_sync_user_file_impl - Error syncing project for file id={user_file_id} - {e.__class__.__name__}"
)
return None
raise
finally:
if file_lock.owned():
if file_lock is not None and file_lock.owned():
file_lock.release()
return None
@shared_task(
name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
bind=True,
ignore_result=True,
)
def process_single_user_file_project_sync(
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
) -> None:
project_sync_user_file_impl(
user_file_id=user_file_id, tenant_id=tenant_id, redis_locking=True
)

View File

@@ -1,10 +0,0 @@
from celery import Celery
from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable
set_is_ee_based_on_env_variable()
app: Celery = fetch_versioned_implementation(
"onyx.background.celery.apps.background",
"celery_app",
)

View File

@@ -0,0 +1,307 @@
"""Periodic poller for NO_VECTOR_DB deployments.
Replaces Celery Beat and background workers with a lightweight daemon thread
that runs from the API server process. Two responsibilities:
1. Recovery polling (every 30 s): re-processes user files stuck in
PROCESSING / DELETING / needs_sync states via the drain loops defined
in ``task_utils.py``.
2. Periodic task execution (configurable intervals): runs LLM model updates
and scheduled evals at their configured cadences, with Postgres advisory
lock deduplication across multiple API server instances.
"""
import threading
import time
from collections.abc import Callable
from dataclasses import dataclass
from dataclasses import field
from onyx.utils.logger import setup_logger
logger = setup_logger()
RECOVERY_INTERVAL_SECONDS = 30
PERIODIC_TASK_LOCK_BASE = 20_000
PERIODIC_TASK_KV_PREFIX = "periodic_poller:last_claimed:"
# ------------------------------------------------------------------
# Periodic task definitions
# ------------------------------------------------------------------
_NEVER_RAN: float = -1e18
@dataclass
class _PeriodicTaskDef:
name: str
interval_seconds: float
lock_id: int
run_fn: Callable[[], None]
last_run_at: float = field(default=_NEVER_RAN)
def _run_auto_llm_update() -> None:
from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL
if not AUTO_LLM_CONFIG_URL:
return
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.llm.well_known_providers.auto_update_service import (
sync_llm_models_from_github,
)
with get_session_with_current_tenant() as db_session:
sync_llm_models_from_github(db_session)
def _run_cache_cleanup() -> None:
from onyx.cache.postgres_backend import cleanup_expired_cache_entries
cleanup_expired_cache_entries()
def _run_scheduled_eval() -> None:
from onyx.configs.app_configs import BRAINTRUST_API_KEY
from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES
from onyx.configs.app_configs import SCHEDULED_EVAL_PERMISSIONS_EMAIL
from onyx.configs.app_configs import SCHEDULED_EVAL_PROJECT
if not all(
[
BRAINTRUST_API_KEY,
SCHEDULED_EVAL_PROJECT,
SCHEDULED_EVAL_DATASET_NAMES,
SCHEDULED_EVAL_PERMISSIONS_EMAIL,
]
):
return
from datetime import datetime
from datetime import timezone
from onyx.evals.eval import run_eval
from onyx.evals.models import EvalConfigurationOptions
run_timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d")
for dataset_name in SCHEDULED_EVAL_DATASET_NAMES:
try:
run_eval(
configuration=EvalConfigurationOptions(
search_permissions_email=SCHEDULED_EVAL_PERMISSIONS_EMAIL,
dataset_name=dataset_name,
no_send_logs=False,
braintrust_project=SCHEDULED_EVAL_PROJECT,
experiment_name=f"{dataset_name} - {run_timestamp}",
),
remote_dataset_name=dataset_name,
)
except Exception:
logger.exception(
f"Periodic poller - Failed scheduled eval for dataset {dataset_name}"
)
_CACHE_CLEANUP_INTERVAL_SECONDS = 300
def _build_periodic_tasks() -> list[_PeriodicTaskDef]:
from onyx.cache.interface import CacheBackendType
from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL
from onyx.configs.app_configs import AUTO_LLM_UPDATE_INTERVAL_SECONDS
from onyx.configs.app_configs import CACHE_BACKEND
from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES
tasks: list[_PeriodicTaskDef] = []
if CACHE_BACKEND == CacheBackendType.POSTGRES:
tasks.append(
_PeriodicTaskDef(
name="cache-cleanup",
interval_seconds=_CACHE_CLEANUP_INTERVAL_SECONDS,
lock_id=PERIODIC_TASK_LOCK_BASE + 2,
run_fn=_run_cache_cleanup,
)
)
if AUTO_LLM_CONFIG_URL:
tasks.append(
_PeriodicTaskDef(
name="auto-llm-update",
interval_seconds=AUTO_LLM_UPDATE_INTERVAL_SECONDS,
lock_id=PERIODIC_TASK_LOCK_BASE,
run_fn=_run_auto_llm_update,
)
)
if SCHEDULED_EVAL_DATASET_NAMES:
tasks.append(
_PeriodicTaskDef(
name="scheduled-eval",
interval_seconds=7 * 24 * 3600,
lock_id=PERIODIC_TASK_LOCK_BASE + 1,
run_fn=_run_scheduled_eval,
)
)
return tasks
# ------------------------------------------------------------------
# Periodic task runner with advisory-lock-guarded claim
# ------------------------------------------------------------------
def _try_claim_task(task_def: _PeriodicTaskDef) -> bool:
"""Atomically check whether *task_def* should run and record a claim.
Uses a transaction-scoped advisory lock for atomicity combined with a
``KVStore`` timestamp for cross-instance dedup. The DB session is held
only for this brief claim transaction, not during task execution.
"""
from datetime import datetime
from datetime import timezone
from sqlalchemy import text
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import KVStore
kv_key = PERIODIC_TASK_KV_PREFIX + task_def.name
with get_session_with_current_tenant() as db_session:
acquired = db_session.execute(
text("SELECT pg_try_advisory_xact_lock(:id)"),
{"id": task_def.lock_id},
).scalar()
if not acquired:
return False
row = db_session.query(KVStore).filter_by(key=kv_key).first()
if row and row.value is not None:
last_claimed = datetime.fromisoformat(str(row.value))
elapsed = (datetime.now(timezone.utc) - last_claimed).total_seconds()
if elapsed < task_def.interval_seconds:
return False
now_ts = datetime.now(timezone.utc).isoformat()
if row:
row.value = now_ts
else:
db_session.add(KVStore(key=kv_key, value=now_ts))
db_session.commit()
return True
def _try_run_periodic_task(task_def: _PeriodicTaskDef) -> None:
"""Run *task_def* if its interval has elapsed and no peer holds the lock."""
now = time.monotonic()
if now - task_def.last_run_at < task_def.interval_seconds:
return
if not _try_claim_task(task_def):
return
try:
task_def.run_fn()
task_def.last_run_at = now
except Exception:
logger.exception(
f"Periodic poller - Error running periodic task {task_def.name}"
)
# ------------------------------------------------------------------
# Recovery / drain loop runner
# ------------------------------------------------------------------
def _run_drain_loops(tenant_id: str) -> None:
from onyx.background.task_utils import drain_delete_loop
from onyx.background.task_utils import drain_processing_loop
from onyx.background.task_utils import drain_project_sync_loop
drain_processing_loop(tenant_id)
drain_delete_loop(tenant_id)
drain_project_sync_loop(tenant_id)
# ------------------------------------------------------------------
# Startup recovery (10g)
# ------------------------------------------------------------------
def recover_stuck_user_files(tenant_id: str) -> None:
"""Run all drain loops once to re-process files left in intermediate states.
Called from ``lifespan()`` on startup when ``DISABLE_VECTOR_DB`` is set.
"""
logger.info("recover_stuck_user_files - Checking for stuck user files")
try:
_run_drain_loops(tenant_id)
except Exception:
logger.exception("recover_stuck_user_files - Error during recovery")
# ------------------------------------------------------------------
# Daemon thread (10f)
# ------------------------------------------------------------------
_shutdown_event = threading.Event()
_poller_thread: threading.Thread | None = None
def _poller_loop(tenant_id: str) -> None:
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
periodic_tasks = _build_periodic_tasks()
logger.info(
f"Periodic poller started with {len(periodic_tasks)} periodic task(s): "
f"{[t.name for t in periodic_tasks]}"
)
while not _shutdown_event.is_set():
try:
_run_drain_loops(tenant_id)
except Exception:
logger.exception("Periodic poller - Error in recovery polling")
for task_def in periodic_tasks:
try:
_try_run_periodic_task(task_def)
except Exception:
logger.exception(
f"Periodic poller - Unhandled error checking task {task_def.name}"
)
_shutdown_event.wait(RECOVERY_INTERVAL_SECONDS)
def start_periodic_poller(tenant_id: str) -> None:
"""Start the periodic poller daemon thread."""
global _poller_thread # noqa: PLW0603
_shutdown_event.clear()
_poller_thread = threading.Thread(
target=_poller_loop,
args=(tenant_id,),
daemon=True,
name="no-vectordb-periodic-poller",
)
_poller_thread.start()
logger.info("Periodic poller thread started")
def stop_periodic_poller() -> None:
"""Signal the periodic poller to stop and wait for it to exit."""
global _poller_thread # noqa: PLW0603
if _poller_thread is None:
return
_shutdown_event.set()
_poller_thread.join(timeout=10)
if _poller_thread.is_alive():
logger.warning("Periodic poller thread did not stop within timeout")
_poller_thread = None
logger.info("Periodic poller thread stopped")

View File

@@ -1,3 +1,33 @@
"""Background task utilities.
Contains query-history report helpers (used by all deployment modes) and
in-process background task execution helpers for NO_VECTOR_DB mode:
- Atomic claim-and-mark helpers that prevent duplicate processing
- Drain loops that process all pending user file work
Each claim function runs a short-lived transaction: SELECT ... FOR UPDATE
SKIP LOCKED, UPDATE the row to remove it from future queries, COMMIT.
After the commit the row lock is released, but the row is no longer
eligible for re-claiming. No long-lived sessions or advisory locks.
"""
from uuid import UUID
import sqlalchemy as sa
from sqlalchemy import select
from sqlalchemy.orm import Session
from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from onyx.utils.logger import setup_logger
logger = setup_logger()
# ------------------------------------------------------------------
# Query-history report helpers (pre-existing, used by all modes)
# ------------------------------------------------------------------
QUERY_REPORT_NAME_PREFIX = "query-history"
@@ -9,3 +39,168 @@ def construct_query_history_report_name(
def extract_task_id_from_query_history_report_name(name: str) -> str:
return name.removeprefix(f"{QUERY_REPORT_NAME_PREFIX}-").removesuffix(".csv")
# ------------------------------------------------------------------
# Atomic claim-and-mark helpers
# ------------------------------------------------------------------
# Each function runs inside a single short-lived session/transaction:
# 1. SELECT ... FOR UPDATE SKIP LOCKED (locks one eligible row)
# 2. UPDATE the row so it is no longer eligible
# 3. COMMIT (releases the row lock)
# After the commit, no other drain loop can claim the same row.
def _claim_next_processing_file(db_session: Session) -> UUID | None:
"""Claim the next PROCESSING file by transitioning it to INDEXING.
Returns the file id, or None when no eligible files remain.
"""
file_id = db_session.execute(
select(UserFile.id)
.where(UserFile.status == UserFileStatus.PROCESSING)
.order_by(UserFile.created_at)
.limit(1)
.with_for_update(skip_locked=True)
).scalar_one_or_none()
if file_id is None:
return None
db_session.execute(
sa.update(UserFile)
.where(UserFile.id == file_id)
.values(status=UserFileStatus.INDEXING)
)
db_session.commit()
return file_id
def _claim_next_deleting_file(
db_session: Session,
exclude_ids: set[UUID] | None = None,
) -> UUID | None:
"""Claim the next DELETING file.
No status transition needed — the impl deletes the row on success.
The short-lived FOR UPDATE lock prevents concurrent claims.
*exclude_ids* prevents re-processing the same file if the impl fails.
"""
stmt = (
select(UserFile.id)
.where(UserFile.status == UserFileStatus.DELETING)
.order_by(UserFile.created_at)
.limit(1)
.with_for_update(skip_locked=True)
)
if exclude_ids:
stmt = stmt.where(UserFile.id.notin_(exclude_ids))
file_id = db_session.execute(stmt).scalar_one_or_none()
db_session.commit()
return file_id
def _claim_next_sync_file(
db_session: Session,
exclude_ids: set[UUID] | None = None,
) -> UUID | None:
"""Claim the next file needing project/persona sync.
No status transition needed — the impl clears the sync flags on
success. The short-lived FOR UPDATE lock prevents concurrent claims.
*exclude_ids* prevents re-processing the same file if the impl fails.
"""
stmt = (
select(UserFile.id)
.where(
sa.and_(
sa.or_(
UserFile.needs_project_sync.is_(True),
UserFile.needs_persona_sync.is_(True),
),
UserFile.status == UserFileStatus.COMPLETED,
)
)
.order_by(UserFile.created_at)
.limit(1)
.with_for_update(skip_locked=True)
)
if exclude_ids:
stmt = stmt.where(UserFile.id.notin_(exclude_ids))
file_id = db_session.execute(stmt).scalar_one_or_none()
db_session.commit()
return file_id
# ------------------------------------------------------------------
# Drain loops — process *all* pending work of each type
# ------------------------------------------------------------------
def drain_processing_loop(tenant_id: str) -> None:
"""Process all pending PROCESSING user files."""
from onyx.background.celery.tasks.user_file_processing.tasks import (
process_user_file_impl,
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
while True:
with get_session_with_current_tenant() as session:
file_id = _claim_next_processing_file(session)
if file_id is None:
break
try:
process_user_file_impl(
user_file_id=str(file_id),
tenant_id=tenant_id,
redis_locking=False,
)
except Exception:
logger.exception(f"Failed to process user file {file_id}")
def drain_delete_loop(tenant_id: str) -> None:
"""Delete all pending DELETING user files."""
from onyx.background.celery.tasks.user_file_processing.tasks import (
delete_user_file_impl,
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
failed: set[UUID] = set()
while True:
with get_session_with_current_tenant() as session:
file_id = _claim_next_deleting_file(session, exclude_ids=failed)
if file_id is None:
break
try:
delete_user_file_impl(
user_file_id=str(file_id),
tenant_id=tenant_id,
redis_locking=False,
)
except Exception:
logger.exception(f"Failed to delete user file {file_id}")
failed.add(file_id)
def drain_project_sync_loop(tenant_id: str) -> None:
"""Sync all pending project/persona metadata for user files."""
from onyx.background.celery.tasks.user_file_processing.tasks import (
project_sync_user_file_impl,
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
failed: set[UUID] = set()
while True:
with get_session_with_current_tenant() as session:
file_id = _claim_next_sync_file(session, exclude_ids=failed)
if file_id is None:
break
try:
project_sync_user_file_impl(
user_file_id=str(file_id),
tenant_id=tenant_id,
redis_locking=False,
)
except Exception:
logger.exception(f"Failed to sync user file {file_id}")
failed.add(file_id)

51
backend/onyx/cache/factory.py vendored Normal file
View File

@@ -0,0 +1,51 @@
from collections.abc import Callable
from onyx.cache.interface import CacheBackend
from onyx.cache.interface import CacheBackendType
from onyx.configs.app_configs import CACHE_BACKEND
def _build_redis_backend(tenant_id: str) -> CacheBackend:
from onyx.cache.redis_backend import RedisCacheBackend
from onyx.redis.redis_pool import redis_pool
return RedisCacheBackend(redis_pool.get_client(tenant_id))
def _build_postgres_backend(tenant_id: str) -> CacheBackend:
from onyx.cache.postgres_backend import PostgresCacheBackend
return PostgresCacheBackend(tenant_id)
_BACKEND_BUILDERS: dict[CacheBackendType, Callable[[str], CacheBackend]] = {
CacheBackendType.REDIS: _build_redis_backend,
CacheBackendType.POSTGRES: _build_postgres_backend,
}
def get_cache_backend(*, tenant_id: str | None = None) -> CacheBackend:
"""Return a tenant-aware ``CacheBackend``.
If *tenant_id* is ``None``, the current tenant is read from the
thread-local context variable (same behaviour as ``get_redis_client``).
"""
if tenant_id is None:
from shared_configs.contextvars import get_current_tenant_id
tenant_id = get_current_tenant_id()
builder = _BACKEND_BUILDERS.get(CACHE_BACKEND)
if builder is None:
raise ValueError(
f"Unsupported CACHE_BACKEND={CACHE_BACKEND!r}. "
f"Supported values: {[t.value for t in CacheBackendType]}"
)
return builder(tenant_id)
def get_shared_cache_backend() -> CacheBackend:
"""Return a ``CacheBackend`` in the shared (cross-tenant) namespace."""
from shared_configs.configs import DEFAULT_REDIS_PREFIX
return get_cache_backend(tenant_id=DEFAULT_REDIS_PREFIX)

115
backend/onyx/cache/interface.py vendored Normal file
View File

@@ -0,0 +1,115 @@
import abc
from enum import Enum
from redis.exceptions import RedisError
from sqlalchemy.exc import SQLAlchemyError
TTL_KEY_NOT_FOUND = -2
TTL_NO_EXPIRY = -1
CACHE_TRANSIENT_ERRORS: tuple[type[Exception], ...] = (RedisError, SQLAlchemyError)
"""Exception types that represent transient cache connectivity / operational
failures. Callers that want to fail-open (or fail-closed) on cache errors
should catch this tuple instead of bare ``Exception``.
When adding a new ``CacheBackend`` implementation, add its transient error
base class(es) here so all call-sites pick it up automatically."""
class CacheBackendType(str, Enum):
REDIS = "redis"
POSTGRES = "postgres"
class CacheLock(abc.ABC):
"""Abstract distributed lock returned by CacheBackend.lock()."""
@abc.abstractmethod
def acquire(
self,
blocking: bool = True,
blocking_timeout: float | None = None,
) -> bool:
raise NotImplementedError
@abc.abstractmethod
def release(self) -> None:
raise NotImplementedError
@abc.abstractmethod
def owned(self) -> bool:
raise NotImplementedError
def __enter__(self) -> "CacheLock":
if not self.acquire():
raise RuntimeError("Failed to acquire lock")
return self
def __exit__(self, *args: object) -> None:
self.release()
class CacheBackend(abc.ABC):
"""Thin abstraction over a key-value cache with TTL, locks, and blocking lists.
Covers the subset of Redis operations used outside of Celery. When
CACHE_BACKEND=postgres, a PostgreSQL-backed implementation is used instead.
"""
# -- basic key/value ---------------------------------------------------
@abc.abstractmethod
def get(self, key: str) -> bytes | None:
raise NotImplementedError
@abc.abstractmethod
def set(
self,
key: str,
value: str | bytes | int | float,
ex: int | None = None,
) -> None:
raise NotImplementedError
@abc.abstractmethod
def delete(self, key: str) -> None:
raise NotImplementedError
@abc.abstractmethod
def exists(self, key: str) -> bool:
raise NotImplementedError
# -- TTL ---------------------------------------------------------------
@abc.abstractmethod
def expire(self, key: str, seconds: int) -> None:
raise NotImplementedError
@abc.abstractmethod
def ttl(self, key: str) -> int:
"""Return remaining TTL in seconds.
Returns ``TTL_NO_EXPIRY`` (-1) if key exists without expiry,
``TTL_KEY_NOT_FOUND`` (-2) if key is missing or expired.
"""
raise NotImplementedError
# -- distributed lock --------------------------------------------------
@abc.abstractmethod
def lock(self, name: str, timeout: float | None = None) -> CacheLock:
raise NotImplementedError
# -- blocking list (used by MCP OAuth BLPOP pattern) -------------------
@abc.abstractmethod
def rpush(self, key: str, value: str | bytes) -> None:
raise NotImplementedError
@abc.abstractmethod
def blpop(self, keys: list[str], timeout: int = 0) -> tuple[bytes, bytes] | None:
"""Block until a value is available on one of *keys*, or *timeout* expires.
Returns ``(key, value)`` or ``None`` on timeout.
"""
raise NotImplementedError

323
backend/onyx/cache/postgres_backend.py vendored Normal file
View File

@@ -0,0 +1,323 @@
"""PostgreSQL-backed ``CacheBackend`` for NO_VECTOR_DB deployments.
Uses the ``cache_store`` table for key-value storage, PostgreSQL advisory locks
for distributed locking, and a polling loop for the BLPOP pattern.
"""
import hashlib
import struct
import time
import uuid
from contextlib import AbstractContextManager
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from sqlalchemy import delete
from sqlalchemy import func
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.orm import Session
from onyx.cache.interface import CacheBackend
from onyx.cache.interface import CacheLock
from onyx.cache.interface import TTL_KEY_NOT_FOUND
from onyx.cache.interface import TTL_NO_EXPIRY
from onyx.db.models import CacheStore
_LIST_KEY_PREFIX = "_q:"
# ASCII: ':' (0x3A) < ';' (0x3B). Upper bound for range queries so [prefix+, prefix;)
# captures all list-item keys (e.g. _q:mylist:123:uuid) without including other
# lists whose names share a prefix (e.g. _q:mylist2:...).
_LIST_KEY_RANGE_TERMINATOR = ";"
_LIST_ITEM_TTL_SECONDS = 3600
_LOCK_POLL_INTERVAL = 0.1
_BLPOP_POLL_INTERVAL = 0.25
def _list_item_key(key: str) -> str:
"""Unique key for a list item. Timestamp for FIFO ordering; UUID prevents
collision when concurrent rpush calls occur within the same nanosecond.
"""
return f"{_LIST_KEY_PREFIX}{key}:{time.time_ns()}:{uuid.uuid4().hex}"
def _to_bytes(value: str | bytes | int | float) -> bytes:
if isinstance(value, bytes):
return value
return str(value).encode()
# ------------------------------------------------------------------
# Lock
# ------------------------------------------------------------------
class PostgresCacheLock(CacheLock):
"""Advisory-lock-based distributed lock.
Uses ``get_session_with_tenant`` for connection lifecycle. The lock is tied
to the session's connection; releasing or closing the session frees it.
NOTE: Unlike Redis locks, advisory locks do not auto-expire after
``timeout`` seconds. They are released when ``release()`` is
called or when the session is closed.
"""
def __init__(self, lock_id: int, timeout: float | None, tenant_id: str) -> None:
self._lock_id = lock_id
self._timeout = timeout
self._tenant_id = tenant_id
self._session_cm: AbstractContextManager[Session] | None = None
self._session: Session | None = None
self._acquired = False
def acquire(
self,
blocking: bool = True,
blocking_timeout: float | None = None,
) -> bool:
from onyx.db.engine.sql_engine import get_session_with_tenant
self._session_cm = get_session_with_tenant(tenant_id=self._tenant_id)
self._session = self._session_cm.__enter__()
try:
if not blocking:
return self._try_lock()
effective_timeout = blocking_timeout or self._timeout
deadline = (
(time.monotonic() + effective_timeout) if effective_timeout else None
)
while True:
if self._try_lock():
return True
if deadline is not None and time.monotonic() >= deadline:
return False
time.sleep(_LOCK_POLL_INTERVAL)
finally:
if not self._acquired:
self._close_session()
def release(self) -> None:
if not self._acquired or self._session is None:
return
try:
self._session.execute(select(func.pg_advisory_unlock(self._lock_id)))
finally:
self._acquired = False
self._close_session()
def owned(self) -> bool:
return self._acquired
def _close_session(self) -> None:
if self._session_cm is not None:
try:
self._session_cm.__exit__(None, None, None)
finally:
self._session_cm = None
self._session = None
def _try_lock(self) -> bool:
assert self._session is not None
result = self._session.execute(
select(func.pg_try_advisory_lock(self._lock_id))
).scalar()
if result:
self._acquired = True
return True
return False
# ------------------------------------------------------------------
# Backend
# ------------------------------------------------------------------
class PostgresCacheBackend(CacheBackend):
"""``CacheBackend`` backed by the ``cache_store`` table in PostgreSQL.
Each operation opens and closes its own database session so the backend
is safe to share across threads. Tenant isolation is handled by
SQLAlchemy's ``schema_translate_map`` (set by ``get_session_with_tenant``).
"""
def __init__(self, tenant_id: str) -> None:
self._tenant_id = tenant_id
# -- basic key/value ---------------------------------------------------
def get(self, key: str) -> bytes | None:
from onyx.db.engine.sql_engine import get_session_with_tenant
stmt = select(CacheStore.value).where(
CacheStore.key == key,
or_(CacheStore.expires_at.is_(None), CacheStore.expires_at > func.now()),
)
with get_session_with_tenant(tenant_id=self._tenant_id) as session:
value = session.execute(stmt).scalar_one_or_none()
if value is None:
return None
return bytes(value)
def set(
self,
key: str,
value: str | bytes | int | float,
ex: int | None = None,
) -> None:
from onyx.db.engine.sql_engine import get_session_with_tenant
value_bytes = _to_bytes(value)
expires_at = (
datetime.now(timezone.utc) + timedelta(seconds=ex)
if ex is not None
else None
)
stmt = (
pg_insert(CacheStore)
.values(key=key, value=value_bytes, expires_at=expires_at)
.on_conflict_do_update(
index_elements=[CacheStore.key],
set_={"value": value_bytes, "expires_at": expires_at},
)
)
with get_session_with_tenant(tenant_id=self._tenant_id) as session:
session.execute(stmt)
session.commit()
def delete(self, key: str) -> None:
from onyx.db.engine.sql_engine import get_session_with_tenant
with get_session_with_tenant(tenant_id=self._tenant_id) as session:
session.execute(delete(CacheStore).where(CacheStore.key == key))
session.commit()
def exists(self, key: str) -> bool:
from onyx.db.engine.sql_engine import get_session_with_tenant
stmt = (
select(CacheStore.key)
.where(
CacheStore.key == key,
or_(
CacheStore.expires_at.is_(None),
CacheStore.expires_at > func.now(),
),
)
.limit(1)
)
with get_session_with_tenant(tenant_id=self._tenant_id) as session:
return session.execute(stmt).first() is not None
# -- TTL ---------------------------------------------------------------
def expire(self, key: str, seconds: int) -> None:
from onyx.db.engine.sql_engine import get_session_with_tenant
new_exp = datetime.now(timezone.utc) + timedelta(seconds=seconds)
stmt = (
update(CacheStore).where(CacheStore.key == key).values(expires_at=new_exp)
)
with get_session_with_tenant(tenant_id=self._tenant_id) as session:
session.execute(stmt)
session.commit()
def ttl(self, key: str) -> int:
from onyx.db.engine.sql_engine import get_session_with_tenant
stmt = select(CacheStore.expires_at).where(CacheStore.key == key)
with get_session_with_tenant(tenant_id=self._tenant_id) as session:
result = session.execute(stmt).first()
if result is None:
return TTL_KEY_NOT_FOUND
expires_at: datetime | None = result[0]
if expires_at is None:
return TTL_NO_EXPIRY
remaining = (expires_at - datetime.now(timezone.utc)).total_seconds()
if remaining <= 0:
return TTL_KEY_NOT_FOUND
return int(remaining)
# -- distributed lock --------------------------------------------------
def lock(self, name: str, timeout: float | None = None) -> CacheLock:
return PostgresCacheLock(
self._lock_id_for(name), timeout, tenant_id=self._tenant_id
)
# -- blocking list (MCP OAuth BLPOP pattern) ---------------------------
def rpush(self, key: str, value: str | bytes) -> None:
self.set(_list_item_key(key), value, ex=_LIST_ITEM_TTL_SECONDS)
def blpop(self, keys: list[str], timeout: int = 0) -> tuple[bytes, bytes] | None:
if timeout <= 0:
raise ValueError(
"PostgresCacheBackend.blpop requires timeout > 0. "
"timeout=0 would block the calling thread indefinitely "
"with no way to interrupt short of process termination."
)
from onyx.db.engine.sql_engine import get_session_with_tenant
deadline = time.monotonic() + timeout
while True:
for key in keys:
lower = f"{_LIST_KEY_PREFIX}{key}:"
upper = f"{_LIST_KEY_PREFIX}{key}{_LIST_KEY_RANGE_TERMINATOR}"
stmt = (
select(CacheStore)
.where(
CacheStore.key >= lower,
CacheStore.key < upper,
or_(
CacheStore.expires_at.is_(None),
CacheStore.expires_at > func.now(),
),
)
.order_by(CacheStore.key)
.limit(1)
.with_for_update(skip_locked=True)
)
with get_session_with_tenant(tenant_id=self._tenant_id) as session:
row = session.execute(stmt).scalars().first()
if row is not None:
value = bytes(row.value) if row.value else b""
session.delete(row)
session.commit()
return (key.encode(), value)
if time.monotonic() >= deadline:
return None
time.sleep(_BLPOP_POLL_INTERVAL)
# -- helpers -----------------------------------------------------------
def _lock_id_for(self, name: str) -> int:
"""Map *name* to a 64-bit signed int for ``pg_advisory_lock``."""
h = hashlib.md5(f"{self._tenant_id}:{name}".encode()).digest()
return struct.unpack("q", h[:8])[0]
# ------------------------------------------------------------------
# Periodic cleanup
# ------------------------------------------------------------------
def cleanup_expired_cache_entries() -> None:
"""Delete rows whose ``expires_at`` is in the past.
Called by the periodic poller every 5 minutes.
"""
from onyx.db.engine.sql_engine import get_session_with_current_tenant
with get_session_with_current_tenant() as session:
session.execute(
delete(CacheStore).where(
CacheStore.expires_at.is_not(None),
CacheStore.expires_at < func.now(),
)
)
session.commit()

92
backend/onyx/cache/redis_backend.py vendored Normal file
View File

@@ -0,0 +1,92 @@
from typing import cast
from redis.client import Redis
from redis.lock import Lock as RedisLock
from onyx.cache.interface import CacheBackend
from onyx.cache.interface import CacheLock
class RedisCacheLock(CacheLock):
"""Wraps ``redis.lock.Lock`` behind the ``CacheLock`` interface."""
def __init__(self, lock: RedisLock) -> None:
self._lock = lock
def acquire(
self,
blocking: bool = True,
blocking_timeout: float | None = None,
) -> bool:
return bool(
self._lock.acquire(
blocking=blocking,
blocking_timeout=blocking_timeout,
)
)
def release(self) -> None:
self._lock.release()
def owned(self) -> bool:
return bool(self._lock.owned())
class RedisCacheBackend(CacheBackend):
"""``CacheBackend`` implementation that delegates to a ``redis.Redis`` client.
This is a thin pass-through — every method maps 1-to-1 to the underlying
Redis command. ``TenantRedis`` key-prefixing is handled by the client
itself (provided by ``get_redis_client``).
"""
def __init__(self, redis_client: Redis) -> None:
self._r = redis_client
# -- basic key/value ---------------------------------------------------
def get(self, key: str) -> bytes | None:
val = self._r.get(key)
if val is None:
return None
if isinstance(val, bytes):
return val
return str(val).encode()
def set(
self,
key: str,
value: str | bytes | int | float,
ex: int | None = None,
) -> None:
self._r.set(key, value, ex=ex)
def delete(self, key: str) -> None:
self._r.delete(key)
def exists(self, key: str) -> bool:
return bool(self._r.exists(key))
# -- TTL ---------------------------------------------------------------
def expire(self, key: str, seconds: int) -> None:
self._r.expire(key, seconds)
def ttl(self, key: str) -> int:
return cast(int, self._r.ttl(key))
# -- distributed lock --------------------------------------------------
def lock(self, name: str, timeout: float | None = None) -> CacheLock:
return RedisCacheLock(self._r.lock(name, timeout=timeout))
# -- blocking list (MCP OAuth BLPOP pattern) ---------------------------
def rpush(self, key: str, value: str | bytes) -> None:
self._r.rpush(key, value)
def blpop(self, keys: list[str], timeout: int = 0) -> tuple[bytes, bytes] | None:
result = cast(list[bytes] | None, self._r.blpop(keys, timeout=timeout))
if result is None:
return None
return (result[0], result[1])

View File

@@ -1,57 +1,52 @@
from uuid import UUID
from redis.client import Redis
from onyx.cache.interface import CacheBackend
# Redis key prefixes for chat message processing
PREFIX = "chatprocessing"
FENCE_PREFIX = f"{PREFIX}_fence"
FENCE_TTL = 30 * 60 # 30 minutes
def _get_fence_key(chat_session_id: UUID) -> str:
"""
Generate the Redis key for a chat session processing a message.
"""Generate the cache key for a chat session processing fence.
Args:
chat_session_id: The UUID of the chat session
Returns:
The fence key string (tenant_id is automatically added by the Redis client)
The fence key string. Tenant isolation is handled automatically
by the cache backend (Redis key-prefixing or Postgres schema routing).
"""
return f"{FENCE_PREFIX}_{chat_session_id}"
def set_processing_status(
chat_session_id: UUID, redis_client: Redis, value: bool
chat_session_id: UUID, cache: CacheBackend, value: bool
) -> None:
"""
Set or clear the fence for a chat session processing a message.
"""Set or clear the fence for a chat session processing a message.
If the key exists, we are processing a message. If the key does not exist, we are not processing a message.
If the key exists, a message is being processed.
Args:
chat_session_id: The UUID of the chat session
redis_client: The Redis client to use
cache: Tenant-aware cache backend
value: True to set the fence, False to clear it
"""
fence_key = _get_fence_key(chat_session_id)
if value:
redis_client.set(fence_key, 0, ex=FENCE_TTL)
cache.set(fence_key, 0, ex=FENCE_TTL)
else:
redis_client.delete(fence_key)
cache.delete(fence_key)
def is_chat_session_processing(chat_session_id: UUID, redis_client: Redis) -> bool:
"""
Check if the chat session is processing a message.
def is_chat_session_processing(chat_session_id: UUID, cache: CacheBackend) -> bool:
"""Check if the chat session is processing a message.
Args:
chat_session_id: The UUID of the chat session
redis_client: The Redis client to use
cache: Tenant-aware cache backend
Returns:
True if the chat session is processing a message, False otherwise
"""
fence_key = _get_fence_key(chat_session_id)
return bool(redis_client.exists(fence_key))
return cache.exists(_get_fence_key(chat_session_id))

View File

@@ -1,6 +1,7 @@
import json
import time
from collections.abc import Callable
from typing import Any
from typing import Literal
from sqlalchemy.orm import Session
@@ -35,7 +36,6 @@ from onyx.db.memory import add_memory
from onyx.db.memory import update_memory_at_index
from onyx.db.memory import UserMemoryContext
from onyx.db.models import Persona
from onyx.llm.constants import LlmProviderNames
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.interfaces import ToolChoiceOptions
@@ -51,6 +51,7 @@ from onyx.tools.built_in_tools import STOPPING_TOOLS_NAMES
from onyx.tools.interface import Tool
from onyx.tools.models import ChatFile
from onyx.tools.models import MemoryToolResponseSnapshot
from onyx.tools.models import PythonToolRichResponse
from onyx.tools.models import ToolCallInfo
from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolResponse
@@ -82,28 +83,6 @@ def _looks_like_xml_tool_call_payload(text: str | None) -> bool:
)
def _should_keep_bedrock_tool_definitions(
llm: object, simple_chat_history: list[ChatMessageSimple]
) -> bool:
"""Bedrock requires tool config when history includes toolUse/toolResult blocks."""
model_provider = getattr(getattr(llm, "config", None), "model_provider", None)
if model_provider not in {
LlmProviderNames.BEDROCK,
LlmProviderNames.BEDROCK_CONVERSE,
}:
return False
return any(
(
msg.message_type == MessageType.ASSISTANT
and msg.tool_calls
and len(msg.tool_calls) > 0
)
or msg.message_type == MessageType.TOOL_CALL_RESPONSE
for msg in simple_chat_history
)
def _try_fallback_tool_extraction(
llm_step_result: LlmStepResult,
tool_choice: ToolChoiceOptions,
@@ -530,11 +509,13 @@ def _create_file_tool_metadata_message(
"""
lines = [
"You have access to the following files. Use the read_file tool to "
"read sections of any file:"
"read sections of any file. You MUST pass the file_id UUID (not the "
"filename) to read_file:"
]
for meta in file_metadata:
lines.append(
f'- {meta.file_id}: "{meta.filename}" (~{meta.approx_char_count:,} chars)'
f'- file_id="{meta.file_id}" filename="{meta.filename}" '
f"(~{meta.approx_char_count:,} chars)"
)
message_content = "\n".join(lines)
@@ -558,12 +539,16 @@ def _create_context_files_message(
# Format as documents JSON as described in README
documents_list = []
for idx, file_text in enumerate(context_files.file_texts, start=1):
documents_list.append(
{
"document": idx,
"contents": file_text,
}
title = (
context_files.file_metadata[idx - 1].filename
if idx - 1 < len(context_files.file_metadata)
else None
)
entry: dict[str, Any] = {"document": idx}
if title:
entry["title"] = title
entry["contents"] = file_text
documents_list.append(entry)
documents_json = json.dumps({"documents": documents_list}, indent=2)
message_content = f"Here are some documents provided for context, they may not all be relevant:\n{documents_json}"
@@ -678,12 +663,7 @@ def run_llm_loop(
elif out_of_cycles or ran_image_gen:
# Last cycle, no tools allowed, just answer!
tool_choice = ToolChoiceOptions.NONE
# Bedrock requires tool config in requests that include toolUse/toolResult history.
final_tools = (
tools
if _should_keep_bedrock_tool_definitions(llm, simple_chat_history)
else []
)
final_tools = []
else:
tool_choice = ToolChoiceOptions.AUTO
final_tools = tools
@@ -959,6 +939,13 @@ def run_llm_loop(
):
generated_images = tool_response.rich_response.generated_images
# Extract generated_files if this is a code interpreter response
generated_files = None
if isinstance(tool_response.rich_response, PythonToolRichResponse):
generated_files = (
tool_response.rich_response.generated_files or None
)
# Persist memory if this is a memory tool response
memory_snapshot: MemoryToolResponseSnapshot | None = None
if isinstance(tool_response.rich_response, MemoryToolResponse):
@@ -1010,6 +997,7 @@ def run_llm_loop(
tool_call_response=saved_response,
search_docs=displayed_docs or search_docs,
generated_images=generated_images,
generated_files=generated_files,
)
# Add to state container for partial save support
state_container.add_tool_call(tool_call_info)

View File

@@ -11,9 +11,10 @@ from contextvars import Token
from uuid import UUID
from pydantic import BaseModel
from redis.client import Redis
from sqlalchemy.orm import Session
from onyx.cache.factory import get_cache_backend
from onyx.cache.interface import CacheBackend
from onyx.chat.chat_processing_checker import set_processing_status
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.chat_state import run_chat_loop_with_state_containers
@@ -79,7 +80,6 @@ from onyx.llm.request_context import reset_llm_mock_response
from onyx.llm.request_context import set_llm_mock_response
from onyx.llm.utils import litellm_exception_to_error_msg
from onyx.onyxbot.slack.models import SlackContext
from onyx.redis.redis_pool import get_redis_client
from onyx.server.query_and_chat.models import AUTO_PLACE_AFTER_LATEST_MESSAGE
from onyx.server.query_and_chat.models import MessageResponseIDInfo
from onyx.server.query_and_chat.models import SendMessageRequest
@@ -448,7 +448,7 @@ def handle_stream_message_objects(
llm: LLM | None = None
chat_session: ChatSession | None = None
redis_client: Redis | None = None
cache: CacheBackend | None = None
user_id = user.id
if user.is_anonymous:
@@ -809,19 +809,19 @@ def handle_stream_message_objects(
)
simple_chat_history.insert(0, summary_simple)
redis_client = get_redis_client()
cache = get_cache_backend()
reset_cancel_status(
chat_session.id,
redis_client,
cache,
)
def check_is_connected() -> bool:
return check_stop_signal(chat_session.id, redis_client)
return check_stop_signal(chat_session.id, cache)
set_processing_status(
chat_session_id=chat_session.id,
redis_client=redis_client,
cache=cache,
value=True,
)
@@ -968,10 +968,10 @@ def handle_stream_message_objects(
reset_llm_mock_response(mock_response_token)
try:
if redis_client is not None and chat_session is not None:
if cache is not None and chat_session is not None:
set_processing_status(
chat_session_id=chat_session.id,
redis_client=redis_client,
cache=cache,
value=False,
)
except Exception:

View File

@@ -1,4 +1,5 @@
import json
import mimetypes
from sqlalchemy.orm import Session
@@ -12,14 +13,41 @@ from onyx.db.chat import create_db_search_doc
from onyx.db.models import ChatMessage
from onyx.db.models import ToolCall
from onyx.db.tools import create_tool_call_no_commit
from onyx.file_store.models import FileDescriptor
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type
from onyx.tools.models import ToolCallInfo
from onyx.utils.logger import setup_logger
logger = setup_logger()
def _extract_referenced_file_descriptors(
tool_calls: list[ToolCallInfo],
message_text: str,
) -> list[FileDescriptor]:
"""Extract FileDescriptors for code interpreter files referenced in the message text."""
descriptors: list[FileDescriptor] = []
for tool_call_info in tool_calls:
if not tool_call_info.generated_files:
continue
for gen_file in tool_call_info.generated_files:
file_id = (
gen_file.file_link.rsplit("/", 1)[-1] if gen_file.file_link else ""
)
if file_id and file_id in message_text:
mime_type, _ = mimetypes.guess_type(gen_file.filename)
descriptors.append(
FileDescriptor(
id=file_id,
type=mime_type_to_chat_file_type(mime_type),
name=gen_file.filename,
)
)
return descriptors
def _create_and_link_tool_calls(
tool_calls: list[ToolCallInfo],
assistant_message: ChatMessage,
@@ -297,5 +325,14 @@ def save_chat_turn(
citation_number_to_search_doc_id if citation_number_to_search_doc_id else None
)
# 8. Attach code interpreter generated files that the assistant actually
# referenced in its response, so they are available via load_all_chat_files
# on subsequent turns. Files not mentioned are intermediate artifacts.
if message_text:
referenced = _extract_referenced_file_descriptors(tool_calls, message_text)
if referenced:
existing_files = assistant_message.files or []
assistant_message.files = existing_files + referenced
# Finally save the messages, tool calls, and docs
db_session.commit()

View File

@@ -1,65 +1,58 @@
from uuid import UUID
from redis.client import Redis
from onyx.cache.interface import CacheBackend
# Redis key prefixes for chat session stop signals
PREFIX = "chatsessionstop"
FENCE_PREFIX = f"{PREFIX}_fence"
FENCE_TTL = 10 * 60 # 10 minutes - defensive TTL to prevent memory leaks
FENCE_TTL = 10 * 60 # 10 minutes
def _get_fence_key(chat_session_id: UUID) -> str:
"""
Generate the Redis key for a chat session stop signal fence.
"""Generate the cache key for a chat session stop signal fence.
Args:
chat_session_id: The UUID of the chat session
Returns:
The fence key string (tenant_id is automatically added by the Redis client)
The fence key string. Tenant isolation is handled automatically
by the cache backend (Redis key-prefixing or Postgres schema routing).
"""
return f"{FENCE_PREFIX}_{chat_session_id}"
def set_fence(chat_session_id: UUID, redis_client: Redis, value: bool) -> None:
"""
Set or clear the stop signal fence for a chat session.
def set_fence(chat_session_id: UUID, cache: CacheBackend, value: bool) -> None:
"""Set or clear the stop signal fence for a chat session.
Args:
chat_session_id: The UUID of the chat session
redis_client: Redis client to use (tenant-aware client that auto-prefixes keys)
cache: Tenant-aware cache backend
value: True to set the fence (stop signal), False to clear it
"""
fence_key = _get_fence_key(chat_session_id)
if not value:
redis_client.delete(fence_key)
cache.delete(fence_key)
return
redis_client.set(fence_key, 0, ex=FENCE_TTL)
cache.set(fence_key, 0, ex=FENCE_TTL)
def is_connected(chat_session_id: UUID, redis_client: Redis) -> bool:
"""
Check if the chat session should continue (not stopped).
def is_connected(chat_session_id: UUID, cache: CacheBackend) -> bool:
"""Check if the chat session should continue (not stopped).
Args:
chat_session_id: The UUID of the chat session to check
redis_client: Redis client to use for checking the stop signal (tenant-aware client that auto-prefixes keys)
cache: Tenant-aware cache backend
Returns:
True if the session should continue, False if it should stop
"""
fence_key = _get_fence_key(chat_session_id)
return not bool(redis_client.exists(fence_key))
return not cache.exists(_get_fence_key(chat_session_id))
def reset_cancel_status(chat_session_id: UUID, redis_client: Redis) -> None:
"""
Clear the stop signal for a chat session.
def reset_cancel_status(chat_session_id: UUID, cache: CacheBackend) -> None:
"""Clear the stop signal for a chat session.
Args:
chat_session_id: The UUID of the chat session
redis_client: Redis client to use (tenant-aware client that auto-prefixes keys)
cache: Tenant-aware cache backend
"""
fence_key = _get_fence_key(chat_session_id)
redis_client.delete(fence_key)
cache.delete(_get_fence_key(chat_session_id))

View File

@@ -6,6 +6,7 @@ from datetime import timezone
from typing import cast
from onyx.auth.schemas import AuthBackend
from onyx.cache.interface import CacheBackendType
from onyx.configs.constants import AuthType
from onyx.configs.constants import QueryHistoryType
from onyx.file_processing.enums import HtmlBasedConnectorTransformLinksStrategy
@@ -54,6 +55,12 @@ DISABLE_USER_KNOWLEDGE = os.environ.get("DISABLE_USER_KNOWLEDGE", "").lower() ==
# are disabled but core chat, tools, user file uploads, and Projects still work.
DISABLE_VECTOR_DB = os.environ.get("DISABLE_VECTOR_DB", "").lower() == "true"
# Which backend to use for caching, locks, and ephemeral state.
# "redis" (default) or "postgres" (only valid when DISABLE_VECTOR_DB=true).
CACHE_BACKEND = CacheBackendType(
os.environ.get("CACHE_BACKEND", CacheBackendType.REDIS)
)
# Maximum token count for a single uploaded file. Files exceeding this are rejected.
# Defaults to 100k tokens (or 10M when vector DB is disabled).
_DEFAULT_FILE_TOKEN_LIMIT = 10_000_000 if DISABLE_VECTOR_DB else 100_000
@@ -488,14 +495,7 @@ CELERY_WORKER_PRIMARY_POOL_OVERFLOW = int(
os.environ.get("CELERY_WORKER_PRIMARY_POOL_OVERFLOW") or 4
)
# Consolidated background worker (light, docprocessing, docfetching, heavy, monitoring, user_file_processing)
# separate workers' defaults: light=24, docprocessing=6, docfetching=1, heavy=4, kg=2, monitoring=1, user_file=2
# Total would be 40, but we use a more conservative default of 20 for the consolidated worker
CELERY_WORKER_BACKGROUND_CONCURRENCY = int(
os.environ.get("CELERY_WORKER_BACKGROUND_CONCURRENCY") or 20
)
# Individual worker concurrency settings (used when USE_LIGHTWEIGHT_BACKGROUND_WORKER is False or on Kuberenetes deployments)
# Individual worker concurrency settings
CELERY_WORKER_HEAVY_CONCURRENCY = int(
os.environ.get("CELERY_WORKER_HEAVY_CONCURRENCY") or 4
)
@@ -812,7 +812,9 @@ RERANK_COUNT = int(os.environ.get("RERANK_COUNT") or 1000)
# Tool Configs
#####
# Code Interpreter Service Configuration
CODE_INTERPRETER_BASE_URL = os.environ.get("CODE_INTERPRETER_BASE_URL")
CODE_INTERPRETER_BASE_URL = os.environ.get(
"CODE_INTERPRETER_BASE_URL", "http://localhost:8000"
)
CODE_INTERPRETER_DEFAULT_TIMEOUT_MS = int(
os.environ.get("CODE_INTERPRETER_DEFAULT_TIMEOUT_MS") or 60_000
@@ -893,6 +895,9 @@ CUSTOM_ANSWER_VALIDITY_CONDITIONS = json.loads(
)
VESPA_REQUEST_TIMEOUT = int(os.environ.get("VESPA_REQUEST_TIMEOUT") or "15")
VESPA_MIGRATION_REQUEST_TIMEOUT_S = int(
os.environ.get("VESPA_MIGRATION_REQUEST_TIMEOUT_S") or "120"
)
SYSTEM_RECURSION_LIMIT = int(os.environ.get("SYSTEM_RECURSION_LIMIT") or "1000")

View File

@@ -84,7 +84,6 @@ POSTGRES_CELERY_WORKER_LIGHT_APP_NAME = "celery_worker_light"
POSTGRES_CELERY_WORKER_DOCPROCESSING_APP_NAME = "celery_worker_docprocessing"
POSTGRES_CELERY_WORKER_DOCFETCHING_APP_NAME = "celery_worker_docfetching"
POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME = "celery_worker_indexing_child"
POSTGRES_CELERY_WORKER_BACKGROUND_APP_NAME = "celery_worker_background"
POSTGRES_CELERY_WORKER_HEAVY_APP_NAME = "celery_worker_heavy"
POSTGRES_CELERY_WORKER_MONITORING_APP_NAME = "celery_worker_monitoring"
POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME = (

View File

@@ -943,6 +943,9 @@ class ConfluenceConnector(
if include_permissions
else None
),
parent_hierarchy_raw_node_id=self._get_parent_hierarchy_raw_id(
page
),
)
)
@@ -992,6 +995,7 @@ class ConfluenceConnector(
if include_permissions
else None
),
parent_hierarchy_raw_node_id=page_id,
)
)

View File

@@ -781,4 +781,5 @@ def build_slim_document(
return SlimDocument(
id=onyx_document_id_from_drive_file(file),
external_access=external_access,
parent_hierarchy_raw_node_id=(file.get("parents") or [None])[0],
)

View File

@@ -902,6 +902,11 @@ class JiraConnector(
external_access=self._get_project_permissions(
project_key, add_prefix=False
),
parent_hierarchy_raw_node_id=(
self._get_parent_hierarchy_raw_node_id(issue, project_key)
if project_key
else None
),
)
)
current_offset += 1

View File

@@ -385,6 +385,7 @@ class IndexingDocument(Document):
class SlimDocument(BaseModel):
id: str
external_access: ExternalAccess | None = None
parent_hierarchy_raw_node_id: str | None = None
class HierarchyNode(BaseModel):

View File

@@ -772,6 +772,7 @@ def _convert_driveitem_to_slim_document(
drive_name: str,
ctx: ClientContext,
graph_client: GraphClient,
parent_hierarchy_raw_node_id: str | None = None,
) -> SlimDocument:
if driveitem.id is None:
raise ValueError("DriveItem ID is required")
@@ -787,11 +788,15 @@ def _convert_driveitem_to_slim_document(
return SlimDocument(
id=driveitem.id,
external_access=external_access,
parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
)
def _convert_sitepage_to_slim_document(
site_page: dict[str, Any], ctx: ClientContext | None, graph_client: GraphClient
site_page: dict[str, Any],
ctx: ClientContext | None,
graph_client: GraphClient,
parent_hierarchy_raw_node_id: str | None = None,
) -> SlimDocument:
"""Convert a SharePoint site page to a SlimDocument object."""
if site_page.get("id") is None:
@@ -808,6 +813,7 @@ def _convert_sitepage_to_slim_document(
return SlimDocument(
id=id,
external_access=external_access,
parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
)
@@ -1594,12 +1600,22 @@ class SharepointConnector(
)
)
parent_hierarchy_url: str | None = None
if drive_web_url:
parent_hierarchy_url = self._get_parent_hierarchy_url(
site_url, drive_web_url, drive_name, driveitem
)
try:
logger.debug(f"Processing: {driveitem.web_url}")
ctx = self._create_rest_client_context(site_descriptor.url)
doc_batch.append(
_convert_driveitem_to_slim_document(
driveitem, drive_name, ctx, self.graph_client
driveitem,
drive_name,
ctx,
self.graph_client,
parent_hierarchy_raw_node_id=parent_hierarchy_url,
)
)
except Exception as e:
@@ -1619,7 +1635,10 @@ class SharepointConnector(
ctx = self._create_rest_client_context(site_descriptor.url)
doc_batch.append(
_convert_sitepage_to_slim_document(
site_page, ctx, self.graph_client
site_page,
ctx,
self.graph_client,
parent_hierarchy_raw_node_id=site_descriptor.url,
)
)
if len(doc_batch) >= SLIM_BATCH_SIZE:

View File

@@ -565,6 +565,7 @@ def _get_all_doc_ids(
channel_id=channel_id, thread_ts=message["ts"]
),
external_access=external_access,
parent_hierarchy_raw_node_id=channel_id,
)
)

View File

@@ -98,6 +98,7 @@ def get_chat_sessions_by_user(
db_session: Session,
include_onyxbot_flows: bool = False,
limit: int = 50,
before: datetime | None = None,
project_id: int | None = None,
only_non_project_chats: bool = False,
include_failed_chats: bool = False,
@@ -112,6 +113,9 @@ def get_chat_sessions_by_user(
if deleted is not None:
stmt = stmt.where(ChatSession.deleted == deleted)
if before is not None:
stmt = stmt.where(ChatSession.time_updated < before)
if limit:
stmt = stmt.limit(limit)

View File

@@ -13,6 +13,7 @@ from sqlalchemy.orm import aliased
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.db.connector_credential_pair import get_cc_pair_groups_for_ids
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.enums import AccessType
@@ -246,6 +247,7 @@ def insert_document_set(
description=document_set_creation_request.description,
user_id=user_id,
is_public=document_set_creation_request.is_public,
is_up_to_date=DISABLE_VECTOR_DB,
time_last_modified_by_user=func.now(),
)
db_session.add(new_document_set_row)
@@ -336,7 +338,8 @@ def update_document_set(
)
document_set_row.description = document_set_update_request.description
document_set_row.is_up_to_date = False
if not DISABLE_VECTOR_DB:
document_set_row.is_up_to_date = False
document_set_row.is_public = document_set_update_request.is_public
document_set_row.time_last_modified_by_user = func.now()
versioned_private_doc_set_fn = fetch_versioned_implementation(

View File

@@ -186,6 +186,7 @@ class EmbeddingPrecision(str, PyEnum):
class UserFileStatus(str, PyEnum):
PROCESSING = "PROCESSING"
INDEXING = "INDEXING"
COMPLETED = "COMPLETED"
FAILED = "FAILED"
CANCELED = "CANCELED"

View File

@@ -1,5 +1,7 @@
"""CRUD operations for HierarchyNode."""
from collections import defaultdict
from sqlalchemy import select
from sqlalchemy.orm import Session
@@ -525,6 +527,53 @@ def get_document_parent_hierarchy_node_ids(
return {doc_id: parent_id for doc_id, parent_id in results}
def update_document_parent_hierarchy_nodes(
db_session: Session,
doc_parent_map: dict[str, int | None],
commit: bool = True,
) -> int:
"""Bulk-update Document.parent_hierarchy_node_id for multiple documents.
Only updates rows whose current value differs from the desired value to
avoid unnecessary writes.
Args:
db_session: SQLAlchemy session
doc_parent_map: Mapping of document_id → desired parent_hierarchy_node_id
commit: Whether to commit the transaction
Returns:
Number of documents actually updated
"""
if not doc_parent_map:
return 0
doc_ids = list(doc_parent_map.keys())
existing = get_document_parent_hierarchy_node_ids(db_session, doc_ids)
by_parent: dict[int | None, list[str]] = defaultdict(list)
for doc_id, desired_parent_id in doc_parent_map.items():
current = existing.get(doc_id)
if current == desired_parent_id or doc_id not in existing:
continue
by_parent[desired_parent_id].append(doc_id)
updated = 0
for desired_parent_id, ids in by_parent.items():
db_session.query(Document).filter(Document.id.in_(ids)).update(
{Document.parent_hierarchy_node_id: desired_parent_id},
synchronize_session=False,
)
updated += len(ids)
if commit:
db_session.commit()
elif updated:
db_session.flush()
return updated
def update_hierarchy_node_permissions(
db_session: Session,
raw_node_id: str,

View File

@@ -109,45 +109,38 @@ def can_user_access_llm_provider(
is_admin: If True, bypass user group restrictions but still respect persona restrictions
Access logic:
1. If is_public=True → everyone has access (public override)
2. If is_public=False:
- Both groups AND personas set → must satisfy BOTH (AND logic, admins bypass group check)
- Only groups set → must be in one of the groups (OR across groups, admins bypass)
- Only personas set → must use one of the personas (OR across personas, applies to admins)
- Neither set → NOBODY has access unless admin (locked, admin-only)
- is_public controls USER access (group bypass): when True, all users can access
regardless of group membership. When False, user must be in a whitelisted group
(or be admin).
- Persona restrictions are ALWAYS enforced when set, regardless of is_public.
This allows admins to make a provider available to all users while still
restricting which personas (assistants) can use it.
Decision matrix:
1. is_public=True, no personas set → everyone has access
2. is_public=True, personas set → all users, but only whitelisted personas
3. is_public=False, groups+personas set → must satisfy BOTH (admins bypass groups)
4. is_public=False, only groups set → must be in group (admins bypass)
5. is_public=False, only personas set → must use whitelisted persona
6. is_public=False, neither set → admin-only (locked)
"""
# Public override - everyone has access
if provider.is_public:
return True
# Extract IDs once to avoid multiple iterations
provider_group_ids = (
{group.id for group in provider.groups} if provider.groups else set()
)
provider_persona_ids = (
{p.id for p in provider.personas} if provider.personas else set()
)
provider_group_ids = {g.id for g in (provider.groups or [])}
provider_persona_ids = {p.id for p in (provider.personas or [])}
has_groups = bool(provider_group_ids)
has_personas = bool(provider_persona_ids)
# Both groups AND personas set → AND logic (must satisfy both)
if has_groups and has_personas:
# Admins bypass group check but still must satisfy persona restrictions
user_in_group = is_admin or bool(user_group_ids & provider_group_ids)
persona_allowed = persona.id in provider_persona_ids if persona else False
return user_in_group and persona_allowed
# Persona restrictions are always enforced when set, regardless of is_public
if has_personas and not (persona and persona.id in provider_persona_ids):
return False
if provider.is_public:
return True
# Only groups set → user must be in one of the groups (admins bypass)
if has_groups:
return is_admin or bool(user_group_ids & provider_group_ids)
# Only personas set → persona must be in allowed list (applies to admins too)
if has_personas:
return persona.id in provider_persona_ids if persona else False
# Neither groups nor personas set, and not public → admins can access
return is_admin
# No groups: either persona-whitelisted (already passed) or admin-only if locked
return has_personas or is_admin
def validate_persona_ids_exist(
@@ -539,6 +532,7 @@ def fetch_default_model(
) -> ModelConfiguration | None:
model_config = db_session.scalar(
select(ModelConfiguration)
.options(selectinload(ModelConfiguration.llm_provider))
.join(LLMModelFlow)
.where(
ModelConfiguration.is_visible == True, # noqa: E712

View File

@@ -103,7 +103,6 @@ from onyx.utils.encryption import encrypt_string_to_bytes
from onyx.utils.sensitive import SensitiveValue
from onyx.utils.headers import HeaderItemDict
from shared_configs.enums import EmbeddingProvider
from onyx.context.search.enums import RecencyBiasSetting
# TODO: After anonymous user migration has been deployed, make user_id columns NOT NULL
# and update Mapped[User | None] relationships to Mapped[User] where needed.
@@ -3265,19 +3264,6 @@ class Persona(Base):
)
name: Mapped[str] = mapped_column(String)
description: Mapped[str] = mapped_column(String)
# Number of chunks to pass to the LLM for generation.
num_chunks: Mapped[float | None] = mapped_column(Float, nullable=True)
chunks_above: Mapped[int] = mapped_column(Integer)
chunks_below: Mapped[int] = mapped_column(Integer)
# Pass every chunk through LLM for evaluation, fairly expensive
# Can be turned off globally by admin, in which case, this setting is ignored
llm_relevance_filter: Mapped[bool] = mapped_column(Boolean)
# Enables using LLM to extract time and source type filters
# Can also be admin disabled globally
llm_filter_extraction: Mapped[bool] = mapped_column(Boolean)
recency_bias: Mapped[RecencyBiasSetting] = mapped_column(
Enum(RecencyBiasSetting, native_enum=False)
)
# Allows the persona to specify a specific default LLM model
# NOTE: only is applied on the actual response generation - is not used for things like
@@ -3304,11 +3290,8 @@ class Persona(Base):
# Treated specially (cannot be user edited etc.)
builtin_persona: Mapped[bool] = mapped_column(Boolean, default=False)
# Default personas are personas created by admins and are automatically added
# to all users' assistants list.
is_default_persona: Mapped[bool] = mapped_column(
Boolean, default=False, nullable=False
)
# Featured personas are highlighted in the UI
featured: Mapped[bool] = mapped_column(Boolean, default=False)
# controls whether the persona is available to be selected by users
is_visible: Mapped[bool] = mapped_column(Boolean, default=True)
# controls the ordering of personas in the UI
@@ -4943,7 +4926,9 @@ class ScimUserMapping(Base):
__tablename__ = "scim_user_mapping"
id: Mapped[int] = mapped_column(Integer, primary_key=True)
external_id: Mapped[str] = mapped_column(String, unique=True, index=True)
external_id: Mapped[str | None] = mapped_column(
String, unique=True, index=True, nullable=True
)
user_id: Mapped[UUID] = mapped_column(
ForeignKey("user.id", ondelete="CASCADE"), unique=True, nullable=False
)
@@ -5000,3 +4985,25 @@ class CodeInterpreterServer(Base):
id: Mapped[int] = mapped_column(Integer, primary_key=True)
server_enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
class CacheStore(Base):
"""Key-value cache table used by ``PostgresCacheBackend``.
Replaces Redis for simple KV caching, locks, and list operations
when ``CACHE_BACKEND=postgres`` (NO_VECTOR_DB deployments).
Intentionally separate from ``KVStore``:
- Stores raw bytes (LargeBinary) vs JSONB, matching Redis semantics.
- Has ``expires_at`` for TTL; rows are periodically garbage-collected.
- Holds ephemeral data (tokens, stop signals, lock state) not
persistent application config, so cleanup can be aggressive.
"""
__tablename__ = "cache_store"
key: Mapped[str] = mapped_column(String, primary_key=True)
value: Mapped[bytes | None] = mapped_column(LargeBinary, nullable=True)
expires_at: Mapped[datetime.datetime | None] = mapped_column(
DateTime(timezone=True), nullable=True
)

View File

@@ -18,11 +18,8 @@ from sqlalchemy.orm import Session
from onyx.access.hierarchy_access import get_user_external_group_ids
from onyx.auth.schemas import UserRole
from onyx.configs.app_configs import CURATORS_CANNOT_VIEW_OR_EDIT_NON_OWNED_ASSISTANTS
from onyx.configs.chat_configs import CONTEXT_CHUNKS_ABOVE
from onyx.configs.chat_configs import CONTEXT_CHUNKS_BELOW
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.configs.constants import NotificationType
from onyx.context.search.enums import RecencyBiasSetting
from onyx.db.constants import SLACK_BOT_PERSONA_PREFIX
from onyx.db.document_access import get_accessible_documents_by_ids
from onyx.db.models import ConnectorCredentialPair
@@ -254,13 +251,15 @@ def create_update_persona(
# Permission to actually use these is checked later
try:
# Default persona validation
if create_persona_request.is_default_persona:
# Curators can edit default personas, but not make them
# Featured persona validation
if create_persona_request.featured:
# Curators can edit featured personas, but not make them
# TODO this will be reworked soon with RBAC permissions feature
if user.role == UserRole.CURATOR or user.role == UserRole.GLOBAL_CURATOR:
pass
elif user.role != UserRole.ADMIN:
raise ValueError("Only admins can make a default persona")
raise ValueError("Only admins can make a featured persona")
# Convert incoming string UUIDs to UUID objects for DB operations
converted_user_file_ids = None
@@ -281,7 +280,6 @@ def create_update_persona(
document_set_ids=create_persona_request.document_set_ids,
tool_ids=create_persona_request.tool_ids,
is_public=create_persona_request.is_public,
recency_bias=create_persona_request.recency_bias,
llm_model_provider_override=create_persona_request.llm_model_provider_override,
llm_model_version_override=create_persona_request.llm_model_version_override,
starter_messages=create_persona_request.starter_messages,
@@ -295,10 +293,7 @@ def create_update_persona(
remove_image=create_persona_request.remove_image,
search_start_date=create_persona_request.search_start_date,
label_ids=create_persona_request.label_ids,
num_chunks=create_persona_request.num_chunks,
llm_relevance_filter=create_persona_request.llm_relevance_filter,
llm_filter_extraction=create_persona_request.llm_filter_extraction,
is_default_persona=create_persona_request.is_default_persona,
featured=create_persona_request.featured,
user_file_ids=converted_user_file_ids,
commit=False,
hierarchy_node_ids=create_persona_request.hierarchy_node_ids,
@@ -874,10 +869,6 @@ def upsert_persona(
user: User | None,
name: str,
description: str,
num_chunks: float,
llm_relevance_filter: bool,
llm_filter_extraction: bool,
recency_bias: RecencyBiasSetting,
llm_model_provider_override: str | None,
llm_model_version_override: str | None,
starter_messages: list[StarterMessage] | None,
@@ -898,13 +889,11 @@ def upsert_persona(
remove_image: bool | None = None,
search_start_date: datetime | None = None,
builtin_persona: bool = False,
is_default_persona: bool | None = None,
featured: bool | None = None,
label_ids: list[int] | None = None,
user_file_ids: list[UUID] | None = None,
hierarchy_node_ids: list[int] | None = None,
document_ids: list[str] | None = None,
chunks_above: int = CONTEXT_CHUNKS_ABOVE,
chunks_below: int = CONTEXT_CHUNKS_BELOW,
replace_base_system_prompt: bool = False,
) -> Persona:
"""
@@ -1015,12 +1004,6 @@ def upsert_persona(
# `default` and `built-in` properties can only be set when creating a persona.
existing_persona.name = name
existing_persona.description = description
existing_persona.num_chunks = num_chunks
existing_persona.chunks_above = chunks_above
existing_persona.chunks_below = chunks_below
existing_persona.llm_relevance_filter = llm_relevance_filter
existing_persona.llm_filter_extraction = llm_filter_extraction
existing_persona.recency_bias = recency_bias
existing_persona.llm_model_provider_override = llm_model_provider_override
existing_persona.llm_model_version_override = llm_model_version_override
existing_persona.starter_messages = starter_messages
@@ -1034,10 +1017,8 @@ def upsert_persona(
if label_ids is not None:
existing_persona.labels.clear()
existing_persona.labels = labels or []
existing_persona.is_default_persona = (
is_default_persona
if is_default_persona is not None
else existing_persona.is_default_persona
existing_persona.featured = (
featured if featured is not None else existing_persona.featured
)
# Update embedded prompt fields if provided
if system_prompt is not None:
@@ -1090,12 +1071,6 @@ def upsert_persona(
is_public=is_public,
name=name,
description=description,
num_chunks=num_chunks,
chunks_above=chunks_above,
chunks_below=chunks_below,
llm_relevance_filter=llm_relevance_filter,
llm_filter_extraction=llm_filter_extraction,
recency_bias=recency_bias,
builtin_persona=builtin_persona,
system_prompt=system_prompt or "",
task_prompt=task_prompt or "",
@@ -1111,9 +1086,7 @@ def upsert_persona(
display_priority=display_priority,
is_visible=is_visible,
search_start_date=search_start_date,
is_default_persona=(
is_default_persona if is_default_persona is not None else False
),
featured=(featured if featured is not None else False),
user_files=user_files or [],
labels=labels or [],
hierarchy_nodes=hierarchy_nodes or [],
@@ -1158,9 +1131,9 @@ def delete_old_default_personas(
db_session.commit()
def update_persona_is_default(
def update_persona_featured(
persona_id: int,
is_default: bool,
featured: bool,
db_session: Session,
user: User,
) -> None:
@@ -1168,7 +1141,7 @@ def update_persona_is_default(
db_session=db_session, persona_id=persona_id, user=user, get_editable=True
)
persona.is_default_persona = is_default
persona.featured = featured
db_session.commit()

View File

@@ -9,8 +9,9 @@ from pydantic import BaseModel
from pydantic import ConfigDict
from sqlalchemy import func
from sqlalchemy.orm import Session
from starlette.background import BackgroundTasks
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.constants import FileOrigin
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
@@ -51,7 +52,7 @@ def create_user_files(
) -> CategorizedFilesResult:
# Categorize the files
categorized_files = categorize_uploaded_files(files)
categorized_files = categorize_uploaded_files(files, db_session)
# NOTE: At the moment, zip metadata is not used for user files.
# Should revisit to decide whether this should be a feature.
upload_response = upload_files(categorized_files.acceptable, FileOrigin.USER_FILE)
@@ -105,8 +106,8 @@ def upload_files_to_user_files_with_indexing(
user: User,
temp_id_map: dict[str, str] | None,
db_session: Session,
background_tasks: BackgroundTasks | None = None,
) -> CategorizedFilesResult:
# Validate project ownership if a project_id is provided
if project_id is not None and user is not None:
if not check_project_ownership(project_id, user.id, db_session):
raise HTTPException(status_code=404, detail="Project not found")
@@ -127,16 +128,27 @@ def upload_files_to_user_files_with_indexing(
logger.warning(
f"File {rejected_file.filename} rejected for {rejected_file.reason}"
)
for user_file in user_files:
task = client_app.send_task(
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
priority=OnyxCeleryPriority.HIGH,
)
logger.info(
f"Triggered indexing for user_file_id={user_file.id} with task_id={task.id}"
)
if DISABLE_VECTOR_DB and background_tasks is not None:
from onyx.background.task_utils import drain_processing_loop
background_tasks.add_task(drain_processing_loop, tenant_id)
for user_file in user_files:
logger.info(f"Queued in-process processing for user_file_id={user_file.id}")
else:
from onyx.background.celery.versioned_apps.client import app as client_app
for user_file in user_files:
task = client_app.send_task(
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
priority=OnyxCeleryPriority.HIGH,
)
logger.info(
f"Triggered indexing for user_file_id={user_file.id} "
f"with task_id={task.id}"
)
return CategorizedFilesResult(
user_files=user_files,

View File

@@ -129,7 +129,7 @@ def get_current_search_settings(db_session: Session) -> SearchSettings:
latest_settings = result.scalars().first()
if not latest_settings:
raise RuntimeError("No search settings specified, DB is not in a valid state")
raise RuntimeError("No search settings specified; DB is not in a valid state.")
return latest_settings

View File

@@ -5,8 +5,6 @@ from sqlalchemy import select
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import Session
from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
from onyx.context.search.enums import RecencyBiasSetting
from onyx.db.constants import DEFAULT_PERSONA_SLACK_CHANNEL_NAME
from onyx.db.constants import SLACK_BOT_PERSONA_PREFIX
from onyx.db.models import ChannelConfig
@@ -45,8 +43,6 @@ def create_slack_channel_persona(
channel_name: str | None,
document_set_ids: list[int],
existing_persona_id: int | None = None,
num_chunks: float = MAX_CHUNKS_FED_TO_CHAT,
enable_auto_filters: bool = False,
) -> Persona:
"""NOTE: does not commit changes"""
@@ -73,17 +69,13 @@ def create_slack_channel_persona(
system_prompt="",
task_prompt="",
datetime_aware=True,
num_chunks=num_chunks,
llm_relevance_filter=True,
llm_filter_extraction=enable_auto_filters,
recency_bias=RecencyBiasSetting.AUTO,
tool_ids=[search_tool.id],
document_set_ids=document_set_ids,
llm_model_provider_override=None,
llm_model_version_override=None,
starter_messages=None,
is_public=True,
is_default_persona=False,
featured=False,
db_session=db_session,
commit=False,
)

View File

@@ -32,9 +32,6 @@ def get_multipass_config(search_settings: SearchSettings) -> MultipassConfig:
Determines whether to enable multipass and large chunks by examining
the current search settings and the embedder configuration.
"""
if not search_settings:
return MultipassConfig(multipass_indexing=False, enable_large_chunks=False)
multipass = should_use_multipass(search_settings)
enable_large_chunks = SearchSettings.can_use_large_chunks(
multipass, search_settings.model_name, search_settings.provider_type

View File

@@ -26,11 +26,10 @@ def get_default_document_index(
To be used for retrieval only. Indexing should be done through both indices
until Vespa is deprecated.
Pre-existing docstring for this function, although secondary indices are not
currently supported:
Primary index is the index that is used for querying/updating etc. Secondary
index is for when both the currently used index and the upcoming index both
need to be updated, updates are applied to both indices.
need to be updated. Updates are applied to both indices.
WARNING: In that case, get_all_document_indices should be used.
"""
if DISABLE_VECTOR_DB:
return DisabledDocumentIndex(
@@ -51,11 +50,26 @@ def get_default_document_index(
opensearch_retrieval_enabled = get_opensearch_retrieval_state(db_session)
if opensearch_retrieval_enabled:
indexing_setting = IndexingSetting.from_db_model(search_settings)
secondary_indexing_setting = (
IndexingSetting.from_db_model(secondary_search_settings)
if secondary_search_settings
else None
)
return OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
embedding_dim=indexing_setting.final_embedding_dim,
embedding_precision=indexing_setting.embedding_precision,
secondary_index_name=secondary_index_name,
secondary_embedding_dim=(
secondary_indexing_setting.final_embedding_dim
if secondary_indexing_setting
else None
),
secondary_embedding_precision=(
secondary_indexing_setting.embedding_precision
if secondary_indexing_setting
else None
),
large_chunks_enabled=search_settings.large_chunks_enabled,
secondary_large_chunks_enabled=secondary_large_chunks_enabled,
multitenant=MULTI_TENANT,
@@ -86,8 +100,7 @@ def get_all_document_indices(
Used for indexing only. Until Vespa is deprecated we will index into both
document indices. Retrieval is done through only one index however.
Large chunks and secondary indices are not currently supported so we
hardcode appropriate values.
Large chunks are not currently supported so we hardcode appropriate values.
NOTE: Make sure the Vespa index object is returned first. In the rare event
that there is some conflict between indexing and the migration task, it is
@@ -123,13 +136,36 @@ def get_all_document_indices(
opensearch_document_index: OpenSearchOldDocumentIndex | None = None
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
indexing_setting = IndexingSetting.from_db_model(search_settings)
secondary_indexing_setting = (
IndexingSetting.from_db_model(secondary_search_settings)
if secondary_search_settings
else None
)
opensearch_document_index = OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
embedding_dim=indexing_setting.final_embedding_dim,
embedding_precision=indexing_setting.embedding_precision,
secondary_index_name=None,
large_chunks_enabled=False,
secondary_large_chunks_enabled=None,
secondary_index_name=(
secondary_search_settings.index_name
if secondary_search_settings
else None
),
secondary_embedding_dim=(
secondary_indexing_setting.final_embedding_dim
if secondary_indexing_setting
else None
),
secondary_embedding_precision=(
secondary_indexing_setting.embedding_precision
if secondary_indexing_setting
else None
),
large_chunks_enabled=search_settings.large_chunks_enabled,
secondary_large_chunks_enabled=(
secondary_search_settings.large_chunks_enabled
if secondary_search_settings
else None
),
multitenant=MULTI_TENANT,
httpx_client=httpx_client,
)

View File

@@ -6,7 +6,6 @@ import httpx
from opensearchpy import NotFoundError
from onyx.access.models import DocumentAccess
from onyx.configs.app_configs import USING_AWS_MANAGED_OPENSEARCH
from onyx.configs.app_configs import VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT
from onyx.configs.chat_configs import NUM_RETURNED_HITS
from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
@@ -272,6 +271,9 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
embedding_dim: int,
embedding_precision: EmbeddingPrecision,
secondary_index_name: str | None,
secondary_embedding_dim: int | None,
secondary_embedding_precision: EmbeddingPrecision | None,
# NOTE: We do not support large chunks right now.
large_chunks_enabled: bool, # noqa: ARG002
secondary_large_chunks_enabled: bool | None, # noqa: ARG002
multitenant: bool = False,
@@ -287,12 +289,25 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
f"Expected {MULTI_TENANT}, got {multitenant}."
)
tenant_id = get_current_tenant_id()
tenant_state = TenantState(tenant_id=tenant_id, multitenant=multitenant)
self._real_index = OpenSearchDocumentIndex(
tenant_state=TenantState(tenant_id=tenant_id, multitenant=multitenant),
tenant_state=tenant_state,
index_name=index_name,
embedding_dim=embedding_dim,
embedding_precision=embedding_precision,
)
self._secondary_real_index: OpenSearchDocumentIndex | None = None
if self.secondary_index_name:
if secondary_embedding_dim is None or secondary_embedding_precision is None:
raise ValueError(
"Bug: Secondary index embedding dimension and precision are not set."
)
self._secondary_real_index = OpenSearchDocumentIndex(
tenant_state=tenant_state,
index_name=self.secondary_index_name,
embedding_dim=secondary_embedding_dim,
embedding_precision=secondary_embedding_precision,
)
@staticmethod
def register_multitenant_indices(
@@ -308,19 +323,38 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
self,
primary_embedding_dim: int,
primary_embedding_precision: EmbeddingPrecision,
secondary_index_embedding_dim: int | None, # noqa: ARG002
secondary_index_embedding_precision: EmbeddingPrecision | None, # noqa: ARG002
secondary_index_embedding_dim: int | None,
secondary_index_embedding_precision: EmbeddingPrecision | None,
) -> None:
# Only handle primary index for now, ignore secondary.
return self._real_index.verify_and_create_index_if_necessary(
self._real_index.verify_and_create_index_if_necessary(
primary_embedding_dim, primary_embedding_precision
)
if self.secondary_index_name:
if (
secondary_index_embedding_dim is None
or secondary_index_embedding_precision is None
):
raise ValueError(
"Bug: Secondary index embedding dimension and precision are not set."
)
assert (
self._secondary_real_index is not None
), "Bug: Secondary index is not initialized."
self._secondary_real_index.verify_and_create_index_if_necessary(
secondary_index_embedding_dim, secondary_index_embedding_precision
)
def index(
self,
chunks: list[DocMetadataAwareIndexChunk],
index_batch_params: IndexBatchParams,
) -> set[OldDocumentInsertionRecord]:
"""
NOTE: Do NOT consider the secondary index here. A separate indexing
pipeline will be responsible for indexing to the secondary index. This
design is not ideal and we should reconsider this when revamping index
swapping.
"""
# Convert IndexBatchParams to IndexingMetadata.
chunk_counts: dict[str, IndexingMetadata.ChunkCounts] = {}
for doc_id in index_batch_params.doc_id_to_new_chunk_cnt:
@@ -352,7 +386,20 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
tenant_id: str, # noqa: ARG002
chunk_count: int | None,
) -> int:
return self._real_index.delete(doc_id, chunk_count)
"""
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for deleting chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
total_chunks_deleted = self._real_index.delete(doc_id, chunk_count)
if self.secondary_index_name:
assert (
self._secondary_real_index is not None
), "Bug: Secondary index is not initialized."
total_chunks_deleted += self._secondary_real_index.delete(
doc_id, chunk_count
)
return total_chunks_deleted
def update_single(
self,
@@ -363,6 +410,11 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
fields: VespaDocumentFields | None,
user_fields: VespaDocumentUserFields | None,
) -> None:
"""
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for updating chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
if fields is None and user_fields is None:
logger.warning(
f"Tried to update document {doc_id} with no updated fields or user fields."
@@ -393,6 +445,11 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
try:
self._real_index.update([update_request])
if self.secondary_index_name:
assert (
self._secondary_real_index is not None
), "Bug: Secondary index is not initialized."
self._secondary_real_index.update([update_request])
except NotFoundError:
logger.exception(
f"Tried to update document {doc_id} but at least one of its chunks was not found in OpenSearch. "
@@ -563,12 +620,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
)
if not self._client.index_exists():
if USING_AWS_MANAGED_OPENSEARCH:
index_settings = (
DocumentSchema.get_index_settings_for_aws_managed_opensearch()
)
else:
index_settings = DocumentSchema.get_index_settings()
index_settings = DocumentSchema.get_index_settings_based_on_environment()
self._client.create_index(
mappings=expected_mappings,
settings=index_settings,

View File

@@ -12,6 +12,7 @@ from pydantic import model_validator
from pydantic import SerializerFunctionWrapHandler
from onyx.configs.app_configs import OPENSEARCH_TEXT_ANALYZER
from onyx.configs.app_configs import USING_AWS_MANAGED_OPENSEARCH
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
from onyx.document_index.opensearch.constants import EF_CONSTRUCTION
@@ -525,7 +526,7 @@ class DocumentSchema:
}
@staticmethod
def get_index_settings_for_aws_managed_opensearch() -> dict[str, Any]:
def get_index_settings_for_aws_managed_opensearch_st_dev() -> dict[str, Any]:
"""
Settings for AWS-managed OpenSearch.
@@ -546,3 +547,41 @@ class DocumentSchema:
"knn.algo_param.ef_search": EF_SEARCH,
}
}
@staticmethod
def get_index_settings_for_aws_managed_opensearch_mt_cloud() -> dict[str, Any]:
"""
Settings for AWS-managed OpenSearch in multi-tenant cloud.
324 shards very roughly targets a storage load of ~30Gb per shard, which
according to AWS OpenSearch documentation is within a good target range.
As documented above we need 2 replicas for a total of 3 copies of the
data because the cluster is configured with 3-AZ awareness.
"""
return {
"index": {
"number_of_shards": 324,
"number_of_replicas": 2,
# Required for vector search.
"knn": True,
"knn.algo_param.ef_search": EF_SEARCH,
}
}
@staticmethod
def get_index_settings_based_on_environment() -> dict[str, Any]:
"""
Returns the index settings based on the environment.
"""
if USING_AWS_MANAGED_OPENSEARCH:
if MULTI_TENANT:
return (
DocumentSchema.get_index_settings_for_aws_managed_opensearch_mt_cloud()
)
else:
return (
DocumentSchema.get_index_settings_for_aws_managed_opensearch_st_dev()
)
else:
return DocumentSchema.get_index_settings()

View File

@@ -1,5 +1,6 @@
import json
import string
import time
from collections.abc import Callable
from collections.abc import Mapping
from datetime import datetime
@@ -18,6 +19,7 @@ from onyx.background.celery.tasks.opensearch_migration.transformer import (
)
from onyx.configs.app_configs import LOG_VESPA_TIMING_INFORMATION
from onyx.configs.app_configs import VESPA_LANGUAGE_OVERRIDE
from onyx.configs.app_configs import VESPA_MIGRATION_REQUEST_TIMEOUT_S
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunkUncleaned
from onyx.document_index.interfaces import VespaChunkRequest
@@ -338,12 +340,18 @@ def get_all_chunks_paginated(
params["continuation"] = continuation_token
response: httpx.Response | None = None
start_time = time.monotonic()
try:
with get_vespa_http_client() as http_client:
with get_vespa_http_client(
timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S
) as http_client:
response = http_client.get(url, params=params)
response.raise_for_status()
except httpx.HTTPError as e:
error_base = f"Failed to get chunks from Vespa slice {slice_id} with continuation token {continuation_token}."
error_base = (
f"Failed to get chunks from Vespa slice {slice_id} with continuation token "
f"{continuation_token} in {time.monotonic() - start_time:.3f} seconds."
)
logger.exception(
f"Request URL: {e.request.url}\n"
f"Request Headers: {e.request.headers}\n"

View File

@@ -465,6 +465,12 @@ class VespaIndex(DocumentIndex):
chunks: list[DocMetadataAwareIndexChunk],
index_batch_params: IndexBatchParams,
) -> set[OldDocumentInsertionRecord]:
"""
NOTE: Do NOT consider the secondary index here. A separate indexing
pipeline will be responsible for indexing to the secondary index. This
design is not ideal and we should reconsider this when revamping index
swapping.
"""
if len(index_batch_params.doc_id_to_previous_chunk_cnt) != len(
index_batch_params.doc_id_to_new_chunk_cnt
):
@@ -659,6 +665,10 @@ class VespaIndex(DocumentIndex):
"""Note: if the document id does not exist, the update will be a no-op and the
function will complete with no errors or exceptions.
Handle other exceptions if you wish to implement retry behavior
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for updating chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
if fields is None and user_fields is None:
logger.warning(
@@ -679,13 +689,6 @@ class VespaIndex(DocumentIndex):
f"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {tenant_id}."
)
vespa_document_index = VespaDocumentIndex(
index_name=self.index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.large_chunks_enabled,
httpx_client=self.httpx_client,
)
project_ids: set[int] | None = None
if user_fields is not None and user_fields.user_projects is not None:
project_ids = set(user_fields.user_projects)
@@ -705,7 +708,20 @@ class VespaIndex(DocumentIndex):
persona_ids=persona_ids,
)
vespa_document_index.update([update_request])
indices = [self.index_name]
if self.secondary_index_name:
indices.append(self.secondary_index_name)
for index_name in indices:
vespa_document_index = VespaDocumentIndex(
index_name=index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.index_to_large_chunks_enabled.get(
index_name, False
),
httpx_client=self.httpx_client,
)
vespa_document_index.update([update_request])
def delete_single(
self,
@@ -714,6 +730,11 @@ class VespaIndex(DocumentIndex):
tenant_id: str,
chunk_count: int | None,
) -> int:
"""
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for deleting chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
tenant_state = TenantState(
tenant_id=get_current_tenant_id(),
multitenant=MULTI_TENANT,
@@ -726,13 +747,25 @@ class VespaIndex(DocumentIndex):
raise ValueError(
f"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {tenant_id}."
)
vespa_document_index = VespaDocumentIndex(
index_name=self.index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.large_chunks_enabled,
httpx_client=self.httpx_client,
)
return vespa_document_index.delete(document_id=doc_id, chunk_count=chunk_count)
indices = [self.index_name]
if self.secondary_index_name:
indices.append(self.secondary_index_name)
total_chunks_deleted = 0
for index_name in indices:
vespa_document_index = VespaDocumentIndex(
index_name=index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.index_to_large_chunks_enabled.get(
index_name, False
),
httpx_client=self.httpx_client,
)
total_chunks_deleted += vespa_document_index.delete(
document_id=doc_id, chunk_count=chunk_count
)
return total_chunks_deleted
def id_based_retrieval(
self,

View File

@@ -52,7 +52,9 @@ def replace_invalid_doc_id_characters(text: str) -> str:
return text.replace("'", "_")
def get_vespa_http_client(no_timeout: bool = False, http2: bool = True) -> httpx.Client:
def get_vespa_http_client(
no_timeout: bool = False, http2: bool = True, timeout: int | None = None
) -> httpx.Client:
"""
Configures and returns an HTTP client for communicating with Vespa,
including authentication if needed.
@@ -64,7 +66,7 @@ def get_vespa_http_client(no_timeout: bool = False, http2: bool = True) -> httpx
else None
),
verify=False if not MANAGED_VESPA else True,
timeout=None if no_timeout else VESPA_REQUEST_TIMEOUT,
timeout=None if no_timeout else (timeout or VESPA_REQUEST_TIMEOUT),
http2=http2,
)

View File

View File

@@ -0,0 +1,101 @@
"""
Standardized error codes for the Onyx backend.
Usage:
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
raise OnyxError(OnyxErrorCode.UNAUTHENTICATED, "Token expired")
"""
from enum import Enum
class OnyxErrorCode(Enum):
"""
Each member is a tuple of (error_code_string, http_status_code).
The error_code_string is a stable, machine-readable identifier that
API consumers can match on. The http_status_code is the default HTTP
status to return.
"""
# ------------------------------------------------------------------
# Authentication (401)
# ------------------------------------------------------------------
UNAUTHENTICATED = ("UNAUTHENTICATED", 401)
INVALID_TOKEN = ("INVALID_TOKEN", 401)
TOKEN_EXPIRED = ("TOKEN_EXPIRED", 401)
CSRF_FAILURE = ("CSRF_FAILURE", 403)
# ------------------------------------------------------------------
# Authorization (403)
# ------------------------------------------------------------------
UNAUTHORIZED = ("UNAUTHORIZED", 403)
INSUFFICIENT_PERMISSIONS = ("INSUFFICIENT_PERMISSIONS", 403)
ADMIN_ONLY = ("ADMIN_ONLY", 403)
EE_REQUIRED = ("EE_REQUIRED", 403)
# ------------------------------------------------------------------
# Validation / Bad Request (400)
# ------------------------------------------------------------------
VALIDATION_ERROR = ("VALIDATION_ERROR", 400)
INVALID_INPUT = ("INVALID_INPUT", 400)
MISSING_REQUIRED_FIELD = ("MISSING_REQUIRED_FIELD", 400)
# ------------------------------------------------------------------
# Not Found (404)
# ------------------------------------------------------------------
NOT_FOUND = ("NOT_FOUND", 404)
CONNECTOR_NOT_FOUND = ("CONNECTOR_NOT_FOUND", 404)
CREDENTIAL_NOT_FOUND = ("CREDENTIAL_NOT_FOUND", 404)
PERSONA_NOT_FOUND = ("PERSONA_NOT_FOUND", 404)
DOCUMENT_NOT_FOUND = ("DOCUMENT_NOT_FOUND", 404)
SESSION_NOT_FOUND = ("SESSION_NOT_FOUND", 404)
USER_NOT_FOUND = ("USER_NOT_FOUND", 404)
# ------------------------------------------------------------------
# Conflict (409)
# ------------------------------------------------------------------
CONFLICT = ("CONFLICT", 409)
DUPLICATE_RESOURCE = ("DUPLICATE_RESOURCE", 409)
# ------------------------------------------------------------------
# Rate Limiting / Quotas (429 / 402)
# ------------------------------------------------------------------
RATE_LIMITED = ("RATE_LIMITED", 429)
SEAT_LIMIT_EXCEEDED = ("SEAT_LIMIT_EXCEEDED", 402)
# ------------------------------------------------------------------
# Connector / Credential Errors (400-range)
# ------------------------------------------------------------------
CONNECTOR_VALIDATION_FAILED = ("CONNECTOR_VALIDATION_FAILED", 400)
CREDENTIAL_INVALID = ("CREDENTIAL_INVALID", 400)
CREDENTIAL_EXPIRED = ("CREDENTIAL_EXPIRED", 401)
# ------------------------------------------------------------------
# Server Errors (5xx)
# ------------------------------------------------------------------
INTERNAL_ERROR = ("INTERNAL_ERROR", 500)
NOT_IMPLEMENTED = ("NOT_IMPLEMENTED", 501)
SERVICE_UNAVAILABLE = ("SERVICE_UNAVAILABLE", 503)
BAD_GATEWAY = ("BAD_GATEWAY", 502)
LLM_PROVIDER_ERROR = ("LLM_PROVIDER_ERROR", 502)
GATEWAY_TIMEOUT = ("GATEWAY_TIMEOUT", 504)
def __init__(self, code: str, status_code: int) -> None:
self.code = code
self.status_code = status_code
def detail(self, message: str | None = None) -> dict[str, str]:
"""Build a structured error detail dict.
Returns a dict like:
{"error_code": "UNAUTHENTICATED", "message": "Token expired"}
If no message is supplied, the error code itself is used as the message.
"""
return {
"error_code": self.code,
"message": message or self.code,
}

View File

@@ -0,0 +1,82 @@
"""OnyxError — the single exception type for all Onyx business errors.
Raise ``OnyxError`` instead of ``HTTPException`` in business code. A global
FastAPI exception handler (registered via ``register_onyx_exception_handlers``)
converts it into a JSON response with the standard
``{"error_code": "...", "message": "..."}`` shape.
Usage::
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
For upstream errors with a dynamic HTTP status (e.g. billing service),
use ``status_code_override``::
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY,
detail,
status_code_override=upstream_status,
)
"""
from fastapi import FastAPI
from fastapi import Request
from fastapi.responses import JSONResponse
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.utils.logger import setup_logger
logger = setup_logger()
class OnyxError(Exception):
"""Structured error that maps to a specific ``OnyxErrorCode``.
Attributes:
error_code: The ``OnyxErrorCode`` enum member.
message: Human-readable message (defaults to the error code string).
status_code: HTTP status — either overridden or from the error code.
"""
def __init__(
self,
error_code: OnyxErrorCode,
message: str | None = None,
*,
status_code_override: int | None = None,
) -> None:
self.error_code = error_code
self.message = message or error_code.code
self._status_code_override = status_code_override
super().__init__(self.message)
@property
def status_code(self) -> int:
return self._status_code_override or self.error_code.status_code
def register_onyx_exception_handlers(app: FastAPI) -> None:
"""Register a global handler that converts ``OnyxError`` to JSON responses.
Must be called *after* the app is created but *before* it starts serving.
The handler logs at WARNING for 4xx and ERROR for 5xx.
"""
@app.exception_handler(OnyxError)
async def _handle_onyx_error(
request: Request, # noqa: ARG001
exc: OnyxError,
) -> JSONResponse:
status_code = exc.status_code
if status_code >= 500:
logger.error(f"OnyxError {exc.error_code.code}: {exc.message}")
elif status_code >= 400:
logger.warning(f"OnyxError {exc.error_code.code}: {exc.message}")
return JSONResponse(
status_code=status_code,
content=exc.error_code.detail(exc.message),
)

View File

@@ -4,39 +4,33 @@ import base64
import json
import uuid
from typing import Any
from typing import cast
from typing import Dict
from typing import Optional
from onyx.cache.factory import get_cache_backend
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Redis key prefix for OAuth state
OAUTH_STATE_PREFIX = "federated_oauth"
# Default TTL for OAuth state (5 minutes)
OAUTH_STATE_TTL = 300
OAUTH_STATE_TTL = 300 # 5 minutes
class OAuthSession:
"""Represents an OAuth session stored in Redis."""
"""Represents an OAuth session stored in the cache backend."""
def __init__(
self,
federated_connector_id: int,
user_id: str,
redirect_uri: Optional[str] = None,
additional_data: Optional[Dict[str, Any]] = None,
redirect_uri: str | None = None,
additional_data: dict[str, Any] | None = None,
):
self.federated_connector_id = federated_connector_id
self.user_id = user_id
self.redirect_uri = redirect_uri
self.additional_data = additional_data or {}
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for Redis storage."""
def to_dict(self) -> dict[str, Any]:
return {
"federated_connector_id": self.federated_connector_id,
"user_id": self.user_id,
@@ -45,8 +39,7 @@ class OAuthSession:
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "OAuthSession":
"""Create from dictionary retrieved from Redis."""
def from_dict(cls, data: dict[str, Any]) -> "OAuthSession":
return cls(
federated_connector_id=data["federated_connector_id"],
user_id=data["user_id"],
@@ -58,31 +51,27 @@ class OAuthSession:
def generate_oauth_state(
federated_connector_id: int,
user_id: str,
redirect_uri: Optional[str] = None,
additional_data: Optional[Dict[str, Any]] = None,
redirect_uri: str | None = None,
additional_data: dict[str, Any] | None = None,
ttl: int = OAUTH_STATE_TTL,
) -> str:
"""
Generate a secure state parameter and store session data in Redis.
Generate a secure state parameter and store session data in the cache backend.
Args:
federated_connector_id: ID of the federated connector
user_id: ID of the user initiating OAuth
redirect_uri: Optional redirect URI after OAuth completion
additional_data: Any additional data to store with the session
ttl: Time-to-live in seconds for the Redis key
ttl: Time-to-live in seconds for the cache key
Returns:
Base64-encoded state parameter
"""
# Generate a random UUID for the state
state_uuid = uuid.uuid4()
state_b64 = base64.urlsafe_b64encode(state_uuid.bytes).decode("utf-8").rstrip("=")
# Convert UUID to base64 for URL-safe state parameter
state_bytes = state_uuid.bytes
state_b64 = base64.urlsafe_b64encode(state_bytes).decode("utf-8").rstrip("=")
# Create session object
session = OAuthSession(
federated_connector_id=federated_connector_id,
user_id=user_id,
@@ -90,15 +79,9 @@ def generate_oauth_state(
additional_data=additional_data,
)
# Store in Redis with TTL
redis_client = get_redis_client()
redis_key = f"{OAUTH_STATE_PREFIX}:{state_uuid}"
redis_client.set(
redis_key,
json.dumps(session.to_dict()),
ex=ttl,
)
cache = get_cache_backend()
cache_key = f"{OAUTH_STATE_PREFIX}:{state_uuid}"
cache.set(cache_key, json.dumps(session.to_dict()), ex=ttl)
logger.info(
f"Generated OAuth state for federated_connector_id={federated_connector_id}, "
@@ -125,18 +108,15 @@ def verify_oauth_state(state: str) -> OAuthSession:
state_bytes = base64.urlsafe_b64decode(padded_state)
state_uuid = uuid.UUID(bytes=state_bytes)
# Look up in Redis
redis_client = get_redis_client()
redis_key = f"{OAUTH_STATE_PREFIX}:{state_uuid}"
cache = get_cache_backend()
cache_key = f"{OAUTH_STATE_PREFIX}:{state_uuid}"
session_data = cast(bytes, redis_client.get(redis_key))
session_data = cache.get(cache_key)
if not session_data:
raise ValueError(f"OAuth state not found in Redis: {state}")
raise ValueError(f"OAuth state not found: {state}")
# Delete the key after retrieval (one-time use)
redis_client.delete(redis_key)
cache.delete(cache_key)
# Parse and return session
session_dict = json.loads(session_data)
return OAuthSession.from_dict(session_dict)

View File

@@ -1,13 +1,11 @@
import json
from typing import cast
from redis.client import Redis
from onyx.cache.interface import CacheBackend
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import KVStore
from onyx.key_value_store.interface import KeyValueStore
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger
from onyx.utils.special_types import JSON_ro
@@ -20,22 +18,27 @@ KV_REDIS_KEY_EXPIRATION = 60 * 60 * 24 # 1 Day
class PgRedisKVStore(KeyValueStore):
def __init__(self, redis_client: Redis | None = None) -> None:
# If no redis_client is provided, fall back to the context var
if redis_client is not None:
self.redis_client = redis_client
else:
self.redis_client = get_redis_client()
def __init__(self, cache: CacheBackend | None = None) -> None:
self._cache = cache
def _get_cache(self) -> CacheBackend:
if self._cache is None:
from onyx.cache.factory import get_cache_backend
self._cache = get_cache_backend()
return self._cache
def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None:
# Not encrypted in Redis, but encrypted in Postgres
# Not encrypted in Cache backend (typically Redis), but encrypted in Postgres
try:
self.redis_client.set(
self._get_cache().set(
REDIS_KEY_PREFIX + key, json.dumps(val), ex=KV_REDIS_KEY_EXPIRATION
)
except Exception as e:
# Fallback gracefully to Postgres if Redis fails
logger.error(f"Failed to set value in Redis for key '{key}': {str(e)}")
# Fallback gracefully to Postgres if Cache backend fails
logger.error(
f"Failed to set value in Cache backend for key '{key}': {str(e)}"
)
encrypted_val = val if encrypt else None
plain_val = val if not encrypt else None
@@ -53,16 +56,12 @@ class PgRedisKVStore(KeyValueStore):
def load(self, key: str, refresh_cache: bool = False) -> JSON_ro:
if not refresh_cache:
try:
redis_value = self.redis_client.get(REDIS_KEY_PREFIX + key)
if redis_value:
if not isinstance(redis_value, bytes):
raise ValueError(
f"Redis value for key '{key}' is not a bytes object"
)
return json.loads(redis_value.decode("utf-8"))
cached = self._get_cache().get(REDIS_KEY_PREFIX + key)
if cached is not None:
return json.loads(cached.decode("utf-8"))
except Exception as e:
logger.error(
f"Failed to get value from Redis for key '{key}': {str(e)}"
f"Failed to get value from cache for key '{key}': {str(e)}"
)
with get_session_with_current_tenant() as db_session:
@@ -79,21 +78,21 @@ class PgRedisKVStore(KeyValueStore):
value = None
try:
self.redis_client.set(
self._get_cache().set(
REDIS_KEY_PREFIX + key,
json.dumps(value),
ex=KV_REDIS_KEY_EXPIRATION,
)
except Exception as e:
logger.error(f"Failed to set value in Redis for key '{key}': {str(e)}")
logger.error(f"Failed to set value in cache for key '{key}': {str(e)}")
return cast(JSON_ro, value)
def delete(self, key: str) -> None:
try:
self.redis_client.delete(REDIS_KEY_PREFIX + key)
self._get_cache().delete(REDIS_KEY_PREFIX + key)
except Exception as e:
logger.error(f"Failed to delete value from Redis for key '{key}': {str(e)}")
logger.error(f"Failed to delete value from cache for key '{key}': {str(e)}")
with get_session_with_current_tenant() as db_session:
result = db_session.query(KVStore).filter_by(key=key).delete()

View File

@@ -67,6 +67,18 @@ Status checked against LiteLLM v1.81.6-nightly (2026-02-02):
STATUS: STILL NEEDED - litellm_core_utils/litellm_logging.py lines 3185-3199 set
usage as a dict with chat completion format instead of keeping it as
ResponseAPIUsage. Our patch creates a deep copy before modification.
7. Responses API metadata=None TypeError (_patch_responses_metadata_none):
- LiteLLM's @client decorator wrapper in utils.py uses kwargs.get("metadata", {})
to check for router calls, but when metadata is explicitly None (key exists with
value None), the default {} is not used
- This causes "argument of type 'NoneType' is not iterable" TypeError which swallows
the real exception (e.g. AuthenticationError for wrong API key)
- Surfaces as: APIConnectionError: OpenAIException - argument of type 'NoneType' is
not iterable
STATUS: STILL NEEDED - litellm/utils.py wrapper function (line 1721) does not guard
against metadata being explicitly None. Triggered when Responses API bridge
passes **litellm_params containing metadata=None.
"""
import time
@@ -725,6 +737,44 @@ def _patch_logging_assembled_streaming_response() -> None:
LiteLLMLoggingObj._get_assembled_streaming_response = _patched_get_assembled_streaming_response # type: ignore[method-assign]
def _patch_responses_metadata_none() -> None:
"""
Patches litellm.responses to normalize metadata=None to metadata={} in kwargs.
LiteLLM's @client decorator wrapper in utils.py (line 1721) does:
_is_litellm_router_call = "model_group" in kwargs.get("metadata", {})
When metadata is explicitly None in kwargs, kwargs.get("metadata", {}) returns
None (the key exists, so the default is not used), causing:
TypeError: argument of type 'NoneType' is not iterable
This swallows the real exception (e.g. AuthenticationError) and surfaces as:
APIConnectionError: OpenAIException - argument of type 'NoneType' is not iterable
This happens when the Responses API bridge calls litellm.responses() with
**litellm_params which may contain metadata=None.
STATUS: STILL NEEDED - litellm/utils.py wrapper function uses kwargs.get("metadata", {})
which does not guard against metadata being explicitly None. Same pattern exists
on line 1407 for async path.
"""
import litellm as _litellm
from functools import wraps
original_responses = _litellm.responses
if getattr(original_responses, "_metadata_patched", False):
return
@wraps(original_responses)
def _patched_responses(*args: Any, **kwargs: Any) -> Any:
if kwargs.get("metadata") is None:
kwargs["metadata"] = {}
return original_responses(*args, **kwargs)
_patched_responses._metadata_patched = True # type: ignore[attr-defined]
_litellm.responses = _patched_responses
def apply_monkey_patches() -> None:
"""
Apply all necessary monkey patches to LiteLLM for compatibility.
@@ -736,6 +786,7 @@ def apply_monkey_patches() -> None:
- Patching AzureOpenAIResponsesAPIConfig.should_fake_stream to enable native streaming
- Patching ResponsesAPIResponse.model_construct to fix usage format in all code paths
- Patching LiteLLMLoggingObj._get_assembled_streaming_response to avoid mutating original response
- Patching litellm.responses to fix metadata=None causing TypeError in error handling
"""
_patch_ollama_chunk_parser()
_patch_openai_responses_parallel_tool_calls()
@@ -743,3 +794,4 @@ def apply_monkey_patches() -> None:
_patch_azure_responses_should_fake_stream()
_patch_responses_api_usage_format()
_patch_logging_assembled_streaming_response()
_patch_responses_metadata_none()

View File

@@ -92,6 +92,98 @@ def _prompt_to_dicts(prompt: LanguageModelInput) -> list[dict[str, Any]]:
return [prompt.model_dump(exclude_none=True)]
def _normalize_content(raw: Any) -> str:
"""Normalize a message content field to a plain string.
Content can be a string, None, or a list of content-block dicts
(e.g. [{"type": "text", "text": "..."}]).
"""
if raw is None:
return ""
if isinstance(raw, str):
return raw
if isinstance(raw, list):
return "\n".join(
block.get("text", "") if isinstance(block, dict) else str(block)
for block in raw
)
return str(raw)
def _strip_tool_content_from_messages(
messages: list[dict[str, Any]],
) -> list[dict[str, Any]]:
"""Convert tool-related messages to plain text.
Bedrock's Converse API requires toolConfig when messages contain
toolUse/toolResult content blocks. When no tools are provided for the
current request, we must convert any tool-related history into plain text
to avoid the "toolConfig field must be defined" error.
This is the same approach used by _OllamaHistoryMessageFormatter.
"""
result: list[dict[str, Any]] = []
for msg in messages:
role = msg.get("role")
tool_calls = msg.get("tool_calls")
if role == "assistant" and tool_calls:
# Convert structured tool calls to text representation
tool_call_lines = []
for tc in tool_calls:
func = tc.get("function", {})
name = func.get("name", "unknown")
args = func.get("arguments", "{}")
tc_id = tc.get("id", "")
tool_call_lines.append(
f"[Tool Call] name={name} id={tc_id} args={args}"
)
existing_content = _normalize_content(msg.get("content"))
parts = (
[existing_content] + tool_call_lines
if existing_content
else tool_call_lines
)
new_msg = {
"role": "assistant",
"content": "\n".join(parts),
}
result.append(new_msg)
elif role == "tool":
# Convert tool response to user message with text content
tool_call_id = msg.get("tool_call_id", "")
content = _normalize_content(msg.get("content"))
tool_result_text = f"[Tool Result] id={tool_call_id}\n{content}"
# Merge into previous user message if it is also a converted
# tool result to avoid consecutive user messages (Bedrock requires
# strict user/assistant alternation).
if (
result
and result[-1]["role"] == "user"
and "[Tool Result]" in result[-1].get("content", "")
):
result[-1]["content"] += "\n\n" + tool_result_text
else:
result.append({"role": "user", "content": tool_result_text})
else:
result.append(msg)
return result
def _messages_contain_tool_content(messages: list[dict[str, Any]]) -> bool:
"""Check if any messages contain tool-related content blocks."""
for msg in messages:
if msg.get("role") == "tool":
return True
if msg.get("role") == "assistant" and msg.get("tool_calls"):
return True
return False
def _is_vertex_model_rejecting_output_config(model_name: str) -> bool:
normalized_model_name = model_name.lower()
return any(
@@ -404,13 +496,30 @@ class LitellmLLM(LLM):
else nullcontext()
)
with env_ctx:
messages = _prompt_to_dicts(prompt)
# Bedrock's Converse API requires toolConfig when messages
# contain toolUse/toolResult content blocks. When no tools are
# provided for this request but the history contains tool
# content from previous turns, strip it to plain text.
is_bedrock = self._model_provider in {
LlmProviderNames.BEDROCK,
LlmProviderNames.BEDROCK_CONVERSE,
}
if (
is_bedrock
and not tools
and _messages_contain_tool_content(messages)
):
messages = _strip_tool_content_from_messages(messages)
response = litellm.completion(
mock_response=get_llm_mock_response() or MOCK_LLM_RESPONSE,
model=model,
base_url=self._api_base or None,
api_version=self._api_version or None,
custom_llm_provider=self._custom_llm_provider or None,
messages=_prompt_to_dicts(prompt),
messages=messages,
tools=tools,
tool_choice=tool_choice,
stream=stream,

View File

@@ -13,44 +13,38 @@ from datetime import datetime
import httpx
from sqlalchemy.orm import Session
from onyx.cache.factory import get_cache_backend
from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL
from onyx.db.llm import fetch_auto_mode_providers
from onyx.db.llm import sync_auto_mode_models
from onyx.llm.well_known_providers.auto_update_models import LLMRecommendations
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Redis key for caching the last updated timestamp (per-tenant)
_REDIS_KEY_LAST_UPDATED_AT = "auto_llm_update:last_updated_at"
_CACHE_KEY_LAST_UPDATED_AT = "auto_llm_update:last_updated_at"
_CACHE_TTL_SECONDS = 60 * 60 * 24 # 24 hours
def _get_cached_last_updated_at() -> datetime | None:
"""Get the cached last_updated_at timestamp from Redis."""
try:
redis_client = get_redis_client()
value = redis_client.get(_REDIS_KEY_LAST_UPDATED_AT)
if value and isinstance(value, bytes):
# Value is bytes, decode to string then parse as ISO format
value = get_cache_backend().get(_CACHE_KEY_LAST_UPDATED_AT)
if value is not None:
return datetime.fromisoformat(value.decode("utf-8"))
except Exception as e:
logger.warning(f"Failed to get cached last_updated_at from Redis: {e}")
logger.warning(f"Failed to get cached last_updated_at: {e}")
return None
def _set_cached_last_updated_at(updated_at: datetime) -> None:
"""Set the cached last_updated_at timestamp in Redis."""
try:
redis_client = get_redis_client()
# Store as ISO format string, with 24 hour expiration
redis_client.set(
_REDIS_KEY_LAST_UPDATED_AT,
get_cache_backend().set(
_CACHE_KEY_LAST_UPDATED_AT,
updated_at.isoformat(),
ex=60 * 60 * 24, # 24 hours
ex=_CACHE_TTL_SECONDS,
)
except Exception as e:
logger.warning(f"Failed to set cached last_updated_at in Redis: {e}")
logger.warning(f"Failed to set cached last_updated_at: {e}")
def fetch_llm_recommendations_from_github(
@@ -148,9 +142,8 @@ def sync_llm_models_from_github(
def reset_cache() -> None:
"""Reset the cache timestamp in Redis. Useful for testing."""
"""Reset the cache timestamp. Useful for testing."""
try:
redis_client = get_redis_client()
redis_client.delete(_REDIS_KEY_LAST_UPDATED_AT)
get_cache_backend().delete(_CACHE_KEY_LAST_UPDATED_AT)
except Exception as e:
logger.warning(f"Failed to reset cache in Redis: {e}")
logger.warning(f"Failed to reset cache: {e}")

View File

@@ -32,11 +32,14 @@ from onyx.auth.schemas import UserUpdate
from onyx.auth.users import auth_backend
from onyx.auth.users import create_onyx_oauth_router
from onyx.auth.users import fastapi_users
from onyx.cache.interface import CacheBackendType
from onyx.configs.app_configs import APP_API_PREFIX
from onyx.configs.app_configs import APP_HOST
from onyx.configs.app_configs import APP_PORT
from onyx.configs.app_configs import AUTH_RATE_LIMITING_ENABLED
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import CACHE_BACKEND
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import LOG_ENDPOINT_LATENCY
from onyx.configs.app_configs import OAUTH_CLIENT_ID
from onyx.configs.app_configs import OAUTH_CLIENT_SECRET
@@ -56,6 +59,7 @@ from onyx.db.engine.async_sql_engine import get_sqlalchemy_async_engine
from onyx.db.engine.connection_warmup import warm_up_connections
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.error_handling.exceptions import register_onyx_exception_handlers
from onyx.file_store.file_store import get_default_file_store
from onyx.server.api_key.api import router as api_key_router
from onyx.server.auth_check import check_router_auth
@@ -254,8 +258,53 @@ def include_auth_router_with_prefix(
)
def validate_cache_backend_settings() -> None:
"""Validate that CACHE_BACKEND=postgres is only used with DISABLE_VECTOR_DB.
The Postgres cache backend eliminates the Redis dependency, but only works
when Celery is not running (which requires DISABLE_VECTOR_DB=true).
"""
if CACHE_BACKEND == CacheBackendType.POSTGRES and not DISABLE_VECTOR_DB:
raise RuntimeError(
"CACHE_BACKEND=postgres requires DISABLE_VECTOR_DB=true. "
"The Postgres cache backend is only supported in no-vector-DB "
"deployments where Celery is replaced by the in-process task runner."
)
def validate_no_vector_db_settings() -> None:
"""Validate that DISABLE_VECTOR_DB is not combined with incompatible settings.
Raises RuntimeError if DISABLE_VECTOR_DB is set alongside MULTI_TENANT or ENABLE_CRAFT,
since these modes require infrastructure that is removed in no-vector-DB deployments.
"""
if not DISABLE_VECTOR_DB:
return
if MULTI_TENANT:
raise RuntimeError(
"DISABLE_VECTOR_DB cannot be used with MULTI_TENANT. "
"Multi-tenant deployments require the vector database for "
"per-tenant document indexing and search. Run in single-tenant "
"mode when disabling the vector database."
)
from onyx.server.features.build.configs import ENABLE_CRAFT
if ENABLE_CRAFT:
raise RuntimeError(
"DISABLE_VECTOR_DB cannot be used with ENABLE_CRAFT. "
"Onyx Craft requires background workers for sandbox lifecycle "
"management, which are removed in no-vector-DB deployments. "
"Disable Craft (ENABLE_CRAFT=false) when disabling the vector database."
)
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: # noqa: ARG001
validate_no_vector_db_settings()
validate_cache_backend_settings()
# Set recursion limit
if SYSTEM_RECURSION_LIMIT is not None:
sys.setrecursionlimit(SYSTEM_RECURSION_LIMIT)
@@ -324,8 +373,20 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: # noqa: ARG001
if AUTH_RATE_LIMITING_ENABLED:
await setup_auth_limiter()
if DISABLE_VECTOR_DB:
from onyx.background.periodic_poller import recover_stuck_user_files
from onyx.background.periodic_poller import start_periodic_poller
recover_stuck_user_files(POSTGRES_DEFAULT_SCHEMA)
start_periodic_poller(POSTGRES_DEFAULT_SCHEMA)
yield
if DISABLE_VECTOR_DB:
from onyx.background.periodic_poller import stop_periodic_poller
stop_periodic_poller()
SqlEngine.reset_engine()
if AUTH_RATE_LIMITING_ENABLED:
@@ -384,6 +445,8 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
status.HTTP_500_INTERNAL_SERVER_ERROR, log_http_error
)
register_onyx_exception_handlers(application)
include_router_with_global_prefix_prepended(application, password_router)
include_router_with_global_prefix_prepended(application, chat_router)
include_router_with_global_prefix_prepended(application, query_router)

View File

@@ -130,7 +130,7 @@ def format_slack_message(message: str | None) -> str:
message = _transform_outside_code_blocks(message, _sanitize_html)
message = _convert_slack_links_to_markdown(message)
normalized_message = _normalize_link_destinations(message)
md = create_markdown(renderer=SlackRenderer(), plugins=["strikethrough"])
md = create_markdown(renderer=SlackRenderer(), plugins=["strikethrough", "table"])
result = md(normalized_message)
# With HTMLRenderer, result is always str (not AST list)
assert isinstance(result, str)
@@ -146,6 +146,11 @@ class SlackRenderer(HTMLRenderer):
SPECIALS: dict[str, str] = {"&": "&amp;", "<": "&lt;", ">": "&gt;"}
def __init__(self) -> None:
super().__init__()
self._table_headers: list[str] = []
self._current_row_cells: list[str] = []
def escape_special(self, text: str) -> str:
for special, replacement in self.SPECIALS.items():
text = text.replace(special, replacement)
@@ -218,5 +223,48 @@ class SlackRenderer(HTMLRenderer):
# as literal &quot; text since Slack doesn't recognize that entity.
return self.escape_special(text)
# -- Table rendering (converts markdown tables to vertical cards) --
def table_cell(
self, text: str, align: str | None = None, head: bool = False # noqa: ARG002
) -> str:
if head:
self._table_headers.append(text.strip())
else:
self._current_row_cells.append(text.strip())
return ""
def table_head(self, text: str) -> str: # noqa: ARG002
self._current_row_cells = []
return ""
def table_row(self, text: str) -> str: # noqa: ARG002
cells = self._current_row_cells
self._current_row_cells = []
# First column becomes the bold title, remaining columns are bulleted fields
lines: list[str] = []
if cells:
title = cells[0]
if title:
# Avoid double-wrapping if cell already contains bold markup
if title.startswith("*") and title.endswith("*") and len(title) > 1:
lines.append(title)
else:
lines.append(f"*{title}*")
for i, cell in enumerate(cells[1:], start=1):
if i < len(self._table_headers):
lines.append(f"{self._table_headers[i]}: {cell}")
else:
lines.append(f"{cell}")
return "\n".join(lines) + "\n\n"
def table_body(self, text: str) -> str:
return text
def table(self, text: str) -> str:
self._table_headers = []
self._current_row_cells = []
return text + "\n"
def paragraph(self, text: str) -> str:
return f"{text}\n\n"

View File

@@ -3,10 +3,12 @@ import datetime
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from onyx.auth.schemas import UserRole
from onyx.configs.onyxbot_configs import ONYX_BOT_FEEDBACK_REMINDER
from onyx.configs.onyxbot_configs import ONYX_BOT_REACT_EMOJI
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import SlackChannelConfig
from onyx.db.user_preferences import activate_user
from onyx.db.users import add_slack_user_if_not_exists
from onyx.db.users import get_user_by_email
from onyx.onyxbot.slack.blocks import get_feedback_reminder_blocks
@@ -243,6 +245,44 @@ def handle_message(
)
return False
elif (
not existing_user.is_active
and existing_user.role == UserRole.SLACK_USER
):
check_seat_fn = fetch_ee_implementation_or_noop(
"onyx.db.license",
"check_seat_availability",
None,
)
seat_result = check_seat_fn(db_session=db_session)
if seat_result is not None and not seat_result.available:
logger.info(
f"Blocked inactive Slack user {message_info.email}: "
f"{seat_result.error_message}"
)
respond_in_thread_or_channel(
client=client,
channel=channel,
thread_ts=message_info.msg_to_respond,
text=(
"We weren't able to respond because your organization "
"has reached its user seat limit. Your account is "
"currently deactivated and cannot be reactivated "
"until more seats are available. Please contact "
"your Onyx administrator."
),
)
return False
activate_user(existing_user, db_session)
invalidate_license_cache_fn = fetch_ee_implementation_or_noop(
"onyx.db.license",
"invalidate_license_cache",
None,
)
invalidate_license_cache_fn()
logger.info(f"Reactivated inactive Slack user {message_info.email}")
add_slack_user_if_not_exists(db_session, message_info.email)
# first check if we need to respond with a standard answer

View File

@@ -92,6 +92,7 @@ from onyx.db.connector_credential_pair import get_connector_credential_pairs_for
from onyx.db.connector_credential_pair import (
get_connector_credential_pairs_for_user_parallel,
)
from onyx.db.connector_credential_pair import verify_user_has_access_to_cc_pair
from onyx.db.credentials import cleanup_gmail_credentials
from onyx.db.credentials import cleanup_google_drive_credentials
from onyx.db.credentials import create_credential
@@ -572,6 +573,43 @@ def _normalize_file_names_for_backwards_compatibility(
return file_names + file_locations[len(file_names) :]
def _fetch_and_check_file_connector_cc_pair_permissions(
connector_id: int,
user: User,
db_session: Session,
require_editable: bool,
) -> ConnectorCredentialPair:
cc_pair = fetch_connector_credential_pair_for_connector(db_session, connector_id)
if cc_pair is None:
raise HTTPException(
status_code=404,
detail="No Connector-Credential Pair found for this connector",
)
has_requested_access = verify_user_has_access_to_cc_pair(
cc_pair_id=cc_pair.id,
db_session=db_session,
user=user,
get_editable=require_editable,
)
if has_requested_access:
return cc_pair
# Special case: global curators should be able to manage files
# for public file connectors even when they are not the creator.
if (
require_editable
and user.role == UserRole.GLOBAL_CURATOR
and cc_pair.access_type == AccessType.PUBLIC
):
return cc_pair
raise HTTPException(
status_code=403,
detail="Access denied. User cannot manage files for this connector.",
)
@router.post("/admin/connector/file/upload", tags=PUBLIC_API_TAGS)
def upload_files_api(
files: list[UploadFile],
@@ -583,7 +621,7 @@ def upload_files_api(
@router.get("/admin/connector/{connector_id}/files", tags=PUBLIC_API_TAGS)
def list_connector_files(
connector_id: int,
user: User = Depends(current_curator_or_admin_user), # noqa: ARG001
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
) -> ConnectorFilesResponse:
"""List all files in a file connector."""
@@ -596,6 +634,13 @@ def list_connector_files(
status_code=400, detail="This endpoint only works with file connectors"
)
_ = _fetch_and_check_file_connector_cc_pair_permissions(
connector_id=connector_id,
user=user,
db_session=db_session,
require_editable=False,
)
file_locations = connector.connector_specific_config.get("file_locations", [])
file_names = connector.connector_specific_config.get("file_names", [])
@@ -645,7 +690,7 @@ def update_connector_files(
connector_id: int,
files: list[UploadFile] | None = File(None),
file_ids_to_remove: str = Form("[]"),
user: User = Depends(current_curator_or_admin_user), # noqa: ARG001
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
) -> FileUploadResponse:
"""
@@ -663,12 +708,13 @@ def update_connector_files(
)
# Get the connector-credential pair for indexing/pruning triggers
cc_pair = fetch_connector_credential_pair_for_connector(db_session, connector_id)
if cc_pair is None:
raise HTTPException(
status_code=404,
detail="No Connector-Credential Pair found for this connector",
)
# and validate user permissions for file management.
cc_pair = _fetch_and_check_file_connector_cc_pair_permissions(
connector_id=connector_id,
user=user,
db_session=db_session,
require_editable=True,
)
# Parse file IDs to remove
try:

View File

@@ -961,9 +961,9 @@
"license": "MIT"
},
"node_modules/@hono/node-server": {
"version": "1.19.9",
"resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.9.tgz",
"integrity": "sha512-vHL6w3ecZsky+8P5MD+eFfaGTyCeOHUIFYMGpQGbrBTSmNNoxv0if69rEZ5giu36weC5saFuznL411gRX7bJDw==",
"version": "1.19.10",
"resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.10.tgz",
"integrity": "sha512-hZ7nOssGqRgyV3FVVQdfi+U4q02uB23bpnYpdvNXkYTRRyWx84b7yf1ans+dnJ/7h41sGL3CeQTfO+ZGxuO+Iw==",
"license": "MIT",
"engines": {
"node": ">=18.14.1"
@@ -1573,27 +1573,6 @@
}
}
},
"node_modules/@isaacs/balanced-match": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/@isaacs/balanced-match/-/balanced-match-4.0.1.tgz",
"integrity": "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==",
"license": "MIT",
"engines": {
"node": "20 || >=22"
}
},
"node_modules/@isaacs/brace-expansion": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/@isaacs/brace-expansion/-/brace-expansion-5.0.1.tgz",
"integrity": "sha512-WMz71T1JS624nWj2n2fnYAuPovhv7EUhk69R6i9dsVyzxt5eM3bjwvgk9L+APE1TRscGysAVMANkB0jh0LQZrQ==",
"license": "MIT",
"dependencies": {
"@isaacs/balanced-match": "^4.0.1"
},
"engines": {
"node": "20 || >=22"
}
},
"node_modules/@jridgewell/gen-mapping": {
"version": "0.3.13",
"resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz",
@@ -1680,9 +1659,9 @@
}
},
"node_modules/@modelcontextprotocol/sdk/node_modules/ajv": {
"version": "8.17.1",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz",
"integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==",
"version": "8.18.0",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.18.0.tgz",
"integrity": "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==",
"license": "MIT",
"dependencies": {
"fast-deep-equal": "^3.1.3",
@@ -3855,6 +3834,27 @@
"path-browserify": "^1.0.1"
}
},
"node_modules/@ts-morph/common/node_modules/balanced-match": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz",
"integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==",
"license": "MIT",
"engines": {
"node": "18 || 20 || >=22"
}
},
"node_modules/@ts-morph/common/node_modules/brace-expansion": {
"version": "5.0.3",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.3.tgz",
"integrity": "sha512-fy6KJm2RawA5RcHkLa1z/ScpBeA762UF9KmZQxwIbDtRJrgLzM10depAiEQ+CXYcoiqW1/m96OAAoke2nE9EeA==",
"license": "MIT",
"dependencies": {
"balanced-match": "^4.0.2"
},
"engines": {
"node": "18 || 20 || >=22"
}
},
"node_modules/@ts-morph/common/node_modules/fast-glob": {
"version": "3.3.3",
"resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz",
@@ -3884,15 +3884,15 @@
}
},
"node_modules/@ts-morph/common/node_modules/minimatch": {
"version": "10.1.1",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.1.1.tgz",
"integrity": "sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==",
"version": "10.2.4",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.4.tgz",
"integrity": "sha512-oRjTw/97aTBN0RHbYCdtF1MQfvusSIBQM0IZEgzl6426+8jSC0nF1a/GmnVLpfB9yyr6g6FTqWqiZVbxrtaCIg==",
"license": "BlueOak-1.0.0",
"dependencies": {
"@isaacs/brace-expansion": "^5.0.0"
"brace-expansion": "^5.0.2"
},
"engines": {
"node": "20 || >=22"
"node": "18 || 20 || >=22"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
@@ -4234,13 +4234,13 @@
}
},
"node_modules/@typescript-eslint/typescript-estree/node_modules/minimatch": {
"version": "9.0.5",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
"integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
"version": "9.0.9",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.9.tgz",
"integrity": "sha512-OBwBN9AL4dqmETlpS2zasx+vTeWclWzkblfZk7KTA5j3jeOONz/tRCnZomUyvNg83wL5Zv9Ss6HMJXAgL8R2Yg==",
"dev": true,
"license": "ISC",
"dependencies": {
"brace-expansion": "^2.0.1"
"brace-expansion": "^2.0.2"
},
"engines": {
"node": ">=16 || 14 >=14.17"
@@ -4619,9 +4619,9 @@
}
},
"node_modules/ajv": {
"version": "6.12.6",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
"integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
"version": "6.14.0",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.14.0.tgz",
"integrity": "sha512-IWrosm/yrn43eiKqkfkHis7QioDleaXQHdDVPKg0FSwwd/DuvyX79TZnFOnYpB7dcsFAMmtFztZuXPDvSePkFw==",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -4653,9 +4653,9 @@
}
},
"node_modules/ajv-formats/node_modules/ajv": {
"version": "8.17.1",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz",
"integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==",
"version": "8.18.0",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.18.0.tgz",
"integrity": "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==",
"license": "MIT",
"dependencies": {
"fast-deep-equal": "^3.1.3",
@@ -7424,9 +7424,9 @@
}
},
"node_modules/hono": {
"version": "4.11.7",
"resolved": "https://registry.npmjs.org/hono/-/hono-4.11.7.tgz",
"integrity": "sha512-l7qMiNee7t82bH3SeyUCt9UF15EVmaBvsppY2zQtrbIhl/yzBTny+YUxsVjSjQ6gaqaeVtZmGocom8TzBlA4Yw==",
"version": "4.12.5",
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.5.tgz",
"integrity": "sha512-3qq+FUBtlTHhtYxbxheZgY8NIFnkkC/MR8u5TTsr7YZ3wixryQ3cCwn3iZbg8p8B88iDBBAYSfZDS75t8MN7Vg==",
"license": "MIT",
"engines": {
"node": ">=16.9.0"
@@ -8831,9 +8831,9 @@
}
},
"node_modules/minimatch": {
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
"version": "3.1.5",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz",
"integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==",
"dev": true,
"license": "ISC",
"dependencies": {

View File

@@ -1133,7 +1133,8 @@ done
# Already deleted
service_deleted = True
else:
logger.warning(f"Error deleting Service {service_name}: {e}")
logger.error(f"Error deleting Service {service_name}: {e}")
raise
pod_deleted = False
try:
@@ -1148,7 +1149,8 @@ done
# Already deleted
pod_deleted = True
else:
logger.warning(f"Error deleting Pod {pod_name}: {e}")
logger.error(f"Error deleting Pod {pod_name}: {e}")
raise
# Wait for resources to be fully deleted to prevent 409 conflicts
# on immediate re-provisioning

View File

@@ -80,7 +80,7 @@ def cleanup_idle_sandboxes_task(self: Task, *, tenant_id: str) -> None: # noqa:
# Prevent overlapping runs of this task
if not lock.acquire(blocking=False):
task_logger.debug("cleanup_idle_sandboxes_task - lock not acquired, skipping")
task_logger.info("cleanup_idle_sandboxes_task - lock not acquired, skipping")
return
try:

View File

@@ -11,6 +11,7 @@ from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.document_set import check_document_sets_are_public
from onyx.db.document_set import delete_document_set as db_delete_document_set
from onyx.db.document_set import fetch_all_document_sets_for_user
from onyx.db.document_set import get_document_set_by_id
from onyx.db.document_set import insert_document_set
@@ -142,7 +143,10 @@ def delete_document_set(
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))
if not DISABLE_VECTOR_DB:
if DISABLE_VECTOR_DB:
db_session.refresh(document_set)
db_delete_document_set(document_set, db_session)
else:
client_app.send_task(
OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
kwargs={"tenant_id": tenant_id},

View File

@@ -32,7 +32,7 @@ from onyx.db.persona import get_persona_snapshots_for_user
from onyx.db.persona import get_persona_snapshots_paginated
from onyx.db.persona import mark_persona_as_deleted
from onyx.db.persona import mark_persona_as_not_deleted
from onyx.db.persona import update_persona_is_default
from onyx.db.persona import update_persona_featured
from onyx.db.persona import update_persona_label
from onyx.db.persona import update_persona_public_status
from onyx.db.persona import update_persona_shared
@@ -130,8 +130,8 @@ class IsPublicRequest(BaseModel):
is_public: bool
class IsDefaultRequest(BaseModel):
is_default_persona: bool
class IsFeaturedRequest(BaseModel):
featured: bool
@admin_router.patch("/{persona_id}/visible")
@@ -168,22 +168,22 @@ def patch_user_persona_public_status(
raise HTTPException(status_code=403, detail=str(e))
@admin_router.patch("/{persona_id}/default")
def patch_persona_default_status(
@admin_router.patch("/{persona_id}/featured")
def patch_persona_featured_status(
persona_id: int,
is_default_request: IsDefaultRequest,
is_featured_request: IsFeaturedRequest,
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
) -> None:
try:
update_persona_is_default(
update_persona_featured(
persona_id=persona_id,
is_default=is_default_request.is_default_persona,
featured=is_featured_request.featured,
db_session=db_session,
user=user,
)
except ValueError as e:
logger.exception("Failed to update persona default status")
logger.exception("Failed to update persona featured status")
raise HTTPException(status_code=403, detail=str(e))

View File

@@ -5,7 +5,6 @@ from pydantic import BaseModel
from pydantic import Field
from onyx.configs.constants import DocumentSource
from onyx.context.search.enums import RecencyBiasSetting
from onyx.db.enums import HierarchyNodeType
from onyx.db.models import Document
from onyx.db.models import HierarchyNode
@@ -108,11 +107,7 @@ class PersonaUpsertRequest(BaseModel):
name: str
description: str
document_set_ids: list[int]
num_chunks: float
is_public: bool
recency_bias: RecencyBiasSetting
llm_filter_extraction: bool
llm_relevance_filter: bool
llm_model_provider_override: str | None = None
llm_model_version_override: str | None = None
starter_messages: list[StarterMessage] | None = None
@@ -128,7 +123,7 @@ class PersonaUpsertRequest(BaseModel):
)
search_start_date: datetime | None = None
label_ids: list[int] | None = None
is_default_persona: bool = False
featured: bool = False
display_priority: int | None = None
# Accept string UUIDs from frontend
user_file_ids: list[str] | None = None
@@ -155,9 +150,6 @@ class MinimalPersonaSnapshot(BaseModel):
tools: list[ToolSnapshot]
starter_messages: list[StarterMessage] | None
llm_relevance_filter: bool
llm_filter_extraction: bool
# only show document sets in the UI that the assistant has access to
document_sets: list[DocumentSetSummary]
# Counts for knowledge sources (used to determine if search tool should be enabled)
@@ -175,7 +167,7 @@ class MinimalPersonaSnapshot(BaseModel):
is_public: bool
is_visible: bool
display_priority: int | None
is_default_persona: bool
featured: bool
builtin_persona: bool
# Used for filtering
@@ -214,8 +206,6 @@ class MinimalPersonaSnapshot(BaseModel):
if should_expose_tool_to_fe(tool)
],
starter_messages=persona.starter_messages,
llm_relevance_filter=persona.llm_relevance_filter,
llm_filter_extraction=persona.llm_filter_extraction,
document_sets=[
DocumentSetSummary.from_model(document_set)
for document_set in persona.document_sets
@@ -230,7 +220,7 @@ class MinimalPersonaSnapshot(BaseModel):
is_public=persona.is_public,
is_visible=persona.is_visible,
display_priority=persona.display_priority,
is_default_persona=persona.is_default_persona,
featured=persona.featured,
builtin_persona=persona.builtin_persona,
labels=[PersonaLabelSnapshot.from_model(label) for label in persona.labels],
owner=(
@@ -252,11 +242,9 @@ class PersonaSnapshot(BaseModel):
# Return string UUIDs to frontend for consistency
user_file_ids: list[str]
display_priority: int | None
is_default_persona: bool
featured: bool
builtin_persona: bool
starter_messages: list[StarterMessage] | None
llm_relevance_filter: bool
llm_filter_extraction: bool
tools: list[ToolSnapshot]
labels: list["PersonaLabelSnapshot"]
owner: MinimalUserSnapshot | None
@@ -265,7 +253,6 @@ class PersonaSnapshot(BaseModel):
document_sets: list[DocumentSetSummary]
llm_model_provider_override: str | None
llm_model_version_override: str | None
num_chunks: float | None
# Hierarchy nodes attached for scoped search
hierarchy_nodes: list[HierarchyNodeSnapshot] = Field(default_factory=list)
# Individual documents attached for scoped search
@@ -289,11 +276,9 @@ class PersonaSnapshot(BaseModel):
icon_name=persona.icon_name,
user_file_ids=[str(file.id) for file in persona.user_files],
display_priority=persona.display_priority,
is_default_persona=persona.is_default_persona,
featured=persona.featured,
builtin_persona=persona.builtin_persona,
starter_messages=persona.starter_messages,
llm_relevance_filter=persona.llm_relevance_filter,
llm_filter_extraction=persona.llm_filter_extraction,
tools=[
ToolSnapshot.from_model(tool)
for tool in persona.tools
@@ -324,7 +309,6 @@ class PersonaSnapshot(BaseModel):
],
llm_model_provider_override=persona.llm_model_provider_override,
llm_model_version_override=persona.llm_model_version_override,
num_chunks=persona.num_chunks,
system_prompt=persona.system_prompt,
replace_base_system_prompt=persona.replace_base_system_prompt,
task_prompt=persona.task_prompt,
@@ -332,12 +316,10 @@ class PersonaSnapshot(BaseModel):
)
# Model with full context on perona's internal settings
# Model with full context on persona's internal settings
# This is used for flows which need to know all settings
class FullPersonaSnapshot(PersonaSnapshot):
search_start_date: datetime | None = None
llm_relevance_filter: bool = False
llm_filter_extraction: bool = False
@classmethod
def from_model(
@@ -360,7 +342,7 @@ class FullPersonaSnapshot(PersonaSnapshot):
icon_name=persona.icon_name,
user_file_ids=[str(file.id) for file in persona.user_files],
display_priority=persona.display_priority,
is_default_persona=persona.is_default_persona,
featured=persona.featured,
builtin_persona=persona.builtin_persona,
starter_messages=persona.starter_messages,
users=[
@@ -391,10 +373,7 @@ class FullPersonaSnapshot(PersonaSnapshot):
DocumentSetSummary.from_model(document_set_model)
for document_set_model in persona.document_sets
],
num_chunks=persona.num_chunks,
search_start_date=persona.search_start_date,
llm_relevance_filter=persona.llm_relevance_filter,
llm_filter_extraction=persona.llm_filter_extraction,
llm_model_provider_override=persona.llm_model_provider_override,
llm_model_version_override=persona.llm_model_version_override,
system_prompt=persona.system_prompt,

View File

@@ -2,6 +2,7 @@ import json
from uuid import UUID
from fastapi import APIRouter
from fastapi import BackgroundTasks
from fastapi import Depends
from fastapi import File
from fastapi import Form
@@ -12,13 +13,7 @@ from pydantic import BaseModel
from sqlalchemy.orm import Session
from onyx.auth.users import current_user
from onyx.background.celery.tasks.user_file_processing.tasks import (
enqueue_user_file_project_sync_task,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
get_user_file_project_sync_queue_depth,
)
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
@@ -34,7 +29,6 @@ from onyx.db.models import UserProject
from onyx.db.persona import get_personas_by_ids
from onyx.db.projects import get_project_token_count
from onyx.db.projects import upload_files_to_user_files_with_indexing
from onyx.redis.redis_pool import get_redis_client
from onyx.server.features.projects.models import CategorizedFilesSnapshot
from onyx.server.features.projects.models import ChatSessionRequest
from onyx.server.features.projects.models import TokenCountResponse
@@ -55,7 +49,27 @@ class UserFileDeleteResult(BaseModel):
assistant_names: list[str] = []
def _trigger_user_file_project_sync(user_file_id: UUID, tenant_id: str) -> None:
def _trigger_user_file_project_sync(
user_file_id: UUID,
tenant_id: str,
background_tasks: BackgroundTasks | None = None,
) -> None:
if DISABLE_VECTOR_DB and background_tasks is not None:
from onyx.background.task_utils import drain_project_sync_loop
background_tasks.add_task(drain_project_sync_loop, tenant_id)
logger.info(f"Queued in-process project sync for user_file_id={user_file_id}")
return
from onyx.background.celery.tasks.user_file_processing.tasks import (
enqueue_user_file_project_sync_task,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
get_user_file_project_sync_queue_depth,
)
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.redis.redis_pool import get_redis_client
queue_depth = get_user_file_project_sync_queue_depth(client_app)
if queue_depth > USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH:
logger.warning(
@@ -111,6 +125,7 @@ def create_project(
@router.post("/file/upload", tags=PUBLIC_API_TAGS)
def upload_user_files(
bg_tasks: BackgroundTasks,
files: list[UploadFile] = File(...),
project_id: int | None = Form(None),
temp_id_map: str | None = Form(None), # JSON string mapping hashed key -> temp_id
@@ -137,12 +152,12 @@ def upload_user_files(
user=user,
temp_id_map=parsed_temp_id_map,
db_session=db_session,
background_tasks=bg_tasks if DISABLE_VECTOR_DB else None,
)
return CategorizedFilesSnapshot.from_result(categorized_files_result)
except Exception as e:
# Log error with type, message, and stack for easier debugging
logger.exception(f"Error uploading files - {type(e).__name__}: {str(e)}")
raise HTTPException(
status_code=500,
@@ -192,6 +207,7 @@ def get_files_in_project(
def unlink_user_file_from_project(
project_id: int,
file_id: UUID,
bg_tasks: BackgroundTasks,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> Response:
@@ -208,7 +224,6 @@ def unlink_user_file_from_project(
if project is None:
raise HTTPException(status_code=404, detail="Project not found")
user_id = user.id
user_file = (
db_session.query(UserFile)
.filter(UserFile.id == file_id, UserFile.user_id == user_id)
@@ -224,7 +239,7 @@ def unlink_user_file_from_project(
db_session.commit()
tenant_id = get_current_tenant_id()
_trigger_user_file_project_sync(user_file.id, tenant_id)
_trigger_user_file_project_sync(user_file.id, tenant_id, bg_tasks)
return Response(status_code=204)
@@ -237,6 +252,7 @@ def unlink_user_file_from_project(
def link_user_file_to_project(
project_id: int,
file_id: UUID,
bg_tasks: BackgroundTasks,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> UserFileSnapshot:
@@ -268,7 +284,7 @@ def link_user_file_to_project(
db_session.commit()
tenant_id = get_current_tenant_id()
_trigger_user_file_project_sync(user_file.id, tenant_id)
_trigger_user_file_project_sync(user_file.id, tenant_id, bg_tasks)
return UserFileSnapshot.from_model(user_file)
@@ -335,7 +351,7 @@ def upsert_project_instructions(
class ProjectPayload(BaseModel):
project: UserProjectSnapshot
files: list[UserFileSnapshot] | None = None
persona_id_to_is_default: dict[int, bool] | None = None
persona_id_to_featured: dict[int, bool] | None = None
@router.get(
@@ -354,13 +370,11 @@ def get_project_details(
if session.persona_id is not None
]
personas = get_personas_by_ids(persona_ids, db_session)
persona_id_to_is_default = {
persona.id: persona.is_default_persona for persona in personas
}
persona_id_to_featured = {persona.id: persona.featured for persona in personas}
return ProjectPayload(
project=project,
files=files,
persona_id_to_is_default=persona_id_to_is_default,
persona_id_to_featured=persona_id_to_featured,
)
@@ -426,6 +440,7 @@ def delete_project(
@router.delete("/file/{file_id}", tags=PUBLIC_API_TAGS)
def delete_user_file(
file_id: UUID,
bg_tasks: BackgroundTasks,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> UserFileDeleteResult:
@@ -458,15 +473,25 @@ def delete_user_file(
db_session.commit()
tenant_id = get_current_tenant_id()
task = client_app.send_task(
OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
kwargs={"user_file_id": str(user_file.id), "tenant_id": tenant_id},
queue=OnyxCeleryQueues.USER_FILE_DELETE,
priority=OnyxCeleryPriority.HIGH,
)
logger.info(
f"Triggered delete for user_file_id={user_file.id} with task_id={task.id}"
)
if DISABLE_VECTOR_DB:
from onyx.background.task_utils import drain_delete_loop
bg_tasks.add_task(drain_delete_loop, tenant_id)
logger.info(f"Queued in-process delete for user_file_id={user_file.id}")
else:
from onyx.background.celery.versioned_apps.client import app as client_app
task = client_app.send_task(
OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
kwargs={"user_file_id": str(user_file.id), "tenant_id": tenant_id},
queue=OnyxCeleryQueues.USER_FILE_DELETE,
priority=OnyxCeleryPriority.HIGH,
)
logger.info(
f"Triggered delete for user_file_id={user_file.id} "
f"with task_id={task.id}"
)
return UserFileDeleteResult(
has_associations=False, project_names=[], assistant_names=[]
)

Some files were not shown because too many files have changed in this diff Show More