Compare commits

...

190 Commits

Author SHA1 Message Date
Wenxi
77f5411bf7 fix(ci): tag web-server and model-server with craft-latest (#9661)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 11:00:49 -07:00
Wenxi
c45caf1f1d refactor: use ods latest-stable-tag to tag images in Docker Hub (#9281)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-26 11:00:49 -07:00
Wenxi
4f534249d6 refactor: sync craft latest builds with latest stable (#9279) 2026-03-26 11:00:49 -07:00
Wenxi
eb87d88b89 feat(ods): use release-tag to print highest stable semver that should receive the latest tag (#9278)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-26 11:00:49 -07:00
github-actions[bot]
4fd6786ce2 fix(chat): dont clear input message after errors submitting (#9624) to release v3.0 (#9626)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-25 12:04:20 -07:00
github-actions[bot]
6919afe022 fix(ux): disable MCP Tools toggle if needs authenticated (#9607) to release v3.0 (#9608)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-24 15:54:52 -07:00
Justin Tahara
c4ac0fd286 fix(ui): Text focused paste from PowerPoint (#9603) 2026-03-24 14:31:19 -07:00
github-actions[bot]
d2f8e38e67 chore(playwright): mask date switcher in screenshots (#9584) to release v3.0 (#9585)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-23 18:45:09 -07:00
github-actions[bot]
bbd57c5904 fix(ux): display invalid agent fields on load (#9582) to release v3.0 (#9583)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-23 17:29:22 -07:00
github-actions[bot]
546d5cd384 fix(ux): give a tooltip with reason agent edit cannot save (#9571) to release v3.0 (#9572)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-23 13:54:01 -07:00
Evan Lohn
f902f49483 fix: last index time consistency (#9546) 2026-03-23 10:35:20 -07:00
Justin Tahara
ed3630e248 feat(backend): Adding procps (#9509) 2026-03-19 16:34:58 -07:00
Jamison Lahman
598e605dd2 chore(hotfix): cherry-pick 3 commits to release v3.0 (#9510)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-19 16:15:10 -07:00
Evan Lohn
aee02f6501 fix: drive rate limit retry (#9498) 2026-03-19 14:40:13 -07:00
Justin Tahara
2959470114 fix(code interpreter): Caching files (#9484) 2026-03-19 14:14:21 -07:00
github-actions[bot]
7d9a339e0b fix(fe): fix memories immediately losing focus on click (#9493) to release v3.0 (#9496)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-19 13:24:23 -07:00
Justin Tahara
a2742fcabf fix(agents): Agents are Private by Default (#9465) 2026-03-18 17:10:49 -07:00
Justin Tahara
ba4b4f0930 fix(logging): extract LiteLLM error details in image summarization failures (#9458) 2026-03-18 16:58:11 -07:00
Justin Tahara
74a4d620ad fix(celery): add dedup guardrails to user file delete queue (#9454)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-18 16:49:37 -07:00
Justin Tahara
51f46bd8f0 fix(celery): add task expiry to upload API send_task call (#9456)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-18 16:25:21 -07:00
Justin Tahara
e6cfe77a6d fix(image): stop dumping base64 image data into error logs (#9457) 2026-03-18 16:04:22 -07:00
github-actions[bot]
cc3719f356 fix(file upload): Allow zip file upload via query param (#9432) to release v3.0 (#9443)
Co-authored-by: Danelegend <43459662+Danelegend@users.noreply.github.com>
2026-03-18 09:25:05 -07:00
github-actions[bot]
b658ad8985 chore: bump next to 16.1.7 (#9423) to release v3.0 (#9448)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2026-03-18 09:24:54 -07:00
Nikolas Garza
b1632044ed fix(vespa): use weightedSet for ACL filters to prevent query failures (#9403) to release v3.0 (#9409) 2026-03-17 11:19:04 -07:00
github-actions[bot]
9fa8265f00 chore(tests): fix flaky test_run_with_timeout_raises_on_timeout (#9377) to release v3.0 (#9379)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-16 12:10:56 -07:00
github-actions[bot]
ce53e123dc fix(fe): bump flatted to patch CVE-2026-32141 (#9350) to release v3.0 (#9353)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2026-03-14 00:11:18 -07:00
github-actions[bot]
5606ae5e81 fix(litellm): filter embedding models (#9347) to release v3.0 (#9348)
Co-authored-by: Danelegend <43459662+Danelegend@users.noreply.github.com>
2026-03-13 23:43:50 -07:00
Evan Lohn
923e0691aa fix: sharepoint pages 400 list expand (#9321) 2026-03-13 11:43:08 -07:00
Evan Lohn
b232e2a771 fix: skip classic site pages (#9318) 2026-03-12 22:08:49 -07:00
Evan Lohn
c3ebfeda2f chore: sharepoint error logs (#9309) 2026-03-12 21:37:50 -07:00
github-actions[bot]
6a28dfedb1 fix(fe): prevent clicking InputSelect from selecting text (#9292) to release v3.0 (#9306)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-12 09:36:25 -07:00
github-actions[bot]
a123ec083d chore(devtools): upgrade ods: 0.6.3->0.7.0 (#9297) to release v3.0 (#9298)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-11 20:37:22 -07:00
Nikolas Garza
f448f1274d fix(slackbot): resolve channel references and filter search by channel tags (#9256) to release v3.0 (#9294) 2026-03-11 20:23:28 -07:00
Jamison Lahman
d12f8b94aa fix(fe): InputComboBox resets filter value on open (#9287) to release v3.0 (#9291) 2026-03-11 18:36:36 -07:00
Bo-Onyx
355fe2ff2c fix(api memory): replace glibc with jemalloc for memory allocating (#9196) to release v3.0 (#9282)
Co-authored-by: Justin Tahara <105671973+justin-tahara@users.noreply.github.com>
2026-03-11 14:58:43 -07:00
Nikolas Garza
8ec5423a0c fix(tests): remove deprecated o1-preview and o1-mini model tests (#9280) 2026-03-11 14:37:03 -07:00
Justin Tahara
79b615db46 feat(litellm): Adding FE Provider workflow (#9264) 2026-03-11 11:56:04 -07:00
Wenxi
98756bccd4 fix: discord connector async resource cleanup (#9203) 2026-03-11 11:53:57 -07:00
Wenxi
418f84ccdf fix: don't fetch mcp tools when no llms are configured (#9173) 2026-03-11 11:53:57 -07:00
Wenxi
d37756a884 fix(mcp): use CE-compatible chat endpoint for search_indexed_documents (#9193)
Co-authored-by: Fizza-Mukhtar <fizzamukhtar01@gmail.com>
2026-03-11 11:53:57 -07:00
Wenxi
9cdc92441b fix: fallback doc access when drive item is externally owned (#9053) 2026-03-11 11:53:57 -07:00
Wenxi
b8ed30644a fix: move available context tokens to useChatController and remove arbitrary 50% cap (#9174) 2026-03-11 11:53:57 -07:00
Danelegend
d7d19e5a28 feat(llm-provider): fetch litellm models (#8418) 2026-03-11 10:55:51 -07:00
github-actions[bot]
948650829d chore(release): upgrade release-tag (#9257) to release v3.0 (#9261)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-10 18:16:55 -07:00
Evan Lohn
b6e689be0f fix: update jira group sync endpoint (#9241) 2026-03-10 17:13:10 -07:00
github-actions[bot]
85877408c8 fix(fe): increase responsive breakpoint for centering modals (#9250) to release v3.0 (#9251)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-10 14:59:46 -07:00
github-actions[bot]
c00df75c79 fix(fe): correctly parse comma literals in CSVs (#9245) to release v3.0 (#9249)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-10 14:13:40 -07:00
github-actions[bot]
6352c9a09e fix(fe): make CSV inline display responsive (#9242) to release v3.0 (#9246)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-10 13:19:54 -07:00
github-actions[bot]
3065f70d7d fix: Prevent the removal and hiding of default model (#9131) to release v3.0 (#9225)
Co-authored-by: Danelegend <43459662+Danelegend@users.noreply.github.com>
2026-03-10 10:54:21 -07:00
Jamison Lahman
4befbc49dc chore(release): run playwright on release pushes (#9233) to release v3.0 (#9238) 2026-03-10 10:29:22 -07:00
Jamison Lahman
ae9679e8c4 fix(safari): Search results dont shrink (#9126) to release v3.0 (#9210) 2026-03-10 09:11:32 -07:00
SubashMohan
ea0ddee5c8 feat(custom-tools): enhance custom tool error handling and timeline UI (#9189) 2026-03-10 17:04:15 +05:30
Nikolas Garza
2826405dd2 fix: use detail instead of message in OnyxError response shape (#9214) to release v3.0 (#9220) 2026-03-09 19:15:29 -07:00
github-actions[bot]
8485bf4368 fix(fe): fix chat content padding (#9216) to release v3.0 (#9218)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-09 17:57:37 -07:00
github-actions[bot]
7bb52b0839 fix(code-interpreter): set default CODE_INTERPRETER_BASE_URL w/ docke… (#9215) to release v3.0 (#9219)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-09 17:57:21 -07:00
github-actions[bot]
85a54c01f1 feat(opensearch): Enable by default (#9211) to release v3.0 (#9217)
Co-authored-by: acaprau <48705707+acaprau@users.noreply.github.com>
2026-03-09 17:35:44 -07:00
github-actions[bot]
e4577bd564 fix(fe): move app padding inside overflow container (#9206) to release v3.0 (#9207)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-09 13:47:36 -07:00
Nikolas Garza
f150a7b940 fix(fe): fix broken slack bot admin pages (#9168) 2026-03-09 13:01:58 -07:00
Jamison Lahman
f1df36e306 feat(cli): package as docker image (#9167)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-07 03:18:47 +00:00
Wenxi
1611604269 chore(tests): add shared enable_ee fixture and test README (#9165) 2026-03-07 01:55:38 +00:00
Danelegend
c2a71091dc feat: jsonriver implementation w/ delta (#9161) 2026-03-07 00:23:24 +00:00
Jamison Lahman
cc008699e5 fix(a11y): InputSelect supports keyboard navigation (#9160)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-07 00:06:36 +00:00
Jamison Lahman
48802618db fix(fe): fix API Key Role dropdown options (#9154) 2026-03-06 22:13:52 +00:00
Justin Tahara
6917953b86 chore(projects): Turn off DR in Projects (#9150) 2026-03-06 22:08:14 +00:00
Jamison Lahman
e7cf027f8a chore(zizmor): fix rust-toolchain commit (#9153) 2026-03-06 21:53:57 +00:00
roshan
41fb1480bb docs(cli): improve onyx-cli SKILL.md and fix README default server URL (#9152) 2026-03-06 21:47:18 +00:00
Raunak Bhagat
bdc2bfdcee fix(fe): account for wrapper padding in textarea auto-resize (#9151) 2026-03-06 21:30:25 +00:00
Evan Lohn
8816d52b27 fix: vespa filter restrictions (#9138) 2026-03-06 21:08:07 +00:00
roshan
6590f1d7ba feat(cli): add PyPI and release workflow badges to README (#9148) 2026-03-06 21:01:42 +00:00
roshan
c527f75557 fix(ci): release workflow and ods build file improvements (#9149)
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-06 21:00:36 +00:00
Jamison Lahman
472d1788a7 fix(fe): add horizontal padding to chat page (#9147) 2026-03-06 20:46:56 +00:00
Wenxi
99e95f8205 chore: rm dead llm provider code and bump claude recs (#9145) 2026-03-06 20:02:38 +00:00
Justin Tahara
e618bf8385 fix(ui): LLM Model selection Cache 1/2 (#9141) 2026-03-06 19:53:53 +00:00
roshan
f4dcd130ba feat(cli): add PyPI wheel packaging for onyx-cli (#8992)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-03-06 19:42:46 +00:00
Jamison Lahman
910718deaa chore(playwright): hide flaky AppInputBar/llm-popover-trigger (#9144) 2026-03-06 19:40:43 +00:00
dependabot[bot]
1a7ca93b93 chore(deps): bump @tootallnate/once and jest-environment-jsdom in /web (#9050)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-06 19:14:33 +00:00
dependabot[bot]
a615a920cb chore(deps): bump express-rate-limit from 8.2.1 to 8.3.0 in /backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web (#9143)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-06 11:13:21 -08:00
Jamison Lahman
29d8b310b5 chore(deps): upgrade desktop deps (#9140) 2026-03-06 19:04:23 +00:00
Jamison Lahman
d1409ccafa chore(fe): rm redundant alignBubble (#9072) 2026-03-06 18:53:56 +00:00
Jamison Lahman
e41bad9103 chore(zizmor): fix issues (#9139) 2026-03-06 18:26:08 +00:00
dependabot[bot]
661dc831dc chore(deps): bump golang.org/x/net from 0.27.0 to 0.38.0 in /cli (#9133)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-06 16:22:41 +00:00
Raunak Bhagat
19016dd35a refactor: add Disabled primitive to @opal/core (#9136) 2026-03-06 11:48:22 +00:00
Raunak Bhagat
127b2dcc80 refactor: split Interactive into Stateless/Stateful, add SelectButton, kebab-case dirs (#9134)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-06 09:15:39 +00:00
Danelegend
b015a37cea feat: Docx preview variant (#9060) 2026-03-06 08:20:33 +00:00
SubashMohan
b45277a8b0 fix(tools): clean up orphaned OAuthConfig and preserve settings on OpenAPI tool update (#9086) 2026-03-06 07:34:32 +00:00
SubashMohan
893e8da79a feat(table): add server-side pagination, search filtering, view mode, and DragOverlay improvements (#9085) 2026-03-06 07:18:58 +00:00
roshan
a51f0d7cb2 feat: Onyx CLI (#8958)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-06 05:42:20 +00:00
Jamison Lahman
c826d0469e fix(fe): make BlinkingBar smaller (#9132)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-06 03:49:03 +00:00
Danelegend
0f6ae6f69c fix: Sync does not update default model (#9129) 2026-03-06 03:36:27 +00:00
Justin Tahara
d0836e2603 fix(ci): Cleaning up Release Tags (#8172) 2026-03-06 02:13:21 +00:00
dependabot[bot]
bda03bafca chore(deps): bump serialize-javascript and terser-webpack-plugin in /web (#9125)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-06 01:52:22 +00:00
Bo-Onyx
376adff94a chore(dr-tune): Move postgres sanitization and use it for DR output save (#9113) 2026-03-06 01:33:08 +00:00
Jamison Lahman
d2d4b89286 chore(deps): rm google-cloud-aiplatform (#9114) 2026-03-06 01:32:06 +00:00
Bo-Onyx
dde7a18bb7 fix(dr-opti): Fix snippet matcher for special characters (#9123) 2026-03-06 01:31:50 +00:00
acaprau
3f004cf02f chore(opensearch): Debug util script (#8815) 2026-03-06 01:23:13 +00:00
acaprau
ae893079c3 chore(documentation): Add comment in contributing_guides/best_practices.md about async (#8759) 2026-03-06 01:20:40 +00:00
acaprau
189c07a913 chore(devtools): AGENTS.md warns against enqueueing tasks without an expiration (#8654)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-06 01:20:18 +00:00
Wenxi
2b82743bf5 feat: lm studio provider (#8253) 2026-03-06 01:07:56 +00:00
Danelegend
ba2a5a60e1 chore: Add docx preview depend (#9059) 2026-03-06 00:05:53 +00:00
dependabot[bot]
5888f9d69f chore(deps): bump qs from 6.14.1 to 6.14.2 in /backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web (#8390)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-05 16:42:17 -08:00
dependabot[bot]
23b3a0a6ae chore(deps): bump minimatch in /examples/widget (#8886)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-05 16:41:43 -08:00
Jamison Lahman
eced88fa7a chore(tests): run golang tests in CI (#9118) 2026-03-05 16:39:02 -08:00
Jamison Lahman
f59aaa902d chore(playwright): tighten how elements are hidden (#9117) 2026-03-05 23:58:07 +00:00
Nikolas Garza
57349bdbd1 chore: OnyxError cleanup (#9071)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-05 23:21:38 +00:00
Wenxi
192639a801 chore: bump recommended models (#9112) 2026-03-05 23:02:18 +00:00
Jamison Lahman
c10ffbb464 fix(safari): chat background blur ignores text (#9111)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-05 15:54:25 -08:00
dependabot[bot]
091f41fd1f chore(deps): bump google-cloud-aiplatform from 1.121.0 to 1.133.0 (#8658)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-05 22:54:42 +00:00
dependabot[bot]
45d77be4eb chore(deps): bump ajv in /backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web (#8655)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-05 15:14:50 -08:00
dependabot[bot]
413fa85134 chore(deps): bump minimatch in /backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web (#8828)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-05 15:13:57 -08:00
dependabot[bot]
108cde4f55 chore(deps): bump j178/prek-action from 1.0.12 to 1.1.1 (#8477)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-05 15:13:00 -08:00
dependabot[bot]
f88ce32bd4 chore(deps): bump @hono/node-server from 1.19.9 to 1.19.10 in /backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web (#9048)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-05 14:51:22 -08:00
dependabot[bot]
911f3439ea chore(deps): bump helm/kind-action from 1.13.0 to 1.14.0 (#8917)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-05 14:50:06 -08:00
dependabot[bot]
b02590d2b2 chore(deps): bump aws-actions/configure-aws-credentials from 5.1.1 to 6.0.0 (#8478)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-05 14:49:29 -08:00
dependabot[bot]
2d75b4b1f8 chore(deps): bump dompurify from 3.3.1 to 3.3.2 in /widget (#9106)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-05 14:45:53 -08:00
dependabot[bot]
7e3f7d01c2 chore(deps): bump authlib from 1.6.6 to 1.6.7 (#9049)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-05 22:14:44 +00:00
Jamison Lahman
9d6ce26ea3 fix(fe): show modal body on Safari/desktop (#9035) 2026-03-05 21:35:43 +00:00
roshan
41713d42a2 chore: upgrade golangci-lint to v2.10.1 for Go 1.26 support (#9107)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 21:22:56 +00:00
roshan
8afc283410 fix(chrome-extension): open login in new tab when session expires (#9091)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-05 21:18:21 +00:00
Jamison Lahman
b5c873077e chore(devtools): upgrade ods: 0.6.2->0.6.3 (#9105) 2026-03-05 21:04:51 +00:00
Jamison Lahman
20a4dd32eb chore(devtools): pull release branch and support PR # args (#9102)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-03-05 12:37:51 -08:00
Jamison Lahman
fde0d44bc1 chore(devtools): upgrade ods to go 1.26 (#9103) 2026-03-05 20:24:57 +00:00
Jamison Lahman
8fd91b6e83 chore(devtools): ods desktop (#9100) 2026-03-05 19:38:02 +00:00
Justin Tahara
8247fdd45b fix(llm): Handle Bedrock tool content in message history without toolConfig (#9063) 2026-03-05 19:06:35 +00:00
Jamison Lahman
8c5859ba4d fix(fe): disable projects modal button unless project is named (#9093) 2026-03-05 10:29:15 -08:00
Jamison Lahman
62ef6f59bb chore(playwright): screenshot tests for user settings pages (#9078) 2026-03-05 08:35:46 -08:00
Jamison Lahman
7eabfa125c fix(fe): properly wrap copy and edit buttons on mobile (#9073) 2026-03-05 04:36:11 +00:00
SubashMohan
ee18114739 feat(table): add DataTable config-driven wrapper component (#9020)
Co-authored-by: Nik <nikolas.garza5@gmail.com>
2026-03-05 04:21:38 +00:00
Nikolas Garza
f7630f5648 fix: EE route gating for upgrading CE users (#9026) 2026-03-05 03:44:16 +00:00
Jamison Lahman
e0d91b9ea7 chore(fe): rm unreachable code (#9069) 2026-03-05 03:26:50 +00:00
Raunak Bhagat
2c0a4a60a5 refactor: consolidate AppInputBar search/chat rendering with animated transitions (#9054) 2026-03-05 03:16:36 +00:00
Justin Tahara
3a7d4dad56 fix(ui): Improve text truncation and overflow handling in FileCard layout (#9061) 2026-03-05 03:11:53 +00:00
acaprau
c5c236d098 chore(opensearch): Fix and consolidate the dev script used to start OpenSearch locally (#9036) 2026-03-05 01:54:02 +00:00
Danelegend
b18baff4d0 fix: Correct file_id for docs (#9058) 2026-03-05 01:43:58 +00:00
SubashMohan
eb3e15c195 feat(table): add ColumnVisibilityPopover, Footer, Pagination, and SortingPopover components (#9019)
Co-authored-by: Nik <nikolas.garza5@gmail.com>
2026-03-05 01:43:37 +00:00
acaprau
47d9a9e1ac feat(document index): Re-enable search settings swap (#9005) 2026-03-05 01:41:03 +00:00
Evan Lohn
aca466b35d fix: doc to hierarchynode connection in pruning (#9046) 2026-03-05 01:30:36 +00:00
Justin Tahara
5176fd7386 fix(llm): Final LLM Cleanup for Nightly Tests (#9055) 2026-03-05 01:00:45 +00:00
SubashMohan
92538084e9 feat(table): add useColumnWidths, useDataTable, and useDraggableRows hooks (#9018)
Co-authored-by: Nik <nikolas.garza5@gmail.com>
2026-03-05 00:00:06 +00:00
Bo-Onyx
2d996e05a4 chore(fe): opal button migration (#8864) 2026-03-04 22:52:49 +00:00
Nikolas Garza
b2956f795b refactor: migrate LLM & embedding management to OnyxError (#9025) 2026-03-04 22:09:25 +00:00
Danelegend
b272085543 fix: Code Interpreter Client session clean up (#9028)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 21:58:00 +00:00
Justin Tahara
8193aa4fd0 fix(ui): Persist agent sharing changes immediately for existing agents (#9024) 2026-03-04 21:34:50 +00:00
dependabot[bot]
52db41a00b chore(deps): bump nltk from 3.9.1 to 3.9.3 (#9045)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-04 21:21:37 +00:00
SubashMohan
f1cf3c4589 feat(table): add table primitive components and styles (#9017) 2026-03-04 21:06:53 +00:00
dependabot[bot]
5322aeed90 chore(deps): bump hono from 4.11.7 to 4.12.5 in /backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web (#9044)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-03-04 12:51:05 -08:00
Evan Lohn
5da8870fd2 fix: stop calling unsupported endpoints no vectordb (#9012) 2026-03-04 20:18:09 +00:00
Nikolas Garza
57d3ab3b40 feat: add SCIM token management page (#9001) 2026-03-04 19:48:37 +00:00
Nikolas Garza
649c7fe8b9 feat(slack): convert markdown tables to Slack-friendly format (#8999) 2026-03-04 19:16:50 +00:00
Jamison Lahman
e5e2bc6149 chore(fe): "Share Chat"->"Share" (#9022) 2026-03-04 11:08:14 -08:00
Jamison Lahman
b148065e1d chore(devtools): --debug mode for desktop (#9027) 2026-03-04 11:07:52 -08:00
Evan Lohn
367808951c chore: remove lightweight mode (#9014) 2026-03-04 18:26:05 +00:00
Jamison Lahman
0f74da3302 fix(fe): dont align center modals on small screens (#8988) 2026-03-04 17:46:35 +00:00
Raunak Bhagat
96f7cbd25a fix: Use IllustrationContent for empty search results (#9013) 2026-03-04 16:54:23 +00:00
Raunak Bhagat
c627cea17d feat(opal): add sidebar variant to Interactive + refactor SidebarTab (#9016) 2026-03-04 15:52:56 +00:00
Raunak Bhagat
a8cdc3965d refactor(fe): move onboarding to sections/, consolidate hooks, move types (#8985) 2026-03-04 10:51:20 +00:00
Raunak Bhagat
60891b2f44 feat: Add IllustrationContent layout component to opal (#9011) 2026-03-04 06:14:21 +00:00
Danelegend
d2f35e1fae feat: Align action tool tips (#8997) 2026-03-04 04:32:30 +00:00
Danelegend
7a7350f387 fix: Markdown does not show all texts (#9009) 2026-03-04 02:16:00 +00:00
Nikolas Garza
8ef504acd5 refactor: add OnyxErrorCode enum and migrate billing/license routers (#8975) 2026-03-04 02:03:38 +00:00
Danelegend
0dbabfe445 feat: Support intermediate code interpreter file generation (#9006) 2026-03-04 01:55:44 +00:00
Justin Tahara
50575d0f6b chore(ui): Rename from LLM Models to Language Models (#9007) 2026-03-04 01:40:34 +00:00
Evan Lohn
9862fbd4a6 chore: deploying onyx lite (#9004) 2026-03-04 01:38:02 +00:00
Jamison Lahman
003d94546a fix(a11y): prevent show password button losing focus on tab (#9000) 2026-03-04 01:13:31 +00:00
Nikolas Garza
01d3473974 chore: port Greptile custom context rules to greptile.json (#9003) 2026-03-04 01:04:15 +00:00
Raunak Bhagat
19c7809a43 feat: Add illustrations to opal (#8993) 2026-03-04 00:59:47 +00:00
Bo-Onyx
98e6346152 chore: [Running GitHub actions for #8972] (#8996)
Co-authored-by: Jean Caillé <jean.caille@helsing.ai>
2026-03-03 23:36:18 +00:00
acaprau
c63fdf1c13 fix(opensearch): Increase the Vespa http client timeout to 120s for the OpenSearch migration (#8966) 2026-03-03 22:40:50 +00:00
Justin Tahara
49b509a0a7 fix(permissions): Add file connector access control for global curators (#8990) 2026-03-03 22:13:11 +00:00
Wenxi
2b1f1fe311 chore: use abort controller to properly manage oauth requests (#8994) 2026-03-03 21:46:17 +00:00
Danelegend
3e67ea9df7 feat: Code Interpreter responsive in actions dropdown (#8982) 2026-03-03 21:22:37 +00:00
Wenxi
98e3602dd6 fix: google connectors redirect to connector page instead of auth error (#8989) 2026-03-03 21:18:30 +00:00
Wenxi
4fded5b0a1 chore: remove dead code from expandable content component (#8981) 2026-03-03 21:11:07 +00:00
Wenxi
328c305d26 chore: remove dead code from admin theming (#8979) 2026-03-03 21:06:28 +00:00
Jamison Lahman
f902727215 chore(devtools): npm run test:diff on changed files (#8991) 2026-03-03 13:10:35 -08:00
Justin Tahara
69c8aa08b3 fix(ci): Add secrets inheritance to nightly LLM provider chat workflow (#8984) 2026-03-03 20:49:12 +00:00
Raunak Bhagat
c98aa486e4 refactor(fe): migrate onboarding components to Content/ContentAction (#8983) 2026-03-03 20:46:40 +00:00
Wenxi
03553114c5 fix(ollama): debounce API url input and properly handle model fetch request with abort signal (#8986) 2026-03-03 20:08:57 +00:00
Justin Tahara
6532c94230 chore: Add greptile.json configuration file (#8978) 2026-03-03 19:58:05 +00:00
Danelegend
1b32a7d94e fix: Default code interpreter base url (#8969) 2026-03-03 18:35:20 +00:00
Danelegend
5fd0fe192b fix: Tokeniser does not rely on llm (#8967) 2026-03-03 18:35:15 +00:00
Wenxi
1de522f9ae fix: sandbox rollback db on pod deletion failure (#8965) 2026-03-03 17:09:50 +00:00
Raunak Bhagat
60fe3e9ad6 refactor(fe): migrate admin pages from AdminPageTitle to SettingsLayouts (#8930) 2026-03-03 08:34:58 +00:00
Evan Lohn
6aa56821d6 feat: use new cache backend where appropriate (#8889) 2026-03-03 07:14:39 +00:00
Danelegend
eda436de01 fix: Block deleting default provider (#8962) 2026-03-03 06:36:29 +00:00
Danelegend
07915a6c01 fix: Update frontend route calls to use new endpoints (#8968) 2026-03-03 06:19:47 +00:00
Nikolas Garza
2c3e9aecd1 fix(scim): only list SCIM-managed users and link pre-existing users (#8959) 2026-03-03 05:36:43 +00:00
Evan Lohn
fa29cc3849 feat: postgres cache backend (#8879) 2026-03-03 04:33:47 +00:00
Raunak Bhagat
24ac8b37d3 refactor(fe): define settings layout width presets as CSS variables (#8936) 2026-03-03 03:11:18 +00:00
Jessica Singh
be8b108ae4 chore(auth): ecs fargate deployment cleanup (#8589) 2026-03-03 02:34:04 +00:00
Danelegend
f380a75df3 fix: Non-intuitive llm auth exceptions (#8960) 2026-03-03 01:58:45 +00:00
817 changed files with 37123 additions and 11204 deletions

View File

@@ -0,0 +1,186 @@
---
name: onyx-cli
description: Query the Onyx knowledge base using the onyx-cli command. Use when the user wants to search company documents, ask questions about internal knowledge, query connected data sources, or look up information stored in Onyx.
---
# Onyx CLI — Agent Tool
Onyx is an enterprise search and Gen-AI platform that connects to company documents, apps, and people. The `onyx-cli` tool provides non-interactive commands to query the Onyx knowledge base and list available agents.
## Prerequisites
### 1. Check if installed
```bash
which onyx-cli
```
### 2. Install (if needed)
**Primary — pip:**
```bash
pip install onyx-cli
```
**From source (Go):**
```bash
cd cli && go build -o onyx-cli . && sudo mv onyx-cli /usr/local/bin/
```
### 3. Check if configured
```bash
onyx-cli validate-config
```
This checks that the config file exists and an API key is present, then tests the server connection via `/api/me`. It exits 0 on success and non-zero with a descriptive error on failure.
If unconfigured, you have two options:
**Option A — Interactive setup (requires user input):**
```bash
onyx-cli configure
```
This prompts for the Onyx server URL and API key, tests the connection, and saves config.
**Option B — Environment variables (non-interactive, preferred for agents):**
```bash
export ONYX_SERVER_URL="https://your-onyx-server.com" # default: https://cloud.onyx.app
export ONYX_API_KEY="your-api-key"
```
Environment variables override the config file. If these are set, no config file is needed.
| Variable | Required | Description |
|----------|----------|-------------|
| `ONYX_SERVER_URL` | No | Onyx server base URL (default: `https://cloud.onyx.app`) |
| `ONYX_API_KEY` | Yes | API key for authentication |
| `ONYX_PERSONA_ID` | No | Default agent/persona ID |
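For a single non-interactive call, the variables can also be supplied inline (a minimal sketch; the server URL and key below are placeholders):
```bash
# One-off invocation with inline environment variables (placeholder values)
ONYX_SERVER_URL="https://your-onyx-server.com" \
ONYX_API_KEY="your-api-key" \
onyx-cli validate-config
```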
If neither the config file nor environment variables are set, tell the user that `onyx-cli` needs to be configured and ask them to either:
- Run `onyx-cli configure` interactively, or
- Set `ONYX_SERVER_URL` and `ONYX_API_KEY` environment variables
## Commands
### Validate configuration
```bash
onyx-cli validate-config
```
Checks that the config file exists and an API key is present, then tests the server connection. Use this before `ask` or `agents` to confirm the CLI is properly set up.
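Because success is signaled purely through the exit code, the command can gate later calls in a script. A minimal sketch:
```bash
# Only query Onyx if the CLI reports a working configuration
if onyx-cli validate-config; then
  onyx-cli ask "What is our company's PTO policy?"
else
  echo "onyx-cli is not configured; run 'onyx-cli configure' or set ONYX_SERVER_URL / ONYX_API_KEY" >&2
  exit 1
fi
```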
### List available agents
```bash
onyx-cli agents
```
Prints a table of agent IDs, names, and descriptions. Use `--json` for structured output:
```bash
onyx-cli agents --json
```
Use agent IDs with `ask --agent-id` to query a specific agent.
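If `--json` returns an array of agent objects (an assumption; the exact field names are not documented here), the IDs can be pulled out with `jq`:
```bash
# Assumes each agent object exposes "id" and "name" fields; adjust to the actual schema
onyx-cli agents --json | jq -r '.[] | "\(.id)\t\(.name)"'
```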
### Basic query (plain text output)
```bash
onyx-cli ask "What is our company's PTO policy?"
```
Streams the answer as plain text to stdout. Exit code 0 on success, non-zero on error.
### JSON output (structured events)
```bash
onyx-cli ask --json "What authentication methods do we support?"
```
Outputs parsed stream events as JSON, one object per line. Key event types include message deltas, stop, errors, search start, and citation payloads.
Each line is a JSON object with this envelope:
```json
{"type": "<event_type>", "event": { ... }}
```
| Event Type | Description |
|------------|-------------|
| `message_delta` | Content token — concatenate all `content` fields for the full answer |
| `stop` | Stream complete |
| `error` | Error with `error` message field |
| `search_tool_start` | Onyx started searching documents |
| `citation_info` | Source citation — see shape below |
`citation_info` event shape:
```json
{
"type": "citation_info",
"event": {
"citation_number": 1,
"document_id": "abc123def456",
"placement": {"turn_index": 0, "tab_index": 0, "sub_turn_index": null}
}
}
```
`placement` is metadata about where in the conversation the citation appeared and can be ignored for most use cases.
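To recover the plain answer from the `--json` stream, one approach is to keep only `message_delta` events and concatenate their content (a sketch; the `.event.content` path assumes the delta text lives in a `content` field as described above):
```bash
# Concatenate message_delta content fields into the full answer (field path assumed)
onyx-cli ask --json "What authentication methods do we support?" \
  | jq -rj 'select(.type == "message_delta") | .event.content // empty'
```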
### Specify an agent
```bash
onyx-cli ask --agent-id 5 "Summarize our Q4 roadmap"
```
Uses a specific Onyx agent/persona instead of the default.
### All flags
| Flag | Type | Description |
|------|------|-------------|
| `--agent-id` | int | Agent ID to use (overrides default) |
| `--json` | bool | Output raw NDJSON events instead of plain text |
## Statelessness
Each `onyx-cli ask` call creates an independent chat session. There is no built-in way to chain context across multiple `ask` invocations — every call starts fresh. If you need multi-turn conversation with memory, use the interactive TUI (`onyx-cli` or `onyx-cli chat`) instead.
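If a short follow-up is needed without the TUI, one workaround is to fold the previous answer into the next prompt (a sketch; this simply re-sends text and is not a real shared session):
```bash
# Manual "memory": capture the first answer and include it in the follow-up prompt
summary=$(onyx-cli ask "Summarize our Q4 roadmap")
onyx-cli ask "Given this summary: ${summary}. What are the biggest open risks?"
```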
## When to Use
Use `onyx-cli ask` when:
- The user asks about company-specific information (policies, docs, processes)
- You need to search internal knowledge bases or connected data sources
- The user references Onyx, asks you to "search Onyx", or wants to query their documents
- You need context from company wikis, Confluence, Google Drive, Slack, or other connected sources
Do NOT use when:
- The question is about general programming knowledge (use your own knowledge)
- The user is asking about code in the current repository (use grep/read tools)
- The user hasn't mentioned Onyx and the question doesn't require internal company data
## Examples
```bash
# Simple question
onyx-cli ask "What are the steps to deploy to production?"
# Get structured output for parsing
onyx-cli ask --json "List all active API integrations"
# Use a specialized agent
onyx-cli ask --agent-id 3 "What were the action items from last week's standup?"
# Pipe the answer into another command
onyx-cli ask "What is the database schema for users?" | head -20
```

View File

@@ -29,20 +29,32 @@ jobs:
build-backend-craft: ${{ steps.check.outputs.build-backend-craft }}
build-model-server: ${{ steps.check.outputs.build-model-server }}
is-cloud-tag: ${{ steps.check.outputs.is-cloud-tag }}
is-stable: ${{ steps.check.outputs.is-stable }}
is-beta: ${{ steps.check.outputs.is-beta }}
is-stable-standalone: ${{ steps.check.outputs.is-stable-standalone }}
is-beta-standalone: ${{ steps.check.outputs.is-beta-standalone }}
is-craft-latest: ${{ steps.check.outputs.is-craft-latest }}
is-latest: ${{ steps.check.outputs.is-latest }}
is-test-run: ${{ steps.check.outputs.is-test-run }}
sanitized-tag: ${{ steps.check.outputs.sanitized-tag }}
short-sha: ${{ steps.check.outputs.short-sha }}
steps:
- name: Checkout (for git tags)
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0
fetch-tags: true
- name: Setup uv
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # ratchet:astral-sh/setup-uv@v7
with:
version: "0.9.9"
enable-cache: false
- name: Check which components to build and version info
id: check
env:
EVENT_NAME: ${{ github.event_name }}
run: |
set -eo pipefail
TAG="${GITHUB_REF_NAME}"
# Sanitize tag name by replacing slashes with hyphens (for Docker tag compatibility)
SANITIZED_TAG=$(echo "$TAG" | tr '/' '-')
@@ -54,9 +66,8 @@ jobs:
IS_VERSION_TAG=false
IS_STABLE=false
IS_BETA=false
IS_STABLE_STANDALONE=false
IS_BETA_STANDALONE=false
IS_CRAFT_LATEST=false
IS_LATEST=false
IS_PROD_TAG=false
IS_TEST_RUN=false
BUILD_DESKTOP=false
@@ -67,9 +78,6 @@ jobs:
BUILD_MODEL_SERVER=true
# Determine tag type based on pattern matching (do regex checks once)
if [[ "$TAG" == craft-* ]]; then
IS_CRAFT_LATEST=true
fi
if [[ "$TAG" == *cloud* ]]; then
IS_CLOUD=true
fi
@@ -97,20 +105,28 @@ jobs:
fi
fi
# Craft-latest builds backend with Craft enabled
if [[ "$IS_CRAFT_LATEST" == "true" ]]; then
BUILD_BACKEND_CRAFT=true
BUILD_BACKEND=false
fi
# Standalone version checks (for backend/model-server - version excluding cloud tags)
if [[ "$IS_STABLE" == "true" ]] && [[ "$IS_CLOUD" != "true" ]]; then
IS_STABLE_STANDALONE=true
fi
if [[ "$IS_BETA" == "true" ]] && [[ "$IS_CLOUD" != "true" ]]; then
IS_BETA_STANDALONE=true
fi
# Determine if this tag should get the "latest" Docker tag.
# Only the highest semver stable tag (vX.Y.Z exactly) gets "latest".
if [[ "$IS_STABLE" == "true" ]]; then
HIGHEST_STABLE=$(uv run --no-sync --with onyx-devtools ods latest-stable-tag) || {
echo "::error::Failed to determine highest stable tag via 'ods latest-stable-tag'"
exit 1
}
if [[ "$TAG" == "$HIGHEST_STABLE" ]]; then
IS_LATEST=true
fi
fi
# Build craft-latest backend alongside the regular latest.
if [[ "$IS_LATEST" == "true" ]]; then
BUILD_BACKEND_CRAFT=true
fi
# Determine if this is a production tag
# Production tags are: version tags (v1.2.3*) or nightly tags
if [[ "$IS_VERSION_TAG" == "true" ]] || [[ "$IS_NIGHTLY" == "true" ]]; then
@@ -129,11 +145,9 @@ jobs:
echo "build-backend-craft=$BUILD_BACKEND_CRAFT"
echo "build-model-server=$BUILD_MODEL_SERVER"
echo "is-cloud-tag=$IS_CLOUD"
echo "is-stable=$IS_STABLE"
echo "is-beta=$IS_BETA"
echo "is-stable-standalone=$IS_STABLE_STANDALONE"
echo "is-beta-standalone=$IS_BETA_STANDALONE"
echo "is-craft-latest=$IS_CRAFT_LATEST"
echo "is-latest=$IS_LATEST"
echo "is-test-run=$IS_TEST_RUN"
echo "sanitized-tag=$SANITIZED_TAG"
echo "short-sha=$SHORT_SHA"
@@ -182,9 +196,53 @@ jobs:
title: "🚨 Version Tag Check Failed"
ref-name: ${{ github.ref_name }}
build-desktop:
# Create GitHub release first, before desktop builds start.
# This ensures all desktop matrix jobs upload to the same release instead of
# racing to create duplicate releases.
create-release:
needs: determine-builds
if: needs.determine-builds.outputs.build-desktop == 'true'
runs-on: ubuntu-slim
timeout-minutes: 10
permissions:
contents: write
outputs:
release-id: ${{ steps.create-release.outputs.id }}
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Determine release tag
id: release-tag
env:
IS_TEST_RUN: ${{ needs.determine-builds.outputs.is-test-run }}
SHORT_SHA: ${{ needs.determine-builds.outputs.short-sha }}
run: |
if [ "${IS_TEST_RUN}" == "true" ]; then
echo "tag=v0.0.0-dev+${SHORT_SHA}" >> "$GITHUB_OUTPUT"
else
echo "tag=${GITHUB_REF_NAME}" >> "$GITHUB_OUTPUT"
fi
- name: Create GitHub Release
id: create-release
uses: softprops/action-gh-release@da05d552573ad5aba039eaac05058a918a7bf631 # ratchet:softprops/action-gh-release@v2
with:
tag_name: ${{ steps.release-tag.outputs.tag }}
name: ${{ steps.release-tag.outputs.tag }}
body: "See the assets to download this version and install."
draft: true
prerelease: false
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
build-desktop:
needs:
- determine-builds
- create-release
if: needs.determine-builds.outputs.build-desktop == 'true'
permissions:
id-token: write
contents: write
@@ -208,12 +266,12 @@ jobs:
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6.0.2
with:
# NOTE: persist-credentials is needed for tauri-action to create GitHub releases.
# NOTE: persist-credentials is needed for tauri-action to upload assets to GitHub releases.
persist-credentials: true # zizmor: ignore[artipacked]
- name: Configure AWS credentials
if: startsWith(matrix.platform, 'macos-')
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -353,11 +411,9 @@ jobs:
APPLE_SIGNING_IDENTITY: ${{ env.CERT_ID }}
APPLE_TEAM_ID: ${{ env.APPLE_TEAM_ID }}
with:
tagName: ${{ needs.determine-builds.outputs.is-test-run != 'true' && 'v__VERSION__' || format('v0.0.0-dev+{0}', needs.determine-builds.outputs.short-sha) }}
releaseName: ${{ needs.determine-builds.outputs.is-test-run != 'true' && 'v__VERSION__' || format('v0.0.0-dev+{0}', needs.determine-builds.outputs.short-sha) }}
releaseBody: "See the assets to download this version and install."
releaseDraft: true
prerelease: false
# Use the release created by the create-release job to avoid race conditions
# when multiple matrix jobs try to create/update the same release simultaneously
releaseId: ${{ needs.create-release.outputs.release-id }}
assetNamePattern: "[name]_[arch][ext]"
args: ${{ matrix.args }}
@@ -384,7 +440,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -458,7 +514,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -527,7 +583,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -558,7 +614,8 @@ jobs:
latest=false
tags: |
type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('web-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-stable == 'true' && 'latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'craft-latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta == 'true' && 'beta' || '' }}
@@ -597,7 +654,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -679,7 +736,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -756,7 +813,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -823,7 +880,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -896,7 +953,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -964,7 +1021,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -995,7 +1052,7 @@ jobs:
latest=false
tags: |
type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('backend-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-stable-standalone == 'true' && 'latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}
@@ -1034,7 +1091,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1107,7 +1164,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1176,7 +1233,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1207,8 +1264,6 @@ jobs:
latest=false
tags: |
type=raw,value=craft-latest
# TODO: Consider aligning craft-latest tags with regular backend builds (e.g., latest, edge, beta)
# to keep tagging strategy consistent across all backend images
- name: Create and push manifest
env:
@@ -1246,7 +1301,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1326,7 +1381,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1400,7 +1455,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1431,7 +1486,8 @@ jobs:
latest=false
tags: |
type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('model-server-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-stable-standalone == 'true' && 'latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'craft-latest' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}
@@ -1465,7 +1521,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1520,7 +1576,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1580,7 +1636,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -1637,7 +1693,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2

View File

@@ -15,6 +15,8 @@ permissions:
jobs:
provider-chat-test:
uses: ./.github/workflows/reusable-nightly-llm-provider-chat.yml
secrets:
AWS_OIDC_ROLE_ARN: ${{ secrets.AWS_OIDC_ROLE_ARN }}
permissions:
contents: read
id-token: write

View File

@@ -6,11 +6,13 @@ on:
- main
permissions:
contents: write
pull-requests: write
contents: read
jobs:
cherry-pick-to-latest-release:
permissions:
contents: write
pull-requests: write
outputs:
should_cherrypick: ${{ steps.gate.outputs.should_cherrypick }}
pr_number: ${{ steps.gate.outputs.pr_number }}

View File

@@ -57,7 +57,7 @@ jobs:
cache-dependency-path: ./desktop/package-lock.json
- name: Setup Rust
uses: dtolnay/rust-toolchain@4be9e76fd7c4901c61fb841f559994984270fce7
uses: dtolnay/rust-toolchain@efa25f7f19611383d5b0ccf2d1c8914531636bf9
with:
toolchain: stable
targets: ${{ matrix.target }}

.github/workflows/pr-golang-tests.yml (new file)
View File

@@ -0,0 +1,56 @@
name: Golang Tests
concurrency:
group: Golang-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- "release/**"
push:
tags:
- "v*.*.*"
permissions: {}
env:
GO_VERSION: "1.26"
jobs:
detect-modules:
runs-on: ubuntu-latest
timeout-minutes: 10
outputs:
modules: ${{ steps.set-modules.outputs.modules }}
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
with:
persist-credentials: false
- id: set-modules
run: echo "modules=$(find . -name 'go.mod' -exec dirname {} \; | jq -Rc '[.,inputs]')" >> "$GITHUB_OUTPUT"
golang:
needs: detect-modules
runs-on: ubuntu-latest
timeout-minutes: 10
strategy:
matrix:
modules: ${{ fromJSON(needs.detect-modules.outputs.modules) }}
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # zizmor: ignore[cache-poisoning]
with:
go-version: ${{ env.GO_VERSION }}
cache-dependency-path: "**/go.sum"
- run: go mod tidy
working-directory: ${{ matrix.modules }}
- run: git diff --exit-code go.mod go.sum
working-directory: ${{ matrix.modules }}
- run: go test ./...
working-directory: ${{ matrix.modules }}

View File

@@ -71,7 +71,7 @@ jobs:
- name: Create kind cluster
if: steps.list-changed.outputs.changed == 'true'
uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # ratchet:helm/kind-action@v1.13.0
uses: helm/kind-action@ef37e7f390d99f746eb8b610417061a60e82a6cc # ratchet:helm/kind-action@v1.14.0
- name: Pre-install cluster status check
if: steps.list-changed.outputs.changed == 'true'

View File

@@ -316,6 +316,7 @@ jobs:
# Base config shared by both editions
cat <<EOF > deployment/docker_compose/.env
COMPOSE_PROFILES=s3-filestore
OPENSEARCH_FOR_ONYX_ENABLED=false
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true
@@ -335,7 +336,6 @@ jobs:
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
LICENSE_ENFORCEMENT_ENABLED=false
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001
USE_LIGHTWEIGHT_BACKGROUND_WORKER=false
EOF
fi
@@ -419,6 +419,7 @@ jobs:
-e POSTGRES_POOL_PRE_PING=true \
-e POSTGRES_USE_NULL_POOL=true \
-e VESPA_HOST=index \
-e ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=false \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
@@ -471,13 +472,13 @@ jobs:
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
no-vectordb-tests:
onyx-lite-tests:
needs: [build-backend-image, build-integration-image]
runs-on:
[
runs-on,
runner=4cpu-linux-arm64,
"run-id=${{ github.run_id }}-no-vectordb-tests",
"run-id=${{ github.run_id }}-onyx-lite-tests",
"extras=ecr-cache",
]
timeout-minutes: 45
@@ -495,13 +496,12 @@ jobs:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create .env file for no-vectordb Docker Compose
- name: Create .env file for Onyx Lite Docker Compose
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
COMPOSE_PROFILES=s3-filestore
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
LICENSE_ENFORCEMENT_ENABLED=false
AUTH_TYPE=basic
@@ -509,28 +509,23 @@ jobs:
POSTGRES_USE_NULL_POOL=true
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
DISABLE_VECTOR_DB=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
USE_LIGHTWEIGHT_BACKGROUND_WORKER=true
EOF
# Start only the services needed for no-vectordb mode (no Vespa, no model servers)
- name: Start Docker containers (no-vectordb)
# Start only the services needed for Onyx Lite (Postgres + API server)
- name: Start Docker containers (onyx-lite)
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml up \
docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml up \
relational_db \
cache \
minio \
api_server \
background \
-d
id: start_docker_no_vectordb
id: start_docker_onyx_lite
- name: Wait for services to be ready
run: |
echo "Starting wait-for-service script (no-vectordb)..."
echo "Starting wait-for-service script (onyx-lite)..."
start_time=$(date +%s)
timeout=300
while true; do
@@ -552,14 +547,14 @@ jobs:
sleep 5
done
- name: Run No-VectorDB Integration Tests
- name: Run Onyx Lite Integration Tests
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
timeout_minutes: 20
max_attempts: 3
retry_wait_seconds: 10
command: |
echo "Running no-vectordb integration tests..."
echo "Running onyx-lite integration tests..."
docker run --rm --network onyx_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
@@ -570,39 +565,38 @@ jobs:
-e DB_READONLY_PASSWORD=password \
-e POSTGRES_POOL_PRE_PING=true \
-e POSTGRES_USE_NULL_POOL=true \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e TEST_WEB_HOSTNAME=test-runner \
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
/app/tests/integration/tests/no_vectordb
- name: Dump API server logs (no-vectordb)
- name: Dump API server logs (onyx-lite)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml \
logs --no-color api_server > $GITHUB_WORKSPACE/api_server_no_vectordb.log || true
docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml \
logs --no-color api_server > $GITHUB_WORKSPACE/api_server_onyx_lite.log || true
- name: Dump all-container logs (no-vectordb)
- name: Dump all-container logs (onyx-lite)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml \
logs --no-color > $GITHUB_WORKSPACE/docker-compose-no-vectordb.log || true
docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml \
logs --no-color > $GITHUB_WORKSPACE/docker-compose-onyx-lite.log || true
- name: Upload logs (no-vectordb)
- name: Upload logs (onyx-lite)
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
with:
name: docker-all-logs-no-vectordb
path: ${{ github.workspace }}/docker-compose-no-vectordb.log
name: docker-all-logs-onyx-lite
path: ${{ github.workspace }}/docker-compose-onyx-lite.log
- name: Stop Docker containers (no-vectordb)
- name: Stop Docker containers (onyx-lite)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml down -v
docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml down -v
multitenant-tests:
needs:
@@ -645,6 +639,7 @@ jobs:
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
DEV_MODE=true \
OPENSEARCH_FOR_ONYX_ENABLED=false \
docker compose -f docker-compose.multitenant-dev.yml up \
relational_db \
index \
@@ -699,6 +694,7 @@ jobs:
-e POSTGRES_DB=postgres \
-e POSTGRES_USE_NULL_POOL=true \
-e VESPA_HOST=index \
-e ENABLE_OPENSEARCH_INDEXING_FOR_ONYX=false \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
@@ -744,7 +740,7 @@ jobs:
# NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
needs: [integration-tests, no-vectordb-tests, multitenant-tests]
needs: [integration-tests, onyx-lite-tests, multitenant-tests]
if: ${{ always() }}
steps:
- name: Check job status

View File

@@ -31,7 +31,7 @@ jobs:
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"
cache: "npm" # zizmor: ignore[cache-poisoning] test-only workflow; no deploy artifacts
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies

View File

@@ -12,6 +12,9 @@ on:
push:
tags:
- "v*.*.*"
# TODO: Remove this if we enable merge-queues for release branches.
branches:
- "release/**"
permissions:
contents: read
@@ -268,10 +271,11 @@ jobs:
persist-credentials: false
- name: Setup node
# zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"
cache: "npm" # zizmor: ignore[cache-poisoning]
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
@@ -279,6 +283,7 @@ jobs:
run: npm ci
- name: Cache playwright cache
# zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/ms-playwright
@@ -459,7 +464,7 @@ jobs:
# --- Visual Regression Diff ---
- name: Configure AWS credentials
if: always()
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -590,6 +595,108 @@ jobs:
name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
path: ${{ github.workspace }}/docker-compose.log
playwright-tests-lite:
needs: [build-web-image, build-backend-image]
name: Playwright Tests (lite)
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- "run-id=${{ github.run_id }}-playwright-tests-lite"
- "extras=ecr-cache"
timeout-minutes: 30
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup node
# zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm" # zizmor: ignore[cache-poisoning]
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
working-directory: ./web
run: npm ci
- name: Cache playwright cache
# zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/ms-playwright
key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
restore-keys: |
${{ runner.os }}-playwright-npm-
- name: Install playwright browsers
working-directory: ./web
run: npx playwright install --with-deps
- name: Create .env file for Docker Compose
env:
OPENAI_API_KEY_VALUE: ${{ env.OPENAI_API_KEY }}
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
LICENSE_ENFORCEMENT_ENABLED=false
AUTH_TYPE=basic
INTEGRATION_TESTS_MODE=true
GEN_AI_API_KEY=${OPENAI_API_KEY_VALUE}
MOCK_LLM_RESPONSE=true
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:playwright-test-backend-${RUN_ID}
ONYX_WEB_SERVER_IMAGE=${ECR_CACHE}:playwright-test-web-${RUN_ID}
EOF
# needed for pulling external images; otherwise we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Start Docker containers (lite)
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml up -d
id: start_docker
- name: Run Playwright tests (lite)
working-directory: ./web
run: npx playwright test --project lite
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
if: always()
with:
name: playwright-test-results-lite-${{ github.run_id }}
path: ./web/output/playwright/
retention-days: 30
- name: Save Docker logs
if: success() || failure()
env:
WORKSPACE: ${{ github.workspace }}
run: |
cd deployment/docker_compose
docker compose logs > docker-compose.log
mv docker-compose.log ${WORKSPACE}/docker-compose.log
- name: Upload logs
if: success() || failure()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
with:
name: docker-logs-lite-${{ github.run_id }}
path: ${{ github.workspace }}/docker-compose.log
# Post a single combined visual regression comment after all matrix jobs finish
visual-regression-comment:
needs: [playwright-tests]
@@ -686,7 +793,7 @@ jobs:
# NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
needs: [playwright-tests]
needs: [playwright-tests, playwright-tests-lite]
if: ${{ always() }}
steps:
- name: Check job status

View File

@@ -38,9 +38,9 @@ jobs:
- name: Install node dependencies
working-directory: ./web
run: npm ci
- uses: j178/prek-action@9d6a3097e0c1865ecce00cfb89fe80f2ee91b547 # ratchet:j178/prek-action@v1
- uses: j178/prek-action@0bb87d7f00b0c99306c8bcb8b8beba1eb581c037 # ratchet:j178/prek-action@v1
with:
prek-version: '0.2.21'
prek-version: '0.3.4'
extra-args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || github.event_name == 'merge_group' && format('--from-ref {0} --to-ref {1}', github.event.merge_group.base_sha, github.event.merge_group.head_sha) || github.ref_name == 'main' && '--all-files' || '' }}
- name: Check Actions
uses: giner/check-actions@28d366c7cbbe235f9624a88aa31a628167eee28c # ratchet:giner/check-actions@v1.0.1

.github/workflows/release-cli.yml vendored Normal file
View File

@@ -0,0 +1,214 @@
name: Release CLI
on:
push:
tags:
- "cli/v*.*.*"
jobs:
pypi:
runs-on: ubuntu-latest
environment:
name: release-cli
permissions:
id-token: write
timeout-minutes: 10
strategy:
matrix:
os-arch:
- { goos: "linux", goarch: "amd64" }
- { goos: "linux", goarch: "arm64" }
- { goos: "windows", goarch: "amd64" }
- { goos: "windows", goarch: "arm64" }
- { goos: "darwin", goarch: "amd64" }
- { goos: "darwin", goarch: "arm64" }
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
- uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
version: "0.9.9"
- run: |
GOOS="${{ matrix.os-arch.goos }}" \
GOARCH="${{ matrix.os-arch.goarch }}" \
uv build --wheel
working-directory: cli
- run: uv publish
working-directory: cli
docker-amd64:
runs-on:
- runs-on
- runner=2cpu-linux-x64
- run-id=${{ github.run_id }}-cli-amd64
- extras=ecr-cache
environment: deploy
permissions:
id-token: write
timeout-minutes: 30
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
REGISTRY_IMAGE: onyxdotapp/onyx-cli
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # ratchet:aws-actions/configure-aws-credentials@v6.0.0
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802 # ratchet:aws-actions/aws-secretsmanager-get-secrets@v2.0.10
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # ratchet:docker/setup-buildx-action@v4
- name: Login to Docker Hub
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # ratchet:docker/login-action@v4
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push AMD64
id: build
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # ratchet:docker/build-push-action@v7
with:
context: ./cli
file: ./cli/Dockerfile
platforms: linux/amd64
cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
cache-to: type=inline
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
docker-arm64:
runs-on:
- runs-on
- runner=2cpu-linux-arm64
- run-id=${{ github.run_id }}-cli-arm64
- extras=ecr-cache
environment: deploy
permissions:
id-token: write
timeout-minutes: 30
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
REGISTRY_IMAGE: onyxdotapp/onyx-cli
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # ratchet:aws-actions/configure-aws-credentials@v6.0.0
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802 # ratchet:aws-actions/aws-secretsmanager-get-secrets@v2.0.10
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # ratchet:docker/setup-buildx-action@v4
- name: Login to Docker Hub
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # ratchet:docker/login-action@v4
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push ARM64
id: build
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # ratchet:docker/build-push-action@v7
with:
context: ./cli
file: ./cli/Dockerfile
platforms: linux/arm64
cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
cache-to: type=inline
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
merge-docker:
needs:
- docker-amd64
- docker-arm64
runs-on:
- runs-on
- runner=2cpu-linux-x64
- run-id=${{ github.run_id }}-cli-merge
environment: deploy
permissions:
id-token: write
timeout-minutes: 10
env:
REGISTRY_IMAGE: onyxdotapp/onyx-cli
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # ratchet:aws-actions/configure-aws-credentials@v6.0.0
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802 # ratchet:aws-actions/aws-secretsmanager-get-secrets@v2.0.10
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # ratchet:docker/setup-buildx-action@v4
- name: Login to Docker Hub
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # ratchet:docker/login-action@v4
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Create and push manifest
env:
AMD64_DIGEST: ${{ needs.docker-amd64.outputs.digest }}
ARM64_DIGEST: ${{ needs.docker-arm64.outputs.digest }}
TAG: ${{ github.ref_name }}
run: |
SANITIZED_TAG="${TAG#cli/}"
IMAGES=(
"${REGISTRY_IMAGE}@${AMD64_DIGEST}"
"${REGISTRY_IMAGE}@${ARM64_DIGEST}"
)
if [[ "$TAG" =~ ^cli/v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
docker buildx imagetools create \
-t "${REGISTRY_IMAGE}:${SANITIZED_TAG}" \
-t "${REGISTRY_IMAGE}:latest" \
"${IMAGES[@]}"
else
docker buildx imagetools create \
-t "${REGISTRY_IMAGE}:${SANITIZED_TAG}" \
"${IMAGES[@]}"
fi

View File

@@ -22,12 +22,10 @@ jobs:
- { goos: "windows", goarch: "arm64" }
- { goos: "darwin", goarch: "amd64" }
- { goos: "darwin", goarch: "arm64" }
- { goos: "", goarch: "" }
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0
- uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false

View File

@@ -48,6 +48,10 @@ on:
required: false
default: true
type: boolean
secrets:
AWS_OIDC_ROLE_ARN:
description: "AWS role ARN for OIDC auth"
required: true
permissions:
contents: read
@@ -73,7 +77,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -116,7 +120,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -158,7 +162,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -264,7 +268,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2

View File

@@ -110,7 +110,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -180,7 +180,7 @@ jobs:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
@@ -244,7 +244,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2

View File

@@ -119,10 +119,11 @@ repos:
]
- repo: https://github.com/golangci/golangci-lint
rev: 9f61b0f53f80672872fced07b6874397c3ed197b # frozen: v2.7.2
rev: 5d1e709b7be35cb2025444e19de266b056b7b7ee # frozen: v2.10.1
hooks:
- id: golangci-lint
entry: bash -c "find tools/ -name go.mod -print0 | xargs -0 -I{} bash -c 'cd \"$(dirname {})\" && golangci-lint run ./...'"
language_version: "1.26.0"
entry: bash -c "find . -name go.mod -not -path './.venv/*' -print0 | xargs -0 -I{} bash -c 'cd \"$(dirname {})\" && golangci-lint run ./...'"
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.

.vscode/launch.json vendored
View File

@@ -40,19 +40,7 @@
}
},
{
"name": "Celery (lightweight mode)",
"configurations": [
"Celery primary",
"Celery background",
"Celery beat"
],
"presentation": {
"group": "1"
},
"stopAll": true
},
{
"name": "Celery (standard mode)",
"name": "Celery",
"configurations": [
"Celery primary",
"Celery light",
@@ -253,35 +241,6 @@
},
"consoleTitle": "Celery light Console"
},
{
"name": "Celery background",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.background",
"worker",
"--pool=threads",
"--concurrency=20",
"--prefetch-multiplier=4",
"--loglevel=INFO",
"--hostname=background@%n",
"-Q",
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup,index_attempt_cleanup,docprocessing,connector_doc_fetching,connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,kg_processing,monitoring,user_file_processing,user_file_project_sync,user_file_delete,opensearch_migration"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery background Console"
},
{
"name": "Celery heavy",
"type": "debugpy",
@@ -526,21 +485,6 @@
"group": "3"
}
},
{
"name": "Clear and Restart OpenSearch Container",
// Generic debugger type, required arg but has no bearing on bash.
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": [
"${workspaceFolder}/backend/scripts/restart_opensearch_container.sh"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "3"
}
},
{
"name": "Eval CLI",
"type": "debugpy",

View File

@@ -86,37 +86,6 @@ Onyx uses Celery for asynchronous task processing with multiple specialized work
- Monitoring tasks (every 5 minutes)
- Cleanup tasks (hourly)
#### Worker Deployment Modes
Onyx supports two deployment modes for background workers, controlled by the `USE_LIGHTWEIGHT_BACKGROUND_WORKER` environment variable:
**Lightweight Mode** (default, `USE_LIGHTWEIGHT_BACKGROUND_WORKER=true`):
- Runs a single consolidated `background` worker that handles all background tasks:
- Light worker tasks (Vespa operations, permissions sync, deletion)
- Document processing (indexing pipeline)
- Document fetching (connector data retrieval)
- Pruning operations (from `heavy` worker)
- Knowledge graph processing (from `kg_processing` worker)
- Monitoring tasks (from `monitoring` worker)
- User file processing (from `user_file_processing` worker)
- Lower resource footprint (fewer worker processes)
- Suitable for smaller deployments or development environments
- Default concurrency: 20 threads (increased to handle combined workload)
**Standard Mode** (`USE_LIGHTWEIGHT_BACKGROUND_WORKER=false`):
- Runs separate specialized workers as documented above (light, docprocessing, docfetching, heavy, kg_processing, monitoring, user_file_processing)
- Better isolation and scalability
- Can scale individual workers independently based on workload
- Suitable for production deployments with higher load
The deployment mode affects:
- **Backend**: Worker processes spawned by supervisord or dev scripts
- **Helm**: Which Kubernetes deployments are created
- **Dev Environment**: Which workers `dev_run_background_jobs.py` spawns
#### Key Features
- **Thread-based Workers**: All workers use thread pools (not processes) for stability
@@ -135,6 +104,10 @@ The deployment mode affects:
- Always use `@shared_task` rather than `@celery_app`
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
- Never enqueue a task without an expiration. Always supply `expires=` when
sending tasks, either from the beat schedule or directly from another task. It
should never be acceptable to submit code which enqueues tasks without an
expiration, as doing so can lead to unbounded task queue growth.
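For illustration only, a minimal sketch of passing `expires=` when sending a task; the task name and timeout below are hypothetical, not taken from the codebase:
```python
from celery import shared_task


@shared_task
def sync_document_metadata(document_id: str) -> None:
    ...  # actual task bodies live under background/celery/tasks/


# Always pass `expires=` (seconds or a datetime) so stale tasks are
# discarded instead of accumulating in the queue.
sync_document_metadata.apply_async(
    kwargs={"document_id": "doc-123"},
    expires=600,  # dropped if not started within 10 minutes
)
```
The same applies to beat schedules, where the expiration goes in the entry's `options` dict (e.g. `"options": {"expires": 600}`).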
**Defining APIs**:
When creating new FastAPI APIs, do NOT use the `response_model` field. Instead, just type the
@@ -571,6 +544,8 @@ To run them:
npx playwright test <TEST_NAME>
```
For shared fixtures, best practices, and detailed guidance, see `backend/tests/README.md`.
## Logs
When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access
@@ -617,6 +592,45 @@ Keep it high level. You can reference certain files or functions though.
Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.
## Error Handling
**Always raise `OnyxError` from `onyx.error_handling.exceptions` instead of `HTTPException`.
Never hardcode status codes or use `starlette.status` / `fastapi.status` constants directly.**
A global FastAPI exception handler converts `OnyxError` into a JSON response with the standard
`{"error_code": "...", "detail": "..."}` shape. This eliminates boilerplate and keeps error
handling consistent across the entire backend.
```python
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
# ✅ Good
raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
# ✅ Good — no extra message needed
raise OnyxError(OnyxErrorCode.UNAUTHENTICATED)
# ✅ Good — upstream service with dynamic status code
raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)
# ❌ Bad — using HTTPException directly
raise HTTPException(status_code=404, detail="Session not found")
# ❌ Bad — starlette constant
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Access denied")
```
Available error codes are defined in `backend/onyx/error_handling/error_codes.py`. If a new error
category is needed, add it there first — do not invent ad-hoc codes.
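If it helps to visualize, here is a purely hypothetical sketch of one way such an enum could carry both a stable error code and an HTTP status; the real definition in `backend/onyx/error_handling/error_codes.py` may differ:
```python
from enum import Enum


class OnyxErrorCode(Enum):
    # (code string, HTTP status): illustrative values only
    NOT_FOUND = ("not_found", 404)
    VALIDATION_ERROR = ("validation_error", 400)
    BAD_GATEWAY = ("bad_gateway", 502)

    def __init__(self, code: str, status_code: int) -> None:
        self.code = code
        self.status_code = status_code
```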
**Upstream service errors:** When forwarding errors from an upstream service where the HTTP
status code is dynamic (comes from the upstream response), use `status_code_override`:
```python
raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=e.response.status_code)
```
## Best Practices
In addition to the other content in this file, best practices for contributing

View File

@@ -46,7 +46,11 @@ RUN apt-get update && \
pkg-config \
gcc \
nano \
vim && \
vim \
# Install procps so Kubernetes exec sessions can use ps aux for debugging
procps \
libjemalloc2 \
&& \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
@@ -164,6 +168,13 @@ ENV PYTHONPATH=/app
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION}
# Use jemalloc instead of glibc malloc to reduce memory fragmentation
# in long-running Python processes (API server, Celery workers).
# The soname is architecture-independent; the dynamic linker resolves
# the correct path from standard library directories.
# Placed after all RUN steps so build-time processes are unaffected.
ENV LD_PRELOAD=libjemalloc.so.2
# Default command which does nothing
# This container is used by api server and background which specify their own CMD
CMD ["tail", "-f", "/dev/null"]

View File

@@ -0,0 +1,37 @@
"""add cache_store table
Revision ID: 2664261bfaab
Revises: 4a1e4b1c89d2
Create Date: 2026-02-27 00:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "2664261bfaab"
down_revision = "4a1e4b1c89d2"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.create_table(
"cache_store",
sa.Column("key", sa.String(), nullable=False),
sa.Column("value", sa.LargeBinary(), nullable=True),
sa.Column("expires_at", sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint("key"),
)
op.create_index(
"ix_cache_store_expires",
"cache_store",
["expires_at"],
postgresql_where=sa.text("expires_at IS NOT NULL"),
)
def downgrade() -> None:
op.drop_index("ix_cache_store_expires", table_name="cache_store")
op.drop_table("cache_store")

View File

@@ -0,0 +1,34 @@
"""make scim_user_mapping.external_id nullable
Revision ID: a3b8d9e2f1c4
Revises: 2664261bfaab
Create Date: 2026-03-02
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "a3b8d9e2f1c4"
down_revision = "2664261bfaab"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.alter_column(
"scim_user_mapping",
"external_id",
nullable=True,
)
def downgrade() -> None:
# Delete any rows where external_id is NULL before re-applying NOT NULL
op.execute("DELETE FROM scim_user_mapping WHERE external_id IS NULL")
op.alter_column(
"scim_user_mapping",
"external_id",
nullable=False,
)

View File

@@ -1,15 +0,0 @@
from onyx.background.celery.apps import app_base
from onyx.background.celery.apps.background import celery_app
celery_app.autodiscover_tasks(
app_base.filter_task_modules(
[
"ee.onyx.background.celery.tasks.doc_permission_syncing",
"ee.onyx.background.celery.tasks.external_group_syncing",
"ee.onyx.background.celery.tasks.cleanup",
"ee.onyx.background.celery.tasks.tenant_provisioning",
"ee.onyx.background.celery.tasks.query_history",
]
)
)

View File

@@ -11,11 +11,10 @@ from ee.onyx.server.license.models import LicenseMetadata
from ee.onyx.server.license.models import LicensePayload
from ee.onyx.server.license.models import LicenseSource
from onyx.auth.schemas import UserRole
from onyx.cache.factory import get_cache_backend
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.db.models import License
from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
@@ -142,7 +141,7 @@ def get_used_seats(tenant_id: str | None = None) -> int:
def get_cached_license_metadata(tenant_id: str | None = None) -> LicenseMetadata | None:
"""
Get license metadata from Redis cache.
Get license metadata from cache.
Args:
tenant_id: Tenant ID (for multi-tenant deployments)
@@ -150,38 +149,34 @@ def get_cached_license_metadata(tenant_id: str | None = None) -> LicenseMetadata
Returns:
LicenseMetadata if cached, None otherwise
"""
tenant = tenant_id or get_current_tenant_id()
redis_client = get_redis_replica_client(tenant_id=tenant)
cache = get_cache_backend(tenant_id=tenant_id)
cached = cache.get(LICENSE_METADATA_KEY)
if not cached:
return None
cached = redis_client.get(LICENSE_METADATA_KEY)
if cached:
try:
cached_str: str
if isinstance(cached, bytes):
cached_str = cached.decode("utf-8")
else:
cached_str = str(cached)
return LicenseMetadata.model_validate_json(cached_str)
except Exception as e:
logger.warning(f"Failed to parse cached license metadata: {e}")
return None
return None
try:
cached_str = (
cached.decode("utf-8") if isinstance(cached, bytes) else str(cached)
)
return LicenseMetadata.model_validate_json(cached_str)
except Exception as e:
logger.warning(f"Failed to parse cached license metadata: {e}")
return None
def invalidate_license_cache(tenant_id: str | None = None) -> None:
"""
Invalidate the license metadata cache (not the license itself).
This deletes the cached LicenseMetadata from Redis. The actual license
in the database is not affected. Redis delete is idempotent - if the
key doesn't exist, this is a no-op.
Deletes the cached LicenseMetadata. The actual license in the database
is not affected. Delete is idempotent; if the key doesn't exist, this
is a no-op.
Args:
tenant_id: Tenant ID (for multi-tenant deployments)
"""
tenant = tenant_id or get_current_tenant_id()
redis_client = get_redis_client(tenant_id=tenant)
redis_client.delete(LICENSE_METADATA_KEY)
cache = get_cache_backend(tenant_id=tenant_id)
cache.delete(LICENSE_METADATA_KEY)
logger.info("License cache invalidated")
@@ -192,7 +187,7 @@ def update_license_cache(
tenant_id: str | None = None,
) -> LicenseMetadata:
"""
Update the Redis cache with license metadata.
Update the cache with license metadata.
We cache all license statuses (ACTIVE, GRACE_PERIOD, GATED_ACCESS) because:
1. Frontend needs status to show appropriate UI/banners
@@ -211,7 +206,7 @@ def update_license_cache(
from ee.onyx.utils.license import get_license_status
tenant = tenant_id or get_current_tenant_id()
redis_client = get_redis_client(tenant_id=tenant)
cache = get_cache_backend(tenant_id=tenant_id)
used_seats = get_used_seats(tenant)
status = get_license_status(payload, grace_period_end)
@@ -230,7 +225,7 @@ def update_license_cache(
stripe_subscription_id=payload.stripe_subscription_id,
)
redis_client.set(
cache.set(
LICENSE_METADATA_KEY,
metadata.model_dump_json(),
ex=LICENSE_CACHE_TTL_SECONDS,

View File

@@ -126,12 +126,16 @@ class ScimDAL(DAL):
def create_user_mapping(
self,
external_id: str,
external_id: str | None,
user_id: UUID,
scim_username: str | None = None,
fields: ScimMappingFields | None = None,
) -> ScimUserMapping:
"""Create a mapping between a SCIM externalId and an Onyx user."""
"""Create a SCIM mapping for a user.
``external_id`` may be ``None`` when the IdP omits it (RFC 7643
allows this). The mapping still marks the user as SCIM-managed.
"""
f = fields or ScimMappingFields()
mapping = ScimUserMapping(
external_id=external_id,
@@ -270,8 +274,13 @@ class ScimDAL(DAL):
Raises:
ValueError: If the filter uses an unsupported attribute.
"""
query = select(User).where(
User.role.notin_([UserRole.SLACK_USER, UserRole.EXT_PERM_USER])
# Inner-join with ScimUserMapping so only SCIM-managed users appear.
# Pre-existing system accounts (anonymous, admin, etc.) are excluded
# unless they were explicitly linked via SCIM provisioning.
query = (
select(User)
.join(ScimUserMapping, ScimUserMapping.user_id == User.id)
.where(User.role.notin_([UserRole.SLACK_USER, UserRole.EXT_PERM_USER]))
)
if scim_filter:
@@ -321,34 +330,37 @@ class ScimDAL(DAL):
scim_username: str | None = None,
fields: ScimMappingFields | None = None,
) -> None:
"""Create, update, or delete the external ID mapping for a user.
"""Sync the SCIM mapping for a user.
If a mapping already exists, its fields are updated (including
setting ``external_id`` to ``None`` when the IdP omits it).
If no mapping exists and ``new_external_id`` is provided, a new
mapping is created. A mapping is never deleted here — SCIM-managed
users must retain their mapping to remain visible in ``GET /Users``.
When *fields* is provided, all mapping fields are written
unconditionally — including ``None`` values — so that a caller can
clear a previously-set field (e.g. removing a department).
"""
mapping = self.get_user_mapping_by_user_id(user_id)
if new_external_id:
if mapping:
if mapping.external_id != new_external_id:
mapping.external_id = new_external_id
if scim_username is not None:
mapping.scim_username = scim_username
if fields is not None:
mapping.department = fields.department
mapping.manager = fields.manager
mapping.given_name = fields.given_name
mapping.family_name = fields.family_name
mapping.scim_emails_json = fields.scim_emails_json
else:
self.create_user_mapping(
external_id=new_external_id,
user_id=user_id,
scim_username=scim_username,
fields=fields,
)
elif mapping:
self.delete_user_mapping(mapping.id)
if mapping:
if mapping.external_id != new_external_id:
mapping.external_id = new_external_id
if scim_username is not None:
mapping.scim_username = scim_username
if fields is not None:
mapping.department = fields.department
mapping.manager = fields.manager
mapping.given_name = fields.given_name
mapping.family_name = fields.family_name
mapping.scim_emails_json = fields.scim_emails_json
elif new_external_id:
self.create_user_mapping(
external_id=new_external_id,
user_id=user_id,
scim_username=scim_username,
fields=fields,
)
def _get_user_mappings_batch(
self, user_ids: list[UUID]

View File

@@ -15,6 +15,7 @@ from sqlalchemy.orm import Session
from ee.onyx.server.user_group.models import SetCuratorRequest
from ee.onyx.server.user_group.models import UserGroupCreate
from ee.onyx.server.user_group.models import UserGroupUpdate
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
@@ -471,7 +472,9 @@ def _add_user_group__cc_pair_relationships__no_commit(
def insert_user_group(db_session: Session, user_group: UserGroupCreate) -> UserGroup:
db_user_group = UserGroup(
name=user_group.name, time_last_modified_by_user=func.now()
name=user_group.name,
time_last_modified_by_user=func.now(),
is_up_to_date=DISABLE_VECTOR_DB,
)
db_session.add(db_user_group)
db_session.flush() # give the group an ID
@@ -774,8 +777,7 @@ def update_user_group(
cc_pair_ids=user_group_update.cc_pair_ids,
)
# only needs to sync with Vespa if the cc_pairs have been updated
if cc_pairs_updated:
if cc_pairs_updated and not DISABLE_VECTOR_DB:
db_user_group.is_up_to_date = False
removed_users = db_session.scalars(

View File

@@ -68,6 +68,7 @@ def get_external_access_for_raw_gdrive_file(
company_domain: str,
retriever_drive_service: GoogleDriveService | None,
admin_drive_service: GoogleDriveService,
fallback_user_email: str,
add_prefix: bool = False,
) -> ExternalAccess:
"""
@@ -79,6 +80,11 @@ def get_external_access_for_raw_gdrive_file(
set add_prefix to True so group IDs are prefixed with the source type.
When invoked from doc_sync (permission sync), use the default (False)
since upsert_document_external_perms handles prefixing.
fallback_user_email: When we cannot retrieve any permission info for a file
(e.g. externally-owned files where the API returns no permissions
and permissions.list returns 403), fall back to granting access
to this user. This is typically the impersonated org user whose
drive contained the file.
"""
doc_id = file.get("id")
if not doc_id:
@@ -117,6 +123,26 @@ def get_external_access_for_raw_gdrive_file(
[permissions_list, backup_permissions_list]
)
# For externally-owned files, the Drive API may return no permissions
# and permissions.list may return 403. In this case, fall back to
# granting access to the user who found the file in their drive.
# Note, even if other users also have access to this file,
# they will not be granted access in Onyx.
# We check permissions_list (the final result after all fetch attempts)
# rather than the raw fields, because permission_ids may be present
# but the actual fetch can still return empty due to a 403.
if not permissions_list:
logger.info(
f"No permission info available for file {doc_id} "
f"(likely owned by a user outside of your organization). "
f"Falling back to granting access to retriever user: {fallback_user_email}"
)
return ExternalAccess(
external_user_emails={fallback_user_email},
external_user_group_ids=set(),
is_public=False,
)
folder_ids_to_inherit_permissions_from: set[str] = set()
user_emails: set[str] = set()
group_emails: set[str] = set()

View File

@@ -1,6 +1,8 @@
from collections.abc import Generator
from typing import Any
from jira import JIRA
from jira.exceptions import JIRAError
from ee.onyx.db.external_perm import ExternalUserGroup
from onyx.connectors.jira.utils import build_jira_client
@@ -9,107 +11,102 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
_ATLASSIAN_ACCOUNT_TYPE = "atlassian"
_GROUP_MEMBER_PAGE_SIZE = 50
def _get_jira_group_members_email(
# The GET /group/member endpoint was introduced in Jira 6.0.
# Jira versions older than 6.0 do not have group management REST APIs at all.
_MIN_JIRA_VERSION_FOR_GROUP_MEMBER = "6.0"
def _fetch_group_member_page(
jira_client: JIRA,
group_name: str,
) -> list[str]:
"""Get all member emails for a Jira group.
start_at: int,
) -> dict[str, Any]:
"""Fetch a single page from the non-deprecated GET /group/member endpoint.
Filters out app accounts (bots, integrations) and only returns real user emails.
The old GET /group endpoint (used by jira_client.group_members()) is deprecated
and decommissioned in Jira Server 10.3+. This uses the replacement endpoint
directly via the library's internal _get_json helper, following the same pattern
as enhanced_search_ids / bulk_fetch_issues in connector.py.
There has been an open PR against the library to switch to this endpoint since last year:
https://github.com/pycontribs/jira/pull/2356
Once it is merged and released, we can switch to using the library function.
"""
emails: list[str] = []
try:
# group_members returns an OrderedDict of account_id -> member_info
members = jira_client.group_members(group=group_name)
if not members:
logger.warning(f"No members found for group {group_name}")
return emails
for account_id, member_info in members.items():
# member_info is a dict with keys like 'fullname', 'email', 'active'
email = member_info.get("email")
# Skip "hidden" emails - these are typically app accounts
if email and email != "hidden":
emails.append(email)
else:
# For cloud, we might need to fetch user details separately
try:
user = jira_client.user(id=account_id)
# Skip app accounts (bots, integrations, etc.)
if hasattr(user, "accountType") and user.accountType == "app":
logger.info(
f"Skipping app account {account_id} for group {group_name}"
)
continue
if hasattr(user, "emailAddress") and user.emailAddress:
emails.append(user.emailAddress)
else:
logger.warning(f"User {account_id} has no email address")
except Exception as e:
logger.warning(
f"Could not fetch email for user {account_id} in group {group_name}: {e}"
)
except Exception as e:
logger.error(f"Error fetching members for group {group_name}: {e}")
return emails
return jira_client._get_json(
"group/member",
params={
"groupname": group_name,
"includeInactiveUsers": "false",
"startAt": start_at,
"maxResults": _GROUP_MEMBER_PAGE_SIZE,
},
)
except JIRAError as e:
if e.status_code == 404:
raise RuntimeError(
f"GET /group/member returned 404 for group '{group_name}'. "
f"This endpoint requires Jira {_MIN_JIRA_VERSION_FOR_GROUP_MEMBER}+. "
f"If you are running a self-hosted Jira instance, please upgrade "
f"to at least Jira {_MIN_JIRA_VERSION_FOR_GROUP_MEMBER}."
) from e
raise
def _build_group_member_email_map(
def _get_group_member_emails(
jira_client: JIRA,
) -> dict[str, set[str]]:
"""Build a map of group names to member emails."""
group_member_emails: dict[str, set[str]] = {}
group_name: str,
) -> set[str]:
"""Get all member emails for a single Jira group.
try:
# Get all groups from Jira - returns a list of group name strings
group_names = jira_client.groups()
Uses the non-deprecated GET /group/member endpoint which returns full user
objects including accountType, so we can filter out app/customer accounts
without making separate user() calls.
"""
emails: set[str] = set()
start_at = 0
if not group_names:
logger.warning("No groups found in Jira")
return group_member_emails
while True:
try:
page = _fetch_group_member_page(jira_client, group_name, start_at)
except Exception as e:
logger.error(f"Error fetching members for group {group_name}: {e}")
raise
logger.info(f"Found {len(group_names)} groups in Jira")
for group_name in group_names:
if not group_name:
members: list[dict[str, Any]] = page.get("values", [])
for member in members:
account_type = member.get("accountType")
# On Jira DC < 9.0, accountType is absent; include those users.
# On Cloud / DC 9.0+, filter to real user accounts only.
if account_type is not None and account_type != _ATLASSIAN_ACCOUNT_TYPE:
continue
member_emails = _get_jira_group_members_email(
jira_client=jira_client,
group_name=group_name,
)
if member_emails:
group_member_emails[group_name] = set(member_emails)
logger.debug(
f"Found {len(member_emails)} members for group {group_name}"
)
email = member.get("emailAddress")
if email:
emails.add(email)
else:
logger.debug(f"No members found for group {group_name}")
logger.warning(
f"Atlassian user {member.get('accountId', 'unknown')} "
f"in group {group_name} has no visible email address"
)
except Exception as e:
logger.error(f"Error building group member email map: {e}")
if page.get("isLast", True) or not members:
break
start_at += len(members)
return group_member_emails
return emails
def jira_group_sync(
tenant_id: str, # noqa: ARG001
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
"""
Sync Jira groups and their members.
"""Sync Jira groups and their members, yielding one group at a time.
This function fetches all groups from Jira and yields ExternalUserGroup
objects containing the group ID and member emails.
Streams group-by-group rather than accumulating all groups in memory.
"""
jira_base_url = cc_pair.connector.connector_specific_config.get("jira_base_url", "")
scoped_token = cc_pair.connector.connector_specific_config.get(
@@ -130,12 +127,26 @@ def jira_group_sync(
scoped_token=scoped_token,
)
group_member_email_map = _build_group_member_email_map(jira_client=jira_client)
if not group_member_email_map:
raise ValueError(f"No groups with members found for cc_pair_id={cc_pair.id}")
group_names = jira_client.groups()
if not group_names:
raise ValueError(f"No groups found for cc_pair_id={cc_pair.id}")
for group_id, group_member_emails in group_member_email_map.items():
yield ExternalUserGroup(
id=group_id,
user_emails=list(group_member_emails),
logger.info(f"Found {len(group_names)} groups in Jira")
for group_name in group_names:
if not group_name:
continue
member_emails = _get_group_member_emails(
jira_client=jira_client,
group_name=group_name,
)
if not member_emails:
logger.debug(f"No members found for group {group_name}")
continue
logger.debug(f"Found {len(member_emails)} members for group {group_name}")
yield ExternalUserGroup(
id=group_name,
user_emails=list(member_emails),
)

View File

@@ -4,7 +4,6 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI
from httpx_oauth.clients.google import GoogleOAuth2
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
from ee.onyx.server.analytics.api import router as analytics_router
from ee.onyx.server.auth_check import check_ee_router_auth
from ee.onyx.server.billing.api import router as billing_router
@@ -153,12 +152,9 @@ def get_application() -> FastAPI:
# License management
include_router_with_global_prefix_prepended(application, license_router)
# Unified billing API - available when license system is enabled
# Works for both self-hosted and cloud deployments
# TODO(ENG-3533): Once frontend migrates to /admin/billing/*, this becomes the
# primary billing API and /tenants/* billing endpoints can be removed
if LICENSE_ENFORCEMENT_ENABLED:
include_router_with_global_prefix_prepended(application, billing_router)
# Unified billing API - always registered in EE.
# Each endpoint is protected by the `current_admin_user` dependency (admin auth).
include_router_with_global_prefix_prepended(application, billing_router)
if MULTI_TENANT:
# Tenant management

View File

@@ -26,7 +26,6 @@ import asyncio
import httpx
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from pydantic import BaseModel
from sqlalchemy.orm import Session
@@ -42,7 +41,6 @@ from ee.onyx.server.billing.models import SeatUpdateRequest
from ee.onyx.server.billing.models import SeatUpdateResponse
from ee.onyx.server.billing.models import StripePublishableKeyResponse
from ee.onyx.server.billing.models import SubscriptionStatusResponse
from ee.onyx.server.billing.service import BillingServiceError
from ee.onyx.server.billing.service import (
create_checkout_session as create_checkout_service,
)
@@ -58,6 +56,8 @@ from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.db.engine.sql_engine import get_session
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.redis.redis_pool import get_shared_redis_client
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
@@ -169,26 +169,23 @@ async def create_checkout_session(
if seats is not None:
used_seats = get_used_seats(tenant_id)
if seats < used_seats:
raise HTTPException(
status_code=400,
detail=f"Cannot subscribe with fewer seats than current usage. "
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
f"Cannot subscribe with fewer seats than current usage. "
f"You have {used_seats} active users/integrations but requested {seats} seats.",
)
# Build redirect URL for after checkout completion
redirect_url = f"{WEB_DOMAIN}/admin/billing?checkout=success"
try:
return await create_checkout_service(
billing_period=billing_period,
seats=seats,
email=email,
license_data=license_data,
redirect_url=redirect_url,
tenant_id=tenant_id,
)
except BillingServiceError as e:
raise HTTPException(status_code=e.status_code, detail=e.message)
return await create_checkout_service(
billing_period=billing_period,
seats=seats,
email=email,
license_data=license_data,
redirect_url=redirect_url,
tenant_id=tenant_id,
)
@router.post("/create-customer-portal-session")
@@ -206,18 +203,15 @@ async def create_customer_portal_session(
# Self-hosted requires license
if not MULTI_TENANT and not license_data:
raise HTTPException(status_code=400, detail="No license found")
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "No license found")
return_url = request.return_url if request else f"{WEB_DOMAIN}/admin/billing"
try:
return await create_portal_service(
license_data=license_data,
return_url=return_url,
tenant_id=tenant_id,
)
except BillingServiceError as e:
raise HTTPException(status_code=e.status_code, detail=e.message)
return await create_portal_service(
license_data=license_data,
return_url=return_url,
tenant_id=tenant_id,
)
@router.get("/billing-information")
@@ -240,9 +234,9 @@ async def get_billing_information(
# Check circuit breaker (self-hosted only)
if _is_billing_circuit_open():
raise HTTPException(
status_code=503,
detail="Stripe connection temporarily disabled. Click 'Connect to Stripe' to retry.",
raise OnyxError(
OnyxErrorCode.SERVICE_UNAVAILABLE,
"Stripe connection temporarily disabled. Click 'Connect to Stripe' to retry.",
)
try:
@@ -250,11 +244,15 @@ async def get_billing_information(
license_data=license_data,
tenant_id=tenant_id,
)
except BillingServiceError as e:
except OnyxError as e:
# Open circuit breaker on connection failures (self-hosted only)
if e.status_code in (502, 503, 504):
if e.status_code in (
OnyxErrorCode.BAD_GATEWAY.status_code,
OnyxErrorCode.SERVICE_UNAVAILABLE.status_code,
OnyxErrorCode.GATEWAY_TIMEOUT.status_code,
):
_open_billing_circuit()
raise HTTPException(status_code=e.status_code, detail=e.message)
raise
@router.post("/seats/update")
@@ -274,31 +272,25 @@ async def update_seats(
# Self-hosted requires license
if not MULTI_TENANT and not license_data:
raise HTTPException(status_code=400, detail="No license found")
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "No license found")
# Validate that new seat count is not less than current used seats
used_seats = get_used_seats(tenant_id)
if request.new_seat_count < used_seats:
raise HTTPException(
status_code=400,
detail=f"Cannot reduce seats below current usage. "
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
f"Cannot reduce seats below current usage. "
f"You have {used_seats} active users/integrations but requested {request.new_seat_count} seats.",
)
try:
result = await update_seat_service(
new_seat_count=request.new_seat_count,
license_data=license_data,
tenant_id=tenant_id,
)
# Note: Don't store license here - the control plane may still be processing
# the subscription update. The frontend should call /license/claim after a
# short delay to get the freshly generated license.
return result
except BillingServiceError as e:
raise HTTPException(status_code=e.status_code, detail=e.message)
# Note: Don't store license here - the control plane may still be processing
# the subscription update. The frontend should call /license/claim after a
# short delay to get the freshly generated license.
return await update_seat_service(
new_seat_count=request.new_seat_count,
license_data=license_data,
tenant_id=tenant_id,
)
@router.get("/stripe-publishable-key")
@@ -329,18 +321,18 @@ async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
# Fall back to S3 bucket
if not STRIPE_PUBLISHABLE_KEY_URL:
raise HTTPException(
status_code=500,
detail="Stripe publishable key is not configured",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Stripe publishable key is not configured",
)
try:
@@ -351,17 +343,17 @@ async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
# Validate key format
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
except httpx.HTTPError:
raise HTTPException(
status_code=500,
detail="Failed to fetch Stripe publishable key",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Failed to fetch Stripe publishable key",
)

View File

@@ -22,6 +22,8 @@ from ee.onyx.server.billing.models import SeatUpdateResponse
from ee.onyx.server.billing.models import SubscriptionStatusResponse
from ee.onyx.server.tenants.access import generate_data_plane_token
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
@@ -31,15 +33,6 @@ logger = setup_logger()
_REQUEST_TIMEOUT = 30.0
class BillingServiceError(Exception):
"""Exception raised for billing service errors."""
def __init__(self, message: str, status_code: int = 500):
self.message = message
self.status_code = status_code
super().__init__(self.message)
def _get_proxy_headers(license_data: str | None) -> dict[str, str]:
"""Build headers for proxy requests (self-hosted).
@@ -101,7 +94,7 @@ async def _make_billing_request(
Response JSON as dict
Raises:
BillingServiceError: If request fails
OnyxError: If request fails
"""
base_url = _get_base_url()
@@ -128,11 +121,17 @@ async def _make_billing_request(
except Exception:
pass
logger.error(f"{error_message}: {e.response.status_code} - {detail}")
raise BillingServiceError(detail, e.response.status_code)
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY,
detail,
status_code_override=e.response.status_code,
)
except httpx.RequestError:
logger.exception("Failed to connect to billing service")
raise BillingServiceError("Failed to connect to billing service", 502)
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY, "Failed to connect to billing service"
)
async def create_checkout_session(

View File

@@ -223,6 +223,15 @@ def get_active_scim_token(
token = dal.get_active_token()
if not token:
raise HTTPException(status_code=404, detail="No active SCIM token")
# Derive the IdP domain from the first synced user as a heuristic.
idp_domain: str | None = None
mappings, _total = dal.list_user_mappings(start_index=1, count=1)
if mappings:
user = dal.get_user(mappings[0].user_id)
if user and "@" in user.email:
idp_domain = user.email.rsplit("@", 1)[1]
return ScimTokenResponse(
id=token.id,
name=token.name,
@@ -230,6 +239,7 @@ def get_active_scim_token(
is_active=token.is_active,
created_at=token.created_at,
last_used_at=token.last_used_at,
idp_domain=idp_domain,
)

View File

@@ -14,7 +14,6 @@ import requests
from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import HTTPException
from fastapi import UploadFile
from sqlalchemy.orm import Session
@@ -35,6 +34,8 @@ from ee.onyx.server.license.models import SeatUsageResponse
from ee.onyx.utils.license import verify_license_signature
from onyx.auth.users import User
from onyx.db.engine.sql_engine import get_session
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
@@ -127,9 +128,9 @@ async def claim_license(
2. Without session_id: Re-claim using existing license for auth
"""
if MULTI_TENANT:
raise HTTPException(
status_code=400,
detail="License claiming is only available for self-hosted deployments",
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"License claiming is only available for self-hosted deployments",
)
try:
@@ -146,15 +147,16 @@ async def claim_license(
# Re-claim using existing license for auth
metadata = get_license_metadata(db_session)
if not metadata or not metadata.tenant_id:
raise HTTPException(
status_code=400,
detail="No license found. Provide session_id after checkout.",
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"No license found. Provide session_id after checkout.",
)
license_row = get_license(db_session)
if not license_row or not license_row.license_data:
raise HTTPException(
status_code=400, detail="No license found in database"
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"No license found in database",
)
url = f"{CLOUD_DATA_PLANE_URL}/proxy/license/{metadata.tenant_id}"
@@ -173,7 +175,7 @@ async def claim_license(
license_data = data.get("license")
if not license_data:
raise HTTPException(status_code=404, detail="No license in response")
raise OnyxError(OnyxErrorCode.NOT_FOUND, "No license in response")
# Verify signature before persisting
payload = verify_license_signature(license_data)
@@ -199,12 +201,14 @@ async def claim_license(
detail = error_data.get("detail", detail)
except Exception:
pass
raise HTTPException(status_code=status_code, detail=detail)
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=status_code
)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(e))
except requests.RequestException:
raise HTTPException(
status_code=502, detail="Failed to connect to license server"
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY, "Failed to connect to license server"
)
@@ -221,9 +225,9 @@ async def upload_license(
The license file must be cryptographically signed by Onyx.
"""
if MULTI_TENANT:
raise HTTPException(
status_code=400,
detail="License upload is only available for self-hosted deployments",
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"License upload is only available for self-hosted deployments",
)
try:
@@ -234,14 +238,14 @@ async def upload_license(
# Remove any stray whitespace/newlines from user input
license_data = license_data.strip()
except UnicodeDecodeError:
raise HTTPException(status_code=400, detail="Invalid license file format")
raise OnyxError(OnyxErrorCode.INVALID_INPUT, "Invalid license file format")
# Verify cryptographic signature - this is the only validation needed
# The license's tenant_id identifies the customer in control plane, not locally
try:
payload = verify_license_signature(license_data)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(e))
# Persist to DB and update cache
upsert_license(db_session, license_data)
@@ -297,9 +301,9 @@ async def delete_license(
Admin only - removes license from database and invalidates cache.
"""
if MULTI_TENANT:
raise HTTPException(
status_code=400,
detail="License deletion is only available for self-hosted deployments",
raise OnyxError(
OnyxErrorCode.VALIDATION_ERROR,
"License deletion is only available for self-hosted deployments",
)
try:

View File

@@ -46,7 +46,6 @@ from fastapi import FastAPI
from fastapi import Request
from fastapi import Response
from fastapi.responses import JSONResponse
from redis.exceptions import RedisError
from sqlalchemy.exc import SQLAlchemyError
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
@@ -56,6 +55,7 @@ from ee.onyx.configs.license_enforcement_config import (
)
from ee.onyx.db.license import get_cached_license_metadata
from ee.onyx.db.license import refresh_license_cache
from onyx.cache.interface import CACHE_TRANSIENT_ERRORS
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.server.settings.models import ApplicationStatus
from shared_configs.contextvars import get_current_tenant_id
@@ -164,9 +164,9 @@ def add_license_enforcement_middleware(
"[license_enforcement] No license, allowing community features"
)
is_gated = False
except RedisError as e:
except CACHE_TRANSIENT_ERRORS as e:
logger.warning(f"Failed to check license metadata: {e}")
# Fail open - don't block users due to Redis connectivity issues
# Fail open - don't block users due to cache connectivity issues
is_gated = False
if is_gated:

View File

@@ -423,15 +423,63 @@ def create_user(
email = user_resource.userName.strip()
# Enforce seat limit
# Check for existing user — if they exist but aren't SCIM-managed yet,
# link them to the IdP rather than rejecting with 409.
external_id: str | None = user_resource.externalId
scim_username: str = user_resource.userName.strip()
fields: ScimMappingFields = _fields_from_resource(user_resource)
existing_user = dal.get_user_by_email(email)
if existing_user:
existing_mapping = dal.get_user_mapping_by_user_id(existing_user.id)
if existing_mapping:
return _scim_error_response(409, f"User with email {email} already exists")
# Adopt pre-existing user into SCIM management.
# Reactivating a deactivated user consumes a seat, so enforce the
# seat limit the same way replace_user does.
if user_resource.active and not existing_user.is_active:
seat_error = _check_seat_availability(dal)
if seat_error:
return _scim_error_response(403, seat_error)
personal_name = _scim_name_to_str(user_resource.name)
dal.update_user(
existing_user,
is_active=user_resource.active,
**({"personal_name": personal_name} if personal_name else {}),
)
try:
dal.create_user_mapping(
external_id=external_id,
user_id=existing_user.id,
scim_username=scim_username,
fields=fields,
)
dal.commit()
except IntegrityError:
dal.rollback()
return _scim_error_response(
409, f"User with email {email} already has a SCIM mapping"
)
return _scim_resource_response(
provider.build_user_resource(
existing_user,
external_id,
scim_username=scim_username,
fields=fields,
),
status_code=201,
)
# Only enforce seat limit for net-new users — adopting a pre-existing
# user doesn't consume a new seat.
seat_error = _check_seat_availability(dal)
if seat_error:
return _scim_error_response(403, seat_error)
# Check for existing user
if dal.get_user_by_email(email):
return _scim_error_response(409, f"User with email {email} already exists")
# Create user with a random password (SCIM users authenticate via IdP)
personal_name = _scim_name_to_str(user_resource.name)
user = User(
@@ -449,21 +497,21 @@ def create_user(
dal.rollback()
return _scim_error_response(409, f"User with email {email} already exists")
# Create SCIM mapping when externalId is provided — this is how the IdP
# correlates this user on subsequent requests. Per RFC 7643, externalId
# is optional and assigned by the provisioning client.
external_id = user_resource.externalId
scim_username = user_resource.userName.strip()
fields = _fields_from_resource(user_resource)
if external_id:
# Always create a SCIM mapping so that the user is marked as
# SCIM-managed. externalId may be None (RFC 7643 says it's optional).
try:
dal.create_user_mapping(
external_id=external_id,
user_id=user.id,
scim_username=scim_username,
fields=fields,
)
dal.commit()
dal.commit()
except IntegrityError:
dal.rollback()
return _scim_error_response(
409, f"User with email {email} already has a SCIM mapping"
)
return _scim_resource_response(
provider.build_user_resource(

View File

@@ -365,6 +365,7 @@ class ScimTokenResponse(BaseModel):
is_active: bool
created_at: datetime
last_used_at: datetime | None = None
idp_domain: str | None = None
class ScimTokenCreatedResponse(ScimTokenResponse):

View File

@@ -170,7 +170,10 @@ class ScimProvider(ABC):
formatted=user.personal_name or "",
)
if not user.personal_name:
return ScimName(givenName="", familyName="", formatted="")
# Derive a reasonable name from the email so that SCIM spec tests
# see non-empty givenName / familyName for every user resource.
local = user.email.split("@")[0] if user.email else ""
return ScimName(givenName=local, familyName="", formatted=local)
parts = user.personal_name.split(" ", 1)
return ScimName(
givenName=parts[0],

View File

@@ -26,6 +26,7 @@ from onyx.db.models import Tool
from onyx.db.persona import upsert_persona
from onyx.server.features.persona.models import PersonaUpsertRequest
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import LLMProviderView
from onyx.server.settings.models import Settings
from onyx.server.settings.store import store_settings as store_base_settings
from onyx.utils.logger import setup_logger
@@ -125,10 +126,16 @@ def _seed_llms(
existing = fetch_existing_llm_provider(name=request.name, db_session=db_session)
if existing:
request.id = existing.id
seeded_providers = [
upsert_llm_provider(llm_upsert_request, db_session)
for llm_upsert_request in llm_upsert_requests
]
seeded_providers: list[LLMProviderView] = []
for llm_upsert_request in llm_upsert_requests:
try:
seeded_providers.append(upsert_llm_provider(llm_upsert_request, db_session))
except ValueError as e:
logger.warning(
"Failed to upsert LLM provider '%s' during seeding: %s",
llm_upsert_request.name,
e,
)
default_provider = next(
(p for p in seeded_providers if p.model_configurations), None

View File

@@ -6,6 +6,7 @@ from sqlalchemy.exc import SQLAlchemyError
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
from ee.onyx.db.license import get_cached_license_metadata
from ee.onyx.db.license import refresh_license_cache
from onyx.cache.interface import CACHE_TRANSIENT_ERRORS
from onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.server.settings.models import ApplicationStatus
@@ -125,7 +126,7 @@ def apply_license_status_to_settings(settings: Settings) -> Settings:
# syncing) means indexed data may need protection.
settings.application_status = _BLOCKING_STATUS
settings.ee_features_enabled = False
except RedisError as e:
except CACHE_TRANSIENT_ERRORS as e:
logger.warning(f"Failed to check license metadata for settings: {e}")
# Fail closed - disable EE features if we can't verify license
settings.ee_features_enabled = False

View File

@@ -21,7 +21,6 @@ import asyncio
import httpx
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from ee.onyx.auth.users import current_admin_user
from ee.onyx.server.tenants.access import control_plane_dep
@@ -43,6 +42,8 @@ from onyx.auth.users import User
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id
@@ -116,9 +117,14 @@ async def create_customer_portal_session(
try:
portal_url = fetch_customer_portal_session(tenant_id, return_url)
return {"stripe_customer_portal_url": portal_url}
except Exception as e:
except OnyxError:
raise
except Exception:
logger.exception("Failed to create customer portal session")
raise HTTPException(status_code=500, detail=str(e))
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Failed to create customer portal session",
)
@router.post("/create-checkout-session")
@@ -134,9 +140,14 @@ async def create_checkout_session(
try:
checkout_url = fetch_stripe_checkout_session(tenant_id, billing_period, seats)
return {"stripe_checkout_url": checkout_url}
except Exception as e:
except OnyxError:
raise
except Exception:
logger.exception("Failed to create checkout session")
raise HTTPException(status_code=500, detail=str(e))
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Failed to create checkout session",
)
@router.post("/create-subscription-session")
@@ -147,15 +158,20 @@ async def create_subscription_session(
try:
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
if not tenant_id:
raise HTTPException(status_code=400, detail="Tenant ID not found")
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "Tenant ID not found")
billing_period = request.billing_period if request else "monthly"
session_id = fetch_stripe_checkout_session(tenant_id, billing_period)
return SubscriptionSessionResponse(sessionId=session_id)
except Exception as e:
except OnyxError:
raise
except Exception:
logger.exception("Failed to create subscription session")
raise HTTPException(status_code=500, detail=str(e))
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Failed to create subscription session",
)
@router.get("/stripe-publishable-key")
@@ -186,18 +202,18 @@ async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
# Fall back to S3 bucket
if not STRIPE_PUBLISHABLE_KEY_URL:
raise HTTPException(
status_code=500,
detail="Stripe publishable key is not configured",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Stripe publishable key is not configured",
)
try:
@@ -208,15 +224,15 @@ async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
# Validate key format
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
except httpx.HTTPError:
raise HTTPException(
status_code=500,
detail="Failed to fetch Stripe publishable key",
raise OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"Failed to fetch Stripe publishable key",
)
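A condensed sketch of the error-translation pattern these billing endpoints now share (the downstream call and message are placeholders; OnyxError and OnyxErrorCode are the imports shown above):

from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError


def call_billing_dependency() -> dict:
    # Placeholder for fetch_customer_portal_session / fetch_stripe_checkout_session.
    raise RuntimeError("simulated downstream failure")


def endpoint_body() -> dict:
    try:
        return call_billing_dependency()
    except OnyxError:
        # Already a well-formed application error (e.g. BAD_GATEWAY with a
        # status_code_override) - let it propagate untouched.
        raise
    except Exception:
        # Anything unexpected is re-raised as a generic INTERNAL_ERROR so raw
        # exception details never reach the client (the real endpoints also
        # call logger.exception here).
        raise OnyxError(OnyxErrorCode.INTERNAL_ERROR, "Failed to create billing session")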

View File

@@ -5,6 +5,8 @@ from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session
from ee.onyx.db.user_group import add_users_to_user_group
from ee.onyx.db.user_group import delete_user_group as db_delete_user_group
from ee.onyx.db.user_group import fetch_user_group
from ee.onyx.db.user_group import fetch_user_groups
from ee.onyx.db.user_group import fetch_user_groups_for_user
from ee.onyx.db.user_group import insert_user_group
@@ -20,6 +22,7 @@ from ee.onyx.server.user_group.models import UserGroupUpdate
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
@@ -153,3 +156,8 @@ def delete_user_group(
prepare_user_group_for_deletion(db_session, user_group_id)
except ValueError as e:
raise HTTPException(status_code=404, detail=str(e))
if DISABLE_VECTOR_DB:
user_group = fetch_user_group(db_session, user_group_id)
if user_group:
db_delete_user_group(db_session, user_group)

View File

@@ -120,7 +120,6 @@ from onyx.db.models import User
from onyx.db.pat import fetch_user_for_pat
from onyx.db.users import get_user_by_email
from onyx.redis.redis_pool import get_async_redis_connection
from onyx.redis.redis_pool import get_redis_client
from onyx.server.settings.store import load_settings
from onyx.server.utils import BasicAuthenticationError
from onyx.utils.logger import setup_logger
@@ -201,13 +200,14 @@ def user_needs_to_be_verified() -> bool:
def anonymous_user_enabled(*, tenant_id: str | None = None) -> bool:
redis_client = get_redis_client(tenant_id=tenant_id)
value = redis_client.get(OnyxRedisLocks.ANONYMOUS_USER_ENABLED)
from onyx.cache.factory import get_cache_backend
cache = get_cache_backend(tenant_id=tenant_id)
value = cache.get(OnyxRedisLocks.ANONYMOUS_USER_ENABLED)
if value is None:
return False
assert isinstance(value, bytes)
return int(value.decode("utf-8")) == 1

View File

@@ -1,142 +0,0 @@
from typing import Any
from typing import cast
from celery import Celery
from celery import signals
from celery import Task
from celery.apps.worker import Worker
from celery.signals import celeryd_init
from celery.signals import worker_init
from celery.signals import worker_process_init
from celery.signals import worker_ready
from celery.signals import worker_shutdown
import onyx.background.celery.apps.app_base as app_base
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
from onyx.configs.constants import POSTGRES_CELERY_WORKER_BACKGROUND_APP_NAME
from onyx.db.engine.sql_engine import SqlEngine
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
logger = setup_logger()
celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.background")
celery_app.Task = app_base.TenantAwareTask # type: ignore [misc]
@signals.task_prerun.connect
def on_task_prerun(
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple | None = None,
kwargs: dict | None = None,
**kwds: Any,
) -> None:
app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)
@signals.task_postrun.connect
def on_task_postrun(
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple | None = None,
kwargs: dict | None = None,
retval: Any | None = None,
state: str | None = None,
**kwds: Any,
) -> None:
app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)
@celeryd_init.connect
def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
app_base.on_celeryd_init(sender, conf, **kwargs)
@worker_init.connect
def on_worker_init(sender: Worker, **kwargs: Any) -> None:
EXTRA_CONCURRENCY = 8 # small extra fudge factor for connection limits
logger.info("worker_init signal received for consolidated background worker.")
SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_BACKGROUND_APP_NAME)
pool_size = cast(int, sender.concurrency) # type: ignore
SqlEngine.init_engine(pool_size=pool_size, max_overflow=EXTRA_CONCURRENCY)
# Initialize Vespa httpx pool (needed for light worker tasks)
if MANAGED_VESPA:
httpx_init_vespa_pool(
sender.concurrency + EXTRA_CONCURRENCY, # type: ignore
ssl_cert=VESPA_CLOUD_CERT_PATH,
ssl_key=VESPA_CLOUD_KEY_PATH,
)
else:
httpx_init_vespa_pool(sender.concurrency + EXTRA_CONCURRENCY) # type: ignore
app_base.wait_for_redis(sender, **kwargs)
app_base.wait_for_db(sender, **kwargs)
app_base.wait_for_vespa_or_shutdown(sender, **kwargs)
# Less startup checks in multi-tenant case
if MULTI_TENANT:
return
app_base.on_secondary_worker_init(sender, **kwargs)
@worker_ready.connect
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
app_base.on_worker_ready(sender, **kwargs)
@worker_shutdown.connect
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
app_base.on_worker_shutdown(sender, **kwargs)
@worker_process_init.connect
def init_worker(**kwargs: Any) -> None: # noqa: ARG001
SqlEngine.reset_engine()
@signals.setup_logging.connect
def on_setup_logging(
loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
) -> None:
app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)
base_bootsteps = app_base.get_bootsteps()
for bootstep in base_bootsteps:
celery_app.steps["worker"].add(bootstep)
celery_app.autodiscover_tasks(
app_base.filter_task_modules(
[
# Original background worker tasks
"onyx.background.celery.tasks.pruning",
"onyx.background.celery.tasks.monitoring",
"onyx.background.celery.tasks.user_file_processing",
"onyx.background.celery.tasks.llm_model_update",
# Light worker tasks
"onyx.background.celery.tasks.shared",
"onyx.background.celery.tasks.vespa",
"onyx.background.celery.tasks.connector_deletion",
"onyx.background.celery.tasks.doc_permission_syncing",
"onyx.background.celery.tasks.opensearch_migration",
# Docprocessing worker tasks
"onyx.background.celery.tasks.docprocessing",
# Docfetching worker tasks
"onyx.background.celery.tasks.docfetching",
# Sandbox cleanup tasks (isolated in build feature)
"onyx.server.features.build.sandbox.tasks",
]
)
)

View File

@@ -39,9 +39,13 @@ CT = TypeVar("CT", bound=ConnectorCheckpoint)
class SlimConnectorExtractionResult(BaseModel):
"""Result of extracting document IDs and hierarchy nodes from a connector."""
"""Result of extracting document IDs and hierarchy nodes from a connector.
doc_ids: set[str]
raw_id_to_parent maps document ID → parent_hierarchy_raw_node_id (or None).
Use raw_id_to_parent.keys() wherever the old set of IDs was needed.
"""
raw_id_to_parent: dict[str, str | None]
hierarchy_nodes: list[HierarchyNode]
@@ -93,30 +97,37 @@ def _get_failure_id(failure: ConnectorFailure) -> str | None:
return None
class BatchResult(BaseModel):
raw_id_to_parent: dict[str, str | None]
hierarchy_nodes: list[HierarchyNode]
def _extract_from_batch(
doc_list: Sequence[Document | SlimDocument | HierarchyNode | ConnectorFailure],
) -> tuple[set[str], list[HierarchyNode]]:
"""Separate a batch into document IDs and hierarchy nodes.
) -> BatchResult:
"""Separate a batch into document IDs (with parent mapping) and hierarchy nodes.
ConnectorFailure items have their failed document/entity IDs added to the
ID set so that failed-to-retrieve documents are not accidentally pruned.
ID dict so that failed-to-retrieve documents are not accidentally pruned.
"""
ids: set[str] = set()
ids: dict[str, str | None] = {}
hierarchy_nodes: list[HierarchyNode] = []
for item in doc_list:
if isinstance(item, HierarchyNode):
hierarchy_nodes.append(item)
ids.add(item.raw_node_id)
if item.raw_node_id not in ids:
ids[item.raw_node_id] = None
elif isinstance(item, ConnectorFailure):
failed_id = _get_failure_id(item)
if failed_id:
ids.add(failed_id)
ids[failed_id] = None
logger.warning(
f"Failed to retrieve document {failed_id}: " f"{item.failure_message}"
)
else:
ids.add(item.id)
return ids, hierarchy_nodes
parent_raw = getattr(item, "parent_hierarchy_raw_node_id", None)
ids[item.id] = parent_raw
return BatchResult(raw_id_to_parent=ids, hierarchy_nodes=hierarchy_nodes)
def extract_ids_from_runnable_connector(
@@ -132,7 +143,7 @@ def extract_ids_from_runnable_connector(
Optionally, a callback can be passed to handle the length of each document batch.
"""
all_connector_doc_ids: set[str] = set()
all_raw_id_to_parent: dict[str, str | None] = {}
all_hierarchy_nodes: list[HierarchyNode] = []
# Sequence (covariant) lets all the specific list[...] iterator types unify here
@@ -177,15 +188,20 @@ def extract_ids_from_runnable_connector(
"extract_ids_from_runnable_connector: Stop signal detected"
)
batch_ids, batch_nodes = _extract_from_batch(doc_list)
all_connector_doc_ids.update(doc_batch_processing_func(batch_ids))
batch_result = _extract_from_batch(doc_list)
batch_ids = batch_result.raw_id_to_parent
batch_nodes = batch_result.hierarchy_nodes
doc_batch_processing_func(batch_ids)
for k, v in batch_ids.items():
if v is not None or k not in all_raw_id_to_parent:
all_raw_id_to_parent[k] = v
all_hierarchy_nodes.extend(batch_nodes)
if callback:
callback.progress("extract_ids_from_runnable_connector", len(batch_ids))
return SlimConnectorExtractionResult(
doc_ids=all_connector_doc_ids,
raw_id_to_parent=all_raw_id_to_parent,
hierarchy_nodes=all_hierarchy_nodes,
)
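A small sketch of how callers consume the new mapping (the data values are made up; the merge rule mirrors the loop above, where a non-None parent always wins and the first batch to mention an ID otherwise sets it):

all_raw_id_to_parent: dict[str, str | None] = {}

batches: list[dict[str, str | None]] = [
    {"doc-1": None, "doc-2": "folder-a"},
    {"doc-1": "folder-b", "doc-3": None},
]

for batch_ids in batches:
    for doc_id, parent in batch_ids.items():
        if parent is not None or doc_id not in all_raw_id_to_parent:
            all_raw_id_to_parent[doc_id] = parent

# keys() stands in for the old set of document IDs, e.g. for the pruning set math.
assert set(all_raw_id_to_parent) == {"doc-1", "doc-2", "doc-3"}
assert all_raw_id_to_parent["doc-1"] == "folder-b"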

View File

@@ -1,23 +0,0 @@
import onyx.background.celery.configs.base as shared_config
from onyx.configs.app_configs import CELERY_WORKER_BACKGROUND_CONCURRENCY
broker_url = shared_config.broker_url
broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
broker_pool_limit = shared_config.broker_pool_limit
broker_transport_options = shared_config.broker_transport_options
redis_socket_keepalive = shared_config.redis_socket_keepalive
redis_retry_on_timeout = shared_config.redis_retry_on_timeout
redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval
result_backend = shared_config.result_backend
result_expires = shared_config.result_expires # 86400 seconds is the default
task_default_priority = shared_config.task_default_priority
task_acks_late = shared_config.task_acks_late
worker_concurrency = CELERY_WORKER_BACKGROUND_CONCURRENCY
worker_pool = "threads"
# Increased from 1 to 4 to handle fast light worker tasks more efficiently
# This allows the worker to prefetch multiple tasks per thread
worker_prefetch_multiplier = 4

View File

@@ -30,6 +30,7 @@ from onyx.background.celery.tasks.opensearch_migration.transformer import (
transform_vespa_chunks_to_opensearch_chunks,
)
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import VESPA_MIGRATION_REQUEST_TIMEOUT_S
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_session_with_current_tenant
@@ -47,6 +48,7 @@ from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.opensearch_document_index import (
OpenSearchDocumentIndex,
)
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
from onyx.indexing.models import IndexingSetting
from onyx.redis.redis_pool import get_redis_client
@@ -146,7 +148,12 @@ def migrate_chunks_from_vespa_to_opensearch_task(
task_logger.error(err_str)
return False
with get_session_with_current_tenant() as db_session:
with (
get_session_with_current_tenant() as db_session,
get_vespa_http_client(
timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S
) as vespa_client,
):
try_insert_opensearch_tenant_migration_record_with_commit(db_session)
search_settings = get_current_search_settings(db_session)
tenant_state = TenantState(tenant_id=tenant_id, multitenant=MULTI_TENANT)
@@ -161,6 +168,7 @@ def migrate_chunks_from_vespa_to_opensearch_task(
index_name=search_settings.index_name,
tenant_state=tenant_state,
large_chunks_enabled=False,
httpx_client=vespa_client,
)
sanitized_doc_start_time = time.monotonic()

View File

@@ -29,6 +29,7 @@ from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_PRUNING_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT
from onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
@@ -47,6 +48,8 @@ from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.hierarchy import link_hierarchy_nodes_to_documents
from onyx.db.hierarchy import update_document_parent_hierarchy_nodes
from onyx.db.hierarchy import upsert_hierarchy_nodes_batch
from onyx.db.models import ConnectorCredentialPair
from onyx.db.sync_record import insert_sync_record
@@ -57,6 +60,8 @@ from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_connector_prune import RedisConnectorPrunePayload
from onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch
from onyx.redis.redis_hierarchy import ensure_source_node_exists
from onyx.redis.redis_hierarchy import get_node_id_from_raw_id
from onyx.redis.redis_hierarchy import get_source_node_id_from_cache
from onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
@@ -113,6 +118,38 @@ class PruneCallback(IndexingCallbackBase):
super().progress(tag, amount)
def _resolve_and_update_document_parents(
db_session: Session,
redis_client: Redis,
source: DocumentSource,
raw_id_to_parent: dict[str, str | None],
) -> None:
"""Resolve parent_hierarchy_raw_node_id → parent_hierarchy_node_id for
each document and bulk-update the DB. Mirrors the resolution logic in
run_docfetching.py."""
source_node_id = get_source_node_id_from_cache(redis_client, db_session, source)
resolved: dict[str, int | None] = {}
for doc_id, raw_parent_id in raw_id_to_parent.items():
if raw_parent_id is None:
continue
node_id, found = get_node_id_from_raw_id(redis_client, source, raw_parent_id)
resolved[doc_id] = node_id if found else source_node_id
if not resolved:
return
update_document_parent_hierarchy_nodes(
db_session=db_session,
doc_parent_map=resolved,
commit=True,
)
task_logger.info(
f"Pruning: resolved and updated parent hierarchy for "
f"{len(resolved)} documents (source={source.value})"
)
"""Jobs / utils for kicking off pruning tasks."""
@@ -535,22 +572,22 @@ def connector_pruning_generator_task(
extraction_result = extract_ids_from_runnable_connector(
runnable_connector, callback
)
all_connector_doc_ids = extraction_result.doc_ids
all_connector_doc_ids = extraction_result.raw_id_to_parent
# Process hierarchy nodes (same as docfetching):
# upsert to Postgres and cache in Redis
source = cc_pair.connector.source
redis_client = get_redis_client(tenant_id=tenant_id)
if extraction_result.hierarchy_nodes:
is_connector_public = cc_pair.access_type == AccessType.PUBLIC
redis_client = get_redis_client(tenant_id=tenant_id)
ensure_source_node_exists(
redis_client, db_session, cc_pair.connector.source
)
ensure_source_node_exists(redis_client, db_session, source)
upserted_nodes = upsert_hierarchy_nodes_batch(
db_session=db_session,
nodes=extraction_result.hierarchy_nodes,
source=cc_pair.connector.source,
source=source,
commit=True,
is_connector_public=is_connector_public,
)
@@ -561,7 +598,7 @@ def connector_pruning_generator_task(
]
cache_hierarchy_nodes_batch(
redis_client=redis_client,
source=cc_pair.connector.source,
source=source,
entries=cache_entries,
)
@@ -570,6 +607,26 @@ def connector_pruning_generator_task(
f"hierarchy nodes for cc_pair={cc_pair_id}"
)
ensure_source_node_exists(redis_client, db_session, source)
# Resolve parent_hierarchy_raw_node_id → parent_hierarchy_node_id
# and bulk-update documents, mirroring the docfetching resolution
_resolve_and_update_document_parents(
db_session=db_session,
redis_client=redis_client,
source=source,
raw_id_to_parent=all_connector_doc_ids,
)
# Link hierarchy nodes to documents for sources where pages can be
# both hierarchy nodes AND documents (e.g. Notion, Confluence)
all_doc_id_list = list(all_connector_doc_ids.keys())
link_hierarchy_nodes_to_documents(
db_session=db_session,
document_ids=all_doc_id_list,
source=source,
commit=True,
)
# a list of docs in our local index
all_indexed_document_ids = {
doc.id
@@ -581,7 +638,9 @@ def connector_pruning_generator_task(
}
# generate list of docs to remove (no longer in the source)
doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids)
doc_ids_to_remove = list(
all_indexed_document_ids - all_connector_doc_ids.keys()
)
task_logger.info(
"Pruning set collected: "

View File

@@ -24,6 +24,7 @@ from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_USER_FILE_DELETE_TASK_EXPIRES
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
from onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT
@@ -33,6 +34,7 @@ from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import USER_FILE_DELETE_MAX_QUEUE_DEPTH
from onyx.configs.constants import USER_FILE_PROCESSING_MAX_QUEUE_DEPTH
from onyx.configs.constants import USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH
from onyx.connectors.file.connector import LocalFileConnector
@@ -88,6 +90,17 @@ def _user_file_delete_lock_key(user_file_id: str | UUID) -> str:
return f"{OnyxRedisLocks.USER_FILE_DELETE_LOCK_PREFIX}:{user_file_id}"
def _user_file_delete_queued_key(user_file_id: str | UUID) -> str:
"""Key that exists while a delete_single_user_file task is sitting in the queue.
The beat generator sets this with a TTL equal to CELERY_USER_FILE_DELETE_TASK_EXPIRES
before enqueuing, and the worker deletes it as its first action. This prevents
the beat from adding duplicate tasks for files that already have a live task
in flight.
"""
return f"{OnyxRedisLocks.USER_FILE_DELETE_QUEUED_PREFIX}:{user_file_id}"
def get_user_file_project_sync_queue_depth(celery_app: Celery) -> int:
redis_celery: Redis = celery_app.broker_connection().channel().client # type: ignore
return celery_get_queue_length(
@@ -520,6 +533,7 @@ def process_user_file_impl(
task_logger.exception(
f"process_user_file_impl - Error processing file id={user_file_id} - {e.__class__.__name__}"
)
raise
finally:
if file_lock is not None and file_lock.owned():
file_lock.release()
@@ -545,7 +559,23 @@ def process_single_user_file(
ignore_result=True,
)
def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
"""Scan for user files with DELETING status and enqueue per-file tasks."""
"""Scan for user files with DELETING status and enqueue per-file tasks.
Three mechanisms prevent queue runaway (mirrors check_user_file_processing):
1. **Queue depth backpressure**: if the broker queue already has more than
USER_FILE_DELETE_MAX_QUEUE_DEPTH items, we skip this beat cycle entirely.
2. **Per-file queued guard**: before enqueuing a task we set a short-lived
Redis key (TTL = CELERY_USER_FILE_DELETE_TASK_EXPIRES). If that key
already exists, the file already has a live task in the queue, so we skip
it. The worker deletes the key the moment it picks up the task so the
next beat cycle can re-enqueue if the file is still DELETING.
3. **Task expiry**: every enqueued task carries an `expires` value equal to
CELERY_USER_FILE_DELETE_TASK_EXPIRES. If a task is still sitting in
the queue after that deadline, Celery discards it without touching the DB.
"""
task_logger.info("check_for_user_file_delete - Starting")
redis_client = get_redis_client(tenant_id=tenant_id)
lock: RedisLock = redis_client.lock(
@@ -554,8 +584,23 @@ def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
)
if not lock.acquire(blocking=False):
return None
enqueued = 0
skipped_guard = 0
try:
# --- Protection 1: queue depth backpressure ---
# NOTE: must use the broker's Redis client (not redis_client) because
# Celery queues live on a separate Redis DB with CELERY_SEPARATOR keys.
r_celery: Redis = self.app.broker_connection().channel().client # type: ignore
queue_len = celery_get_queue_length(OnyxCeleryQueues.USER_FILE_DELETE, r_celery)
if queue_len > USER_FILE_DELETE_MAX_QUEUE_DEPTH:
task_logger.warning(
f"check_for_user_file_delete - Queue depth {queue_len} exceeds "
f"{USER_FILE_DELETE_MAX_QUEUE_DEPTH}, skipping enqueue for "
f"tenant={tenant_id}"
)
return None
with get_session_with_current_tenant() as db_session:
user_file_ids = (
db_session.execute(
@@ -567,23 +612,40 @@ def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
.all()
)
for user_file_id in user_file_ids:
self.app.send_task(
OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
kwargs={"user_file_id": str(user_file_id), "tenant_id": tenant_id},
queue=OnyxCeleryQueues.USER_FILE_DELETE,
priority=OnyxCeleryPriority.HIGH,
# --- Protection 2: per-file queued guard ---
queued_key = _user_file_delete_queued_key(user_file_id)
guard_set = redis_client.set(
queued_key,
1,
ex=CELERY_USER_FILE_DELETE_TASK_EXPIRES,
nx=True,
)
if not guard_set:
skipped_guard += 1
continue
# --- Protection 3: task expiry ---
try:
self.app.send_task(
OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
kwargs={
"user_file_id": str(user_file_id),
"tenant_id": tenant_id,
},
queue=OnyxCeleryQueues.USER_FILE_DELETE,
priority=OnyxCeleryPriority.HIGH,
expires=CELERY_USER_FILE_DELETE_TASK_EXPIRES,
)
except Exception:
redis_client.delete(queued_key)
raise
enqueued += 1
except Exception as e:
task_logger.exception(
f"check_for_user_file_delete - Error enqueuing deletes - {e.__class__.__name__}"
)
return None
finally:
if lock.owned():
lock.release()
task_logger.info(
f"check_for_user_file_delete - Enqueued {enqueued} tasks for tenant={tenant_id}"
f"check_for_user_file_delete - Enqueued {enqueued} tasks, skipped_guard={skipped_guard} for tenant={tenant_id}"
)
return None
@@ -601,6 +663,9 @@ def delete_user_file_impl(
file_lock: RedisLock | None = None
if redis_locking:
redis_client = get_redis_client(tenant_id=tenant_id)
# Clear the queued guard so the beat can re-enqueue if deletion fails
# and the file remains in DELETING status.
redis_client.delete(_user_file_delete_queued_key(user_file_id))
file_lock = redis_client.lock(
_user_file_delete_lock_key(user_file_id),
timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
@@ -675,6 +740,7 @@ def delete_user_file_impl(
task_logger.exception(
f"delete_user_file_impl - Error processing file id={user_file_id} - {e.__class__.__name__}"
)
raise
finally:
if file_lock is not None and file_lock.owned():
file_lock.release()
@@ -849,6 +915,7 @@ def project_sync_user_file_impl(
task_logger.exception(
f"project_sync_user_file_impl - Error syncing project for file id={user_file_id} - {e.__class__.__name__}"
)
raise
finally:
if file_lock is not None and file_lock.owned():
file_lock.release()
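A stripped-down sketch of the per-file queued guard on its own (redis-py style client; the key prefix, TTL, and enqueue callable are placeholders for the real constants and send_task call above):

from collections.abc import Callable

from redis import Redis


def try_enqueue_delete(
    redis_client: Redis, user_file_id: str, enqueue: Callable[[], None]
) -> bool:
    queued_key = f"userfiledelete_queued:{user_file_id}"  # placeholder prefix
    # SET NX EX: only succeeds if no task for this file is already in flight.
    if not redis_client.set(queued_key, 1, ex=600, nx=True):
        return False  # a task for this file is already queued or running
    try:
        enqueue()  # stand-in for app.send_task(..., expires=...)
    except Exception:
        # Roll back the guard so the next beat cycle can retry the enqueue.
        redis_client.delete(queued_key)
        raise
    return True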

View File

@@ -1,10 +0,0 @@
from celery import Celery
from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable
set_is_ee_based_on_env_variable()
app: Celery = fetch_versioned_implementation(
"onyx.background.celery.apps.background",
"celery_app",
)

View File

@@ -58,8 +58,6 @@ from onyx.file_store.document_batch_storage import DocumentBatchStorage
from onyx.file_store.document_batch_storage import get_document_batch_storage
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
from onyx.indexing.postgres_sanitization import sanitize_document_for_postgres
from onyx.indexing.postgres_sanitization import sanitize_hierarchy_nodes_for_postgres
from onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch
from onyx.redis.redis_hierarchy import ensure_source_node_exists
from onyx.redis.redis_hierarchy import get_node_id_from_raw_id
@@ -71,6 +69,8 @@ from onyx.server.features.build.indexing.persistent_document_writer import (
)
from onyx.utils.logger import setup_logger
from onyx.utils.middleware import make_randomized_onyx_request_id
from onyx.utils.postgres_sanitization import sanitize_document_for_postgres
from onyx.utils.postgres_sanitization import sanitize_hierarchy_nodes_for_postgres
from onyx.utils.variable_functionality import global_version
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import INDEX_ATTEMPT_INFO_CONTEXTVAR

View File

@@ -59,6 +59,12 @@ def _run_auto_llm_update() -> None:
sync_llm_models_from_github(db_session)
def _run_cache_cleanup() -> None:
from onyx.cache.postgres_backend import cleanup_expired_cache_entries
cleanup_expired_cache_entries()
def _run_scheduled_eval() -> None:
from onyx.configs.app_configs import BRAINTRUST_API_KEY
from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES
@@ -100,12 +106,26 @@ def _run_scheduled_eval() -> None:
)
_CACHE_CLEANUP_INTERVAL_SECONDS = 300
def _build_periodic_tasks() -> list[_PeriodicTaskDef]:
from onyx.cache.interface import CacheBackendType
from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL
from onyx.configs.app_configs import AUTO_LLM_UPDATE_INTERVAL_SECONDS
from onyx.configs.app_configs import CACHE_BACKEND
from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES
tasks: list[_PeriodicTaskDef] = []
if CACHE_BACKEND == CacheBackendType.POSTGRES:
tasks.append(
_PeriodicTaskDef(
name="cache-cleanup",
interval_seconds=_CACHE_CLEANUP_INTERVAL_SECONDS,
lock_id=PERIODIC_TASK_LOCK_BASE + 2,
run_fn=_run_cache_cleanup,
)
)
if AUTO_LLM_CONFIG_URL:
tasks.append(
_PeriodicTaskDef(

View File

@@ -75,31 +75,41 @@ def _claim_next_processing_file(db_session: Session) -> UUID | None:
return file_id
def _claim_next_deleting_file(db_session: Session) -> UUID | None:
def _claim_next_deleting_file(
db_session: Session,
exclude_ids: set[UUID] | None = None,
) -> UUID | None:
"""Claim the next DELETING file.
No status transition needed — the impl deletes the row on success.
The short-lived FOR UPDATE lock prevents concurrent claims.
*exclude_ids* prevents re-processing the same file if the impl fails.
"""
file_id = db_session.execute(
stmt = (
select(UserFile.id)
.where(UserFile.status == UserFileStatus.DELETING)
.order_by(UserFile.created_at)
.limit(1)
.with_for_update(skip_locked=True)
).scalar_one_or_none()
# Commit to release the row lock promptly.
)
if exclude_ids:
stmt = stmt.where(UserFile.id.notin_(exclude_ids))
file_id = db_session.execute(stmt).scalar_one_or_none()
db_session.commit()
return file_id
def _claim_next_sync_file(db_session: Session) -> UUID | None:
def _claim_next_sync_file(
db_session: Session,
exclude_ids: set[UUID] | None = None,
) -> UUID | None:
"""Claim the next file needing project/persona sync.
No status transition needed — the impl clears the sync flags on
success. The short-lived FOR UPDATE lock prevents concurrent claims.
*exclude_ids* prevents re-processing the same file if the impl fails.
"""
file_id = db_session.execute(
stmt = (
select(UserFile.id)
.where(
sa.and_(
@@ -113,7 +123,10 @@ def _claim_next_sync_file(db_session: Session) -> UUID | None:
.order_by(UserFile.created_at)
.limit(1)
.with_for_update(skip_locked=True)
).scalar_one_or_none()
)
if exclude_ids:
stmt = stmt.where(UserFile.id.notin_(exclude_ids))
file_id = db_session.execute(stmt).scalar_one_or_none()
db_session.commit()
return file_id
@@ -135,11 +148,14 @@ def drain_processing_loop(tenant_id: str) -> None:
file_id = _claim_next_processing_file(session)
if file_id is None:
break
process_user_file_impl(
user_file_id=str(file_id),
tenant_id=tenant_id,
redis_locking=False,
)
try:
process_user_file_impl(
user_file_id=str(file_id),
tenant_id=tenant_id,
redis_locking=False,
)
except Exception:
logger.exception(f"Failed to process user file {file_id}")
def drain_delete_loop(tenant_id: str) -> None:
@@ -149,16 +165,21 @@ def drain_delete_loop(tenant_id: str) -> None:
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
failed: set[UUID] = set()
while True:
with get_session_with_current_tenant() as session:
file_id = _claim_next_deleting_file(session)
file_id = _claim_next_deleting_file(session, exclude_ids=failed)
if file_id is None:
break
delete_user_file_impl(
user_file_id=str(file_id),
tenant_id=tenant_id,
redis_locking=False,
)
try:
delete_user_file_impl(
user_file_id=str(file_id),
tenant_id=tenant_id,
redis_locking=False,
)
except Exception:
logger.exception(f"Failed to delete user file {file_id}")
failed.add(file_id)
def drain_project_sync_loop(tenant_id: str) -> None:
@@ -168,13 +189,18 @@ def drain_project_sync_loop(tenant_id: str) -> None:
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
failed: set[UUID] = set()
while True:
with get_session_with_current_tenant() as session:
file_id = _claim_next_sync_file(session)
file_id = _claim_next_sync_file(session, exclude_ids=failed)
if file_id is None:
break
project_sync_user_file_impl(
user_file_id=str(file_id),
tenant_id=tenant_id,
redis_locking=False,
)
try:
project_sync_user_file_impl(
user_file_id=str(file_id),
tenant_id=tenant_id,
redis_locking=False,
)
except Exception:
logger.exception(f"Failed to sync user file {file_id}")
failed.add(file_id)
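The retry-avoidance shape of these drain loops, reduced to a self-contained sketch (claim_next and handle are hypothetical stand-ins for the _claim_next_* helpers and the *_impl calls):

from uuid import UUID
from uuid import uuid4

pending: list[UUID] = [uuid4(), uuid4()]
failed: set[UUID] = set()


def claim_next(exclude_ids: set[UUID]) -> UUID | None:
    # Stand-in for _claim_next_deleting_file / _claim_next_sync_file.
    for candidate in pending:
        if candidate not in exclude_ids:
            return candidate
    return None


def handle(file_id: UUID) -> None:
    # Stand-in for delete_user_file_impl / project_sync_user_file_impl.
    raise RuntimeError(f"simulated failure for {file_id}")


while True:
    file_id = claim_next(exclude_ids=failed)
    if file_id is None:
        break
    try:
        handle(file_id)
    except Exception:
        # Recording the failure lets the next claim skip this id, so a file
        # whose impl keeps raising cannot make the drain loop spin forever.
        failed.add(file_id)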

View File

@@ -12,9 +12,15 @@ def _build_redis_backend(tenant_id: str) -> CacheBackend:
return RedisCacheBackend(redis_pool.get_client(tenant_id))
def _build_postgres_backend(tenant_id: str) -> CacheBackend:
from onyx.cache.postgres_backend import PostgresCacheBackend
return PostgresCacheBackend(tenant_id)
_BACKEND_BUILDERS: dict[CacheBackendType, Callable[[str], CacheBackend]] = {
CacheBackendType.REDIS: _build_redis_backend,
# CacheBackendType.POSTGRES will be added in a follow-up PR.
CacheBackendType.POSTGRES: _build_postgres_backend,
}

View File

@@ -1,6 +1,20 @@
import abc
from enum import Enum
from redis.exceptions import RedisError
from sqlalchemy.exc import SQLAlchemyError
TTL_KEY_NOT_FOUND = -2
TTL_NO_EXPIRY = -1
CACHE_TRANSIENT_ERRORS: tuple[type[Exception], ...] = (RedisError, SQLAlchemyError)
"""Exception types that represent transient cache connectivity / operational
failures. Callers that want to fail-open (or fail-closed) on cache errors
should catch this tuple instead of bare ``Exception``.
When adding a new ``CacheBackend`` implementation, add its transient error
base class(es) here so all call-sites pick it up automatically."""
class CacheBackendType(str, Enum):
REDIS = "redis"
@@ -26,6 +40,14 @@ class CacheLock(abc.ABC):
def owned(self) -> bool:
raise NotImplementedError
def __enter__(self) -> "CacheLock":
if not self.acquire():
raise RuntimeError("Failed to acquire lock")
return self
def __exit__(self, *args: object) -> None:
self.release()
class CacheBackend(abc.ABC):
"""Thin abstraction over a key-value cache with TTL, locks, and blocking lists.
@@ -65,7 +87,11 @@ class CacheBackend(abc.ABC):
@abc.abstractmethod
def ttl(self, key: str) -> int:
"""Return remaining TTL in seconds. -1 if no expiry, -2 if key missing."""
"""Return remaining TTL in seconds.
Returns ``TTL_NO_EXPIRY`` (-1) if key exists without expiry,
``TTL_KEY_NOT_FOUND`` (-2) if key is missing or expired.
"""
raise NotImplementedError
# -- distributed lock --------------------------------------------------
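A short usage sketch of the additions above, assuming a backend obtained from the factory (the tenant id, key names, and TTLs are illustrative):

from onyx.cache.factory import get_cache_backend
from onyx.cache.interface import CACHE_TRANSIENT_ERRORS
from onyx.cache.interface import TTL_KEY_NOT_FOUND

cache = get_cache_backend(tenant_id="public")  # illustrative tenant id

try:
    # CacheLock is now a context manager: __enter__ raises if acquire() fails.
    with cache.lock("example-lock", timeout=30):
        cache.set("example-key", 1, ex=60)
        assert cache.ttl("missing-key") == TTL_KEY_NOT_FOUND
except CACHE_TRANSIENT_ERRORS:
    # Fail open or closed per call-site policy instead of catching bare Exception.
    pass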

backend/onyx/cache/postgres_backend.py
View File

@@ -0,0 +1,323 @@
"""PostgreSQL-backed ``CacheBackend`` for NO_VECTOR_DB deployments.
Uses the ``cache_store`` table for key-value storage, PostgreSQL advisory locks
for distributed locking, and a polling loop for the BLPOP pattern.
"""
import hashlib
import struct
import time
import uuid
from contextlib import AbstractContextManager
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from sqlalchemy import delete
from sqlalchemy import func
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.orm import Session
from onyx.cache.interface import CacheBackend
from onyx.cache.interface import CacheLock
from onyx.cache.interface import TTL_KEY_NOT_FOUND
from onyx.cache.interface import TTL_NO_EXPIRY
from onyx.db.models import CacheStore
_LIST_KEY_PREFIX = "_q:"
# ASCII: ':' (0x3A) < ';' (0x3B). Upper bound for range queries so [prefix + ":", prefix + ";")
# captures all list-item keys (e.g. _q:mylist:123:uuid) without including other
# lists whose names share a prefix (e.g. _q:mylist2:...).
_LIST_KEY_RANGE_TERMINATOR = ";"
_LIST_ITEM_TTL_SECONDS = 3600
_LOCK_POLL_INTERVAL = 0.1
_BLPOP_POLL_INTERVAL = 0.25
def _list_item_key(key: str) -> str:
"""Unique key for a list item. Timestamp for FIFO ordering; UUID prevents
collision when concurrent rpush calls occur within the same nanosecond.
"""
return f"{_LIST_KEY_PREFIX}{key}:{time.time_ns()}:{uuid.uuid4().hex}"
def _to_bytes(value: str | bytes | int | float) -> bytes:
if isinstance(value, bytes):
return value
return str(value).encode()
# ------------------------------------------------------------------
# Lock
# ------------------------------------------------------------------
class PostgresCacheLock(CacheLock):
"""Advisory-lock-based distributed lock.
Uses ``get_session_with_tenant`` for connection lifecycle. The lock is tied
to the session's connection; releasing or closing the session frees it.
NOTE: Unlike Redis locks, advisory locks do not auto-expire after
``timeout`` seconds. They are released when ``release()`` is
called or when the session is closed.
"""
def __init__(self, lock_id: int, timeout: float | None, tenant_id: str) -> None:
self._lock_id = lock_id
self._timeout = timeout
self._tenant_id = tenant_id
self._session_cm: AbstractContextManager[Session] | None = None
self._session: Session | None = None
self._acquired = False
def acquire(
self,
blocking: bool = True,
blocking_timeout: float | None = None,
) -> bool:
from onyx.db.engine.sql_engine import get_session_with_tenant
self._session_cm = get_session_with_tenant(tenant_id=self._tenant_id)
self._session = self._session_cm.__enter__()
try:
if not blocking:
return self._try_lock()
effective_timeout = blocking_timeout or self._timeout
deadline = (
(time.monotonic() + effective_timeout) if effective_timeout else None
)
while True:
if self._try_lock():
return True
if deadline is not None and time.monotonic() >= deadline:
return False
time.sleep(_LOCK_POLL_INTERVAL)
finally:
if not self._acquired:
self._close_session()
def release(self) -> None:
if not self._acquired or self._session is None:
return
try:
self._session.execute(select(func.pg_advisory_unlock(self._lock_id)))
finally:
self._acquired = False
self._close_session()
def owned(self) -> bool:
return self._acquired
def _close_session(self) -> None:
if self._session_cm is not None:
try:
self._session_cm.__exit__(None, None, None)
finally:
self._session_cm = None
self._session = None
def _try_lock(self) -> bool:
assert self._session is not None
result = self._session.execute(
select(func.pg_try_advisory_lock(self._lock_id))
).scalar()
if result:
self._acquired = True
return True
return False
# ------------------------------------------------------------------
# Backend
# ------------------------------------------------------------------
class PostgresCacheBackend(CacheBackend):
"""``CacheBackend`` backed by the ``cache_store`` table in PostgreSQL.
Each operation opens and closes its own database session so the backend
is safe to share across threads. Tenant isolation is handled by
SQLAlchemy's ``schema_translate_map`` (set by ``get_session_with_tenant``).
"""
def __init__(self, tenant_id: str) -> None:
self._tenant_id = tenant_id
# -- basic key/value ---------------------------------------------------
def get(self, key: str) -> bytes | None:
from onyx.db.engine.sql_engine import get_session_with_tenant
stmt = select(CacheStore.value).where(
CacheStore.key == key,
or_(CacheStore.expires_at.is_(None), CacheStore.expires_at > func.now()),
)
with get_session_with_tenant(tenant_id=self._tenant_id) as session:
value = session.execute(stmt).scalar_one_or_none()
if value is None:
return None
return bytes(value)
def set(
self,
key: str,
value: str | bytes | int | float,
ex: int | None = None,
) -> None:
from onyx.db.engine.sql_engine import get_session_with_tenant
value_bytes = _to_bytes(value)
expires_at = (
datetime.now(timezone.utc) + timedelta(seconds=ex)
if ex is not None
else None
)
stmt = (
pg_insert(CacheStore)
.values(key=key, value=value_bytes, expires_at=expires_at)
.on_conflict_do_update(
index_elements=[CacheStore.key],
set_={"value": value_bytes, "expires_at": expires_at},
)
)
with get_session_with_tenant(tenant_id=self._tenant_id) as session:
session.execute(stmt)
session.commit()
def delete(self, key: str) -> None:
from onyx.db.engine.sql_engine import get_session_with_tenant
with get_session_with_tenant(tenant_id=self._tenant_id) as session:
session.execute(delete(CacheStore).where(CacheStore.key == key))
session.commit()
def exists(self, key: str) -> bool:
from onyx.db.engine.sql_engine import get_session_with_tenant
stmt = (
select(CacheStore.key)
.where(
CacheStore.key == key,
or_(
CacheStore.expires_at.is_(None),
CacheStore.expires_at > func.now(),
),
)
.limit(1)
)
with get_session_with_tenant(tenant_id=self._tenant_id) as session:
return session.execute(stmt).first() is not None
# -- TTL ---------------------------------------------------------------
def expire(self, key: str, seconds: int) -> None:
from onyx.db.engine.sql_engine import get_session_with_tenant
new_exp = datetime.now(timezone.utc) + timedelta(seconds=seconds)
stmt = (
update(CacheStore).where(CacheStore.key == key).values(expires_at=new_exp)
)
with get_session_with_tenant(tenant_id=self._tenant_id) as session:
session.execute(stmt)
session.commit()
def ttl(self, key: str) -> int:
from onyx.db.engine.sql_engine import get_session_with_tenant
stmt = select(CacheStore.expires_at).where(CacheStore.key == key)
with get_session_with_tenant(tenant_id=self._tenant_id) as session:
result = session.execute(stmt).first()
if result is None:
return TTL_KEY_NOT_FOUND
expires_at: datetime | None = result[0]
if expires_at is None:
return TTL_NO_EXPIRY
remaining = (expires_at - datetime.now(timezone.utc)).total_seconds()
if remaining <= 0:
return TTL_KEY_NOT_FOUND
return int(remaining)
# -- distributed lock --------------------------------------------------
def lock(self, name: str, timeout: float | None = None) -> CacheLock:
return PostgresCacheLock(
self._lock_id_for(name), timeout, tenant_id=self._tenant_id
)
# -- blocking list (MCP OAuth BLPOP pattern) ---------------------------
def rpush(self, key: str, value: str | bytes) -> None:
self.set(_list_item_key(key), value, ex=_LIST_ITEM_TTL_SECONDS)
def blpop(self, keys: list[str], timeout: int = 0) -> tuple[bytes, bytes] | None:
if timeout <= 0:
raise ValueError(
"PostgresCacheBackend.blpop requires timeout > 0. "
"timeout=0 would block the calling thread indefinitely "
"with no way to interrupt short of process termination."
)
from onyx.db.engine.sql_engine import get_session_with_tenant
deadline = time.monotonic() + timeout
while True:
for key in keys:
lower = f"{_LIST_KEY_PREFIX}{key}:"
upper = f"{_LIST_KEY_PREFIX}{key}{_LIST_KEY_RANGE_TERMINATOR}"
stmt = (
select(CacheStore)
.where(
CacheStore.key >= lower,
CacheStore.key < upper,
or_(
CacheStore.expires_at.is_(None),
CacheStore.expires_at > func.now(),
),
)
.order_by(CacheStore.key)
.limit(1)
.with_for_update(skip_locked=True)
)
with get_session_with_tenant(tenant_id=self._tenant_id) as session:
row = session.execute(stmt).scalars().first()
if row is not None:
value = bytes(row.value) if row.value else b""
session.delete(row)
session.commit()
return (key.encode(), value)
if time.monotonic() >= deadline:
return None
time.sleep(_BLPOP_POLL_INTERVAL)
# -- helpers -----------------------------------------------------------
def _lock_id_for(self, name: str) -> int:
"""Map *name* to a 64-bit signed int for ``pg_advisory_lock``."""
h = hashlib.md5(f"{self._tenant_id}:{name}".encode()).digest()
return struct.unpack("q", h[:8])[0]
# ------------------------------------------------------------------
# Periodic cleanup
# ------------------------------------------------------------------
def cleanup_expired_cache_entries() -> None:
"""Delete rows whose ``expires_at`` is in the past.
Called by the periodic poller every 5 minutes.
"""
from onyx.db.engine.sql_engine import get_session_with_current_tenant
with get_session_with_current_tenant() as session:
session.execute(
delete(CacheStore).where(
CacheStore.expires_at.is_not(None),
CacheStore.expires_at < func.now(),
)
)
session.commit()
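A brief sketch of the BLPOP-style polling queue exposed by this backend (the tenant id and key names are illustrative; note that blpop here requires a positive timeout, unlike Redis):

from onyx.cache.postgres_backend import PostgresCacheBackend

cache = PostgresCacheBackend(tenant_id="public")  # illustrative tenant id

# Producer side: each rpush becomes a row keyed "_q:oauth-callback:<time_ns>:<uuid>"
# with a one-hour TTL, so abandoned items age out on their own.
cache.rpush("oauth-callback", b"token-payload")

# Consumer side: polls every 250 ms until an item appears or the deadline hits.
item = cache.blpop(["oauth-callback"], timeout=5)
if item is not None:
    key, value = item  # (b"oauth-callback", b"token-payload")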

View File

@@ -1,57 +1,52 @@
from uuid import UUID
from redis.client import Redis
from onyx.cache.interface import CacheBackend
# Redis key prefixes for chat message processing
PREFIX = "chatprocessing"
FENCE_PREFIX = f"{PREFIX}_fence"
FENCE_TTL = 30 * 60 # 30 minutes
def _get_fence_key(chat_session_id: UUID) -> str:
"""
Generate the Redis key for a chat session processing a message.
"""Generate the cache key for a chat session processing fence.
Args:
chat_session_id: The UUID of the chat session
Returns:
The fence key string (tenant_id is automatically added by the Redis client)
The fence key string. Tenant isolation is handled automatically
by the cache backend (Redis key-prefixing or Postgres schema routing).
"""
return f"{FENCE_PREFIX}_{chat_session_id}"
def set_processing_status(
chat_session_id: UUID, redis_client: Redis, value: bool
chat_session_id: UUID, cache: CacheBackend, value: bool
) -> None:
"""
Set or clear the fence for a chat session processing a message.
"""Set or clear the fence for a chat session processing a message.
If the key exists, we are processing a message. If the key does not exist, we are not processing a message.
If the key exists, a message is being processed.
Args:
chat_session_id: The UUID of the chat session
redis_client: The Redis client to use
cache: Tenant-aware cache backend
value: True to set the fence, False to clear it
"""
fence_key = _get_fence_key(chat_session_id)
if value:
redis_client.set(fence_key, 0, ex=FENCE_TTL)
cache.set(fence_key, 0, ex=FENCE_TTL)
else:
redis_client.delete(fence_key)
cache.delete(fence_key)
def is_chat_session_processing(chat_session_id: UUID, redis_client: Redis) -> bool:
"""
Check if the chat session is processing a message.
def is_chat_session_processing(chat_session_id: UUID, cache: CacheBackend) -> bool:
"""Check if the chat session is processing a message.
Args:
chat_session_id: The UUID of the chat session
redis_client: The Redis client to use
cache: Tenant-aware cache backend
Returns:
True if the chat session is processing a message, False otherwise
"""
fence_key = _get_fence_key(chat_session_id)
return bool(redis_client.exists(fence_key))
return cache.exists(_get_fence_key(chat_session_id))
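A minimal usage sketch, assuming it runs in the same module as the helpers above (the tenant id and session id are illustrative):

from uuid import uuid4

from onyx.cache.factory import get_cache_backend

cache = get_cache_backend(tenant_id="public")  # illustrative tenant id
chat_session_id = uuid4()

set_processing_status(chat_session_id, cache, True)   # writes the fence key with FENCE_TTL
assert is_chat_session_processing(chat_session_id, cache)
set_processing_status(chat_session_id, cache, False)  # deletes the fence key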

View File

@@ -36,7 +36,6 @@ from onyx.db.memory import add_memory
from onyx.db.memory import update_memory_at_index
from onyx.db.memory import UserMemoryContext
from onyx.db.models import Persona
from onyx.llm.constants import LlmProviderNames
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.interfaces import ToolChoiceOptions
@@ -51,7 +50,9 @@ from onyx.tools.built_in_tools import CITEABLE_TOOLS_NAMES
from onyx.tools.built_in_tools import STOPPING_TOOLS_NAMES
from onyx.tools.interface import Tool
from onyx.tools.models import ChatFile
from onyx.tools.models import CustomToolCallSummary
from onyx.tools.models import MemoryToolResponseSnapshot
from onyx.tools.models import PythonToolRichResponse
from onyx.tools.models import ToolCallInfo
from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolResponse
@@ -83,28 +84,6 @@ def _looks_like_xml_tool_call_payload(text: str | None) -> bool:
)
def _should_keep_bedrock_tool_definitions(
llm: object, simple_chat_history: list[ChatMessageSimple]
) -> bool:
"""Bedrock requires tool config when history includes toolUse/toolResult blocks."""
model_provider = getattr(getattr(llm, "config", None), "model_provider", None)
if model_provider not in {
LlmProviderNames.BEDROCK,
LlmProviderNames.BEDROCK_CONVERSE,
}:
return False
return any(
(
msg.message_type == MessageType.ASSISTANT
and msg.tool_calls
and len(msg.tool_calls) > 0
)
or msg.message_type == MessageType.TOOL_CALL_RESPONSE
for msg in simple_chat_history
)
def _try_fallback_tool_extraction(
llm_step_result: LlmStepResult,
tool_choice: ToolChoiceOptions,
@@ -685,12 +664,7 @@ def run_llm_loop(
elif out_of_cycles or ran_image_gen:
# Last cycle, no tools allowed, just answer!
tool_choice = ToolChoiceOptions.NONE
# Bedrock requires tool config in requests that include toolUse/toolResult history.
final_tools = (
tools
if _should_keep_bedrock_tool_definitions(llm, simple_chat_history)
else []
)
final_tools = []
else:
tool_choice = ToolChoiceOptions.AUTO
final_tools = tools
@@ -966,6 +940,13 @@ def run_llm_loop(
):
generated_images = tool_response.rich_response.generated_images
# Extract generated_files if this is a code interpreter response
generated_files = None
if isinstance(tool_response.rich_response, PythonToolRichResponse):
generated_files = (
tool_response.rich_response.generated_files or None
)
# Persist memory if this is a memory tool response
memory_snapshot: MemoryToolResponseSnapshot | None = None
if isinstance(tool_response.rich_response, MemoryToolResponse):
@@ -1000,6 +981,10 @@ def run_llm_loop(
if memory_snapshot:
saved_response = json.dumps(memory_snapshot.model_dump())
elif isinstance(tool_response.rich_response, CustomToolCallSummary):
saved_response = json.dumps(
tool_response.rich_response.model_dump()
)
elif isinstance(tool_response.rich_response, str):
saved_response = tool_response.rich_response
else:
@@ -1017,6 +1002,7 @@ def run_llm_loop(
tool_call_response=saved_response,
search_docs=displayed_docs or search_docs,
generated_images=generated_images,
generated_files=generated_files,
)
# Add to state container for partial save support
state_container.add_tool_call(tool_call_info)

View File

@@ -55,6 +55,7 @@ from onyx.tools.models import ToolCallKickoff
from onyx.tracing.framework.create import generation_span
from onyx.utils.b64 import get_image_type_from_bytes
from onyx.utils.logger import setup_logger
from onyx.utils.postgres_sanitization import sanitize_string
from onyx.utils.text_processing import find_all_json_objects
logger = setup_logger()
@@ -166,15 +167,6 @@ def _find_function_calls_open_marker(text_lower: str) -> int:
search_from = idx + 1
def _sanitize_llm_output(value: str) -> str:
"""Remove characters that PostgreSQL's text/JSONB types cannot store.
- NULL bytes (\x00): Not allowed in PostgreSQL text types
- UTF-16 surrogates (\ud800-\udfff): Invalid in UTF-8 encoding
"""
return "".join(c for c in value if c != "\x00" and not ("\ud800" <= c <= "\udfff"))
def _try_parse_json_string(value: Any) -> Any:
"""Attempt to parse a JSON string value into its Python equivalent.
@@ -222,9 +214,7 @@ def _parse_tool_args_to_dict(raw_args: Any) -> dict[str, Any]:
if isinstance(raw_args, dict):
# Parse any string values that look like JSON arrays/objects
return {
k: _try_parse_json_string(
_sanitize_llm_output(v) if isinstance(v, str) else v
)
k: _try_parse_json_string(sanitize_string(v) if isinstance(v, str) else v)
for k, v in raw_args.items()
}
@@ -232,7 +222,7 @@ def _parse_tool_args_to_dict(raw_args: Any) -> dict[str, Any]:
return {}
# Sanitize before parsing to remove NULL bytes and surrogates
raw_args = _sanitize_llm_output(raw_args)
raw_args = sanitize_string(raw_args)
try:
parsed1: Any = json.loads(raw_args)
@@ -545,12 +535,12 @@ def _extract_xml_attribute(attrs: str, attr_name: str) -> str | None:
)
if not attr_match:
return None
return _sanitize_llm_output(unescape(attr_match.group(2).strip()))
return sanitize_string(unescape(attr_match.group(2).strip()))
def _parse_xml_parameter_value(raw_value: str, string_attr: str | None) -> Any:
"""Parse a parameter value from XML-style tool call payloads."""
value = _sanitize_llm_output(unescape(raw_value).strip())
value = sanitize_string(unescape(raw_value).strip())
if string_attr and string_attr.lower() == "true":
return value
@@ -569,6 +559,7 @@ def _resolve_tool_arguments(obj: dict[str, Any]) -> dict[str, Any] | None:
"""
arguments = obj.get("arguments", obj.get("parameters", {}))
if isinstance(arguments, str):
arguments = sanitize_string(arguments)
try:
arguments = json.loads(arguments)
except json.JSONDecodeError:

View File

@@ -11,9 +11,10 @@ from contextvars import Token
from uuid import UUID
from pydantic import BaseModel
from redis.client import Redis
from sqlalchemy.orm import Session
from onyx.cache.factory import get_cache_backend
from onyx.cache.interface import CacheBackend
from onyx.chat.chat_processing_checker import set_processing_status
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.chat_state import run_chat_loop_with_state_containers
@@ -79,7 +80,6 @@ from onyx.llm.request_context import reset_llm_mock_response
from onyx.llm.request_context import set_llm_mock_response
from onyx.llm.utils import litellm_exception_to_error_msg
from onyx.onyxbot.slack.models import SlackContext
from onyx.redis.redis_pool import get_redis_client
from onyx.server.query_and_chat.models import AUTO_PLACE_AFTER_LATEST_MESSAGE
from onyx.server.query_and_chat.models import MessageResponseIDInfo
from onyx.server.query_and_chat.models import SendMessageRequest
@@ -448,7 +448,7 @@ def handle_stream_message_objects(
llm: LLM | None = None
chat_session: ChatSession | None = None
redis_client: Redis | None = None
cache: CacheBackend | None = None
user_id = user.id
if user.is_anonymous:
@@ -809,19 +809,19 @@ def handle_stream_message_objects(
)
simple_chat_history.insert(0, summary_simple)
redis_client = get_redis_client()
cache = get_cache_backend()
reset_cancel_status(
chat_session.id,
redis_client,
cache,
)
def check_is_connected() -> bool:
return check_stop_signal(chat_session.id, redis_client)
return check_stop_signal(chat_session.id, cache)
set_processing_status(
chat_session_id=chat_session.id,
redis_client=redis_client,
cache=cache,
value=True,
)
@@ -968,10 +968,10 @@ def handle_stream_message_objects(
reset_llm_mock_response(mock_response_token)
try:
if redis_client is not None and chat_session is not None:
if cache is not None and chat_session is not None:
set_processing_status(
chat_session_id=chat_session.id,
redis_client=redis_client,
cache=cache,
value=False,
)
except Exception:

View File

@@ -1,4 +1,5 @@
import json
import mimetypes
from sqlalchemy.orm import Session
@@ -12,14 +13,42 @@ from onyx.db.chat import create_db_search_doc
from onyx.db.models import ChatMessage
from onyx.db.models import ToolCall
from onyx.db.tools import create_tool_call_no_commit
from onyx.file_store.models import FileDescriptor
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type
from onyx.tools.models import ToolCallInfo
from onyx.utils.logger import setup_logger
from onyx.utils.postgres_sanitization import sanitize_string
logger = setup_logger()
def _extract_referenced_file_descriptors(
tool_calls: list[ToolCallInfo],
message_text: str,
) -> list[FileDescriptor]:
"""Extract FileDescriptors for code interpreter files referenced in the message text."""
descriptors: list[FileDescriptor] = []
for tool_call_info in tool_calls:
if not tool_call_info.generated_files:
continue
for gen_file in tool_call_info.generated_files:
file_id = (
gen_file.file_link.rsplit("/", 1)[-1] if gen_file.file_link else ""
)
if file_id and file_id in message_text:
mime_type, _ = mimetypes.guess_type(gen_file.filename)
descriptors.append(
FileDescriptor(
id=file_id,
type=mime_type_to_chat_file_type(mime_type),
name=gen_file.filename,
)
)
return descriptors
def _create_and_link_tool_calls(
tool_calls: list[ToolCallInfo],
assistant_message: ChatMessage,
@@ -173,8 +202,13 @@ def save_chat_turn(
pre_answer_processing_time: Duration of processing before answer starts (in seconds)
"""
# 1. Update ChatMessage with message content, reasoning tokens, and token count
assistant_message.message = message_text
assistant_message.reasoning_tokens = reasoning_tokens
sanitized_message_text = (
sanitize_string(message_text) if message_text else message_text
)
assistant_message.message = sanitized_message_text
assistant_message.reasoning_tokens = (
sanitize_string(reasoning_tokens) if reasoning_tokens else reasoning_tokens
)
assistant_message.is_clarification = is_clarification
# Use pre-answer processing time (captured when MESSAGE_START was emitted)
@@ -184,8 +218,10 @@ def save_chat_turn(
# Calculate token count using default tokenizer, when storing, this should not use the LLM
# specific one so we use a system default tokenizer here.
default_tokenizer = get_tokenizer(None, None)
if message_text:
assistant_message.token_count = len(default_tokenizer.encode(message_text))
if sanitized_message_text:
assistant_message.token_count = len(
default_tokenizer.encode(sanitized_message_text)
)
else:
assistant_message.token_count = 0
@@ -297,5 +333,16 @@ def save_chat_turn(
citation_number_to_search_doc_id if citation_number_to_search_doc_id else None
)
# 8. Attach code interpreter generated files that the assistant actually
# referenced in its response, so they are available via load_all_chat_files
# on subsequent turns. Files not mentioned are intermediate artifacts.
if sanitized_message_text:
referenced = _extract_referenced_file_descriptors(
tool_calls, sanitized_message_text
)
if referenced:
existing_files = assistant_message.files or []
assistant_message.files = existing_files + referenced
# Finally save the messages, tool calls, and docs
db_session.commit()

View File

@@ -1,65 +1,58 @@
from uuid import UUID
from redis.client import Redis
from onyx.cache.interface import CacheBackend
# Redis key prefixes for chat session stop signals
PREFIX = "chatsessionstop"
FENCE_PREFIX = f"{PREFIX}_fence"
FENCE_TTL = 10 * 60 # 10 minutes - defensive TTL to prevent memory leaks
FENCE_TTL = 10 * 60 # 10 minutes
def _get_fence_key(chat_session_id: UUID) -> str:
"""
Generate the Redis key for a chat session stop signal fence.
"""Generate the cache key for a chat session stop signal fence.
Args:
chat_session_id: The UUID of the chat session
Returns:
The fence key string (tenant_id is automatically added by the Redis client)
The fence key string. Tenant isolation is handled automatically
by the cache backend (Redis key-prefixing or Postgres schema routing).
"""
return f"{FENCE_PREFIX}_{chat_session_id}"
def set_fence(chat_session_id: UUID, redis_client: Redis, value: bool) -> None:
"""
Set or clear the stop signal fence for a chat session.
def set_fence(chat_session_id: UUID, cache: CacheBackend, value: bool) -> None:
"""Set or clear the stop signal fence for a chat session.
Args:
chat_session_id: The UUID of the chat session
redis_client: Redis client to use (tenant-aware client that auto-prefixes keys)
cache: Tenant-aware cache backend
value: True to set the fence (stop signal), False to clear it
"""
fence_key = _get_fence_key(chat_session_id)
if not value:
redis_client.delete(fence_key)
cache.delete(fence_key)
return
redis_client.set(fence_key, 0, ex=FENCE_TTL)
cache.set(fence_key, 0, ex=FENCE_TTL)
def is_connected(chat_session_id: UUID, redis_client: Redis) -> bool:
"""
Check if the chat session should continue (not stopped).
def is_connected(chat_session_id: UUID, cache: CacheBackend) -> bool:
"""Check if the chat session should continue (not stopped).
Args:
chat_session_id: The UUID of the chat session to check
redis_client: Redis client to use for checking the stop signal (tenant-aware client that auto-prefixes keys)
cache: Tenant-aware cache backend
Returns:
True if the session should continue, False if it should stop
"""
fence_key = _get_fence_key(chat_session_id)
return not bool(redis_client.exists(fence_key))
return not cache.exists(_get_fence_key(chat_session_id))
def reset_cancel_status(chat_session_id: UUID, redis_client: Redis) -> None:
"""
Clear the stop signal for a chat session.
def reset_cancel_status(chat_session_id: UUID, cache: CacheBackend) -> None:
"""Clear the stop signal for a chat session.
Args:
chat_session_id: The UUID of the chat session
redis_client: Redis client to use (tenant-aware client that auto-prefixes keys)
cache: Tenant-aware cache backend
"""
fence_key = _get_fence_key(chat_session_id)
redis_client.delete(fence_key)
cache.delete(_get_fence_key(chat_session_id))

View File

@@ -288,8 +288,9 @@ OPENSEARCH_TEXT_ANALYZER = os.environ.get("OPENSEARCH_TEXT_ANALYZER") or "englis
# environments we always want to be dual indexing into both OpenSearch and Vespa
# to stress test the new codepaths. Only enable this if there is some instance
# of OpenSearch running for the relevant Onyx instance.
# NOTE: Now enabled by default, unless the env indicates otherwise.
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX = (
os.environ.get("ENABLE_OPENSEARCH_INDEXING_FOR_ONYX", "").lower() == "true"
os.environ.get("ENABLE_OPENSEARCH_INDEXING_FOR_ONYX", "true").lower() == "true"
)
# NOTE: This effectively does nothing anymore, admins can now toggle whether
# retrieval is through OpenSearch. This value is only used as a final fallback
@@ -495,14 +496,7 @@ CELERY_WORKER_PRIMARY_POOL_OVERFLOW = int(
os.environ.get("CELERY_WORKER_PRIMARY_POOL_OVERFLOW") or 4
)
# Consolidated background worker (light, docprocessing, docfetching, heavy, monitoring, user_file_processing)
# separate workers' defaults: light=24, docprocessing=6, docfetching=1, heavy=4, kg=2, monitoring=1, user_file=2
# Total would be 40, but we use a more conservative default of 20 for the consolidated worker
CELERY_WORKER_BACKGROUND_CONCURRENCY = int(
os.environ.get("CELERY_WORKER_BACKGROUND_CONCURRENCY") or 20
)
# Individual worker concurrency settings (used when USE_LIGHTWEIGHT_BACKGROUND_WORKER is False or on Kuberenetes deployments)
# Individual worker concurrency settings
CELERY_WORKER_HEAVY_CONCURRENCY = int(
os.environ.get("CELERY_WORKER_HEAVY_CONCURRENCY") or 4
)
@@ -819,7 +813,9 @@ RERANK_COUNT = int(os.environ.get("RERANK_COUNT") or 1000)
# Tool Configs
#####
# Code Interpreter Service Configuration
CODE_INTERPRETER_BASE_URL = os.environ.get("CODE_INTERPRETER_BASE_URL")
CODE_INTERPRETER_BASE_URL = os.environ.get(
"CODE_INTERPRETER_BASE_URL", "http://localhost:8000"
)
CODE_INTERPRETER_DEFAULT_TIMEOUT_MS = int(
os.environ.get("CODE_INTERPRETER_DEFAULT_TIMEOUT_MS") or 60_000
@@ -900,6 +896,9 @@ CUSTOM_ANSWER_VALIDITY_CONDITIONS = json.loads(
)
VESPA_REQUEST_TIMEOUT = int(os.environ.get("VESPA_REQUEST_TIMEOUT") or "15")
VESPA_MIGRATION_REQUEST_TIMEOUT_S = int(
os.environ.get("VESPA_MIGRATION_REQUEST_TIMEOUT_S") or "120"
)
SYSTEM_RECURSION_LIMIT = int(os.environ.get("SYSTEM_RECURSION_LIMIT") or "1000")

View File

@@ -84,7 +84,6 @@ POSTGRES_CELERY_WORKER_LIGHT_APP_NAME = "celery_worker_light"
POSTGRES_CELERY_WORKER_DOCPROCESSING_APP_NAME = "celery_worker_docprocessing"
POSTGRES_CELERY_WORKER_DOCFETCHING_APP_NAME = "celery_worker_docfetching"
POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME = "celery_worker_indexing_child"
POSTGRES_CELERY_WORKER_BACKGROUND_APP_NAME = "celery_worker_background"
POSTGRES_CELERY_WORKER_HEAVY_APP_NAME = "celery_worker_heavy"
POSTGRES_CELERY_WORKER_MONITORING_APP_NAME = "celery_worker_monitoring"
POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME = (
@@ -178,6 +177,14 @@ USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH = 500
CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT = 5 * 60 # 5 minutes (in seconds)
# How long a queued user-file-delete task is valid before workers discard it.
# Mirrors the processing task expiry to prevent indefinite queue growth when
# files are stuck in DELETING status and the beat keeps re-enqueuing them.
CELERY_USER_FILE_DELETE_TASK_EXPIRES = 60 # 1 minute (in seconds)
# Max queue depth before the delete beat stops enqueuing more delete tasks.
USER_FILE_DELETE_MAX_QUEUE_DEPTH = 500
CELERY_SANDBOX_FILE_SYNC_LOCK_TIMEOUT = 5 * 60 # 5 minutes (in seconds)
DANSWER_REDIS_FUNCTION_LOCK_PREFIX = "da_function_lock:"
@@ -470,6 +477,9 @@ class OnyxRedisLocks:
USER_FILE_PROJECT_SYNC_QUEUED_PREFIX = "da_lock:user_file_project_sync_queued"
USER_FILE_DELETE_BEAT_LOCK = "da_lock:check_user_file_delete_beat"
USER_FILE_DELETE_LOCK_PREFIX = "da_lock:user_file_delete"
# Short-lived key set when a delete task is enqueued; cleared when the worker picks it up.
# Prevents the beat from re-enqueuing the same file while a delete task is already queued.
USER_FILE_DELETE_QUEUED_PREFIX = "da_lock:user_file_delete_queued"
# Release notes
RELEASE_NOTES_FETCH_LOCK = "da_lock:release_notes_fetch"

View File

@@ -943,6 +943,9 @@ class ConfluenceConnector(
if include_permissions
else None
),
parent_hierarchy_raw_node_id=self._get_parent_hierarchy_raw_id(
page
),
)
)
@@ -992,6 +995,7 @@ class ConfluenceConnector(
if include_permissions
else None
),
parent_hierarchy_raw_node_id=page_id,
)
)

View File

@@ -1,4 +1,5 @@
import asyncio
from collections.abc import AsyncGenerator
from collections.abc import AsyncIterable
from collections.abc import Iterable
from datetime import datetime
@@ -204,7 +205,7 @@ def _manage_async_retrieval(
end_time: datetime | None = end
async def _async_fetch() -> AsyncIterable[Document]:
async def _async_fetch() -> AsyncGenerator[Document, None]:
intents = Intents.default()
intents.message_content = True
async with Client(intents=intents) as discord_client:
@@ -227,22 +228,23 @@ def _manage_async_retrieval(
def run_and_yield() -> Iterable[Document]:
loop = asyncio.new_event_loop()
async_gen = _async_fetch()
try:
# Get the async generator
async_gen = _async_fetch()
# Convert to AsyncIterator
async_iter = async_gen.__aiter__()
while True:
try:
# Create a coroutine by calling anext with the async iterator
next_coro = anext(async_iter)
# Run the coroutine to get the next document
doc = loop.run_until_complete(next_coro)
doc = loop.run_until_complete(anext(async_gen))
yield doc
except StopAsyncIteration:
break
finally:
loop.close()
# Must close the async generator before the loop so the Discord
# client's `async with` block can await its shutdown coroutine.
# The nested try/finally ensures the loop always closes even if
# aclose() raises (same pattern as cursor.close() before conn.close()).
try:
loop.run_until_complete(async_gen.aclose())
finally:
loop.close()
return run_and_yield()

View File

@@ -1722,6 +1722,7 @@ class GoogleDriveConnector(
primary_admin_email=self.primary_admin_email,
google_domain=self.google_domain,
),
retriever_email=file.user_email,
):
slim_batch.append(doc)

View File

@@ -476,6 +476,7 @@ def _get_external_access_for_raw_gdrive_file(
company_domain: str,
retriever_drive_service: GoogleDriveService | None,
admin_drive_service: GoogleDriveService,
fallback_user_email: str,
add_prefix: bool = False,
) -> ExternalAccess:
"""
@@ -484,6 +485,8 @@ def _get_external_access_for_raw_gdrive_file(
add_prefix: When True, prefix group IDs with source type (for indexing path).
When False (default), leave unprefixed (for permission sync path
where upsert_document_external_perms handles prefixing).
fallback_user_email: When permission info can't be retrieved (e.g. externally-owned
files), fall back to granting access to this user.
"""
external_access_fn = cast(
Callable[
@@ -492,6 +495,7 @@ def _get_external_access_for_raw_gdrive_file(
str,
GoogleDriveService | None,
GoogleDriveService,
str,
bool,
],
ExternalAccess,
@@ -507,6 +511,7 @@ def _get_external_access_for_raw_gdrive_file(
company_domain,
retriever_drive_service,
admin_drive_service,
fallback_user_email,
add_prefix,
)
@@ -672,6 +677,7 @@ def _convert_drive_item_to_document(
creds, user_email=permission_sync_context.primary_admin_email
),
add_prefix=True, # Indexing path - prefix here
fallback_user_email=retriever_email,
)
if permission_sync_context
else None
@@ -753,6 +759,7 @@ def build_slim_document(
# if not specified, we will not sync permissions
# will also be a no-op if EE is not enabled
permission_sync_context: PermissionSyncContext | None,
retriever_email: str,
) -> SlimDocument | None:
if file.get("mimeType") in [DRIVE_FOLDER_TYPE, DRIVE_SHORTCUT_TYPE]:
return None
@@ -774,6 +781,7 @@ def build_slim_document(
creds,
user_email=permission_sync_context.primary_admin_email,
),
fallback_user_email=retriever_email,
)
if permission_sync_context
else None
@@ -781,4 +789,5 @@ def build_slim_document(
return SlimDocument(
id=onyx_document_id_from_drive_file(file),
external_access=external_access,
parent_hierarchy_raw_node_id=(file.get("parents") or [None])[0],
)

View File

@@ -157,9 +157,7 @@ def _execute_single_retrieval(
logger.error(f"Error executing request: {e}")
raise e
elif _is_rate_limit_error(e):
results = _execute_with_retry(
lambda: retrieval_function(**request_kwargs).execute()
)
results = _execute_with_retry(retrieval_function(**request_kwargs))
elif e.resp.status == 404 or e.resp.status == 403:
if continue_on_404_or_403:
logger.debug(f"Error executing request: {e}")

View File

@@ -902,6 +902,11 @@ class JiraConnector(
external_access=self._get_project_permissions(
project_key, add_prefix=False
),
parent_hierarchy_raw_node_id=(
self._get_parent_hierarchy_raw_node_id(issue, project_key)
if project_key
else None
),
)
)
current_offset += 1

View File

@@ -385,6 +385,7 @@ class IndexingDocument(Document):
class SlimDocument(BaseModel):
id: str
external_access: ExternalAccess | None = None
parent_hierarchy_raw_node_id: str | None = None
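The connectors in this change (Slack, Jira, Confluence, Google Drive, SharePoint) populate the new field with a source-specific raw id; a hedged construction example with a made-up Slack document id:
```
# Illustrative only: a Slack thread document pointing at its channel as the
# raw parent hierarchy node, mirroring the Slack connector change in this diff.
slim_doc = SlimDocument(
    id="slack__C0123456789__1700000000.000100",  # hypothetical document id
    external_access=None,
    parent_hierarchy_raw_node_id="C0123456789",  # the channel id
)
```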
class HierarchyNode(BaseModel):

View File

from office365.runtime.queries.client_query import ClientQuery # type: ignore[import-untyped]
from office365.sharepoint.client_context import ClientContext # type: ignore[import-untyped]
from pydantic import BaseModel
from pydantic import Field
from requests.exceptions import HTTPError
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.app_configs import REQUEST_TIMEOUT_SECONDS
@@ -268,6 +269,32 @@ class SizeCapExceeded(Exception):
"""Exception raised when the size cap is exceeded."""
def _log_and_raise_for_status(response: requests.Response) -> None:
"""Log the response text and raise for status."""
try:
response.raise_for_status()
except Exception:
logger.error(f"HTTP request failed: {response.text}")
raise
GRAPH_INVALID_REQUEST_CODE = "invalidRequest"
def _is_graph_invalid_request(response: requests.Response) -> bool:
"""Return True if the response body is the generic Graph API
``{"error": {"code": "invalidRequest", "message": "Invalid request"}}``
shape. This particular error has no actionable inner error code and is
returned by the site-pages endpoint when a page has a corrupt canvas layout
(e.g. duplicate web-part IDs — see SharePoint/sp-dev-docs#8822)."""
try:
body = response.json()
except Exception:
return False
error = body.get("error", {})
return error.get("code") == GRAPH_INVALID_REQUEST_CODE
def load_certificate_from_pfx(pfx_data: bytes, password: str) -> CertificateData | None:
"""Load certificate from .pfx file for MSAL authentication"""
try:
@@ -344,7 +371,7 @@ def _probe_remote_size(url: str, timeout: int) -> int | None:
"""Determine remote size using HEAD or a range GET probe. Returns None if unknown."""
try:
head_resp = requests.head(url, timeout=timeout, allow_redirects=True)
head_resp.raise_for_status()
_log_and_raise_for_status(head_resp)
cl = head_resp.headers.get("Content-Length")
if cl and cl.isdigit():
return int(cl)
@@ -359,7 +386,7 @@ def _probe_remote_size(url: str, timeout: int) -> int | None:
timeout=timeout,
stream=True,
) as range_resp:
range_resp.raise_for_status()
_log_and_raise_for_status(range_resp)
cr = range_resp.headers.get("Content-Range") # e.g., "bytes 0-0/12345"
if cr and "/" in cr:
total = cr.split("/")[-1]
@@ -384,7 +411,7 @@ def _download_with_cap(url: str, timeout: int, cap: int) -> bytes:
- Returns the full bytes if the content fits within `cap`.
"""
with requests.get(url, stream=True, timeout=timeout) as resp:
resp.raise_for_status()
_log_and_raise_for_status(resp)
# If the server provides Content-Length, prefer an early decision.
cl_header = resp.headers.get("Content-Length")
@@ -428,7 +455,7 @@ def _download_via_graph_api(
with requests.get(
url, headers=headers, stream=True, timeout=REQUEST_TIMEOUT_SECONDS
) as resp:
resp.raise_for_status()
_log_and_raise_for_status(resp)
buf = io.BytesIO()
for chunk in resp.iter_content(64 * 1024):
if not chunk:
@@ -772,6 +799,7 @@ def _convert_driveitem_to_slim_document(
drive_name: str,
ctx: ClientContext,
graph_client: GraphClient,
parent_hierarchy_raw_node_id: str | None = None,
) -> SlimDocument:
if driveitem.id is None:
raise ValueError("DriveItem ID is required")
@@ -787,11 +815,15 @@ def _convert_driveitem_to_slim_document(
return SlimDocument(
id=driveitem.id,
external_access=external_access,
parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
)
def _convert_sitepage_to_slim_document(
site_page: dict[str, Any], ctx: ClientContext | None, graph_client: GraphClient
site_page: dict[str, Any],
ctx: ClientContext | None,
graph_client: GraphClient,
parent_hierarchy_raw_node_id: str | None = None,
) -> SlimDocument:
"""Convert a SharePoint site page to a SlimDocument object."""
if site_page.get("id") is None:
@@ -808,6 +840,7 @@ def _convert_sitepage_to_slim_document(
return SlimDocument(
id=id,
external_access=external_access,
parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
)
@@ -1232,26 +1265,135 @@ class SharepointConnector(
site.execute_query()
site_id = site.id
page_url: str | None = (
f"{self.graph_api_base}/sites/{site_id}" f"/pages/microsoft.graph.sitePage"
site_pages_base = (
f"{self.graph_api_base}/sites/{site_id}/pages/microsoft.graph.sitePage"
)
page_url: str | None = site_pages_base
params: dict[str, str] | None = {"$expand": "canvasLayout"}
total_yielded = 0
yielded_ids: set[str] = set()
while page_url:
data = self._graph_api_get_json(page_url, params)
try:
data = self._graph_api_get_json(page_url, params)
except HTTPError as e:
if e.response is not None and e.response.status_code == 404:
logger.warning(f"Site page not found: {page_url}")
break
if (
e.response is not None
and e.response.status_code == 400
and _is_graph_invalid_request(e.response)
):
logger.warning(
f"$expand=canvasLayout on the LIST endpoint returned 400 "
f"for site {site_descriptor.url}. Falling back to "
f"per-page expansion."
)
yield from self._fetch_site_pages_individually(
site_pages_base, start, end, skip_ids=yielded_ids
)
return
raise
params = None # nextLink already embeds query params
for page in data.get("value", []):
if not _site_page_in_time_window(page, start, end):
continue
total_yielded += 1
page_id = page.get("id")
if page_id:
yielded_ids.add(page_id)
yield page
page_url = data.get("@odata.nextLink")
logger.debug(f"Yielded {total_yielded} site pages for {site_descriptor.url}")
def _fetch_site_pages_individually(
self,
site_pages_base: str,
start: datetime | None = None,
end: datetime | None = None,
skip_ids: set[str] | None = None,
) -> Generator[dict[str, Any], None, None]:
"""Fallback for _fetch_site_pages: list pages without $expand, then
expand canvasLayout on each page individually.
The Graph API's LIST endpoint can return 400 when $expand=canvasLayout
is used and *any* page in the site has a corrupt canvas layout (e.g.
duplicate web part IDs — see SharePoint/sp-dev-docs#8822). Since the
LIST expansion is all-or-nothing, a single bad page poisons the entire
response. This method works around it by fetching metadata first, then
expanding each page individually so only the broken page loses its
canvas content.
``skip_ids`` contains page IDs already yielded by the caller before the
fallback was triggered, preventing duplicates.
"""
page_url: str | None = site_pages_base
total_yielded = 0
_skip_ids = skip_ids or set()
while page_url:
try:
data = self._graph_api_get_json(page_url)
except HTTPError as e:
if e.response is not None and e.response.status_code == 404:
break
raise
for page in data.get("value", []):
if not _site_page_in_time_window(page, start, end):
continue
page_id = page.get("id")
if page_id and page_id in _skip_ids:
continue
if not page_id:
total_yielded += 1
yield page
continue
expanded = self._try_expand_single_page(site_pages_base, page_id, page)
total_yielded += 1
yield expanded
page_url = data.get("@odata.nextLink")
logger.debug(
f"Yielded {total_yielded} site pages (per-page expansion fallback)"
)
def _try_expand_single_page(
self,
site_pages_base: str,
page_id: str,
fallback_page: dict[str, Any],
) -> dict[str, Any]:
"""Try to GET a single page with $expand=canvasLayout. On 400, return
the metadata-only fallback so the page is still indexed (without canvas
content)."""
pages_collection = site_pages_base.removesuffix("/microsoft.graph.sitePage")
single_url = f"{pages_collection}/{page_id}/microsoft.graph.sitePage"
try:
return self._graph_api_get_json(single_url, {"$expand": "canvasLayout"})
except HTTPError as e:
if (
e.response is not None
and e.response.status_code == 400
and _is_graph_invalid_request(e.response)
):
page_name = fallback_page.get("name", page_id)
logger.warning(
f"$expand=canvasLayout failed for page '{page_name}' "
f"({page_id}). Indexing metadata only."
)
return fallback_page
raise
def _acquire_token(self) -> dict[str, Any]:
"""
Acquire token via MSAL
@@ -1303,7 +1445,7 @@ class SharepointConnector(
access_token = self._get_graph_access_token()
headers = {"Authorization": f"Bearer {access_token}"}
continue
response.raise_for_status()
_log_and_raise_for_status(response)
return response.json()
except (requests.ConnectionError, requests.Timeout):
if attempt < GRAPH_API_MAX_RETRIES:
@@ -1594,12 +1736,22 @@ class SharepointConnector(
)
)
parent_hierarchy_url: str | None = None
if drive_web_url:
parent_hierarchy_url = self._get_parent_hierarchy_url(
site_url, drive_web_url, drive_name, driveitem
)
try:
logger.debug(f"Processing: {driveitem.web_url}")
ctx = self._create_rest_client_context(site_descriptor.url)
doc_batch.append(
_convert_driveitem_to_slim_document(
driveitem, drive_name, ctx, self.graph_client
driveitem,
drive_name,
ctx,
self.graph_client,
parent_hierarchy_raw_node_id=parent_hierarchy_url,
)
)
except Exception as e:
@@ -1619,7 +1771,10 @@ class SharepointConnector(
ctx = self._create_rest_client_context(site_descriptor.url)
doc_batch.append(
_convert_sitepage_to_slim_document(
site_page, ctx, self.graph_client
site_page,
ctx,
self.graph_client,
parent_hierarchy_raw_node_id=site_descriptor.url,
)
)
if len(doc_batch) >= SLIM_BATCH_SIZE:

View File

@@ -565,6 +565,7 @@ def _get_all_doc_ids(
channel_id=channel_id, thread_ts=message["ts"]
),
external_access=external_access,
parent_hierarchy_raw_node_id=channel_id,
)
)

View File

@@ -38,6 +38,7 @@ from onyx.llm.override_models import LLMOverride
from onyx.llm.override_models import PromptOverride
from onyx.server.query_and_chat.models import ChatMessageDetail
from onyx.utils.logger import setup_logger
from onyx.utils.postgres_sanitization import sanitize_string
logger = setup_logger()
@@ -675,58 +676,43 @@ def set_as_latest_chat_message(
db_session.commit()
def _sanitize_for_postgres(value: str) -> str:
"""Remove NUL (0x00) characters from strings as PostgreSQL doesn't allow them."""
sanitized = value.replace("\x00", "")
if value and not sanitized:
logger.warning("Sanitization removed all characters from string")
return sanitized
def _sanitize_list_for_postgres(values: list[str]) -> list[str]:
"""Remove NUL (0x00) characters from all strings in a list."""
return [_sanitize_for_postgres(v) for v in values]
def create_db_search_doc(
server_search_doc: ServerSearchDoc,
db_session: Session,
commit: bool = True,
) -> DBSearchDoc:
# Sanitize string fields to remove NUL characters (PostgreSQL doesn't allow them)
db_search_doc = DBSearchDoc(
document_id=_sanitize_for_postgres(server_search_doc.document_id),
document_id=sanitize_string(server_search_doc.document_id),
chunk_ind=server_search_doc.chunk_ind,
semantic_id=_sanitize_for_postgres(server_search_doc.semantic_identifier),
semantic_id=sanitize_string(server_search_doc.semantic_identifier),
link=(
_sanitize_for_postgres(server_search_doc.link)
sanitize_string(server_search_doc.link)
if server_search_doc.link is not None
else None
),
blurb=_sanitize_for_postgres(server_search_doc.blurb),
blurb=sanitize_string(server_search_doc.blurb),
source_type=server_search_doc.source_type,
boost=server_search_doc.boost,
hidden=server_search_doc.hidden,
doc_metadata=server_search_doc.metadata,
is_relevant=server_search_doc.is_relevant,
relevance_explanation=(
_sanitize_for_postgres(server_search_doc.relevance_explanation)
sanitize_string(server_search_doc.relevance_explanation)
if server_search_doc.relevance_explanation is not None
else None
),
# For docs further down that aren't reranked, we can't use the retrieval score
score=server_search_doc.score or 0.0,
match_highlights=_sanitize_list_for_postgres(
server_search_doc.match_highlights
),
match_highlights=[
sanitize_string(h) for h in server_search_doc.match_highlights
],
updated_at=server_search_doc.updated_at,
primary_owners=(
_sanitize_list_for_postgres(server_search_doc.primary_owners)
[sanitize_string(o) for o in server_search_doc.primary_owners]
if server_search_doc.primary_owners is not None
else None
),
secondary_owners=(
_sanitize_list_for_postgres(server_search_doc.secondary_owners)
[sanitize_string(o) for o in server_search_doc.secondary_owners]
if server_search_doc.secondary_owners is not None
else None
),

View File

@@ -13,6 +13,7 @@ from sqlalchemy.orm import aliased
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.db.connector_credential_pair import get_cc_pair_groups_for_ids
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.enums import AccessType
@@ -246,6 +247,7 @@ def insert_document_set(
description=document_set_creation_request.description,
user_id=user_id,
is_public=document_set_creation_request.is_public,
is_up_to_date=DISABLE_VECTOR_DB,
time_last_modified_by_user=func.now(),
)
db_session.add(new_document_set_row)
@@ -336,7 +338,8 @@ def update_document_set(
)
document_set_row.description = document_set_update_request.description
document_set_row.is_up_to_date = False
if not DISABLE_VECTOR_DB:
document_set_row.is_up_to_date = False
document_set_row.is_public = document_set_update_request.is_public
document_set_row.time_last_modified_by_user = func.now()
versioned_private_doc_set_fn = fetch_versioned_implementation(

View File

@@ -1,5 +1,7 @@
"""CRUD operations for HierarchyNode."""
from collections import defaultdict
from sqlalchemy import select
from sqlalchemy.orm import Session
@@ -525,6 +527,53 @@ def get_document_parent_hierarchy_node_ids(
return {doc_id: parent_id for doc_id, parent_id in results}
def update_document_parent_hierarchy_nodes(
db_session: Session,
doc_parent_map: dict[str, int | None],
commit: bool = True,
) -> int:
"""Bulk-update Document.parent_hierarchy_node_id for multiple documents.
Only updates rows whose current value differs from the desired value to
avoid unnecessary writes.
Args:
db_session: SQLAlchemy session
doc_parent_map: Mapping of document_id → desired parent_hierarchy_node_id
commit: Whether to commit the transaction
Returns:
Number of documents actually updated
"""
if not doc_parent_map:
return 0
doc_ids = list(doc_parent_map.keys())
existing = get_document_parent_hierarchy_node_ids(db_session, doc_ids)
by_parent: dict[int | None, list[str]] = defaultdict(list)
for doc_id, desired_parent_id in doc_parent_map.items():
current = existing.get(doc_id)
if current == desired_parent_id or doc_id not in existing:
continue
by_parent[desired_parent_id].append(doc_id)
updated = 0
for desired_parent_id, ids in by_parent.items():
db_session.query(Document).filter(Document.id.in_(ids)).update(
{Document.parent_hierarchy_node_id: desired_parent_id},
synchronize_session=False,
)
updated += len(ids)
if commit:
db_session.commit()
elif updated:
db_session.flush()
return updated
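A brief usage sketch for the new bulk updater; the document ids and node ids below are invented for illustration.
```
# Hypothetical call: re-parent two documents and detach a third in one pass.
# Rows already at the desired parent (or not present) are skipped, so repeated
# calls are cheap no-ops.
doc_parent_map: dict[str, int | None] = {
    "doc-page-123": 42,    # move under hierarchy node 42
    "doc-page-456": 42,
    "doc-page-789": None,  # clear the parent
}
updated = update_document_parent_hierarchy_nodes(
    db_session, doc_parent_map, commit=False
)
```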
def update_hierarchy_node_permissions(
db_session: Session,
raw_node_id: str,

View File

@@ -583,6 +583,67 @@ def get_latest_index_attempt_for_cc_pair_id(
return db_session.execute(stmt).scalar_one_or_none()
def get_latest_successful_index_attempt_for_cc_pair_id(
db_session: Session,
connector_credential_pair_id: int,
secondary_index: bool = False,
) -> IndexAttempt | None:
"""Returns the most recent successful index attempt for the given cc pair,
filtered to the current (or future) search settings.
Uses MAX(id) semantics to match get_latest_index_attempts_by_status."""
status = IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
stmt = (
select(IndexAttempt)
.where(
IndexAttempt.connector_credential_pair_id == connector_credential_pair_id,
IndexAttempt.status.in_(
[IndexingStatus.SUCCESS, IndexingStatus.COMPLETED_WITH_ERRORS]
),
)
.join(SearchSettings)
.where(SearchSettings.status == status)
.order_by(desc(IndexAttempt.id))
.limit(1)
)
return db_session.execute(stmt).scalar_one_or_none()
def get_latest_successful_index_attempts_parallel(
secondary_index: bool = False,
) -> Sequence[IndexAttempt]:
"""Batch version: returns the latest successful index attempt per cc pair.
Covers both SUCCESS and COMPLETED_WITH_ERRORS (matching is_successful())."""
model_status = (
IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
)
with get_session_with_current_tenant() as db_session:
latest_ids = (
select(
IndexAttempt.connector_credential_pair_id,
func.max(IndexAttempt.id).label("max_id"),
)
.join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id)
.where(
SearchSettings.status == model_status,
IndexAttempt.status.in_(
[IndexingStatus.SUCCESS, IndexingStatus.COMPLETED_WITH_ERRORS]
),
)
.group_by(IndexAttempt.connector_credential_pair_id)
.subquery()
)
stmt = select(IndexAttempt).join(
latest_ids,
(
IndexAttempt.connector_credential_pair_id
== latest_ids.c.connector_credential_pair_id
)
& (IndexAttempt.id == latest_ids.c.max_id),
)
return db_session.execute(stmt).scalars().all()
def count_index_attempts_for_cc_pair(
db_session: Session,
cc_pair_id: int,

View File

@@ -25,8 +25,12 @@ from onyx.server.manage.embedding.models import CloudEmbeddingProvider
from onyx.server.manage.embedding.models import CloudEmbeddingProviderCreationRequest
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import LLMProviderView
from onyx.server.manage.llm.models import SyncModelEntry
from onyx.utils.logger import setup_logger
from shared_configs.enums import EmbeddingProvider
logger = setup_logger()
def update_group_llm_provider_relationships__no_commit(
llm_provider_id: int,
@@ -267,10 +271,35 @@ def upsert_llm_provider(
mc.name for mc in llm_provider_upsert_request.model_configurations
}
# Build a lookup of requested visibility by model name
requested_visibility = {
mc.name: mc.is_visible
for mc in llm_provider_upsert_request.model_configurations
}
# Delete removed models
removed_ids = [
mc.id for name, mc in existing_by_name.items() if name not in models_to_exist
]
default_model = fetch_default_llm_model(db_session)
# Prevent removing and hiding the default model
if default_model:
for name, mc in existing_by_name.items():
if mc.id == default_model.id:
if default_model.id in removed_ids:
raise ValueError(
f"Cannot remove the default model '{name}'. "
"Please change the default model before removing."
)
if not requested_visibility.get(name, True):
raise ValueError(
f"Cannot hide the default model '{name}'. "
"Please change the default model before hiding."
)
break
if removed_ids:
db_session.query(ModelConfiguration).filter(
ModelConfiguration.id.in_(removed_ids)
@@ -341,9 +370,9 @@ def upsert_llm_provider(
def sync_model_configurations(
db_session: Session,
provider_name: str,
models: list[dict],
models: list[SyncModelEntry],
) -> int:
"""Sync model configurations for a dynamic provider (OpenRouter, Bedrock, Ollama).
"""Sync model configurations for a dynamic provider (OpenRouter, Bedrock, Ollama, etc.).
This inserts NEW models from the source API without overwriting existing ones.
User preferences (is_visible, max_input_tokens) are preserved for existing models.
@@ -351,7 +380,7 @@ def sync_model_configurations(
Args:
db_session: Database session
provider_name: Name of the LLM provider
models: List of model dicts with keys: name, display_name, max_input_tokens, supports_image_input
models: List of SyncModelEntry objects describing the fetched models
Returns:
Number of new models added
@@ -365,21 +394,20 @@ def sync_model_configurations(
new_count = 0
for model in models:
model_name = model["name"]
if model_name not in existing_names:
if model.name not in existing_names:
# Insert new model with is_visible=False (user must explicitly enable)
supported_flows = [LLMModelFlowType.CHAT]
if model.get("supports_image_input", False):
if model.supports_image_input:
supported_flows.append(LLMModelFlowType.VISION)
insert_new_model_configuration__no_commit(
db_session=db_session,
llm_provider_id=provider.id,
model_name=model_name,
model_name=model.name,
supported_flows=supported_flows,
is_visible=False,
max_input_tokens=model.get("max_input_tokens"),
display_name=model.get("display_name"),
max_input_tokens=model.max_input_tokens,
display_name=model.display_name,
)
new_count += 1
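Only `name`, `display_name`, `max_input_tokens`, and `supports_image_input` are read from `SyncModelEntry` here; a plausible shape, inferred from this usage rather than the actual definition in `onyx.server.manage.llm.models`:
```
# Inferred sketch only; the real SyncModelEntry may carry additional fields.
from pydantic import BaseModel


class SyncModelEntry(BaseModel):
    name: str
    display_name: str | None = None
    max_input_tokens: int | None = None
    supports_image_input: bool = False
```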
@@ -532,9 +560,9 @@ def fetch_default_model(
) -> ModelConfiguration | None:
model_config = db_session.scalar(
select(ModelConfiguration)
.options(selectinload(ModelConfiguration.llm_provider))
.join(LLMModelFlow)
.where(
ModelConfiguration.is_visible == True, # noqa: E712
LLMModelFlow.llm_model_flow_type == flow_type,
LLMModelFlow.is_default == True, # noqa: E712
)
@@ -810,6 +838,29 @@ def sync_auto_mode_models(
)
changes += 1
# Update the default if this provider currently holds the global CHAT default.
# We flush (but don't commit) so that _update_default_model can see the new
# model rows, then commit everything atomically to avoid a window where the
# old default is invisible but still pointed-to.
db_session.flush()
recommended_default = llm_recommendations.get_default_model(provider.provider)
if recommended_default:
current_default = fetch_default_llm_model(db_session)
if (
current_default
and current_default.llm_provider_id == provider.id
and current_default.name != recommended_default.name
):
_update_default_model__no_commit(
db_session=db_session,
provider_id=provider.id,
model=recommended_default.name,
flow_type=LLMModelFlowType.CHAT,
)
changes += 1
db_session.commit()
return changes
@@ -941,7 +992,7 @@ def update_model_configuration__no_commit(
db_session.flush()
def _update_default_model(
def _update_default_model__no_commit(
db_session: Session,
provider_id: int,
model: str,
@@ -979,6 +1030,14 @@ def _update_default_model(
new_default.is_default = True
model_config.is_visible = True
def _update_default_model(
db_session: Session,
provider_id: int,
model: str,
flow_type: LLMModelFlowType,
) -> None:
_update_default_model__no_commit(db_session, provider_id, model, flow_type)
db_session.commit()

View File

@@ -4926,7 +4926,9 @@ class ScimUserMapping(Base):
__tablename__ = "scim_user_mapping"
id: Mapped[int] = mapped_column(Integer, primary_key=True)
external_id: Mapped[str] = mapped_column(String, unique=True, index=True)
external_id: Mapped[str | None] = mapped_column(
String, unique=True, index=True, nullable=True
)
user_id: Mapped[UUID] = mapped_column(
ForeignKey("user.id", ondelete="CASCADE"), unique=True, nullable=False
)
@@ -4983,3 +4985,25 @@ class CodeInterpreterServer(Base):
id: Mapped[int] = mapped_column(Integer, primary_key=True)
server_enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
class CacheStore(Base):
"""Key-value cache table used by ``PostgresCacheBackend``.
Replaces Redis for simple KV caching, locks, and list operations
when ``CACHE_BACKEND=postgres`` (NO_VECTOR_DB deployments).
Intentionally separate from ``KVStore``:
- Stores raw bytes (LargeBinary) vs JSONB, matching Redis semantics.
- Has ``expires_at`` for TTL; rows are periodically garbage-collected.
- Holds ephemeral data (tokens, stop signals, lock state), not
persistent application config, so cleanup can be aggressive.
"""
__tablename__ = "cache_store"
key: Mapped[str] = mapped_column(String, primary_key=True)
value: Mapped[bytes | None] = mapped_column(LargeBinary, nullable=True)
expires_at: Mapped[datetime.datetime | None] = mapped_column(
DateTime(timezone=True), nullable=True
)
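A minimal sketch of how a Postgres-backed `set`/`get` over this table might behave, assuming Redis-style TTL semantics; the function names and session handling are illustrative, not the actual `PostgresCacheBackend` methods.
```
# Sketch only: upsert a value with an optional TTL and read it back, treating
# expired rows as missing (periodic cleanup deletes them for real).
from datetime import datetime, timedelta, timezone

from sqlalchemy.orm import Session


def cache_set(session: Session, key: str, value: bytes, ex: int | None = None) -> None:
    expires_at = (
        datetime.now(timezone.utc) + timedelta(seconds=ex) if ex is not None else None
    )
    session.merge(CacheStore(key=key, value=value, expires_at=expires_at))


def cache_get(session: Session, key: str) -> bytes | None:
    row = session.get(CacheStore, key)
    if row is None or (
        row.expires_at is not None and row.expires_at < datetime.now(timezone.utc)
    ):
        return None
    return row.value
```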

View File

@@ -12,6 +12,7 @@ from sqlalchemy.orm import Session
from starlette.background import BackgroundTasks
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
from onyx.configs.constants import FileOrigin
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
@@ -52,7 +53,7 @@ def create_user_files(
) -> CategorizedFilesResult:
# Categorize the files
categorized_files = categorize_uploaded_files(files)
categorized_files = categorize_uploaded_files(files, db_session)
# NOTE: At the moment, zip metadata is not used for user files.
# Should revisit to decide whether this should be a feature.
upload_response = upload_files(categorized_files.acceptable, FileOrigin.USER_FILE)
@@ -144,6 +145,7 @@ def upload_files_to_user_files_with_indexing(
kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
priority=OnyxCeleryPriority.HIGH,
expires=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,
)
logger.info(
f"Triggered indexing for user_file_id={user_file.id} "

View File

@@ -129,7 +129,7 @@ def get_current_search_settings(db_session: Session) -> SearchSettings:
latest_settings = result.scalars().first()
if not latest_settings:
raise RuntimeError("No search settings specified, DB is not in a valid state")
raise RuntimeError("No search settings specified; DB is not in a valid state.")
return latest_settings

View File

@@ -13,12 +13,15 @@ from onyx.db.constants import UNSET
from onyx.db.constants import UnsetType
from onyx.db.enums import MCPServerStatus
from onyx.db.models import MCPServer
from onyx.db.models import OAuthConfig
from onyx.db.models import Tool
from onyx.db.models import ToolCall
from onyx.server.features.tool.models import Header
from onyx.tools.built_in_tools import BUILT_IN_TOOL_TYPES
from onyx.utils.headers import HeaderItemDict
from onyx.utils.logger import setup_logger
from onyx.utils.postgres_sanitization import sanitize_json_like
from onyx.utils.postgres_sanitization import sanitize_string
if TYPE_CHECKING:
pass
@@ -159,10 +162,26 @@ def update_tool(
]
if passthrough_auth is not None:
tool.passthrough_auth = passthrough_auth
old_oauth_config_id = tool.oauth_config_id
if not isinstance(oauth_config_id, UnsetType):
tool.oauth_config_id = oauth_config_id
db_session.commit()
db_session.flush()
# Clean up orphaned OAuthConfig if the oauth_config_id was changed
if (
old_oauth_config_id is not None
and not isinstance(oauth_config_id, UnsetType)
and old_oauth_config_id != oauth_config_id
):
other_tools = db_session.scalars(
select(Tool).where(Tool.oauth_config_id == old_oauth_config_id)
).all()
if not other_tools:
oauth_config = db_session.get(OAuthConfig, old_oauth_config_id)
if oauth_config:
db_session.delete(oauth_config)
db_session.commit()
return tool
@@ -171,8 +190,21 @@ def delete_tool__no_commit(tool_id: int, db_session: Session) -> None:
if tool is None:
raise ValueError(f"Tool with ID {tool_id} does not exist")
oauth_config_id = tool.oauth_config_id
db_session.delete(tool)
db_session.flush() # Don't commit yet, let caller decide when to commit
db_session.flush()
# Clean up orphaned OAuthConfig if no other tools reference it
if oauth_config_id is not None:
other_tools = db_session.scalars(
select(Tool).where(Tool.oauth_config_id == oauth_config_id)
).all()
if not other_tools:
oauth_config = db_session.get(OAuthConfig, oauth_config_id)
if oauth_config:
db_session.delete(oauth_config)
db_session.flush()
def get_builtin_tool(
@@ -256,11 +288,13 @@ def create_tool_call_no_commit(
tab_index=tab_index,
tool_id=tool_id,
tool_call_id=tool_call_id,
reasoning_tokens=reasoning_tokens,
tool_call_arguments=tool_call_arguments,
tool_call_response=tool_call_response,
reasoning_tokens=(
sanitize_string(reasoning_tokens) if reasoning_tokens else reasoning_tokens
),
tool_call_arguments=sanitize_json_like(tool_call_arguments),
tool_call_response=sanitize_json_like(tool_call_response),
tool_call_tokens=tool_call_tokens,
generated_images=generated_images,
generated_images=sanitize_json_like(generated_images),
)
db_session.add(tool_call)

View File

@@ -0,0 +1,103 @@
# Vector DB Filter Semantics
How `IndexFilters` fields combine into the final query filter. Applies to both Vespa and OpenSearch.
## Filter categories
| Category | Fields | Join logic |
|---|---|---|
| **Visibility** | `hidden` | Always applied (unless `include_hidden`) |
| **Tenant** | `tenant_id` | AND (multi-tenant only) |
| **ACL** | `access_control_list` | OR within, AND with rest |
| **Narrowing** | `source_type`, `tags`, `time_cutoff` | Each OR within, AND with rest |
| **Knowledge scope** | `document_set`, `user_file_ids`, `attached_document_ids`, `hierarchy_node_ids` | OR within group, AND with rest |
| **Additive scope** | `project_id`, `persona_id` | OR'd into knowledge scope **only when** a knowledge scope filter already exists |
## How filters combine
All categories are AND'd together. Within the knowledge scope category, individual filters are OR'd.
```
NOT hidden
AND tenant = T -- if multi-tenant
AND (acl contains A1 OR acl contains A2)
AND (source_type = S1 OR ...) -- if set
AND (tag = T1 OR ...) -- if set
AND <knowledge scope> -- see below
AND time >= cutoff -- if set
```
## Knowledge scope rules
The knowledge scope filter controls **what knowledge an assistant can access**.
### No explicit knowledge attached
When `document_set`, `user_file_ids`, `attached_document_ids`, and `hierarchy_node_ids` are all empty/None:
- **No knowledge scope filter is applied.** The assistant can see everything (subject to ACL).
- `project_id` and `persona_id` are ignored — they never restrict on their own.
### One explicit knowledge type
```
-- Only document sets
AND (document_sets contains "Engineering" OR document_sets contains "Legal")
-- Only user files
AND (document_id = "uuid-1" OR document_id = "uuid-2")
```
### Multiple explicit knowledge types (OR'd)
```
-- Document sets + user files
AND (
document_sets contains "Engineering"
OR document_id = "uuid-1"
)
```
### Explicit knowledge + overflowing user files
When an explicit knowledge restriction is in effect **and** `project_id` or `persona_id` is set (user files overflowed the LLM context window), the additive scopes widen the filter:
```
-- Document sets + persona user files overflowed
AND (
document_sets contains "Engineering"
OR personas contains 42
)
-- User files + project files overflowed
AND (
document_id = "uuid-1"
OR user_project contains 7
)
```
### Only project_id or persona_id (no explicit knowledge)
No knowledge scope filter. The assistant searches everything.
```
-- Just ACL, no restriction
NOT hidden
AND (acl contains ...)
```
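For illustration, the scope rules above can be summarized as a small helper that only ever widens an existing explicit restriction. This is a sketch: `filters` mirrors the `IndexFilters` fields from the table below, and the clause strings are pseudo-filters, not real Vespa or OpenSearch syntax.
```
# Pseudo-implementation of the knowledge scope rules (illustrative names only).
def build_knowledge_scope(filters) -> str | None:
    explicit: list[str] = []
    if filters.document_set:
        explicit += [f'document_sets contains "{ds}"' for ds in filters.document_set]
    if filters.user_file_ids:
        explicit += [f'document_id = "{fid}"' for fid in filters.user_file_ids]
    if filters.attached_document_ids:
        explicit += [f'document_id = "{did}"' for did in filters.attached_document_ids]
    if filters.hierarchy_node_ids:
        explicit += [
            f"ancestor_hierarchy_node_ids contains {nid}"
            for nid in filters.hierarchy_node_ids
        ]

    if not explicit:
        # No explicit knowledge attached: project_id / persona_id never
        # restrict on their own, so no scope filter is applied at all.
        return None

    # Additive scopes only widen an existing explicit restriction.
    if filters.project_id is not None:
        explicit.append(f"user_project contains {filters.project_id}")
    if filters.persona_id is not None:
        explicit.append(f"personas contains {filters.persona_id}")
    return "(" + " OR ".join(explicit) + ")"
```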
## Field reference
| Filter field | Vespa field | Vespa type | Purpose |
|---|---|---|---|
| `document_set` | `document_sets` | `weightedset<string>` | Connector doc sets attached to assistant |
| `user_file_ids` | `document_id` | `string` | User files uploaded to assistant |
| `attached_document_ids` | `document_id` | `string` | Documents explicitly attached (OpenSearch only) |
| `hierarchy_node_ids` | `ancestor_hierarchy_node_ids` | `array<int>` | Folder/space nodes (OpenSearch only) |
| `project_id` | `user_project` | `array<int>` | Project tag for overflowing user files |
| `persona_id` | `personas` | `array<int>` | Persona tag for overflowing user files |
| `access_control_list` | `access_control_list` | `weightedset<string>` | ACL entries for the requesting user |
| `source_type` | `source_type` | `string` | Connector source type (e.g. `web`, `jira`) |
| `tags` | `metadata_list` | `array<string>` | Document metadata tags |
| `time_cutoff` | `doc_updated_at` | `long` | Minimum document update timestamp |
| `tenant_id` | `tenant_id` | `string` | Tenant isolation (multi-tenant) |

View File

@@ -32,9 +32,6 @@ def get_multipass_config(search_settings: SearchSettings) -> MultipassConfig:
Determines whether to enable multipass and large chunks by examining
the current search settings and the embedder configuration.
"""
if not search_settings:
return MultipassConfig(multipass_indexing=False, enable_large_chunks=False)
multipass = should_use_multipass(search_settings)
enable_large_chunks = SearchSettings.can_use_large_chunks(
multipass, search_settings.model_name, search_settings.provider_type

View File

@@ -26,11 +26,10 @@ def get_default_document_index(
To be used for retrieval only. Indexing should be done through both indices
until Vespa is deprecated.
Pre-existing docstring for this function, although secondary indices are not
currently supported:
Primary index is the index that is used for querying/updating etc. Secondary
index is for when both the currently used index and the upcoming index both
need to be updated, updates are applied to both indices.
need to be updated. Updates are applied to both indices.
WARNING: In that case, get_all_document_indices should be used.
"""
if DISABLE_VECTOR_DB:
return DisabledDocumentIndex(
@@ -51,11 +50,26 @@ def get_default_document_index(
opensearch_retrieval_enabled = get_opensearch_retrieval_state(db_session)
if opensearch_retrieval_enabled:
indexing_setting = IndexingSetting.from_db_model(search_settings)
secondary_indexing_setting = (
IndexingSetting.from_db_model(secondary_search_settings)
if secondary_search_settings
else None
)
return OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
embedding_dim=indexing_setting.final_embedding_dim,
embedding_precision=indexing_setting.embedding_precision,
secondary_index_name=secondary_index_name,
secondary_embedding_dim=(
secondary_indexing_setting.final_embedding_dim
if secondary_indexing_setting
else None
),
secondary_embedding_precision=(
secondary_indexing_setting.embedding_precision
if secondary_indexing_setting
else None
),
large_chunks_enabled=search_settings.large_chunks_enabled,
secondary_large_chunks_enabled=secondary_large_chunks_enabled,
multitenant=MULTI_TENANT,
@@ -86,8 +100,7 @@ def get_all_document_indices(
Used for indexing only. Until Vespa is deprecated we will index into both
document indices. Retrieval is done through only one index however.
Large chunks and secondary indices are not currently supported so we
hardcode appropriate values.
Large chunks are not currently supported so we hardcode appropriate values.
NOTE: Make sure the Vespa index object is returned first. In the rare event
that there is some conflict between indexing and the migration task, it is
@@ -123,13 +136,36 @@ def get_all_document_indices(
opensearch_document_index: OpenSearchOldDocumentIndex | None = None
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
indexing_setting = IndexingSetting.from_db_model(search_settings)
secondary_indexing_setting = (
IndexingSetting.from_db_model(secondary_search_settings)
if secondary_search_settings
else None
)
opensearch_document_index = OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
embedding_dim=indexing_setting.final_embedding_dim,
embedding_precision=indexing_setting.embedding_precision,
secondary_index_name=None,
large_chunks_enabled=False,
secondary_large_chunks_enabled=None,
secondary_index_name=(
secondary_search_settings.index_name
if secondary_search_settings
else None
),
secondary_embedding_dim=(
secondary_indexing_setting.final_embedding_dim
if secondary_indexing_setting
else None
),
secondary_embedding_precision=(
secondary_indexing_setting.embedding_precision
if secondary_indexing_setting
else None
),
large_chunks_enabled=search_settings.large_chunks_enabled,
secondary_large_chunks_enabled=(
secondary_search_settings.large_chunks_enabled
if secondary_search_settings
else None
),
multitenant=MULTI_TENANT,
httpx_client=httpx_client,
)

View File

@@ -61,6 +61,25 @@ class SearchHit(BaseModel, Generic[SchemaDocumentModel]):
explanation: dict[str, Any] | None = None
class IndexInfo(BaseModel):
"""
Represents information about an OpenSearch index.
"""
model_config = {"frozen": True}
name: str
health: str
status: str
num_primary_shards: str
num_replica_shards: str
docs_count: str
docs_deleted: str
created_at: str
total_size: str
primary_shards_size: str
def get_new_body_without_vectors(body: dict[str, Any]) -> dict[str, Any]:
"""Recursively replaces vectors in the body with their length.
@@ -159,8 +178,8 @@ class OpenSearchClient(AbstractContextManager):
Raises:
Exception: There was an error creating the search pipeline.
"""
result = self._client.search_pipeline.put(id=pipeline_id, body=pipeline_body)
if not result.get("acknowledged", False):
response = self._client.search_pipeline.put(id=pipeline_id, body=pipeline_body)
if not response.get("acknowledged", False):
raise RuntimeError(f"Failed to create search pipeline {pipeline_id}.")
@log_function_time(print_only=True, debug_only=True, include_args=True)
@@ -173,8 +192,8 @@ class OpenSearchClient(AbstractContextManager):
Raises:
Exception: There was an error deleting the search pipeline.
"""
result = self._client.search_pipeline.delete(id=pipeline_id)
if not result.get("acknowledged", False):
response = self._client.search_pipeline.delete(id=pipeline_id)
if not response.get("acknowledged", False):
raise RuntimeError(f"Failed to delete search pipeline {pipeline_id}.")
@log_function_time(print_only=True, debug_only=True, include_args=True)
@@ -198,6 +217,34 @@ class OpenSearchClient(AbstractContextManager):
logger.error(f"Failed to put cluster settings: {response}.")
return False
@log_function_time(print_only=True, debug_only=True)
def list_indices_with_info(self) -> list[IndexInfo]:
"""
Lists the indices in the OpenSearch cluster with information about each
index.
Returns:
A list of IndexInfo objects for each index.
"""
response = self._client.cat.indices(format="json")
indices: list[IndexInfo] = []
for raw_index_info in response:
indices.append(
IndexInfo(
name=raw_index_info.get("index", ""),
health=raw_index_info.get("health", ""),
status=raw_index_info.get("status", ""),
num_primary_shards=raw_index_info.get("pri", ""),
num_replica_shards=raw_index_info.get("rep", ""),
docs_count=raw_index_info.get("docs.count", ""),
docs_deleted=raw_index_info.get("docs.deleted", ""),
created_at=raw_index_info.get("creation.date.string", ""),
total_size=raw_index_info.get("store.size", ""),
primary_shards_size=raw_index_info.get("pri.store.size", ""),
)
)
return indices
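A minimal usage sketch of the new helper, assuming `client` is an already-constructed OpenSearchClient; every IndexInfo field is a string taken verbatim from the _cat/indices response:
# Sketch only: `client` is assumed to be an existing OpenSearchClient instance.
for index_info in client.list_indices_with_info():
    # All IndexInfo fields are strings, mirroring the raw _cat/indices output.
    print(f"{index_info.name}: health={index_info.health}, docs={index_info.docs_count}, size={index_info.total_size}")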
@log_function_time(print_only=True, debug_only=True)
def ping(self) -> bool:
"""Pings the OpenSearch cluster.

View File

@@ -271,6 +271,9 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
embedding_dim: int,
embedding_precision: EmbeddingPrecision,
secondary_index_name: str | None,
secondary_embedding_dim: int | None,
secondary_embedding_precision: EmbeddingPrecision | None,
# NOTE: We do not support large chunks right now.
large_chunks_enabled: bool, # noqa: ARG002
secondary_large_chunks_enabled: bool | None, # noqa: ARG002
multitenant: bool = False,
@@ -286,12 +289,25 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
f"Expected {MULTI_TENANT}, got {multitenant}."
)
tenant_id = get_current_tenant_id()
tenant_state = TenantState(tenant_id=tenant_id, multitenant=multitenant)
self._real_index = OpenSearchDocumentIndex(
tenant_state=TenantState(tenant_id=tenant_id, multitenant=multitenant),
tenant_state=tenant_state,
index_name=index_name,
embedding_dim=embedding_dim,
embedding_precision=embedding_precision,
)
self._secondary_real_index: OpenSearchDocumentIndex | None = None
if self.secondary_index_name:
if secondary_embedding_dim is None or secondary_embedding_precision is None:
raise ValueError(
"Bug: Secondary index embedding dimension and precision are not set."
)
self._secondary_real_index = OpenSearchDocumentIndex(
tenant_state=tenant_state,
index_name=self.secondary_index_name,
embedding_dim=secondary_embedding_dim,
embedding_precision=secondary_embedding_precision,
)
@staticmethod
def register_multitenant_indices(
@@ -307,19 +323,38 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
self,
primary_embedding_dim: int,
primary_embedding_precision: EmbeddingPrecision,
secondary_index_embedding_dim: int | None, # noqa: ARG002
secondary_index_embedding_precision: EmbeddingPrecision | None, # noqa: ARG002
secondary_index_embedding_dim: int | None,
secondary_index_embedding_precision: EmbeddingPrecision | None,
) -> None:
# Only handle primary index for now, ignore secondary.
return self._real_index.verify_and_create_index_if_necessary(
self._real_index.verify_and_create_index_if_necessary(
primary_embedding_dim, primary_embedding_precision
)
if self.secondary_index_name:
if (
secondary_index_embedding_dim is None
or secondary_index_embedding_precision is None
):
raise ValueError(
"Bug: Secondary index embedding dimension and precision are not set."
)
assert (
self._secondary_real_index is not None
), "Bug: Secondary index is not initialized."
self._secondary_real_index.verify_and_create_index_if_necessary(
secondary_index_embedding_dim, secondary_index_embedding_precision
)
def index(
self,
chunks: list[DocMetadataAwareIndexChunk],
index_batch_params: IndexBatchParams,
) -> set[OldDocumentInsertionRecord]:
"""
NOTE: Do NOT consider the secondary index here. A separate indexing
pipeline will be responsible for indexing to the secondary index. This
design is not ideal and we should reconsider this when revamping index
swapping.
"""
# Convert IndexBatchParams to IndexingMetadata.
chunk_counts: dict[str, IndexingMetadata.ChunkCounts] = {}
for doc_id in index_batch_params.doc_id_to_new_chunk_cnt:
@@ -351,7 +386,20 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
tenant_id: str, # noqa: ARG002
chunk_count: int | None,
) -> int:
return self._real_index.delete(doc_id, chunk_count)
"""
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for deleting chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
total_chunks_deleted = self._real_index.delete(doc_id, chunk_count)
if self.secondary_index_name:
assert (
self._secondary_real_index is not None
), "Bug: Secondary index is not initialized."
total_chunks_deleted += self._secondary_real_index.delete(
doc_id, chunk_count
)
return total_chunks_deleted
def update_single(
self,
@@ -362,6 +410,11 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
fields: VespaDocumentFields | None,
user_fields: VespaDocumentUserFields | None,
) -> None:
"""
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for updating chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
if fields is None and user_fields is None:
logger.warning(
f"Tried to update document {doc_id} with no updated fields or user fields."
@@ -392,6 +445,11 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
try:
self._real_index.update([update_request])
if self.secondary_index_name:
assert (
self._secondary_real_index is not None
), "Bug: Secondary index is not initialized."
self._secondary_real_index.update([update_request])
except NotFoundError:
logger.exception(
f"Tried to update document {doc_id} but at least one of its chunks was not found in OpenSearch. "
@@ -681,7 +739,8 @@ class OpenSearchDocumentIndex(DocumentIndex):
The number of chunks successfully deleted.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Deleting document {document_id} from index {self._index_name}."
f"[OpenSearchDocumentIndex] Deleting document {document_id} from index "
f"{self._index_name}."
)
query_body = DocumentQuery.delete_from_document_id_query(
document_id=document_id,
@@ -717,7 +776,8 @@ class OpenSearchDocumentIndex(DocumentIndex):
specified documents.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Updating {len(update_requests)} chunks for index {self._index_name}."
f"[OpenSearchDocumentIndex] Updating {len(update_requests)} chunks for index "
f"{self._index_name}."
)
for update_request in update_requests:
properties_to_update: dict[str, Any] = dict()
@@ -773,9 +833,11 @@ class OpenSearchDocumentIndex(DocumentIndex):
# here.
# TODO(andrei): Fix the aforementioned race condition.
raise ChunkCountNotFoundError(
f"Tried to update document {doc_id} but its chunk count is not known. Older versions of the "
"application used to permit this but is not a supported state for a document when using OpenSearch. "
"The document was likely just added to the indexing pipeline and the chunk count will be updated shortly."
f"Tried to update document {doc_id} but its chunk count is not known. "
"Older versions of the application used to permit this but is not a "
"supported state for a document when using OpenSearch. The document was "
"likely just added to the indexing pipeline and the chunk count will be "
"updated shortly."
)
if doc_chunk_count == 0:
raise ValueError(
@@ -807,7 +869,8 @@ class OpenSearchDocumentIndex(DocumentIndex):
chunk IDs vs querying for matching document chunks.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Retrieving {len(chunk_requests)} chunks for index {self._index_name}."
f"[OpenSearchDocumentIndex] Retrieving {len(chunk_requests)} chunks for index "
f"{self._index_name}."
)
results: list[InferenceChunk] = []
for chunk_request in chunk_requests:
@@ -854,7 +917,8 @@ class OpenSearchDocumentIndex(DocumentIndex):
num_to_retrieve: int,
) -> list[InferenceChunk]:
logger.debug(
f"[OpenSearchDocumentIndex] Hybrid retrieving {num_to_retrieve} chunks for index {self._index_name}."
f"[OpenSearchDocumentIndex] Hybrid retrieving {num_to_retrieve} chunks for index "
f"{self._index_name}."
)
# TODO(andrei): This could be better, the caller should just make this
# decision when passing in the query param. See the above comment in the
@@ -874,8 +938,10 @@ class OpenSearchDocumentIndex(DocumentIndex):
index_filters=filters,
include_hidden=False,
)
# NOTE: Using z-score normalization here because it's better for hybrid search from a theoretical standpoint.
# Empirically on a small dataset of up to 10K docs, it's not very different. Likely more impactful at scale.
# NOTE: Using z-score normalization here because it's better for hybrid
# search from a theoretical standpoint. Empirically on a small dataset
# of up to 10K docs, it's not very different. Likely more impactful at
# scale.
# https://opensearch.org/blog/introducing-the-z-score-normalization-technique-for-hybrid-search/
search_hits: list[SearchHit[DocumentChunk]] = self._client.search(
body=query_body,
@@ -902,7 +968,8 @@ class OpenSearchDocumentIndex(DocumentIndex):
dirty: bool | None = None, # noqa: ARG002
) -> list[InferenceChunk]:
logger.debug(
f"[OpenSearchDocumentIndex] Randomly retrieving {num_to_retrieve} chunks for index {self._index_name}."
f"[OpenSearchDocumentIndex] Randomly retrieving {num_to_retrieve} chunks for index "
f"{self._index_name}."
)
query_body = DocumentQuery.get_random_search_query(
tenant_state=self._tenant_state,
@@ -932,7 +999,8 @@ class OpenSearchDocumentIndex(DocumentIndex):
complete.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Indexing {len(chunks)} raw chunks for index {self._index_name}."
f"[OpenSearchDocumentIndex] Indexing {len(chunks)} raw chunks for index "
f"{self._index_name}."
)
# Do not raise if the document already exists, just update. This is
# because the document may already have been indexed during the

View File

@@ -243,7 +243,8 @@ class DocumentChunk(BaseModel):
return value
if not isinstance(value, int):
raise ValueError(
f"Bug: Expected an int for the last_updated property from OpenSearch, got {type(value)} instead."
f"Bug: Expected an int for the last_updated property from OpenSearch, got "
f"{type(value)} instead."
)
return datetime.fromtimestamp(value, tz=timezone.utc)
@@ -284,19 +285,22 @@ class DocumentChunk(BaseModel):
elif isinstance(value, TenantState):
if MULTI_TENANT != value.multitenant:
raise ValueError(
f"Bug: An existing TenantState object was supplied to the DocumentChunk model but its multi-tenant mode "
f"({value.multitenant}) does not match the program's current global tenancy state."
f"Bug: An existing TenantState object was supplied to the DocumentChunk model "
f"but its multi-tenant mode ({value.multitenant}) does not match the program's "
"current global tenancy state."
)
return value
elif not isinstance(value, str):
raise ValueError(
f"Bug: Expected a str for the tenant_id property from OpenSearch, got {type(value)} instead."
f"Bug: Expected a str for the tenant_id property from OpenSearch, got "
f"{type(value)} instead."
)
else:
if not MULTI_TENANT:
raise ValueError(
"Bug: Got a non-null str for the tenant_id property from OpenSearch but multi-tenant mode is not enabled. "
"This is unexpected because in single-tenant mode we don't expect to see a tenant_id."
"Bug: Got a non-null str for the tenant_id property from OpenSearch but "
"multi-tenant mode is not enabled. This is unexpected because in single-tenant "
"mode we don't expect to see a tenant_id."
)
return TenantState(tenant_id=value, multitenant=MULTI_TENANT)
@@ -352,8 +356,10 @@ class DocumentSchema:
"properties": {
TITLE_FIELD_NAME: {
"type": "text",
# Language analyzer (e.g. english) stems at index and search time for variant matching.
# Configure via OPENSEARCH_TEXT_ANALYZER. Existing indices need reindexing after a change.
# Language analyzer (e.g. english) stems at index and search
# time for variant matching. Configure via
# OPENSEARCH_TEXT_ANALYZER. Existing indices need reindexing
# after a change.
"analyzer": OPENSEARCH_TEXT_ANALYZER,
"fields": {
# Subfield accessed as title.keyword. Not indexed for

View File

@@ -698,41 +698,6 @@ class DocumentQuery:
"""
return {"terms": {ANCESTOR_HIERARCHY_NODE_IDS_FIELD_NAME: node_ids}}
def _get_assistant_knowledge_filter(
attached_doc_ids: list[str] | None,
node_ids: list[int] | None,
file_ids: list[UUID] | None,
document_sets: list[str] | None,
) -> dict[str, Any]:
"""Combined filter for assistant knowledge.
When an assistant has attached knowledge, search should be scoped to:
- Documents explicitly attached (by document ID), OR
- Documents under attached hierarchy nodes (by ancestor node IDs), OR
- User-uploaded files attached to the assistant, OR
- Documents in the assistant's document sets (if any)
"""
knowledge_filter: dict[str, Any] = {
"bool": {"should": [], "minimum_should_match": 1}
}
if attached_doc_ids:
knowledge_filter["bool"]["should"].append(
_get_attached_document_id_filter(attached_doc_ids)
)
if node_ids:
knowledge_filter["bool"]["should"].append(
_get_hierarchy_node_filter(node_ids)
)
if file_ids:
knowledge_filter["bool"]["should"].append(
_get_user_file_id_filter(file_ids)
)
if document_sets:
knowledge_filter["bool"]["should"].append(
_get_document_set_filter(document_sets)
)
return knowledge_filter
filter_clauses: list[dict[str, Any]] = []
if not include_hidden:
@@ -758,41 +723,53 @@ class DocumentQuery:
# document's metadata list.
filter_clauses.append(_get_tag_filter(tags))
# Check if this is an assistant knowledge search (has any assistant-scoped knowledge)
has_assistant_knowledge = (
# Knowledge scope: explicit knowledge attachments restrict what
# an assistant can see. When none are set the assistant
# searches everything.
#
# project_id / persona_id are additive: they make overflowing
# user files findable but must NOT trigger the restriction on
# their own (an agent with no explicit knowledge should search
# everything).
has_knowledge_scope = (
attached_document_ids
or hierarchy_node_ids
or user_file_ids
or document_sets
)
if has_assistant_knowledge:
# If assistant has attached knowledge, scope search to that knowledge.
# Document sets are included in the OR filter so directly attached
# docs are always findable even if not in the document sets.
filter_clauses.append(
_get_assistant_knowledge_filter(
attached_document_ids,
hierarchy_node_ids,
user_file_ids,
document_sets,
if has_knowledge_scope:
knowledge_filter: dict[str, Any] = {
"bool": {"should": [], "minimum_should_match": 1}
}
if attached_document_ids:
knowledge_filter["bool"]["should"].append(
_get_attached_document_id_filter(attached_document_ids)
)
)
elif user_file_ids:
# Fallback for non-assistant user file searches (e.g., project searches)
# If at least one user file ID is provided, the caller will only
# retrieve documents where the document ID is in this input list of
# file IDs.
filter_clauses.append(_get_user_file_id_filter(user_file_ids))
if project_id is not None:
# If a project ID is provided, the caller will only retrieve
# documents where the project ID provided here is present in the
# document's user projects list.
filter_clauses.append(_get_user_project_filter(project_id))
if persona_id is not None:
filter_clauses.append(_get_persona_filter(persona_id))
if hierarchy_node_ids:
knowledge_filter["bool"]["should"].append(
_get_hierarchy_node_filter(hierarchy_node_ids)
)
if user_file_ids:
knowledge_filter["bool"]["should"].append(
_get_user_file_id_filter(user_file_ids)
)
if document_sets:
knowledge_filter["bool"]["should"].append(
_get_document_set_filter(document_sets)
)
# Additive: widen scope to also cover overflowing user
# files, but only when an explicit restriction is already
# in effect.
if project_id is not None:
knowledge_filter["bool"]["should"].append(
_get_user_project_filter(project_id)
)
if persona_id is not None:
knowledge_filter["bool"]["should"].append(
_get_persona_filter(persona_id)
)
filter_clauses.append(knowledge_filter)
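For illustration, the appended knowledge_filter ends up shaped roughly like the dict below for an assistant with attached documents, a document set, and an associated project; the exact clause bodies come from the _get_*_filter helpers, so the field names here are examples only.
# Rough shape of the knowledge scope clause (field names are illustrative):
knowledge_filter_example = {
    "bool": {
        "minimum_should_match": 1,
        "should": [
            {"terms": {"document_id": ["doc-1", "doc-2"]}},      # attached_document_ids
            {"terms": {"document_sets": ["engineering-wiki"]}},  # document_sets
            {"terms": {"user_projects": [42]}},                  # additive project scope
        ],
    },
}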
if time_cutoff is not None:
# If a time cutoff is provided, the caller will only retrieve

View File

@@ -1,5 +1,6 @@
import json
import string
import time
from collections.abc import Callable
from collections.abc import Mapping
from datetime import datetime
@@ -18,6 +19,7 @@ from onyx.background.celery.tasks.opensearch_migration.transformer import (
)
from onyx.configs.app_configs import LOG_VESPA_TIMING_INFORMATION
from onyx.configs.app_configs import VESPA_LANGUAGE_OVERRIDE
from onyx.configs.app_configs import VESPA_MIGRATION_REQUEST_TIMEOUT_S
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunkUncleaned
from onyx.document_index.interfaces import VespaChunkRequest
@@ -338,12 +340,18 @@ def get_all_chunks_paginated(
params["continuation"] = continuation_token
response: httpx.Response | None = None
start_time = time.monotonic()
try:
with get_vespa_http_client() as http_client:
with get_vespa_http_client(
timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S
) as http_client:
response = http_client.get(url, params=params)
response.raise_for_status()
except httpx.HTTPError as e:
error_base = f"Failed to get chunks from Vespa slice {slice_id} with continuation token {continuation_token}."
error_base = (
f"Failed to get chunks from Vespa slice {slice_id} with continuation token "
f"{continuation_token} in {time.monotonic() - start_time:.3f} seconds."
)
logger.exception(
f"Request URL: {e.request.url}\n"
f"Request Headers: {e.request.headers}\n"
@@ -495,20 +503,31 @@ def query_vespa(
response = http_client.post(SEARCH_ENDPOINT, json=params)
response.raise_for_status()
except httpx.HTTPError as e:
error_base = "Failed to query Vespa"
logger.error(
f"{error_base}:\n"
f"Request URL: {e.request.url}\n"
f"Request Headers: {e.request.headers}\n"
f"Request Payload: {params}\n"
f"Exception: {str(e)}"
+ (
f"\nResponse: {e.response.text}"
if isinstance(e, httpx.HTTPStatusError)
else ""
)
response_text = (
e.response.text if isinstance(e, httpx.HTTPStatusError) else None
)
raise httpx.HTTPError(error_base) from e
status_code = (
e.response.status_code if isinstance(e, httpx.HTTPStatusError) else None
)
yql_value = params.get("yql", "")
yql_length = len(str(yql_value))
# Log each detail on its own line so log collectors capture them
# as separate entries rather than truncating a single multiline msg
logger.error(
f"Failed to query Vespa | "
f"status={status_code} | "
f"yql_length={yql_length} | "
f"exception={str(e)}"
)
if response_text:
logger.error(f"Vespa error response: {response_text[:1000]}")
logger.error(f"Vespa request URL: {e.request.url}")
# Re-raise with diagnostics so callers see what actually went wrong
raise httpx.HTTPError(
f"Failed to query Vespa (status={status_code}, " f"yql_length={yql_length})"
) from e
response_json: dict[str, Any] = response.json()

View File

@@ -465,6 +465,12 @@ class VespaIndex(DocumentIndex):
chunks: list[DocMetadataAwareIndexChunk],
index_batch_params: IndexBatchParams,
) -> set[OldDocumentInsertionRecord]:
"""
NOTE: Do NOT consider the secondary index here. A separate indexing
pipeline will be responsible for indexing to the secondary index. This
design is not ideal and we should reconsider this when revamping index
swapping.
"""
if len(index_batch_params.doc_id_to_previous_chunk_cnt) != len(
index_batch_params.doc_id_to_new_chunk_cnt
):
@@ -659,6 +665,10 @@ class VespaIndex(DocumentIndex):
"""Note: if the document id does not exist, the update will be a no-op and the
function will complete with no errors or exceptions.
Handle other exceptions if you wish to implement retry behavior
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for updating chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
if fields is None and user_fields is None:
logger.warning(
@@ -679,13 +689,6 @@ class VespaIndex(DocumentIndex):
f"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {tenant_id}."
)
vespa_document_index = VespaDocumentIndex(
index_name=self.index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.large_chunks_enabled,
httpx_client=self.httpx_client,
)
project_ids: set[int] | None = None
if user_fields is not None and user_fields.user_projects is not None:
project_ids = set(user_fields.user_projects)
@@ -705,7 +708,20 @@ class VespaIndex(DocumentIndex):
persona_ids=persona_ids,
)
vespa_document_index.update([update_request])
indices = [self.index_name]
if self.secondary_index_name:
indices.append(self.secondary_index_name)
for index_name in indices:
vespa_document_index = VespaDocumentIndex(
index_name=index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.index_to_large_chunks_enabled.get(
index_name, False
),
httpx_client=self.httpx_client,
)
vespa_document_index.update([update_request])
def delete_single(
self,
@@ -714,6 +730,11 @@ class VespaIndex(DocumentIndex):
tenant_id: str,
chunk_count: int | None,
) -> int:
"""
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for deleting chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
tenant_state = TenantState(
tenant_id=get_current_tenant_id(),
multitenant=MULTI_TENANT,
@@ -726,13 +747,25 @@ class VespaIndex(DocumentIndex):
raise ValueError(
f"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {tenant_id}."
)
vespa_document_index = VespaDocumentIndex(
index_name=self.index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.large_chunks_enabled,
httpx_client=self.httpx_client,
)
return vespa_document_index.delete(document_id=doc_id, chunk_count=chunk_count)
indices = [self.index_name]
if self.secondary_index_name:
indices.append(self.secondary_index_name)
total_chunks_deleted = 0
for index_name in indices:
vespa_document_index = VespaDocumentIndex(
index_name=index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.index_to_large_chunks_enabled.get(
index_name, False
),
httpx_client=self.httpx_client,
)
total_chunks_deleted += vespa_document_index.delete(
document_id=doc_id, chunk_count=chunk_count
)
return total_chunks_deleted
def id_based_retrieval(
self,

View File

@@ -52,7 +52,9 @@ def replace_invalid_doc_id_characters(text: str) -> str:
return text.replace("'", "_")
def get_vespa_http_client(no_timeout: bool = False, http2: bool = True) -> httpx.Client:
def get_vespa_http_client(
no_timeout: bool = False, http2: bool = True, timeout: int | None = None
) -> httpx.Client:
"""
Configures and returns an HTTP client for communicating with Vespa,
including authentication if needed.
@@ -64,7 +66,7 @@ def get_vespa_http_client(no_timeout: bool = False, http2: bool = True) -> httpx
else None
),
verify=False if not MANAGED_VESPA else True,
timeout=None if no_timeout else VESPA_REQUEST_TIMEOUT,
timeout=None if no_timeout else (timeout or VESPA_REQUEST_TIMEOUT),
http2=http2,
)
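Timeout resolution under the new signature, as a sketch: no_timeout still wins, then an explicit timeout, then the default.
# Sketch of the precedence implied by the expression above:
#   get_vespa_http_client(no_timeout=True)                           -> timeout=None
#   get_vespa_http_client(timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S) -> that value
#   get_vespa_http_client()                                          -> VESPA_REQUEST_TIMEOUT
client = get_vespa_http_client(timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S)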

View File

@@ -23,11 +23,8 @@ from shared_configs.configs import MULTI_TENANT
logger = setup_logger()
def build_tenant_id_filter(tenant_id: str, include_trailing_and: bool = False) -> str:
filter_str = f'({TENANT_ID} contains "{tenant_id}")'
if include_trailing_and:
filter_str += " and "
return filter_str
def build_tenant_id_filter(tenant_id: str) -> str:
return f'({TENANT_ID} contains "{tenant_id}")'
def build_vespa_filters(
@@ -37,30 +34,38 @@ def build_vespa_filters(
remove_trailing_and: bool = False, # Set to True when using as a complete Vespa query
) -> str:
def _build_or_filters(key: str, vals: list[str] | None) -> str:
"""For string-based 'contains' filters, e.g. WSET fields or array<string> fields."""
"""For string-based 'contains' filters, e.g. WSET fields or array<string> fields.
Returns a bare clause like '(key contains "v1" or key contains "v2")' or ""."""
if not key or not vals:
return ""
eq_elems = [f'{key} contains "{val}"' for val in vals if val]
if not eq_elems:
return ""
or_clause = " or ".join(eq_elems)
return f"({or_clause}) and "
return f"({' or '.join(eq_elems)})"
def _build_weighted_set_filter(key: str, vals: list[str] | None) -> str:
"""Build a Vespa weightedSet filter for large value lists.
Uses Vespa's native weightedSet() operator instead of OR-chained
'contains' clauses. This is critical for fields like
access_control_list where a single user may have tens of thousands
of ACL entries — OR clauses at that scale cause Vespa to reject
the query with HTTP 400."""
if not key or not vals:
return ""
filtered = [val for val in vals if val]
if not filtered:
return ""
items = ", ".join(f'"{val}":1' for val in filtered)
return f"weightedSet({key}, {{{items}}})"
def _build_int_or_filters(key: str, vals: list[int] | None) -> str:
"""
For an integer field filter.
If vals is not None, we want *only* docs whose key matches one of vals.
"""
# If `vals` is None => skip the filter entirely
"""For an integer field filter.
Returns a bare clause or ""."""
if vals is None or not vals:
return ""
# Otherwise build the OR filter
eq_elems = [f"{key} = {val}" for val in vals]
or_clause = " or ".join(eq_elems)
result = f"({or_clause}) and "
return result
return f"({' or '.join(eq_elems)})"
def _build_kg_filter(
kg_entities: list[str] | None,
@@ -73,16 +78,12 @@ def build_vespa_filters(
combined_filter_parts = []
def _build_kge(entity: str) -> str:
# TYPE-SUBTYPE::ID -> "TYPE-SUBTYPE::ID"
# TYPE-SUBTYPE::* -> ({prefix: true}"TYPE-SUBTYPE")
# TYPE::* -> ({prefix: true}"TYPE")
GENERAL = "::*"
if entity.endswith(GENERAL):
return f'({{prefix: true}}"{entity.split(GENERAL, 1)[0]}")'
else:
return f'"{entity}"'
# OR the entities (given the new design)
if kg_entities:
filter_parts = []
for kg_entity in kg_entities:
@@ -104,8 +105,7 @@ def build_vespa_filters(
# TODO: remove kg terms entirely from prompts and codebase
# AND the combined filter parts
return f"({' and '.join(combined_filter_parts)}) and "
return f"({' and '.join(combined_filter_parts)})"
def _build_kg_source_filters(
kg_sources: list[str] | None,
@@ -114,16 +114,14 @@ def build_vespa_filters(
return ""
source_phrases = [f'{DOCUMENT_ID} contains "{source}"' for source in kg_sources]
return f"({' or '.join(source_phrases)}) and "
return f"({' or '.join(source_phrases)})"
def _build_kg_chunk_id_zero_only_filter(
kg_chunk_id_zero_only: bool,
) -> str:
if not kg_chunk_id_zero_only:
return ""
return "(chunk_id = 0 ) and "
return "(chunk_id = 0)"
def _build_time_filter(
cutoff: datetime | None,
@@ -135,8 +133,8 @@ def build_vespa_filters(
cutoff_secs = int(cutoff.timestamp())
if include_untimed:
return f"!({DOC_UPDATED_AT} < {cutoff_secs}) and "
return f"({DOC_UPDATED_AT} >= {cutoff_secs}) and "
return f"!({DOC_UPDATED_AT} < {cutoff_secs})"
return f"({DOC_UPDATED_AT} >= {cutoff_secs})"
def _build_user_project_filter(
project_id: int | None,
@@ -147,8 +145,7 @@ def build_vespa_filters(
pid = int(project_id)
except Exception:
return ""
# Vespa YQL 'contains' expects a string literal; quote the integer
return f'({USER_PROJECT} contains "{pid}") and '
return f'({USER_PROJECT} contains "{pid}")'
def _build_persona_filter(
persona_id: int | None,
@@ -160,73 +157,99 @@ def build_vespa_filters(
except Exception:
logger.warning(f"Invalid persona ID: {persona_id}")
return ""
return f'({PERSONAS} contains "{pid}") and '
return f'({PERSONAS} contains "{pid}")'
# Start building the filter string
filter_str = f"!({HIDDEN}=true) and " if not include_hidden else ""
def _append(parts: list[str], clause: str) -> None:
if clause:
parts.append(clause)
# Collect all top-level filter clauses, then join with " and " at the end.
filter_parts: list[str] = []
if not include_hidden:
filter_parts.append(f"!({HIDDEN}=true)")
# TODO: add error condition if MULTI_TENANT and no tenant_id filter is set
# If running in multi-tenant mode
if filters.tenant_id and MULTI_TENANT:
filter_str += build_tenant_id_filter(
filters.tenant_id, include_trailing_and=True
)
filter_parts.append(build_tenant_id_filter(filters.tenant_id))
# ACL filters
# ACL filters — use weightedSet for efficient matching against the
# access_control_list weightedset<string> field. OR-chaining thousands
# of 'contains' clauses causes Vespa to reject the query (HTTP 400)
# for users with large numbers of external permission groups.
if filters.access_control_list is not None:
filter_str += _build_or_filters(
ACCESS_CONTROL_LIST, filters.access_control_list
_append(
filter_parts,
_build_weighted_set_filter(
ACCESS_CONTROL_LIST, filters.access_control_list
),
)
# Source type filters
source_strs = (
[s.value for s in filters.source_type] if filters.source_type else None
)
filter_str += _build_or_filters(SOURCE_TYPE, source_strs)
_append(filter_parts, _build_or_filters(SOURCE_TYPE, source_strs))
# Tag filters
tag_attributes = None
if filters.tags:
# build e.g. "tag_key|tag_value"
tag_attributes = [
f"{tag.tag_key}{INDEX_SEPARATOR}{tag.tag_value}" for tag in filters.tags
]
filter_str += _build_or_filters(METADATA_LIST, tag_attributes)
_append(filter_parts, _build_or_filters(METADATA_LIST, tag_attributes))
# Document sets
filter_str += _build_or_filters(DOCUMENT_SETS, filters.document_set)
# Knowledge scope: explicit knowledge attachments (document_sets,
# user_file_ids) restrict what an assistant can see. When none are
# set, the assistant can see everything.
#
# project_id / persona_id are additive: they make overflowing user
# files findable in Vespa but must NOT trigger the restriction on
# their own (an agent with no explicit knowledge should search
# everything).
knowledge_scope_parts: list[str] = []
_append(
knowledge_scope_parts, _build_or_filters(DOCUMENT_SETS, filters.document_set)
)
# Convert UUIDs to strings for user_file_ids
user_file_ids_str = (
[str(uuid) for uuid in filters.user_file_ids] if filters.user_file_ids else None
)
filter_str += _build_or_filters(DOCUMENT_ID, user_file_ids_str)
_append(knowledge_scope_parts, _build_or_filters(DOCUMENT_ID, user_file_ids_str))
# User project filter (array<int> attribute membership)
filter_str += _build_user_project_filter(filters.project_id)
# Only include project/persona scopes when an explicit knowledge
# restriction is already in effect — they widen the scope to also
# cover overflowing user files but never restrict on their own.
if knowledge_scope_parts:
_append(knowledge_scope_parts, _build_user_project_filter(filters.project_id))
_append(knowledge_scope_parts, _build_persona_filter(filters.persona_id))
# Persona filter (array<int> attribute membership)
filter_str += _build_persona_filter(filters.persona_id)
if len(knowledge_scope_parts) > 1:
filter_parts.append("(" + " or ".join(knowledge_scope_parts) + ")")
elif len(knowledge_scope_parts) == 1:
filter_parts.append(knowledge_scope_parts[0])
# Time filter
filter_str += _build_time_filter(filters.time_cutoff)
_append(filter_parts, _build_time_filter(filters.time_cutoff))
# # Knowledge Graph Filters
# filter_str += _build_kg_filter(
# _append(filter_parts, _build_kg_filter(
# kg_entities=filters.kg_entities,
# kg_relationships=filters.kg_relationships,
# kg_terms=filters.kg_terms,
# )
# ))
# filter_str += _build_kg_source_filters(filters.kg_sources)
# _append(filter_parts, _build_kg_source_filters(filters.kg_sources))
# filter_str += _build_kg_chunk_id_zero_only_filter(
# _append(filter_parts, _build_kg_chunk_id_zero_only_filter(
# filters.kg_chunk_id_zero_only or False
# )
# ))
# Trim trailing " and "
if remove_trailing_and and filter_str.endswith(" and "):
filter_str = filter_str[:-5]
filter_str = " and ".join(filter_parts)
if filter_str and not remove_trailing_and:
filter_str += " and "
return filter_str
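As an illustration of the new joining behavior, a query with ACLs, one source type, and a time cutoff produces a string roughly like the comment below (field names and values are examples only); the trailing " and " is kept because remove_trailing_and defaults to False.
# Illustrative output of build_vespa_filters (names/values are examples only):
#   !(hidden=true) and
#   (tenant_id contains "tenant-1") and
#   weightedSet(access_control_list, {"user_email:alice@example.com":1, "group:eng":1}) and
#   (source_type contains "web") and
#   (doc_updated_at >= 1735689600) and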

View File

@@ -0,0 +1,101 @@
"""
Standardized error codes for the Onyx backend.
Usage:
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
raise OnyxError(OnyxErrorCode.UNAUTHENTICATED, "Token expired")
"""
from enum import Enum
class OnyxErrorCode(Enum):
"""
Each member is a tuple of (error_code_string, http_status_code).
The error_code_string is a stable, machine-readable identifier that
API consumers can match on. The http_status_code is the default HTTP
status to return.
"""
# ------------------------------------------------------------------
# Authentication (401)
# ------------------------------------------------------------------
UNAUTHENTICATED = ("UNAUTHENTICATED", 401)
INVALID_TOKEN = ("INVALID_TOKEN", 401)
TOKEN_EXPIRED = ("TOKEN_EXPIRED", 401)
CSRF_FAILURE = ("CSRF_FAILURE", 403)
# ------------------------------------------------------------------
# Authorization (403)
# ------------------------------------------------------------------
UNAUTHORIZED = ("UNAUTHORIZED", 403)
INSUFFICIENT_PERMISSIONS = ("INSUFFICIENT_PERMISSIONS", 403)
ADMIN_ONLY = ("ADMIN_ONLY", 403)
EE_REQUIRED = ("EE_REQUIRED", 403)
# ------------------------------------------------------------------
# Validation / Bad Request (400)
# ------------------------------------------------------------------
VALIDATION_ERROR = ("VALIDATION_ERROR", 400)
INVALID_INPUT = ("INVALID_INPUT", 400)
MISSING_REQUIRED_FIELD = ("MISSING_REQUIRED_FIELD", 400)
# ------------------------------------------------------------------
# Not Found (404)
# ------------------------------------------------------------------
NOT_FOUND = ("NOT_FOUND", 404)
CONNECTOR_NOT_FOUND = ("CONNECTOR_NOT_FOUND", 404)
CREDENTIAL_NOT_FOUND = ("CREDENTIAL_NOT_FOUND", 404)
PERSONA_NOT_FOUND = ("PERSONA_NOT_FOUND", 404)
DOCUMENT_NOT_FOUND = ("DOCUMENT_NOT_FOUND", 404)
SESSION_NOT_FOUND = ("SESSION_NOT_FOUND", 404)
USER_NOT_FOUND = ("USER_NOT_FOUND", 404)
# ------------------------------------------------------------------
# Conflict (409)
# ------------------------------------------------------------------
CONFLICT = ("CONFLICT", 409)
DUPLICATE_RESOURCE = ("DUPLICATE_RESOURCE", 409)
# ------------------------------------------------------------------
# Rate Limiting / Quotas (429 / 402)
# ------------------------------------------------------------------
RATE_LIMITED = ("RATE_LIMITED", 429)
SEAT_LIMIT_EXCEEDED = ("SEAT_LIMIT_EXCEEDED", 402)
# ------------------------------------------------------------------
# Connector / Credential Errors (400-range)
# ------------------------------------------------------------------
CONNECTOR_VALIDATION_FAILED = ("CONNECTOR_VALIDATION_FAILED", 400)
CREDENTIAL_INVALID = ("CREDENTIAL_INVALID", 400)
CREDENTIAL_EXPIRED = ("CREDENTIAL_EXPIRED", 401)
# ------------------------------------------------------------------
# Server Errors (5xx)
# ------------------------------------------------------------------
INTERNAL_ERROR = ("INTERNAL_ERROR", 500)
NOT_IMPLEMENTED = ("NOT_IMPLEMENTED", 501)
SERVICE_UNAVAILABLE = ("SERVICE_UNAVAILABLE", 503)
BAD_GATEWAY = ("BAD_GATEWAY", 502)
LLM_PROVIDER_ERROR = ("LLM_PROVIDER_ERROR", 502)
GATEWAY_TIMEOUT = ("GATEWAY_TIMEOUT", 504)
def __init__(self, code: str, status_code: int) -> None:
self.code = code
self.status_code = status_code
def detail(self, message: str | None = None) -> dict[str, str]:
"""Build a structured error detail dict.
Returns a dict like:
{"error_code": "UNAUTHENTICATED", "detail": "Token expired"}
If no message is supplied, the error code itself is used as the detail.
"""
return {
"error_code": self.code,
"detail": message or self.code,
}
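A minimal usage sketch of the enum members and the detail() helper:
# Minimal usage sketch.
code = OnyxErrorCode.UNAUTHENTICATED
print(code.code)         # "UNAUTHENTICATED"
print(code.status_code)  # 401
print(code.detail("Token expired"))
# -> {"error_code": "UNAUTHENTICATED", "detail": "Token expired"}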

View File

@@ -0,0 +1,83 @@
"""OnyxError — the single exception type for all Onyx business errors.
Raise ``OnyxError`` instead of ``HTTPException`` in business code. A global
FastAPI exception handler (registered via ``register_onyx_exception_handlers``)
converts it into a JSON response with the standard
``{"error_code": "...", "detail": "..."}`` shape.
Usage::
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
For upstream errors with a dynamic HTTP status (e.g. billing service),
use ``status_code_override``::
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY,
detail,
status_code_override=upstream_status,
)
"""
from fastapi import FastAPI
from fastapi import Request
from fastapi.responses import JSONResponse
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.utils.logger import setup_logger
logger = setup_logger()
class OnyxError(Exception):
"""Structured error that maps to a specific ``OnyxErrorCode``.
Attributes:
error_code: The ``OnyxErrorCode`` enum member.
detail: Human-readable detail (defaults to the error code string).
status_code: HTTP status — either overridden or from the error code.
"""
def __init__(
self,
error_code: OnyxErrorCode,
detail: str | None = None,
*,
status_code_override: int | None = None,
) -> None:
resolved_detail = detail or error_code.code
super().__init__(resolved_detail)
self.error_code = error_code
self.detail = resolved_detail
self._status_code_override = status_code_override
@property
def status_code(self) -> int:
return self._status_code_override or self.error_code.status_code
def register_onyx_exception_handlers(app: FastAPI) -> None:
"""Register a global handler that converts ``OnyxError`` to JSON responses.
Must be called *after* the app is created but *before* it starts serving.
The handler logs at WARNING for 4xx and ERROR for 5xx.
"""
@app.exception_handler(OnyxError)
async def _handle_onyx_error(
request: Request, # noqa: ARG001
exc: OnyxError,
) -> JSONResponse:
status_code = exc.status_code
if status_code >= 500:
logger.error(f"OnyxError {exc.error_code.code}: {exc.detail}")
elif status_code >= 400:
logger.warning(f"OnyxError {exc.error_code.code}: {exc.detail}")
return JSONResponse(
status_code=status_code,
content=exc.error_code.detail(exc.detail),
)
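A sketch of the end-to-end flow: register the handler once when the app is constructed, raise OnyxError anywhere in business code, and the client gets the standard error shape. The route below is illustrative.
# Sketch: wiring plus the resulting response shape.
app = FastAPI()
register_onyx_exception_handlers(app)

@app.get("/sessions/{session_id}")
async def get_session(session_id: str) -> dict:
    raise OnyxError(OnyxErrorCode.SESSION_NOT_FOUND, f"Session {session_id} not found")
    # -> HTTP 404 with body {"error_code": "SESSION_NOT_FOUND", "detail": "Session <id> not found"}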

View File

@@ -4,39 +4,33 @@ import base64
import json
import uuid
from typing import Any
from typing import cast
from typing import Dict
from typing import Optional
from onyx.cache.factory import get_cache_backend
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Redis key prefix for OAuth state
OAUTH_STATE_PREFIX = "federated_oauth"
# Default TTL for OAuth state (5 minutes)
OAUTH_STATE_TTL = 300
OAUTH_STATE_TTL = 300 # 5 minutes
class OAuthSession:
"""Represents an OAuth session stored in Redis."""
"""Represents an OAuth session stored in the cache backend."""
def __init__(
self,
federated_connector_id: int,
user_id: str,
redirect_uri: Optional[str] = None,
additional_data: Optional[Dict[str, Any]] = None,
redirect_uri: str | None = None,
additional_data: dict[str, Any] | None = None,
):
self.federated_connector_id = federated_connector_id
self.user_id = user_id
self.redirect_uri = redirect_uri
self.additional_data = additional_data or {}
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for Redis storage."""
def to_dict(self) -> dict[str, Any]:
return {
"federated_connector_id": self.federated_connector_id,
"user_id": self.user_id,
@@ -45,8 +39,7 @@ class OAuthSession:
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "OAuthSession":
"""Create from dictionary retrieved from Redis."""
def from_dict(cls, data: dict[str, Any]) -> "OAuthSession":
return cls(
federated_connector_id=data["federated_connector_id"],
user_id=data["user_id"],
@@ -58,31 +51,27 @@ class OAuthSession:
def generate_oauth_state(
federated_connector_id: int,
user_id: str,
redirect_uri: Optional[str] = None,
additional_data: Optional[Dict[str, Any]] = None,
redirect_uri: str | None = None,
additional_data: dict[str, Any] | None = None,
ttl: int = OAUTH_STATE_TTL,
) -> str:
"""
Generate a secure state parameter and store session data in Redis.
Generate a secure state parameter and store session data in the cache backend.
Args:
federated_connector_id: ID of the federated connector
user_id: ID of the user initiating OAuth
redirect_uri: Optional redirect URI after OAuth completion
additional_data: Any additional data to store with the session
ttl: Time-to-live in seconds for the Redis key
ttl: Time-to-live in seconds for the cache key
Returns:
Base64-encoded state parameter
"""
# Generate a random UUID for the state
state_uuid = uuid.uuid4()
state_b64 = base64.urlsafe_b64encode(state_uuid.bytes).decode("utf-8").rstrip("=")
# Convert UUID to base64 for URL-safe state parameter
state_bytes = state_uuid.bytes
state_b64 = base64.urlsafe_b64encode(state_bytes).decode("utf-8").rstrip("=")
# Create session object
session = OAuthSession(
federated_connector_id=federated_connector_id,
user_id=user_id,
@@ -90,15 +79,9 @@ def generate_oauth_state(
additional_data=additional_data,
)
# Store in Redis with TTL
redis_client = get_redis_client()
redis_key = f"{OAUTH_STATE_PREFIX}:{state_uuid}"
redis_client.set(
redis_key,
json.dumps(session.to_dict()),
ex=ttl,
)
cache = get_cache_backend()
cache_key = f"{OAUTH_STATE_PREFIX}:{state_uuid}"
cache.set(cache_key, json.dumps(session.to_dict()), ex=ttl)
logger.info(
f"Generated OAuth state for federated_connector_id={federated_connector_id}, "
@@ -125,18 +108,15 @@ def verify_oauth_state(state: str) -> OAuthSession:
state_bytes = base64.urlsafe_b64decode(padded_state)
state_uuid = uuid.UUID(bytes=state_bytes)
# Look up in Redis
redis_client = get_redis_client()
redis_key = f"{OAUTH_STATE_PREFIX}:{state_uuid}"
cache = get_cache_backend()
cache_key = f"{OAUTH_STATE_PREFIX}:{state_uuid}"
session_data = cast(bytes, redis_client.get(redis_key))
session_data = cache.get(cache_key)
if not session_data:
raise ValueError(f"OAuth state not found in Redis: {state}")
raise ValueError(f"OAuth state not found: {state}")
# Delete the key after retrieval (one-time use)
redis_client.delete(redis_key)
cache.delete(cache_key)
# Parse and return session
session_dict = json.loads(session_data)
return OAuthSession.from_dict(session_dict)
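A round-trip sketch of the two helpers, assuming a configured cache backend; the callback path is illustrative.
# Sketch: create a state for the OAuth redirect, then verify it on the callback.
state = generate_oauth_state(
    federated_connector_id=1,
    user_id="user-123",
    redirect_uri=f"{WEB_DOMAIN}/federated/callback",  # illustrative redirect target
)
# ... provider redirects back with ?state=<state> ...
session = verify_oauth_state(state)  # one-time use: the cache entry is deleted here
assert session.federated_connector_id == 1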

View File

@@ -19,12 +19,16 @@ class OnyxMimeTypes:
PLAIN_TEXT_MIME_TYPE,
"text/markdown",
"text/x-markdown",
"text/x-log",
"text/x-config",
"text/tab-separated-values",
"application/json",
"application/xml",
"text/xml",
"application/x-yaml",
"application/yaml",
"text/yaml",
"text/x-yaml",
}
DOCUMENT_MIME_TYPES = {
PDF_MIME_TYPE,

View File

@@ -88,9 +88,13 @@ def summarize_image_with_error_handling(
try:
return summarize_image_pipeline(llm, image_data, user_prompt, system_prompt)
except UnsupportedImageFormatError:
magic_hex = image_data[:8].hex() if image_data else "empty"
logger.info(
"Skipping image summarization due to unsupported MIME type for %s",
"Skipping image summarization due to unsupported MIME type "
"for %s (magic_bytes=%s, size=%d bytes)",
context_name,
magic_hex,
len(image_data),
)
return None
@@ -134,9 +138,23 @@ def _summarize_image(
return summary
except Exception as e:
error_msg = f"Summarization failed. Messages: {messages}"
error_msg = error_msg[:1024]
raise ValueError(error_msg) from e
# Extract structured details from LiteLLM exceptions when available,
# rather than dumping the full messages payload (which contains base64
# image data and produces enormous, unreadable error logs).
str_e = str(e)
if len(str_e) > 512:
str_e = str_e[:512] + "... (truncated)"
parts = [f"Summarization failed: {type(e).__name__}: {str_e}"]
status_code = getattr(e, "status_code", None)
llm_provider = getattr(e, "llm_provider", None)
model = getattr(e, "model", None)
if status_code is not None:
parts.append(f"status_code={status_code}")
if llm_provider is not None:
parts.append(f"llm_provider={llm_provider}")
if model is not None:
parts.append(f"model={model}")
raise ValueError(" | ".join(parts)) from e
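For illustration (values made up), a LiteLLM rate-limit failure now surfaces as a compact one-line message instead of the full messages payload:
# Illustrative ValueError message produced by the join above (values are made up):
#   Summarization failed: RateLimitError: Rate limit reached for gpt-4o ... (truncated)
#   | status_code=429 | llm_provider=openai | model=gpt-4o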
def _encode_image_for_llm_prompt(image_data: bytes) -> str:

View File

@@ -49,7 +49,6 @@ from onyx.indexing.embedder import IndexingEmbedder
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import IndexingBatchAdapter
from onyx.indexing.models import UpdatableChunkData
from onyx.indexing.postgres_sanitization import sanitize_documents_for_postgres
from onyx.indexing.vector_db_insertion import write_chunks_to_vector_db_with_backoff
from onyx.llm.factory import get_default_llm_with_vision
from onyx.llm.factory import get_llm_for_contextual_rag
@@ -65,6 +64,7 @@ from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT1
from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT2
from onyx.prompts.contextual_retrieval import DOCUMENT_SUMMARY_PROMPT
from onyx.utils.logger import setup_logger
from onyx.utils.postgres_sanitization import sanitize_documents_for_postgres
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.timing import log_function_time

View File

@@ -1,13 +1,11 @@
import json
from typing import cast
from redis.client import Redis
from onyx.cache.interface import CacheBackend
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import KVStore
from onyx.key_value_store.interface import KeyValueStore
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger
from onyx.utils.special_types import JSON_ro
@@ -20,22 +18,27 @@ KV_REDIS_KEY_EXPIRATION = 60 * 60 * 24 # 1 Day
class PgRedisKVStore(KeyValueStore):
def __init__(self, redis_client: Redis | None = None) -> None:
# If no redis_client is provided, fall back to the context var
if redis_client is not None:
self.redis_client = redis_client
else:
self.redis_client = get_redis_client()
def __init__(self, cache: CacheBackend | None = None) -> None:
self._cache = cache
def _get_cache(self) -> CacheBackend:
if self._cache is None:
from onyx.cache.factory import get_cache_backend
self._cache = get_cache_backend()
return self._cache
def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None:
# Not encrypted in Redis, but encrypted in Postgres
# Not encrypted in Cache backend (typically Redis), but encrypted in Postgres
try:
self.redis_client.set(
self._get_cache().set(
REDIS_KEY_PREFIX + key, json.dumps(val), ex=KV_REDIS_KEY_EXPIRATION
)
except Exception as e:
# Fallback gracefully to Postgres if Redis fails
logger.error(f"Failed to set value in Redis for key '{key}': {str(e)}")
# Fallback gracefully to Postgres if Cache backend fails
logger.error(
f"Failed to set value in Cache backend for key '{key}': {str(e)}"
)
encrypted_val = val if encrypt else None
plain_val = val if not encrypt else None
@@ -53,16 +56,12 @@ class PgRedisKVStore(KeyValueStore):
def load(self, key: str, refresh_cache: bool = False) -> JSON_ro:
if not refresh_cache:
try:
redis_value = self.redis_client.get(REDIS_KEY_PREFIX + key)
if redis_value:
if not isinstance(redis_value, bytes):
raise ValueError(
f"Redis value for key '{key}' is not a bytes object"
)
return json.loads(redis_value.decode("utf-8"))
cached = self._get_cache().get(REDIS_KEY_PREFIX + key)
if cached is not None:
return json.loads(cached.decode("utf-8"))
except Exception as e:
logger.error(
f"Failed to get value from Redis for key '{key}': {str(e)}"
f"Failed to get value from cache for key '{key}': {str(e)}"
)
with get_session_with_current_tenant() as db_session:
@@ -79,21 +78,21 @@ class PgRedisKVStore(KeyValueStore):
value = None
try:
self.redis_client.set(
self._get_cache().set(
REDIS_KEY_PREFIX + key,
json.dumps(value),
ex=KV_REDIS_KEY_EXPIRATION,
)
except Exception as e:
logger.error(f"Failed to set value in Redis for key '{key}': {str(e)}")
logger.error(f"Failed to set value in cache for key '{key}': {str(e)}")
return cast(JSON_ro, value)
def delete(self, key: str) -> None:
try:
self.redis_client.delete(REDIS_KEY_PREFIX + key)
self._get_cache().delete(REDIS_KEY_PREFIX + key)
except Exception as e:
logger.error(f"Failed to delete value from Redis for key '{key}': {str(e)}")
logger.error(f"Failed to delete value from cache for key '{key}': {str(e)}")
with get_session_with_current_tenant() as db_session:
result = db_session.query(KVStore).filter_by(key=key).delete()
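A small usage sketch of the store, assuming the default cache backend is available; values are cached with a one-day TTL and every cache miss or failure falls back to Postgres.
# Sketch: cache-first reads with a Postgres fallback.
kv = PgRedisKVStore()
kv.store("onboarding_complete", True)
value = kv.load("onboarding_complete")                             # served from the cache when present
fresh_value = kv.load("onboarding_complete", refresh_cache=True)   # forces the Postgres read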

View File

@@ -22,6 +22,7 @@ class LlmProviderNames(str, Enum):
OPENROUTER = "openrouter"
AZURE = "azure"
OLLAMA_CHAT = "ollama_chat"
LM_STUDIO = "lm_studio"
MISTRAL = "mistral"
LITELLM_PROXY = "litellm_proxy"
@@ -41,6 +42,8 @@ WELL_KNOWN_PROVIDER_NAMES = [
LlmProviderNames.OPENROUTER,
LlmProviderNames.AZURE,
LlmProviderNames.OLLAMA_CHAT,
LlmProviderNames.LM_STUDIO,
LlmProviderNames.LITELLM_PROXY,
]
@@ -56,6 +59,8 @@ PROVIDER_DISPLAY_NAMES: dict[str, str] = {
LlmProviderNames.AZURE: "Azure",
"ollama": "Ollama",
LlmProviderNames.OLLAMA_CHAT: "Ollama",
LlmProviderNames.LM_STUDIO: "LM Studio",
LlmProviderNames.LITELLM_PROXY: "LiteLLM Proxy",
"groq": "Groq",
"anyscale": "Anyscale",
"deepseek": "DeepSeek",
@@ -103,8 +108,10 @@ AGGREGATOR_PROVIDERS: set[str] = {
LlmProviderNames.BEDROCK_CONVERSE,
LlmProviderNames.OPENROUTER,
LlmProviderNames.OLLAMA_CHAT,
LlmProviderNames.LM_STUDIO,
LlmProviderNames.VERTEX_AI,
LlmProviderNames.AZURE,
LlmProviderNames.LITELLM_PROXY,
}
# Model family name mappings for display name generation

Some files were not shown because too many files have changed in this diff.