Compare commits

...

198 Commits

Author SHA1 Message Date
Dane Urban
908d360011 . 2026-02-06 17:52:23 -08:00
Dane Urban
30578bdf9a n 2026-02-06 17:38:36 -08:00
Dane Urban
aebde89432 nits 2026-02-06 16:25:00 -08:00
Dane Urban
4a4b4bb378 t 2026-02-06 13:39:05 -08:00
Dane Urban
a8d231976a nit 2026-02-06 09:56:16 -08:00
Dane Urban
9c8ae5bb4b nit 2026-02-05 17:07:24 -08:00
Dane Urban
0fc1fa3d36 nits 2026-02-05 10:28:59 -08:00
Dane Urban
94633698c3 nit 2026-02-03 00:42:20 -08:00
Dane Urban
6ae15589cd nits 2026-02-02 18:56:22 -08:00
Dane Urban
c24a8bb228 Add change 2026-02-02 18:55:38 -08:00
Dane Urban
01945abd86 fix test 2026-02-02 16:49:31 -08:00
Dane Urban
658632195f nit 2026-02-02 16:47:21 -08:00
Dane Urban
ec6fd01ba4 Merge branch 'llm_provider_refactor_1' into llm_provider_refactor_2 2026-02-02 15:02:12 -08:00
Dane Urban
148e6fb97d nit 2026-02-02 15:01:57 -08:00
Dane Urban
6598c1a48d nit 2026-02-02 14:59:42 -08:00
Dane Urban
497ce43bd8 Fix some tests 2026-02-02 13:36:42 -08:00
Dane Urban
8634cb0446 Merge branch 'llm_provider_refactor_1' into llm_provider_refactor_2 2026-02-02 13:28:29 -08:00
Dane Urban
8d56fd3dc6 . 2026-02-02 13:27:08 -08:00
Dane Urban
a7579a99d0 Resolve merge conflicts 2026-02-02 12:01:44 -08:00
Dane Urban
3533c10da4 n 2026-02-02 11:48:28 -08:00
Dane Urban
7b0414bf0d fix migration 2026-02-02 11:48:08 -08:00
Dane Urban
b500ea537a nits 2026-02-02 11:46:52 -08:00
Dane Urban
abd6d55add Merge branch 'flow_mapping_table' into llm_provider_refactor_1 2026-02-02 11:44:27 -08:00
Dane Urban
f15b6b8034 Merge branch 'main' into llm_provider_refactor_1 2026-02-02 11:44:17 -08:00
Dane Urban
fb40485f25 Update this 2026-02-02 11:43:58 -08:00
Dane Urban
22e85f1f28 Merge branch 'main' into flow_mapping_table 2026-02-02 11:43:24 -08:00
Dane Urban
2ef7c3e6f3 rename 2026-02-02 11:40:21 -08:00
Dane Urban
92a471ed2b . 2026-02-02 11:35:09 -08:00
Dane Urban
d1b7e529a4 nit 2026-02-02 11:32:33 -08:00
Dane Urban
95c3579264 nits 2026-02-02 11:19:51 -08:00
Dane Urban
8802e5cad3 nit 2026-02-02 11:02:58 -08:00
victoria reese
df7ab6841a fix: resolve pod label duplication (#8098)
Co-authored-by: victoria-reese_wwg <victoria.reese@grainger.com>
2026-02-02 18:45:00 +00:00
Raunak Bhagat
2131c86c16 refactor: More app header cleanups (#8097)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 18:36:16 +00:00
acaprau
7d1b9e4356 chore(opensearch): Migration 2- Introduce external dependency tests (#8045)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-02-02 17:35:49 +00:00
SubashMohan
38e92308ec feat(chat): new agent timeline blocks (#8101) 2026-02-02 14:53:20 +00:00
Dane Urban
a41b4bbc82 fix tests 2026-02-01 22:59:15 -08:00
Dane Urban
c026c077b5 nit 2026-02-01 22:53:38 -08:00
Dane Urban
3eee539a86 Merge branch 'llm_provider_refactor_1' into llm_provider_refactor_2 2026-02-01 22:13:54 -08:00
Dane Urban
143e7a0d72 nits 2026-02-01 22:13:21 -08:00
Dane Urban
4572358038 nits 2026-02-01 22:10:37 -08:00
Dane Urban
1753f94c11 start fixes 2026-02-01 21:51:02 -08:00
Dane Urban
120ddf2ef6 Merge branch 'llm_provider_refactor_1' into llm_provider_refactor_2 2026-02-01 21:42:40 -08:00
Dane Urban
2cce5bc58f Merge branch 'main' into flow_mapping_table 2026-02-01 21:38:54 -08:00
Dane Urban
383a6001d2 nit 2026-02-01 21:37:35 -08:00
Dane Urban
3a6f45bfca Merge branch 'main' into llm_provider_refactor_1 2026-02-01 19:36:43 -08:00
Danelegend
2444b59070 chore(provider): add more integration tests for provider flow (#8099) 2026-02-01 19:35:55 -08:00
Yuhong Sun
49771945e1 chore: DR to run more than 1 cycle typically (#8100) 2026-02-01 17:17:50 -08:00
Dane Urban
e06b5ef202 Merge branch 'flow_mapping_table' into llm_provider_refactor_1 2026-02-01 15:23:59 -08:00
Justin Tahara
15f0bc9c3d fix(ui): Agent Saving with other people files (#8095) 2026-02-01 22:38:45 +00:00
Justin Tahara
963b172a09 fix(ui): Cleanup Card Span (#8094) 2026-02-01 22:26:25 +00:00
Dane Urban
c13ce816fa fix revision id 2026-02-01 13:55:01 -08:00
Dane Urban
39f3e872ec Merge branch 'main' into flow_mapping_table 2026-02-01 13:53:53 -08:00
Dane Urban
b033c00217 . 2026-02-01 13:52:58 -08:00
Dane Urban
6d47c5f21a nit 2026-02-01 13:51:54 -08:00
Justin Tahara
dc2bf20a8d fix(ui): Ollama Model Selection (#8091) 2026-02-01 21:29:57 +00:00
Raunak Bhagat
d29f1efec0 refactor: hooks (#8089)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 20:26:40 +00:00
Justin Tahara
13d1c3d86a feat(desktop): Ensure that UI reflects Light/Dark Toggle (#7684) 2026-02-01 20:03:42 +00:00
Wenxi
adc6773f9f fix(craft): attempt to solve hanging with explicit k8s_stream timeout (#8066) 2026-02-01 17:51:25 +00:00
Raunak Bhagat
a819482749 refactor: Update Hoverable to be more adherent to the mocks (#8083) 2026-02-01 08:53:53 +00:00
Raunak Bhagat
f660f9f447 refactor: Update InputSelect implementation (#8076) 2026-02-01 08:53:43 +00:00
Yuhong Sun
26f9574364 chore: Web query sanitize (#8085) 2026-01-31 23:44:55 -08:00
Dane Urban
0645540e24 . 2026-01-31 23:44:17 -08:00
Yuhong Sun
9fa17c7713 chore: remove long term log (#8084) 2026-01-31 23:42:47 -08:00
Yuhong Sun
1af484503e chore: ASCII in docs (#8082) 2026-01-31 23:16:38 -08:00
Yuhong Sun
55276be061 chore: ensure ascii false (#8081) 2026-01-31 23:11:53 -08:00
Yuhong Sun
4bb02459ae chore: DR tool tuning (#8080) 2026-01-31 22:57:58 -08:00
Yuhong Sun
7109aea897 chore: OpenURL sometimes gives too many tokens (#8079) 2026-01-31 22:23:33 -08:00
Yuhong Sun
8ce4cfc302 chore: Tune web search (#8078) 2026-01-31 21:29:12 -08:00
Yuhong Sun
0f75de9687 chore: hopefully help LLM not spam web queries (#8075) 2026-01-31 20:21:22 -08:00
acaprau
9782fcb0b9 feat(opensearch): Migration 1 - Introduce and implement migration tasks (#8014)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-02-01 03:35:34 +00:00
Yuhong Sun
ec2a35b3a4 chore: DR edge case (#8074) 2026-01-31 19:26:07 -08:00
Dane Urban
a2c0fc4df0 . 2026-01-31 19:23:46 -08:00
Raunak Bhagat
9815c2c8d9 refactor: Clean up app page rendering logic (#8060) 2026-02-01 03:15:21 +00:00
acaprau
8c3e3a6e02 chore(tests): Fix name for test_expire_oauth_token, loosen timing bounds a bit (#8067) 2026-02-01 03:04:49 +00:00
acaprau
726c6232a5 feat(opensearch): Migration 0 - Introduce db tables, alembic migration, db model utils (#8013) 2026-02-01 02:36:16 +00:00
Evan Lohn
f9d41ff1da feat(filesys): initial confluence hierarchy impl (#7932) 2026-02-01 02:28:55 +00:00
Dane Urban
7dccc88b35 . 2026-01-31 18:24:42 -08:00
Evan Lohn
eb3eb83c95 chore: ban chat-tempmail (#8063) 2026-02-01 01:59:49 +00:00
Evan Lohn
e4b9ef176f fix: attaching user files to assistant (#8061) 2026-02-01 01:33:22 +00:00
Dane Urban
ac617a51ce nits 2026-01-31 17:30:49 -08:00
trial2onyx
d18dd62641 fix(chat): reduce scroll container bottom margin (#8048)
Co-authored-by: Onyx Trialee 2 <onyxtrial2@Onyxs-MBP.attlocal.net>
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-01 00:26:48 +00:00
Yuhong Sun
96224164ca chore: Some settings for DR for evals (#8058) 2026-01-31 15:44:39 -08:00
Wenxi
78cec7c9e9 refactor(craft): make usage limit overrides feature flags instead of env vars (#8056) 2026-01-31 23:35:14 +00:00
Wenxi
8fa7002826 chore(craft): bump sandbox image default value (#8055) 2026-01-31 23:10:53 +00:00
Nikolas Garza
921305f8ff feat(billing): add circuit breaker, license re-claim, and seats to checkout (#8005) 2026-01-31 22:18:27 +00:00
Raunak Bhagat
71148dd880 refactor: Consolidate duplicated AppHeader components into one (#8054)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-31 22:07:57 +00:00
Nikolas Garza
ac26ba6c2d chore: remove license cache invalidation from multi-tenant (#8052) 2026-01-31 19:47:34 +00:00
Raunak Bhagat
24584d4067 fix: Consolidate providers into one central location (#8032) 2026-01-31 13:46:10 +00:00
Wenxi
39d8d1db0c fix: optional dependency for /me (#8042) 2026-01-31 03:06:01 +00:00
trial2onyx
17824c5d92 refactor(chat): move loading indicator to content area (#8039)
Co-authored-by: Onyx Trialee 2 <onyxtrial2@Onyxs-MBP.attlocal.net>
2026-01-31 02:23:15 +00:00
roshan
eba89fa635 fix(craft): idle sandbox cleanup (#8041) 2026-01-31 02:20:12 +00:00
Dane Urban
339a111a8f . 2026-01-30 18:19:03 -08:00
Dane Urban
09b7e6fc9b fix revision id 2026-01-30 17:39:02 -08:00
Dane Urban
135238014f Merge branch 'main' into flow_mapping_table 2026-01-30 17:38:20 -08:00
Dane Urban
303e37bf53 migrate 2026-01-30 17:38:15 -08:00
Nikolas Garza
53f4025a23 feat(components): add InputNumber with increment/decrement controls (#8003) 2026-01-31 01:17:38 +00:00
Dane Urban
6a888e9900 nit 2026-01-30 17:01:22 -08:00
Wenxi
9159b159fa fix: troll discord assertion (#8038) 2026-01-31 00:46:48 +00:00
Jamison Lahman
d7a22b916b fix(fe): polish chat UI with custom background (#8016)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-01-31 00:25:59 +00:00
roshan
97d90a82f8 fix(craft): files stuff (#8037) 2026-01-31 00:16:33 +00:00
Nikolas Garza
d9cf5afee8 fix(ee): use set(ex=) instead of setex() for license cache updates (#8004) 2026-01-30 16:16:40 -08:00
Dane Urban
e90a7767c6 nit 2026-01-30 15:35:31 -08:00
Wenxi
ce43dee20f fix: discord connector tests (#8036) 2026-01-30 23:32:09 +00:00
Justin Tahara
90ac23a564 fix(ui): Updating Dropdown Modal component (#8033) 2026-01-30 23:00:52 +00:00
Jamison Lahman
d9f97090d5 chore(gha): build desktop app in CI (#7996) 2026-01-30 22:54:28 +00:00
Raunak Bhagat
2661e27741 feat: Add new tag icon (#8029) 2026-01-30 22:33:10 +00:00
Wenxi
0481b61f8d refactor: craft onboarding ease (#8030) 2026-01-30 22:28:03 +00:00
Dane Urban
1ded3af63c nit 2026-01-30 14:22:27 -08:00
roshan
6d12c9c430 fix(craft): clear env vars from all sandboxes in file_sync pods (#8028) 2026-01-30 22:05:57 +00:00
Justin Tahara
b81dd6f4a3 fix(desktop): Remove Global Shortcuts (#7914) 2026-01-30 21:19:55 +00:00
Dane Urban
c53546c000 nit 2026-01-30 13:03:05 -08:00
Dane Urban
9afa12edda nit 2026-01-30 13:02:48 -08:00
Dane Urban
32046de962 nit 2026-01-30 13:01:36 -08:00
Justin Tahara
f9a648bb5f fix(asana): Workspace Team ID mismatch (#7674) 2026-01-30 20:52:21 +00:00
Raunak Bhagat
e9be9101e5 fix: Add explicit sizings to icons (#8018) 2026-01-30 20:48:14 +00:00
Danelegend
e670bd994b feat(persona): Add default_model_configuration_id column (#8020) 2026-01-30 20:44:03 +00:00
Chris Weaver
a48d74c7fd fix: onboarding model specification (#8019) 2026-01-30 19:57:11 +00:00
Evan Lohn
0e76ae3423 feat: notion connector hierarchynodes (#7931) 2026-01-30 19:28:34 +00:00
Evan Lohn
37bfa5833b fix: race conditions in drive hiernodes (#8017) 2026-01-30 18:30:05 +00:00
Wenxi
6c46fcd651 chore: dev env template defaults (#8015) 2026-01-30 18:05:36 +00:00
roshan
7700674b15 chore: launch.json web server uses .env.web (#7993) 2026-01-30 17:36:32 +00:00
Evan Lohn
4ac6ff633a feat(filesys): working filesys explorer (#7760) 2026-01-30 12:14:56 +00:00
Raunak Bhagat
efd198072e refactor: Update layout components and SettingsPage (#8008)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-30 11:29:22 +00:00
Evan Lohn
b207a165c7 feat(filesys): UI for selecting hierarchy in assistant creation part 1 (#7721) 2026-01-30 10:36:51 +00:00
Raunak Bhagat
c231d2ec67 refactor: Update hoverable (#8007)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-30 09:58:26 +00:00
Danelegend
d1a0c75a40 fix(llm): existing custom config not used (#8002) 2026-01-30 07:47:59 +00:00
Evan Lohn
3b737fe311 feat(filesys): filter on assistant info (#7852) 2026-01-30 06:51:00 +00:00
Evan Lohn
e7abbbdc7f feat(filesys): APIs for attaching hiernodes (#7698) 2026-01-30 06:02:25 +00:00
Raunak Bhagat
5d5080e9e1 feat: Add bottomSlot to modal API (#8000) 2026-01-30 04:56:33 +00:00
Jamison Lahman
83b7c5d088 chore(devserver): fix invalid customTheme require (#8001) 2026-01-30 04:53:03 +00:00
Danelegend
f08cdc603b fix(vertex): standardise vertex image config (#7988) 2026-01-30 04:50:54 +00:00
Raunak Bhagat
6932791dd5 refactor: Add a HoverableContainer (#7997) 2026-01-30 03:46:41 +00:00
acaprau
f334b365e0 hygiene(opensearch): Some cleanup (#7999) 2026-01-29 18:42:30 -08:00
Evan Lohn
af58ae5ad9 endpoint clean (#7998) 2026-01-29 18:40:45 -08:00
Raunak Bhagat
bcd8314dd7 refactor: Small tweaks to a few components (#7995) 2026-01-30 01:30:13 +00:00
Raunak Bhagat
cddb26ff19 feat: Add new star icon + rename icon file with invalid naming (#7992) 2026-01-30 01:29:47 +00:00
roshan
c8d38de37f fix(ce): documents sidebar spawns (#7994) 2026-01-30 00:55:07 +00:00
Jamison Lahman
f2e95ee8bb chore(deps): Bump mdast-util-to-hast from 13.2.0 to 13.2.1 in /web (#7991) 2026-01-30 00:50:24 +00:00
Jamison Lahman
94ee45ce64 chore(flags): rm unused NEXT_PUBLIC_ENABLE_CHROME_EXTENSION (#7983) 2026-01-30 00:35:22 +00:00
Jamison Lahman
f36d15d924 chore(flags): remove unused NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN (#7984) 2026-01-30 00:35:08 +00:00
Jamison Lahman
ec866debc0 chore(deps): Bump @sentry/nextjs from 10.23.0 to 10.27.0 in /web (#7990) 2026-01-30 00:19:29 +00:00
dependabot[bot]
08f80b4abf chore(deps): bump starlette from 0.47.2 to 0.49.3 in /backend/requirements (#5964)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-01-30 00:07:31 +00:00
Raunak Bhagat
e559a4925a refactor: Add expandable card layouts with smooth animations (#7981) 2026-01-29 15:37:45 -08:00
Justin Tahara
1f63a23238 fix(helm): Fixing PSQL Operator Labeling (#7985) 2026-01-29 23:13:20 +00:00
Evan Lohn
658c76dd0a fix: custom config (#7987) 2026-01-29 23:01:16 +00:00
Jamison Lahman
00828af63f chore(fe): update baseline-browser-mapping (#7986) 2026-01-29 22:55:22 +00:00
victoria reese
71c6e40d5e feat: enable optional host setting (#7979)
Co-authored-by: victoria-reese_wwg <victoria.reese@grainger.com>
2026-01-29 21:36:59 +00:00
Jessica Singh
f3ff4b57bd feat(auth): update default auth (#7443)
Co-authored-by: Dane Urban <danelegend13@gmail.com>
2026-01-29 12:57:24 -08:00
Jamison Lahman
bf1752552b chore(tests): add retries to azure embeddings daily test (#7978) 2026-01-29 20:42:10 +00:00
Raunak Bhagat
5a9f9e28dc refactor: Consolidate Label component (#7974) 2026-01-29 19:52:39 +00:00
Wenxi
655cfc4858 fix: input masking (#7977) 2026-01-29 18:10:29 +00:00
Wenxi
b26c2e27b2 fix: don't show intro anim with new tenant modal + usage (#7976) 2026-01-29 17:57:45 +00:00
Evan Lohn
305a667bf9 test(filesys): drive hierarchynodes (#7676) 2026-01-29 17:45:03 +00:00
Wenxi
6bc5b083d5 feat(craft): make last name optional in user info form (#7973)
Co-authored-by: Claude <noreply@anthropic.com>
2026-01-29 16:06:34 +00:00
Raunak Bhagat
31213d43b3 refactor: Edit SimpleCollapsible API and update stylings for Modal (#7971) 2026-01-29 00:51:57 -08:00
roshan
a9e79b45cc feat(craft): README (#7970)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-01-28 22:59:12 -08:00
Evan Lohn
936ce0535d fix: llm provider upserts (#7969) 2026-01-29 06:33:42 +00:00
Raunak Bhagat
165710b5d6 fix: Edit styling (#7968) 2026-01-28 22:18:56 -08:00
roshan
c2ab9ca2a2 fix(craft): RESTORING WORKS (#7966) 2026-01-28 20:06:51 -08:00
roshan
3bcdeea560 fix(craft): PROMPT IMPROVEMENTS (#7961) 2026-01-28 19:16:58 -08:00
Yuhong Sun
31200a1b41 chore: Remove Reranking (#7946) 2026-01-29 01:26:26 +00:00
Nikolas Garza
a6261d57fd feat(ee): fe - add billing hooks and actions (#7858) 2026-01-29 01:19:44 +00:00
Wenxi
4c5e65e6dd fix(craft): auto set best model instead of checking for visibility (#7962) 2026-01-29 00:29:05 +00:00
Chris Weaver
e70115d359 fix: improve termination (#7964) 2026-01-28 16:19:36 -08:00
Raunak Bhagat
eec188f9d3 refactor: Make AgentCard use LineItemLayout for its information instead (#7958) 2026-01-29 00:10:18 +00:00
Chris Weaver
0504335a7b fix: local indexing for craft (#7959) 2026-01-28 16:12:25 -08:00
Wenxi
f5186b5e44 refactor: craft onboarding nit and connector docs (#7960) 2026-01-28 23:49:33 +00:00
Wenxi
8e3d4e1474 refactor(craft): fix pre-provisioning state management, fix demo data state management (#7955) 2026-01-28 22:59:21 +00:00
dependabot[bot]
474fb028b0 chore(deps): bump lodash-es from 4.17.21 to 4.17.23 in /web (#7652)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-01-28 22:51:58 +00:00
dependabot[bot]
d25e773b0e chore(deps): Bump mistune from 0.8.4 to 3.1.4 in /backend (#6407)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-01-28 22:48:06 +00:00
dependabot[bot]
c5df9d8863 chore(deps): bump lodash from 4.17.21 to 4.17.23 in /web (#7670)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-01-28 22:32:52 +00:00
dependabot[bot]
28eabdc885 chore(deps): bump esbuild and vite in /widget (#7543)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-28 14:33:37 -08:00
dependabot[bot]
72f34e403c chore(deps): bump astral-sh/setup-uv from 7.1.5 to 7.2.0 (#7528)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-28 14:31:13 -08:00
dependabot[bot]
8037dd2420 chore(deps): bump actions/checkout from 6.0.1 to 6.0.2 (#7802)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-28 14:30:52 -08:00
Justin Tahara
d29a384da6 chore(braintrust): Removing indexing_pipeline logs (#7957) 2026-01-28 22:25:33 +00:00
Jamison Lahman
fe7e5d3c55 chore(deps): add pytest-repeat to dev (#7956) 2026-01-28 22:10:49 +00:00
dependabot[bot]
91185f80c4 chore(deps): bump j178/prek-action from 1.0.11 to 1.0.12 (#7529)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-28 14:19:33 -08:00
dependabot[bot]
1244df1176 chore(deps): bump next from 16.1.2 to 16.1.5 in /examples/widget (#7885)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-28 14:12:11 -08:00
dependabot[bot]
080e58d875 chore(deps): bump pypdf from 6.6.0 to 6.6.2 (#7834)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-01-28 14:11:47 -08:00
roshan
420f46ce48 chore(craft): more craft logging (#7954)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-01-28 14:11:04 -08:00
dependabot[bot]
50835b4fd0 chore(deps): bump hono from 4.11.5 to 4.11.7 in /backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web (#7880)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-28 14:10:38 -08:00
dependabot[bot]
b08a3f2195 chore(deps): bump next from 16.1.4 to 16.1.5 in /backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web (#7887)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-28 14:10:25 -08:00
dependabot[bot]
dbf0c10632 chore(deps): bump next from 16.0.10 to 16.1.5 in /web (#7882)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-01-28 21:48:33 +00:00
Jamison Lahman
04433f8d44 chore(hygiene): remove linux kernel (#7953) 2026-01-28 21:31:22 +00:00
Raunak Bhagat
e426ca627f refactor: rename /chat route to /app (#7711)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-28 21:03:04 +00:00
roshan
6c9651eb97 feat(craft): onyx craft upsell upgrade modal when you run out of free messages (#7943) 2026-01-28 20:55:57 +00:00
roshan
02140eed98 fix(craft): hide session limit (#7947) 2026-01-28 20:55:47 +00:00
Jamison Lahman
93f316fa8a chore(devtools): upgrade ods: v0.4.0->v0.4.1 (#7952) 2026-01-28 20:39:03 +00:00
Wenxi
e02a60ddc7 fix: exceptions trace modal (#7951) 2026-01-28 20:25:45 +00:00
Raunak Bhagat
aa413e93d1 refactor: New sections/cards directory to host all feature-specific cards. (#7949) 2026-01-28 20:23:50 +00:00
roshan
2749e9dd6d fix(craft): install script for craft will force pull latest image for any craft-* image tags (#7950) 2026-01-28 20:08:42 +00:00
Jamison Lahman
decca26a71 chore(devtools): ods cherry-pick QOL (#7708) 2026-01-28 19:03:54 +00:00
Justin Tahara
1c490735b1 chore(api): Cleanup (#7945) 2026-01-28 18:51:31 +00:00
Yuhong Sun
87da107a03 fix: Cloud Embedding Keys (#7944) 2026-01-28 18:31:08 +00:00
Evan Lohn
f8b56098cc feat(filesys): hierarchynodes carry permission info (#7669) 2026-01-28 09:12:47 +00:00
Evan Lohn
a3a43173f7 feat(filesys): drive hierarchynodes (#7560) 2026-01-28 08:15:35 +00:00
Evan Lohn
aea924119d feat(filesys): hierarchyfetching task impl (#7557) 2026-01-28 06:40:41 +00:00
Chris Weaver
a79e581465 fix: attachment prompt tweak (#7929) 2026-01-27 22:44:43 -08:00
801 changed files with 32848 additions and 17205 deletions

View File

@@ -145,13 +145,13 @@ jobs:
if: ${{ !startsWith(github.ref_name, 'nightly-latest') && github.event_name != 'workflow_dispatch' }}
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0
- name: Setup uv
uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # ratchet:astral-sh/setup-uv@v7
with:
version: "0.9.9"
# NOTE: This isn't caching much and zizmor suggests this could be poisoned, so disable.
@@ -170,7 +170,7 @@ jobs:
environment: release
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -219,7 +219,7 @@ jobs:
timeout-minutes: 90
environment: release
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6.0.1
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6.0.2
with:
# NOTE: persist-credentials is needed for tauri-action to create GitHub releases.
persist-credentials: true # zizmor: ignore[artipacked]
@@ -392,7 +392,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -465,7 +465,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -603,7 +603,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -684,7 +684,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -827,7 +827,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -899,7 +899,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -1036,7 +1036,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -1109,7 +1109,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -1248,7 +1248,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -1327,7 +1327,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -1580,7 +1580,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -1705,7 +1705,7 @@ jobs:
environment: release
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -15,7 +15,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false

View File

@@ -28,7 +28,7 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -27,7 +27,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false

114
.github/workflows/pr-desktop-build.yml vendored Normal file
View File

@@ -0,0 +1,114 @@
name: Build Desktop App
concurrency:
group: Build-Desktop-App-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- "release/**"
paths:
- "desktop/**"
- ".github/workflows/pr-desktop-build.yml"
push:
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
build-desktop:
name: Build Desktop (${{ matrix.platform }})
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
include:
- platform: linux
os: ubuntu-latest
target: x86_64-unknown-linux-gnu
args: "--bundles deb,rpm"
# TODO: Fix and enable the macOS build.
#- platform: macos
# os: macos-latest
# target: universal-apple-darwin
# args: "--target universal-apple-darwin"
# TODO: Fix and enable the Windows build.
#- platform: windows
# os: windows-latest
# target: x86_64-pc-windows-msvc
# args: ""
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
with:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020
with:
node-version: 24
cache: "npm" # zizmor: ignore[cache-poisoning]
cache-dependency-path: ./desktop/package-lock.json
- name: Setup Rust
uses: dtolnay/rust-toolchain@4be9e76fd7c4901c61fb841f559994984270fce7
with:
toolchain: stable
targets: ${{ matrix.target }}
- name: Cache Cargo registry and build
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # zizmor: ignore[cache-poisoning]
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
desktop/src-tauri/target/
key: ${{ runner.os }}-cargo-${{ hashFiles('desktop/src-tauri/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-
- name: Install Linux dependencies
if: matrix.platform == 'linux'
run: |
sudo apt-get update
sudo apt-get install -y \
build-essential \
libglib2.0-dev \
libgirepository1.0-dev \
libgtk-3-dev \
libjavascriptcoregtk-4.1-dev \
libwebkit2gtk-4.1-dev \
libayatana-appindicator3-dev \
gobject-introspection \
pkg-config \
curl \
xdg-utils
- name: Install npm dependencies
working-directory: ./desktop
run: npm ci
- name: Build desktop app
working-directory: ./desktop
run: npx tauri build ${{ matrix.args }}
env:
TAURI_SIGNING_PRIVATE_KEY: ""
TAURI_SIGNING_PRIVATE_KEY_PASSWORD: ""
- name: Upload build artifacts
if: always()
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
with:
name: desktop-build-${{ matrix.platform }}-${{ github.run_id }}
path: |
desktop/src-tauri/target/release/bundle/
retention-days: 7
if-no-files-found: ignore

View File

@@ -57,7 +57,7 @@ jobs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -91,7 +91,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -30,7 +30,7 @@ jobs:
# fetch-depth 0 is required for helm/chart-testing-action
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
@@ -197,7 +197,6 @@ jobs:
--set=auth.opensearch.enabled=true \
--set=slackbot.enabled=false \
--set=postgresql.enabled=true \
--set=postgresql.nameOverride=cloudnative-pg \
--set=postgresql.cluster.storage.storageClass=standard \
--set=redis.enabled=true \
--set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \

View File

@@ -48,7 +48,7 @@ jobs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -84,7 +84,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -144,7 +144,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -203,7 +203,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -279,7 +279,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -460,7 +460,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -23,7 +23,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -40,7 +40,7 @@ jobs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -76,7 +76,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -136,7 +136,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -195,7 +195,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -271,7 +271,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -66,7 +66,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -127,7 +127,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -188,7 +188,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -254,7 +254,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -488,7 +488,7 @@ jobs:
# ]
# steps:
# - name: Checkout code
# uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
# uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
# with:
# fetch-depth: 0

View File

@@ -27,7 +27,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -65,7 +65,7 @@ env:
ZENDESK_TOKEN: ${{ secrets.ZENDESK_TOKEN }}
# Salesforce
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_USERNAME: ${{ vars.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
@@ -110,6 +110,9 @@ env:
# Slack
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
# Discord
DISCORD_CONNECTOR_BOT_TOKEN: ${{ secrets.DISCORD_CONNECTOR_BOT_TOKEN }}
# Teams
TEAMS_APPLICATION_ID: ${{ secrets.TEAMS_APPLICATION_ID }}
TEAMS_DIRECTORY_ID: ${{ secrets.TEAMS_DIRECTORY_ID }}
@@ -139,7 +142,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -38,7 +38,7 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -32,7 +32,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -20,7 +20,7 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
@@ -38,7 +38,7 @@ jobs:
- name: Install node dependencies
working-directory: ./web
run: npm ci
- uses: j178/prek-action@91fd7d7cf70ae1dee9f4f44e7dfa5d1073fe6623 # ratchet:j178/prek-action@v1
- uses: j178/prek-action@9d6a3097e0c1865ecce00cfb89fe80f2ee91b547 # ratchet:j178/prek-action@v1
with:
prek-version: '0.2.21'
extra-args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || github.event_name == 'merge_group' && format('--from-ref {0} --to-ref {1}', github.event.merge_group.base_sha, github.event.merge_group.head_sha) || github.ref_name == 'main' && '--all-files' || '' }}

View File

@@ -24,11 +24,11 @@ jobs:
- { goos: "darwin", goarch: "arm64" }
- { goos: "", goarch: "" }
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0
- uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
- uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
version: "0.9.9"

View File

@@ -14,7 +14,7 @@ jobs:
contents: read
steps:
- name: Checkout main Onyx repo
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false

View File

@@ -18,7 +18,7 @@ jobs:
# see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
# implement here which needs an actual user's deploy key
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
ssh-key: "${{ secrets.DEPLOY_KEY }}"
persist-credentials: true

View File

@@ -17,7 +17,7 @@ jobs:
security-events: write # needed for SARIF uploads
steps:
- name: Checkout repository
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6.0.1
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6.0.2
with:
persist-credentials: false
@@ -31,7 +31,7 @@ jobs:
- name: Install the latest version of uv
if: steps.filter.outputs.zizmor == 'true' || github.ref_name == 'main'
uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
version: "0.9.9"

3
.gitignore vendored
View File

@@ -1,6 +1,7 @@
# editors
.vscode
.vscode/*
!/.vscode/env_template.txt
!/.vscode/env.web_template.txt
!/.vscode/launch.json
!/.vscode/tasks.template.jsonc
.zed

16
.vscode/env.web_template.txt vendored Normal file
View File

@@ -0,0 +1,16 @@
# Copy this file to .env.web in the .vscode folder.
# Fill in the <REPLACE THIS> values as needed
# Web Server specific environment variables
# Minimal set needed for Next.js dev server
# Auth
AUTH_TYPE=basic
DEV_MODE=true
# Enable the full set of Danswer Enterprise Edition features.
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you
# are using this for local testing/development).
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=false
# Enable Onyx Craft
ENABLE_CRAFT=true

View File

@@ -6,13 +6,13 @@
# processes.
# For local dev, often user Authentication is not needed.
AUTH_TYPE=disabled
AUTH_TYPE=basic
DEV_MODE=true
# Always keep these on for Dev.
# Logs model prompts, reasoning, and answer to stdout.
LOG_ONYX_MODEL_INTERACTIONS=True
LOG_ONYX_MODEL_INTERACTIONS=False
# More verbose logging
LOG_LEVEL=debug
@@ -35,7 +35,6 @@ GEN_AI_API_KEY=<REPLACE THIS>
OPENAI_API_KEY=<REPLACE THIS>
# If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper.
GEN_AI_MODEL_VERSION=gpt-4o
FAST_GEN_AI_MODEL_VERSION=gpt-4o
# Python stuff

5
.vscode/launch.json vendored
View File

@@ -25,6 +25,7 @@
"Celery heavy",
"Celery docfetching",
"Celery docprocessing",
"Celery user_file_processing",
"Celery beat"
],
"presentation": {
@@ -86,7 +87,7 @@
"request": "launch",
"cwd": "${workspaceRoot}/web",
"runtimeExecutable": "npm",
"envFile": "${workspaceFolder}/.vscode/.env",
"envFile": "${workspaceFolder}/.vscode/.env.web",
"runtimeArgs": ["run", "dev"],
"presentation": {
"group": "2"
@@ -121,7 +122,6 @@
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_ONYX_MODEL_INTERACTIONS": "True",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1"
},
@@ -572,7 +572,6 @@
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_ONYX_MODEL_INTERACTIONS": "True",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."

View File

@@ -0,0 +1,58 @@
"""LLMProvider deprecated fields are nullable
Revision ID: 001984c88745
Revises: 01f8e6d95a33
Create Date: 2026-02-01 22:24:34.171100
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "001984c88745"
down_revision = "01f8e6d95a33"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Make default_model_name nullable (was NOT NULL)
op.alter_column(
"llm_provider",
"default_model_name",
existing_type=sa.String(),
nullable=True,
)
# Remove server_default from is_default_vision_provider (was server_default=false())
op.alter_column(
"llm_provider",
"is_default_vision_provider",
existing_type=sa.Boolean(),
server_default=None,
)
# is_default_provider and default_vision_model are already nullable with no server_default
def downgrade() -> None:
# Restore default_model_name to NOT NULL (set empty string for any NULLs first)
op.execute(
"UPDATE llm_provider SET default_model_name = '' WHERE default_model_name IS NULL"
)
op.alter_column(
"llm_provider",
"default_model_name",
existing_type=sa.String(),
nullable=False,
)
# Restore server_default for is_default_vision_provider
op.alter_column(
"llm_provider",
"is_default_vision_provider",
existing_type=sa.Boolean(),
server_default=sa.false(),
)
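
The downgrade above backfills an empty string into default_model_name before restoring NOT NULL. A small pre-flight check along these lines can list the providers that would be affected; this is an illustrative sketch, not part of the change, and the connection URL is a placeholder.

# Hypothetical pre-downgrade check: find llm_provider rows whose
# default_model_name is NULL and would be backfilled to '' by the downgrade.
from sqlalchemy import create_engine, text

engine = create_engine("postgresql://localhost/onyx")  # placeholder DSN

with engine.connect() as conn:
    ids = conn.execute(
        text("SELECT id FROM llm_provider WHERE default_model_name IS NULL")
    ).scalars().all()
    print(f"{len(ids)} provider(s) without a default model: {ids}")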

View File

@@ -0,0 +1,112 @@
"""Populate flow mapping data
Revision ID: 01f8e6d95a33
Revises: f220515df7b4
Create Date: 2026-01-31 17:37:10.485558
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "01f8e6d95a33"
down_revision = "f220515df7b4"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add each model config to the conversation flow, setting the global default if it exists
# Exclude models that are part of ImageGenerationConfig
op.execute(
"""
INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)
SELECT
'chat' AS llm_model_flow_type,
COALESCE(
(lp.is_default_provider IS TRUE AND lp.default_model_name = mc.name),
FALSE
) AS is_default,
mc.id AS model_configuration_id
FROM model_configuration mc
LEFT JOIN llm_provider lp
ON lp.id = mc.llm_provider_id
WHERE NOT EXISTS (
SELECT 1 FROM image_generation_config igc
WHERE igc.model_configuration_id = mc.id
);
"""
)
# Add models with supports_image_input to the vision flow
op.execute(
"""
INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)
SELECT
'vision' AS llm_model_flow_type,
COALESCE(
(lp.is_default_vision_provider IS TRUE AND lp.default_vision_model = mc.name),
FALSE
) AS is_default,
mc.id AS model_configuration_id
FROM model_configuration mc
LEFT JOIN llm_provider lp
ON lp.id = mc.llm_provider_id
WHERE mc.supports_image_input IS TRUE;
"""
)
def downgrade() -> None:
# Populate vision defaults from model_flow
op.execute(
"""
UPDATE llm_provider AS lp
SET
is_default_vision_provider = TRUE,
default_vision_model = mc.name
FROM llm_model_flow mf
JOIN model_configuration mc ON mc.id = mf.model_configuration_id
WHERE mf.llm_model_flow_type = 'vision'
AND mf.is_default = TRUE
AND mc.llm_provider_id = lp.id;
"""
)
# Populate conversation defaults from model_flow
op.execute(
"""
UPDATE llm_provider AS lp
SET
is_default_provider = TRUE,
default_model_name = mc.name
FROM llm_model_flow mf
JOIN model_configuration mc ON mc.id = mf.model_configuration_id
WHERE mf.llm_model_flow_type = 'chat'
AND mf.is_default = TRUE
AND mc.llm_provider_id = lp.id;
"""
)
# For providers that have conversation flow mappings but aren't the default,
# we still need a default_model_name (it was NOT NULL originally)
# Pick the first visible model or any model for that provider
op.execute(
"""
UPDATE llm_provider AS lp
SET default_model_name = (
SELECT mc.name
FROM model_configuration mc
JOIN llm_model_flow mf ON mf.model_configuration_id = mc.id
WHERE mc.llm_provider_id = lp.id
AND mf.llm_model_flow_type = 'chat'
ORDER BY mc.is_visible DESC, mc.id ASC
LIMIT 1
)
WHERE lp.default_model_name IS NULL;
"""
)
# Delete all model_flow entries (reverse the inserts from upgrade)
op.execute("DELETE FROM llm_model_flow;")

View File

@@ -10,8 +10,6 @@ from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from onyx.configs.chat_configs import NUM_POSTPROCESSED_RESULTS
# revision identifiers, used by Alembic.
revision = "1f60f60c3401"
down_revision = "f17bf3b0d9f1"
@@ -66,7 +64,7 @@ def upgrade() -> None:
"num_rerank",
sa.Integer(),
nullable=False,
server_default=str(NUM_POSTPROCESSED_RESULTS),
server_default=str(20),
),
)

View File

@@ -0,0 +1,58 @@
"""remove reranking from search_settings
Revision ID: 78ebc66946a0
Revises: 849b21c732f8
Create Date: 2026-01-28
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "78ebc66946a0"
down_revision = "849b21c732f8"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.drop_column("search_settings", "disable_rerank_for_streaming")
op.drop_column("search_settings", "rerank_model_name")
op.drop_column("search_settings", "rerank_provider_type")
op.drop_column("search_settings", "rerank_api_key")
op.drop_column("search_settings", "rerank_api_url")
op.drop_column("search_settings", "num_rerank")
def downgrade() -> None:
op.add_column(
"search_settings",
sa.Column(
"disable_rerank_for_streaming",
sa.Boolean(),
nullable=False,
server_default="false",
),
)
op.add_column(
"search_settings", sa.Column("rerank_model_name", sa.String(), nullable=True)
)
op.add_column(
"search_settings", sa.Column("rerank_provider_type", sa.String(), nullable=True)
)
op.add_column(
"search_settings", sa.Column("rerank_api_key", sa.String(), nullable=True)
)
op.add_column(
"search_settings", sa.Column("rerank_api_url", sa.String(), nullable=True)
)
op.add_column(
"search_settings",
sa.Column(
"num_rerank",
sa.Integer(),
nullable=False,
server_default=str(20),
),
)

View File

@@ -0,0 +1,32 @@
"""add demo_data_enabled to build_session
Revision ID: 849b21c732f8
Revises: 81c22b1e2e78
Create Date: 2026-01-28 10:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "849b21c732f8"
down_revision = "81c22b1e2e78"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"build_session",
sa.Column(
"demo_data_enabled",
sa.Boolean(),
nullable=False,
server_default=sa.text("true"),
),
)
def downgrade() -> None:
op.drop_column("build_session", "demo_data_enabled")

View File

@@ -0,0 +1,27 @@
"""add processing_duration_seconds to chat_message
Revision ID: 9d1543a37106
Revises: cbc03e08d0f3
Create Date: 2026-01-21 11:42:18.546188
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "9d1543a37106"
down_revision = "cbc03e08d0f3"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"chat_message",
sa.Column("processing_duration_seconds", sa.Float(), nullable=True),
)
def downgrade() -> None:
op.drop_column("chat_message", "processing_duration_seconds")

View File

@@ -0,0 +1,40 @@
"""Persona new default model configuration id column
Revision ID: be87a654d5af
Revises: e7f8a9b0c1d2
Create Date: 2026-01-30 11:14:17.306275
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "be87a654d5af"
down_revision = "e7f8a9b0c1d2"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"persona",
sa.Column("default_model_configuration_id", sa.Integer(), nullable=True),
)
op.create_foreign_key(
"fk_persona_default_model_configuration_id",
"persona",
"model_configuration",
["default_model_configuration_id"],
["id"],
ondelete="SET NULL",
)
def downgrade() -> None:
op.drop_constraint(
"fk_persona_default_model_configuration_id", "persona", type_="foreignkey"
)
op.drop_column("persona", "default_model_configuration_id")

View File

@@ -0,0 +1,128 @@
"""add_opensearch_migration_tables
Revision ID: cbc03e08d0f3
Revises: be87a654d5af
Create Date: 2026-01-31 17:00:45.176604
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "cbc03e08d0f3"
down_revision = "be87a654d5af"
branch_labels = None
depends_on = None
def upgrade() -> None:
# 1. Create opensearch_document_migration_record table.
op.create_table(
"opensearch_document_migration_record",
sa.Column("document_id", sa.String(), nullable=False),
sa.Column("status", sa.String(), nullable=False, server_default="pending"),
sa.Column("error_message", sa.Text(), nullable=True),
sa.Column("attempts_count", sa.Integer(), nullable=False, server_default="0"),
sa.Column("last_attempt_at", sa.DateTime(timezone=True), nullable=True),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
sa.PrimaryKeyConstraint("document_id"),
sa.ForeignKeyConstraint(
["document_id"],
["document.id"],
ondelete="CASCADE",
),
)
# 2. Create indices.
op.create_index(
"ix_opensearch_document_migration_record_status",
"opensearch_document_migration_record",
["status"],
)
op.create_index(
"ix_opensearch_document_migration_record_attempts_count",
"opensearch_document_migration_record",
["attempts_count"],
)
op.create_index(
"ix_opensearch_document_migration_record_created_at",
"opensearch_document_migration_record",
["created_at"],
)
# 3. Create opensearch_tenant_migration_record table (singleton).
op.create_table(
"opensearch_tenant_migration_record",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column(
"document_migration_record_table_population_status",
sa.String(),
nullable=False,
server_default="pending",
),
sa.Column(
"num_times_observed_no_additional_docs_to_populate_migration_table",
sa.Integer(),
nullable=False,
server_default="0",
),
sa.Column(
"overall_document_migration_status",
sa.String(),
nullable=False,
server_default="pending",
),
sa.Column(
"num_times_observed_no_additional_docs_to_migrate",
sa.Integer(),
nullable=False,
server_default="0",
),
sa.Column(
"last_updated_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
)
# 4. Create unique index on constant to enforce singleton pattern.
op.execute(
sa.text(
"""
CREATE UNIQUE INDEX idx_opensearch_tenant_migration_singleton
ON opensearch_tenant_migration_record ((true))
"""
)
)
def downgrade() -> None:
# Drop opensearch_tenant_migration_record.
op.drop_index(
"idx_opensearch_tenant_migration_singleton",
table_name="opensearch_tenant_migration_record",
)
op.drop_table("opensearch_tenant_migration_record")
# Drop opensearch_document_migration_record.
op.drop_index(
"ix_opensearch_document_migration_record_created_at",
table_name="opensearch_document_migration_record",
)
op.drop_index(
"ix_opensearch_document_migration_record_attempts_count",
table_name="opensearch_document_migration_record",
)
op.drop_index(
"ix_opensearch_document_migration_record_status",
table_name="opensearch_document_migration_record",
)
op.drop_table("opensearch_document_migration_record")

View File

@@ -0,0 +1,125 @@
"""create_anonymous_user
This migration creates a permanent anonymous user in the database.
When anonymous access is enabled, unauthenticated requests will use this user
instead of returning user_id=NULL.
Revision ID: e7f8a9b0c1d2
Revises: f7ca3e2f45d9
Create Date: 2026-01-15 14:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "e7f8a9b0c1d2"
down_revision = "f7ca3e2f45d9"
branch_labels = None
depends_on = None
# Must match constants in onyx/configs/constants.py file
ANONYMOUS_USER_UUID = "00000000-0000-0000-0000-000000000002"
ANONYMOUS_USER_EMAIL = "anonymous@onyx.app"
# Tables with user_id foreign key that may need migration
TABLES_WITH_USER_ID = [
"chat_session",
"credential",
"document_set",
"persona",
"tool",
"notification",
"inputprompt",
]
def upgrade() -> None:
"""
Create the anonymous user for the anonymous access feature.
Also migrates any remaining user_id=NULL records to the anonymous user.
"""
connection = op.get_bind()
# Create the anonymous user (using ON CONFLICT to be idempotent)
connection.execute(
sa.text(
"""
INSERT INTO "user" (id, email, hashed_password, is_active, is_superuser, is_verified, role)
VALUES (:id, :email, :hashed_password, :is_active, :is_superuser, :is_verified, :role)
ON CONFLICT (id) DO NOTHING
"""
),
{
"id": ANONYMOUS_USER_UUID,
"email": ANONYMOUS_USER_EMAIL,
"hashed_password": "", # Empty password - user cannot log in directly
"is_active": True, # Active so it can be used for anonymous access
"is_superuser": False,
"is_verified": True, # Verified since no email verification needed
"role": "LIMITED", # Anonymous users have limited role to restrict access
},
)
# Migrate any remaining user_id=NULL records to anonymous user
for table in TABLES_WITH_USER_ID:
try:
# Exclude public credential (id=0) which must remain user_id=NULL
# Exclude builtin tools (in_code_tool_id IS NOT NULL) which must remain user_id=NULL
# Exclude builtin personas (builtin_persona=True) which must remain user_id=NULL
# Exclude system input prompts (is_public=True with user_id=NULL) which must remain user_id=NULL
if table == "credential":
condition = "user_id IS NULL AND id != 0"
elif table == "tool":
condition = "user_id IS NULL AND in_code_tool_id IS NULL"
elif table == "persona":
condition = "user_id IS NULL AND builtin_persona = false"
elif table == "inputprompt":
condition = "user_id IS NULL AND is_public = false"
else:
condition = "user_id IS NULL"
result = connection.execute(
sa.text(
f"""
UPDATE "{table}"
SET user_id = :user_id
WHERE {condition}
"""
),
{"user_id": ANONYMOUS_USER_UUID},
)
if result.rowcount > 0:
print(f"Updated {result.rowcount} rows in {table} to anonymous user")
except Exception as e:
print(f"Skipping {table}: {e}")
def downgrade() -> None:
"""
Set anonymous user's records back to NULL and delete the anonymous user.
"""
connection = op.get_bind()
# Set records back to NULL
for table in TABLES_WITH_USER_ID:
try:
connection.execute(
sa.text(
f"""
UPDATE "{table}"
SET user_id = NULL
WHERE user_id = :user_id
"""
),
{"user_id": ANONYMOUS_USER_UUID},
)
except Exception:
pass
# Delete the anonymous user
connection.execute(
sa.text('DELETE FROM "user" WHERE id = :user_id'),
{"user_id": ANONYMOUS_USER_UUID},
)
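As context for how this user is meant to be consumed, the docstring above says unauthenticated requests should be attributed to it rather than left at user_id=NULL. A hedged sketch of that substitution at request time (the function and the anonymous_access_enabled flag are illustrative names, not part of this change):

from uuid import UUID
from sqlalchemy.orm import Session

from onyx.db.models import User

ANONYMOUS_USER_UUID = UUID("00000000-0000-0000-0000-000000000002")

def resolve_request_user(
    user: User | None, db_session: Session, anonymous_access_enabled: bool
) -> User | None:
    # Logged-in users pass through untouched.
    if user is not None:
        return user
    # With anonymous access on, substitute the permanent anonymous user
    # created by the migration above instead of propagating None.
    if not anonymous_access_enabled:
        return None
    return db_session.get(User, ANONYMOUS_USER_UUID)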

View File

@@ -0,0 +1,57 @@
"""Add flow mapping table
Revision ID: f220515df7b4
Revises: 9d1543a37106
Create Date: 2026-01-30 12:21:24.955922
"""
from onyx.db.enums import LLMModelFlowType
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "f220515df7b4"
down_revision = "9d1543a37106"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"llm_model_flow",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column(
"llm_model_flow_type",
sa.Enum(LLMModelFlowType, name="llmmodelflowtype", native_enum=False),
nullable=False,
),
sa.Column(
"is_default", sa.Boolean(), nullable=False, server_default=sa.text("false")
),
sa.Column("model_configuration_id", sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint("id"),
sa.ForeignKeyConstraint(
["model_configuration_id"], ["model_configuration.id"], ondelete="CASCADE"
),
sa.UniqueConstraint(
"llm_model_flow_type",
"model_configuration_id",
name="uq_model_config_per_llm_model_flow_type",
),
)
# Partial unique index so that there is at most one default for each flow type
op.create_index(
"ix_one_default_per_llm_model_flow",
"llm_model_flow",
["llm_model_flow_type"],
unique=True,
postgresql_where=sa.text("is_default IS TRUE"),
)
def downgrade() -> None:
# Drop the llm_model_flow table (index is dropped automatically with table)
op.drop_table("llm_model_flow")

View File

@@ -0,0 +1,281 @@
"""migrate_no_auth_data_to_placeholder
This migration handles the transition from AUTH_TYPE=disabled to requiring
authentication. It creates a placeholder user and assigns all data that was
created without a user (user_id=NULL) to this placeholder.
A database trigger is installed that automatically transfers all data from
the placeholder user to the first real user who registers, then drops itself.
Revision ID: f7ca3e2f45d9
Revises: 78ebc66946a0
Create Date: 2026-01-15 12:49:53.802741
"""
import os
from alembic import op
import sqlalchemy as sa
from shared_configs.configs import MULTI_TENANT
# revision identifiers, used by Alembic.
revision = "f7ca3e2f45d9"
down_revision = "78ebc66946a0"
branch_labels = None
depends_on = None
# Must match constants in onyx/configs/constants.py file
NO_AUTH_PLACEHOLDER_USER_UUID = "00000000-0000-0000-0000-000000000001"
NO_AUTH_PLACEHOLDER_USER_EMAIL = "no-auth-placeholder@onyx.app"
# Trigger and function names
TRIGGER_NAME = "trg_migrate_no_auth_data"
FUNCTION_NAME = "migrate_no_auth_data_to_user"
# Trigger function that migrates data from placeholder to first real user
MIGRATE_NO_AUTH_TRIGGER_FUNCTION = f"""
CREATE OR REPLACE FUNCTION {FUNCTION_NAME}()
RETURNS TRIGGER AS $$
DECLARE
placeholder_uuid UUID := '00000000-0000-0000-0000-000000000001'::uuid;
anonymous_uuid UUID := '00000000-0000-0000-0000-000000000002'::uuid;
placeholder_row RECORD;
schema_name TEXT;
BEGIN
-- Skip if this is the placeholder user being inserted
IF NEW.id = placeholder_uuid THEN
RETURN NULL;
END IF;
-- Skip if this is the anonymous user being inserted (not a real user)
IF NEW.id = anonymous_uuid THEN
RETURN NULL;
END IF;
-- Skip if the new user is not active
IF NEW.is_active = FALSE THEN
RETURN NULL;
END IF;
-- Get current schema for self-cleanup
schema_name := current_schema();
-- Try to lock the placeholder user row with FOR UPDATE SKIP LOCKED
-- This ensures only one concurrent transaction can proceed with migration
-- SKIP LOCKED means if another transaction has the lock, we skip (don't wait)
SELECT id INTO placeholder_row
FROM "user"
WHERE id = placeholder_uuid
FOR UPDATE SKIP LOCKED;
IF NOT FOUND THEN
-- Either placeholder doesn't exist or another transaction has it locked
-- Either way, drop the trigger and return without making admin
EXECUTE format('DROP TRIGGER IF EXISTS {TRIGGER_NAME} ON %I."user"', schema_name);
EXECUTE format('DROP FUNCTION IF EXISTS %I.{FUNCTION_NAME}()', schema_name);
RETURN NULL;
END IF;
-- We have exclusive lock on placeholder - proceed with migration
-- The INSERT has already completed (AFTER INSERT), so NEW.id exists in the table
-- Migrate chat_session
UPDATE "chat_session" SET user_id = NEW.id WHERE user_id = placeholder_uuid;
-- Migrate credential (exclude public credential id=0)
UPDATE "credential" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND id != 0;
-- Migrate document_set
UPDATE "document_set" SET user_id = NEW.id WHERE user_id = placeholder_uuid;
-- Migrate persona (exclude builtin personas)
UPDATE "persona" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND builtin_persona = FALSE;
-- Migrate tool (exclude builtin tools)
UPDATE "tool" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND in_code_tool_id IS NULL;
-- Migrate notification
UPDATE "notification" SET user_id = NEW.id WHERE user_id = placeholder_uuid;
-- Migrate inputprompt (exclude system/public prompts)
UPDATE "inputprompt" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND is_public = FALSE;
-- Make the new user an admin (they had admin access in no-auth mode)
-- In AFTER INSERT trigger, we must UPDATE the row since it already exists
UPDATE "user" SET role = 'ADMIN' WHERE id = NEW.id;
-- Delete the placeholder user (we hold the lock so this is safe)
DELETE FROM "user" WHERE id = placeholder_uuid;
-- Drop the trigger and function (self-cleanup)
EXECUTE format('DROP TRIGGER IF EXISTS {TRIGGER_NAME} ON %I."user"', schema_name);
EXECUTE format('DROP FUNCTION IF EXISTS %I.{FUNCTION_NAME}()', schema_name);
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
"""
MIGRATE_NO_AUTH_TRIGGER = f"""
CREATE TRIGGER {TRIGGER_NAME}
AFTER INSERT ON "user"
FOR EACH ROW
EXECUTE FUNCTION {FUNCTION_NAME}();
"""
def upgrade() -> None:
"""
Create a placeholder user and assign all NULL user_id records to it.
Install a trigger that migrates data to the first real user and self-destructs.
Only runs if AUTH_TYPE is currently disabled/none.
Skipped in multi-tenant mode - each tenant starts fresh with no legacy data.
"""
# Skip in multi-tenant mode - this migration handles single-tenant
# AUTH_TYPE=disabled -> auth transitions only
if MULTI_TENANT:
return
# Only run if AUTH_TYPE is currently disabled/none
# If they've already switched to auth-enabled, NULL data is stale anyway
auth_type = (os.environ.get("AUTH_TYPE") or "").lower()
if auth_type not in ("disabled", "none", ""):
print(f"AUTH_TYPE is '{auth_type}', not disabled. Skipping migration.")
return
connection = op.get_bind()
# Check if there are any NULL user_id records that need migration
tables_to_check = [
"chat_session",
"credential",
"document_set",
"persona",
"tool",
"notification",
"inputprompt",
]
has_null_records = False
for table in tables_to_check:
try:
result = connection.execute(
sa.text(f'SELECT 1 FROM "{table}" WHERE user_id IS NULL LIMIT 1')
)
if result.fetchone():
has_null_records = True
break
except Exception:
# Table might not exist
pass
if not has_null_records:
return
# Create the placeholder user
connection.execute(
sa.text(
"""
INSERT INTO "user" (id, email, hashed_password, is_active, is_superuser, is_verified, role)
VALUES (:id, :email, :hashed_password, :is_active, :is_superuser, :is_verified, :role)
"""
),
{
"id": NO_AUTH_PLACEHOLDER_USER_UUID,
"email": NO_AUTH_PLACEHOLDER_USER_EMAIL,
"hashed_password": "", # Empty password - user cannot log in
"is_active": False, # Inactive - user cannot log in
"is_superuser": False,
"is_verified": False,
"role": "BASIC",
},
)
# Assign NULL user_id records to the placeholder user
for table in tables_to_check:
try:
# Base condition for all tables
condition = "user_id IS NULL"
# Exclude public credential (id=0) which must remain user_id=NULL
if table == "credential":
condition += " AND id != 0"
# Exclude builtin tools (in_code_tool_id IS NOT NULL) which must remain user_id=NULL
elif table == "tool":
condition += " AND in_code_tool_id IS NULL"
# Exclude builtin personas which must remain user_id=NULL
elif table == "persona":
condition += " AND builtin_persona = FALSE"
# Exclude system/public input prompts which must remain user_id=NULL
elif table == "inputprompt":
condition += " AND is_public = FALSE"
result = connection.execute(
sa.text(
f"""
UPDATE "{table}"
SET user_id = :user_id
WHERE {condition}
"""
),
{"user_id": NO_AUTH_PLACEHOLDER_USER_UUID},
)
if result.rowcount > 0:
print(f"Updated {result.rowcount} rows in {table}")
except Exception as e:
print(f"Skipping {table}: {e}")
# Install the trigger function and trigger for automatic migration on first user registration
connection.execute(sa.text(MIGRATE_NO_AUTH_TRIGGER_FUNCTION))
connection.execute(sa.text(MIGRATE_NO_AUTH_TRIGGER))
print("Installed trigger for automatic data migration on first user registration")
def downgrade() -> None:
"""
Drop trigger and function, set placeholder user's records back to NULL,
and delete the placeholder user.
"""
# Skip in multi-tenant mode for consistency with upgrade
if MULTI_TENANT:
return
connection = op.get_bind()
# Drop trigger and function if they exist (they may have already self-destructed)
connection.execute(sa.text(f'DROP TRIGGER IF EXISTS {TRIGGER_NAME} ON "user"'))
connection.execute(sa.text(f"DROP FUNCTION IF EXISTS {FUNCTION_NAME}()"))
tables_to_update = [
"chat_session",
"credential",
"document_set",
"persona",
"tool",
"notification",
"inputprompt",
]
# Set records back to NULL
for table in tables_to_update:
try:
connection.execute(
sa.text(
f"""
UPDATE "{table}"
SET user_id = NULL
WHERE user_id = :user_id
"""
),
{"user_id": NO_AUTH_PLACEHOLDER_USER_UUID},
)
except Exception:
pass
# Delete the placeholder user
connection.execute(
sa.text('DELETE FROM "user" WHERE id = :user_id'),
{"user_id": NO_AUTH_PLACEHOLDER_USER_UUID},
)
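Because the trigger drops itself after migrating data to the first real user, its continued presence is effectively a flag that the handoff has not happened yet. A small illustrative check against the system catalog (not part of this migration):

from sqlalchemy import text
from sqlalchemy.orm import Session

TRIGGER_NAME = "trg_migrate_no_auth_data"

def no_auth_handoff_pending(db_session: Session) -> bool:
    # The trigger self-destructs once the placeholder data is migrated, so
    # finding it in pg_trigger means no real user has registered yet.
    row = db_session.execute(
        text("SELECT 1 FROM pg_trigger WHERE tgname = :name"),
        {"name": TRIGGER_NAME},
    ).first()
    return row is not None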

View File

@@ -116,7 +116,7 @@ def _get_access_for_documents(
return access_map
def _get_acl_for_user(user: User | None, db_session: Session) -> set[str]:
def _get_acl_for_user(user: User, db_session: Session) -> set[str]:
"""Returns a list of ACL entries that the user has access to. This is meant to be
used downstream to filter out documents that the user does not have access to. The
user should have access to a document if at least one entry in the document's ACL
@@ -124,13 +124,16 @@ def _get_acl_for_user(user: User | None, db_session: Session) -> set[str]:
NOTE: is imported in onyx.access.access by `fetch_versioned_implementation`
DO NOT REMOVE."""
db_user_groups = fetch_user_groups_for_user(db_session, user.id) if user else []
is_anonymous = user.is_anonymous
db_user_groups = (
[] if is_anonymous else fetch_user_groups_for_user(db_session, user.id)
)
prefixed_user_groups = [
prefix_user_group(db_user_group.name) for db_user_group in db_user_groups
]
db_external_groups = (
fetch_external_groups_for_user(db_session, user.id) if user else []
[] if is_anonymous else fetch_external_groups_for_user(db_session, user.id)
)
prefixed_external_groups = [
prefix_external_group(db_external_group.external_user_group_id)
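The docstring above frames access as a set-intersection test: the user's ACL entries (their email, prefixed user groups, prefixed external groups, plus a public marker) are compared against each document's ACL, and one shared entry is enough. A tiny sketch of that downstream check; the prefix formats in the comment are illustrative, since the prefix helpers' exact output is not visible here:

def user_has_access_to_document(
    user_acl_entries: set[str], document_acl_entries: set[str]
) -> bool:
    # One overlapping entry grants access, e.g. "user_email:alice@corp.com",
    # "group:engineering", "external_group:drive-team", or the public marker.
    return bool(user_acl_entries & document_acl_entries)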

View File

@@ -0,0 +1,11 @@
from sqlalchemy.orm import Session
from ee.onyx.db.external_perm import fetch_external_groups_for_user
from onyx.db.models import User
def _get_user_external_group_ids(db_session: Session, user: User) -> list[str]:
if not user:
return []
external_groups = fetch_external_groups_for_user(db_session, user.id)
return [external_group.external_user_group_id for external_group in external_groups]

View File

@@ -33,8 +33,8 @@ def get_default_admin_user_emails_() -> list[str]:
async def current_cloud_superuser(
request: Request,
user: User | None = Depends(current_admin_user),
) -> User | None:
user: User = Depends(current_admin_user),
) -> User:
api_key = request.headers.get("Authorization", "").replace("Bearer ", "")
if api_key != SUPER_CLOUD_API_KEY:
raise HTTPException(status_code=401, detail="Invalid API key")

View File

@@ -25,6 +25,7 @@ from ee.onyx.db.connector_credential_pair import get_all_auto_sync_cc_pairs
from ee.onyx.db.document import upsert_document_external_perms
from ee.onyx.external_permissions.sync_params import get_source_perm_sync_config
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_find_task
from onyx.background.celery.celery_redis import celery_get_queue_length
@@ -55,6 +56,9 @@ from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.hierarchy import (
update_hierarchy_node_permissions as db_update_hierarchy_node_permissions,
)
from onyx.db.models import ConnectorCredentialPair
from onyx.db.permission_sync_attempt import complete_doc_permission_sync_attempt
from onyx.db.permission_sync_attempt import create_doc_permission_sync_attempt
@@ -637,18 +641,25 @@ def connector_permission_sync_generator_task(
),
stop=stop_after_delay(DOCUMENT_PERMISSIONS_UPDATE_STOP_AFTER),
)
def document_update_permissions(
def element_update_permissions(
tenant_id: str,
permissions: DocExternalAccess,
permissions: ElementExternalAccess,
source_type_str: str,
connector_id: int,
credential_id: int,
) -> bool:
"""Update permissions for a document or hierarchy node."""
start = time.monotonic()
doc_id = permissions.doc_id
external_access = permissions.external_access
# Determine element type and identifier for logging
if isinstance(permissions, DocExternalAccess):
element_id = permissions.doc_id
element_type = "doc"
else:
element_id = permissions.raw_node_id
element_type = "node"
try:
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
# Add the users to the DB if they don't exist
@@ -657,39 +668,57 @@ def document_update_permissions(
emails=list(external_access.external_user_emails),
continue_on_error=True,
)
# Then upsert the document's external permissions
created_new_doc = upsert_document_external_perms(
db_session=db_session,
doc_id=doc_id,
external_access=external_access,
source_type=DocumentSource(source_type_str),
)
if created_new_doc:
# If a new document was created, we associate it with the cc_pair
upsert_document_by_connector_credential_pair(
if isinstance(permissions, DocExternalAccess):
# Document permission update
created_new_doc = upsert_document_external_perms(
db_session=db_session,
connector_id=connector_id,
credential_id=credential_id,
document_ids=[doc_id],
doc_id=permissions.doc_id,
external_access=external_access,
source_type=DocumentSource(source_type_str),
)
if created_new_doc:
# If a new document was created, we associate it with the cc_pair
upsert_document_by_connector_credential_pair(
db_session=db_session,
connector_id=connector_id,
credential_id=credential_id,
document_ids=[permissions.doc_id],
)
else:
# Hierarchy node permission update
db_update_hierarchy_node_permissions(
db_session=db_session,
raw_node_id=permissions.raw_node_id,
source=DocumentSource(permissions.source),
is_public=external_access.is_public,
external_user_emails=(
list(external_access.external_user_emails)
if external_access.external_user_emails
else None
),
external_user_group_ids=(
list(external_access.external_user_group_ids)
if external_access.external_user_group_ids
else None
),
)
elapsed = time.monotonic() - start
task_logger.info(
f"connector_id={connector_id} "
f"doc={doc_id} "
f"{element_type}={element_id} "
f"action=update_permissions "
f"elapsed={elapsed:.2f}"
)
except Exception as e:
task_logger.exception(
f"document_update_permissions exceptioned: "
f"connector_id={connector_id} doc_id={doc_id}"
f"element_update_permissions exceptioned: {element_type}={element_id}, {connector_id=} {credential_id=}"
)
raise e
finally:
task_logger.info(
f"document_update_permissions completed: connector_id={connector_id} doc={doc_id}"
f"element_update_permissions completed: {element_type}={element_id}, {connector_id=} {credential_id=}"
)
return True

View File

@@ -334,11 +334,9 @@ def fetch_assistant_unique_users_total(
# Users can view assistant stats if they created the persona,
# or if they are an admin
def user_can_view_assistant_stats(
db_session: Session, user: User | None, assistant_id: int
db_session: Session, user: User, assistant_id: int
) -> bool:
# If user is None and auth is disabled, assume the user is an admin
if user is None or user.role == UserRole.ADMIN:
if user.role == UserRole.ADMIN:
return True
# Check if the user created the persona

View File

@@ -0,0 +1,67 @@
"""EE version of hierarchy node access control.
This module provides permission-aware hierarchy node access for Enterprise Edition.
It filters hierarchy nodes based on user email and external group membership.
"""
from sqlalchemy import any_
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import Session
from sqlalchemy.sql.elements import ColumnElement
from onyx.configs.constants import DocumentSource
from onyx.db.models import HierarchyNode
def _build_hierarchy_access_filter(
user_email: str | None,
external_group_ids: list[str],
) -> ColumnElement[bool]:
"""Build SQLAlchemy filter for hierarchy node access.
A user can access a hierarchy node if any of the following are true:
- The node is marked as public (is_public=True)
- The user's email is in the node's external_user_emails list
- Any of the user's external group IDs overlap with the node's external_user_group_ids
"""
access_filters: list[ColumnElement[bool]] = [HierarchyNode.is_public.is_(True)]
if user_email:
access_filters.append(any_(HierarchyNode.external_user_emails) == user_email)
if external_group_ids:
access_filters.append(
HierarchyNode.external_user_group_ids.overlap(
postgresql.array(external_group_ids)
)
)
return or_(*access_filters)
def _get_accessible_hierarchy_nodes_for_source(
db_session: Session,
source: DocumentSource,
user_email: str | None,
external_group_ids: list[str],
) -> list[HierarchyNode]:
"""
EE version: Returns hierarchy nodes filtered by user permissions.
A user can access a hierarchy node if any of the following are true:
- The node is marked as public (is_public=True)
- The user's email is in the node's external_user_emails list
- Any of the user's external group IDs overlap with the node's external_user_group_ids
Args:
db_session: SQLAlchemy session
source: Document source type
user_email: User's email for permission checking
external_group_ids: User's external group IDs for permission checking
Returns:
List of HierarchyNode objects the user has access to
"""
stmt = select(HierarchyNode).where(HierarchyNode.source == source)
stmt = stmt.where(_build_hierarchy_access_filter(user_email, external_group_ids))
stmt = stmt.order_by(HierarchyNode.display_name)
return list(db_session.execute(stmt).scalars().all())
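A sketch of how this could be wired together per request: the external group IDs come from the small helper added earlier in this change, and the query above then admits public nodes, direct email matches, and group overlaps. The wrapper below is hypothetical, and the two private helpers are assumed importable from their respective modules (paths not shown in this diff):

from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource
from onyx.db.models import HierarchyNode, User

def fetch_visible_hierarchy_nodes(
    db_session: Session, user: User, source: DocumentSource
) -> list[HierarchyNode]:
    # Gather the user's external groups, then apply the EE permission filter.
    external_group_ids = _get_user_external_group_ids(db_session, user)
    return _get_accessible_hierarchy_nodes_for_source(
        db_session=db_session,
        source=source,
        user_email=user.email,
        external_group_ids=external_group_ids,
    )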

View File

@@ -227,10 +227,10 @@ def update_license_cache(
stripe_subscription_id=payload.stripe_subscription_id,
)
redis_client.setex(
redis_client.set(
LICENSE_METADATA_KEY,
LICENSE_CACHE_TTL_SECONDS,
metadata.model_dump_json(),
ex=LICENSE_CACHE_TTL_SECONDS,
)
logger.info(f"License cache updated: {metadata.seats} seats, status={status.value}")

View File

@@ -7,7 +7,6 @@ from sqlalchemy import select
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Session
from onyx.configs.app_configs import DISABLE_AUTH
from onyx.configs.constants import TokenRateLimitScope
from onyx.db.models import TokenRateLimit
from onyx.db.models import TokenRateLimit__UserGroup
@@ -18,13 +17,15 @@ from onyx.db.models import UserRole
from onyx.server.token_rate_limits.models import TokenRateLimitArgs
def _add_user_filters(
stmt: Select, user: User | None, get_editable: bool = True
) -> Select:
# If user is None and auth is disabled, assume the user is an admin
if (user is None and DISABLE_AUTH) or (user and user.role == UserRole.ADMIN):
def _add_user_filters(stmt: Select, user: User, get_editable: bool = True) -> Select:
if user.role == UserRole.ADMIN:
return stmt
# If anonymous user, only show global/public token_rate_limits
if user.is_anonymous:
where_clause = TokenRateLimit.scope == TokenRateLimitScope.GLOBAL
return stmt.where(where_clause)
stmt = stmt.distinct()
TRLimit_UG = aliased(TokenRateLimit__UserGroup)
User__UG = aliased(User__UserGroup)
@@ -49,11 +50,6 @@ def _add_user_filters(
- if we are not editing, we show all token_rate_limits in the groups the user curates
"""
# If user is None, this is an anonymous user and we should only show public token_rate_limits
if user is None:
where_clause = TokenRateLimit.scope == TokenRateLimitScope.GLOBAL
return stmt.where(where_clause)
where_clause = User__UG.user_id == user.id
if user.role == UserRole.CURATOR and get_editable:
where_clause &= User__UG.is_curator == True # noqa: E712
@@ -114,7 +110,7 @@ def insert_user_group_token_rate_limit(
def fetch_user_group_token_rate_limits_for_user(
db_session: Session,
group_id: int,
user: User | None,
user: User,
enabled_only: bool = False,
ordered: bool = True,
get_editable: bool = True,

View File

@@ -125,7 +125,7 @@ def _cleanup_document_set__user_group_relationships__no_commit(
def validate_object_creation_for_user(
db_session: Session,
user: User | None,
user: User,
target_group_ids: list[int] | None = None,
object_is_public: bool | None = None,
object_is_perm_sync: bool | None = None,
@@ -144,7 +144,8 @@ def validate_object_creation_for_user(
if object_is_perm_sync and not target_group_ids:
return
if not user or user.role == UserRole.ADMIN:
# Admins are allowed
if user.role == UserRole.ADMIN:
return
# Allow curators and global curators to create public objects
@@ -474,14 +475,15 @@ def remove_curator_status__no_commit(db_session: Session, user: User) -> None:
def _validate_curator_relationship_update_requester(
db_session: Session,
user_group_id: int,
user_making_change: User | None = None,
user_making_change: User,
) -> None:
"""
This function validates that the user making the change has the necessary permissions
to update the curator relationship for the target user in the given user group.
"""
if user_making_change is None or user_making_change.role == UserRole.ADMIN:
# Admins can update curator relationships for any group
if user_making_change.role == UserRole.ADMIN:
return
# check if the user making the change is a curator in the group they are changing the curator relationship for
@@ -550,7 +552,7 @@ def update_user_curator_relationship(
db_session: Session,
user_group_id: int,
set_curator_request: SetCuratorRequest,
user_making_change: User | None = None,
user_making_change: User,
) -> None:
target_user = fetch_user_by_id(db_session, set_curator_request.user_id)
if not target_user:
@@ -599,7 +601,7 @@ def update_user_curator_relationship(
def add_users_to_user_group(
db_session: Session,
user: User | None,
user: User,
user_group_id: int,
user_ids: list[UUID],
) -> UserGroup:
@@ -641,7 +643,7 @@ def add_users_to_user_group(
def update_user_group(
db_session: Session,
user: User | None,
user: User,
user_group_id: int,
user_group_update: UserGroupUpdate,
) -> UserGroup:

View File

@@ -8,7 +8,7 @@ from collections.abc import Generator
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from ee.onyx.external_permissions.utils import generic_doc_sync
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.connector import ConfluenceConnector
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
@@ -28,7 +28,7 @@ def confluence_doc_sync(
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
) -> Generator[ElementExternalAccess, None, None]:
"""
Fetches document permissions from Confluence and yields ElementExternalAccess objects.
Compares fetched documents against existing documents in the DB for the connector.

View File

@@ -5,6 +5,9 @@ from datetime import timezone
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.access.models import NodeExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.gmail.connector import GmailConnector
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.models import HierarchyNode
@@ -39,12 +42,12 @@ def gmail_doc_sync(
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
) -> Generator[ElementExternalAccess, None, None]:
"""
Adds the external permissions to the documents in postgres
if the document doesn't already exists in postgres, we create
Adds the external permissions to the documents and hierarchy nodes in postgres.
If the document doesn't already exist in postgres, we create
it in postgres so that when it gets created later, the permissions are
already populated
already populated.
"""
gmail_connector = GmailConnector(**cc_pair.connector.connector_specific_config)
gmail_connector.load_credentials(cc_pair.credential.credential_json)
@@ -62,7 +65,13 @@ def gmail_doc_sync(
callback.progress("gmail_doc_sync", 1)
if isinstance(slim_doc, HierarchyNode):
# TODO: handle hierarchynodes during sync
# Yield hierarchy node permissions to be processed in outer layer
if slim_doc.external_access:
yield NodeExternalAccess(
external_access=slim_doc.external_access,
raw_node_id=slim_doc.raw_node_id,
source=DocumentSource.GMAIL.value,
)
continue
if slim_doc.external_access is None:
logger.warning(f"No permissions found for document {slim_doc.id}")

View File

@@ -10,7 +10,10 @@ from ee.onyx.external_permissions.google_drive.permission_retrieval import (
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.access.models import ExternalAccess
from onyx.access.models import NodeExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_drive.models import GoogleDriveFileType
from onyx.connectors.google_utils.resources import GoogleDriveService
@@ -168,17 +171,101 @@ def get_external_access_for_raw_gdrive_file(
)
def get_external_access_for_folder(
folder: GoogleDriveFileType,
google_domain: str,
drive_service: GoogleDriveService,
) -> ExternalAccess:
"""
Extract ExternalAccess from a folder's permissions.
This fetches permissions using the Drive API (via permissionIds) and extracts
user emails, group emails, and public access status.
Args:
folder: The folder metadata from Google Drive API (must include permissionIds field)
google_domain: The company's Google Workspace domain (e.g., "company.com")
drive_service: Google Drive service for fetching permission details
Returns:
ExternalAccess with extracted permission info
"""
folder_id = folder.get("id")
if not folder_id:
logger.warning("Folder missing ID, returning empty permissions")
return ExternalAccess(
external_user_emails=set(),
external_user_group_ids=set(),
is_public=False,
)
# Get permission IDs from folder metadata
permission_ids = folder.get("permissionIds") or []
if not permission_ids:
logger.debug(f"No permissionIds found for folder {folder_id}")
return ExternalAccess(
external_user_emails=set(),
external_user_group_ids=set(),
is_public=False,
)
# Fetch full permission objects using the permission IDs
permissions_list = get_permissions_by_ids(
drive_service=drive_service,
doc_id=folder_id,
permission_ids=permission_ids,
)
user_emails: set[str] = set()
group_emails: set[str] = set()
is_public = False
for permission in permissions_list:
if permission.type == PermissionType.USER:
if permission.email_address:
user_emails.add(permission.email_address)
else:
logger.warning(f"User permission without email for folder {folder_id}")
elif permission.type == PermissionType.GROUP:
# Groups are represented as email addresses in Google Drive
if permission.email_address:
group_emails.add(permission.email_address)
else:
logger.warning(f"Group permission without email for folder {folder_id}")
elif permission.type == PermissionType.DOMAIN:
# Domain permission - check if it matches company domain
if permission.domain == google_domain:
# Only public if discoverable (allowFileDiscovery is not False)
# If allowFileDiscovery is False, it's "link only" access
is_public = permission.allow_file_discovery is not False
else:
logger.debug(
f"Domain permission for {permission.domain} does not match "
f"company domain {google_domain} for folder {folder_id}"
)
elif permission.type == PermissionType.ANYONE:
# Only public if discoverable (allowFileDiscovery is not False)
# If allowFileDiscovery is False, it's "link only" access
is_public = permission.allow_file_discovery is not False
return ExternalAccess(
external_user_emails=user_emails,
external_user_group_ids=group_emails,
is_public=is_public,
)
def gdrive_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
) -> Generator[ElementExternalAccess, None, None]:
"""
Adds the external permissions to the documents in postgres
if the document doesn't already exists in postgres, we create
Adds the external permissions to the documents and hierarchy nodes in postgres.
If the document doesn't already exist in postgres, we create
it in postgres so that when it gets created later, the permissions are
already populated
already populated.
"""
google_drive_connector = GoogleDriveConnector(
**cc_pair.connector.connector_specific_config
@@ -197,7 +284,13 @@ def gdrive_doc_sync(
callback.progress("gdrive_doc_sync", 1)
if isinstance(slim_doc, HierarchyNode):
# TODO: handle hierarchynodes during sync
# Yield hierarchy node permissions to be processed in outer layer
if slim_doc.external_access:
yield NodeExternalAccess(
external_access=slim_doc.external_access,
raw_node_id=slim_doc.raw_node_id,
source=DocumentSource.GOOGLE_DRIVE.value,
)
continue
if slim_doc.external_access is None:
raise ValueError(

View File

@@ -30,6 +30,10 @@ class GoogleDrivePermission(BaseModel):
type: PermissionType
domain: str | None # only applies to domain permissions
permission_details: GoogleDrivePermissionDetails | None
# Whether this permission makes the file discoverable in search
# False means "anyone with the link" (not searchable/discoverable)
# Only applicable for domain/anyone permission types
allow_file_discovery: bool | None
@classmethod
def from_drive_permission(
@@ -46,6 +50,7 @@ class GoogleDrivePermission(BaseModel):
email_address=drive_permission.get("emailAddress"),
type=PermissionType(drive_permission["type"]),
domain=drive_permission.get("domain"),
allow_file_discovery=drive_permission.get("allowFileDiscovery"),
permission_details=(
GoogleDrivePermissionDetails(
permission_type=permission_details.get("type"),

View File

@@ -36,7 +36,7 @@ def get_permissions_by_ids(
retrieval_function=drive_service.permissions().list,
list_key="permissions",
fileId=doc_id,
fields="permissions(id, emailAddress, type, domain, permissionDetails),nextPageToken",
fields="permissions(id, emailAddress, type, domain, allowFileDiscovery, permissionDetails),nextPageToken",
supportsAllDrives=True,
continue_on_404_or_403=True,
)
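For reference, the expanded field list above asks Drive to include allowFileDiscovery on each permission; it is only populated for domain/anyone grants. An illustrative shape of two returned entries (values are made up for the example):

# Domain grant that is link-only: not discoverable, so the permission
# extraction in this change does not treat the item as public.
example_domain_permission = {
    "id": "12345",
    "type": "domain",
    "domain": "company.com",
    "allowFileDiscovery": False,
}
# User grant: identified by email, no allowFileDiscovery field.
example_user_permission = {
    "id": "67890",
    "type": "user",
    "emailAddress": "alice@company.com",
}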

View File

@@ -3,7 +3,7 @@ from collections.abc import Generator
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from ee.onyx.external_permissions.utils import generic_doc_sync
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.jira.connector import JiraConnector
from onyx.db.models import ConnectorCredentialPair
@@ -20,7 +20,7 @@ def jira_doc_sync(
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[DocExternalAccess, None, None]:
) -> Generator[ElementExternalAccess, None, None]:
jira_connector = JiraConnector(
**cc_pair.connector.connector_specific_config,
)

View File

@@ -5,6 +5,8 @@ from typing import Protocol
from ee.onyx.db.external_perm import ExternalUserGroup # noqa
from onyx.access.models import DocExternalAccess # noqa
from onyx.access.models import ElementExternalAccess # noqa
from onyx.access.models import NodeExternalAccess # noqa
from onyx.context.search.models import InferenceChunk
from onyx.db.models import ConnectorCredentialPair # noqa
from onyx.db.utils import DocumentRow
@@ -53,7 +55,7 @@ DocSyncFuncType = Callable[
FetchAllDocumentsIdsFunction,
Optional[IndexingHeartbeatInterface],
],
Generator[DocExternalAccess, None, None],
Generator[ElementExternalAccess, None, None],
]
GroupSyncFuncType = Callable[
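As a signature reference for the updated alias, a connector doc-sync function now yields ElementExternalAccess, which in this change covers both document-level and hierarchy-node-level permission records. A stub conforming to DocSyncFuncType might look like this (sketch only; the real implementations are the connector modules elsewhere in this diff):

from collections.abc import Generator

def example_source_doc_sync(
    cc_pair: ConnectorCredentialPair,
    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
    callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
    # Yield DocExternalAccess for documents and NodeExternalAccess for
    # hierarchy nodes; the empty body keeps this a generator-typed stub.
    yield from ()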

View File

@@ -34,21 +34,21 @@ def _get_all_censoring_enabled_sources() -> set[DocumentSource]:
# NOTE: This is only called if ee is enabled.
def _post_query_chunk_censoring(
chunks: list[InferenceChunk],
user: User | None,
user: User,
) -> list[InferenceChunk]:
"""
This function checks all chunks to see if they need to be sent to a censoring
function. If they do, it sends them to the censoring function and returns the
censored chunks. If they don't, it returns the original chunks.
"""
if user is None:
# if user is None, permissions are not enforced
return chunks
sources_to_censor = _get_all_censoring_enabled_sources()
# Anonymous users can only access public (non-permission-synced) content
if user.is_anonymous:
return [chunk for chunk in chunks if chunk.source_type not in sources_to_censor]
final_chunk_dict: dict[str, InferenceChunk] = {}
chunks_to_process: dict[DocumentSource, list[InferenceChunk]] = {}
sources_to_censor = _get_all_censoring_enabled_sources()
for chunk in chunks:
# Separate out chunks that require permission post-processing by source
if chunk.source_type in sources_to_censor:

View File

@@ -3,7 +3,7 @@ from collections.abc import Generator
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from ee.onyx.external_permissions.utils import generic_doc_sync
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.sharepoint.connector import SharepointConnector
from onyx.db.models import ConnectorCredentialPair
@@ -20,7 +20,7 @@ def sharepoint_doc_sync(
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[DocExternalAccess, None, None]:
) -> Generator[ElementExternalAccess, None, None]:
sharepoint_connector = SharepointConnector(
**cc_pair.connector.connector_specific_config,
)

View File

@@ -3,7 +3,7 @@ from collections.abc import Generator
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from ee.onyx.external_permissions.utils import generic_doc_sync
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.teams.connector import TeamsConnector
from onyx.db.models import ConnectorCredentialPair
@@ -21,7 +21,7 @@ def teams_doc_sync(
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
) -> Generator[ElementExternalAccess, None, None]:
teams_connector = TeamsConnector(
**cc_pair.connector.connector_specific_config,
)

View File

@@ -2,7 +2,9 @@ from collections.abc import Generator
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.access.models import ExternalAccess
from onyx.access.models import NodeExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import HierarchyNode
@@ -20,7 +22,7 @@ def generic_doc_sync(
doc_source: DocumentSource,
slim_connector: SlimConnectorWithPermSync,
label: str,
) -> Generator[DocExternalAccess, None, None]:
) -> Generator[ElementExternalAccess, None, None]:
"""
A convenience function for performing a generic document synchronization.
@@ -30,7 +32,7 @@ def generic_doc_sync(
- fetching *all* new (slim) docs
- yielding external-access permissions for existing docs which do not exist in the newly fetched slim-docs set (with their
`external_access` set to "private")
- yielding external-access permissions for newly fetched docs
- yielding external-access permissions for newly fetched docs and hierarchy nodes
Returns:
A `Generator` which yields existing and newly fetched external-access permissions.
@@ -51,7 +53,13 @@ def generic_doc_sync(
for doc in doc_batch:
if isinstance(doc, HierarchyNode):
# TODO: handle hierarchynodes during sync
# Yield hierarchy node permissions to be processed in outer layer
if doc.external_access:
yield NodeExternalAccess(
external_access=doc.external_access,
raw_node_id=doc.raw_node_id,
source=doc_source.value,
)
continue
if not doc.external_access:
raise RuntimeError(

View File

@@ -41,7 +41,7 @@ def _run_single_search(
query: str,
filters: BaseFilters | None,
document_index: DocumentIndex,
user: User | None,
user: User,
db_session: Session,
num_hits: int | None = None,
) -> list[InferenceChunk]:
@@ -63,7 +63,7 @@ def _run_single_search(
def stream_search_query(
request: SendSearchQueryRequest,
user: User | None,
user: User,
db_session: Session,
) -> Generator[
SearchQueriesPacket | SearchDocsPacket | LLMSelectedDocsPacket | SearchErrorPacket,
@@ -101,8 +101,7 @@ def stream_search_query(
# Build list of all executed queries for tracking
all_executed_queries = [original_query] + keyword_expansions
# TODO remove this check, user should not be None
if user is not None:
if not user.is_anonymous:
create_search_query(
db_session=db_session,
user_id=user.id,

View File

@@ -40,7 +40,7 @@ class QueryAnalyticsResponse(BaseModel):
def get_query_analytics(
start: datetime.datetime | None = None,
end: datetime.datetime | None = None,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[QueryAnalyticsResponse]:
daily_query_usage_info = fetch_query_analytics(
@@ -71,7 +71,7 @@ class UserAnalyticsResponse(BaseModel):
def get_user_analytics(
start: datetime.datetime | None = None,
end: datetime.datetime | None = None,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[UserAnalyticsResponse]:
daily_query_usage_info_per_user = fetch_per_user_query_analytics(
@@ -105,7 +105,7 @@ class OnyxbotAnalyticsResponse(BaseModel):
def get_onyxbot_analytics(
start: datetime.datetime | None = None,
end: datetime.datetime | None = None,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[OnyxbotAnalyticsResponse]:
daily_onyxbot_info = fetch_onyxbot_analytics(
@@ -141,7 +141,7 @@ def get_persona_messages(
persona_id: int,
start: datetime.datetime | None = None,
end: datetime.datetime | None = None,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[PersonaMessageAnalyticsResponse]:
"""Fetch daily message counts for a single persona within the given time range."""
@@ -179,7 +179,7 @@ def get_persona_unique_users(
persona_id: int,
start: datetime.datetime,
end: datetime.datetime,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[PersonaUniqueUsersResponse]:
"""Get unique users per day for a single persona."""
@@ -218,7 +218,7 @@ def get_assistant_stats(
assistant_id: int,
start: datetime.datetime | None = None,
end: datetime.datetime | None = None,
user: User | None = Depends(current_user),
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> AssistantStatsResponse:
"""

View File

@@ -27,6 +27,7 @@ import httpx
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from pydantic import BaseModel
from sqlalchemy.orm import Session
from ee.onyx.auth.users import current_admin_user
@@ -56,6 +57,7 @@ from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.db.engine.sql_engine import get_session
from onyx.redis.redis_pool import get_shared_redis_client
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
@@ -68,6 +70,63 @@ router = APIRouter(prefix="/admin/billing")
_stripe_publishable_key_cache: str | None = None
_stripe_key_lock = asyncio.Lock()
# Redis key for billing circuit breaker (self-hosted only)
# When set, billing requests to Stripe are disabled until user manually retries
BILLING_CIRCUIT_BREAKER_KEY = "billing_circuit_open"
# Circuit breaker auto-expires after 1 hour (user can manually retry sooner)
BILLING_CIRCUIT_BREAKER_TTL_SECONDS = 3600
def _is_billing_circuit_open() -> bool:
"""Check if the billing circuit breaker is open (self-hosted only)."""
if MULTI_TENANT:
return False
try:
redis_client = get_shared_redis_client()
is_open = bool(redis_client.exists(BILLING_CIRCUIT_BREAKER_KEY))
logger.debug(
f"Circuit breaker check: key={BILLING_CIRCUIT_BREAKER_KEY}, is_open={is_open}"
)
return is_open
except Exception as e:
logger.error(f"Failed to check circuit breaker: {e}")
return False
def _open_billing_circuit() -> None:
"""Open the billing circuit breaker after a failure (self-hosted only)."""
if MULTI_TENANT:
return
try:
redis_client = get_shared_redis_client()
redis_client.set(
BILLING_CIRCUIT_BREAKER_KEY,
"1",
ex=BILLING_CIRCUIT_BREAKER_TTL_SECONDS,
)
# Verify it was set
exists = redis_client.exists(BILLING_CIRCUIT_BREAKER_KEY)
logger.warning(
f"Billing circuit breaker opened (TTL={BILLING_CIRCUIT_BREAKER_TTL_SECONDS}s, "
f"verified={exists}). Stripe billing requests are disabled until manually reset."
)
except Exception as e:
logger.error(f"Failed to open circuit breaker: {e}")
def _close_billing_circuit() -> None:
"""Close the billing circuit breaker (re-enable Stripe requests)."""
if MULTI_TENANT:
return
try:
redis_client = get_shared_redis_client()
redis_client.delete(BILLING_CIRCUIT_BREAKER_KEY)
logger.info(
"Billing circuit breaker closed. Stripe billing requests re-enabled."
)
except Exception as e:
logger.error(f"Failed to close circuit breaker: {e}")
def _get_license_data(db_session: Session) -> str | None:
"""Get license data from database if exists (self-hosted only)."""
@@ -102,6 +161,7 @@ async def create_checkout_session(
license_data = _get_license_data(db_session)
tenant_id = _get_tenant_id()
billing_period = request.billing_period if request else "monthly"
seats = request.seats if request else None
email = request.email if request else None
# Build redirect URL for after checkout completion
@@ -110,6 +170,7 @@ async def create_checkout_session(
try:
return await create_checkout_service(
billing_period=billing_period,
seats=seats,
email=email,
license_data=license_data,
redirect_url=redirect_url,
@@ -156,6 +217,8 @@ async def get_billing_information(
"""Get billing information for the current subscription.
Returns subscription status and details from Stripe.
For self-hosted: If the circuit breaker is open (previous failure),
returns a 503 error without making the request.
"""
license_data = _get_license_data(db_session)
tenant_id = _get_tenant_id()
@@ -164,12 +227,22 @@ async def get_billing_information(
if not MULTI_TENANT and not license_data:
return SubscriptionStatusResponse(subscribed=False)
# Check circuit breaker (self-hosted only)
if _is_billing_circuit_open():
raise HTTPException(
status_code=503,
detail="Stripe connection temporarily disabled. Click 'Connect to Stripe' to retry.",
)
try:
return await get_billing_service(
license_data=license_data,
tenant_id=tenant_id,
)
except BillingServiceError as e:
# Open circuit breaker on connection failures (self-hosted only)
if e.status_code in (502, 503, 504):
_open_billing_circuit()
raise HTTPException(status_code=e.status_code, detail=e.message)
@@ -182,6 +255,8 @@ async def update_seats(
"""Update the seat count for the current subscription.
Handles Stripe proration and license regeneration via control plane.
For self-hosted, the frontend should call /license/claim after a short delay
to fetch the regenerated license.
"""
license_data = _get_license_data(db_session)
tenant_id = _get_tenant_id()
@@ -191,11 +266,17 @@ async def update_seats(
raise HTTPException(status_code=400, detail="No license found")
try:
return await update_seat_service(
result = await update_seat_service(
new_seat_count=request.new_seat_count,
license_data=license_data,
tenant_id=tenant_id,
)
# Note: Don't store license here - the control plane may still be processing
# the subscription update. The frontend should call /license/claim after a
# short delay to get the freshly generated license.
return result
except BillingServiceError as e:
raise HTTPException(status_code=e.status_code, detail=e.message)
@@ -262,3 +343,31 @@ async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
status_code=500,
detail="Failed to fetch Stripe publishable key",
)
class ResetConnectionResponse(BaseModel):
success: bool
message: str
@router.post("/reset-connection")
async def reset_stripe_connection(
_: User = Depends(current_admin_user),
) -> ResetConnectionResponse:
"""Reset the Stripe connection circuit breaker.
Called when user clicks "Connect to Stripe" to retry after a previous failure.
This clears the circuit breaker flag, allowing billing requests to proceed again.
Self-hosted only - cloud deployments don't use the circuit breaker.
"""
if MULTI_TENANT:
return ResetConnectionResponse(
success=True,
message="Circuit breaker not applicable for cloud deployments",
)
_close_billing_circuit()
return ResetConnectionResponse(
success=True,
message="Stripe connection reset. Billing requests re-enabled.",
)
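Taken together, the three helpers above implement a manual circuit breaker: endpoints check the flag before calling Stripe, connection-level failures set it with a one-hour TTL, and the reset endpoint clears it. A condensed sketch of the pattern as the billing-information endpoint applies it (the wrapped call is a placeholder, not a real function in this file):

from fastapi import HTTPException

async def stripe_call_with_circuit_breaker() -> dict:
    # Fail fast while the breaker is open instead of re-hitting a bad connection.
    if _is_billing_circuit_open():
        raise HTTPException(
            status_code=503,
            detail="Stripe connection temporarily disabled. Click 'Connect to Stripe' to retry.",
        )
    try:
        return await some_stripe_backed_request()  # placeholder
    except BillingServiceError as e:
        # Only connection-style failures (502/503/504) open the breaker.
        if e.status_code in (502, 503, 504):
            _open_billing_circuit()
        raise HTTPException(status_code=e.status_code, detail=e.message)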

View File

@@ -10,6 +10,7 @@ class CreateCheckoutSessionRequest(BaseModel):
"""Request to create a Stripe checkout session."""
billing_period: Literal["monthly", "annual"] = "monthly"
seats: int | None = None
email: str | None = None
@@ -67,6 +68,7 @@ class SeatUpdateResponse(BaseModel):
current_seats: int
used_seats: int
message: str | None = None
license: str | None = None # Regenerated license (self-hosted stores this)
class StripePublishableKeyResponse(BaseModel):

View File

@@ -103,6 +103,7 @@ async def _make_billing_request(
Raises:
BillingServiceError: If request fails
"""
base_url = _get_base_url()
url = f"{base_url}{path}"
headers = _get_headers(license_data)
@@ -134,6 +135,7 @@ async def _make_billing_request(
async def create_checkout_session(
billing_period: str = "monthly",
seats: int | None = None,
email: str | None = None,
license_data: str | None = None,
redirect_url: str | None = None,
@@ -143,6 +145,7 @@ async def create_checkout_session(
Args:
billing_period: "monthly" or "annual"
seats: Number of seats to purchase (optional, uses default if not provided)
email: Customer email for new subscriptions
license_data: Existing license for renewals (self-hosted)
redirect_url: URL to redirect after successful checkout
@@ -152,6 +155,8 @@ async def create_checkout_session(
CreateCheckoutSessionResponse with checkout URL
"""
body: dict = {"billing_period": billing_period}
if seats is not None:
body["seats"] = seats
if email:
body["email"] = email
if redirect_url:
@@ -264,4 +269,5 @@ async def update_seat_count(
current_seats=data.get("current_seats", 0),
used_seats=data.get("used_seats", 0),
message=data.get("message"),
license=data.get("license"),
)

View File

@@ -115,7 +115,7 @@ async def refresh_access_token(
@admin_router.put("")
def admin_ee_put_settings(
settings: EnterpriseSettings, _: User | None = Depends(current_admin_user)
settings: EnterpriseSettings, _: User = Depends(current_admin_user)
) -> None:
store_settings(settings)
@@ -134,7 +134,7 @@ def ee_fetch_settings() -> EnterpriseSettings:
def put_logo(
file: UploadFile,
is_logotype: bool = False,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
) -> None:
upload_logo(file=file, is_logotype=is_logotype)
@@ -187,7 +187,7 @@ def fetch_logo(
@admin_router.put("/custom-analytics-script")
def upload_custom_analytics_script(
script_upload: AnalyticsScriptUpload, _: User | None = Depends(current_admin_user)
script_upload: AnalyticsScriptUpload, _: User = Depends(current_admin_user)
) -> None:
try:
store_analytics_script(script_upload)

View File

@@ -21,6 +21,7 @@ from sqlalchemy.orm import Session
from ee.onyx.auth.users import current_admin_user
from ee.onyx.configs.app_configs import CLOUD_DATA_PLANE_URL
from ee.onyx.db.license import delete_license as db_delete_license
from ee.onyx.db.license import get_license
from ee.onyx.db.license import get_license_metadata
from ee.onyx.db.license import invalidate_license_cache
from ee.onyx.db.license import refresh_license_cache
@@ -90,24 +91,21 @@ async def get_seat_usage(
@router.post("/claim")
async def claim_license(
session_id: str,
session_id: str | None = None,
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> LicenseResponse:
"""
Claim a license after Stripe checkout (self-hosted only).
Claim a license from the control plane (self-hosted only).
After a user completes Stripe checkout, they're redirected back with a
session_id. This endpoint exchanges that session_id for a signed license
via the cloud data plane proxy.
Two modes:
1. With session_id: After Stripe checkout, exchange session_id for license
2. Without session_id: Re-claim using existing license for auth
Flow:
1. Self-hosted frontend redirects to Stripe checkout (via cloud proxy)
2. User completes payment
3. Stripe redirects back to self-hosted instance with session_id
4. Frontend calls this endpoint with session_id
5. We call cloud data plane /proxy/claim-license to get the signed license
6. License is stored locally and cached
Use without session_id after:
- Updating seats via the billing API
- Returning from the Stripe customer portal
- Any operation that regenerates the license on control plane
"""
if MULTI_TENANT:
raise HTTPException(
@@ -116,14 +114,40 @@ async def claim_license(
)
try:
# Call cloud data plane to claim the license
url = f"{CLOUD_DATA_PLANE_URL}/proxy/claim-license"
response = requests.post(
url,
json={"session_id": session_id},
headers={"Content-Type": "application/json"},
timeout=30,
)
if session_id:
# Claim license after checkout using session_id
url = f"{CLOUD_DATA_PLANE_URL}/proxy/claim-license"
response = requests.post(
url,
json={"session_id": session_id},
headers={"Content-Type": "application/json"},
timeout=30,
)
else:
# Re-claim using existing license for auth
metadata = get_license_metadata(db_session)
if not metadata or not metadata.tenant_id:
raise HTTPException(
status_code=400,
detail="No license found. Provide session_id after checkout.",
)
license_row = get_license(db_session)
if not license_row or not license_row.license_data:
raise HTTPException(
status_code=400, detail="No license found in database"
)
url = f"{CLOUD_DATA_PLANE_URL}/proxy/license/{metadata.tenant_id}"
response = requests.get(
url,
headers={
"Authorization": f"Bearer {license_row.license_data}",
"Content-Type": "application/json",
},
timeout=30,
)
response.raise_for_status()
data = response.json()
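A hedged sketch of driving the two modes from a self-hosted client, assuming the claim route is reachable at the path shown below (the actual URL prefix for this router is not visible in this diff, and admin auth headers are omitted for brevity):

import requests

BASE_URL = "https://onyx.internal.example.com"  # illustrative host
CLAIM_PATH = "/license/claim"  # assumed mount point for the router above

def claim_after_checkout(session_id: str) -> dict:
    # Mode 1: exchange the Stripe checkout session_id for a signed license.
    resp = requests.post(
        f"{BASE_URL}{CLAIM_PATH}", params={"session_id": session_id}, timeout=30
    )
    resp.raise_for_status()
    return resp.json()

def reclaim_after_license_change() -> dict:
    # Mode 2: no session_id; the backend re-claims using its stored license,
    # e.g. after a seat update or a return from the Stripe customer portal.
    resp = requests.post(f"{BASE_URL}{CLAIM_PATH}", timeout=30)
    resp.raise_for_status()
    return resp.json()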

View File

@@ -27,7 +27,7 @@ router = APIRouter(prefix="/manage")
def create_standard_answer(
standard_answer_creation_request: StandardAnswerCreationRequest,
db_session: Session = Depends(get_session),
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
) -> StandardAnswer:
standard_answer_model = insert_standard_answer(
keyword=standard_answer_creation_request.keyword,
@@ -43,7 +43,7 @@ def create_standard_answer(
@router.get("/admin/standard-answer")
def list_standard_answers(
db_session: Session = Depends(get_session),
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
) -> list[StandardAnswer]:
standard_answer_models = fetch_standard_answers(db_session=db_session)
return [
@@ -57,7 +57,7 @@ def patch_standard_answer(
standard_answer_id: int,
standard_answer_creation_request: StandardAnswerCreationRequest,
db_session: Session = Depends(get_session),
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
) -> StandardAnswer:
existing_standard_answer = fetch_standard_answer(
standard_answer_id=standard_answer_id,
@@ -83,7 +83,7 @@ def patch_standard_answer(
def delete_standard_answer(
standard_answer_id: int,
db_session: Session = Depends(get_session),
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
) -> None:
return remove_standard_answer(
standard_answer_id=standard_answer_id,
@@ -95,7 +95,7 @@ def delete_standard_answer(
def create_standard_answer_category(
standard_answer_category_creation_request: StandardAnswerCategoryCreationRequest,
db_session: Session = Depends(get_session),
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
) -> StandardAnswerCategory:
standard_answer_category_model = insert_standard_answer_category(
category_name=standard_answer_category_creation_request.name,
@@ -107,7 +107,7 @@ def create_standard_answer_category(
@router.get("/admin/standard-answer/category")
def list_standard_answer_categories(
db_session: Session = Depends(get_session),
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
) -> list[StandardAnswerCategory]:
standard_answer_category_models = fetch_standard_answer_categories(
db_session=db_session
@@ -123,7 +123,7 @@ def patch_standard_answer_category(
standard_answer_category_id: int,
standard_answer_category_creation_request: StandardAnswerCategoryCreationRequest,
db_session: Session = Depends(get_session),
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
) -> StandardAnswerCategory:
existing_standard_answer_category = fetch_standard_answer_category(
standard_answer_category_id=standard_answer_category_id,

View File

@@ -22,7 +22,7 @@ basic_router = APIRouter(prefix="/query")
def get_standard_answer(
request: StandardAnswerRequest,
db_session: Session = Depends(get_session),
_: User | None = Depends(current_user),
_: User = Depends(current_user),
) -> StandardAnswerResponse:
try:
standard_answers = oneoff_standard_answers(

View File

@@ -37,8 +37,7 @@ router = APIRouter(prefix="/search")
@router.post("/search-flow-classification")
def search_flow_classification(
request: SearchFlowClassificationRequest,
# This is added just to ensure this endpoint isn't spammed by non-authorized users since there's an LLM call underneath it
_: User | None = Depends(current_user),
_: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> SearchFlowClassificationResponse:
query = request.user_query
@@ -70,7 +69,7 @@ def search_flow_classification(
@router.post("/send-search-message", response_model=None)
def handle_send_search_message(
request: SendSearchQueryRequest,
user: User | None = Depends(current_user),
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> StreamingResponse | SearchFullResponse:
"""
@@ -114,7 +113,7 @@ def handle_send_search_message(
def get_search_history(
limit: int = 100,
filter_days: int | None = None,
user: User | None = Depends(current_user),
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> SearchHistoryResponse:
"""
@@ -146,11 +145,6 @@ def get_search_history(
detail="filter_days must be greater than 0",
)
# TODO(yuhong) remove this
if user is None:
# Return empty list for unauthenticated users
return SearchHistoryResponse(search_queries=[])
search_queries = fetch_search_queries_for_user(
db_session=db_session,
user_id=user.id,

View File

@@ -28,9 +28,9 @@ from onyx.server.query_and_chat.token_limit import _user_is_rate_limited_by_glob
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
def _check_token_rate_limits(user: User | None) -> None:
if user is None:
# Unauthenticated users are only rate limited by global settings
def _check_token_rate_limits(user: User) -> None:
# Anonymous users are only rate limited by global settings
if user.is_anonymous:
_user_is_rate_limited_by_global()
elif is_api_key_email_address(user.email):

View File

@@ -153,7 +153,7 @@ def snapshot_from_chat_session(
@router.get("/admin/chat-sessions")
def admin_get_chat_sessions(
user_id: UUID,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> ChatSessionsResponse:
# we specifically don't allow this endpoint if "anonymized" since
@@ -196,7 +196,7 @@ def get_chat_session_history(
feedback_type: QAFeedbackType | None = None,
start_time: datetime | None = None,
end_time: datetime | None = None,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> PaginatedReturn[ChatSessionMinimal]:
ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])
@@ -234,7 +234,7 @@ def get_chat_session_history(
@router.get("/admin/chat-session-history/{chat_session_id}")
def get_chat_session_admin(
chat_session_id: UUID,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> ChatSessionSnapshot:
ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])
@@ -269,7 +269,7 @@ def get_chat_session_admin(
@router.get("/admin/query-history/list")
def list_all_query_history_exports(
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[QueryHistoryExport]:
ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])
@@ -297,7 +297,7 @@ def list_all_query_history_exports(
@router.post("/admin/query-history/start-export", tags=PUBLIC_API_TAGS)
def start_query_history_export(
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
start: datetime | None = None,
end: datetime | None = None,
@@ -344,7 +344,7 @@ def start_query_history_export(
@router.get("/admin/query-history/export-status", tags=PUBLIC_API_TAGS)
def get_query_history_export_status(
request_id: str,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> dict[str, str]:
ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])
@@ -378,7 +378,7 @@ def get_query_history_export_status(
@router.get("/admin/query-history/download", tags=PUBLIC_API_TAGS)
def download_query_history_csv(
request_id: str,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> StreamingResponse:
ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])

View File

@@ -58,7 +58,7 @@ def generate_report(
@router.get("/admin/usage-report/{report_name}")
def read_usage_report(
report_name: str,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> Response:
try:
@@ -82,7 +82,7 @@ def read_usage_report(
@router.get("/admin/usage-report")
def fetch_usage_reports(
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[UsageReportMetadata]:
try:

View File

@@ -123,9 +123,14 @@ def _seed_llms(
upsert_llm_provider(llm_upsert_request, db_session)
for llm_upsert_request in llm_upsert_requests
]
update_default_provider(
provider_id=seeded_providers[0].id, db_session=db_session
)
if len(seeded_providers[0].model_configurations) > 0:
default_model = seeded_providers[0].model_configurations[0].name
update_default_provider(
provider_id=seeded_providers[0].id,
model_name=default_model,
db_session=db_session,
)
def _seed_personas(db_session: Session, personas: list[PersonaUpsertRequest]) -> None:

View File

@@ -14,7 +14,6 @@ from ee.onyx.server.tenants.anonymous_user_path import validate_anonymous_user_p
from ee.onyx.server.tenants.models import AnonymousUserPath
from onyx.auth.users import anonymous_user_enabled
from onyx.auth.users import current_admin_user
from onyx.auth.users import optional_user
from onyx.auth.users import User
from onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME
from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
@@ -29,7 +28,7 @@ router = APIRouter(prefix="/tenants")
@router.get("/anonymous-user-path")
async def get_anonymous_user_path_api(
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
) -> AnonymousUserPath:
tenant_id = get_current_tenant_id()
@@ -45,7 +44,7 @@ async def get_anonymous_user_path_api(
@router.post("/anonymous-user-path")
async def set_anonymous_user_path_api(
anonymous_user_path: str,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
) -> None:
tenant_id = get_current_tenant_id()
try:
@@ -72,7 +71,6 @@ async def set_anonymous_user_path_api(
@router.post("/anonymous-user")
async def login_as_anonymous_user(
anonymous_user_path: str,
_: User | None = Depends(optional_user),
) -> Response:
with get_session_with_shared_schema() as db_session:
tenant_id = get_tenant_id_for_anonymous_user_path(

View File

@@ -300,12 +300,12 @@ def configure_default_api_keys(db_session: Session) -> None:
has_set_default_provider = False
def _upsert(request: LLMProviderUpsertRequest) -> None:
def _upsert(request: LLMProviderUpsertRequest, default_model: str) -> None:
nonlocal has_set_default_provider
try:
provider = upsert_llm_provider(request, db_session)
if not has_set_default_provider:
update_default_provider(provider.id, db_session)
update_default_provider(provider.id, default_model, db_session)
has_set_default_provider = True
except Exception as e:
logger.error(f"Failed to configure {request.provider} provider: {e}")
@@ -323,14 +323,13 @@ def configure_default_api_keys(db_session: Session) -> None:
name="OpenAI",
provider=OPENAI_PROVIDER_NAME,
api_key=OPENAI_DEFAULT_API_KEY,
default_model_name=default_model_name,
model_configurations=_build_model_configuration_upsert_requests(
OPENAI_PROVIDER_NAME, recommendations
),
api_key_changed=True,
is_auto_mode=True,
)
_upsert(openai_provider)
_upsert(openai_provider, default_model_name)
# Create default image generation config using the OpenAI API key
try:
@@ -359,14 +358,13 @@ def configure_default_api_keys(db_session: Session) -> None:
name="Anthropic",
provider=ANTHROPIC_PROVIDER_NAME,
api_key=ANTHROPIC_DEFAULT_API_KEY,
default_model_name=default_model_name,
model_configurations=_build_model_configuration_upsert_requests(
ANTHROPIC_PROVIDER_NAME, recommendations
),
api_key_changed=True,
is_auto_mode=True,
)
_upsert(anthropic_provider)
_upsert(anthropic_provider, default_model_name)
else:
logger.info(
"ANTHROPIC_DEFAULT_API_KEY not set, skipping Anthropic provider configuration"
@@ -391,14 +389,13 @@ def configure_default_api_keys(db_session: Session) -> None:
name="Google Vertex AI",
provider=VERTEXAI_PROVIDER_NAME,
custom_config=custom_config,
default_model_name=default_model_name,
model_configurations=_build_model_configuration_upsert_requests(
VERTEXAI_PROVIDER_NAME, recommendations
),
api_key_changed=True,
is_auto_mode=True,
)
_upsert(vertexai_provider)
_upsert(vertexai_provider, default_model_name)
else:
logger.info(
"VERTEXAI_DEFAULT_CREDENTIALS not set, skipping Vertex AI provider configuration"
@@ -430,12 +427,11 @@ def configure_default_api_keys(db_session: Session) -> None:
name="OpenRouter",
provider=OPENROUTER_PROVIDER_NAME,
api_key=OPENROUTER_DEFAULT_API_KEY,
default_model_name=default_model_name,
model_configurations=model_configurations,
api_key_changed=True,
is_auto_mode=True,
)
_upsert(openrouter_provider)
_upsert(openrouter_provider, default_model_name)
else:
logger.info(
"OPENROUTER_DEFAULT_API_KEY not set, skipping OpenRouter provider configuration"

View File

@@ -29,17 +29,13 @@ from fastapi import HTTPException
from pydantic import BaseModel
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
from ee.onyx.db.license import update_license_cache
from ee.onyx.db.license import upsert_license
from ee.onyx.server.billing.models import SeatUpdateRequest
from ee.onyx.server.billing.models import SeatUpdateResponse
from ee.onyx.server.license.models import LicensePayload
from ee.onyx.server.license.models import LicenseSource
from ee.onyx.server.tenants.access import generate_data_plane_token
from ee.onyx.utils.license import is_license_valid
from ee.onyx.utils.license import verify_license_signature
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -209,36 +205,6 @@ async def forward_to_control_plane(
)
def fetch_and_store_license(tenant_id: str, license_data: str) -> None:
"""Store license in database and update Redis cache.
Args:
tenant_id: The tenant ID
license_data: Base64-encoded signed license blob
"""
try:
# Verify before storing
payload = verify_license_signature(license_data)
# Store in database using the specific tenant's schema
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
upsert_license(db_session, license_data)
# Update Redis cache
update_license_cache(
payload,
source=LicenseSource.AUTO_FETCH,
tenant_id=tenant_id,
)
except ValueError as e:
logger.error(f"Failed to verify license: {e}")
raise
except Exception:
logger.exception("Failed to store license")
raise
# -----------------------------------------------------------------------------
# Endpoints
# -----------------------------------------------------------------------------
@@ -246,6 +212,7 @@ def fetch_and_store_license(tenant_id: str, license_data: str) -> None:
class CreateCheckoutSessionRequest(BaseModel):
billing_period: Literal["monthly", "annual"] = "monthly"
seats: int | None = None
email: str | None = None
# Redirect URL after successful checkout - self-hosted passes their instance URL
redirect_url: str | None = None
@@ -277,6 +244,8 @@ async def proxy_create_checkout_session(
}
if tenant_id:
body["tenant_id"] = tenant_id
if request_body.seats is not None:
body["seats"] = request_body.seats
if request_body.email:
body["email"] = request_body.email
if request_body.redirect_url:
@@ -439,7 +408,6 @@ async def proxy_license_fetch(
result = await forward_to_control_plane("GET", f"/license/{tenant_id}")
# Auto-store the refreshed license
license_data = result.get("license")
if not license_data:
logger.error(f"Control plane returned incomplete license response: {result}")
@@ -448,8 +416,7 @@ async def proxy_license_fetch(
detail="Control plane returned incomplete license data",
)
fetch_and_store_license(tenant_id, license_data)
# Return license to caller - self-hosted instance stores it via /api/license/claim
return LicenseFetchResponse(license=license_data, tenant_id=tenant_id)
@@ -462,6 +429,7 @@ async def proxy_seat_update(
Auth: Valid (non-expired) license required.
Handles Stripe proration and license regeneration.
Returns the regenerated license in the response for the caller to store.
"""
if not license_payload.tenant_id:
raise HTTPException(status_code=401, detail="License missing tenant_id")
@@ -477,9 +445,11 @@ async def proxy_seat_update(
},
)
# Return license in response - self-hosted instance stores it via /api/license/claim
return SeatUpdateResponse(
success=result.get("success", False),
current_seats=result.get("current_seats", 0),
used_seats=result.get("used_seats", 0),
message=result.get("message"),
license=result.get("license"),
)
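Since the regenerated license now rides along in the seat-update response, a self-hosted caller can store it right away. A rough sketch of consuming the response; the field names come from SeatUpdateResponse above, while the transport and values are assumed.

# `result` stands in for the parsed JSON body of a seat-update call (request itself omitted).
result = {
    "success": True,
    "current_seats": 25,
    "used_seats": 18,
    "message": None,
    "license": "<base64 signed license blob>",  # placeholder
}

if result["success"] and result["license"]:
    # Persist the regenerated license locally, e.g. by re-claiming via
    # /api/license/claim with no session_id (see the claim_license changes above).
    pass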

View File

@@ -24,12 +24,12 @@ router = APIRouter(prefix="/tenants")
@router.post("/leave-team")
async def leave_organization(
user_email: UserByEmail,
current_user: User | None = Depends(current_admin_user),
current_user: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> None:
tenant_id = get_current_tenant_id()
if current_user is None or current_user.email != user_email.user_email:
if current_user.email != user_email.user_email:
raise HTTPException(
status_code=403, detail="You can only leave the organization as yourself"
)

View File

@@ -26,10 +26,8 @@ FORBIDDEN_COMMON_EMAIL_SUBSTRINGS = [
@router.get("/existing-team-by-domain")
def get_existing_tenant_by_domain(
user: User | None = Depends(current_user),
user: User = Depends(current_user),
) -> TenantByDomainResponse | None:
if not user:
return None
domain = user.email.split("@")[1]
if any(substring in domain for substring in FORBIDDEN_COMMON_EMAIL_SUBSTRINGS):
return None

View File

@@ -24,10 +24,8 @@ router = APIRouter(prefix="/tenants")
@router.post("/users/invite/request")
async def request_invite(
invite_request: RequestInviteRequest,
user: User | None = Depends(current_admin_user),
user: User = Depends(current_admin_user),
) -> None:
if user is None:
raise HTTPException(status_code=401, detail="User not authenticated")
try:
invite_self_to_tenant(user.email, invite_request.tenant_id)
except Exception as e:
@@ -39,7 +37,7 @@ async def request_invite(
@router.get("/users/pending")
def list_pending_users(
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
) -> list[PendingUserSnapshot]:
pending_emails = get_pending_users()
return [PendingUserSnapshot(email=email) for email in pending_emails]
@@ -48,7 +46,7 @@ def list_pending_users(
@router.post("/users/invite/approve")
async def approve_user(
approve_user_request: ApproveUserRequest,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
) -> None:
tenant_id = get_current_tenant_id()
approve_user_invite(approve_user_request.email, tenant_id)
@@ -57,14 +55,11 @@ async def approve_user(
@router.post("/users/invite/accept")
async def accept_invite(
invite_request: RequestInviteRequest,
user: User | None = Depends(current_user),
user: User = Depends(current_user),
) -> None:
"""
Accept an invitation to join a tenant.
"""
if not user:
raise HTTPException(status_code=401, detail="Not authenticated")
try:
accept_user_invite(user.email, invite_request.tenant_id)
except Exception as e:
@@ -75,14 +70,11 @@ async def accept_invite(
@router.post("/users/invite/deny")
async def deny_invite(
invite_request: RequestInviteRequest,
user: User | None = Depends(current_user),
user: User = Depends(current_user),
) -> None:
"""
Deny an invitation to join a tenant.
"""
if not user:
raise HTTPException(status_code=401, detail="Not authenticated")
try:
deny_user_invite(user.email, invite_request.tenant_id)
except Exception as e:

View File

@@ -1,7 +1,6 @@
from fastapi_users import exceptions
from sqlalchemy import select
from ee.onyx.db.license import invalidate_license_cache
from onyx.auth.invited_users import get_invited_users
from onyx.auth.invited_users import get_pending_users
from onyx.auth.invited_users import write_invited_users
@@ -48,8 +47,6 @@ def get_tenant_id_for_email(email: str) -> str:
mapping.active = True
db_session.commit()
tenant_id = mapping.tenant_id
# Invalidate license cache so used_seats reflects the new count
invalidate_license_cache(tenant_id)
except Exception as e:
logger.exception(f"Error getting tenant id for email {email}: {e}")
raise exceptions.UserNotExists()
@@ -78,14 +75,7 @@ def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:
an inactive mapping (invitation) to this tenant. They can accept the
invitation later to switch tenants.
Raises:
HTTPException: 402 if adding active users would exceed seat limit
"""
from fastapi import HTTPException
from ee.onyx.db.license import check_seat_availability
from onyx.db.engine.sql_engine import get_session_with_tenant as get_tenant_session
unique_emails = set(emails)
if not unique_emails:
return
@@ -119,33 +109,6 @@ def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:
)
emails_with_active_mapping = {m.email for m in active_mappings}
# Determine which users will consume a new seat.
# Users with active mappings elsewhere get INACTIVE mappings (invitations)
# and don't consume seats until they accept. Only users without any active
# mapping will get an ACTIVE mapping and consume a seat immediately.
emails_consuming_seats = {
email
for email in unique_emails
if email not in emails_with_mapping
and email not in emails_with_active_mapping
}
# Check seat availability inside the transaction to prevent race conditions.
# Note: ALL users in unique_emails still get added below - this check only
# validates we have capacity for users who will consume seats immediately.
if emails_consuming_seats:
with get_tenant_session(tenant_id=tenant_id) as tenant_session:
result = check_seat_availability(
tenant_session,
seats_needed=len(emails_consuming_seats),
tenant_id=tenant_id,
)
if not result.available:
raise HTTPException(
status_code=402,
detail=result.error_message or "Seat limit exceeded",
)
# Add mappings for emails that don't already have one to this tenant
for email in unique_emails:
if email in emails_with_mapping:
@@ -165,12 +128,6 @@ def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:
db_session.commit()
logger.info(f"Successfully added users {emails} to tenant {tenant_id}")
# Invalidate license cache so used_seats reflects the new count
invalidate_license_cache(tenant_id)
except HTTPException:
db_session.rollback()
raise
except Exception:
logger.exception(f"Failed to add users to tenant {tenant_id}")
db_session.rollback()
@@ -193,9 +150,6 @@ def remove_users_from_tenant(emails: list[str], tenant_id: str) -> None:
db_session.delete(mapping)
db_session.commit()
# Invalidate license cache so used_seats reflects the new count
invalidate_license_cache(tenant_id)
except Exception as e:
logger.exception(
f"Failed to remove users from tenant {tenant_id}: {str(e)}"
@@ -210,9 +164,6 @@ def remove_all_users_from_tenant(tenant_id: str) -> None:
).delete()
db_session.commit()
# Invalidate license cache so used_seats reflects the new count
invalidate_license_cache(tenant_id)
def invite_self_to_tenant(email: str, tenant_id: str) -> None:
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
@@ -241,9 +192,6 @@ def approve_user_invite(email: str, tenant_id: str) -> None:
db_session.add(new_mapping)
db_session.commit()
# Invalidate license cache so used_seats reflects the new count
invalidate_license_cache(tenant_id)
# Also remove the user from pending users list
# Remove from pending users
pending_users = get_pending_users()
@@ -262,20 +210,11 @@ def accept_user_invite(email: str, tenant_id: str) -> None:
"""
Accept an invitation to join a tenant.
This activates the user's mapping to the tenant.
Raises:
HTTPException: 402 if accepting would exceed seat limit
"""
from fastapi import HTTPException
from ee.onyx.db.license import check_seat_availability
from onyx.db.engine.sql_engine import get_session_with_tenant
with get_session_with_shared_schema() as db_session:
try:
# Lock the user's mappings first to prevent race conditions.
# This ensures no concurrent request can modify this user's mappings
# while we check seats and activate.
# This ensures no concurrent request can modify this user's mappings.
active_mapping = (
db_session.query(UserTenantMapping)
.filter(
@@ -286,18 +225,6 @@ def accept_user_invite(email: str, tenant_id: str) -> None:
.first()
)
# Check seat availability within the same logical operation.
# Note: This queries fresh data from DB, not cache.
with get_session_with_tenant(tenant_id=tenant_id) as tenant_session:
result = check_seat_availability(
tenant_session, seats_needed=1, tenant_id=tenant_id
)
if not result.available:
raise HTTPException(
status_code=402,
detail=result.error_message or "Seat limit exceeded",
)
# If an active mapping exists, delete it
if active_mapping:
db_session.delete(active_mapping)
@@ -327,9 +254,6 @@ def accept_user_invite(email: str, tenant_id: str) -> None:
mapping.active = True
db_session.commit()
logger.info(f"User {email} accepted invitation to tenant {tenant_id}")
# Invalidate license cache so used_seats reflects the new count
invalidate_license_cache(tenant_id)
else:
logger.warning(
f"No invitation found for user {email} in tenant {tenant_id}"

View File

@@ -28,7 +28,7 @@ Group Token Limit Settings
@router.get("/user-groups")
def get_all_group_token_limit_settings(
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> dict[str, list[TokenRateLimitDisplay]]:
user_groups_to_token_rate_limits = fetch_all_user_group_token_rate_limits_by_group(
@@ -47,7 +47,7 @@ def get_all_group_token_limit_settings(
@router.get("/user-group/{group_id}")
def get_group_token_limit_settings(
group_id: int,
user: User | None = Depends(current_curator_or_admin_user),
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
) -> list[TokenRateLimitDisplay]:
return [
@@ -64,7 +64,7 @@ def get_group_token_limit_settings(
def create_group_token_limit_settings(
group_id: int,
token_limit_settings: TokenRateLimitArgs,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> TokenRateLimitDisplay:
rate_limit_display = TokenRateLimitDisplay.from_db(
@@ -86,7 +86,7 @@ User Token Limit Settings
@router.get("/users")
def get_user_token_limit_settings(
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[TokenRateLimitDisplay]:
return [
@@ -98,7 +98,7 @@ def get_user_token_limit_settings(
@router.post("/users")
def create_user_token_limit_settings(
token_limit_settings: TokenRateLimitArgs,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> TokenRateLimitDisplay:
rate_limit_display = TokenRateLimitDisplay.from_db(

View File

@@ -31,10 +31,10 @@ router = APIRouter(prefix="/manage", tags=PUBLIC_API_TAGS)
@router.get("/admin/user-group")
def list_user_groups(
user: User | None = Depends(current_curator_or_admin_user),
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
) -> list[UserGroup]:
if user is None or user.role == UserRole.ADMIN:
if user.role == UserRole.ADMIN:
user_groups = fetch_user_groups(db_session, only_up_to_date=False)
else:
user_groups = fetch_user_groups_for_user(
@@ -48,7 +48,7 @@ def list_user_groups(
@router.post("/admin/user-group")
def create_user_group(
user_group: UserGroupCreate,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> UserGroup:
try:
@@ -66,7 +66,7 @@ def create_user_group(
def patch_user_group(
user_group_id: int,
user_group_update: UserGroupUpdate,
user: User | None = Depends(current_curator_or_admin_user),
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
) -> UserGroup:
try:
@@ -86,7 +86,7 @@ def patch_user_group(
def add_users(
user_group_id: int,
add_users_request: AddUsersToUserGroupRequest,
user: User | None = Depends(current_curator_or_admin_user),
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
) -> UserGroup:
try:
@@ -106,7 +106,7 @@ def add_users(
def set_user_curator(
user_group_id: int,
set_curator_request: SetCuratorRequest,
user: User | None = Depends(current_curator_or_admin_user),
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
) -> None:
try:
@@ -124,7 +124,7 @@ def set_user_curator(
@router.delete("/admin/user-group/{user_group_id}")
def delete_user_group(
user_group_id: int,
_: User | None = Depends(current_admin_user),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> None:
try:

Binary file not shown.

View File

@@ -96,22 +96,20 @@ def get_access_for_documents(
return versioned_get_access_for_documents_fn(document_ids, db_session)
def _get_acl_for_user(user: User | None, db_session: Session) -> set[str]:
"""Returns a list of ACL entries that the user has access to.
def _get_acl_for_user(user: User, db_session: Session) -> set[str]:
"""Returns a list of ACL entries that the user has access to. This is meant to be
used downstream to filter out documents that the user does not have access to. The
user should have access to a document if at least one entry in the document's ACL
matches one entry in the returned set.
This is meant to be used downstream to filter out documents that the user
does not have access to. The user should have access to a document if at
least one entry in the document's ACL matches one entry in the returned set.
NOTE: These strings must be formatted in the same way as the output of
DocumentAccess::to_acl.
Anonymous users only have access to public documents.
"""
if user:
return {prefix_user_email(user.email), PUBLIC_DOC_PAT}
return {PUBLIC_DOC_PAT}
if user.is_anonymous:
return {PUBLIC_DOC_PAT}
return {prefix_user_email(user.email), PUBLIC_DOC_PAT}
def get_acl_for_user(user: User | None, db_session: Session | None = None) -> set[str]:
def get_acl_for_user(user: User, db_session: Session | None = None) -> set[str]:
versioned_acl_for_user_fn = fetch_versioned_implementation(
"onyx.access.access", "_get_acl_for_user"
)
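The downstream filtering the docstring describes reduces to a set-intersection check. A tiny illustration with made-up ACL strings; the real entry formats come from prefix_user_email and PUBLIC_DOC_PAT, whose literal values are not shown in this hunk.

user_acl = {"user_email:alice@example.com", "PUBLIC"}    # illustrative formats only
document_acl = {"user_email:bob@example.com", "PUBLIC"}  # a public document's ACL

# The user may see the document if at least one of its ACL entries matches.
has_access = bool(user_acl & document_acl)  # True, via the shared public entry

anonymous_acl = {"PUBLIC"}  # anonymous users match public documents only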

View File

@@ -0,0 +1,15 @@
from sqlalchemy.orm import Session
from onyx.db.models import User
from onyx.utils.variable_functionality import fetch_versioned_implementation
def _get_user_external_group_ids(db_session: Session, user: User) -> list[str]:
return []
def get_user_external_group_ids(db_session: Session, user: User) -> list[str]:
versioned_get_user_external_group_ids = fetch_versioned_implementation(
"onyx.access.hierarchy_access", "_get_user_external_group_ids"
)
return versioned_get_user_external_group_ids(db_session, user)

View File

@@ -105,6 +105,54 @@ class DocExternalAccess:
)
@dataclass(frozen=True)
class NodeExternalAccess:
"""
Wraps external access with a hierarchy node's raw ID.
Used for syncing hierarchy node permissions (e.g., folder permissions).
"""
external_access: ExternalAccess
# The raw node ID from the source system (e.g., Google Drive folder ID)
raw_node_id: str
# The source type (e.g., "google_drive")
source: str
def to_dict(self) -> dict:
return {
"external_access": {
"external_user_emails": list(self.external_access.external_user_emails),
"external_user_group_ids": list(
self.external_access.external_user_group_ids
),
"is_public": self.external_access.is_public,
},
"raw_node_id": self.raw_node_id,
"source": self.source,
}
@classmethod
def from_dict(cls, data: dict) -> "NodeExternalAccess":
external_access = ExternalAccess(
external_user_emails=set(
data["external_access"].get("external_user_emails", [])
),
external_user_group_ids=set(
data["external_access"].get("external_user_group_ids", [])
),
is_public=data["external_access"]["is_public"],
)
return cls(
external_access=external_access,
raw_node_id=data["raw_node_id"],
source=data["source"],
)
# Union type for elements that can have permissions synced
ElementExternalAccess = DocExternalAccess | NodeExternalAccess
# TODO(andrei): First refactor this into a pydantic model, then get rid of
# duplicate fields.
@dataclass(frozen=True, init=False)
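A quick serialization round trip for the new NodeExternalAccess, sketched with made-up values; the import path is an assumption based on where DocExternalAccess already lives.

from onyx.access.models import NodeExternalAccess  # module path assumed

payload = {
    "external_access": {
        "external_user_emails": ["alice@example.com"],
        "external_user_group_ids": ["eng-team"],
        "is_public": False,
    },
    "raw_node_id": "folder-123",  # e.g. a Google Drive folder ID (made up)
    "source": "google_drive",
}

node = NodeExternalAccess.from_dict(payload)
assert node.to_dict() == payload  # single-element lists, so set ordering is not an issue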

View File

@@ -3,10 +3,10 @@ from typing import Any
from typing import cast
from onyx.auth.schemas import UserRole
from onyx.configs.constants import KV_NO_AUTH_USER_PERSONALIZATION_KEY
from onyx.configs.constants import KV_NO_AUTH_USER_PREFERENCES_KEY
from onyx.configs.constants import NO_AUTH_USER_EMAIL
from onyx.configs.constants import NO_AUTH_USER_ID
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.configs.constants import ANONYMOUS_USER_INFO_ID
from onyx.configs.constants import KV_ANONYMOUS_USER_PERSONALIZATION_KEY
from onyx.configs.constants import KV_ANONYMOUS_USER_PREFERENCES_KEY
from onyx.key_value_store.store import KeyValueStore
from onyx.key_value_store.store import KvKeyNotFoundError
from onyx.server.manage.models import UserInfo
@@ -14,22 +14,22 @@ from onyx.server.manage.models import UserPersonalization
from onyx.server.manage.models import UserPreferences
def set_no_auth_user_preferences(
def set_anonymous_user_preferences(
store: KeyValueStore, preferences: UserPreferences
) -> None:
store.store(KV_NO_AUTH_USER_PREFERENCES_KEY, preferences.model_dump())
store.store(KV_ANONYMOUS_USER_PREFERENCES_KEY, preferences.model_dump())
def set_no_auth_user_personalization(
def set_anonymous_user_personalization(
store: KeyValueStore, personalization: UserPersonalization
) -> None:
store.store(KV_NO_AUTH_USER_PERSONALIZATION_KEY, personalization.model_dump())
store.store(KV_ANONYMOUS_USER_PERSONALIZATION_KEY, personalization.model_dump())
def load_no_auth_user_preferences(store: KeyValueStore) -> UserPreferences:
def load_anonymous_user_preferences(store: KeyValueStore) -> UserPreferences:
try:
preferences_data = cast(
Mapping[str, Any], store.load(KV_NO_AUTH_USER_PREFERENCES_KEY)
Mapping[str, Any], store.load(KV_ANONYMOUS_USER_PREFERENCES_KEY)
)
return UserPreferences(**preferences_data)
except KvKeyNotFoundError:
@@ -38,27 +38,26 @@ def load_no_auth_user_preferences(store: KeyValueStore) -> UserPreferences:
)
def fetch_no_auth_user(
store: KeyValueStore, *, anonymous_user_enabled: bool | None = None
) -> UserInfo:
def fetch_anonymous_user_info(store: KeyValueStore) -> UserInfo:
"""Fetch a UserInfo object for anonymous users (used for API responses)."""
personalization = UserPersonalization()
try:
personalization_data = cast(
Mapping[str, Any], store.load(KV_NO_AUTH_USER_PERSONALIZATION_KEY)
Mapping[str, Any], store.load(KV_ANONYMOUS_USER_PERSONALIZATION_KEY)
)
personalization = UserPersonalization(**personalization_data)
except KvKeyNotFoundError:
pass
return UserInfo(
id=NO_AUTH_USER_ID,
email=NO_AUTH_USER_EMAIL,
id=ANONYMOUS_USER_INFO_ID,
email=ANONYMOUS_USER_EMAIL,
is_active=True,
is_superuser=False,
is_verified=True,
role=UserRole.BASIC if anonymous_user_enabled else UserRole.ADMIN,
preferences=load_no_auth_user_preferences(store),
role=UserRole.LIMITED,
preferences=load_anonymous_user_preferences(store),
personalization=personalization,
is_anonymous_user=anonymous_user_enabled,
is_anonymous_user=True,
password_configured=False,
)
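Usage of the renamed helpers stays a simple load/store against the key-value store. A rough sketch; the imports of these helpers and the store factory are assumed, since their module paths are not shown in this hunk.

store = get_kv_store()  # hypothetical KeyValueStore factory, assumed

prefs = load_anonymous_user_preferences(store)  # defaults if nothing is stored yet
set_anonymous_user_preferences(store, prefs)    # persists under KV_ANONYMOUS_USER_PREFERENCES_KEY
info = fetch_anonymous_user_info(store)         # UserInfo with role LIMITED and is_anonymous_user=True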

View File

@@ -56,6 +56,7 @@ class DisposableEmailValidator:
"guerrillamail.com",
"mailinator.com",
"tempmail.com",
"chat-tempmail.com",
"throwaway.email",
"yopmail.com",
"temp-mail.org",

View File

@@ -75,7 +75,6 @@ from onyx.auth.schemas import UserUpdateWithRole
from onyx.configs.app_configs import AUTH_BACKEND
from onyx.configs.app_configs import AUTH_COOKIE_EXPIRE_TIME_SECONDS
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import DISABLE_AUTH
from onyx.configs.app_configs import EMAIL_CONFIGURED
from onyx.configs.app_configs import JWT_PUBLIC_KEY_URL
from onyx.configs.app_configs import PASSWORD_MAX_LENGTH
@@ -92,6 +91,8 @@ from onyx.configs.app_configs import USER_AUTH_SECRET
from onyx.configs.app_configs import VALID_EMAIL_DOMAINS
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.configs.constants import ANONYMOUS_USER_UUID
from onyx.configs.constants import AuthType
from onyx.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN
from onyx.configs.constants import DANSWER_API_KEY_PREFIX
@@ -134,12 +135,8 @@ from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
def is_user_admin(user: User | None) -> bool:
if AUTH_TYPE == AuthType.DISABLED:
return True
if user and user.role == UserRole.ADMIN:
return True
return False
def is_user_admin(user: User) -> bool:
return user.role == UserRole.ADMIN
def verify_auth_setting() -> None:
@@ -1331,6 +1328,14 @@ async def optional_user(
user: User | None = Depends(optional_fastapi_current_user),
) -> User | None:
tenant_id = get_current_tenant_id()
if (
user is not None
and user.is_anonymous
and anonymous_user_enabled(tenant_id=tenant_id)
):
return get_anonymous_user()
if user := await _check_for_saml_and_jwt(request, user, async_db_session):
# If user is already set, _check_for_saml_and_jwt returns the same user object
return user
@@ -1347,15 +1352,26 @@ async def optional_user(
return user
def get_anonymous_user() -> User:
"""Create anonymous user object."""
user = User(
id=uuid.UUID(ANONYMOUS_USER_UUID),
email=ANONYMOUS_USER_EMAIL,
hashed_password="",
is_active=True,
is_verified=True,
is_superuser=False,
role=UserRole.LIMITED,
use_memories=False,
)
return user
async def double_check_user(
user: User | None,
optional: bool = DISABLE_AUTH,
include_expired: bool = False,
allow_anonymous_access: bool = False,
) -> User | None:
if optional:
return user
) -> User:
if user is not None:
# If user attempted to authenticate, verify them, do not default
# to anonymous access if it fails.
@@ -1376,7 +1392,7 @@ async def double_check_user(
return user
if allow_anonymous_access:
return None
return get_anonymous_user()
raise BasicAuthenticationError(
detail="Access denied. User is not authenticated.",
@@ -1385,19 +1401,19 @@ async def double_check_user(
async def current_user_with_expired_token(
user: User | None = Depends(optional_user),
) -> User | None:
) -> User:
return await double_check_user(user, include_expired=True)
async def current_limited_user(
user: User | None = Depends(optional_user),
) -> User | None:
) -> User:
return await double_check_user(user)
async def current_chat_accessible_user(
user: User | None = Depends(optional_user),
) -> User | None:
) -> User:
tenant_id = get_current_tenant_id()
return await double_check_user(
@@ -1407,10 +1423,8 @@ async def current_chat_accessible_user(
async def current_user(
user: User | None = Depends(optional_user),
) -> User | None:
) -> User:
user = await double_check_user(user)
if not user:
return None
if user.role == UserRole.LIMITED:
raise BasicAuthenticationError(
@@ -1420,16 +1434,8 @@ async def current_user(
async def current_curator_or_admin_user(
user: User | None = Depends(current_user),
) -> User | None:
if DISABLE_AUTH:
return None
if not user or not hasattr(user, "role"):
raise BasicAuthenticationError(
detail="Access denied. User is not authenticated or lacks role information.",
)
user: User = Depends(current_user),
) -> User:
allowed_roles = {UserRole.GLOBAL_CURATOR, UserRole.CURATOR, UserRole.ADMIN}
if user.role not in allowed_roles:
raise BasicAuthenticationError(
@@ -1439,11 +1445,8 @@ async def current_curator_or_admin_user(
return user
async def current_admin_user(user: User | None = Depends(current_user)) -> User | None:
if DISABLE_AUTH:
return None
if not user or not hasattr(user, "role") or user.role != UserRole.ADMIN:
async def current_admin_user(user: User = Depends(current_user)) -> User:
if user.role != UserRole.ADMIN:
raise BasicAuthenticationError(
detail="Access denied. User must be an admin to perform this action.",
)
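Net effect for endpoint authors: auth dependencies now always hand back a User (possibly the anonymous sentinel), so `user is None` branches give way to role and is_anonymous checks. A hedged sketch of the new shape; the route, path, and response are made up.

from fastapi import APIRouter, Depends

from onyx.auth.users import current_chat_accessible_user
from onyx.db.models import User

router = APIRouter()

@router.get("/whoami")  # illustrative endpoint, not part of this change
def whoami(user: User = Depends(current_chat_accessible_user)) -> dict[str, str | bool]:
    # Where anonymous access is allowed, `user` is the anonymous sentinel rather
    # than None, so code branches on user.is_anonymous instead of a None check.
    return {"email": user.email, "anonymous": user.is_anonymous}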

View File

@@ -124,6 +124,7 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.monitoring",
"onyx.background.celery.tasks.user_file_processing",
"onyx.background.celery.tasks.llm_model_update",
"onyx.background.celery.tasks.opensearch_migration",
# Light worker tasks
"onyx.background.celery.tasks.shared",
"onyx.background.celery.tasks.vespa",

View File

@@ -325,5 +325,6 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.vespa",
"onyx.background.celery.tasks.llm_model_update",
"onyx.background.celery.tasks.user_file_processing",
"onyx.background.celery.tasks.opensearch_migration",
]
)

View File

@@ -6,6 +6,7 @@ from celery.schedules import crontab
from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL
from onyx.configs.app_configs import AUTO_LLM_UPDATE_INTERVAL_SECONDS
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED
from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
@@ -210,6 +211,31 @@ if SCHEDULED_EVAL_DATASET_NAMES:
}
)
# Add OpenSearch migration task if enabled.
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
beat_task_templates.append(
{
"name": "check-for-documents-for-opensearch-migration",
"task": OnyxCeleryTask.CHECK_FOR_DOCUMENTS_FOR_OPENSEARCH_MIGRATION_TASK,
"schedule": timedelta(seconds=120), # 2 minutes
"options": {
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
},
}
)
beat_task_templates.append(
{
"name": "migrate-documents-from-vespa-to-opensearch",
"task": OnyxCeleryTask.MIGRATE_DOCUMENT_FROM_VESPA_TO_OPENSEARCH_TASK,
"schedule": timedelta(seconds=120), # 2 minutes
"options": {
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
},
}
)
def make_cloud_generator_task(task: dict[str, Any]) -> dict[str, Any]:
cloud_task: dict[str, Any] = {}

View File

@@ -24,18 +24,27 @@ from sqlalchemy.orm import Session
from onyx.background.celery.apps.app_base import task_logger
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.connectors.factory import instantiate_connector
from onyx.connectors.interfaces import HierarchyConnector
from onyx.connectors.models import HierarchyNode as PydanticHierarchyNode
from onyx.db.connector import mark_cc_pair_as_hierarchy_fetched
from onyx.db.connector_credential_pair import (
fetch_indexable_standard_connector_credential_pair_ids,
)
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.hierarchy import upsert_hierarchy_nodes_batch
from onyx.db.models import ConnectorCredentialPair
from onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch
from onyx.redis.redis_hierarchy import ensure_source_node_exists
from onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger
@@ -211,6 +220,101 @@ def check_for_hierarchy_fetching(self: Task, *, tenant_id: str) -> int | None:
return tasks_created
# Batch size for hierarchy node processing
HIERARCHY_NODE_BATCH_SIZE = 100
def _run_hierarchy_extraction(
db_session: Session,
cc_pair: ConnectorCredentialPair,
source: DocumentSource,
tenant_id: str,
) -> int:
"""
Run the hierarchy extraction for a connector.
Instantiates the connector and calls load_hierarchy() if the connector
implements HierarchyConnector.
Returns the total number of hierarchy nodes extracted.
"""
connector = cc_pair.connector
credential = cc_pair.credential
# Instantiate the connector using its configured input type
runnable_connector = instantiate_connector(
db_session=db_session,
source=source,
input_type=connector.input_type,
connector_specific_config=connector.connector_specific_config,
credential=credential,
)
# Check if the connector supports hierarchy fetching
if not isinstance(runnable_connector, HierarchyConnector):
task_logger.debug(
f"Connector {source} does not implement HierarchyConnector, skipping"
)
return 0
redis_client = get_redis_client(tenant_id=tenant_id)
# Ensure the SOURCE-type root node exists before processing hierarchy nodes.
# This is the root of the hierarchy tree - all other nodes for this source
# should ultimately have this as an ancestor.
ensure_source_node_exists(redis_client, db_session, source)
# Determine time range: start from last hierarchy fetch, end at now
last_fetch = cc_pair.last_time_hierarchy_fetch
start_time = last_fetch.timestamp() if last_fetch else 0
end_time = datetime.now(timezone.utc).timestamp()
# Check if connector is public - all hierarchy nodes from public connectors
# should be accessible to all users
is_connector_public = cc_pair.access_type == AccessType.PUBLIC
total_nodes = 0
node_batch: list[PydanticHierarchyNode] = []
def _process_batch() -> int:
"""Process accumulated hierarchy nodes batch."""
if not node_batch:
return 0
upserted_nodes = upsert_hierarchy_nodes_batch(
db_session=db_session,
nodes=node_batch,
source=source,
commit=True,
is_connector_public=is_connector_public,
)
# Cache in Redis for fast ancestor resolution
cache_entries = [
HierarchyNodeCacheEntry.from_db_model(node) for node in upserted_nodes
]
cache_hierarchy_nodes_batch(
redis_client=redis_client,
source=source,
entries=cache_entries,
)
count = len(node_batch)
node_batch.clear()
return count
# Fetch hierarchy nodes from the connector
for node in runnable_connector.load_hierarchy(start=start_time, end=end_time):
node_batch.append(node)
if len(node_batch) >= HIERARCHY_NODE_BATCH_SIZE:
total_nodes += _process_batch()
# Process any remaining nodes
total_nodes += _process_batch()
return total_nodes
@shared_task(
name=OnyxCeleryTask.CONNECTOR_HIERARCHY_FETCHING_TASK,
soft_time_limit=3600, # 1 hour soft limit
@@ -253,15 +357,17 @@ def connector_hierarchy_fetching_task(
)
return
# TODO: Implement the actual hierarchy fetching logic
# This will involve:
# 1. Instantiating the connector
# 2. Calling a hierarchy-specific method on the connector
# 3. Upserting the hierarchy nodes to the database
source = cc_pair.connector.source
total_nodes = _run_hierarchy_extraction(
db_session=db_session,
cc_pair=cc_pair,
source=source,
tenant_id=tenant_id,
)
task_logger.info(
f"connector_hierarchy_fetching_task: "
f"Hierarchy fetching not yet implemented for cc_pair={cc_pair_id}"
f"Extracted {total_nodes} hierarchy nodes for cc_pair={cc_pair_id}"
)
# Update the last fetch time to prevent re-running until next interval
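The batching inside _run_hierarchy_extraction is a plain accumulate-and-flush loop; stripped of the Onyx-specific upsert and Redis calls, the pattern is essentially the following generic sketch (not the actual helper).

from collections.abc import Iterable, Iterator

BATCH_SIZE = 100  # mirrors HIERARCHY_NODE_BATCH_SIZE above

def batched(items: Iterable[str], batch_size: int = BATCH_SIZE) -> Iterator[list[str]]:
    """Yield fixed-size batches, flushing whatever is left over at the end."""
    batch: list[str] = []
    for item in items:
        batch.append(item)
        if len(batch) >= batch_size:
            yield batch
            batch = []
    if batch:
        yield batch

total = sum(len(b) for b in batched(f"node-{i}" for i in range(250)))
assert total == 250  # two full batches of 100 plus a final batch of 50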

View File

@@ -0,0 +1,359 @@
"""Celery tasks for migrating documents from Vespa to OpenSearch."""
import traceback
from datetime import datetime
from datetime import timezone
from typing import Any
from celery import shared_task
from celery import Task
from redis.lock import Lock as RedisLock
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.tasks.opensearch_migration.transformer import (
transform_vespa_chunks_to_opensearch_chunks,
)
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import OpenSearchDocumentMigrationStatus
from onyx.db.opensearch_migration import create_opensearch_migration_records_with_commit
from onyx.db.opensearch_migration import get_last_opensearch_migration_document_id
from onyx.db.opensearch_migration import (
get_opensearch_migration_records_needing_migration,
)
from onyx.db.opensearch_migration import get_paginated_document_batch
from onyx.db.opensearch_migration import (
increment_num_times_observed_no_additional_docs_to_migrate_with_commit,
)
from onyx.db.opensearch_migration import (
increment_num_times_observed_no_additional_docs_to_populate_migration_table_with_commit,
)
from onyx.db.opensearch_migration import should_document_migration_be_permanently_failed
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.opensearch_document_index import (
OpenSearchDocumentIndex,
)
from onyx.document_index.opensearch.schema import DocumentChunk
from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
from onyx.redis.redis_pool import get_redis_client
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
def _migrate_single_document(
document_id: str,
opensearch_document_index: OpenSearchDocumentIndex,
vespa_document_index: VespaDocumentIndex,
tenant_state: TenantState,
) -> int:
"""Migrates a single document from Vespa to OpenSearch.
Args:
document_id: The ID of the document to migrate.
opensearch_document_index: The OpenSearch document index to use.
vespa_document_index: The Vespa document index to use.
tenant_state: The tenant state to use.
Raises:
RuntimeError: If no chunks are found for the document in Vespa, or if
the number of candidate chunks to migrate does not match the number
of chunks in Vespa.
Returns:
The number of chunks migrated.
"""
vespa_document_chunks: list[dict[str, Any]] = (
vespa_document_index.get_raw_document_chunks(document_id=document_id)
)
if not vespa_document_chunks:
raise RuntimeError(f"No chunks found for document {document_id} in Vespa.")
opensearch_document_chunks: list[DocumentChunk] = (
transform_vespa_chunks_to_opensearch_chunks(vespa_document_chunks, tenant_state)
)
if len(opensearch_document_chunks) != len(vespa_document_chunks):
raise RuntimeError(
f"Bug: Number of candidate chunks to migrate ({len(opensearch_document_chunks)}) does not match "
f"number of chunks in Vespa ({len(vespa_document_chunks)})."
)
opensearch_document_index.index_raw_chunks(chunks=opensearch_document_chunks)
return len(opensearch_document_chunks)
# shared_task allows this task to be shared across celery app instances.
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_DOCUMENTS_FOR_OPENSEARCH_MIGRATION_TASK,
# Does not store the task's return value in the result backend.
ignore_result=True,
# When exceeded celery will raise a SoftTimeLimitExceeded in the task.
soft_time_limit=60 * 5, # 5 minutes.
# When exceeded the task will be forcefully terminated.
time_limit=60 * 6, # 6 minutes.
# Passed in self to the task to get task metadata.
bind=True,
)
def check_for_documents_for_opensearch_migration_task(
self: Task, *, tenant_id: str
) -> bool | None:
"""
Periodic task to check for and add documents to the OpenSearch migration
table.
Should not execute meaningful logic at the same time as
migrate_document_from_vespa_to_opensearch_task.
Returns:
None if OpenSearch migration is not enabled, or if the lock could not be
acquired; effectively a no-op. True if the task completed
successfully. False if the task failed.
"""
if not ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
task_logger.warning(
"OpenSearch migration is not enabled, skipping check for documents for the OpenSearch migration task."
)
return None
task_logger.info("Checking for documents for OpenSearch migration.")
r = get_redis_client()
# Use a lock to prevent overlapping tasks. Only this task or
# migrate_document_from_vespa_to_opensearch_task can interact with the
# OpenSearchMigration table at once.
lock_beat: RedisLock = r.lock(
name=OnyxRedisLocks.OPENSEARCH_MIGRATION_BEAT_LOCK,
# The maximum time the lock can be held for. Will automatically be
# released after this time.
timeout=60 * 6, # 6 minutes, same as the time limit for this task.
# .acquire will block until the lock is acquired.
blocking=True,
# Wait for 2 minutes trying to acquire the lock.
blocking_timeout=60 * 2, # 2 minutes.
)
if not lock_beat.acquire():
task_logger.warning(
"The OpenSearch migration check task timed out waiting for the lock."
)
return None
try:
# Double check that tenant info is correct.
if tenant_id != get_current_tenant_id():
err_str = (
f"Tenant ID mismatch in the OpenSearch migration check task: "
f"{tenant_id} != {get_current_tenant_id()}. This should never happen."
)
task_logger.error(err_str)
return False
with get_session_with_current_tenant() as db_session:
# For pagination, get the last ID we've inserted into
# OpenSearchMigration.
last_opensearch_migration_document_id = (
get_last_opensearch_migration_document_id(db_session)
)
# Now get the next batch of doc IDs starting after the last ID.
document_ids = get_paginated_document_batch(
db_session,
prev_ending_document_id=last_opensearch_migration_document_id,
)
if not document_ids:
task_logger.info(
"No more documents to insert for OpenSearch migration."
)
increment_num_times_observed_no_additional_docs_to_populate_migration_table_with_commit(
db_session
)
# TODO(andrei): Once we've done this enough times and the number
# of documents matches the number of migration records, we can
# be done with this task and update
# document_migration_record_table_population_status.
return True
# Create the migration records for the next batch of documents with
# status PENDING.
create_opensearch_migration_records_with_commit(db_session, document_ids)
task_logger.info(
f"Created {len(document_ids)} migration records for the next batch of documents."
)
except Exception:
task_logger.exception("Error in the OpenSearch migration check task.")
return False
finally:
if lock_beat.owned():
lock_beat.release()
else:
task_logger.warning(
"The OpenSearch migration lock was not owned on completion of the check task."
)
return True
# shared_task allows this task to be shared across celery app instances.
@shared_task(
name=OnyxCeleryTask.MIGRATE_DOCUMENT_FROM_VESPA_TO_OPENSEARCH_TASK,
# Does not store the task's return value in the result backend.
ignore_result=True,
# When exceeded celery will raise a SoftTimeLimitExceeded in the task.
soft_time_limit=60 * 5, # 5 minutes.
# When exceeded the task will be forcefully terminated.
time_limit=60 * 6, # 6 minutes.
# Passed in self to the task to get task metadata.
bind=True,
)
def migrate_documents_from_vespa_to_opensearch_task(
self: Task,
*,
tenant_id: str,
) -> bool | None:
"""Periodic task to migrate documents from Vespa to OpenSearch.
Should not execute meaningful logic at the same time as
check_for_documents_for_opensearch_migration_task.
Returns:
None if OpenSearch migration is not enabled, or if the lock could not be
acquired; effectively a no-op. True if the task completed
successfully. False if the task failed.
"""
if not ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
task_logger.warning(
"OpenSearch migration is not enabled, skipping trying to migrate documents from Vespa to OpenSearch."
)
return None
task_logger.info("Trying to migrate documents from Vespa to OpenSearch.")
r = get_redis_client()
# Use a lock to prevent overlapping tasks. Only this task or
# check_for_documents_for_opensearch_migration_task can interact with the
# OpenSearchMigration table at once.
lock_beat: RedisLock = r.lock(
name=OnyxRedisLocks.OPENSEARCH_MIGRATION_BEAT_LOCK,
# The maximum time the lock can be held for. Will automatically be
# released after this time.
timeout=60 * 6, # 6 minutes, same as the time limit for this task.
# .acquire will block until the lock is acquired.
blocking=True,
# Wait for 2 minutes trying to acquire the lock.
blocking_timeout=60 * 2, # 2 minutes.
)
if not lock_beat.acquire():
task_logger.warning(
"The OpenSearch migration task timed out waiting for the lock."
)
return None
try:
# Double check that tenant info is correct.
if tenant_id != get_current_tenant_id():
err_str = (
f"Tenant ID mismatch in the OpenSearch migration task: "
f"{tenant_id} != {get_current_tenant_id()}. This should never happen."
)
task_logger.error(err_str)
return False
with get_session_with_current_tenant() as db_session:
records_needing_migration = (
get_opensearch_migration_records_needing_migration(db_session)
)
if not records_needing_migration:
task_logger.info(
"No documents found that need to be migrated from Vespa to OpenSearch."
)
increment_num_times_observed_no_additional_docs_to_migrate_with_commit(
db_session
)
# TODO(andrei): Once we've done this enough times and
# document_migration_record_table_population_status is done, we
# can be done with this task and update
# overall_document_migration_status accordingly. Note that this
# includes marking connectors as needing reindexing if some
# migrations failed.
return True
search_settings = get_current_search_settings(db_session)
tenant_state = TenantState(tenant_id=tenant_id, multitenant=MULTI_TENANT)
opensearch_document_index = OpenSearchDocumentIndex(
index_name=search_settings.index_name, tenant_state=tenant_state
)
vespa_document_index = VespaDocumentIndex(
index_name=search_settings.index_name,
tenant_state=tenant_state,
large_chunks_enabled=False,
)
task_logger.info(
f"Trying to migrate {len(records_needing_migration)} documents from Vespa to OpenSearch."
)
for record in records_needing_migration:
try:
# If the Document's chunk count is not known, it was
# probably just indexed, so fail here to give it a chance to
# sync. In the rare event that this Document has not been
# re-indexed in a very long time and is still under the
# "old" embedding/indexing logic where the chunk count was
# never stored, we will eventually permanently fail and thus
# force a re-index of this doc, which is a desirable outcome.
if record.document.chunk_count is None:
raise RuntimeError(
f"Document {record.document_id} has no chunk count."
)
chunks_migrated = _migrate_single_document(
document_id=record.document_id,
opensearch_document_index=opensearch_document_index,
vespa_document_index=vespa_document_index,
tenant_state=tenant_state,
)
# If the number of chunks in Vespa is not in sync with the
# Document table for this doc, do not consider the migration
# completed and let a subsequent run take care of it.
if chunks_migrated != record.document.chunk_count:
raise RuntimeError(
f"Number of chunks migrated ({chunks_migrated}) does not match number of expected chunks in Vespa "
f"({record.document.chunk_count}) for document {record.document_id}."
)
record.status = OpenSearchDocumentMigrationStatus.COMPLETED
except Exception:
record.status = OpenSearchDocumentMigrationStatus.FAILED
record.error_message = f"Attempt {record.attempts_count + 1}:\n{traceback.format_exc()}"
task_logger.exception(
f"Error migrating document {record.document_id} from Vespa to OpenSearch."
)
finally:
record.attempts_count += 1
record.last_attempt_at = datetime.now(timezone.utc)
if should_document_migration_be_permanently_failed(record):
record.status = (
OpenSearchDocumentMigrationStatus.PERMANENTLY_FAILED
)
# TODO(andrei): Not necessarily here but if this happens
# we'll need to mark the connector as needing reindex.
db_session.commit()
except Exception:
task_logger.exception("Error in the OpenSearch migration task.")
return False
finally:
if lock_beat.owned():
lock_beat.release()
else:
task_logger.warning(
"The OpenSearch migration lock was not owned on completion of the migration task."
)
return True
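For reference, a minimal sketch of the beat-lock pattern this task relies on, assuming a redis-py client; run_with_beat_lock is an illustrative helper, not a function that exists in the codebase:

import redis
from collections.abc import Callable

def run_with_beat_lock(
    r: redis.Redis, lock_name: str, work: Callable[[], bool]
) -> bool | None:
    # Mirrors the task above: block up to 2 minutes to acquire, auto-expire
    # after 6 minutes, and only release the lock if we still own it.
    lock = r.lock(
        name=lock_name,
        timeout=60 * 6,
        blocking=True,
        blocking_timeout=60 * 2,
    )
    if not lock.acquire():
        return None  # another worker holds the lock; treat this run as a no-op
    try:
        return work()
    finally:
        if lock.owned():
            lock.release()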

View File

@@ -0,0 +1,268 @@
from datetime import datetime
from datetime import timezone
from typing import Any
from onyx.configs.constants import PUBLIC_DOC_PAT
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.schema import DocumentChunk
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
from onyx.document_index.vespa_constants import BLURB
from onyx.document_index.vespa_constants import BOOST
from onyx.document_index.vespa_constants import CHUNK_CONTEXT
from onyx.document_index.vespa_constants import CHUNK_ID
from onyx.document_index.vespa_constants import CONTENT
from onyx.document_index.vespa_constants import DOC_SUMMARY
from onyx.document_index.vespa_constants import DOC_UPDATED_AT
from onyx.document_index.vespa_constants import DOCUMENT_ID
from onyx.document_index.vespa_constants import DOCUMENT_SETS
from onyx.document_index.vespa_constants import EMBEDDINGS
from onyx.document_index.vespa_constants import FULL_CHUNK_EMBEDDING_KEY
from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import IMAGE_FILE_NAME
from onyx.document_index.vespa_constants import METADATA_LIST
from onyx.document_index.vespa_constants import METADATA_SUFFIX
from onyx.document_index.vespa_constants import PRIMARY_OWNERS
from onyx.document_index.vespa_constants import SECONDARY_OWNERS
from onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER
from onyx.document_index.vespa_constants import SOURCE_LINKS
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import TITLE
from onyx.document_index.vespa_constants import TITLE_EMBEDDING
from onyx.document_index.vespa_constants import USER_PROJECT
from shared_configs.configs import MULTI_TENANT
def _extract_content_vector(embeddings: Any) -> list[float]:
"""Extracts the full chunk embedding vector from Vespa's embeddings tensor.
Vespa stores embeddings as a tensor<float>(t{},x[dim]) where 't' maps
embedding names (like "full_chunk") to vectors. The API can return this in
different formats:
1. Direct list: {"full_chunk": [...]}
2. Blocks format: {"blocks": {"full_chunk": [0.1, 0.2, ...]}}
3. Possibly other formats.
We only support formats 1 and 2. Any other supplied format will raise an
error.
Raises:
ValueError: If the embeddings format is not supported.
Returns:
The full chunk content embedding vector as a list of floats.
"""
if isinstance(embeddings, dict):
# Handle format 1.
full_chunk_embedding = embeddings.get(FULL_CHUNK_EMBEDDING_KEY)
if isinstance(full_chunk_embedding, list):
# Double check that within the list we have floats and not another
# list or dict.
if not full_chunk_embedding:
raise ValueError("Full chunk embedding is empty.")
if isinstance(full_chunk_embedding[0], float):
return full_chunk_embedding
# Handle format 2.
blocks = embeddings.get("blocks")
if isinstance(blocks, dict):
full_chunk_embedding = blocks.get(FULL_CHUNK_EMBEDDING_KEY)
if isinstance(full_chunk_embedding, list):
# Double check that within the list we have floats and not another
# list or dict.
if not full_chunk_embedding:
raise ValueError("Full chunk embedding is empty.")
if isinstance(full_chunk_embedding[0], float):
return full_chunk_embedding
raise ValueError(f"Unknown embedding format: {type(embeddings)}")
def _extract_title_vector(title_embedding: Any | None) -> list[float] | None:
"""Extract the title embedding vector.
Returns None if no title embedding exists.
Vespa returns title_embedding as tensor<float>(x[dim]) which can be in
formats:
1. Direct list: [0.1, 0.2, ...]
2. Values format: {"values": [0.1, 0.2, ...]}
3. Possibly other formats.
Only formats 1 and 2 are supported. Any other supplied format will raise an
error.
Raises:
ValueError: If the title embedding format is not supported.
Returns:
The title embedding vector as a list of floats.
"""
if title_embedding is None:
return None
# Handle format 1.
if isinstance(title_embedding, list):
# Double check that within the list we have floats and not another
# list or dict.
if not title_embedding:
return None
if isinstance(title_embedding[0], float):
return title_embedding
# Handle format 2.
if isinstance(title_embedding, dict):
# Try values format.
values = title_embedding.get("values")
if values is not None and isinstance(values, list):
# Double check that within the list we have floats and not another
# list or dict.
if not values:
return None
if isinstance(values[0], float):
return values
raise ValueError(f"Unknown title embedding format: {type(title_embedding)}")
def _transform_vespa_document_sets_to_opensearch_document_sets(
vespa_document_sets: dict[str, int] | None,
) -> list[str] | None:
if not vespa_document_sets:
return None
return list(vespa_document_sets.keys())
def _transform_vespa_acl_to_opensearch_acl(
vespa_acl: dict[str, int] | None,
) -> tuple[bool, list[str]]:
if not vespa_acl:
raise ValueError(
"Missing ACL in Vespa chunk. This does not make sense as it implies the document is never searchable by anyone ever."
)
acl_list = list(vespa_acl.keys())
is_public = PUBLIC_DOC_PAT in acl_list
if is_public:
acl_list.remove(PUBLIC_DOC_PAT)
return is_public, acl_list
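A small illustration of the ACL conversion; the non-public entry is an invented placeholder, not a real Onyx ACL string, and the integer weights from Vespa's weighted set are ignored:

is_public, acl_list = _transform_vespa_acl_to_opensearch_acl(
    {PUBLIC_DOC_PAT: 1, "user:alice@example.com": 1}
)
assert is_public is True
assert acl_list == ["user:alice@example.com"]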
def transform_vespa_chunks_to_opensearch_chunks(
vespa_chunks: list[dict[str, Any]],
tenant_state: TenantState,
) -> list[DocumentChunk]:
result: list[DocumentChunk] = []
for vespa_chunk in vespa_chunks:
# This should exist; fail loudly if it does not.
document_id: str = vespa_chunk[DOCUMENT_ID]
if not document_id:
raise ValueError("Missing document_id in Vespa chunk.")
# This should exist; fail loudly if it does not.
chunk_index: int = vespa_chunk[CHUNK_ID]
title: str | None = vespa_chunk.get(TITLE)
# WARNING: Should supply format.tensors=short-value to the Vespa client
# in order to get a supported format for the tensors.
title_vector: list[float] | None = _extract_title_vector(
vespa_chunk.get(TITLE_EMBEDDING)
)
# This should exist; fail loudly if it does not.
content: str = vespa_chunk[CONTENT]
if not content:
raise ValueError("Missing content in Vespa chunk.")
# This should exist; fail loudly if it does not.
# WARNING: Should supply format.tensors=short-value to the Vespa client
# in order to get a supported format for the tensors.
content_vector: list[float] = _extract_content_vector(vespa_chunk[EMBEDDINGS])
if not content_vector:
raise ValueError("Missing content_vector in Vespa chunk.")
# This should exist; fail loudly if it does not.
source_type: str = vespa_chunk[SOURCE_TYPE]
if not source_type:
raise ValueError("Missing source_type in Vespa chunk.")
metadata_list: list[str] | None = vespa_chunk.get(METADATA_LIST)
_raw_doc_updated_at: int | None = vespa_chunk.get(DOC_UPDATED_AT)
last_updated: datetime | None = (
datetime.fromtimestamp(_raw_doc_updated_at, tz=timezone.utc)
if _raw_doc_updated_at is not None
else None
)
hidden: bool = vespa_chunk.get(HIDDEN, False)
# This should exist; fail loudly if it does not.
global_boost: int = vespa_chunk[BOOST]
# This should exist; fail loudly if it does not.
semantic_identifier: str = vespa_chunk[SEMANTIC_IDENTIFIER]
if not semantic_identifier:
raise ValueError("Missing semantic_identifier in Vespa chunk.")
image_file_id: str | None = vespa_chunk.get(IMAGE_FILE_NAME)
source_links: str | None = vespa_chunk.get(SOURCE_LINKS)
blurb: str = vespa_chunk.get(BLURB, "")
doc_summary: str = vespa_chunk.get(DOC_SUMMARY, "")
chunk_context: str = vespa_chunk.get(CHUNK_CONTEXT, "")
metadata_suffix: str | None = vespa_chunk.get(METADATA_SUFFIX)
document_sets: list[str] | None = (
_transform_vespa_document_sets_to_opensearch_document_sets(
vespa_chunk.get(DOCUMENT_SETS)
)
)
user_projects: list[int] | None = vespa_chunk.get(USER_PROJECT)
primary_owners: list[str] | None = vespa_chunk.get(PRIMARY_OWNERS)
secondary_owners: list[str] | None = vespa_chunk.get(SECONDARY_OWNERS)
# This should exist; fail loudly if it does not; this function will
# raise in that event.
is_public, acl_list = _transform_vespa_acl_to_opensearch_acl(
vespa_chunk.get(ACCESS_CONTROL_LIST)
)
chunk_tenant_id: str | None = vespa_chunk.get(TENANT_ID)
if MULTI_TENANT:
if not chunk_tenant_id:
raise ValueError(
"Missing tenant_id in Vespa chunk in a multi-tenant environment."
)
if chunk_tenant_id != tenant_state.tenant_id:
raise ValueError(
f"Chunk tenant_id {chunk_tenant_id} does not match expected tenant_id {tenant_state.tenant_id}"
)
opensearch_chunk = DocumentChunk(
document_id=document_id,
chunk_index=chunk_index,
title=title,
title_vector=title_vector,
content=content,
content_vector=content_vector,
source_type=source_type,
metadata_list=metadata_list,
last_updated=last_updated,
public=is_public,
access_control_list=acl_list,
hidden=hidden,
global_boost=global_boost,
semantic_identifier=semantic_identifier,
image_file_id=image_file_id,
source_links=source_links,
blurb=blurb,
doc_summary=doc_summary,
chunk_context=chunk_context,
metadata_suffix=metadata_suffix,
document_sets=document_sets,
user_projects=user_projects,
primary_owners=primary_owners,
secondary_owners=secondary_owners,
tenant_id=tenant_state,
)
result.append(opensearch_chunk)
return result
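To tie the helpers together, a minimal end-to-end sketch, assuming a single-tenant deployment (so the MULTI_TENANT branch is skipped); every literal value below is invented for illustration:

# Only the fields the transform accesses unconditionally are populated;
# everything else falls back to the .get defaults used above.
minimal_vespa_chunk = {
    DOCUMENT_ID: "doc-1",
    CHUNK_ID: 0,
    CONTENT: "Hello from Vespa.",
    EMBEDDINGS: {FULL_CHUNK_EMBEDDING_KEY: [0.1, 0.2, 0.3]},
    SOURCE_TYPE: "web",
    BOOST: 0,
    SEMANTIC_IDENTIFIER: "Hello page",
    ACCESS_CONTROL_LIST: {PUBLIC_DOC_PAT: 1},
}
chunks = transform_vespa_chunks_to_opensearch_chunks(
    [minimal_vespa_chunk],
    TenantState(tenant_id="default", multitenant=False),
)
assert len(chunks) == 1 and chunks[0].public is True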

View File

@@ -59,6 +59,9 @@ from onyx.file_store.document_batch_storage import get_document_batch_storage
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
from onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch
from onyx.redis.redis_hierarchy import ensure_source_node_exists
from onyx.redis.redis_hierarchy import get_node_id_from_raw_id
from onyx.redis.redis_hierarchy import get_source_node_id_from_cache
from onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry
from onyx.redis.redis_pool import get_redis_client
from onyx.server.features.build.indexing.persistent_document_writer import (
@@ -381,6 +384,9 @@ def connector_document_extraction(
db_credential = index_attempt.connector_credential_pair.credential
processing_mode = index_attempt.connector_credential_pair.processing_mode
is_primary = index_attempt.search_settings.status == IndexModelStatus.PRESENT
is_connector_public = (
index_attempt.connector_credential_pair.access_type == AccessType.PUBLIC
)
from_beginning = index_attempt.from_beginning
has_successful_attempt = (
@@ -542,6 +548,13 @@ def connector_document_extraction(
total_failures = 0
document_count = 0
# Ensure the SOURCE-type root hierarchy node exists before processing.
# This is the root of the hierarchy tree for this source - all other
# hierarchy nodes should ultimately have this as an ancestor.
redis_client = get_redis_client(tenant_id=tenant_id)
with get_session_with_current_tenant() as db_session:
ensure_source_node_exists(redis_client, db_session, db_connector.source)
# Main extraction loop
while checkpoint.has_more:
logger.info(
@@ -595,6 +608,7 @@ def connector_document_extraction(
nodes=hierarchy_node_batch,
source=db_connector.source,
commit=True,
is_connector_public=is_connector_public,
)
# Cache in Redis for fast ancestor resolution during doc processing
@@ -620,6 +634,26 @@ def connector_document_extraction(
# Clean documents and create batch
doc_batch_cleaned = strip_null_characters(document_batch)
# Resolve parent_hierarchy_raw_node_id to parent_hierarchy_node_id
# using the Redis cache (just populated from hierarchy nodes batch)
with get_session_with_current_tenant() as db_session_tmp:
source_node_id = get_source_node_id_from_cache(
redis_client, db_session_tmp, db_connector.source
)
for doc in doc_batch_cleaned:
if doc.parent_hierarchy_raw_node_id is not None:
node_id, found = get_node_id_from_raw_id(
redis_client,
db_connector.source,
doc.parent_hierarchy_raw_node_id,
)
doc.parent_hierarchy_node_id = (
node_id if found else source_node_id
)
else:
doc.parent_hierarchy_node_id = source_node_id
batch_description = []
for doc in doc_batch_cleaned:

View File

@@ -45,6 +45,8 @@ class ChatStateContainer:
self.citation_to_doc: CitationMapping = {}
# True if this turn is a clarification question (deep research flow)
self.is_clarification: bool = False
# Pre-answer processing time (time before answer starts) in seconds
self.pre_answer_processing_time: float | None = None
# Note: LLM cost tracking is now handled in multi_llm.py
# Search doc collection - maps dedup key to SearchDoc for all docs from tool calls
self._all_search_docs: dict[SearchDocKey, SearchDoc] = {}
@@ -101,6 +103,16 @@ class ChatStateContainer:
with self._lock:
return self.is_clarification
def set_pre_answer_processing_time(self, duration: float | None) -> None:
"""Set the pre-answer processing time (time before answer starts)."""
with self._lock:
self.pre_answer_processing_time = duration
def get_pre_answer_processing_time(self) -> float | None:
"""Thread-safe getter for pre_answer_processing_time."""
with self._lock:
return self.pre_answer_processing_time
@staticmethod
def create_search_doc_key(
search_doc: SearchDoc, use_simple_key: bool = True

View File

@@ -277,7 +277,7 @@ def extract_headers(
def create_temporary_persona(
persona_config: PersonaOverrideConfig, db_session: Session, user: User | None = None
persona_config: PersonaOverrideConfig, db_session: Session, user: User
) -> Persona:
if not is_user_admin(user):
raise HTTPException(

View File

@@ -1,3 +1,4 @@
import time
from collections.abc import Callable
from sqlalchemy.orm import Session
@@ -390,6 +391,9 @@ def run_llm_loop(
initialize_litellm()
# Track when the loop starts for calculating time-to-answer
loop_start_time = time.monotonic()
# Initialize citation processor for handling citations dynamically
# When include_citations is True, use HYPERLINK mode to format citations as [[1]](url)
# When include_citations is False, use REMOVE mode to strip citations from output
@@ -551,6 +555,11 @@ def run_llm_loop(
# This calls the LLM, yields packets (reasoning, answers, etc.) and returns the result
# It also pre-processes the tool calls in preparation for running them
tool_defs = [tool.tool_definition() for tool in final_tools]
# Calculate total processing time from loop start until now
# This measures how long the user waits before the answer starts streaming
pre_answer_processing_time = time.monotonic() - loop_start_time
llm_step_result, has_reasoned = run_llm_step(
emitter=emitter,
history=truncated_message_history,
@@ -565,6 +574,7 @@ def run_llm_loop(
# final set of documents immediately if desired.
final_documents=gathered_documents,
user_identity=user_identity,
pre_answer_processing_time=pre_answer_processing_time,
)
if has_reasoned:
reasoning_cycles += 1

View File

@@ -622,6 +622,7 @@ def run_llm_step_pkt_generator(
# TODO: Temporary handling of nested tool calls with agents, figure out a better way to handle this
use_existing_tab_index: bool = False,
is_deep_research: bool = False,
pre_answer_processing_time: float | None = None,
) -> Generator[Packet, None, tuple[LlmStepResult, bool]]:
"""Run an LLM step and stream the response as packets.
NOTE: DO NOT TOUCH THIS FUNCTION BEFORE ASKING YUHONG, this is very finicky and
@@ -677,9 +678,8 @@ def run_llm_step_pkt_generator(
llm_msg_history = translate_history_to_llm_format(history, llm.config)
has_reasoned = 0
# Enable LOG_ONYX_MODEL_INTERACTIONS to log the entire message history to the console
if LOG_ONYX_MODEL_INTERACTIONS:
logger.info(
logger.debug(
f"Message history:\n{_format_message_history_for_logging(llm_msg_history)}"
)
@@ -822,6 +822,12 @@ def run_llm_step_pkt_generator(
reasoning_start = False
if not answer_start:
# Store pre-answer processing time in state container for save_chat
if state_container and pre_answer_processing_time is not None:
state_container.set_pre_answer_processing_time(
pre_answer_processing_time
)
yield Packet(
placement=Placement(
turn_index=turn_index,
@@ -830,6 +836,7 @@ def run_llm_step_pkt_generator(
),
obj=AgentResponseStart(
final_documents=final_documents,
pre_answer_processing_seconds=pre_answer_processing_time,
),
)
answer_start = True
@@ -1038,6 +1045,7 @@ def run_llm_step(
max_tokens: int | None = None,
use_existing_tab_index: bool = False,
is_deep_research: bool = False,
pre_answer_processing_time: float | None = None,
) -> tuple[LlmStepResult, bool]:
"""Wrapper around run_llm_step_pkt_generator that consumes packets and emits them.
@@ -1059,6 +1067,7 @@ def run_llm_step(
max_tokens=max_tokens,
use_existing_tab_index=use_existing_tab_index,
is_deep_research=is_deep_research,
pre_answer_processing_time=pre_answer_processing_time,
)
while True:

View File

@@ -4,6 +4,7 @@ An overview can be found in the README.md file in this directory.
"""
import re
import time
import traceback
from collections.abc import Callable
from uuid import UUID
@@ -82,7 +83,6 @@ from onyx.tools.tool_constructor import construct_tools
from onyx.tools.tool_constructor import CustomToolConfig
from onyx.tools.tool_constructor import SearchToolConfig
from onyx.utils.logger import setup_logger
from onyx.utils.long_term_log import LongTermLogger
from onyx.utils.telemetry import mt_cloud_telemetry
from onyx.utils.timing import log_function_time
from onyx.utils.variable_functionality import (
@@ -293,7 +293,7 @@ def _get_project_search_availability(
def handle_stream_message_objects(
new_msg_req: SendMessageRequest,
user: User | None,
user: User,
db_session: Session,
# if specified, uses the last user message and does not create a new user message based
# on the `new_msg_req.message`. Currently, requires a state where the last message is a
@@ -312,17 +312,17 @@ def handle_stream_message_objects(
external_state_container: ChatStateContainer | None = None,
) -> AnswerStream:
tenant_id = get_current_tenant_id()
processing_start_time = time.monotonic()
llm: LLM | None = None
chat_session: ChatSession | None = None
redis_client: Redis | None = None
user_id = user.id if user is not None else None
llm_user_identifier = (
user.email
if user is not None and getattr(user, "email", None)
else (str(user_id) if user_id else "anonymous_user")
)
user_id = user.id
if user.is_anonymous:
llm_user_identifier = "anonymous_user"
else:
llm_user_identifier = user.email or str(user_id)
try:
if not new_msg_req.chat_session_id:
if not new_msg_req.chat_session_info:
@@ -349,15 +349,10 @@ def handle_stream_message_objects(
user_id=llm_user_identifier, session_id=str(chat_session.id)
)
# permanent "log" store, used primarily for debugging
long_term_logger = LongTermLogger(
metadata={"user_id": str(user_id), "chat_session_id": str(chat_session.id)}
)
# Milestone tracking, most devs using the API don't need to understand this
mt_cloud_telemetry(
tenant_id=tenant_id,
distinct_id=user.email if user else tenant_id,
distinct_id=user.email if not user.is_anonymous else tenant_id,
event=MilestoneRecordType.MULTIPLE_ASSISTANTS,
)
@@ -367,7 +362,7 @@ def handle_stream_message_objects(
attribute="event_telemetry",
fallback=noop_fallback,
)(
distinct_id=user.email if user else tenant_id,
distinct_id=user.email if not user.is_anonymous else tenant_id,
event="user_message_sent",
properties={
"origin": new_msg_req.origin.value,
@@ -384,7 +379,6 @@ def handle_stream_message_objects(
user=user,
llm_override=new_msg_req.llm_override or chat_session.llm_override,
additional_headers=litellm_additional_headers,
long_term_logger=long_term_logger,
)
token_counter = get_llm_token_counter(llm)
@@ -602,6 +596,7 @@ def handle_stream_message_objects(
chat_session_id=str(chat_session.id),
is_connected=check_is_connected,
assistant_message=assistant_response,
processing_start_time=processing_start_time,
)
# Run the LLM loop with explicit wrapper for stop signal handling
@@ -722,6 +717,7 @@ def llm_loop_completion_handle(
db_session: Session,
chat_session_id: str,
assistant_message: ChatMessage,
processing_start_time: float | None = None,
) -> None:
# Determine if stopped by user
completed_normally = is_connected()
@@ -753,12 +749,13 @@ def llm_loop_completion_handle(
assistant_message=assistant_message,
is_clarification=state_container.is_clarification,
emitted_citations=state_container.get_emitted_citations(),
pre_answer_processing_time=state_container.get_pre_answer_processing_time(),
)
def stream_chat_message_objects(
new_msg_req: CreateChatMessageRequest,
user: User | None,
user: User,
db_session: Session,
# if specified, uses the last user message and does not create a new user message based
# on the `new_msg_req.message`. Currently, requires a state where the last message is a

View File

@@ -145,6 +145,7 @@ def save_chat_turn(
assistant_message: ChatMessage,
is_clarification: bool = False,
emitted_citations: set[int] | None = None,
pre_answer_processing_time: float | None = None,
) -> None:
"""
Save a chat turn by populating the assistant_message and creating related entities.
@@ -169,12 +170,17 @@ def save_chat_turn(
is_clarification: Whether this assistant message is a clarification question (deep research flow)
emitted_citations: Set of citation numbers that were actually emitted during streaming.
If provided, only citations in this set will be saved; others are filtered out.
pre_answer_processing_time: Duration of processing before answer starts (in seconds)
"""
# 1. Update ChatMessage with message content, reasoning tokens, and token count
assistant_message.message = message_text
assistant_message.reasoning_tokens = reasoning_tokens
assistant_message.is_clarification = is_clarification
# Use pre-answer processing time (captured when MESSAGE_START was emitted)
if pre_answer_processing_time is not None:
assistant_message.processing_duration_seconds = pre_answer_processing_time
# Calculate token count using the default tokenizer; when storing, this should not use the
# LLM-specific tokenizer, so we use a system default tokenizer here.
default_tokenizer = get_tokenizer(None, None)

View File

@@ -11,6 +11,9 @@ from onyx.configs.constants import QueryHistoryType
from onyx.file_processing.enums import HtmlBasedConnectorTransformLinksStrategy
from onyx.prompts.image_analysis import DEFAULT_IMAGE_SUMMARIZATION_SYSTEM_PROMPT
from onyx.prompts.image_analysis import DEFAULT_IMAGE_SUMMARIZATION_USER_PROMPT
from onyx.utils.logger import setup_logger
logger = setup_logger()
#####
# App Configs
@@ -71,8 +74,16 @@ WEB_DOMAIN = os.environ.get("WEB_DOMAIN") or "http://localhost:3000"
#####
# Auth Configs
#####
AUTH_TYPE = AuthType((os.environ.get("AUTH_TYPE") or AuthType.DISABLED.value).lower())
DISABLE_AUTH = AUTH_TYPE == AuthType.DISABLED
# Upgrades users from disabled auth to basic auth and shows warning.
_auth_type_str = (os.environ.get("AUTH_TYPE") or "").lower()
if not _auth_type_str or _auth_type_str in ("disabled", "none"):
logger.warning(
"AUTH_TYPE='disabled' is no longer supported. "
"Defaulting to 'basic'. Please update your configuration. "
"Your existing data will be migrated automatically."
)
_auth_type_str = AuthType.BASIC.value
AUTH_TYPE = AuthType(_auth_type_str)
PASSWORD_MIN_LENGTH = int(os.getenv("PASSWORD_MIN_LENGTH", 8))
PASSWORD_MAX_LENGTH = int(os.getenv("PASSWORD_MAX_LENGTH", 64))
@@ -145,6 +156,10 @@ OAUTH_CLIENT_SECRET = (
os.environ.get("OAUTH_CLIENT_SECRET", os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET"))
or ""
)
# Whether Google OAuth is enabled (requires both client ID and secret)
OAUTH_ENABLED = bool(OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET)
# OpenID Connect configuration URL for OIDC integrations
OPENID_CONFIG_URL = os.environ.get("OPENID_CONFIG_URL") or ""
@@ -203,6 +218,7 @@ TRACK_EXTERNAL_IDP_EXPIRY = (
#####
DOCUMENT_INDEX_NAME = "danswer_index"
# OpenSearch Configs
OPENSEARCH_HOST = os.environ.get("OPENSEARCH_HOST") or "localhost"
OPENSEARCH_REST_API_PORT = int(os.environ.get("OPENSEARCH_REST_API_PORT") or 9200)
OPENSEARCH_ADMIN_USERNAME = os.environ.get("OPENSEARCH_ADMIN_USERNAME", "admin")
@@ -1041,14 +1057,3 @@ STRIPE_PUBLISHABLE_KEY_URL = (
)
# Override for local testing with Stripe test keys (pk_test_*)
STRIPE_PUBLISHABLE_KEY_OVERRIDE = os.environ.get("STRIPE_PUBLISHABLE_KEY")
# Persistent Document Storage Configuration
# When enabled, indexed documents are written to local filesystem with hierarchical structure
PERSISTENT_DOCUMENT_STORAGE_ENABLED = (
os.environ.get("PERSISTENT_DOCUMENT_STORAGE_ENABLED", "").lower() == "true"
)
# Base directory path for persistent document storage (local filesystem)
# Example: /var/onyx/indexed-docs or /app/indexed-docs
PERSISTENT_DOCUMENT_STORAGE_PATH = os.environ.get(
"PERSISTENT_DOCUMENT_STORAGE_PATH", "/app/indexed-docs"
)

View File

@@ -3,10 +3,6 @@ import os
PROMPTS_YAML = "./onyx/seeding/prompts.yaml"
PERSONAS_YAML = "./onyx/seeding/personas.yaml"
NUM_RETURNED_HITS = 50
# Used for LLM filtering and reranking
# We want this to be approximately the number of results we want to show on the first page
# It cannot be too large due to cost and latency implications
NUM_POSTPROCESSED_RESULTS = 20
# May be less depending on model
MAX_CHUNKS_FED_TO_CHAT = int(os.environ.get("MAX_CHUNKS_FED_TO_CHAT") or 25)
@@ -56,3 +52,7 @@ USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH = (
os.environ.get("USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH", "false").lower()
== "true"
)
SKIP_DEEP_RESEARCH_CLARIFICATION = (
os.environ.get("SKIP_DEEP_RESEARCH_CLARIFICATION", "false").lower() == "true"
)

Some files were not shown because too many files have changed in this diff.