Compare commits


147 Commits

Author SHA1 Message Date
Dane Urban
5115b621c8 n 2026-02-10 21:01:32 -08:00
Jamison Lahman
a924b49405 chore(playwright): improve preflight checks and setup (#8283) 2026-02-10 08:09:54 -08:00
SubashMohan
2d2d998811 feat(memory): add user preferences and structured user context in system prompt (#8264) 2026-02-10 04:20:42 +00:00
SubashMohan
0925b5fbd4 fix(chatpage): Improve agent message layout, sidebar nesting, and icon fixes (#8224) 2026-02-10 04:06:31 +00:00
Wenxi
a02d8414ee chore(craft): update demo dataset and add sandbox image readme (#8059) 2026-02-10 03:37:00 +00:00
SubashMohan
c8abc4a115 fix(timeline): reduce agent message re-renders with referential stability in usePacedTurnGroups (#8265) 2026-02-10 03:18:53 +00:00
Nikolas Garza
cec37bff6a feat(ee): Enable license enforcement by default (#8270) 2026-02-10 02:48:55 +00:00
Nikolas Garza
06d5d3971b feat(chat): dynamic bottom spacer for fresh-chat push-up effect (#8285) 2026-02-10 02:04:01 +00:00
Justin Tahara
ed287a2fc0 chore(ollama): Sort model names (#8288) 2026-02-10 02:03:32 +00:00
Raunak Bhagat
60857d1e73 refactor(opal): select variant, transient/selected separation, OpenButton chevron fix (#8284)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 01:20:17 +00:00
Yuhong Sun
bb5c22104e chore: Better enforcement of masking (#7967)
Co-authored-by: justin-tahara <justintahara@gmail.com>
2026-02-10 00:41:11 +00:00
Jamison Lahman
03d919c918 chore(devtools): upgrade ods: 0.5.0->0.5.1 (#8279) 2026-02-09 23:31:54 +00:00
Justin Tahara
71d2ae563a fix(posthog): Chat metrics for Cloud (#8278) 2026-02-09 22:58:37 +00:00
Jamison Lahman
19f9c7357c chore(devtools): ods logs, ods pull, ods compose --force-recreate (#8277) 2026-02-09 22:51:01 +00:00
acaprau
f8fa5b243c chore(opensearch): Try to create OpenSearchTenantMigrationRecord earlier in check_for_documents_for_opensearch_migration_task (#8260) 2026-02-09 22:13:43 +00:00
dependabot[bot]
5f845c208f chore(deps-dev): bump pytest-xdist from 3.6.1 to 3.8.0 in /backend (#8120)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-02-09 22:06:38 +00:00
Raunak Bhagat
d8595f8de0 refactor(opal): add new Button component built on Interactive.Base (#8263)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 21:52:13 +00:00
dependabot[bot]
5b00d1ef9c chore(deps-dev): bump faker from 37.1.0 to 40.1.2 in /backend (#8126)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-02-09 21:46:06 +00:00
dependabot[bot]
41b6ed92a9 chore(deps): bump docker/login-action from 3.6.0 to 3.7.0 (#8275)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-02-09 21:26:07 +00:00
dependabot[bot]
07f35336ad chore(deps): bump @modelcontextprotocol/sdk from 1.25.3 to 1.26.0 in /backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web (#8166)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-02-09 13:20:12 -08:00
dependabot[bot]
4728bb87c7 chore(deps): bump @isaacs/brace-expansion from 5.0.0 to 5.0.1 in /backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web (#8139)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-02-09 13:20:04 -08:00
dependabot[bot]
adfa2f30af chore(deps): bump actions/cache from 4.3.0 to 5.0.3 (#8273)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-02-09 13:19:41 -08:00
dependabot[bot]
9dac4165fb chore(deps): bump actions/setup-python from 6.1.0 to 6.2.0 (#8274)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-02-09 13:18:26 -08:00
dependabot[bot]
7d2ede5efc chore(deps): bump protobuf from 6.33.4 to 6.33.5 in /backend/requirements (#8182)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-02-09 21:04:34 +00:00
dependabot[bot]
4592f6885f chore(deps): bump python-multipart from 0.0.21 to 0.0.22 in /backend/requirements (#7831)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-02-09 20:44:54 +00:00
Evan Lohn
9dc14fad79 chore: disable hiernodes when opensearch not available (#8271) 2026-02-09 20:32:47 +00:00
dependabot[bot]
ff6e471cfb chore(deps): bump actions/setup-node from 4.4.0 to 6.2.0 (#8122)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-02-09 20:31:35 +00:00
dependabot[bot]
09b9443405 chore(deps): bump bytes from 1.11.0 to 1.11.1 in /desktop/src-tauri (#8138)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-02-09 12:34:07 -08:00
dependabot[bot]
14cd6d08e8 chore(deps): bump webpack from 5.102.1 to 5.105.0 in /web (#8199)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-02-09 12:18:29 -08:00
dependabot[bot]
5ee16697ce chore(deps): bump time from 0.3.44 to 0.3.47 in /desktop/src-tauri (#8187)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-02-09 12:17:40 -08:00
dependabot[bot]
b794f7e10d chore(deps): bump actions/upload-artifact from 4.6.2 to 6.0.0 (#8121)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-02-09 12:14:25 -08:00
dependabot[bot]
bb3275bb75 chore(deps): bump actions/checkout from 6.0.1 to 6.0.2 (#8123)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-02-09 12:13:37 -08:00
roshan
7644e225a5 fix(chrome extension): Simplify NRFPage ChatInputBar layout to use normal flex flow (#8267)
Co-authored-by: Claude <noreply@anthropic.com>
2026-02-09 18:12:54 +00:00
roshan
811600b84a fix(craft): snapshot restore (#8194) 2026-02-09 18:00:07 +00:00
Jamison Lahman
40ce8615ff fix(login): window undefined on login (#8266) 2026-02-09 17:55:05 +00:00
Justin Tahara
0cee3f6960 chore(llm): Introduce Scaffolding for Integration Tests (#8251) 2026-02-09 17:26:15 +00:00
acaprau
8883e5608f chore(chat frontend): Round up in formatDurationSeconds so we don't see "Thought for 0s" (#8259) 2026-02-09 07:54:39 +00:00
acaprau
7c2f3ded44 fix(opensearch): Tighten up task timing (#8256) 2026-02-09 07:53:44 +00:00
Raunak Bhagat
aa094ce1f0 refactor(opal): interactive base variant types + foreground color system (#8255)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 07:26:05 +00:00
Evan Lohn
4b0c800db7 feat: postgres file store (#8246) 2026-02-09 04:28:42 +00:00
acaprau
8386742c10 fix(profiling): log_function_time should use time.monotonic not time.time (#8258) 2026-02-09 04:11:50 +00:00
Evan Lohn
f2e5e4f040 feat: jwt-based auth (#8244) 2026-02-09 00:29:56 +00:00
Yuhong Sun
c0498cf2fc fix: Deep Research Agent Cycle Count (#8254) 2026-02-08 22:27:33 +00:00
acaprau
954ee1706a chore(opensearch): Improve ordering of migration records that we query (#8248) 2026-02-08 19:51:52 +00:00
SubashMohan
0745765a56 refactor(chat): agent timeline layout and spacing changes (#8226) 2026-02-07 08:40:16 +00:00
Jessica Singh
10feb6ae77 chore(auth): anon fix (#8222) 2026-02-07 06:58:54 +00:00
Danelegend
f5b170af1e chore(provider config): llm provider config prefers LLMModelFlow (#8064) 2026-02-07 03:37:56 +00:00
Jessica Singh
2d2f252e95 fix(web search): strictly typed provider config (#8022) 2026-02-07 00:35:33 +00:00
Evan Lohn
a05f304960 fix: column overlap typing (#8247) 2026-02-07 00:34:34 +00:00
acaprau
7ce5120302 chore(opensearch): Make indexing use the client's new bulk index API (#8238) 2026-02-06 22:07:27 +00:00
Evan Lohn
2d8f864251 fix: metadata hardening (#8201) 2026-02-06 21:57:28 +00:00
acaprau
3b48c2104b fix(opensearch): Allow update to skip if a doc chunk is not found in OpenSearch, or if chunk count is not known (#8236) 2026-02-06 21:47:01 +00:00
Nikolas Garza
a9ec6a2434 fix(settings): default ee_features_enabled to False (#8237) 2026-02-06 20:59:49 +00:00
Nikolas Garza
e85575c6cc fix: make it more clear how to add channels to fed slack config form (#8227) 2026-02-06 18:35:34 +00:00
Danelegend
c966c81e8a fix(llm): LLM override can fail if admin (#8204) 2026-02-06 18:28:31 +00:00
Jamison Lahman
a0d6ebe66d chore(migrations): database migration runner (#8217) 2026-02-06 18:03:03 +00:00
Wenxi
d75b501a1f fix(craft): upload to s3 before marking docs as indexed in db (#8216) 2026-02-06 17:55:18 +00:00
Nikolas Garza
89dd44bee8 fix(db): null out document set and persona ownership on user deletion (#8219) 2026-02-06 17:08:22 +00:00
Justin Tahara
c5451ffe53 fix(ui): Inconsistent LLM Provider Logo (#8220) 2026-02-06 04:38:44 +00:00
Yuhong Sun
85da1d85ce fix: LiteLLM for OpenAI compatible models not using Responses route (#8215) 2026-02-06 03:58:34 +00:00
acaprau
00d90c5e27 chore(opensearch): Add timing and debug logging in the OpenSearch client; also expand log_function_time (#8209) 2026-02-06 03:31:44 +00:00
Nikolas Garza
ea7654e4b8 feat(ee): block Slack bot for suspended tenants and enforce seat limits (#8202) 2026-02-06 03:28:34 +00:00
Yuhong Sun
eb90775e42 feat: basic langfuse tracing + tracing consolidation (#8207) 2026-02-06 03:28:28 +00:00
Nikolas Garza
75865fcdfd feat: support PEM-style delimiters in license file uploads (#7559) 2026-02-06 02:45:11 +00:00
acaprau
d50dc8fa68 feat(opensearch): Support bulk indexing (#8203) 2026-02-06 02:40:50 +00:00
Justin Tahara
39b96973ec chore(openai): Add test for Chat Models (#8213) 2026-02-06 02:21:03 +00:00
Justin Tahara
a342c4d848 fix(openai): Set Auto Reasoning effort to Medium (#8211) 2026-02-06 01:40:49 +00:00
Yuhong Sun
7c084a35b6 fix: GPT -chat models (#8210) 2026-02-06 00:47:23 +00:00
Raunak Bhagat
946eba5ba5 feat(opal): expand InteractiveBase variant system (#8200)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-06 00:13:41 +00:00
Wenxi
ec4f85f4a4 chore: bump sandbox cpu and memory limits (#8208) 2026-02-05 15:32:39 -08:00
Jamison Lahman
d8fd6d398e chore(ruff): enable flake8s unused arg rules (#8206) 2026-02-05 23:25:07 +00:00
Wenxi
ef85a14b6e fix: mt provisioning rollback and add tests (#8205) 2026-02-05 23:15:36 +00:00
Jamison Lahman
97b44b530e chore(devtools): CLAUDE.md.template -> AGENTS.md (#8197) 2026-02-05 22:46:37 +00:00
Justin Tahara
e05a34cad3 chore(chat): Cleaning Error Codes + Tests (#8186) 2026-02-05 21:13:30 +00:00
Yuhong Sun
d80a4270cb fix: LLM Read Timeout (#8193) 2026-02-05 20:51:37 +00:00
Justin Tahara
a26b4ff888 fix(agents): Removing Label Dependency (#8189) 2026-02-05 20:41:44 +00:00
Wenxi
185d2bb813 feat: recommend opus 4-6 (#8198) 2026-02-05 20:36:12 +00:00
Justin Tahara
d5b64e8472 chore(openai): Add Reasoning Specific Test (#8195) 2026-02-05 20:24:18 +00:00
Justin Tahara
378a216af3 fix(ci): Model Check update (#8196) 2026-02-05 20:17:06 +00:00
Jamison Lahman
2c002c48f7 chore(ruff): move config up a level (#8192) 2026-02-05 20:11:54 +00:00
Jamison Lahman
9c20549e58 chore(devtools): upgrade ods: 0.4.1->0.5.0 (#8190) 2026-02-05 20:08:03 +00:00
Evan Lohn
ffd30ae72a chore: bump default usage limits (#8188) 2026-02-05 19:43:46 +00:00
Wenxi
e18496dfa7 fix: don't run craft setup script unless it exists (#8191) 2026-02-05 19:39:19 +00:00
roshan
560a78a5d0 fix(craft): file upload (#8149) 2026-02-05 19:18:39 +00:00
Wenxi
10bc398746 refactor(craft): chad s5cmd > chud aws cli (mem overhead + speed) (#8170) 2026-02-05 18:58:32 +00:00
Jamison Lahman
9356f79461 chore(devtools): ods compose to start containers (#8185) 2026-02-05 18:44:31 +00:00
Raunak Bhagat
e246b53108 feat(opal): extract Hoverable into Interactive atom (#8173)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 18:32:49 +00:00
Justin Tahara
26533d58e2 fix(openai): Fix reasoning (#8183) 2026-02-05 18:27:13 +00:00
SubashMohan
a32f27f4c8 feat(CommandMenu): add comprehensive tests (#8159) 2026-02-05 17:57:47 +00:00
Yuhong Sun
413a96f138 chore: DR Prompt Tuning (#8180) 2026-02-05 07:02:07 +00:00
Yuhong Sun
73a6721886 chore: Slight tweaks of DR (#8179) 2026-02-04 21:07:27 -08:00
Evan Lohn
01872a7196 fix(salesforce): cleanup logic (#8175) 2026-02-05 02:50:08 +00:00
Evan Lohn
0ba1f715f2 feat(filesys): disabled sections (#8153) 2026-02-05 02:27:47 +00:00
Yuhong Sun
94d0dc0ffe chore: DR description for GA (#8178) 2026-02-05 01:22:04 +00:00
Yuhong Sun
039daa0027 chore: Sanitize LLM tool call args (#8177) 2026-02-05 01:14:05 +00:00
Yuhong Sun
62b1c55494 fix: Anthropic DR requires setting reasoning limit if we want to set output limit (#8168) 2026-02-05 00:39:58 +00:00
Nikolas Garza
1800d4b9d7 fix(db): add cascade delete to search_query user_id foreign key (#8176) 2026-02-05 00:34:28 +00:00
Justin Tahara
5ed2d78471 fix(ui): Additional LLM Config update (#8174) 2026-02-04 23:16:01 +00:00
Justin Tahara
ff28dc9c72 fix(ci): Allow for flexible beta tag (#8171) 2026-02-04 22:17:12 +00:00
Raunak Bhagat
e88a7ac868 fix: Fix expansion error inside of TextView (#8151) 2026-02-04 21:50:40 +00:00
Justin Tahara
79c1bbe666 fix(ci): Notification workflow for Slack (#8167) 2026-02-04 21:38:04 +00:00
Jessica Singh
b1168d4526 chore(chat compress): create readme (#8165) 2026-02-04 21:31:18 +00:00
Wenxi
21751b2cf2 fix(craft): bump aws sync concurrent requests 10-->200 (#8163) 2026-02-04 19:54:35 +00:00
Evan Lohn
cb33263ef0 feat(filesys): sorting attached knowledge (#8156) 2026-02-04 18:56:28 +00:00
Justin Tahara
9f9a68f2eb fix(ci): Fix Bedrock Test (#8161) 2026-02-04 17:45:24 +00:00
SubashMohan
9c09c07980 test(timeline): add unit tests for packet processor (#8135) 2026-02-04 09:41:01 +00:00
Yuhong Sun
9aaac7f1ad chore: firecrawl v2 (#8155) 2026-02-03 23:12:34 -08:00
SubashMohan
8b2071a3ae fix(timeline): consolidate header components and visual fixes (#8133) 2026-02-04 06:57:36 +00:00
Evan Lohn
733d55c948 feat(filesys): sharepoint v1 (#8130) 2026-02-04 05:15:26 +00:00
Evan Lohn
1498238c43 feat(filesys): slack connector (#8118) 2026-02-04 04:48:43 +00:00
Evan Lohn
f0657dc1a3 feat(filesys): jira hierarchy v1 (#8113) 2026-02-04 04:48:31 +00:00
SubashMohan
96e71c496b fix(ChatSearchCommandMenu): improve keyboard navigation and search UX (#8134) 2026-02-04 03:20:18 +00:00
Yuhong Sun
db4e1dc1a3 fix: Give more helpful message to LLM on bad tool calls (#8150) 2026-02-04 01:51:39 +00:00
acaprau
bce5f0889f chore(document index): Remove offset (#8148) 2026-02-04 00:55:53 +00:00
acaprau
fa2f4e781a feat(opensearch): Implement admin and random retrieval; fully deprecate update in the old interface; relax update restrictions (#8142) 2026-02-04 00:26:16 +00:00
Yuhong Sun
abdb683584 fix: Back off to basic auth (#8146) 2026-02-03 15:28:53 -08:00
Yuhong Sun
b7b4737b05 chore: Remove auth log (#8145) 2026-02-03 15:22:03 -08:00
Yuhong Sun
3f9b143429 feat: Track reasoning in Braintrust (#8143) 2026-02-03 15:07:26 -08:00
Wenxi
dbf08a3483 fix(craft): pod restoration race, recovery from unexpected state, and updating heartbeat on session creation (#8140) 2026-02-03 22:47:16 +00:00
Nikolas Garza
43e2e7c69c fix(auth): redirect to login page after email verification (#8137) 2026-02-03 22:35:29 +00:00
Yuhong Sun
1da20bc240 fix: DR time based wrap ups (#8141) 2026-02-03 14:39:58 -08:00
acaprau
58b376d7b7 feat(opensearch): Add tenant ID to the document chunk ID (#8129) 2026-02-03 19:41:19 +00:00
Wenxi
23e47a48e1 fix(craft): wrong usage limit string (#8136) 2026-02-03 19:24:16 +00:00
acaprau
cda5b00174 feat(opensearch): String filtering (#8110) 2026-02-03 17:54:54 +00:00
acaprau
6f4ababb11 chore(opensearch): Some small cleanup around update (#8119) 2026-02-03 17:52:06 +00:00
Nikolas Garza
e90656efbe feat(ee): updating billing api (#8073) 2026-02-03 08:26:44 -08:00
Jessica Singh
b3803808e0 feat(chat history): summarize older messages (#7810)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-02-03 00:35:23 -08:00
SubashMohan
f5415bace6 feat(timeline): QA fixes aroung renderers (#8132) 2026-02-03 06:34:16 +00:00
Nikolas Garza
b255297365 feat(ee): fe - enable new billing UI (5/5) (#8072) 2026-02-03 05:11:37 +00:00
Yuhong Sun
5463d6aadc feat: LLM history now allows parallel tool call for a single message (#8131) 2026-02-03 04:05:17 +00:00
roshan
b547d487c1 fix(craft): CREEPA? AW MAN (#8115) 2026-02-03 02:47:05 +00:00
Nikolas Garza
18821b612b feat(ee): fe - update billing hooks and services (4/5) (#8071) 2026-02-03 02:12:35 +00:00
Yuhong Sun
2368cef307 fix: LiteLLM in threading context (#8103)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 01:58:08 +00:00
Nikolas Garza
668cc71be4 feat(ee): fe - add BillingDetailsView component (3/5) (#8070) 2026-02-03 01:39:27 +00:00
Justin Tahara
09f3ad8985 feat(ui): Selecting all Models toggle (#8125) 2026-02-03 01:28:09 +00:00
Nikolas Garza
38e88c7b5c feat(ee): fe - add CheckoutView component (2/5) (#8069) 2026-02-03 01:15:34 +00:00
Justin Tahara
cc7bfdbcde fix(ui): Model Selection in Place (#8128) 2026-02-03 00:46:46 +00:00
Nikolas Garza
0e3c511974 feat(ee): fe - add new billing page structure with PlansView (#8068) 2026-02-03 00:45:03 +00:00
Wenxi
9606461ba0 fix(craft): phantom pre-provisionsed sandboxes and poll for fresh session on welcome page (#8124) 2026-02-03 00:26:45 +00:00
Yuhong Sun
d01fcbbf7a chore: LiteLLM bump version (#8114) 2026-02-03 00:14:39 +00:00
Evan Lohn
325a38e502 feat(filesys): selected info improvements (#8117) 2026-02-03 00:10:28 +00:00
Evan Lohn
3916556397 fix: perm sync group prefixing (#8077) 2026-02-02 23:44:17 +00:00
Danelegend
a7edcd6880 feat(provider): create flow mapping table (#8025) 2026-02-02 23:41:53 +00:00
Justin Tahara
f18f0ffd96 feat(helm): Add Probes for All (#8112) 2026-02-02 23:05:01 +00:00
Justin Tahara
06c060bb1f fix(ui): Search Connectors (#8116) 2026-02-02 23:04:53 +00:00
roshan
94ebe9e221 fix(craft): fix default dockerfile outputs_template_path and venv_template_path (#8102) 2026-02-02 14:28:08 -08:00
Justin Tahara
99c9c378cd chore(no-auth): Clean up Playwright (#8109) 2026-02-02 12:19:54 -08:00
752 changed files with 29303 additions and 9898 deletions

.github/CODEOWNERS vendored (4 changes)
View File

@@ -6,5 +6,5 @@
/web/STANDARDS.md @raunakab @Weves
# Agent context files
/CLAUDE.md.template @Weves
/AGENTS.md.template @Weves
/CLAUDE.md @Weves
/AGENTS.md @Weves

View File

@@ -82,7 +82,7 @@ jobs:
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
IS_STABLE=true
fi
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]]; then
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta(\.[0-9]+)?$ ]]; then
IS_BETA=true
fi
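
For reference, the relaxed pattern above now accepts a bare `-beta` suffix as well as the numbered `-beta.N` form. A minimal sketch of the matching behavior; the sample tags below are made up for illustration, only the regex comes from the hunk:

```bash
# Illustration only: sample tags are hypothetical; the regex is the one added above.
for TAG in v1.2.3 v1.2.3-beta v1.2.3-beta.4; do
  if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta(\.[0-9]+)?$ ]]; then
    echo "$TAG: IS_BETA=true"
  else
    echo "$TAG: not a beta tag"
  fi
done
```

Under the previous pattern, only the numbered form (e.g. v1.2.3-beta.4) would have set IS_BETA.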
@@ -174,23 +174,10 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
MONITOR_DEPLOYMENTS_WEBHOOK, deploy/monitor-deployments-webhook
parse-json-secrets: true
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ env.MONITOR_DEPLOYMENTS_WEBHOOK }}
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: "• check-version-tag"
title: "🚨 Version Tag Check Failed"
ref-name: ${{ github.ref_name }}
@@ -262,7 +249,7 @@ jobs:
xdg-utils
- name: setup node
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v6.1.0
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v6.2.0
with:
node-version: 24
package-manager-cache: false
@@ -422,7 +409,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -495,7 +482,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -555,7 +542,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -633,7 +620,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -714,7 +701,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -782,7 +769,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -857,7 +844,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -929,7 +916,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -988,7 +975,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1066,7 +1053,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1139,7 +1126,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1200,7 +1187,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1280,7 +1267,7 @@ jobs:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1359,7 +1346,7 @@ jobs:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1422,7 +1409,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1709,19 +1696,6 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
MONITOR_DEPLOYMENTS_WEBHOOK, deploy/monitor-deployments-webhook
parse-json-secrets: true
- name: Determine failed jobs
id: failed-jobs
shell: bash
@@ -1787,7 +1761,7 @@ jobs:
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ env.MONITOR_DEPLOYMENTS_WEBHOOK }}
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: ${{ steps.failed-jobs.outputs.jobs }}
title: "🚨 Deployment Workflow Failed"
ref-name: ${{ github.ref_name }}

View File

@@ -24,7 +24,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -24,7 +24,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -33,7 +33,7 @@ jobs:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
cache: 'pip'
@@ -97,7 +97,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -40,13 +40,16 @@ jobs:
- name: Generate OpenAPI schema and Python client
shell: bash
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
env:
LICENSE_ENFORCEMENT_ENABLED: "false"
run: |
ods openapi all
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -45,12 +45,12 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238
with:
node-version: 24
cache: "npm" # zizmor: ignore[cache-poisoning]
@@ -63,7 +63,7 @@ jobs:
targets: ${{ matrix.target }}
- name: Cache Cargo registry and build
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # zizmor: ignore[cache-poisoning]
uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # zizmor: ignore[cache-poisoning]
with:
path: |
~/.cargo/bin/
@@ -105,7 +105,7 @@ jobs:
- name: Upload build artifacts
if: always()
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
with:
name: desktop-build-${{ matrix.platform }}-${{ github.run_id }}
path: |

View File

@@ -110,7 +110,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -118,6 +118,7 @@ jobs:
- name: Create .env file for Docker Compose
run: |
cat <<EOF > deployment/docker_compose/.env
COMPOSE_PROFILES=s3-filestore
CODE_INTERPRETER_BETA_ENABLED=true
DISABLE_TELEMETRY=true
EOF
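
The new COMPOSE_PROFILES=s3-filestore line activates the corresponding Docker Compose profile when the stack is brought up. A hedged sketch of the equivalent invocations (the compose file's actual service layout is not shown in this diff; the directory is the one used elsewhere in this workflow):

```bash
# Sketch only: assumes the stack lives in deployment/docker_compose, as in the heredoc above.
cd deployment/docker_compose
COMPOSE_PROFILES=s3-filestore docker compose up -d
# equivalent to selecting the profile explicitly on the command line:
docker compose --profile s3-filestore up -d
```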

View File

@@ -109,7 +109,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -169,7 +169,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -214,7 +214,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -287,7 +287,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -300,7 +300,10 @@ jobs:
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
COMPOSE_PROFILES=s3-filestore
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
LICENSE_ENFORCEMENT_ENABLED=false
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true
@@ -465,7 +468,7 @@ jobs:
persist-credentials: false
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -477,6 +480,7 @@ jobs:
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
LICENSE_ENFORCEMENT_ENABLED=false \
MULTI_TENANT=true \
AUTH_TYPE=cloud \
REQUIRE_EMAIL_VERIFICATION=false \

View File

@@ -28,7 +28,7 @@ jobs:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"

View File

@@ -101,7 +101,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -161,7 +161,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -220,7 +220,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -279,7 +279,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -292,6 +292,7 @@ jobs:
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
COMPOSE_PROFILES=s3-filestore
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true

View File

@@ -90,7 +90,7 @@ jobs:
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -151,7 +151,7 @@ jobs:
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -212,7 +212,7 @@ jobs:
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -249,7 +249,7 @@ jobs:
strategy:
fail-fast: false
matrix:
project: [admin, no-auth, exclusive]
project: [admin, exclusive]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -259,7 +259,7 @@ jobs:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"
@@ -289,7 +289,10 @@ jobs:
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
COMPOSE_PROFILES=s3-filestore
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
LICENSE_ENFORCEMENT_ENABLED=false
AUTH_TYPE=basic
GEN_AI_API_KEY=${OPENAI_API_KEY_VALUE}
EXA_API_KEY=${EXA_API_KEY_VALUE}
@@ -299,15 +302,12 @@ jobs:
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:playwright-test-model-server-${RUN_ID}
ONYX_WEB_SERVER_IMAGE=${ECR_CACHE}:playwright-test-web-${RUN_ID}
EOF
if [ "${{ matrix.project }}" = "no-auth" ]; then
echo "PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true" >> deployment/docker_compose/.env
fi
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -430,9 +430,6 @@ jobs:
run: |
# Create test-results directory to ensure it exists for artifact upload
mkdir -p test-results
if [ "${PROJECT}" = "no-auth" ]; then
export PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true
fi
npx playwright test --project ${PROJECT}
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
@@ -493,7 +490,7 @@ jobs:
# fetch-depth: 0
# - name: Setup node
# uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
# uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
# with:
# node-version: 22

View File

@@ -42,6 +42,9 @@ jobs:
- name: Generate OpenAPI schema and Python client
shell: bash
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
env:
LICENSE_ENFORCEMENT_ENABLED: "false"
run: |
ods openapi all

View File

@@ -64,7 +64,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -27,6 +27,8 @@ jobs:
PYTHONPATH: ./backend
REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}
DISABLE_TELEMETRY: "true"
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
LICENSE_ENFORCEMENT_ENABLED: "false"
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

View File

@@ -24,13 +24,13 @@ jobs:
with:
fetch-depth: 0
persist-credentials: false
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # ratchet:actions/setup-python@v6
with:
python-version: "3.11"
- name: Setup Terraform
uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # ratchet:hashicorp/setup-terraform@v3
- name: Setup node
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v6
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v6
with: # zizmor: ignore[cache-poisoning]
node-version: 22
cache: "npm"

.gitignore vendored (4 changes)
View File

@@ -40,10 +40,6 @@ settings.json
/backend/tests/regression/answer_quality/search_test_config.yaml
*.egg-info
# Claude
AGENTS.md
CLAUDE.md
# Local .terraform directories
**/.terraform/*

View File

@@ -1,26 +1,25 @@
# CLAUDE.md
# PROJECT KNOWLEDGE BASE
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
This file provides guidance to AI agents when working with code in this repository.
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
to assume the python venv.
to assume the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
make sure we see logs coming out from the relevant service.
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
outside of those directories.
outside of those directories.
## Project Overview
**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
### Background Workers (Celery)
Onyx uses Celery for asynchronous task processing with multiple specialized workers:
@@ -92,6 +91,7 @@ Onyx uses Celery for asynchronous task processing with multiple specialized work
Onyx supports two deployment modes for background workers, controlled by the `USE_LIGHTWEIGHT_BACKGROUND_WORKER` environment variable:
**Lightweight Mode** (default, `USE_LIGHTWEIGHT_BACKGROUND_WORKER=true`):
- Runs a single consolidated `background` worker that handles all background tasks:
- Light worker tasks (Vespa operations, permissions sync, deletion)
- Document processing (indexing pipeline)
@@ -105,12 +105,14 @@ Onyx supports two deployment modes for background workers, controlled by the `US
- Default concurrency: 20 threads (increased to handle combined workload)
**Standard Mode** (`USE_LIGHTWEIGHT_BACKGROUND_WORKER=false`):
- Runs separate specialized workers as documented above (light, docprocessing, docfetching, heavy, kg_processing, monitoring, user_file_processing)
- Better isolation and scalability
- Can scale individual workers independently based on workload
- Suitable for production deployments with higher load
The deployment mode affects:
- **Backend**: Worker processes spawned by supervisord or dev scripts
- **Helm**: Which Kubernetes deployments are created
- **Dev Environment**: Which workers `dev_run_background_jobs.py` spawns
@@ -119,18 +121,18 @@ The deployment mode affects:
- **Thread-based Workers**: All workers use thread pools (not processes) for stability
- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a
middleware layer that automatically finds the appropriate tenant ID when sending tasks
via Celery Beat.
middleware layer that automatically finds the appropriate tenant ID when sending tasks
via Celery Beat.
- **Task Prioritization**: High, Medium, Low priority queues
- **Monitoring**: Built-in heartbeat and liveness checking
- **Failure Handling**: Automatic retry and failure recovery mechanisms
- **Redis Coordination**: Inter-process communication via Redis
- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
#### Important Notes
**Defining Tasks**:
**Defining Tasks**:
- Always use `@shared_task` rather than `@celery_app`
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
@@ -143,6 +145,7 @@ If you make any updates to a celery worker and you want to test these changes, y
to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.
### Code Quality
```bash
# Install and run pre-commit hooks
pre-commit install
@@ -154,6 +157,7 @@ NOTE: Always make sure everything is strictly typed (both in Python and Typescri
## Architecture Overview
### Technology Stack
- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
- **Database**: PostgreSQL with Redis caching
@@ -435,6 +439,7 @@ function ContactForm() {
**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.
**Available color categories:**
- **Text:** `text-01` through `text-05`, `text-inverted-XX`
- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
@@ -467,6 +472,7 @@ function ContactForm() {
## Database & Migrations
### Running Migrations
```bash
# Standard migrations
alembic upgrade head
@@ -476,6 +482,7 @@ alembic -n schema_private upgrade head
```
### Creating Migrations
```bash
# Create migration
alembic revision -m "description"
@@ -488,13 +495,14 @@ Write the migration manually and place it in the file that alembic creates when
## Testing Strategy
First, you must activate the virtual environment with `source .venv/bin/activate`.
First, you must activate the virtual environment with `source .venv/bin/activate`.
There are 4 main types of tests within Onyx:
### Unit Tests
These should not assume any Onyx/external services are available to be called.
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
write these for complex, isolated modules e.g. `citation_processing.py`.
To run them:
@@ -504,13 +512,14 @@ pytest -xv backend/tests/unit
```
### External Dependency Unit Tests
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
MinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).
However, the actual Onyx containers are not running and with these tests we call the function to test directly.
We can also mock components/calls at will.
We can also mock components/calls at will.
The goal with these tests are to minimize mocking while giving some flexibility to mock things that are flakey,
The goal with these tests are to minimize mocking while giving some flexibility to mock things that are flakey,
need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
with certain args, something that would be impossible with proper integration tests).
@@ -523,15 +532,16 @@ python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency
```
### Integration Tests
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
verification is necessary) over any other type of test.
Tests are parallelized at a directory level.
When writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. Prefer (if one exists), calling the appropriate Manager
When writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. Prefer (if one exists), calling the appropriate Manager
class in the utils over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
calling the utilities directly (e.g. do NOT create admin users with
calling the utilities directly (e.g. do NOT create admin users with
`admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).
A great example of this type of test is `backend/tests/integration/dev_apis/test_simple_chat_api.py`.
@@ -543,8 +553,9 @@ python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
```
### Playwright (E2E) Tests
These tests are an even more complete version of the Integration Tests mentioned above. Has all services of Onyx
running, *including* the Web Server.
These tests are an even more complete version of the Integration Tests mentioned above. Has all services of Onyx
running, _including_ the Web Server.
Use these tests for anything that requires significant frontend <-> backend coordination.
@@ -556,13 +567,11 @@ To run them:
npx playwright test <TEST_NAME>
```
## Logs
When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access
to logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)
will be tailing their logs to this file.
will be tailing their logs to this file.
## Security Considerations
@@ -581,6 +590,7 @@ will be tailing their logs to this file.
- Custom prompts and agent actions
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:
**Issues to Address**
@@ -593,10 +603,10 @@ Things you come across in your research that are important to the implementation
How you are going to make the changes happen. High level approach.
**Tests**
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.
Do NOT include these: *Timeline*, *Rollback plan*
Do NOT include these: _Timeline_, _Rollback plan_
This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
Keep it high level. You can reference certain files or functions though.

View File

@@ -1,599 +0,0 @@
# AGENTS.md
This file provides guidance to AI agents when working with code in this repository.
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
to assume the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
outside of those directories.
## Project Overview
**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
### Background Workers (Celery)
Onyx uses Celery for asynchronous task processing with multiple specialized workers:
#### Worker Types
1. **Primary Worker** (`celery_app.py`)
- Coordinates core background tasks and system-wide operations
- Handles connector management, document sync, pruning, and periodic checks
- Runs with 4 threads concurrency
- Tasks: connector deletion, vespa sync, pruning, LLM model updates, user file sync
2. **Docfetching Worker** (`docfetching`)
- Fetches documents from external data sources (connectors)
- Spawns docprocessing tasks for each document batch
- Implements watchdog monitoring for stuck connectors
- Configurable concurrency (default from env)
3. **Docprocessing Worker** (`docprocessing`)
- Processes fetched documents through the indexing pipeline:
- Upserts documents to PostgreSQL
- Chunks documents and adds contextual information
- Embeds chunks via model server
- Writes chunks to Vespa vector database
- Updates document metadata
- Configurable concurrency (default from env)
4. **Light Worker** (`light`)
- Handles lightweight, fast operations
- Tasks: vespa operations, document permissions sync, external group sync
- Higher concurrency for quick tasks
5. **Heavy Worker** (`heavy`)
- Handles resource-intensive operations
- Primary task: document pruning operations
- Runs with 4 threads concurrency
6. **KG Processing Worker** (`kg_processing`)
- Handles Knowledge Graph processing and clustering
- Builds relationships between documents
- Runs clustering algorithms
- Configurable concurrency
7. **Monitoring Worker** (`monitoring`)
- System health monitoring and metrics collection
- Monitors Celery queues, process memory, and system status
- Single thread (monitoring doesn't need parallelism)
- Cloud-specific monitoring tasks
8. **User File Processing Worker** (`user_file_processing`)
- Processes user-uploaded files
- Handles user file indexing and project synchronization
- Configurable concurrency
9. **Beat Worker** (`beat`)
- Celery's scheduler for periodic tasks
- Uses DynamicTenantScheduler for multi-tenant support
- Schedules tasks like:
- Indexing checks (every 15 seconds)
- Connector deletion checks (every 20 seconds)
- Vespa sync checks (every 20 seconds)
- Pruning checks (every 20 seconds)
- KG processing (every 60 seconds)
- Monitoring tasks (every 5 minutes)
- Cleanup tasks (hourly)
#### Worker Deployment Modes
Onyx supports two deployment modes for background workers, controlled by the `USE_LIGHTWEIGHT_BACKGROUND_WORKER` environment variable:
**Lightweight Mode** (default, `USE_LIGHTWEIGHT_BACKGROUND_WORKER=true`):
- Runs a single consolidated `background` worker that handles all background tasks:
- Pruning operations (from `heavy` worker)
- Knowledge graph processing (from `kg_processing` worker)
- Monitoring tasks (from `monitoring` worker)
- User file processing (from `user_file_processing` worker)
- Lower resource footprint (single worker process)
- Suitable for smaller deployments or development environments
- Default concurrency: 6 threads
**Standard Mode** (`USE_LIGHTWEIGHT_BACKGROUND_WORKER=false`):
- Runs separate specialized workers as documented above (heavy, kg_processing, monitoring, user_file_processing)
- Better isolation and scalability
- Can scale individual workers independently based on workload
- Suitable for production deployments with higher load
The deployment mode affects:
- **Backend**: Worker processes spawned by supervisord or dev scripts
- **Helm**: Which Kubernetes deployments are created
- **Dev Environment**: Which workers `dev_run_background_jobs.py` spawns
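To make the two modes concrete, here is a minimal sketch of how a boolean flag like this is typically read and used; the exact config module, default, and worker wiring in Onyx are not shown in this file, so treat the details as assumptions.
```python
import os

# Sketch only: the real constant lives in Onyx's config module and the exact
# parsing/defaults may differ from what is shown here.
USE_LIGHTWEIGHT_BACKGROUND_WORKER = (
    os.environ.get("USE_LIGHTWEIGHT_BACKGROUND_WORKER", "true").lower() == "true"
)

if USE_LIGHTWEIGHT_BACKGROUND_WORKER:
    # Single consolidated worker covering pruning, KG processing, monitoring,
    # and user file processing.
    worker_names = ["background"]
else:
    # Separate specialized workers that can be scaled independently.
    worker_names = ["heavy", "kg_processing", "monitoring", "user_file_processing"]

print(f"Spawning workers: {worker_names}")
```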
#### Key Features
- **Thread-based Workers**: All workers use thread pools (not processes) for stability
- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a
middleware layer that automatically finds the appropriate tenant ID when sending tasks
via Celery Beat.
- **Task Prioritization**: High, Medium, Low priority queues
- **Monitoring**: Built-in heartbeat and liveness checking
- **Failure Handling**: Automatic retry and failure recovery mechanisms
- **Redis Coordination**: Inter-process communication via Redis
- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
#### Important Notes
**Defining Tasks**:
- Always use `@shared_task` rather than `@celery_app`
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks` (see the sketch below)
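A minimal sketch of a task that follows both rules; the module path, task name, and body are hypothetical, not actual Onyx tasks:
```python
# backend/onyx/background/celery/tasks/example/tasks.py  (hypothetical path)
from celery import Task, shared_task


@shared_task(name="example_periodic_cleanup", bind=True)
def example_periodic_cleanup(self: Task, *, tenant_id: str) -> None:
    # A real task would do DB / Redis work here; this sketch only logs.
    print(f"[{self.request.id}] running example cleanup for tenant {tenant_id}")
```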
**Defining APIs**:
When creating new FastAPI endpoints, do NOT use the `response_model` parameter. Instead, just annotate the
function's return type (see the sketch below).
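A minimal sketch of that convention; the route path and response model below are hypothetical:
```python
from fastapi import APIRouter
from pydantic import BaseModel

router = APIRouter()


class PersonaSnapshot(BaseModel):
    # Hypothetical response shape, for illustration only.
    id: int
    name: str


@router.get("/persona/{persona_id}")
def get_persona(persona_id: int) -> PersonaSnapshot:
    # No response_model=... on the decorator; the return annotation does the work.
    return PersonaSnapshot(id=persona_id, name="Example Persona")
```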
**Testing Updates**:
If you make any updates to a celery worker and you want to test these changes, you will need
to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.
### Code Quality
```bash
# Install and run pre-commit hooks
pre-commit install
pre-commit run --all-files
```
NOTE: Always make sure everything is strictly typed (both in Python and TypeScript).
## Architecture Overview
### Technology Stack
- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
- **Database**: PostgreSQL with Redis caching
- **Search**: Vespa vector database
- **Auth**: OAuth2, SAML, multi-provider support
- **AI/ML**: LangChain, LiteLLM, multiple embedding models
### Directory Structure
```
backend/
├── onyx/
│ ├── auth/ # Authentication & authorization
│ ├── chat/ # Chat functionality & LLM interactions
│ ├── connectors/ # Data source connectors
│ ├── db/ # Database models & operations
│ ├── document_index/ # Vespa integration
│ ├── federated_connectors/ # External search connectors
│ ├── llm/ # LLM provider integrations
│ └── server/ # API endpoints & routers
├── ee/ # Enterprise Edition features
├── alembic/ # Database migrations
└── tests/ # Test suites
web/
├── src/app/ # Next.js app router pages
├── src/components/ # Reusable React components
└── src/lib/ # Utilities & business logic
```
## Frontend Standards
### 1. Import Standards
**Always use absolute imports with the `@` prefix.**
**Reason:** Moving files around becomes easier since you don't also have to update those import statements. This makes modifications to the codebase much nicer.
```typescript
// ✅ Good
import { Button } from "@/components/ui/button";
import { useAuth } from "@/hooks/useAuth";
import { Text } from "@/refresh-components/texts/Text";
// ❌ Bad
import { Button } from "../../../components/ui/button";
import { useAuth } from "./hooks/useAuth";
```
### 2. React Component Functions
**Prefer regular functions over arrow functions for React components.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
function UserProfile({ userId }: UserProfileProps) {
return <div>User Profile</div>
}
// ❌ Bad
const UserProfile = ({ userId }: UserProfileProps) => {
return <div>User Profile</div>
}
```
### 3. Props Interface Extraction
**Extract prop types into their own interface definitions.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
interface UserCardProps {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}
function UserCard({ user, showActions = false, onEdit }: UserCardProps) {
return <div>User Card</div>
}
// ❌ Bad
function UserCard({
user,
showActions = false,
onEdit
}: {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}) {
return <div>User Card</div>
}
```
### 4. Spacing Guidelines
**Prefer padding over margins for spacing.**
**Reason:** We want to consolidate all spacing on padding rather than margins.
```typescript
// ✅ Good
<div className="p-4 space-y-2">
<div className="p-2">Content</div>
</div>
// ❌ Bad
<div className="m-4 space-y-2">
<div className="m-2">Content</div>
</div>
```
### 5. Tailwind Dark Mode
**Strictly forbid using the `dark:` modifier in Tailwind classes, except for logo icon handling.**
**Reason:** The `colors.css` file already, VERY CAREFULLY, defines what the exact opposite colour of each light-mode colour is. Overriding this behaviour is VERY bad and will lead to horrible UI breakages.
**Exception:** The `createLogoIcon` helper in `web/src/components/icons/icons.tsx` uses `dark:` modifiers (`dark:invert`, `dark:hidden`, `dark:block`) to handle third-party logo icons that cannot automatically adapt through `colors.css`. This is the ONLY acceptable use of dark mode modifiers.
```typescript
// ✅ Good - Standard components use `web/tailwind-themes/tailwind.config.js` / `web/src/app/css/colors.css`
<div className="bg-background-neutral-03 text-text-02">
Content
</div>
// ✅ Good - Logo icons with dark mode handling via createLogoIcon
export const GithubIcon = createLogoIcon(githubLightIcon, {
monochromatic: true, // Will apply dark:invert internally
});
export const GitbookIcon = createLogoIcon(gitbookLightIcon, {
darkSrc: gitbookDarkIcon, // Will use dark:hidden/dark:block internally
});
// ❌ Bad - Manual dark mode overrides
<div className="bg-white dark:bg-black text-black dark:text-white">
Content
</div>
```
### 6. Class Name Utilities
**Use the `cn` utility instead of raw string formatting for classNames.**
**Reason:** `cn` calls are easier to read. They also handle more complex inputs (e.g., string arrays) by flattening each element, and they filter out falsy values, so conditionals like `myCondition && "some-tailwind-class"` (which evaluates to `false` when `myCondition` is `false`) are dropped from the final class string.
```typescript
import { cn } from '@/lib/utils'
// ✅ Good
<div className={cn(
'base-class',
isActive && 'active-class',
className
)}>
Content
</div>
// ❌ Bad
<div className={`base-class ${isActive ? 'active-class' : ''} ${className}`}>
Content
</div>
```
### 7. Custom Hooks Organization
**Follow a "hook-per-file" layout. Each hook should live in its own file within `web/src/hooks`.**
**Reason:** This is just a layout preference. Keeps code clean.
```typescript
// web/src/hooks/useUserData.ts
export function useUserData(userId: string) {
// hook implementation
}
// web/src/hooks/useLocalStorage.ts
export function useLocalStorage<T>(key: string, initialValue: T) {
// hook implementation
}
```
### 8. Icon Usage
**ONLY use icons from the `web/src/icons` directory. Do NOT use icons from `react-icons`, `lucide`, or other external libraries.**
**Reason:** We have a very carefully curated selection of icons that match our Onyx guidelines. We do NOT want to muddy those up with different aesthetic stylings.
```typescript
// ✅ Good
import SvgX from "@/icons/x";
import SvgMoreHorizontal from "@/icons/more-horizontal";
// ❌ Bad
import { User } from "lucide-react";
import { FiSearch } from "react-icons/fi";
```
**Missing Icons**: If an icon is needed but doesn't exist in the `web/src/icons` directory, import it from Figma using the Figma MCP tool and add it to the icons directory.
If you need help with this step, reach out to `raunak@onyx.app`.
### 9. Text Rendering
**Prefer using the `refresh-components/texts/Text` component for all text rendering. Avoid "naked" text nodes.**
**Reason:** The `Text` component is fully compliant with the stylings provided in Figma. It provides easy utilities to specify the text-colour and font-size in the form of flags. Super duper easy.
```typescript
// ✅ Good
import { Text } from '@/refresh-components/texts/Text'
function UserCard({ name }: { name: string }) {
return (
<Text
/* The `text03` flag colours the rendered text with the 3rd-scale grey */
text03
/* The `mainAction` flag applies the "main-action" font, line-height, and weight described in the Figma */
mainAction
>
{name}
</Text>
)
}
// ❌ Bad
function UserCard({ name }: { name: string }) {
return (
<div>
<h2>{name}</h2>
<p>User details</p>
</div>
)
}
```
### 10. Component Usage
**Heavily avoid raw HTML input components. Always use components from the `web/src/refresh-components` or `web/lib/opal/src` directory.**
**Reason:** We've put in a lot of effort to unify the components that are rendered in the Onyx app. Using raw components breaks the entire UI of the application, and leaves it in a muddier state than before.
```typescript
// ✅ Good
import Button from '@/refresh-components/buttons/Button'
import InputTypeIn from '@/refresh-components/inputs/InputTypeIn'
import SvgPlusCircle from '@/icons/plus-circle'
function ContactForm() {
return (
<form>
<InputTypeIn placeholder="Search..." />
<Button type="submit" leftIcon={SvgPlusCircle}>Submit</Button>
</form>
)
}
// ❌ Bad
function ContactForm() {
return (
<form>
<input placeholder="Name" />
<textarea placeholder="Message" />
<button type="submit">Submit</button>
</form>
)
}
```
### 11. Colors
**Always use custom overrides for colors and borders rather than built-in Tailwind CSS colors. These overrides live in `web/tailwind-themes/tailwind.config.js`.**
**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.
**Available color categories:**
- **Text:** `text-01` through `text-05`, `text-inverted-XX`
- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
- **Actions:** `action-link-XX`, `action-danger-XX`
- **Status:** `status-info-XX`, `status-success-XX`, `status-warning-XX`, `status-error-XX`
- **Theme:** `theme-primary-XX`, `theme-red-XX`, `theme-blue-XX`, etc.
```typescript
// ✅ Good - Use custom Onyx color classes
<div className="bg-background-neutral-01 border border-border-02" />
<div className="bg-background-tint-02 border border-border-01" />
<div className="bg-status-success-01" />
<div className="bg-action-link-01" />
<div className="bg-theme-primary-05" />
// ❌ Bad - Do NOT use standard Tailwind colors
<div className="bg-gray-100 border border-gray-300 text-gray-600" />
<div className="bg-white border border-slate-200" />
<div className="bg-green-100 text-green-700" />
<div className="bg-blue-100 text-blue-600" />
<div className="bg-indigo-500" />
```
### 12. Data Fetching
**Prefer using `useSWR` for data fetching. Data should generally be fetched on the client side. Components that need data should display a loader / placeholder while waiting for that data. Prefer loading data within the component that needs it rather than at the top level and passing it down.**
**Reason:** Client side fetching allows us to load the skeleton of the page without waiting for data to load, leading to a snappier UX. Loading data where needed reduces dependencies between a component and its parent component(s).
## Database & Migrations
### Running Migrations
```bash
# Standard migrations
alembic upgrade head
# Multi-tenant (Enterprise)
alembic -n schema_private upgrade head
```
### Creating Migrations
```bash
# Create migration
alembic revision -m "description"
# Multi-tenant migration
alembic -n schema_private revision -m "description"
```
Write the migration manually and place it in the file that alembic creates when you run the above command (a skeleton is sketched below).
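A skeleton of what a hand-written migration typically looks like; the revision identifiers, table, and column below are placeholders, and the real IDs come from the file alembic generates:
```python
"""add example_flag to example_table (placeholder description)
Revision ID: aaaaaaaaaaaa
Revises: bbbbbbbbbbbb
"""
from alembic import op
import sqlalchemy as sa

# Placeholder identifiers -- keep the ones alembic generated for you.
revision = "aaaaaaaaaaaa"
down_revision = "bbbbbbbbbbbb"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "example_table",
        sa.Column("example_flag", sa.Boolean(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("example_table", "example_flag")
```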
## Testing Strategy
There are 4 main types of tests within Onyx:
### Unit Tests
These should not assume any Onyx/external services are available to be called.
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
write these for complex, isolated modules e.g. `citation_processing.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest -xv backend/tests/unit
```
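A rough sketch of the shape of such a test; the module, function, and patch target are hypothetical, not real Onyx code:
```python
# backend/tests/unit/test_example_summary.py  (hypothetical)
from unittest.mock import patch

from onyx.example_module import build_summary  # hypothetical function under test


def test_build_summary_uses_mocked_llm() -> None:
    # Mock the outside-world call so the test stays hermetic.
    with patch("onyx.example_module.call_llm") as mock_llm:
        mock_llm.return_value = "short summary"
        result = build_summary("a very long document")
    assert result == "short summary"
    mock_llm.assert_called_once()
```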
### External Dependency Unit Tests
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
MinIO/S3, and Vespa are running, OpenAI can be called, requests to the internet are fine, etc.).
However, the actual Onyx containers are not running; these tests call the function under test directly.
We can also mock components/calls at will.
The goal with these tests is to minimize mocking while giving some flexibility to mock things that are flaky,
need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
with certain args, something that would be impossible with proper integration tests).
A great example of this type of test is `backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit
```
### Integration Tests
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
verification is necessary) over any other type of test.
Tests are parallelized at a directory level.
When writing integration tests, make sure to check the root `conftest.py` for useful fixtures and the `backend/tests/integration/common_utils` directory for utilities. Prefer calling the appropriate Manager
class in the utils (if one exists) over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
calling the utilities directly (e.g. do NOT create admin users with
`admin_user = UserManager.create(name="admin_user")`; instead use the `admin_user` fixture).
A great example of this type of test is `backend/tests/integration/dev_apis/test_simple_chat_api.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
```
### Playwright (E2E) Tests
These tests are an even more complete version of the Integration Tests mentioned above: all Onyx services are
running, *including* the Web Server.
Use these tests for anything that requires significant frontend <-> backend coordination.
Tests are located at `web/tests/e2e`. Tests are written in TypeScript.
To run them:
```bash
npx playwright test <TEST_NAME>
```
## Logs
When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright), you can access
the logs via the `backend/log/<service_name>_debug.log` files. All Onyx services (api_server, web_server, celery_X)
write their logs to the corresponding file.
## Security Considerations
- Never commit API keys or secrets to repository
- Use encrypted credential storage for connector credentials
- Follow RBAC patterns for new features
- Implement proper input validation with Pydantic models
- Use parameterized queries to prevent SQL injection
## AI/LLM Integration
- Multiple LLM providers supported via LiteLLM
- Configurable models per feature (chat, search, embeddings)
- Streaming support for real-time responses
- Token management and rate limiting
- Custom prompts and agent actions
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:
**Issues to Address**
What the change is meant to do.
**Important Notes**
Things you come across in your research that are important to the implementation.
**Implementation strategy**
How you are going to make the changes happen. High level approach.
**Tests**
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.
Do NOT include these: *Timeline*, *Rollback plan*
This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
Keep it high level. You can reference certain files or functions though.
Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.

CLAUDE.md Symbolic link
View File

@@ -0,0 +1 @@
AGENTS.md

View File

@@ -149,6 +149,11 @@ RUN if [ "$ENABLE_CRAFT" = "true" ]; then \
ENABLE_CRAFT=true /app/scripts/setup_craft_templates.sh; \
fi
# Set Craft template paths to the in-image locations
# These match the paths where setup_craft_templates.sh creates the templates
ENV OUTPUTS_TEMPLATE_PATH=/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs
ENV VENV_TEMPLATE_PATH=/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/venv
# Put logo in assets
COPY --chown=onyx:onyx ./assets /app/assets

View File

@@ -48,6 +48,7 @@ WORKDIR /app
# Utils used by model server
COPY ./onyx/utils/logger.py /app/onyx/utils/logger.py
COPY ./onyx/utils/middleware.py /app/onyx/utils/middleware.py
COPY ./onyx/utils/tenant.py /app/onyx/utils/tenant.py
# Place to fetch version information
COPY ./onyx/__init__.py /app/onyx/__init__.py

View File

@@ -57,7 +57,7 @@ if USE_IAM_AUTH:
def include_object(
object: SchemaItem,
object: SchemaItem, # noqa: ARG001
name: str | None,
type_: Literal[
"schema",
@@ -67,8 +67,8 @@ def include_object(
"unique_constraint",
"foreign_key_constraint",
],
reflected: bool,
compare_to: SchemaItem | None,
reflected: bool, # noqa: ARG001
compare_to: SchemaItem | None, # noqa: ARG001
) -> bool:
if type_ == "table" and name in EXCLUDE_TABLES:
return False
@@ -244,7 +244,7 @@ def do_run_migrations(
def provide_iam_token_for_alembic(
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any # noqa: ARG001
) -> None:
if USE_IAM_AUTH:
# Database connection settings

View File

@@ -0,0 +1,343 @@
#!/usr/bin/env python3
"""Parallel Alembic Migration Runner
Upgrades tenant schemas to head in batched, parallel alembic subprocesses.
Each subprocess handles a batch of schemas (via ``-x schemas=a,b,c``),
reducing per-process overhead compared to one-schema-per-process.
Usage examples::
# defaults: 6 workers, 50 schemas/batch
python alembic/run_multitenant_migrations.py
# custom settings
python alembic/run_multitenant_migrations.py -j 8 -b 100
"""
from __future__ import annotations
import argparse
import subprocess
import sys
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, NamedTuple
from alembic.config import Config
from alembic.script import ScriptDirectory
from sqlalchemy import text
from onyx.db.engine.sql_engine import is_valid_schema_name
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from shared_configs.configs import TENANT_ID_PREFIX
# ---------------------------------------------------------------------------
# Data types
# ---------------------------------------------------------------------------
class Args(NamedTuple):
jobs: int
batch_size: int
class BatchResult(NamedTuple):
schemas: list[str]
success: bool
output: str
elapsed_sec: float
# ---------------------------------------------------------------------------
# Core functions
# ---------------------------------------------------------------------------
def run_alembic_for_batch(schemas: list[str]) -> BatchResult:
"""Run ``alembic upgrade head`` for a batch of schemas in one subprocess.
If the batch fails, it is automatically retried with ``-x continue=true``
so that the remaining schemas in the batch still get migrated. The retry
output (which contains alembic's per-schema error messages) is returned
for diagnosis.
"""
csv = ",".join(schemas)
base_cmd = ["alembic", "-x", f"schemas={csv}"]
start = time.monotonic()
result = subprocess.run(
[*base_cmd, "upgrade", "head"],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
)
if result.returncode == 0:
elapsed = time.monotonic() - start
return BatchResult(schemas, True, result.stdout or "", elapsed)
# At least one schema failed. Print the initial error output, then
# re-run with continue=true so the remaining schemas still get migrated.
if result.stdout:
print(f"Initial error output:\n{result.stdout}", file=sys.stderr, flush=True)
print(
f"Batch failed (exit {result.returncode}), retrying with 'continue=true'...",
file=sys.stderr,
flush=True,
)
retry = subprocess.run(
[*base_cmd, "-x", "continue=true", "upgrade", "head"],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
)
elapsed = time.monotonic() - start
return BatchResult(schemas, False, retry.stdout or "", elapsed)
def get_head_revision() -> str | None:
"""Get the head revision from the alembic script directory."""
alembic_cfg = Config("alembic.ini")
script = ScriptDirectory.from_config(alembic_cfg)
return script.get_current_head()
def get_schemas_needing_migration(
tenant_schemas: List[str], head_rev: str
) -> List[str]:
"""Return only schemas whose current alembic version is not at head."""
if not tenant_schemas:
return []
engine = SqlEngine.get_engine()
with engine.connect() as conn:
# Find which schemas actually have an alembic_version table
rows = conn.execute(
text(
"SELECT table_schema FROM information_schema.tables "
"WHERE table_name = 'alembic_version' "
"AND table_schema = ANY(:schemas)"
),
{"schemas": tenant_schemas},
)
schemas_with_table = set(row[0] for row in rows)
# Schemas without the table definitely need migration
needs_migration = [s for s in tenant_schemas if s not in schemas_with_table]
if not schemas_with_table:
return needs_migration
# Validate schema names before interpolating into SQL
for schema in schemas_with_table:
if not is_valid_schema_name(schema):
raise ValueError(f"Invalid schema name: {schema}")
# Single query to get every schema's current revision at once.
# Use integer tags instead of interpolating schema names into
# string literals to avoid quoting issues.
schema_list = list(schemas_with_table)
union_parts = [
f'SELECT {i} AS idx, version_num FROM "{schema}".alembic_version'
for i, schema in enumerate(schema_list)
]
rows = conn.execute(text(" UNION ALL ".join(union_parts)))
version_by_schema = {schema_list[row[0]]: row[1] for row in rows}
needs_migration.extend(
s for s in schemas_with_table if version_by_schema.get(s) != head_rev
)
return needs_migration
def run_migrations_parallel(
schemas: list[str],
max_workers: int,
batch_size: int,
) -> bool:
"""Chunk *schemas* into batches and run them in parallel.
A background monitor thread prints a status line every 60 s listing
which batches are still in-flight, making it easy to spot hung tenants.
"""
batches = [schemas[i : i + batch_size] for i in range(0, len(schemas), batch_size)]
total_batches = len(batches)
print(
f"{len(schemas)} schemas in {total_batches} batch(es) "
f"with {max_workers} workers (batch size: {batch_size})...",
flush=True,
)
all_success = True
# Thread-safe tracking of in-flight batches for the monitor thread.
in_flight: dict[int, list[str]] = {}
prev_in_flight: set[int] = set()
lock = threading.Lock()
stop_event = threading.Event()
def _monitor() -> None:
"""Print a status line every 60 s listing batches still in-flight.
Only prints batches that were also present in the previous tick,
making it easy to spot batches that are stuck.
"""
nonlocal prev_in_flight
while not stop_event.wait(60):
with lock:
if not in_flight:
prev_in_flight = set()
continue
current = set(in_flight)
stuck = current & prev_in_flight
prev_in_flight = current
if not stuck:
continue
schemas = [s for idx in sorted(stuck) for s in in_flight[idx]]
print(
f"⏳ batch(es) still running since last check "
f"({', '.join(str(i + 1) for i in sorted(stuck))}): "
+ ", ".join(schemas),
flush=True,
)
monitor_thread = threading.Thread(target=_monitor, daemon=True)
monitor_thread.start()
try:
with ThreadPoolExecutor(max_workers=max_workers) as executor:
def _run(batch_idx: int, batch: list[str]) -> BatchResult:
with lock:
in_flight[batch_idx] = batch
print(
f"Batch {batch_idx + 1}/{total_batches} started "
f"({len(batch)} schemas): {', '.join(batch)}",
flush=True,
)
result = run_alembic_for_batch(batch)
with lock:
in_flight.pop(batch_idx, None)
return result
future_to_idx = {
executor.submit(_run, i, b): i for i, b in enumerate(batches)
}
for future in as_completed(future_to_idx):
batch_idx = future_to_idx[future]
try:
result = future.result()
status = "✓" if result.success else "✗"
print(
f"Batch {batch_idx + 1}/{total_batches} "
f"{status} {len(result.schemas)} schemas "
f"in {result.elapsed_sec:.1f}s",
flush=True,
)
if not result.success:
# Print last 20 lines of retry output for diagnosis
tail = result.output.strip().splitlines()[-20:]
for line in tail:
print(f" {line}", flush=True)
all_success = False
except Exception as e:
print(
f"Batch {batch_idx + 1}/{total_batches} " f"✗ exception: {e}",
flush=True,
)
all_success = False
finally:
stop_event.set()
monitor_thread.join(timeout=2)
return all_success
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def parse_args() -> Args:
parser = argparse.ArgumentParser(
description="Run alembic migrations for all tenant schemas in parallel"
)
parser.add_argument(
"-j",
"--jobs",
type=int,
default=6,
metavar="N",
help="Number of parallel alembic processes (default: 6)",
)
parser.add_argument(
"-b",
"--batch-size",
type=int,
default=50,
metavar="N",
help="Schemas per alembic process (default: 50)",
)
args = parser.parse_args()
if args.jobs < 1:
parser.error("--jobs must be >= 1")
if args.batch_size < 1:
parser.error("--batch-size must be >= 1")
return Args(jobs=args.jobs, batch_size=args.batch_size)
def main() -> int:
args = parse_args()
head_rev = get_head_revision()
if head_rev is None:
print("Could not determine head revision.", file=sys.stderr)
return 1
with SqlEngine.scoped_engine(pool_size=5, max_overflow=2):
tenant_ids = get_all_tenant_ids()
tenant_schemas = [tid for tid in tenant_ids if tid.startswith(TENANT_ID_PREFIX)]
if not tenant_schemas:
print(
"No tenant schemas found. Is MULTI_TENANT=true set?",
file=sys.stderr,
)
return 1
schemas_to_migrate = get_schemas_needing_migration(tenant_schemas, head_rev)
if not schemas_to_migrate:
print(
f"All {len(tenant_schemas)} tenants are already at head "
f"revision ({head_rev})."
)
return 0
print(
f"{len(schemas_to_migrate)}/{len(tenant_schemas)} tenants need "
f"migration (head: {head_rev})."
)
success = run_migrations_parallel(
schemas_to_migrate,
max_workers=args.jobs,
batch_size=args.batch_size,
)
print(f"\n{'All migrations successful' if success else 'Some migrations failed'}")
return 0 if success else 1
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -1,58 +0,0 @@
"""LLMProvider deprecated fields are nullable
Revision ID: 001984c88745
Revises: 01f8e6d95a33
Create Date: 2026-02-01 22:24:34.171100
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "001984c88745"
down_revision = "01f8e6d95a33"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Make default_model_name nullable (was NOT NULL)
op.alter_column(
"llm_provider",
"default_model_name",
existing_type=sa.String(),
nullable=True,
)
# Remove server_default from is_default_vision_provider (was server_default=false())
op.alter_column(
"llm_provider",
"is_default_vision_provider",
existing_type=sa.Boolean(),
server_default=None,
)
# is_default_provider and default_vision_model are already nullable with no server_default
def downgrade() -> None:
# Restore default_model_name to NOT NULL (set empty string for any NULLs first)
op.execute(
"UPDATE llm_provider SET default_model_name = '' WHERE default_model_name IS NULL"
)
op.alter_column(
"llm_provider",
"default_model_name",
existing_type=sa.String(),
nullable=False,
)
# Restore server_default for is_default_vision_provider
op.alter_column(
"llm_provider",
"is_default_vision_provider",
existing_type=sa.Boolean(),
server_default=sa.false(),
)

View File

@@ -1,7 +1,7 @@
"""Populate flow mapping data
Revision ID: 01f8e6d95a33
Revises: f220515df7b4
Revises: d5c86e2c6dc6
Create Date: 2026-01-31 17:37:10.485558
"""
@@ -11,7 +11,7 @@ from alembic import op
# revision identifiers, used by Alembic.
revision = "01f8e6d95a33"
down_revision = "f220515df7b4"
down_revision = "d5c86e2c6dc6"
branch_labels = None
depends_on = None
@@ -23,7 +23,7 @@ def upgrade() -> None:
"""
INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)
SELECT
'chat' AS llm_model_flow_type,
'CHAT' AS llm_model_flow_type,
COALESCE(
(lp.is_default_provider IS TRUE AND lp.default_model_name = mc.name),
FALSE
@@ -44,7 +44,7 @@ def upgrade() -> None:
"""
INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)
SELECT
'vision' AS llm_model_flow_type,
'VISION' AS llm_model_flow_type,
COALESCE(
(lp.is_default_vision_provider IS TRUE AND lp.default_vision_model = mc.name),
FALSE
@@ -68,7 +68,7 @@ def downgrade() -> None:
default_vision_model = mc.name
FROM llm_model_flow mf
JOIN model_configuration mc ON mc.id = mf.model_configuration_id
WHERE mf.llm_model_flow_type = 'vision'
WHERE mf.llm_model_flow_type = 'VISION'
AND mf.is_default = TRUE
AND mc.llm_provider_id = lp.id;
"""
@@ -83,7 +83,7 @@ def downgrade() -> None:
default_model_name = mc.name
FROM llm_model_flow mf
JOIN model_configuration mc ON mc.id = mf.model_configuration_id
WHERE mf.llm_model_flow_type = 'chat'
WHERE mf.llm_model_flow_type = 'CHAT'
AND mf.is_default = TRUE
AND mc.llm_provider_id = lp.id;
"""
@@ -100,7 +100,7 @@ def downgrade() -> None:
FROM model_configuration mc
JOIN llm_model_flow mf ON mf.model_configuration_id = mc.id
WHERE mc.llm_provider_id = lp.id
AND mf.llm_model_flow_type = 'chat'
AND mf.llm_model_flow_type = 'CHAT'
ORDER BY mc.is_visible DESC, mc.id ASC
LIMIT 1
)

View File

@@ -0,0 +1,27 @@
"""add_user_preferences
Revision ID: 175ea04c7087
Revises: d56ffa94ca32
Create Date: 2026-02-04 18:16:24.830873
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "175ea04c7087"
down_revision = "d56ffa94ca32"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"user",
sa.Column("user_preferences", sa.Text(), nullable=True),
)
def downgrade() -> None:
op.drop_column("user", "user_preferences")

View File

@@ -0,0 +1,36 @@
"""add_chat_compression_fields
Revision ID: 90b409d06e50
Revises: f220515df7b4
Create Date: 2026-01-26 09:13:09.635427
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "90b409d06e50"
down_revision = "f220515df7b4"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add last_summarized_message_id to chat_message
# This field marks a message as a summary and indicates the last message it covers.
# Summaries are branch-aware via their parent_message_id pointing to the branch.
op.add_column(
"chat_message",
sa.Column(
"last_summarized_message_id",
sa.Integer(),
sa.ForeignKey("chat_message.id", ondelete="SET NULL"),
nullable=True,
),
)
def downgrade() -> None:
op.drop_column("chat_message", "last_summarized_message_id")

View File

@@ -0,0 +1,35 @@
"""add_file_content
Revision ID: d56ffa94ca32
Revises: 01f8e6d95a33
Create Date: 2026-02-06 15:29:34.192960
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "d56ffa94ca32"
down_revision = "01f8e6d95a33"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"file_content",
sa.Column(
"file_id",
sa.String(),
sa.ForeignKey("file_record.file_id", ondelete="CASCADE"),
primary_key=True,
),
sa.Column("lobj_oid", sa.BigInteger(), nullable=False),
sa.Column("file_size", sa.BigInteger(), nullable=False, server_default="0"),
)
def downgrade() -> None:
op.drop_table("file_content")

View File

@@ -0,0 +1,35 @@
"""add_cascade_delete_to_search_query_user_id
Revision ID: d5c86e2c6dc6
Revises: 90b409d06e50
Create Date: 2026-02-04 16:05:04.749804
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "d5c86e2c6dc6"
down_revision = "90b409d06e50"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_constraint("search_query_user_id_fkey", "search_query", type_="foreignkey")
op.create_foreign_key(
"search_query_user_id_fkey",
"search_query",
"user",
["user_id"],
["id"],
ondelete="CASCADE",
)
def downgrade() -> None:
op.drop_constraint("search_query_user_id_fkey", "search_query", type_="foreignkey")
op.create_foreign_key(
"search_query_user_id_fkey", "search_query", "user", ["user_id"], ["id"]
)

View File

@@ -39,7 +39,7 @@ EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}
def include_object(
object: SchemaItem,
object: SchemaItem, # noqa: ARG001
name: str | None,
type_: Literal[
"schema",
@@ -49,8 +49,8 @@ def include_object(
"unique_constraint",
"foreign_key_constraint",
],
reflected: bool,
compare_to: SchemaItem | None,
reflected: bool, # noqa: ARG001
compare_to: SchemaItem | None, # noqa: ARG001
) -> bool:
if type_ == "table" and name in EXCLUDE_TABLES:
return False

View File

@@ -951,7 +951,7 @@ class PermissionSyncCallback(IndexingHeartbeatInterface):
return False
def progress(self, tag: str, amount: int) -> None:
def progress(self, tag: str, amount: int) -> None: # noqa: ARG002
try:
self.redis_connector.permissions.set_active()
@@ -982,7 +982,7 @@ class PermissionSyncCallback(IndexingHeartbeatInterface):
def monitor_ccpair_permissions_taskset(
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session # noqa: ARG001
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)

View File

@@ -259,7 +259,7 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str) -> bool | None:
def try_creating_external_group_sync_task(
app: Celery,
cc_pair_id: int,
r: Redis,
r: Redis, # noqa: ARG001
tenant_id: str,
) -> str | None:
"""Returns an int if syncing is needed. The int represents the number of sync tasks generated.
@@ -344,7 +344,7 @@ def try_creating_external_group_sync_task(
bind=True,
)
def connector_external_group_sync_generator_task(
self: Task,
self: Task, # noqa: ARG001
cc_pair_id: int,
tenant_id: str,
) -> None:
@@ -590,8 +590,8 @@ def _perform_external_group_sync(
def validate_external_group_sync_fences(
tenant_id: str,
celery_app: Celery,
r: Redis,
celery_app: Celery, # noqa: ARG001
r: Redis, # noqa: ARG001
r_replica: Redis,
r_celery: Redis,
lock_beat: RedisLock,

View File

@@ -40,7 +40,7 @@ def export_query_history_task(
end: datetime,
start_time: datetime,
# Need to include the tenant_id since the TenantAwareTask needs this
tenant_id: str,
tenant_id: str, # noqa: ARG001
) -> None:
if not self.request.id:
raise RuntimeError("No task id defined for this task; cannot identify it")

View File

@@ -43,7 +43,7 @@ _TENANT_PROVISIONING_TIME_LIMIT = 60 * 10 # 10 minutes
trail=False,
bind=True,
)
def check_available_tenants(self: Task) -> None:
def check_available_tenants(self: Task) -> None: # noqa: ARG001
"""
Check if we have enough pre-provisioned tenants available.
If not, trigger the pre-provisioning of new tenants.

View File

@@ -21,9 +21,9 @@ logger = setup_logger()
trail=False,
)
def generate_usage_report_task(
self: Task,
self: Task, # noqa: ARG001
*,
tenant_id: str,
tenant_id: str, # noqa: ARG001
user_id: str | None = None,
period_from: str | None = None,
period_to: str | None = None,

View File

@@ -7,7 +7,7 @@ QUERY_HISTORY_TASK_NAME_PREFIX = OnyxCeleryTask.EXPORT_QUERY_HISTORY_TASK
def name_chat_ttl_task(
retention_limit_days: float, tenant_id: str | None = None
retention_limit_days: float, tenant_id: str | None = None # noqa: ARG001
) -> str:
return f"chat_ttl_{retention_limit_days}_days"

View File

@@ -134,7 +134,7 @@ GATED_TENANTS_KEY = "gated_tenants"
# License enforcement - when True, blocks API access for gated/expired licenses
LICENSE_ENFORCEMENT_ENABLED = (
os.environ.get("LICENSE_ENFORCEMENT_ENABLED", "").lower() == "true"
os.environ.get("LICENSE_ENFORCEMENT_ENABLED", "true").lower() == "true"
)
# Cloud data plane URL - self-hosted instances call this to reach cloud proxy endpoints

View File

@@ -54,7 +54,7 @@ def delete_document_set_privacy__no_commit(
def fetch_document_sets(
user_id: UUID | None,
db_session: Session,
include_outdated: bool = True, # Parameter only for versioned implementation, unused
include_outdated: bool = True, # Parameter only for versioned implementation, unused # noqa: ARG001
) -> list[tuple[DocumentSet, list[ConnectorCredentialPair]]]:
assert user_id is not None

View File

@@ -5,8 +5,10 @@ It filters hierarchy nodes based on user email and external group membership.
"""
from sqlalchemy import any_
from sqlalchemy import cast
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy import String
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import Session
from sqlalchemy.sql.elements import ColumnElement
@@ -32,7 +34,7 @@ def _build_hierarchy_access_filter(
if external_group_ids:
access_filters.append(
HierarchyNode.external_user_group_ids.overlap(
postgresql.array(external_group_ids)
cast(postgresql.array(external_group_ids), postgresql.ARRAY(String))
)
)
return or_(*access_filters)

View File

@@ -11,6 +11,7 @@ from ee.onyx.server.license.models import LicenseMetadata
from ee.onyx.server.license.models import LicensePayload
from ee.onyx.server.license.models import LicenseSource
from onyx.auth.schemas import UserRole
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.db.models import License
from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
@@ -107,7 +108,8 @@ def get_used_seats(tenant_id: str | None = None) -> int:
Get current seat usage directly from database.
For multi-tenant: counts users in UserTenantMapping for this tenant.
For self-hosted: counts all active users (excludes EXT_PERM_USER role).
For self-hosted: counts all active users (excludes EXT_PERM_USER role
and the anonymous system user).
TODO: Exclude API key dummy users from seat counting. API keys create
users with emails like `__DANSWER_API_KEY_*` that should not count toward
@@ -127,6 +129,7 @@ def get_used_seats(tenant_id: str | None = None) -> int:
.where(
User.is_active == True, # type: ignore # noqa: E712
User.role != UserRole.EXT_PERM_USER,
User.email != ANONYMOUS_USER_EMAIL, # type: ignore
)
)
return result.scalar() or 0

View File

@@ -643,7 +643,7 @@ def add_users_to_user_group(
def update_user_group(
db_session: Session,
user: User,
user: User, # noqa: ARG001
user_group_id: int,
user_group_update: UserGroupUpdate,
) -> UserGroup:

View File

@@ -25,7 +25,7 @@ CONFLUENCE_DOC_SYNC_LABEL = "confluence_doc_sync"
def confluence_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:

View File

@@ -1,6 +1,8 @@
from typing import Any
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.onyx_confluence import (
get_user_email_from_username__server,
)
@@ -72,6 +74,7 @@ def get_page_restrictions(
page_id: str,
page_restrictions: dict[str, Any],
ancestors: list[dict[str, Any]],
add_prefix: bool = False,
) -> ExternalAccess | None:
"""
This function gets the restrictions for a page. In Confluence, a child can have
@@ -79,6 +82,9 @@ def get_page_restrictions(
If no restrictions are found anywhere, then return None, indicating that the page
should inherit the space's restrictions.
add_prefix: When True, prefix group IDs with source type (for indexing path).
When False (default), leave unprefixed (for permission sync path).
"""
found_user_emails: set[str] = set()
found_group_names: set[str] = set()
@@ -92,13 +98,22 @@ def get_page_restrictions(
restrictions=page_restrictions,
)
)
def _maybe_prefix_groups(group_names: set[str]) -> set[str]:
if add_prefix:
return {
build_ext_group_name_for_onyx(g, DocumentSource.CONFLUENCE)
for g in group_names
}
return group_names
# if there are individual page-level restrictions, then this is the accurate
# restriction for the page. You cannot both have page-level restrictions AND
# inherit restrictions from the parent.
if found_any_page_level_restriction:
return ExternalAccess(
external_user_emails=found_user_emails,
external_user_group_ids=found_group_names,
external_user_group_ids=_maybe_prefix_groups(found_group_names),
is_public=False,
)
@@ -125,7 +140,7 @@ def get_page_restrictions(
)
return ExternalAccess(
external_user_emails=ancestor_user_emails,
external_user_group_ids=ancestor_group_names,
external_user_group_ids=_maybe_prefix_groups(ancestor_group_names),
is_public=False,
)

View File

@@ -3,6 +3,8 @@ from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GR
from ee.onyx.external_permissions.confluence.constants import REQUEST_PAGINATION_LIMIT
from ee.onyx.external_permissions.confluence.constants import VIEWSPACE_PERMISSION_TYPE
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.onyx_confluence import (
get_user_email_from_username__server,
)
@@ -112,6 +114,7 @@ def get_space_permission(
confluence_client: OnyxConfluence,
space_key: str,
is_cloud: bool,
add_prefix: bool = False,
) -> ExternalAccess:
if is_cloud:
space_permissions = _get_cloud_space_permissions(confluence_client, space_key)
@@ -130,13 +133,32 @@ def get_space_permission(
f"permissions for space '{space_key}'"
)
# Prefix group IDs with source type if requested (for indexing path)
if add_prefix and space_permissions.external_user_group_ids:
prefixed_groups = {
build_ext_group_name_for_onyx(g, DocumentSource.CONFLUENCE)
for g in space_permissions.external_user_group_ids
}
return ExternalAccess(
external_user_emails=space_permissions.external_user_emails,
external_user_group_ids=prefixed_groups,
is_public=space_permissions.is_public,
)
return space_permissions
def get_all_space_permissions(
confluence_client: OnyxConfluence,
is_cloud: bool,
add_prefix: bool = False,
) -> dict[str, ExternalAccess]:
"""
Get access permissions for all spaces in Confluence.
add_prefix: When True, prefix group IDs with source type (for indexing path).
When False (default), leave unprefixed (for permission sync path).
"""
logger.debug("Getting space permissions")
# Gets all the spaces in the Confluence instance
all_space_keys = [
@@ -151,7 +173,9 @@ def get_all_space_permissions(
logger.debug(f"Got {len(all_space_keys)} spaces from confluence")
space_permissions_by_space_key: dict[str, ExternalAccess] = {}
for space_key in all_space_keys:
space_permissions = get_space_permission(confluence_client, space_key, is_cloud)
space_permissions = get_space_permission(
confluence_client, space_key, is_cloud, add_prefix
)
# Stores the permissions for each space
space_permissions_by_space_key[space_key] = space_permissions

View File

@@ -34,7 +34,7 @@ GITHUB_DOC_SYNC_LABEL = "github_doc_sync"
def github_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[DocExternalAccess, None, None]:
"""
@@ -50,7 +50,12 @@ def github_doc_sync(
**cc_pair.connector.connector_specific_config
)
github_connector.load_credentials(cc_pair.credential.credential_json)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
github_connector.load_credentials(credential_json)
logger.info("GitHub connector credentials loaded successfully")
if not github_connector.github_client:

View File

@@ -12,13 +12,18 @@ logger = setup_logger()
def github_group_sync(
tenant_id: str,
tenant_id: str, # noqa: ARG001
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
github_connector: GithubConnector = GithubConnector(
**cc_pair.connector.connector_specific_config
)
github_connector.load_credentials(cc_pair.credential.credential_json)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
github_connector.load_credentials(credential_json)
if not github_connector.github_client:
raise ValueError("github_client is required")

View File

@@ -91,7 +91,7 @@ class TeamInfo(BaseModel):
def _fetch_organization_members(
github_client: Github, org_name: str, retry_count: int = 0
github_client: Github, org_name: str, retry_count: int = 0 # noqa: ARG001
) -> List[UserInfo]:
"""Fetch all organization members including owners and regular members."""
org_members: List[UserInfo] = []
@@ -124,7 +124,7 @@ def _fetch_organization_members(
def _fetch_repository_teams_detailed(
repo: Repository, github_client: Github, retry_count: int = 0
repo: Repository, github_client: Github, retry_count: int = 0 # noqa: ARG001
) -> List[TeamInfo]:
"""Fetch teams with access to the repository and their members."""
teams_data: List[TeamInfo] = []
@@ -167,7 +167,7 @@ def _fetch_repository_teams_detailed(
def fetch_repository_team_slugs(
repo: Repository, github_client: Github, retry_count: int = 0
repo: Repository, github_client: Github, retry_count: int = 0 # noqa: ARG001
) -> List[str]:
"""Fetch team slugs with access to the repository."""
logger.info(f"Fetching team slugs for repository {repo.full_name}")

View File

@@ -39,8 +39,8 @@ def _get_slim_doc_generator(
def gmail_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
"""
@@ -50,7 +50,12 @@ def gmail_doc_sync(
already populated.
"""
gmail_connector = GmailConnector(**cc_pair.connector.connector_specific_config)
gmail_connector.load_credentials(cc_pair.credential.credential_json)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
gmail_connector.load_credentials(credential_json)
slim_doc_generator = _get_slim_doc_generator(
cc_pair, gmail_connector, callback=callback

View File

@@ -13,6 +13,7 @@ from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.access.models import ExternalAccess
from onyx.access.models import NodeExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_drive.models import GoogleDriveFileType
@@ -67,11 +68,17 @@ def get_external_access_for_raw_gdrive_file(
company_domain: str,
retriever_drive_service: GoogleDriveService | None,
admin_drive_service: GoogleDriveService,
add_prefix: bool = False,
) -> ExternalAccess:
"""
Get the external access for a raw Google Drive file.
Assumes the file we retrieved has EITHER `permissions` or `permission_ids`
add_prefix: When this method is called during the initial indexing via the connector,
set add_prefix to True so group IDs are prefixed with the source type.
When invoked from doc_sync (permission sync), use the default (False)
since upsert_document_external_perms handles prefixing.
"""
doc_id = file.get("id")
if not doc_id:
@@ -164,6 +171,13 @@ def get_external_access_for_raw_gdrive_file(
| ({drive_id} if drive_id is not None else set())
)
# Prefix group IDs with source type if requested (for indexing path)
if add_prefix:
group_ids = {
build_ext_group_name_for_onyx(group_id, DocumentSource.GOOGLE_DRIVE)
for group_id in group_ids
}
return ExternalAccess(
external_user_emails=user_emails,
external_user_group_ids=group_ids,
@@ -175,6 +189,7 @@ def get_external_access_for_folder(
folder: GoogleDriveFileType,
google_domain: str,
drive_service: GoogleDriveService,
add_prefix: bool = False,
) -> ExternalAccess:
"""
Extract ExternalAccess from a folder's permissions.
@@ -186,6 +201,8 @@ def get_external_access_for_folder(
folder: The folder metadata from Google Drive API (must include permissionIds field)
google_domain: The company's Google Workspace domain (e.g., "company.com")
drive_service: Google Drive service for fetching permission details
add_prefix: When True, prefix group IDs with source type (for indexing path).
When False (default), leave unprefixed (for permission sync path).
Returns:
ExternalAccess with extracted permission info
@@ -248,17 +265,25 @@ def get_external_access_for_folder(
# If allowFileDiscovery is False, it's "link only" access
is_public = permission.allow_file_discovery is not False
# Prefix group IDs with source type if requested (for indexing path)
group_ids: set[str] = group_emails
if add_prefix:
group_ids = {
build_ext_group_name_for_onyx(group_id, DocumentSource.GOOGLE_DRIVE)
for group_id in group_emails
}
return ExternalAccess(
external_user_emails=user_emails,
external_user_group_ids=group_emails,
external_user_group_ids=group_ids,
is_public=is_public,
)
def gdrive_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
"""
@@ -270,7 +295,12 @@ def gdrive_doc_sync(
google_drive_connector = GoogleDriveConnector(
**cc_pair.connector.connector_specific_config
)
google_drive_connector.load_credentials(cc_pair.credential.credential_json)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
google_drive_connector.load_credentials(credential_json)
slim_doc_generator = _get_slim_doc_generator(cc_pair, google_drive_connector)

View File

@@ -384,14 +384,19 @@ def _build_onyx_groups(
def gdrive_group_sync(
tenant_id: str,
tenant_id: str, # noqa: ARG001
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
# Initialize connector and build credential/service objects
google_drive_connector = GoogleDriveConnector(
**cc_pair.connector.connector_specific_config
)
google_drive_connector.load_credentials(cc_pair.credential.credential_json)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
google_drive_connector.load_credentials(credential_json)
admin_service = get_admin_service(
google_drive_connector.creds, google_drive_connector.primary_admin_email
)

View File

@@ -17,14 +17,19 @@ JIRA_DOC_SYNC_TAG = "jira_doc_sync"
def jira_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[ElementExternalAccess, None, None]:
jira_connector = JiraConnector(
**cc_pair.connector.connector_specific_config,
)
jira_connector.load_credentials(cc_pair.credential.credential_json)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
jira_connector.load_credentials(credential_json)
yield from generic_doc_sync(
cc_pair=cc_pair,

View File

@@ -102,7 +102,7 @@ def _build_group_member_email_map(
def jira_group_sync(
tenant_id: str,
tenant_id: str, # noqa: ARG001
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
"""
@@ -119,8 +119,13 @@ def jira_group_sync(
if not jira_base_url:
raise ValueError("No jira_base_url found in connector config")
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
jira_client = build_jira_client(
credentials=cc_pair.credential.credential_json,
credentials=credential_json,
jira_base=jira_base_url,
scoped_token=scoped_token,
)

View File

@@ -8,6 +8,8 @@ from ee.onyx.external_permissions.jira.models import Holder
from ee.onyx.external_permissions.jira.models import Permission
from ee.onyx.external_permissions.jira.models import User
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.utils.logger import setup_logger
HolderMap = dict[str, list[Holder]]
@@ -252,7 +254,14 @@ def _build_external_access_from_holder_map(
def get_project_permissions(
jira_client: JIRA,
jira_project: str,
add_prefix: bool = False,
) -> ExternalAccess | None:
"""
Get project permissions from Jira.
add_prefix: When True, prefix group IDs with source type (for indexing path).
When False (default), leave unprefixed (for permission sync path).
"""
project_permissions: PermissionScheme = jira_client.project_permissionscheme(
project=jira_project
)
@@ -267,6 +276,20 @@ def get_project_permissions(
holder_map = _build_holder_map(permissions=project_permissions.permissions)
return _build_external_access_from_holder_map(
external_access = _build_external_access_from_holder_map(
jira_client=jira_client, jira_project=jira_project, holder_map=holder_map
)
# Prefix group IDs with source type if requested (for indexing path)
if add_prefix and external_access and external_access.external_user_group_ids:
prefixed_groups = {
build_ext_group_name_for_onyx(g, DocumentSource.JIRA)
for g in external_access.external_user_group_ids
}
return ExternalAccess(
external_user_emails=external_access.external_user_emails,
external_user_group_ids=prefixed_groups,
is_public=external_access.is_public,
)
return external_access

View File

@@ -23,7 +23,7 @@ ContentRange = tuple[int, int | None] # (start_index, end_index) None means to
# NOTE: Used for testing timing
def _get_dummy_object_access_map(
object_ids: set[str], user_email: str, chunks: list[InferenceChunk]
object_ids: set[str], user_email: str, chunks: list[InferenceChunk] # noqa: ARG001
) -> dict[str, bool]:
time.sleep(0.15)
# return {object_id: True for object_id in object_ids}

View File

@@ -30,7 +30,11 @@ def get_any_salesforce_client_for_doc_id(
if _ANY_SALESFORCE_CLIENT is None:
cc_pairs = get_cc_pairs_for_document(db_session, doc_id)
first_cc_pair = cc_pairs[0]
credential_json = first_cc_pair.credential.credential_json
credential_json = (
first_cc_pair.credential.credential_json.get_value(apply_mask=False)
if first_cc_pair.credential.credential_json
else {}
)
_ANY_SALESFORCE_CLIENT = Salesforce(
username=credential_json["sf_username"],
password=credential_json["sf_password"],
@@ -158,7 +162,11 @@ def _get_salesforce_client_for_doc_id(db_session: Session, doc_id: str) -> Sales
)
if cc_pair is None:
raise ValueError(f"CC pair {cc_pair_id} not found")
credential_json = cc_pair.credential.credential_json
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
_CC_PAIR_ID_SALESFORCE_CLIENT_MAP[cc_pair_id] = Salesforce(
username=credential_json["sf_username"],
password=credential_json["sf_password"],

View File

@@ -17,14 +17,19 @@ SHAREPOINT_DOC_SYNC_TAG = "sharepoint_doc_sync"
def sharepoint_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[ElementExternalAccess, None, None]:
sharepoint_connector = SharepointConnector(
**cc_pair.connector.connector_specific_config,
)
sharepoint_connector.load_credentials(cc_pair.credential.credential_json)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
sharepoint_connector.load_credentials(credential_json)
yield from generic_doc_sync(
cc_pair=cc_pair,

View File

@@ -15,7 +15,7 @@ logger = setup_logger()
def sharepoint_group_sync(
tenant_id: str,
tenant_id: str, # noqa: ARG001
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
"""Sync SharePoint groups and their members"""
@@ -25,7 +25,12 @@ def sharepoint_group_sync(
# Create SharePoint connector instance and load credentials
connector = SharepointConnector(**connector_config)
connector.load_credentials(cc_pair.credential.credential_json)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
connector.load_credentials(credential_json)
if not connector.msal_app:
raise RuntimeError("MSAL app not initialized in connector")

View File

@@ -103,7 +103,7 @@ def _fetch_channel_permissions(
def _get_slack_document_access(
slack_connector: SlackConnector,
channel_permissions: dict[str, ExternalAccess],
channel_permissions: dict[str, ExternalAccess], # noqa: ARG001
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
slim_doc_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
@@ -136,8 +136,8 @@ def _get_slack_document_access(
def slack_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
"""
@@ -151,9 +151,14 @@ def slack_doc_sync(
tenant_id = get_current_tenant_id()
provider = OnyxDBCredentialsProvider(tenant_id, "slack", cc_pair.credential.id)
r = get_redis_client(tenant_id=tenant_id)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
slack_client = SlackConnector.make_slack_web_client(
provider.get_provider_key(),
cc_pair.credential.credential_json["slack_bot_token"],
credential_json["slack_bot_token"],
SlackConnector.MAX_RETRIES,
r,
)

View File

@@ -63,9 +63,14 @@ def slack_group_sync(
provider = OnyxDBCredentialsProvider(tenant_id, "slack", cc_pair.credential.id)
r = get_redis_client(tenant_id=tenant_id)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
slack_client = SlackConnector.make_slack_web_client(
provider.get_provider_key(),
cc_pair.credential.credential_json["slack_bot_token"],
credential_json["slack_bot_token"],
SlackConnector.MAX_RETRIES,
r,
)

View File

@@ -72,10 +72,10 @@ class SyncConfig(BaseModel):
# Mock doc sync function for testing (no-op)
def mock_doc_sync(
cc_pair: "ConnectorCredentialPair",
fetch_all_docs_fn: FetchAllDocumentsFunction,
fetch_all_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: Optional["IndexingHeartbeatInterface"],
cc_pair: "ConnectorCredentialPair", # noqa: ARG001
fetch_all_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
callback: Optional["IndexingHeartbeatInterface"], # noqa: ARG001
) -> Generator["DocExternalAccess", None, None]:
"""Mock doc sync function for testing - returns empty list since permissions are fetched during indexing"""
yield from []

View File

@@ -18,14 +18,19 @@ TEAMS_DOC_SYNC_LABEL = "teams_doc_sync"
def teams_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
teams_connector = TeamsConnector(
**cc_pair.connector.connector_specific_config,
)
teams_connector.load_credentials(cc_pair.credential.credential_json)
credential_json = (
cc_pair.credential.credential_json.get_value(apply_mask=False)
if cc_pair.credential.credential_json
else {}
)
teams_connector.load_credentials(credential_json)
yield from generic_doc_sync(
cc_pair=cc_pair,

View File

@@ -32,6 +32,7 @@ from sqlalchemy.orm import Session
from ee.onyx.auth.users import current_admin_user
from ee.onyx.db.license import get_license
from ee.onyx.db.license import get_used_seats
from ee.onyx.server.billing.models import BillingInformationResponse
from ee.onyx.server.billing.models import CreateCheckoutSessionRequest
from ee.onyx.server.billing.models import CreateCheckoutSessionResponse
@@ -164,6 +165,16 @@ async def create_checkout_session(
seats = request.seats if request else None
email = request.email if request else None
# Validate that requested seats is not less than current used seats
if seats is not None:
used_seats = get_used_seats(tenant_id)
if seats < used_seats:
raise HTTPException(
status_code=400,
detail=f"Cannot subscribe with fewer seats than current usage. "
f"You have {used_seats} active users/integrations but requested {seats} seats.",
)
# Build redirect URL for after checkout completion
redirect_url = f"{WEB_DOMAIN}/admin/billing?checkout=success"
@@ -265,6 +276,15 @@ async def update_seats(
if not MULTI_TENANT and not license_data:
raise HTTPException(status_code=400, detail="No license found")
# Validate that new seat count is not less than current used seats
used_seats = get_used_seats(tenant_id)
if request.new_seat_count < used_seats:
raise HTTPException(
status_code=400,
detail=f"Cannot reduce seats below current usage. "
f"You have {used_seats} active users/integrations but requested {request.new_seat_count} seats.",
)
try:
result = await update_seat_service(
new_seat_count=request.new_seat_count,

View File
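Both endpoints above apply the same guard: the requested seat count may not drop below current usage. A hedged sketch of that check factored into a single helper; the helper itself is illustrative, since the endpoints inline the check and raise HTTPException(400) directly.

def validate_seat_request(requested_seats: int, used_seats: int) -> None:
    # Raise a plain ValueError here; the endpoints translate the failure into
    # an HTTP 400 with a user-facing message.
    if requested_seats < used_seats:
        raise ValueError(
            f"Cannot reduce seats below current usage: "
            f"{used_seats} active users/integrations, requested {requested_seats}."
        )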

@@ -139,7 +139,7 @@ def put_logo(
upload_logo(file=file, is_logotype=is_logotype)
def fetch_logo_helper(db_session: Session) -> Response:
def fetch_logo_helper(db_session: Session) -> Response: # noqa: ARG001
try:
file_store = get_default_file_store()
onyx_file = file_store.get_file_with_mime_type(get_logo_filename())
@@ -155,7 +155,7 @@ def fetch_logo_helper(db_session: Session) -> Response:
return Response(content=onyx_file.data, media_type=onyx_file.mime_type)
def fetch_logotype_helper(db_session: Session) -> Response:
def fetch_logotype_helper(db_session: Session) -> Response: # noqa: ARG001
try:
file_store = get_default_file_store()
onyx_file = file_store.get_file_with_mime_type(get_logotype_filename())

View File

@@ -17,7 +17,7 @@ router = APIRouter(prefix="/evals")
@router.post("/eval_run", response_model=EvalRunAck)
def eval_run(
request: EvalConfigurationOptions,
user: User = Depends(current_cloud_superuser),
user: User = Depends(current_cloud_superuser), # noqa: ARG001
) -> EvalRunAck:
"""
Run an evaluation with the given message and optional dataset.

View File

@@ -42,6 +42,20 @@ logger = setup_logger()
router = APIRouter(prefix="/license")
# PEM-style delimiters used in license file format
_PEM_BEGIN = "-----BEGIN ONYX LICENSE-----"
_PEM_END = "-----END ONYX LICENSE-----"
def _strip_pem_delimiters(content: str) -> str:
"""Strip PEM-style delimiters from license content if present."""
content = content.strip()
if content.startswith(_PEM_BEGIN) and content.endswith(_PEM_END):
# Remove first and last lines (the delimiters)
lines = content.split("\n")
return "\n".join(lines[1:-1]).strip()
return content
@router.get("")
async def get_license_status(
@@ -106,6 +120,11 @@ async def claim_license(
- Updating seats via the billing API
- Returning from the Stripe customer portal
- Any operation that regenerates the license on the control plane
Claim a license from the control plane (self-hosted only).
Two modes:
1. With session_id: After Stripe checkout, exchange session_id for license
2. Without session_id: Re-claim using existing license for auth
"""
if MULTI_TENANT:
raise HTTPException(
@@ -210,6 +229,10 @@ async def upload_license(
try:
content = await license_file.read()
license_data = content.decode("utf-8").strip()
# Strip PEM-style delimiters if present (used in .lic file format)
license_data = _strip_pem_delimiters(license_data)
# Remove any stray whitespace/newlines from user input
license_data = license_data.strip()
except UnicodeDecodeError:
raise HTTPException(status_code=400, detail="Invalid license file format")

View File
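A quick usage sketch of the delimiter stripping above; the function is copied from the hunk so the example is self-contained, and the license payload is a made-up placeholder.

_PEM_BEGIN = "-----BEGIN ONYX LICENSE-----"
_PEM_END = "-----END ONYX LICENSE-----"


def _strip_pem_delimiters(content: str) -> str:
    # Copied from the hunk above for a self-contained example.
    content = content.strip()
    if content.startswith(_PEM_BEGIN) and content.endswith(_PEM_END):
        lines = content.split("\n")
        return "\n".join(lines[1:-1]).strip()
    return content


# The license payload below is a made-up placeholder.
raw = "-----BEGIN ONYX LICENSE-----\neyJhbGciOi...\n-----END ONYX LICENSE-----\n"
assert _strip_pem_delimiters(raw) == "eyJhbGciOi..."
# Bare content (no delimiters) passes through unchanged apart from whitespace.
assert _strip_pem_delimiters("  eyJhbGciOi...  ") == "eyJhbGciOi..."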

@@ -260,7 +260,7 @@ def confluence_oauth_accessible_resources(
credential_id: int,
user: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str | None = Depends(get_current_tenant_id),
tenant_id: str | None = Depends(get_current_tenant_id), # noqa: ARG001
) -> JSONResponse:
"""Atlassian's API is weird and does not supply us with enough info to be in a
usable state after authorizing. All APIs require a cloud id. We have to list
@@ -270,7 +270,11 @@ def confluence_oauth_accessible_resources(
if not credential:
raise HTTPException(400, f"Credential {credential_id} not found.")
credential_dict = credential.credential_json
credential_dict = (
credential.credential_json.get_value(apply_mask=False)
if credential.credential_json
else {}
)
access_token = credential_dict["confluence_access_token"]
try:
@@ -323,7 +327,7 @@ def confluence_oauth_finalize(
cloud_url: str,
user: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str | None = Depends(get_current_tenant_id),
tenant_id: str | None = Depends(get_current_tenant_id), # noqa: ARG001
) -> JSONResponse:
"""Saves the info for the selected cloud site to the credential.
This is the final step in the confluence oauth flow where after the traditional
@@ -337,7 +341,12 @@ def confluence_oauth_finalize(
detail=f"Confluence Cloud OAuth failed - credential {credential_id} not found.",
)
new_credential_json: dict[str, Any] = dict(credential.credential_json)
existing_credential_json = (
credential.credential_json.get_value(apply_mask=False)
if credential.credential_json
else {}
)
new_credential_json: dict[str, Any] = dict(existing_credential_json)
new_credential_json["cloud_id"] = cloud_id
new_credential_json["cloud_name"] = cloud_name
new_credential_json["wiki_base"] = cloud_url

View File

@@ -78,7 +78,7 @@ def fetch_and_process_chat_session_history(
db_session: Session,
start: datetime,
end: datetime,
limit: int | None = 500,
limit: int | None = 500, # noqa: ARG001
) -> Generator[ChatSessionSnapshot]:
PAGE_SIZE = 100

View File

@@ -59,7 +59,7 @@ def generate_report(
def read_usage_report(
report_name: str,
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
db_session: Session = Depends(get_session), # noqa: ARG001
) -> Response:
try:
file = get_usage_report_data(report_name)

View File

@@ -123,14 +123,9 @@ def _seed_llms(
upsert_llm_provider(llm_upsert_request, db_session)
for llm_upsert_request in llm_upsert_requests
]
if len(seeded_providers[0].model_configurations) > 0:
default_model = seeded_providers[0].model_configurations[0].name
update_default_provider(
provider_id=seeded_providers[0].id,
model_name=default_model,
db_session=db_session,
)
update_default_provider(
provider_id=seeded_providers[0].id, db_session=db_session
)
def _seed_personas(db_session: Session, personas: list[PersonaUpsertRequest]) -> None:

View File

@@ -58,26 +58,42 @@ def apply_license_status_to_settings(settings: Settings) -> Settings:
For self-hosted, looks up license metadata and overrides application_status
if the license indicates GATED_ACCESS (fully expired).
Also sets ee_features_enabled based on license status to control
visibility of EE features in the UI.
For multi-tenant (cloud), the settings already have the correct status
from the control plane, so no override is needed.
If LICENSE_ENFORCEMENT_ENABLED is false, settings are returned unchanged,
allowing the product to function normally without license checks.
If LICENSE_ENFORCEMENT_ENABLED is false, ee_features_enabled is set to True
(since EE code was loaded via ENABLE_PAID_ENTERPRISE_EDITION_FEATURES).
"""
if not LICENSE_ENFORCEMENT_ENABLED:
# License enforcement disabled - EE code is loaded via
# ENABLE_PAID_ENTERPRISE_EDITION_FEATURES, so EE features are on
settings.ee_features_enabled = True
return settings
if MULTI_TENANT:
# Cloud mode - EE features always available (gating handled by is_tenant_gated)
settings.ee_features_enabled = True
return settings
tenant_id = get_current_tenant_id()
try:
metadata = get_cached_license_metadata(tenant_id)
if metadata and metadata.status == _BLOCKING_STATUS:
settings.application_status = metadata.status
# No license = user hasn't purchased yet, allow access for upgrade flow
# GRACE_PERIOD/PAYMENT_REMINDER don't block - they're for notifications
if metadata:
if metadata.status == _BLOCKING_STATUS:
settings.application_status = metadata.status
settings.ee_features_enabled = False
else:
# Has a valid license (GRACE_PERIOD/PAYMENT_REMINDER still allow EE features)
settings.ee_features_enabled = True
else:
# No license = community edition, disable EE features
settings.ee_features_enabled = False
except RedisError as e:
logger.warning(f"Failed to check license metadata for settings: {e}")
# Fail closed - disable EE features if we can't verify license
settings.ee_features_enabled = False
return settings

View File
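The branching above reduces to a small decision table for ee_features_enabled. A standalone sketch under stated assumptions: the function and the "GATED_ACCESS" string are stand-ins for the real settings logic and status enum (the docstring identifies GATED_ACCESS as the blocking status).

def ee_features_enabled_for(
    enforcement_enabled: bool,
    multi_tenant: bool,
    license_status: str | None,  # None means no license metadata was found
) -> bool:
    # Enforcement disabled or cloud mode: EE features stay on.
    if not enforcement_enabled or multi_tenant:
        return True
    # No license at all: community edition, EE features off.
    if license_status is None:
        return False
    # Only the fully-expired status blocks; GRACE_PERIOD / PAYMENT_REMINDER do not.
    return license_status != "GATED_ACCESS"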

@@ -19,6 +19,7 @@ logger = setup_logger()
def fetch_stripe_checkout_session(
tenant_id: str,
billing_period: Literal["monthly", "annual"] = "monthly",
seats: int | None = None,
) -> str:
token = generate_data_plane_token()
headers = {
@@ -29,10 +30,23 @@ def fetch_stripe_checkout_session(
payload = {
"tenant_id": tenant_id,
"billing_period": billing_period,
"seats": seats,
}
response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
return response.json()["sessionId"]
if not response.ok:
try:
data = response.json()
error_msg = (
data.get("error")
or f"Request failed with status {response.status_code}"
)
except (ValueError, requests.exceptions.JSONDecodeError):
error_msg = f"Request failed with status {response.status_code}: {response.text[:200]}"
raise Exception(error_msg)
data = response.json()
if data.get("error"):
raise Exception(data["error"])
return data["sessionId"]
def fetch_tenant_stripe_information(tenant_id: str) -> dict:
@@ -51,7 +65,6 @@ def fetch_tenant_stripe_information(tenant_id: str) -> dict:
def fetch_billing_information(
tenant_id: str,
) -> BillingInformation | SubscriptionStatusResponse:
logger.info("Fetching billing information")
token = generate_data_plane_token()
headers = {
"Authorization": f"Bearer {token}",

View File

@@ -29,6 +29,7 @@ from ee.onyx.server.tenants.billing import fetch_billing_information
from ee.onyx.server.tenants.billing import fetch_customer_portal_session
from ee.onyx.server.tenants.billing import fetch_stripe_checkout_session
from ee.onyx.server.tenants.models import BillingInformation
from ee.onyx.server.tenants.models import CreateCheckoutSessionRequest
from ee.onyx.server.tenants.models import CreateSubscriptionSessionRequest
from ee.onyx.server.tenants.models import ProductGatingFullSyncRequest
from ee.onyx.server.tenants.models import ProductGatingRequest
@@ -114,12 +115,30 @@ async def create_customer_portal_session(
try:
portal_url = fetch_customer_portal_session(tenant_id, return_url)
return {"url": portal_url}
return {"stripe_customer_portal_url": portal_url}
except Exception as e:
logger.exception("Failed to create customer portal session")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/create-checkout-session")
async def create_checkout_session(
request: CreateCheckoutSessionRequest | None = None,
_: User = Depends(current_admin_user),
) -> dict:
"""Create a Stripe checkout session via the control plane."""
tenant_id = get_current_tenant_id()
billing_period = request.billing_period if request else "monthly"
seats = request.seats if request else None
try:
checkout_url = fetch_stripe_checkout_session(tenant_id, billing_period, seats)
return {"stripe_checkout_url": checkout_url}
except Exception as e:
logger.exception("Failed to create checkout session")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/create-subscription-session")
async def create_subscription_session(
request: CreateSubscriptionSessionRequest | None = None,

View File

@@ -42,6 +42,12 @@ class BillingInformation(BaseModel):
payment_method_enabled: bool
class CreateCheckoutSessionRequest(BaseModel):
billing_period: Literal["monthly", "annual"] = "monthly"
seats: int | None = None
email: str | None = None
class CheckoutSessionCreationResponse(BaseModel):
id: str

View File

@@ -121,7 +121,9 @@ async def get_or_provision_tenant(
)
async def create_tenant(email: str, referral_source: str | None = None) -> str:
async def create_tenant(
email: str, referral_source: str | None = None # noqa: ARG001
) -> str:
"""
Create a new tenant on-demand when no pre-provisioned tenants are available.
This is the fallback method when we can't use a pre-provisioned tenant.
@@ -300,12 +302,12 @@ def configure_default_api_keys(db_session: Session) -> None:
has_set_default_provider = False
def _upsert(request: LLMProviderUpsertRequest, default_model: str) -> None:
def _upsert(request: LLMProviderUpsertRequest) -> None:
nonlocal has_set_default_provider
try:
provider = upsert_llm_provider(request, db_session)
if not has_set_default_provider:
update_default_provider(provider.id, default_model, db_session)
update_default_provider(provider.id, db_session)
has_set_default_provider = True
except Exception as e:
logger.error(f"Failed to configure {request.provider} provider: {e}")
@@ -323,13 +325,14 @@ def configure_default_api_keys(db_session: Session) -> None:
name="OpenAI",
provider=OPENAI_PROVIDER_NAME,
api_key=OPENAI_DEFAULT_API_KEY,
default_model_name=default_model_name,
model_configurations=_build_model_configuration_upsert_requests(
OPENAI_PROVIDER_NAME, recommendations
),
api_key_changed=True,
is_auto_mode=True,
)
_upsert(openai_provider, default_model_name)
_upsert(openai_provider)
# Create default image generation config using the OpenAI API key
try:
@@ -358,13 +361,14 @@ def configure_default_api_keys(db_session: Session) -> None:
name="Anthropic",
provider=ANTHROPIC_PROVIDER_NAME,
api_key=ANTHROPIC_DEFAULT_API_KEY,
default_model_name=default_model_name,
model_configurations=_build_model_configuration_upsert_requests(
ANTHROPIC_PROVIDER_NAME, recommendations
),
api_key_changed=True,
is_auto_mode=True,
)
_upsert(anthropic_provider, default_model_name)
_upsert(anthropic_provider)
else:
logger.info(
"ANTHROPIC_DEFAULT_API_KEY not set, skipping Anthropic provider configuration"
@@ -389,13 +393,14 @@ def configure_default_api_keys(db_session: Session) -> None:
name="Google Vertex AI",
provider=VERTEXAI_PROVIDER_NAME,
custom_config=custom_config,
default_model_name=default_model_name,
model_configurations=_build_model_configuration_upsert_requests(
VERTEXAI_PROVIDER_NAME, recommendations
),
api_key_changed=True,
is_auto_mode=True,
)
_upsert(vertexai_provider, default_model_name)
_upsert(vertexai_provider)
else:
logger.info(
"VERTEXAI_DEFAULT_CREDENTIALS not set, skipping Vertex AI provider configuration"
@@ -427,11 +432,12 @@ def configure_default_api_keys(db_session: Session) -> None:
name="OpenRouter",
provider=OPENROUTER_PROVIDER_NAME,
api_key=OPENROUTER_DEFAULT_API_KEY,
default_model_name=default_model_name,
model_configurations=model_configurations,
api_key_changed=True,
is_auto_mode=True,
)
_upsert(openrouter_provider, default_model_name)
_upsert(openrouter_provider)
else:
logger.info(
"OPENROUTER_DEFAULT_API_KEY not set, skipping OpenRouter provider configuration"
@@ -671,7 +677,7 @@ async def setup_tenant(tenant_id: str) -> None:
async def assign_tenant_to_user(
tenant_id: str, email: str, referral_source: str | None = None
tenant_id: str, email: str, referral_source: str | None = None # noqa: ARG001
) -> None:
"""
Assign a tenant to a user and perform necessary operations.

View File

@@ -1,5 +1,6 @@
import logging
import os
import re
from types import SimpleNamespace
from sqlalchemy import text
@@ -10,9 +11,30 @@ from alembic import command
from alembic.config import Config
from onyx.db.engine.sql_engine import build_connection_string
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
from shared_configs.configs import TENANT_ID_PREFIX
logger = logging.getLogger(__name__)
# Regex pattern for valid tenant IDs:
# - UUID format: tenant_xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
# - AWS instance ID format: tenant_i-xxxxxxxxxxxxxxxxx
# Also useful for not accidentally dropping `public` schema
TENANT_ID_PATTERN = re.compile(
rf"^{re.escape(TENANT_ID_PREFIX)}("
r"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}" # UUID
r"|i-[a-f0-9]+" # AWS instance ID
r")$"
)
def validate_tenant_id(tenant_id: str) -> bool:
"""Validate that tenant_id matches expected format.
This is important for SQL injection prevention since schema names
cannot be parameterized in SQL and must be formatted directly.
"""
return bool(TENANT_ID_PATTERN.match(tenant_id))
def run_alembic_migrations(schema_name: str) -> None:
logger.info(f"Starting Alembic migrations for schema: {schema_name}")
@@ -67,13 +89,18 @@ def create_schema_if_not_exists(tenant_id: str) -> bool:
def drop_schema(tenant_id: str) -> None:
if not tenant_id.isidentifier():
raise ValueError("Invalid tenant_id.")
"""Drop a tenant's schema.
Uses strict regex validation to reject unexpected formats early,
preventing SQL injection since schema names cannot be parameterized.
"""
if not validate_tenant_id(tenant_id):
raise ValueError(f"Invalid tenant_id format: {tenant_id}")
with get_sqlalchemy_engine().connect() as connection:
connection.execute(
text("DROP SCHEMA IF EXISTS %(schema_name)s CASCADE"),
{"schema_name": tenant_id},
)
with connection.begin():
# Use string formatting with validated tenant_id (safe after validation)
connection.execute(text(f'DROP SCHEMA IF EXISTS "{tenant_id}" CASCADE'))
def get_current_alembic_version(tenant_id: str) -> str:

View File
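A few illustrative matches against the tenant ID pattern above, assuming TENANT_ID_PREFIX is "tenant_" as the comment's examples suggest; the IDs themselves are made up.

import re

TENANT_ID_PREFIX = "tenant_"  # assumed value of shared_configs.configs.TENANT_ID_PREFIX
TENANT_ID_PATTERN = re.compile(
    rf"^{re.escape(TENANT_ID_PREFIX)}("
    r"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}"  # UUID
    r"|i-[a-f0-9]+"  # AWS instance ID
    r")$"
)

# Accepted formats.
assert TENANT_ID_PATTERN.match("tenant_123e4567-e89b-4d3a-8456-426614174000")
assert TENANT_ID_PATTERN.match("tenant_i-0abcd1234ef567890")
# Rejected: the `public` schema and injection attempts never match.
assert not TENANT_ID_PATTERN.match("public")
assert not TENANT_ID_PATTERN.match('tenant_x"; DROP SCHEMA public CASCADE; --')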

@@ -319,11 +319,13 @@ def get_tenant_count(tenant_id: str) -> int:
A user counts toward the seat count if:
1. They have an active mapping to this tenant (UserTenantMapping.active == True)
2. AND the User is active (User.is_active == True)
3. AND the User is not the anonymous system user
TODO: Exclude API key dummy users from seat counting. API keys create
users with emails like `__DANSWER_API_KEY_*` that should not count toward
seat limits. See: https://linear.app/onyx-app/issue/ENG-3518
"""
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.db.models import User
# First get all emails with active mappings to this tenant
@@ -333,6 +335,7 @@ def get_tenant_count(tenant_id: str) -> int:
.filter(
UserTenantMapping.tenant_id == tenant_id,
UserTenantMapping.active == True, # noqa: E712
UserTenantMapping.email != ANONYMOUS_USER_EMAIL,
)
.all()
)

View File
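The seat-counting rule in the docstring above, restated as a plain-Python sketch with reduced stand-ins for the UserTenantMapping and User models; the real implementation is the SQLAlchemy query in the hunk.

from dataclasses import dataclass

# Stand-in value; the real constant is onyx.configs.constants.ANONYMOUS_USER_EMAIL.
ANONYMOUS_USER_EMAIL = "<anonymous-user-email>"


@dataclass
class MappingSketch:
    email: str
    tenant_id: str
    active: bool


@dataclass
class UserSketch:
    email: str
    is_active: bool


def count_seats(tenant_id: str, mappings: list[MappingSketch], users: list[UserSketch]) -> int:
    # A user occupies a seat when they have an active mapping to this tenant,
    # the user record itself is active, and they are not the anonymous user.
    # (API-key dummy users still count; see the TODO in the hunk above.)
    emails = {
        m.email
        for m in mappings
        if m.tenant_id == tenant_id and m.active and m.email != ANONYMOUS_USER_EMAIL
    }
    return sum(1 for u in users if u.email in emails and u.is_active)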

@@ -96,7 +96,7 @@ def get_access_for_documents(
return versioned_get_access_for_documents_fn(document_ids, db_session)
def _get_acl_for_user(user: User, db_session: Session) -> set[str]:
def _get_acl_for_user(user: User, db_session: Session) -> set[str]: # noqa: ARG001
"""Returns a list of ACL entries that the user has access to. This is meant to be
used downstream to filter out documents that the user does not have access to. The
user should have access to a document if at least one entry in the document's ACL

View File

@@ -4,7 +4,9 @@ from onyx.db.models import User
from onyx.utils.variable_functionality import fetch_versioned_implementation
def _get_user_external_group_ids(db_session: Session, user: User) -> list[str]:
def _get_user_external_group_ids(
db_session: Session, user: User # noqa: ARG001
) -> list[str]:
return []

View File

@@ -30,7 +30,7 @@ REFRESH_ENDPOINTS = {
async def _test_expire_oauth_token(
user: User,
oauth_account: OAuthAccount,
db_session: AsyncSession,
db_session: AsyncSession, # noqa: ARG001
user_manager: BaseUserManager[User, Any],
expire_in_seconds: int = 10,
) -> bool:
@@ -59,7 +59,7 @@ async def _test_expire_oauth_token(
async def refresh_oauth_token(
user: User,
oauth_account: OAuthAccount,
db_session: AsyncSession,
db_session: AsyncSession, # noqa: ARG001
user_manager: BaseUserManager[User, Any],
) -> bool:
"""
@@ -182,7 +182,7 @@ async def check_and_refresh_oauth_tokens(
async def check_oauth_account_has_refresh_token(
user: User,
user: User, # noqa: ARG001
oauth_account: OAuthAccount,
) -> bool:
"""

View File

@@ -11,6 +11,7 @@ from onyx.db.models import OAuthUserToken
from onyx.db.oauth_config import get_user_oauth_token
from onyx.db.oauth_config import upsert_user_oauth_token
from onyx.utils.logger import setup_logger
from onyx.utils.sensitive import SensitiveValue
logger = setup_logger()
@@ -33,7 +34,10 @@ class OAuthTokenManager:
if not user_token:
return None
token_data = user_token.token_data
if not user_token.token_data:
return None
token_data = self._unwrap_token_data(user_token.token_data)
# Check if token is expired
if OAuthTokenManager.is_token_expired(token_data):
@@ -51,7 +55,10 @@ class OAuthTokenManager:
def refresh_token(self, user_token: OAuthUserToken) -> str:
"""Refresh access token using refresh token"""
token_data = user_token.token_data
if not user_token.token_data:
raise ValueError("No token data available for refresh")
token_data = self._unwrap_token_data(user_token.token_data)
response = requests.post(
self.oauth_config.token_url,
@@ -153,3 +160,11 @@ class OAuthTokenManager:
separator = "&" if "?" in oauth_config.authorization_url else "?"
return f"{oauth_config.authorization_url}{separator}{urlencode(params)}"
@staticmethod
def _unwrap_token_data(
token_data: SensitiveValue[dict[str, Any]] | dict[str, Any],
) -> dict[str, Any]:
if isinstance(token_data, SensitiveValue):
return token_data.get_value(apply_mask=False)
return token_data

View File

@@ -58,3 +58,4 @@ class UserUpdate(schemas.BaseUserUpdate):
class AuthBackend(str, Enum):
REDIS = "redis"
POSTGRES = "postgres"
JWT = "jwt"

View File

@@ -38,6 +38,7 @@ from fastapi_users import schemas
from fastapi_users import UUIDIDMixin
from fastapi_users.authentication import AuthenticationBackend
from fastapi_users.authentication import CookieTransport
from fastapi_users.authentication import JWTStrategy
from fastapi_users.authentication import RedisStrategy
from fastapi_users.authentication import Strategy
from fastapi_users.authentication.strategy.db import AccessTokenDatabase
@@ -780,7 +781,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
)
async def on_after_forgot_password(
self, user: User, token: str, request: Optional[Request] = None
self, user: User, token: str, request: Optional[Request] = None # noqa: ARG002
) -> None:
if not EMAIL_CONFIGURED:
logger.error(
@@ -799,7 +800,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
send_forgot_password_email(user.email, tenant_id=tenant_id, token=token)
async def on_after_request_verify(
self, user: User, token: str, request: Optional[Request] = None
self, user: User, token: str, request: Optional[Request] = None # noqa: ARG002
) -> None:
verify_email_domain(user.email)
@@ -983,7 +984,7 @@ class TenantAwareRedisStrategy(RedisStrategy[User, uuid.UUID]):
except (exceptions.UserNotExists, exceptions.InvalidID, KeyError):
return None
async def destroy_token(self, token: str, user: User) -> None:
async def destroy_token(self, token: str, user: User) -> None: # noqa: ARG002
"""Properly delete the token from async redis."""
redis = await get_async_redis_connection()
await redis.delete(f"{self.key_prefix}{token}")
@@ -1046,6 +1047,61 @@ class RefreshableDatabaseStrategy(DatabaseStrategy[User, uuid.UUID, AccessToken]
return token
class SingleTenantJWTStrategy(JWTStrategy[User, uuid.UUID]):
"""Stateless JWT strategy for single-tenant deployments.
Tokens are self-contained and verified via signature — no Redis or DB
lookup required per request. An ``iat`` claim is embedded so that
downstream code can determine when the token was created without
querying an external store.
Refresh is implemented by issuing a brand-new JWT (the old one remains
valid until its natural expiry). ``destroy_token`` is a no-op because
JWTs cannot be server-side invalidated.
"""
def __init__(
self,
secret: SecretType,
lifetime_seconds: int | None = SESSION_EXPIRE_TIME_SECONDS,
token_audience: list[str] | None = None,
algorithm: str = "HS256",
public_key: SecretType | None = None,
):
super().__init__(
secret=secret,
lifetime_seconds=lifetime_seconds,
token_audience=token_audience or ["fastapi-users:auth"],
algorithm=algorithm,
public_key=public_key,
)
async def write_token(self, user: User) -> str:
data = {
"sub": str(user.id),
"aud": self.token_audience,
"iat": int(datetime.now(timezone.utc).timestamp()),
}
return generate_jwt(
data, self.encode_key, self.lifetime_seconds, algorithm=self.algorithm
)
async def destroy_token(self, token: str, user: User) -> None: # noqa: ARG002
# JWTs are stateless — nothing to invalidate server-side.
# NOTE: a compromise that makes JWT auth stateful but revocable
# is to include a token_version claim in the JWT payload. The token_version
# is incremented whenever the user logs out (or has their login revoked). Whenever
# the JWT is used, it is only valid if the token_version claim matches the one
# in the db. If not, the JWT is invalid and the user needs to log in again.
return
async def refresh_token(
self, token: Optional[str], user: User # noqa: ARG002
) -> str:
"""Issue a fresh JWT with a new expiry."""
return await self.write_token(user)
def get_redis_strategy() -> TenantAwareRedisStrategy:
return TenantAwareRedisStrategy()
@@ -1058,6 +1114,22 @@ def get_database_strategy(
)
def get_jwt_strategy() -> SingleTenantJWTStrategy:
return SingleTenantJWTStrategy(
secret=USER_AUTH_SECRET,
lifetime_seconds=SESSION_EXPIRE_TIME_SECONDS,
)
if AUTH_BACKEND == AuthBackend.JWT:
if MULTI_TENANT or AUTH_TYPE == AuthType.CLOUD:
raise ValueError(
"JWT auth backend is only supported for single-tenant, self-hosted deployments. "
"Use 'redis' or 'postgres' instead."
)
if not USER_AUTH_SECRET:
raise ValueError("USER_AUTH_SECRET is required for JWT auth backend.")
if AUTH_BACKEND == AuthBackend.REDIS:
auth_backend = AuthenticationBackend(
name="redis", transport=cookie_transport, get_strategy=get_redis_strategy
@@ -1066,6 +1138,10 @@ elif AUTH_BACKEND == AuthBackend.POSTGRES:
auth_backend = AuthenticationBackend(
name="postgres", transport=cookie_transport, get_strategy=get_database_strategy
)
elif AUTH_BACKEND == AuthBackend.JWT:
auth_backend = AuthenticationBackend(
name="jwt", transport=cookie_transport, get_strategy=get_jwt_strategy
)
else:
raise ValueError(f"Invalid auth backend: {AUTH_BACKEND}")
@@ -1328,14 +1404,6 @@ async def optional_user(
user: User | None = Depends(optional_fastapi_current_user),
) -> User | None:
tenant_id = get_current_tenant_id()
if (
user is not None
and user.is_anonymous
and anonymous_user_enabled(tenant_id=tenant_id)
):
return get_anonymous_user()
if user := await _check_for_saml_and_jwt(request, user, async_db_session):
# If user is already set, _check_for_saml_and_jwt returns the same user object
return user

View File
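The NOTE inside destroy_token describes a token_version compromise that would make JWTs revocable at the cost of one DB read per request. A hedged sketch of that idea using PyJWT, which is assumed here purely for illustration; the change itself does not implement this.

import time

import jwt  # PyJWT, assumed for illustration only

SECRET = "change-me"  # stand-in for USER_AUTH_SECRET


def issue_token(user_id: str, token_version: int, lifetime_s: int = 3600) -> str:
    now = int(time.time())
    payload = {
        "sub": user_id,
        "iat": now,
        "exp": now + lifetime_s,
        "token_version": token_version,  # bumped in the DB on logout/revocation
    }
    return jwt.encode(payload, SECRET, algorithm="HS256")


def is_token_valid(token: str, current_version_in_db: int) -> bool:
    try:
        payload = jwt.decode(token, SECRET, algorithms=["HS256"])
    except jwt.PyJWTError:
        return False
    # Signature and expiry are checked statelessly; the version comparison is
    # the single stateful check that enables revocation.
    return payload.get("token_version") == current_version_in_db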

@@ -43,7 +43,7 @@ from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_document_set import RedisDocumentSet
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_usergroup import RedisUserGroup
from onyx.tracing.braintrust_tracing import setup_braintrust_if_creds_available
from onyx.tracing.setup import setup_tracing
from onyx.utils.logger import ColoredFormatter
from onyx.utils.logger import LoggerContextVars
from onyx.utils.logger import PlainFormatter
@@ -93,12 +93,12 @@ class TenantAwareTask(Task):
@task_prerun.connect
def on_task_prerun(
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple[Any, ...] | None = None,
kwargs: dict[str, Any] | None = None,
**other_kwargs: Any,
sender: Any | None = None, # noqa: ARG001
task_id: str | None = None, # noqa: ARG001
task: Task | None = None, # noqa: ARG001
args: tuple[Any, ...] | None = None, # noqa: ARG001
kwargs: dict[str, Any] | None = None, # noqa: ARG001
**other_kwargs: Any, # noqa: ARG001
) -> None:
# Reset any per-task logging context so that prefixes (e.g. pruning_ctx)
# from a previous task executed in the same worker process do not leak
@@ -110,14 +110,14 @@ def on_task_prerun(
def on_task_postrun(
sender: Any | None = None,
sender: Any | None = None, # noqa: ARG001
task_id: str | None = None,
task: Task | None = None,
args: tuple | None = None,
args: tuple | None = None, # noqa: ARG001
kwargs: dict[str, Any] | None = None,
retval: Any | None = None,
retval: Any | None = None, # noqa: ARG001
state: str | None = None,
**kwds: Any,
**kwds: Any, # noqa: ARG001
) -> None:
"""We handle this signal in order to remove completed tasks
from their respective tasksets. This allows us to track the progress of document set
@@ -209,7 +209,9 @@ def on_task_postrun(
return
def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
def on_celeryd_init(
sender: str, conf: Any = None, **kwargs: Any # noqa: ARG001
) -> None:
"""The first signal sent on celery worker startup"""
# NOTE(rkuo): start method "fork" is unsafe and we really need it to be "spawn"
@@ -238,11 +240,11 @@ def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
f"Multiprocessing selected start method: {multiprocessing.get_start_method()}"
)
# Initialize Braintrust tracing in workers if credentials are available.
setup_braintrust_if_creds_available()
# Initialize tracing in workers if credentials are available.
setup_tracing()
def wait_for_redis(sender: Any, **kwargs: Any) -> None:
def wait_for_redis(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
"""Waits for redis to become ready subject to a hardcoded timeout.
Will raise WorkerShutdown to kill the celery worker if the timeout
is reached."""
@@ -285,7 +287,7 @@ def wait_for_redis(sender: Any, **kwargs: Any) -> None:
return
def wait_for_db(sender: Any, **kwargs: Any) -> None:
def wait_for_db(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
"""Waits for the db to become ready subject to a hardcoded timeout.
Will raise WorkerShutdown to kill the celery worker if the timeout is reached."""
@@ -327,7 +329,7 @@ def wait_for_db(sender: Any, **kwargs: Any) -> None:
return
def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None:
def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
logger.info(f"Running as a secondary celery worker: pid={os.getpid()}")
# Set up variables for waiting on primary worker
@@ -359,7 +361,7 @@ def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None:
return
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
def on_worker_ready(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
task_logger.info("worker_ready signal received.")
# file based way to do readiness/liveness probes
@@ -372,7 +374,7 @@ def on_worker_ready(sender: Any, **kwargs: Any) -> None:
logger.info(f"Readiness signal touched at {path}.")
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
HttpxPool.close_all()
hostname: str = cast(str, sender.hostname)
@@ -405,9 +407,9 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
def on_setup_logging(
loglevel: int,
logfile: str | None,
format: str,
colorize: bool,
**kwargs: Any,
format: str, # noqa: ARG001
colorize: bool, # noqa: ARG001
**kwargs: Any, # noqa: ARG001
) -> None:
# TODO: could unhardcode format and colorize and accept these as options from
# celery's config
@@ -508,18 +510,18 @@ class TenantContextFilter(logging.Filter):
@task_postrun.connect
def reset_tenant_id(
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple[Any, ...] | None = None,
kwargs: dict[str, Any] | None = None,
**other_kwargs: Any,
sender: Any | None = None, # noqa: ARG001
task_id: str | None = None, # noqa: ARG001
task: Task | None = None, # noqa: ARG001
args: tuple[Any, ...] | None = None, # noqa: ARG001
kwargs: dict[str, Any] | None = None, # noqa: ARG001
**other_kwargs: Any, # noqa: ARG001
) -> None:
"""Signal handler to reset tenant ID in context var after task ends."""
CURRENT_TENANT_ID_CONTEXTVAR.set(POSTGRES_DEFAULT_SCHEMA)
def wait_for_vespa_or_shutdown(sender: Any, **kwargs: Any) -> None:
def wait_for_vespa_or_shutdown(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
"""Waits for Vespa to become ready subject to a timeout.
Raises WorkerShutdown if the timeout is reached."""
@@ -553,12 +555,12 @@ class LivenessProbe(bootsteps.StartStopStep):
priority=10,
)
def stop(self, worker: Any) -> None:
def stop(self, worker: Any) -> None: # noqa: ARG002
self.path.unlink(missing_ok=True)
if self.task_tref:
self.task_tref.cancel()
def update_liveness_file(self, worker: Any) -> None:
def update_liveness_file(self, worker: Any) -> None: # noqa: ARG002
self.path.touch()

View File

@@ -102,7 +102,7 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
@worker_process_init.connect
def init_worker(**kwargs: Any) -> None:
def init_worker(**kwargs: Any) -> None: # noqa: ARG001
SqlEngine.reset_engine()

View File

@@ -91,7 +91,7 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
@worker_process_init.connect
def init_worker(**kwargs: Any) -> None:
def init_worker(**kwargs: Any) -> None: # noqa: ARG001
SqlEngine.reset_engine()

View File

@@ -244,7 +244,7 @@ class HubPeriodicTask(bootsteps.StartStopStep):
# it's unclear to me whether using the hub's timer or the bootstep timer is better
requires = {"celery.worker.components:Hub"}
def __init__(self, worker: Any, **kwargs: Any) -> None:
def __init__(self, worker: Any, **kwargs: Any) -> None: # noqa: ARG002
self.interval = CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 8 # Interval in seconds
self.task_tref = None
@@ -300,7 +300,7 @@ class HubPeriodicTask(bootsteps.StartStopStep):
except Exception:
task_logger.exception("Periodic task failed.")
def stop(self, worker: Any) -> None:
def stop(self, worker: Any) -> None: # noqa: ARG002
# Cancel the scheduled task when the worker stops
if self.task_tref:
self.task_tref.cancel()

View File

@@ -91,7 +91,7 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
@worker_process_init.connect
def init_worker(**kwargs: Any) -> None:
def init_worker(**kwargs: Any) -> None: # noqa: ARG001
SqlEngine.reset_engine()

View File

@@ -217,9 +217,11 @@ if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
{
"name": "check-for-documents-for-opensearch-migration",
"task": OnyxCeleryTask.CHECK_FOR_DOCUMENTS_FOR_OPENSEARCH_MIGRATION_TASK,
# Try to enqueue an invocation of this task with this frequency.
"schedule": timedelta(seconds=120), # 2 minutes
"options": {
"priority": OnyxCeleryPriority.LOW,
# If the task was not dequeued in this time, revoke it.
"expires": BEAT_EXPIRES_DEFAULT,
},
}
@@ -227,10 +229,18 @@ if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
beat_task_templates.append(
{
"name": "migrate-documents-from-vespa-to-opensearch",
"task": OnyxCeleryTask.MIGRATE_DOCUMENT_FROM_VESPA_TO_OPENSEARCH_TASK,
"task": OnyxCeleryTask.MIGRATE_DOCUMENTS_FROM_VESPA_TO_OPENSEARCH_TASK,
# Try to enqueue an invocation of this task with this frequency.
# NOTE: If MIGRATION_TASK_SOFT_TIME_LIMIT_S is greater than this
# value and the task is maximally busy, we can expect to see some
# enqueued tasks be revoked over time. This is ok; by erring on the
# side of "there will probably always be at least one task of this
# type in the queue", we are minimizing this task's idleness while
# still giving chances for other tasks to execute.
"schedule": timedelta(seconds=120), # 2 minutes
"options": {
"priority": OnyxCeleryPriority.LOW,
# If the task was not dequeued in this time, revoke it.
"expires": BEAT_EXPIRES_DEFAULT,
},
}

View File

@@ -366,7 +366,7 @@ def try_generate_document_cc_pair_cleanup_tasks(
def monitor_connector_deletion_taskset(
tenant_id: str, key_bytes: bytes, r: Redis
tenant_id: str, key_bytes: bytes, r: Redis # noqa: ARG001
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)

View File

@@ -1071,7 +1071,7 @@ def check_for_checkpoint_cleanup(self: Task, *, tenant_id: str) -> None:
bind=True,
)
def cleanup_checkpoint_task(
self: Task, *, index_attempt_id: int, tenant_id: str | None
self: Task, *, index_attempt_id: int, tenant_id: str | None # noqa: ARG001
) -> None:
"""Clean up a checkpoint for a given index attempt"""
@@ -1160,7 +1160,7 @@ def check_for_index_attempt_cleanup(self: Task, *, tenant_id: str) -> None:
bind=True,
)
def cleanup_index_attempt_task(
self: Task, *, index_attempt_ids: list[int], tenant_id: str
self: Task, *, index_attempt_ids: list[int], tenant_id: str # noqa: ARG001
) -> None:
"""Clean up an index attempt"""
start = time.monotonic()
@@ -1266,7 +1266,7 @@ def _resolve_indexing_document_errors(
bind=True,
)
def docprocessing_task(
self: Task,
self: Task, # noqa: ARG001
index_attempt_id: int,
cc_pair_id: int,
tenant_id: str,

View File

@@ -57,7 +57,7 @@ class IndexingCallbackBase(IndexingHeartbeatInterface):
# TODO: Pass index_attempt_id to the callback and check cancellation using the db
return bool(self.redis_connector.stop.fenced)
def progress(self, tag: str, amount: int) -> None:
def progress(self, tag: str, amount: int) -> None: # noqa: ARG002
"""Amount isn't used yet."""
# rkuo: this shouldn't be necessary yet because we spawn the process this runs inside

View File

@@ -26,7 +26,7 @@ logger = setup_logger()
trail=False,
)
def eval_run_task(
self: Task,
self: Task, # noqa: ARG001
*,
configuration_dict: dict[str, Any],
) -> None:
@@ -48,7 +48,7 @@ def eval_run_task(
bind=True,
trail=False,
)
def scheduled_eval_task(self: Task, **kwargs: Any) -> None:
def scheduled_eval_task(self: Task, **kwargs: Any) -> None: # noqa: ARG001
"""
Scheduled task to run evaluations on configured datasets.
Runs weekly on Sunday at midnight UTC.

View File

@@ -322,7 +322,7 @@ def _run_hierarchy_extraction(
bind=True,
)
def connector_hierarchy_fetching_task(
self: Task,
self: Task, # noqa: ARG001
*,
cc_pair_id: int,
tenant_id: str,

View File

@@ -17,7 +17,9 @@ from onyx.llm.well_known_providers.auto_update_service import (
trail=False,
bind=True,
)
def check_for_auto_llm_updates(self: Task, *, tenant_id: str) -> bool | None:
def check_for_auto_llm_updates(
self: Task, *, tenant_id: str # noqa: ARG001
) -> bool | None:
"""Periodic task to fetch LLM model updates from GitHub
and sync them to providers in Auto mode.

View File

@@ -871,7 +871,7 @@ def cloud_monitor_celery_queues(
@shared_task(name=OnyxCeleryTask.MONITOR_CELERY_QUEUES, ignore_result=True, bind=True)
def monitor_celery_queues(self: Task, *, tenant_id: str) -> None:
def monitor_celery_queues(self: Task, *, tenant_id: str) -> None: # noqa: ARG001
return monitor_celery_queues_helper(self)
@@ -952,7 +952,7 @@ def _get_cmdline_for_process(process: psutil.Process) -> str | None:
queue=OnyxCeleryQueues.MONITORING,
bind=True,
)
def monitor_process_memory(self: Task, *, tenant_id: str) -> None:
def monitor_process_memory(self: Task, *, tenant_id: str) -> None: # noqa: ARG001
"""
Task to monitor memory usage of supervisor-managed processes.
This periodically checks the memory usage of processes and logs information

View File

@@ -0,0 +1,43 @@
# Tasks are expected to cease execution and do cleanup after the soft time
# limit. In principle they are also forcibly terminated after the hard time
# limit; in practice this does not happen since we use threadpools for Celery
# task execution, and we simply hope that the total task time plus cleanup does
# not exceed this. Therefore tasks should regularly check their timeout and lock
# status. The lock timeout is the maximum time the lock manager (Redis in this
# case) will enforce the lock, independent of what is happening in the task. To
# reduce the chances that a task is still doing work while a lock has expired,
# make the lock timeout well above the task timeouts. In practice we should
# never see locks be held for this long anyway because a task should release the
# lock after its cleanup which happens at most after its soft timeout.
# Constants corresponding to migrate_documents_from_vespa_to_opensearch_task.
MIGRATION_TASK_SOFT_TIME_LIMIT_S = 60 * 5 # 5 minutes.
MIGRATION_TASK_TIME_LIMIT_S = 60 * 6 # 6 minutes.
# The maximum time the lock can be held for. Will automatically be released
# after this time.
MIGRATION_TASK_LOCK_TIMEOUT_S = 60 * 7 # 7 minutes.
assert (
MIGRATION_TASK_SOFT_TIME_LIMIT_S < MIGRATION_TASK_TIME_LIMIT_S
), "The soft time limit must be less than the time limit."
assert (
MIGRATION_TASK_TIME_LIMIT_S < MIGRATION_TASK_LOCK_TIMEOUT_S
), "The time limit must be less than the lock timeout."
# Time to wait to acquire the lock.
MIGRATION_TASK_LOCK_BLOCKING_TIMEOUT_S = 60 * 2 # 2 minutes.
# Constants corresponding to check_for_documents_for_opensearch_migration_task.
CHECK_FOR_DOCUMENTS_TASK_SOFT_TIME_LIMIT_S = 60 # 60 seconds / 1 minute.
CHECK_FOR_DOCUMENTS_TASK_TIME_LIMIT_S = 90 # 90 seconds.
# The maximum time the lock can be held for. Will automatically be released
# after this time.
CHECK_FOR_DOCUMENTS_TASK_LOCK_TIMEOUT_S = 120 # 120 seconds / 2 minutes.
assert (
CHECK_FOR_DOCUMENTS_TASK_SOFT_TIME_LIMIT_S < CHECK_FOR_DOCUMENTS_TASK_TIME_LIMIT_S
), "The soft time limit must be less than the time limit."
assert (
CHECK_FOR_DOCUMENTS_TASK_TIME_LIMIT_S < CHECK_FOR_DOCUMENTS_TASK_LOCK_TIMEOUT_S
), "The time limit must be less than the lock timeout."
# Time to wait to acquire the lock.
CHECK_FOR_DOCUMENTS_TASK_LOCK_BLOCKING_TIMEOUT_S = 30 # 30 seconds.
TOTAL_ALLOWABLE_DOC_MIGRATION_ATTEMPTS_BEFORE_PERMANENT_FAILURE = 15

View File
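The ordering these constants enforce (soft limit < hard limit < lock timeout) backs the loop shape used by the migration tasks in the next file: bound the work against the soft limit yourself and only proceed while the Redis lock is still owned. A generic sketch of that shape with the redis-py Lock calls the tasks already use; the names here are placeholders.

import time
from typing import Callable

from redis import Redis

SOFT_TIME_LIMIT_S = 60 * 5
LOCK_TIMEOUT_S = 60 * 7
LOCK_BLOCKING_TIMEOUT_S = 60 * 2


def run_time_bounded(r: Redis, lock_name: str, do_one_batch: Callable[[], bool]) -> None:
    # Because Celery's time limits are not enforced under the thread pool, the
    # task bounds itself against the soft limit and only works while it still
    # owns the Redis lock.
    lock = r.lock(
        lock_name,
        timeout=LOCK_TIMEOUT_S,  # lock auto-releases well after the task limits
        blocking=True,
        blocking_timeout=LOCK_BLOCKING_TIMEOUT_S,
    )
    if not lock.acquire():
        return
    start = time.monotonic()
    try:
        while time.monotonic() - start < SOFT_TIME_LIMIT_S and lock.owned():
            if not do_one_batch():  # returns False when there is no more work
                break
    finally:
        if lock.owned():
            lock.release()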

@@ -1,5 +1,6 @@
"""Celery tasks for migrating documents from Vespa to OpenSearch."""
import time
import traceback
from datetime import datetime
from datetime import timezone
@@ -10,6 +11,30 @@ from celery import Task
from redis.lock import Lock as RedisLock
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.tasks.opensearch_migration.constants import (
CHECK_FOR_DOCUMENTS_TASK_LOCK_BLOCKING_TIMEOUT_S,
)
from onyx.background.celery.tasks.opensearch_migration.constants import (
CHECK_FOR_DOCUMENTS_TASK_LOCK_TIMEOUT_S,
)
from onyx.background.celery.tasks.opensearch_migration.constants import (
CHECK_FOR_DOCUMENTS_TASK_SOFT_TIME_LIMIT_S,
)
from onyx.background.celery.tasks.opensearch_migration.constants import (
CHECK_FOR_DOCUMENTS_TASK_TIME_LIMIT_S,
)
from onyx.background.celery.tasks.opensearch_migration.constants import (
MIGRATION_TASK_LOCK_BLOCKING_TIMEOUT_S,
)
from onyx.background.celery.tasks.opensearch_migration.constants import (
MIGRATION_TASK_LOCK_TIMEOUT_S,
)
from onyx.background.celery.tasks.opensearch_migration.constants import (
MIGRATION_TASK_SOFT_TIME_LIMIT_S,
)
from onyx.background.celery.tasks.opensearch_migration.constants import (
MIGRATION_TASK_TIME_LIMIT_S,
)
from onyx.background.celery.tasks.opensearch_migration.transformer import (
transform_vespa_chunks_to_opensearch_chunks,
)
@@ -31,6 +56,9 @@ from onyx.db.opensearch_migration import (
increment_num_times_observed_no_additional_docs_to_populate_migration_table_with_commit,
)
from onyx.db.opensearch_migration import should_document_migration_be_permanently_failed
from onyx.db.opensearch_migration import (
try_insert_opensearch_tenant_migration_record_with_commit,
)
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.opensearch_document_index import (
@@ -72,7 +100,9 @@ def _migrate_single_document(
raise RuntimeError(f"No chunks found for document {document_id} in Vespa.")
opensearch_document_chunks: list[DocumentChunk] = (
transform_vespa_chunks_to_opensearch_chunks(vespa_document_chunks, tenant_state)
transform_vespa_chunks_to_opensearch_chunks(
vespa_document_chunks, tenant_state, document_id
)
)
if len(opensearch_document_chunks) != len(vespa_document_chunks):
raise RuntimeError(
@@ -90,22 +120,30 @@ def _migrate_single_document(
name=OnyxCeleryTask.CHECK_FOR_DOCUMENTS_FOR_OPENSEARCH_MIGRATION_TASK,
# Does not store the task's return value in the result backend.
ignore_result=True,
# When exceeded celery will raise a SoftTimeLimitExceeded in the task.
soft_time_limit=60 * 5, # 5 minutes.
# When exceeded the task will be forcefully terminated.
time_limit=60 * 6, # 6 minutes.
# WARNING: This is here just for rigor but since we use threads for Celery
# this config is not respected and timeout logic must be implemented in the
# task.
soft_time_limit=CHECK_FOR_DOCUMENTS_TASK_SOFT_TIME_LIMIT_S,
# WARNING: This is here just for rigor but since we use threads for Celery
# this config is not respected and timeout logic must be implemented in the
# task.
time_limit=CHECK_FOR_DOCUMENTS_TASK_TIME_LIMIT_S,
# Passed in self to the task to get task metadata.
bind=True,
)
def check_for_documents_for_opensearch_migration_task(
self: Task, *, tenant_id: str
self: Task, *, tenant_id: str # noqa: ARG001
) -> bool | None:
"""
Periodic task to check for and add documents to the OpenSearch migration
table.
Should not execute meaningful logic at the same time as
migrate_document_from_vespa_to_opensearch_task.
migrate_documents_from_vespa_to_opensearch_task.
Effectively tries to populate as many migration records as possible within
CHECK_FOR_DOCUMENTS_TASK_SOFT_TIME_LIMIT_S seconds. Does so in batches of
1000 documents.
Returns:
None if OpenSearch migration is not enabled, or if the lock could not be
@@ -119,29 +157,33 @@ def check_for_documents_for_opensearch_migration_task(
return None
task_logger.info("Checking for documents for OpenSearch migration.")
task_start_time = time.monotonic()
r = get_redis_client()
# Use a lock to prevent overlapping tasks. Only this task or
# migrate_document_from_vespa_to_opensearch_task can interact with the
# migrate_documents_from_vespa_to_opensearch_task can interact with the
# OpenSearchMigration table at once.
lock_beat: RedisLock = r.lock(
lock: RedisLock = r.lock(
name=OnyxRedisLocks.OPENSEARCH_MIGRATION_BEAT_LOCK,
# The maximum time the lock can be held for. Will automatically be
# released after this time.
timeout=60 * 6, # 6 minutes, same as the time limit for this task.
timeout=CHECK_FOR_DOCUMENTS_TASK_LOCK_TIMEOUT_S,
# .acquire will block until the lock is acquired.
blocking=True,
# Wait for 2 minutes trying to acquire the lock.
blocking_timeout=60 * 2, # 2 minutes.
# Time to wait to acquire the lock.
blocking_timeout=CHECK_FOR_DOCUMENTS_TASK_LOCK_BLOCKING_TIMEOUT_S,
)
if not lock_beat.acquire():
if not lock.acquire():
task_logger.warning(
"The OpenSearch migration check task timed out waiting for the lock."
)
return None
else:
task_logger.info(
f"Acquired the OpenSearch migration check lock. Took {time.monotonic() - task_start_time:.3f} seconds. "
f"Token: {lock.local.token}"
)
num_documents_found_for_record_creation = 0
try:
# Double check that tenant info is correct.
if tenant_id != get_current_tenant_id():
@@ -151,65 +193,89 @@ def check_for_documents_for_opensearch_migration_task(
)
task_logger.error(err_str)
return False
with get_session_with_current_tenant() as db_session:
# For pagination, get the last ID we've inserted into
# OpenSearchMigration.
last_opensearch_migration_document_id = (
get_last_opensearch_migration_document_id(db_session)
)
# Now get the next batch of doc IDs starting after the last ID.
document_ids = get_paginated_document_batch(
db_session,
prev_ending_document_id=last_opensearch_migration_document_id,
)
if not document_ids:
task_logger.info(
"No more documents to insert for OpenSearch migration."
while (
time.monotonic() - task_start_time
< CHECK_FOR_DOCUMENTS_TASK_SOFT_TIME_LIMIT_S
and lock.owned()
):
with get_session_with_current_tenant() as db_session:
# For pagination, get the last ID we've inserted into
# OpenSearchMigration.
last_opensearch_migration_document_id = (
get_last_opensearch_migration_document_id(db_session)
)
increment_num_times_observed_no_additional_docs_to_populate_migration_table_with_commit(
db_session
# Now get the next batch of doc IDs starting after the last ID.
# We'll do 1000 documents per transaction/timeout check.
document_ids = get_paginated_document_batch(
db_session,
limit=1000,
prev_ending_document_id=last_opensearch_migration_document_id,
)
# TODO(andrei): Once we've done this enough times and the number
# of documents matches the number of migration records, we can
# be done with this task and update
# document_migration_record_table_population_status.
return True
# Create the migration records for the next batch of documents with
# status PENDING.
create_opensearch_migration_records_with_commit(db_session, document_ids)
task_logger.info(
f"Created {len(document_ids)} migration records for the next batch of documents."
)
if not document_ids:
task_logger.info(
"No more documents to insert for OpenSearch migration."
)
increment_num_times_observed_no_additional_docs_to_populate_migration_table_with_commit(
db_session
)
# TODO(andrei): Once we've done this enough times and the
# number of documents matches the number of migration
# records, we can be done with this task and update
# document_migration_record_table_population_status.
return True
# Create the migration records for the next batch of documents
# with status PENDING.
create_opensearch_migration_records_with_commit(
db_session, document_ids
)
num_documents_found_for_record_creation += len(document_ids)
# Try to create the singleton row in
# OpenSearchTenantMigrationRecord if it doesn't already exist.
# This is a reasonable place to put it because we already have a
# lock, a session, and error handling, at the cost of running
# this small set of logic for every batch.
try_insert_opensearch_tenant_migration_record_with_commit(db_session)
except Exception:
task_logger.exception("Error in the OpenSearch migration check task.")
return False
finally:
if lock_beat.owned():
lock_beat.release()
if lock.owned():
lock.release()
else:
task_logger.warning(
"The OpenSearch migration lock was not owned on completion of the check task."
)
task_logger.info(
f"Finished checking for documents for OpenSearch migration. Found {num_documents_found_for_record_creation} documents "
f"to create migration records for in {time.monotonic() - task_start_time:.3f} seconds. However, this may include "
"documents for which there already exist records."
)
return True
# shared_task allows this task to be shared across celery app instances.
@shared_task(
name=OnyxCeleryTask.MIGRATE_DOCUMENT_FROM_VESPA_TO_OPENSEARCH_TASK,
name=OnyxCeleryTask.MIGRATE_DOCUMENTS_FROM_VESPA_TO_OPENSEARCH_TASK,
# Does not store the task's return value in the result backend.
ignore_result=True,
# When exceeded celery will raise a SoftTimeLimitExceeded in the task.
soft_time_limit=60 * 5, # 5 minutes.
# When exceeded the task will be forcefully terminated.
time_limit=60 * 6, # 6 minutes.
# WARNING: This is here just for rigor but since we use threads for Celery
# this config is not respected and timeout logic must be implemented in the
# task.
soft_time_limit=MIGRATION_TASK_SOFT_TIME_LIMIT_S,
# WARNING: This is here just for rigor but since we use threads for Celery
# this config is not respected and timeout logic must be implemented in the
# task.
time_limit=MIGRATION_TASK_TIME_LIMIT_S,
# Passed in self to the task to get task metadata.
bind=True,
)
def migrate_documents_from_vespa_to_opensearch_task(
self: Task,
self: Task, # noqa: ARG001
*,
tenant_id: str,
) -> bool | None:
@@ -218,10 +284,13 @@ def migrate_documents_from_vespa_to_opensearch_task(
Should not execute meaningful logic at the same time as
check_for_documents_for_opensearch_migration_task.
Effectively tries to migrate as many documents as possible within
MIGRATION_TASK_SOFT_TIME_LIMIT_S seconds. Does so in batches of 5 documents.
Returns:
None if OpenSearch migration is not enabled, or if the lock could not be
acquired; effectively a no-op. True if the task completed
successfully. False if the task failed.
successfully. False if the task errored.
"""
if not ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
task_logger.warning(
@@ -229,30 +298,36 @@ def migrate_documents_from_vespa_to_opensearch_task(
)
return None
task_logger.info("Trying to migrate documents from Vespa to OpenSearch.")
task_logger.info("Trying a migration batch from Vespa to OpenSearch.")
task_start_time = time.monotonic()
r = get_redis_client()
# Use a lock to prevent overlapping tasks. Only this task or
# check_for_documents_for_opensearch_migration_task can interact with the
# OpenSearchMigration table at once.
lock_beat: RedisLock = r.lock(
lock: RedisLock = r.lock(
name=OnyxRedisLocks.OPENSEARCH_MIGRATION_BEAT_LOCK,
# The maximum time the lock can be held for. Will automatically be
# released after this time.
timeout=60 * 6, # 6 minutes, same as the time limit for this task.
timeout=MIGRATION_TASK_LOCK_TIMEOUT_S,
# .acquire will block until the lock is acquired.
blocking=True,
# Wait for 2 minutes trying to acquire the lock.
blocking_timeout=60 * 2, # 2 minutes.
# Time to wait to acquire the lock.
blocking_timeout=MIGRATION_TASK_LOCK_BLOCKING_TIMEOUT_S,
)
if not lock_beat.acquire():
if not lock.acquire():
task_logger.warning(
"The OpenSearch migration task timed out waiting for the lock."
)
return None
else:
task_logger.info(
f"Acquired the OpenSearch migration lock. Took {time.monotonic() - task_start_time:.3f} seconds. "
f"Token: {lock.local.token}"
)
num_documents_migrated = 0
num_chunks_migrated = 0
num_documents_failed = 0
try:
# Double check that tenant info is correct.
if tenant_id != get_current_tenant_id():
@@ -262,98 +337,111 @@ def migrate_documents_from_vespa_to_opensearch_task(
)
task_logger.error(err_str)
return False
with get_session_with_current_tenant() as db_session:
records_needing_migration = (
get_opensearch_migration_records_needing_migration(db_session)
)
if not records_needing_migration:
task_logger.info(
"No documents found that need to be migrated from Vespa to OpenSearch."
)
increment_num_times_observed_no_additional_docs_to_migrate_with_commit(
db_session
)
# TODO(andrei): Once we've done this enough times and
# document_migration_record_table_population_status is done, we
# can be done with this task and update
# overall_document_migration_status accordingly. Note that this
# includes marking connectors as needing reindexing if some
# migrations failed.
return True
search_settings = get_current_search_settings(db_session)
tenant_state = TenantState(tenant_id=tenant_id, multitenant=MULTI_TENANT)
opensearch_document_index = OpenSearchDocumentIndex(
index_name=search_settings.index_name, tenant_state=tenant_state
)
vespa_document_index = VespaDocumentIndex(
index_name=search_settings.index_name,
tenant_state=tenant_state,
large_chunks_enabled=False,
)
task_logger.info(
f"Trying to migrate {len(records_needing_migration)} documents from Vespa to OpenSearch."
)
for record in records_needing_migration:
try:
# If the Document's chunk count is not known, it was
# probably just indexed, so fail here to give it a chance to
# sync. In the rare event that this Document has not been
# re-indexed in a very long time and is still under the
# "old" embedding/indexing logic where chunk count was never
# stored, we will eventually permanently fail and thus force
# a re-index of this doc, which is a desirable outcome.
if record.document.chunk_count is None:
raise RuntimeError(
f"Document {record.document_id} has no chunk count."
)
chunks_migrated = _migrate_single_document(
document_id=record.document_id,
opensearch_document_index=opensearch_document_index,
vespa_document_index=vespa_document_index,
tenant_state=tenant_state,
while (
time.monotonic() - task_start_time < MIGRATION_TASK_SOFT_TIME_LIMIT_S
and lock.owned()
):
with get_session_with_current_tenant() as db_session:
# We'll do 5 documents per transaction/timeout check.
records_needing_migration = (
get_opensearch_migration_records_needing_migration(
db_session, limit=5
)
# If the number of chunks in Vespa is not in sync with the
# Document table for this doc, don't mark it completed;
# let a subsequent run take care of it.
if chunks_migrated != record.document.chunk_count:
raise RuntimeError(
f"Number of chunks migrated ({chunks_migrated}) does not match number of expected chunks in Vespa "
f"({record.document.chunk_count}) for document {record.document_id}."
)
record.status = OpenSearchDocumentMigrationStatus.COMPLETED
except Exception:
record.status = OpenSearchDocumentMigrationStatus.FAILED
record.error_message = f"Attempt {record.attempts_count + 1}:\n{traceback.format_exc()}"
task_logger.exception(
f"Error migrating document {record.document_id} from Vespa to OpenSearch."
)
if not records_needing_migration:
task_logger.info(
"No documents found that need to be migrated from Vespa to OpenSearch."
)
finally:
record.attempts_count += 1
record.last_attempt_at = datetime.now(timezone.utc)
if should_document_migration_be_permanently_failed(record):
record.status = (
OpenSearchDocumentMigrationStatus.PERMANENTLY_FAILED
)
# TODO(andrei): Not necessarily here but if this happens
# we'll need to mark the connector as needing reindex.
increment_num_times_observed_no_additional_docs_to_migrate_with_commit(
db_session
)
# TODO(andrei): Once we've done this enough times and
# document_migration_record_table_population_status is done, we
# can be done with this task and update
# overall_document_migration_status accordingly. Note that this
# includes marking connectors as needing reindexing if some
# migrations failed.
return True
db_session.commit()
search_settings = get_current_search_settings(db_session)
tenant_state = TenantState(
tenant_id=tenant_id, multitenant=MULTI_TENANT
)
opensearch_document_index = OpenSearchDocumentIndex(
index_name=search_settings.index_name, tenant_state=tenant_state
)
vespa_document_index = VespaDocumentIndex(
index_name=search_settings.index_name,
tenant_state=tenant_state,
large_chunks_enabled=False,
)
for record in records_needing_migration:
try:
# If the Document's chunk count is not known, it was
# probably just indexed, so fail here to give it a chance to
# sync. In the rare event that this Document has not been
# re-indexed in a very long time and is still under the
# "old" embedding/indexing logic where chunk count was never
# stored, we will eventually permanently fail and thus force
# a re-index of this doc, which is a desirable outcome.
if record.document.chunk_count is None:
raise RuntimeError(
f"Document {record.document_id} has no chunk count."
)
chunks_migrated = _migrate_single_document(
document_id=record.document_id,
opensearch_document_index=opensearch_document_index,
vespa_document_index=vespa_document_index,
tenant_state=tenant_state,
)
# If the number of chunks in Vespa is not in sync with the
# Document table for this doc, don't mark it completed;
# let a subsequent run take care of it.
if chunks_migrated != record.document.chunk_count:
raise RuntimeError(
f"Number of chunks migrated ({chunks_migrated}) does not match number of expected chunks "
f"in Vespa ({record.document.chunk_count}) for document {record.document_id}."
)
record.status = OpenSearchDocumentMigrationStatus.COMPLETED
num_documents_migrated += 1
num_chunks_migrated += chunks_migrated
except Exception:
record.status = OpenSearchDocumentMigrationStatus.FAILED
record.error_message = f"Attempt {record.attempts_count + 1}:\n{traceback.format_exc()}"
task_logger.exception(
f"Error migrating document {record.document_id} from Vespa to OpenSearch."
)
num_documents_failed += 1
finally:
record.attempts_count += 1
record.last_attempt_at = datetime.now(timezone.utc)
if should_document_migration_be_permanently_failed(record):
record.status = (
OpenSearchDocumentMigrationStatus.PERMANENTLY_FAILED
)
# TODO(andrei): Not necessarily here but if this happens
# we'll need to mark the connector as needing reindex.
db_session.commit()
except Exception:
task_logger.exception("Error in the OpenSearch migration task.")
return False
finally:
if lock_beat.owned():
lock_beat.release()
if lock.owned():
lock.release()
else:
task_logger.warning(
"The OpenSearch migration lock was not owned on completion of the migration task."
)
task_logger.info(
f"Finished a migration batch from Vespa to OpenSearch. Migrated {num_chunks_migrated} chunks "
f"from {num_documents_migrated} documents in {time.monotonic() - task_start_time:.3f} seconds. "
f"Failed to migrate {num_documents_failed} documents."
)
return True
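As the WARNING comments above note, soft_time_limit and time_limit are not enforced when Celery runs with threads, so the task budgets its own runtime: before each small batch it re-checks the elapsed monotonic time and that it still owns the Redis lock. A minimal sketch of that guard using redis-py; the lock name, budget constants, and do_one_batch below are illustrative, not the repository's values:

import time
from collections.abc import Callable

from redis import Redis
from redis.lock import Lock as RedisLock

SOFT_BUDGET_S = 60 * 5  # illustrative, mirroring a 5 minute soft limit
LOCK_TIMEOUT_S = 60 * 6  # auto-release so a crashed worker cannot wedge the migration
LOCK_BLOCKING_TIMEOUT_S = 60 * 2  # how long to wait for another run to finish


def run_batches_within_budget(r: Redis, do_one_batch: Callable[[], None]) -> bool | None:
    lock: RedisLock = r.lock(
        name="example:opensearch_migration_lock",  # illustrative lock name
        timeout=LOCK_TIMEOUT_S,
        blocking=True,
        blocking_timeout=LOCK_BLOCKING_TIMEOUT_S,
    )
    if not lock.acquire():
        return None  # another run holds the lock; treat as a no-op
    start = time.monotonic()
    try:
        # Stop before the budget runs out, and only keep going while the lock
        # is still owned: it can expire underneath us once LOCK_TIMEOUT_S passes.
        while time.monotonic() - start < SOFT_BUDGET_S and lock.owned():
            do_one_batch()
        return True
    finally:
        if lock.owned():
            lock.release()

Keeping the per-batch unit small (5 documents in the task above) bounds how stale the timeout check can get, since the check only runs between batches.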

@@ -30,8 +30,11 @@ from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import TITLE
from onyx.document_index.vespa_constants import TITLE_EMBEDDING
from onyx.document_index.vespa_constants import USER_PROJECT
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
logger = setup_logger(__name__)
def _extract_content_vector(embeddings: Any) -> list[float]:
"""Extracts the full chunk embedding vector from Vespa's embeddings tensor.
@@ -150,13 +153,25 @@ def _transform_vespa_acl_to_opensearch_acl(
def transform_vespa_chunks_to_opensearch_chunks(
vespa_chunks: list[dict[str, Any]],
tenant_state: TenantState,
document_id: str,
) -> list[DocumentChunk]:
result: list[DocumentChunk] = []
for vespa_chunk in vespa_chunks:
# This should exist; fail loudly if it does not.
document_id: str = vespa_chunk[DOCUMENT_ID]
if not document_id:
vespa_document_id: str = vespa_chunk[DOCUMENT_ID]
if not vespa_document_id:
raise ValueError("Missing document_id in Vespa chunk.")
# Vespa doc IDs were sanitized using replace_invalid_doc_id_characters.
# This was a poor design choice and we don't want it in OpenSearch;
# whatever restrictions there may be on the indexed chunk ID should have
# no bearing on the chunk's document ID field, even if the document ID is
# used to build the chunk ID. We deliberately use the real doc ID
# supplied to this function.
if vespa_document_id != document_id:
logger.warning(
f"Vespa document ID {vespa_document_id} does not match the supplied document ID {document_id}. "
"The Vespa ID will be discarded."
)
# This should exist; fail loudly if it does not.
chunk_index: int = vespa_chunk[CHUNK_ID]
@@ -236,6 +251,8 @@ def transform_vespa_chunks_to_opensearch_chunks(
)
opensearch_chunk = DocumentChunk(
# We deliberately choose to use the doc ID supplied to this function
# over the Vespa doc ID.
document_id=document_id,
chunk_index=chunk_index,
title=title,

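The transform change above exists because Vespa-side document IDs were run through replace_invalid_doc_id_characters at index time, and character substitution of that kind is not reversible, so the ID read back from a Vespa chunk cannot be trusted to equal the real Document ID. A toy illustration of why such sanitization is lossy; the replacement rule below is invented for the example and is not the actual implementation:

import re


def sanitize_doc_id(doc_id: str) -> str:
    # Toy stand-in for a Vespa-style ID sanitizer: every character outside a
    # safe set collapses to "_". Distinct inputs can map to the same output,
    # so the original ID cannot be recovered from the sanitized form.
    return re.sub(r"[^a-zA-Z0-9_\-]", "_", doc_id)


# Two different source document IDs end up with the same sanitized form.
assert sanitize_doc_id("https://wiki/page?id=1") == sanitize_doc_id("https://wiki/page#id=1")

That is why the transform now takes the authoritative document_id as a parameter, only warns when the Vespa-side value disagrees, and writes the supplied ID into the OpenSearch chunk.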

@@ -24,7 +24,7 @@ from onyx.db.engine.sql_engine import get_session_with_current_tenant
bind=True,
base=AbortableTask,
)
def kombu_message_cleanup_task(self: Any, tenant_id: str) -> int:
def kombu_message_cleanup_task(self: Any, tenant_id: str) -> int: # noqa: ARG001
"""Runs periodically to clean up the kombu_message table"""
# we will select messages older than this amount to clean up
