Mirror of https://github.com/onyx-dot-app/onyx.git (synced 2026-02-16 23:35:46 +00:00)

Compare commits: v2.11.0-cl...craft_chan (198 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 908d360011 | |
| | 30578bdf9a | |
| | aebde89432 | |
| | 4a4b4bb378 | |
| | a8d231976a | |
| | 9c8ae5bb4b | |
| | 0fc1fa3d36 | |
| | 94633698c3 | |
| | 6ae15589cd | |
| | c24a8bb228 | |
| | 01945abd86 | |
| | 658632195f | |
| | ec6fd01ba4 | |
| | 148e6fb97d | |
| | 6598c1a48d | |
| | 497ce43bd8 | |
| | 8634cb0446 | |
| | 8d56fd3dc6 | |
| | a7579a99d0 | |
| | 3533c10da4 | |
| | 7b0414bf0d | |
| | b500ea537a | |
| | abd6d55add | |
| | f15b6b8034 | |
| | fb40485f25 | |
| | 22e85f1f28 | |
| | 2ef7c3e6f3 | |
| | 92a471ed2b | |
| | d1b7e529a4 | |
| | 95c3579264 | |
| | 8802e5cad3 | |
| | df7ab6841a | |
| | 2131c86c16 | |
| | 7d1b9e4356 | |
| | 38e92308ec | |
| | a41b4bbc82 | |
| | c026c077b5 | |
| | 3eee539a86 | |
| | 143e7a0d72 | |
| | 4572358038 | |
| | 1753f94c11 | |
| | 120ddf2ef6 | |
| | 2cce5bc58f | |
| | 383a6001d2 | |
| | 3a6f45bfca | |
| | 2444b59070 | |
| | 49771945e1 | |
| | e06b5ef202 | |
| | 15f0bc9c3d | |
| | 963b172a09 | |
| | c13ce816fa | |
| | 39f3e872ec | |
| | b033c00217 | |
| | 6d47c5f21a | |
| | dc2bf20a8d | |
| | d29f1efec0 | |
| | 13d1c3d86a | |
| | adc6773f9f | |
| | a819482749 | |
| | f660f9f447 | |
| | 26f9574364 | |
| | 0645540e24 | |
| | 9fa17c7713 | |
| | 1af484503e | |
| | 55276be061 | |
| | 4bb02459ae | |
| | 7109aea897 | |
| | 8ce4cfc302 | |
| | 0f75de9687 | |
| | 9782fcb0b9 | |
| | ec2a35b3a4 | |
| | a2c0fc4df0 | |
| | 9815c2c8d9 | |
| | 8c3e3a6e02 | |
| | 726c6232a5 | |
| | f9d41ff1da | |
| | 7dccc88b35 | |
| | eb3eb83c95 | |
| | e4b9ef176f | |
| | ac617a51ce | |
| | d18dd62641 | |
| | 96224164ca | |
| | 78cec7c9e9 | |
| | 8fa7002826 | |
| | 921305f8ff | |
| | 71148dd880 | |
| | ac26ba6c2d | |
| | 24584d4067 | |
| | 39d8d1db0c | |
| | 17824c5d92 | |
| | eba89fa635 | |
| | 339a111a8f | |
| | 09b7e6fc9b | |
| | 135238014f | |
| | 303e37bf53 | |
| | 53f4025a23 | |
| | 6a888e9900 | |
| | 9159b159fa | |
| | d7a22b916b | |
| | 97d90a82f8 | |
| | d9cf5afee8 | |
| | e90a7767c6 | |
| | ce43dee20f | |
| | 90ac23a564 | |
| | d9f97090d5 | |
| | 2661e27741 | |
| | 0481b61f8d | |
| | 1ded3af63c | |
| | 6d12c9c430 | |
| | b81dd6f4a3 | |
| | c53546c000 | |
| | 9afa12edda | |
| | 32046de962 | |
| | f9a648bb5f | |
| | e9be9101e5 | |
| | e670bd994b | |
| | a48d74c7fd | |
| | 0e76ae3423 | |
| | 37bfa5833b | |
| | 6c46fcd651 | |
| | 7700674b15 | |
| | 4ac6ff633a | |
| | efd198072e | |
| | b207a165c7 | |
| | c231d2ec67 | |
| | d1a0c75a40 | |
| | 3b737fe311 | |
| | e7abbbdc7f | |
| | 5d5080e9e1 | |
| | 83b7c5d088 | |
| | f08cdc603b | |
| | 6932791dd5 | |
| | f334b365e0 | |
| | af58ae5ad9 | |
| | bcd8314dd7 | |
| | cddb26ff19 | |
| | c8d38de37f | |
| | f2e95ee8bb | |
| | 94ee45ce64 | |
| | f36d15d924 | |
| | ec866debc0 | |
| | 08f80b4abf | |
| | e559a4925a | |
| | 1f63a23238 | |
| | 658c76dd0a | |
| | 00828af63f | |
| | 71c6e40d5e | |
| | f3ff4b57bd | |
| | bf1752552b | |
| | 5a9f9e28dc | |
| | 655cfc4858 | |
| | b26c2e27b2 | |
| | 305a667bf9 | |
| | 6bc5b083d5 | |
| | 31213d43b3 | |
| | a9e79b45cc | |
| | 936ce0535d | |
| | 165710b5d6 | |
| | c2ab9ca2a2 | |
| | 3bcdeea560 | |
| | 31200a1b41 | |
| | a6261d57fd | |
| | 4c5e65e6dd | |
| | e70115d359 | |
| | eec188f9d3 | |
| | 0504335a7b | |
| | f5186b5e44 | |
| | 8e3d4e1474 | |
| | 474fb028b0 | |
| | d25e773b0e | |
| | c5df9d8863 | |
| | 28eabdc885 | |
| | 72f34e403c | |
| | 8037dd2420 | |
| | d29a384da6 | |
| | fe7e5d3c55 | |
| | 91185f80c4 | |
| | 1244df1176 | |
| | 080e58d875 | |
| | 420f46ce48 | |
| | 50835b4fd0 | |
| | b08a3f2195 | |
| | dbf0c10632 | |
| | 04433f8d44 | |
| | e426ca627f | |
| | 6c9651eb97 | |
| | 02140eed98 | |
| | 93f316fa8a | |
| | e02a60ddc7 | |
| | aa413e93d1 | |
| | 2749e9dd6d | |
| | decca26a71 | |
| | 1c490735b1 | |
| | 87da107a03 | |
| | f8b56098cc | |
| | a3a43173f7 | |
| | aea924119d | |
| | a79e581465 | |
.github/workflows/deployment.yml (vendored): 32 changed lines
@@ -145,13 +145,13 @@ jobs:
|
||||
if: ${{ !startsWith(github.ref_name, 'nightly-latest') && github.event_name != 'workflow_dispatch' }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup uv
|
||||
uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
|
||||
uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # ratchet:astral-sh/setup-uv@v7
|
||||
with:
|
||||
version: "0.9.9"
|
||||
# NOTE: This isn't caching much and zizmor suggests this could be poisoned, so disable.
|
||||
@@ -170,7 +170,7 @@ jobs:
|
||||
environment: release
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -219,7 +219,7 @@ jobs:
|
||||
timeout-minutes: 90
|
||||
environment: release
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6.0.1
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6.0.2
|
||||
with:
|
||||
# NOTE: persist-credentials is needed for tauri-action to create GitHub releases.
|
||||
persist-credentials: true # zizmor: ignore[artipacked]
|
||||
@@ -392,7 +392,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -465,7 +465,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -603,7 +603,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -684,7 +684,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -827,7 +827,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -899,7 +899,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -1036,7 +1036,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -1109,7 +1109,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -1248,7 +1248,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -1327,7 +1327,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -1580,7 +1580,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -1705,7 +1705,7 @@ jobs:
|
||||
environment: release
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
.github/workflows/helm-chart-releases.yml (vendored): 2 changed lines
@@ -15,7 +15,7 @@ jobs:
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
.github/workflows/nightly-scan-licenses.yml (vendored): 2 changed lines
@@ -28,7 +28,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
.github/workflows/pr-database-tests.yml (vendored): 2 changed lines
@@ -27,7 +27,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
.github/workflows/pr-desktop-build.yml (vendored, new file): 114 lines
@@ -0,0 +1,114 @@
name: Build Desktop App
concurrency:
  group: Build-Desktop-App-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

on:
  merge_group:
  pull_request:
    branches:
      - main
      - "release/**"
    paths:
      - "desktop/**"
      - ".github/workflows/pr-desktop-build.yml"
  push:
    tags:
      - "v*.*.*"

permissions:
  contents: read

jobs:
  build-desktop:
    name: Build Desktop (${{ matrix.platform }})
    runs-on: ${{ matrix.os }}
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        include:
          - platform: linux
            os: ubuntu-latest
            target: x86_64-unknown-linux-gnu
            args: "--bundles deb,rpm"
          # TODO: Fix and enable the macOS build.
          #- platform: macos
          #  os: macos-latest
          #  target: universal-apple-darwin
          #  args: "--target universal-apple-darwin"
          # TODO: Fix and enable the Windows build.
          #- platform: windows
          #  os: windows-latest
          #  target: x86_64-pc-windows-msvc
          #  args: ""

    steps:
      - name: Checkout code
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: false

      - name: Setup node
        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020
        with:
          node-version: 24
          cache: "npm" # zizmor: ignore[cache-poisoning]
          cache-dependency-path: ./desktop/package-lock.json

      - name: Setup Rust
        uses: dtolnay/rust-toolchain@4be9e76fd7c4901c61fb841f559994984270fce7
        with:
          toolchain: stable
          targets: ${{ matrix.target }}

      - name: Cache Cargo registry and build
        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # zizmor: ignore[cache-poisoning]
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            desktop/src-tauri/target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('desktop/src-tauri/Cargo.lock') }}
          restore-keys: |
            ${{ runner.os }}-cargo-

      - name: Install Linux dependencies
        if: matrix.platform == 'linux'
        run: |
          sudo apt-get update
          sudo apt-get install -y \
            build-essential \
            libglib2.0-dev \
            libgirepository1.0-dev \
            libgtk-3-dev \
            libjavascriptcoregtk-4.1-dev \
            libwebkit2gtk-4.1-dev \
            libayatana-appindicator3-dev \
            gobject-introspection \
            pkg-config \
            curl \
            xdg-utils

      - name: Install npm dependencies
        working-directory: ./desktop
        run: npm ci

      - name: Build desktop app
        working-directory: ./desktop
        run: npx tauri build ${{ matrix.args }}
        env:
          TAURI_SIGNING_PRIVATE_KEY: ""
          TAURI_SIGNING_PRIVATE_KEY_PASSWORD: ""

      - name: Upload build artifacts
        if: always()
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
        with:
          name: desktop-build-${{ matrix.platform }}-${{ github.run_id }}
          path: |
            desktop/src-tauri/target/release/bundle/
          retention-days: 7
          if-no-files-found: ignore
@@ -57,7 +57,7 @@ jobs:
|
||||
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -91,7 +91,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
.github/workflows/pr-helm-chart-testing.yml (vendored): 3 changed lines
@@ -30,7 +30,7 @@ jobs:
|
||||
# fetch-depth 0 is required for helm/chart-testing-action
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
@@ -197,7 +197,6 @@ jobs:
|
||||
--set=auth.opensearch.enabled=true \
|
||||
--set=slackbot.enabled=false \
|
||||
--set=postgresql.enabled=true \
|
||||
--set=postgresql.nameOverride=cloudnative-pg \
|
||||
--set=postgresql.cluster.storage.storageClass=standard \
|
||||
--set=redis.enabled=true \
|
||||
--set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \
|
||||
|
||||
.github/workflows/pr-integration-tests.yml (vendored): 12 changed lines
@@ -48,7 +48,7 @@ jobs:
|
||||
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -84,7 +84,7 @@ jobs:
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -144,7 +144,7 @@ jobs:
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -203,7 +203,7 @@ jobs:
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -279,7 +279,7 @@ jobs:
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -460,7 +460,7 @@ jobs:
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
.github/workflows/pr-jest-tests.yml (vendored): 2 changed lines
@@ -23,7 +23,7 @@ jobs:
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
.github/workflows/pr-mit-integration-tests.yml (vendored): 10 changed lines
@@ -40,7 +40,7 @@ jobs:
|
||||
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -76,7 +76,7 @@ jobs:
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -136,7 +136,7 @@ jobs:
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -195,7 +195,7 @@ jobs:
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -271,7 +271,7 @@ jobs:
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
.github/workflows/pr-playwright-tests.yml (vendored): 10 changed lines
@@ -66,7 +66,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -127,7 +127,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -188,7 +188,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -254,7 +254,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -488,7 +488,7 @@ jobs:
|
||||
# ]
|
||||
# steps:
|
||||
# - name: Checkout code
|
||||
# uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
# uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
# with:
|
||||
# fetch-depth: 0
|
||||
|
||||
|
||||
.github/workflows/pr-python-checks.yml (vendored): 2 changed lines
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ env:
|
||||
ZENDESK_TOKEN: ${{ secrets.ZENDESK_TOKEN }}
|
||||
|
||||
# Salesforce
|
||||
SF_USERNAME: ${{ secrets.SF_USERNAME }}
|
||||
SF_USERNAME: ${{ vars.SF_USERNAME }}
|
||||
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
|
||||
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
|
||||
|
||||
@@ -110,6 +110,9 @@ env:
|
||||
# Slack
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
# Discord
|
||||
DISCORD_CONNECTOR_BOT_TOKEN: ${{ secrets.DISCORD_CONNECTOR_BOT_TOKEN }}
|
||||
|
||||
# Teams
|
||||
TEAMS_APPLICATION_ID: ${{ secrets.TEAMS_APPLICATION_ID }}
|
||||
TEAMS_DIRECTORY_ID: ${{ secrets.TEAMS_DIRECTORY_ID }}
|
||||
@@ -139,7 +142,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
.github/workflows/pr-python-model-tests.yml (vendored): 2 changed lines
@@ -38,7 +38,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
.github/workflows/pr-python-tests.yml (vendored): 2 changed lines
@@ -32,7 +32,7 @@ jobs:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
.github/workflows/pr-quality-checks.yml (vendored): 4 changed lines
@@ -20,7 +20,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
@@ -38,7 +38,7 @@ jobs:
|
||||
- name: Install node dependencies
|
||||
working-directory: ./web
|
||||
run: npm ci
|
||||
- uses: j178/prek-action@91fd7d7cf70ae1dee9f4f44e7dfa5d1073fe6623 # ratchet:j178/prek-action@v1
|
||||
- uses: j178/prek-action@9d6a3097e0c1865ecce00cfb89fe80f2ee91b547 # ratchet:j178/prek-action@v1
|
||||
with:
|
||||
prek-version: '0.2.21'
|
||||
extra-args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || github.event_name == 'merge_group' && format('--from-ref {0} --to-ref {1}', github.event.merge_group.base_sha, github.event.merge_group.head_sha) || github.ref_name == 'main' && '--all-files' || '' }}
|
||||
|
||||
.github/workflows/release-devtools.yml (vendored): 4 changed lines
@@ -24,11 +24,11 @@ jobs:
|
||||
- { goos: "darwin", goarch: "arm64" }
|
||||
- { goos: "", goarch: "" }
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
fetch-depth: 0
|
||||
- uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
|
||||
- uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # ratchet:astral-sh/setup-uv@v7
|
||||
with:
|
||||
enable-cache: false
|
||||
version: "0.9.9"
|
||||
|
||||
.github/workflows/sync_foss.yml (vendored): 2 changed lines
@@ -14,7 +14,7 @@ jobs:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout main Onyx repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
.github/workflows/tag-nightly.yml (vendored): 2 changed lines
@@ -18,7 +18,7 @@ jobs:
|
||||
# see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
|
||||
# implement here which needs an actual user's deploy key
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
ssh-key: "${{ secrets.DEPLOY_KEY }}"
|
||||
persist-credentials: true
|
||||
|
||||
.github/workflows/zizmor.yml (vendored): 4 changed lines
@@ -17,7 +17,7 @@ jobs:
|
||||
security-events: write # needed for SARIF uploads
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6.0.1
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -31,7 +31,7 @@ jobs:
|
||||
|
||||
- name: Install the latest version of uv
|
||||
if: steps.filter.outputs.zizmor == 'true' || github.ref_name == 'main'
|
||||
uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
|
||||
uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # ratchet:astral-sh/setup-uv@v7
|
||||
with:
|
||||
enable-cache: false
|
||||
version: "0.9.9"
|
||||
|
||||
.gitignore (vendored): 3 changed lines
@@ -1,6 +1,7 @@
# editors
.vscode
.vscode/*
!/.vscode/env_template.txt
!/.vscode/env.web_template.txt
!/.vscode/launch.json
!/.vscode/tasks.template.jsonc
.zed
.vscode/env.web_template.txt (vendored, new file): 16 lines
@@ -0,0 +1,16 @@
# Copy this file to .env.web in the .vscode folder.
# Fill in the <REPLACE THIS> values as needed
# Web Server specific environment variables
# Minimal set needed for Next.js dev server

# Auth
AUTH_TYPE=basic
DEV_MODE=true

# Enable the full set of Danswer Enterprise Edition features.
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you
# are using this for local testing/development).
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=false

# Enable Onyx Craft
ENABLE_CRAFT=true
.vscode/env_template.txt (vendored): 7 changed lines
@@ -6,13 +6,13 @@
# processes.

# For local dev, often user Authentication is not needed.
AUTH_TYPE=disabled
AUTH_TYPE=basic
DEV_MODE=true

# Always keep these on for Dev.
# Logs model prompts, reasoning, and answer to stdout.
LOG_ONYX_MODEL_INTERACTIONS=True
LOG_ONYX_MODEL_INTERACTIONS=False
# More verbose logging
LOG_LEVEL=debug

@@ -35,7 +35,6 @@ GEN_AI_API_KEY=<REPLACE THIS>
OPENAI_API_KEY=<REPLACE THIS>
# If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper.
GEN_AI_MODEL_VERSION=gpt-4o
FAST_GEN_AI_MODEL_VERSION=gpt-4o

# Python stuff
.vscode/launch.json (vendored): 5 changed lines
@@ -25,6 +25,7 @@
        "Celery heavy",
        "Celery docfetching",
        "Celery docprocessing",
        "Celery user_file_processing",
        "Celery beat"
      ],
      "presentation": {
@@ -86,7 +87,7 @@
      "request": "launch",
      "cwd": "${workspaceRoot}/web",
      "runtimeExecutable": "npm",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "envFile": "${workspaceFolder}/.vscode/.env.web",
      "runtimeArgs": ["run", "dev"],
      "presentation": {
        "group": "2"
@@ -121,7 +122,6 @@
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_ONYX_MODEL_INTERACTIONS": "True",
        "LOG_LEVEL": "DEBUG",
        "PYTHONUNBUFFERED": "1"
      },
@@ -572,7 +572,6 @@
      "cwd": "${workspaceFolder}/backend",
      "envFile": "${workspaceFolder}/.vscode/.env",
      "env": {
        "LOG_ONYX_MODEL_INTERACTIONS": "True",
        "LOG_LEVEL": "DEBUG",
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
@@ -0,0 +1,58 @@
"""LLMProvider deprecated fields are nullable

Revision ID: 001984c88745
Revises: 01f8e6d95a33
Create Date: 2026-02-01 22:24:34.171100

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "001984c88745"
down_revision = "01f8e6d95a33"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Make default_model_name nullable (was NOT NULL)
    op.alter_column(
        "llm_provider",
        "default_model_name",
        existing_type=sa.String(),
        nullable=True,
    )

    # Remove server_default from is_default_vision_provider (was server_default=false())
    op.alter_column(
        "llm_provider",
        "is_default_vision_provider",
        existing_type=sa.Boolean(),
        server_default=None,
    )

    # is_default_provider and default_vision_model are already nullable with no server_default


def downgrade() -> None:
    # Restore default_model_name to NOT NULL (set empty string for any NULLs first)
    op.execute(
        "UPDATE llm_provider SET default_model_name = '' WHERE default_model_name IS NULL"
    )
    op.alter_column(
        "llm_provider",
        "default_model_name",
        existing_type=sa.String(),
        nullable=False,
    )

    # Restore server_default for is_default_vision_provider
    op.alter_column(
        "llm_provider",
        "is_default_vision_provider",
        existing_type=sa.Boolean(),
        server_default=sa.false(),
    )
@@ -0,0 +1,112 @@
"""Populate flow mapping data

Revision ID: 01f8e6d95a33
Revises: f220515df7b4
Create Date: 2026-01-31 17:37:10.485558

"""

from alembic import op


# revision identifiers, used by Alembic.
revision = "01f8e6d95a33"
down_revision = "f220515df7b4"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add each model config to the conversation flow, setting the global default if it exists
    # Exclude models that are part of ImageGenerationConfig
    op.execute(
        """
        INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)
        SELECT
            'chat' AS llm_model_flow_type,
            COALESCE(
                (lp.is_default_provider IS TRUE AND lp.default_model_name = mc.name),
                FALSE
            ) AS is_default,
            mc.id AS model_configuration_id
        FROM model_configuration mc
        LEFT JOIN llm_provider lp
            ON lp.id = mc.llm_provider_id
        WHERE NOT EXISTS (
            SELECT 1 FROM image_generation_config igc
            WHERE igc.model_configuration_id = mc.id
        );
        """
    )

    # Add models with supports_image_input to the vision flow
    op.execute(
        """
        INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)
        SELECT
            'vision' AS llm_model_flow_type,
            COALESCE(
                (lp.is_default_vision_provider IS TRUE AND lp.default_vision_model = mc.name),
                FALSE
            ) AS is_default,
            mc.id AS model_configuration_id
        FROM model_configuration mc
        LEFT JOIN llm_provider lp
            ON lp.id = mc.llm_provider_id
        WHERE mc.supports_image_input IS TRUE;
        """
    )


def downgrade() -> None:
    # Populate vision defaults from model_flow
    op.execute(
        """
        UPDATE llm_provider AS lp
        SET
            is_default_vision_provider = TRUE,
            default_vision_model = mc.name
        FROM llm_model_flow mf
        JOIN model_configuration mc ON mc.id = mf.model_configuration_id
        WHERE mf.llm_model_flow_type = 'vision'
            AND mf.is_default = TRUE
            AND mc.llm_provider_id = lp.id;
        """
    )

    # Populate conversation defaults from model_flow
    op.execute(
        """
        UPDATE llm_provider AS lp
        SET
            is_default_provider = TRUE,
            default_model_name = mc.name
        FROM llm_model_flow mf
        JOIN model_configuration mc ON mc.id = mf.model_configuration_id
        WHERE mf.llm_model_flow_type = 'chat'
            AND mf.is_default = TRUE
            AND mc.llm_provider_id = lp.id;
        """
    )

    # For providers that have conversation flow mappings but aren't the default,
    # we still need a default_model_name (it was NOT NULL originally)
    # Pick the first visible model or any model for that provider
    op.execute(
        """
        UPDATE llm_provider AS lp
        SET default_model_name = (
            SELECT mc.name
            FROM model_configuration mc
            JOIN llm_model_flow mf ON mf.model_configuration_id = mc.id
            WHERE mc.llm_provider_id = lp.id
                AND mf.llm_model_flow_type = 'chat'
            ORDER BY mc.is_visible DESC, mc.id ASC
            LIMIT 1
        )
        WHERE lp.default_model_name IS NULL;
        """
    )

    # Delete all model_flow entries (reverse the inserts from upgrade)
    op.execute("DELETE FROM llm_model_flow;")
@@ -10,8 +10,6 @@ from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

from onyx.configs.chat_configs import NUM_POSTPROCESSED_RESULTS

# revision identifiers, used by Alembic.
revision = "1f60f60c3401"
down_revision = "f17bf3b0d9f1"
@@ -66,7 +64,7 @@ def upgrade() -> None:
            "num_rerank",
            sa.Integer(),
            nullable=False,
            server_default=str(NUM_POSTPROCESSED_RESULTS),
            server_default=str(20),
        ),
    )
@@ -0,0 +1,58 @@
"""remove reranking from search_settings

Revision ID: 78ebc66946a0
Revises: 849b21c732f8
Create Date: 2026-01-28

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "78ebc66946a0"
down_revision = "849b21c732f8"
branch_labels: None = None
depends_on: None = None


def upgrade() -> None:
    op.drop_column("search_settings", "disable_rerank_for_streaming")
    op.drop_column("search_settings", "rerank_model_name")
    op.drop_column("search_settings", "rerank_provider_type")
    op.drop_column("search_settings", "rerank_api_key")
    op.drop_column("search_settings", "rerank_api_url")
    op.drop_column("search_settings", "num_rerank")


def downgrade() -> None:
    op.add_column(
        "search_settings",
        sa.Column(
            "disable_rerank_for_streaming",
            sa.Boolean(),
            nullable=False,
            server_default="false",
        ),
    )
    op.add_column(
        "search_settings", sa.Column("rerank_model_name", sa.String(), nullable=True)
    )
    op.add_column(
        "search_settings", sa.Column("rerank_provider_type", sa.String(), nullable=True)
    )
    op.add_column(
        "search_settings", sa.Column("rerank_api_key", sa.String(), nullable=True)
    )
    op.add_column(
        "search_settings", sa.Column("rerank_api_url", sa.String(), nullable=True)
    )
    op.add_column(
        "search_settings",
        sa.Column(
            "num_rerank",
            sa.Integer(),
            nullable=False,
            server_default=str(20),
        ),
    )
@@ -0,0 +1,32 @@
"""add demo_data_enabled to build_session

Revision ID: 849b21c732f8
Revises: 81c22b1e2e78
Create Date: 2026-01-28 10:00:00.000000

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "849b21c732f8"
down_revision = "81c22b1e2e78"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "build_session",
        sa.Column(
            "demo_data_enabled",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("true"),
        ),
    )


def downgrade() -> None:
    op.drop_column("build_session", "demo_data_enabled")
@@ -0,0 +1,27 @@
"""add processing_duration_seconds to chat_message

Revision ID: 9d1543a37106
Revises: cbc03e08d0f3
Create Date: 2026-01-21 11:42:18.546188

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "9d1543a37106"
down_revision = "cbc03e08d0f3"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "chat_message",
        sa.Column("processing_duration_seconds", sa.Float(), nullable=True),
    )


def downgrade() -> None:
    op.drop_column("chat_message", "processing_duration_seconds")
@@ -0,0 +1,40 @@
"""Persona new default model configuration id column

Revision ID: be87a654d5af
Revises: e7f8a9b0c1d2
Create Date: 2026-01-30 11:14:17.306275

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "be87a654d5af"
down_revision = "e7f8a9b0c1d2"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "persona",
        sa.Column("default_model_configuration_id", sa.Integer(), nullable=True),
    )
    op.create_foreign_key(
        "fk_persona_default_model_configuration_id",
        "persona",
        "model_configuration",
        ["default_model_configuration_id"],
        ["id"],
        ondelete="SET NULL",
    )


def downgrade() -> None:
    op.drop_constraint(
        "fk_persona_default_model_configuration_id", "persona", type_="foreignkey"
    )

    op.drop_column("persona", "default_model_configuration_id")
@@ -0,0 +1,128 @@
|
||||
"""add_opensearch_migration_tables
|
||||
|
||||
Revision ID: cbc03e08d0f3
|
||||
Revises: be87a654d5af
|
||||
Create Date: 2026-01-31 17:00:45.176604
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "cbc03e08d0f3"
|
||||
down_revision = "be87a654d5af"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# 1. Create opensearch_document_migration_record table.
|
||||
op.create_table(
|
||||
"opensearch_document_migration_record",
|
||||
sa.Column("document_id", sa.String(), nullable=False),
|
||||
sa.Column("status", sa.String(), nullable=False, server_default="pending"),
|
||||
sa.Column("error_message", sa.Text(), nullable=True),
|
||||
sa.Column("attempts_count", sa.Integer(), nullable=False, server_default="0"),
|
||||
sa.Column("last_attempt_at", sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.func.now(),
|
||||
nullable=False,
|
||||
),
|
||||
sa.PrimaryKeyConstraint("document_id"),
|
||||
sa.ForeignKeyConstraint(
|
||||
["document_id"],
|
||||
["document.id"],
|
||||
ondelete="CASCADE",
|
||||
),
|
||||
)
|
||||
# 2. Create indices.
|
||||
op.create_index(
|
||||
"ix_opensearch_document_migration_record_status",
|
||||
"opensearch_document_migration_record",
|
||||
["status"],
|
||||
)
|
||||
op.create_index(
|
||||
"ix_opensearch_document_migration_record_attempts_count",
|
||||
"opensearch_document_migration_record",
|
||||
["attempts_count"],
|
||||
)
|
||||
op.create_index(
|
||||
"ix_opensearch_document_migration_record_created_at",
|
||||
"opensearch_document_migration_record",
|
||||
["created_at"],
|
||||
)
|
||||
|
||||
# 3. Create opensearch_tenant_migration_record table (singleton).
|
||||
op.create_table(
|
||||
"opensearch_tenant_migration_record",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column(
|
||||
"document_migration_record_table_population_status",
|
||||
sa.String(),
|
||||
nullable=False,
|
||||
server_default="pending",
|
||||
),
|
||||
sa.Column(
|
||||
"num_times_observed_no_additional_docs_to_populate_migration_table",
|
||||
sa.Integer(),
|
||||
nullable=False,
|
||||
server_default="0",
|
||||
),
|
||||
sa.Column(
|
||||
"overall_document_migration_status",
|
||||
sa.String(),
|
||||
nullable=False,
|
||||
server_default="pending",
|
||||
),
|
||||
sa.Column(
|
||||
"num_times_observed_no_additional_docs_to_migrate",
|
||||
sa.Integer(),
|
||||
nullable=False,
|
||||
server_default="0",
|
||||
),
|
||||
sa.Column(
|
||||
"last_updated_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.func.now(),
|
||||
nullable=False,
|
||||
),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
|
||||
# 4. Create unique index on constant to enforce singleton pattern.
|
||||
op.execute(
|
||||
sa.text(
|
||||
"""
|
||||
CREATE UNIQUE INDEX idx_opensearch_tenant_migration_singleton
|
||||
ON opensearch_tenant_migration_record ((true))
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Drop opensearch_tenant_migration_record.
|
||||
op.drop_index(
|
||||
"idx_opensearch_tenant_migration_singleton",
|
||||
table_name="opensearch_tenant_migration_record",
|
||||
)
|
||||
op.drop_table("opensearch_tenant_migration_record")
|
||||
|
||||
# Drop opensearch_document_migration_record.
|
||||
op.drop_index(
|
||||
"ix_opensearch_document_migration_record_created_at",
|
||||
table_name="opensearch_document_migration_record",
|
||||
)
|
||||
op.drop_index(
|
||||
"ix_opensearch_document_migration_record_attempts_count",
|
||||
table_name="opensearch_document_migration_record",
|
||||
)
|
||||
op.drop_index(
|
||||
"ix_opensearch_document_migration_record_status",
|
||||
table_name="opensearch_document_migration_record",
|
||||
)
|
||||
op.drop_table("opensearch_document_migration_record")
|
||||
backend/alembic/versions/e7f8a9b0c1d2_create_anonymous_user.py (new file): 125 lines
@@ -0,0 +1,125 @@
|
||||
"""create_anonymous_user
|
||||
|
||||
This migration creates a permanent anonymous user in the database.
|
||||
When anonymous access is enabled, unauthenticated requests will use this user
|
||||
instead of returning user_id=NULL.
|
||||
|
||||
Revision ID: e7f8a9b0c1d2
|
||||
Revises: f7ca3e2f45d9
|
||||
Create Date: 2026-01-15 14:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "e7f8a9b0c1d2"
|
||||
down_revision = "f7ca3e2f45d9"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
# Must match constants in onyx/configs/constants.py file
|
||||
ANONYMOUS_USER_UUID = "00000000-0000-0000-0000-000000000002"
|
||||
ANONYMOUS_USER_EMAIL = "anonymous@onyx.app"
|
||||
|
||||
# Tables with user_id foreign key that may need migration
|
||||
TABLES_WITH_USER_ID = [
|
||||
"chat_session",
|
||||
"credential",
|
||||
"document_set",
|
||||
"persona",
|
||||
"tool",
|
||||
"notification",
|
||||
"inputprompt",
|
||||
]
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""
|
||||
Create the anonymous user for anonymous access feature.
|
||||
Also migrates any remaining user_id=NULL records to the anonymous user.
|
||||
"""
|
||||
connection = op.get_bind()
|
||||
|
||||
# Create the anonymous user (using ON CONFLICT to be idempotent)
|
||||
connection.execute(
|
||||
sa.text(
|
||||
"""
|
||||
INSERT INTO "user" (id, email, hashed_password, is_active, is_superuser, is_verified, role)
|
||||
VALUES (:id, :email, :hashed_password, :is_active, :is_superuser, :is_verified, :role)
|
||||
ON CONFLICT (id) DO NOTHING
|
||||
"""
|
||||
),
|
||||
{
|
||||
"id": ANONYMOUS_USER_UUID,
|
||||
"email": ANONYMOUS_USER_EMAIL,
|
||||
"hashed_password": "", # Empty password - user cannot log in directly
|
||||
"is_active": True, # Active so it can be used for anonymous access
|
||||
"is_superuser": False,
|
||||
"is_verified": True, # Verified since no email verification needed
|
||||
"role": "LIMITED", # Anonymous users have limited role to restrict access
|
||||
},
|
||||
)
|
||||
|
||||
# Migrate any remaining user_id=NULL records to anonymous user
|
||||
for table in TABLES_WITH_USER_ID:
|
||||
try:
|
||||
# Exclude public credential (id=0) which must remain user_id=NULL
|
||||
# Exclude builtin tools (in_code_tool_id IS NOT NULL) which must remain user_id=NULL
|
||||
# Exclude builtin personas (builtin_persona=True) which must remain user_id=NULL
|
||||
# Exclude system input prompts (is_public=True with user_id=NULL) which must remain user_id=NULL
|
||||
if table == "credential":
|
||||
condition = "user_id IS NULL AND id != 0"
|
||||
elif table == "tool":
|
||||
condition = "user_id IS NULL AND in_code_tool_id IS NULL"
|
||||
elif table == "persona":
|
||||
condition = "user_id IS NULL AND builtin_persona = false"
|
||||
elif table == "inputprompt":
|
||||
condition = "user_id IS NULL AND is_public = false"
|
||||
else:
|
||||
condition = "user_id IS NULL"
|
||||
result = connection.execute(
|
||||
sa.text(
|
||||
f"""
|
||||
UPDATE "{table}"
|
||||
SET user_id = :user_id
|
||||
WHERE {condition}
|
||||
"""
|
||||
),
|
||||
{"user_id": ANONYMOUS_USER_UUID},
|
||||
)
|
||||
if result.rowcount > 0:
|
||||
print(f"Updated {result.rowcount} rows in {table} to anonymous user")
|
||||
except Exception as e:
|
||||
print(f"Skipping {table}: {e}")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""
|
||||
Set anonymous user's records back to NULL and delete the anonymous user.
|
||||
"""
|
||||
connection = op.get_bind()
|
||||
|
||||
# Set records back to NULL
|
||||
for table in TABLES_WITH_USER_ID:
|
||||
try:
|
||||
connection.execute(
|
||||
sa.text(
|
||||
f"""
|
||||
UPDATE "{table}"
|
||||
SET user_id = NULL
|
||||
WHERE user_id = :user_id
|
||||
"""
|
||||
),
|
||||
{"user_id": ANONYMOUS_USER_UUID},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Delete the anonymous user
|
||||
connection.execute(
|
||||
sa.text('DELETE FROM "user" WHERE id = :user_id'),
|
||||
{"user_id": ANONYMOUS_USER_UUID},
|
||||
)
|
||||
@@ -0,0 +1,57 @@
"""Add flow mapping table

Revision ID: f220515df7b4
Revises: cbc03e08d0f3
Create Date: 2026-01-30 12:21:24.955922

"""

from onyx.db.enums import LLMModelFlowType
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "f220515df7b4"
down_revision = "9d1543a37106"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "llm_model_flow",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "llm_model_flow_type",
            sa.Enum(LLMModelFlowType, name="llmmodelflowtype", native_enum=False),
            nullable=False,
        ),
        sa.Column(
            "is_default", sa.Boolean(), nullable=False, server_default=sa.text("false")
        ),
        sa.Column("model_configuration_id", sa.Integer(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(
            ["model_configuration_id"], ["model_configuration.id"], ondelete="CASCADE"
        ),
        sa.UniqueConstraint(
            "llm_model_flow_type",
            "model_configuration_id",
            name="uq_model_config_per_llm_model_flow_type",
        ),
    )

    # Partial unique index so that there is at most one default for each flow type
    op.create_index(
        "ix_one_default_per_llm_model_flow",
        "llm_model_flow",
        ["llm_model_flow_type"],
        unique=True,
        postgresql_where=sa.text("is_default IS TRUE"),
    )


def downgrade() -> None:
    # Drop the llm_model_flow table (index is dropped automatically with table)
    op.drop_table("llm_model_flow")
@@ -0,0 +1,281 @@
"""migrate_no_auth_data_to_placeholder

This migration handles the transition from AUTH_TYPE=disabled to requiring
authentication. It creates a placeholder user and assigns all data that was
created without a user (user_id=NULL) to this placeholder.

A database trigger is installed that automatically transfers all data from
the placeholder user to the first real user who registers, then drops itself.

Revision ID: f7ca3e2f45d9
Revises: 78ebc66946a0
Create Date: 2026-01-15 12:49:53.802741

"""

import os

from alembic import op
import sqlalchemy as sa

from shared_configs.configs import MULTI_TENANT


# revision identifiers, used by Alembic.
revision = "f7ca3e2f45d9"
down_revision = "78ebc66946a0"
branch_labels = None
depends_on = None

# Must match constants in onyx/configs/constants.py file
NO_AUTH_PLACEHOLDER_USER_UUID = "00000000-0000-0000-0000-000000000001"
NO_AUTH_PLACEHOLDER_USER_EMAIL = "no-auth-placeholder@onyx.app"

# Trigger and function names
TRIGGER_NAME = "trg_migrate_no_auth_data"
FUNCTION_NAME = "migrate_no_auth_data_to_user"

# Trigger function that migrates data from placeholder to first real user
MIGRATE_NO_AUTH_TRIGGER_FUNCTION = f"""
CREATE OR REPLACE FUNCTION {FUNCTION_NAME}()
RETURNS TRIGGER AS $$
DECLARE
    placeholder_uuid UUID := '00000000-0000-0000-0000-000000000001'::uuid;
    anonymous_uuid UUID := '00000000-0000-0000-0000-000000000002'::uuid;
    placeholder_row RECORD;
    schema_name TEXT;
BEGIN
    -- Skip if this is the placeholder user being inserted
    IF NEW.id = placeholder_uuid THEN
        RETURN NULL;
    END IF;

    -- Skip if this is the anonymous user being inserted (not a real user)
    IF NEW.id = anonymous_uuid THEN
        RETURN NULL;
    END IF;

    -- Skip if the new user is not active
    IF NEW.is_active = FALSE THEN
        RETURN NULL;
    END IF;

    -- Get current schema for self-cleanup
    schema_name := current_schema();

    -- Try to lock the placeholder user row with FOR UPDATE SKIP LOCKED
    -- This ensures only one concurrent transaction can proceed with migration
    -- SKIP LOCKED means if another transaction has the lock, we skip (don't wait)
    SELECT id INTO placeholder_row
    FROM "user"
    WHERE id = placeholder_uuid
    FOR UPDATE SKIP LOCKED;

    IF NOT FOUND THEN
        -- Either placeholder doesn't exist or another transaction has it locked
        -- Either way, drop the trigger and return without making admin
        EXECUTE format('DROP TRIGGER IF EXISTS {TRIGGER_NAME} ON %I."user"', schema_name);
        EXECUTE format('DROP FUNCTION IF EXISTS %I.{FUNCTION_NAME}()', schema_name);
        RETURN NULL;
    END IF;

    -- We have exclusive lock on placeholder - proceed with migration
    -- The INSERT has already completed (AFTER INSERT), so NEW.id exists in the table

    -- Migrate chat_session
    UPDATE "chat_session" SET user_id = NEW.id WHERE user_id = placeholder_uuid;

    -- Migrate credential (exclude public credential id=0)
    UPDATE "credential" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND id != 0;

    -- Migrate document_set
    UPDATE "document_set" SET user_id = NEW.id WHERE user_id = placeholder_uuid;

    -- Migrate persona (exclude builtin personas)
    UPDATE "persona" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND builtin_persona = FALSE;

    -- Migrate tool (exclude builtin tools)
    UPDATE "tool" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND in_code_tool_id IS NULL;

    -- Migrate notification
    UPDATE "notification" SET user_id = NEW.id WHERE user_id = placeholder_uuid;

    -- Migrate inputprompt (exclude system/public prompts)
    UPDATE "inputprompt" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND is_public = FALSE;

    -- Make the new user an admin (they had admin access in no-auth mode)
    -- In AFTER INSERT trigger, we must UPDATE the row since it already exists
    UPDATE "user" SET role = 'ADMIN' WHERE id = NEW.id;

    -- Delete the placeholder user (we hold the lock so this is safe)
    DELETE FROM "user" WHERE id = placeholder_uuid;

    -- Drop the trigger and function (self-cleanup)
    EXECUTE format('DROP TRIGGER IF EXISTS {TRIGGER_NAME} ON %I."user"', schema_name);
    EXECUTE format('DROP FUNCTION IF EXISTS %I.{FUNCTION_NAME}()', schema_name);

    RETURN NULL;
END;
$$ LANGUAGE plpgsql;
"""

MIGRATE_NO_AUTH_TRIGGER = f"""
CREATE TRIGGER {TRIGGER_NAME}
AFTER INSERT ON "user"
FOR EACH ROW
EXECUTE FUNCTION {FUNCTION_NAME}();
"""


def upgrade() -> None:
    """
    Create a placeholder user and assign all NULL user_id records to it.
    Install a trigger that migrates data to the first real user and self-destructs.
    Only runs if AUTH_TYPE is currently disabled/none.

    Skipped in multi-tenant mode - each tenant starts fresh with no legacy data.
    """
    # Skip in multi-tenant mode - this migration handles single-tenant
    # AUTH_TYPE=disabled -> auth transitions only
    if MULTI_TENANT:
        return

    # Only run if AUTH_TYPE is currently disabled/none
    # If they've already switched to auth-enabled, NULL data is stale anyway
    auth_type = (os.environ.get("AUTH_TYPE") or "").lower()
    if auth_type not in ("disabled", "none", ""):
        print(f"AUTH_TYPE is '{auth_type}', not disabled. Skipping migration.")
        return

    connection = op.get_bind()

    # Check if there are any NULL user_id records that need migration
    tables_to_check = [
        "chat_session",
        "credential",
        "document_set",
        "persona",
        "tool",
        "notification",
        "inputprompt",
    ]

    has_null_records = False
    for table in tables_to_check:
        try:
            result = connection.execute(
                sa.text(f'SELECT 1 FROM "{table}" WHERE user_id IS NULL LIMIT 1')
            )
            if result.fetchone():
                has_null_records = True
                break
        except Exception:
            # Table might not exist
            pass

    if not has_null_records:
        return

    # Create the placeholder user
    connection.execute(
        sa.text(
            """
            INSERT INTO "user" (id, email, hashed_password, is_active, is_superuser, is_verified, role)
            VALUES (:id, :email, :hashed_password, :is_active, :is_superuser, :is_verified, :role)
            """
        ),
        {
            "id": NO_AUTH_PLACEHOLDER_USER_UUID,
            "email": NO_AUTH_PLACEHOLDER_USER_EMAIL,
            "hashed_password": "",  # Empty password - user cannot log in
            "is_active": False,  # Inactive - user cannot log in
            "is_superuser": False,
            "is_verified": False,
            "role": "BASIC",
        },
    )

    # Assign NULL user_id records to the placeholder user
    for table in tables_to_check:
        try:
            # Base condition for all tables
            condition = "user_id IS NULL"
            # Exclude public credential (id=0) which must remain user_id=NULL
            if table == "credential":
                condition += " AND id != 0"
            # Exclude builtin tools (in_code_tool_id IS NOT NULL) which must remain user_id=NULL
            elif table == "tool":
                condition += " AND in_code_tool_id IS NULL"
            # Exclude builtin personas which must remain user_id=NULL
            elif table == "persona":
                condition += " AND builtin_persona = FALSE"
            # Exclude system/public input prompts which must remain user_id=NULL
            elif table == "inputprompt":
                condition += " AND is_public = FALSE"
            result = connection.execute(
                sa.text(
                    f"""
                    UPDATE "{table}"
                    SET user_id = :user_id
                    WHERE {condition}
                    """
                ),
                {"user_id": NO_AUTH_PLACEHOLDER_USER_UUID},
            )
            if result.rowcount > 0:
                print(f"Updated {result.rowcount} rows in {table}")
        except Exception as e:
            print(f"Skipping {table}: {e}")

    # Install the trigger function and trigger for automatic migration on first user registration
    connection.execute(sa.text(MIGRATE_NO_AUTH_TRIGGER_FUNCTION))
    connection.execute(sa.text(MIGRATE_NO_AUTH_TRIGGER))
    print("Installed trigger for automatic data migration on first user registration")


def downgrade() -> None:
    """
    Drop trigger and function, set placeholder user's records back to NULL,
    and delete the placeholder user.
    """
    # Skip in multi-tenant mode for consistency with upgrade
    if MULTI_TENANT:
        return

    connection = op.get_bind()

    # Drop trigger and function if they exist (they may have already self-destructed)
    connection.execute(sa.text(f'DROP TRIGGER IF EXISTS {TRIGGER_NAME} ON "user"'))
    connection.execute(sa.text(f"DROP FUNCTION IF EXISTS {FUNCTION_NAME}()"))

    tables_to_update = [
        "chat_session",
        "credential",
        "document_set",
        "persona",
        "tool",
        "notification",
        "inputprompt",
    ]

    # Set records back to NULL
    for table in tables_to_update:
        try:
            connection.execute(
                sa.text(
                    f"""
                    UPDATE "{table}"
                    SET user_id = NULL
                    WHERE user_id = :user_id
                    """
                ),
                {"user_id": NO_AUTH_PLACEHOLDER_USER_UUID},
            )
        except Exception:
            pass

    # Delete the placeholder user
    connection.execute(
        sa.text('DELETE FROM "user" WHERE id = :user_id'),
        {"user_id": NO_AUTH_PLACEHOLDER_USER_UUID},
    )
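The runtime effect of the trigger is easiest to see from the first registration after this migration. The sketch below is illustrative only (it adds nothing to the migration); the UUID and email are made-up values, and it assumes the single-tenant upgrade() path above has already run and installed the placeholder and trigger.

# Illustrative sketch, not part of the migration. Assumes `conn` is a SQLAlchemy
# Connection to the same single-tenant Postgres database.
import sqlalchemy as sa

def demo_first_user_registration(conn: "sa.Connection") -> None:
    first_real_user_id = "11111111-1111-1111-1111-111111111111"  # hypothetical
    conn.execute(
        sa.text(
            'INSERT INTO "user" (id, email, hashed_password, is_active, is_superuser, is_verified, role) '
            "VALUES (:id, :email, 'x', true, false, true, 'BASIC')"
        ),
        {"id": first_real_user_id, "email": "admin@example.com"},
    )
    # The AFTER INSERT trigger has now:
    #   * reassigned chat_session / credential / document_set / persona / tool /
    #     notification / inputprompt rows from the placeholder to this user,
    #   * promoted the user to ADMIN,
    #   * deleted the placeholder user,
    #   * dropped the trigger and its function (self-cleanup).
    role = conn.execute(
        sa.text('SELECT role FROM "user" WHERE id = :id'), {"id": first_real_user_id}
    ).scalar_one()
    assert role == "ADMIN"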
@@ -116,7 +116,7 @@ def _get_access_for_documents(
|
||||
return access_map
|
||||
|
||||
|
||||
def _get_acl_for_user(user: User | None, db_session: Session) -> set[str]:
|
||||
def _get_acl_for_user(user: User, db_session: Session) -> set[str]:
|
||||
"""Returns a list of ACL entries that the user has access to. This is meant to be
|
||||
used downstream to filter out documents that the user does not have access to. The
|
||||
user should have access to a document if at least one entry in the document's ACL
|
||||
@@ -124,13 +124,16 @@ def _get_acl_for_user(user: User | None, db_session: Session) -> set[str]:
|
||||
|
||||
NOTE: is imported in onyx.access.access by `fetch_versioned_implementation`
|
||||
DO NOT REMOVE."""
|
||||
db_user_groups = fetch_user_groups_for_user(db_session, user.id) if user else []
|
||||
is_anonymous = user.is_anonymous
|
||||
db_user_groups = (
|
||||
[] if is_anonymous else fetch_user_groups_for_user(db_session, user.id)
|
||||
)
|
||||
prefixed_user_groups = [
|
||||
prefix_user_group(db_user_group.name) for db_user_group in db_user_groups
|
||||
]
|
||||
|
||||
db_external_groups = (
|
||||
fetch_external_groups_for_user(db_session, user.id) if user else []
|
||||
[] if is_anonymous else fetch_external_groups_for_user(db_session, user.id)
|
||||
)
|
||||
prefixed_external_groups = [
|
||||
prefix_external_group(db_external_group.external_user_group_id)
backend/ee/onyx/access/hierarchy_access.py (new file, 11 lines)
@@ -0,0 +1,11 @@
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ee.onyx.db.external_perm import fetch_external_groups_for_user
|
||||
from onyx.db.models import User
|
||||
|
||||
|
||||
def _get_user_external_group_ids(db_session: Session, user: User) -> list[str]:
|
||||
if not user:
|
||||
return []
|
||||
external_groups = fetch_external_groups_for_user(db_session, user.id)
|
||||
return [external_group.external_user_group_id for external_group in external_groups]
|
||||
@@ -33,8 +33,8 @@ def get_default_admin_user_emails_() -> list[str]:
|
||||
|
||||
async def current_cloud_superuser(
|
||||
request: Request,
|
||||
user: User | None = Depends(current_admin_user),
|
||||
) -> User | None:
|
||||
user: User = Depends(current_admin_user),
|
||||
) -> User:
|
||||
api_key = request.headers.get("Authorization", "").replace("Bearer ", "")
|
||||
if api_key != SUPER_CLOUD_API_KEY:
|
||||
raise HTTPException(status_code=401, detail="Invalid API key")
|
||||
|
||||
@@ -25,6 +25,7 @@ from ee.onyx.db.connector_credential_pair import get_all_auto_sync_cc_pairs
|
||||
from ee.onyx.db.document import upsert_document_external_perms
|
||||
from ee.onyx.external_permissions.sync_params import get_source_perm_sync_config
|
||||
from onyx.access.models import DocExternalAccess
|
||||
from onyx.access.models import ElementExternalAccess
|
||||
from onyx.background.celery.apps.app_base import task_logger
|
||||
from onyx.background.celery.celery_redis import celery_find_task
|
||||
from onyx.background.celery.celery_redis import celery_get_queue_length
|
||||
@@ -55,6 +56,9 @@ from onyx.db.enums import AccessType
|
||||
from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
from onyx.db.enums import SyncStatus
|
||||
from onyx.db.enums import SyncType
|
||||
from onyx.db.hierarchy import (
|
||||
update_hierarchy_node_permissions as db_update_hierarchy_node_permissions,
|
||||
)
|
||||
from onyx.db.models import ConnectorCredentialPair
|
||||
from onyx.db.permission_sync_attempt import complete_doc_permission_sync_attempt
|
||||
from onyx.db.permission_sync_attempt import create_doc_permission_sync_attempt
|
||||
@@ -637,18 +641,25 @@ def connector_permission_sync_generator_task(
|
||||
),
|
||||
stop=stop_after_delay(DOCUMENT_PERMISSIONS_UPDATE_STOP_AFTER),
|
||||
)
|
||||
def document_update_permissions(
|
||||
def element_update_permissions(
|
||||
tenant_id: str,
|
||||
permissions: DocExternalAccess,
|
||||
permissions: ElementExternalAccess,
|
||||
source_type_str: str,
|
||||
connector_id: int,
|
||||
credential_id: int,
|
||||
) -> bool:
|
||||
"""Update permissions for a document or hierarchy node."""
|
||||
start = time.monotonic()
|
||||
|
||||
doc_id = permissions.doc_id
|
||||
external_access = permissions.external_access
|
||||
|
||||
# Determine element type and identifier for logging
|
||||
if isinstance(permissions, DocExternalAccess):
|
||||
element_id = permissions.doc_id
|
||||
element_type = "doc"
|
||||
else:
|
||||
element_id = permissions.raw_node_id
|
||||
element_type = "node"
|
||||
|
||||
try:
|
||||
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
|
||||
# Add the users to the DB if they don't exist
|
||||
@@ -657,39 +668,57 @@ def document_update_permissions(
|
||||
emails=list(external_access.external_user_emails),
|
||||
continue_on_error=True,
|
||||
)
|
||||
# Then upsert the document's external permissions
|
||||
created_new_doc = upsert_document_external_perms(
|
||||
db_session=db_session,
|
||||
doc_id=doc_id,
|
||||
external_access=external_access,
|
||||
source_type=DocumentSource(source_type_str),
|
||||
)
|
||||
|
||||
if created_new_doc:
|
||||
# If a new document was created, we associate it with the cc_pair
|
||||
upsert_document_by_connector_credential_pair(
|
||||
if isinstance(permissions, DocExternalAccess):
|
||||
# Document permission update
|
||||
created_new_doc = upsert_document_external_perms(
|
||||
db_session=db_session,
|
||||
connector_id=connector_id,
|
||||
credential_id=credential_id,
|
||||
document_ids=[doc_id],
|
||||
doc_id=permissions.doc_id,
|
||||
external_access=external_access,
|
||||
source_type=DocumentSource(source_type_str),
|
||||
)
|
||||
|
||||
if created_new_doc:
|
||||
# If a new document was created, we associate it with the cc_pair
|
||||
upsert_document_by_connector_credential_pair(
|
||||
db_session=db_session,
|
||||
connector_id=connector_id,
|
||||
credential_id=credential_id,
|
||||
document_ids=[permissions.doc_id],
|
||||
)
|
||||
else:
|
||||
# Hierarchy node permission update
|
||||
db_update_hierarchy_node_permissions(
|
||||
db_session=db_session,
|
||||
raw_node_id=permissions.raw_node_id,
|
||||
source=DocumentSource(permissions.source),
|
||||
is_public=external_access.is_public,
|
||||
external_user_emails=(
|
||||
list(external_access.external_user_emails)
|
||||
if external_access.external_user_emails
|
||||
else None
|
||||
),
|
||||
external_user_group_ids=(
|
||||
list(external_access.external_user_group_ids)
|
||||
if external_access.external_user_group_ids
|
||||
else None
|
||||
),
|
||||
)
|
||||
|
||||
elapsed = time.monotonic() - start
|
||||
task_logger.info(
|
||||
f"connector_id={connector_id} "
|
||||
f"doc={doc_id} "
|
||||
f"{element_type}={element_id} "
|
||||
f"action=update_permissions "
|
||||
f"elapsed={elapsed:.2f}"
|
||||
)
|
||||
except Exception as e:
|
||||
task_logger.exception(
|
||||
f"document_update_permissions exceptioned: "
|
||||
f"connector_id={connector_id} doc_id={doc_id}"
|
||||
f"element_update_permissions exceptioned: {element_type}={element_id}, {connector_id=} {credential_id=}"
|
||||
)
|
||||
raise e
|
||||
finally:
|
||||
task_logger.info(
|
||||
f"document_update_permissions completed: connector_id={connector_id} doc={doc_id}"
|
||||
f"element_update_permissions completed: {element_type}={element_id}, {connector_id=} {credential_id=}"
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
@@ -334,11 +334,9 @@ def fetch_assistant_unique_users_total(
|
||||
# Users can view assistant stats if they created the persona,
|
||||
# or if they are an admin
|
||||
def user_can_view_assistant_stats(
|
||||
db_session: Session, user: User | None, assistant_id: int
|
||||
db_session: Session, user: User, assistant_id: int
|
||||
) -> bool:
|
||||
# If user is None and auth is disabled, assume the user is an admin
|
||||
|
||||
if user is None or user.role == UserRole.ADMIN:
|
||||
if user.role == UserRole.ADMIN:
|
||||
return True
|
||||
|
||||
# Check if the user created the persona
backend/ee/onyx/db/hierarchy.py (new file, 67 lines)
@@ -0,0 +1,67 @@
|
||||
"""EE version of hierarchy node access control.
|
||||
|
||||
This module provides permission-aware hierarchy node access for Enterprise Edition.
|
||||
It filters hierarchy nodes based on user email and external group membership.
|
||||
"""
|
||||
|
||||
from sqlalchemy import any_
|
||||
from sqlalchemy import or_
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.dialects import postgresql
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.sql.elements import ColumnElement
|
||||
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.db.models import HierarchyNode
|
||||
|
||||
|
||||
def _build_hierarchy_access_filter(
|
||||
user_email: str | None,
|
||||
external_group_ids: list[str],
|
||||
) -> ColumnElement[bool]:
|
||||
"""Build SQLAlchemy filter for hierarchy node access.
|
||||
|
||||
A user can access a hierarchy node if any of the following are true:
|
||||
- The node is marked as public (is_public=True)
|
||||
- The user's email is in the node's external_user_emails list
|
||||
- Any of the user's external group IDs overlap with the node's external_user_group_ids
|
||||
"""
|
||||
access_filters: list[ColumnElement[bool]] = [HierarchyNode.is_public.is_(True)]
|
||||
if user_email:
|
||||
access_filters.append(any_(HierarchyNode.external_user_emails) == user_email)
|
||||
if external_group_ids:
|
||||
access_filters.append(
|
||||
HierarchyNode.external_user_group_ids.overlap(
|
||||
postgresql.array(external_group_ids)
|
||||
)
|
||||
)
|
||||
return or_(*access_filters)
|
||||
|
||||
|
||||
def _get_accessible_hierarchy_nodes_for_source(
|
||||
db_session: Session,
|
||||
source: DocumentSource,
|
||||
user_email: str | None,
|
||||
external_group_ids: list[str],
|
||||
) -> list[HierarchyNode]:
|
||||
"""
|
||||
EE version: Returns hierarchy nodes filtered by user permissions.
|
||||
|
||||
A user can access a hierarchy node if any of the following are true:
|
||||
- The node is marked as public (is_public=True)
|
||||
- The user's email is in the node's external_user_emails list
|
||||
- Any of the user's external group IDs overlap with the node's external_user_group_ids
|
||||
|
||||
Args:
|
||||
db_session: SQLAlchemy session
|
||||
source: Document source type
|
||||
user_email: User's email for permission checking
|
||||
external_group_ids: User's external group IDs for permission checking
|
||||
|
||||
Returns:
|
||||
List of HierarchyNode objects the user has access to
|
||||
"""
|
||||
stmt = select(HierarchyNode).where(HierarchyNode.source == source)
|
||||
stmt = stmt.where(_build_hierarchy_access_filter(user_email, external_group_ids))
|
||||
stmt = stmt.order_by(HierarchyNode.display_name)
|
||||
return list(db_session.execute(stmt).scalars().all())
|
||||
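A hedged usage sketch of the helpers above (illustrative only; the email and group id are made-up example values, and the functions are module-private, so real callers would go through whatever wrapper imports them):

# Illustrative sketch only; the email and group id are made-up example values.
from sqlalchemy.orm import Session

from onyx.configs.constants import DocumentSource


def list_drive_nodes_for_alice(db_session: Session) -> None:
    accessible_nodes = _get_accessible_hierarchy_nodes_for_source(
        db_session=db_session,
        source=DocumentSource.GOOGLE_DRIVE,
        user_email="alice@example.com",
        external_group_ids=["drive-group-123"],
    )
    # Each returned HierarchyNode is public, lists alice@example.com in
    # external_user_emails, or overlaps ["drive-group-123"] in
    # external_user_group_ids; results are ordered by display_name.
    for node in accessible_nodes:
        print(node.display_name)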
@@ -227,10 +227,10 @@ def update_license_cache(
|
||||
stripe_subscription_id=payload.stripe_subscription_id,
|
||||
)
|
||||
|
||||
redis_client.setex(
|
||||
redis_client.set(
|
||||
LICENSE_METADATA_KEY,
|
||||
LICENSE_CACHE_TTL_SECONDS,
|
||||
metadata.model_dump_json(),
|
||||
ex=LICENSE_CACHE_TTL_SECONDS,
|
||||
)
|
||||
|
||||
logger.info(f"License cache updated: {metadata.seats} seats, status={status.value}")
|
||||
|
||||
@@ -7,7 +7,6 @@ from sqlalchemy import select
|
||||
from sqlalchemy.orm import aliased
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.app_configs import DISABLE_AUTH
|
||||
from onyx.configs.constants import TokenRateLimitScope
|
||||
from onyx.db.models import TokenRateLimit
|
||||
from onyx.db.models import TokenRateLimit__UserGroup
|
||||
@@ -18,13 +17,15 @@ from onyx.db.models import UserRole
|
||||
from onyx.server.token_rate_limits.models import TokenRateLimitArgs
|
||||
|
||||
|
||||
def _add_user_filters(
|
||||
stmt: Select, user: User | None, get_editable: bool = True
|
||||
) -> Select:
|
||||
# If user is None and auth is disabled, assume the user is an admin
|
||||
if (user is None and DISABLE_AUTH) or (user and user.role == UserRole.ADMIN):
|
||||
def _add_user_filters(stmt: Select, user: User, get_editable: bool = True) -> Select:
|
||||
if user.role == UserRole.ADMIN:
|
||||
return stmt
|
||||
|
||||
# If anonymous user, only show global/public token_rate_limits
|
||||
if user.is_anonymous:
|
||||
where_clause = TokenRateLimit.scope == TokenRateLimitScope.GLOBAL
|
||||
return stmt.where(where_clause)
|
||||
|
||||
stmt = stmt.distinct()
|
||||
TRLimit_UG = aliased(TokenRateLimit__UserGroup)
|
||||
User__UG = aliased(User__UserGroup)
|
||||
@@ -49,11 +50,6 @@ def _add_user_filters(
|
||||
- if we are not editing, we show all token_rate_limits in the groups the user curates
|
||||
"""
|
||||
|
||||
# If user is None, this is an anonymous user and we should only show public token_rate_limits
|
||||
if user is None:
|
||||
where_clause = TokenRateLimit.scope == TokenRateLimitScope.GLOBAL
|
||||
return stmt.where(where_clause)
|
||||
|
||||
where_clause = User__UG.user_id == user.id
|
||||
if user.role == UserRole.CURATOR and get_editable:
|
||||
where_clause &= User__UG.is_curator == True # noqa: E712
|
||||
@@ -114,7 +110,7 @@ def insert_user_group_token_rate_limit(
|
||||
def fetch_user_group_token_rate_limits_for_user(
|
||||
db_session: Session,
|
||||
group_id: int,
|
||||
user: User | None,
|
||||
user: User,
|
||||
enabled_only: bool = False,
|
||||
ordered: bool = True,
|
||||
get_editable: bool = True,
|
||||
|
||||
@@ -125,7 +125,7 @@ def _cleanup_document_set__user_group_relationships__no_commit(
|
||||
|
||||
def validate_object_creation_for_user(
|
||||
db_session: Session,
|
||||
user: User | None,
|
||||
user: User,
|
||||
target_group_ids: list[int] | None = None,
|
||||
object_is_public: bool | None = None,
|
||||
object_is_perm_sync: bool | None = None,
|
||||
@@ -144,7 +144,8 @@ def validate_object_creation_for_user(
|
||||
if object_is_perm_sync and not target_group_ids:
|
||||
return
|
||||
|
||||
if not user or user.role == UserRole.ADMIN:
|
||||
# Admins are allowed
|
||||
if user.role == UserRole.ADMIN:
|
||||
return
|
||||
|
||||
# Allow curators and global curators to create public objects
|
||||
@@ -474,14 +475,15 @@ def remove_curator_status__no_commit(db_session: Session, user: User) -> None:
|
||||
def _validate_curator_relationship_update_requester(
|
||||
db_session: Session,
|
||||
user_group_id: int,
|
||||
user_making_change: User | None = None,
|
||||
user_making_change: User,
|
||||
) -> None:
|
||||
"""
|
||||
This function validates that the user making the change has the necessary permissions
|
||||
to update the curator relationship for the target user in the given user group.
|
||||
"""
|
||||
|
||||
if user_making_change is None or user_making_change.role == UserRole.ADMIN:
|
||||
# Admins can update curator relationships for any group
|
||||
if user_making_change.role == UserRole.ADMIN:
|
||||
return
|
||||
|
||||
# check if the user making the change is a curator in the group they are changing the curator relationship for
|
||||
@@ -550,7 +552,7 @@ def update_user_curator_relationship(
|
||||
db_session: Session,
|
||||
user_group_id: int,
|
||||
set_curator_request: SetCuratorRequest,
|
||||
user_making_change: User | None = None,
|
||||
user_making_change: User,
|
||||
) -> None:
|
||||
target_user = fetch_user_by_id(db_session, set_curator_request.user_id)
|
||||
if not target_user:
|
||||
@@ -599,7 +601,7 @@ def update_user_curator_relationship(
|
||||
|
||||
def add_users_to_user_group(
|
||||
db_session: Session,
|
||||
user: User | None,
|
||||
user: User,
|
||||
user_group_id: int,
|
||||
user_ids: list[UUID],
|
||||
) -> UserGroup:
|
||||
@@ -641,7 +643,7 @@ def add_users_to_user_group(
|
||||
|
||||
def update_user_group(
|
||||
db_session: Session,
|
||||
user: User | None,
|
||||
user: User,
|
||||
user_group_id: int,
|
||||
user_group_update: UserGroupUpdate,
|
||||
) -> UserGroup:
|
||||
|
||||
@@ -8,7 +8,7 @@ from collections.abc import Generator
|
||||
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
|
||||
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
|
||||
from ee.onyx.external_permissions.utils import generic_doc_sync
|
||||
from onyx.access.models import DocExternalAccess
|
||||
from onyx.access.models import ElementExternalAccess
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.confluence.connector import ConfluenceConnector
|
||||
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
|
||||
@@ -28,7 +28,7 @@ def confluence_doc_sync(
|
||||
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
|
||||
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
|
||||
callback: IndexingHeartbeatInterface | None,
|
||||
) -> Generator[DocExternalAccess, None, None]:
|
||||
) -> Generator[ElementExternalAccess, None, None]:
|
||||
"""
|
||||
Fetches document permissions from Confluence and yields DocExternalAccess objects.
|
||||
Compares fetched documents against existing documents in the DB for the connector.
|
||||
|
||||
@@ -5,6 +5,9 @@ from datetime import timezone
|
||||
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
|
||||
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
|
||||
from onyx.access.models import DocExternalAccess
|
||||
from onyx.access.models import ElementExternalAccess
|
||||
from onyx.access.models import NodeExternalAccess
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.gmail.connector import GmailConnector
|
||||
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
|
||||
from onyx.connectors.models import HierarchyNode
|
||||
@@ -39,12 +42,12 @@ def gmail_doc_sync(
|
||||
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
|
||||
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
|
||||
callback: IndexingHeartbeatInterface | None,
|
||||
) -> Generator[DocExternalAccess, None, None]:
|
||||
) -> Generator[ElementExternalAccess, None, None]:
|
||||
"""
|
||||
Adds the external permissions to the documents in postgres
|
||||
if the document doesn't already exists in postgres, we create
|
||||
Adds the external permissions to the documents and hierarchy nodes in postgres.
|
||||
If the document doesn't already exist in postgres, we create
|
||||
it in postgres so that when it gets created later, the permissions are
|
||||
already populated
|
||||
already populated.
|
||||
"""
|
||||
gmail_connector = GmailConnector(**cc_pair.connector.connector_specific_config)
|
||||
gmail_connector.load_credentials(cc_pair.credential.credential_json)
|
||||
@@ -62,7 +65,13 @@ def gmail_doc_sync(
|
||||
callback.progress("gmail_doc_sync", 1)
|
||||
|
||||
if isinstance(slim_doc, HierarchyNode):
|
||||
# TODO: handle hierarchynodes during sync
|
||||
# Yield hierarchy node permissions to be processed in outer layer
|
||||
if slim_doc.external_access:
|
||||
yield NodeExternalAccess(
|
||||
external_access=slim_doc.external_access,
|
||||
raw_node_id=slim_doc.raw_node_id,
|
||||
source=DocumentSource.GMAIL.value,
|
||||
)
|
||||
continue
|
||||
if slim_doc.external_access is None:
|
||||
logger.warning(f"No permissions found for document {slim_doc.id}")
|
||||
|
||||
@@ -10,7 +10,10 @@ from ee.onyx.external_permissions.google_drive.permission_retrieval import (
|
||||
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
|
||||
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
|
||||
from onyx.access.models import DocExternalAccess
|
||||
from onyx.access.models import ElementExternalAccess
|
||||
from onyx.access.models import ExternalAccess
|
||||
from onyx.access.models import NodeExternalAccess
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.google_drive.connector import GoogleDriveConnector
|
||||
from onyx.connectors.google_drive.models import GoogleDriveFileType
|
||||
from onyx.connectors.google_utils.resources import GoogleDriveService
|
||||
@@ -168,17 +171,101 @@ def get_external_access_for_raw_gdrive_file(
|
||||
)
|
||||
|
||||
|
||||
def get_external_access_for_folder(
|
||||
folder: GoogleDriveFileType,
|
||||
google_domain: str,
|
||||
drive_service: GoogleDriveService,
|
||||
) -> ExternalAccess:
|
||||
"""
|
||||
Extract ExternalAccess from a folder's permissions.
|
||||
|
||||
This fetches permissions using the Drive API (via permissionIds) and extracts
|
||||
user emails, group emails, and public access status.
|
||||
|
||||
Args:
|
||||
folder: The folder metadata from Google Drive API (must include permissionIds field)
|
||||
google_domain: The company's Google Workspace domain (e.g., "company.com")
|
||||
drive_service: Google Drive service for fetching permission details
|
||||
|
||||
Returns:
|
||||
ExternalAccess with extracted permission info
|
||||
"""
|
||||
folder_id = folder.get("id")
|
||||
if not folder_id:
|
||||
logger.warning("Folder missing ID, returning empty permissions")
|
||||
return ExternalAccess(
|
||||
external_user_emails=set(),
|
||||
external_user_group_ids=set(),
|
||||
is_public=False,
|
||||
)
|
||||
|
||||
# Get permission IDs from folder metadata
|
||||
permission_ids = folder.get("permissionIds") or []
|
||||
if not permission_ids:
|
||||
logger.debug(f"No permissionIds found for folder {folder_id}")
|
||||
return ExternalAccess(
|
||||
external_user_emails=set(),
|
||||
external_user_group_ids=set(),
|
||||
is_public=False,
|
||||
)
|
||||
|
||||
# Fetch full permission objects using the permission IDs
|
||||
permissions_list = get_permissions_by_ids(
|
||||
drive_service=drive_service,
|
||||
doc_id=folder_id,
|
||||
permission_ids=permission_ids,
|
||||
)
|
||||
|
||||
user_emails: set[str] = set()
|
||||
group_emails: set[str] = set()
|
||||
is_public = False
|
||||
|
||||
for permission in permissions_list:
|
||||
if permission.type == PermissionType.USER:
|
||||
if permission.email_address:
|
||||
user_emails.add(permission.email_address)
|
||||
else:
|
||||
logger.warning(f"User permission without email for folder {folder_id}")
|
||||
elif permission.type == PermissionType.GROUP:
|
||||
# Groups are represented as email addresses in Google Drive
|
||||
if permission.email_address:
|
||||
group_emails.add(permission.email_address)
|
||||
else:
|
||||
logger.warning(f"Group permission without email for folder {folder_id}")
|
||||
elif permission.type == PermissionType.DOMAIN:
|
||||
# Domain permission - check if it matches company domain
|
||||
if permission.domain == google_domain:
|
||||
# Only public if discoverable (allowFileDiscovery is not False)
|
||||
# If allowFileDiscovery is False, it's "link only" access
|
||||
is_public = permission.allow_file_discovery is not False
|
||||
else:
|
||||
logger.debug(
|
||||
f"Domain permission for {permission.domain} does not match "
|
||||
f"company domain {google_domain} for folder {folder_id}"
|
||||
)
|
||||
elif permission.type == PermissionType.ANYONE:
|
||||
# Only public if discoverable (allowFileDiscovery is not False)
|
||||
# If allowFileDiscovery is False, it's "link only" access
|
||||
is_public = permission.allow_file_discovery is not False
|
||||
|
||||
return ExternalAccess(
|
||||
external_user_emails=user_emails,
|
||||
external_user_group_ids=group_emails,
|
||||
is_public=is_public,
|
||||
)
|
||||
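The subtle part of the permission mapping above is discoverability: a folder shared as "anyone with the link" (allowFileDiscovery=False) is not treated as public. The sketch below is illustrative only; it restates the domain/anyone branches as a standalone function with made-up inputs rather than calling the Drive API.

# Illustrative sketch of the per-permission is_public decision only (not the library API).
def _is_public_from_permission(
    perm_type: str,  # "user" | "group" | "domain" | "anyone"
    perm_domain: str | None,
    allow_file_discovery: bool | None,
    company_domain: str,
) -> bool:
    if perm_type == "anyone":
        # "Anyone with the link" (allow_file_discovery is False) is NOT public.
        return allow_file_discovery is not False
    if perm_type == "domain" and perm_domain == company_domain:
        return allow_file_discovery is not False
    return False

assert _is_public_from_permission("anyone", None, None, "company.com") is True
assert _is_public_from_permission("anyone", None, False, "company.com") is False
assert _is_public_from_permission("domain", "company.com", True, "company.com") is True
assert _is_public_from_permission("domain", "other.com", True, "company.com") is False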
|
||||
|
||||
def gdrive_doc_sync(
|
||||
cc_pair: ConnectorCredentialPair,
|
||||
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
|
||||
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
|
||||
callback: IndexingHeartbeatInterface | None,
|
||||
) -> Generator[DocExternalAccess, None, None]:
|
||||
) -> Generator[ElementExternalAccess, None, None]:
|
||||
"""
|
||||
Adds the external permissions to the documents in postgres
|
||||
if the document doesn't already exists in postgres, we create
|
||||
Adds the external permissions to the documents and hierarchy nodes in postgres.
|
||||
If the document doesn't already exist in postgres, we create
|
||||
it in postgres so that when it gets created later, the permissions are
|
||||
already populated
|
||||
already populated.
|
||||
"""
|
||||
google_drive_connector = GoogleDriveConnector(
|
||||
**cc_pair.connector.connector_specific_config
|
||||
@@ -197,7 +284,13 @@ def gdrive_doc_sync(
|
||||
|
||||
callback.progress("gdrive_doc_sync", 1)
|
||||
if isinstance(slim_doc, HierarchyNode):
|
||||
# TODO: handle hierarchynodes during sync
|
||||
# Yield hierarchy node permissions to be processed in outer layer
|
||||
if slim_doc.external_access:
|
||||
yield NodeExternalAccess(
|
||||
external_access=slim_doc.external_access,
|
||||
raw_node_id=slim_doc.raw_node_id,
|
||||
source=DocumentSource.GOOGLE_DRIVE.value,
|
||||
)
|
||||
continue
|
||||
if slim_doc.external_access is None:
|
||||
raise ValueError(
|
||||
|
||||
@@ -30,6 +30,10 @@ class GoogleDrivePermission(BaseModel):
|
||||
type: PermissionType
|
||||
domain: str | None # only applies to domain permissions
|
||||
permission_details: GoogleDrivePermissionDetails | None
|
||||
# Whether this permission makes the file discoverable in search
|
||||
# False means "anyone with the link" (not searchable/discoverable)
|
||||
# Only applicable for domain/anyone permission types
|
||||
allow_file_discovery: bool | None
|
||||
|
||||
@classmethod
|
||||
def from_drive_permission(
|
||||
@@ -46,6 +50,7 @@ class GoogleDrivePermission(BaseModel):
|
||||
email_address=drive_permission.get("emailAddress"),
|
||||
type=PermissionType(drive_permission["type"]),
|
||||
domain=drive_permission.get("domain"),
|
||||
allow_file_discovery=drive_permission.get("allowFileDiscovery"),
|
||||
permission_details=(
|
||||
GoogleDrivePermissionDetails(
|
||||
permission_type=permission_details.get("type"),
|
||||
|
||||
@@ -36,7 +36,7 @@ def get_permissions_by_ids(
|
||||
retrieval_function=drive_service.permissions().list,
|
||||
list_key="permissions",
|
||||
fileId=doc_id,
|
||||
fields="permissions(id, emailAddress, type, domain, permissionDetails),nextPageToken",
|
||||
fields="permissions(id, emailAddress, type, domain, allowFileDiscovery, permissionDetails),nextPageToken",
|
||||
supportsAllDrives=True,
|
||||
continue_on_404_or_403=True,
|
||||
)
|
||||
|
||||
@@ -3,7 +3,7 @@ from collections.abc import Generator
|
||||
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
|
||||
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
|
||||
from ee.onyx.external_permissions.utils import generic_doc_sync
|
||||
from onyx.access.models import DocExternalAccess
|
||||
from onyx.access.models import ElementExternalAccess
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.jira.connector import JiraConnector
|
||||
from onyx.db.models import ConnectorCredentialPair
|
||||
@@ -20,7 +20,7 @@ def jira_doc_sync(
|
||||
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
|
||||
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
|
||||
callback: IndexingHeartbeatInterface | None = None,
|
||||
) -> Generator[DocExternalAccess, None, None]:
|
||||
) -> Generator[ElementExternalAccess, None, None]:
|
||||
jira_connector = JiraConnector(
|
||||
**cc_pair.connector.connector_specific_config,
|
||||
)
|
||||
|
||||
@@ -5,6 +5,8 @@ from typing import Protocol
|
||||
|
||||
from ee.onyx.db.external_perm import ExternalUserGroup # noqa
|
||||
from onyx.access.models import DocExternalAccess # noqa
|
||||
from onyx.access.models import ElementExternalAccess # noqa
|
||||
from onyx.access.models import NodeExternalAccess # noqa
|
||||
from onyx.context.search.models import InferenceChunk
|
||||
from onyx.db.models import ConnectorCredentialPair # noqa
|
||||
from onyx.db.utils import DocumentRow
|
||||
@@ -53,7 +55,7 @@ DocSyncFuncType = Callable[
|
||||
FetchAllDocumentsIdsFunction,
|
||||
Optional[IndexingHeartbeatInterface],
|
||||
],
|
||||
Generator[DocExternalAccess, None, None],
|
||||
Generator[ElementExternalAccess, None, None],
|
||||
]
|
||||
|
||||
GroupSyncFuncType = Callable[
|
||||
|
||||
@@ -34,21 +34,21 @@ def _get_all_censoring_enabled_sources() -> set[DocumentSource]:
|
||||
# NOTE: This is only called if ee is enabled.
|
||||
def _post_query_chunk_censoring(
|
||||
chunks: list[InferenceChunk],
|
||||
user: User | None,
|
||||
user: User,
|
||||
) -> list[InferenceChunk]:
|
||||
"""
|
||||
This function checks all chunks to see if they need to be sent to a censoring
|
||||
function. If they do, it sends them to the censoring function and returns the
|
||||
censored chunks. If they don't, it returns the original chunks.
|
||||
"""
|
||||
if user is None:
|
||||
# if user is None, permissions are not enforced
|
||||
return chunks
|
||||
sources_to_censor = _get_all_censoring_enabled_sources()
|
||||
|
||||
# Anonymous users can only access public (non-permission-synced) content
|
||||
if user.is_anonymous:
|
||||
return [chunk for chunk in chunks if chunk.source_type not in sources_to_censor]
|
||||
|
||||
final_chunk_dict: dict[str, InferenceChunk] = {}
|
||||
chunks_to_process: dict[DocumentSource, list[InferenceChunk]] = {}
|
||||
|
||||
sources_to_censor = _get_all_censoring_enabled_sources()
|
||||
for chunk in chunks:
|
||||
# Separate out chunks that require permission post-processing by source
|
||||
if chunk.source_type in sources_to_censor:
|
||||
|
||||
@@ -3,7 +3,7 @@ from collections.abc import Generator
|
||||
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
|
||||
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
|
||||
from ee.onyx.external_permissions.utils import generic_doc_sync
|
||||
from onyx.access.models import DocExternalAccess
|
||||
from onyx.access.models import ElementExternalAccess
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.sharepoint.connector import SharepointConnector
|
||||
from onyx.db.models import ConnectorCredentialPair
|
||||
@@ -20,7 +20,7 @@ def sharepoint_doc_sync(
|
||||
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
|
||||
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
|
||||
callback: IndexingHeartbeatInterface | None = None,
|
||||
) -> Generator[DocExternalAccess, None, None]:
|
||||
) -> Generator[ElementExternalAccess, None, None]:
|
||||
sharepoint_connector = SharepointConnector(
|
||||
**cc_pair.connector.connector_specific_config,
|
||||
)
|
||||
|
||||
@@ -3,7 +3,7 @@ from collections.abc import Generator
|
||||
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
|
||||
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
|
||||
from ee.onyx.external_permissions.utils import generic_doc_sync
|
||||
from onyx.access.models import DocExternalAccess
|
||||
from onyx.access.models import ElementExternalAccess
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.teams.connector import TeamsConnector
|
||||
from onyx.db.models import ConnectorCredentialPair
|
||||
@@ -21,7 +21,7 @@ def teams_doc_sync(
|
||||
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
|
||||
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
|
||||
callback: IndexingHeartbeatInterface | None,
|
||||
) -> Generator[DocExternalAccess, None, None]:
|
||||
) -> Generator[ElementExternalAccess, None, None]:
|
||||
teams_connector = TeamsConnector(
|
||||
**cc_pair.connector.connector_specific_config,
|
||||
)
|
||||
|
||||
@@ -2,7 +2,9 @@ from collections.abc import Generator
|
||||
|
||||
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
|
||||
from onyx.access.models import DocExternalAccess
|
||||
from onyx.access.models import ElementExternalAccess
|
||||
from onyx.access.models import ExternalAccess
|
||||
from onyx.access.models import NodeExternalAccess
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.interfaces import SlimConnectorWithPermSync
|
||||
from onyx.connectors.models import HierarchyNode
|
||||
@@ -20,7 +22,7 @@ def generic_doc_sync(
|
||||
doc_source: DocumentSource,
|
||||
slim_connector: SlimConnectorWithPermSync,
|
||||
label: str,
|
||||
) -> Generator[DocExternalAccess, None, None]:
|
||||
) -> Generator[ElementExternalAccess, None, None]:
|
||||
"""
|
||||
A convenience function for performing a generic document synchronization.
|
||||
|
||||
@@ -30,7 +32,7 @@ def generic_doc_sync(
|
||||
- fetching *all* new (slim) docs
|
||||
- yielding external-access permissions for existing docs which do not exist in the newly fetched slim-docs set (with their
|
||||
`external_access` set to "private")
|
||||
- yielding external-access permissions for newly fetched docs
|
||||
- yielding external-access permissions for newly fetched docs and hierarchy nodes
|
||||
|
||||
Returns:
|
||||
A `Generator` which yields existing and newly fetched external-access permissions.
|
||||
@@ -51,7 +53,13 @@ def generic_doc_sync(
|
||||
|
||||
for doc in doc_batch:
|
||||
if isinstance(doc, HierarchyNode):
|
||||
# TODO: handle hierarchynodes during sync
|
||||
# Yield hierarchy node permissions to be processed in outer layer
|
||||
if doc.external_access:
|
||||
yield NodeExternalAccess(
|
||||
external_access=doc.external_access,
|
||||
raw_node_id=doc.raw_node_id,
|
||||
source=doc_source.value,
|
||||
)
|
||||
continue
|
||||
if not doc.external_access:
|
||||
raise RuntimeError(
|
||||
|
||||
@@ -41,7 +41,7 @@ def _run_single_search(
|
||||
query: str,
|
||||
filters: BaseFilters | None,
|
||||
document_index: DocumentIndex,
|
||||
user: User | None,
|
||||
user: User,
|
||||
db_session: Session,
|
||||
num_hits: int | None = None,
|
||||
) -> list[InferenceChunk]:
|
||||
@@ -63,7 +63,7 @@ def _run_single_search(
|
||||
|
||||
def stream_search_query(
|
||||
request: SendSearchQueryRequest,
|
||||
user: User | None,
|
||||
user: User,
|
||||
db_session: Session,
|
||||
) -> Generator[
|
||||
SearchQueriesPacket | SearchDocsPacket | LLMSelectedDocsPacket | SearchErrorPacket,
|
||||
@@ -101,8 +101,7 @@ def stream_search_query(
|
||||
# Build list of all executed queries for tracking
|
||||
all_executed_queries = [original_query] + keyword_expansions
|
||||
|
||||
# TODO remove this check, user should not be None
|
||||
if user is not None:
|
||||
if not user.is_anonymous:
|
||||
create_search_query(
|
||||
db_session=db_session,
|
||||
user_id=user.id,
|
||||
|
||||
@@ -40,7 +40,7 @@ class QueryAnalyticsResponse(BaseModel):
|
||||
def get_query_analytics(
|
||||
start: datetime.datetime | None = None,
|
||||
end: datetime.datetime | None = None,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[QueryAnalyticsResponse]:
|
||||
daily_query_usage_info = fetch_query_analytics(
|
||||
@@ -71,7 +71,7 @@ class UserAnalyticsResponse(BaseModel):
|
||||
def get_user_analytics(
|
||||
start: datetime.datetime | None = None,
|
||||
end: datetime.datetime | None = None,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[UserAnalyticsResponse]:
|
||||
daily_query_usage_info_per_user = fetch_per_user_query_analytics(
|
||||
@@ -105,7 +105,7 @@ class OnyxbotAnalyticsResponse(BaseModel):
|
||||
def get_onyxbot_analytics(
|
||||
start: datetime.datetime | None = None,
|
||||
end: datetime.datetime | None = None,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[OnyxbotAnalyticsResponse]:
|
||||
daily_onyxbot_info = fetch_onyxbot_analytics(
|
||||
@@ -141,7 +141,7 @@ def get_persona_messages(
|
||||
persona_id: int,
|
||||
start: datetime.datetime | None = None,
|
||||
end: datetime.datetime | None = None,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[PersonaMessageAnalyticsResponse]:
|
||||
"""Fetch daily message counts for a single persona within the given time range."""
|
||||
@@ -179,7 +179,7 @@ def get_persona_unique_users(
|
||||
persona_id: int,
|
||||
start: datetime.datetime,
|
||||
end: datetime.datetime,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[PersonaUniqueUsersResponse]:
|
||||
"""Get unique users per day for a single persona."""
|
||||
@@ -218,7 +218,7 @@ def get_assistant_stats(
|
||||
assistant_id: int,
|
||||
start: datetime.datetime | None = None,
|
||||
end: datetime.datetime | None = None,
|
||||
user: User | None = Depends(current_user),
|
||||
user: User = Depends(current_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> AssistantStatsResponse:
|
||||
"""
|
||||
|
||||
@@ -27,6 +27,7 @@ import httpx
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from fastapi import HTTPException
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ee.onyx.auth.users import current_admin_user
|
||||
@@ -56,6 +57,7 @@ from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
|
||||
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
|
||||
from onyx.configs.app_configs import WEB_DOMAIN
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.redis.redis_pool import get_shared_redis_client
|
||||
from onyx.utils.logger import setup_logger
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
from shared_configs.contextvars import get_current_tenant_id
|
||||
@@ -68,6 +70,63 @@ router = APIRouter(prefix="/admin/billing")
|
||||
_stripe_publishable_key_cache: str | None = None
|
||||
_stripe_key_lock = asyncio.Lock()
|
||||
|
||||
# Redis key for billing circuit breaker (self-hosted only)
|
||||
# When set, billing requests to Stripe are disabled until user manually retries
|
||||
BILLING_CIRCUIT_BREAKER_KEY = "billing_circuit_open"
|
||||
# Circuit breaker auto-expires after 1 hour (user can manually retry sooner)
|
||||
BILLING_CIRCUIT_BREAKER_TTL_SECONDS = 3600
|
||||
|
||||
|
||||
def _is_billing_circuit_open() -> bool:
|
||||
"""Check if the billing circuit breaker is open (self-hosted only)."""
|
||||
if MULTI_TENANT:
|
||||
return False
|
||||
try:
|
||||
redis_client = get_shared_redis_client()
|
||||
is_open = bool(redis_client.exists(BILLING_CIRCUIT_BREAKER_KEY))
|
||||
logger.debug(
|
||||
f"Circuit breaker check: key={BILLING_CIRCUIT_BREAKER_KEY}, is_open={is_open}"
|
||||
)
|
||||
return is_open
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to check circuit breaker: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def _open_billing_circuit() -> None:
|
||||
"""Open the billing circuit breaker after a failure (self-hosted only)."""
|
||||
if MULTI_TENANT:
|
||||
return
|
||||
try:
|
||||
redis_client = get_shared_redis_client()
|
||||
redis_client.set(
|
||||
BILLING_CIRCUIT_BREAKER_KEY,
|
||||
"1",
|
||||
ex=BILLING_CIRCUIT_BREAKER_TTL_SECONDS,
|
||||
)
|
||||
# Verify it was set
|
||||
exists = redis_client.exists(BILLING_CIRCUIT_BREAKER_KEY)
|
||||
logger.warning(
|
||||
f"Billing circuit breaker opened (TTL={BILLING_CIRCUIT_BREAKER_TTL_SECONDS}s, "
|
||||
f"verified={exists}). Stripe billing requests are disabled until manually reset."
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to open circuit breaker: {e}")
|
||||
|
||||
|
||||
def _close_billing_circuit() -> None:
|
||||
"""Close the billing circuit breaker (re-enable Stripe requests)."""
|
||||
if MULTI_TENANT:
|
||||
return
|
||||
try:
|
||||
redis_client = get_shared_redis_client()
|
||||
redis_client.delete(BILLING_CIRCUIT_BREAKER_KEY)
|
||||
logger.info(
|
||||
"Billing circuit breaker closed. Stripe billing requests re-enabled."
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to close circuit breaker: {e}")
|
||||
|
||||
|
||||
def _get_license_data(db_session: Session) -> str | None:
|
||||
"""Get license data from database if exists (self-hosted only)."""
|
||||
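Taken together, the helpers above form a simple manual-reset circuit breaker. The sketch below is illustrative only; it just strings the helpers together to show the intended lifecycle on a self-hosted (non-multi-tenant) deployment.

# Illustrative sketch only: simulates the breaker lifecycle using the constants
# and helpers defined above.
def demo_circuit_breaker_lifecycle() -> None:
    _open_billing_circuit()  # e.g. after a 502/503/504 from the control plane
    assert _is_billing_circuit_open()  # billing endpoints now fail fast with HTTP 503
    _close_billing_circuit()  # what POST /admin/billing/reset-connection does
    assert not _is_billing_circuit_open()
    # If never reset manually, the Redis key expires on its own after
    # BILLING_CIRCUIT_BREAKER_TTL_SECONDS (1 hour).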
@@ -102,6 +161,7 @@ async def create_checkout_session(
|
||||
license_data = _get_license_data(db_session)
|
||||
tenant_id = _get_tenant_id()
|
||||
billing_period = request.billing_period if request else "monthly"
|
||||
seats = request.seats if request else None
|
||||
email = request.email if request else None
|
||||
|
||||
# Build redirect URL for after checkout completion
|
||||
@@ -110,6 +170,7 @@ async def create_checkout_session(
|
||||
try:
|
||||
return await create_checkout_service(
|
||||
billing_period=billing_period,
|
||||
seats=seats,
|
||||
email=email,
|
||||
license_data=license_data,
|
||||
redirect_url=redirect_url,
|
||||
@@ -156,6 +217,8 @@ async def get_billing_information(
|
||||
"""Get billing information for the current subscription.
|
||||
|
||||
Returns subscription status and details from Stripe.
|
||||
For self-hosted: If the circuit breaker is open (previous failure),
|
||||
returns a 503 error without making the request.
|
||||
"""
|
||||
license_data = _get_license_data(db_session)
|
||||
tenant_id = _get_tenant_id()
|
||||
@@ -164,12 +227,22 @@ async def get_billing_information(
|
||||
if not MULTI_TENANT and not license_data:
|
||||
return SubscriptionStatusResponse(subscribed=False)
|
||||
|
||||
# Check circuit breaker (self-hosted only)
|
||||
if _is_billing_circuit_open():
|
||||
raise HTTPException(
|
||||
status_code=503,
|
||||
detail="Stripe connection temporarily disabled. Click 'Connect to Stripe' to retry.",
|
||||
)
|
||||
|
||||
try:
|
||||
return await get_billing_service(
|
||||
license_data=license_data,
|
||||
tenant_id=tenant_id,
|
||||
)
|
||||
except BillingServiceError as e:
|
||||
# Open circuit breaker on connection failures (self-hosted only)
|
||||
if e.status_code in (502, 503, 504):
|
||||
_open_billing_circuit()
|
||||
raise HTTPException(status_code=e.status_code, detail=e.message)
|
||||
|
||||
|
||||
@@ -182,6 +255,8 @@ async def update_seats(
|
||||
"""Update the seat count for the current subscription.
|
||||
|
||||
Handles Stripe proration and license regeneration via control plane.
|
||||
For self-hosted, the frontend should call /license/claim after a short delay
|
||||
to fetch the regenerated license.
|
||||
"""
|
||||
license_data = _get_license_data(db_session)
|
||||
tenant_id = _get_tenant_id()
|
||||
@@ -191,11 +266,17 @@ async def update_seats(
|
||||
raise HTTPException(status_code=400, detail="No license found")
|
||||
|
||||
try:
|
||||
return await update_seat_service(
|
||||
result = await update_seat_service(
|
||||
new_seat_count=request.new_seat_count,
|
||||
license_data=license_data,
|
||||
tenant_id=tenant_id,
|
||||
)
|
||||
|
||||
# Note: Don't store license here - the control plane may still be processing
|
||||
# the subscription update. The frontend should call /license/claim after a
|
||||
# short delay to get the freshly generated license.
|
||||
|
||||
return result
|
||||
except BillingServiceError as e:
|
||||
raise HTTPException(status_code=e.status_code, detail=e.message)
|
||||
|
||||
@@ -262,3 +343,31 @@ async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
|
||||
status_code=500,
|
||||
detail="Failed to fetch Stripe publishable key",
|
||||
)
|
||||
|
||||
|
||||
class ResetConnectionResponse(BaseModel):
|
||||
success: bool
|
||||
message: str
|
||||
|
||||
|
||||
@router.post("/reset-connection")
|
||||
async def reset_stripe_connection(
|
||||
_: User = Depends(current_admin_user),
|
||||
) -> ResetConnectionResponse:
|
||||
"""Reset the Stripe connection circuit breaker.
|
||||
|
||||
Called when user clicks "Connect to Stripe" to retry after a previous failure.
|
||||
This clears the circuit breaker flag, allowing billing requests to proceed again.
|
||||
Self-hosted only - cloud deployments don't use the circuit breaker.
|
||||
"""
|
||||
if MULTI_TENANT:
|
||||
return ResetConnectionResponse(
|
||||
success=True,
|
||||
message="Circuit breaker not applicable for cloud deployments",
|
||||
)
|
||||
|
||||
_close_billing_circuit()
|
||||
return ResetConnectionResponse(
|
||||
success=True,
|
||||
message="Stripe connection reset. Billing requests re-enabled.",
|
||||
)
|
||||
|
||||
@@ -10,6 +10,7 @@ class CreateCheckoutSessionRequest(BaseModel):
|
||||
"""Request to create a Stripe checkout session."""
|
||||
|
||||
billing_period: Literal["monthly", "annual"] = "monthly"
|
||||
seats: int | None = None
|
||||
email: str | None = None
|
||||
|
||||
|
||||
@@ -67,6 +68,7 @@ class SeatUpdateResponse(BaseModel):
|
||||
current_seats: int
|
||||
used_seats: int
|
||||
message: str | None = None
|
||||
license: str | None = None # Regenerated license (self-hosted stores this)
|
||||
|
||||
|
||||
class StripePublishableKeyResponse(BaseModel):
|
||||
|
||||
@@ -103,6 +103,7 @@ async def _make_billing_request(
|
||||
Raises:
|
||||
BillingServiceError: If request fails
|
||||
"""
|
||||
|
||||
base_url = _get_base_url()
|
||||
url = f"{base_url}{path}"
|
||||
headers = _get_headers(license_data)
|
||||
@@ -134,6 +135,7 @@ async def _make_billing_request(
|
||||
|
||||
async def create_checkout_session(
|
||||
billing_period: str = "monthly",
|
||||
seats: int | None = None,
|
||||
email: str | None = None,
|
||||
license_data: str | None = None,
|
||||
redirect_url: str | None = None,
|
||||
@@ -143,6 +145,7 @@ async def create_checkout_session(
|
||||
|
||||
Args:
|
||||
billing_period: "monthly" or "annual"
|
||||
seats: Number of seats to purchase (optional, uses default if not provided)
|
||||
email: Customer email for new subscriptions
|
||||
license_data: Existing license for renewals (self-hosted)
|
||||
redirect_url: URL to redirect after successful checkout
|
||||
@@ -152,6 +155,8 @@ async def create_checkout_session(
|
||||
CreateCheckoutSessionResponse with checkout URL
|
||||
"""
|
||||
body: dict = {"billing_period": billing_period}
|
||||
if seats is not None:
|
||||
body["seats"] = seats
|
||||
if email:
|
||||
body["email"] = email
|
||||
if redirect_url:
|
||||
@@ -264,4 +269,5 @@ async def update_seat_count(
|
||||
current_seats=data.get("current_seats", 0),
|
||||
used_seats=data.get("used_seats", 0),
|
||||
message=data.get("message"),
|
||||
license=data.get("license"),
|
||||
)
|
||||
|
||||
@@ -115,7 +115,7 @@ async def refresh_access_token(
|
||||
|
||||
@admin_router.put("")
|
||||
def admin_ee_put_settings(
|
||||
settings: EnterpriseSettings, _: User | None = Depends(current_admin_user)
|
||||
settings: EnterpriseSettings, _: User = Depends(current_admin_user)
|
||||
) -> None:
|
||||
store_settings(settings)
|
||||
|
||||
@@ -134,7 +134,7 @@ def ee_fetch_settings() -> EnterpriseSettings:
|
||||
def put_logo(
|
||||
file: UploadFile,
|
||||
is_logotype: bool = False,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
) -> None:
|
||||
upload_logo(file=file, is_logotype=is_logotype)
|
||||
|
||||
@@ -187,7 +187,7 @@ def fetch_logo(
|
||||
|
||||
@admin_router.put("/custom-analytics-script")
|
||||
def upload_custom_analytics_script(
|
||||
script_upload: AnalyticsScriptUpload, _: User | None = Depends(current_admin_user)
|
||||
script_upload: AnalyticsScriptUpload, _: User = Depends(current_admin_user)
|
||||
) -> None:
|
||||
try:
|
||||
store_analytics_script(script_upload)
|
||||
|
||||
@@ -21,6 +21,7 @@ from sqlalchemy.orm import Session
from ee.onyx.auth.users import current_admin_user
from ee.onyx.configs.app_configs import CLOUD_DATA_PLANE_URL
from ee.onyx.db.license import delete_license as db_delete_license
from ee.onyx.db.license import get_license
from ee.onyx.db.license import get_license_metadata
from ee.onyx.db.license import invalidate_license_cache
from ee.onyx.db.license import refresh_license_cache

@@ -90,24 +91,21 @@ async def get_seat_usage(

@router.post("/claim")
async def claim_license(
session_id: str,
session_id: str | None = None,
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> LicenseResponse:
"""
Claim a license after Stripe checkout (self-hosted only).
Claim a license from the control plane (self-hosted only).

After a user completes Stripe checkout, they're redirected back with a
session_id. This endpoint exchanges that session_id for a signed license
via the cloud data plane proxy.
Two modes:
1. With session_id: After Stripe checkout, exchange session_id for license
2. Without session_id: Re-claim using existing license for auth

Flow:
1. Self-hosted frontend redirects to Stripe checkout (via cloud proxy)
2. User completes payment
3. Stripe redirects back to self-hosted instance with session_id
4. Frontend calls this endpoint with session_id
5. We call cloud data plane /proxy/claim-license to get the signed license
6. License is stored locally and cached
Use without session_id after:
- Updating seats via the billing API
- Returning from the Stripe customer portal
- Any operation that regenerates the license on control plane
"""
if MULTI_TENANT:
raise HTTPException(

@@ -116,14 +114,40 @@ async def claim_license(
)

try:
# Call cloud data plane to claim the license
url = f"{CLOUD_DATA_PLANE_URL}/proxy/claim-license"
response = requests.post(
url,
json={"session_id": session_id},
headers={"Content-Type": "application/json"},
timeout=30,
)
if session_id:
# Claim license after checkout using session_id
url = f"{CLOUD_DATA_PLANE_URL}/proxy/claim-license"
response = requests.post(
url,
json={"session_id": session_id},
headers={"Content-Type": "application/json"},
timeout=30,
)
else:
# Re-claim using existing license for auth
metadata = get_license_metadata(db_session)
if not metadata or not metadata.tenant_id:
raise HTTPException(
status_code=400,
detail="No license found. Provide session_id after checkout.",
)

license_row = get_license(db_session)
if not license_row or not license_row.license_data:
raise HTTPException(
status_code=400, detail="No license found in database"
)

url = f"{CLOUD_DATA_PLANE_URL}/proxy/license/{metadata.tenant_id}"
response = requests.get(
url,
headers={
"Authorization": f"Bearer {license_row.license_data}",
"Content-Type": "application/json",
},
timeout=30,
)

response.raise_for_status()

data = response.json()
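A rough sketch of the two call shapes a self-hosted frontend or admin script might use against this endpoint (mounted at /api/license/claim per the proxy comments later in this diff). The base URL and the use of an authenticated requests.Session are assumptions.

import requests

BASE_URL = "https://onyx.example.com"  # hypothetical self-hosted instance


def claim_after_checkout(session: requests.Session, session_id: str) -> dict:
    # Mode 1: exchange the Stripe checkout session_id for a signed license.
    resp = session.post(
        f"{BASE_URL}/api/license/claim",
        params={"session_id": session_id},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()


def reclaim_existing_license(session: requests.Session) -> dict:
    # Mode 2: no session_id; the backend authenticates against the control plane
    # with the stored license and pulls down the regenerated one.
    resp = session.post(f"{BASE_URL}/api/license/claim", timeout=30)
    resp.raise_for_status()
    return resp.json()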
@@ -27,7 +27,7 @@ router = APIRouter(prefix="/manage")
|
||||
def create_standard_answer(
|
||||
standard_answer_creation_request: StandardAnswerCreationRequest,
|
||||
db_session: Session = Depends(get_session),
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
) -> StandardAnswer:
|
||||
standard_answer_model = insert_standard_answer(
|
||||
keyword=standard_answer_creation_request.keyword,
|
||||
@@ -43,7 +43,7 @@ def create_standard_answer(
|
||||
@router.get("/admin/standard-answer")
|
||||
def list_standard_answers(
|
||||
db_session: Session = Depends(get_session),
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
) -> list[StandardAnswer]:
|
||||
standard_answer_models = fetch_standard_answers(db_session=db_session)
|
||||
return [
|
||||
@@ -57,7 +57,7 @@ def patch_standard_answer(
|
||||
standard_answer_id: int,
|
||||
standard_answer_creation_request: StandardAnswerCreationRequest,
|
||||
db_session: Session = Depends(get_session),
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
) -> StandardAnswer:
|
||||
existing_standard_answer = fetch_standard_answer(
|
||||
standard_answer_id=standard_answer_id,
|
||||
@@ -83,7 +83,7 @@ def patch_standard_answer(
|
||||
def delete_standard_answer(
|
||||
standard_answer_id: int,
|
||||
db_session: Session = Depends(get_session),
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
) -> None:
|
||||
return remove_standard_answer(
|
||||
standard_answer_id=standard_answer_id,
|
||||
@@ -95,7 +95,7 @@ def delete_standard_answer(
|
||||
def create_standard_answer_category(
|
||||
standard_answer_category_creation_request: StandardAnswerCategoryCreationRequest,
|
||||
db_session: Session = Depends(get_session),
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
) -> StandardAnswerCategory:
|
||||
standard_answer_category_model = insert_standard_answer_category(
|
||||
category_name=standard_answer_category_creation_request.name,
|
||||
@@ -107,7 +107,7 @@ def create_standard_answer_category(
|
||||
@router.get("/admin/standard-answer/category")
|
||||
def list_standard_answer_categories(
|
||||
db_session: Session = Depends(get_session),
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
) -> list[StandardAnswerCategory]:
|
||||
standard_answer_category_models = fetch_standard_answer_categories(
|
||||
db_session=db_session
|
||||
@@ -123,7 +123,7 @@ def patch_standard_answer_category(
|
||||
standard_answer_category_id: int,
|
||||
standard_answer_category_creation_request: StandardAnswerCategoryCreationRequest,
|
||||
db_session: Session = Depends(get_session),
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
) -> StandardAnswerCategory:
|
||||
existing_standard_answer_category = fetch_standard_answer_category(
|
||||
standard_answer_category_id=standard_answer_category_id,
|
||||
|
||||
@@ -22,7 +22,7 @@ basic_router = APIRouter(prefix="/query")
|
||||
def get_standard_answer(
|
||||
request: StandardAnswerRequest,
|
||||
db_session: Session = Depends(get_session),
|
||||
_: User | None = Depends(current_user),
|
||||
_: User = Depends(current_user),
|
||||
) -> StandardAnswerResponse:
|
||||
try:
|
||||
standard_answers = oneoff_standard_answers(
|
||||
|
||||
@@ -37,8 +37,7 @@ router = APIRouter(prefix="/search")
|
||||
@router.post("/search-flow-classification")
|
||||
def search_flow_classification(
|
||||
request: SearchFlowClassificationRequest,
|
||||
# This is added just to ensure this endpoint isn't spammed by non-authorized users since there's an LLM call underneath it
|
||||
_: User | None = Depends(current_user),
|
||||
_: User = Depends(current_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> SearchFlowClassificationResponse:
|
||||
query = request.user_query
|
||||
@@ -70,7 +69,7 @@ def search_flow_classification(
|
||||
@router.post("/send-search-message", response_model=None)
|
||||
def handle_send_search_message(
|
||||
request: SendSearchQueryRequest,
|
||||
user: User | None = Depends(current_user),
|
||||
user: User = Depends(current_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> StreamingResponse | SearchFullResponse:
|
||||
"""
|
||||
@@ -114,7 +113,7 @@ def handle_send_search_message(
|
||||
def get_search_history(
|
||||
limit: int = 100,
|
||||
filter_days: int | None = None,
|
||||
user: User | None = Depends(current_user),
|
||||
user: User = Depends(current_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> SearchHistoryResponse:
|
||||
"""
|
||||
@@ -146,11 +145,6 @@ def get_search_history(
|
||||
detail="filter_days must be greater than 0",
|
||||
)
|
||||
|
||||
# TODO(yuhong) remove this
|
||||
if user is None:
|
||||
# Return empty list for unauthenticated users
|
||||
return SearchHistoryResponse(search_queries=[])
|
||||
|
||||
search_queries = fetch_search_queries_for_user(
|
||||
db_session=db_session,
|
||||
user_id=user.id,
|
||||
|
||||
@@ -28,9 +28,9 @@ from onyx.server.query_and_chat.token_limit import _user_is_rate_limited_by_glob
|
||||
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
|
||||
|
||||
|
||||
def _check_token_rate_limits(user: User | None) -> None:
|
||||
if user is None:
|
||||
# Unauthenticated users are only rate limited by global settings
|
||||
def _check_token_rate_limits(user: User) -> None:
|
||||
# Anonymous users are only rate limited by global settings
|
||||
if user.is_anonymous:
|
||||
_user_is_rate_limited_by_global()
|
||||
|
||||
elif is_api_key_email_address(user.email):
|
||||
|
||||
@@ -153,7 +153,7 @@ def snapshot_from_chat_session(
|
||||
@router.get("/admin/chat-sessions")
|
||||
def admin_get_chat_sessions(
|
||||
user_id: UUID,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> ChatSessionsResponse:
|
||||
# we specifically don't allow this endpoint if "anonymized" since
|
||||
@@ -196,7 +196,7 @@ def get_chat_session_history(
|
||||
feedback_type: QAFeedbackType | None = None,
|
||||
start_time: datetime | None = None,
|
||||
end_time: datetime | None = None,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> PaginatedReturn[ChatSessionMinimal]:
|
||||
ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])
|
||||
@@ -234,7 +234,7 @@ def get_chat_session_history(
|
||||
@router.get("/admin/chat-session-history/{chat_session_id}")
|
||||
def get_chat_session_admin(
|
||||
chat_session_id: UUID,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> ChatSessionSnapshot:
|
||||
ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])
|
||||
@@ -269,7 +269,7 @@ def get_chat_session_admin(
|
||||
|
||||
@router.get("/admin/query-history/list")
|
||||
def list_all_query_history_exports(
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[QueryHistoryExport]:
|
||||
ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])
|
||||
@@ -297,7 +297,7 @@ def list_all_query_history_exports(
|
||||
|
||||
@router.post("/admin/query-history/start-export", tags=PUBLIC_API_TAGS)
|
||||
def start_query_history_export(
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
start: datetime | None = None,
|
||||
end: datetime | None = None,
|
||||
@@ -344,7 +344,7 @@ def start_query_history_export(
|
||||
@router.get("/admin/query-history/export-status", tags=PUBLIC_API_TAGS)
|
||||
def get_query_history_export_status(
|
||||
request_id: str,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> dict[str, str]:
|
||||
ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])
|
||||
@@ -378,7 +378,7 @@ def get_query_history_export_status(
|
||||
@router.get("/admin/query-history/download", tags=PUBLIC_API_TAGS)
|
||||
def download_query_history_csv(
|
||||
request_id: str,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> StreamingResponse:
|
||||
ensure_query_history_is_enabled(disallowed=[QueryHistoryType.DISABLED])
|
||||
|
||||
@@ -58,7 +58,7 @@ def generate_report(
|
||||
@router.get("/admin/usage-report/{report_name}")
|
||||
def read_usage_report(
|
||||
report_name: str,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> Response:
|
||||
try:
|
||||
@@ -82,7 +82,7 @@ def read_usage_report(
|
||||
|
||||
@router.get("/admin/usage-report")
|
||||
def fetch_usage_reports(
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[UsageReportMetadata]:
|
||||
try:
|
||||
|
||||
@@ -123,9 +123,14 @@ def _seed_llms(
|
||||
upsert_llm_provider(llm_upsert_request, db_session)
|
||||
for llm_upsert_request in llm_upsert_requests
|
||||
]
|
||||
update_default_provider(
|
||||
provider_id=seeded_providers[0].id, db_session=db_session
|
||||
)
|
||||
|
||||
if len(seeded_providers[0].model_configurations) > 0:
|
||||
default_model = seeded_providers[0].model_configurations[0].name
|
||||
update_default_provider(
|
||||
provider_id=seeded_providers[0].id,
|
||||
model_name=default_model,
|
||||
db_session=db_session,
|
||||
)
|
||||
|
||||
|
||||
def _seed_personas(db_session: Session, personas: list[PersonaUpsertRequest]) -> None:
|
||||
|
||||
@@ -14,7 +14,6 @@ from ee.onyx.server.tenants.anonymous_user_path import validate_anonymous_user_p
|
||||
from ee.onyx.server.tenants.models import AnonymousUserPath
|
||||
from onyx.auth.users import anonymous_user_enabled
|
||||
from onyx.auth.users import current_admin_user
|
||||
from onyx.auth.users import optional_user
|
||||
from onyx.auth.users import User
|
||||
from onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME
|
||||
from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
|
||||
@@ -29,7 +28,7 @@ router = APIRouter(prefix="/tenants")
|
||||
|
||||
@router.get("/anonymous-user-path")
|
||||
async def get_anonymous_user_path_api(
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
) -> AnonymousUserPath:
|
||||
tenant_id = get_current_tenant_id()
|
||||
|
||||
@@ -45,7 +44,7 @@ async def get_anonymous_user_path_api(
|
||||
@router.post("/anonymous-user-path")
|
||||
async def set_anonymous_user_path_api(
|
||||
anonymous_user_path: str,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
) -> None:
|
||||
tenant_id = get_current_tenant_id()
|
||||
try:
|
||||
@@ -72,7 +71,6 @@ async def set_anonymous_user_path_api(
|
||||
@router.post("/anonymous-user")
|
||||
async def login_as_anonymous_user(
|
||||
anonymous_user_path: str,
|
||||
_: User | None = Depends(optional_user),
|
||||
) -> Response:
|
||||
with get_session_with_shared_schema() as db_session:
|
||||
tenant_id = get_tenant_id_for_anonymous_user_path(
|
||||
|
||||
@@ -300,12 +300,12 @@ def configure_default_api_keys(db_session: Session) -> None:
|
||||
|
||||
has_set_default_provider = False
|
||||
|
||||
def _upsert(request: LLMProviderUpsertRequest) -> None:
|
||||
def _upsert(request: LLMProviderUpsertRequest, default_model: str) -> None:
|
||||
nonlocal has_set_default_provider
|
||||
try:
|
||||
provider = upsert_llm_provider(request, db_session)
|
||||
if not has_set_default_provider:
|
||||
update_default_provider(provider.id, db_session)
|
||||
update_default_provider(provider.id, default_model, db_session)
|
||||
has_set_default_provider = True
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to configure {request.provider} provider: {e}")
|
||||
@@ -323,14 +323,13 @@ def configure_default_api_keys(db_session: Session) -> None:
|
||||
name="OpenAI",
|
||||
provider=OPENAI_PROVIDER_NAME,
|
||||
api_key=OPENAI_DEFAULT_API_KEY,
|
||||
default_model_name=default_model_name,
|
||||
model_configurations=_build_model_configuration_upsert_requests(
|
||||
OPENAI_PROVIDER_NAME, recommendations
|
||||
),
|
||||
api_key_changed=True,
|
||||
is_auto_mode=True,
|
||||
)
|
||||
_upsert(openai_provider)
|
||||
_upsert(openai_provider, default_model_name)
|
||||
|
||||
# Create default image generation config using the OpenAI API key
|
||||
try:
|
||||
@@ -359,14 +358,13 @@ def configure_default_api_keys(db_session: Session) -> None:
|
||||
name="Anthropic",
|
||||
provider=ANTHROPIC_PROVIDER_NAME,
|
||||
api_key=ANTHROPIC_DEFAULT_API_KEY,
|
||||
default_model_name=default_model_name,
|
||||
model_configurations=_build_model_configuration_upsert_requests(
|
||||
ANTHROPIC_PROVIDER_NAME, recommendations
|
||||
),
|
||||
api_key_changed=True,
|
||||
is_auto_mode=True,
|
||||
)
|
||||
_upsert(anthropic_provider)
|
||||
_upsert(anthropic_provider, default_model_name)
|
||||
else:
|
||||
logger.info(
|
||||
"ANTHROPIC_DEFAULT_API_KEY not set, skipping Anthropic provider configuration"
|
||||
@@ -391,14 +389,13 @@ def configure_default_api_keys(db_session: Session) -> None:
|
||||
name="Google Vertex AI",
|
||||
provider=VERTEXAI_PROVIDER_NAME,
|
||||
custom_config=custom_config,
|
||||
default_model_name=default_model_name,
|
||||
model_configurations=_build_model_configuration_upsert_requests(
|
||||
VERTEXAI_PROVIDER_NAME, recommendations
|
||||
),
|
||||
api_key_changed=True,
|
||||
is_auto_mode=True,
|
||||
)
|
||||
_upsert(vertexai_provider)
|
||||
_upsert(vertexai_provider, default_model_name)
|
||||
else:
|
||||
logger.info(
|
||||
"VERTEXAI_DEFAULT_CREDENTIALS not set, skipping Vertex AI provider configuration"
|
||||
@@ -430,12 +427,11 @@ def configure_default_api_keys(db_session: Session) -> None:
|
||||
name="OpenRouter",
|
||||
provider=OPENROUTER_PROVIDER_NAME,
|
||||
api_key=OPENROUTER_DEFAULT_API_KEY,
|
||||
default_model_name=default_model_name,
|
||||
model_configurations=model_configurations,
|
||||
api_key_changed=True,
|
||||
is_auto_mode=True,
|
||||
)
|
||||
_upsert(openrouter_provider)
|
||||
_upsert(openrouter_provider, default_model_name)
|
||||
else:
|
||||
logger.info(
|
||||
"OPENROUTER_DEFAULT_API_KEY not set, skipping OpenRouter provider configuration"
|
||||
|
||||
@@ -29,17 +29,13 @@ from fastapi import HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
|
||||
from ee.onyx.db.license import update_license_cache
|
||||
from ee.onyx.db.license import upsert_license
|
||||
from ee.onyx.server.billing.models import SeatUpdateRequest
|
||||
from ee.onyx.server.billing.models import SeatUpdateResponse
|
||||
from ee.onyx.server.license.models import LicensePayload
|
||||
from ee.onyx.server.license.models import LicenseSource
|
||||
from ee.onyx.server.tenants.access import generate_data_plane_token
|
||||
from ee.onyx.utils.license import is_license_valid
|
||||
from ee.onyx.utils.license import verify_license_signature
|
||||
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
|
||||
from onyx.db.engine.sql_engine import get_session_with_tenant
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
@@ -209,36 +205,6 @@ async def forward_to_control_plane(
|
||||
)
|
||||
|
||||
|
||||
def fetch_and_store_license(tenant_id: str, license_data: str) -> None:
|
||||
"""Store license in database and update Redis cache.
|
||||
|
||||
Args:
|
||||
tenant_id: The tenant ID
|
||||
license_data: Base64-encoded signed license blob
|
||||
"""
|
||||
try:
|
||||
# Verify before storing
|
||||
payload = verify_license_signature(license_data)
|
||||
|
||||
# Store in database using the specific tenant's schema
|
||||
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
|
||||
upsert_license(db_session, license_data)
|
||||
|
||||
# Update Redis cache
|
||||
update_license_cache(
|
||||
payload,
|
||||
source=LicenseSource.AUTO_FETCH,
|
||||
tenant_id=tenant_id,
|
||||
)
|
||||
|
||||
except ValueError as e:
|
||||
logger.error(f"Failed to verify license: {e}")
|
||||
raise
|
||||
except Exception:
|
||||
logger.exception("Failed to store license")
|
||||
raise
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Endpoints
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -246,6 +212,7 @@ def fetch_and_store_license(tenant_id: str, license_data: str) -> None:
|
||||
|
||||
class CreateCheckoutSessionRequest(BaseModel):
|
||||
billing_period: Literal["monthly", "annual"] = "monthly"
|
||||
seats: int | None = None
|
||||
email: str | None = None
|
||||
# Redirect URL after successful checkout - self-hosted passes their instance URL
|
||||
redirect_url: str | None = None
|
||||
@@ -277,6 +244,8 @@ async def proxy_create_checkout_session(
|
||||
}
|
||||
if tenant_id:
|
||||
body["tenant_id"] = tenant_id
|
||||
if request_body.seats is not None:
|
||||
body["seats"] = request_body.seats
|
||||
if request_body.email:
|
||||
body["email"] = request_body.email
|
||||
if request_body.redirect_url:
|
||||
@@ -439,7 +408,6 @@ async def proxy_license_fetch(
|
||||
|
||||
result = await forward_to_control_plane("GET", f"/license/{tenant_id}")
|
||||
|
||||
# Auto-store the refreshed license
|
||||
license_data = result.get("license")
|
||||
if not license_data:
|
||||
logger.error(f"Control plane returned incomplete license response: {result}")
|
||||
@@ -448,8 +416,7 @@ async def proxy_license_fetch(
|
||||
detail="Control plane returned incomplete license data",
|
||||
)
|
||||
|
||||
fetch_and_store_license(tenant_id, license_data)
|
||||
|
||||
# Return license to caller - self-hosted instance stores it via /api/license/claim
|
||||
return LicenseFetchResponse(license=license_data, tenant_id=tenant_id)
|
||||
|
||||
|
||||
@@ -462,6 +429,7 @@ async def proxy_seat_update(
|
||||
|
||||
Auth: Valid (non-expired) license required.
|
||||
Handles Stripe proration and license regeneration.
|
||||
Returns the regenerated license in the response for the caller to store.
|
||||
"""
|
||||
if not license_payload.tenant_id:
|
||||
raise HTTPException(status_code=401, detail="License missing tenant_id")
|
||||
@@ -477,9 +445,11 @@ async def proxy_seat_update(
|
||||
},
|
||||
)
|
||||
|
||||
# Return license in response - self-hosted instance stores it via /api/license/claim
|
||||
return SeatUpdateResponse(
|
||||
success=result.get("success", False),
|
||||
current_seats=result.get("current_seats", 0),
|
||||
used_seats=result.get("used_seats", 0),
|
||||
message=result.get("message"),
|
||||
license=result.get("license"),
|
||||
)
|
||||
|
||||
@@ -24,12 +24,12 @@ router = APIRouter(prefix="/tenants")
|
||||
@router.post("/leave-team")
|
||||
async def leave_organization(
|
||||
user_email: UserByEmail,
|
||||
current_user: User | None = Depends(current_admin_user),
|
||||
current_user: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> None:
|
||||
tenant_id = get_current_tenant_id()
|
||||
|
||||
if current_user is None or current_user.email != user_email.user_email:
|
||||
if current_user.email != user_email.user_email:
|
||||
raise HTTPException(
|
||||
status_code=403, detail="You can only leave the organization as yourself"
|
||||
)
|
||||
|
||||
@@ -26,10 +26,8 @@ FORBIDDEN_COMMON_EMAIL_SUBSTRINGS = [
|
||||
|
||||
@router.get("/existing-team-by-domain")
|
||||
def get_existing_tenant_by_domain(
|
||||
user: User | None = Depends(current_user),
|
||||
user: User = Depends(current_user),
|
||||
) -> TenantByDomainResponse | None:
|
||||
if not user:
|
||||
return None
|
||||
domain = user.email.split("@")[1]
|
||||
if any(substring in domain for substring in FORBIDDEN_COMMON_EMAIL_SUBSTRINGS):
|
||||
return None
|
||||
|
||||
@@ -24,10 +24,8 @@ router = APIRouter(prefix="/tenants")
|
||||
@router.post("/users/invite/request")
|
||||
async def request_invite(
|
||||
invite_request: RequestInviteRequest,
|
||||
user: User | None = Depends(current_admin_user),
|
||||
user: User = Depends(current_admin_user),
|
||||
) -> None:
|
||||
if user is None:
|
||||
raise HTTPException(status_code=401, detail="User not authenticated")
|
||||
try:
|
||||
invite_self_to_tenant(user.email, invite_request.tenant_id)
|
||||
except Exception as e:
|
||||
@@ -39,7 +37,7 @@ async def request_invite(
|
||||
|
||||
@router.get("/users/pending")
|
||||
def list_pending_users(
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
) -> list[PendingUserSnapshot]:
|
||||
pending_emails = get_pending_users()
|
||||
return [PendingUserSnapshot(email=email) for email in pending_emails]
|
||||
@@ -48,7 +46,7 @@ def list_pending_users(
|
||||
@router.post("/users/invite/approve")
|
||||
async def approve_user(
|
||||
approve_user_request: ApproveUserRequest,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
) -> None:
|
||||
tenant_id = get_current_tenant_id()
|
||||
approve_user_invite(approve_user_request.email, tenant_id)
|
||||
@@ -57,14 +55,11 @@ async def approve_user(
|
||||
@router.post("/users/invite/accept")
|
||||
async def accept_invite(
|
||||
invite_request: RequestInviteRequest,
|
||||
user: User | None = Depends(current_user),
|
||||
user: User = Depends(current_user),
|
||||
) -> None:
|
||||
"""
|
||||
Accept an invitation to join a tenant.
|
||||
"""
|
||||
if not user:
|
||||
raise HTTPException(status_code=401, detail="Not authenticated")
|
||||
|
||||
try:
|
||||
accept_user_invite(user.email, invite_request.tenant_id)
|
||||
except Exception as e:
|
||||
@@ -75,14 +70,11 @@ async def accept_invite(
|
||||
@router.post("/users/invite/deny")
|
||||
async def deny_invite(
|
||||
invite_request: RequestInviteRequest,
|
||||
user: User | None = Depends(current_user),
|
||||
user: User = Depends(current_user),
|
||||
) -> None:
|
||||
"""
|
||||
Deny an invitation to join a tenant.
|
||||
"""
|
||||
if not user:
|
||||
raise HTTPException(status_code=401, detail="Not authenticated")
|
||||
|
||||
try:
|
||||
deny_user_invite(user.email, invite_request.tenant_id)
|
||||
except Exception as e:
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from fastapi_users import exceptions
|
||||
from sqlalchemy import select
|
||||
|
||||
from ee.onyx.db.license import invalidate_license_cache
|
||||
from onyx.auth.invited_users import get_invited_users
|
||||
from onyx.auth.invited_users import get_pending_users
|
||||
from onyx.auth.invited_users import write_invited_users
|
||||
@@ -48,8 +47,6 @@ def get_tenant_id_for_email(email: str) -> str:
|
||||
mapping.active = True
|
||||
db_session.commit()
|
||||
tenant_id = mapping.tenant_id
|
||||
# Invalidate license cache so used_seats reflects the new count
|
||||
invalidate_license_cache(tenant_id)
|
||||
except Exception as e:
|
||||
logger.exception(f"Error getting tenant id for email {email}: {e}")
|
||||
raise exceptions.UserNotExists()
|
||||
@@ -78,14 +75,7 @@ def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:
|
||||
an inactive mapping (invitation) to this tenant. They can accept the
|
||||
invitation later to switch tenants.
|
||||
|
||||
Raises:
|
||||
HTTPException: 402 if adding active users would exceed seat limit
|
||||
"""
|
||||
from fastapi import HTTPException
|
||||
|
||||
from ee.onyx.db.license import check_seat_availability
|
||||
from onyx.db.engine.sql_engine import get_session_with_tenant as get_tenant_session
|
||||
|
||||
unique_emails = set(emails)
|
||||
if not unique_emails:
|
||||
return
|
||||
@@ -119,33 +109,6 @@ def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:
|
||||
)
|
||||
emails_with_active_mapping = {m.email for m in active_mappings}
|
||||
|
||||
# Determine which users will consume a new seat.
|
||||
# Users with active mappings elsewhere get INACTIVE mappings (invitations)
|
||||
# and don't consume seats until they accept. Only users without any active
|
||||
# mapping will get an ACTIVE mapping and consume a seat immediately.
|
||||
emails_consuming_seats = {
|
||||
email
|
||||
for email in unique_emails
|
||||
if email not in emails_with_mapping
|
||||
and email not in emails_with_active_mapping
|
||||
}
|
||||
|
||||
# Check seat availability inside the transaction to prevent race conditions.
|
||||
# Note: ALL users in unique_emails still get added below - this check only
|
||||
# validates we have capacity for users who will consume seats immediately.
|
||||
if emails_consuming_seats:
|
||||
with get_tenant_session(tenant_id=tenant_id) as tenant_session:
|
||||
result = check_seat_availability(
|
||||
tenant_session,
|
||||
seats_needed=len(emails_consuming_seats),
|
||||
tenant_id=tenant_id,
|
||||
)
|
||||
if not result.available:
|
||||
raise HTTPException(
|
||||
status_code=402,
|
||||
detail=result.error_message or "Seat limit exceeded",
|
||||
)
|
||||
|
||||
# Add mappings for emails that don't already have one to this tenant
|
||||
for email in unique_emails:
|
||||
if email in emails_with_mapping:
|
||||
@@ -165,12 +128,6 @@ def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:
|
||||
db_session.commit()
|
||||
logger.info(f"Successfully added users {emails} to tenant {tenant_id}")
|
||||
|
||||
# Invalidate license cache so used_seats reflects the new count
|
||||
invalidate_license_cache(tenant_id)
|
||||
|
||||
except HTTPException:
|
||||
db_session.rollback()
|
||||
raise
|
||||
except Exception:
|
||||
logger.exception(f"Failed to add users to tenant {tenant_id}")
|
||||
db_session.rollback()
|
||||
@@ -193,9 +150,6 @@ def remove_users_from_tenant(emails: list[str], tenant_id: str) -> None:
|
||||
db_session.delete(mapping)
|
||||
|
||||
db_session.commit()
|
||||
|
||||
# Invalidate license cache so used_seats reflects the new count
|
||||
invalidate_license_cache(tenant_id)
|
||||
except Exception as e:
|
||||
logger.exception(
|
||||
f"Failed to remove users from tenant {tenant_id}: {str(e)}"
|
||||
@@ -210,9 +164,6 @@ def remove_all_users_from_tenant(tenant_id: str) -> None:
|
||||
).delete()
|
||||
db_session.commit()
|
||||
|
||||
# Invalidate license cache so used_seats reflects the new count
|
||||
invalidate_license_cache(tenant_id)
|
||||
|
||||
|
||||
def invite_self_to_tenant(email: str, tenant_id: str) -> None:
|
||||
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
|
||||
@@ -241,9 +192,6 @@ def approve_user_invite(email: str, tenant_id: str) -> None:
|
||||
db_session.add(new_mapping)
|
||||
db_session.commit()
|
||||
|
||||
# Invalidate license cache so used_seats reflects the new count
|
||||
invalidate_license_cache(tenant_id)
|
||||
|
||||
# Also remove the user from pending users list
|
||||
# Remove from pending users
|
||||
pending_users = get_pending_users()
|
||||
@@ -262,20 +210,11 @@ def accept_user_invite(email: str, tenant_id: str) -> None:
|
||||
"""
|
||||
Accept an invitation to join a tenant.
|
||||
This activates the user's mapping to the tenant.
|
||||
|
||||
Raises:
|
||||
HTTPException: 402 if accepting would exceed seat limit
|
||||
"""
|
||||
from fastapi import HTTPException
|
||||
|
||||
from ee.onyx.db.license import check_seat_availability
|
||||
from onyx.db.engine.sql_engine import get_session_with_tenant
|
||||
|
||||
with get_session_with_shared_schema() as db_session:
|
||||
try:
|
||||
# Lock the user's mappings first to prevent race conditions.
|
||||
# This ensures no concurrent request can modify this user's mappings
|
||||
# while we check seats and activate.
|
||||
# This ensures no concurrent request can modify this user's mappings.
|
||||
active_mapping = (
|
||||
db_session.query(UserTenantMapping)
|
||||
.filter(
|
||||
@@ -286,18 +225,6 @@ def accept_user_invite(email: str, tenant_id: str) -> None:
|
||||
.first()
|
||||
)
|
||||
|
||||
# Check seat availability within the same logical operation.
|
||||
# Note: This queries fresh data from DB, not cache.
|
||||
with get_session_with_tenant(tenant_id=tenant_id) as tenant_session:
|
||||
result = check_seat_availability(
|
||||
tenant_session, seats_needed=1, tenant_id=tenant_id
|
||||
)
|
||||
if not result.available:
|
||||
raise HTTPException(
|
||||
status_code=402,
|
||||
detail=result.error_message or "Seat limit exceeded",
|
||||
)
|
||||
|
||||
# If an active mapping exists, delete it
|
||||
if active_mapping:
|
||||
db_session.delete(active_mapping)
|
||||
@@ -327,9 +254,6 @@ def accept_user_invite(email: str, tenant_id: str) -> None:
|
||||
mapping.active = True
|
||||
db_session.commit()
|
||||
logger.info(f"User {email} accepted invitation to tenant {tenant_id}")
|
||||
|
||||
# Invalidate license cache so used_seats reflects the new count
|
||||
invalidate_license_cache(tenant_id)
|
||||
else:
|
||||
logger.warning(
|
||||
f"No invitation found for user {email} in tenant {tenant_id}"
|
||||
|
||||
@@ -28,7 +28,7 @@ Group Token Limit Settings
|
||||
|
||||
@router.get("/user-groups")
|
||||
def get_all_group_token_limit_settings(
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> dict[str, list[TokenRateLimitDisplay]]:
|
||||
user_groups_to_token_rate_limits = fetch_all_user_group_token_rate_limits_by_group(
|
||||
@@ -47,7 +47,7 @@ def get_all_group_token_limit_settings(
|
||||
@router.get("/user-group/{group_id}")
|
||||
def get_group_token_limit_settings(
|
||||
group_id: int,
|
||||
user: User | None = Depends(current_curator_or_admin_user),
|
||||
user: User = Depends(current_curator_or_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[TokenRateLimitDisplay]:
|
||||
return [
|
||||
@@ -64,7 +64,7 @@ def get_group_token_limit_settings(
|
||||
def create_group_token_limit_settings(
|
||||
group_id: int,
|
||||
token_limit_settings: TokenRateLimitArgs,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> TokenRateLimitDisplay:
|
||||
rate_limit_display = TokenRateLimitDisplay.from_db(
|
||||
@@ -86,7 +86,7 @@ User Token Limit Settings
|
||||
|
||||
@router.get("/users")
|
||||
def get_user_token_limit_settings(
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[TokenRateLimitDisplay]:
|
||||
return [
|
||||
@@ -98,7 +98,7 @@ def get_user_token_limit_settings(
|
||||
@router.post("/users")
|
||||
def create_user_token_limit_settings(
|
||||
token_limit_settings: TokenRateLimitArgs,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> TokenRateLimitDisplay:
|
||||
rate_limit_display = TokenRateLimitDisplay.from_db(
|
||||
|
||||
@@ -31,10 +31,10 @@ router = APIRouter(prefix="/manage", tags=PUBLIC_API_TAGS)
|
||||
|
||||
@router.get("/admin/user-group")
|
||||
def list_user_groups(
|
||||
user: User | None = Depends(current_curator_or_admin_user),
|
||||
user: User = Depends(current_curator_or_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[UserGroup]:
|
||||
if user is None or user.role == UserRole.ADMIN:
|
||||
if user.role == UserRole.ADMIN:
|
||||
user_groups = fetch_user_groups(db_session, only_up_to_date=False)
|
||||
else:
|
||||
user_groups = fetch_user_groups_for_user(
|
||||
@@ -48,7 +48,7 @@ def list_user_groups(
|
||||
@router.post("/admin/user-group")
|
||||
def create_user_group(
|
||||
user_group: UserGroupCreate,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> UserGroup:
|
||||
try:
|
||||
@@ -66,7 +66,7 @@ def create_user_group(
|
||||
def patch_user_group(
|
||||
user_group_id: int,
|
||||
user_group_update: UserGroupUpdate,
|
||||
user: User | None = Depends(current_curator_or_admin_user),
|
||||
user: User = Depends(current_curator_or_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> UserGroup:
|
||||
try:
|
||||
@@ -86,7 +86,7 @@ def patch_user_group(
|
||||
def add_users(
|
||||
user_group_id: int,
|
||||
add_users_request: AddUsersToUserGroupRequest,
|
||||
user: User | None = Depends(current_curator_or_admin_user),
|
||||
user: User = Depends(current_curator_or_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> UserGroup:
|
||||
try:
|
||||
@@ -106,7 +106,7 @@ def add_users(
|
||||
def set_user_curator(
|
||||
user_group_id: int,
|
||||
set_curator_request: SetCuratorRequest,
|
||||
user: User | None = Depends(current_curator_or_admin_user),
|
||||
user: User = Depends(current_curator_or_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> None:
|
||||
try:
|
||||
@@ -124,7 +124,7 @@ def set_user_curator(
|
||||
@router.delete("/admin/user-group/{user_group_id}")
|
||||
def delete_user_group(
|
||||
user_group_id: int,
|
||||
_: User | None = Depends(current_admin_user),
|
||||
_: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> None:
|
||||
try:
|
||||
|
||||
@@ -96,22 +96,20 @@ def get_access_for_documents(
return versioned_get_access_for_documents_fn(document_ids, db_session)

def _get_acl_for_user(user: User | None, db_session: Session) -> set[str]:
"""Returns a list of ACL entries that the user has access to.
def _get_acl_for_user(user: User, db_session: Session) -> set[str]:
"""Returns a list of ACL entries that the user has access to. This is meant to be
used downstream to filter out documents that the user does not have access to. The
user should have access to a document if at least one entry in the document's ACL
matches one entry in the returned set.

This is meant to be used downstream to filter out documents that the user
does not have access to. The user should have access to a document if at
least one entry in the document's ACL matches one entry in the returned set.

NOTE: These strings must be formatted in the same way as the output of
DocumentAccess::to_acl.
Anonymous users only have access to public documents.
"""
if user:
return {prefix_user_email(user.email), PUBLIC_DOC_PAT}
return {PUBLIC_DOC_PAT}
if user.is_anonymous:
return {PUBLIC_DOC_PAT}
return {prefix_user_email(user.email), PUBLIC_DOC_PAT}

def get_acl_for_user(user: User | None, db_session: Session | None = None) -> set[str]:
def get_acl_for_user(user: User, db_session: Session | None = None) -> set[str]:
versioned_acl_for_user_fn = fetch_versioned_implementation(
"onyx.access.access", "_get_acl_for_user"
)

backend/onyx/access/hierarchy_access.py (new file)
@@ -0,0 +1,15 @@
from sqlalchemy.orm import Session

from onyx.db.models import User
from onyx.utils.variable_functionality import fetch_versioned_implementation

def _get_user_external_group_ids(db_session: Session, user: User) -> list[str]:
return []

def get_user_external_group_ids(db_session: Session, user: User) -> list[str]:
versioned_get_user_external_group_ids = fetch_versioned_implementation(
"onyx.access.hierarchy_access", "_get_user_external_group_ids"
)
return versioned_get_user_external_group_ids(db_session, user)
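A small sketch of how the ACL set built above is typically consumed: access is granted when the document's ACL and the user's ACL intersect. The helper name is illustrative, not part of this diff.

def user_can_see(document_acl: set[str], user_acl: set[str]) -> bool:
    # At least one entry must match, per the _get_acl_for_user docstring.
    return bool(document_acl & user_acl)


# An anonymous user's set is {PUBLIC_DOC_PAT}, so only public documents match;
# an authenticated user's set also carries their prefixed email entry.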
@@ -105,6 +105,54 @@ class DocExternalAccess:
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class NodeExternalAccess:
|
||||
"""
|
||||
Wraps external access with a hierarchy node's raw ID.
|
||||
Used for syncing hierarchy node permissions (e.g., folder permissions).
|
||||
"""
|
||||
|
||||
external_access: ExternalAccess
|
||||
# The raw node ID from the source system (e.g., Google Drive folder ID)
|
||||
raw_node_id: str
|
||||
# The source type (e.g., "google_drive")
|
||||
source: str
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"external_access": {
|
||||
"external_user_emails": list(self.external_access.external_user_emails),
|
||||
"external_user_group_ids": list(
|
||||
self.external_access.external_user_group_ids
|
||||
),
|
||||
"is_public": self.external_access.is_public,
|
||||
},
|
||||
"raw_node_id": self.raw_node_id,
|
||||
"source": self.source,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "NodeExternalAccess":
|
||||
external_access = ExternalAccess(
|
||||
external_user_emails=set(
|
||||
data["external_access"].get("external_user_emails", [])
|
||||
),
|
||||
external_user_group_ids=set(
|
||||
data["external_access"].get("external_user_group_ids", [])
|
||||
),
|
||||
is_public=data["external_access"]["is_public"],
|
||||
)
|
||||
return cls(
|
||||
external_access=external_access,
|
||||
raw_node_id=data["raw_node_id"],
|
||||
source=data["source"],
|
||||
)
|
||||
|
||||
|
||||
# Union type for elements that can have permissions synced
|
||||
ElementExternalAccess = DocExternalAccess | NodeExternalAccess
|
||||
|
||||
|
||||
# TODO(andrei): First refactor this into a pydantic model, then get rid of
|
||||
# duplicate fields.
|
||||
@dataclass(frozen=True, init=False)
|
||||
|
||||
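A brief round-trip sketch for NodeExternalAccess, e.g. when queueing folder permissions for a sync task. The import path is an assumption; the field names follow the dataclass and from_dict logic above.

from onyx.access.models import ExternalAccess, NodeExternalAccess  # assumed import path

access = ExternalAccess(
    external_user_emails={"alice@example.com"},
    external_user_group_ids={"drive-group-123"},
    is_public=False,
)
node_access = NodeExternalAccess(
    external_access=access,
    raw_node_id="1A2b3C4d",  # e.g. a Google Drive folder ID
    source="google_drive",
)

# to_dict produces a JSON-serializable payload; from_dict rebuilds the same shape.
restored = NodeExternalAccess.from_dict(node_access.to_dict())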
@@ -3,10 +3,10 @@ from typing import Any
|
||||
from typing import cast
|
||||
|
||||
from onyx.auth.schemas import UserRole
|
||||
from onyx.configs.constants import KV_NO_AUTH_USER_PERSONALIZATION_KEY
|
||||
from onyx.configs.constants import KV_NO_AUTH_USER_PREFERENCES_KEY
|
||||
from onyx.configs.constants import NO_AUTH_USER_EMAIL
|
||||
from onyx.configs.constants import NO_AUTH_USER_ID
|
||||
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
|
||||
from onyx.configs.constants import ANONYMOUS_USER_INFO_ID
|
||||
from onyx.configs.constants import KV_ANONYMOUS_USER_PERSONALIZATION_KEY
|
||||
from onyx.configs.constants import KV_ANONYMOUS_USER_PREFERENCES_KEY
|
||||
from onyx.key_value_store.store import KeyValueStore
|
||||
from onyx.key_value_store.store import KvKeyNotFoundError
|
||||
from onyx.server.manage.models import UserInfo
|
||||
@@ -14,22 +14,22 @@ from onyx.server.manage.models import UserPersonalization
|
||||
from onyx.server.manage.models import UserPreferences
|
||||
|
||||
|
||||
def set_no_auth_user_preferences(
|
||||
def set_anonymous_user_preferences(
|
||||
store: KeyValueStore, preferences: UserPreferences
|
||||
) -> None:
|
||||
store.store(KV_NO_AUTH_USER_PREFERENCES_KEY, preferences.model_dump())
|
||||
store.store(KV_ANONYMOUS_USER_PREFERENCES_KEY, preferences.model_dump())
|
||||
|
||||
|
||||
def set_no_auth_user_personalization(
|
||||
def set_anonymous_user_personalization(
|
||||
store: KeyValueStore, personalization: UserPersonalization
|
||||
) -> None:
|
||||
store.store(KV_NO_AUTH_USER_PERSONALIZATION_KEY, personalization.model_dump())
|
||||
store.store(KV_ANONYMOUS_USER_PERSONALIZATION_KEY, personalization.model_dump())
|
||||
|
||||
|
||||
def load_no_auth_user_preferences(store: KeyValueStore) -> UserPreferences:
|
||||
def load_anonymous_user_preferences(store: KeyValueStore) -> UserPreferences:
|
||||
try:
|
||||
preferences_data = cast(
|
||||
Mapping[str, Any], store.load(KV_NO_AUTH_USER_PREFERENCES_KEY)
|
||||
Mapping[str, Any], store.load(KV_ANONYMOUS_USER_PREFERENCES_KEY)
|
||||
)
|
||||
return UserPreferences(**preferences_data)
|
||||
except KvKeyNotFoundError:
|
||||
@@ -38,27 +38,26 @@ def load_no_auth_user_preferences(store: KeyValueStore) -> UserPreferences:
|
||||
)
|
||||
|
||||
|
||||
def fetch_no_auth_user(
|
||||
store: KeyValueStore, *, anonymous_user_enabled: bool | None = None
|
||||
) -> UserInfo:
|
||||
def fetch_anonymous_user_info(store: KeyValueStore) -> UserInfo:
|
||||
"""Fetch a UserInfo object for anonymous users (used for API responses)."""
|
||||
personalization = UserPersonalization()
|
||||
try:
|
||||
personalization_data = cast(
|
||||
Mapping[str, Any], store.load(KV_NO_AUTH_USER_PERSONALIZATION_KEY)
|
||||
Mapping[str, Any], store.load(KV_ANONYMOUS_USER_PERSONALIZATION_KEY)
|
||||
)
|
||||
personalization = UserPersonalization(**personalization_data)
|
||||
except KvKeyNotFoundError:
|
||||
pass
|
||||
|
||||
return UserInfo(
|
||||
id=NO_AUTH_USER_ID,
|
||||
email=NO_AUTH_USER_EMAIL,
|
||||
id=ANONYMOUS_USER_INFO_ID,
|
||||
email=ANONYMOUS_USER_EMAIL,
|
||||
is_active=True,
|
||||
is_superuser=False,
|
||||
is_verified=True,
|
||||
role=UserRole.BASIC if anonymous_user_enabled else UserRole.ADMIN,
|
||||
preferences=load_no_auth_user_preferences(store),
|
||||
role=UserRole.LIMITED,
|
||||
preferences=load_anonymous_user_preferences(store),
|
||||
personalization=personalization,
|
||||
is_anonymous_user=anonymous_user_enabled,
|
||||
is_anonymous_user=True,
|
||||
password_configured=False,
|
||||
)
|
||||
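A short sketch of the renamed helpers in use. Obtaining the store via get_kv_store() is an assumption beyond this diff; the helper names follow the renames above.

from onyx.key_value_store.factory import get_kv_store  # assumed import path

store = get_kv_store()
prefs = load_anonymous_user_preferences(store)
info = fetch_anonymous_user_info(store)
# Per the change above, info.role is UserRole.LIMITED and info.is_anonymous_user is True.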
@@ -56,6 +56,7 @@ class DisposableEmailValidator:
|
||||
"guerrillamail.com",
|
||||
"mailinator.com",
|
||||
"tempmail.com",
|
||||
"chat-tempmail.com",
|
||||
"throwaway.email",
|
||||
"yopmail.com",
|
||||
"temp-mail.org",
|
||||
|
||||
@@ -75,7 +75,6 @@ from onyx.auth.schemas import UserUpdateWithRole
|
||||
from onyx.configs.app_configs import AUTH_BACKEND
|
||||
from onyx.configs.app_configs import AUTH_COOKIE_EXPIRE_TIME_SECONDS
|
||||
from onyx.configs.app_configs import AUTH_TYPE
|
||||
from onyx.configs.app_configs import DISABLE_AUTH
|
||||
from onyx.configs.app_configs import EMAIL_CONFIGURED
|
||||
from onyx.configs.app_configs import JWT_PUBLIC_KEY_URL
|
||||
from onyx.configs.app_configs import PASSWORD_MAX_LENGTH
|
||||
@@ -92,6 +91,8 @@ from onyx.configs.app_configs import USER_AUTH_SECRET
|
||||
from onyx.configs.app_configs import VALID_EMAIL_DOMAINS
|
||||
from onyx.configs.app_configs import WEB_DOMAIN
|
||||
from onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME
|
||||
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
|
||||
from onyx.configs.constants import ANONYMOUS_USER_UUID
|
||||
from onyx.configs.constants import AuthType
|
||||
from onyx.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN
|
||||
from onyx.configs.constants import DANSWER_API_KEY_PREFIX
|
||||
@@ -134,12 +135,8 @@ from shared_configs.contextvars import get_current_tenant_id
|
||||
logger = setup_logger()
def is_user_admin(user: User | None) -> bool:
if AUTH_TYPE == AuthType.DISABLED:
return True
if user and user.role == UserRole.ADMIN:
return True
return False
def is_user_admin(user: User) -> bool:
return user.role == UserRole.ADMIN

def verify_auth_setting() -> None:
|
||||
@@ -1331,6 +1328,14 @@ async def optional_user(
|
||||
user: User | None = Depends(optional_fastapi_current_user),
|
||||
) -> User | None:
|
||||
|
||||
tenant_id = get_current_tenant_id()
|
||||
if (
|
||||
user is not None
|
||||
and user.is_anonymous
|
||||
and anonymous_user_enabled(tenant_id=tenant_id)
|
||||
):
|
||||
return get_anonymous_user()
|
||||
|
||||
if user := await _check_for_saml_and_jwt(request, user, async_db_session):
|
||||
# If user is already set, _check_for_saml_and_jwt returns the same user object
|
||||
return user
|
||||
@@ -1347,15 +1352,26 @@ async def optional_user(
|
||||
return user
|
||||
|
||||
|
||||
def get_anonymous_user() -> User:
|
||||
"""Create anonymous user object."""
|
||||
user = User(
|
||||
id=uuid.UUID(ANONYMOUS_USER_UUID),
|
||||
email=ANONYMOUS_USER_EMAIL,
|
||||
hashed_password="",
|
||||
is_active=True,
|
||||
is_verified=True,
|
||||
is_superuser=False,
|
||||
role=UserRole.LIMITED,
|
||||
use_memories=False,
|
||||
)
|
||||
return user
|
||||
|
||||
|
||||
async def double_check_user(
|
||||
user: User | None,
|
||||
optional: bool = DISABLE_AUTH,
|
||||
include_expired: bool = False,
|
||||
allow_anonymous_access: bool = False,
|
||||
) -> User | None:
|
||||
if optional:
|
||||
return user
|
||||
|
||||
) -> User:
|
||||
if user is not None:
|
||||
# If user attempted to authenticate, verify them, do not default
|
||||
# to anonymous access if it fails.
|
||||
@@ -1376,7 +1392,7 @@ async def double_check_user(
|
||||
return user
|
||||
|
||||
if allow_anonymous_access:
|
||||
return None
|
||||
return get_anonymous_user()
|
||||
|
||||
raise BasicAuthenticationError(
|
||||
detail="Access denied. User is not authenticated.",
|
||||
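With double_check_user now returning get_anonymous_user() instead of None, chat-accessible routes can assume a non-null User and branch on is_anonymous. A hypothetical endpoint as a sketch:

from fastapi import APIRouter, Depends

from onyx.auth.users import current_chat_accessible_user
from onyx.db.models import User

router = APIRouter()


@router.get("/whoami")
def whoami(user: User = Depends(current_chat_accessible_user)) -> dict[str, str | bool]:
    # Anonymous visitors arrive as the LIMITED anonymous user rather than None.
    return {"email": user.email, "anonymous": user.is_anonymous}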
@@ -1385,19 +1401,19 @@ async def double_check_user(
|
||||
|
||||
async def current_user_with_expired_token(
|
||||
user: User | None = Depends(optional_user),
|
||||
) -> User | None:
|
||||
) -> User:
|
||||
return await double_check_user(user, include_expired=True)
|
||||
|
||||
|
||||
async def current_limited_user(
|
||||
user: User | None = Depends(optional_user),
|
||||
) -> User | None:
|
||||
) -> User:
|
||||
return await double_check_user(user)
|
||||
|
||||
|
||||
async def current_chat_accessible_user(
|
||||
user: User | None = Depends(optional_user),
|
||||
) -> User | None:
|
||||
) -> User:
|
||||
tenant_id = get_current_tenant_id()
|
||||
|
||||
return await double_check_user(
|
||||
@@ -1407,10 +1423,8 @@ async def current_chat_accessible_user(
|
||||
|
||||
async def current_user(
|
||||
user: User | None = Depends(optional_user),
|
||||
) -> User | None:
|
||||
) -> User:
|
||||
user = await double_check_user(user)
|
||||
if not user:
|
||||
return None
|
||||
|
||||
if user.role == UserRole.LIMITED:
|
||||
raise BasicAuthenticationError(
|
||||
@@ -1420,16 +1434,8 @@ async def current_user(
|
||||
|
||||
|
||||
async def current_curator_or_admin_user(
|
||||
user: User | None = Depends(current_user),
|
||||
) -> User | None:
|
||||
if DISABLE_AUTH:
|
||||
return None
|
||||
|
||||
if not user or not hasattr(user, "role"):
|
||||
raise BasicAuthenticationError(
|
||||
detail="Access denied. User is not authenticated or lacks role information.",
|
||||
)
|
||||
|
||||
user: User = Depends(current_user),
|
||||
) -> User:
|
||||
allowed_roles = {UserRole.GLOBAL_CURATOR, UserRole.CURATOR, UserRole.ADMIN}
|
||||
if user.role not in allowed_roles:
|
||||
raise BasicAuthenticationError(
|
||||
@@ -1439,11 +1445,8 @@ async def current_curator_or_admin_user(
|
||||
return user
|
||||
|
||||
|
||||
async def current_admin_user(user: User | None = Depends(current_user)) -> User | None:
|
||||
if DISABLE_AUTH:
|
||||
return None
|
||||
|
||||
if not user or not hasattr(user, "role") or user.role != UserRole.ADMIN:
|
||||
async def current_admin_user(user: User = Depends(current_user)) -> User:
|
||||
if user.role != UserRole.ADMIN:
|
||||
raise BasicAuthenticationError(
|
||||
detail="Access denied. User must be an admin to perform this action.",
|
||||
)
|
||||
|
||||
@@ -124,6 +124,7 @@ celery_app.autodiscover_tasks(
|
||||
"onyx.background.celery.tasks.monitoring",
|
||||
"onyx.background.celery.tasks.user_file_processing",
|
||||
"onyx.background.celery.tasks.llm_model_update",
|
||||
"onyx.background.celery.tasks.opensearch_migration",
|
||||
# Light worker tasks
|
||||
"onyx.background.celery.tasks.shared",
|
||||
"onyx.background.celery.tasks.vespa",
|
||||
|
||||
@@ -325,5 +325,6 @@ celery_app.autodiscover_tasks(
|
||||
"onyx.background.celery.tasks.vespa",
|
||||
"onyx.background.celery.tasks.llm_model_update",
|
||||
"onyx.background.celery.tasks.user_file_processing",
|
||||
"onyx.background.celery.tasks.opensearch_migration",
|
||||
]
|
||||
)
|
||||
|
||||
@@ -6,6 +6,7 @@ from celery.schedules import crontab
|
||||
|
||||
from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL
|
||||
from onyx.configs.app_configs import AUTO_LLM_UPDATE_INTERVAL_SECONDS
|
||||
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
|
||||
from onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED
|
||||
from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES
|
||||
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
|
||||
@@ -210,6 +211,31 @@ if SCHEDULED_EVAL_DATASET_NAMES:
|
||||
}
|
||||
)
|
||||
|
||||
# Add OpenSearch migration task if enabled.
|
||||
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
|
||||
beat_task_templates.append(
|
||||
{
|
||||
"name": "check-for-documents-for-opensearch-migration",
|
||||
"task": OnyxCeleryTask.CHECK_FOR_DOCUMENTS_FOR_OPENSEARCH_MIGRATION_TASK,
|
||||
"schedule": timedelta(seconds=120), # 2 minutes
|
||||
"options": {
|
||||
"priority": OnyxCeleryPriority.LOW,
|
||||
"expires": BEAT_EXPIRES_DEFAULT,
|
||||
},
|
||||
}
|
||||
)
|
||||
beat_task_templates.append(
|
||||
{
|
||||
"name": "migrate-documents-from-vespa-to-opensearch",
|
||||
"task": OnyxCeleryTask.MIGRATE_DOCUMENT_FROM_VESPA_TO_OPENSEARCH_TASK,
|
||||
"schedule": timedelta(seconds=120), # 2 minutes
|
||||
"options": {
|
||||
"priority": OnyxCeleryPriority.LOW,
|
||||
"expires": BEAT_EXPIRES_DEFAULT,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def make_cloud_generator_task(task: dict[str, Any]) -> dict[str, Any]:
|
||||
cloud_task: dict[str, Any] = {}
|
||||
|
||||
@@ -24,18 +24,27 @@ from sqlalchemy.orm import Session
|
||||
from onyx.background.celery.apps.app_base import task_logger
|
||||
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.connectors.factory import instantiate_connector
|
||||
from onyx.connectors.interfaces import HierarchyConnector
|
||||
from onyx.connectors.models import HierarchyNode as PydanticHierarchyNode
|
||||
from onyx.db.connector import mark_cc_pair_as_hierarchy_fetched
|
||||
from onyx.db.connector_credential_pair import (
|
||||
fetch_indexable_standard_connector_credential_pair_ids,
|
||||
)
|
||||
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.enums import AccessType
|
||||
from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
from onyx.db.hierarchy import upsert_hierarchy_nodes_batch
|
||||
from onyx.db.models import ConnectorCredentialPair
|
||||
from onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch
|
||||
from onyx.redis.redis_hierarchy import ensure_source_node_exists
|
||||
from onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
@@ -211,6 +220,101 @@ def check_for_hierarchy_fetching(self: Task, *, tenant_id: str) -> int | None:
|
||||
return tasks_created
|
||||
|
||||
|
||||
# Batch size for hierarchy node processing
|
||||
HIERARCHY_NODE_BATCH_SIZE = 100
|
||||
|
||||
|
||||
def _run_hierarchy_extraction(
|
||||
db_session: Session,
|
||||
cc_pair: ConnectorCredentialPair,
|
||||
source: DocumentSource,
|
||||
tenant_id: str,
|
||||
) -> int:
|
||||
"""
|
||||
Run the hierarchy extraction for a connector.
|
||||
|
||||
Instantiates the connector and calls load_hierarchy() if the connector
|
||||
implements HierarchyConnector.
|
||||
|
||||
Returns the total number of hierarchy nodes extracted.
|
||||
"""
|
||||
connector = cc_pair.connector
|
||||
credential = cc_pair.credential
|
||||
|
||||
# Instantiate the connector using its configured input type
|
||||
runnable_connector = instantiate_connector(
|
||||
db_session=db_session,
|
||||
source=source,
|
||||
input_type=connector.input_type,
|
||||
connector_specific_config=connector.connector_specific_config,
|
||||
credential=credential,
|
||||
)
|
||||
|
||||
# Check if the connector supports hierarchy fetching
|
||||
if not isinstance(runnable_connector, HierarchyConnector):
|
||||
task_logger.debug(
|
||||
f"Connector {source} does not implement HierarchyConnector, skipping"
|
||||
)
|
||||
return 0
|
||||
|
||||
redis_client = get_redis_client(tenant_id=tenant_id)
|
||||
|
||||
# Ensure the SOURCE-type root node exists before processing hierarchy nodes.
|
||||
# This is the root of the hierarchy tree - all other nodes for this source
|
||||
# should ultimately have this as an ancestor.
|
||||
ensure_source_node_exists(redis_client, db_session, source)
|
||||
|
||||
# Determine time range: start from last hierarchy fetch, end at now
|
||||
last_fetch = cc_pair.last_time_hierarchy_fetch
|
||||
start_time = last_fetch.timestamp() if last_fetch else 0
|
||||
end_time = datetime.now(timezone.utc).timestamp()
|
||||
|
||||
# Check if connector is public - all hierarchy nodes from public connectors
|
||||
# should be accessible to all users
|
||||
is_connector_public = cc_pair.access_type == AccessType.PUBLIC
|
||||
|
||||
total_nodes = 0
|
||||
node_batch: list[PydanticHierarchyNode] = []
|
||||
|
||||
def _process_batch() -> int:
|
||||
"""Process accumulated hierarchy nodes batch."""
|
||||
if not node_batch:
|
||||
return 0
|
||||
|
||||
upserted_nodes = upsert_hierarchy_nodes_batch(
|
||||
db_session=db_session,
|
||||
nodes=node_batch,
|
||||
source=source,
|
||||
commit=True,
|
||||
is_connector_public=is_connector_public,
|
||||
)
|
||||
|
||||
# Cache in Redis for fast ancestor resolution
|
||||
cache_entries = [
|
||||
HierarchyNodeCacheEntry.from_db_model(node) for node in upserted_nodes
|
||||
]
|
||||
cache_hierarchy_nodes_batch(
|
||||
redis_client=redis_client,
|
||||
source=source,
|
||||
entries=cache_entries,
|
||||
)
|
||||
|
||||
count = len(node_batch)
|
||||
node_batch.clear()
|
||||
return count
|
||||
|
||||
# Fetch hierarchy nodes from the connector
|
||||
for node in runnable_connector.load_hierarchy(start=start_time, end=end_time):
|
||||
node_batch.append(node)
|
||||
if len(node_batch) >= HIERARCHY_NODE_BATCH_SIZE:
|
||||
total_nodes += _process_batch()
|
||||
|
||||
# Process any remaining nodes
|
||||
total_nodes += _process_batch()
|
||||
|
||||
return total_nodes
|
||||
|
||||
|
||||
@shared_task(
|
||||
name=OnyxCeleryTask.CONNECTOR_HIERARCHY_FETCHING_TASK,
|
||||
soft_time_limit=3600, # 1 hour soft limit
|
||||
@@ -253,15 +357,17 @@ def connector_hierarchy_fetching_task(
|
||||
)
|
||||
return
|
||||
|
||||
# TODO: Implement the actual hierarchy fetching logic
|
||||
# This will involve:
|
||||
# 1. Instantiating the connector
|
||||
# 2. Calling a hierarchy-specific method on the connector
|
||||
# 3. Upserting the hierarchy nodes to the database
|
||||
source = cc_pair.connector.source
|
||||
total_nodes = _run_hierarchy_extraction(
|
||||
db_session=db_session,
|
||||
cc_pair=cc_pair,
|
||||
source=source,
|
||||
tenant_id=tenant_id,
|
||||
)
|
||||
|
||||
task_logger.info(
|
||||
f"connector_hierarchy_fetching_task: "
|
||||
f"Hierarchy fetching not yet implemented for cc_pair={cc_pair_id}"
|
||||
f"Extracted {total_nodes} hierarchy nodes for cc_pair={cc_pair_id}"
|
||||
)
|
||||
|
||||
# Update the last fetch time to prevent re-running until next interval
|
||||
|
||||
@@ -0,0 +1,359 @@
|
||||
"""Celery tasks for migrating documents from Vespa to OpenSearch."""
|
||||
|
||||
import traceback
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from typing import Any
|
||||
|
||||
from celery import shared_task
|
||||
from celery import Task
|
||||
from redis.lock import Lock as RedisLock
|
||||
|
||||
from onyx.background.celery.apps.app_base import task_logger
|
||||
from onyx.background.celery.tasks.opensearch_migration.transformer import (
|
||||
transform_vespa_chunks_to_opensearch_chunks,
|
||||
)
|
||||
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.enums import OpenSearchDocumentMigrationStatus
|
||||
from onyx.db.opensearch_migration import create_opensearch_migration_records_with_commit
|
||||
from onyx.db.opensearch_migration import get_last_opensearch_migration_document_id
|
||||
from onyx.db.opensearch_migration import (
|
||||
get_opensearch_migration_records_needing_migration,
|
||||
)
|
||||
from onyx.db.opensearch_migration import get_paginated_document_batch
|
||||
from onyx.db.opensearch_migration import (
|
||||
increment_num_times_observed_no_additional_docs_to_migrate_with_commit,
|
||||
)
|
||||
from onyx.db.opensearch_migration import (
|
||||
increment_num_times_observed_no_additional_docs_to_populate_migration_table_with_commit,
|
||||
)
|
||||
from onyx.db.opensearch_migration import should_document_migration_be_permanently_failed
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.document_index.interfaces_new import TenantState
|
||||
from onyx.document_index.opensearch.opensearch_document_index import (
|
||||
OpenSearchDocumentIndex,
|
||||
)
|
||||
from onyx.document_index.opensearch.schema import DocumentChunk
|
||||
from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
from shared_configs.contextvars import get_current_tenant_id
|
||||
|
||||
|
||||
def _migrate_single_document(
|
||||
document_id: str,
|
||||
opensearch_document_index: OpenSearchDocumentIndex,
|
||||
vespa_document_index: VespaDocumentIndex,
|
||||
tenant_state: TenantState,
|
||||
) -> int:
|
||||
"""Migrates a single document from Vespa to OpenSearch.
|
||||
|
||||
Args:
|
||||
document_id: The ID of the document to migrate.
|
||||
opensearch_document_index: The OpenSearch document index to use.
|
||||
vespa_document_index: The Vespa document index to use.
|
||||
tenant_state: The tenant state to use.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If no chunks are found for the document in Vespa, or if
|
||||
the number of candidate chunks to migrate does not match the number
|
||||
of chunks in Vespa.
|
||||
|
||||
Returns:
|
||||
The number of chunks migrated.
|
||||
"""
|
||||
vespa_document_chunks: list[dict[str, Any]] = (
|
||||
vespa_document_index.get_raw_document_chunks(document_id=document_id)
|
||||
)
|
||||
if not vespa_document_chunks:
|
||||
raise RuntimeError(f"No chunks found for document {document_id} in Vespa.")
|
||||
|
||||
opensearch_document_chunks: list[DocumentChunk] = (
|
||||
transform_vespa_chunks_to_opensearch_chunks(vespa_document_chunks, tenant_state)
|
||||
)
|
||||
if len(opensearch_document_chunks) != len(vespa_document_chunks):
|
||||
raise RuntimeError(
|
||||
f"Bug: Number of candidate chunks to migrate ({len(opensearch_document_chunks)}) does not match "
|
||||
f"number of chunks in Vespa ({len(vespa_document_chunks)})."
|
||||
)
|
||||
|
||||
opensearch_document_index.index_raw_chunks(chunks=opensearch_document_chunks)
|
||||
|
||||
return len(opensearch_document_chunks)
|
||||
|
||||
|
||||
# shared_task allows this task to be shared across celery app instances.
|
||||
@shared_task(
|
||||
name=OnyxCeleryTask.CHECK_FOR_DOCUMENTS_FOR_OPENSEARCH_MIGRATION_TASK,
|
||||
# Does not store the task's return value in the result backend.
|
||||
ignore_result=True,
|
||||
# When exceeded celery will raise a SoftTimeLimitExceeded in the task.
|
||||
soft_time_limit=60 * 5, # 5 minutes.
|
||||
# When exceeded the task will be forcefully terminated.
|
||||
time_limit=60 * 6, # 6 minutes.
|
||||
# Passed in self to the task to get task metadata.
|
||||
bind=True,
|
||||
)
|
||||
def check_for_documents_for_opensearch_migration_task(
|
||||
self: Task, *, tenant_id: str
|
||||
) -> bool | None:
|
||||
"""
|
||||
Periodic task to check for and add documents to the OpenSearch migration
|
||||
table.
|
||||
|
||||
Should not execute meaningful logic at the same time as
|
||||
migrate_document_from_vespa_to_opensearch_task.
|
||||
|
||||
Returns:
|
||||
None if OpenSearch migration is not enabled, or if the lock could not be
|
||||
acquired; effectively a no-op. True if the task completed
|
||||
successfully. False if the task failed.
|
||||
"""
|
||||
if not ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
|
||||
task_logger.warning(
|
||||
"OpenSearch migration is not enabled, skipping check for documents for the OpenSearch migration task."
|
||||
)
|
||||
return None
|
||||
|
||||
task_logger.info("Checking for documents for OpenSearch migration.")
|
||||
|
||||
r = get_redis_client()
|
||||
|
||||
# Use a lock to prevent overlapping tasks. Only this task or
|
||||
# migrate_document_from_vespa_to_opensearch_task can interact with the
|
||||
# OpenSearchMigration table at once.
|
||||
lock_beat: RedisLock = r.lock(
|
||||
name=OnyxRedisLocks.OPENSEARCH_MIGRATION_BEAT_LOCK,
|
||||
# The maximum time the lock can be held for. Will automatically be
|
||||
# released after this time.
|
||||
timeout=60 * 6, # 6 minutes, same as the time limit for this task.
|
||||
# .acquire will block until the lock is acquired.
|
||||
blocking=True,
|
||||
# Wait for 2 minutes trying to acquire the lock.
|
||||
blocking_timeout=60 * 2, # 2 minutes.
|
||||
)
|
||||
|
||||
if not lock_beat.acquire():
|
||||
task_logger.warning(
|
||||
"The OpenSearch migration check task timed out waiting for the lock."
|
||||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
# Double check that tenant info is correct.
|
||||
if tenant_id != get_current_tenant_id():
|
||||
err_str = (
|
||||
f"Tenant ID mismatch in the OpenSearch migration check task: "
|
||||
f"{tenant_id} != {get_current_tenant_id()}. This should never happen."
|
||||
)
|
||||
task_logger.error(err_str)
|
||||
return False
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
# For pagination, get the last ID we've inserted into
|
||||
# OpenSearchMigration.
|
||||
last_opensearch_migration_document_id = (
|
||||
get_last_opensearch_migration_document_id(db_session)
|
||||
)
|
||||
# Now get the next batch of doc IDs starting after the last ID.
|
||||
document_ids = get_paginated_document_batch(
|
||||
db_session,
|
||||
prev_ending_document_id=last_opensearch_migration_document_id,
|
||||
)
|
||||
|
||||
if not document_ids:
|
||||
task_logger.info(
|
||||
"No more documents to insert for OpenSearch migration."
|
||||
)
|
||||
increment_num_times_observed_no_additional_docs_to_populate_migration_table_with_commit(
|
||||
db_session
|
||||
)
|
||||
# TODO(andrei): Once we've done this enough times and the number
|
||||
# of documents matches the number of migration records, we can
|
||||
# be done with this task and update
|
||||
# document_migration_record_table_population_status.
|
||||
return True
|
||||
|
||||
# Create the migration records for the next batch of documents with
|
||||
# status PENDING.
|
||||
create_opensearch_migration_records_with_commit(db_session, document_ids)
|
||||
task_logger.info(
|
||||
f"Created {len(document_ids)} migration records for the next batch of documents."
|
||||
)
|
||||
except Exception:
|
||||
task_logger.exception("Error in the OpenSearch migration check task.")
|
||||
return False
|
||||
finally:
|
||||
if lock_beat.owned():
|
||||
lock_beat.release()
|
||||
else:
|
||||
task_logger.warning(
|
||||
"The OpenSearch migration lock was not owned on completion of the check task."
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
# shared_task allows this task to be shared across celery app instances.
|
||||
@shared_task(
|
||||
name=OnyxCeleryTask.MIGRATE_DOCUMENT_FROM_VESPA_TO_OPENSEARCH_TASK,
|
||||
# Does not store the task's return value in the result backend.
|
||||
ignore_result=True,
|
||||
# When exceeded celery will raise a SoftTimeLimitExceeded in the task.
|
||||
soft_time_limit=60 * 5, # 5 minutes.
|
||||
# When exceeded the task will be forcefully terminated.
|
||||
time_limit=60 * 6, # 6 minutes.
|
||||
# Passed in self to the task to get task metadata.
|
||||
bind=True,
|
||||
)
|
||||
def migrate_documents_from_vespa_to_opensearch_task(
|
||||
self: Task,
|
||||
*,
|
||||
tenant_id: str,
|
||||
) -> bool | None:
|
||||
"""Periodic task to migrate documents from Vespa to OpenSearch.
|
||||
|
||||
Should not execute meaningful logic at the same time as
|
||||
check_for_documents_for_opensearch_migration_task.
|
||||
|
||||
Returns:
|
||||
None if OpenSearch migration is not enabled, or if the lock could not be
|
||||
acquired; effectively a no-op. True if the task completed
|
||||
successfully. False if the task failed.
|
||||
"""
|
||||
if not ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
|
||||
task_logger.warning(
|
||||
"OpenSearch migration is not enabled, skipping trying to migrate documents from Vespa to OpenSearch."
|
||||
)
|
||||
return None
|
||||
|
||||
task_logger.info("Trying to migrate documents from Vespa to OpenSearch.")
|
||||
|
||||
r = get_redis_client()
|
||||
|
||||
# Use a lock to prevent overlapping tasks. Only this task or
|
||||
# check_for_documents_for_opensearch_migration_task can interact with the
|
||||
# OpenSearchMigration table at once.
|
||||
lock_beat: RedisLock = r.lock(
|
||||
name=OnyxRedisLocks.OPENSEARCH_MIGRATION_BEAT_LOCK,
|
||||
# The maximum time the lock can be held for. Will automatically be
|
||||
# released after this time.
|
||||
timeout=60 * 6, # 6 minutes, same as the time limit for this task.
|
||||
# .acquire will block until the lock is acquired.
|
||||
blocking=True,
|
||||
# Wait for 2 minutes trying to acquire the lock.
|
||||
blocking_timeout=60 * 2, # 2 minutes.
|
||||
)
|
||||
|
||||
if not lock_beat.acquire():
|
||||
task_logger.warning(
|
||||
"The OpenSearch migration task timed out waiting for the lock."
|
||||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
# Double check that tenant info is correct.
|
||||
if tenant_id != get_current_tenant_id():
|
||||
err_str = (
|
||||
f"Tenant ID mismatch in the OpenSearch migration task: "
|
||||
f"{tenant_id} != {get_current_tenant_id()}. This should never happen."
|
||||
)
|
||||
task_logger.error(err_str)
|
||||
return False
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
records_needing_migration = (
|
||||
get_opensearch_migration_records_needing_migration(db_session)
|
||||
)
|
||||
if not records_needing_migration:
|
||||
task_logger.info(
|
||||
"No documents found that need to be migrated from Vespa to OpenSearch."
|
||||
)
|
||||
increment_num_times_observed_no_additional_docs_to_migrate_with_commit(
|
||||
db_session
|
||||
)
|
||||
# TODO(andrei): Once we've done this enough times and
|
||||
# document_migration_record_table_population_status is done, we
|
||||
# can be done with this task and update
|
||||
# overall_document_migration_status accordingly. Note that this
|
||||
# includes marking connectors as needing reindexing if some
|
||||
# migrations failed.
|
||||
return True
|
||||
|
||||
search_settings = get_current_search_settings(db_session)
|
||||
tenant_state = TenantState(tenant_id=tenant_id, multitenant=MULTI_TENANT)
|
||||
|
||||
opensearch_document_index = OpenSearchDocumentIndex(
|
||||
index_name=search_settings.index_name, tenant_state=tenant_state
|
||||
)
|
||||
vespa_document_index = VespaDocumentIndex(
|
||||
index_name=search_settings.index_name,
|
||||
tenant_state=tenant_state,
|
||||
large_chunks_enabled=False,
|
||||
)
|
||||
|
||||
task_logger.info(
|
||||
f"Trying to migrate {len(records_needing_migration)} documents from Vespa to OpenSearch."
|
||||
)
|
||||
|
||||
for record in records_needing_migration:
|
||||
try:
|
||||
# If the Document's chunk count is not known, it was
|
||||
# probably just indexed so fail here to give it a chance to
|
||||
# sync. If in the rare event this Document has not been
|
||||
# re-indexed in a very long time and is still under the
|
||||
# "old" embedding/indexing logic where chunk count was never
|
||||
# stored, we will eventually permanently fail and thus force
|
||||
# a re-index of this doc, which is a desireable outcome.
|
||||
if record.document.chunk_count is None:
|
||||
raise RuntimeError(
|
||||
f"Document {record.document_id} has no chunk count."
|
||||
)
|
||||
|
||||
chunks_migrated = _migrate_single_document(
|
||||
document_id=record.document_id,
|
||||
opensearch_document_index=opensearch_document_index,
|
||||
vespa_document_index=vespa_document_index,
|
||||
tenant_state=tenant_state,
|
||||
)
|
||||
|
||||
# If the number of chunks in Vespa is not in sync with the
|
||||
# Document table for this doc let's not consider this
|
||||
# completed and let's let a subsequent run take care of it.
|
||||
if chunks_migrated != record.document.chunk_count:
|
||||
raise RuntimeError(
|
||||
f"Number of chunks migrated ({chunks_migrated}) does not match number of expected chunks in Vespa "
|
||||
f"({record.document.chunk_count}) for document {record.document_id}."
|
||||
)
|
||||
|
||||
record.status = OpenSearchDocumentMigrationStatus.COMPLETED
|
||||
except Exception:
|
||||
record.status = OpenSearchDocumentMigrationStatus.FAILED
|
||||
record.error_message = f"Attempt {record.attempts_count + 1}:\n{traceback.format_exc()}"
|
||||
task_logger.exception(
|
||||
f"Error migrating document {record.document_id} from Vespa to OpenSearch."
|
||||
)
|
||||
finally:
|
||||
record.attempts_count += 1
|
||||
record.last_attempt_at = datetime.now(timezone.utc)
|
||||
if should_document_migration_be_permanently_failed(record):
|
||||
record.status = (
|
||||
OpenSearchDocumentMigrationStatus.PERMANENTLY_FAILED
|
||||
)
|
||||
# TODO(andrei): Not necessarily here but if this happens
|
||||
# we'll need to mark the connector as needing reindex.
|
||||
|
||||
db_session.commit()
|
||||
except Exception:
|
||||
task_logger.exception("Error in the OpenSearch migration task.")
|
||||
return False
|
||||
finally:
|
||||
if lock_beat.owned():
|
||||
lock_beat.release()
|
||||
else:
|
||||
task_logger.warning(
|
||||
"The OpenSearch migration lock was not owned on completion of the migration task."
|
||||
)
|
||||
|
||||
return True
|
||||
@@ -0,0 +1,268 @@
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from typing import Any
|
||||
|
||||
from onyx.configs.constants import PUBLIC_DOC_PAT
|
||||
from onyx.document_index.interfaces_new import TenantState
|
||||
from onyx.document_index.opensearch.schema import DocumentChunk
|
||||
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
|
||||
from onyx.document_index.vespa_constants import BLURB
|
||||
from onyx.document_index.vespa_constants import BOOST
|
||||
from onyx.document_index.vespa_constants import CHUNK_CONTEXT
|
||||
from onyx.document_index.vespa_constants import CHUNK_ID
|
||||
from onyx.document_index.vespa_constants import CONTENT
|
||||
from onyx.document_index.vespa_constants import DOC_SUMMARY
|
||||
from onyx.document_index.vespa_constants import DOC_UPDATED_AT
|
||||
from onyx.document_index.vespa_constants import DOCUMENT_ID
|
||||
from onyx.document_index.vespa_constants import DOCUMENT_SETS
|
||||
from onyx.document_index.vespa_constants import EMBEDDINGS
|
||||
from onyx.document_index.vespa_constants import FULL_CHUNK_EMBEDDING_KEY
|
||||
from onyx.document_index.vespa_constants import HIDDEN
|
||||
from onyx.document_index.vespa_constants import IMAGE_FILE_NAME
|
||||
from onyx.document_index.vespa_constants import METADATA_LIST
|
||||
from onyx.document_index.vespa_constants import METADATA_SUFFIX
|
||||
from onyx.document_index.vespa_constants import PRIMARY_OWNERS
|
||||
from onyx.document_index.vespa_constants import SECONDARY_OWNERS
|
||||
from onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER
|
||||
from onyx.document_index.vespa_constants import SOURCE_LINKS
|
||||
from onyx.document_index.vespa_constants import SOURCE_TYPE
|
||||
from onyx.document_index.vespa_constants import TENANT_ID
|
||||
from onyx.document_index.vespa_constants import TITLE
|
||||
from onyx.document_index.vespa_constants import TITLE_EMBEDDING
|
||||
from onyx.document_index.vespa_constants import USER_PROJECT
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
|
||||
|
||||
def _extract_content_vector(embeddings: Any) -> list[float]:
|
||||
"""Extracts the full chunk embedding vector from Vespa's embeddings tensor.
|
||||
|
||||
Vespa stores embeddings as a tensor<float>(t{},x[dim]) where 't' maps
|
||||
embedding names (like "full_chunk") to vectors. The API can return this in
|
||||
different formats:
|
||||
1. Direct list: {"full_chunk": [...]}
|
||||
2. Blocks format: {"blocks": {"full_chunk": [0.1, 0.2, ...]}}
|
||||
3. Possibly other formats.
|
||||
|
||||
We only support formats 1 and 2. Any other supplied format will raise an
|
||||
error.
|
||||
|
||||
Raises:
|
||||
ValueError: If the embeddings format is not supported.
|
||||
|
||||
Returns:
|
||||
The full chunk content embedding vector as a list of floats.
|
||||
"""
|
||||
if isinstance(embeddings, dict):
|
||||
# Handle format 1.
|
||||
full_chunk_embedding = embeddings.get(FULL_CHUNK_EMBEDDING_KEY)
|
||||
if isinstance(full_chunk_embedding, list):
|
||||
# Double check that within the list we have floats and not another
|
||||
# list or dict.
|
||||
if not full_chunk_embedding:
|
||||
raise ValueError("Full chunk embedding is empty.")
|
||||
if isinstance(full_chunk_embedding[0], float):
|
||||
return full_chunk_embedding
|
||||
|
||||
# Handle format 2.
|
||||
blocks = embeddings.get("blocks")
|
||||
if isinstance(blocks, dict):
|
||||
full_chunk_embedding = blocks.get(FULL_CHUNK_EMBEDDING_KEY)
|
||||
if isinstance(full_chunk_embedding, list):
|
||||
# Double check that within the list we have floats and not another
|
||||
# list or dict.
|
||||
if not full_chunk_embedding:
|
||||
raise ValueError("Full chunk embedding is empty.")
|
||||
if isinstance(full_chunk_embedding[0], float):
|
||||
return full_chunk_embedding
|
||||
|
||||
raise ValueError(f"Unknown embedding format: {type(embeddings)}")
|
||||
|
||||
|
||||
def _extract_title_vector(title_embedding: Any | None) -> list[float] | None:
|
||||
"""Extract the title embedding vector.
|
||||
|
||||
Returns None if no title embedding exists.
|
||||
|
||||
Vespa returns title_embedding as tensor<float>(x[dim]) which can be in
|
||||
formats:
|
||||
1. Direct list: [0.1, 0.2, ...]
|
||||
2. Values format: {"values": [0.1, 0.2, ...]}
|
||||
3. Possibly other formats.
|
||||
|
||||
Only formats 1 and 2 are supported. Any other supplied format will raise an
|
||||
error.
|
||||
|
||||
Raises:
|
||||
ValueError: If the title embedding format is not supported.
|
||||
|
||||
Returns:
|
||||
The title embedding vector as a list of floats.
|
||||
"""
|
||||
if title_embedding is None:
|
||||
return None
|
||||
|
||||
# Handle format 1.
|
||||
if isinstance(title_embedding, list):
|
||||
# Double check that within the list we have floats and not another
|
||||
# list or dict.
|
||||
if not title_embedding:
|
||||
return None
|
||||
if isinstance(title_embedding[0], float):
|
||||
return title_embedding
|
||||
|
||||
# Handle format 2.
|
||||
if isinstance(title_embedding, dict):
|
||||
# Try values format.
|
||||
values = title_embedding.get("values")
|
||||
if values is not None and isinstance(values, list):
|
||||
# Double check that within the list we have floats and not another
|
||||
# list or dict.
|
||||
if not values:
|
||||
return None
|
||||
if isinstance(values[0], float):
|
||||
return values
|
||||
|
||||
raise ValueError(f"Unknown title embedding format: {type(title_embedding)}")
|
||||
|
||||
|
||||
def _transform_vespa_document_sets_to_opensearch_document_sets(
|
||||
vespa_document_sets: dict[str, int] | None,
|
||||
) -> list[str] | None:
|
||||
if not vespa_document_sets:
|
||||
return None
|
||||
return list(vespa_document_sets.keys())
|
||||
|
||||
|
||||
def _transform_vespa_acl_to_opensearch_acl(
|
||||
vespa_acl: dict[str, int] | None,
|
||||
) -> tuple[bool, list[str]]:
|
||||
if not vespa_acl:
|
||||
raise ValueError(
|
||||
"Missing ACL in Vespa chunk. This does not make sense as it implies the document is never searchable by anyone ever."
|
||||
)
|
||||
acl_list = list(vespa_acl.keys())
|
||||
is_public = PUBLIC_DOC_PAT in acl_list
|
||||
if is_public:
|
||||
acl_list.remove(PUBLIC_DOC_PAT)
|
||||
return is_public, acl_list
|
||||
|
||||
|
||||
def transform_vespa_chunks_to_opensearch_chunks(
|
||||
vespa_chunks: list[dict[str, Any]],
|
||||
tenant_state: TenantState,
|
||||
) -> list[DocumentChunk]:
|
||||
result: list[DocumentChunk] = []
|
||||
for vespa_chunk in vespa_chunks:
|
||||
# This should exist; fail loudly if it does not.
|
||||
document_id: str = vespa_chunk[DOCUMENT_ID]
|
||||
if not document_id:
|
||||
raise ValueError("Missing document_id in Vespa chunk.")
|
||||
|
||||
# This should exist; fail loudly if it does not.
|
||||
chunk_index: int = vespa_chunk[CHUNK_ID]
|
||||
|
||||
title: str | None = vespa_chunk.get(TITLE)
|
||||
# WARNING: Should supply format.tensors=short-value to the Vespa client
|
||||
# in order to get a supported format for the tensors.
|
||||
title_vector: list[float] | None = _extract_title_vector(
|
||||
vespa_chunk.get(TITLE_EMBEDDING)
|
||||
)
|
||||
|
||||
# This should exist; fail loudly if it does not.
|
||||
content: str = vespa_chunk[CONTENT]
|
||||
if not content:
|
||||
raise ValueError("Missing content in Vespa chunk.")
|
||||
# This should exist; fail loudly if it does not.
|
||||
# WARNING: Should supply format.tensors=short-value to the Vespa client
|
||||
# in order to get a supported format for the tensors.
|
||||
content_vector: list[float] = _extract_content_vector(vespa_chunk[EMBEDDINGS])
|
||||
if not content_vector:
|
||||
raise ValueError("Missing content_vector in Vespa chunk.")
|
||||
|
||||
# This should exist; fail loudly if it does not.
|
||||
source_type: str = vespa_chunk[SOURCE_TYPE]
|
||||
if not source_type:
|
||||
raise ValueError("Missing source_type in Vespa chunk.")
|
||||
|
||||
metadata_list: list[str] | None = vespa_chunk.get(METADATA_LIST)
|
||||
|
||||
_raw_doc_updated_at: int | None = vespa_chunk.get(DOC_UPDATED_AT)
|
||||
last_updated: datetime | None = (
|
||||
datetime.fromtimestamp(_raw_doc_updated_at, tz=timezone.utc)
|
||||
if _raw_doc_updated_at is not None
|
||||
else None
|
||||
)
|
||||
|
||||
hidden: bool = vespa_chunk.get(HIDDEN, False)
|
||||
|
||||
# This should exist; fail loudly if it does not.
|
||||
global_boost: int = vespa_chunk[BOOST]
|
||||
|
||||
# This should exist; fail loudly if it does not.
|
||||
semantic_identifier: str = vespa_chunk[SEMANTIC_IDENTIFIER]
|
||||
if not semantic_identifier:
|
||||
raise ValueError("Missing semantic_identifier in Vespa chunk.")
|
||||
|
||||
image_file_id: str | None = vespa_chunk.get(IMAGE_FILE_NAME)
|
||||
source_links: str | None = vespa_chunk.get(SOURCE_LINKS)
|
||||
blurb: str = vespa_chunk.get(BLURB, "")
|
||||
doc_summary: str = vespa_chunk.get(DOC_SUMMARY, "")
|
||||
chunk_context: str = vespa_chunk.get(CHUNK_CONTEXT, "")
|
||||
metadata_suffix: str | None = vespa_chunk.get(METADATA_SUFFIX)
|
||||
document_sets: list[str] | None = (
|
||||
_transform_vespa_document_sets_to_opensearch_document_sets(
|
||||
vespa_chunk.get(DOCUMENT_SETS)
|
||||
)
|
||||
)
|
||||
user_projects: list[int] | None = vespa_chunk.get(USER_PROJECT)
|
||||
primary_owners: list[str] | None = vespa_chunk.get(PRIMARY_OWNERS)
|
||||
secondary_owners: list[str] | None = vespa_chunk.get(SECONDARY_OWNERS)
|
||||
|
||||
# This should exist; fail loudly if it does not; this function will
|
||||
# raise in that event.
|
||||
is_public, acl_list = _transform_vespa_acl_to_opensearch_acl(
|
||||
vespa_chunk.get(ACCESS_CONTROL_LIST)
|
||||
)
|
||||
|
||||
chunk_tenant_id: str | None = vespa_chunk.get(TENANT_ID)
|
||||
if MULTI_TENANT:
|
||||
if not chunk_tenant_id:
|
||||
raise ValueError(
|
||||
"Missing tenant_id in Vespa chunk in a multi-tenant environment."
|
||||
)
|
||||
if chunk_tenant_id != tenant_state.tenant_id:
|
||||
raise ValueError(
|
||||
f"Chunk tenant_id {chunk_tenant_id} does not match expected tenant_id {tenant_state.tenant_id}"
|
||||
)
|
||||
|
||||
opensearch_chunk = DocumentChunk(
|
||||
document_id=document_id,
|
||||
chunk_index=chunk_index,
|
||||
title=title,
|
||||
title_vector=title_vector,
|
||||
content=content,
|
||||
content_vector=content_vector,
|
||||
source_type=source_type,
|
||||
metadata_list=metadata_list,
|
||||
last_updated=last_updated,
|
||||
public=is_public,
|
||||
access_control_list=acl_list,
|
||||
hidden=hidden,
|
||||
global_boost=global_boost,
|
||||
semantic_identifier=semantic_identifier,
|
||||
image_file_id=image_file_id,
|
||||
source_links=source_links,
|
||||
blurb=blurb,
|
||||
doc_summary=doc_summary,
|
||||
chunk_context=chunk_context,
|
||||
metadata_suffix=metadata_suffix,
|
||||
document_sets=document_sets,
|
||||
user_projects=user_projects,
|
||||
primary_owners=primary_owners,
|
||||
secondary_owners=secondary_owners,
|
||||
tenant_id=tenant_state,
|
||||
)
|
||||
|
||||
result.append(opensearch_chunk)
|
||||
|
||||
return result
|
||||
@@ -59,6 +59,9 @@ from onyx.file_store.document_batch_storage import get_document_batch_storage
|
||||
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
|
||||
from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
|
||||
from onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch
|
||||
from onyx.redis.redis_hierarchy import ensure_source_node_exists
|
||||
from onyx.redis.redis_hierarchy import get_node_id_from_raw_id
|
||||
from onyx.redis.redis_hierarchy import get_source_node_id_from_cache
|
||||
from onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from onyx.server.features.build.indexing.persistent_document_writer import (
|
||||
@@ -381,6 +384,9 @@ def connector_document_extraction(
|
||||
db_credential = index_attempt.connector_credential_pair.credential
|
||||
processing_mode = index_attempt.connector_credential_pair.processing_mode
|
||||
is_primary = index_attempt.search_settings.status == IndexModelStatus.PRESENT
|
||||
is_connector_public = (
|
||||
index_attempt.connector_credential_pair.access_type == AccessType.PUBLIC
|
||||
)
|
||||
|
||||
from_beginning = index_attempt.from_beginning
|
||||
has_successful_attempt = (
|
||||
@@ -542,6 +548,13 @@ def connector_document_extraction(
|
||||
total_failures = 0
|
||||
document_count = 0
|
||||
|
||||
# Ensure the SOURCE-type root hierarchy node exists before processing.
|
||||
# This is the root of the hierarchy tree for this source - all other
|
||||
# hierarchy nodes should ultimately have this as an ancestor.
|
||||
redis_client = get_redis_client(tenant_id=tenant_id)
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
ensure_source_node_exists(redis_client, db_session, db_connector.source)
|
||||
|
||||
# Main extraction loop
|
||||
while checkpoint.has_more:
|
||||
logger.info(
|
||||
@@ -595,6 +608,7 @@ def connector_document_extraction(
|
||||
nodes=hierarchy_node_batch,
|
||||
source=db_connector.source,
|
||||
commit=True,
|
||||
is_connector_public=is_connector_public,
|
||||
)
|
||||
|
||||
# Cache in Redis for fast ancestor resolution during doc processing
|
||||
@@ -620,6 +634,26 @@ def connector_document_extraction(
|
||||
|
||||
# Clean documents and create batch
|
||||
doc_batch_cleaned = strip_null_characters(document_batch)
|
||||
|
||||
# Resolve parent_hierarchy_raw_node_id to parent_hierarchy_node_id
|
||||
# using the Redis cache (just populated from hierarchy nodes batch)
|
||||
with get_session_with_current_tenant() as db_session_tmp:
|
||||
source_node_id = get_source_node_id_from_cache(
|
||||
redis_client, db_session_tmp, db_connector.source
|
||||
)
|
||||
for doc in doc_batch_cleaned:
|
||||
if doc.parent_hierarchy_raw_node_id is not None:
|
||||
node_id, found = get_node_id_from_raw_id(
|
||||
redis_client,
|
||||
db_connector.source,
|
||||
doc.parent_hierarchy_raw_node_id,
|
||||
)
|
||||
doc.parent_hierarchy_node_id = (
|
||||
node_id if found else source_node_id
|
||||
)
|
||||
else:
|
||||
doc.parent_hierarchy_node_id = source_node_id
|
||||
|
||||
batch_description = []
|
||||
|
||||
for doc in doc_batch_cleaned:
|
||||
|
||||
@@ -45,6 +45,8 @@ class ChatStateContainer:
|
||||
self.citation_to_doc: CitationMapping = {}
|
||||
# True if this turn is a clarification question (deep research flow)
|
||||
self.is_clarification: bool = False
|
||||
# Pre-answer processing time (time before answer starts) in seconds
|
||||
self.pre_answer_processing_time: float | None = None
|
||||
# Note: LLM cost tracking is now handled in multi_llm.py
|
||||
# Search doc collection - maps dedup key to SearchDoc for all docs from tool calls
|
||||
self._all_search_docs: dict[SearchDocKey, SearchDoc] = {}
|
||||
@@ -101,6 +103,16 @@ class ChatStateContainer:
|
||||
with self._lock:
|
||||
return self.is_clarification
|
||||
|
||||
def set_pre_answer_processing_time(self, duration: float | None) -> None:
|
||||
"""Set the pre-answer processing time (time before answer starts)."""
|
||||
with self._lock:
|
||||
self.pre_answer_processing_time = duration
|
||||
|
||||
def get_pre_answer_processing_time(self) -> float | None:
|
||||
"""Thread-safe getter for pre_answer_processing_time."""
|
||||
with self._lock:
|
||||
return self.pre_answer_processing_time
|
||||
|
||||
@staticmethod
|
||||
def create_search_doc_key(
|
||||
search_doc: SearchDoc, use_simple_key: bool = True
|
||||
|
||||
@@ -277,7 +277,7 @@ def extract_headers(
|
||||
|
||||
|
||||
def create_temporary_persona(
|
||||
persona_config: PersonaOverrideConfig, db_session: Session, user: User | None = None
|
||||
persona_config: PersonaOverrideConfig, db_session: Session, user: User
|
||||
) -> Persona:
|
||||
if not is_user_admin(user):
|
||||
raise HTTPException(
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
@@ -390,6 +391,9 @@ def run_llm_loop(
|
||||
|
||||
initialize_litellm()
|
||||
|
||||
# Track when the loop starts for calculating time-to-answer
|
||||
loop_start_time = time.monotonic()
|
||||
|
||||
# Initialize citation processor for handling citations dynamically
|
||||
# When include_citations is True, use HYPERLINK mode to format citations as [[1]](url)
|
||||
# When include_citations is False, use REMOVE mode to strip citations from output
|
||||
@@ -551,6 +555,11 @@ def run_llm_loop(
|
||||
# This calls the LLM, yields packets (reasoning, answers, etc.) and returns the result
|
||||
# It also pre-processes the tool calls in preparation for running them
|
||||
tool_defs = [tool.tool_definition() for tool in final_tools]
|
||||
|
||||
# Calculate total processing time from loop start until now
|
||||
# This measures how long the user waits before the answer starts streaming
|
||||
pre_answer_processing_time = time.monotonic() - loop_start_time
|
||||
|
||||
llm_step_result, has_reasoned = run_llm_step(
|
||||
emitter=emitter,
|
||||
history=truncated_message_history,
|
||||
@@ -565,6 +574,7 @@ def run_llm_loop(
|
||||
# final set of documents immediately if desired.
|
||||
final_documents=gathered_documents,
|
||||
user_identity=user_identity,
|
||||
pre_answer_processing_time=pre_answer_processing_time,
|
||||
)
|
||||
if has_reasoned:
|
||||
reasoning_cycles += 1
|
||||
|
||||
@@ -622,6 +622,7 @@ def run_llm_step_pkt_generator(
|
||||
# TODO: Temporary handling of nested tool calls with agents, figure out a better way to handle this
|
||||
use_existing_tab_index: bool = False,
|
||||
is_deep_research: bool = False,
|
||||
pre_answer_processing_time: float | None = None,
|
||||
) -> Generator[Packet, None, tuple[LlmStepResult, bool]]:
|
||||
"""Run an LLM step and stream the response as packets.
|
||||
NOTE: DO NOT TOUCH THIS FUNCTION BEFORE ASKING YUHONG, this is very finicky and
|
||||
@@ -677,9 +678,8 @@ def run_llm_step_pkt_generator(
|
||||
llm_msg_history = translate_history_to_llm_format(history, llm.config)
|
||||
has_reasoned = 0
|
||||
|
||||
# Uncomment the line below to log the entire message history to the console
|
||||
if LOG_ONYX_MODEL_INTERACTIONS:
|
||||
logger.info(
|
||||
logger.debug(
|
||||
f"Message history:\n{_format_message_history_for_logging(llm_msg_history)}"
|
||||
)
|
||||
|
||||
@@ -822,6 +822,12 @@ def run_llm_step_pkt_generator(
|
||||
reasoning_start = False
|
||||
|
||||
if not answer_start:
|
||||
# Store pre-answer processing time in state container for save_chat
|
||||
if state_container and pre_answer_processing_time is not None:
|
||||
state_container.set_pre_answer_processing_time(
|
||||
pre_answer_processing_time
|
||||
)
|
||||
|
||||
yield Packet(
|
||||
placement=Placement(
|
||||
turn_index=turn_index,
|
||||
@@ -830,6 +836,7 @@ def run_llm_step_pkt_generator(
|
||||
),
|
||||
obj=AgentResponseStart(
|
||||
final_documents=final_documents,
|
||||
pre_answer_processing_seconds=pre_answer_processing_time,
|
||||
),
|
||||
)
|
||||
answer_start = True
|
||||
@@ -1038,6 +1045,7 @@ def run_llm_step(
|
||||
max_tokens: int | None = None,
|
||||
use_existing_tab_index: bool = False,
|
||||
is_deep_research: bool = False,
|
||||
pre_answer_processing_time: float | None = None,
|
||||
) -> tuple[LlmStepResult, bool]:
|
||||
"""Wrapper around run_llm_step_pkt_generator that consumes packets and emits them.
|
||||
|
||||
@@ -1059,6 +1067,7 @@ def run_llm_step(
|
||||
max_tokens=max_tokens,
|
||||
use_existing_tab_index=use_existing_tab_index,
|
||||
is_deep_research=is_deep_research,
|
||||
pre_answer_processing_time=pre_answer_processing_time,
|
||||
)
|
||||
|
||||
while True:
|
||||
|
||||
@@ -4,6 +4,7 @@ An overview can be found in the README.md file in this directory.
|
||||
"""
|
||||
|
||||
import re
|
||||
import time
|
||||
import traceback
|
||||
from collections.abc import Callable
|
||||
from uuid import UUID
|
||||
@@ -82,7 +83,6 @@ from onyx.tools.tool_constructor import construct_tools
|
||||
from onyx.tools.tool_constructor import CustomToolConfig
|
||||
from onyx.tools.tool_constructor import SearchToolConfig
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.long_term_log import LongTermLogger
|
||||
from onyx.utils.telemetry import mt_cloud_telemetry
|
||||
from onyx.utils.timing import log_function_time
|
||||
from onyx.utils.variable_functionality import (
|
||||
@@ -293,7 +293,7 @@ def _get_project_search_availability(
|
||||
|
||||
def handle_stream_message_objects(
|
||||
new_msg_req: SendMessageRequest,
|
||||
user: User | None,
|
||||
user: User,
|
||||
db_session: Session,
|
||||
# if specified, uses the last user message and does not create a new user message based
|
||||
# on the `new_msg_req.message`. Currently, requires a state where the last message is a
|
||||
@@ -312,17 +312,17 @@ def handle_stream_message_objects(
|
||||
external_state_container: ChatStateContainer | None = None,
|
||||
) -> AnswerStream:
|
||||
tenant_id = get_current_tenant_id()
|
||||
processing_start_time = time.monotonic()
|
||||
|
||||
llm: LLM | None = None
|
||||
chat_session: ChatSession | None = None
|
||||
redis_client: Redis | None = None
|
||||
|
||||
user_id = user.id if user is not None else None
|
||||
llm_user_identifier = (
|
||||
user.email
|
||||
if user is not None and getattr(user, "email", None)
|
||||
else (str(user_id) if user_id else "anonymous_user")
|
||||
)
|
||||
user_id = user.id
|
||||
if user.is_anonymous:
|
||||
llm_user_identifier = "anonymous_user"
|
||||
else:
|
||||
llm_user_identifier = user.email or str(user_id)
|
||||
try:
|
||||
if not new_msg_req.chat_session_id:
|
||||
if not new_msg_req.chat_session_info:
|
||||
@@ -349,15 +349,10 @@ def handle_stream_message_objects(
|
||||
user_id=llm_user_identifier, session_id=str(chat_session.id)
|
||||
)
|
||||
|
||||
# permanent "log" store, used primarily for debugging
|
||||
long_term_logger = LongTermLogger(
|
||||
metadata={"user_id": str(user_id), "chat_session_id": str(chat_session.id)}
|
||||
)
|
||||
|
||||
# Milestone tracking, most devs using the API don't need to understand this
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=user.email if user else tenant_id,
|
||||
distinct_id=user.email if not user.is_anonymous else tenant_id,
|
||||
event=MilestoneRecordType.MULTIPLE_ASSISTANTS,
|
||||
)
|
||||
|
||||
@@ -367,7 +362,7 @@ def handle_stream_message_objects(
|
||||
attribute="event_telemetry",
|
||||
fallback=noop_fallback,
|
||||
)(
|
||||
distinct_id=user.email if user else tenant_id,
|
||||
distinct_id=user.email if not user.is_anonymous else tenant_id,
|
||||
event="user_message_sent",
|
||||
properties={
|
||||
"origin": new_msg_req.origin.value,
|
||||
@@ -384,7 +379,6 @@ def handle_stream_message_objects(
|
||||
user=user,
|
||||
llm_override=new_msg_req.llm_override or chat_session.llm_override,
|
||||
additional_headers=litellm_additional_headers,
|
||||
long_term_logger=long_term_logger,
|
||||
)
|
||||
token_counter = get_llm_token_counter(llm)
|
||||
|
||||
@@ -602,6 +596,7 @@ def handle_stream_message_objects(
|
||||
chat_session_id=str(chat_session.id),
|
||||
is_connected=check_is_connected,
|
||||
assistant_message=assistant_response,
|
||||
processing_start_time=processing_start_time,
|
||||
)
|
||||
|
||||
# Run the LLM loop with explicit wrapper for stop signal handling
|
||||
@@ -722,6 +717,7 @@ def llm_loop_completion_handle(
|
||||
db_session: Session,
|
||||
chat_session_id: str,
|
||||
assistant_message: ChatMessage,
|
||||
processing_start_time: float | None = None,
|
||||
) -> None:
|
||||
# Determine if stopped by user
|
||||
completed_normally = is_connected()
|
||||
@@ -753,12 +749,13 @@ def llm_loop_completion_handle(
|
||||
assistant_message=assistant_message,
|
||||
is_clarification=state_container.is_clarification,
|
||||
emitted_citations=state_container.get_emitted_citations(),
|
||||
pre_answer_processing_time=state_container.get_pre_answer_processing_time(),
|
||||
)
|
||||
|
||||
|
||||
def stream_chat_message_objects(
|
||||
new_msg_req: CreateChatMessageRequest,
|
||||
user: User | None,
|
||||
user: User,
|
||||
db_session: Session,
|
||||
# if specified, uses the last user message and does not create a new user message based
|
||||
# on the `new_msg_req.message`. Currently, requires a state where the last message is a
|
||||
|
||||
@@ -145,6 +145,7 @@ def save_chat_turn(
|
||||
assistant_message: ChatMessage,
|
||||
is_clarification: bool = False,
|
||||
emitted_citations: set[int] | None = None,
|
||||
pre_answer_processing_time: float | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Save a chat turn by populating the assistant_message and creating related entities.
|
||||
@@ -169,12 +170,17 @@ def save_chat_turn(
|
||||
is_clarification: Whether this assistant message is a clarification question (deep research flow)
|
||||
emitted_citations: Set of citation numbers that were actually emitted during streaming.
|
||||
If provided, only citations in this set will be saved; others are filtered out.
|
||||
pre_answer_processing_time: Duration of processing before answer starts (in seconds)
|
||||
"""
|
||||
# 1. Update ChatMessage with message content, reasoning tokens, and token count
|
||||
assistant_message.message = message_text
|
||||
assistant_message.reasoning_tokens = reasoning_tokens
|
||||
assistant_message.is_clarification = is_clarification
|
||||
|
||||
# Use pre-answer processing time (captured when MESSAGE_START was emitted)
|
||||
if pre_answer_processing_time is not None:
|
||||
assistant_message.processing_duration_seconds = pre_answer_processing_time
|
||||
|
||||
# Calculate token count using default tokenizer, when storing, this should not use the LLM
|
||||
# specific one so we use a system default tokenizer here.
|
||||
default_tokenizer = get_tokenizer(None, None)
|
||||
|
||||
@@ -11,6 +11,9 @@ from onyx.configs.constants import QueryHistoryType
|
||||
from onyx.file_processing.enums import HtmlBasedConnectorTransformLinksStrategy
|
||||
from onyx.prompts.image_analysis import DEFAULT_IMAGE_SUMMARIZATION_SYSTEM_PROMPT
|
||||
from onyx.prompts.image_analysis import DEFAULT_IMAGE_SUMMARIZATION_USER_PROMPT
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
#####
|
||||
# App Configs
|
||||
@@ -71,8 +74,16 @@ WEB_DOMAIN = os.environ.get("WEB_DOMAIN") or "http://localhost:3000"
|
||||
#####
|
||||
# Auth Configs
|
||||
#####
|
||||
AUTH_TYPE = AuthType((os.environ.get("AUTH_TYPE") or AuthType.DISABLED.value).lower())
|
||||
DISABLE_AUTH = AUTH_TYPE == AuthType.DISABLED
|
||||
# Upgrades users from disabled auth to basic auth and shows warning.
|
||||
_auth_type_str = (os.environ.get("AUTH_TYPE") or "").lower()
|
||||
if not _auth_type_str or _auth_type_str in ("disabled", "none"):
|
||||
logger.warning(
|
||||
"AUTH_TYPE='disabled' is no longer supported. "
|
||||
"Defaulting to 'basic'. Please update your configuration. "
|
||||
"Your existing data will be migrated automatically."
|
||||
)
|
||||
_auth_type_str = AuthType.BASIC.value
|
||||
AUTH_TYPE = AuthType(_auth_type_str)
|
||||
|
||||
PASSWORD_MIN_LENGTH = int(os.getenv("PASSWORD_MIN_LENGTH", 8))
|
||||
PASSWORD_MAX_LENGTH = int(os.getenv("PASSWORD_MAX_LENGTH", 64))
|
||||
@@ -145,6 +156,10 @@ OAUTH_CLIENT_SECRET = (
|
||||
os.environ.get("OAUTH_CLIENT_SECRET", os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET"))
|
||||
or ""
|
||||
)
|
||||
|
||||
# Whether Google OAuth is enabled (requires both client ID and secret)
|
||||
OAUTH_ENABLED = bool(OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET)
|
||||
|
||||
# OpenID Connect configuration URL for OIDC integrations
|
||||
OPENID_CONFIG_URL = os.environ.get("OPENID_CONFIG_URL") or ""
|
||||
|
||||
@@ -203,6 +218,7 @@ TRACK_EXTERNAL_IDP_EXPIRY = (
|
||||
#####
|
||||
DOCUMENT_INDEX_NAME = "danswer_index"
|
||||
|
||||
# OpenSearch Configs
|
||||
OPENSEARCH_HOST = os.environ.get("OPENSEARCH_HOST") or "localhost"
|
||||
OPENSEARCH_REST_API_PORT = int(os.environ.get("OPENSEARCH_REST_API_PORT") or 9200)
|
||||
OPENSEARCH_ADMIN_USERNAME = os.environ.get("OPENSEARCH_ADMIN_USERNAME", "admin")
|
||||
@@ -1041,14 +1057,3 @@ STRIPE_PUBLISHABLE_KEY_URL = (
|
||||
)
|
||||
# Override for local testing with Stripe test keys (pk_test_*)
|
||||
STRIPE_PUBLISHABLE_KEY_OVERRIDE = os.environ.get("STRIPE_PUBLISHABLE_KEY")
|
||||
# Persistent Document Storage Configuration
|
||||
# When enabled, indexed documents are written to local filesystem with hierarchical structure
|
||||
PERSISTENT_DOCUMENT_STORAGE_ENABLED = (
|
||||
os.environ.get("PERSISTENT_DOCUMENT_STORAGE_ENABLED", "").lower() == "true"
|
||||
)
|
||||
|
||||
# Base directory path for persistent document storage (local filesystem)
|
||||
# Example: /var/onyx/indexed-docs or /app/indexed-docs
|
||||
PERSISTENT_DOCUMENT_STORAGE_PATH = os.environ.get(
|
||||
"PERSISTENT_DOCUMENT_STORAGE_PATH", "/app/indexed-docs"
|
||||
)
|
||||
|
||||
@@ -3,10 +3,6 @@ import os
|
||||
PROMPTS_YAML = "./onyx/seeding/prompts.yaml"
|
||||
PERSONAS_YAML = "./onyx/seeding/personas.yaml"
|
||||
NUM_RETURNED_HITS = 50
|
||||
# Used for LLM filtering and reranking
|
||||
# We want this to be approximately the number of results we want to show on the first page
|
||||
# It cannot be too large due to cost and latency implications
|
||||
NUM_POSTPROCESSED_RESULTS = 20
|
||||
|
||||
# May be less depending on model
|
||||
MAX_CHUNKS_FED_TO_CHAT = int(os.environ.get("MAX_CHUNKS_FED_TO_CHAT") or 25)
|
||||
@@ -56,3 +52,7 @@ USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH = (
|
||||
os.environ.get("USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH", "false").lower()
|
||||
== "true"
|
||||
)
|
||||
|
||||
SKIP_DEEP_RESEARCH_CLARIFICATION = (
|
||||
os.environ.get("SKIP_DEEP_RESEARCH_CLARIFICATION", "false").lower() == "true"
|
||||
)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user