Compare commits

..

106 Commits

Author SHA1 Message Date
Raunak Bhagat
68e19383b9 Edit props passed to ActionToggle 2025-09-28 12:04:53 -07:00
Raunak Bhagat
f01f3d6e77 Add back assistants selector 2025-09-28 12:00:25 -07:00
Raunak Bhagat
42b57bf804 Add back FilePicker and DeepResearchToggle 2025-09-28 09:50:21 -07:00
Raunak Bhagat
d4b696d17d Clean up some more code 2025-09-27 23:34:13 -07:00
Raunak Bhagat
84b774f376 Edit ChatInputBar and fix build errors 2025-09-27 23:27:47 -07:00
Raunak Bhagat
921f90d013 Fix all build errors 2025-09-27 21:50:53 -07:00
Raunak Bhagat
f6c09572eb Finish more integrations of Projects into Refresh 2025-09-27 21:34:11 -07:00
Raunak Bhagat
b2f90db0b8 Merge in other files from main 2025-09-26 17:10:55 -07:00
Raunak Bhagat
ee7b33b382 Merge main in 2025-09-26 17:09:50 -07:00
Raunak Bhagat
db37c35030 Update imports 2025-09-26 12:11:08 -07:00
Raunak Bhagat
4f105d002c Finish refreshing HumanMessage 2025-09-26 11:01:42 -07:00
Raunak Bhagat
7e9095b976 Whole bunch of refactors 2025-09-25 22:58:33 -07:00
Raunak Bhagat
bce6741ee6 Fix build errors 2025-09-25 22:27:30 -07:00
Raunak Bhagat
b5a188ee5e Add new utility components 2025-09-25 22:22:34 -07:00
Raunak Bhagat
8578d89a24 Update MessageSwitcher 2025-09-25 22:20:27 -07:00
Raunak Bhagat
e42840775a Add tooltip to IconButton 2025-09-25 22:18:51 -07:00
Raunak Bhagat
c907971d5b Saving changes 2025-09-25 22:17:56 -07:00
Raunak Bhagat
44a565bbfc Add new icons 2025-09-25 22:13:18 -07:00
Raunak Bhagat
ea4219806d Edit main page 2025-09-25 15:08:49 -07:00
Raunak Bhagat
ccc76413c6 Fix build errors 2025-09-25 13:01:41 -07:00
Raunak Bhagat
12a2786ff6 Fix build errors 2025-09-25 12:53:12 -07:00
Raunak Bhagat
882c294e74 saving changes 2025-09-25 10:57:55 -07:00
Raunak Bhagat
0e2b6cf193 Remove more unused files 2025-09-24 19:54:52 -07:00
Raunak Bhagat
c7ae8bd783 Remove unused files 2025-09-24 19:51:19 -07:00
Raunak Bhagat
3b10dd4b22 Edit LLMPopover + dependent components 2025-09-24 19:33:00 -07:00
Raunak Bhagat
6ea886fc85 Add new SelectButton 2025-09-24 17:59:40 -07:00
Raunak Bhagat
f8c89fc750 Fix up ChatInputBar + lots of cleanup 2025-09-24 17:05:02 -07:00
Raunak Bhagat
cace80ffaa Fix sidebar folding 2025-09-24 13:57:18 -07:00
Raunak Bhagat
e628033885 Reattach handler 2025-09-24 13:37:11 -07:00
Raunak Bhagat
22bb4b6d98 Add emphasis 2025-09-24 13:33:02 -07:00
Raunak Bhagat
9afffc2de4 Implement proper grouping hierarchies for buttons 2025-09-24 13:28:26 -07:00
Raunak Bhagat
2c1193f975 Fix button variants + subvariants 2025-09-24 13:00:00 -07:00
Raunak Bhagat
b192542c85 Fix bug in which folded state would still render text 2025-09-24 12:18:38 -07:00
Raunak Bhagat
d8821b8ccc Prevent click propagation 2025-09-24 12:14:58 -07:00
Raunak Bhagat
a007369bd5 Edit how renaming UI is rendered (using input instead of textarea) 2025-09-24 12:03:19 -07:00
Raunak Bhagat
2f65629f51 More edits to AppSidebar 2025-09-24 11:48:04 -07:00
Raunak Bhagat
7701ae2112 Re-implement buttons + clean up look 2025-09-24 11:15:38 -07:00
Raunak Bhagat
01a3a256e9 Update README 2025-09-23 15:57:28 -07:00
Raunak Bhagat
0d55febaa7 Remove strokeOpacity from icons 2025-09-23 15:56:16 -07:00
Raunak Bhagat
bdafbfe0e8 Edit AdminSidebar width 2025-09-22 11:22:25 -07:00
Raunak Bhagat
278fd0e153 Fix error in which message would not be updated after edit 2025-09-22 11:04:15 -07:00
Raunak Bhagat
a4bb97bc22 Fix editing modal 2025-09-22 10:53:55 -07:00
Raunak Bhagat
8063d9a75e Edit KG configuration page 2025-09-22 05:04:42 -07:00
Raunak Bhagat
1ffaba12f0 Fix height of search bar 2025-09-22 04:45:41 -07:00
Raunak Bhagat
26f8660663 Fix search bar 2025-09-22 04:40:12 -07:00
Raunak Bhagat
d6504ed578 Update search-settings page 2025-09-22 03:59:35 -07:00
Raunak Bhagat
7fcc2c9d35 Clean up more admin stuff 2025-09-22 03:21:48 -07:00
Raunak Bhagat
46e8f925fe Clean up AdminSidebar 2025-09-21 22:33:26 -07:00
Raunak Bhagat
5ec1f61839 Add user settings 2025-09-19 20:07:11 -07:00
Raunak Bhagat
df950963a7 Edit SvgMoreHorizontal SVG size 2025-09-19 19:47:06 -07:00
Raunak Bhagat
93208a66ac Edit settings popup state transitions 2025-09-19 19:30:39 -07:00
Raunak Bhagat
a4819e07e7 Small bug fixes 2025-09-19 19:19:37 -07:00
Raunak Bhagat
f642ace40c Implement logout 2025-09-19 19:16:24 -07:00
Raunak Bhagat
9b430ae2d5 Implement notifications 2025-09-19 19:03:48 -07:00
Raunak Bhagat
05f3f878b2 Edit edit/delete modals 2025-09-19 17:54:24 -07:00
Raunak Bhagat
df17c5352e Edit active colours 2025-09-19 16:46:08 -07:00
Raunak Bhagat
bcfb0f3cf3 Remove commented out state 2025-09-19 16:04:13 -07:00
Raunak Bhagat
38468c1dc4 Fix AgentsModal 2025-09-19 15:55:40 -07:00
Raunak Bhagat
8550a9c5e3 Cleanup sidebar a bit more 2025-09-19 14:33:36 -07:00
Raunak Bhagat
fe0c60e50d Fix UX around naming chats 2025-09-19 14:03:02 -07:00
Raunak Bhagat
4ecc151a02 Fix up chat-renaming 2025-09-19 13:49:08 -07:00
Raunak Bhagat
d08becead5 Saving changes 2025-09-19 13:34:54 -07:00
Raunak Bhagat
a429f852d5 Reduce height of buttons 2025-09-19 08:05:50 -07:00
Raunak Bhagat
a856f27fae Saving changes 2025-09-18 20:19:25 -07:00
Raunak Bhagat
d0d8027928 Edit popups in sidebar buttons 2025-09-18 19:53:33 -07:00
Raunak Bhagat
bd1671f1a1 Edit popovers and add new icons 2025-09-18 19:05:16 -07:00
Raunak Bhagat
e236c67678 Fix build errors 2025-09-18 17:28:19 -07:00
Raunak Bhagat
683956697a More UI fixes and tweaks 2025-09-18 16:57:47 -07:00
Raunak Bhagat
fb1e303ffc Fix ordering bug 2025-09-18 16:09:25 -07:00
Raunak Bhagat
729d4fafd1 Remove client directive 2025-09-18 15:52:16 -07:00
Raunak Bhagat
40c60282d0 Update agents modal and general structure of app 2025-09-18 15:19:35 -07:00
Raunak Bhagat
2141fd2c6e More edits to styling + colours 2025-09-18 12:45:50 -07:00
Raunak Bhagat
9aeba96043 Update state management 2025-09-16 19:34:48 -07:00
Raunak Bhagat
b431de5141 Update hover state for buttons 2025-09-16 19:06:05 -07:00
Raunak Bhagat
d1a6340cfc Add new chat handler 2025-09-16 17:45:21 -07:00
Raunak Bhagat
ccf382ef4f Edit spacing 2025-09-16 17:41:56 -07:00
Raunak Bhagat
c31997b9b2 Save folded state to localStorage 2025-09-16 17:40:11 -07:00
Raunak Bhagat
ab31795a46 Recenter icon when title is hidden 2025-09-16 17:35:40 -07:00
Raunak Bhagat
b3beca63dc Make headers sticky 2025-09-16 17:33:21 -07:00
Raunak Bhagat
cc6d54c1e6 Add loading state for Truncated component + fix spacings 2025-09-16 17:28:54 -07:00
Raunak Bhagat
ee12c0c5de Fix scrolling issue 2025-09-16 17:00:40 -07:00
Raunak Bhagat
d48912a05d Fix errors 2025-09-16 15:50:48 -07:00
Raunak Bhagat
c079072676 Remove unnecessary file + make HistorySidebar be smart 2025-09-16 15:48:15 -07:00
Raunak Bhagat
952f6bfb37 Delete unused files 2025-09-16 15:42:36 -07:00
Raunak Bhagat
0714e4bb4e Fix dnd 2025-09-16 15:39:47 -07:00
Raunak Bhagat
ae577f0f44 Add AgentsModal 2025-09-16 15:35:59 -07:00
Raunak Bhagat
0705d584d8 Update user hover-card 2025-09-16 14:52:54 -07:00
Raunak Bhagat
36e391e557 Add folded sidebar (+ shortcuts) 2025-09-16 13:50:49 -07:00
Raunak Bhagat
1efce594b5 Clean up truncation + buttons 2025-09-16 13:02:16 -07:00
Raunak Bhagat
67ac53f17d Add more styling for HistorySidebar + add README for working w/ icons 2025-09-16 11:21:13 -07:00
Raunak Bhagat
d5a222925a Add icons (as raw TSX) 2025-09-16 09:51:25 -07:00
Raunak Bhagat
d5ef928782 Add icons 2025-09-16 09:12:12 -07:00
Raunak Bhagat
6963d78f8e Fix more build errors? 2025-09-15 17:39:24 -07:00
Raunak Bhagat
d3ef2b8c17 Fix build errors 2025-09-15 17:28:34 -07:00
Raunak Bhagat
70f4162ea8 Update name 2025-09-15 17:17:09 -07:00
Raunak Bhagat
883f52d332 Update component names 2025-09-15 17:09:13 -07:00
Raunak Bhagat
f8fd83c883 Clean up sidebar 2025-09-15 15:56:45 -07:00
Raunak Bhagat
d2bf0c0c5f Update token-context bar 2025-09-15 11:43:54 -07:00
Raunak Bhagat
5d598c2d22 Add more colour fixes to Modal 2025-09-15 11:13:45 -07:00
Raunak Bhagat
9dc0e97302 Merge branch 'main' into colours 2025-09-15 09:45:32 -07:00
Raunak Bhagat
048b2a6b39 Edit LLMPopover and add border-radii 2025-09-15 09:43:06 -07:00
Raunak Bhagat
7dd3cecf67 Edit UserDropdown colours 2025-09-15 09:15:56 -07:00
Raunak Bhagat
82abe28986 Update more colours 2025-09-14 20:37:33 -07:00
Raunak Bhagat
a0575e6a00 Update colours for sidebar 2025-09-14 20:24:25 -07:00
Raunak Bhagat
0c5bf5b3ed Add all colours from Figma 2025-09-11 13:34:54 -07:00
Raunak Bhagat
492117d910 Edit .gitignore 2025-09-11 12:32:53 -07:00
471 changed files with 26985 additions and 15823 deletions

View File

@@ -8,9 +8,9 @@ on:
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
# tag nightly builds with "edge"
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
# don't tag cloud images with "latest"
LATEST_TAG: ${{ contains(github.ref_name, 'latest') && !contains(github.ref_name, 'cloud') }}
jobs:
build-and-push:
@@ -33,16 +33,7 @@ jobs:
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
- name: Checkout code
uses: actions/checkout@v4
@@ -55,8 +46,7 @@ jobs:
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -129,8 +119,7 @@ jobs:
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@v3

View File

@@ -11,8 +11,8 @@ env:
BUILDKIT_PROGRESS: plain
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
# tag nightly builds with "edge"
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
# don't tag cloud images with "latest"
LATEST_TAG: ${{ contains(github.ref_name, 'latest') && !contains(github.ref_name, 'cloud') }}
jobs:
@@ -145,15 +145,6 @@ jobs:
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on: ubuntu-latest
steps:
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
@@ -166,16 +157,11 @@ jobs:
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
if [[ "${{ steps.check_version.outputs.is_stable }}" == "true" ]]; then
if [[ "${{ env.LATEST_TAG }}" == "true" ]]; then
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:latest \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
fi
if [[ "${{ env.EDGE_TAG }}" == "true" ]]; then
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:edge \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
fi
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@v3

View File

@@ -7,10 +7,7 @@ on:
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
# tag nightly builds with "edge"
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
DEPLOYMENT: standalone
jobs:
@@ -48,15 +45,6 @@ jobs:
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
- name: Checkout
uses: actions/checkout@v4
@@ -69,8 +57,7 @@ jobs:
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -139,8 +126,7 @@ jobs:
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@v3

View File

@@ -25,11 +25,9 @@ jobs:
- name: Add required Helm repositories
run: |
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo add keda https://kedacore.github.io/charts
helm repo update
- name: Build chart dependencies

View File

@@ -0,0 +1,124 @@
name: Backport on Merge
# Note this workflow does not trigger the builds, be sure to manually tag the branches to trigger the builds
on:
pull_request:
types: [closed] # Later we check for merge so only PRs that go in can get backported
permissions:
contents: write
actions: write
jobs:
backport:
if: github.event.pull_request.merged == true
runs-on: ubuntu-latest
env:
GITHUB_TOKEN: ${{ secrets.YUHONG_GH_ACTIONS }}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
fetch-depth: 0
- name: Set up Git user
run: |
git config user.name "Richard Kuo [bot]"
git config user.email "rkuo[bot]@onyx.app"
git fetch --prune
- name: Check for Backport Checkbox
id: checkbox-check
run: |
PR_BODY="${{ github.event.pull_request.body }}"
if [[ "$PR_BODY" == *"[x] This PR should be backported"* ]]; then
echo "backport=true" >> $GITHUB_OUTPUT
else
echo "backport=false" >> $GITHUB_OUTPUT
fi
- name: List and sort release branches
id: list-branches
run: |
git fetch --all --tags
BRANCHES=$(git for-each-ref --format='%(refname:short)' refs/remotes/origin/release/* | sed 's|origin/release/||' | sort -Vr)
BETA=$(echo "$BRANCHES" | head -n 1)
STABLE=$(echo "$BRANCHES" | head -n 2 | tail -n 1)
echo "beta=release/$BETA" >> $GITHUB_OUTPUT
echo "stable=release/$STABLE" >> $GITHUB_OUTPUT
# Fetch latest tags for beta and stable
LATEST_BETA_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*-beta.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$" | grep -v -- "-cloud" | sort -Vr | head -n 1)
LATEST_STABLE_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+$" | sort -Vr | head -n 1)
# Handle case where no beta tags exist
if [[ -z "$LATEST_BETA_TAG" ]]; then
NEW_BETA_TAG="v1.0.0-beta.1"
else
NEW_BETA_TAG=$(echo $LATEST_BETA_TAG | awk -F '[.-]' '{print $1 "." $2 "." $3 "-beta." ($NF+1)}')
fi
# Increment latest stable tag
NEW_STABLE_TAG=$(echo $LATEST_STABLE_TAG | awk -F '.' '{print $1 "." $2 "." ($3+1)}')
echo "latest_beta_tag=$LATEST_BETA_TAG" >> $GITHUB_OUTPUT
echo "latest_stable_tag=$LATEST_STABLE_TAG" >> $GITHUB_OUTPUT
echo "new_beta_tag=$NEW_BETA_TAG" >> $GITHUB_OUTPUT
echo "new_stable_tag=$NEW_STABLE_TAG" >> $GITHUB_OUTPUT
- name: Echo branch and tag information
run: |
echo "Beta branch: ${{ steps.list-branches.outputs.beta }}"
echo "Stable branch: ${{ steps.list-branches.outputs.stable }}"
echo "Latest beta tag: ${{ steps.list-branches.outputs.latest_beta_tag }}"
echo "Latest stable tag: ${{ steps.list-branches.outputs.latest_stable_tag }}"
echo "New beta tag: ${{ steps.list-branches.outputs.new_beta_tag }}"
echo "New stable tag: ${{ steps.list-branches.outputs.new_stable_tag }}"
- name: Trigger Backport
if: steps.checkbox-check.outputs.backport == 'true'
run: |
set -e
echo "Backporting to beta ${{ steps.list-branches.outputs.beta }} and stable ${{ steps.list-branches.outputs.stable }}"
# Echo the merge commit SHA
echo "Merge commit SHA: ${{ github.event.pull_request.merge_commit_sha }}"
# Fetch all history for all branches and tags
git fetch --prune
# Reset and prepare the beta branch
git checkout ${{ steps.list-branches.outputs.beta }}
echo "Last 5 commits on beta branch:"
git log -n 5 --pretty=format:"%H"
echo "" # Newline for formatting
# Cherry-pick the merge commit from the merged PR
git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
echo "Cherry-pick to beta failed due to conflicts."
exit 1
}
# Create new beta branch/tag
git tag ${{ steps.list-branches.outputs.new_beta_tag }}
# Push the changes and tag to the beta branch using PAT
git push origin ${{ steps.list-branches.outputs.beta }}
git push origin ${{ steps.list-branches.outputs.new_beta_tag }}
# Reset and prepare the stable branch
git checkout ${{ steps.list-branches.outputs.stable }}
echo "Last 5 commits on stable branch:"
git log -n 5 --pretty=format:"%H"
echo "" # Newline for formatting
# Cherry-pick the merge commit from the merged PR
git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
echo "Cherry-pick to stable failed due to conflicts."
exit 1
}
# Create new stable branch/tag
git tag ${{ steps.list-branches.outputs.new_stable_tag }}
# Push the changes and tag to the stable branch using PAT
git push origin ${{ steps.list-branches.outputs.stable }}
git push origin ${{ steps.list-branches.outputs.new_stable_tag }}

View File

@@ -20,7 +20,6 @@ env:
CONFLUENCE_IS_CLOUD: ${{ secrets.CONFLUENCE_IS_CLOUD }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
# LLMs
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

View File

@@ -65,45 +65,35 @@ jobs:
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Adding Helm repositories ==="
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo update
- name: Install Redis operator
if: steps.list-changed.outputs.changed == 'true'
shell: bash
run: |
echo "=== Installing redis-operator CRDs ==="
helm upgrade --install redis-operator ot-container-kit/redis-operator \
--namespace redis-operator --create-namespace --wait --timeout 300s
- name: Pre-pull required images
- name: Pre-pull critical images
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Pre-pulling required images to avoid timeout ==="
echo "=== Pre-pulling critical images to avoid timeout ==="
# Get kind cluster name
KIND_CLUSTER=$(kubectl config current-context | sed 's/kind-//')
echo "Kind cluster: $KIND_CLUSTER"
IMAGES=(
"ghcr.io/cloudnative-pg/cloudnative-pg:1.27.0"
"quay.io/opstree/redis:v7.0.15"
"docker.io/onyxdotapp/onyx-web-server:latest"
)
for image in "${IMAGES[@]}"; do
echo "Pre-pulling $image"
if docker pull "$image"; then
kind load docker-image "$image" --name "$KIND_CLUSTER" || echo "Failed to load $image into kind"
else
echo "Failed to pull $image"
fi
done
# Pre-pull images that are likely to be used
echo "Pre-pulling PostgreSQL image..."
docker pull postgres:15-alpine || echo "Failed to pull postgres:15-alpine"
kind load docker-image postgres:15-alpine --name $KIND_CLUSTER || echo "Failed to load postgres image"
echo "Pre-pulling Redis image..."
docker pull redis:7-alpine || echo "Failed to pull redis:7-alpine"
kind load docker-image redis:7-alpine --name $KIND_CLUSTER || echo "Failed to load redis image"
echo "Pre-pulling Onyx images..."
docker pull docker.io/onyxdotapp/onyx-web-server:latest || echo "Failed to pull onyx web server"
docker pull docker.io/onyxdotapp/onyx-backend:latest || echo "Failed to pull onyx backend"
kind load docker-image docker.io/onyxdotapp/onyx-web-server:latest --name $KIND_CLUSTER || echo "Failed to load onyx web server"
kind load docker-image docker.io/onyxdotapp/onyx-backend:latest --name $KIND_CLUSTER || echo "Failed to load onyx backend"
echo "=== Images loaded into Kind cluster ==="
docker exec "$KIND_CLUSTER"-control-plane crictl images | grep -E "(cloudnative-pg|redis|onyx)" || echo "Some images may still be loading..."
docker exec $KIND_CLUSTER-control-plane crictl images | grep -E "(postgres|redis|onyx)" || echo "Some images may still be loading..."
- name: Validate chart dependencies
if: steps.list-changed.outputs.changed == 'true'
@@ -159,7 +149,6 @@ jobs:
# Run the actual installation with detailed logging
echo "=== Starting ct install ==="
set +e
ct install --all \
--helm-extra-set-args="\
--set=nginx.enabled=false \
@@ -167,10 +156,8 @@ jobs:
--set=vespa.enabled=false \
--set=slackbot.enabled=false \
--set=postgresql.enabled=true \
--set=postgresql.nameOverride=cloudnative-pg \
--set=postgresql.cluster.storage.storageClass=standard \
--set=postgresql.primary.persistence.enabled=false \
--set=redis.enabled=true \
--set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \
--set=webserver.replicaCount=1 \
--set=api.replicaCount=0 \
--set=inferenceCapability.replicaCount=0 \
@@ -186,16 +173,8 @@ jobs:
--set=celery_worker_user_files_indexing.replicaCount=0" \
--helm-extra-args="--timeout 900s --debug" \
--debug --config ct.yaml
CT_EXIT=$?
set -e
if [[ $CT_EXIT -ne 0 ]]; then
echo "ct install failed with exit code $CT_EXIT"
exit $CT_EXIT
else
echo "=== Installation completed successfully ==="
fi
echo "=== Installation completed successfully ==="
kubectl get pods --all-namespaces
- name: Post-install verification
@@ -220,7 +199,7 @@ jobs:
echo "=== Recent logs for debugging ==="
kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"
echo "=== Helm releases ==="
helm list --all-namespaces
# the following would install only changed charts, but we only have one chart so

View File

@@ -22,11 +22,9 @@ env:
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
PERM_SYNC_SHAREPOINT_CLIENT_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_CLIENT_ID }}
PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
@@ -133,7 +131,6 @@ jobs:
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
push: true
outputs: type=registry
no-cache: true
build-model-server-image:
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
@@ -161,7 +158,6 @@ jobs:
push: true
outputs: type=registry
provenance: false
no-cache: true
build-integration-image:
needs: prepare-build
@@ -195,7 +191,6 @@ jobs:
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}
push: true
outputs: type=registry
no-cache: true
integration-tests:
needs:
@@ -342,11 +337,9 @@ jobs:
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
-e CONFLUENCE_ACCESS_TOKEN_SCOPED=${CONFLUENCE_ACCESS_TOKEN_SCOPED} \
-e JIRA_BASE_URL=${JIRA_BASE_URL} \
-e JIRA_USER_EMAIL=${JIRA_USER_EMAIL} \
-e JIRA_API_TOKEN=${JIRA_API_TOKEN} \
-e JIRA_API_TOKEN_SCOPED=${JIRA_API_TOKEN_SCOPED} \
-e PERM_SYNC_SHAREPOINT_CLIENT_ID=${PERM_SYNC_SHAREPOINT_CLIENT_ID} \
-e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
-e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \

View File

@@ -19,11 +19,9 @@ env:
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
PERM_SYNC_SHAREPOINT_CLIENT_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_CLIENT_ID }}
PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
@@ -130,7 +128,6 @@ jobs:
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
push: true
outputs: type=registry
no-cache: true
build-model-server-image:
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
@@ -158,7 +155,6 @@ jobs:
push: true
outputs: type=registry
provenance: false
no-cache: true
build-integration-image:
needs: prepare-build
@@ -192,7 +188,6 @@ jobs:
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}
push: true
outputs: type=registry
no-cache: true
integration-tests-mit:
needs:
@@ -339,11 +334,9 @@ jobs:
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
-e CONFLUENCE_ACCESS_TOKEN_SCOPED=${CONFLUENCE_ACCESS_TOKEN_SCOPED} \
-e JIRA_BASE_URL=${JIRA_BASE_URL} \
-e JIRA_USER_EMAIL=${JIRA_USER_EMAIL} \
-e JIRA_API_TOKEN=${JIRA_API_TOKEN} \
-e JIRA_API_TOKEN_SCOPED=${JIRA_API_TOKEN_SCOPED} \
-e PERM_SYNC_SHAREPOINT_CLIENT_ID=${PERM_SYNC_SHAREPOINT_CLIENT_ID} \
-e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
-e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \

View File

@@ -56,8 +56,6 @@ jobs:
provenance: false
sbom: false
push: true
outputs: type=registry
# no-cache: true
build-backend-image:
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
@@ -89,8 +87,6 @@ jobs:
provenance: false
sbom: false
push: true
outputs: type=registry
# no-cache: true
build-model-server-image:
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
@@ -122,8 +118,6 @@ jobs:
provenance: false
sbom: false
push: true
outputs: type=registry
# no-cache: true
playwright-tests:
needs: [build-web-image, build-backend-image, build-model-server-image]
@@ -185,21 +179,16 @@ jobs:
working-directory: ./web
run: npx playwright install --with-deps
- name: Create .env file for Docker Compose
run: |
cat <<EOF > deployment/docker_compose/.env
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
AUTH_TYPE=basic
GEN_AI_API_KEY=${{ env.OPENAI_API_KEY }}
EXA_API_KEY=${{ env.EXA_API_KEY }}
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
IMAGE_TAG=test
EOF
- name: Start Docker containers
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
GEN_AI_API_KEY=${{ env.OPENAI_API_KEY }} \
EXA_API_KEY=${{ env.EXA_API_KEY }} \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
id: start_docker
@@ -239,16 +228,14 @@ jobs:
- name: Run Playwright tests
working-directory: ./web
run: |
# Create test-results directory to ensure it exists for artifact upload
mkdir -p test-results
npx playwright test
run: npx playwright test
- uses: actions/upload-artifact@v4
if: always()
with:
# Includes test results and debug screenshots
name: playwright-test-results-${{ github.run_id }}
# Chromatic automatically defaults to the test-results directory.
# Replace with the path to your custom directory and adjust the CHROMATIC_ARCHIVE_LOCATION environment variable accordingly.
name: test-results
path: ./web/test-results
retention-days: 30

View File

@@ -20,13 +20,11 @@ env:
CONFLUENCE_IS_CLOUD: ${{ secrets.CONFLUENCE_IS_CLOUD }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
# Jira
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
# Gong
GONG_ACCESS_KEY: ${{ secrets.GONG_ACCESS_KEY }}

View File

@@ -13,50 +13,6 @@
"presentation": {
"group": "1"
}
},
{
"name": "Run All Onyx Services",
"configurations": [
"Web Server",
"Model Server",
"API Server",
"Slack Bot",
"Celery primary",
"Celery light",
"Celery heavy",
"Celery docfetching",
"Celery docprocessing",
"Celery beat",
"Celery monitoring",
"Celery user file processing"
],
"presentation": {
"group": "1"
}
},
{
"name": "Web / Model / API",
"configurations": ["Web Server", "Model Server", "API Server"],
"presentation": {
"group": "1"
}
},
{
"name": "Celery (all)",
"configurations": [
"Celery primary",
"Celery light",
"Celery heavy",
"Celery docfetching",
"Celery docprocessing",
"Celery beat",
"Celery monitoring",
"Celery user file processing"
],
"presentation": {
"group": "1"
},
"stopAll": true
}
],
"configurations": [
@@ -258,6 +214,250 @@
"consoleTitle": "Celery docfetching Console",
"justMyCode": false
},
{
"name": "Run All Onyx Services",
"configurations": [
"Web Server",
"Model Server",
"API Server",
"Slack Bot",
"Celery primary",
"Celery light",
"Celery heavy",
"Celery docfetching",
"Celery docprocessing",
"Celery beat",
"Celery monitoring",
"Celery user file processing"
],
"presentation": {
"group": "1"
}
},
{
"name": "Web / Model / API",
"configurations": ["Web Server", "Model Server", "API Server"],
"presentation": {
"group": "1"
}
},
{
"name": "Celery (all)",
"configurations": [
"Celery primary",
"Celery light",
"Celery heavy",
"Celery docfetching",
"Celery docprocessing",
"Celery beat",
"Celery monitoring",
"Celery user file processing"
],
"presentation": {
"group": "1"
},
"stopAll": true
}
],
"configurations": [
{
// Dummy entry used to label the group
"name": "--- Individual ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "2",
"order": 0
}
},
{
"name": "Web Server",
"type": "node",
"request": "launch",
"cwd": "${workspaceRoot}/web",
"runtimeExecutable": "npm",
"envFile": "${workspaceFolder}/.vscode/.env",
"runtimeArgs": ["run", "dev"],
"presentation": {
"group": "2"
},
"console": "integratedTerminal",
"consoleTitle": "Web Server Console"
},
{
"name": "Model Server",
"consoleName": "Model Server",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1"
},
"args": ["model_server.main:app", "--reload", "--port", "9000"],
"presentation": {
"group": "2"
},
"consoleTitle": "Model Server Console"
},
{
"name": "API Server",
"consoleName": "API Server",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_DANSWER_MODEL_INTERACTIONS": "True",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1"
},
"args": ["onyx.main:app", "--reload", "--port", "8080"],
"presentation": {
"group": "2"
},
"consoleTitle": "API Server Console"
},
// For the listener to access the Slack API,
// DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project
{
"name": "Slack Bot",
"consoleName": "Slack Bot",
"type": "debugpy",
"request": "launch",
"program": "onyx/onyxbot/slack/listener.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"presentation": {
"group": "2"
},
"consoleTitle": "Slack Bot Console"
},
{
"name": "Celery primary",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.primary",
"worker",
"--pool=threads",
"--concurrency=4",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=primary@%n",
"-Q",
"celery"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery primary Console"
},
{
"name": "Celery light",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.light",
"worker",
"--pool=threads",
"--concurrency=64",
"--prefetch-multiplier=8",
"--loglevel=INFO",
"--hostname=light@%n",
"-Q",
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,index_attempt_cleanup"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery light Console"
},
{
"name": "Celery heavy",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.heavy",
"worker",
"--pool=threads",
"--concurrency=4",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=heavy@%n",
"-Q",
"connector_pruning,connector_doc_permissions_sync,connector_external_group_sync"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery heavy Console"
},
{
"name": "Celery docfetching",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.docfetching",
"worker",
"--pool=threads",
"--concurrency=1",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=docfetching@%n",
"-Q",
"connector_doc_fetching,user_files_indexing"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery docfetching Console",
"justMyCode": false
},
{
"name": "Celery docprocessing",
"type": "debugpy",
@@ -286,83 +486,8 @@
"presentation": {
"group": "2"
},
"consoleTitle": "Celery docprocessing Console"
},
{
"name": "Celery beat",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.beat",
"beat",
"--loglevel=INFO"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery beat Console"
},
{
"name": "Celery monitoring",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {},
"args": [
"-A",
"onyx.background.celery.versioned_apps.monitoring",
"worker",
"--pool=solo",
"--concurrency=1",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=monitoring@%n",
"-Q",
"monitoring"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery monitoring Console"
},
{
"name": "Celery user file processing",
"type": "debugpy",
"request": "launch",
"module": "celery",
"args": [
"-A",
"onyx.background.celery.versioned_apps.user_file_processing",
"worker",
"--loglevel=INFO",
"--hostname=user_file_processing@%n",
"--pool=threads",
"-Q",
"user_file_processing,user_file_project_sync"
],
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"presentation": {
"group": "2"
},
"consoleTitle": "Celery user file processing Console"
},
{
"name": "Pytest",
"consoleName": "Pytest",
@@ -378,8 +503,8 @@
},
"args": [
"-v"
// Specify a specific module/test to run or provide nothing to run all tests
// "tests/unit/onyx/llm/answering/test_prune_and_merge.py"
// Specify a specific module/test to run or provide nothing to run all tests
//"tests/unit/onyx/llm/answering/test_prune_and_merge.py"
],
"presentation": {
"group": "2"
@@ -463,6 +588,144 @@
"group": "3"
}
},
{
"name": "Celery monitoring",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {},
"args": [
"-A",
"onyx.background.celery.versioned_apps.monitoring",
"worker",
"--pool=solo",
"--concurrency=1",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=monitoring@%n",
"-Q",
"monitoring"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery monitoring Console"
},
{
"name": "Celery beat",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.beat",
"beat",
"--loglevel=INFO"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery beat Console"
},
{
"name": "Celery user file processing",
"type": "debugpy",
"request": "launch",
"module": "celery",
"args": [
"-A",
"onyx.background.celery.versioned_apps.user_file_processing",
"worker",
"--loglevel=INFO",
"--hostname=user_file_processing@%n",
"--pool=threads",
"-Q",
"user_file_processing,user_file_project_sync"
],
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"presentation": {
"group": "2"
},
"consoleTitle": "Celery user file processing Console"
},
{
"name": "Pytest",
"consoleName": "Pytest",
"type": "debugpy",
"request": "launch",
"module": "pytest",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-v"
// Specify a specific module/test to run or provide nothing to run all tests
//"tests/unit/onyx/llm/answering/test_prune_and_merge.py"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Pytest Console"
},
{
// Dummy entry used to label the group
"name": "--- Tasks ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "3",
"order": 0
}
},
{
"name": "Clear and Restart External Volumes and Containers",
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": [
"${workspaceFolder}/backend/scripts/restart_containers.sh"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"stopOnEntry": true,
"presentation": {
"group": "3"
}
},
{
"name": "Install Python Requirements",
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": [
"-c",
"pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "3"
}
},
{
// script to generate the openapi schema
"name": "Onyx OpenAPI Schema Generator",

View File

@@ -105,11 +105,6 @@ pip install -r backend/requirements/ee.txt
pip install -r backend/requirements/model_server.txt
```
Fix vscode/cursor auto-imports:
```bash
pip install -e .
```
Install Playwright for Python (headless browser required by the Web Connector)
In the activated Python virtualenv, install Playwright for Python by running:

View File

@@ -128,7 +128,7 @@ def upgrade() -> None:
AND a.attname = 'cc_pair_id'
)
) LOOP
EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT IF EXISTS %I', r.conname);
EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT %I', r.conname);
END LOOP;
END$$;
"""

View File

@@ -167,10 +167,7 @@ def upgrade() -> None:
)
# Delete related records
# Clean child tables first to satisfy foreign key constraints,
# then the parent tables
tables_to_clean = [
("index_attempt_errors", "connector_credential_pair_id"),
("index_attempt", "connector_credential_pair_id"),
("background_error", "cc_pair_id"),
("document_set__connector_credential_pair", "connector_credential_pair_id"),
@@ -245,7 +242,7 @@ def upgrade() -> None:
AND t.relname = 'user_file'
AND ft.relname = 'connector_credential_pair'
) LOOP
EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT IF EXISTS %I', r.conname);
EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT %I', r.conname);
END LOOP;
END$$;
"""

View File

@@ -1,37 +0,0 @@
"""Add image input support to model config
Revision ID: 64bd5677aeb6
Revises: b30353be4eec
Create Date: 2025-09-28 15:48:12.003612
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "64bd5677aeb6"
down_revision = "b30353be4eec"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"model_configuration",
sa.Column("supports_image_input", sa.Boolean(), nullable=True),
)
# Seems to be left over from when model visibility was introduced and a nullable field.
# Set any null is_visible values to False
connection = op.get_bind()
connection.execute(
sa.text(
"UPDATE model_configuration SET is_visible = false WHERE is_visible IS NULL"
)
)
def downgrade() -> None:
op.drop_column("model_configuration", "supports_image_input")

View File

@@ -1,123 +0,0 @@
"""add_mcp_auth_performer
Revision ID: b30353be4eec
Revises: 2b75d0a8ffcb
Create Date: 2025-09-13 14:58:08.413534
"""
from alembic import op
import sqlalchemy as sa
from onyx.db.enums import MCPAuthenticationPerformer, MCPTransport
# revision identifiers, used by Alembic.
revision = "b30353be4eec"
down_revision = "2b75d0a8ffcb"
branch_labels = None
depends_on = None
def upgrade() -> None:
"""moving to a better way of handling auth performer and transport"""
# Add nullable column first for backward compatibility
op.add_column(
"mcp_server",
sa.Column(
"auth_performer",
sa.Enum(MCPAuthenticationPerformer, native_enum=False),
nullable=True,
),
)
op.add_column(
"mcp_server",
sa.Column(
"transport",
sa.Enum(MCPTransport, native_enum=False),
nullable=True,
),
)
# # Backfill values using existing data and inference rules
bind = op.get_bind()
# 1) OAUTH servers are always PER_USER
bind.execute(
sa.text(
"""
UPDATE mcp_server
SET auth_performer = 'PER_USER'
WHERE auth_type = 'OAUTH'
"""
)
)
# 2) If there is no admin connection config, mark as ADMIN (and not set yet)
bind.execute(
sa.text(
"""
UPDATE mcp_server
SET auth_performer = 'ADMIN'
WHERE admin_connection_config_id IS NULL
AND auth_performer IS NULL
"""
)
)
# 3) If there exists any user-specific connection config (user_email != ''), mark as PER_USER
bind.execute(
sa.text(
"""
UPDATE mcp_server AS ms
SET auth_performer = 'PER_USER'
FROM mcp_connection_config AS mcc
WHERE mcc.mcp_server_id = ms.id
AND COALESCE(mcc.user_email, '') <> ''
AND ms.auth_performer IS NULL
"""
)
)
# 4) Default any remaining nulls to ADMIN (covers API_TOKEN admin-managed and NONE)
bind.execute(
sa.text(
"""
UPDATE mcp_server
SET auth_performer = 'ADMIN'
WHERE auth_performer IS NULL
"""
)
)
# Finally, make the column non-nullable
op.alter_column(
"mcp_server",
"auth_performer",
existing_type=sa.Enum(MCPAuthenticationPerformer, native_enum=False),
nullable=False,
)
# Backfill transport for existing rows to STREAMABLE_HTTP, then make non-nullable
bind.execute(
sa.text(
"""
UPDATE mcp_server
SET transport = 'STREAMABLE_HTTP'
WHERE transport IS NULL
"""
)
)
op.alter_column(
"mcp_server",
"transport",
existing_type=sa.Enum(MCPTransport, native_enum=False),
nullable=False,
)
def downgrade() -> None:
"""remove cols"""
op.drop_column("mcp_server", "transport")
op.drop_column("mcp_server", "auth_performer")

View File

@@ -124,9 +124,9 @@ def get_space_permission(
and not space_permissions.external_user_group_ids
):
logger.warning(
f"No permissions found for space '{space_key}'. This is very unlikely "
"to be correct and is more likely caused by an access token with "
"insufficient permissions. Make sure that the access token has Admin "
f"No permissions found for space '{space_key}'. This is very unlikely"
"to be correct and is more likely caused by an access token with"
"insufficient permissions. Make sure that the access token has Admin"
f"permissions for space '{space_key}'"
)

View File

@@ -26,7 +26,7 @@ def _get_slim_doc_generator(
else 0.0
)
return gmail_connector.retrieve_all_slim_docs_perm_sync(
return gmail_connector.retrieve_all_slim_documents(
start=start_time,
end=current_time.timestamp(),
callback=callback,

View File

@@ -34,7 +34,7 @@ def _get_slim_doc_generator(
else 0.0
)
return google_drive_connector.retrieve_all_slim_docs_perm_sync(
return google_drive_connector.retrieve_all_slim_documents(
start=start_time,
end=current_time.timestamp(),
callback=callback,

View File

@@ -59,7 +59,7 @@ def _build_holder_map(permissions: list[dict]) -> dict[str, list[Holder]]:
for raw_perm in permissions:
if not hasattr(raw_perm, "raw"):
logger.warning(f"Expected a 'raw' field, but none was found: {raw_perm=}")
logger.warn(f"Expected a 'raw' field, but none was found: {raw_perm=}")
continue
permission = Permission(**raw_perm.raw)
@@ -71,14 +71,14 @@ def _build_holder_map(permissions: list[dict]) -> dict[str, list[Holder]]:
# In order to associate this permission to some Atlassian entity, we need the "Holder".
# If this doesn't exist, then we cannot associate this permission to anyone; just skip.
if not permission.holder:
logger.warning(
logger.warn(
f"Expected to find a permission holder, but none was found: {permission=}"
)
continue
type = permission.holder.get("type")
if not type:
logger.warning(
logger.warn(
f"Expected to find the type of permission holder, but none was found: {permission=}"
)
continue

View File

@@ -105,9 +105,7 @@ def _get_slack_document_access(
channel_permissions: dict[str, ExternalAccess],
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
slim_doc_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
callback=callback
)
slim_doc_generator = slack_connector.retrieve_all_slim_documents(callback=callback)
for doc_metadata_batch in slim_doc_generator:
for doc_metadata in doc_metadata_batch:

View File

@@ -4,7 +4,7 @@ from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFun
from onyx.access.models import DocExternalAccess
from onyx.access.models import ExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
@@ -17,7 +17,7 @@ def generic_doc_sync(
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
doc_source: DocumentSource,
slim_connector: SlimConnectorWithPermSync,
slim_connector: SlimConnector,
label: str,
) -> Generator[DocExternalAccess, None, None]:
"""
@@ -40,7 +40,7 @@ def generic_doc_sync(
newly_fetched_doc_ids: set[str] = set()
logger.info(f"Fetching all slim documents from {doc_source}")
for doc_batch in slim_connector.retrieve_all_slim_docs_perm_sync(callback=callback):
for doc_batch in slim_connector.retrieve_all_slim_documents(callback=callback):
logger.info(f"Got {len(doc_batch)} slim documents from {doc_source}")
if callback:

View File

@@ -16,7 +16,6 @@ EE_PUBLIC_ENDPOINT_SPECS = PUBLIC_ENDPOINT_SPECS + [
# saml
("/auth/saml/authorize", {"GET"}),
("/auth/saml/callback", {"POST"}),
("/auth/saml/callback", {"GET"}),
("/auth/saml/logout", {"POST"}),
]

View File

@@ -110,6 +110,7 @@ async def upsert_saml_user(email: str) -> User:
async def prepare_from_fastapi_request(request: Request) -> dict[str, Any]:
form_data = await request.form()
if request.client is None:
raise ValueError("Invalid request for SAML")
@@ -124,27 +125,14 @@ async def prepare_from_fastapi_request(request: Request) -> dict[str, Any]:
"post_data": {},
"get_data": {},
}
# Handle query parameters (for GET requests)
if request.query_params:
rv["get_data"] = dict(request.query_params)
# Handle form data (for POST requests)
if request.method == "POST":
form_data = await request.form()
if "SAMLResponse" in form_data:
SAMLResponse = form_data["SAMLResponse"]
rv["post_data"]["SAMLResponse"] = SAMLResponse
if "RelayState" in form_data:
RelayState = form_data["RelayState"]
rv["post_data"]["RelayState"] = RelayState
else:
# For GET requests, check if SAMLResponse is in query params
if "SAMLResponse" in request.query_params:
rv["get_data"]["SAMLResponse"] = request.query_params["SAMLResponse"]
if "RelayState" in request.query_params:
rv["get_data"]["RelayState"] = request.query_params["RelayState"]
rv["get_data"] = (request.query_params,)
if "SAMLResponse" in form_data:
SAMLResponse = form_data["SAMLResponse"]
rv["post_data"]["SAMLResponse"] = SAMLResponse
if "RelayState" in form_data:
RelayState = form_data["RelayState"]
rv["post_data"]["RelayState"] = RelayState
return rv
@@ -160,27 +148,10 @@ async def saml_login(request: Request) -> SAMLAuthorizeResponse:
return SAMLAuthorizeResponse(authorization_url=callback_url)
@router.get("/callback")
async def saml_login_callback_get(
request: Request,
db_session: Session = Depends(get_session),
) -> Response:
"""Handle SAML callback via HTTP-Redirect binding (GET request)"""
return await _process_saml_callback(request, db_session)
@router.post("/callback")
async def saml_login_callback(
request: Request,
db_session: Session = Depends(get_session),
) -> Response:
"""Handle SAML callback via HTTP-POST binding (POST request)"""
return await _process_saml_callback(request, db_session)
async def _process_saml_callback(
request: Request,
db_session: Session,
) -> Response:
req = await prepare_from_fastapi_request(request)
auth = OneLogin_Saml2_Auth(req, custom_base_path=SAML_CONF_DIR)

View File

@@ -6,6 +6,7 @@ from typing import Optional
from fastapi import APIRouter
from fastapi import HTTPException
from fastapi import Request
from litellm.exceptions import RateLimitError
from sentence_transformers import CrossEncoder # type: ignore
from sentence_transformers import SentenceTransformer # type: ignore
@@ -206,8 +207,6 @@ async def route_bi_encoder_embed(
async def process_embed_request(
embed_request: EmbedRequest, gpu_type: str = "UNKNOWN"
) -> EmbedResponse:
from litellm.exceptions import RateLimitError
# Only local models should use this endpoint - API providers should make direct API calls
if embed_request.provider_type is not None:
raise ValueError(

View File

@@ -24,8 +24,6 @@ def decision_router(state: MainState) -> list[Send | Hashable] | DRPath | str:
return END
elif next_tool_name == DRPath.LOGGER.value:
return DRPath.LOGGER
elif next_tool_name == DRPath.CLOSER.value:
return DRPath.CLOSER
else:
return DRPath.ORCHESTRATOR

View File

@@ -643,16 +643,9 @@ def clarifier(
datetime_aware=True,
)
system_prompt_to_use_content = build_citations_system_message(
system_prompt_to_use = build_citations_system_message(
prompt_config
).content
system_prompt_to_use: str = cast(str, system_prompt_to_use_content)
if graph_config.inputs.project_instructions:
system_prompt_to_use = (
system_prompt_to_use
+ PROJECT_INSTRUCTIONS_SEPARATOR
+ graph_config.inputs.project_instructions
)
user_prompt_to_use = build_citations_user_message(
user_query=original_question,
files=[],

View File

@@ -181,15 +181,6 @@ def orchestrator(
remaining_time_budget = DR_TIME_BUDGET_BY_TYPE[research_type]
elif remaining_time_budget <= 0:
write_custom_event(
current_step_nr,
SectionEnd(),
writer,
)
current_step_nr += 1
return OrchestrationUpdate(
tools_used=[DRPath.CLOSER.value],
current_step_nr=current_step_nr,

View File

@@ -1,147 +0,0 @@
import json
from concurrent.futures import ThreadPoolExecutor
import requests
from onyx.agents.agent_search.dr.sub_agents.web_search.models import (
InternetContent,
)
from onyx.agents.agent_search.dr.sub_agents.web_search.models import (
InternetSearchProvider,
)
from onyx.agents.agent_search.dr.sub_agents.web_search.models import (
InternetSearchResult,
)
from onyx.configs.chat_configs import SERPER_API_KEY
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.utils.retry_wrapper import retry_builder
SERPER_SEARCH_URL = "https://google.serper.dev/search"
SERPER_CONTENTS_URL = "https://scrape.serper.dev"
class SerperClient(InternetSearchProvider):
    """Internet search provider backed by the serper.dev API.

    Uses the Serper search endpoint for queries and the Serper scrape
    endpoint for fetching page contents.
    """

    def __init__(self, api_key: str | None = SERPER_API_KEY) -> None:
        # Serper authenticates via the X-API-KEY header on every request.
        self.headers = {
            "X-API-KEY": api_key,
            "Content-Type": "application/json",
        }

    @retry_builder(tries=3, delay=1, backoff=2)
    def search(self, query: str) -> list[InternetSearchResult]:
        """Run a web search for `query` and return the organic results.

        Raises requests.HTTPError (after retries) on a non-2xx response.
        """
        payload = {
            "q": query,
        }
        response = requests.post(
            SERPER_SEARCH_URL,
            headers=self.headers,
            data=json.dumps(payload),
        )
        response.raise_for_status()
        results = response.json()
        organic_results = results["organic"]
        # Serper's organic results carry no author/date info, so those
        # fields are left as None.
        return [
            InternetSearchResult(
                title=result["title"],
                link=result["link"],
                snippet=result["snippet"],
                author=None,
                published_date=None,
            )
            for result in organic_results
        ]

    def contents(self, urls: list[str]) -> list[InternetContent]:
        """Scrape each URL concurrently; never raises for individual URLs."""
        if not urls:
            return []

        # Serper can respond with 500s regularly. We want to retry,
        # but in the event of failure, return an unsuccessful scrape.
        def safe_get_webpage_content(url: str) -> InternetContent:
            try:
                return self._get_webpage_content(url)
            except Exception:
                return InternetContent(
                    title="",
                    link=url,
                    full_content="",
                    published_date=None,
                    scrape_successful=False,
                )

        with ThreadPoolExecutor(max_workers=min(8, len(urls))) as e:
            return list(e.map(safe_get_webpage_content, urls))

    @retry_builder(tries=3, delay=1, backoff=2)
    def _get_webpage_content(self, url: str) -> InternetContent:
        """Fetch one page via the Serper scrape endpoint.

        A 400 response means Serper cannot scrape the page and is returned
        as an unsuccessful InternetContent rather than raised.
        """
        payload = {
            "url": url,
        }
        response = requests.post(
            SERPER_CONTENTS_URL,
            headers=self.headers,
            data=json.dumps(payload),
        )
        # 400 returned when serper cannot scrape
        if response.status_code == 400:
            return InternetContent(
                title="",
                link=url,
                full_content="",
                published_date=None,
                scrape_successful=False,
            )
        response.raise_for_status()
        response_json = response.json()
        # Response only guarantees text
        text = response_json["text"]
        # metadata & jsonld are not guaranteed to be present
        metadata = response_json.get("metadata", {})
        jsonld = response_json.get("jsonld", {})
        title = extract_title_from_metadata(metadata)
        # Serper does not provide a reliable mechanism to extract the url,
        # so echo the requested one back.
        response_url = url
        published_date_str = extract_published_date_from_jsonld(jsonld)
        published_date = None
        if published_date_str:
            # Date strings in the wild are inconsistent; fall back to None
            # when parsing fails rather than failing the whole scrape.
            try:
                published_date = time_str_to_utc(published_date_str)
            except Exception:
                published_date = None
        return InternetContent(
            title=title or "",
            link=response_url,
            full_content=text or "",
            published_date=published_date,
        )
def extract_title_from_metadata(metadata: dict[str, str]) -> str | None:
    """Return the page title from Serper metadata, or None if absent.

    Prefers the plain "title" key over the OpenGraph "og:title" key.
    """
    for candidate in ("title", "og:title"):
        if candidate in metadata:
            return metadata[candidate]
    return None
def extract_published_date_from_jsonld(jsonld: dict[str, str]) -> str | None:
    """Return the raw "dateModified" value from a JSON-LD dict, or None."""
    return jsonld.get("dateModified")
def extract_value_from_dict(data: dict[str, str], keys: list[str]) -> str | None:
for key in keys:
if key in data:
return data[key]
return None

View File

@@ -26,7 +26,6 @@ class InternetContent(BaseModel):
link: str
full_content: str
published_date: datetime | None = None
scrape_successful: bool = True
class InternetSearchProvider(ABC):

View File

@@ -1,19 +1,13 @@
from onyx.agents.agent_search.dr.sub_agents.web_search.clients.exa_client import (
ExaClient,
)
from onyx.agents.agent_search.dr.sub_agents.web_search.clients.serper_client import (
SerperClient,
)
from onyx.agents.agent_search.dr.sub_agents.web_search.models import (
InternetSearchProvider,
)
from onyx.configs.chat_configs import EXA_API_KEY
from onyx.configs.chat_configs import SERPER_API_KEY
def get_default_provider() -> InternetSearchProvider | None:
if EXA_API_KEY:
return ExaClient()
if SERPER_API_KEY:
return SerperClient()
return None

View File

@@ -34,7 +34,7 @@ def dummy_inference_section_from_internet_content(
boost=1,
recency_bias=1.0,
score=1.0,
hidden=(not result.scrape_successful),
hidden=False,
metadata={},
match_highlights=[],
doc_summary=truncated_content,

View File

@@ -8,6 +8,8 @@ from typing import TypeVar
from langchain.schema.language_model import LanguageModelInput
from langchain_core.messages import HumanMessage
from langgraph.types import StreamWriter
from litellm import get_supported_openai_params
from litellm import supports_response_schema
from pydantic import BaseModel
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
@@ -145,7 +147,6 @@ def invoke_llm_json(
Invoke an LLM, forcing it to respond in a specified JSON format if possible,
and return an object of that schema.
"""
from litellm.utils import get_supported_openai_params, supports_response_schema
# check if the model supports response_format: json_schema
supports_json = "response_format" in (

View File

@@ -19,9 +19,7 @@ from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import Document
from onyx.connectors.models import SlimDocument
from onyx.httpx.httpx_pool import HttpxPool
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
@@ -32,7 +30,7 @@ PRUNING_CHECKPOINTED_BATCH_SIZE = 32
def document_batch_to_ids(
doc_batch: Iterator[list[Document]] | Iterator[list[SlimDocument]],
doc_batch: Iterator[list[Document]],
) -> Generator[set[str], None, None]:
for doc_list in doc_batch:
yield {doc.id for doc in doc_list}
@@ -43,24 +41,20 @@ def extract_ids_from_runnable_connector(
callback: IndexingHeartbeatInterface | None = None,
) -> set[str]:
"""
If the given connector is neither a SlimConnector nor a SlimConnectorWithPermSync, just pull
If the SlimConnector hasnt been implemented for the given connector, just pull
all docs using the load_from_state and grab out the IDs.
Optionally, a callback can be passed to handle the length of each document batch.
"""
all_connector_doc_ids: set[str] = set()
doc_batch_id_generator = None
if isinstance(runnable_connector, SlimConnector):
doc_batch_id_generator = document_batch_to_ids(
runnable_connector.retrieve_all_slim_docs()
)
elif isinstance(runnable_connector, SlimConnectorWithPermSync):
doc_batch_id_generator = document_batch_to_ids(
runnable_connector.retrieve_all_slim_docs_perm_sync()
)
# If the connector isn't slim, fall back to running it normally to get ids
elif isinstance(runnable_connector, LoadConnector):
for metadata_batch in runnable_connector.retrieve_all_slim_documents():
all_connector_doc_ids.update({doc.id for doc in metadata_batch})
doc_batch_id_generator = None
if isinstance(runnable_connector, LoadConnector):
doc_batch_id_generator = document_batch_to_ids(
runnable_connector.load_from_state()
)
@@ -84,14 +78,13 @@ def extract_ids_from_runnable_connector(
raise RuntimeError("Pruning job could not find a valid runnable_connector.")
# this function is called per batch for rate limiting
doc_batch_processing_func = (
rate_limit_builder(
def doc_batch_processing_func(doc_batch_ids: set[str]) -> set[str]:
return doc_batch_ids
if MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE:
doc_batch_processing_func = rate_limit_builder(
max_calls=MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE, period=60
)(lambda x: x)
if MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE
else lambda x: x
)
for doc_batch_ids in doc_batch_id_generator:
if callback:
if callback.should_stop():

View File

@@ -41,7 +41,7 @@ beat_task_templates: list[dict] = [
"task": OnyxCeleryTask.USER_FILE_DOCID_MIGRATION,
"schedule": timedelta(minutes=1),
"options": {
"priority": OnyxCeleryPriority.HIGH,
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
"queue": OnyxCeleryQueues.USER_FILE_PROCESSING,
},
@@ -85,9 +85,9 @@ beat_task_templates: list[dict] = [
{
"name": "check-for-index-attempt-cleanup",
"task": OnyxCeleryTask.CHECK_FOR_INDEX_ATTEMPT_CLEANUP,
"schedule": timedelta(minutes=30),
"schedule": timedelta(hours=1),
"options": {
"priority": OnyxCeleryPriority.MEDIUM,
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
},
},

View File

@@ -89,7 +89,6 @@ from onyx.indexing.adapters.document_indexing_adapter import (
DocumentIndexingBatchAdapter,
)
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.indexing_pipeline import run_indexing_pipeline
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.natural_language_processing.search_nlp_models import (
InformationContentClassificationModel,
@@ -1271,6 +1270,8 @@ def _docprocessing_task(
tenant_id: str,
batch_num: int,
) -> None:
from onyx.indexing.indexing_pipeline import run_indexing_pipeline
start_time = time.monotonic()
if tenant_id:

View File

@@ -0,0 +1,266 @@
import time
from typing import List
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session
from tenacity import RetryError
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
from onyx.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT
from onyx.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT
from onyx.background.celery.tasks.shared.tasks import OnyxCeleryTaskCompletionStatus
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import CELERY_USER_FILE_FOLDER_SYNC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.connector_credential_pair import (
get_connector_credential_pairs_with_user_files,
)
from onyx.db.document import get_document
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Document
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.search_settings import get_active_search_settings
from onyx.db.user_documents import fetch_user_files_for_documents
from onyx.db.user_documents import fetch_user_folders_for_documents
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaDocumentUserFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger
logger = setup_logger()
@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_USER_FILE_FOLDER_SYNC,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
    trail=False,
    bind=True,
)
def check_for_user_file_folder_sync(self: Task, *, tenant_id: str) -> bool | None:
    """Periodic beat task that keeps user file/folder metadata in sync with Vespa.

    Finds every connector-credential pair that owns user files, resolves the
    documents behind those pairs, and fans out one metadata-update task per
    document that has a user file or user folder association.

    Returns True on success, False on error, and None when another instance
    already holds the beat lock.
    """
    started_at = time.monotonic()

    redis_client = get_redis_client()
    lock_beat: RedisLock = redis_client.lock(
        OnyxRedisLocks.CHECK_USER_FILE_FOLDER_SYNC_BEAT_LOCK,
        timeout=CELERY_USER_FILE_FOLDER_SYNC_BEAT_LOCK_TIMEOUT,
    )

    # these tasks should never overlap
    if not lock_beat.acquire(blocking=False):
        return None

    try:
        with get_session_with_current_tenant() as db_session:
            # Get all connector credential pairs that have user files
            cc_pairs = get_connector_credential_pairs_with_user_files(db_session)
            if not cc_pairs:
                task_logger.info("No connector credential pairs with user files found")
                return True

            # Get all documents associated with these cc_pairs
            document_ids = get_documents_for_cc_pairs(cc_pairs, db_session)
            if not document_ids:
                task_logger.info(
                    "No documents found for connector credential pairs with user files"
                )
                return True

            # Fetch current user file and folder IDs for these documents
            doc_id_to_user_file_id = fetch_user_files_for_documents(
                document_ids=document_ids, db_session=db_session
            )
            doc_id_to_user_folder_id = fetch_user_folders_for_documents(
                document_ids=document_ids, db_session=db_session
            )

            # Fan out one Vespa metadata-update task per associated document;
            # documents with neither a file nor a folder association are skipped.
            for document_id in document_ids:
                file_id = doc_id_to_user_file_id.get(document_id)
                folder_id = doc_id_to_user_folder_id.get(document_id)
                if file_id is None and folder_id is None:
                    continue
                update_user_file_folder_metadata.apply_async(
                    args=(document_id,),  # Use tuple instead of list for args
                    kwargs={
                        "tenant_id": tenant_id,
                        "user_file_id": file_id,
                        "user_folder_id": folder_id,
                    },
                    queue="vespa_metadata_sync",
                )

            task_logger.info(
                f"Scheduled metadata updates for {len(document_ids)} documents. "
                f"Elapsed time: {time.monotonic() - started_at:.2f}s"
            )
            return True
    except Exception as e:
        task_logger.exception(f"Error in check_for_user_file_folder_sync: {e}")
        return False
    finally:
        lock_beat.release()
def get_documents_for_cc_pairs(
    cc_pairs: List[ConnectorCredentialPair], db_session: Session
) -> List[str]:
    """Get all document IDs associated with the given connector credential pairs."""
    if not cc_pairs:
        return []

    cc_pair_ids = [pair.id for pair in cc_pairs]

    # DocumentByConnectorCredentialPair is keyed by (connector_id, credential_id)
    # rather than cc_pair_id, so the cc_pair match goes through an EXISTS subquery
    # correlated on those two columns.
    matches_requested_cc_pair = (
        db_session.query(ConnectorCredentialPair)
        .filter(
            ConnectorCredentialPair.id.in_(cc_pair_ids),
            ConnectorCredentialPair.connector_id
            == DocumentByConnectorCredentialPair.connector_id,
            ConnectorCredentialPair.credential_id
            == DocumentByConnectorCredentialPair.credential_id,
        )
        .exists()
    )

    rows = (
        db_session.query(Document.id)
        .join(
            DocumentByConnectorCredentialPair,
            Document.id == DocumentByConnectorCredentialPair.id,
        )
        .filter(matches_requested_cc_pair)
        .all()
    )
    return [document_id for (document_id,) in rows]
@shared_task(
    name=OnyxCeleryTask.UPDATE_USER_FILE_FOLDER_METADATA,
    bind=True,
    soft_time_limit=LIGHT_SOFT_TIME_LIMIT,
    time_limit=LIGHT_TIME_LIMIT,
    max_retries=3,
)
def update_user_file_folder_metadata(
    self: Task,
    document_id: str,
    *,
    tenant_id: str,
    user_file_id: int | None,
    user_folder_id: int | None,
) -> bool:
    """Updates the user file and folder metadata for a document in Vespa.

    Args:
        document_id: ID of the document whose Vespa chunks are updated.
        tenant_id: Tenant the document belongs to; forwarded to the index update.
        user_file_id: User file to associate with the document, or None.
        user_folder_id: User folder to associate with the document, or None.

    Returns:
        True when the Vespa update succeeded; False when the document no longer
        exists in the DB or the soft time limit was hit. On other exceptions the
        task re-raises via celery's self.retry() and never returns normally.
    """
    start = time.monotonic()
    # Tracked across all exit paths so the finally-block log always has a status.
    completion_status = OnyxCeleryTaskCompletionStatus.UNDEFINED
    try:
        with get_session_with_current_tenant() as db_session:
            # Build a document index handle over both the primary and (optional)
            # secondary search settings, sharing the pooled httpx client.
            active_search_settings = get_active_search_settings(db_session)
            doc_index = get_default_document_index(
                search_settings=active_search_settings.primary,
                secondary_search_settings=active_search_settings.secondary,
                httpx_client=HttpxPool.get("vespa"),
            )
            # Wrapper that retries transient index failures (see RetryDocumentIndex).
            retry_index = RetryDocumentIndex(doc_index)

            doc = get_document(document_id, db_session)
            if not doc:
                # Document was deleted between scheduling and execution: nothing to do.
                elapsed = time.monotonic() - start
                task_logger.info(
                    f"doc={document_id} "
                    f"action=no_operation "
                    f"elapsed={elapsed:.2f}"
                )
                completion_status = OnyxCeleryTaskCompletionStatus.SKIPPED
                return False

            # Create user fields object with file and folder IDs
            user_fields = VespaDocumentUserFields(
                user_file_id=str(user_file_id) if user_file_id is not None else None,
                user_folder_id=(
                    str(user_folder_id) if user_folder_id is not None else None
                ),
            )

            # Update Vespa. OK if doc doesn't exist. Raises exception otherwise.
            chunks_affected = retry_index.update_single(
                document_id,
                tenant_id=tenant_id,
                chunk_count=doc.chunk_count,
                fields=None,  # We're only updating user fields
                user_fields=user_fields,
            )

            elapsed = time.monotonic() - start
            task_logger.info(
                f"doc={document_id} "
                f"action=user_file_folder_sync "
                f"user_file_id={user_file_id} "
                f"user_folder_id={user_folder_id} "
                f"chunks={chunks_affected} "
                f"elapsed={elapsed:.2f}"
            )
            completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED
            return True
    except SoftTimeLimitExceeded:
        # Soft limit hit: record the status and fall through to the final
        # `return False` rather than retrying.
        task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
        completion_status = OnyxCeleryTaskCompletionStatus.SOFT_TIME_LIMIT
    except Exception as ex:
        # Unwrap a tenacity RetryError (from RetryDocumentIndex) to retry with
        # the underlying cause; any other exception is retried as-is. The
        # while True / break construct exists only to keep the control flow
        # flat — self.retry() always raises, so the break is never reached.
        e: Exception | None = None
        while True:
            if isinstance(ex, RetryError):
                task_logger.warning(
                    f"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}"
                )
                # only set the inner exception if it is of type Exception
                e_temp = ex.last_attempt.exception()
                if isinstance(e_temp, Exception):
                    e = e_temp
            else:
                e = ex
            task_logger.exception(
                f"update_user_file_folder_metadata exceptioned: doc={document_id}"
            )
            completion_status = OnyxCeleryTaskCompletionStatus.RETRYABLE_EXCEPTION
            if (
                self.max_retries is not None
                and self.request.retries >= self.max_retries
            ):
                # Retries exhausted — self.retry() below will raise the final error.
                completion_status = (
                    OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
                )
            # Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
            countdown = 2 ** (self.request.retries + 4)
            self.retry(exc=e, countdown=countdown)  # this will raise a celery exception
            break  # we won't hit this, but it looks weird not to have it
    finally:
        # Always log the final status, whatever exit path was taken.
        task_logger.info(
            f"update_user_file_folder_metadata completed: status={completion_status.value} doc={document_id}"
        )
    # Reached only on the SoftTimeLimitExceeded path (other paths return or raise).
    return False

View File

@@ -236,11 +236,7 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
f"process_single_user_file - Indexing pipeline completed ={index_pipeline_result}"
)
if (
index_pipeline_result.failures
or index_pipeline_result.total_docs != len(documents)
or index_pipeline_result.total_chunks == 0
):
if index_pipeline_result.failures:
task_logger.error(
f"process_single_user_file - Indexing pipeline failed id={user_file_id}"
)
@@ -546,78 +542,39 @@ def user_file_docid_migration_task(self: Task, *, tenant_id: str) -> bool:
task_logger.warning(
f"Tenant={tenant_id} failed Vespa update for doc_id={new_uuid} - {e.__class__.__name__}"
)
# Update search_doc records to refer to the UUID string
# we are not using document_id_migrated = false because if the migration already completed,
# it will not run again and we will not update the search_doc records because of the issue currently fixed
user_files = (
db_session.execute(
sa.select(UserFile).where(UserFile.document_id.is_not(None))
uf_id_subq = (
sa.select(sa.cast(UserFile.id, sa.String))
.where(
UserFile.document_id.is_not(None),
UserFile.document_id_migrated.is_(False),
SearchDoc.document_id == UserFile.document_id,
)
.scalars()
.all()
.correlate(SearchDoc)
.scalar_subquery()
)
# Query all SearchDocs that need updating
search_docs = (
db_session.execute(
sa.select(SearchDoc).where(
SearchDoc.document_id.like("%FILE_CONNECTOR__%")
db_session.execute(
sa.update(SearchDoc)
.where(
sa.exists(
sa.select(sa.literal(1)).where(
UserFile.document_id.is_not(None),
UserFile.document_id_migrated.is_(False),
SearchDoc.document_id == UserFile.document_id,
)
)
)
.scalars()
.all()
.values(document_id=uf_id_subq)
)
task_logger.info(f"Found {len(user_files)} user files to update")
task_logger.info(f"Found {len(search_docs)} search docs to update")
# Build a map of normalized doc IDs to SearchDocs
search_doc_map: dict[str, list[SearchDoc]] = {}
for sd in search_docs:
doc_id = sd.document_id
if search_doc_map.get(doc_id) is None:
search_doc_map[doc_id] = []
search_doc_map[doc_id].append(sd)
task_logger.debug(
f"Built search doc map with {len(search_doc_map)} entries"
)
ids_preview = list(search_doc_map.keys())[:5]
task_logger.debug(
f"First few search_doc_map ids: {ids_preview if ids_preview else 'No ids found'}"
)
task_logger.debug(
f"search_doc_map total items: {sum(len(docs) for docs in search_doc_map.values())}"
)
# Process each UserFile and update matching SearchDocs
updated_count = 0
for uf in user_files:
doc_id = uf.document_id
if doc_id.startswith("USER_FILE_CONNECTOR__"):
doc_id = "FILE_CONNECTOR__" + doc_id[len("USER_FILE_CONNECTOR__") :]
task_logger.debug(f"Processing user file {uf.id} with doc_id {doc_id}")
task_logger.debug(
f"doc_id in search_doc_map: {doc_id in search_doc_map}"
# Mark all processed user_files as migrated
db_session.execute(
sa.update(UserFile)
.where(
UserFile.document_id.is_not(None),
UserFile.document_id_migrated.is_(False),
)
if doc_id in search_doc_map:
search_docs = search_doc_map[doc_id]
task_logger.debug(
f"Found {len(search_docs)} search docs to update for user file {uf.id}"
)
# Update the SearchDoc to use the UserFile's UUID
for search_doc in search_docs:
search_doc.document_id = str(uf.id)
db_session.add(search_doc)
# Mark UserFile as migrated
uf.document_id_migrated = True
db_session.add(uf)
updated_count += 1
task_logger.info(
f"Updated {updated_count} SearchDoc records with new UUIDs"
.values(document_id_migrated=True)
)
db_session.commit()

View File

@@ -5,7 +5,6 @@ from sqlalchemy.orm import Session
from onyx.configs.constants import NUM_DAYS_TO_KEEP_INDEX_ATTEMPTS
from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.models import IndexAttempt
from onyx.db.models import IndexAttemptError
def get_old_index_attempts(
@@ -22,10 +21,6 @@ def get_old_index_attempts(
def cleanup_index_attempts(db_session: Session, index_attempt_ids: list[int]) -> None:
"""Clean up multiple index attempts"""
db_session.query(IndexAttemptError).filter(
IndexAttemptError.index_attempt_id.in_(index_attempt_ids)
).delete(synchronize_session=False)
db_session.query(IndexAttempt).filter(
IndexAttempt.id.in_(index_attempt_ids)
).delete(synchronize_session=False)

View File

@@ -28,7 +28,6 @@ from onyx.configs.constants import OnyxCeleryTask
from onyx.connectors.connector_runner import ConnectorRunner
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.factory import instantiate_connector
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorStopSignal
@@ -102,6 +101,7 @@ def _get_connector_runner(
are the complete list of existing documents of the connector. If the task
of type LOAD_STATE, the list will be considered complete and otherwise incomplete.
"""
from onyx.connectors.factory import instantiate_connector
task = attempt.connector_credential_pair.connector.input_type

View File

@@ -138,34 +138,23 @@ def _build_project_llm_docs(
project_file_id_set = set(project_file_ids)
for f in in_memory_user_files:
# Only include files that belong to the project (not ad-hoc uploads)
if project_file_id_set and (f.file_id in project_file_id_set):
def _strip_nuls(s: str) -> str:
return s.replace("\x00", "") if s else s
cleaned_filename = _strip_nuls(f.filename or str(f.file_id))
if f.file_type.is_text_file():
try:
text_content = f.content.decode("utf-8", errors="ignore")
text_content = _strip_nuls(text_content)
except Exception:
text_content = ""
# Build a short blurb from the file content for better UI display
blurb = (
(text_content[:200] + "...")
if len(text_content) > 200
else text_content
)
else:
# Non-text (e.g., images): do not decode bytes; keep empty content but allow citation
try:
text_content = f.content.decode("utf-8", errors="ignore")
except Exception:
text_content = ""
blurb = f"[{f.file_type.value}] {cleaned_filename}"
# Build a short blurb from the file content for better UI display
blurb = (
(text_content[:200] + "...")
if len(text_content) > 200
else text_content
)
# Provide basic metadata to improve SavedSearchDoc display
file_metadata: dict[str, str | list[str]] = {
"filename": cleaned_filename,
"filename": f.filename or str(f.file_id),
"file_type": f.file_type.value,
}
@@ -174,7 +163,7 @@ def _build_project_llm_docs(
document_id=str(f.file_id),
content=text_content,
blurb=blurb,
semantic_identifier=cleaned_filename,
semantic_identifier=f.filename or str(f.file_id),
source_type=DocumentSource.USER_FILE,
metadata=file_metadata,
updated_at=None,
@@ -514,8 +503,7 @@ def stream_chat_message_objects(
for fd in new_msg_req.current_message_files:
uid = fd.get("user_file_id")
if uid is not None:
user_file_id = UUID(uid)
user_file_ids.append(user_file_id)
user_file_ids.append(uid)
# Load in user files into memory and create search tool override kwargs if needed
# if we have enough tokens, we don't need to use search

View File

@@ -100,14 +100,12 @@ def parse_user_files(
persona=persona,
actual_user_input=actual_user_input,
)
uploaded_context_cap = int(available_tokens * 0.5)
logger.debug(
f"Total file tokens: {total_tokens}, Available tokens: {available_tokens},"
f"Allowed uploaded context tokens: {uploaded_context_cap}"
f"Total file tokens: {total_tokens}, Available tokens: {available_tokens}"
)
have_enough_tokens = total_tokens <= uploaded_context_cap
have_enough_tokens = total_tokens <= available_tokens
# If we have enough tokens, we don't need search
# we can just pass them into the prompt directly

View File

@@ -90,7 +90,6 @@ HARD_DELETE_CHATS = os.environ.get("HARD_DELETE_CHATS", "").lower() == "true"
# Internet Search
EXA_API_KEY = os.environ.get("EXA_API_KEY") or None
SERPER_API_KEY = os.environ.get("SERPER_API_KEY") or None
NUM_INTERNET_SEARCH_RESULTS = int(os.environ.get("NUM_INTERNET_SEARCH_RESULTS") or 10)
NUM_INTERNET_SEARCH_CHUNKS = int(os.environ.get("NUM_INTERNET_SEARCH_CHUNKS") or 50)

View File

@@ -41,7 +41,7 @@ All new connectors should have tests added to the `backend/tests/daily/connector
#### Implementing the new Connector
The connector must subclass one or more of LoadConnector, PollConnector, CheckpointedConnector, or CheckpointedConnectorWithPermSync
The connector must subclass one or more of LoadConnector, PollConnector, SlimConnector, or EventConnector.
The `__init__` should take arguments for configuring what documents the connector will and where it finds those
documents. For example, if you have a wiki site, it may include the configuration for the team, topic, folder, etc. of

View File

@@ -25,7 +25,7 @@ from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
@@ -56,7 +56,7 @@ class BitbucketConnectorCheckpoint(ConnectorCheckpoint):
class BitbucketConnector(
CheckpointedConnector[BitbucketConnectorCheckpoint],
SlimConnectorWithPermSync,
SlimConnector,
):
"""Connector for indexing Bitbucket Cloud pull requests.
@@ -266,7 +266,7 @@ class BitbucketConnector(
"""Validate and deserialize a checkpoint instance from JSON."""
return BitbucketConnectorCheckpoint.model_validate_json(checkpoint_json)
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -5,7 +5,6 @@ from datetime import timezone
from typing import Any
from urllib.parse import quote
from atlassian.errors import ApiError # type: ignore
from requests.exceptions import HTTPError
from typing_extensions import override
@@ -42,7 +41,6 @@ from onyx.connectors.interfaces import CredentialsProviderInterface
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
@@ -93,7 +91,6 @@ class ConfluenceCheckpoint(ConnectorCheckpoint):
class ConfluenceConnector(
CheckpointedConnector[ConfluenceCheckpoint],
SlimConnector,
SlimConnectorWithPermSync,
CredentialsConnector,
):
def __init__(
@@ -111,7 +108,6 @@ class ConfluenceConnector(
# pages.
labels_to_skip: list[str] = CONFLUENCE_CONNECTOR_LABELS_TO_SKIP,
timezone_offset: float = CONFLUENCE_TIMEZONE_OFFSET,
scoped_token: bool = False,
) -> None:
self.wiki_base = wiki_base
self.is_cloud = is_cloud
@@ -122,7 +118,6 @@ class ConfluenceConnector(
self.batch_size = batch_size
self.labels_to_skip = labels_to_skip
self.timezone_offset = timezone_offset
self.scoped_token = scoped_token
self._confluence_client: OnyxConfluence | None = None
self._low_timeout_confluence_client: OnyxConfluence | None = None
self._fetched_titles: set[str] = set()
@@ -200,7 +195,6 @@ class ConfluenceConnector(
is_cloud=self.is_cloud,
url=self.wiki_base,
credentials_provider=credentials_provider,
scoped_token=self.scoped_token,
)
confluence_client._probe_connection(**self.probe_kwargs)
confluence_client._initialize_connection(**self.final_kwargs)
@@ -213,7 +207,6 @@ class ConfluenceConnector(
url=self.wiki_base,
credentials_provider=credentials_provider,
timeout=3,
scoped_token=self.scoped_token,
)
low_timeout_confluence_client._probe_connection(**self.probe_kwargs)
low_timeout_confluence_client._initialize_connection(**self.final_kwargs)
@@ -565,21 +558,7 @@ class ConfluenceConnector(
def validate_checkpoint_json(self, checkpoint_json: str) -> ConfluenceCheckpoint:
return ConfluenceCheckpoint.model_validate_json(checkpoint_json)
@override
def retrieve_all_slim_docs(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
return self._retrieve_all_slim_docs(
start=start,
end=end,
callback=callback,
include_permissions=False,
)
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
@@ -589,28 +568,12 @@ class ConfluenceConnector(
Return 'slim' docs (IDs + minimal permission data).
Does not fetch actual text. Used primarily for incremental permission sync.
"""
return self._retrieve_all_slim_docs(
start=start,
end=end,
callback=callback,
include_permissions=True,
)
def _retrieve_all_slim_docs(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
include_permissions: bool = True,
) -> GenerateSlimDocumentOutput:
doc_metadata_list: list[SlimDocument] = []
restrictions_expand = ",".join(_RESTRICTIONS_EXPANSION_FIELDS)
space_level_access_info: dict[str, ExternalAccess] = {}
if include_permissions:
space_level_access_info = get_all_space_permissions(
self.confluence_client, self.is_cloud
)
space_level_access_info = get_all_space_permissions(
self.confluence_client, self.is_cloud
)
def get_external_access(
doc_id: str, restrictions: dict[str, Any], ancestors: list[dict[str, Any]]
@@ -637,10 +600,8 @@ class ConfluenceConnector(
doc_metadata_list.append(
SlimDocument(
id=page_id,
external_access=(
get_external_access(page_id, page_restrictions, page_ancestors)
if include_permissions
else None
external_access=get_external_access(
page_id, page_restrictions, page_ancestors
),
)
)
@@ -675,12 +636,8 @@ class ConfluenceConnector(
doc_metadata_list.append(
SlimDocument(
id=attachment_id,
external_access=(
get_external_access(
attachment_id, attachment_restrictions, []
)
if include_permissions
else None
external_access=get_external_access(
attachment_id, attachment_restrictions, []
),
)
)
@@ -691,10 +648,10 @@ class ConfluenceConnector(
if callback and callback.should_stop():
raise RuntimeError(
"retrieve_all_slim_docs_perm_sync: Stop signal detected"
"retrieve_all_slim_documents: Stop signal detected"
)
if callback:
callback.progress("retrieve_all_slim_docs_perm_sync", 1)
callback.progress("retrieve_all_slim_documents", 1)
yield doc_metadata_list
@@ -719,14 +676,6 @@ class ConfluenceConnector(
f"Unexpected error while validating Confluence settings: {e}"
)
if self.space:
try:
self.low_timeout_confluence_client.get_space(self.space)
except ApiError as e:
raise ConnectorValidationError(
"Invalid Confluence space key provided"
) from e
if not spaces or not spaces.get("results"):
raise ConnectorValidationError(
"No Confluence spaces found. Either your credentials lack permissions, or "
@@ -775,7 +724,7 @@ if __name__ == "__main__":
end = datetime.now().timestamp()
# Fetch all `SlimDocuments`.
for slim_doc in confluence_connector.retrieve_all_slim_docs_perm_sync():
for slim_doc in confluence_connector.retrieve_all_slim_documents():
print(slim_doc)
# Fetch all `Documents`.

View File

@@ -41,7 +41,6 @@ from onyx.connectors.confluence.utils import _handle_http_error
from onyx.connectors.confluence.utils import confluence_refresh_tokens
from onyx.connectors.confluence.utils import get_start_param_from_url
from onyx.connectors.confluence.utils import update_param_in_path
from onyx.connectors.cross_connector_utils.miscellaneous_utils import scoped_url
from onyx.connectors.interfaces import CredentialsProviderInterface
from onyx.file_processing.html_utils import format_document_soup
from onyx.redis.redis_pool import get_redis_client
@@ -88,20 +87,16 @@ class OnyxConfluence:
url: str,
credentials_provider: CredentialsProviderInterface,
timeout: int | None = None,
scoped_token: bool = False,
# should generally not be passed in, but making it overridable for
# easier testing
confluence_user_profiles_override: list[dict[str, str]] | None = (
CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE
),
) -> None:
self.base_url = url #'/'.join(url.rstrip("/").split("/")[:-1])
url = scoped_url(url, "confluence") if scoped_token else url
self._is_cloud = is_cloud
self._url = url.rstrip("/")
self._credentials_provider = credentials_provider
self.scoped_token = scoped_token
self.redis_client: Redis | None = None
self.static_credentials: dict[str, Any] | None = None
if self._credentials_provider.is_dynamic():
@@ -223,34 +218,6 @@ class OnyxConfluence:
with self._credentials_provider:
credentials, _ = self._renew_credentials()
if self.scoped_token:
# v2 endpoint doesn't always work with scoped tokens, use v1
token = credentials["confluence_access_token"]
probe_url = f"{self.base_url}/rest/api/space?limit=1"
import requests
logger.info(f"First and Last 5 of token: {token[:5]}...{token[-5:]}")
try:
r = requests.get(
probe_url,
headers={"Authorization": f"Bearer {token}"},
timeout=10,
)
r.raise_for_status()
except HTTPError as e:
if e.response.status_code == 403:
logger.warning(
"scoped token authenticated but not valid for probe endpoint (spaces)"
)
else:
if "WWW-Authenticate" in e.response.headers:
logger.warning(
f"WWW-Authenticate: {e.response.headers['WWW-Authenticate']}"
)
logger.warning(f"Full error: {e.response.text}")
raise e
return
# probe connection with direct client, no retries
if "confluence_refresh_token" in credentials:
@@ -269,7 +236,6 @@ class OnyxConfluence:
logger.info("Probing Confluence with Personal Access Token.")
url = self._url
if self._is_cloud:
logger.info("running with cloud client")
confluence_client_with_minimal_retries = Confluence(
url=url,
username=credentials["confluence_username"],
@@ -338,9 +304,7 @@ class OnyxConfluence:
url = f"https://api.atlassian.com/ex/confluence/{credentials['cloud_id']}"
confluence = Confluence(url=url, oauth2=oauth2_dict, **kwargs)
else:
logger.info(
f"Connecting to Confluence with Personal Access Token as user: {credentials['confluence_username']}"
)
logger.info("Connecting to Confluence with Personal Access Token.")
if self._is_cloud:
confluence = Confluence(
url=self._url,

View File

@@ -5,10 +5,7 @@ from datetime import datetime
from datetime import timezone
from typing import Any
from typing import TypeVar
from urllib.parse import urljoin
from urllib.parse import urlparse
import requests
from dateutil.parser import parse
from onyx.configs.app_configs import CONNECTOR_LOCALHOST_OVERRIDE
@@ -151,17 +148,3 @@ def get_oauth_callback_uri(base_domain: str, connector_id: str) -> str:
def is_atlassian_date_error(e: Exception) -> bool:
return "field 'updated' is invalid" in str(e)
def get_cloudId(base_url: str) -> str:
tenant_info_url = urljoin(base_url, "/_edge/tenant_info")
response = requests.get(tenant_info_url, timeout=10)
response.raise_for_status()
return response.json()["cloudId"]
def scoped_url(url: str, product: str) -> str:
parsed = urlparse(url)
base_url = parsed.scheme + "://" + parsed.netloc
cloud_id = get_cloudId(base_url)
return f"https://api.atlassian.com/ex/{product}/{cloud_id}{parsed.path}"

View File

@@ -1,4 +1,3 @@
import importlib
from typing import Any
from typing import Type
@@ -7,16 +6,60 @@ from sqlalchemy.orm import Session
from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
from onyx.configs.constants import DocumentSource
from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
from onyx.connectors.airtable.airtable_connector import AirtableConnector
from onyx.connectors.asana.connector import AsanaConnector
from onyx.connectors.axero.connector import AxeroConnector
from onyx.connectors.bitbucket.connector import BitbucketConnector
from onyx.connectors.blob.connector import BlobStorageConnector
from onyx.connectors.bookstack.connector import BookstackConnector
from onyx.connectors.clickup.connector import ClickupConnector
from onyx.connectors.confluence.connector import ConfluenceConnector
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
from onyx.connectors.discord.connector import DiscordConnector
from onyx.connectors.discourse.connector import DiscourseConnector
from onyx.connectors.document360.connector import Document360Connector
from onyx.connectors.dropbox.connector import DropboxConnector
from onyx.connectors.egnyte.connector import EgnyteConnector
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.file.connector import LocalFileConnector
from onyx.connectors.fireflies.connector import FirefliesConnector
from onyx.connectors.freshdesk.connector import FreshdeskConnector
from onyx.connectors.gitbook.connector import GitbookConnector
from onyx.connectors.github.connector import GithubConnector
from onyx.connectors.gitlab.connector import GitlabConnector
from onyx.connectors.gmail.connector import GmailConnector
from onyx.connectors.gong.connector import GongConnector
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_site.connector import GoogleSitesConnector
from onyx.connectors.guru.connector import GuruConnector
from onyx.connectors.highspot.connector import HighspotConnector
from onyx.connectors.hubspot.connector import HubSpotConnector
from onyx.connectors.imap.connector import ImapConnector
from onyx.connectors.interfaces import BaseConnector
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CredentialsConnector
from onyx.connectors.interfaces import EventConnector
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.jira.connector import JiraConnector
from onyx.connectors.linear.connector import LinearConnector
from onyx.connectors.loopio.connector import LoopioConnector
from onyx.connectors.mediawiki.wiki import MediaWikiConnector
from onyx.connectors.mock_connector.connector import MockConnector
from onyx.connectors.models import InputType
from onyx.connectors.registry import CONNECTOR_CLASS_MAP
from onyx.connectors.notion.connector import NotionConnector
from onyx.connectors.outline.connector import OutlineConnector
from onyx.connectors.productboard.connector import ProductboardConnector
from onyx.connectors.salesforce.connector import SalesforceConnector
from onyx.connectors.sharepoint.connector import SharepointConnector
from onyx.connectors.slab.connector import SlabConnector
from onyx.connectors.slack.connector import SlackConnector
from onyx.connectors.teams.connector import TeamsConnector
from onyx.connectors.web.connector import WebConnector
from onyx.connectors.wikipedia.connector import WikipediaConnector
from onyx.connectors.xenforo.connector import XenforoConnector
from onyx.connectors.zendesk.connector import ZendeskConnector
from onyx.connectors.zulip.connector import ZulipConnector
from onyx.db.connector import fetch_connector_by_id
from onyx.db.credentials import backend_update_credential_json
from onyx.db.credentials import fetch_credential_by_id
@@ -29,75 +72,101 @@ class ConnectorMissingException(Exception):
pass
# Cache for already imported connector classes
_connector_cache: dict[DocumentSource, Type[BaseConnector]] = {}
def _load_connector_class(source: DocumentSource) -> Type[BaseConnector]:
"""Dynamically load and cache a connector class."""
if source in _connector_cache:
return _connector_cache[source]
if source not in CONNECTOR_CLASS_MAP:
raise ConnectorMissingException(f"Connector not found for source={source}")
mapping = CONNECTOR_CLASS_MAP[source]
try:
module = importlib.import_module(mapping.module_path)
connector_class = getattr(module, mapping.class_name)
_connector_cache[source] = connector_class
return connector_class
except (ImportError, AttributeError) as e:
raise ConnectorMissingException(
f"Failed to import {mapping.class_name} from {mapping.module_path}: {e}"
)
def _validate_connector_supports_input_type(
connector: Type[BaseConnector],
input_type: InputType | None,
source: DocumentSource,
) -> None:
"""Validate that a connector supports the requested input type."""
if input_type is None:
return
# Check each input type requirement separately for clarity
load_state_unsupported = input_type == InputType.LOAD_STATE and not issubclass(
connector, LoadConnector
)
poll_unsupported = (
input_type == InputType.POLL
# Either poll or checkpoint works for this, in the future
# all connectors should be checkpoint connectors
and (
not issubclass(connector, PollConnector)
and not issubclass(connector, CheckpointedConnector)
)
)
event_unsupported = input_type == InputType.EVENT and not issubclass(
connector, EventConnector
)
if any([load_state_unsupported, poll_unsupported, event_unsupported]):
raise ConnectorMissingException(
f"Connector for source={source} does not accept input_type={input_type}"
)
def identify_connector_class(
    source: DocumentSource,
    input_type: InputType | None = None,
) -> Type[BaseConnector]:
    """Resolve the connector class for *source*, optionally checked against *input_type*.

    NOTE(review): this body appears to interleave two revisions (likely a
    merge/diff artifact): the lazily loaded ``connector`` from
    ``_load_connector_class`` is validated and then immediately overwritten by
    the static ``connector_map`` lookup below, and the trailing ``any([...])``
    check duplicates ``_validate_connector_supports_input_type``. Confirm which
    revision is intended before relying on this view.
    """
    # Load the connector class using lazy loading
    connector = _load_connector_class(source)
    # NOTE(review): static map from the pre-lazy-loading revision; its result
    # clobbers the lazily loaded class above.
    connector_map = {
        DocumentSource.WEB: WebConnector,
        DocumentSource.FILE: LocalFileConnector,
        DocumentSource.SLACK: {
            InputType.POLL: SlackConnector,
            InputType.SLIM_RETRIEVAL: SlackConnector,
        },
        DocumentSource.GITHUB: GithubConnector,
        DocumentSource.GMAIL: GmailConnector,
        DocumentSource.GITLAB: GitlabConnector,
        DocumentSource.GITBOOK: GitbookConnector,
        DocumentSource.GOOGLE_DRIVE: GoogleDriveConnector,
        DocumentSource.BOOKSTACK: BookstackConnector,
        DocumentSource.OUTLINE: OutlineConnector,
        DocumentSource.CONFLUENCE: ConfluenceConnector,
        DocumentSource.JIRA: JiraConnector,
        DocumentSource.PRODUCTBOARD: ProductboardConnector,
        DocumentSource.SLAB: SlabConnector,
        DocumentSource.NOTION: NotionConnector,
        DocumentSource.ZULIP: ZulipConnector,
        DocumentSource.GURU: GuruConnector,
        DocumentSource.LINEAR: LinearConnector,
        DocumentSource.HUBSPOT: HubSpotConnector,
        DocumentSource.DOCUMENT360: Document360Connector,
        DocumentSource.GONG: GongConnector,
        DocumentSource.GOOGLE_SITES: GoogleSitesConnector,
        DocumentSource.ZENDESK: ZendeskConnector,
        DocumentSource.LOOPIO: LoopioConnector,
        DocumentSource.DROPBOX: DropboxConnector,
        DocumentSource.SHAREPOINT: SharepointConnector,
        DocumentSource.TEAMS: TeamsConnector,
        DocumentSource.SALESFORCE: SalesforceConnector,
        DocumentSource.DISCOURSE: DiscourseConnector,
        DocumentSource.AXERO: AxeroConnector,
        DocumentSource.CLICKUP: ClickupConnector,
        DocumentSource.MEDIAWIKI: MediaWikiConnector,
        DocumentSource.WIKIPEDIA: WikipediaConnector,
        DocumentSource.ASANA: AsanaConnector,
        # All blob-storage style sources share one connector implementation.
        DocumentSource.S3: BlobStorageConnector,
        DocumentSource.R2: BlobStorageConnector,
        DocumentSource.GOOGLE_CLOUD_STORAGE: BlobStorageConnector,
        DocumentSource.OCI_STORAGE: BlobStorageConnector,
        DocumentSource.XENFORO: XenforoConnector,
        DocumentSource.DISCORD: DiscordConnector,
        DocumentSource.FRESHDESK: FreshdeskConnector,
        DocumentSource.FIREFLIES: FirefliesConnector,
        DocumentSource.EGNYTE: EgnyteConnector,
        DocumentSource.AIRTABLE: AirtableConnector,
        DocumentSource.HIGHSPOT: HighspotConnector,
        DocumentSource.IMAP: ImapConnector,
        DocumentSource.BITBUCKET: BitbucketConnector,
        # just for integration tests
        DocumentSource.MOCK_CONNECTOR: MockConnector,
    }
    connector_by_source = connector_map.get(source, {})
    # Validate connector supports the requested input_type
    _validate_connector_supports_input_type(connector, input_type, source)
    if isinstance(connector_by_source, dict):
        # A dict entry maps each supported InputType to its own class.
        if input_type is None:
            # If not specified, default to most exhaustive update
            connector = connector_by_source.get(InputType.LOAD_STATE)
        else:
            connector = connector_by_source.get(input_type)
    else:
        connector = connector_by_source
    if connector is None:
        raise ConnectorMissingException(f"Connector not found for source={source}")
    # NOTE(review): duplicates _validate_connector_supports_input_type, but
    # runs against the statically mapped class chosen above.
    if any(
        [
            (
                input_type == InputType.LOAD_STATE
                and not issubclass(connector, LoadConnector)
            ),
            (
                input_type == InputType.POLL
                # either poll or checkpoint works for this, in the future
                # all connectors should be checkpoint connectors
                and (
                    not issubclass(connector, PollConnector)
                    and not issubclass(connector, CheckpointedConnector)
                )
            ),
            (
                input_type == InputType.EVENT
                and not issubclass(connector, EventConnector)
            ),
        ]
    ):
        raise ConnectorMissingException(
            f"Connector for source={source} does not accept input_type={input_type}"
        )
    return connector

View File

@@ -219,19 +219,12 @@ def _get_batch_rate_limited(
def _get_userinfo(user: NamedUser) -> dict[str, str]:
def _safe_get(attr_name: str) -> str | None:
try:
return cast(str | None, getattr(user, attr_name))
except GithubException:
logger.debug(f"Error getting {attr_name} for user")
return None
return {
k: v
for k, v in {
"login": _safe_get("login"),
"name": _safe_get("name"),
"email": _safe_get("email"),
"login": user.login,
"name": user.name,
"email": user.email,
}.items()
if v is not None
}

View File

@@ -28,7 +28,7 @@ from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import Document
from onyx.connectors.models import ImageSection
@@ -232,7 +232,7 @@ def thread_to_document(
)
class GmailConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
class GmailConnector(LoadConnector, PollConnector, SlimConnector):
def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
self.batch_size = batch_size
@@ -397,10 +397,10 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
if callback:
if callback.should_stop():
raise RuntimeError(
"retrieve_all_slim_docs_perm_sync: Stop signal detected"
"retrieve_all_slim_documents: Stop signal detected"
)
callback.progress("retrieve_all_slim_docs_perm_sync", 1)
callback.progress("retrieve_all_slim_documents", 1)
except HttpError as e:
if _is_mail_service_disabled_error(e):
logger.warning(
@@ -431,7 +431,7 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
raise e
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -64,7 +64,7 @@ from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
@@ -153,7 +153,7 @@ class DriveIdStatus(Enum):
class GoogleDriveConnector(
SlimConnectorWithPermSync, CheckpointedConnectorWithPermSync[GoogleDriveCheckpoint]
SlimConnector, CheckpointedConnectorWithPermSync[GoogleDriveCheckpoint]
):
def __init__(
self,
@@ -1137,9 +1137,7 @@ class GoogleDriveConnector(
convert_func,
(
[file.user_email, self.primary_admin_email]
+ get_file_owners(
file.drive_file, self.primary_admin_email
),
+ get_file_owners(file.drive_file),
file.drive_file,
),
)
@@ -1296,7 +1294,7 @@ class GoogleDriveConnector(
callback.progress("_extract_slim_docs_from_google_drive", 1)
yield slim_batch
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -97,15 +97,14 @@ def _execute_with_retry(request: Any) -> Any:
raise Exception(f"Failed to execute request after {max_attempts} attempts")
def get_file_owners(file: GoogleDriveFileType, primary_admin_email: str) -> list[str]:
def get_file_owners(file: GoogleDriveFileType) -> list[str]:
"""
Get the owners of a file if the attribute is present.
"""
return [
email
owner.get("emailAddress")
for owner in file.get("owners", [])
if (email := owner.get("emailAddress"))
and email.split("@")[-1] == primary_admin_email.split("@")[-1]
if owner.get("emailAddress")
]

View File

@@ -18,7 +18,7 @@ from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import SlimDocument
@@ -38,7 +38,7 @@ class HighspotSpot(BaseModel):
name: str
class HighspotConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
class HighspotConnector(LoadConnector, PollConnector, SlimConnector):
"""
Connector for loading data from Highspot.
@@ -362,7 +362,7 @@ class HighspotConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync)
description = item_details.get("description", "")
return title, description
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -1,18 +1,14 @@
import re
from collections.abc import Callable
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast
from typing import TypeVar
import requests
from hubspot import HubSpot # type: ignore
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.hubspot.rate_limit import HubSpotRateLimiter
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
@@ -29,10 +25,6 @@ HUBSPOT_API_URL = "https://api.hubapi.com/integrations/v1/me"
# Available HubSpot object types
AVAILABLE_OBJECT_TYPES = {"tickets", "companies", "deals", "contacts"}
HUBSPOT_PAGE_SIZE = 100
T = TypeVar("T")
logger = setup_logger()
@@ -46,7 +38,6 @@ class HubSpotConnector(LoadConnector, PollConnector):
self.batch_size = batch_size
self._access_token = access_token
self._portal_id: str | None = None
self._rate_limiter = HubSpotRateLimiter()
# Set object types to fetch, default to all available types
if object_types is None:
@@ -86,37 +77,6 @@ class HubSpotConnector(LoadConnector, PollConnector):
"""Set the portal ID."""
self._portal_id = value
def _call_hubspot(self, func: Callable[..., T], *args: Any, **kwargs: Any) -> T:
return self._rate_limiter.call(func, *args, **kwargs)
def _paginated_results(
self,
fetch_page: Callable[..., Any],
**kwargs: Any,
) -> Generator[Any, None, None]:
base_kwargs = dict(kwargs)
base_kwargs.setdefault("limit", HUBSPOT_PAGE_SIZE)
after: str | None = None
while True:
page_kwargs = base_kwargs.copy()
if after is not None:
page_kwargs["after"] = after
page = self._call_hubspot(fetch_page, **page_kwargs)
results = getattr(page, "results", [])
for result in results:
yield result
paging = getattr(page, "paging", None)
next_page = getattr(paging, "next", None) if paging else None
if next_page is None:
break
after = getattr(next_page, "after", None)
if after is None:
break
def _clean_html_content(self, html_content: str) -> str:
"""Clean HTML content and extract raw text"""
if not html_content:
@@ -190,82 +150,78 @@ class HubSpotConnector(LoadConnector, PollConnector):
) -> list[dict[str, Any]]:
"""Get associated objects for a given object"""
try:
associations_iter = self._paginated_results(
api_client.crm.associations.v4.basic_api.get_page,
associations = api_client.crm.associations.v4.basic_api.get_page(
object_type=from_object_type,
object_id=object_id,
to_object_type=to_object_type,
)
object_ids = [assoc.to_object_id for assoc in associations_iter]
associated_objects = []
if associations.results:
object_ids = [assoc.to_object_id for assoc in associations.results]
associated_objects: list[dict[str, Any]] = []
# Batch get the associated objects
if to_object_type == "contacts":
for obj_id in object_ids:
try:
obj = api_client.crm.contacts.basic_api.get_by_id(
contact_id=obj_id,
properties=[
"firstname",
"lastname",
"email",
"company",
"jobtitle",
],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch contact {obj_id}: {e}")
if to_object_type == "contacts":
for obj_id in object_ids:
try:
obj = self._call_hubspot(
api_client.crm.contacts.basic_api.get_by_id,
contact_id=obj_id,
properties=[
"firstname",
"lastname",
"email",
"company",
"jobtitle",
],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch contact {obj_id}: {e}")
elif to_object_type == "companies":
for obj_id in object_ids:
try:
obj = api_client.crm.companies.basic_api.get_by_id(
company_id=obj_id,
properties=[
"name",
"domain",
"industry",
"city",
"state",
],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch company {obj_id}: {e}")
elif to_object_type == "companies":
for obj_id in object_ids:
try:
obj = self._call_hubspot(
api_client.crm.companies.basic_api.get_by_id,
company_id=obj_id,
properties=[
"name",
"domain",
"industry",
"city",
"state",
],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch company {obj_id}: {e}")
elif to_object_type == "deals":
for obj_id in object_ids:
try:
obj = api_client.crm.deals.basic_api.get_by_id(
deal_id=obj_id,
properties=[
"dealname",
"amount",
"dealstage",
"closedate",
"pipeline",
],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch deal {obj_id}: {e}")
elif to_object_type == "deals":
for obj_id in object_ids:
try:
obj = self._call_hubspot(
api_client.crm.deals.basic_api.get_by_id,
deal_id=obj_id,
properties=[
"dealname",
"amount",
"dealstage",
"closedate",
"pipeline",
],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch deal {obj_id}: {e}")
elif to_object_type == "tickets":
for obj_id in object_ids:
try:
obj = self._call_hubspot(
api_client.crm.tickets.basic_api.get_by_id,
ticket_id=obj_id,
properties=["subject", "content", "hs_ticket_priority"],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch ticket {obj_id}: {e}")
elif to_object_type == "tickets":
for obj_id in object_ids:
try:
obj = api_client.crm.tickets.basic_api.get_by_id(
ticket_id=obj_id,
properties=["subject", "content", "hs_ticket_priority"],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch ticket {obj_id}: {e}")
return associated_objects
@@ -283,33 +239,33 @@ class HubSpotConnector(LoadConnector, PollConnector):
) -> list[dict[str, Any]]:
"""Get notes associated with a given object"""
try:
associations_iter = self._paginated_results(
api_client.crm.associations.v4.basic_api.get_page,
# Get associations to notes (engagement type)
associations = api_client.crm.associations.v4.basic_api.get_page(
object_type=object_type,
object_id=object_id,
to_object_type="notes",
)
note_ids = [assoc.to_object_id for assoc in associations_iter]
associated_notes = []
if associations.results:
note_ids = [assoc.to_object_id for assoc in associations.results]
for note_id in note_ids:
try:
# Notes are engagements in HubSpot, use the engagements API
note = self._call_hubspot(
api_client.crm.objects.notes.basic_api.get_by_id,
note_id=note_id,
properties=[
"hs_note_body",
"hs_timestamp",
"hs_created_by",
"hubspot_owner_id",
],
)
associated_notes.append(note.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch note {note_id}: {e}")
# Batch get the associated notes
for note_id in note_ids:
try:
# Notes are engagements in HubSpot, use the engagements API
note = api_client.crm.objects.notes.basic_api.get_by_id(
note_id=note_id,
properties=[
"hs_note_body",
"hs_timestamp",
"hs_created_by",
"hubspot_owner_id",
],
)
associated_notes.append(note.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch note {note_id}: {e}")
return associated_notes
@@ -402,9 +358,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
self, start: datetime | None = None, end: datetime | None = None
) -> GenerateDocumentsOutput:
api_client = HubSpot(access_token=self.access_token)
tickets_iter = self._paginated_results(
api_client.crm.tickets.basic_api.get_page,
all_tickets = api_client.crm.tickets.get_all(
properties=[
"subject",
"content",
@@ -417,7 +371,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
doc_batch: list[Document] = []
for ticket in tickets_iter:
for ticket in all_tickets:
updated_at = ticket.updated_at.replace(tzinfo=None)
if start is not None and updated_at < start.replace(tzinfo=None):
continue
@@ -505,9 +459,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
self, start: datetime | None = None, end: datetime | None = None
) -> GenerateDocumentsOutput:
api_client = HubSpot(access_token=self.access_token)
companies_iter = self._paginated_results(
api_client.crm.companies.basic_api.get_page,
all_companies = api_client.crm.companies.get_all(
properties=[
"name",
"domain",
@@ -523,7 +475,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
doc_batch: list[Document] = []
for company in companies_iter:
for company in all_companies:
updated_at = company.updated_at.replace(tzinfo=None)
if start is not None and updated_at < start.replace(tzinfo=None):
continue
@@ -630,9 +582,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
self, start: datetime | None = None, end: datetime | None = None
) -> GenerateDocumentsOutput:
api_client = HubSpot(access_token=self.access_token)
deals_iter = self._paginated_results(
api_client.crm.deals.basic_api.get_page,
all_deals = api_client.crm.deals.get_all(
properties=[
"dealname",
"amount",
@@ -648,7 +598,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
doc_batch: list[Document] = []
for deal in deals_iter:
for deal in all_deals:
updated_at = deal.updated_at.replace(tzinfo=None)
if start is not None and updated_at < start.replace(tzinfo=None):
continue
@@ -753,9 +703,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
self, start: datetime | None = None, end: datetime | None = None
) -> GenerateDocumentsOutput:
api_client = HubSpot(access_token=self.access_token)
contacts_iter = self._paginated_results(
api_client.crm.contacts.basic_api.get_page,
all_contacts = api_client.crm.contacts.get_all(
properties=[
"firstname",
"lastname",
@@ -773,7 +721,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
doc_batch: list[Document] = []
for contact in contacts_iter:
for contact in all_contacts:
updated_at = contact.updated_at.replace(tzinfo=None)
if start is not None and updated_at < start.replace(tzinfo=None):
continue

View File

@@ -1,145 +0,0 @@
from __future__ import annotations
import time
from collections.abc import Callable
from typing import Any
from typing import TypeVar
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
rate_limit_builder,
)
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
RateLimitTriedTooManyTimesError,
)
from onyx.utils.logger import setup_logger
logger = setup_logger()

T = TypeVar("T")

# HubSpot exposes a ten second rolling window (x-hubspot-ratelimit-interval-milliseconds)
# with a maximum of 190 requests, and a per-second limit of 19 requests.
_HUBSPOT_TEN_SECOND_LIMIT = 190
_HUBSPOT_TEN_SECOND_PERIOD = 10  # seconds
_HUBSPOT_SECONDLY_LIMIT = 19
_HUBSPOT_SECONDLY_PERIOD = 1  # second
# Fallback sleep when the rejection carries no usable timing headers.
_DEFAULT_SLEEP_SECONDS = 10
# Safety margin added on top of every computed/derived sleep.
_SLEEP_PADDING_SECONDS = 1.0
# Consecutive rate-limit rejections tolerated before giving up.
_MAX_RATE_LIMIT_RETRIES = 5
def _extract_header(headers: Any, key: str) -> str | None:
if headers is None:
return None
getter = getattr(headers, "get", None)
if callable(getter):
value = getter(key)
if value is not None:
return value
if isinstance(headers, dict):
value = headers.get(key)
if value is not None:
return value
return None
def is_rate_limit_error(exception: Exception) -> bool:
    """Return True when *exception* looks like a HubSpot rate-limit rejection."""
    # An HTTP 429 status is the canonical signal.
    if getattr(exception, "status", None) == 429:
        return True

    # Exhausted-quota headers also count, even without an attached status.
    headers = getattr(exception, "headers", None)
    if headers is not None:
        for header_name in (
            "x-hubspot-ratelimit-remaining",
            "x-hubspot-ratelimit-secondly-remaining",
        ):
            if _extract_header(headers, header_name) == "0":
                return True

    # Last resort: rate-limit wording in the exception message itself.
    text = str(exception)
    return any(marker in text for marker in ("RATE_LIMIT", "Too Many Requests"))
def get_rate_limit_retry_delay_seconds(exception: Exception) -> float:
    """Derive how long to sleep before retrying a rate-limited HubSpot call.

    Preference order: the ``Retry-After`` header, then HubSpot's rolling-window
    interval header, then the per-second quota header; otherwise a fixed
    default. A small padding is always added to the result.
    """
    headers = getattr(exception, "headers", None)

    # (header name, raw-value -> delay transform, parse-failure log format)
    strategies = (
        (
            "Retry-After",
            lambda raw: float(raw) + _SLEEP_PADDING_SECONDS,
            "Failed to parse Retry-After header '%s' as float",
        ),
        (
            "x-hubspot-ratelimit-interval-milliseconds",
            lambda raw: float(raw) / 1000.0 + _SLEEP_PADDING_SECONDS,
            "Failed to parse x-hubspot-ratelimit-interval-milliseconds '%s' as float",
        ),
        (
            # Derive a minimal spacing from the per-second quota.
            "x-hubspot-ratelimit-secondly",
            lambda raw: (1.0 / max(float(raw), 1.0)) + _SLEEP_PADDING_SECONDS,
            "Failed to parse x-hubspot-ratelimit-secondly '%s' as float",
        ),
    )

    for header_name, to_delay, parse_error_format in strategies:
        raw_value = _extract_header(headers, header_name)
        if not raw_value:
            continue
        try:
            return to_delay(raw_value)
        except ValueError:
            logger.debug(parse_error_format, raw_value)

    return _DEFAULT_SLEEP_SECONDS + _SLEEP_PADDING_SECONDS
class HubSpotRateLimiter:
    """Client-side throttle for HubSpot API calls.

    Wraps callables so they respect both of HubSpot's quotas (per-second and
    rolling ten-second window, see the module constants) and retries with a
    header-derived sleep when the API still reports a rate limit.
    """

    def __init__(
        self,
        *,
        ten_second_limit: int = _HUBSPOT_TEN_SECOND_LIMIT,
        ten_second_period: int = _HUBSPOT_TEN_SECOND_PERIOD,
        secondly_limit: int = _HUBSPOT_SECONDLY_LIMIT,
        secondly_period: int = _HUBSPOT_SECONDLY_PERIOD,
        max_retries: int = _MAX_RATE_LIMIT_RETRIES,
    ) -> None:
        self._max_retries = max_retries

        # Stacked decorators enforce BOTH quotas on every execution: the
        # decorator nearest the function meters the ten-second window, and
        # the outer one meters the per-second budget. Built per instance
        # (presumably so each limiter keeps independent call counters —
        # depends on rate_limit_builder internals; confirm if sharing one
        # limiter across instances ever matters).
        @rate_limit_builder(max_calls=secondly_limit, period=secondly_period)
        @rate_limit_builder(max_calls=ten_second_limit, period=ten_second_period)
        def _execute(callable_: Callable[[], T]) -> T:
            return callable_()

        self._execute = _execute

    def call(self, func: Callable[..., T], *args: Any, **kwargs: Any) -> T:
        """Invoke ``func(*args, **kwargs)`` under the configured rate limits.

        Rate-limit rejections are retried (after a server-suggested sleep) up
        to ``max_retries`` times; any other exception propagates unchanged.

        Raises:
            RateLimitTriedTooManyTimesError: after exhausting the retry budget.
        """
        attempts = 0
        while True:
            try:
                # Fresh zero-arg closure per attempt; the limiter only ever
                # sees a no-argument callable.
                return self._execute(lambda: func(*args, **kwargs))
            except Exception as exc:  # pylint: disable=broad-except
                # Only rate-limit errors are retried; re-raise everything else.
                if not is_rate_limit_error(exc):
                    raise
                attempts += 1
                if attempts > self._max_retries:
                    raise RateLimitTriedTooManyTimesError(
                        "Exceeded configured HubSpot rate limit retries"
                    ) from exc
                wait_time = get_rate_limit_retry_delay_seconds(exc)
                logger.notice(
                    "HubSpot rate limit reached. Sleeping %.2f seconds before retrying.",
                    wait_time,
                )
                time.sleep(wait_time)

View File

@@ -97,20 +97,11 @@ class PollConnector(BaseConnector):
raise NotImplementedError
# Slim connectors retrieve just the ids of documents
# Slim connectors can retrieve just the ids and
# permission syncing information for connected documents
class SlimConnector(BaseConnector):
@abc.abstractmethod
def retrieve_all_slim_docs(
self,
) -> GenerateSlimDocumentOutput:
raise NotImplementedError
# Slim connectors retrieve both the ids AND
# permission syncing information for connected documents
class SlimConnectorWithPermSync(BaseConnector):
@abc.abstractmethod
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -25,11 +25,11 @@ from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.jira.access import get_project_permissions
from onyx.connectors.jira.utils import best_effort_basic_expert_info
from onyx.connectors.jira.utils import best_effort_get_field_from_issue
@@ -247,7 +247,7 @@ def _perform_jql_search_v2(
def process_jira_issue(
jira_base_url: str,
jira_client: JIRA,
issue: Issue,
comment_email_blacklist: tuple[str, ...] = (),
labels_to_skip: set[str] | None = None,
@@ -281,7 +281,7 @@ def process_jira_issue(
)
return None
page_url = build_jira_url(jira_base_url, issue.key)
page_url = build_jira_url(jira_client, issue.key)
metadata_dict: dict[str, str | list[str]] = {}
people = set()
@@ -359,10 +359,7 @@ class JiraConnectorCheckpoint(ConnectorCheckpoint):
offset: int | None = None
class JiraConnector(
CheckpointedConnectorWithPermSync[JiraConnectorCheckpoint],
SlimConnectorWithPermSync,
):
class JiraConnector(CheckpointedConnector[JiraConnectorCheckpoint], SlimConnector):
def __init__(
self,
jira_base_url: str,
@@ -375,23 +372,15 @@ class JiraConnector(
labels_to_skip: list[str] = JIRA_CONNECTOR_LABELS_TO_SKIP,
# Custom JQL query to filter Jira issues
jql_query: str | None = None,
scoped_token: bool = False,
) -> None:
self.batch_size = batch_size
# dealing with scoped tokens is a bit tricky becasue we need to hit api.atlassian.net
# when making jira requests but still want correct links to issues in the UI.
# So, the user's base url is stored here, but converted to a scoped url when passed
# to the jira client.
self.jira_base = jira_base_url.rstrip("/") # Remove trailing slash if present
self.jira_project = project_key
self._comment_email_blacklist = comment_email_blacklist or []
self.labels_to_skip = set(labels_to_skip)
self.jql_query = jql_query
self.scoped_token = scoped_token
self._jira_client: JIRA | None = None
# Cache project permissions to avoid fetching them repeatedly across runs
self._project_permissions_cache: dict[str, Any] = {}
@property
def comment_email_blacklist(self) -> tuple:
@@ -410,26 +399,10 @@ class JiraConnector(
return ""
return f'"{self.jira_project}"'
def _get_project_permissions(self, project_key: str) -> Any:
"""Get project permissions with caching.
Args:
project_key: The Jira project key
Returns:
The external access permissions for the project
"""
if project_key not in self._project_permissions_cache:
self._project_permissions_cache[project_key] = get_project_permissions(
jira_client=self.jira_client, jira_project=project_key
)
return self._project_permissions_cache[project_key]
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
self._jira_client = build_jira_client(
credentials=credentials,
jira_base=self.jira_base,
scoped_token=self.scoped_token,
)
return None
@@ -469,37 +442,15 @@ class JiraConnector(
) -> CheckpointOutput[JiraConnectorCheckpoint]:
jql = self._get_jql_query(start, end)
try:
return self._load_from_checkpoint(
jql, checkpoint, include_permissions=False
)
return self._load_from_checkpoint(jql, checkpoint)
except Exception as e:
if is_atlassian_date_error(e):
jql = self._get_jql_query(start - ONE_HOUR, end)
return self._load_from_checkpoint(
jql, checkpoint, include_permissions=False
)
raise e
def load_from_checkpoint_with_perm_sync(
self,
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch,
checkpoint: JiraConnectorCheckpoint,
) -> CheckpointOutput[JiraConnectorCheckpoint]:
"""Load documents from checkpoint with permission information included."""
jql = self._get_jql_query(start, end)
try:
return self._load_from_checkpoint(jql, checkpoint, include_permissions=True)
except Exception as e:
if is_atlassian_date_error(e):
jql = self._get_jql_query(start - ONE_HOUR, end)
return self._load_from_checkpoint(
jql, checkpoint, include_permissions=True
)
return self._load_from_checkpoint(jql, checkpoint)
raise e
def _load_from_checkpoint(
self, jql: str, checkpoint: JiraConnectorCheckpoint, include_permissions: bool
self, jql: str, checkpoint: JiraConnectorCheckpoint
) -> CheckpointOutput[JiraConnectorCheckpoint]:
# Get the current offset from checkpoint or start at 0
starting_offset = checkpoint.offset or 0
@@ -521,25 +472,18 @@ class JiraConnector(
issue_key = issue.key
try:
if document := process_jira_issue(
jira_base_url=self.jira_base,
jira_client=self.jira_client,
issue=issue,
comment_email_blacklist=self.comment_email_blacklist,
labels_to_skip=self.labels_to_skip,
):
# Add permission information to the document if requested
if include_permissions:
project_key = get_jira_project_key_from_issue(issue=issue)
if project_key:
document.external_access = self._get_project_permissions(
project_key
)
yield document
except Exception as e:
yield ConnectorFailure(
failed_document=DocumentFailure(
document_id=issue_key,
document_link=build_jira_url(self.jira_base, issue_key),
document_link=build_jira_url(self.jira_client, issue_key),
),
failure_message=f"Failed to process Jira issue: {str(e)}",
exception=e,
@@ -571,7 +515,7 @@ class JiraConnector(
# if we didn't retrieve a full batch, we're done
checkpoint.has_more = current_offset - starting_offset == page_size
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
@@ -590,7 +534,6 @@ class JiraConnector(
prev_offset = 0
current_offset = 0
slim_doc_batch = []
while checkpoint.has_more:
for issue in _perform_jql_search(
jira_client=self.jira_client,
@@ -607,12 +550,13 @@ class JiraConnector(
continue
issue_key = best_effort_get_field_from_issue(issue, _FIELD_KEY)
id = build_jira_url(self.jira_base, issue_key)
id = build_jira_url(self.jira_client, issue_key)
slim_doc_batch.append(
SlimDocument(
id=id,
external_access=self._get_project_permissions(project_key),
external_access=get_project_permissions(
jira_client=self.jira_client, jira_project=project_key
),
)
)
current_offset += 1
@@ -757,7 +701,7 @@ if __name__ == "__main__":
start = 0
end = datetime.now().timestamp()
for slim_doc in connector.retrieve_all_slim_docs_perm_sync(
for slim_doc in connector.retrieve_all_slim_documents(
start=start,
end=end,
):

View File

@@ -10,7 +10,6 @@ from jira.resources import CustomFieldOption
from jira.resources import Issue
from jira.resources import User
from onyx.connectors.cross_connector_utils.miscellaneous_utils import scoped_url
from onyx.connectors.models import BasicExpertInfo
from onyx.utils.logger import setup_logger
@@ -75,18 +74,11 @@ def extract_text_from_adf(adf: dict | None) -> str:
return " ".join(texts)
def build_jira_url(jira_base_url: str, issue_key: str) -> str:
"""
Get the url used to access an issue in the UI.
"""
return f"{jira_base_url}/browse/{issue_key}"
def build_jira_url(jira_client: JIRA, issue_key: str) -> str:
return f"{jira_client.client_info()}/browse/{issue_key}"
def build_jira_client(
credentials: dict[str, Any], jira_base: str, scoped_token: bool = False
) -> JIRA:
jira_base = scoped_url(jira_base, "jira") if scoped_token else jira_base
def build_jira_client(credentials: dict[str, Any], jira_base: str) -> JIRA:
api_token = credentials["jira_api_token"]
# if user provide an email we assume it's cloud
if "jira_user_email" in credentials:

View File

@@ -1,208 +0,0 @@
"""Registry mapping for connector classes."""
from pydantic import BaseModel
from onyx.configs.constants import DocumentSource
class ConnectorMapping(BaseModel):
    """Location of a connector class, used for lazy (deferred) importing."""

    # Dotted import path of the module that defines the connector class.
    module_path: str
    # Name of the connector class inside that module.
    class_name: str
# Mapping of DocumentSource to connector details for lazy loading.
# Keeping module paths as strings (instead of importing the classes here)
# defers each connector's import — and its dependencies — until first use.
CONNECTOR_CLASS_MAP: dict[DocumentSource, ConnectorMapping] = {
    DocumentSource.WEB: ConnectorMapping(
        module_path="onyx.connectors.web.connector",
        class_name="WebConnector",
    ),
    DocumentSource.FILE: ConnectorMapping(
        module_path="onyx.connectors.file.connector",
        class_name="LocalFileConnector",
    ),
    DocumentSource.SLACK: ConnectorMapping(
        module_path="onyx.connectors.slack.connector",
        class_name="SlackConnector",
    ),
    DocumentSource.GITHUB: ConnectorMapping(
        module_path="onyx.connectors.github.connector",
        class_name="GithubConnector",
    ),
    DocumentSource.GMAIL: ConnectorMapping(
        module_path="onyx.connectors.gmail.connector",
        class_name="GmailConnector",
    ),
    DocumentSource.GITLAB: ConnectorMapping(
        module_path="onyx.connectors.gitlab.connector",
        class_name="GitlabConnector",
    ),
    DocumentSource.GITBOOK: ConnectorMapping(
        module_path="onyx.connectors.gitbook.connector",
        class_name="GitbookConnector",
    ),
    DocumentSource.GOOGLE_DRIVE: ConnectorMapping(
        module_path="onyx.connectors.google_drive.connector",
        class_name="GoogleDriveConnector",
    ),
    DocumentSource.BOOKSTACK: ConnectorMapping(
        module_path="onyx.connectors.bookstack.connector",
        class_name="BookstackConnector",
    ),
    DocumentSource.OUTLINE: ConnectorMapping(
        module_path="onyx.connectors.outline.connector",
        class_name="OutlineConnector",
    ),
    DocumentSource.CONFLUENCE: ConnectorMapping(
        module_path="onyx.connectors.confluence.connector",
        class_name="ConfluenceConnector",
    ),
    DocumentSource.JIRA: ConnectorMapping(
        module_path="onyx.connectors.jira.connector",
        class_name="JiraConnector",
    ),
    DocumentSource.PRODUCTBOARD: ConnectorMapping(
        module_path="onyx.connectors.productboard.connector",
        class_name="ProductboardConnector",
    ),
    DocumentSource.SLAB: ConnectorMapping(
        module_path="onyx.connectors.slab.connector",
        class_name="SlabConnector",
    ),
    DocumentSource.NOTION: ConnectorMapping(
        module_path="onyx.connectors.notion.connector",
        class_name="NotionConnector",
    ),
    DocumentSource.ZULIP: ConnectorMapping(
        module_path="onyx.connectors.zulip.connector",
        class_name="ZulipConnector",
    ),
    DocumentSource.GURU: ConnectorMapping(
        module_path="onyx.connectors.guru.connector",
        class_name="GuruConnector",
    ),
    DocumentSource.LINEAR: ConnectorMapping(
        module_path="onyx.connectors.linear.connector",
        class_name="LinearConnector",
    ),
    DocumentSource.HUBSPOT: ConnectorMapping(
        module_path="onyx.connectors.hubspot.connector",
        class_name="HubSpotConnector",
    ),
    DocumentSource.DOCUMENT360: ConnectorMapping(
        module_path="onyx.connectors.document360.connector",
        class_name="Document360Connector",
    ),
    DocumentSource.GONG: ConnectorMapping(
        module_path="onyx.connectors.gong.connector",
        class_name="GongConnector",
    ),
    DocumentSource.GOOGLE_SITES: ConnectorMapping(
        module_path="onyx.connectors.google_site.connector",
        class_name="GoogleSitesConnector",
    ),
    DocumentSource.ZENDESK: ConnectorMapping(
        module_path="onyx.connectors.zendesk.connector",
        class_name="ZendeskConnector",
    ),
    DocumentSource.LOOPIO: ConnectorMapping(
        module_path="onyx.connectors.loopio.connector",
        class_name="LoopioConnector",
    ),
    DocumentSource.DROPBOX: ConnectorMapping(
        module_path="onyx.connectors.dropbox.connector",
        class_name="DropboxConnector",
    ),
    DocumentSource.SHAREPOINT: ConnectorMapping(
        module_path="onyx.connectors.sharepoint.connector",
        class_name="SharepointConnector",
    ),
    DocumentSource.TEAMS: ConnectorMapping(
        module_path="onyx.connectors.teams.connector",
        class_name="TeamsConnector",
    ),
    DocumentSource.SALESFORCE: ConnectorMapping(
        module_path="onyx.connectors.salesforce.connector",
        class_name="SalesforceConnector",
    ),
    DocumentSource.DISCOURSE: ConnectorMapping(
        module_path="onyx.connectors.discourse.connector",
        class_name="DiscourseConnector",
    ),
    DocumentSource.AXERO: ConnectorMapping(
        module_path="onyx.connectors.axero.connector",
        class_name="AxeroConnector",
    ),
    DocumentSource.CLICKUP: ConnectorMapping(
        module_path="onyx.connectors.clickup.connector",
        class_name="ClickupConnector",
    ),
    DocumentSource.MEDIAWIKI: ConnectorMapping(
        module_path="onyx.connectors.mediawiki.wiki",
        class_name="MediaWikiConnector",
    ),
    DocumentSource.WIKIPEDIA: ConnectorMapping(
        module_path="onyx.connectors.wikipedia.connector",
        class_name="WikipediaConnector",
    ),
    DocumentSource.ASANA: ConnectorMapping(
        module_path="onyx.connectors.asana.connector",
        class_name="AsanaConnector",
    ),
    # The four blob-storage sources below all share a single connector
    # implementation (BlobStorageConnector).
    DocumentSource.S3: ConnectorMapping(
        module_path="onyx.connectors.blob.connector",
        class_name="BlobStorageConnector",
    ),
    DocumentSource.R2: ConnectorMapping(
        module_path="onyx.connectors.blob.connector",
        class_name="BlobStorageConnector",
    ),
    DocumentSource.GOOGLE_CLOUD_STORAGE: ConnectorMapping(
        module_path="onyx.connectors.blob.connector",
        class_name="BlobStorageConnector",
    ),
    DocumentSource.OCI_STORAGE: ConnectorMapping(
        module_path="onyx.connectors.blob.connector",
        class_name="BlobStorageConnector",
    ),
    DocumentSource.XENFORO: ConnectorMapping(
        module_path="onyx.connectors.xenforo.connector",
        class_name="XenforoConnector",
    ),
    DocumentSource.DISCORD: ConnectorMapping(
        module_path="onyx.connectors.discord.connector",
        class_name="DiscordConnector",
    ),
    DocumentSource.FRESHDESK: ConnectorMapping(
        module_path="onyx.connectors.freshdesk.connector",
        class_name="FreshdeskConnector",
    ),
    DocumentSource.FIREFLIES: ConnectorMapping(
        module_path="onyx.connectors.fireflies.connector",
        class_name="FirefliesConnector",
    ),
    DocumentSource.EGNYTE: ConnectorMapping(
        module_path="onyx.connectors.egnyte.connector",
        class_name="EgnyteConnector",
    ),
    DocumentSource.AIRTABLE: ConnectorMapping(
        module_path="onyx.connectors.airtable.airtable_connector",
        class_name="AirtableConnector",
    ),
    DocumentSource.HIGHSPOT: ConnectorMapping(
        module_path="onyx.connectors.highspot.connector",
        class_name="HighspotConnector",
    ),
    DocumentSource.IMAP: ConnectorMapping(
        module_path="onyx.connectors.imap.connector",
        class_name="ImapConnector",
    ),
    DocumentSource.BITBUCKET: ConnectorMapping(
        module_path="onyx.connectors.bitbucket.connector",
        class_name="BitbucketConnector",
    ),
    # just for integration tests
    DocumentSource.MOCK_CONNECTOR: ConnectorMapping(
        module_path="onyx.connectors.mock_connector.connector",
        class_name="MockConnector",
    ),
}

View File

@@ -16,7 +16,7 @@ from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorMissingCredentialError
@@ -151,7 +151,7 @@ def _validate_custom_query_config(config: dict[str, Any]) -> None:
)
class SalesforceConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
class SalesforceConnector(LoadConnector, PollConnector, SlimConnector):
"""Approach outline
Goal
@@ -1119,7 +1119,7 @@ class SalesforceConnector(LoadConnector, PollConnector, SlimConnectorWithPermSyn
with tempfile.TemporaryDirectory() as temp_dir:
return self._delta_sync(temp_dir, start, end)
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -41,7 +41,7 @@ from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import IndexingHeartbeatInterface
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
@@ -73,8 +73,7 @@ class SiteDescriptor(BaseModel):
"""Data class for storing SharePoint site information.
Args:
url: The base site URL (e.g. https://danswerai.sharepoint.com/sites/sharepoint-tests
or https://danswerai.sharepoint.com/teams/team-name)
url: The base site URL (e.g. https://danswerai.sharepoint.com/sites/sharepoint-tests)
drive_name: The name of the drive to access (e.g. "Shared Documents", "Other Library")
If None, all drives will be accessed.
folder_path: The folder path within the drive to access (e.g. "test/nested with spaces")
@@ -673,7 +672,7 @@ def _convert_sitepage_to_slim_document(
class SharepointConnector(
SlimConnectorWithPermSync,
SlimConnector,
CheckpointedConnectorWithPermSync[SharepointConnectorCheckpoint],
):
def __init__(
@@ -704,11 +703,9 @@ class SharepointConnector(
# Ensure sites are sharepoint urls
for site_url in self.sites:
if not site_url.startswith("https://") or not (
"/sites/" in site_url or "/teams/" in site_url
):
if not site_url.startswith("https://") or "/sites/" not in site_url:
raise ConnectorValidationError(
"Site URLs must be full Sharepoint URLs (e.g. https://your-tenant.sharepoint.com/sites/your-site or https://your-tenant.sharepoint.com/teams/your-team)"
"Site URLs must be full Sharepoint URLs (e.g. https://your-tenant.sharepoint.com/sites/your-site)"
)
@property
@@ -723,17 +720,10 @@ class SharepointConnector(
site_data_list = []
for url in site_urls:
parts = url.strip().split("/")
site_type_index = None
if "sites" in parts:
site_type_index = parts.index("sites")
elif "teams" in parts:
site_type_index = parts.index("teams")
if site_type_index is not None:
# Extract the base site URL (up to and including the site/team name)
site_url = "/".join(parts[: site_type_index + 2])
remaining_parts = parts[site_type_index + 2 :]
sites_index = parts.index("sites")
site_url = "/".join(parts[: sites_index + 2])
remaining_parts = parts[sites_index + 2 :]
# Extract drive name and folder path
if remaining_parts:
@@ -755,9 +745,7 @@ class SharepointConnector(
)
)
else:
logger.warning(
f"Site URL '{url}' is not a valid Sharepoint URL (must contain /sites/ or /teams/)"
)
logger.warning(f"Site URL '{url}' is not a valid Sharepoint URL")
return site_data_list
def _get_drive_items_for_drive_name(
@@ -1609,7 +1597,7 @@ class SharepointConnector(
) -> SharepointConnectorCheckpoint:
return SharepointConnectorCheckpoint.model_validate_json(checkpoint_json)
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -16,7 +16,7 @@ from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import SlimDocument
@@ -164,7 +164,7 @@ def get_slab_url_from_title_id(base_url: str, title: str, page_id: str) -> str:
return urljoin(urljoin(base_url, "posts/"), url_id)
class SlabConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
class SlabConnector(LoadConnector, PollConnector, SlimConnector):
def __init__(
self,
base_url: str,
@@ -239,7 +239,7 @@ class SlabConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
time_filter=lambda t: start_time <= t <= end_time
)
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -42,7 +42,7 @@ from onyx.connectors.interfaces import CredentialsConnector
from onyx.connectors.interfaces import CredentialsProviderInterface
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
@@ -581,7 +581,7 @@ def _process_message(
class SlackConnector(
SlimConnectorWithPermSync,
SlimConnector,
CredentialsConnector,
CheckpointedConnectorWithPermSync[SlackCheckpoint],
):
@@ -732,7 +732,7 @@ class SlackConnector(
self.text_cleaner = SlackTextCleaner(client=self.client)
self.credentials_provider = credentials_provider
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -22,7 +22,7 @@ from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
@@ -51,7 +51,7 @@ class TeamsCheckpoint(ConnectorCheckpoint):
class TeamsConnector(
CheckpointedConnector[TeamsCheckpoint],
SlimConnectorWithPermSync,
SlimConnector,
):
MAX_WORKERS = 10
AUTHORITY_URL_PREFIX = "https://login.microsoftonline.com/"
@@ -228,9 +228,9 @@ class TeamsConnector(
has_more=bool(todos),
)
# impls for SlimConnectorWithPermSync
# impls for SlimConnector
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
@@ -572,7 +572,7 @@ if __name__ == "__main__":
)
teams_connector.validate_connector_settings()
for slim_doc in teams_connector.retrieve_all_slim_docs_perm_sync():
for slim_doc in teams_connector.retrieve_all_slim_documents():
...
for doc in load_everything_from_checkpoint_connector(

View File

@@ -219,25 +219,6 @@ def is_valid_url(url: str) -> bool:
return False
def _same_site(base_url: str, candidate_url: str) -> bool:
base, candidate = urlparse(base_url), urlparse(candidate_url)
base_netloc = base.netloc.lower().removeprefix("www.")
candidate_netloc = candidate.netloc.lower().removeprefix("www.")
if base_netloc != candidate_netloc:
return False
base_path = (base.path or "/").rstrip("/")
if base_path in ("", "/"):
return True
candidate_path = candidate.path or "/"
if candidate_path == base_path:
return True
boundary = f"{base_path}/"
return candidate_path.startswith(boundary)
def get_internal_links(
base_url: str, url: str, soup: BeautifulSoup, should_ignore_pound: bool = True
) -> set[str]:
@@ -258,7 +239,7 @@ def get_internal_links(
# Relative path handling
href = urljoin(url, href)
if _same_site(base_url, href):
if urlparse(href).netloc == urlparse(url).netloc and base_url in href:
internal_links.add(href)
return internal_links

View File

@@ -26,7 +26,7 @@ from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import ConnectorFailure
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import Document
@@ -376,7 +376,7 @@ class ZendeskConnectorCheckpoint(ConnectorCheckpoint):
class ZendeskConnector(
SlimConnectorWithPermSync, CheckpointedConnector[ZendeskConnectorCheckpoint]
SlimConnector, CheckpointedConnector[ZendeskConnectorCheckpoint]
):
def __init__(
self,
@@ -565,7 +565,7 @@ class ZendeskConnector(
)
return checkpoint
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -62,14 +62,6 @@ class MCPAuthenticationType(str, PyEnum):
OAUTH = "OAUTH"
class MCPTransport(str, PyEnum):
"""MCP transport types"""
STDIO = "STDIO" # TODO: currently unsupported, need to add a user guide for setup
SSE = "SSE" # Server-Sent Events (deprecated but still used)
STREAMABLE_HTTP = "STREAMABLE_HTTP" # Modern HTTP streaming
class MCPAuthenticationPerformer(str, PyEnum):
ADMIN = "ADMIN"
PER_USER = "PER_USER"

132
backend/onyx/db/folder.py Normal file
View File

@@ -0,0 +1,132 @@
from uuid import UUID
from sqlalchemy.orm import Session
from onyx.db.chat import delete_chat_session
from onyx.db.models import ChatFolder
from onyx.db.models import ChatSession
from onyx.utils.logger import setup_logger
logger = setup_logger()
def get_user_folders(
    user_id: UUID | None,
    db_session: Session,
) -> list[ChatFolder]:
    """Fetch every chat folder owned by the given user."""
    owned = db_session.query(ChatFolder).filter(ChatFolder.user_id == user_id)
    return owned.all()
def update_folder_display_priority(
    user_id: UUID | None,
    display_priority_map: dict[int, int],
    db_session: Session,
) -> None:
    """Reassign display priorities for all of a user's folders.

    The map must cover exactly the user's folder ids; otherwise a
    ValueError is raised and nothing is committed.
    """
    user_folders = get_user_folders(user_id=user_id, db_session=db_session)
    if {f.id for f in user_folders} != set(display_priority_map):
        raise ValueError("Invalid Folder IDs provided")
    for user_folder in user_folders:
        user_folder.display_priority = display_priority_map[user_folder.id]
    db_session.commit()
def get_folder_by_id(
    user_id: UUID | None,
    folder_id: int,
    db_session: Session,
) -> ChatFolder:
    """Look up a folder by id and verify it belongs to the user.

    Raises:
        ValueError: no folder exists with the given id.
        PermissionError: the folder is owned by a different user.
    """
    match = (
        db_session.query(ChatFolder)
        .filter(ChatFolder.id == folder_id)
        .one_or_none()
    )
    if match is None:
        raise ValueError("Folder by specified id does not exist")
    if match.user_id != user_id:
        raise PermissionError(f"Folder does not belong to user: {user_id}")
    return match
def create_folder(
    user_id: UUID | None, folder_name: str | None, db_session: Session
) -> int:
    """Create a chat folder for the user and return the new folder's id."""
    folder = ChatFolder(user_id=user_id, name=folder_name)
    db_session.add(folder)
    # Commit so the autogenerated primary key is populated before returning.
    db_session.commit()
    return folder.id
def rename_folder(
    user_id: UUID | None, folder_id: int, folder_name: str | None, db_session: Session
) -> None:
    """Rename a folder owned by the user (ownership enforced by lookup)."""
    target = get_folder_by_id(
        user_id=user_id, folder_id=folder_id, db_session=db_session
    )
    target.name = folder_name
    db_session.commit()
def add_chat_to_folder(
    user_id: UUID | None, folder_id: int, chat_session: ChatSession, db_session: Session
) -> None:
    """Move the chat session into the folder (ownership enforced by lookup)."""
    destination = get_folder_by_id(
        user_id=user_id, folder_id=folder_id, db_session=db_session
    )
    chat_session.folder_id = destination.id
    db_session.commit()
def remove_chat_from_folder(
    user_id: UUID | None, folder_id: int, chat_session: ChatSession, db_session: Session
) -> None:
    """Detach a chat session from a folder owned by the user.

    Raises:
        ValueError: if the chat session is not in the specified folder.
        PermissionError: (via get_folder_by_id) if the folder belongs to
            another user.
    """
    folder = get_folder_by_id(
        user_id=user_id, folder_id=folder_id, db_session=db_session
    )
    if chat_session.folder_id != folder.id:
        raise ValueError("The chat session is not in the specified folder.")
    if folder.user_id != user_id:
        # Defensive double-check — get_folder_by_id already enforces ownership,
        # so this branch should be unreachable.
        # Fixed typo in the message: "below" -> "belong".
        raise ValueError(
            f"Tried to remove a chat session from a folder that does not belong to "
            f"this user, user id: {user_id}"
        )
    chat_session.folder_id = None
    if chat_session in folder.chat_sessions:
        folder.chat_sessions.remove(chat_session)
    db_session.commit()
def delete_folder(
    user_id: UUID | None,
    folder_id: int,
    including_chats: bool,
    db_session: Session,
) -> None:
    """Delete a folder owned by the user, optionally deleting its chats too."""
    target = get_folder_by_id(
        user_id=user_id, folder_id=folder_id, db_session=db_session
    )
    # Assuming there will not be a massive number of chats in any given folder
    if including_chats:
        for session_in_folder in target.chat_sessions:
            delete_chat_session(
                user_id=user_id,
                chat_session_id=session_in_folder.id,
                db_session=db_session,
            )
    db_session.delete(target)
    db_session.commit()

View File

@@ -112,7 +112,6 @@ def upsert_llm_provider(
name=model_configuration.name,
is_visible=model_configuration.is_visible,
max_input_tokens=model_configuration.max_input_tokens,
supports_image_input=model_configuration.supports_image_input,
)
.on_conflict_do_nothing()
)

View File

@@ -4,10 +4,8 @@ from uuid import UUID
from sqlalchemy import and_
from sqlalchemy import select
from sqlalchemy.orm import Session
from sqlalchemy.orm.attributes import flag_modified
from onyx.db.enums import MCPAuthenticationPerformer
from onyx.db.enums import MCPTransport
from onyx.db.models import MCPAuthenticationType
from onyx.db.models import MCPConnectionConfig
from onyx.db.models import MCPServer
@@ -91,8 +89,6 @@ def create_mcp_server__no_commit(
description: str | None,
server_url: str,
auth_type: MCPAuthenticationType,
transport: MCPTransport,
auth_performer: MCPAuthenticationPerformer,
db_session: Session,
admin_connection_config_id: int | None = None,
) -> MCPServer:
@@ -102,9 +98,7 @@ def create_mcp_server__no_commit(
name=name,
description=description,
server_url=server_url,
transport=transport,
auth_type=auth_type,
auth_performer=auth_performer,
admin_connection_config_id=admin_connection_config_id,
)
db_session.add(new_server)
@@ -120,8 +114,6 @@ def update_mcp_server__no_commit(
server_url: str | None = None,
auth_type: MCPAuthenticationType | None = None,
admin_connection_config_id: int | None = None,
auth_performer: MCPAuthenticationPerformer | None = None,
transport: MCPTransport | None = None,
) -> MCPServer:
"""Update an existing MCP server"""
server = get_mcp_server_by_id(server_id, db_session)
@@ -136,10 +128,6 @@ def update_mcp_server__no_commit(
server.auth_type = auth_type
if admin_connection_config_id is not None:
server.admin_connection_config_id = admin_connection_config_id
if auth_performer is not None:
server.auth_performer = auth_performer
if transport is not None:
server.transport = transport
db_session.flush() # Don't commit yet, let caller decide when to commit
return server
@@ -159,6 +147,18 @@ def delete_mcp_server(server_id: int, db_session: Session) -> None:
logger.info(f"Successfully deleted MCP server {server_id} and its tools")
# TODO: this is pretty hacky
def get_mcp_server_auth_performer(mcp_server: MCPServer) -> MCPAuthenticationPerformer:
"""Get the authentication performer for an MCP server"""
if mcp_server.auth_type == MCPAuthenticationType.OAUTH:
return MCPAuthenticationPerformer.PER_USER
if not mcp_server.admin_connection_config:
return MCPAuthenticationPerformer.ADMIN
if not mcp_server.admin_connection_config.config.get("header_substitutions"):
return MCPAuthenticationPerformer.ADMIN
return MCPAuthenticationPerformer.PER_USER
def get_all_mcp_tools_for_server(server_id: int, db_session: Session) -> list[Tool]:
"""Get all MCP tools for a server"""
return list(
@@ -259,8 +259,6 @@ def update_connection_config(
if config_data is not None:
config.config = config_data
# Force SQLAlchemy to detect the change by marking the field as modified
flag_modified(config, "config")
db_session.commit()
return config
@@ -297,7 +295,7 @@ def get_server_auth_template(
if not server.admin_connection_config_id:
return None
if server.auth_performer == MCPAuthenticationPerformer.ADMIN:
if get_mcp_server_auth_performer(server) == MCPAuthenticationPerformer.ADMIN:
return None # admin server implies no template
return server.admin_connection_config

View File

@@ -63,8 +63,6 @@ from onyx.db.enums import (
SyncStatus,
MCPAuthenticationType,
UserFileStatus,
MCPAuthenticationPerformer,
MCPTransport,
)
from onyx.configs.constants import NotificationType
from onyx.configs.constants import SearchFeedbackType
@@ -2353,8 +2351,6 @@ class ModelConfiguration(Base):
# - The end-user is configuring a model and chooses not to set a max-input-tokens limit.
max_input_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True)
supports_image_input: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
llm_provider: Mapped["LLMProvider"] = relationship(
"LLMProvider",
back_populates="model_configurations",
@@ -3472,18 +3468,10 @@ class MCPServer(Base):
name: Mapped[str] = mapped_column(String, nullable=False)
description: Mapped[str | None] = mapped_column(String, nullable=True)
server_url: Mapped[str] = mapped_column(String, nullable=False)
# Transport type for connecting to the MCP server
transport: Mapped[MCPTransport] = mapped_column(
Enum(MCPTransport, native_enum=False), nullable=False
)
# Auth type: "none", "api_token", or "oauth"
auth_type: Mapped[MCPAuthenticationType] = mapped_column(
Enum(MCPAuthenticationType, native_enum=False), nullable=False
)
# Who performs authentication for this server (ADMIN or PER_USER)
auth_performer: Mapped[MCPAuthenticationPerformer] = mapped_column(
Enum(MCPAuthenticationPerformer, native_enum=False), nullable=False
)
# Admin connection config - used for the config page
# and (when applicable) admin-managed auth
# and (when applicable) per-user auth

View File

@@ -0,0 +1,478 @@
import datetime
import time
from typing import List
from uuid import UUID
from fastapi import UploadFile
from sqlalchemy import and_
from sqlalchemy import func
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import Session
from onyx.auth.users import get_current_tenant_id
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.connector import create_connector
from onyx.db.connector_credential_pair import add_credential_to_connector
from onyx.db.credentials import create_credential
from onyx.db.enums import AccessType
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Document
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.models import Persona
from onyx.db.models import Persona__UserFile
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.models import UserFolder
from onyx.server.documents.connector import trigger_indexing_for_cc_pair
from onyx.server.documents.connector import upload_files
from onyx.server.documents.models import ConnectorBase
from onyx.server.documents.models import CredentialBase
from onyx.server.models import StatusResponse
# Prefix used to build the synthetic document id for user-uploaded files.
USER_FILE_CONSTANT: str = "USER_FILE_CONNECTOR"
def create_user_files(
    files: List[UploadFile],
    folder_id: int | None,
    user: User | None,
    db_session: Session,
    link_url: str | None = None,
) -> list[UserFile]:
    """Persist one UserFile row per uploaded file and return the new rows.

    NOTE(rkuo): folder_id can be -1 (RECENT_DOCS_FOLDER_ID) — TODO: document
    what that special value means here.

    Args:
        files: uploads to store; the raw bytes are handled by upload_files.
        folder_id: destination folder, or None for no folder.
        user: owner of the files; None for anonymous/system ownership.
        db_session: active database session (committed before returning).
        link_url: optional source URL to associate with every file.
    """
    # NOTE: At the moment, zip metadata is not used for user files.
    # Should revisit to decide whether this should be a feature.
    upload_response = upload_files(files)
    user_files = []
    for file_path, file in zip(upload_response.file_paths, files):
        new_file = UserFile(
            user_id=user.id if user else None,
            folder_id=folder_id,
            file_id=file_path,
            # Use the shared prefix constant instead of a duplicated literal so
            # the document id stays in sync with code matching on USER_FILE_CONSTANT.
            document_id=f"{USER_FILE_CONSTANT}__{file_path}",
            name=file.filename,
            token_count=None,  # filled in later, after tokenization
            link_url=link_url,
            content_type=file.content_type,
        )
        db_session.add(new_file)
        user_files.append(new_file)
    db_session.commit()
    return user_files
def upload_files_to_user_files_with_indexing(
    files: List[UploadFile],
    folder_id: int | None,
    user: User,
    db_session: Session,
    trigger_index: bool = True,
) -> list[UserFile]:
    """Create user files, back each with a connector/credential pair, and
    optionally trigger immediate indexing.

    NOTE(rkuo): folder_id can be -1 (RECENT_DOCS_FOLDER_ID) — TODO: document
    what that special value means here.
    """
    # Create the user files first
    user_files = create_user_files(files, folder_id, user, db_session)
    # Create connector and credential for each file
    for user_file in user_files:
        cc_pair = create_file_connector_credential(user_file, user, db_session)
        # NOTE(review): cc_pair is a StatusResponse; presumably .data carries
        # the new cc-pair id — confirm against add_credential_to_connector.
        user_file.cc_pair_id = cc_pair.data
    db_session.commit()
    # Trigger immediate high-priority indexing for all created files
    if trigger_index:
        tenant_id = get_current_tenant_id()
        for user_file in user_files:
            # Use the existing trigger_indexing_for_cc_pair function but with highest priority
            if user_file.cc_pair_id:
                trigger_indexing_for_cc_pair(
                    [],
                    user_file.cc_pair.connector_id,
                    False,
                    tenant_id,
                    db_session,
                    is_user_file=True,
                )
    return user_files
def create_file_connector_credential(
    user_file: UserFile, user: User, db_session: Session
) -> StatusResponse:
    """Provision a connector and credential for one user file and link them.

    Returns the StatusResponse produced by add_credential_to_connector.
    """
    new_connector = create_connector(
        db_session=db_session,
        connector_data=ConnectorBase(
            name=f"UserFile-{user_file.file_id}-{int(time.time())}",
            source=DocumentSource.FILE,
            input_type=InputType.LOAD_STATE,
            connector_specific_config={
                "file_locations": [user_file.file_id],
                "file_names": [user_file.name],
                "zip_metadata": {},
            },
            refresh_freq=None,
            prune_freq=None,
            indexing_start=None,
        ),
    )
    new_credential = create_credential(
        CredentialBase(
            credential_json={},
            admin_public=True,
            source=DocumentSource.FILE,
            curator_public=True,
            groups=[],
            name=f"UserFileCredential-{user_file.file_id}-{int(time.time())}",
            is_user_file=True,
        ),
        user,
        db_session,
    )
    return add_credential_to_connector(
        db_session=db_session,
        user=user,
        connector_id=new_connector.id,
        credential_id=new_credential.id,
        cc_pair_name=f"UserFileCCPair-{user_file.file_id}-{int(time.time())}",
        access_type=AccessType.PRIVATE,
        auto_sync_options=None,
        groups=[],
        is_user_file=True,
    )
def get_user_file_indexing_status(
    file_ids: list[int], db_session: Session
) -> dict[int, bool]:
    """Map each found user-file id to whether its cc_pair has a successful index run.

    Ids not present in the database are omitted from the result.
    """
    rows = (
        db_session.query(UserFile)
        .filter(UserFile.id.in_(file_ids))
        .options(joinedload(UserFile.cc_pair))
        .all()
    )
    return {
        row.id: bool(row.cc_pair and row.cc_pair.last_successful_index_time)
        for row in rows
    }
def calculate_user_files_token_count(
    file_ids: list[int], folder_ids: list[int], db_session: Session
) -> int:
    """Sum token counts over the given files plus all files in the given folders."""

    def _sum_tokens(predicate) -> int:
        # func.sum returns None when no rows match; coerce that to 0.
        return (
            db_session.query(func.sum(UserFile.token_count))
            .filter(predicate)
            .scalar()
            or 0
        )

    total = 0
    if file_ids:
        total += _sum_tokens(UserFile.id.in_(file_ids))
    if folder_ids:
        total += _sum_tokens(UserFile.folder_id.in_(folder_ids))
    return total
def load_all_user_files(
    file_ids: list[int], folder_ids: list[int], db_session: Session
) -> list[UserFile]:
    """Collect UserFile rows referenced directly by id or via folder membership."""
    collected: list[UserFile] = []
    if file_ids:
        collected += (
            db_session.query(UserFile).filter(UserFile.id.in_(file_ids)).all()
        )
    if folder_ids:
        collected += (
            db_session.query(UserFile)
            .filter(UserFile.folder_id.in_(folder_ids))
            .all()
        )
    return collected
def get_user_files_from_folder(folder_id: int, db_session: Session) -> list[UserFile]:
    """Return every user file stored in the given folder."""
    in_folder = db_session.query(UserFile).filter(UserFile.folder_id == folder_id)
    return in_folder.all()
def share_file_with_assistant(
    file_id: int, assistant_id: int, db_session: Session
) -> None:
    """Attach a user file to an assistant; silently no-ops if either is missing."""
    target_file = db_session.query(UserFile).filter(UserFile.id == file_id).first()
    target_persona = (
        db_session.query(Persona).filter(Persona.id == assistant_id).first()
    )
    if target_file is None or target_persona is None:
        return
    target_file.assistants.append(target_persona)
    db_session.commit()
def unshare_file_with_assistant(
    file_id: int, assistant_id: int, db_session: Session
) -> None:
    """Delete the persona<->file association row, if one exists."""
    association_filter = and_(
        Persona__UserFile.user_file_id == file_id,
        Persona__UserFile.persona_id == assistant_id,
    )
    db_session.query(Persona__UserFile).filter(association_filter).delete()
    db_session.commit()
def share_folder_with_assistant(
    folder_id: int, assistant_id: int, db_session: Session
) -> None:
    """Share every file in the folder with the assistant.

    No-ops if the folder or assistant does not exist.
    """
    target_folder = (
        db_session.query(UserFolder).filter(UserFolder.id == folder_id).first()
    )
    target_persona = (
        db_session.query(Persona).filter(Persona.id == assistant_id).first()
    )
    if target_folder is None or target_persona is None:
        return
    for contained_file in target_folder.files:
        share_file_with_assistant(contained_file.id, assistant_id, db_session)
def unshare_folder_with_assistant(
    folder_id: int, assistant_id: int, db_session: Session
) -> None:
    """Unshare every file in the folder from the assistant; no-op if folder missing."""
    target_folder = (
        db_session.query(UserFolder).filter(UserFolder.id == folder_id).first()
    )
    if target_folder is None:
        return
    for contained_file in target_folder.files:
        unshare_file_with_assistant(contained_file.id, assistant_id, db_session)
def fetch_user_files_for_documents(
    document_ids: list[str],
    db_session: Session,
) -> dict[str, int | None]:
    """
    Fetches user file IDs for the given document IDs.

    Args:
        document_ids: List of document IDs to fetch user files for
        db_session: Database session

    Returns:
        Dictionary mapping document IDs to user file IDs (or None if no user
        file exists for that document)
    """
    # Every requested document starts out unmapped; matches fill in below.
    result: dict[str, int | None] = {doc_id: None for doc_id in document_ids}
    if not document_ids:
        return result

    # Map each document to the cc_pair that indexed it, joining through the
    # DocumentByConnectorCredentialPair association table.
    doc_cc_pairs = (
        db_session.query(Document.id, ConnectorCredentialPair.id)
        .join(
            DocumentByConnectorCredentialPair,
            Document.id == DocumentByConnectorCredentialPair.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .filter(Document.id.in_(document_ids))
        .all()
    )
    if not doc_cc_pairs:
        # Nothing matched; skip the second query rather than issue an empty IN ().
        return result

    # Map each relevant cc_pair to its user file. De-duplicate the cc_pair IDs
    # so the IN clause contains each ID only once.
    cc_pair_ids = {cc_pair_id for _, cc_pair_id in doc_cc_pairs}
    cc_pair_to_user_file = (
        db_session.query(ConnectorCredentialPair.id, UserFile.id)
        .join(UserFile, UserFile.cc_pair_id == ConnectorCredentialPair.id)
        .filter(ConnectorCredentialPair.id.in_(cc_pair_ids))
        .all()
    )
    cc_pair_to_user_file_dict = dict(cc_pair_to_user_file)

    # Stitch the two mappings together: document -> cc_pair -> user file.
    for doc_id, cc_pair_id in doc_cc_pairs:
        if cc_pair_id in cc_pair_to_user_file_dict:
            result[doc_id] = cc_pair_to_user_file_dict[cc_pair_id]
    return result
def fetch_user_folders_for_documents(
    document_ids: list[str],
    db_session: Session,
) -> dict[str, int | None]:
    """
    Fetches user folder IDs for the given document IDs.

    For each document, returns the folder ID that the document's associated
    user file belongs to.

    Args:
        document_ids: List of document IDs to fetch user folders for
        db_session: Database session

    Returns:
        Dictionary mapping document IDs to user folder IDs (or None if no user
        folder exists for that document)
    """
    # Every requested document starts out unmapped; matches fill in below.
    result: dict[str, int | None] = {doc_id: None for doc_id in document_ids}
    if not document_ids:
        return result

    # Map each document to the cc_pair that indexed it, joining through the
    # DocumentByConnectorCredentialPair association table.
    doc_cc_pairs = (
        db_session.query(Document.id, ConnectorCredentialPair.id)
        .join(
            DocumentByConnectorCredentialPair,
            Document.id == DocumentByConnectorCredentialPair.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .filter(Document.id.in_(document_ids))
        .all()
    )
    if not doc_cc_pairs:
        # Nothing matched; skip the second query rather than issue an empty IN ().
        return result

    # Map each relevant cc_pair to the folder of its user file. De-duplicate
    # the cc_pair IDs so the IN clause contains each ID only once.
    cc_pair_ids = {cc_pair_id for _, cc_pair_id in doc_cc_pairs}
    cc_pair_to_folder = (
        db_session.query(ConnectorCredentialPair.id, UserFile.folder_id)
        .join(UserFile, UserFile.cc_pair_id == ConnectorCredentialPair.id)
        .filter(ConnectorCredentialPair.id.in_(cc_pair_ids))
        .all()
    )
    cc_pair_to_folder_dict = dict(cc_pair_to_folder)

    # Stitch the two mappings together: document -> cc_pair -> folder.
    for doc_id, cc_pair_id in doc_cc_pairs:
        if cc_pair_id in cc_pair_to_folder_dict:
            result[doc_id] = cc_pair_to_folder_dict[cc_pair_id]
    return result
def get_user_file_from_id(db_session: Session, user_file_id: int) -> UserFile | None:
    """Look up a single UserFile by its ID; returns None when no row matches."""
    matching = db_session.query(UserFile).filter(UserFile.id == user_file_id)
    return matching.first()
# def fetch_user_files_for_documents(
# # document_ids: list[str],
# # db_session: Session,
# # ) -> dict[str, int | None]:
# # # Query UserFile objects for the given document_ids
# # user_files = (
# # db_session.query(UserFile).filter(UserFile.document_id.in_(document_ids)).all()
# # )
# # # Create a dictionary mapping document_ids to UserFile objects
# # result: dict[str, int | None] = {doc_id: None for doc_id in document_ids}
# # for user_file in user_files:
# # result[user_file.document_id] = user_file.id
# # return result
def upsert_user_folder(
    db_session: Session,
    id: int | None = None,
    user_id: UUID | None = None,
    name: str | None = None,
    description: str | None = None,
    created_at: datetime.datetime | None = None,
    user: User | None = None,
    files: list[UserFile] | None = None,
    assistants: list[Persona] | None = None,
) -> UserFolder:
    """Create or update a UserFolder.

    Lookup is by ``id`` when provided, otherwise by the ``(name, user_id)``
    pair. When a folder is found, only the arguments that are not None are
    applied, so callers can patch individual fields. When no folder matches,
    a new one is created (keeping ``id`` if it was given). The session is
    flushed but NOT committed; the caller owns the transaction.
    """
    # Prefer primary-key lookup; fall back to the (name, user_id) pair.
    if id is not None:
        user_folder = db_session.query(UserFolder).filter_by(id=id).first()
    else:
        user_folder = (
            db_session.query(UserFolder).filter_by(name=name, user_id=user_id).first()
        )
    if user_folder:
        # Partial update: None means "leave this field unchanged".
        if user_id is not None:
            user_folder.user_id = user_id
        if name is not None:
            user_folder.name = name
        if description is not None:
            user_folder.description = description
        if created_at is not None:
            user_folder.created_at = created_at
        if user is not None:
            user_folder.user = user
        if files is not None:
            user_folder.files = files
        if assistants is not None:
            user_folder.assistants = assistants
    else:
        # NOTE(review): utcnow() is deprecated and returns a naive datetime;
        # presumably the column stores naive UTC — confirm before switching
        # to datetime.now(timezone.utc).
        user_folder = UserFolder(
            id=id,
            user_id=user_id,
            name=name,
            description=description,
            created_at=created_at or datetime.datetime.utcnow(),
            user=user,
            files=files or [],
            assistants=assistants or [],
        )
        db_session.add(user_folder)
    # Flush so the caller can read generated fields (e.g. the new ID) before commit.
    db_session.flush()
    return user_folder
def get_user_folder_by_name(db_session: Session, name: str) -> UserFolder | None:
    """Return the first UserFolder with the given name, or None if absent."""
    by_name = db_session.query(UserFolder).filter(UserFolder.name == name)
    return by_name.first()
def update_user_file_token_count__no_commit(
    user_file_id_to_token_count: dict[int, int | None],
    db_session: Session,
) -> None:
    """Set token_count for each listed user file; the caller must commit."""
    for file_id, count in user_file_id_to_token_count.items():
        (
            db_session.query(UserFile)
            .filter(UserFile.id == file_id)
            .update({UserFile.token_count: count})
        )

View File

@@ -1,52 +1,15 @@
"""Factory for creating federated connector instances."""
import importlib
from typing import Any
from typing import Type
from onyx.configs.constants import FederatedConnectorSource
from onyx.federated_connectors.interfaces import FederatedConnector
from onyx.federated_connectors.registry import FEDERATED_CONNECTOR_CLASS_MAP
from onyx.federated_connectors.slack.federated_connector import SlackFederatedConnector
from onyx.utils.logger import setup_logger
logger = setup_logger()
class FederatedConnectorMissingException(Exception):
pass
# Cache for already imported federated connector classes
_federated_connector_cache: dict[FederatedConnectorSource, Type[FederatedConnector]] = (
{}
)
def _load_federated_connector_class(
source: FederatedConnectorSource,
) -> Type[FederatedConnector]:
"""Dynamically load and cache a federated connector class."""
if source in _federated_connector_cache:
return _federated_connector_cache[source]
if source not in FEDERATED_CONNECTOR_CLASS_MAP:
raise FederatedConnectorMissingException(
f"Federated connector not found for source={source}"
)
mapping = FEDERATED_CONNECTOR_CLASS_MAP[source]
try:
module = importlib.import_module(mapping.module_path)
connector_class = getattr(module, mapping.class_name)
_federated_connector_cache[source] = connector_class
return connector_class
except (ImportError, AttributeError) as e:
raise FederatedConnectorMissingException(
f"Failed to import {mapping.class_name} from {mapping.module_path}: {e}"
)
def get_federated_connector(
source: FederatedConnectorSource,
credentials: dict[str, Any],
@@ -58,6 +21,9 @@ def get_federated_connector(
def get_federated_connector_cls(
source: FederatedConnectorSource,
) -> Type[FederatedConnector]:
) -> type[FederatedConnector]:
"""Get the class of the appropriate federated connector."""
return _load_federated_connector_class(source)
if source == FederatedConnectorSource.FEDERATED_SLACK:
return SlackFederatedConnector
else:
raise ValueError(f"Unsupported federated connector source: {source}")

View File

@@ -135,16 +135,12 @@ def get_federated_retrieval_functions(
# At this point, user_id is guaranteed to be not None since we're in the else branch
assert user_id is not None
# If no source types are specified, don't use any federated connectors
if source_types is None:
logger.info("No source types specified, skipping all federated connectors")
return []
federated_retrieval_infos: list[FederatedRetrievalInfo] = []
federated_oauth_tokens = list_federated_connector_oauth_tokens(db_session, user_id)
for oauth_token in federated_oauth_tokens:
if (
oauth_token.federated_connector.source.to_non_federated_source()
source_types is not None
and oauth_token.federated_connector.source.to_non_federated_source()
not in source_types
):
continue

View File

@@ -1,19 +0,0 @@
"""Registry mapping for federated connector classes."""
from pydantic import BaseModel
from onyx.configs.constants import FederatedConnectorSource
class FederatedConnectorMapping(BaseModel):
module_path: str
class_name: str
# Mapping of FederatedConnectorSource to connector details for lazy loading
FEDERATED_CONNECTOR_CLASS_MAP = {
FederatedConnectorSource.FEDERATED_SLACK: FederatedConnectorMapping(
module_path="onyx.federated_connectors.slack.federated_connector",
class_name="SlackFederatedConnector",
),
}

View File

@@ -54,7 +54,6 @@ ACCEPTED_PLAIN_TEXT_FILE_EXTENSIONS = [
".xml",
".yml",
".yaml",
".sql",
]
ACCEPTED_DOCUMENT_FILE_EXTENSIONS = [

View File

@@ -2,6 +2,7 @@ from typing import Any
from typing import cast
from typing import IO
from unstructured.staging.base import dict_to_elements
from unstructured_client import UnstructuredClient # type: ignore
from unstructured_client.models import operations # type: ignore
from unstructured_client.models import shared
@@ -51,8 +52,6 @@ def _sdk_partition_request(
def unstructured_to_text(file: IO[Any], file_name: str) -> str:
from unstructured.staging.base import dict_to_elements
logger.debug(f"Starting to read file: {file_name}")
req = _sdk_partition_request(file, file_name, strategy="fast")

View File

@@ -1,6 +1,7 @@
import base64
from enum import Enum
from typing import NotRequired
from uuid import UUID
from typing_extensions import TypedDict # noreorder
from pydantic import BaseModel
@@ -35,7 +36,7 @@ class FileDescriptor(TypedDict):
id: str
type: ChatFileType
name: NotRequired[str | None]
user_file_id: NotRequired[str | None]
user_file_id: NotRequired[UUID | None]
class InMemoryChatFile(BaseModel):
@@ -57,5 +58,5 @@ class InMemoryChatFile(BaseModel):
"id": str(self.file_id),
"type": self.file_type,
"name": self.filename,
"user_file_id": str(self.file_id) if self.file_id else None,
"user_file_id": UUID(str(self.file_id)) if self.file_id else None,
}

View File

@@ -5,9 +5,8 @@ from collections.abc import Iterator
from collections.abc import Sequence
from typing import Any
from typing import cast
from typing import TYPE_CHECKING
from typing import Union
import litellm # type: ignore
from httpx import RemoteProtocolError
from langchain.schema.language_model import LanguageModelInput
from langchain_core.messages import AIMessage
@@ -25,7 +24,9 @@ from langchain_core.messages import SystemMessageChunk
from langchain_core.messages.tool import ToolCallChunk
from langchain_core.messages.tool import ToolMessage
from langchain_core.prompt_values import PromptValue
from litellm.utils import get_supported_openai_params
from onyx.configs.app_configs import BRAINTRUST_ENABLED
from onyx.configs.app_configs import LOG_ONYX_MODEL_INTERACTIONS
from onyx.configs.app_configs import MOCK_LLM_RESPONSE
from onyx.configs.chat_configs import QA_TIMEOUT
@@ -44,9 +45,13 @@ from onyx.utils.long_term_log import LongTermLogger
logger = setup_logger()
if TYPE_CHECKING:
from litellm import ModelResponse, CustomStreamWrapper, Message
# If a user configures a different model and it doesn't support all the same
# parameters like frequency and presence, just ignore them
litellm.drop_params = True
litellm.telemetry = False
if BRAINTRUST_ENABLED:
litellm.callbacks = ["braintrust"]
_LLM_PROMPT_LONG_TERM_LOG_CATEGORY = "llm_prompt"
VERTEX_CREDENTIALS_FILE_KWARG = "vertex_credentials"
@@ -80,10 +85,8 @@ def _base_msg_to_role(msg: BaseMessage) -> str:
def _convert_litellm_message_to_langchain_message(
litellm_message: "Message",
litellm_message: litellm.Message,
) -> BaseMessage:
from onyx.llm.litellm_singleton import litellm
# Extracting the basic attributes from the litellm message
content = litellm_message.content or ""
role = litellm_message.role
@@ -173,15 +176,15 @@ def _convert_delta_to_message_chunk(
curr_msg: BaseMessage | None,
stop_reason: str | None = None,
) -> BaseMessageChunk:
from litellm.utils import ChatCompletionDeltaToolCall
"""Adapted from langchain_community.chat_models.litellm._convert_delta_to_message_chunk"""
role = _dict.get("role") or (_base_msg_to_role(curr_msg) if curr_msg else "unknown")
content = _dict.get("content") or ""
additional_kwargs = {}
if _dict.get("function_call"):
additional_kwargs.update({"function_call": dict(_dict["function_call"])})
tool_calls = cast(list[ChatCompletionDeltaToolCall] | None, _dict.get("tool_calls"))
tool_calls = cast(
list[litellm.utils.ChatCompletionDeltaToolCall] | None, _dict.get("tool_calls")
)
if role == "user":
return HumanMessageChunk(content=content)
@@ -318,8 +321,6 @@ class DefaultMultiLLM(LLM):
self._max_token_param = LEGACY_MAX_TOKENS_KWARG
try:
from litellm.utils import get_supported_openai_params
params = get_supported_openai_params(model_name, model_provider)
if STANDARD_MAX_TOKENS_KWARG in (params or []):
self._max_token_param = STANDARD_MAX_TOKENS_KWARG
@@ -387,12 +388,11 @@ class DefaultMultiLLM(LLM):
structured_response_format: dict | None = None,
timeout_override: int | None = None,
max_tokens: int | None = None,
) -> Union["ModelResponse", "CustomStreamWrapper"]:
) -> litellm.ModelResponse | litellm.CustomStreamWrapper:
# litellm doesn't accept LangChain BaseMessage objects, so we need to convert them
# to a dict representation
processed_prompt = _prompt_to_dict(prompt)
self._record_call(processed_prompt)
from onyx.llm.litellm_singleton import litellm
try:
return litellm.completion(
@@ -437,16 +437,6 @@ class DefaultMultiLLM(LLM):
]
else {}
), # TODO: remove once LITELLM has patched
**(
{"reasoning_effort": "minimal"}
if self.config.model_name
in [
"gpt-5",
"gpt-5-mini",
"gpt-5-nano",
]
else {}
), # TODO: remove once LITELLM has better support/we change API
**(
{"response_format": structured_response_format}
if structured_response_format
@@ -495,13 +485,11 @@ class DefaultMultiLLM(LLM):
timeout_override: int | None = None,
max_tokens: int | None = None,
) -> BaseMessage:
from litellm import ModelResponse
if LOG_ONYX_MODEL_INTERACTIONS:
self.log_model_configs()
response = cast(
ModelResponse,
litellm.ModelResponse,
self._completion(
prompt=prompt,
tools=tools,
@@ -530,8 +518,6 @@ class DefaultMultiLLM(LLM):
timeout_override: int | None = None,
max_tokens: int | None = None,
) -> Iterator[BaseMessage]:
from litellm import CustomStreamWrapper
if LOG_ONYX_MODEL_INTERACTIONS:
self.log_model_configs()
@@ -548,7 +534,7 @@ class DefaultMultiLLM(LLM):
output = None
response = cast(
CustomStreamWrapper,
litellm.CustomStreamWrapper,
self._completion(
prompt=prompt,
tools=tools,

View File

@@ -1,5 +1,8 @@
from typing import Any
from onyx.chat.models import PersonaOverrideConfig
from onyx.configs.app_configs import DISABLE_GENERATIVE_AI
from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS
from onyx.configs.model_configs import GEN_AI_TEMPERATURE
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.llm import fetch_default_provider
@@ -10,8 +13,6 @@ from onyx.db.models import Persona
from onyx.llm.chat_llm import DefaultMultiLLM
from onyx.llm.exceptions import GenAIDisabledException
from onyx.llm.interfaces import LLM
from onyx.llm.llm_provider_options import OLLAMA_API_KEY_CONFIG_KEY
from onyx.llm.llm_provider_options import OLLAMA_PROVIDER_NAME
from onyx.llm.override_models import LLMOverride
from onyx.llm.utils import get_max_input_tokens_from_llm_provider
from onyx.llm.utils import model_supports_image_input
@@ -23,22 +24,13 @@ from onyx.utils.long_term_log import LongTermLogger
logger = setup_logger()
def _build_provider_extra_headers(
provider: str, custom_config: dict[str, str] | None
) -> dict[str, str]:
if provider != OLLAMA_PROVIDER_NAME or not custom_config:
return {}
def _build_extra_model_kwargs(provider: str) -> dict[str, Any]:
"""Ollama requires us to specify the max context window.
raw_api_key = custom_config.get(OLLAMA_API_KEY_CONFIG_KEY)
api_key = raw_api_key.strip() if raw_api_key else None
if not api_key:
return {}
if not api_key.lower().startswith("bearer "):
api_key = f"Bearer {api_key}"
return {"Authorization": api_key}
For now, just using the GEN_AI_MODEL_FALLBACK_MAX_TOKENS value.
TODO: allow model-specific values to be configured via the UI.
"""
return {"num_ctx": GEN_AI_MODEL_FALLBACK_MAX_TOKENS} if provider == "ollama" else {}
def get_main_llm_from_tuple(
@@ -280,16 +272,6 @@ def get_llm(
) -> LLM:
if temperature is None:
temperature = GEN_AI_TEMPERATURE
extra_headers = build_llm_extra_headers(additional_headers)
# NOTE: this is needed since Ollama API key is optional
# User may access Ollama cloud via locally hosted instance (logged in)
# or just via the cloud API (not logged in, using API key)
provider_extra_headers = _build_provider_extra_headers(provider, custom_config)
if provider_extra_headers:
extra_headers.update(provider_extra_headers)
return DefaultMultiLLM(
model_provider=provider,
model_name=model,
@@ -300,8 +282,8 @@ def get_llm(
timeout=timeout,
temperature=temperature,
custom_config=custom_config,
extra_headers=extra_headers,
model_kwargs={},
extra_headers=build_llm_extra_headers(additional_headers),
model_kwargs=_build_extra_model_kwargs(provider),
long_term_logger=long_term_logger,
max_input_tokens=max_input_tokens,
)

View File

@@ -1,23 +0,0 @@
"""
Singleton module for litellm configuration.
This ensures litellm is configured exactly once when first imported.
All other modules should import litellm from here instead of directly.
"""
import litellm
from onyx.configs.app_configs import BRAINTRUST_ENABLED
# Import litellm
# Configure litellm settings immediately on import
# If a user configures a different model and it doesn't support all the same
# parameters like frequency and presence, just ignore them
litellm.drop_params = True
litellm.telemetry = False
if BRAINTRUST_ENABLED:
litellm.callbacks = ["braintrust"]
# Export the configured litellm module
__all__ = ["litellm"]

View File

@@ -39,7 +39,6 @@ class WellKnownLLMProviderDescriptor(BaseModel):
model_configurations: list[ModelConfigurationView]
default_model: str | None = None
default_fast_model: str | None = None
default_api_base: str | None = None
# set for providers like Azure, which require a deployment name.
deployment_name_required: bool = False
# set for providers like Azure, which support a single model per deployment.
@@ -96,9 +95,7 @@ BEDROCK_MODEL_NAMES = [
for model in list(litellm.bedrock_models.union(litellm.bedrock_converse_models))
if "/" not in model and "embed" not in model
][::-1]
OLLAMA_PROVIDER_NAME = "ollama"
OLLAMA_API_KEY_CONFIG_KEY = "OLLAMA_API_KEY"
BEDROCK_DEFAULT_MODEL = "anthropic.claude-3-5-sonnet-20241022-v2:0"
IGNORABLE_ANTHROPIC_MODELS = [
"claude-2",
@@ -112,8 +109,8 @@ ANTHROPIC_MODEL_NAMES = [
if model not in IGNORABLE_ANTHROPIC_MODELS
][::-1]
ANTHROPIC_VISIBLE_MODEL_NAMES = [
"claude-sonnet-4-5-20250929",
"claude-sonnet-4-20250514",
"claude-3-5-sonnet-20241022",
"claude-3-7-sonnet-20250219",
]
AZURE_PROVIDER_NAME = "azure"
@@ -163,15 +160,13 @@ _PROVIDER_TO_MODELS_MAP = {
BEDROCK_PROVIDER_NAME: BEDROCK_MODEL_NAMES,
ANTHROPIC_PROVIDER_NAME: ANTHROPIC_MODEL_NAMES,
VERTEXAI_PROVIDER_NAME: VERTEXAI_MODEL_NAMES,
OLLAMA_PROVIDER_NAME: [],
}
_PROVIDER_TO_VISIBLE_MODELS_MAP = {
OPENAI_PROVIDER_NAME: OPEN_AI_VISIBLE_MODEL_NAMES,
BEDROCK_PROVIDER_NAME: [],
BEDROCK_PROVIDER_NAME: [BEDROCK_DEFAULT_MODEL],
ANTHROPIC_PROVIDER_NAME: ANTHROPIC_VISIBLE_MODEL_NAMES,
VERTEXAI_PROVIDER_NAME: VERTEXAI_VISIBLE_MODEL_NAMES,
OLLAMA_PROVIDER_NAME: [],
}
@@ -190,28 +185,6 @@ def fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]:
default_model="gpt-4o",
default_fast_model="gpt-4o-mini",
),
WellKnownLLMProviderDescriptor(
name=OLLAMA_PROVIDER_NAME,
display_name="Ollama",
api_key_required=False,
api_base_required=True,
api_version_required=False,
custom_config_keys=[
CustomConfigKey(
name=OLLAMA_API_KEY_CONFIG_KEY,
display_name="Ollama API Key",
description="Optional API key used when connecting to Ollama Cloud (i.e. API base is https://ollama.com).",
is_required=False,
is_secret=True,
)
],
model_configurations=fetch_model_configurations_for_provider(
OLLAMA_PROVIDER_NAME
),
default_model=None,
default_fast_model=None,
default_api_base="http://127.0.0.1:11434",
),
WellKnownLLMProviderDescriptor(
name=ANTHROPIC_PROVIDER_NAME,
display_name="Anthropic",
@@ -222,8 +195,8 @@ def fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]:
model_configurations=fetch_model_configurations_for_provider(
ANTHROPIC_PROVIDER_NAME
),
default_model="claude-sonnet-4-5-20250929",
default_fast_model="claude-sonnet-4-20250514",
default_model="claude-3-7-sonnet-20250219",
default_fast_model="claude-3-5-sonnet-20241022",
),
WellKnownLLMProviderDescriptor(
name=AZURE_PROVIDER_NAME,
@@ -275,7 +248,7 @@ def fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]:
model_configurations=fetch_model_configurations_for_provider(
BEDROCK_PROVIDER_NAME
),
default_model=None,
default_model=BEDROCK_DEFAULT_MODEL,
default_fast_model=None,
),
WellKnownLLMProviderDescriptor(

View File

@@ -16,7 +16,6 @@ from langchain.schema.messages import AIMessage
from langchain.schema.messages import BaseMessage
from langchain.schema.messages import HumanMessage
from langchain.schema.messages import SystemMessage
from sqlalchemy import select
from onyx.configs.app_configs import LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS
from onyx.configs.app_configs import MAX_TOKENS_FOR_FULL_INCLUSION
@@ -27,9 +26,6 @@ from onyx.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
from onyx.configs.model_configs import GEN_AI_MAX_TOKENS
from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS
from onyx.configs.model_configs import GEN_AI_NUM_RESERVED_OUTPUT_TOKENS
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import LLMProvider
from onyx.db.models import ModelConfiguration
from onyx.file_store.models import ChatFileType
from onyx.file_store.models import InMemoryChatFile
from onyx.llm.interfaces import LLM
@@ -464,7 +460,6 @@ def get_llm_contextual_cost(
this does not account for the cost of documents that fit within a single chunk
which do not get contextualized.
"""
import litellm
# calculate input costs
@@ -644,30 +639,6 @@ def get_max_input_tokens_from_llm_provider(
def model_supports_image_input(model_name: str, model_provider: str) -> bool:
# TODO: Add support to check model config for any provider
# TODO: Circular import means OLLAMA_PROVIDER_NAME is not available here
if model_provider == "ollama":
try:
with get_session_with_current_tenant() as db_session:
model_config = db_session.scalar(
select(ModelConfiguration)
.join(
LLMProvider,
ModelConfiguration.llm_provider_id == LLMProvider.id,
)
.where(
ModelConfiguration.name == model_name,
LLMProvider.provider == model_provider,
)
)
if model_config and model_config.supports_image_input is not None:
return model_config.supports_image_input
except Exception as e:
logger.warning(
f"Failed to query database for {model_provider} model {model_name} image support: {e}"
)
model_map = get_model_map()
try:
model_obj = find_model_obj(

View File

@@ -714,15 +714,13 @@ information that will be necessary to provide a succinct answer to the specific
the documents. Again, start out here as well with a brief statement whether the SPECIFIC CONTEXT is \
mentioned in the documents. (Example: 'I was not able to find information about yellow curry specifically, \
but I found information about curry...'). But this should be be precise and concise, and specifically \
answer the question. Please cite the document sources inline in format [[1]][[7]], etc., where it \
is essential that the document NUMBERS are in the brackets, not any titles.>",
answer the question. Please cite the document sources inline in format [[1]][[7]], etc.>",
"claims": "<a list of short claims discussed in the documents as they pertain to the query and/or \
the original question. These will later be used for follow-up questions and verifications. Note that \
these may not actually be in the succinct answer above. Note also that each claim \
should include ONE fact that contains enough context to be verified/questioned by a different system \
without the need for going back to these documents for additional context. Also here, please cite the \
document sources inline in format [[1]][[7]], etc., where it is essential that the document NUMBERS are \
in the brackets, not any titles. So this should have format like \
document sources inline in format [[1]][[7]], etc.. So this should have format like \
[<claim 1>, <claim 2>, <claim 3>, ...], each with citations.>"
}
"""
@@ -1045,9 +1043,8 @@ find information about yellow curry specifically, but here is what I found about
- do not make anything up! Only use the information provided in the documents, or, \
if no documents are provided for a sub-answer, in the actual sub-answer.
- Provide a thoughtful answer that is concise and to the point, but that is detailed.
- Please cite your sources INLINE in format [[2]][[4]], etc! The NUMBERS of the documents \
are provided above, and the NUMBERS need to be in the brackets. And the appropriate citation \
should be close to the corresponding /
- Please cite your sources INLINE in format [[2]][[4]], etc! The numbers of the documents \
are provided above. So the appropriate citation number should be close to the corresponding /
information it supports!
- If you are not that certain that the information does relate to the question topic, \
point out the ambiguity in your answer. But DO NOT say something like 'I was not able to find \
@@ -1101,16 +1098,14 @@ find information about yellow curry specifically, but here is what I found about
- do not make anything up! Only use the information provided in the documents, or, \
if no documents are provided for a sub-answer, in the actual sub-answer.
- Provide a thoughtful answer that is concise and to the point, but that is detailed.
- Please cite your sources inline in format [[2]][[4]], etc! The NUMBERS of the documents \
are provided above, and the NUMBERS need to be in the brackets. And the appropriate citation \
should be close to the corresponding /
- Please cite your sources inline in format [[2]][[4]], etc! The numbers of the documents \
are provided above. So the appropriate citation number should be close to the corresponding /
information it supports!
- If you are not that certain that the information does relate to the question topic, \
point out the ambiguity in your answer. But DO NOT say something like 'I was not able to find \
information on <X> specifically, but here is what I found about <X> generally....'. Rather say, \
'Here is what I found about <X> and I hope this is the <X> you were looking for...', or similar.
- Again... CITE YOUR SOURCES INLINE IN FORMAT [[2]][[4]], etc! This is CRITICAL! Note that \
the DOCUMENT NUMBERS need to be in the brackets.
- Again... CITE YOUR SOURCES INLINE IN FORMAT [[2]][[4]], etc! This is CRITICAL!
ANSWER:
"""
@@ -1155,9 +1150,8 @@ find information about yellow curry specifically, but here is what I found about
- do not make anything up! Only use the information provided in the documents, or, \
if no documents are provided for a sub-answer, in the actual sub-answer.
- Provide a thoughtful answer that is concise and to the point, but that is detailed.
- THIS IS VERY IMPORTANT: Please cite your sources inline in format [[2]][[4]], etc! \
The NUMBERS of the documents - provided above -need to be in the brackets. \
Also, if you refer to sub-answers, the provided reference numbers \
- THIS IS VERY IMPORTANT: Please cite your sources inline in format [[2]][[4]], etc! The numbers of the documents \
are provided above. Also, if you refer to sub-answers, the provided reference numbers \
in the sub-answers are the same as the ones provided for the documents!
ANSWER:

View File

@@ -3,6 +3,7 @@ from typing import Any
from typing import cast
from typing import List
from litellm import get_supported_openai_params
from sqlalchemy.orm import Session
from onyx.configs.chat_configs import NUM_PERSONA_PROMPT_GENERATION_CHUNKS
@@ -122,8 +123,6 @@ def generate_starter_messages(
"""
_, fast_llm = get_default_llms(temperature=0.5)
from litellm.utils import get_supported_openai_params
provider = fast_llm.config.model_provider
model = fast_llm.config.model_name

View File

@@ -184,7 +184,7 @@ def seed_initial_documents(
"base_url": "https://docs.onyx.app/",
"web_connector_type": "recursive",
},
refresh_freq=3600, # 1 hour
refresh_freq=None, # Never refresh by default
prune_freq=None,
indexing_start=None,
)

View File

@@ -0,0 +1,6 @@
user_folders:
- id: -1
name: "Recent Documents"
description: "Documents uploaded by the user"
files: []
assistants: []

View File

@@ -22,7 +22,7 @@ from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
from onyx.server.documents.models import CredentialBase
from onyx.utils.logger import setup_logger
from onyx.utils.subclasses import find_all_subclasses_in_package
from onyx.utils.subclasses import find_all_subclasses_in_dir
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
@@ -44,8 +44,7 @@ def _discover_oauth_connectors() -> dict[DocumentSource, type[OAuthConnector]]:
if _OAUTH_CONNECTORS: # Return cached connectors if already discovered
return _OAUTH_CONNECTORS
# Import submodules using package-based discovery to avoid sys.path mutations
oauth_connectors = find_all_subclasses_in_package(
oauth_connectors = find_all_subclasses_in_dir(
cast(type[OAuthConnector], OAuthConnector), "onyx.connectors"
)

View File

@@ -0,0 +1,177 @@
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Path
from sqlalchemy.orm import Session
from onyx.auth.users import current_user
from onyx.db.chat import get_chat_session_by_id
from onyx.db.engine.sql_engine import get_session
from onyx.db.folder import add_chat_to_folder
from onyx.db.folder import create_folder
from onyx.db.folder import delete_folder
from onyx.db.folder import get_user_folders
from onyx.db.folder import remove_chat_from_folder
from onyx.db.folder import rename_folder
from onyx.db.folder import update_folder_display_priority
from onyx.db.models import User
from onyx.server.features.folder.models import DeleteFolderOptions
from onyx.server.features.folder.models import FolderChatSessionRequest
from onyx.server.features.folder.models import FolderCreationRequest
from onyx.server.features.folder.models import FolderUpdateRequest
from onyx.server.features.folder.models import GetUserFoldersResponse
from onyx.server.features.folder.models import UserFolderSnapshot
from onyx.server.models import DisplayPriorityRequest
from onyx.server.query_and_chat.models import ChatSessionDetails
router = APIRouter(prefix="/folder")
@router.get("")
def get_folders(
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> GetUserFoldersResponse:
folders = get_user_folders(
user_id=user.id if user else None,
db_session=db_session,
)
folders.sort()
return GetUserFoldersResponse(
folders=[
UserFolderSnapshot(
folder_id=folder.id,
folder_name=folder.name,
display_priority=folder.display_priority,
chat_sessions=[
ChatSessionDetails(
id=chat_session.id,
name=chat_session.description,
persona_id=chat_session.persona_id,
time_created=chat_session.time_created.isoformat(),
time_updated=chat_session.time_updated.isoformat(),
shared_status=chat_session.shared_status,
folder_id=folder.id,
)
for chat_session in folder.chat_sessions
if not chat_session.deleted
],
)
for folder in folders
]
)
@router.put("/reorder")
def put_folder_display_priority(
    display_priority_request: DisplayPriorityRequest,
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Persist a new display ordering for the current user's folders."""
    requesting_user_id = user.id if user else None
    update_folder_display_priority(
        user_id=requesting_user_id,
        display_priority_map=display_priority_request.display_priority_map,
        db_session=db_session,
    )
@router.post("")
def create_folder_endpoint(
    request: FolderCreationRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> int:
    """Create a new chat folder for the current user and return its ID."""
    owner_id = user.id if user else None
    new_folder_id = create_folder(
        user_id=owner_id,
        folder_name=request.folder_name,
        db_session=db_session,
    )
    return new_folder_id
@router.patch("/{folder_id}")
def patch_folder_endpoint(
    request: FolderUpdateRequest,
    folder_id: int = Path(..., description="The ID of the folder to rename"),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Rename an existing folder.

    Raises:
        HTTPException: 400 if the rename fails (e.g. folder missing or not
            owned by the requesting user).
    """
    try:
        rename_folder(
            user_id=user.id if user else None,
            folder_id=folder_id,
            folder_name=request.folder_name,
            db_session=db_session,
        )
    except Exception as e:
        # Chain the cause so the original error survives in tracebacks (B904).
        raise HTTPException(status_code=400, detail=str(e)) from e
@router.delete("/{folder_id}")
def delete_folder_endpoint(
    request: DeleteFolderOptions,
    folder_id: int = Path(..., description="The ID of the folder to delete"),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Delete a folder, optionally deleting the chats it contains.

    Raises:
        HTTPException: 400 if the delete fails (e.g. folder missing or not
            owned by the requesting user).
    """
    user_id = user.id if user else None
    try:
        delete_folder(
            user_id=user_id,
            folder_id=folder_id,
            including_chats=request.including_chats,
            db_session=db_session,
        )
    except Exception as e:
        # Chain the cause so the original error survives in tracebacks (B904).
        raise HTTPException(status_code=400, detail=str(e)) from e
@router.post("/{folder_id}/add-chat-session")
def add_chat_to_folder_endpoint(
    request: FolderChatSessionRequest,
    folder_id: int = Path(
        ..., description="The ID of the folder in which to add the chat session"
    ),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Attach an existing chat session to a folder.

    Raises:
        HTTPException: 400 if the session lookup or the folder update fails.
    """
    user_id = user.id if user else None
    try:
        chat_session = get_chat_session_by_id(
            chat_session_id=request.chat_session_id,
            user_id=user_id,
            db_session=db_session,
        )
        add_chat_to_folder(
            # Reuse the user_id computed above (was redundantly recomputed).
            user_id=user_id,
            folder_id=folder_id,
            chat_session=chat_session,
            db_session=db_session,
        )
    except Exception as e:
        # Chain the cause so the original error survives in tracebacks (B904).
        raise HTTPException(status_code=400, detail=str(e)) from e
@router.post("/{folder_id}/remove-chat-session")
def remove_chat_from_folder_endpoint(
    request: FolderChatSessionRequest,
    folder_id: int = Path(
        ..., description="The ID of the folder from which to remove the chat session"
    ),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Detach a chat session from a folder.

    Raises:
        HTTPException: 400 if the session lookup or the folder update fails.
    """
    user_id = user.id if user else None
    try:
        chat_session = get_chat_session_by_id(
            chat_session_id=request.chat_session_id,
            user_id=user_id,
            db_session=db_session,
        )
        remove_chat_from_folder(
            user_id=user_id,
            folder_id=folder_id,
            chat_session=chat_session,
            db_session=db_session,
        )
    except Exception as e:
        # Chain the cause so the original error survives in tracebacks (B904).
        raise HTTPException(status_code=400, detail=str(e)) from e

View File

@@ -0,0 +1,32 @@
from uuid import UUID
from pydantic import BaseModel
from onyx.server.query_and_chat.models import ChatSessionDetails
class UserFolderSnapshot(BaseModel):
    # Serialized view of one chat folder plus its visible chat sessions.
    folder_id: int
    folder_name: str | None
    display_priority: int
    chat_sessions: list[ChatSessionDetails]


class GetUserFoldersResponse(BaseModel):
    # Response body for GET /folder: all folders owned by the requesting user.
    folders: list[UserFolderSnapshot]


class FolderCreationRequest(BaseModel):
    # Name for the new folder; None lets the backend choose a default.
    folder_name: str | None = None


class FolderUpdateRequest(BaseModel):
    # New name for an existing folder.
    folder_name: str | None = None


class FolderChatSessionRequest(BaseModel):
    # Chat session to add to / remove from a folder.
    chat_session_id: UUID


class DeleteFolderOptions(BaseModel):
    # When True, chats inside the folder are deleted along with the folder.
    including_chats: bool = False

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,3 @@
from enum import Enum
from typing import Any
from typing import List
from typing import NotRequired
from typing import Optional
@@ -12,35 +10,20 @@ from pydantic import model_validator
from onyx.db.enums import MCPAuthenticationPerformer
from onyx.db.enums import MCPAuthenticationType
from onyx.db.enums import MCPTransport
# This should be updated along with MCPConnectionData
class MCPOAuthKeys(str, Enum):
"""MCP OAuth keys types"""
CLIENT_INFO = "client_info"
TOKENS = "tokens"
METADATA = "metadata"
class MCPConnectionData(TypedDict):
"""TypedDict to allow use as a type hint for a JSONB column
in Postgres"""
refresh_token: NotRequired[str]
access_token: NotRequired[str]
headers: dict[str, str]
header_substitutions: NotRequired[dict[str, str]]
# For OAuth only
# Note: Update MCPOAuthKeys if necessary when modifying these
# Unfortunately we can't use the actual models here because basemodels aren't compatible
# with SQLAlchemy
client_info: NotRequired[dict[str, Any]] # OAuthClientInformationFull
tokens: NotRequired[dict[str, Any]] # OAuthToken
metadata: NotRequired[dict[str, Any]] # OAuthClientMetadata
# the actual models are defined in mcp.shared.auth
# from mcp.shared.auth import OAuthClientInformationFull, OAuthClientMetadata, OAuthToken
client_id: NotRequired[str]
client_secret: NotRequired[str]
registration_access_token: NotRequired[str]
registration_client_uri: NotRequired[str]
class MCPAuthTemplate(BaseModel):
@@ -65,17 +48,14 @@ class MCPToolCreateRequest(BaseModel):
description: Optional[str] = Field(None, description="Description of the MCP tool")
server_url: str = Field(..., description="URL of the MCP server")
auth_type: MCPAuthenticationType = Field(..., description="Authentication type")
auth_performer: MCPAuthenticationPerformer = Field(
..., description="Who performs authentication"
auth_performer: Optional[MCPAuthenticationPerformer] = Field(
None, description="Who performs authentication"
)
api_token: Optional[str] = Field(
None, description="API token for api_token auth type"
)
oauth_client_id: Optional[str] = Field(None, description="OAuth client ID")
oauth_client_secret: Optional[str] = Field(None, description="OAuth client secret")
transport: MCPTransport | None = Field(
None, description="MCP transport type (STREAMABLE_HTTP or SSE)"
)
auth_template: Optional[MCPAuthTemplate] = Field(
None, description="Template configuration for per-user authentication"
)
@@ -124,9 +104,15 @@ class MCPToolCreateRequest(BaseModel):
"admin_credentials is required when auth_performer is 'per_user'"
)
# OAuth client ID/secret are optional. If provided, they will seed the
# OAuth client info; otherwise, the MCP client will attempt dynamic
# client registration.
if self.auth_type == MCPAuthenticationType.OAUTH and not self.oauth_client_id:
raise ValueError("oauth_client_id is required when auth_type is 'oauth'")
if (
self.auth_type == MCPAuthenticationType.OAUTH
and not self.oauth_client_secret
):
raise ValueError(
"oauth_client_secret is required when auth_type is 'oauth'"
)
return self
@@ -154,7 +140,7 @@ class MCPToolResponse(BaseModel):
is_authenticated: bool
class MCPOAuthConnectRequest(BaseModel):
class MCPOAuthInitiateRequest(BaseModel):
name: str = Field(..., description="Name of the MCP tool")
description: Optional[str] = Field(None, description="Description of the MCP tool")
server_url: str = Field(..., description="URL of the MCP server")
@@ -166,33 +152,32 @@ class MCPOAuthConnectRequest(BaseModel):
)
class MCPOAuthConnectResponse(BaseModel):
class MCPOAuthInitiateResponse(BaseModel):
oauth_url: str = Field(..., description="OAuth URL to redirect user to")
state: str = Field(..., description="OAuth state parameter")
pending_tool: dict = Field(..., description="Pending tool configuration")
class MCPUserOAuthConnectRequest(BaseModel):
class MCPUserOAuthInitiateRequest(BaseModel):
server_id: int = Field(..., description="ID of the MCP server")
return_path: str = Field(..., description="Path to redirect to after callback")
include_resource_param: bool = Field(..., description="Include resource parameter")
oauth_client_id: str | None = Field(
None, description="OAuth client ID (optional for DCR)"
)
oauth_client_secret: str | None = Field(
None, description="OAuth client secret (optional for DCR)"
)
@model_validator(mode="after")
def validate_return_path(self) -> "MCPUserOAuthConnectRequest":
def validate_return_path(self) -> "MCPUserOAuthInitiateRequest":
if not self.return_path.startswith("/"):
raise ValueError("return_path must start with a slash")
return self
class MCPUserOAuthConnectResponse(BaseModel):
server_id: int
class MCPUserOAuthInitiateResponse(BaseModel):
oauth_url: str = Field(..., description="OAuth URL to redirect user to")
state: str = Field(..., description="OAuth state parameter")
server_id: int = Field(..., description="Server ID")
server_name: str = Field(..., description="Server name")
code_verifier: Optional[str] = Field(
None, description="PKCE code verifier to be used at callback"
)
class MCPOAuthCallbackRequest(BaseModel):
@@ -209,6 +194,7 @@ class MCPOAuthCallbackResponse(BaseModel):
message: str
server_id: int
server_name: str
authenticated: bool
redirect_url: str
@@ -269,7 +255,6 @@ class MCPServer(BaseModel):
name: str
description: Optional[str] = None
server_url: str
transport: MCPTransport
auth_type: MCPAuthenticationType
auth_performer: MCPAuthenticationPerformer
is_authenticated: bool

View File

@@ -162,7 +162,7 @@ def unlink_user_file_from_project(
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,
priority=OnyxCeleryPriority.HIGHEST,
priority=OnyxCeleryPriority.HIGH,
)
logger.info(
f"Triggered project sync for user_file_id={user_file.id} with task_id={task.id}"
@@ -210,7 +210,7 @@ def link_user_file_to_project(
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,
priority=OnyxCeleryPriority.HIGHEST,
priority=OnyxCeleryPriority.HIGH,
)
logger.info(
f"Triggered project sync for user_file_id={user_file.id} with task_id={task.id}"

View File

@@ -4,7 +4,6 @@ from datetime import datetime
from datetime import timezone
import boto3
import httpx
from botocore.exceptions import BotoCoreError
from botocore.exceptions import ClientError
from botocore.exceptions import NoCredentialsError
@@ -12,12 +11,10 @@ from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Query
from pydantic import ValidationError
from sqlalchemy.orm import Session
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_chat_accessible_user
from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS
from onyx.db.engine.sql_engine import get_session
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import fetch_existing_llm_providers
@@ -43,9 +40,6 @@ from onyx.server.manage.llm.models import LLMProviderDescriptor
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import LLMProviderView
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.server.manage.llm.models import OllamaFinalModelResponse
from onyx.server.manage.llm.models import OllamaModelDetails
from onyx.server.manage.llm.models import OllamaModelsRequest
from onyx.server.manage.llm.models import TestLLMRequest
from onyx.server.manage.llm.models import VisionProviderResponse
from onyx.utils.logger import setup_logger
@@ -480,100 +474,3 @@ def get_bedrock_available_models(
raise HTTPException(
status_code=500, detail=f"Unexpected error fetching Bedrock models: {e}"
)
def _get_ollama_available_model_names(api_base: str) -> set[str]:
"""Fetch available model names from Ollama server."""
tags_url = f"{api_base}/api/tags"
try:
response = httpx.get(tags_url, timeout=5.0)
response.raise_for_status()
response_json = response.json()
except Exception as e:
raise HTTPException(
status_code=400,
detail=f"Failed to fetch Ollama models: {e}",
)
models = response_json.get("models", [])
return {model.get("name") for model in models if model.get("name")}
@admin_router.post("/ollama/available-models")
def get_ollama_available_models(
request: OllamaModelsRequest,
_: User | None = Depends(current_admin_user),
) -> list[OllamaFinalModelResponse]:
"""Fetch the list of available models from an Ollama server."""
cleaned_api_base = request.api_base.strip().rstrip("/")
if not cleaned_api_base:
raise HTTPException(
status_code=400, detail="API base URL is required to fetch Ollama models."
)
model_names = _get_ollama_available_model_names(cleaned_api_base)
if not model_names:
raise HTTPException(
status_code=400,
detail="No models found from your Ollama server",
)
all_models_with_context_size_and_vision: list[OllamaFinalModelResponse] = []
show_url = f"{cleaned_api_base}/api/show"
for model_name in model_names:
context_limit: int | None = None
supports_image_input: bool | None = None
try:
show_response = httpx.post(
show_url,
json={"model": model_name},
timeout=5.0,
)
show_response.raise_for_status()
show_response_json = show_response.json()
# Parse the response into the expected format
ollama_model_details = OllamaModelDetails.model_validate(show_response_json)
# Check if this model supports completion/chat
if not ollama_model_details.supports_completion():
continue
# Optimistically access. Context limit is stored as "model_architecture.context" = int
architecture = ollama_model_details.model_info.get(
"general.architecture", ""
)
context_limit = ollama_model_details.model_info.get(
architecture + ".context_length", None
)
supports_image_input = ollama_model_details.supports_image_input()
except ValidationError as e:
logger.warning(
"Invalid model details from Ollama server",
extra={"model": model_name, "validation_error": str(e)},
)
except Exception as e:
logger.warning(
"Failed to fetch Ollama model details",
extra={"model": model_name, "error": str(e)},
)
# If we fail at any point attempting to extract context limit,
# still allow this model to be used with a fallback max context size
if not context_limit:
context_limit = GEN_AI_MODEL_FALLBACK_MAX_TOKENS
if not supports_image_input:
supports_image_input = False
all_models_with_context_size_and_vision.append(
OllamaFinalModelResponse(
name=model_name,
max_input_tokens=context_limit,
supports_image_input=supports_image_input,
)
)
return all_models_with_context_size_and_vision

View File

@@ -1,4 +1,3 @@
from typing import Any
from typing import TYPE_CHECKING
from pydantic import BaseModel
@@ -139,9 +138,8 @@ class LLMProviderView(LLMProvider):
class ModelConfigurationUpsertRequest(BaseModel):
name: str
is_visible: bool
is_visible: bool | None = False
max_input_tokens: int | None = None
supports_image_input: bool | None = None
@classmethod
def from_model(
@@ -151,13 +149,12 @@ class ModelConfigurationUpsertRequest(BaseModel):
name=model_configuration_model.name,
is_visible=model_configuration_model.is_visible,
max_input_tokens=model_configuration_model.max_input_tokens,
supports_image_input=model_configuration_model.supports_image_input,
)
class ModelConfigurationView(BaseModel):
name: str
is_visible: bool
is_visible: bool | None = False
max_input_tokens: int | None = None
supports_image_input: bool
@@ -199,28 +196,3 @@ class BedrockModelsRequest(BaseModel):
aws_secret_access_key: str | None = None
aws_bearer_token_bedrock: str | None = None
provider_name: str | None = None # Optional: to save models to existing provider
class OllamaModelsRequest(BaseModel):
api_base: str
class OllamaFinalModelResponse(BaseModel):
name: str
max_input_tokens: int
supports_image_input: bool
class OllamaModelDetails(BaseModel):
"""Response model for Ollama /api/show endpoint"""
model_info: dict[str, Any]
capabilities: list[str] = []
def supports_completion(self) -> bool:
"""Check if this model supports completion/chat"""
return "completion" in self.capabilities
def supports_image_input(self) -> bool:
"""Check if this model supports image input"""
return "vision" in self.capabilities

View File

@@ -1,5 +1,3 @@
import csv
import io
import re
from datetime import datetime
from datetime import timedelta
@@ -16,7 +14,6 @@ from fastapi import Depends
from fastapi import HTTPException
from fastapi import Query
from fastapi import Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from sqlalchemy.orm import Session
@@ -300,43 +297,6 @@ def list_all_users(
)
@router.get("/manage/users/download")
def download_users_csv(
_: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> StreamingResponse:
"""Download all users as a CSV file."""
# Get all users from the database
users = get_all_users(db_session)
# Create CSV content in memory
output = io.StringIO()
writer = csv.writer(output)
# Write CSV header
writer.writerow(["Email", "Role", "Status"])
# Write user data
for user in users:
writer.writerow(
[
user.email,
user.role.value if user.role else "",
"Active" if user.is_active else "Inactive",
]
)
# Prepare the CSV content for download
csv_content = output.getvalue()
output.close()
return StreamingResponse(
io.BytesIO(csv_content.encode("utf-8")),
media_type="text/csv",
headers={"Content-Disposition": "attachment;"},
)
@router.put("/manage/admin/users")
def bulk_invite_users(
emails: list[str] = Body(..., embed=True),

View File

@@ -0,0 +1,596 @@
import io
import time
from datetime import datetime
from datetime import timedelta
from typing import List
import requests
import sqlalchemy.exc
from bs4 import BeautifulSoup
from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import Form
from fastapi import HTTPException
from fastapi import Query
from fastapi import UploadFile
from pydantic import BaseModel
from sqlalchemy.orm import Session
from onyx.auth.users import current_user
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.connector import create_connector
from onyx.db.connector_credential_pair import add_credential_to_connector
from onyx.db.credentials import create_credential
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.models import UserFolder
from onyx.db.user_documents import calculate_user_files_token_count
from onyx.db.user_documents import create_user_files
from onyx.db.user_documents import get_user_file_indexing_status
from onyx.db.user_documents import share_file_with_assistant
from onyx.db.user_documents import share_folder_with_assistant
from onyx.db.user_documents import unshare_file_with_assistant
from onyx.db.user_documents import unshare_folder_with_assistant
from onyx.db.user_documents import upload_files_to_user_files_with_indexing
from onyx.file_processing.html_utils import web_html_cleanup
from onyx.server.documents.connector import trigger_indexing_for_cc_pair
from onyx.server.documents.models import ConnectorBase
from onyx.server.documents.models import CredentialBase
from onyx.server.query_and_chat.chat_backend import RECENT_DOCS_FOLDER_ID
from onyx.server.user_documents.models import MessageResponse
from onyx.server.user_documents.models import UserFileSnapshot
from onyx.server.user_documents.models import UserFolderSnapshot
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()

# No shared prefix here: each handler declares its full path
# (e.g. /user/folder, /user/file/...).
router = APIRouter()
class FolderCreationRequest(BaseModel):
    # Name and description for a new user-documents folder (both required).
    name: str
    description: str
@router.post("/user/folder")
def create_folder(
    request: FolderCreationRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFolderSnapshot:
    """Create a user-owned documents folder and return its snapshot."""
    folder = UserFolder(
        user_id=user.id if user else None,
        name=request.name,
        description=request.description,
    )
    try:
        db_session.add(folder)
        db_session.commit()
    except sqlalchemy.exc.DataError as e:
        # Postgres rejects over-long values; turn that specific failure into a
        # friendly 400 and let everything else propagate unchanged.
        if "StringDataRightTruncation" not in str(e):
            raise
        raise HTTPException(
            status_code=400,
            detail="Folder name or description is too long. Please use a shorter name or description.",
        )
    return UserFolderSnapshot.from_model(folder)
@router.get(
    "/user/folder",
)
def user_get_folders(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserFolderSnapshot]:
    """List the caller's folders plus the shared recent-docs folder, with each
    folder's file list restricted to files owned by the caller."""
    user_id = user.id if user else None
    owned_or_recent = (UserFolder.user_id == user_id) | (
        UserFolder.id == RECENT_DOCS_FOLDER_ID
    )
    folders = db_session.query(UserFolder).filter(owned_or_recent).all()

    snapshots: list[UserFolderSnapshot] = []
    for folder in folders:
        snapshot = UserFolderSnapshot.from_model(folder)
        # Hide other users' files (relevant for the shared recent-docs folder).
        snapshot.files = [f for f in snapshot.files if f.user_id == user_id]
        snapshots.append(snapshot)
    return snapshots
@router.get("/user/folder/{folder_id}")
def get_folder(
    folder_id: int,
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFolderSnapshot:
    """Fetch one folder the caller may access; 404 if missing/inaccessible."""
    user_id = user.id if user else None
    accessible = (UserFolder.user_id == user_id) | (
        UserFolder.id == RECENT_DOCS_FOLDER_ID
    )
    folder = (
        db_session.query(UserFolder)
        .filter(UserFolder.id == folder_id, accessible)
        .first()
    )
    if folder is None:
        raise HTTPException(status_code=404, detail="Folder not found")

    snapshot = UserFolderSnapshot.from_model(folder)
    # Only expose the caller's own files (the recent-docs folder is shared).
    snapshot.files = [f for f in snapshot.files if f.user_id == user_id]
    return snapshot
@router.post("/user/file/upload")
def upload_user_files(
    files: List[UploadFile] = File(...),
    folder_id: int | None = Form(None),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserFileSnapshot]:
    """Upload files into a folder (defaulting to recent-docs) and start indexing.

    Raises:
        HTTPException: 500 on unexpected upload/indexing failures.
    """
    # A folder_id of 0 means "no folder specified".
    if folder_id == 0:
        folder_id = None
    try:
        # Consolidated helper that stores the files and triggers indexing.
        user_files = upload_files_to_user_files_with_indexing(
            files, folder_id or RECENT_DOCS_FOLDER_ID, user, db_session
        )
        return [UserFileSnapshot.from_model(user_file) for user_file in user_files]
    except HTTPException:
        # Don't mask deliberate HTTP errors from downstream code as 500s.
        raise
    except Exception as e:
        logger.error(f"Error uploading files: {str(e)}")
        raise HTTPException(
            status_code=500, detail=f"Failed to upload files: {str(e)}"
        ) from e
class FolderUpdateRequest(BaseModel):
    # Partial update: fields left as None (or empty) are not modified.
    name: str | None = None
    description: str | None = None
@router.put("/user/folder/{folder_id}")
def update_folder(
    folder_id: int,
    request: FolderUpdateRequest,
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFolderSnapshot:
    """Rename and/or re-describe a folder owned by the caller."""
    owner_id = user.id if user else None
    folder = (
        db_session.query(UserFolder)
        .filter(UserFolder.id == folder_id, UserFolder.user_id == owner_id)
        .first()
    )
    if folder is None:
        raise HTTPException(status_code=404, detail="Folder not found")

    # Only overwrite fields that were provided with a truthy value.
    for attribute, value in (
        ("name", request.name),
        ("description", request.description),
    ):
        if value:
            setattr(folder, attribute, value)
    db_session.commit()
    return UserFolderSnapshot.from_model(folder)
@router.delete("/user/folder/{folder_id}")
def delete_folder(
    folder_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Delete a folder owned by the caller; 404 if it isn't theirs."""
    owner_id = user.id if user else None
    target = (
        db_session.query(UserFolder)
        .filter(UserFolder.id == folder_id, UserFolder.user_id == owner_id)
        .first()
    )
    if target is None:
        raise HTTPException(status_code=404, detail="Folder not found")
    db_session.delete(target)
    db_session.commit()
    return MessageResponse(message="Folder deleted successfully")
@router.delete("/user/file/{file_id}")
def delete_file(
    file_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Delete one of the caller's files; 404 if it isn't theirs."""
    owner_id = user.id if user else None
    target_file = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id, UserFile.user_id == owner_id)
        .first()
    )
    if target_file is None:
        raise HTTPException(status_code=404, detail="File not found")
    db_session.delete(target_file)
    db_session.commit()
    return MessageResponse(message="File deleted successfully")
class FileMoveRequest(BaseModel):
    # Destination folder; None moves the file out of any folder.
    new_folder_id: int | None
@router.put("/user/file/{file_id}/move")
def move_file(
    file_id: int,
    request: FileMoveRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFileSnapshot:
    """Move a caller-owned file into a different folder (or out of all)."""
    owner_id = user.id if user else None
    target_file = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id, UserFile.user_id == owner_id)
        .first()
    )
    if target_file is None:
        raise HTTPException(status_code=404, detail="File not found")
    target_file.folder_id = request.new_folder_id
    db_session.commit()
    return UserFileSnapshot.from_model(target_file)
@router.get("/user/file-system")
def get_file_system(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserFolderSnapshot]:
    """Return snapshots of every folder owned by the caller."""
    owner_id = user.id if user else None
    owned_folders = (
        db_session.query(UserFolder).filter(UserFolder.user_id == owner_id).all()
    )
    return [UserFolderSnapshot.from_model(f) for f in owned_folders]
@router.put("/user/file/{file_id}/rename")
def rename_file(
    file_id: int,
    name: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFileSnapshot:
    """Rename one of the caller's files; 404 if it isn't theirs."""
    owner_id = user.id if user else None
    target_file = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id, UserFile.user_id == owner_id)
        .first()
    )
    if target_file is None:
        raise HTTPException(status_code=404, detail="File not found")
    target_file.name = name
    db_session.commit()
    return UserFileSnapshot.from_model(target_file)
class ShareRequest(BaseModel):
    # Assistant to (un)share a file or folder with.
    assistant_id: int
@router.post("/user/file/{file_id}/share")
def share_file(
    file_id: int,
    request: ShareRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Grant an assistant access to one of the caller's files."""
    owner_id = user.id if user else None
    owned_file = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id, UserFile.user_id == owner_id)
        .first()
    )
    if owned_file is None:
        raise HTTPException(status_code=404, detail="File not found")
    share_file_with_assistant(file_id, request.assistant_id, db_session)
    return MessageResponse(message="File shared successfully with the assistant")
@router.post("/user/file/{file_id}/unshare")
def unshare_file(
    file_id: int,
    request: ShareRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Revoke an assistant's access to one of the caller's files."""
    owner_id = user.id if user else None
    owned_file = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id, UserFile.user_id == owner_id)
        .first()
    )
    if owned_file is None:
        raise HTTPException(status_code=404, detail="File not found")
    unshare_file_with_assistant(file_id, request.assistant_id, db_session)
    return MessageResponse(message="File unshared successfully from the assistant")
@router.post("/user/folder/{folder_id}/share")
def share_folder(
    folder_id: int,
    request: ShareRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Share a caller-owned folder (and the files in it) with an assistant."""
    owner_id = user.id if user else None
    owned_folder = (
        db_session.query(UserFolder)
        .filter(UserFolder.id == folder_id, UserFolder.user_id == owner_id)
        .first()
    )
    if owned_folder is None:
        raise HTTPException(status_code=404, detail="Folder not found")
    share_folder_with_assistant(folder_id, request.assistant_id, db_session)
    return MessageResponse(
        message="Folder and its files shared successfully with the assistant"
    )
@router.post("/user/folder/{folder_id}/unshare")
def unshare_folder(
    folder_id: int,
    request: ShareRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Revoke an assistant's access to a caller-owned folder and its files."""
    owner_id = user.id if user else None
    owned_folder = (
        db_session.query(UserFolder)
        .filter(UserFolder.id == folder_id, UserFolder.user_id == owner_id)
        .first()
    )
    if owned_folder is None:
        raise HTTPException(status_code=404, detail="Folder not found")
    unshare_folder_with_assistant(folder_id, request.assistant_id, db_session)
    return MessageResponse(
        message="Folder and its files unshared successfully from the assistant"
    )
class CreateFileFromLinkRequest(BaseModel):
    # URL of the web page to ingest as a text file.
    url: str
    # Destination folder; None falls back to the default (-1) folder sentinel.
    folder_id: int | None
@router.post("/user/file/create-from-link")
def create_file_from_link(
    request: CreateFileFromLinkRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserFileSnapshot]:
    """Fetch a web page, clean its HTML down to text, store it as a user file,
    and wire up a connector/credential pair so it gets indexed immediately.

    Raises:
        HTTPException: 400 if the URL cannot be fetched.
    """
    try:
        # Bound the fetch so an unresponsive server can't hang this endpoint.
        response = requests.get(request.url, timeout=30)
        response.raise_for_status()
        content = response.text
        soup = BeautifulSoup(content, "html.parser")
        parsed_html = web_html_cleanup(soup, mintlify_cleanup_enabled=False)
        file_name = f"{parsed_html.title or 'Untitled'}.txt"
        file_content = parsed_html.cleaned_text.encode()
        file = UploadFile(filename=file_name, file=io.BytesIO(file_content))
        user_files = create_user_files(
            [file], request.folder_id or -1, user, db_session, link_url=request.url
        )
        # Create connector and credential (same as in upload_user_files)
        for user_file in user_files:
            connector_base = ConnectorBase(
                name=f"UserFile-{user_file.file_id}-{int(time.time())}",
                source=DocumentSource.FILE,
                input_type=InputType.LOAD_STATE,
                connector_specific_config={
                    "file_locations": [user_file.file_id],
                    "file_names": [user_file.name],
                    "zip_metadata": {},
                },
                refresh_freq=None,
                prune_freq=None,
                indexing_start=None,
            )
            connector = create_connector(
                db_session=db_session,
                connector_data=connector_base,
            )
            credential_info = CredentialBase(
                credential_json={},
                admin_public=True,
                source=DocumentSource.FILE,
                curator_public=True,
                groups=[],
                name=f"UserFileCredential-{user_file.file_id}-{int(time.time())}",
            )
            credential = create_credential(credential_info, user, db_session)
            cc_pair = add_credential_to_connector(
                db_session=db_session,
                user=user,
                connector_id=connector.id,
                credential_id=credential.id,
                cc_pair_name=f"UserFileCCPair-{int(time.time())}",
                access_type=AccessType.PRIVATE,
                auto_sync_options=None,
                groups=[],
                is_user_file=True,
            )
            user_file.cc_pair_id = cc_pair.data
            db_session.commit()
            # Trigger immediate indexing with highest priority
            tenant_id = get_current_tenant_id()
            trigger_indexing_for_cc_pair(
                [], connector.id, False, tenant_id, db_session, is_user_file=True
            )
        db_session.commit()
        return [UserFileSnapshot.from_model(user_file) for user_file in user_files]
    except requests.RequestException as e:
        # Chain the cause so the original network error is preserved (B904).
        raise HTTPException(
            status_code=400, detail=f"Failed to fetch URL: {str(e)}"
        ) from e
@router.get("/user/file/indexing-status")
def get_files_indexing_status(
    file_ids: list[int] = Query(...),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> dict[int, bool]:
    """Get indexing status for multiple files.

    Returns a mapping of file ID -> whether that file has been indexed.
    """
    return get_user_file_indexing_status(file_ids, db_session)
@router.get("/user/file/token-estimate")
def get_files_token_estimate(
    file_ids: list[int] = Query([]),
    folder_ids: list[int] = Query([]),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> dict:
    """Estimate the combined token count of the given files and folders."""
    return {
        "total_tokens": calculate_user_files_token_count(
            file_ids, folder_ids, db_session
        )
    }
class ReindexFileRequest(BaseModel):
    # ID of the user file whose content should be re-indexed.
    file_id: int
@router.post("/user/file/reindex")
def reindex_file(
    request: ReindexFileRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Force a re-index of one user file via its connector-credential pair.

    Raises:
        HTTPException: 404 if the file or its cc-pair cannot be found,
            400 if the file has no cc-pair, 500 if triggering indexing fails.
    """
    user_id = user.id if user else None
    # Only files owned by the requesting user may be reindexed.
    user_file_to_reindex = (
        db_session.query(UserFile)
        .filter(UserFile.id == request.file_id, UserFile.user_id == user_id)
        .first()
    )
    if not user_file_to_reindex:
        raise HTTPException(status_code=404, detail="File not found")
    if not user_file_to_reindex.cc_pair_id:
        raise HTTPException(
            status_code=400,
            detail="File does not have an associated connector-credential pair",
        )
    # Get the connector id from the cc_pair
    cc_pair = (
        db_session.query(ConnectorCredentialPair)
        .filter_by(id=user_file_to_reindex.cc_pair_id)
        .first()
    )
    if not cc_pair:
        raise HTTPException(
            status_code=404, detail="Associated connector-credential pair not found"
        )
    # Trigger immediate reindexing with highest priority
    tenant_id = get_current_tenant_id()
    # Update the cc_pair status to ACTIVE to ensure it's processed
    cc_pair.status = ConnectorCredentialPairStatus.ACTIVE
    # Commit first so the indexing task observes the ACTIVE status.
    db_session.commit()
    try:
        trigger_indexing_for_cc_pair(
            [], cc_pair.connector_id, True, tenant_id, db_session, is_user_file=True
        )
        return MessageResponse(
            message="File reindexing has been triggered successfully"
        )
    except Exception as e:
        logger.error(
            f"Error triggering reindexing for file {request.file_id}: {str(e)}"
        )
        raise HTTPException(
            status_code=500, detail=f"Failed to trigger reindexing: {str(e)}"
        )
class BulkCleanupRequest(BaseModel):
    # Folder to clean up; -2 is treated as "all folders" by the handler.
    folder_id: int
    # Only delete files created more than this many days ago; None deletes all.
    days_older_than: int | None = None
@router.post("/user/file/bulk-cleanup")
def bulk_cleanup_files(
    request: BulkCleanupRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Bulk delete files older than specified days in a folder.

    Raises:
        HTTPException: 404 if a user-owned folder was targeted but not found.
    """
    user_id = user.id if user else None
    logger.info(
        f"Bulk cleanup request: folder_id={request.folder_id}, days_older_than={request.days_older_than}"
    )
    # Check if folder exists
    # (the shared recent-docs folder is not user-owned, so skip the check for it)
    if request.folder_id != RECENT_DOCS_FOLDER_ID:
        folder = (
            db_session.query(UserFolder)
            .filter(UserFolder.id == request.folder_id, UserFolder.user_id == user_id)
            .first()
        )
        if not folder:
            raise HTTPException(status_code=404, detail="Folder not found")
    filter_criteria = [UserFile.user_id == user_id]
    # Filter by folder
    if request.folder_id != -2:  # -2 means all folders
        filter_criteria.append(UserFile.folder_id == request.folder_id)
    # Filter by date if days_older_than is provided
    if request.days_older_than is not None:
        # NOTE(review): utcnow() is naive (and deprecated in 3.12); this assumes
        # UserFile.created_at is also stored as naive UTC — confirm before changing.
        cutoff_date = datetime.utcnow() - timedelta(days=request.days_older_than)
        logger.info(f"Filtering files older than {cutoff_date} (UTC)")
        filter_criteria.append(UserFile.created_at < cutoff_date)
    # Get all files matching the criteria
    files_to_delete = db_session.query(UserFile).filter(*filter_criteria).all()
    logger.info(f"Found {len(files_to_delete)} files to delete")
    # Delete files
    delete_count = 0
    for file in files_to_delete:
        logger.debug(
            f"Deleting file: id={file.id}, name={file.name}, created_at={file.created_at}"
        )
        db_session.delete(file)
        delete_count += 1
    db_session.commit()
    return MessageResponse(message=f"Successfully deleted {delete_count} files")

Some files were not shown because too many files have changed in this diff Show More