mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-03-17 13:42:41 +00:00
Compare commits
84 Commits
v3.0.0-clo
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
11cfc92f15 | ||
|
|
c7da99cfd7 | ||
|
|
b384c77863 | ||
|
|
b0f31cd46b | ||
|
|
323eb9bbba | ||
|
|
708e310849 | ||
|
|
c25509e212 | ||
|
|
6af0da41bd | ||
|
|
b94da25d7c | ||
|
|
7d443c1b53 | ||
|
|
d6b7b3c68f | ||
|
|
f5073d331e | ||
|
|
64c9f6a0d5 | ||
|
|
f5a494f790 | ||
|
|
8598e9f25d | ||
|
|
3ef8aecc54 | ||
|
|
eb311c7550 | ||
|
|
13284d9def | ||
|
|
aaa99fcb60 | ||
|
|
5f628da4e8 | ||
|
|
e40f80cfe1 | ||
|
|
ca6ba2cca9 | ||
|
|
98ef5006ff | ||
|
|
dfd168cde9 | ||
|
|
6c7ae243d0 | ||
|
|
c4a2ff2593 | ||
|
|
4b74a6dc76 | ||
|
|
eea5f5b380 | ||
|
|
ae428ba684 | ||
|
|
7b927e79c2 | ||
|
|
a6815d1221 | ||
|
|
f73d103b6b | ||
|
|
5ec424a3f3 | ||
|
|
0bd3e9a11c | ||
|
|
a336691882 | ||
|
|
bd4965b4d9 | ||
|
|
3c8a24eeba | ||
|
|
613be0de66 | ||
|
|
6f05dbd650 | ||
|
|
8dc7aae816 | ||
|
|
e4527cf117 | ||
|
|
868c9428e2 | ||
|
|
be61c54d45 | ||
|
|
aec0c28c59 | ||
|
|
ab9e3e5338 | ||
|
|
d17c748f75 | ||
|
|
196b6b0514 | ||
|
|
608491ac36 | ||
|
|
a4a664fa2c | ||
|
|
8a6e349741 | ||
|
|
11f8408558 | ||
|
|
24de76ad28 | ||
|
|
e264356eb5 | ||
|
|
c5c08c5da6 | ||
|
|
78a9b386c7 | ||
|
|
dbcbfc1629 | ||
|
|
fabbb00c49 | ||
|
|
809dab5746 | ||
|
|
1649bed548 | ||
|
|
dd07b3cf27 | ||
|
|
c57ea65d42 | ||
|
|
c1ce180b72 | ||
|
|
b5474dc127 | ||
|
|
e1df3f533a | ||
|
|
df5252db05 | ||
|
|
f01f210af8 | ||
|
|
781219cf18 | ||
|
|
ca39da7de9 | ||
|
|
abf76cd747 | ||
|
|
a78607f1b5 | ||
|
|
e213853f63 | ||
|
|
8dc379c6fd | ||
|
|
787f117e17 | ||
|
|
665640fac8 | ||
|
|
d2d44c1e68 | ||
|
|
ffe04ab91f | ||
|
|
6499b21235 | ||
|
|
c5bfd5a152 | ||
|
|
a0329161b0 | ||
|
|
334b7a6d2f | ||
|
|
36196373a8 | ||
|
|
533aa8eff8 | ||
|
|
ecbb267f80 | ||
|
|
66023dbb6d |
3
.github/CODEOWNERS
vendored
3
.github/CODEOWNERS
vendored
@@ -8,3 +8,6 @@
|
||||
# Agent context files
|
||||
/CLAUDE.md @Weves
|
||||
/AGENTS.md @Weves
|
||||
|
||||
# Beta cherry-pick workflow owners
|
||||
/.github/workflows/post-merge-beta-cherry-pick.yml @justin-tahara @jmelahman
|
||||
|
||||
56
.github/actions/slack-notify/action.yml
vendored
56
.github/actions/slack-notify/action.yml
vendored
@@ -1,11 +1,17 @@
|
||||
name: "Slack Notify on Failure"
|
||||
description: "Sends a Slack notification when a workflow fails"
|
||||
name: "Slack Notify"
|
||||
description: "Sends a Slack notification for workflow events"
|
||||
inputs:
|
||||
webhook-url:
|
||||
description: "Slack webhook URL (can also use SLACK_WEBHOOK_URL env var)"
|
||||
required: false
|
||||
details:
|
||||
description: "Additional message body content"
|
||||
required: false
|
||||
failed-jobs:
|
||||
description: "List of failed job names (newline-separated)"
|
||||
description: "Deprecated alias for details"
|
||||
required: false
|
||||
mention:
|
||||
description: "GitHub username to resolve to a Slack @-mention. Replaces {mention} in details."
|
||||
required: false
|
||||
title:
|
||||
description: "Title for the notification"
|
||||
@@ -21,7 +27,9 @@ runs:
|
||||
shell: bash
|
||||
env:
|
||||
SLACK_WEBHOOK_URL: ${{ inputs.webhook-url }}
|
||||
DETAILS: ${{ inputs.details }}
|
||||
FAILED_JOBS: ${{ inputs.failed-jobs }}
|
||||
MENTION_USER: ${{ inputs.mention }}
|
||||
TITLE: ${{ inputs.title }}
|
||||
REF_NAME: ${{ inputs.ref-name }}
|
||||
REPO: ${{ github.repository }}
|
||||
@@ -44,6 +52,39 @@ runs:
|
||||
REF_NAME="$GITHUB_REF_NAME"
|
||||
fi
|
||||
|
||||
if [ -z "$DETAILS" ]; then
|
||||
DETAILS="$FAILED_JOBS"
|
||||
fi
|
||||
|
||||
# Resolve {mention} placeholder if a GitHub username was provided.
|
||||
# Looks up the username in user-mappings.json (co-located with this action)
|
||||
# and replaces {mention} with <@SLACK_ID> for a Slack @-mention.
|
||||
# Falls back to the plain GitHub username if not found in the mapping.
|
||||
if [ -n "$MENTION_USER" ]; then
|
||||
MAPPINGS_FILE="${GITHUB_ACTION_PATH}/user-mappings.json"
|
||||
slack_id="$(jq -r --arg gh "$MENTION_USER" 'to_entries[] | select(.value | ascii_downcase == ($gh | ascii_downcase)) | .key' "$MAPPINGS_FILE" 2>/dev/null | head -1)"
|
||||
|
||||
if [ -n "$slack_id" ]; then
|
||||
mention_text="<@${slack_id}>"
|
||||
else
|
||||
mention_text="${MENTION_USER}"
|
||||
fi
|
||||
|
||||
DETAILS="${DETAILS//\{mention\}/$mention_text}"
|
||||
TITLE="${TITLE//\{mention\}/}"
|
||||
else
|
||||
DETAILS="${DETAILS//\{mention\}/}"
|
||||
TITLE="${TITLE//\{mention\}/}"
|
||||
fi
|
||||
|
||||
normalize_multiline() {
|
||||
printf '%s' "$1" | awk 'BEGIN { ORS=""; first=1 } { if (!first) printf "\\n"; printf "%s", $0; first=0 }'
|
||||
}
|
||||
|
||||
DETAILS="$(normalize_multiline "$DETAILS")"
|
||||
REF_NAME="$(normalize_multiline "$REF_NAME")"
|
||||
TITLE="$(normalize_multiline "$TITLE")"
|
||||
|
||||
# Escape JSON special characters
|
||||
escape_json() {
|
||||
local input="$1"
|
||||
@@ -59,12 +100,12 @@ runs:
|
||||
}
|
||||
|
||||
REF_NAME_ESC=$(escape_json "$REF_NAME")
|
||||
FAILED_JOBS_ESC=$(escape_json "$FAILED_JOBS")
|
||||
DETAILS_ESC=$(escape_json "$DETAILS")
|
||||
WORKFLOW_URL_ESC=$(escape_json "$WORKFLOW_URL")
|
||||
TITLE_ESC=$(escape_json "$TITLE")
|
||||
|
||||
# Build JSON payload piece by piece
|
||||
# Note: FAILED_JOBS_ESC already contains \n sequences that should remain as \n in JSON
|
||||
# Note: DETAILS_ESC already contains \n sequences that should remain as \n in JSON
|
||||
PAYLOAD="{"
|
||||
PAYLOAD="${PAYLOAD}\"text\":\"${TITLE_ESC}\","
|
||||
PAYLOAD="${PAYLOAD}\"blocks\":[{"
|
||||
@@ -79,10 +120,10 @@ runs:
|
||||
PAYLOAD="${PAYLOAD}{\"type\":\"mrkdwn\",\"text\":\"*Run ID:*\\n#${RUN_NUMBER}\"}"
|
||||
PAYLOAD="${PAYLOAD}]"
|
||||
PAYLOAD="${PAYLOAD}}"
|
||||
if [ -n "$FAILED_JOBS" ]; then
|
||||
if [ -n "$DETAILS" ]; then
|
||||
PAYLOAD="${PAYLOAD},{"
|
||||
PAYLOAD="${PAYLOAD}\"type\":\"section\","
|
||||
PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"mrkdwn\",\"text\":\"*Failed Jobs:*\\n${FAILED_JOBS_ESC}\"}"
|
||||
PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"mrkdwn\",\"text\":\"${DETAILS_ESC}\"}"
|
||||
PAYLOAD="${PAYLOAD}}"
|
||||
fi
|
||||
PAYLOAD="${PAYLOAD},{"
|
||||
@@ -99,4 +140,3 @@ runs:
|
||||
curl -X POST -H 'Content-type: application/json' \
|
||||
--data "$PAYLOAD" \
|
||||
"$SLACK_WEBHOOK_URL"
|
||||
|
||||
|
||||
18
.github/actions/slack-notify/user-mappings.json
vendored
Normal file
18
.github/actions/slack-notify/user-mappings.json
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"U05SAGZPEA1": "yuhongsun96",
|
||||
"U05SAH6UGUD": "Weves",
|
||||
"U07PWEQB7A5": "evan-onyx",
|
||||
"U07V1SM68KF": "joachim-danswer",
|
||||
"U08JZ9N3QNN": "raunakab",
|
||||
"U08L24NCLJE": "Subash-Mohan",
|
||||
"U090B9M07B2": "wenxi-onyx",
|
||||
"U094RASDP0Q": "duo-onyx",
|
||||
"U096L8ZQ85B": "justin-tahara",
|
||||
"U09AHV8UBQX": "jessicasingh7",
|
||||
"U09KAL5T3C2": "nmgarza5",
|
||||
"U09KPGVQ70R": "acaprau",
|
||||
"U09QR8KTSJH": "rohoswagger",
|
||||
"U09RB4NTXA4": "jmelahman",
|
||||
"U0A6K9VCY6A": "Danelegend",
|
||||
"U0AGC4KH71A": "Bo-Onyx"
|
||||
}
|
||||
90
.github/workflows/deployment.yml
vendored
90
.github/workflows/deployment.yml
vendored
@@ -29,20 +29,32 @@ jobs:
|
||||
build-backend-craft: ${{ steps.check.outputs.build-backend-craft }}
|
||||
build-model-server: ${{ steps.check.outputs.build-model-server }}
|
||||
is-cloud-tag: ${{ steps.check.outputs.is-cloud-tag }}
|
||||
is-stable: ${{ steps.check.outputs.is-stable }}
|
||||
is-beta: ${{ steps.check.outputs.is-beta }}
|
||||
is-stable-standalone: ${{ steps.check.outputs.is-stable-standalone }}
|
||||
is-beta-standalone: ${{ steps.check.outputs.is-beta-standalone }}
|
||||
is-craft-latest: ${{ steps.check.outputs.is-craft-latest }}
|
||||
is-latest: ${{ steps.check.outputs.is-latest }}
|
||||
is-test-run: ${{ steps.check.outputs.is-test-run }}
|
||||
sanitized-tag: ${{ steps.check.outputs.sanitized-tag }}
|
||||
short-sha: ${{ steps.check.outputs.short-sha }}
|
||||
steps:
|
||||
- name: Checkout (for git tags)
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
fetch-depth: 0
|
||||
fetch-tags: true
|
||||
|
||||
- name: Setup uv
|
||||
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # ratchet:astral-sh/setup-uv@v7
|
||||
with:
|
||||
version: "0.9.9"
|
||||
enable-cache: false
|
||||
|
||||
- name: Check which components to build and version info
|
||||
id: check
|
||||
env:
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
run: |
|
||||
set -eo pipefail
|
||||
TAG="${GITHUB_REF_NAME}"
|
||||
# Sanitize tag name by replacing slashes with hyphens (for Docker tag compatibility)
|
||||
SANITIZED_TAG=$(echo "$TAG" | tr '/' '-')
|
||||
@@ -54,9 +66,8 @@ jobs:
|
||||
IS_VERSION_TAG=false
|
||||
IS_STABLE=false
|
||||
IS_BETA=false
|
||||
IS_STABLE_STANDALONE=false
|
||||
IS_BETA_STANDALONE=false
|
||||
IS_CRAFT_LATEST=false
|
||||
IS_LATEST=false
|
||||
IS_PROD_TAG=false
|
||||
IS_TEST_RUN=false
|
||||
BUILD_DESKTOP=false
|
||||
@@ -67,9 +78,6 @@ jobs:
|
||||
BUILD_MODEL_SERVER=true
|
||||
|
||||
# Determine tag type based on pattern matching (do regex checks once)
|
||||
if [[ "$TAG" == craft-* ]]; then
|
||||
IS_CRAFT_LATEST=true
|
||||
fi
|
||||
if [[ "$TAG" == *cloud* ]]; then
|
||||
IS_CLOUD=true
|
||||
fi
|
||||
@@ -97,20 +105,28 @@ jobs:
|
||||
fi
|
||||
fi
|
||||
|
||||
# Craft-latest builds backend with Craft enabled
|
||||
if [[ "$IS_CRAFT_LATEST" == "true" ]]; then
|
||||
BUILD_BACKEND_CRAFT=true
|
||||
BUILD_BACKEND=false
|
||||
fi
|
||||
|
||||
# Standalone version checks (for backend/model-server - version excluding cloud tags)
|
||||
if [[ "$IS_STABLE" == "true" ]] && [[ "$IS_CLOUD" != "true" ]]; then
|
||||
IS_STABLE_STANDALONE=true
|
||||
fi
|
||||
if [[ "$IS_BETA" == "true" ]] && [[ "$IS_CLOUD" != "true" ]]; then
|
||||
IS_BETA_STANDALONE=true
|
||||
fi
|
||||
|
||||
# Determine if this tag should get the "latest" Docker tag.
|
||||
# Only the highest semver stable tag (vX.Y.Z exactly) gets "latest".
|
||||
if [[ "$IS_STABLE" == "true" ]]; then
|
||||
HIGHEST_STABLE=$(uv run --no-sync --with onyx-devtools ods latest-stable-tag) || {
|
||||
echo "::error::Failed to determine highest stable tag via 'ods latest-stable-tag'"
|
||||
exit 1
|
||||
}
|
||||
if [[ "$TAG" == "$HIGHEST_STABLE" ]]; then
|
||||
IS_LATEST=true
|
||||
fi
|
||||
fi
|
||||
|
||||
# Build craft-latest backend alongside the regular latest.
|
||||
if [[ "$IS_LATEST" == "true" ]]; then
|
||||
BUILD_BACKEND_CRAFT=true
|
||||
fi
|
||||
|
||||
# Determine if this is a production tag
|
||||
# Production tags are: version tags (v1.2.3*) or nightly tags
|
||||
if [[ "$IS_VERSION_TAG" == "true" ]] || [[ "$IS_NIGHTLY" == "true" ]]; then
|
||||
@@ -129,11 +145,9 @@ jobs:
|
||||
echo "build-backend-craft=$BUILD_BACKEND_CRAFT"
|
||||
echo "build-model-server=$BUILD_MODEL_SERVER"
|
||||
echo "is-cloud-tag=$IS_CLOUD"
|
||||
echo "is-stable=$IS_STABLE"
|
||||
echo "is-beta=$IS_BETA"
|
||||
echo "is-stable-standalone=$IS_STABLE_STANDALONE"
|
||||
echo "is-beta-standalone=$IS_BETA_STANDALONE"
|
||||
echo "is-craft-latest=$IS_CRAFT_LATEST"
|
||||
echo "is-latest=$IS_LATEST"
|
||||
echo "is-test-run=$IS_TEST_RUN"
|
||||
echo "sanitized-tag=$SANITIZED_TAG"
|
||||
echo "short-sha=$SHORT_SHA"
|
||||
@@ -441,7 +455,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -515,7 +529,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -593,14 +607,14 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
latest=false
|
||||
tags: |
|
||||
type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('web-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
|
||||
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-stable == 'true' && 'latest' || '' }}
|
||||
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
|
||||
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
|
||||
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta == 'true' && 'beta' || '' }}
|
||||
|
||||
@@ -654,7 +668,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -736,7 +750,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -822,7 +836,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -880,7 +894,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -953,7 +967,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -1030,14 +1044,14 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
latest=false
|
||||
tags: |
|
||||
type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('backend-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
|
||||
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-stable-standalone == 'true' && 'latest' || '' }}
|
||||
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
|
||||
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
|
||||
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}
|
||||
|
||||
@@ -1091,7 +1105,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -1164,7 +1178,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -1242,7 +1256,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -1303,7 +1317,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -1383,7 +1397,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -1466,14 +1480,14 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
latest=false
|
||||
tags: |
|
||||
type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('model-server-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
|
||||
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-stable-standalone == 'true' && 'latest' || '' }}
|
||||
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
|
||||
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
|
||||
type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}
|
||||
|
||||
|
||||
239
.github/workflows/post-merge-beta-cherry-pick.yml
vendored
239
.github/workflows/post-merge-beta-cherry-pick.yml
vendored
@@ -1,67 +1,112 @@
|
||||
name: Post-Merge Beta Cherry-Pick
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request_target:
|
||||
types:
|
||||
- closed
|
||||
|
||||
# SECURITY NOTE:
|
||||
# This workflow intentionally uses pull_request_target so post-merge automation can
|
||||
# use base-repo credentials. Do not checkout PR head refs in this workflow
|
||||
# (e.g. github.event.pull_request.head.sha). Only trusted base refs are allowed.
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
cherry-pick-to-latest-release:
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
resolve-cherry-pick-request:
|
||||
if: >-
|
||||
github.event.pull_request.merged == true
|
||||
&& github.event.pull_request.base.ref == 'main'
|
||||
&& github.event.pull_request.head.repo.full_name == github.repository
|
||||
outputs:
|
||||
should_cherrypick: ${{ steps.gate.outputs.should_cherrypick }}
|
||||
pr_number: ${{ steps.gate.outputs.pr_number }}
|
||||
cherry_pick_reason: ${{ steps.run_cherry_pick.outputs.reason }}
|
||||
cherry_pick_details: ${{ steps.run_cherry_pick.outputs.details }}
|
||||
merge_commit_sha: ${{ steps.gate.outputs.merge_commit_sha }}
|
||||
merged_by: ${{ steps.gate.outputs.merged_by }}
|
||||
gate_error: ${{ steps.gate.outputs.gate_error }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Resolve merged PR and checkbox state
|
||||
id: gate
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
# SECURITY: keep PR body in env/plain-text handling; avoid directly
|
||||
# inlining github.event.pull_request.body into shell commands.
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
MERGE_COMMIT_SHA: ${{ github.event.pull_request.merge_commit_sha }}
|
||||
MERGED_BY: ${{ github.event.pull_request.merged_by.login }}
|
||||
# Explicit merger allowlist used because pull_request_target runs with
|
||||
# the default GITHUB_TOKEN, which cannot reliably read org/team
|
||||
# membership for this repository context.
|
||||
ALLOWED_MERGERS: |
|
||||
acaprau
|
||||
bo-onyx
|
||||
danelegend
|
||||
duo-onyx
|
||||
evan-onyx
|
||||
jessicasingh7
|
||||
jmelahman
|
||||
joachim-danswer
|
||||
justin-tahara
|
||||
nmgarza5
|
||||
raunakab
|
||||
rohoswagger
|
||||
subash-mohan
|
||||
trial2onyx
|
||||
wenxi-onyx
|
||||
weves
|
||||
yuhongsun96
|
||||
run: |
|
||||
# For the commit that triggered this workflow (HEAD on main), fetch all
|
||||
# associated PRs and keep only the PR that was actually merged into main
|
||||
# with this exact merge commit SHA.
|
||||
pr_numbers="$(gh api "repos/${GITHUB_REPOSITORY}/commits/${GITHUB_SHA}/pulls" | jq -r --arg sha "${GITHUB_SHA}" '.[] | select(.merged_at != null and .base.ref == "main" and .merge_commit_sha == $sha) | .number')"
|
||||
match_count="$(printf '%s\n' "$pr_numbers" | sed '/^[[:space:]]*$/d' | wc -l | tr -d ' ')"
|
||||
pr_number="$(printf '%s\n' "$pr_numbers" | sed '/^[[:space:]]*$/d' | head -n 1)"
|
||||
echo "pr_number=${PR_NUMBER}" >> "$GITHUB_OUTPUT"
|
||||
echo "merged_by=${MERGED_BY}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
if [ "${match_count}" -gt 1 ]; then
|
||||
echo "::warning::Multiple merged PRs matched commit ${GITHUB_SHA}. Using PR #${pr_number}."
|
||||
fi
|
||||
|
||||
if [ -z "$pr_number" ]; then
|
||||
echo "No merged PR associated with commit ${GITHUB_SHA}; skipping."
|
||||
if ! echo "${PR_BODY}" | grep -qiE "\\[x\\][[:space:]]*(\\[[^]]+\\][[:space:]]*)?Please cherry-pick this PR to the latest release version"; then
|
||||
echo "should_cherrypick=false" >> "$GITHUB_OUTPUT"
|
||||
echo "Cherry-pick checkbox not checked for PR #${PR_NUMBER}. Skipping."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Read the PR once so we can gate behavior and infer preferred actor.
|
||||
pr_json="$(gh api "repos/${GITHUB_REPOSITORY}/pulls/${pr_number}")"
|
||||
pr_body="$(printf '%s' "$pr_json" | jq -r '.body // ""')"
|
||||
merged_by="$(printf '%s' "$pr_json" | jq -r '.merged_by.login // ""')"
|
||||
# Keep should_cherrypick output before any possible exit 1 below so
|
||||
# notify-slack can still gate on this output even if this job fails.
|
||||
echo "should_cherrypick=true" >> "$GITHUB_OUTPUT"
|
||||
echo "Cherry-pick checkbox checked for PR #${PR_NUMBER}."
|
||||
|
||||
echo "pr_number=$pr_number" >> "$GITHUB_OUTPUT"
|
||||
echo "merged_by=$merged_by" >> "$GITHUB_OUTPUT"
|
||||
|
||||
if echo "$pr_body" | grep -qiE "\\[x\\][[:space:]]*(\\[[^]]+\\][[:space:]]*)?Please cherry-pick this PR to the latest release version"; then
|
||||
echo "should_cherrypick=true" >> "$GITHUB_OUTPUT"
|
||||
echo "Cherry-pick checkbox checked for PR #${pr_number}."
|
||||
exit 0
|
||||
if [ -z "${MERGE_COMMIT_SHA}" ] || [ "${MERGE_COMMIT_SHA}" = "null" ]; then
|
||||
echo "gate_error=missing-merge-commit-sha" >> "$GITHUB_OUTPUT"
|
||||
echo "::error::PR #${PR_NUMBER} requested cherry-pick, but merge_commit_sha is missing."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "should_cherrypick=false" >> "$GITHUB_OUTPUT"
|
||||
echo "Cherry-pick checkbox not checked for PR #${pr_number}. Skipping."
|
||||
echo "merge_commit_sha=${MERGE_COMMIT_SHA}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
normalized_merged_by="$(printf '%s' "${MERGED_BY}" | tr '[:upper:]' '[:lower:]')"
|
||||
normalized_allowed_mergers="$(printf '%s\n' "${ALLOWED_MERGERS}" | tr '[:upper:]' '[:lower:]')"
|
||||
if ! printf '%s\n' "${normalized_allowed_mergers}" | grep -Fxq "${normalized_merged_by}"; then
|
||||
echo "gate_error=not-allowed-merger" >> "$GITHUB_OUTPUT"
|
||||
echo "::error::${MERGED_BY} is not in the explicit cherry-pick merger allowlist. Failing cherry-pick gate."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
exit 0
|
||||
|
||||
cherry-pick-to-latest-release:
|
||||
needs:
|
||||
- resolve-cherry-pick-request
|
||||
if: needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && needs.resolve-cherry-pick-request.result == 'success'
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
outputs:
|
||||
cherry_pick_pr_url: ${{ steps.run_cherry_pick.outputs.pr_url }}
|
||||
cherry_pick_reason: ${{ steps.run_cherry_pick.outputs.reason }}
|
||||
cherry_pick_details: ${{ steps.run_cherry_pick.outputs.details }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
if: steps.gate.outputs.should_cherrypick == 'true'
|
||||
# SECURITY: keep checkout pinned to trusted base branch; do not switch to PR head refs.
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
@@ -69,34 +114,44 @@ jobs:
|
||||
ref: main
|
||||
|
||||
- name: Install the latest version of uv
|
||||
if: steps.gate.outputs.should_cherrypick == 'true'
|
||||
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # ratchet:astral-sh/setup-uv@v7
|
||||
with:
|
||||
enable-cache: false
|
||||
version: "0.9.9"
|
||||
|
||||
- name: Configure git identity
|
||||
if: steps.gate.outputs.should_cherrypick == 'true'
|
||||
run: |
|
||||
git config user.name "github-actions[bot]"
|
||||
git config user.email "github-actions[bot]@users.noreply.github.com"
|
||||
|
||||
- name: Create cherry-pick PR to latest release
|
||||
id: run_cherry_pick
|
||||
if: steps.gate.outputs.should_cherrypick == 'true'
|
||||
continue-on-error: true
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
CHERRY_PICK_ASSIGNEE: ${{ steps.gate.outputs.merged_by }}
|
||||
CHERRY_PICK_ASSIGNEE: ${{ needs.resolve-cherry-pick-request.outputs.merged_by }}
|
||||
MERGE_COMMIT_SHA: ${{ needs.resolve-cherry-pick-request.outputs.merge_commit_sha }}
|
||||
run: |
|
||||
set -o pipefail
|
||||
output_file="$(mktemp)"
|
||||
uv run --no-sync --with onyx-devtools ods cherry-pick "${GITHUB_SHA}" --yes --no-verify 2>&1 | tee "$output_file"
|
||||
exit_code="${PIPESTATUS[0]}"
|
||||
set +e
|
||||
uv run --no-sync --with onyx-devtools ods cherry-pick "${MERGE_COMMIT_SHA}" --yes --no-verify 2>&1 | tee "$output_file"
|
||||
pipe_statuses=("${PIPESTATUS[@]}")
|
||||
exit_code="${pipe_statuses[0]}"
|
||||
tee_exit="${pipe_statuses[1]:-0}"
|
||||
set -e
|
||||
if [ "${tee_exit}" -ne 0 ]; then
|
||||
echo "status=failure" >> "$GITHUB_OUTPUT"
|
||||
echo "reason=output-capture-failed" >> "$GITHUB_OUTPUT"
|
||||
echo "::error::tee failed to capture cherry-pick output (exit ${tee_exit}); cannot classify result."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "${exit_code}" -eq 0 ]; then
|
||||
pr_url="$(sed -n 's/^.*PR created successfully: \(https:\/\/github\.com\/[^[:space:]]\+\/pull\/[0-9]\+\).*$/\1/p' "$output_file" | tail -n 1)"
|
||||
echo "status=success" >> "$GITHUB_OUTPUT"
|
||||
if [ -n "${pr_url}" ]; then
|
||||
echo "pr_url=${pr_url}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
exit 0
|
||||
fi
|
||||
|
||||
@@ -115,17 +170,18 @@ jobs:
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Mark workflow as failed if cherry-pick failed
|
||||
if: steps.gate.outputs.should_cherrypick == 'true' && steps.run_cherry_pick.outputs.status == 'failure'
|
||||
if: steps.run_cherry_pick.outputs.status == 'failure'
|
||||
env:
|
||||
CHERRY_PICK_REASON: ${{ steps.run_cherry_pick.outputs.reason }}
|
||||
run: |
|
||||
echo "::error::Automated cherry-pick failed (${CHERRY_PICK_REASON})."
|
||||
exit 1
|
||||
|
||||
notify-slack-on-cherry-pick-failure:
|
||||
notify-slack-on-cherry-pick-success:
|
||||
needs:
|
||||
- resolve-cherry-pick-request
|
||||
- cherry-pick-to-latest-release
|
||||
if: always() && needs.cherry-pick-to-latest-release.outputs.should_cherrypick == 'true' && needs.cherry-pick-to-latest-release.result != 'success'
|
||||
if: needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && needs.resolve-cherry-pick-request.result == 'success' && needs.cherry-pick-to-latest-release.result == 'success'
|
||||
runs-on: ubuntu-slim
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
@@ -134,32 +190,107 @@ jobs:
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Fail if Slack webhook secret is missing
|
||||
env:
|
||||
CHERRY_PICK_PRS_WEBHOOK: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}
|
||||
run: |
|
||||
if [ -z "${CHERRY_PICK_PRS_WEBHOOK}" ]; then
|
||||
echo "::error::CHERRY_PICK_PRS_WEBHOOK is not configured."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Build cherry-pick success summary
|
||||
id: success-summary
|
||||
env:
|
||||
SOURCE_PR_NUMBER: ${{ needs.resolve-cherry-pick-request.outputs.pr_number }}
|
||||
MERGE_COMMIT_SHA: ${{ needs.resolve-cherry-pick-request.outputs.merge_commit_sha }}
|
||||
CHERRY_PICK_PR_URL: ${{ needs.cherry-pick-to-latest-release.outputs.cherry_pick_pr_url }}
|
||||
run: |
|
||||
source_pr_url="https://github.com/${GITHUB_REPOSITORY}/pull/${SOURCE_PR_NUMBER}"
|
||||
details="*Cherry-pick PR opened successfully.*\\n• author: {mention}\\n• source PR: ${source_pr_url}"
|
||||
if [ -n "${CHERRY_PICK_PR_URL}" ]; then
|
||||
details="${details}\\n• cherry-pick PR: ${CHERRY_PICK_PR_URL}"
|
||||
fi
|
||||
if [ -n "${MERGE_COMMIT_SHA}" ]; then
|
||||
details="${details}\\n• merge SHA: ${MERGE_COMMIT_SHA}"
|
||||
fi
|
||||
|
||||
echo "details=${details}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Notify #cherry-pick-prs about cherry-pick success
|
||||
uses: ./.github/actions/slack-notify
|
||||
with:
|
||||
webhook-url: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}
|
||||
mention: ${{ needs.resolve-cherry-pick-request.outputs.merged_by }}
|
||||
details: ${{ steps.success-summary.outputs.details }}
|
||||
title: "✅ Automated Cherry-Pick PR Opened"
|
||||
ref-name: ${{ github.event.pull_request.base.ref }}
|
||||
|
||||
notify-slack-on-cherry-pick-failure:
|
||||
needs:
|
||||
- resolve-cherry-pick-request
|
||||
- cherry-pick-to-latest-release
|
||||
if: always() && needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && (needs.resolve-cherry-pick-request.result == 'failure' || needs.cherry-pick-to-latest-release.result == 'failure')
|
||||
runs-on: ubuntu-slim
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Fail if Slack webhook secret is missing
|
||||
env:
|
||||
CHERRY_PICK_PRS_WEBHOOK: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}
|
||||
run: |
|
||||
if [ -z "${CHERRY_PICK_PRS_WEBHOOK}" ]; then
|
||||
echo "::error::CHERRY_PICK_PRS_WEBHOOK is not configured."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Build cherry-pick failure summary
|
||||
id: failure-summary
|
||||
env:
|
||||
SOURCE_PR_NUMBER: ${{ needs.cherry-pick-to-latest-release.outputs.pr_number }}
|
||||
SOURCE_PR_NUMBER: ${{ needs.resolve-cherry-pick-request.outputs.pr_number }}
|
||||
MERGE_COMMIT_SHA: ${{ needs.resolve-cherry-pick-request.outputs.merge_commit_sha }}
|
||||
GATE_ERROR: ${{ needs.resolve-cherry-pick-request.outputs.gate_error }}
|
||||
CHERRY_PICK_REASON: ${{ needs.cherry-pick-to-latest-release.outputs.cherry_pick_reason }}
|
||||
CHERRY_PICK_DETAILS: ${{ needs.cherry-pick-to-latest-release.outputs.cherry_pick_details }}
|
||||
run: |
|
||||
source_pr_url="https://github.com/${GITHUB_REPOSITORY}/pull/${SOURCE_PR_NUMBER}"
|
||||
|
||||
reason_text="cherry-pick command failed"
|
||||
if [ "${CHERRY_PICK_REASON}" = "merge-conflict" ]; then
|
||||
if [ "${GATE_ERROR}" = "missing-merge-commit-sha" ]; then
|
||||
reason_text="requested cherry-pick but merge commit SHA was missing"
|
||||
elif [ "${GATE_ERROR}" = "not-allowed-merger" ]; then
|
||||
reason_text="merger is not in the explicit cherry-pick allowlist"
|
||||
elif [ "${CHERRY_PICK_REASON}" = "output-capture-failed" ]; then
|
||||
reason_text="failed to capture cherry-pick output for classification"
|
||||
elif [ "${CHERRY_PICK_REASON}" = "merge-conflict" ]; then
|
||||
reason_text="merge conflict during cherry-pick"
|
||||
fi
|
||||
|
||||
details_excerpt="$(printf '%s' "${CHERRY_PICK_DETAILS}" | tail -n 8 | tr '\n' ' ' | sed "s/[[:space:]]\\+/ /g" | sed "s/\"/'/g" | cut -c1-350)"
|
||||
failed_jobs="• cherry-pick-to-latest-release\\n• source PR: ${source_pr_url}\\n• reason: ${reason_text}"
|
||||
if [ -n "${GATE_ERROR}" ]; then
|
||||
failed_job_label="resolve-cherry-pick-request"
|
||||
else
|
||||
failed_job_label="cherry-pick-to-latest-release"
|
||||
fi
|
||||
details="• author: {mention}\\n• ${failed_job_label}\\n• source PR: ${source_pr_url}\\n• reason: ${reason_text}"
|
||||
if [ -n "${MERGE_COMMIT_SHA}" ]; then
|
||||
details="${details}\\n• merge SHA: ${MERGE_COMMIT_SHA}"
|
||||
fi
|
||||
if [ -n "${details_excerpt}" ]; then
|
||||
failed_jobs="${failed_jobs}\\n• excerpt: ${details_excerpt}"
|
||||
details="${details}\\n• excerpt: ${details_excerpt}"
|
||||
fi
|
||||
|
||||
echo "jobs=${failed_jobs}" >> "$GITHUB_OUTPUT"
|
||||
echo "details=${details}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Notify #cherry-pick-prs about cherry-pick failure
|
||||
uses: ./.github/actions/slack-notify
|
||||
with:
|
||||
webhook-url: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}
|
||||
failed-jobs: ${{ steps.failure-summary.outputs.jobs }}
|
||||
mention: ${{ needs.resolve-cherry-pick-request.outputs.merged_by }}
|
||||
details: ${{ steps.failure-summary.outputs.details }}
|
||||
title: "🚨 Automated Cherry-Pick Failed"
|
||||
ref-name: ${{ github.ref_name }}
|
||||
ref-name: ${{ github.event.pull_request.base.ref }}
|
||||
|
||||
2
.github/workflows/pr-desktop-build.yml
vendored
2
.github/workflows/pr-desktop-build.yml
vendored
@@ -105,7 +105,7 @@ jobs:
|
||||
|
||||
- name: Upload build artifacts
|
||||
if: always()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: desktop-build-${{ matrix.platform }}-${{ github.run_id }}
|
||||
path: |
|
||||
|
||||
@@ -174,7 +174,7 @@ jobs:
|
||||
|
||||
- name: Upload Docker logs
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-logs-${{ matrix.test-dir }}
|
||||
path: docker-logs/
|
||||
|
||||
4
.github/workflows/pr-golang-tests.yml
vendored
4
.github/workflows/pr-golang-tests.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
outputs:
|
||||
modules: ${{ steps.set-modules.outputs.modules }}
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: false
|
||||
- id: set-modules
|
||||
@@ -39,7 +39,7 @@ jobs:
|
||||
matrix:
|
||||
modules: ${{ fromJSON(needs.detect-modules.outputs.modules) }}
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
- uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # zizmor: ignore[cache-poisoning]
|
||||
|
||||
11
.github/workflows/pr-helm-chart-testing.yml
vendored
11
.github/workflows/pr-helm-chart-testing.yml
vendored
@@ -133,7 +133,7 @@ jobs:
|
||||
echo "=== Validating chart dependencies ==="
|
||||
cd deployment/helm/charts/onyx
|
||||
helm dependency update
|
||||
helm lint .
|
||||
helm lint . --set auth.userauth.values.user_auth_secret=placeholder
|
||||
|
||||
- name: Run chart-testing (install) with enhanced monitoring
|
||||
timeout-minutes: 25
|
||||
@@ -194,6 +194,7 @@ jobs:
|
||||
--set=vespa.enabled=false \
|
||||
--set=opensearch.enabled=true \
|
||||
--set=auth.opensearch.enabled=true \
|
||||
--set=auth.userauth.values.user_auth_secret=test-secret \
|
||||
--set=slackbot.enabled=false \
|
||||
--set=postgresql.enabled=true \
|
||||
--set=postgresql.cluster.storage.storageClass=standard \
|
||||
@@ -230,6 +231,10 @@ jobs:
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
run: |
|
||||
echo "=== Post-install verification ==="
|
||||
if ! kubectl cluster-info >/dev/null 2>&1; then
|
||||
echo "ERROR: Kubernetes cluster is not reachable after install"
|
||||
exit 1
|
||||
fi
|
||||
kubectl get pods --all-namespaces
|
||||
kubectl get services --all-namespaces
|
||||
# Only show issues if they exist
|
||||
@@ -239,6 +244,10 @@ jobs:
|
||||
if: failure() && steps.list-changed.outputs.changed == 'true'
|
||||
run: |
|
||||
echo "=== Cleanup on failure ==="
|
||||
if ! kubectl cluster-info >/dev/null 2>&1; then
|
||||
echo "Skipping failure cleanup: Kubernetes cluster is not reachable"
|
||||
exit 0
|
||||
fi
|
||||
echo "=== Final cluster state ==="
|
||||
kubectl get pods --all-namespaces
|
||||
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10
|
||||
|
||||
6
.github/workflows/pr-integration-tests.yml
vendored
6
.github/workflows/pr-integration-tests.yml
vendored
@@ -466,7 +466,7 @@ jobs:
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-all-logs-${{ matrix.edition }}-${{ matrix.test-dir.name }}
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
@@ -587,7 +587,7 @@ jobs:
|
||||
|
||||
- name: Upload logs (onyx-lite)
|
||||
if: always()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-all-logs-onyx-lite
|
||||
path: ${{ github.workspace }}/docker-compose-onyx-lite.log
|
||||
@@ -725,7 +725,7 @@ jobs:
|
||||
|
||||
- name: Upload logs (multi-tenant)
|
||||
if: always()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-all-logs-multitenant
|
||||
path: ${{ github.workspace }}/docker-compose-multitenant.log
|
||||
|
||||
2
.github/workflows/pr-jest-tests.yml
vendored
2
.github/workflows/pr-jest-tests.yml
vendored
@@ -44,7 +44,7 @@ jobs:
|
||||
|
||||
- name: Upload coverage reports
|
||||
if: always()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: jest-coverage-${{ github.run_id }}
|
||||
path: ./web/coverage
|
||||
|
||||
14
.github/workflows/pr-playwright-tests.yml
vendored
14
.github/workflows/pr-playwright-tests.yml
vendored
@@ -445,7 +445,7 @@ jobs:
|
||||
run: |
|
||||
npx playwright test --project ${PROJECT}
|
||||
|
||||
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
if: always()
|
||||
with:
|
||||
# Includes test results and trace.zip files
|
||||
@@ -454,7 +454,7 @@ jobs:
|
||||
retention-days: 30
|
||||
|
||||
- name: Upload screenshots
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
if: always()
|
||||
with:
|
||||
name: playwright-screenshots-${{ matrix.project }}-${{ github.run_id }}
|
||||
@@ -534,7 +534,7 @@ jobs:
|
||||
"s3://${PLAYWRIGHT_S3_BUCKET}/reports/pr-${PR_NUMBER}/${RUN_ID}/${PROJECT}/"
|
||||
|
||||
- name: Upload visual diff summary
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
if: always()
|
||||
with:
|
||||
name: screenshot-diff-summary-${{ matrix.project }}
|
||||
@@ -543,7 +543,7 @@ jobs:
|
||||
retention-days: 5
|
||||
|
||||
- name: Upload visual diff report artifact
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
if: always()
|
||||
with:
|
||||
name: screenshot-diff-report-${{ matrix.project }}-${{ github.run_id }}
|
||||
@@ -590,7 +590,7 @@ jobs:
|
||||
|
||||
- name: Upload logs
|
||||
if: success() || failure()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
@@ -674,7 +674,7 @@ jobs:
|
||||
working-directory: ./web
|
||||
run: npx playwright test --project lite
|
||||
|
||||
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
if: always()
|
||||
with:
|
||||
name: playwright-test-results-lite-${{ github.run_id }}
|
||||
@@ -692,7 +692,7 @@ jobs:
|
||||
|
||||
- name: Upload logs
|
||||
if: success() || failure()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-logs-lite-${{ github.run_id }}
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
|
||||
2
.github/workflows/pr-python-model-tests.yml
vendored
2
.github/workflows/pr-python-model-tests.yml
vendored
@@ -122,7 +122,7 @@ jobs:
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-all-logs
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
|
||||
@@ -319,7 +319,7 @@ jobs:
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-all-logs-nightly-${{ matrix.provider }}-llm-provider
|
||||
path: |
|
||||
|
||||
6
.github/workflows/sandbox-deployment.yml
vendored
6
.github/workflows/sandbox-deployment.yml
vendored
@@ -125,7 +125,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -195,7 +195,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -268,7 +268,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
|
||||
@@ -244,7 +244,10 @@ def do_run_migrations(
|
||||
|
||||
|
||||
def provide_iam_token_for_alembic(
|
||||
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any # noqa: ARG001
|
||||
dialect: Any, # noqa: ARG001
|
||||
conn_rec: Any, # noqa: ARG001
|
||||
cargs: Any, # noqa: ARG001
|
||||
cparams: Any,
|
||||
) -> None:
|
||||
if USE_IAM_AUTH:
|
||||
# Database connection settings
|
||||
@@ -360,8 +363,7 @@ async def run_async_migrations() -> None:
|
||||
# upgrade_all_tenants=true or schemas in multi-tenant mode
|
||||
# and for non-multi-tenant mode, we should use schemas with the default schema
|
||||
raise ValueError(
|
||||
"No migration target specified. Use either upgrade_all_tenants=true for all tenants "
|
||||
"or schemas for specific schemas."
|
||||
"No migration target specified. Use either upgrade_all_tenants=true for all tenants or schemas for specific schemas."
|
||||
)
|
||||
|
||||
await engine.dispose()
|
||||
@@ -457,8 +459,7 @@ def run_migrations_offline() -> None:
|
||||
else:
|
||||
# This should not happen in the new design
|
||||
raise ValueError(
|
||||
"No migration target specified. Use either upgrade_all_tenants=true for all tenants "
|
||||
"or schemas for specific schemas."
|
||||
"No migration target specified. Use either upgrade_all_tenants=true for all tenants or schemas for specific schemas."
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ Usage examples::
|
||||
# custom settings
|
||||
python alembic/run_multitenant_migrations.py -j 8 -b 100
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
@@ -117,8 +118,7 @@ def run_migrations_parallel(
|
||||
batches = [schemas[i : i + batch_size] for i in range(0, len(schemas), batch_size)]
|
||||
total_batches = len(batches)
|
||||
print(
|
||||
f"{len(schemas)} schemas in {total_batches} batch(es) "
|
||||
f"with {max_workers} workers (batch size: {batch_size})...",
|
||||
f"{len(schemas)} schemas in {total_batches} batch(es) with {max_workers} workers (batch size: {batch_size})...",
|
||||
flush=True,
|
||||
)
|
||||
all_success = True
|
||||
@@ -166,8 +166,7 @@ def run_migrations_parallel(
|
||||
with lock:
|
||||
in_flight[batch_idx] = batch
|
||||
print(
|
||||
f"Batch {batch_idx + 1}/{total_batches} started "
|
||||
f"({len(batch)} schemas): {', '.join(batch)}",
|
||||
f"Batch {batch_idx + 1}/{total_batches} started ({len(batch)} schemas): {', '.join(batch)}",
|
||||
flush=True,
|
||||
)
|
||||
result = run_alembic_for_batch(batch)
|
||||
@@ -201,7 +200,7 @@ def run_migrations_parallel(
|
||||
|
||||
except Exception as e:
|
||||
print(
|
||||
f"Batch {batch_idx + 1}/{total_batches} " f"✗ exception: {e}",
|
||||
f"Batch {batch_idx + 1}/{total_batches} ✗ exception: {e}",
|
||||
flush=True,
|
||||
)
|
||||
all_success = False
|
||||
@@ -268,14 +267,12 @@ def main() -> int:
|
||||
|
||||
if not schemas_to_migrate:
|
||||
print(
|
||||
f"All {len(tenant_schemas)} tenants are already at head "
|
||||
f"revision ({head_rev})."
|
||||
f"All {len(tenant_schemas)} tenants are already at head revision ({head_rev})."
|
||||
)
|
||||
return 0
|
||||
|
||||
print(
|
||||
f"{len(schemas_to_migrate)}/{len(tenant_schemas)} tenants need "
|
||||
f"migration (head: {head_rev})."
|
||||
f"{len(schemas_to_migrate)}/{len(tenant_schemas)} tenants need migration (head: {head_rev})."
|
||||
)
|
||||
|
||||
success = run_migrations_parallel(
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
"""add timestamps to user table
|
||||
|
||||
Revision ID: 27fb147a843f
|
||||
Revises: b5c4d7e8f9a1
|
||||
Create Date: 2026-03-08 17:18:40.828644
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "27fb147a843f"
|
||||
down_revision = "b5c4d7e8f9a1"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.add_column(
|
||||
"user",
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.func.now(),
|
||||
nullable=False,
|
||||
),
|
||||
)
|
||||
op.add_column(
|
||||
"user",
|
||||
sa.Column(
|
||||
"updated_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.func.now(),
|
||||
nullable=False,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_column("user", "updated_at")
|
||||
op.drop_column("user", "created_at")
|
||||
@@ -50,8 +50,7 @@ def upgrade() -> None:
|
||||
|
||||
if orphaned_count > 0:
|
||||
logger.warning(
|
||||
f"WARNING: {orphaned_count} chat_session records still have "
|
||||
f"folder_id without project_id. Proceeding anyway."
|
||||
f"WARNING: {orphaned_count} chat_session records still have folder_id without project_id. Proceeding anyway."
|
||||
)
|
||||
|
||||
# === Step 2: Drop chat_session.folder_id ===
|
||||
|
||||
@@ -75,8 +75,7 @@ def batch_delete(
|
||||
|
||||
if failed_batches:
|
||||
logger.warning(
|
||||
f"Failed to delete {len(failed_batches)} batches from {table_name}. "
|
||||
f"Total deleted: {total_deleted}/{total_count}"
|
||||
f"Failed to delete {len(failed_batches)} batches from {table_name}. Total deleted: {total_deleted}/{total_count}"
|
||||
)
|
||||
# Fail the migration to avoid silently succeeding on partial cleanup
|
||||
raise RuntimeError(
|
||||
|
||||
@@ -18,8 +18,7 @@ depends_on = None
|
||||
def upgrade() -> None:
|
||||
# Set all existing records to not migrated
|
||||
op.execute(
|
||||
"UPDATE user_file SET document_id_migrated = FALSE "
|
||||
"WHERE document_id_migrated IS DISTINCT FROM FALSE;"
|
||||
"UPDATE user_file SET document_id_migrated = FALSE WHERE document_id_migrated IS DISTINCT FROM FALSE;"
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -35,7 +35,6 @@ def upgrade() -> None:
|
||||
# environment variables MUST be set. Otherwise, an exception will be raised.
|
||||
|
||||
if not MULTI_TENANT:
|
||||
|
||||
# Enable pg_trgm extension if not already enabled
|
||||
op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")
|
||||
|
||||
@@ -481,8 +480,7 @@ def upgrade() -> None:
|
||||
f"ON kg_entity USING GIN (name {POSTGRES_DEFAULT_SCHEMA}.gin_trgm_ops)"
|
||||
)
|
||||
op.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_kg_entity_normalization_trigrams "
|
||||
"ON kg_entity USING GIN (name_trigrams)"
|
||||
"CREATE INDEX IF NOT EXISTS idx_kg_entity_normalization_trigrams ON kg_entity USING GIN (name_trigrams)"
|
||||
)
|
||||
|
||||
# Create kg_entity trigger to update kg_entity.name and its trigrams
|
||||
|
||||
@@ -51,10 +51,7 @@ def upgrade() -> None:
|
||||
next_email = f"{username.lower()}_{attempt}@{domain.lower()}"
|
||||
# Email conflict occurred, append `_1`, `_2`, etc., to the username
|
||||
logger.warning(
|
||||
f"Conflict while lowercasing email: "
|
||||
f"old_email={email} "
|
||||
f"conflicting_email={new_email} "
|
||||
f"next_email={next_email}"
|
||||
f"Conflict while lowercasing email: old_email={email} conflicting_email={new_email} next_email={next_email}"
|
||||
)
|
||||
new_email = next_email
|
||||
attempt += 1
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
"""add_hook_and_hook_execution_log_tables
|
||||
|
||||
Revision ID: 689433b0d8de
|
||||
Revises: 93a2e195e25c
|
||||
Create Date: 2026-03-13 11:25:06.547474
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import UUID as PGUUID
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "689433b0d8de"
|
||||
down_revision = "93a2e195e25c"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.create_table(
|
||||
"hook",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("name", sa.String(), nullable=False),
|
||||
sa.Column(
|
||||
"hook_point",
|
||||
sa.Enum("document_ingestion", "query_processing", native_enum=False),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column("endpoint_url", sa.Text(), nullable=True),
|
||||
sa.Column("api_key", sa.LargeBinary(), nullable=True),
|
||||
sa.Column("is_reachable", sa.Boolean(), nullable=True),
|
||||
sa.Column(
|
||||
"fail_strategy",
|
||||
sa.Enum("hard", "soft", native_enum=False),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column("timeout_seconds", sa.Float(), nullable=False),
|
||||
sa.Column(
|
||||
"is_active", sa.Boolean(), nullable=False, server_default=sa.text("false")
|
||||
),
|
||||
sa.Column(
|
||||
"deleted", sa.Boolean(), nullable=False, server_default=sa.text("false")
|
||||
),
|
||||
sa.Column("creator_id", PGUUID(as_uuid=True), nullable=True),
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column(
|
||||
"updated_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.ForeignKeyConstraint(["creator_id"], ["user.id"], ondelete="SET NULL"),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
op.create_index(
|
||||
"ix_hook_one_non_deleted_per_point",
|
||||
"hook",
|
||||
["hook_point"],
|
||||
unique=True,
|
||||
postgresql_where=sa.text("deleted = false"),
|
||||
)
|
||||
|
||||
op.create_table(
|
||||
"hook_execution_log",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("hook_id", sa.Integer(), nullable=False),
|
||||
sa.Column(
|
||||
"is_success",
|
||||
sa.Boolean(),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column("error_message", sa.Text(), nullable=True),
|
||||
sa.Column("status_code", sa.Integer(), nullable=True),
|
||||
sa.Column("duration_ms", sa.Integer(), nullable=True),
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.ForeignKeyConstraint(["hook_id"], ["hook.id"], ondelete="CASCADE"),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
op.create_index("ix_hook_execution_log_hook_id", "hook_execution_log", ["hook_id"])
|
||||
op.create_index(
|
||||
"ix_hook_execution_log_created_at", "hook_execution_log", ["created_at"]
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_index("ix_hook_execution_log_created_at", table_name="hook_execution_log")
|
||||
op.drop_index("ix_hook_execution_log_hook_id", table_name="hook_execution_log")
|
||||
op.drop_table("hook_execution_log")
|
||||
|
||||
op.drop_index("ix_hook_one_non_deleted_per_point", table_name="hook")
|
||||
op.drop_table("hook")
|
||||
@@ -24,12 +24,10 @@ depends_on = None
|
||||
def upgrade() -> None:
|
||||
# Convert existing lowercase values to uppercase to match enum member names
|
||||
op.execute(
|
||||
"UPDATE connector_credential_pair SET processing_mode = 'REGULAR' "
|
||||
"WHERE processing_mode = 'regular'"
|
||||
"UPDATE connector_credential_pair SET processing_mode = 'REGULAR' WHERE processing_mode = 'regular'"
|
||||
)
|
||||
op.execute(
|
||||
"UPDATE connector_credential_pair SET processing_mode = 'FILE_SYSTEM' "
|
||||
"WHERE processing_mode = 'file_system'"
|
||||
"UPDATE connector_credential_pair SET processing_mode = 'FILE_SYSTEM' WHERE processing_mode = 'file_system'"
|
||||
)
|
||||
|
||||
# Update the server default to use uppercase
|
||||
|
||||
@@ -289,8 +289,7 @@ def upgrade() -> None:
|
||||
attributes_str = json.dumps(attributes).replace("'", "''")
|
||||
op.execute(
|
||||
sa.text(
|
||||
f"UPDATE kg_entity_type SET attributes = '{attributes_str}'"
|
||||
f"WHERE id_name = '{entity_type}'"
|
||||
f"UPDATE kg_entity_type SET attributes = '{attributes_str}'WHERE id_name = '{entity_type}'"
|
||||
),
|
||||
)
|
||||
|
||||
@@ -312,7 +311,6 @@ def downgrade() -> None:
|
||||
attributes_str = json.dumps(attributes).replace("'", "''")
|
||||
op.execute(
|
||||
sa.text(
|
||||
f"UPDATE kg_entity_type SET attributes = '{attributes_str}'"
|
||||
f"WHERE id_name = '{entity_type}'"
|
||||
f"UPDATE kg_entity_type SET attributes = '{attributes_str}'WHERE id_name = '{entity_type}'"
|
||||
),
|
||||
)
|
||||
|
||||
@@ -160,7 +160,7 @@ def remove_old_tags() -> None:
|
||||
f"""
|
||||
DELETE FROM document__tag
|
||||
WHERE document_id = '{document_id}'
|
||||
AND tag_id IN ({','.join(to_delete)})
|
||||
AND tag_id IN ({",".join(to_delete)})
|
||||
"""
|
||||
)
|
||||
)
|
||||
@@ -239,7 +239,7 @@ def _get_batch_documents_with_multiple_tags(
|
||||
).fetchall()
|
||||
if not batch:
|
||||
break
|
||||
doc_ids = [document_id for document_id, in batch]
|
||||
doc_ids = [document_id for (document_id,) in batch]
|
||||
yield doc_ids
|
||||
offset_clause = f"AND document__tag.document_id > '{doc_ids[-1]}'"
|
||||
|
||||
|
||||
@@ -0,0 +1,117 @@
|
||||
"""add_voice_provider_and_user_voice_prefs
|
||||
|
||||
Revision ID: 93a2e195e25c
|
||||
Revises: 27fb147a843f
|
||||
Create Date: 2026-02-23 15:16:39.507304
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import column
|
||||
from sqlalchemy import true
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "93a2e195e25c"
|
||||
down_revision = "27fb147a843f"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Create voice_provider table
|
||||
op.create_table(
|
||||
"voice_provider",
|
||||
sa.Column("id", sa.Integer(), primary_key=True),
|
||||
sa.Column("name", sa.String(), unique=True, nullable=False),
|
||||
sa.Column("provider_type", sa.String(), nullable=False),
|
||||
sa.Column("api_key", sa.LargeBinary(), nullable=True),
|
||||
sa.Column("api_base", sa.String(), nullable=True),
|
||||
sa.Column("custom_config", postgresql.JSONB(), nullable=True),
|
||||
sa.Column("stt_model", sa.String(), nullable=True),
|
||||
sa.Column("tts_model", sa.String(), nullable=True),
|
||||
sa.Column("default_voice", sa.String(), nullable=True),
|
||||
sa.Column(
|
||||
"is_default_stt", sa.Boolean(), nullable=False, server_default="false"
|
||||
),
|
||||
sa.Column(
|
||||
"is_default_tts", sa.Boolean(), nullable=False, server_default="false"
|
||||
),
|
||||
sa.Column("deleted", sa.Boolean(), nullable=False, server_default="false"),
|
||||
sa.Column(
|
||||
"time_created",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.func.now(),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column(
|
||||
"time_updated",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.func.now(),
|
||||
onupdate=sa.func.now(),
|
||||
nullable=False,
|
||||
),
|
||||
)
|
||||
|
||||
# Add partial unique indexes to enforce only one default STT/TTS provider
|
||||
op.create_index(
|
||||
"ix_voice_provider_one_default_stt",
|
||||
"voice_provider",
|
||||
["is_default_stt"],
|
||||
unique=True,
|
||||
postgresql_where=column("is_default_stt") == true(),
|
||||
)
|
||||
op.create_index(
|
||||
"ix_voice_provider_one_default_tts",
|
||||
"voice_provider",
|
||||
["is_default_tts"],
|
||||
unique=True,
|
||||
postgresql_where=column("is_default_tts") == true(),
|
||||
)
|
||||
|
||||
# Add voice preference columns to user table
|
||||
op.add_column(
|
||||
"user",
|
||||
sa.Column(
|
||||
"voice_auto_send",
|
||||
sa.Boolean(),
|
||||
default=False,
|
||||
nullable=False,
|
||||
server_default="false",
|
||||
),
|
||||
)
|
||||
op.add_column(
|
||||
"user",
|
||||
sa.Column(
|
||||
"voice_auto_playback",
|
||||
sa.Boolean(),
|
||||
default=False,
|
||||
nullable=False,
|
||||
server_default="false",
|
||||
),
|
||||
)
|
||||
op.add_column(
|
||||
"user",
|
||||
sa.Column(
|
||||
"voice_playback_speed",
|
||||
sa.Float(),
|
||||
default=1.0,
|
||||
nullable=False,
|
||||
server_default="1.0",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Remove user voice preference columns
|
||||
op.drop_column("user", "voice_playback_speed")
|
||||
op.drop_column("user", "voice_auto_playback")
|
||||
op.drop_column("user", "voice_auto_send")
|
||||
|
||||
op.drop_index("ix_voice_provider_one_default_tts", table_name="voice_provider")
|
||||
op.drop_index("ix_voice_provider_one_default_stt", table_name="voice_provider")
|
||||
|
||||
# Drop voice_provider table
|
||||
op.drop_table("voice_provider")
|
||||
@@ -24,8 +24,7 @@ TOOL_DESCRIPTIONS = {
|
||||
"The action will be used when the user asks the agent to generate an image."
|
||||
),
|
||||
"WebSearchTool": (
|
||||
"The Web Search Action allows the agent "
|
||||
"to perform internet searches for up-to-date information."
|
||||
"The Web Search Action allows the agent to perform internet searches for up-to-date information."
|
||||
),
|
||||
"KnowledgeGraphTool": (
|
||||
"The Knowledge Graph Search Action allows the agent to search the "
|
||||
|
||||
@@ -140,8 +140,7 @@ def _migrate_files_to_postgres() -> None:
|
||||
# Fetch rows that have external storage pointers (bucket/object_key not NULL)
|
||||
result = session.execute(
|
||||
text(
|
||||
"SELECT file_id, bucket_name, object_key FROM file_record "
|
||||
"WHERE bucket_name IS NOT NULL AND object_key IS NOT NULL"
|
||||
"SELECT file_id, bucket_name, object_key FROM file_record WHERE bucket_name IS NOT NULL AND object_key IS NOT NULL"
|
||||
)
|
||||
)
|
||||
|
||||
@@ -182,8 +181,7 @@ def _migrate_files_to_postgres() -> None:
|
||||
# Update DB row: set lobj_oid, clear bucket/object_key
|
||||
session.execute(
|
||||
text(
|
||||
"UPDATE file_record SET lobj_oid = :lobj_oid, bucket_name = NULL, "
|
||||
"object_key = NULL WHERE file_id = :file_id"
|
||||
"UPDATE file_record SET lobj_oid = :lobj_oid, bucket_name = NULL, object_key = NULL WHERE file_id = :file_id"
|
||||
),
|
||||
{"lobj_oid": lobj_oid, "file_id": file_id},
|
||||
)
|
||||
@@ -224,8 +222,7 @@ def _migrate_files_to_external_storage() -> None:
|
||||
# Find all files currently stored in PostgreSQL (lobj_oid is not null)
|
||||
result = session.execute(
|
||||
text(
|
||||
"SELECT file_id FROM file_record WHERE lobj_oid IS NOT NULL "
|
||||
"AND bucket_name IS NULL AND object_key IS NULL"
|
||||
"SELECT file_id FROM file_record WHERE lobj_oid IS NOT NULL AND bucket_name IS NULL AND object_key IS NULL"
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -39,8 +39,7 @@ BUILT_IN_TOOLS = [
|
||||
"name": "WebSearchTool",
|
||||
"display_name": "Web Search",
|
||||
"description": (
|
||||
"The Web Search Action allows the assistant "
|
||||
"to perform internet searches for up-to-date information."
|
||||
"The Web Search Action allows the assistant to perform internet searches for up-to-date information."
|
||||
),
|
||||
"in_code_tool_id": "WebSearchTool",
|
||||
},
|
||||
|
||||
@@ -25,8 +25,7 @@ def verify_auth_setting() -> None:
|
||||
raw_auth_type = (os.environ.get("AUTH_TYPE") or "").lower()
|
||||
if raw_auth_type == "disabled":
|
||||
logger.warning(
|
||||
"AUTH_TYPE='disabled' is no longer supported. "
|
||||
"Using 'basic' instead. Please update your configuration."
|
||||
"AUTH_TYPE='disabled' is no longer supported. Using 'basic' instead. Please update your configuration."
|
||||
)
|
||||
logger.notice(f"Using Auth Type: {AUTH_TYPE.value}")
|
||||
|
||||
|
||||
@@ -59,7 +59,6 @@ def cloud_beat_task_generator(
|
||||
# gated_tenants = get_gated_tenants()
|
||||
|
||||
for tenant_id in tenant_ids:
|
||||
|
||||
# Same comment here as the above NOTE
|
||||
# if tenant_id in gated_tenants:
|
||||
# continue
|
||||
|
||||
@@ -424,10 +424,7 @@ def connector_permission_sync_generator_task(
|
||||
raise ValueError(error_msg)
|
||||
|
||||
if not redis_connector.permissions.fenced: # The fence must exist
|
||||
error_msg = (
|
||||
f"connector_permission_sync_generator_task - fence not found: "
|
||||
f"fence={redis_connector.permissions.fence_key}"
|
||||
)
|
||||
error_msg = f"connector_permission_sync_generator_task - fence not found: fence={redis_connector.permissions.fence_key}"
|
||||
_fail_doc_permission_sync_attempt(attempt_id, error_msg)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
@@ -441,8 +438,7 @@ def connector_permission_sync_generator_task(
|
||||
|
||||
if payload.celery_task_id is None:
|
||||
logger.info(
|
||||
f"connector_permission_sync_generator_task - Waiting for fence: "
|
||||
f"fence={redis_connector.permissions.fence_key}"
|
||||
f"connector_permission_sync_generator_task - Waiting for fence: fence={redis_connector.permissions.fence_key}"
|
||||
)
|
||||
sleep(1)
|
||||
continue
|
||||
@@ -608,8 +604,7 @@ def connector_permission_sync_generator_task(
|
||||
docs_with_permission_errors=docs_with_errors,
|
||||
)
|
||||
task_logger.info(
|
||||
f"Completed doc permission sync attempt {attempt_id}: "
|
||||
f"{tasks_generated} docs, {docs_with_errors} errors"
|
||||
f"Completed doc permission sync attempt {attempt_id}: {tasks_generated} docs, {docs_with_errors} errors"
|
||||
)
|
||||
|
||||
redis_connector.permissions.generator_complete = tasks_generated
|
||||
@@ -716,9 +711,7 @@ def element_update_permissions(
|
||||
|
||||
elapsed = time.monotonic() - start
|
||||
task_logger.info(
|
||||
f"{element_type}={element_id} "
|
||||
f"action=update_permissions "
|
||||
f"elapsed={elapsed:.2f}"
|
||||
f"{element_type}={element_id} action=update_permissions elapsed={elapsed:.2f}"
|
||||
)
|
||||
except Exception as e:
|
||||
task_logger.exception(
|
||||
@@ -900,8 +893,7 @@ def validate_permission_sync_fence(
|
||||
tasks_not_in_celery += 1
|
||||
|
||||
task_logger.info(
|
||||
"validate_permission_sync_fence task check: "
|
||||
f"tasks_scanned={tasks_scanned} tasks_not_in_celery={tasks_not_in_celery}"
|
||||
f"validate_permission_sync_fence task check: tasks_scanned={tasks_scanned} tasks_not_in_celery={tasks_not_in_celery}"
|
||||
)
|
||||
|
||||
# we're active if there are still tasks to run and those tasks all exist in celery
|
||||
@@ -1007,7 +999,10 @@ class PermissionSyncCallback(IndexingHeartbeatInterface):
|
||||
|
||||
|
||||
def monitor_ccpair_permissions_taskset(
|
||||
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session # noqa: ARG001
|
||||
tenant_id: str,
|
||||
key_bytes: bytes,
|
||||
r: Redis, # noqa: ARG001
|
||||
db_session: Session,
|
||||
) -> None:
|
||||
fence_key = key_bytes.decode("utf-8")
|
||||
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
|
||||
@@ -1031,8 +1026,7 @@ def monitor_ccpair_permissions_taskset(
|
||||
payload = redis_connector.permissions.payload
|
||||
except ValidationError:
|
||||
task_logger.exception(
|
||||
"Permissions sync payload failed to validate. "
|
||||
"Schema may have been updated."
|
||||
"Permissions sync payload failed to validate. Schema may have been updated."
|
||||
)
|
||||
return
|
||||
|
||||
@@ -1041,11 +1035,7 @@ def monitor_ccpair_permissions_taskset(
|
||||
|
||||
remaining = redis_connector.permissions.get_remaining()
|
||||
task_logger.info(
|
||||
f"Permissions sync progress: "
|
||||
f"cc_pair={cc_pair_id} "
|
||||
f"id={payload.id} "
|
||||
f"remaining={remaining} "
|
||||
f"initial={initial}"
|
||||
f"Permissions sync progress: cc_pair={cc_pair_id} id={payload.id} remaining={remaining} initial={initial}"
|
||||
)
|
||||
|
||||
# Add telemetry for permission syncing progress
|
||||
@@ -1064,10 +1054,7 @@ def monitor_ccpair_permissions_taskset(
|
||||
|
||||
mark_cc_pair_as_permissions_synced(db_session, int(cc_pair_id), payload.started)
|
||||
task_logger.info(
|
||||
f"Permissions sync finished: "
|
||||
f"cc_pair={cc_pair_id} "
|
||||
f"id={payload.id} "
|
||||
f"num_synced={initial}"
|
||||
f"Permissions sync finished: cc_pair={cc_pair_id} id={payload.id} num_synced={initial}"
|
||||
)
|
||||
|
||||
# Add telemetry for permission syncing complete
|
||||
|
||||
@@ -111,23 +111,20 @@ def _is_external_group_sync_due(cc_pair: ConnectorCredentialPair) -> bool:
|
||||
|
||||
if cc_pair.access_type != AccessType.SYNC:
|
||||
task_logger.error(
|
||||
f"Received non-sync CC Pair {cc_pair.id} for external "
|
||||
f"group sync. Actual access type: {cc_pair.access_type}"
|
||||
f"Received non-sync CC Pair {cc_pair.id} for external group sync. Actual access type: {cc_pair.access_type}"
|
||||
)
|
||||
return False
|
||||
|
||||
if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
|
||||
task_logger.debug(
|
||||
f"Skipping group sync for CC Pair {cc_pair.id} - "
|
||||
f"CC Pair is being deleted"
|
||||
f"Skipping group sync for CC Pair {cc_pair.id} - CC Pair is being deleted"
|
||||
)
|
||||
return False
|
||||
|
||||
sync_config = get_source_perm_sync_config(cc_pair.connector.source)
|
||||
if sync_config is None:
|
||||
task_logger.debug(
|
||||
f"Skipping group sync for CC Pair {cc_pair.id} - "
|
||||
f"no sync config found for {cc_pair.connector.source}"
|
||||
f"Skipping group sync for CC Pair {cc_pair.id} - no sync config found for {cc_pair.connector.source}"
|
||||
)
|
||||
return False
|
||||
|
||||
@@ -135,8 +132,7 @@ def _is_external_group_sync_due(cc_pair: ConnectorCredentialPair) -> bool:
|
||||
# This is fine because all sources dont necessarily have a concept of groups
|
||||
if sync_config.group_sync_config is None:
|
||||
task_logger.debug(
|
||||
f"Skipping group sync for CC Pair {cc_pair.id} - "
|
||||
f"no group sync config found for {cc_pair.connector.source}"
|
||||
f"Skipping group sync for CC Pair {cc_pair.id} - no group sync config found for {cc_pair.connector.source}"
|
||||
)
|
||||
return False
|
||||
|
||||
|
||||
@@ -74,8 +74,7 @@ def perform_ttl_management_task(
|
||||
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"delete_chat_session exceptioned. "
|
||||
f"user_id={user_id} session_id={session_id}"
|
||||
f"delete_chat_session exceptioned. user_id={user_id} session_id={session_id}"
|
||||
)
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
mark_task_as_finished_with_id(
|
||||
|
||||
@@ -7,7 +7,8 @@ QUERY_HISTORY_TASK_NAME_PREFIX = OnyxCeleryTask.EXPORT_QUERY_HISTORY_TASK
|
||||
|
||||
|
||||
def name_chat_ttl_task(
|
||||
retention_limit_days: float, tenant_id: str | None = None # noqa: ARG001
|
||||
retention_limit_days: float,
|
||||
tenant_id: str | None = None, # noqa: ARG001
|
||||
) -> str:
|
||||
return f"chat_ttl_{retention_limit_days}_days"
|
||||
|
||||
|
||||
@@ -118,9 +118,7 @@ JWT_PUBLIC_KEY_URL: str | None = os.getenv("JWT_PUBLIC_KEY_URL", None)
|
||||
SUPER_USERS = json.loads(os.environ.get("SUPER_USERS", "[]"))
|
||||
SUPER_CLOUD_API_KEY = os.environ.get("SUPER_CLOUD_API_KEY", "api_key")
|
||||
|
||||
# The posthog client does not accept empty API keys or hosts however it fails silently
|
||||
# when the capture is called. These defaults prevent Posthog issues from breaking the Onyx app
|
||||
POSTHOG_API_KEY = os.environ.get("POSTHOG_API_KEY") or "FooBar"
|
||||
POSTHOG_API_KEY = os.environ.get("POSTHOG_API_KEY")
|
||||
POSTHOG_HOST = os.environ.get("POSTHOG_HOST") or "https://us.i.posthog.com"
|
||||
POSTHOG_DEBUG_LOGS_ENABLED = (
|
||||
os.environ.get("POSTHOG_DEBUG_LOGS_ENABLED", "").lower() == "true"
|
||||
|
||||
@@ -31,7 +31,8 @@ def fetch_query_analytics(
|
||||
func.sum(case((ChatMessageFeedback.is_positive, 1), else_=0)),
|
||||
func.sum(
|
||||
case(
|
||||
(ChatMessageFeedback.is_positive == False, 1), else_=0 # noqa: E712
|
||||
(ChatMessageFeedback.is_positive == False, 1), # noqa: E712
|
||||
else_=0, # noqa: E712
|
||||
)
|
||||
),
|
||||
cast(ChatMessage.time_sent, Date),
|
||||
@@ -66,7 +67,8 @@ def fetch_per_user_query_analytics(
|
||||
func.sum(case((ChatMessageFeedback.is_positive, 1), else_=0)),
|
||||
func.sum(
|
||||
case(
|
||||
(ChatMessageFeedback.is_positive == False, 1), else_=0 # noqa: E712
|
||||
(ChatMessageFeedback.is_positive == False, 1), # noqa: E712
|
||||
else_=0, # noqa: E712
|
||||
)
|
||||
),
|
||||
cast(ChatMessage.time_sent, Date),
|
||||
|
||||
@@ -23,8 +23,7 @@ def _delete_connector_credential_pair_user_groups_relationship__no_commit(
|
||||
)
|
||||
if cc_pair is None:
|
||||
raise ValueError(
|
||||
f"ConnectorCredentialPair with connector_id: {connector_id} "
|
||||
f"and credential_id: {credential_id} not found"
|
||||
f"ConnectorCredentialPair with connector_id: {connector_id} and credential_id: {credential_id} not found"
|
||||
)
|
||||
|
||||
stmt = delete(UserGroup__ConnectorCredentialPair).where(
|
||||
|
||||
@@ -123,8 +123,7 @@ def upsert_external_groups(
|
||||
user_id = email_id_map.get(user_email.lower())
|
||||
if user_id is None:
|
||||
logger.warning(
|
||||
f"User in group {external_group.id}"
|
||||
f" with email {user_email} not found"
|
||||
f"User in group {external_group.id} with email {user_email} not found"
|
||||
)
|
||||
continue
|
||||
|
||||
|
||||
@@ -191,8 +191,7 @@ def create_initial_default_standard_answer_category(db_session: Session) -> None
|
||||
if default_category is not None:
|
||||
if default_category.name != default_category_name:
|
||||
raise ValueError(
|
||||
"DB is not in a valid initial state. "
|
||||
"Default standard answer category does not have expected name."
|
||||
"DB is not in a valid initial state. Default standard answer category does not have expected name."
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
@@ -424,8 +424,7 @@ def fetch_user_groups_for_documents(
|
||||
def _check_user_group_is_modifiable(user_group: UserGroup) -> None:
|
||||
if not user_group.is_up_to_date:
|
||||
raise ValueError(
|
||||
"Specified user group is currently syncing. Wait until the current "
|
||||
"sync has finished before editing."
|
||||
"Specified user group is currently syncing. Wait until the current sync has finished before editing."
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -56,8 +56,7 @@ def _run_with_retry(
|
||||
if retry_count < MAX_RETRY_COUNT:
|
||||
sleep_after_rate_limit_exception(github_client)
|
||||
logger.warning(
|
||||
f"Rate limit exceeded while {description}. Retrying... "
|
||||
f"(attempt {retry_count + 1}/{MAX_RETRY_COUNT})"
|
||||
f"Rate limit exceeded while {description}. Retrying... (attempt {retry_count + 1}/{MAX_RETRY_COUNT})"
|
||||
)
|
||||
return _run_with_retry(
|
||||
operation, description, github_client, retry_count + 1
|
||||
@@ -91,7 +90,9 @@ class TeamInfo(BaseModel):
|
||||
|
||||
|
||||
def _fetch_organization_members(
|
||||
github_client: Github, org_name: str, retry_count: int = 0 # noqa: ARG001
|
||||
github_client: Github,
|
||||
org_name: str,
|
||||
retry_count: int = 0, # noqa: ARG001
|
||||
) -> List[UserInfo]:
|
||||
"""Fetch all organization members including owners and regular members."""
|
||||
org_members: List[UserInfo] = []
|
||||
@@ -124,7 +125,9 @@ def _fetch_organization_members(
|
||||
|
||||
|
||||
def _fetch_repository_teams_detailed(
|
||||
repo: Repository, github_client: Github, retry_count: int = 0 # noqa: ARG001
|
||||
repo: Repository,
|
||||
github_client: Github,
|
||||
retry_count: int = 0, # noqa: ARG001
|
||||
) -> List[TeamInfo]:
|
||||
"""Fetch teams with access to the repository and their members."""
|
||||
teams_data: List[TeamInfo] = []
|
||||
@@ -167,7 +170,9 @@ def _fetch_repository_teams_detailed(
|
||||
|
||||
|
||||
def fetch_repository_team_slugs(
|
||||
repo: Repository, github_client: Github, retry_count: int = 0 # noqa: ARG001
|
||||
repo: Repository,
|
||||
github_client: Github,
|
||||
retry_count: int = 0, # noqa: ARG001
|
||||
) -> List[str]:
|
||||
"""Fetch team slugs with access to the repository."""
|
||||
logger.info(f"Fetching team slugs for repository {repo.full_name}")
|
||||
|
||||
@@ -115,8 +115,7 @@ def get_external_access_for_raw_gdrive_file(
|
||||
)
|
||||
if len(permissions_list) != len(permission_ids) and retriever_drive_service:
|
||||
logger.warning(
|
||||
f"Failed to get all permissions for file {doc_id} with retriever service, "
|
||||
"trying admin service"
|
||||
f"Failed to get all permissions for file {doc_id} with retriever service, trying admin service"
|
||||
)
|
||||
backup_permissions_list = _get_permissions(admin_drive_service)
|
||||
permissions_list = _merge_permissions_lists(
|
||||
@@ -166,9 +165,7 @@ def get_external_access_for_raw_gdrive_file(
|
||||
user_emails.add(permission.email_address)
|
||||
else:
|
||||
logger.error(
|
||||
"Permission is type `user` but no email address is "
|
||||
f"provided for document {doc_id}"
|
||||
f"\n {permission}"
|
||||
f"Permission is type `user` but no email address is provided for document {doc_id}\n {permission}"
|
||||
)
|
||||
elif permission.type == PermissionType.GROUP:
|
||||
# groups are represented as email addresses within Drive
|
||||
@@ -176,17 +173,14 @@ def get_external_access_for_raw_gdrive_file(
|
||||
group_emails.add(permission.email_address)
|
||||
else:
|
||||
logger.error(
|
||||
"Permission is type `group` but no email address is "
|
||||
f"provided for document {doc_id}"
|
||||
f"\n {permission}"
|
||||
f"Permission is type `group` but no email address is provided for document {doc_id}\n {permission}"
|
||||
)
|
||||
elif permission.type == PermissionType.DOMAIN and company_domain:
|
||||
if permission.domain == company_domain:
|
||||
public = True
|
||||
else:
|
||||
logger.warning(
|
||||
"Permission is type domain but does not match company domain:"
|
||||
f"\n {permission}"
|
||||
f"Permission is type domain but does not match company domain:\n {permission}"
|
||||
)
|
||||
elif permission.type == PermissionType.ANYONE:
|
||||
public = True
|
||||
|
||||
@@ -18,10 +18,7 @@ logger = setup_logger()
|
||||
# Only include fields we need - folder ID and permissions
|
||||
# IMPORTANT: must fetch permissionIds, since sometimes the drive API
|
||||
# seems to miss permissions when requesting them directly
|
||||
FOLDER_PERMISSION_FIELDS = (
|
||||
"nextPageToken, files(id, name, permissionIds, "
|
||||
"permissions(id, emailAddress, type, domain, permissionDetails))"
|
||||
)
|
||||
FOLDER_PERMISSION_FIELDS = "nextPageToken, files(id, name, permissionIds, permissions(id, emailAddress, type, domain, permissionDetails))"
|
||||
|
||||
|
||||
def get_folder_permissions_by_ids(
|
||||
|
||||
@@ -142,8 +142,7 @@ def _drive_folder_to_onyx_group(
|
||||
elif permission.type == PermissionType.GROUP:
|
||||
if permission.email_address not in group_email_to_member_emails_map:
|
||||
logger.warning(
|
||||
f"Group email {permission.email_address} for folder {folder.id} "
|
||||
"not found in group_email_to_member_emails_map"
|
||||
f"Group email {permission.email_address} for folder {folder.id} not found in group_email_to_member_emails_map"
|
||||
)
|
||||
continue
|
||||
folder_member_emails.update(
|
||||
@@ -238,8 +237,7 @@ def _drive_member_map_to_onyx_groups(
|
||||
for group_email in group_emails:
|
||||
if group_email not in group_email_to_member_emails_map:
|
||||
logger.warning(
|
||||
f"Group email {group_email} for drive {drive_id} not found in "
|
||||
"group_email_to_member_emails_map"
|
||||
f"Group email {group_email} for drive {drive_id} not found in group_email_to_member_emails_map"
|
||||
)
|
||||
continue
|
||||
drive_member_emails.update(group_email_to_member_emails_map[group_email])
|
||||
@@ -326,8 +324,7 @@ def _build_onyx_groups(
|
||||
for group_email in group_emails:
|
||||
if group_email not in group_email_to_member_emails_map:
|
||||
logger.warning(
|
||||
f"Group email {group_email} for drive {drive_id} not found in "
|
||||
"group_email_to_member_emails_map"
|
||||
f"Group email {group_email} for drive {drive_id} not found in group_email_to_member_emails_map"
|
||||
)
|
||||
continue
|
||||
drive_member_emails.update(group_email_to_member_emails_map[group_email])
|
||||
|
||||
@@ -55,8 +55,7 @@ def get_permissions_by_ids(
|
||||
if len(filtered_permissions) < len(permission_ids):
|
||||
missing_ids = permission_id_set - {p.id for p in filtered_permissions if p.id}
|
||||
logger.warning(
|
||||
f"Could not find all requested permission IDs for document {doc_id}. "
|
||||
f"Missing IDs: {missing_ids}"
|
||||
f"Could not find all requested permission IDs for document {doc_id}. Missing IDs: {missing_ids}"
|
||||
)
|
||||
|
||||
return filtered_permissions
|
||||
|
||||
@@ -89,8 +89,7 @@ def _get_group_member_emails(
|
||||
emails.add(email)
|
||||
else:
|
||||
logger.warning(
|
||||
f"Atlassian user {member.get('accountId', 'unknown')} "
|
||||
f"in group {group_name} has no visible email address"
|
||||
f"Atlassian user {member.get('accountId', 'unknown')} in group {group_name} has no visible email address"
|
||||
)
|
||||
|
||||
if page.get("isLast", True) or not members:
|
||||
|
||||
@@ -69,8 +69,7 @@ def _post_query_chunk_censoring(
|
||||
censored_chunks = censor_chunks_for_source(chunks_for_source, user.email)
|
||||
except Exception as e:
|
||||
logger.exception(
|
||||
f"Failed to censor chunks for source {source} so throwing out all"
|
||||
f" chunks for this source and continuing: {e}"
|
||||
f"Failed to censor chunks for source {source} so throwing out all chunks for this source and continuing: {e}"
|
||||
)
|
||||
continue
|
||||
|
||||
|
||||
@@ -23,7 +23,9 @@ ContentRange = tuple[int, int | None] # (start_index, end_index) None means to
|
||||
|
||||
# NOTE: Used for testing timing
|
||||
def _get_dummy_object_access_map(
|
||||
object_ids: set[str], user_email: str, chunks: list[InferenceChunk] # noqa: ARG001
|
||||
object_ids: set[str],
|
||||
user_email: str, # noqa: ARG001
|
||||
chunks: list[InferenceChunk], # noqa: ARG001
|
||||
) -> dict[str, bool]:
|
||||
time.sleep(0.15)
|
||||
# return {object_id: True for object_id in object_ids}
|
||||
|
||||
@@ -61,8 +61,7 @@ def _graph_api_get(
|
||||
):
|
||||
wait = min(int(resp.headers.get("Retry-After", str(2**attempt))), 60)
|
||||
logger.warning(
|
||||
f"Graph API {resp.status_code} on attempt {attempt + 1}, "
|
||||
f"retrying in {wait}s: {url}"
|
||||
f"Graph API {resp.status_code} on attempt {attempt + 1}, retrying in {wait}s: {url}"
|
||||
)
|
||||
time.sleep(wait)
|
||||
continue
|
||||
@@ -72,8 +71,7 @@ def _graph_api_get(
|
||||
if attempt < GRAPH_API_MAX_RETRIES:
|
||||
wait = min(2**attempt, 60)
|
||||
logger.warning(
|
||||
f"Graph API connection error on attempt {attempt + 1}, "
|
||||
f"retrying in {wait}s: {url}"
|
||||
f"Graph API connection error on attempt {attempt + 1}, retrying in {wait}s: {url}"
|
||||
)
|
||||
time.sleep(wait)
|
||||
continue
|
||||
@@ -767,8 +765,7 @@ def get_sharepoint_external_groups(
|
||||
|
||||
if not enumerate_all_ad_groups or get_access_token is None:
|
||||
logger.info(
|
||||
"Skipping exhaustive Azure AD group enumeration. "
|
||||
"Only groups found in site role assignments are included."
|
||||
"Skipping exhaustive Azure AD group enumeration. Only groups found in site role assignments are included."
|
||||
)
|
||||
return external_user_groups
|
||||
|
||||
|
||||
@@ -166,8 +166,7 @@ def slack_doc_sync(
|
||||
user_id_to_email_map = fetch_user_id_to_email_map(slack_client)
|
||||
if not user_id_to_email_map:
|
||||
raise ValueError(
|
||||
"No user id to email map found. Please check to make sure that "
|
||||
"your Slack bot token has the `users:read.email` scope"
|
||||
"No user id to email map found. Please check to make sure that your Slack bot token has the `users:read.email` scope"
|
||||
)
|
||||
|
||||
workspace_permissions = _fetch_workspace_permissions(
|
||||
|
||||
@@ -34,6 +34,9 @@ class PostHogFeatureFlagProvider(FeatureFlagProvider):
|
||||
Returns:
|
||||
True if the feature is enabled for the user, False otherwise.
|
||||
"""
|
||||
if not posthog:
|
||||
return False
|
||||
|
||||
try:
|
||||
posthog.set(
|
||||
distinct_id=user_id,
|
||||
|
||||
@@ -152,10 +152,7 @@ def create_new_usage_report(
|
||||
zip_buffer.seek(0)
|
||||
|
||||
# store zip blob to file_store
|
||||
report_name = (
|
||||
f"{datetime.now(tz=timezone.utc).strftime('%Y-%m-%d')}"
|
||||
f"_{report_id}_usage_report.zip"
|
||||
)
|
||||
report_name = f"{datetime.now(tz=timezone.utc).strftime('%Y-%m-%d')}_{report_id}_usage_report.zip"
|
||||
file_store.save_file(
|
||||
content=zip_buffer,
|
||||
display_name=report_name,
|
||||
|
||||
@@ -449,8 +449,7 @@ def _apply_group_remove(
|
||||
match = _MEMBER_FILTER_RE.match(op.path)
|
||||
if not match:
|
||||
raise ScimPatchError(
|
||||
f"Unsupported remove path '{op.path}'. "
|
||||
'Expected: members[value eq "user-id"]'
|
||||
f"Unsupported remove path '{op.path}'. Expected: members[value eq \"user-id\"]"
|
||||
)
|
||||
|
||||
target_id = match.group(1)
|
||||
|
||||
@@ -29,7 +29,6 @@ from onyx.configs.app_configs import OPENAI_DEFAULT_API_KEY
|
||||
from onyx.configs.app_configs import OPENROUTER_DEFAULT_API_KEY
|
||||
from onyx.configs.app_configs import VERTEXAI_DEFAULT_CREDENTIALS
|
||||
from onyx.configs.app_configs import VERTEXAI_DEFAULT_LOCATION
|
||||
from onyx.configs.constants import MilestoneRecordType
|
||||
from onyx.db.engine.sql_engine import get_session_with_shared_schema
|
||||
from onyx.db.engine.sql_engine import get_session_with_tenant
|
||||
from onyx.db.image_generation import create_default_image_gen_config_from_api_key
|
||||
@@ -59,7 +58,6 @@ from onyx.server.manage.llm.models import LLMProviderUpsertRequest
|
||||
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
|
||||
from onyx.setup import setup_onyx
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.telemetry import mt_cloud_telemetry
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
|
||||
from shared_configs.configs import TENANT_ID_PREFIX
|
||||
@@ -71,7 +69,9 @@ logger = setup_logger()
|
||||
|
||||
|
||||
async def get_or_provision_tenant(
|
||||
email: str, referral_source: str | None = None, request: Request | None = None
|
||||
email: str,
|
||||
referral_source: str | None = None,
|
||||
request: Request | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Get existing tenant ID for an email or create a new tenant if none exists.
|
||||
@@ -123,7 +123,8 @@ async def get_or_provision_tenant(
|
||||
|
||||
|
||||
async def create_tenant(
|
||||
email: str, referral_source: str | None = None # noqa: ARG001
|
||||
email: str,
|
||||
referral_source: str | None = None, # noqa: ARG001
|
||||
) -> str:
|
||||
"""
|
||||
Create a new tenant on-demand when no pre-provisioned tenants are available.
|
||||
@@ -679,7 +680,9 @@ async def setup_tenant(tenant_id: str) -> None:
|
||||
|
||||
|
||||
async def assign_tenant_to_user(
|
||||
tenant_id: str, email: str, referral_source: str | None = None # noqa: ARG001
|
||||
tenant_id: str,
|
||||
email: str,
|
||||
referral_source: str | None = None, # noqa: ARG001
|
||||
) -> None:
|
||||
"""
|
||||
Assign a tenant to a user and perform necessary operations.
|
||||
@@ -690,12 +693,6 @@ async def assign_tenant_to_user(
|
||||
|
||||
try:
|
||||
add_users_to_tenant([email], tenant_id)
|
||||
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=email,
|
||||
event=MilestoneRecordType.TENANT_CREATED,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(f"Failed to assign tenant {tenant_id} to user {email}")
|
||||
raise Exception("Failed to assign tenant to user")
|
||||
|
||||
@@ -75,8 +75,7 @@ def _decrypt_bytes(input_bytes: bytes, key: str | None = None) -> str:
|
||||
# Does NOT handle data encrypted with a different key — that
|
||||
# ciphertext is not valid UTF-8 and will raise below.
|
||||
logger.warning(
|
||||
"AES decryption failed — falling back to raw decode. "
|
||||
"Run the re-encrypt secrets script to rotate to the current key."
|
||||
"AES decryption failed — falling back to raw decode. Run the re-encrypt secrets script to rotate to the current key."
|
||||
)
|
||||
try:
|
||||
return input_bytes.decode()
|
||||
|
||||
@@ -9,6 +9,7 @@ from ee.onyx.configs.app_configs import POSTHOG_API_KEY
|
||||
from ee.onyx.configs.app_configs import POSTHOG_DEBUG_LOGS_ENABLED
|
||||
from ee.onyx.configs.app_configs import POSTHOG_HOST
|
||||
from onyx.utils.logger import setup_logger
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
@@ -18,12 +19,19 @@ def posthog_on_error(error: Any, items: Any) -> None:
|
||||
logger.error(f"PostHog error: {error}, items: {items}")
|
||||
|
||||
|
||||
posthog = Posthog(
|
||||
project_api_key=POSTHOG_API_KEY,
|
||||
host=POSTHOG_HOST,
|
||||
debug=POSTHOG_DEBUG_LOGS_ENABLED,
|
||||
on_error=posthog_on_error,
|
||||
)
|
||||
posthog: Posthog | None = None
|
||||
if POSTHOG_API_KEY:
|
||||
posthog = Posthog(
|
||||
project_api_key=POSTHOG_API_KEY,
|
||||
host=POSTHOG_HOST,
|
||||
debug=POSTHOG_DEBUG_LOGS_ENABLED,
|
||||
on_error=posthog_on_error,
|
||||
)
|
||||
elif MULTI_TENANT:
|
||||
logger.warning(
|
||||
"POSTHOG_API_KEY is not set but MULTI_TENANT is enabled — "
|
||||
"PostHog telemetry and feature flags will be disabled"
|
||||
)
|
||||
|
||||
# For cross referencing between cloud and www Onyx sites
|
||||
# NOTE: These clients are separate because they are separate posthog projects.
|
||||
@@ -60,7 +68,7 @@ def capture_and_sync_with_alternate_posthog(
|
||||
logger.error(f"Error capturing marketing posthog event: {e}")
|
||||
|
||||
try:
|
||||
if cloud_user_id := props.get("onyx_cloud_user_id"):
|
||||
if posthog and (cloud_user_id := props.get("onyx_cloud_user_id")):
|
||||
cloud_props = props.copy()
|
||||
cloud_props.pop("onyx_cloud_user_id", None)
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from typing import Any
|
||||
|
||||
from ee.onyx.utils.posthog_client import posthog
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
@@ -5,12 +7,27 @@ logger = setup_logger()
|
||||
|
||||
|
||||
def event_telemetry(
|
||||
distinct_id: str, event: str, properties: dict | None = None
|
||||
distinct_id: str, event: str, properties: dict[str, Any] | None = None
|
||||
) -> None:
|
||||
"""Capture and send an event to PostHog, flushing immediately."""
|
||||
if not posthog:
|
||||
return
|
||||
|
||||
logger.info(f"Capturing PostHog event: {distinct_id} {event} {properties}")
|
||||
try:
|
||||
posthog.capture(distinct_id, event, properties)
|
||||
posthog.flush()
|
||||
except Exception as e:
|
||||
logger.error(f"Error capturing PostHog event: {e}")
|
||||
|
||||
|
||||
def identify_user(distinct_id: str, properties: dict[str, Any] | None = None) -> None:
|
||||
"""Create/update a PostHog person profile, flushing immediately."""
|
||||
if not posthog:
|
||||
return
|
||||
|
||||
try:
|
||||
posthog.identify(distinct_id, properties)
|
||||
posthog.flush()
|
||||
except Exception as e:
|
||||
logger.error(f"Error identifying PostHog user: {e}")
|
||||
|
||||
@@ -96,7 +96,9 @@ def get_access_for_documents(
|
||||
return versioned_get_access_for_documents_fn(document_ids, db_session)
|
||||
|
||||
|
||||
def _get_acl_for_user(user: User, db_session: Session) -> set[str]: # noqa: ARG001
|
||||
def _get_acl_for_user(
|
||||
user: User, db_session: Session # noqa: ARG001
|
||||
) -> set[str]: # noqa: ARG001
|
||||
"""Returns a list of ACL entries that the user has access to. This is meant to be
|
||||
used downstream to filter out documents that the user does not have access to. The
|
||||
user should have access to a document if at least one entry in the document's ACL
|
||||
|
||||
@@ -5,7 +5,8 @@ from onyx.utils.variable_functionality import fetch_versioned_implementation
|
||||
|
||||
|
||||
def _get_user_external_group_ids(
|
||||
db_session: Session, user: User # noqa: ARG001
|
||||
db_session: Session, # noqa: ARG001
|
||||
user: User, # noqa: ARG001
|
||||
) -> list[str]:
|
||||
return []
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ from onyx.configs.constants import PUBLIC_DOC_PAT
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ExternalAccess:
|
||||
|
||||
# arbitrary limit to prevent excessively large permissions sets
|
||||
# not internally enforced ... the caller can check this before using the instance
|
||||
MAX_NUM_ENTRIES = 5000
|
||||
|
||||
@@ -96,8 +96,7 @@ async def verify_captcha_token(
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Captcha verification passed: score={result.score}, "
|
||||
f"action={result.action}"
|
||||
f"Captcha verification passed: score={result.score}, action={result.action}"
|
||||
)
|
||||
|
||||
except httpx.HTTPError as e:
|
||||
|
||||
@@ -353,20 +353,11 @@ def build_user_email_invite(
|
||||
"or login with Google and complete your registration.</p>"
|
||||
)
|
||||
elif auth_type == AuthType.BASIC:
|
||||
message += (
|
||||
"<p>To join the organization, please click the button below to set a password "
|
||||
"and complete your registration.</p>"
|
||||
)
|
||||
message += "<p>To join the organization, please click the button below to set a password and complete your registration.</p>"
|
||||
elif auth_type == AuthType.GOOGLE_OAUTH:
|
||||
message += (
|
||||
"<p>To join the organization, please click the button below to login with Google "
|
||||
"and complete your registration.</p>"
|
||||
)
|
||||
message += "<p>To join the organization, please click the button below to login with Google and complete your registration.</p>"
|
||||
elif auth_type == AuthType.OIDC or auth_type == AuthType.SAML:
|
||||
message += (
|
||||
"<p>To join the organization, please click the button below to"
|
||||
" complete your registration.</p>"
|
||||
)
|
||||
message += "<p>To join the organization, please click the button below to complete your registration.</p>"
|
||||
else:
|
||||
raise ValueError(f"Invalid auth type: {auth_type}")
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import base64
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
@@ -17,6 +19,7 @@ from typing import Optional
|
||||
from typing import Protocol
|
||||
from typing import Tuple
|
||||
from typing import TypeVar
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import jwt
|
||||
from email_validator import EmailNotValidError
|
||||
@@ -29,6 +32,8 @@ from fastapi import Query
|
||||
from fastapi import Request
|
||||
from fastapi import Response
|
||||
from fastapi import status
|
||||
from fastapi import WebSocket
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.responses import RedirectResponse
|
||||
from fastapi.security import OAuth2PasswordRequestForm
|
||||
from fastapi_users import BaseUserManager
|
||||
@@ -55,6 +60,7 @@ from fastapi_users.router.common import ErrorModel
|
||||
from fastapi_users_db_sqlalchemy import SQLAlchemyUserDatabase
|
||||
from httpx_oauth.integrations.fastapi import OAuth2AuthorizeCallback
|
||||
from httpx_oauth.oauth2 import BaseOAuth2
|
||||
from httpx_oauth.oauth2 import GetAccessTokenError
|
||||
from httpx_oauth.oauth2 import OAuth2Token
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import nulls_last
|
||||
@@ -120,10 +126,16 @@ from onyx.db.models import Persona
|
||||
from onyx.db.models import User
|
||||
from onyx.db.pat import fetch_user_for_pat
|
||||
from onyx.db.users import get_user_by_email
|
||||
from onyx.error_handling.error_codes import OnyxErrorCode
|
||||
from onyx.error_handling.exceptions import log_onyx_error
|
||||
from onyx.error_handling.exceptions import onyx_error_to_json_response
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
from onyx.redis.redis_pool import get_async_redis_connection
|
||||
from onyx.redis.redis_pool import retrieve_ws_token_data
|
||||
from onyx.server.settings.store import load_settings
|
||||
from onyx.server.utils import BasicAuthenticationError
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.telemetry import mt_cloud_identify
|
||||
from onyx.utils.telemetry import mt_cloud_telemetry
|
||||
from onyx.utils.telemetry import optional_telemetry
|
||||
from onyx.utils.telemetry import RecordType
|
||||
@@ -158,8 +170,7 @@ def verify_auth_setting() -> None:
|
||||
)
|
||||
if raw_auth_type == "disabled":
|
||||
logger.warning(
|
||||
"AUTH_TYPE='disabled' is no longer supported. "
|
||||
"Using 'basic' instead. Please update your configuration."
|
||||
"AUTH_TYPE='disabled' is no longer supported. Using 'basic' instead. Please update your configuration."
|
||||
)
|
||||
|
||||
logger.notice(f"Using Auth Type: {AUTH_TYPE.value}")
|
||||
@@ -602,8 +613,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
|
||||
char in PASSWORD_SPECIAL_CHARS for char in password
|
||||
):
|
||||
raise exceptions.InvalidPasswordException(
|
||||
reason="Password must contain at least one special character from the following set: "
|
||||
f"{PASSWORD_SPECIAL_CHARS}."
|
||||
reason=f"Password must contain at least one special character from the following set: {PASSWORD_SPECIAL_CHARS}."
|
||||
)
|
||||
return
|
||||
|
||||
@@ -784,6 +794,12 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
|
||||
except Exception:
|
||||
logger.exception("Error deleting anonymous user cookie")
|
||||
|
||||
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
|
||||
mt_cloud_identify(
|
||||
distinct_id=str(user.id),
|
||||
properties={"email": user.email, "tenant_id": tenant_id},
|
||||
)
|
||||
|
||||
async def on_after_register(
|
||||
self, user: User, request: Optional[Request] = None
|
||||
) -> None:
|
||||
@@ -802,12 +818,25 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
|
||||
user_count = await get_user_count()
|
||||
logger.debug(f"Current tenant user count: {user_count}")
|
||||
|
||||
# Ensure a PostHog person profile exists for this user.
|
||||
mt_cloud_identify(
|
||||
distinct_id=str(user.id),
|
||||
properties={"email": user.email, "tenant_id": tenant_id},
|
||||
)
|
||||
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=user.email,
|
||||
distinct_id=str(user.id),
|
||||
event=MilestoneRecordType.USER_SIGNED_UP,
|
||||
)
|
||||
|
||||
if user_count == 1:
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=str(user.id),
|
||||
event=MilestoneRecordType.TENANT_CREATED,
|
||||
)
|
||||
|
||||
finally:
|
||||
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
|
||||
|
||||
@@ -870,7 +899,10 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
|
||||
)
|
||||
|
||||
async def on_after_forgot_password(
|
||||
self, user: User, token: str, request: Optional[Request] = None # noqa: ARG002
|
||||
self,
|
||||
user: User,
|
||||
token: str,
|
||||
request: Optional[Request] = None, # noqa: ARG002
|
||||
) -> None:
|
||||
if not EMAIL_CONFIGURED:
|
||||
logger.error(
|
||||
@@ -889,7 +921,10 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
|
||||
send_forgot_password_email(user.email, tenant_id=tenant_id, token=token)
|
||||
|
||||
async def on_after_request_verify(
|
||||
self, user: User, token: str, request: Optional[Request] = None # noqa: ARG002
|
||||
self,
|
||||
user: User,
|
||||
token: str,
|
||||
request: Optional[Request] = None, # noqa: ARG002
|
||||
) -> None:
|
||||
verify_email_domain(user.email)
|
||||
|
||||
@@ -1185,7 +1220,9 @@ class SingleTenantJWTStrategy(JWTStrategy[User, uuid.UUID]):
|
||||
return
|
||||
|
||||
async def refresh_token(
|
||||
self, token: Optional[str], user: User # noqa: ARG002
|
||||
self,
|
||||
token: Optional[str], # noqa: ARG002
|
||||
user: User, # noqa: ARG002
|
||||
) -> str:
|
||||
"""Issue a fresh JWT with a new expiry."""
|
||||
return await self.write_token(user)
|
||||
@@ -1213,8 +1250,7 @@ def get_jwt_strategy() -> SingleTenantJWTStrategy:
|
||||
if AUTH_BACKEND == AuthBackend.JWT:
|
||||
if MULTI_TENANT or AUTH_TYPE == AuthType.CLOUD:
|
||||
raise ValueError(
|
||||
"JWT auth backend is only supported for single-tenant, self-hosted deployments. "
|
||||
"Use 'redis' or 'postgres' instead."
|
||||
"JWT auth backend is only supported for single-tenant, self-hosted deployments. Use 'redis' or 'postgres' instead."
|
||||
)
|
||||
if not USER_AUTH_SECRET:
|
||||
raise ValueError("USER_AUTH_SECRET is required for JWT auth backend.")
|
||||
@@ -1612,6 +1648,125 @@ async def current_admin_user(user: User = Depends(current_user)) -> User:
|
||||
return user
|
||||
|
||||
|
||||
async def _get_user_from_token_data(token_data: dict) -> User | None:
|
||||
"""Shared logic: token data dict → User object.
|
||||
|
||||
Args:
|
||||
token_data: Decoded token data containing 'sub' (user ID).
|
||||
|
||||
Returns:
|
||||
User object if found and active, None otherwise.
|
||||
"""
|
||||
user_id = token_data.get("sub")
|
||||
if not user_id:
|
||||
return None
|
||||
|
||||
try:
|
||||
user_uuid = uuid.UUID(user_id)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
async with get_async_session_context_manager() as async_db_session:
|
||||
user = await async_db_session.get(User, user_uuid)
|
||||
if user is None or not user.is_active:
|
||||
return None
|
||||
return user
|
||||
|
||||
|
||||
_LOOPBACK_HOSTNAMES = frozenset({"localhost", "127.0.0.1", "::1"})
|
||||
|
||||
|
||||
def _is_same_origin(actual: str, expected: str) -> bool:
|
||||
"""Compare two origins for the WebSocket CSWSH check.
|
||||
|
||||
Scheme and hostname must match exactly. Port must also match, except
|
||||
when the hostname is a loopback address (localhost / 127.0.0.1 / ::1),
|
||||
where port is ignored. On loopback, all ports belong to the same
|
||||
operator, so port differences carry no security significance — the
|
||||
CSWSH threat is remote origins, not local ones.
|
||||
"""
|
||||
a = urlparse(actual.rstrip("/"))
|
||||
e = urlparse(expected.rstrip("/"))
|
||||
|
||||
if a.scheme != e.scheme or a.hostname != e.hostname:
|
||||
return False
|
||||
|
||||
if a.hostname in _LOOPBACK_HOSTNAMES:
|
||||
return True
|
||||
|
||||
actual_port = a.port or (443 if a.scheme == "https" else 80)
|
||||
expected_port = e.port or (443 if e.scheme == "https" else 80)
|
||||
|
||||
return actual_port == expected_port
|
||||
|
||||
|
||||
async def current_user_from_websocket(
|
||||
websocket: WebSocket,
|
||||
token: str = Query(..., description="WebSocket authentication token"),
|
||||
) -> User:
|
||||
"""
|
||||
WebSocket authentication dependency using query parameter.
|
||||
|
||||
Validates the WS token from query param and returns the User.
|
||||
Raises BasicAuthenticationError if authentication fails.
|
||||
|
||||
The token must be obtained from POST /voice/ws-token before connecting.
|
||||
Tokens are single-use and expire after 60 seconds.
|
||||
|
||||
Usage:
|
||||
1. POST /voice/ws-token -> {"token": "xxx"}
|
||||
2. Connect to ws://host/path?token=xxx
|
||||
|
||||
This applies the same auth checks as current_user() for HTTP endpoints.
|
||||
"""
|
||||
# Check Origin header to prevent Cross-Site WebSocket Hijacking (CSWSH).
|
||||
# Browsers always send Origin on WebSocket connections.
|
||||
origin = websocket.headers.get("origin")
|
||||
if not origin:
|
||||
logger.warning("WS auth: missing Origin header")
|
||||
raise BasicAuthenticationError(detail="Access denied. Missing origin.")
|
||||
|
||||
if not _is_same_origin(origin, WEB_DOMAIN):
|
||||
logger.warning(f"WS auth: origin mismatch. Expected {WEB_DOMAIN}, got {origin}")
|
||||
raise BasicAuthenticationError(detail="Access denied. Invalid origin.")
|
||||
|
||||
# Validate WS token in Redis (single-use, deleted after retrieval)
|
||||
try:
|
||||
token_data = await retrieve_ws_token_data(token)
|
||||
if token_data is None:
|
||||
raise BasicAuthenticationError(
|
||||
detail="Access denied. Invalid or expired authentication token."
|
||||
)
|
||||
except BasicAuthenticationError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"WS auth: error during token validation: {e}")
|
||||
raise BasicAuthenticationError(
|
||||
detail="Authentication verification failed."
|
||||
) from e
|
||||
|
||||
# Get user from token data
|
||||
user = await _get_user_from_token_data(token_data)
|
||||
if user is None:
|
||||
logger.warning(f"WS auth: user not found for id={token_data.get('sub')}")
|
||||
raise BasicAuthenticationError(
|
||||
detail="Access denied. User not found or inactive."
|
||||
)
|
||||
|
||||
# Apply same checks as HTTP auth (verification, OIDC expiry, role)
|
||||
user = await double_check_user(user)
|
||||
|
||||
# Block LIMITED users (same as current_user)
|
||||
if user.role == UserRole.LIMITED:
|
||||
logger.warning(f"WS auth: user {user.email} has LIMITED role")
|
||||
raise BasicAuthenticationError(
|
||||
detail="Access denied. User role is LIMITED. BASIC or higher permissions are required.",
|
||||
)
|
||||
|
||||
logger.debug(f"WS auth: authenticated {user.email}")
|
||||
return user
|
||||
|
||||
|
||||
def get_default_admin_user_emails_() -> list[str]:
|
||||
# No default seeding available for Onyx MIT
|
||||
return []
|
||||
@@ -1621,6 +1776,7 @@ STATE_TOKEN_AUDIENCE = "fastapi-users:oauth-state"
|
||||
STATE_TOKEN_LIFETIME_SECONDS = 3600
|
||||
CSRF_TOKEN_KEY = "csrftoken"
|
||||
CSRF_TOKEN_COOKIE_NAME = "fastapiusersoauthcsrf"
|
||||
PKCE_COOKIE_NAME_PREFIX = "fastapiusersoauthpkce"
|
||||
|
||||
|
||||
class OAuth2AuthorizeResponse(BaseModel):
|
||||
@@ -1641,6 +1797,21 @@ def generate_csrf_token() -> str:
|
||||
return secrets.token_urlsafe(32)
|
||||
|
||||
|
||||
def _base64url_encode(data: bytes) -> str:
|
||||
return base64.urlsafe_b64encode(data).rstrip(b"=").decode("ascii")
|
||||
|
||||
|
||||
def generate_pkce_pair() -> tuple[str, str]:
|
||||
verifier = secrets.token_urlsafe(64)
|
||||
challenge = _base64url_encode(hashlib.sha256(verifier.encode("ascii")).digest())
|
||||
return verifier, challenge
|
||||
|
||||
|
||||
def get_pkce_cookie_name(state: str) -> str:
|
||||
state_hash = hashlib.sha256(state.encode("utf-8")).hexdigest()
|
||||
return f"{PKCE_COOKIE_NAME_PREFIX}_{state_hash}"
|
||||
|
||||
|
||||
# refer to https://github.com/fastapi-users/fastapi-users/blob/42ddc241b965475390e2bce887b084152ae1a2cd/fastapi_users/fastapi_users.py#L91
|
||||
def create_onyx_oauth_router(
|
||||
oauth_client: BaseOAuth2,
|
||||
@@ -1649,6 +1820,7 @@ def create_onyx_oauth_router(
|
||||
redirect_url: Optional[str] = None,
|
||||
associate_by_email: bool = False,
|
||||
is_verified_by_default: bool = False,
|
||||
enable_pkce: bool = False,
|
||||
) -> APIRouter:
|
||||
return get_oauth_router(
|
||||
oauth_client,
|
||||
@@ -1658,6 +1830,7 @@ def create_onyx_oauth_router(
|
||||
redirect_url,
|
||||
associate_by_email,
|
||||
is_verified_by_default,
|
||||
enable_pkce=enable_pkce,
|
||||
)
|
||||
|
||||
|
||||
@@ -1676,6 +1849,7 @@ def get_oauth_router(
|
||||
csrf_token_cookie_secure: Optional[bool] = None,
|
||||
csrf_token_cookie_httponly: bool = True,
|
||||
csrf_token_cookie_samesite: Optional[Literal["lax", "strict", "none"]] = "lax",
|
||||
enable_pkce: bool = False,
|
||||
) -> APIRouter:
|
||||
"""Generate a router with the OAuth routes."""
|
||||
router = APIRouter()
|
||||
@@ -1692,6 +1866,13 @@ def get_oauth_router(
|
||||
route_name=callback_route_name,
|
||||
)
|
||||
|
||||
async def null_access_token_state() -> tuple[OAuth2Token, Optional[str]] | None:
|
||||
return None
|
||||
|
||||
access_token_state_dependency = (
|
||||
oauth2_authorize_callback if not enable_pkce else null_access_token_state
|
||||
)
|
||||
|
||||
if csrf_token_cookie_secure is None:
|
||||
csrf_token_cookie_secure = WEB_DOMAIN.startswith("https")
|
||||
|
||||
@@ -1725,13 +1906,26 @@ def get_oauth_router(
|
||||
CSRF_TOKEN_KEY: csrf_token,
|
||||
}
|
||||
state = generate_state_token(state_data, state_secret)
|
||||
pkce_cookie: tuple[str, str] | None = None
|
||||
|
||||
# Get the basic authorization URL
|
||||
authorization_url = await oauth_client.get_authorization_url(
|
||||
authorize_redirect_url,
|
||||
state,
|
||||
scopes,
|
||||
)
|
||||
if enable_pkce:
|
||||
code_verifier, code_challenge = generate_pkce_pair()
|
||||
pkce_cookie_name = get_pkce_cookie_name(state)
|
||||
pkce_cookie = (pkce_cookie_name, code_verifier)
|
||||
authorization_url = await oauth_client.get_authorization_url(
|
||||
authorize_redirect_url,
|
||||
state,
|
||||
scopes,
|
||||
code_challenge=code_challenge,
|
||||
code_challenge_method="S256",
|
||||
)
|
||||
else:
|
||||
# Get the basic authorization URL
|
||||
authorization_url = await oauth_client.get_authorization_url(
|
||||
authorize_redirect_url,
|
||||
state,
|
||||
scopes,
|
||||
)
|
||||
|
||||
# For Google OAuth, add parameters to request refresh tokens
|
||||
if oauth_client.name == "google":
|
||||
@@ -1739,11 +1933,15 @@ def get_oauth_router(
|
||||
authorization_url, {"access_type": "offline", "prompt": "consent"}
|
||||
)
|
||||
|
||||
if redirect:
|
||||
redirect_response = RedirectResponse(authorization_url, status_code=302)
|
||||
redirect_response.set_cookie(
|
||||
key=csrf_token_cookie_name,
|
||||
value=csrf_token,
|
||||
def set_oauth_cookie(
|
||||
target_response: Response,
|
||||
*,
|
||||
key: str,
|
||||
value: str,
|
||||
) -> None:
|
||||
target_response.set_cookie(
|
||||
key=key,
|
||||
value=value,
|
||||
max_age=STATE_TOKEN_LIFETIME_SECONDS,
|
||||
path=csrf_token_cookie_path,
|
||||
domain=csrf_token_cookie_domain,
|
||||
@@ -1751,18 +1949,28 @@ def get_oauth_router(
|
||||
httponly=csrf_token_cookie_httponly,
|
||||
samesite=csrf_token_cookie_samesite,
|
||||
)
|
||||
return redirect_response
|
||||
|
||||
response.set_cookie(
|
||||
response_with_cookies: Response
|
||||
if redirect:
|
||||
response_with_cookies = RedirectResponse(authorization_url, status_code=302)
|
||||
else:
|
||||
response_with_cookies = response
|
||||
|
||||
set_oauth_cookie(
|
||||
response_with_cookies,
|
||||
key=csrf_token_cookie_name,
|
||||
value=csrf_token,
|
||||
max_age=STATE_TOKEN_LIFETIME_SECONDS,
|
||||
path=csrf_token_cookie_path,
|
||||
domain=csrf_token_cookie_domain,
|
||||
secure=csrf_token_cookie_secure,
|
||||
httponly=csrf_token_cookie_httponly,
|
||||
samesite=csrf_token_cookie_samesite,
|
||||
)
|
||||
if pkce_cookie is not None:
|
||||
pkce_cookie_name, code_verifier = pkce_cookie
|
||||
set_oauth_cookie(
|
||||
response_with_cookies,
|
||||
key=pkce_cookie_name,
|
||||
value=code_verifier,
|
||||
)
|
||||
|
||||
if redirect:
|
||||
return response_with_cookies
|
||||
|
||||
return OAuth2AuthorizeResponse(authorization_url=authorization_url)
|
||||
|
||||
@@ -1793,119 +2001,242 @@ def get_oauth_router(
|
||||
)
|
||||
async def callback(
|
||||
request: Request,
|
||||
access_token_state: Tuple[OAuth2Token, str] = Depends(
|
||||
oauth2_authorize_callback
|
||||
access_token_state: Tuple[OAuth2Token, Optional[str]] | None = Depends(
|
||||
access_token_state_dependency
|
||||
),
|
||||
code: Optional[str] = None,
|
||||
state: Optional[str] = None,
|
||||
error: Optional[str] = None,
|
||||
user_manager: BaseUserManager[models.UP, models.ID] = Depends(get_user_manager),
|
||||
strategy: Strategy[models.UP, models.ID] = Depends(backend.get_strategy),
|
||||
) -> RedirectResponse:
|
||||
token, state = access_token_state
|
||||
account_id, account_email = await oauth_client.get_id_email(
|
||||
token["access_token"]
|
||||
)
|
||||
) -> Response:
|
||||
pkce_cookie_name: str | None = None
|
||||
|
||||
if account_email is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=ErrorCode.OAUTH_NOT_AVAILABLE_EMAIL,
|
||||
)
|
||||
def delete_pkce_cookie(response: Response) -> None:
|
||||
if enable_pkce and pkce_cookie_name:
|
||||
response.delete_cookie(
|
||||
key=pkce_cookie_name,
|
||||
path=csrf_token_cookie_path,
|
||||
domain=csrf_token_cookie_domain,
|
||||
secure=csrf_token_cookie_secure,
|
||||
httponly=csrf_token_cookie_httponly,
|
||||
samesite=csrf_token_cookie_samesite,
|
||||
)
|
||||
|
||||
try:
|
||||
state_data = decode_jwt(state, state_secret, [STATE_TOKEN_AUDIENCE])
|
||||
except jwt.DecodeError:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=getattr(
|
||||
ErrorCode, "ACCESS_TOKEN_DECODE_ERROR", "ACCESS_TOKEN_DECODE_ERROR"
|
||||
),
|
||||
)
|
||||
except jwt.ExpiredSignatureError:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=getattr(
|
||||
ErrorCode,
|
||||
"ACCESS_TOKEN_ALREADY_EXPIRED",
|
||||
"ACCESS_TOKEN_ALREADY_EXPIRED",
|
||||
),
|
||||
)
|
||||
def build_error_response(exc: OnyxError) -> JSONResponse:
|
||||
log_onyx_error(exc)
|
||||
error_response = onyx_error_to_json_response(exc)
|
||||
delete_pkce_cookie(error_response)
|
||||
return error_response
|
||||
|
||||
cookie_csrf_token = request.cookies.get(csrf_token_cookie_name)
|
||||
state_csrf_token = state_data.get(CSRF_TOKEN_KEY)
|
||||
if (
|
||||
not cookie_csrf_token
|
||||
or not state_csrf_token
|
||||
or not secrets.compare_digest(cookie_csrf_token, state_csrf_token)
|
||||
):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=getattr(ErrorCode, "OAUTH_INVALID_STATE", "OAUTH_INVALID_STATE"),
|
||||
)
|
||||
def decode_and_validate_state(state_value: str) -> Dict[str, str]:
|
||||
try:
|
||||
state_data = decode_jwt(
|
||||
state_value, state_secret, [STATE_TOKEN_AUDIENCE]
|
||||
)
|
||||
except jwt.DecodeError:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
getattr(
|
||||
ErrorCode,
|
||||
"ACCESS_TOKEN_DECODE_ERROR",
|
||||
"ACCESS_TOKEN_DECODE_ERROR",
|
||||
),
|
||||
)
|
||||
except jwt.ExpiredSignatureError:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
getattr(
|
||||
ErrorCode,
|
||||
"ACCESS_TOKEN_ALREADY_EXPIRED",
|
||||
"ACCESS_TOKEN_ALREADY_EXPIRED",
|
||||
),
|
||||
)
|
||||
except jwt.PyJWTError:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
getattr(
|
||||
ErrorCode,
|
||||
"ACCESS_TOKEN_DECODE_ERROR",
|
||||
"ACCESS_TOKEN_DECODE_ERROR",
|
||||
),
|
||||
)
|
||||
|
||||
next_url = state_data.get("next_url", "/")
|
||||
referral_source = state_data.get("referral_source", None)
|
||||
try:
|
||||
tenant_id = fetch_ee_implementation_or_noop(
|
||||
"onyx.server.tenants.user_mapping", "get_tenant_id_for_email", None
|
||||
)(account_email)
|
||||
except exceptions.UserNotExists:
|
||||
tenant_id = None
|
||||
cookie_csrf_token = request.cookies.get(csrf_token_cookie_name)
|
||||
state_csrf_token = state_data.get(CSRF_TOKEN_KEY)
|
||||
if (
|
||||
not cookie_csrf_token
|
||||
or not state_csrf_token
|
||||
or not secrets.compare_digest(cookie_csrf_token, state_csrf_token)
|
||||
):
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
getattr(ErrorCode, "OAUTH_INVALID_STATE", "OAUTH_INVALID_STATE"),
|
||||
)
|
||||
|
||||
request.state.referral_source = referral_source
|
||||
return state_data
|
||||
|
||||
# Proceed to authenticate or create the user
|
||||
try:
|
||||
user = await user_manager.oauth_callback(
|
||||
oauth_client.name,
|
||||
token["access_token"],
|
||||
account_id,
|
||||
account_email,
|
||||
token.get("expires_at"),
|
||||
token.get("refresh_token"),
|
||||
request,
|
||||
associate_by_email=associate_by_email,
|
||||
is_verified_by_default=is_verified_by_default,
|
||||
)
|
||||
except UserAlreadyExists:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=ErrorCode.OAUTH_USER_ALREADY_EXISTS,
|
||||
)
|
||||
token: OAuth2Token
|
||||
state_data: Dict[str, str]
|
||||
|
||||
if not user.is_active:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=ErrorCode.LOGIN_BAD_CREDENTIALS,
|
||||
)
|
||||
# `code`, `state`, and `error` are read directly only in the PKCE path.
|
||||
# In the non-PKCE path, `oauth2_authorize_callback` consumes them.
|
||||
if enable_pkce:
|
||||
if state is not None:
|
||||
pkce_cookie_name = get_pkce_cookie_name(state)
|
||||
|
||||
# Login user
|
||||
response = await backend.login(strategy, user)
|
||||
await user_manager.on_after_login(user, request, response)
|
||||
if error is not None:
|
||||
return build_error_response(
|
||||
OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
"Authorization request failed or was denied",
|
||||
)
|
||||
)
|
||||
if code is None:
|
||||
return build_error_response(
|
||||
OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
"Missing authorization code in OAuth callback",
|
||||
)
|
||||
)
|
||||
if state is None:
|
||||
return build_error_response(
|
||||
OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
"Missing state parameter in OAuth callback",
|
||||
)
|
||||
)
|
||||
|
||||
# Prepare redirect response
|
||||
if tenant_id is None:
|
||||
# Use URL utility to add parameters
|
||||
redirect_url = add_url_params(next_url, {"new_team": "true"})
|
||||
redirect_response = RedirectResponse(redirect_url, status_code=302)
|
||||
else:
|
||||
# No parameters to add
|
||||
redirect_response = RedirectResponse(next_url, status_code=302)
|
||||
state_value = state
|
||||
|
||||
# Copy headers from auth response to redirect response, with special handling for Set-Cookie
|
||||
for header_name, header_value in response.headers.items():
|
||||
# FastAPI can have multiple Set-Cookie headers as a list
|
||||
if header_name.lower() == "set-cookie" and isinstance(header_value, list):
|
||||
for cookie_value in header_value:
|
||||
redirect_response.headers.append(header_name, cookie_value)
|
||||
if redirect_url is not None:
|
||||
callback_redirect_url = redirect_url
|
||||
else:
|
||||
callback_path = request.app.url_path_for(callback_route_name)
|
||||
callback_redirect_url = f"{WEB_DOMAIN}{callback_path}"
|
||||
|
||||
code_verifier = request.cookies.get(cast(str, pkce_cookie_name))
|
||||
if not code_verifier:
|
||||
return build_error_response(
|
||||
OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
"Missing PKCE verifier cookie in OAuth callback",
|
||||
)
|
||||
)
|
||||
|
||||
try:
|
||||
state_data = decode_and_validate_state(state_value)
|
||||
except OnyxError as e:
|
||||
return build_error_response(e)
|
||||
|
||||
try:
|
||||
token = await oauth_client.get_access_token(
|
||||
code, callback_redirect_url, code_verifier
|
||||
)
|
||||
except GetAccessTokenError:
|
||||
return build_error_response(
|
||||
OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
"Authorization code exchange failed",
|
||||
)
|
||||
)
|
||||
else:
|
||||
if access_token_state is None:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.INTERNAL_ERROR, "Missing OAuth callback state"
|
||||
)
|
||||
token, callback_state = access_token_state
|
||||
if callback_state is None:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
"Missing state parameter in OAuth callback",
|
||||
)
|
||||
state_data = decode_and_validate_state(callback_state)
|
||||
|
||||
async def complete_login_flow(
|
||||
token: OAuth2Token, state_data: Dict[str, str]
|
||||
) -> RedirectResponse:
|
||||
account_id, account_email = await oauth_client.get_id_email(
|
||||
token["access_token"]
|
||||
)
|
||||
|
||||
if account_email is None:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
ErrorCode.OAUTH_NOT_AVAILABLE_EMAIL,
|
||||
)
|
||||
|
||||
next_url = state_data.get("next_url", "/")
|
||||
referral_source = state_data.get("referral_source", None)
|
||||
try:
|
||||
tenant_id = fetch_ee_implementation_or_noop(
|
||||
"onyx.server.tenants.user_mapping", "get_tenant_id_for_email", None
|
||||
)(account_email)
|
||||
except exceptions.UserNotExists:
|
||||
tenant_id = None
|
||||
|
||||
request.state.referral_source = referral_source
|
||||
|
||||
# Proceed to authenticate or create the user
|
||||
try:
|
||||
user = await user_manager.oauth_callback(
|
||||
oauth_client.name,
|
||||
token["access_token"],
|
||||
account_id,
|
||||
account_email,
|
||||
token.get("expires_at"),
|
||||
token.get("refresh_token"),
|
||||
request,
|
||||
associate_by_email=associate_by_email,
|
||||
is_verified_by_default=is_verified_by_default,
|
||||
)
|
||||
except UserAlreadyExists:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
ErrorCode.OAUTH_USER_ALREADY_EXISTS,
|
||||
)
|
||||
|
||||
if not user.is_active:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.VALIDATION_ERROR,
|
||||
ErrorCode.LOGIN_BAD_CREDENTIALS,
|
||||
)
|
||||
|
||||
# Login user
|
||||
response = await backend.login(strategy, user)
|
||||
await user_manager.on_after_login(user, request, response)
|
||||
|
||||
# Prepare redirect response
|
||||
if tenant_id is None:
|
||||
# Use URL utility to add parameters
|
||||
redirect_destination = add_url_params(next_url, {"new_team": "true"})
|
||||
redirect_response = RedirectResponse(
|
||||
redirect_destination, status_code=302
|
||||
)
|
||||
else:
|
||||
# No parameters to add
|
||||
redirect_response = RedirectResponse(next_url, status_code=302)
|
||||
|
||||
# Copy headers from auth response to redirect response, with special handling for Set-Cookie
|
||||
for header_name, header_value in response.headers.items():
|
||||
header_name_lower = header_name.lower()
|
||||
if header_name_lower == "set-cookie":
|
||||
redirect_response.headers.append(header_name, header_value)
|
||||
continue
|
||||
if header_name_lower in {"location", "content-length"}:
|
||||
continue
|
||||
redirect_response.headers[header_name] = header_value
|
||||
|
||||
if hasattr(response, "body"):
|
||||
redirect_response.body = response.body
|
||||
if hasattr(response, "status_code"):
|
||||
redirect_response.status_code = response.status_code
|
||||
if hasattr(response, "media_type"):
|
||||
redirect_response.media_type = response.media_type
|
||||
return redirect_response
|
||||
|
||||
return redirect_response
|
||||
if enable_pkce:
|
||||
try:
|
||||
redirect_response = await complete_login_flow(token, state_data)
|
||||
except OnyxError as e:
|
||||
return build_error_response(e)
|
||||
delete_pkce_cookie(redirect_response)
|
||||
return redirect_response
|
||||
|
||||
return await complete_login_flow(token, state_data)
|
||||
|
||||
return router
|
||||
|
||||
@@ -154,8 +154,7 @@ def on_task_postrun(
|
||||
tenant_id = cast(str, kwargs.get("tenant_id", POSTGRES_DEFAULT_SCHEMA))
|
||||
|
||||
task_logger.debug(
|
||||
f"Task {task.name} (ID: {task_id}) completed with state: {state} "
|
||||
f"{f'for tenant_id={tenant_id}' if tenant_id else ''}"
|
||||
f"Task {task.name} (ID: {task_id}) completed with state: {state} {f'for tenant_id={tenant_id}' if tenant_id else ''}"
|
||||
)
|
||||
|
||||
r = get_redis_client(tenant_id=tenant_id)
|
||||
@@ -211,7 +210,9 @@ def on_task_postrun(
|
||||
|
||||
|
||||
def on_celeryd_init(
|
||||
sender: str, conf: Any = None, **kwargs: Any # noqa: ARG001
|
||||
sender: str, # noqa: ARG001
|
||||
conf: Any = None, # noqa: ARG001
|
||||
**kwargs: Any, # noqa: ARG001
|
||||
) -> None:
|
||||
"""The first signal sent on celery worker startup"""
|
||||
|
||||
@@ -277,10 +278,7 @@ def wait_for_redis(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
|
||||
time.sleep(WAIT_INTERVAL)
|
||||
|
||||
if not ready:
|
||||
msg = (
|
||||
f"Redis: Readiness probe did not succeed within the timeout "
|
||||
f"({WAIT_LIMIT} seconds). Exiting..."
|
||||
)
|
||||
msg = f"Redis: Readiness probe did not succeed within the timeout ({WAIT_LIMIT} seconds). Exiting..."
|
||||
logger.error(msg)
|
||||
raise WorkerShutdown(msg)
|
||||
|
||||
@@ -319,10 +317,7 @@ def wait_for_db(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
|
||||
time.sleep(WAIT_INTERVAL)
|
||||
|
||||
if not ready:
|
||||
msg = (
|
||||
f"Database: Readiness probe did not succeed within the timeout "
|
||||
f"({WAIT_LIMIT} seconds). Exiting..."
|
||||
)
|
||||
msg = f"Database: Readiness probe did not succeed within the timeout ({WAIT_LIMIT} seconds). Exiting..."
|
||||
logger.error(msg)
|
||||
raise WorkerShutdown(msg)
|
||||
|
||||
@@ -349,10 +344,7 @@ def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None: # noqa: ARG00
|
||||
f"Primary worker is not ready yet. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}"
|
||||
)
|
||||
if time_elapsed > WAIT_LIMIT:
|
||||
msg = (
|
||||
f"Primary worker was not ready within the timeout. "
|
||||
f"({WAIT_LIMIT} seconds). Exiting..."
|
||||
)
|
||||
msg = f"Primary worker was not ready within the timeout. ({WAIT_LIMIT} seconds). Exiting..."
|
||||
logger.error(msg)
|
||||
raise WorkerShutdown(msg)
|
||||
|
||||
@@ -522,7 +514,9 @@ def reset_tenant_id(
|
||||
CURRENT_TENANT_ID_CONTEXTVAR.set(POSTGRES_DEFAULT_SCHEMA)
|
||||
|
||||
|
||||
def wait_for_vespa_or_shutdown(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
|
||||
def wait_for_vespa_or_shutdown(
|
||||
sender: Any, **kwargs: Any # noqa: ARG001
|
||||
) -> None: # noqa: ARG001
|
||||
"""Waits for Vespa to become ready subject to a timeout.
|
||||
Raises WorkerShutdown if the timeout is reached."""
|
||||
|
||||
|
||||
@@ -181,9 +181,7 @@ class DynamicTenantScheduler(PersistentScheduler):
|
||||
if not do_update:
|
||||
# exit early if nothing changed
|
||||
task_logger.info(
|
||||
f"_try_updating_schedule - Schedule unchanged: "
|
||||
f"tasks={len(new_schedule)} "
|
||||
f"beat_multiplier={beat_multiplier}"
|
||||
f"_try_updating_schedule - Schedule unchanged: tasks={len(new_schedule)} beat_multiplier={beat_multiplier}"
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
@@ -186,7 +186,6 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
|
||||
|
||||
# Check if the Celery task actually exists
|
||||
try:
|
||||
|
||||
result: AsyncResult = AsyncResult(attempt.celery_task_id)
|
||||
|
||||
# If the task is not in PENDING state, it exists in Celery
|
||||
@@ -207,8 +206,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
|
||||
except Exception:
|
||||
# If we can't check the task status, be conservative and continue
|
||||
logger.warning(
|
||||
f"Could not verify Celery task status on startup for attempt {attempt.id}, "
|
||||
f"task_id={attempt.celery_task_id}"
|
||||
f"Could not verify Celery task status on startup for attempt {attempt.id}, task_id={attempt.celery_task_id}"
|
||||
)
|
||||
|
||||
|
||||
@@ -278,8 +276,7 @@ class HubPeriodicTask(bootsteps.StartStopStep):
|
||||
lock.reacquire()
|
||||
else:
|
||||
task_logger.warning(
|
||||
"Full acquisition of primary worker lock. "
|
||||
"Reasons could be worker restart or lock expiration."
|
||||
"Full acquisition of primary worker lock. Reasons could be worker restart or lock expiration."
|
||||
)
|
||||
lock = r.lock(
|
||||
OnyxRedisLocks.PRIMARY_WORKER,
|
||||
|
||||
@@ -120,7 +120,7 @@ def _extract_from_batch(
|
||||
if failed_id:
|
||||
ids[failed_id] = None
|
||||
logger.warning(
|
||||
f"Failed to retrieve document {failed_id}: " f"{item.failure_message}"
|
||||
f"Failed to retrieve document {failed_id}: {item.failure_message}"
|
||||
)
|
||||
else:
|
||||
ids[item.id] = item.parent_hierarchy_raw_node_id
|
||||
|
||||
@@ -307,14 +307,12 @@ def try_generate_document_cc_pair_cleanup_tasks(
|
||||
|
||||
if redis_connector.prune.fenced:
|
||||
raise TaskDependencyError(
|
||||
"Connector deletion - Delayed (pruning in progress): "
|
||||
f"cc_pair={cc_pair_id}"
|
||||
f"Connector deletion - Delayed (pruning in progress): cc_pair={cc_pair_id}"
|
||||
)
|
||||
|
||||
if redis_connector.permissions.fenced:
|
||||
raise TaskDependencyError(
|
||||
f"Connector deletion - Delayed (permissions in progress): "
|
||||
f"cc_pair={cc_pair_id}"
|
||||
f"Connector deletion - Delayed (permissions in progress): cc_pair={cc_pair_id}"
|
||||
)
|
||||
|
||||
# add tasks to celery and build up the task set to monitor in redis
|
||||
@@ -354,8 +352,7 @@ def try_generate_document_cc_pair_cleanup_tasks(
|
||||
# return 0
|
||||
|
||||
task_logger.info(
|
||||
"RedisConnectorDeletion.generate_tasks finished. "
|
||||
f"cc_pair={cc_pair_id} tasks_generated={tasks_generated}"
|
||||
f"RedisConnectorDeletion.generate_tasks finished. cc_pair={cc_pair_id} tasks_generated={tasks_generated}"
|
||||
)
|
||||
|
||||
# set this only after all tasks have been added
|
||||
@@ -366,7 +363,9 @@ def try_generate_document_cc_pair_cleanup_tasks(
|
||||
|
||||
|
||||
def monitor_connector_deletion_taskset(
|
||||
tenant_id: str, key_bytes: bytes, r: Redis # noqa: ARG001
|
||||
tenant_id: str,
|
||||
key_bytes: bytes,
|
||||
r: Redis, # noqa: ARG001
|
||||
) -> None:
|
||||
fence_key = key_bytes.decode("utf-8")
|
||||
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
|
||||
@@ -690,8 +689,7 @@ def validate_connector_deletion_fence(
|
||||
tasks_not_in_celery += 1
|
||||
|
||||
task_logger.info(
|
||||
"validate_connector_deletion_fence task check: "
|
||||
f"tasks_scanned={tasks_scanned} tasks_not_in_celery={tasks_not_in_celery}"
|
||||
f"validate_connector_deletion_fence task check: tasks_scanned={tasks_scanned} tasks_not_in_celery={tasks_not_in_celery}"
|
||||
)
|
||||
|
||||
# we're active if there are still tasks to run and those tasks all exist in celery
|
||||
|
||||
@@ -109,9 +109,7 @@ def try_creating_docfetching_task(
|
||||
|
||||
except Exception:
|
||||
task_logger.exception(
|
||||
f"try_creating_indexing_task - Unexpected exception: "
|
||||
f"cc_pair={cc_pair.id} "
|
||||
f"search_settings={search_settings.id}"
|
||||
f"try_creating_indexing_task - Unexpected exception: cc_pair={cc_pair.id} search_settings={search_settings.id}"
|
||||
)
|
||||
|
||||
# Clean up on failure
|
||||
|
||||
@@ -60,15 +60,13 @@ def _verify_indexing_attempt(
|
||||
|
||||
if attempt.connector_credential_pair_id != cc_pair_id:
|
||||
raise SimpleJobException(
|
||||
f"docfetching_task - CC pair mismatch: "
|
||||
f"expected={cc_pair_id} actual={attempt.connector_credential_pair_id}",
|
||||
f"docfetching_task - CC pair mismatch: expected={cc_pair_id} actual={attempt.connector_credential_pair_id}",
|
||||
code=IndexingWatchdogTerminalStatus.FENCE_MISMATCH.code,
|
||||
)
|
||||
|
||||
if attempt.search_settings_id != search_settings_id:
|
||||
raise SimpleJobException(
|
||||
f"docfetching_task - Search settings mismatch: "
|
||||
f"expected={search_settings_id} actual={attempt.search_settings_id}",
|
||||
f"docfetching_task - Search settings mismatch: expected={search_settings_id} actual={attempt.search_settings_id}",
|
||||
code=IndexingWatchdogTerminalStatus.FENCE_MISMATCH.code,
|
||||
)
|
||||
|
||||
@@ -77,8 +75,7 @@ def _verify_indexing_attempt(
|
||||
IndexingStatus.IN_PROGRESS,
|
||||
]:
|
||||
raise SimpleJobException(
|
||||
f"docfetching_task - Invalid attempt status: "
|
||||
f"attempt_id={index_attempt_id} status={attempt.status}",
|
||||
f"docfetching_task - Invalid attempt status: attempt_id={index_attempt_id} status={attempt.status}",
|
||||
code=IndexingWatchdogTerminalStatus.FENCE_MISMATCH.code,
|
||||
)
|
||||
|
||||
@@ -248,9 +245,7 @@ def _docfetching_task(
|
||||
raise e
|
||||
|
||||
logger.info(
|
||||
f"Indexing spawned task finished: attempt={index_attempt_id} "
|
||||
f"cc_pair={cc_pair_id} "
|
||||
f"search_settings={search_settings_id}"
|
||||
f"Indexing spawned task finished: attempt={index_attempt_id} cc_pair={cc_pair_id} search_settings={search_settings_id}"
|
||||
)
|
||||
os._exit(0) # ensure process exits cleanly
|
||||
|
||||
@@ -286,8 +281,7 @@ def process_job_result(
|
||||
result.status = IndexingWatchdogTerminalStatus.SUCCEEDED
|
||||
task_logger.warning(
|
||||
log_builder.build(
|
||||
"Indexing watchdog - spawned task has non-zero exit code "
|
||||
"but completion signal is OK. Continuing...",
|
||||
"Indexing watchdog - spawned task has non-zero exit code but completion signal is OK. Continuing...",
|
||||
exit_code=str(result.exit_code),
|
||||
)
|
||||
)
|
||||
@@ -296,10 +290,7 @@ def process_job_result(
|
||||
result.status = IndexingWatchdogTerminalStatus.from_code(result.exit_code)
|
||||
|
||||
job_level_exception = job.exception()
|
||||
result.exception_str = (
|
||||
f"Docfetching returned exit code {result.exit_code} "
|
||||
f"with exception: {job_level_exception}"
|
||||
)
|
||||
result.exception_str = f"Docfetching returned exit code {result.exit_code} with exception: {job_level_exception}"
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@@ -158,7 +158,6 @@ def validate_active_indexing_attempts(
|
||||
logger.info("Validating active indexing attempts")
|
||||
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
|
||||
# Find all active indexing attempts
|
||||
active_attempts = (
|
||||
db_session.execute(
|
||||
@@ -190,8 +189,7 @@ def validate_active_indexing_attempts(
|
||||
db_session.commit()
|
||||
|
||||
task_logger.info(
|
||||
f"Initialized heartbeat tracking for attempt {fresh_attempt.id}: "
|
||||
f"counter={fresh_attempt.heartbeat_counter}"
|
||||
f"Initialized heartbeat tracking for attempt {fresh_attempt.id}: counter={fresh_attempt.heartbeat_counter}"
|
||||
)
|
||||
continue
|
||||
|
||||
@@ -214,8 +212,7 @@ def validate_active_indexing_attempts(
|
||||
db_session.commit()
|
||||
|
||||
task_logger.debug(
|
||||
f"Heartbeat advanced for attempt {fresh_attempt.id}: "
|
||||
f"new_counter={current_counter}"
|
||||
f"Heartbeat advanced for attempt {fresh_attempt.id}: new_counter={current_counter}"
|
||||
)
|
||||
continue
|
||||
|
||||
@@ -350,9 +347,7 @@ def monitor_indexing_attempt_progress(
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception(
|
||||
f"Failed to monitor document processing completion: "
|
||||
f"attempt={attempt.id} "
|
||||
f"error={str(e)}"
|
||||
f"Failed to monitor document processing completion: attempt={attempt.id} error={str(e)}"
|
||||
)
|
||||
|
||||
# Mark the attempt as failed if monitoring fails
|
||||
@@ -401,9 +396,7 @@ def check_indexing_completion(
|
||||
) -> None:
|
||||
|
||||
logger.info(
|
||||
f"Checking for indexing completion: "
|
||||
f"attempt={index_attempt_id} "
|
||||
f"tenant={tenant_id}"
|
||||
f"Checking for indexing completion: attempt={index_attempt_id} tenant={tenant_id}"
|
||||
)
|
||||
|
||||
# Check if indexing is complete and all batches are processed
|
||||
@@ -445,7 +438,7 @@ def check_indexing_completion(
|
||||
if attempt.status == IndexingStatus.IN_PROGRESS:
|
||||
logger.error(
|
||||
f"Indexing attempt {index_attempt_id} has been indexing for "
|
||||
f"{stalled_timeout_hours//2}-{stalled_timeout_hours} hours without progress. "
|
||||
f"{stalled_timeout_hours // 2}-{stalled_timeout_hours} hours without progress. "
|
||||
f"Marking it as failed."
|
||||
)
|
||||
mark_attempt_failed(
|
||||
@@ -695,17 +688,12 @@ def _kickoff_indexing_tasks(
|
||||
|
||||
if attempt_id is not None:
|
||||
task_logger.info(
|
||||
f"Connector indexing queued: "
|
||||
f"index_attempt={attempt_id} "
|
||||
f"cc_pair={cc_pair.id} "
|
||||
f"search_settings={search_settings.id}"
|
||||
f"Connector indexing queued: index_attempt={attempt_id} cc_pair={cc_pair.id} search_settings={search_settings.id}"
|
||||
)
|
||||
tasks_created += 1
|
||||
else:
|
||||
task_logger.error(
|
||||
f"Failed to create indexing task: "
|
||||
f"cc_pair={cc_pair.id} "
|
||||
f"search_settings={search_settings.id}"
|
||||
f"Failed to create indexing task: cc_pair={cc_pair.id} search_settings={search_settings.id}"
|
||||
)
|
||||
|
||||
return tasks_created
|
||||
@@ -901,9 +889,7 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
|
||||
and secondary_search_settings.switchover_type == SwitchoverType.INSTANT
|
||||
):
|
||||
task_logger.info(
|
||||
f"Skipping secondary indexing: "
|
||||
f"switchover_type=INSTANT "
|
||||
f"for search_settings={secondary_search_settings.id}"
|
||||
f"Skipping secondary indexing: switchover_type=INSTANT for search_settings={secondary_search_settings.id}"
|
||||
)
|
||||
|
||||
# 2/3: VALIDATE
|
||||
@@ -1005,8 +991,7 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
|
||||
lock_beat.release()
|
||||
else:
|
||||
task_logger.error(
|
||||
"check_for_indexing - Lock not owned on completion: "
|
||||
f"tenant={tenant_id}"
|
||||
f"check_for_indexing - Lock not owned on completion: tenant={tenant_id}"
|
||||
)
|
||||
redis_lock_dump(lock_beat, redis_client)
|
||||
|
||||
@@ -1060,8 +1045,7 @@ def check_for_checkpoint_cleanup(self: Task, *, tenant_id: str) -> None:
|
||||
lock.release()
|
||||
else:
|
||||
task_logger.error(
|
||||
"check_for_checkpoint_cleanup - Lock not owned on completion: "
|
||||
f"tenant={tenant_id}"
|
||||
f"check_for_checkpoint_cleanup - Lock not owned on completion: tenant={tenant_id}"
|
||||
)
|
||||
|
||||
|
||||
@@ -1071,7 +1055,10 @@ def check_for_checkpoint_cleanup(self: Task, *, tenant_id: str) -> None:
|
||||
bind=True,
|
||||
)
|
||||
def cleanup_checkpoint_task(
|
||||
self: Task, *, index_attempt_id: int, tenant_id: str | None # noqa: ARG001
|
||||
self: Task, # noqa: ARG001
|
||||
*,
|
||||
index_attempt_id: int,
|
||||
tenant_id: str | None,
|
||||
) -> None:
|
||||
"""Clean up a checkpoint for a given index attempt"""
|
||||
|
||||
@@ -1084,9 +1071,7 @@ def cleanup_checkpoint_task(
|
||||
elapsed = time.monotonic() - start
|
||||
|
||||
task_logger.info(
|
||||
f"cleanup_checkpoint_task completed: tenant_id={tenant_id} "
|
||||
f"index_attempt_id={index_attempt_id} "
|
||||
f"elapsed={elapsed:.2f}"
|
||||
f"cleanup_checkpoint_task completed: tenant_id={tenant_id} index_attempt_id={index_attempt_id} elapsed={elapsed:.2f}"
|
||||
)
|
||||
|
||||
|
||||
@@ -1149,8 +1134,7 @@ def check_for_index_attempt_cleanup(self: Task, *, tenant_id: str) -> None:
|
||||
lock.release()
|
||||
else:
|
||||
task_logger.error(
|
||||
"check_for_index_attempt_cleanup - Lock not owned on completion: "
|
||||
f"tenant={tenant_id}"
|
||||
f"check_for_index_attempt_cleanup - Lock not owned on completion: tenant={tenant_id}"
|
||||
)
|
||||
|
||||
|
||||
@@ -1160,7 +1144,10 @@ def check_for_index_attempt_cleanup(self: Task, *, tenant_id: str) -> None:
|
||||
bind=True,
|
||||
)
|
||||
def cleanup_index_attempt_task(
|
||||
self: Task, *, index_attempt_ids: list[int], tenant_id: str # noqa: ARG001
|
||||
self: Task, # noqa: ARG001
|
||||
*,
|
||||
index_attempt_ids: list[int],
|
||||
tenant_id: str,
|
||||
) -> None:
|
||||
"""Clean up an index attempt"""
|
||||
start = time.monotonic()
|
||||
@@ -1207,15 +1194,13 @@ def _check_failure_threshold(
|
||||
FAILURE_RATIO_THRESHOLD = 0.1
|
||||
if total_failures > FAILURE_THRESHOLD and failure_ratio > FAILURE_RATIO_THRESHOLD:
|
||||
logger.error(
|
||||
f"Connector run failed with '{total_failures}' errors "
|
||||
f"after '{batch_num}' batches."
|
||||
f"Connector run failed with '{total_failures}' errors after '{batch_num}' batches."
|
||||
)
|
||||
if last_failure and last_failure.exception:
|
||||
raise last_failure.exception from last_failure.exception
|
||||
|
||||
raise RuntimeError(
|
||||
f"Connector run encountered too many errors, aborting. "
|
||||
f"Last error: {last_failure}"
|
||||
f"Connector run encountered too many errors, aborting. Last error: {last_failure}"
|
||||
)
|
||||
|
||||
|
||||
@@ -1339,9 +1324,7 @@ def _docprocessing_task(
|
||||
raise
|
||||
|
||||
task_logger.info(
|
||||
f"Processing document batch: "
|
||||
f"attempt={index_attempt_id} "
|
||||
f"batch_num={batch_num} "
|
||||
f"Processing document batch: attempt={index_attempt_id} batch_num={batch_num} "
|
||||
)
|
||||
|
||||
# Get the document batch storage
|
||||
@@ -1599,9 +1582,7 @@ def _docprocessing_task(
|
||||
|
||||
except Exception:
|
||||
task_logger.exception(
|
||||
f"Document batch processing failed: "
|
||||
f"batch_num={batch_num} "
|
||||
f"attempt={index_attempt_id} "
|
||||
f"Document batch processing failed: batch_num={batch_num} attempt={index_attempt_id} "
|
||||
)
|
||||
|
||||
raise
|
||||
|
||||
@@ -84,8 +84,7 @@ def scheduled_eval_task(self: Task, **kwargs: Any) -> None: # noqa: ARG001
|
||||
run_timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||||
|
||||
logger.info(
|
||||
f"Starting scheduled eval pipeline for project '{project_name}' "
|
||||
f"with {len(dataset_names)} dataset(s): {dataset_names}"
|
||||
f"Starting scheduled eval pipeline for project '{project_name}' with {len(dataset_names)} dataset(s): {dataset_names}"
|
||||
)
|
||||
|
||||
pipeline_start = datetime.now(timezone.utc)
|
||||
@@ -101,8 +100,7 @@ def scheduled_eval_task(self: Task, **kwargs: Any) -> None: # noqa: ARG001
|
||||
|
||||
try:
|
||||
logger.info(
|
||||
f"Running scheduled eval for dataset: {dataset_name} "
|
||||
f"(project: {project_name})"
|
||||
f"Running scheduled eval for dataset: {dataset_name} (project: {project_name})"
|
||||
)
|
||||
|
||||
configuration = EvalConfigurationOptions(
|
||||
@@ -142,6 +140,5 @@ def scheduled_eval_task(self: Task, **kwargs: Any) -> None: # noqa: ARG001
|
||||
|
||||
passed_count = sum(1 for r in results if r["success"])
|
||||
logger.info(
|
||||
f"Scheduled eval pipeline completed: {passed_count}/{len(results)} passed "
|
||||
f"in {total_duration:.1f}s"
|
||||
f"Scheduled eval pipeline completed: {passed_count}/{len(results)} passed in {total_duration:.1f}s"
|
||||
)
|
||||
|
||||
@@ -29,6 +29,8 @@ from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.connectors.factory import ConnectorMissingException
|
||||
from onyx.connectors.factory import identify_connector_class
|
||||
from onyx.connectors.factory import instantiate_connector
|
||||
from onyx.connectors.interfaces import HierarchyConnector
|
||||
from onyx.connectors.models import HierarchyNode as PydanticHierarchyNode
|
||||
@@ -55,6 +57,26 @@ logger = setup_logger()
|
||||
HIERARCHY_FETCH_INTERVAL_SECONDS = 24 * 60 * 60
|
||||
|
||||
|
||||
def _connector_supports_hierarchy_fetching(
|
||||
cc_pair: ConnectorCredentialPair,
|
||||
) -> bool:
|
||||
"""Return True only for connectors whose class implements HierarchyConnector."""
|
||||
try:
|
||||
connector_class = identify_connector_class(
|
||||
cc_pair.connector.source,
|
||||
)
|
||||
except ConnectorMissingException as e:
|
||||
task_logger.warning(
|
||||
"Skipping hierarchy fetching enqueue for source=%s input_type=%s: %s",
|
||||
cc_pair.connector.source,
|
||||
cc_pair.connector.input_type,
|
||||
str(e),
|
||||
)
|
||||
return False
|
||||
|
||||
return issubclass(connector_class, HierarchyConnector)
|
||||
|
||||
|
||||
def _is_hierarchy_fetching_due(cc_pair: ConnectorCredentialPair) -> bool:
|
||||
"""Returns boolean indicating if hierarchy fetching is due for this connector.
|
||||
|
||||
@@ -127,9 +149,7 @@ def _try_creating_hierarchy_fetching_task(
|
||||
raise RuntimeError("send_task for hierarchy_fetching_task failed.")
|
||||
|
||||
task_logger.info(
|
||||
f"Created hierarchy fetching task: "
|
||||
f"cc_pair={cc_pair.id} "
|
||||
f"celery_task_id={custom_task_id}"
|
||||
f"Created hierarchy fetching task: cc_pair={cc_pair.id} celery_task_id={custom_task_id}"
|
||||
)
|
||||
|
||||
return custom_task_id
|
||||
@@ -188,7 +208,10 @@ def check_for_hierarchy_fetching(self: Task, *, tenant_id: str) -> int | None:
|
||||
cc_pair_id=cc_pair_id,
|
||||
)
|
||||
|
||||
if not cc_pair or not _is_hierarchy_fetching_due(cc_pair):
|
||||
if not cc_pair or not _connector_supports_hierarchy_fetching(cc_pair):
|
||||
continue
|
||||
|
||||
if not _is_hierarchy_fetching_due(cc_pair):
|
||||
continue
|
||||
|
||||
task_id = _try_creating_hierarchy_fetching_task(
|
||||
@@ -215,8 +238,7 @@ def check_for_hierarchy_fetching(self: Task, *, tenant_id: str) -> int | None:
|
||||
|
||||
time_elapsed = time.monotonic() - time_start
|
||||
task_logger.info(
|
||||
f"check_for_hierarchy_fetching finished: "
|
||||
f"tasks_created={tasks_created} elapsed={time_elapsed:.2f}s"
|
||||
f"check_for_hierarchy_fetching finished: tasks_created={tasks_created} elapsed={time_elapsed:.2f}s"
|
||||
)
|
||||
return tasks_created
|
||||
|
||||
@@ -342,8 +364,7 @@ def connector_hierarchy_fetching_task(
|
||||
from the connector source and stores it in the database.
|
||||
"""
|
||||
task_logger.info(
|
||||
f"connector_hierarchy_fetching_task starting: "
|
||||
f"cc_pair={cc_pair_id} tenant={tenant_id}"
|
||||
f"connector_hierarchy_fetching_task starting: cc_pair={cc_pair_id} tenant={tenant_id}"
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -361,8 +382,7 @@ def connector_hierarchy_fetching_task(
|
||||
|
||||
if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
|
||||
task_logger.info(
|
||||
f"Skipping hierarchy fetching for deleting connector: "
|
||||
f"cc_pair={cc_pair_id}"
|
||||
f"Skipping hierarchy fetching for deleting connector: cc_pair={cc_pair_id}"
|
||||
)
|
||||
return
|
||||
|
||||
@@ -375,8 +395,7 @@ def connector_hierarchy_fetching_task(
|
||||
)
|
||||
|
||||
task_logger.info(
|
||||
f"connector_hierarchy_fetching_task: "
|
||||
f"Extracted {total_nodes} hierarchy nodes for cc_pair={cc_pair_id}"
|
||||
f"connector_hierarchy_fetching_task: Extracted {total_nodes} hierarchy nodes for cc_pair={cc_pair_id}"
|
||||
)
|
||||
|
||||
# Update the last fetch time to prevent re-running until next interval
|
||||
|
||||
@@ -18,7 +18,9 @@ from onyx.llm.well_known_providers.auto_update_service import (
|
||||
bind=True,
|
||||
)
|
||||
def check_for_auto_llm_updates(
|
||||
self: Task, *, tenant_id: str # noqa: ARG001
|
||||
self: Task, # noqa: ARG001
|
||||
*,
|
||||
tenant_id: str, # noqa: ARG001
|
||||
) -> bool | None:
|
||||
"""Periodic task to fetch LLM model updates from GitHub
|
||||
and sync them to providers in Auto mode.
|
||||
|
||||
@@ -116,8 +116,7 @@ class Metric(BaseModel):
|
||||
string_value = self.value
|
||||
else:
|
||||
task_logger.error(
|
||||
f"Invalid metric value type: {type(self.value)} "
|
||||
f"({self.value}) for metric {self.name}."
|
||||
f"Invalid metric value type: {type(self.value)} ({self.value}) for metric {self.name}."
|
||||
)
|
||||
return
|
||||
|
||||
@@ -260,8 +259,7 @@ def _build_connector_final_metrics(
|
||||
)
|
||||
if _has_metric_been_emitted(redis_std, metric_key):
|
||||
task_logger.info(
|
||||
f"Skipping final metrics for connector {cc_pair.connector.id} "
|
||||
f"index attempt {attempt.id}, already emitted."
|
||||
f"Skipping final metrics for connector {cc_pair.connector.id} index attempt {attempt.id}, already emitted."
|
||||
)
|
||||
continue
|
||||
|
||||
@@ -1036,8 +1034,7 @@ def monitor_process_memory(self: Task, *, tenant_id: str) -> None: # noqa: ARG0
|
||||
if process_name in cmdline:
|
||||
if process_type in supervisor_processes.values():
|
||||
task_logger.error(
|
||||
f"Duplicate process type for type {process_type} "
|
||||
f"with cmd {cmdline} with pid={proc.pid}."
|
||||
f"Duplicate process type for type {process_type} with cmd {cmdline} with pid={proc.pid}."
|
||||
)
|
||||
continue
|
||||
|
||||
@@ -1046,8 +1043,7 @@ def monitor_process_memory(self: Task, *, tenant_id: str) -> None: # noqa: ARG0
|
||||
|
||||
if len(supervisor_processes) != len(process_type_mapping):
|
||||
task_logger.error(
|
||||
"Missing processes: "
|
||||
f"{set(process_type_mapping.keys()).symmetric_difference(supervisor_processes.values())}"
|
||||
f"Missing processes: {set(process_type_mapping.keys()).symmetric_difference(supervisor_processes.values())}"
|
||||
)
|
||||
|
||||
# Log memory usage for each process
|
||||
@@ -1101,9 +1097,7 @@ def cloud_monitor_celery_pidbox(
|
||||
|
||||
r_celery.delete(key)
|
||||
task_logger.info(
|
||||
f"Deleted idle pidbox: pidbox={key_str} "
|
||||
f"idletime={idletime} "
|
||||
f"max_idletime={MAX_PIDBOX_IDLE}"
|
||||
f"Deleted idle pidbox: pidbox={key_str} idletime={idletime} max_idletime={MAX_PIDBOX_IDLE}"
|
||||
)
|
||||
num_deleted += 1
|
||||
|
||||
|
||||
@@ -205,8 +205,7 @@ def migrate_chunks_from_vespa_to_opensearch_task(
|
||||
) = get_vespa_visit_state(db_session)
|
||||
if is_continuation_token_done_for_all_slices(continuation_token_map):
|
||||
task_logger.info(
|
||||
f"OpenSearch migration COMPLETED for tenant {tenant_id}. "
|
||||
f"Total chunks migrated: {total_chunks_migrated}."
|
||||
f"OpenSearch migration COMPLETED for tenant {tenant_id}. Total chunks migrated: {total_chunks_migrated}."
|
||||
)
|
||||
mark_migration_completed_time_if_not_set_with_commit(db_session)
|
||||
break
|
||||
|
||||
@@ -151,8 +151,7 @@ def _resolve_and_update_document_parents(
|
||||
commit=True,
|
||||
)
|
||||
task_logger.info(
|
||||
f"Pruning: resolved and updated parent hierarchy for "
|
||||
f"{len(resolved)} documents (source={source.value})"
|
||||
f"Pruning: resolved and updated parent hierarchy for {len(resolved)} documents (source={source.value})"
|
||||
)
|
||||
|
||||
|
||||
@@ -220,7 +219,6 @@ def check_for_pruning(self: Task, *, tenant_id: str) -> bool | None:
|
||||
|
||||
# but pruning only kicks off once per hour
|
||||
if not r.exists(OnyxRedisSignals.BLOCK_PRUNING):
|
||||
|
||||
task_logger.info("Checking for pruning due")
|
||||
|
||||
cc_pair_ids: list[int] = []
|
||||
@@ -484,8 +482,7 @@ def connector_pruning_generator_task(
|
||||
|
||||
if not redis_connector.prune.fenced: # The fence must exist
|
||||
raise ValueError(
|
||||
f"connector_prune_generator_task - fence not found: "
|
||||
f"fence={redis_connector.prune.fence_key}"
|
||||
f"connector_prune_generator_task - fence not found: fence={redis_connector.prune.fence_key}"
|
||||
)
|
||||
|
||||
payload = redis_connector.prune.payload # The payload must exist
|
||||
@@ -496,8 +493,7 @@ def connector_pruning_generator_task(
|
||||
|
||||
if payload.celery_task_id is None:
|
||||
logger.info(
|
||||
f"connector_prune_generator_task - Waiting for fence: "
|
||||
f"fence={redis_connector.prune.fence_key}"
|
||||
f"connector_prune_generator_task - Waiting for fence: fence={redis_connector.prune.fence_key}"
|
||||
)
|
||||
time.sleep(1)
|
||||
continue
|
||||
@@ -553,9 +549,7 @@ def connector_pruning_generator_task(
|
||||
redis_connector.prune.set_fence(new_payload)
|
||||
|
||||
task_logger.info(
|
||||
f"Pruning generator running connector: "
|
||||
f"cc_pair={cc_pair_id} "
|
||||
f"connector_source={cc_pair.connector.source}"
|
||||
f"Pruning generator running connector: cc_pair={cc_pair_id} connector_source={cc_pair.connector.source}"
|
||||
)
|
||||
|
||||
runnable_connector = instantiate_connector(
|
||||
@@ -673,8 +667,7 @@ def connector_pruning_generator_task(
|
||||
return None
|
||||
|
||||
task_logger.info(
|
||||
"RedisConnector.prune.generate_tasks finished. "
|
||||
f"cc_pair={cc_pair_id} tasks_generated={tasks_generated}"
|
||||
f"RedisConnector.prune.generate_tasks finished. cc_pair={cc_pair_id} tasks_generated={tasks_generated}"
|
||||
)
|
||||
|
||||
redis_connector.prune.generator_complete = tasks_generated
|
||||
@@ -717,9 +710,7 @@ def connector_pruning_generator_task(
|
||||
)
|
||||
except Exception as e:
|
||||
task_logger.exception(
|
||||
f"Pruning exceptioned: cc_pair={cc_pair_id} "
|
||||
f"connector={connector_id} "
|
||||
f"payload_id={payload_id}"
|
||||
f"Pruning exceptioned: cc_pair={cc_pair_id} connector={connector_id} payload_id={payload_id}"
|
||||
)
|
||||
|
||||
redis_connector.prune.reset()
|
||||
@@ -737,7 +728,10 @@ def connector_pruning_generator_task(
|
||||
|
||||
|
||||
def monitor_ccpair_pruning_taskset(
|
||||
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session # noqa: ARG001
|
||||
tenant_id: str,
|
||||
key_bytes: bytes,
|
||||
r: Redis, # noqa: ARG001
|
||||
db_session: Session,
|
||||
) -> None:
|
||||
fence_key = key_bytes.decode("utf-8")
|
||||
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
|
||||
@@ -931,8 +925,7 @@ def validate_pruning_fence(
|
||||
tasks_not_in_celery += 1
|
||||
|
||||
task_logger.info(
|
||||
"validate_pruning_fence task check: "
|
||||
f"tasks_scanned={tasks_scanned} tasks_not_in_celery={tasks_not_in_celery}"
|
||||
f"validate_pruning_fence task check: tasks_scanned={tasks_scanned} tasks_not_in_celery={tasks_not_in_celery}"
|
||||
)
|
||||
|
||||
# we're active if there are still tasks to run and those tasks all exist in celery
|
||||
|
||||
@@ -192,10 +192,7 @@ def document_by_cc_pair_cleanup_task(
|
||||
|
||||
elapsed = time.monotonic() - start
|
||||
task_logger.info(
|
||||
f"doc={document_id} "
|
||||
f"action={action} "
|
||||
f"refcount={count} "
|
||||
f"elapsed={elapsed:.2f}"
|
||||
f"doc={document_id} action={action} refcount={count} elapsed={elapsed:.2f}"
|
||||
)
|
||||
except SoftTimeLimitExceeded:
|
||||
task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
|
||||
@@ -218,9 +215,7 @@ def document_by_cc_pair_cleanup_task(
|
||||
if isinstance(e, httpx.HTTPStatusError):
|
||||
if e.response.status_code == HTTPStatus.BAD_REQUEST:
|
||||
task_logger.exception(
|
||||
f"Non-retryable HTTPStatusError: "
|
||||
f"doc={document_id} "
|
||||
f"status={e.response.status_code}"
|
||||
f"Non-retryable HTTPStatusError: doc={document_id} status={e.response.status_code}"
|
||||
)
|
||||
completion_status = (
|
||||
OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
|
||||
@@ -239,8 +234,7 @@ def document_by_cc_pair_cleanup_task(
|
||||
# This is the last attempt! mark the document as dirty in the db so that it
|
||||
# eventually gets fixed out of band via stale document reconciliation
|
||||
task_logger.warning(
|
||||
f"Max celery task retries reached. Marking doc as dirty for reconciliation: "
|
||||
f"doc={document_id}"
|
||||
f"Max celery task retries reached. Marking doc as dirty for reconciliation: doc={document_id}"
|
||||
)
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
# delete the cc pair relationship now and let reconciliation clean it up
|
||||
@@ -285,4 +279,4 @@ def celery_beat_heartbeat(self: Task, *, tenant_id: str) -> None: # noqa: ARG00
|
||||
r: Redis = get_redis_client()
|
||||
r.set(ONYX_CELERY_BEAT_HEARTBEAT_KEY, 1, ex=600)
|
||||
time_elapsed = time.monotonic() - time_start
|
||||
task_logger.info(f"celery_beat_heartbeat finished: " f"elapsed={time_elapsed:.2f}")
|
||||
task_logger.info(f"celery_beat_heartbeat finished: elapsed={time_elapsed:.2f}")
|
||||
|
||||
@@ -285,8 +285,7 @@ def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
|
||||
lock.release()
|
||||
|
||||
task_logger.info(
|
||||
f"check_user_file_processing - Enqueued {enqueued} skipped_guard={skipped_guard} "
|
||||
f"tasks for tenant={tenant_id}"
|
||||
f"check_user_file_processing - Enqueued {enqueued} skipped_guard={skipped_guard} tasks for tenant={tenant_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
@@ -317,8 +316,7 @@ def _process_user_file_without_vector_db(
|
||||
token_count: int | None = len(encode(combined_text))
|
||||
except Exception:
|
||||
task_logger.warning(
|
||||
f"_process_user_file_without_vector_db - "
|
||||
f"Failed to compute token count for {uf.id}, falling back to None"
|
||||
f"_process_user_file_without_vector_db - Failed to compute token count for {uf.id}, falling back to None"
|
||||
)
|
||||
token_count = None
|
||||
|
||||
@@ -338,8 +336,7 @@ def _process_user_file_without_vector_db(
|
||||
db_session.commit()
|
||||
|
||||
task_logger.info(
|
||||
f"_process_user_file_without_vector_db - "
|
||||
f"Completed id={uf.id} tokens={token_count}"
|
||||
f"_process_user_file_without_vector_db - Completed id={uf.id} tokens={token_count}"
|
||||
)
|
||||
|
||||
|
||||
@@ -366,8 +363,7 @@ def _process_user_file_with_indexing(
|
||||
)
|
||||
if current_search_settings is None:
|
||||
raise RuntimeError(
|
||||
f"_process_user_file_with_indexing - "
|
||||
f"No current search settings found for tenant={tenant_id}"
|
||||
f"_process_user_file_with_indexing - No current search settings found for tenant={tenant_id}"
|
||||
)
|
||||
|
||||
adapter = UserFileIndexingAdapter(
|
||||
@@ -397,8 +393,7 @@ def _process_user_file_with_indexing(
|
||||
)
|
||||
|
||||
task_logger.info(
|
||||
f"_process_user_file_with_indexing - "
|
||||
f"Indexing pipeline completed ={index_pipeline_result}"
|
||||
f"_process_user_file_with_indexing - Indexing pipeline completed ={index_pipeline_result}"
|
||||
)
|
||||
|
||||
if (
|
||||
@@ -407,8 +402,7 @@ def _process_user_file_with_indexing(
|
||||
or index_pipeline_result.total_chunks == 0
|
||||
):
|
||||
task_logger.error(
|
||||
f"_process_user_file_with_indexing - "
|
||||
f"Indexing pipeline failed id={user_file_id}"
|
||||
f"_process_user_file_with_indexing - Indexing pipeline failed id={user_file_id}"
|
||||
)
|
||||
if uf.status != UserFileStatus.DELETING:
|
||||
uf.status = UserFileStatus.FAILED
|
||||
@@ -535,7 +529,10 @@ def process_user_file_impl(
|
||||
ignore_result=True,
|
||||
)
|
||||
def process_single_user_file(
|
||||
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
|
||||
self: Task, # noqa: ARG001
|
||||
*,
|
||||
user_file_id: str,
|
||||
tenant_id: str,
|
||||
) -> None:
|
||||
process_user_file_impl(
|
||||
user_file_id=user_file_id, tenant_id=tenant_id, redis_locking=True
|
||||
@@ -691,7 +688,10 @@ def delete_user_file_impl(
|
||||
ignore_result=True,
|
||||
)
|
||||
def process_single_user_file_delete(
|
||||
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
|
||||
self: Task, # noqa: ARG001
|
||||
*,
|
||||
user_file_id: str,
|
||||
tenant_id: str,
|
||||
) -> None:
|
||||
delete_user_file_impl(
|
||||
user_file_id=user_file_id, tenant_id=tenant_id, redis_locking=True
|
||||
@@ -761,8 +761,7 @@ def check_for_user_file_project_sync(self: Task, *, tenant_id: str) -> None:
|
||||
lock.release()
|
||||
|
||||
task_logger.info(
|
||||
f"Enqueued {enqueued} "
|
||||
f"Skipped guard {skipped_guard} tasks for tenant={tenant_id}"
|
||||
f"Enqueued {enqueued} Skipped guard {skipped_guard} tasks for tenant={tenant_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
@@ -876,7 +875,10 @@ def project_sync_user_file_impl(
|
||||
ignore_result=True,
|
||||
)
|
||||
def process_single_user_file_project_sync(
|
||||
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
|
||||
self: Task, # noqa: ARG001
|
||||
*,
|
||||
user_file_id: str,
|
||||
tenant_id: str,
|
||||
) -> None:
|
||||
project_sync_user_file_impl(
|
||||
user_file_id=user_file_id, tenant_id=tenant_id, redis_locking=True
|
||||
|
||||
@@ -199,8 +199,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str) -> bool | None:
|
||||
lock_beat.release()
|
||||
else:
|
||||
task_logger.error(
|
||||
"check_for_vespa_sync_task - Lock not owned on completion: "
|
||||
f"tenant={tenant_id}"
|
||||
f"check_for_vespa_sync_task - Lock not owned on completion: tenant={tenant_id}"
|
||||
)
|
||||
redis_lock_dump(lock_beat, r)
|
||||
|
||||
@@ -266,8 +265,7 @@ def try_generate_document_set_sync_tasks(
|
||||
# return 0
|
||||
|
||||
task_logger.info(
|
||||
f"RedisDocumentSet.generate_tasks finished. "
|
||||
f"document_set={document_set.id} tasks_generated={tasks_generated}"
|
||||
f"RedisDocumentSet.generate_tasks finished. document_set={document_set.id} tasks_generated={tasks_generated}"
|
||||
)
|
||||
|
||||
# create before setting fence to avoid race condition where the monitoring
|
||||
@@ -342,8 +340,7 @@ def try_generate_user_group_sync_tasks(
|
||||
# return 0
|
||||
|
||||
task_logger.info(
|
||||
f"RedisUserGroup.generate_tasks finished. "
|
||||
f"usergroup={usergroup.id} tasks_generated={tasks_generated}"
|
||||
f"RedisUserGroup.generate_tasks finished. usergroup={usergroup.id} tasks_generated={tasks_generated}"
|
||||
)
|
||||
|
||||
# create before setting fence to avoid race condition where the monitoring
|
||||
@@ -398,8 +395,7 @@ def monitor_document_set_taskset(
|
||||
|
||||
count = cast(int, r.scard(rds.taskset_key))
|
||||
task_logger.info(
|
||||
f"Document set sync progress: document_set={document_set_id} "
|
||||
f"remaining={count} initial={initial_count}"
|
||||
f"Document set sync progress: document_set={document_set_id} remaining={count} initial={initial_count}"
|
||||
)
|
||||
if count > 0:
|
||||
update_sync_record_status(
|
||||
@@ -444,9 +440,7 @@ def monitor_document_set_taskset(
|
||||
)
|
||||
except Exception:
|
||||
task_logger.exception(
|
||||
"update_sync_record_status exceptioned. "
|
||||
f"document_set_id={document_set_id} "
|
||||
"Resetting document set regardless."
|
||||
f"update_sync_record_status exceptioned. document_set_id={document_set_id} Resetting document set regardless."
|
||||
)
|
||||
|
||||
rds.reset()
|
||||
@@ -483,9 +477,7 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
|
||||
if not doc:
|
||||
elapsed = time.monotonic() - start
|
||||
task_logger.info(
|
||||
f"doc={document_id} "
|
||||
f"action=no_operation "
|
||||
f"elapsed={elapsed:.2f}"
|
||||
f"doc={document_id} action=no_operation elapsed={elapsed:.2f}"
|
||||
)
|
||||
completion_status = OnyxCeleryTaskCompletionStatus.SKIPPED
|
||||
else:
|
||||
@@ -524,9 +516,7 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
|
||||
mark_document_as_synced(document_id, db_session)
|
||||
|
||||
elapsed = time.monotonic() - start
|
||||
task_logger.info(
|
||||
f"doc={document_id} " f"action=sync " f"elapsed={elapsed:.2f}"
|
||||
)
|
||||
task_logger.info(f"doc={document_id} action=sync elapsed={elapsed:.2f}")
|
||||
completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED
|
||||
except SoftTimeLimitExceeded:
|
||||
task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
|
||||
@@ -549,9 +539,7 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
|
||||
if isinstance(e, httpx.HTTPStatusError):
|
||||
if e.response.status_code == HTTPStatus.BAD_REQUEST:
|
||||
task_logger.exception(
|
||||
f"Non-retryable HTTPStatusError: "
|
||||
f"doc={document_id} "
|
||||
f"status={e.response.status_code}"
|
||||
f"Non-retryable HTTPStatusError: doc={document_id} status={e.response.status_code}"
|
||||
)
|
||||
completion_status = (
|
||||
OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
|
||||
|
||||
@@ -175,14 +175,16 @@ class SimpleJobClient:
|
||||
del self.jobs[job.id]
|
||||
|
||||
def submit(
|
||||
self, func: Callable, *args: Any, pure: bool = True # noqa: ARG002
|
||||
self,
|
||||
func: Callable,
|
||||
*args: Any,
|
||||
pure: bool = True, # noqa: ARG002
|
||||
) -> SimpleJob | None:
|
||||
"""NOTE: `pure` arg is needed so this can be a drop in replacement for Dask"""
|
||||
self._cleanup_completed_jobs()
|
||||
if len(self.jobs) >= self.n_workers:
|
||||
logger.debug(
|
||||
f"No available workers to run job. "
|
||||
f"Currently running '{len(self.jobs)}' jobs, with a limit of '{self.n_workers}'."
|
||||
f"No available workers to run job. Currently running '{len(self.jobs)}' jobs, with a limit of '{self.n_workers}'."
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
@@ -226,15 +226,13 @@ def _check_failure_threshold(
|
||||
FAILURE_RATIO_THRESHOLD = 0.1
|
||||
if total_failures > FAILURE_THRESHOLD and failure_ratio > FAILURE_RATIO_THRESHOLD:
|
||||
logger.error(
|
||||
f"Connector run failed with '{total_failures}' errors "
|
||||
f"after '{batch_num}' batches."
|
||||
f"Connector run failed with '{total_failures}' errors after '{batch_num}' batches."
|
||||
)
|
||||
if last_failure and last_failure.exception:
|
||||
raise last_failure.exception from last_failure.exception
|
||||
|
||||
raise RuntimeError(
|
||||
f"Connector run encountered too many errors, aborting. "
|
||||
f"Last error: {last_failure}"
|
||||
f"Connector run encountered too many errors, aborting. Last error: {last_failure}"
|
||||
)
|
||||
|
||||
|
||||
@@ -609,8 +607,7 @@ def connector_document_extraction(
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Persisted and cached {len(hierarchy_node_batch_cleaned)} hierarchy nodes "
|
||||
f"for attempt={index_attempt_id}"
|
||||
f"Persisted and cached {len(hierarchy_node_batch_cleaned)} hierarchy nodes for attempt={index_attempt_id}"
|
||||
)
|
||||
|
||||
# below is all document processing task, so if no batch we can just continue
|
||||
@@ -812,15 +809,12 @@ def connector_document_extraction(
|
||||
queue=OnyxCeleryQueues.SANDBOX,
|
||||
)
|
||||
logger.info(
|
||||
f"Triggered sandbox file sync for user {creator_id} "
|
||||
f"source={source_value} after indexing complete"
|
||||
f"Triggered sandbox file sync for user {creator_id} source={source_value} after indexing complete"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(
|
||||
f"Document extraction failed: "
|
||||
f"attempt={index_attempt_id} "
|
||||
f"error={str(e)}"
|
||||
f"Document extraction failed: attempt={index_attempt_id} error={str(e)}"
|
||||
)
|
||||
|
||||
# Do NOT clean up batches on failure; future runs will use those batches
|
||||
@@ -956,7 +950,6 @@ def reissue_old_batches(
|
||||
# is still in the filestore waiting for processing or not.
|
||||
last_batch_num = len(old_batches) + recent_batches
|
||||
logger.info(
|
||||
f"Starting from batch {last_batch_num} due to "
|
||||
f"re-issued batches: {old_batches}, completed batches: {recent_batches}"
|
||||
f"Starting from batch {last_batch_num} due to re-issued batches: {old_batches}, completed batches: {recent_batches}"
|
||||
)
|
||||
return len(old_batches), recent_batches
|
||||
|
||||
@@ -259,8 +259,7 @@ def _poller_loop(tenant_id: str) -> None:
|
||||
|
||||
periodic_tasks = _build_periodic_tasks()
|
||||
logger.info(
|
||||
f"Periodic poller started with {len(periodic_tasks)} periodic task(s): "
|
||||
f"{[t.name for t in periodic_tasks]}"
|
||||
f"Periodic poller started with {len(periodic_tasks)} periodic task(s): {[t.name for t in periodic_tasks]}"
|
||||
)
|
||||
|
||||
while not _shutdown_event.is_set():
|
||||
|
||||
3
backend/onyx/cache/factory.py
vendored
3
backend/onyx/cache/factory.py
vendored
@@ -38,8 +38,7 @@ def get_cache_backend(*, tenant_id: str | None = None) -> CacheBackend:
|
||||
builder = _BACKEND_BUILDERS.get(CACHE_BACKEND)
|
||||
if builder is None:
|
||||
raise ValueError(
|
||||
f"Unsupported CACHE_BACKEND={CACHE_BACKEND!r}. "
|
||||
f"Supported values: {[t.value for t in CacheBackendType]}"
|
||||
f"Unsupported CACHE_BACKEND={CACHE_BACKEND!r}. Supported values: {[t.value for t in CacheBackendType]}"
|
||||
)
|
||||
return builder(tenant_id)
|
||||
|
||||
|
||||
@@ -270,7 +270,10 @@ def extract_headers(
|
||||
|
||||
|
||||
def process_kg_commands(
|
||||
message: str, persona_name: str, tenant_id: str, db_session: Session # noqa: ARG001
|
||||
message: str,
|
||||
persona_name: str,
|
||||
tenant_id: str, # noqa: ARG001
|
||||
db_session: Session,
|
||||
) -> None:
|
||||
# Temporarily, until we have a draft UI for the KG Operations/Management
|
||||
# TODO: move to api endpoint once we get frontend
|
||||
|
||||
@@ -472,8 +472,7 @@ class DynamicCitationProcessor:
|
||||
# Check if we have a mapping for this citation number
|
||||
if num not in self.citation_to_doc:
|
||||
logger.warning(
|
||||
f"Citation number {num} not found in mapping. "
|
||||
f"Available: {list(self.citation_to_doc.keys())}"
|
||||
f"Citation number {num} not found in mapping. Available: {list(self.citation_to_doc.keys())}"
|
||||
)
|
||||
continue
|
||||
|
||||
|
||||
@@ -157,8 +157,7 @@ def _try_fallback_tool_extraction(
|
||||
)
|
||||
if extracted_tool_calls:
|
||||
logger.info(
|
||||
f"Extracted {len(extracted_tool_calls)} tool call(s) from response text "
|
||||
"as fallback"
|
||||
f"Extracted {len(extracted_tool_calls)} tool call(s) from response text as fallback"
|
||||
)
|
||||
return (
|
||||
LlmStepResult(
|
||||
@@ -397,8 +396,7 @@ def construct_message_history(
|
||||
]
|
||||
if forgotten_meta:
|
||||
logger.debug(
|
||||
f"FileReader: building forgotten-files message for "
|
||||
f"{[(m.file_id, m.filename) for m in forgotten_meta]}"
|
||||
f"FileReader: building forgotten-files message for {[(m.file_id, m.filename) for m in forgotten_meta]}"
|
||||
)
|
||||
forgotten_files_message = _create_file_tool_metadata_message(
|
||||
forgotten_meta, token_counter
|
||||
@@ -488,8 +486,7 @@ def _drop_orphaned_tool_call_responses(
|
||||
sanitized.append(msg)
|
||||
else:
|
||||
logger.debug(
|
||||
"Dropping orphaned tool response with tool_call_id=%s while "
|
||||
"constructing message history",
|
||||
"Dropping orphaned tool response with tool_call_id=%s while constructing message history",
|
||||
msg.tool_call_id,
|
||||
)
|
||||
continue
|
||||
@@ -515,8 +512,7 @@ def _create_file_tool_metadata_message(
|
||||
]
|
||||
for meta in file_metadata:
|
||||
lines.append(
|
||||
f'- file_id="{meta.file_id}" filename="{meta.filename}" '
|
||||
f"(~{meta.approx_char_count:,} chars)"
|
||||
f'- file_id="{meta.file_id}" filename="{meta.filename}" (~{meta.approx_char_count:,} chars)'
|
||||
)
|
||||
|
||||
message_content = "\n".join(lines)
|
||||
|
||||
@@ -695,8 +695,7 @@ def _build_structured_assistant_message(msg: ChatMessageSimple) -> AssistantMess
|
||||
def _build_structured_tool_response_message(msg: ChatMessageSimple) -> ToolMessage:
|
||||
if not msg.tool_call_id:
|
||||
raise ValueError(
|
||||
"Tool call response message encountered but tool_call_id is not available. "
|
||||
f"Message: {msg}"
|
||||
f"Tool call response message encountered but tool_call_id is not available. Message: {msg}"
|
||||
)
|
||||
|
||||
return ToolMessage(
|
||||
@@ -731,8 +730,7 @@ class _OllamaHistoryMessageFormatter(_HistoryMessageFormatter):
|
||||
|
||||
tool_call_lines = [
|
||||
(
|
||||
f"[Tool Call] name={tc.tool_name} "
|
||||
f"id={tc.tool_call_id} args={json.dumps(tc.tool_arguments)}"
|
||||
f"[Tool Call] name={tc.tool_name} id={tc.tool_call_id} args={json.dumps(tc.tool_arguments)}"
|
||||
)
|
||||
for tc in msg.tool_calls
|
||||
]
|
||||
@@ -750,8 +748,7 @@ class _OllamaHistoryMessageFormatter(_HistoryMessageFormatter):
|
||||
def format_tool_response_message(self, msg: ChatMessageSimple) -> UserMessage:
|
||||
if not msg.tool_call_id:
|
||||
raise ValueError(
|
||||
"Tool call response message encountered but tool_call_id is not available. "
|
||||
f"Message: {msg}"
|
||||
f"Tool call response message encountered but tool_call_id is not available. Message: {msg}"
|
||||
)
|
||||
|
||||
return UserMessage(
|
||||
@@ -839,8 +836,7 @@ def translate_history_to_llm_format(
|
||||
content_parts.append(image_part)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to process image file {img_file.file_id}: {e}. "
|
||||
"Skipping image."
|
||||
f"Failed to process image file {img_file.file_id}: {e}. Skipping image."
|
||||
)
|
||||
user_msg = UserMessage(
|
||||
role="user",
|
||||
|
||||
@@ -490,13 +490,13 @@ def handle_stream_message_objects(
|
||||
# Milestone tracking, most devs using the API don't need to understand this
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=user.email if not user.is_anonymous else tenant_id,
|
||||
distinct_id=str(user.id) if not user.is_anonymous else tenant_id,
|
||||
event=MilestoneRecordType.MULTIPLE_ASSISTANTS,
|
||||
)
|
||||
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=user.email if not user.is_anonymous else tenant_id,
|
||||
distinct_id=str(user.id) if not user.is_anonymous else tenant_id,
|
||||
event=MilestoneRecordType.USER_MESSAGE_SENT,
|
||||
properties={
|
||||
"origin": new_msg_req.origin.value,
|
||||
@@ -796,8 +796,7 @@ def handle_stream_message_objects(
|
||||
|
||||
if all_injected_file_metadata:
|
||||
logger.debug(
|
||||
"FileReader: file metadata for LLM: "
|
||||
f"{[(fid, m.filename) for fid, m in all_injected_file_metadata.items()]}"
|
||||
f"FileReader: file metadata for LLM: {[(fid, m.filename) for fid, m in all_injected_file_metadata.items()]}"
|
||||
)
|
||||
|
||||
# Prepend summary message if compression exists
|
||||
|
||||
@@ -87,8 +87,7 @@ def _create_and_link_tool_calls(
|
||||
tool_call_tokens = len(default_tokenizer.encode(arguments_json_str))
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to tokenize tool call arguments for {tool_call_info.tool_call_id}: {e}. "
|
||||
f"Using length as (over) estimate."
|
||||
f"Failed to tokenize tool call arguments for {tool_call_info.tool_call_id}: {e}. Using length as (over) estimate."
|
||||
)
|
||||
arguments_json_str = json.dumps(tool_call_info.tool_call_arguments)
|
||||
tool_call_tokens = len(arguments_json_str)
|
||||
|
||||
@@ -196,6 +196,10 @@ if _OIDC_SCOPE_OVERRIDE:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Enables PKCE for OIDC login flow. Disabled by default to preserve
|
||||
# backwards compatibility for existing OIDC deployments.
|
||||
OIDC_PKCE_ENABLED = os.environ.get("OIDC_PKCE_ENABLED", "").lower() == "true"
|
||||
|
||||
# Applicable for SAML Auth
|
||||
SAML_CONF_DIR = os.environ.get("SAML_CONF_DIR") or "/app/onyx/configs/saml_config"
|
||||
|
||||
@@ -1042,6 +1046,8 @@ POD_NAMESPACE = os.environ.get("POD_NAMESPACE")
|
||||
|
||||
DEV_MODE = os.environ.get("DEV_MODE", "").lower() == "true"
|
||||
|
||||
HOOK_ENABLED = os.environ.get("HOOK_ENABLED", "").lower() == "true"
|
||||
|
||||
INTEGRATION_TESTS_MODE = os.environ.get("INTEGRATION_TESTS_MODE", "").lower() == "true"
|
||||
|
||||
#####
|
||||
|
||||
@@ -90,8 +90,7 @@ def parse_airtable_url(
|
||||
match = _AIRTABLE_URL_PATTERN.search(url.strip())
|
||||
if not match:
|
||||
raise ValueError(
|
||||
f"Could not parse Airtable URL: '{url}'. "
|
||||
"Expected format: https://airtable.com/appXXX/tblYYY[/viwZZZ]"
|
||||
f"Could not parse Airtable URL: '{url}'. Expected format: https://airtable.com/appXXX/tblYYY[/viwZZZ]"
|
||||
)
|
||||
return match.group(1), match.group(2), match.group(3)
|
||||
|
||||
@@ -170,16 +169,14 @@ class AirtableConnector(LoadConnector):
|
||||
else:
|
||||
if not self.base_id or not self.table_name_or_id:
|
||||
raise ConnectorValidationError(
|
||||
"A valid Airtable URL or base_id and table_name_or_id are required "
|
||||
"when not using index_all mode."
|
||||
"A valid Airtable URL or base_id and table_name_or_id are required when not using index_all mode."
|
||||
)
|
||||
try:
|
||||
table = self.airtable_client.table(self.base_id, self.table_name_or_id)
|
||||
table.schema()
|
||||
except Exception as e:
|
||||
raise ConnectorValidationError(
|
||||
f"Failed to access table '{self.table_name_or_id}' "
|
||||
f"in base '{self.base_id}': {e}"
|
||||
f"Failed to access table '{self.table_name_or_id}' in base '{self.base_id}': {e}"
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@@ -391,10 +388,7 @@ class AirtableConnector(LoadConnector):
|
||||
TextSection(
|
||||
link=link,
|
||||
text=(
|
||||
f"{field_name}:\n"
|
||||
"------------------------\n"
|
||||
f"{text}\n"
|
||||
"------------------------"
|
||||
f"{field_name}:\n------------------------\n{text}\n------------------------"
|
||||
),
|
||||
)
|
||||
for text, link in field_value_and_links
|
||||
@@ -440,8 +434,7 @@ class AirtableConnector(LoadConnector):
|
||||
field_type = field_schema.type
|
||||
|
||||
logger.debug(
|
||||
f"Processing field '{field_name}' of type '{field_type}' "
|
||||
f"for record '{record_id}'."
|
||||
f"Processing field '{field_name}' of type '{field_type}' for record '{record_id}'."
|
||||
)
|
||||
|
||||
field_sections, field_metadata = self._process_field(
|
||||
@@ -534,8 +527,7 @@ class AirtableConnector(LoadConnector):
|
||||
break
|
||||
|
||||
logger.info(
|
||||
f"Processing {len(records)} records from table "
|
||||
f"'{table_schema.name}' in base '{base_name or base_id}'."
|
||||
f"Processing {len(records)} records from table '{table_schema.name}' in base '{base_name or base_id}'."
|
||||
)
|
||||
|
||||
if not records:
|
||||
@@ -629,7 +621,6 @@ class AirtableConnector(LoadConnector):
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
f"Failed to index table '{table.name}' ({table.id}) "
|
||||
f"in base '{base_name}' ({base_id}), skipping."
|
||||
f"Failed to index table '{table.name}' ({table.id}) in base '{base_name}' ({base_id}), skipping."
|
||||
)
|
||||
continue
|
||||
|
||||
@@ -68,7 +68,7 @@ class ClickupConnector(LoadConnector, PollConnector):
|
||||
response = self._make_request(url_endpoint)
|
||||
comments = [
|
||||
TextSection(
|
||||
link=f'https://app.clickup.com/t/{task_id}?comment={comment_dict["id"]}',
|
||||
link=f"https://app.clickup.com/t/{task_id}?comment={comment_dict['id']}",
|
||||
text=comment_dict["comment_text"],
|
||||
)
|
||||
for comment_dict in response["comments"]
|
||||
|
||||
@@ -698,8 +698,7 @@ class CodaConnector(LoadConnector, PollConnector):
|
||||
)
|
||||
elif e.status_code == 429:
|
||||
raise ConnectorValidationError(
|
||||
"Validation failed due to Coda rate-limits being exceeded (HTTP 429). "
|
||||
"Please try again later."
|
||||
"Validation failed due to Coda rate-limits being exceeded (HTTP 429). Please try again later."
|
||||
)
|
||||
else:
|
||||
raise UnexpectedValidationError(
|
||||
|
||||
@@ -95,7 +95,6 @@ def _get_page_id(page: dict[str, Any], allow_missing: bool = False) -> str:
|
||||
|
||||
|
||||
class ConfluenceCheckpoint(ConnectorCheckpoint):
|
||||
|
||||
next_page_url: str | None
|
||||
|
||||
|
||||
|
||||
@@ -296,8 +296,7 @@ class OnyxConfluence:
|
||||
except HTTPError as e:
|
||||
if e.response.status_code == 404 and use_v2:
|
||||
logger.warning(
|
||||
"v2 spaces API returned 404, falling back to v1 API. "
|
||||
"This may indicate an older Confluence Cloud instance."
|
||||
"v2 spaces API returned 404, falling back to v1 API. This may indicate an older Confluence Cloud instance."
|
||||
)
|
||||
# Fallback to v1
|
||||
yield from self._paginate_spaces_for_endpoint(
|
||||
@@ -354,9 +353,7 @@ class OnyxConfluence:
|
||||
|
||||
if not first_space:
|
||||
raise RuntimeError(
|
||||
f"No spaces found at {self._url}! "
|
||||
"Check your credentials and wiki_base and make sure "
|
||||
"is_cloud is set correctly."
|
||||
f"No spaces found at {self._url}! Check your credentials and wiki_base and make sure is_cloud is set correctly."
|
||||
)
|
||||
|
||||
logger.info("Confluence probe succeeded.")
|
||||
@@ -461,8 +458,7 @@ class OnyxConfluence:
|
||||
except HTTPError as e:
|
||||
delay_until = _handle_http_error(e, attempt)
|
||||
logger.warning(
|
||||
f"HTTPError in confluence call. "
|
||||
f"Retrying in {delay_until} seconds..."
|
||||
f"HTTPError in confluence call. Retrying in {delay_until} seconds..."
|
||||
)
|
||||
while time.monotonic() < delay_until:
|
||||
# in the future, check a signal here to exit
|
||||
@@ -544,8 +540,7 @@ class OnyxConfluence:
|
||||
if not latest_results:
|
||||
# no more results, break out of the loop
|
||||
logger.info(
|
||||
f"No results found for call '{temp_url_suffix}'"
|
||||
"Stopping pagination."
|
||||
f"No results found for call '{temp_url_suffix}'Stopping pagination."
|
||||
)
|
||||
found_empty_page = True
|
||||
break
|
||||
@@ -606,8 +601,7 @@ class OnyxConfluence:
|
||||
# If that fails, raise the error
|
||||
if _PROBLEMATIC_EXPANSIONS in url_suffix:
|
||||
logger.warning(
|
||||
f"Replacing {_PROBLEMATIC_EXPANSIONS} with {_REPLACEMENT_EXPANSIONS}"
|
||||
" and trying again."
|
||||
f"Replacing {_PROBLEMATIC_EXPANSIONS} with {_REPLACEMENT_EXPANSIONS} and trying again."
|
||||
)
|
||||
url_suffix = url_suffix.replace(
|
||||
_PROBLEMATIC_EXPANSIONS,
|
||||
@@ -711,8 +705,7 @@ class OnyxConfluence:
|
||||
# stop paginating.
|
||||
if url_suffix and not results:
|
||||
logger.info(
|
||||
f"No results found for call '{old_url_suffix}' despite next link "
|
||||
"being present. Stopping pagination."
|
||||
f"No results found for call '{old_url_suffix}' despite next link being present. Stopping pagination."
|
||||
)
|
||||
break
|
||||
|
||||
@@ -934,8 +927,7 @@ class OnyxConfluence:
|
||||
logger.debug(f"jsonrpc response: {response}")
|
||||
if not response.get("result"):
|
||||
logger.warning(
|
||||
f"No jsonrpc response for space permissions for space {space_key}"
|
||||
f"\nResponse: {response}"
|
||||
f"No jsonrpc response for space permissions for space {space_key}\nResponse: {response}"
|
||||
)
|
||||
|
||||
return response.get("result", [])
|
||||
@@ -978,8 +970,7 @@ def get_user_email_from_username__server(
|
||||
except HTTPError as e:
|
||||
status_code = e.response.status_code if e.response is not None else "N/A"
|
||||
logger.warning(
|
||||
f"Failed to get confluence email for {user_name}: "
|
||||
f"HTTP {status_code} - {e}"
|
||||
f"Failed to get confluence email for {user_name}: HTTP {status_code} - {e}"
|
||||
)
|
||||
# For now, we'll just return None and log a warning. This means
|
||||
# we will keep retrying to get the email every group sync.
|
||||
@@ -1060,7 +1051,7 @@ def extract_text_from_confluence_html(
|
||||
)
|
||||
if not user_id:
|
||||
logger.warning(
|
||||
"ri:userkey not found in ri:user element. " f"Found attrs: {user.attrs}"
|
||||
f"ri:userkey not found in ri:user element. Found attrs: {user.attrs}"
|
||||
)
|
||||
continue
|
||||
# Include @ sign for tagging, more clear for LLM
|
||||
|
||||
@@ -155,10 +155,7 @@ def process_attachment(
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Downloading attachment: "
|
||||
f"title={attachment['title']} "
|
||||
f"length={attachment_size} "
|
||||
f"link={attachment_link}"
|
||||
f"Downloading attachment: title={attachment['title']} length={attachment_size} link={attachment_link}"
|
||||
)
|
||||
|
||||
# Download the attachment
|
||||
@@ -368,8 +365,7 @@ def handle_confluence_rate_limit(confluence_call: F) -> F:
|
||||
except requests.HTTPError as e:
|
||||
delay_until = _handle_http_error(e, attempt)
|
||||
logger.warning(
|
||||
f"HTTPError in confluence call. "
|
||||
f"Retrying in {delay_until} seconds..."
|
||||
f"HTTPError in confluence call. Retrying in {delay_until} seconds..."
|
||||
)
|
||||
while time.monotonic() < delay_until:
|
||||
# in the future, check a signal here to exit
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user