mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-03-18 14:12:45 +00:00
Compare commits
49 Commits
nikg/genui
...
refactor/t
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7425c59e39 | ||
|
|
64d484039f | ||
|
|
0530095b71 | ||
|
|
23280d5b91 | ||
|
|
229442679c | ||
|
|
95a192fb0f | ||
|
|
6bd96ec906 | ||
|
|
a1ec88269f | ||
|
|
b929518c34 | ||
|
|
479220e774 | ||
|
|
d3e0acf905 | ||
|
|
cbd1a344f2 | ||
|
|
e79264b69b | ||
|
|
1e0a8e9a0e | ||
|
|
b7841a513d | ||
|
|
c779bf722d | ||
|
|
a5aff0d199 | ||
|
|
8ed170b070 | ||
|
|
c890cd4767 | ||
|
|
2b2df18463 | ||
|
|
11cfc92f15 | ||
|
|
c7da99cfd7 | ||
|
|
b384c77863 | ||
|
|
b0f31cd46b | ||
|
|
323eb9bbba | ||
|
|
708e310849 | ||
|
|
c25509e212 | ||
|
|
6af0da41bd | ||
|
|
b94da25d7c | ||
|
|
7d443c1b53 | ||
|
|
d6b7b3c68f | ||
|
|
f5073d331e | ||
|
|
64c9f6a0d5 | ||
|
|
f5a494f790 | ||
|
|
8598e9f25d | ||
|
|
3ef8aecc54 | ||
|
|
eb311c7550 | ||
|
|
13284d9def | ||
|
|
aaa99fcb60 | ||
|
|
5f628da4e8 | ||
|
|
e40f80cfe1 | ||
|
|
ca6ba2cca9 | ||
|
|
98ef5006ff | ||
|
|
dfd168cde9 | ||
|
|
6c7ae243d0 | ||
|
|
c4a2ff2593 | ||
|
|
4b74a6dc76 | ||
|
|
eea5f5b380 | ||
|
|
ae428ba684 |
25
.github/actions/slack-notify/action.yml
vendored
25
.github/actions/slack-notify/action.yml
vendored
@@ -10,6 +10,9 @@ inputs:
|
||||
failed-jobs:
|
||||
description: "Deprecated alias for details"
|
||||
required: false
|
||||
mention:
|
||||
description: "GitHub username to resolve to a Slack @-mention. Replaces {mention} in details."
|
||||
required: false
|
||||
title:
|
||||
description: "Title for the notification"
|
||||
required: false
|
||||
@@ -26,6 +29,7 @@ runs:
|
||||
SLACK_WEBHOOK_URL: ${{ inputs.webhook-url }}
|
||||
DETAILS: ${{ inputs.details }}
|
||||
FAILED_JOBS: ${{ inputs.failed-jobs }}
|
||||
MENTION_USER: ${{ inputs.mention }}
|
||||
TITLE: ${{ inputs.title }}
|
||||
REF_NAME: ${{ inputs.ref-name }}
|
||||
REPO: ${{ github.repository }}
|
||||
@@ -52,6 +56,27 @@ runs:
|
||||
DETAILS="$FAILED_JOBS"
|
||||
fi
|
||||
|
||||
# Resolve {mention} placeholder if a GitHub username was provided.
|
||||
# Looks up the username in user-mappings.json (co-located with this action)
|
||||
# and replaces {mention} with <@SLACK_ID> for a Slack @-mention.
|
||||
# Falls back to the plain GitHub username if not found in the mapping.
|
||||
if [ -n "$MENTION_USER" ]; then
|
||||
MAPPINGS_FILE="${GITHUB_ACTION_PATH}/user-mappings.json"
|
||||
slack_id="$(jq -r --arg gh "$MENTION_USER" 'to_entries[] | select(.value | ascii_downcase == ($gh | ascii_downcase)) | .key' "$MAPPINGS_FILE" 2>/dev/null | head -1)"
|
||||
|
||||
if [ -n "$slack_id" ]; then
|
||||
mention_text="<@${slack_id}>"
|
||||
else
|
||||
mention_text="${MENTION_USER}"
|
||||
fi
|
||||
|
||||
DETAILS="${DETAILS//\{mention\}/$mention_text}"
|
||||
TITLE="${TITLE//\{mention\}/}"
|
||||
else
|
||||
DETAILS="${DETAILS//\{mention\}/}"
|
||||
TITLE="${TITLE//\{mention\}/}"
|
||||
fi
|
||||
|
||||
normalize_multiline() {
|
||||
printf '%s' "$1" | awk 'BEGIN { ORS=""; first=1 } { if (!first) printf "\\n"; printf "%s", $0; first=0 }'
|
||||
}
|
||||
|
||||
18
.github/actions/slack-notify/user-mappings.json
vendored
Normal file
18
.github/actions/slack-notify/user-mappings.json
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"U05SAGZPEA1": "yuhongsun96",
|
||||
"U05SAH6UGUD": "Weves",
|
||||
"U07PWEQB7A5": "evan-onyx",
|
||||
"U07V1SM68KF": "joachim-danswer",
|
||||
"U08JZ9N3QNN": "raunakab",
|
||||
"U08L24NCLJE": "Subash-Mohan",
|
||||
"U090B9M07B2": "wenxi-onyx",
|
||||
"U094RASDP0Q": "duo-onyx",
|
||||
"U096L8ZQ85B": "justin-tahara",
|
||||
"U09AHV8UBQX": "jessicasingh7",
|
||||
"U09KAL5T3C2": "nmgarza5",
|
||||
"U09KPGVQ70R": "acaprau",
|
||||
"U09QR8KTSJH": "rohoswagger",
|
||||
"U09RB4NTXA4": "jmelahman",
|
||||
"U0A6K9VCY6A": "Danelegend",
|
||||
"U0AGC4KH71A": "Bo-Onyx"
|
||||
}
|
||||
30
.github/workflows/deployment.yml
vendored
30
.github/workflows/deployment.yml
vendored
@@ -455,7 +455,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -529,7 +529,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -607,7 +607,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -668,7 +668,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -750,7 +750,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -836,7 +836,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -894,7 +894,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -967,7 +967,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -1044,7 +1044,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -1105,7 +1105,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -1178,7 +1178,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -1256,7 +1256,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -1317,7 +1317,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -1397,7 +1397,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -1480,7 +1480,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
|
||||
@@ -207,7 +207,7 @@ jobs:
|
||||
CHERRY_PICK_PR_URL: ${{ needs.cherry-pick-to-latest-release.outputs.cherry_pick_pr_url }}
|
||||
run: |
|
||||
source_pr_url="https://github.com/${GITHUB_REPOSITORY}/pull/${SOURCE_PR_NUMBER}"
|
||||
details="*Cherry-pick PR opened successfully.*\\n• source PR: ${source_pr_url}"
|
||||
details="*Cherry-pick PR opened successfully.*\\n• author: {mention}\\n• source PR: ${source_pr_url}"
|
||||
if [ -n "${CHERRY_PICK_PR_URL}" ]; then
|
||||
details="${details}\\n• cherry-pick PR: ${CHERRY_PICK_PR_URL}"
|
||||
fi
|
||||
@@ -221,6 +221,7 @@ jobs:
|
||||
uses: ./.github/actions/slack-notify
|
||||
with:
|
||||
webhook-url: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}
|
||||
mention: ${{ needs.resolve-cherry-pick-request.outputs.merged_by }}
|
||||
details: ${{ steps.success-summary.outputs.details }}
|
||||
title: "✅ Automated Cherry-Pick PR Opened"
|
||||
ref-name: ${{ github.event.pull_request.base.ref }}
|
||||
@@ -275,20 +276,21 @@ jobs:
|
||||
else
|
||||
failed_job_label="cherry-pick-to-latest-release"
|
||||
fi
|
||||
failed_jobs="• ${failed_job_label}\\n• source PR: ${source_pr_url}\\n• reason: ${reason_text}"
|
||||
details="• author: {mention}\\n• ${failed_job_label}\\n• source PR: ${source_pr_url}\\n• reason: ${reason_text}"
|
||||
if [ -n "${MERGE_COMMIT_SHA}" ]; then
|
||||
failed_jobs="${failed_jobs}\\n• merge SHA: ${MERGE_COMMIT_SHA}"
|
||||
details="${details}\\n• merge SHA: ${MERGE_COMMIT_SHA}"
|
||||
fi
|
||||
if [ -n "${details_excerpt}" ]; then
|
||||
failed_jobs="${failed_jobs}\\n• excerpt: ${details_excerpt}"
|
||||
details="${details}\\n• excerpt: ${details_excerpt}"
|
||||
fi
|
||||
|
||||
echo "jobs=${failed_jobs}" >> "$GITHUB_OUTPUT"
|
||||
echo "details=${details}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Notify #cherry-pick-prs about cherry-pick failure
|
||||
uses: ./.github/actions/slack-notify
|
||||
with:
|
||||
webhook-url: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}
|
||||
details: ${{ steps.failure-summary.outputs.jobs }}
|
||||
mention: ${{ needs.resolve-cherry-pick-request.outputs.merged_by }}
|
||||
details: ${{ steps.failure-summary.outputs.details }}
|
||||
title: "🚨 Automated Cherry-Pick Failed"
|
||||
ref-name: ${{ github.event.pull_request.base.ref }}
|
||||
|
||||
2
.github/workflows/pr-desktop-build.yml
vendored
2
.github/workflows/pr-desktop-build.yml
vendored
@@ -105,7 +105,7 @@ jobs:
|
||||
|
||||
- name: Upload build artifacts
|
||||
if: always()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: desktop-build-${{ matrix.platform }}-${{ github.run_id }}
|
||||
path: |
|
||||
|
||||
@@ -174,7 +174,7 @@ jobs:
|
||||
|
||||
- name: Upload Docker logs
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-logs-${{ matrix.test-dir }}
|
||||
path: docker-logs/
|
||||
|
||||
4
.github/workflows/pr-golang-tests.yml
vendored
4
.github/workflows/pr-golang-tests.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
outputs:
|
||||
modules: ${{ steps.set-modules.outputs.modules }}
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: false
|
||||
- id: set-modules
|
||||
@@ -39,7 +39,7 @@ jobs:
|
||||
matrix:
|
||||
modules: ${{ fromJSON(needs.detect-modules.outputs.modules) }}
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
- uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # zizmor: ignore[cache-poisoning]
|
||||
|
||||
6
.github/workflows/pr-integration-tests.yml
vendored
6
.github/workflows/pr-integration-tests.yml
vendored
@@ -466,7 +466,7 @@ jobs:
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-all-logs-${{ matrix.edition }}-${{ matrix.test-dir.name }}
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
@@ -587,7 +587,7 @@ jobs:
|
||||
|
||||
- name: Upload logs (onyx-lite)
|
||||
if: always()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-all-logs-onyx-lite
|
||||
path: ${{ github.workspace }}/docker-compose-onyx-lite.log
|
||||
@@ -725,7 +725,7 @@ jobs:
|
||||
|
||||
- name: Upload logs (multi-tenant)
|
||||
if: always()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-all-logs-multitenant
|
||||
path: ${{ github.workspace }}/docker-compose-multitenant.log
|
||||
|
||||
2
.github/workflows/pr-jest-tests.yml
vendored
2
.github/workflows/pr-jest-tests.yml
vendored
@@ -44,7 +44,7 @@ jobs:
|
||||
|
||||
- name: Upload coverage reports
|
||||
if: always()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: jest-coverage-${{ github.run_id }}
|
||||
path: ./web/coverage
|
||||
|
||||
14
.github/workflows/pr-playwright-tests.yml
vendored
14
.github/workflows/pr-playwright-tests.yml
vendored
@@ -445,7 +445,7 @@ jobs:
|
||||
run: |
|
||||
npx playwright test --project ${PROJECT}
|
||||
|
||||
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
if: always()
|
||||
with:
|
||||
# Includes test results and trace.zip files
|
||||
@@ -454,7 +454,7 @@ jobs:
|
||||
retention-days: 30
|
||||
|
||||
- name: Upload screenshots
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
if: always()
|
||||
with:
|
||||
name: playwright-screenshots-${{ matrix.project }}-${{ github.run_id }}
|
||||
@@ -534,7 +534,7 @@ jobs:
|
||||
"s3://${PLAYWRIGHT_S3_BUCKET}/reports/pr-${PR_NUMBER}/${RUN_ID}/${PROJECT}/"
|
||||
|
||||
- name: Upload visual diff summary
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
if: always()
|
||||
with:
|
||||
name: screenshot-diff-summary-${{ matrix.project }}
|
||||
@@ -543,7 +543,7 @@ jobs:
|
||||
retention-days: 5
|
||||
|
||||
- name: Upload visual diff report artifact
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
if: always()
|
||||
with:
|
||||
name: screenshot-diff-report-${{ matrix.project }}-${{ github.run_id }}
|
||||
@@ -590,7 +590,7 @@ jobs:
|
||||
|
||||
- name: Upload logs
|
||||
if: success() || failure()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
@@ -674,7 +674,7 @@ jobs:
|
||||
working-directory: ./web
|
||||
run: npx playwright test --project lite
|
||||
|
||||
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
if: always()
|
||||
with:
|
||||
name: playwright-test-results-lite-${{ github.run_id }}
|
||||
@@ -692,7 +692,7 @@ jobs:
|
||||
|
||||
- name: Upload logs
|
||||
if: success() || failure()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-logs-lite-${{ github.run_id }}
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
|
||||
2
.github/workflows/pr-python-model-tests.yml
vendored
2
.github/workflows/pr-python-model-tests.yml
vendored
@@ -122,7 +122,7 @@ jobs:
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-all-logs
|
||||
path: ${{ github.workspace }}/docker-compose.log
|
||||
|
||||
@@ -319,7 +319,7 @@ jobs:
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: docker-all-logs-nightly-${{ matrix.provider }}-llm-provider
|
||||
path: |
|
||||
|
||||
6
.github/workflows/sandbox-deployment.yml
vendored
6
.github/workflows/sandbox-deployment.yml
vendored
@@ -125,7 +125,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -195,7 +195,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
@@ -268,7 +268,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
|
||||
with:
|
||||
images: ${{ env.REGISTRY_IMAGE }}
|
||||
flavor: |
|
||||
|
||||
32
.vscode/launch.json
vendored
32
.vscode/launch.json
vendored
@@ -15,7 +15,7 @@
|
||||
{
|
||||
"name": "Run All Onyx Services",
|
||||
"configurations": [
|
||||
// "Web Server",
|
||||
"Web Server",
|
||||
"Model Server",
|
||||
"API Server",
|
||||
"MCP Server",
|
||||
@@ -95,7 +95,7 @@
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1"
|
||||
},
|
||||
"args": ["model_server.main:app", "--reload", "--port", "9010"],
|
||||
"args": ["model_server.main:app", "--reload", "--port", "9000"],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
@@ -113,7 +113,7 @@
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1"
|
||||
},
|
||||
"args": ["onyx.main:app", "--reload", "--port", "8090"],
|
||||
"args": ["onyx.main:app", "--reload", "--port", "8080"],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
@@ -165,7 +165,7 @@
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"MCP_SERVER_ENABLED": "true",
|
||||
"MCP_SERVER_PORT": "8100",
|
||||
"MCP_SERVER_PORT": "8090",
|
||||
"MCP_SERVER_CORS_ORIGINS": "http://localhost:*",
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1"
|
||||
@@ -174,7 +174,7 @@
|
||||
"onyx.mcp_server.api:mcp_app",
|
||||
"--reload",
|
||||
"--port",
|
||||
"8100",
|
||||
"8090",
|
||||
"--timeout-graceful-shutdown",
|
||||
"0"
|
||||
],
|
||||
@@ -526,7 +526,10 @@
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"runtimeExecutable": "uv",
|
||||
"runtimeArgs": ["sync", "--all-extras"],
|
||||
"runtimeArgs": [
|
||||
"sync",
|
||||
"--all-extras"
|
||||
],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"console": "integratedTerminal",
|
||||
"presentation": {
|
||||
@@ -650,7 +653,14 @@
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"runtimeExecutable": "uv",
|
||||
"runtimeArgs": ["run", "--with", "onyx-devtools", "ods", "db", "upgrade"],
|
||||
"runtimeArgs": [
|
||||
"run",
|
||||
"--with",
|
||||
"onyx-devtools",
|
||||
"ods",
|
||||
"db",
|
||||
"upgrade"
|
||||
],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"console": "integratedTerminal",
|
||||
"presentation": {
|
||||
@@ -669,11 +679,7 @@
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "backend"
|
||||
},
|
||||
"args": [
|
||||
"--filename",
|
||||
"backend/generated/openapi.json",
|
||||
"--generate-python-client"
|
||||
]
|
||||
"args": ["--filename", "backend/generated/openapi.json", "--generate-python-client"]
|
||||
},
|
||||
{
|
||||
// script to debug multi tenant db issues
|
||||
@@ -702,7 +708,7 @@
|
||||
"name": "Debug React Web App in Chrome",
|
||||
"type": "chrome",
|
||||
"request": "launch",
|
||||
"url": "http://localhost:3010",
|
||||
"url": "http://localhost:3000",
|
||||
"webRoot": "${workspaceFolder}/web"
|
||||
}
|
||||
]
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
"""add_hook_and_hook_execution_log_tables
|
||||
|
||||
Revision ID: 689433b0d8de
|
||||
Revises: 93a2e195e25c
|
||||
Create Date: 2026-03-13 11:25:06.547474
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import UUID as PGUUID
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "689433b0d8de"
|
||||
down_revision = "93a2e195e25c"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.create_table(
|
||||
"hook",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("name", sa.String(), nullable=False),
|
||||
sa.Column(
|
||||
"hook_point",
|
||||
sa.Enum("document_ingestion", "query_processing", native_enum=False),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column("endpoint_url", sa.Text(), nullable=True),
|
||||
sa.Column("api_key", sa.LargeBinary(), nullable=True),
|
||||
sa.Column("is_reachable", sa.Boolean(), nullable=True),
|
||||
sa.Column(
|
||||
"fail_strategy",
|
||||
sa.Enum("hard", "soft", native_enum=False),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column("timeout_seconds", sa.Float(), nullable=False),
|
||||
sa.Column(
|
||||
"is_active", sa.Boolean(), nullable=False, server_default=sa.text("false")
|
||||
),
|
||||
sa.Column(
|
||||
"deleted", sa.Boolean(), nullable=False, server_default=sa.text("false")
|
||||
),
|
||||
sa.Column("creator_id", PGUUID(as_uuid=True), nullable=True),
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column(
|
||||
"updated_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.ForeignKeyConstraint(["creator_id"], ["user.id"], ondelete="SET NULL"),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
op.create_index(
|
||||
"ix_hook_one_non_deleted_per_point",
|
||||
"hook",
|
||||
["hook_point"],
|
||||
unique=True,
|
||||
postgresql_where=sa.text("deleted = false"),
|
||||
)
|
||||
|
||||
op.create_table(
|
||||
"hook_execution_log",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("hook_id", sa.Integer(), nullable=False),
|
||||
sa.Column(
|
||||
"is_success",
|
||||
sa.Boolean(),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column("error_message", sa.Text(), nullable=True),
|
||||
sa.Column("status_code", sa.Integer(), nullable=True),
|
||||
sa.Column("duration_ms", sa.Integer(), nullable=True),
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.ForeignKeyConstraint(["hook_id"], ["hook.id"], ondelete="CASCADE"),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
op.create_index("ix_hook_execution_log_hook_id", "hook_execution_log", ["hook_id"])
|
||||
op.create_index(
|
||||
"ix_hook_execution_log_created_at", "hook_execution_log", ["created_at"]
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_index("ix_hook_execution_log_created_at", table_name="hook_execution_log")
|
||||
op.drop_index("ix_hook_execution_log_hook_id", table_name="hook_execution_log")
|
||||
op.drop_table("hook_execution_log")
|
||||
|
||||
op.drop_index("ix_hook_one_non_deleted_per_point", table_name="hook")
|
||||
op.drop_table("hook")
|
||||
@@ -118,9 +118,7 @@ JWT_PUBLIC_KEY_URL: str | None = os.getenv("JWT_PUBLIC_KEY_URL", None)
|
||||
SUPER_USERS = json.loads(os.environ.get("SUPER_USERS", "[]"))
|
||||
SUPER_CLOUD_API_KEY = os.environ.get("SUPER_CLOUD_API_KEY", "api_key")
|
||||
|
||||
# The posthog client does not accept empty API keys or hosts however it fails silently
|
||||
# when the capture is called. These defaults prevent Posthog issues from breaking the Onyx app
|
||||
POSTHOG_API_KEY = os.environ.get("POSTHOG_API_KEY") or "FooBar"
|
||||
POSTHOG_API_KEY = os.environ.get("POSTHOG_API_KEY")
|
||||
POSTHOG_HOST = os.environ.get("POSTHOG_HOST") or "https://us.i.posthog.com"
|
||||
POSTHOG_DEBUG_LOGS_ENABLED = (
|
||||
os.environ.get("POSTHOG_DEBUG_LOGS_ENABLED", "").lower() == "true"
|
||||
|
||||
@@ -34,6 +34,9 @@ class PostHogFeatureFlagProvider(FeatureFlagProvider):
|
||||
Returns:
|
||||
True if the feature is enabled for the user, False otherwise.
|
||||
"""
|
||||
if not posthog:
|
||||
return False
|
||||
|
||||
try:
|
||||
posthog.set(
|
||||
distinct_id=user_id,
|
||||
|
||||
@@ -29,7 +29,6 @@ from onyx.configs.app_configs import OPENAI_DEFAULT_API_KEY
|
||||
from onyx.configs.app_configs import OPENROUTER_DEFAULT_API_KEY
|
||||
from onyx.configs.app_configs import VERTEXAI_DEFAULT_CREDENTIALS
|
||||
from onyx.configs.app_configs import VERTEXAI_DEFAULT_LOCATION
|
||||
from onyx.configs.constants import MilestoneRecordType
|
||||
from onyx.db.engine.sql_engine import get_session_with_shared_schema
|
||||
from onyx.db.engine.sql_engine import get_session_with_tenant
|
||||
from onyx.db.image_generation import create_default_image_gen_config_from_api_key
|
||||
@@ -59,7 +58,6 @@ from onyx.server.manage.llm.models import LLMProviderUpsertRequest
|
||||
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
|
||||
from onyx.setup import setup_onyx
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.telemetry import mt_cloud_telemetry
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
|
||||
from shared_configs.configs import TENANT_ID_PREFIX
|
||||
@@ -71,7 +69,9 @@ logger = setup_logger()
|
||||
|
||||
|
||||
async def get_or_provision_tenant(
|
||||
email: str, referral_source: str | None = None, request: Request | None = None
|
||||
email: str,
|
||||
referral_source: str | None = None,
|
||||
request: Request | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Get existing tenant ID for an email or create a new tenant if none exists.
|
||||
@@ -693,12 +693,6 @@ async def assign_tenant_to_user(
|
||||
|
||||
try:
|
||||
add_users_to_tenant([email], tenant_id)
|
||||
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=email,
|
||||
event=MilestoneRecordType.TENANT_CREATED,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(f"Failed to assign tenant {tenant_id} to user {email}")
|
||||
raise Exception("Failed to assign tenant to user")
|
||||
|
||||
@@ -9,6 +9,7 @@ from ee.onyx.configs.app_configs import POSTHOG_API_KEY
|
||||
from ee.onyx.configs.app_configs import POSTHOG_DEBUG_LOGS_ENABLED
|
||||
from ee.onyx.configs.app_configs import POSTHOG_HOST
|
||||
from onyx.utils.logger import setup_logger
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
@@ -18,12 +19,19 @@ def posthog_on_error(error: Any, items: Any) -> None:
|
||||
logger.error(f"PostHog error: {error}, items: {items}")
|
||||
|
||||
|
||||
posthog = Posthog(
|
||||
project_api_key=POSTHOG_API_KEY,
|
||||
host=POSTHOG_HOST,
|
||||
debug=POSTHOG_DEBUG_LOGS_ENABLED,
|
||||
on_error=posthog_on_error,
|
||||
)
|
||||
posthog: Posthog | None = None
|
||||
if POSTHOG_API_KEY:
|
||||
posthog = Posthog(
|
||||
project_api_key=POSTHOG_API_KEY,
|
||||
host=POSTHOG_HOST,
|
||||
debug=POSTHOG_DEBUG_LOGS_ENABLED,
|
||||
on_error=posthog_on_error,
|
||||
)
|
||||
elif MULTI_TENANT:
|
||||
logger.warning(
|
||||
"POSTHOG_API_KEY is not set but MULTI_TENANT is enabled — "
|
||||
"PostHog telemetry and feature flags will be disabled"
|
||||
)
|
||||
|
||||
# For cross referencing between cloud and www Onyx sites
|
||||
# NOTE: These clients are separate because they are separate posthog projects.
|
||||
@@ -60,7 +68,7 @@ def capture_and_sync_with_alternate_posthog(
|
||||
logger.error(f"Error capturing marketing posthog event: {e}")
|
||||
|
||||
try:
|
||||
if cloud_user_id := props.get("onyx_cloud_user_id"):
|
||||
if posthog and (cloud_user_id := props.get("onyx_cloud_user_id")):
|
||||
cloud_props = props.copy()
|
||||
cloud_props.pop("onyx_cloud_user_id", None)
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from typing import Any
|
||||
|
||||
from ee.onyx.utils.posthog_client import posthog
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
@@ -5,12 +7,27 @@ logger = setup_logger()
|
||||
|
||||
|
||||
def event_telemetry(
|
||||
distinct_id: str, event: str, properties: dict | None = None
|
||||
distinct_id: str, event: str, properties: dict[str, Any] | None = None
|
||||
) -> None:
|
||||
"""Capture and send an event to PostHog, flushing immediately."""
|
||||
if not posthog:
|
||||
return
|
||||
|
||||
logger.info(f"Capturing PostHog event: {distinct_id} {event} {properties}")
|
||||
try:
|
||||
posthog.capture(distinct_id, event, properties)
|
||||
posthog.flush()
|
||||
except Exception as e:
|
||||
logger.error(f"Error capturing PostHog event: {e}")
|
||||
|
||||
|
||||
def identify_user(distinct_id: str, properties: dict[str, Any] | None = None) -> None:
|
||||
"""Create/update a PostHog person profile, flushing immediately."""
|
||||
if not posthog:
|
||||
return
|
||||
|
||||
try:
|
||||
posthog.identify(distinct_id, properties)
|
||||
posthog.flush()
|
||||
except Exception as e:
|
||||
logger.error(f"Error identifying PostHog user: {e}")
|
||||
|
||||
@@ -19,6 +19,7 @@ from typing import Optional
|
||||
from typing import Protocol
|
||||
from typing import Tuple
|
||||
from typing import TypeVar
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import jwt
|
||||
from email_validator import EmailNotValidError
|
||||
@@ -134,6 +135,7 @@ from onyx.redis.redis_pool import retrieve_ws_token_data
|
||||
from onyx.server.settings.store import load_settings
|
||||
from onyx.server.utils import BasicAuthenticationError
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.telemetry import mt_cloud_identify
|
||||
from onyx.utils.telemetry import mt_cloud_telemetry
|
||||
from onyx.utils.telemetry import optional_telemetry
|
||||
from onyx.utils.telemetry import RecordType
|
||||
@@ -792,6 +794,12 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
|
||||
except Exception:
|
||||
logger.exception("Error deleting anonymous user cookie")
|
||||
|
||||
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
|
||||
mt_cloud_identify(
|
||||
distinct_id=str(user.id),
|
||||
properties={"email": user.email, "tenant_id": tenant_id},
|
||||
)
|
||||
|
||||
async def on_after_register(
|
||||
self, user: User, request: Optional[Request] = None
|
||||
) -> None:
|
||||
@@ -810,12 +818,25 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
|
||||
user_count = await get_user_count()
|
||||
logger.debug(f"Current tenant user count: {user_count}")
|
||||
|
||||
# Ensure a PostHog person profile exists for this user.
|
||||
mt_cloud_identify(
|
||||
distinct_id=str(user.id),
|
||||
properties={"email": user.email, "tenant_id": tenant_id},
|
||||
)
|
||||
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=user.email,
|
||||
distinct_id=str(user.id),
|
||||
event=MilestoneRecordType.USER_SIGNED_UP,
|
||||
)
|
||||
|
||||
if user_count == 1:
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=str(user.id),
|
||||
event=MilestoneRecordType.TENANT_CREATED,
|
||||
)
|
||||
|
||||
finally:
|
||||
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
|
||||
|
||||
@@ -1652,6 +1673,33 @@ async def _get_user_from_token_data(token_data: dict) -> User | None:
|
||||
return user
|
||||
|
||||
|
||||
_LOOPBACK_HOSTNAMES = frozenset({"localhost", "127.0.0.1", "::1"})
|
||||
|
||||
|
||||
def _is_same_origin(actual: str, expected: str) -> bool:
|
||||
"""Compare two origins for the WebSocket CSWSH check.
|
||||
|
||||
Scheme and hostname must match exactly. Port must also match, except
|
||||
when the hostname is a loopback address (localhost / 127.0.0.1 / ::1),
|
||||
where port is ignored. On loopback, all ports belong to the same
|
||||
operator, so port differences carry no security significance — the
|
||||
CSWSH threat is remote origins, not local ones.
|
||||
"""
|
||||
a = urlparse(actual.rstrip("/"))
|
||||
e = urlparse(expected.rstrip("/"))
|
||||
|
||||
if a.scheme != e.scheme or a.hostname != e.hostname:
|
||||
return False
|
||||
|
||||
if a.hostname in _LOOPBACK_HOSTNAMES:
|
||||
return True
|
||||
|
||||
actual_port = a.port or (443 if a.scheme == "https" else 80)
|
||||
expected_port = e.port or (443 if e.scheme == "https" else 80)
|
||||
|
||||
return actual_port == expected_port
|
||||
|
||||
|
||||
async def current_user_from_websocket(
|
||||
websocket: WebSocket,
|
||||
token: str = Query(..., description="WebSocket authentication token"),
|
||||
@@ -1671,19 +1719,15 @@ async def current_user_from_websocket(
|
||||
|
||||
This applies the same auth checks as current_user() for HTTP endpoints.
|
||||
"""
|
||||
# Check Origin header to prevent Cross-Site WebSocket Hijacking (CSWSH)
|
||||
# Browsers always send Origin on WebSocket connections
|
||||
# Check Origin header to prevent Cross-Site WebSocket Hijacking (CSWSH).
|
||||
# Browsers always send Origin on WebSocket connections.
|
||||
origin = websocket.headers.get("origin")
|
||||
expected_origin = WEB_DOMAIN.rstrip("/")
|
||||
if not origin:
|
||||
logger.warning("WS auth: missing Origin header")
|
||||
raise BasicAuthenticationError(detail="Access denied. Missing origin.")
|
||||
|
||||
actual_origin = origin.rstrip("/")
|
||||
if actual_origin != expected_origin:
|
||||
logger.warning(
|
||||
f"WS auth: origin mismatch. Expected {expected_origin}, got {actual_origin}"
|
||||
)
|
||||
if not _is_same_origin(origin, WEB_DOMAIN):
|
||||
logger.warning(f"WS auth: origin mismatch. Expected {WEB_DOMAIN}, got {origin}")
|
||||
raise BasicAuthenticationError(detail="Access denied. Invalid origin.")
|
||||
|
||||
# Validate WS token in Redis (single-use, deleted after retrieval)
|
||||
|
||||
@@ -29,6 +29,8 @@ from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.connectors.factory import ConnectorMissingException
|
||||
from onyx.connectors.factory import identify_connector_class
|
||||
from onyx.connectors.factory import instantiate_connector
|
||||
from onyx.connectors.interfaces import HierarchyConnector
|
||||
from onyx.connectors.models import HierarchyNode as PydanticHierarchyNode
|
||||
@@ -55,6 +57,26 @@ logger = setup_logger()
|
||||
HIERARCHY_FETCH_INTERVAL_SECONDS = 24 * 60 * 60
|
||||
|
||||
|
||||
def _connector_supports_hierarchy_fetching(
|
||||
cc_pair: ConnectorCredentialPair,
|
||||
) -> bool:
|
||||
"""Return True only for connectors whose class implements HierarchyConnector."""
|
||||
try:
|
||||
connector_class = identify_connector_class(
|
||||
cc_pair.connector.source,
|
||||
)
|
||||
except ConnectorMissingException as e:
|
||||
task_logger.warning(
|
||||
"Skipping hierarchy fetching enqueue for source=%s input_type=%s: %s",
|
||||
cc_pair.connector.source,
|
||||
cc_pair.connector.input_type,
|
||||
str(e),
|
||||
)
|
||||
return False
|
||||
|
||||
return issubclass(connector_class, HierarchyConnector)
|
||||
|
||||
|
||||
def _is_hierarchy_fetching_due(cc_pair: ConnectorCredentialPair) -> bool:
|
||||
"""Returns boolean indicating if hierarchy fetching is due for this connector.
|
||||
|
||||
@@ -186,7 +208,10 @@ def check_for_hierarchy_fetching(self: Task, *, tenant_id: str) -> int | None:
|
||||
cc_pair_id=cc_pair_id,
|
||||
)
|
||||
|
||||
if not cc_pair or not _is_hierarchy_fetching_due(cc_pair):
|
||||
if not cc_pair or not _connector_supports_hierarchy_fetching(cc_pair):
|
||||
continue
|
||||
|
||||
if not _is_hierarchy_fetching_due(cc_pair):
|
||||
continue
|
||||
|
||||
task_id = _try_creating_hierarchy_fetching_task(
|
||||
|
||||
@@ -1,112 +0,0 @@
|
||||
"""
|
||||
GenUI system prompt for LLM integration.
|
||||
|
||||
This prompt teaches the LLM to output structured UI using GenUI Lang.
|
||||
It's generated from the Onyx component library definitions and kept
|
||||
in sync with the frontend @onyx/genui-onyx library.
|
||||
|
||||
TODO: Auto-generate this from the frontend library at build time
|
||||
instead of maintaining a static copy.
|
||||
"""
|
||||
|
||||
GENUI_SYSTEM_PROMPT = """# Structured UI Output (GenUI Lang)
|
||||
|
||||
When the user's request benefits from structured UI (tables, cards, buttons, layouts), respond using GenUI Lang — a compact, line-oriented markup. Otherwise respond in plain markdown.
|
||||
|
||||
## Syntax
|
||||
|
||||
Each line declares a variable: `name = expression`
|
||||
|
||||
Expressions:
|
||||
- `ComponentName(arg1, arg2, key: value)` — component with positional or named args
|
||||
- `[a, b, c]` — array
|
||||
- `{key: value}` — object
|
||||
- `"string"`, `42`, `true`, `false`, `null` — literals
|
||||
- `variableName` — reference to a previously defined variable
|
||||
|
||||
Rules:
|
||||
- PascalCase identifiers are component types
|
||||
- camelCase identifiers are variable references
|
||||
- Positional args map to props in the order defined below
|
||||
- The last statement is the root element (or name one `root`)
|
||||
- Lines inside brackets/parens can span multiple lines
|
||||
- Lines that don't match `name = expression` are treated as plain text
|
||||
|
||||
## Available Components
|
||||
|
||||
### Layout
|
||||
- `Stack(children?: unknown[], gap?: "none" | "xs" | "sm" | "md" | "lg" | "xl", align?: "start" | "center" | "end" | "stretch")` — Vertical stack layout — arranges children top to bottom
|
||||
- `Row(children?: unknown[], gap?: "none" | "xs" | "sm" | "md" | "lg" | "xl", align?: "start" | "center" | "end" | "stretch", wrap?: boolean)` — Horizontal row layout — arranges children left to right
|
||||
- `Column(children?: unknown[], width?: string)` — A column within a Row, with optional width control
|
||||
- `Card(title?: string, padding?: "none" | "sm" | "md" | "lg")` — A container card with optional title and padding
|
||||
- `Divider(spacing?: "sm" | "md" | "lg")` — A horizontal separator line
|
||||
|
||||
### Content
|
||||
- `Text(children: string, headingH1?: boolean, headingH2?: boolean, headingH3?: boolean, muted?: boolean, mono?: boolean, bold?: boolean)` — Displays text with typography variants
|
||||
- `Tag(title: string, color?: "green" | "purple" | "blue" | "gray" | "amber", size?: "sm" | "md")` — A small label tag with color
|
||||
- `Table(columns: string[], rows: unknown[][], compact?: boolean)` — A data table with columns and rows
|
||||
- `Code(children: string, language?: string, showCopyButton?: boolean)` — A code block with optional copy button
|
||||
- `Image(src: string, alt?: string, width?: string, height?: string)` — Displays an image
|
||||
- `Link(children: string, href: string, external?: boolean)` — A clickable hyperlink
|
||||
- `List(items: string[], ordered?: boolean)` — An ordered or unordered list
|
||||
|
||||
### Interactive
|
||||
- `Button(children: string, main?: boolean, action?: boolean, danger?: boolean, primary?: boolean, secondary?: boolean, tertiary?: boolean, size?: "lg" | "md", actionId?: string, disabled?: boolean)` — An interactive button that triggers an action
|
||||
- `IconButton(icon: string, tooltip?: string, main?: boolean, action?: boolean, danger?: boolean, primary?: boolean, secondary?: boolean, actionId?: string, disabled?: boolean)` — A button that displays an icon with an optional tooltip
|
||||
- `Input(placeholder?: string, value?: string, actionId?: string, readOnly?: boolean)` — A text input field
|
||||
|
||||
### Feedback
|
||||
- `Alert(text: string, description?: string, level?: "default" | "info" | "success" | "warning" | "error", showIcon?: boolean)` — A status message banner (info, success, warning, error)
|
||||
|
||||
## Output Format
|
||||
|
||||
**CRITICAL: Output GenUI Lang directly as plain text. Do NOT wrap it in code fences (no ```genui or ``` blocks). The output is parsed as a streaming language, not displayed as code.**
|
||||
|
||||
## Streaming Guidelines
|
||||
|
||||
- Define variables before referencing them
|
||||
- Each line is independently parseable — the UI updates as each line completes
|
||||
- Keep variable names short and descriptive
|
||||
- Build up complex UIs incrementally: define data first, then layout
|
||||
|
||||
## Examples
|
||||
|
||||
### Search results with table
|
||||
```
|
||||
title = Text("Search Results", headingH2: true)
|
||||
row1 = ["Onyx Docs", Tag("PDF", color: "blue"), "2024-01-15"]
|
||||
row2 = ["API Guide", Tag("MD", color: "green"), "2024-02-01"]
|
||||
results = Table(["Name", "Type", "Date"], [row1, row2])
|
||||
action = Button("View All", main: true, primary: true, actionId: "viewAll")
|
||||
root = Stack([title, results, action], gap: "md")
|
||||
```
|
||||
|
||||
### Status card with actions
|
||||
```
|
||||
status = Alert("Pipeline completed successfully", level: "success")
|
||||
stats = Row([
|
||||
Text("Processed: 1,234 docs"),
|
||||
Text("Duration: 2m 34s", muted: true)
|
||||
], gap: "lg")
|
||||
actions = Row([
|
||||
Button("View Results", main: true, primary: true, actionId: "viewResults"),
|
||||
Button("Run Again", action: true, secondary: true, actionId: "rerun")
|
||||
], gap: "sm")
|
||||
root = Stack([status, stats, actions], gap: "md")
|
||||
```
|
||||
|
||||
### Simple info display
|
||||
```
|
||||
root = Card(title: "Document Summary")
|
||||
```
|
||||
|
||||
## Additional Guidelines
|
||||
|
||||
- Use Stack for vertical layouts and Row for horizontal layouts
|
||||
- For tables, pass column headers as a string array and rows as arrays of values
|
||||
- Tags are great for showing status, categories, or labels inline
|
||||
- Use Alert for important status messages — choose the right level (info, success, warning, error)
|
||||
- Buttons need an actionId to trigger events — the UI framework handles the callback
|
||||
- Keep layouts simple — prefer flat structures over deeply nested ones
|
||||
- For search results or document lists, use Table with relevant columns
|
||||
- Use Card to visually group related content"""
|
||||
@@ -13,7 +13,6 @@ from onyx.chat.citation_processor import CitationMode
|
||||
from onyx.chat.citation_processor import DynamicCitationProcessor
|
||||
from onyx.chat.citation_utils import update_citation_processor_from_tool_response
|
||||
from onyx.chat.emitter import Emitter
|
||||
from onyx.chat.genui_prompt import GENUI_SYSTEM_PROMPT
|
||||
from onyx.chat.llm_step import extract_tool_calls_from_response_text
|
||||
from onyx.chat.llm_step import run_llm_step
|
||||
from onyx.chat.models import ChatMessageSimple
|
||||
@@ -27,7 +26,6 @@ from onyx.chat.prompt_utils import build_system_prompt
|
||||
from onyx.chat.prompt_utils import (
|
||||
get_default_base_system_prompt,
|
||||
)
|
||||
from onyx.configs.app_configs import GENUI_ENABLED
|
||||
from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import MessageType
|
||||
@@ -38,9 +36,11 @@ from onyx.db.memory import add_memory
|
||||
from onyx.db.memory import update_memory_at_index
|
||||
from onyx.db.memory import UserMemoryContext
|
||||
from onyx.db.models import Persona
|
||||
from onyx.llm.constants import LlmProviderNames
|
||||
from onyx.llm.interfaces import LLM
|
||||
from onyx.llm.interfaces import LLMUserIdentity
|
||||
from onyx.llm.interfaces import ToolChoiceOptions
|
||||
from onyx.llm.utils import is_true_openai_model
|
||||
from onyx.prompts.chat_prompts import IMAGE_GEN_REMINDER
|
||||
from onyx.prompts.chat_prompts import OPEN_URL_REMINDER
|
||||
from onyx.server.query_and_chat.placement import Placement
|
||||
@@ -74,6 +74,70 @@ from shared_configs.contextvars import get_current_tenant_id
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class EmptyLLMResponseError(RuntimeError):
|
||||
"""Raised when the streamed LLM response completes without a usable answer."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
provider: str,
|
||||
model: str,
|
||||
tool_choice: ToolChoiceOptions,
|
||||
client_error_msg: str,
|
||||
error_code: str = "EMPTY_LLM_RESPONSE",
|
||||
is_retryable: bool = True,
|
||||
) -> None:
|
||||
super().__init__(client_error_msg)
|
||||
self.provider = provider
|
||||
self.model = model
|
||||
self.tool_choice = tool_choice
|
||||
self.client_error_msg = client_error_msg
|
||||
self.error_code = error_code
|
||||
self.is_retryable = is_retryable
|
||||
|
||||
|
||||
def _build_empty_llm_response_error(
|
||||
llm: LLM,
|
||||
llm_step_result: LlmStepResult,
|
||||
tool_choice: ToolChoiceOptions,
|
||||
) -> EmptyLLMResponseError:
|
||||
provider = llm.config.model_provider
|
||||
model = llm.config.model_name
|
||||
|
||||
# OpenAI quota exhaustion has reached us as a streamed "stop" with zero content.
|
||||
# When the stream is completely empty and there is no reasoning/tool output, surface
|
||||
# the likely account-level cause instead of a generic tool-calling error.
|
||||
if (
|
||||
not llm_step_result.reasoning
|
||||
and provider == LlmProviderNames.OPENAI
|
||||
and is_true_openai_model(provider, model)
|
||||
):
|
||||
return EmptyLLMResponseError(
|
||||
provider=provider,
|
||||
model=model,
|
||||
tool_choice=tool_choice,
|
||||
client_error_msg=(
|
||||
"The selected OpenAI model returned an empty streamed response "
|
||||
"before producing any tokens. This commonly happens when the API "
|
||||
"key or project has no remaining quota or billing is not enabled. "
|
||||
"Verify quota and billing for this key and try again."
|
||||
),
|
||||
error_code="BUDGET_EXCEEDED",
|
||||
is_retryable=False,
|
||||
)
|
||||
|
||||
return EmptyLLMResponseError(
|
||||
provider=provider,
|
||||
model=model,
|
||||
tool_choice=tool_choice,
|
||||
client_error_msg=(
|
||||
"The selected model returned no final answer before the stream "
|
||||
"completed. No text or tool calls were received from the upstream "
|
||||
"provider."
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _looks_like_xml_tool_call_payload(text: str | None) -> bool:
|
||||
"""Detect XML-style marshaled tool calls emitted as plain text."""
|
||||
if not text:
|
||||
@@ -615,7 +679,12 @@ def run_llm_loop(
|
||||
)
|
||||
citation_processor.update_citation_mapping(project_citation_mapping)
|
||||
|
||||
llm_step_result: LlmStepResult | None = None
|
||||
llm_step_result = LlmStepResult(
|
||||
reasoning=None,
|
||||
answer=None,
|
||||
tool_calls=None,
|
||||
raw_answer=None,
|
||||
)
|
||||
|
||||
# Pass the total budget to construct_message_history, which will handle token allocation
|
||||
available_tokens = llm.config.max_input_tokens
|
||||
@@ -701,7 +770,6 @@ def run_llm_loop(
|
||||
tools=tools,
|
||||
should_cite_documents=should_cite_documents
|
||||
or always_cite_documents,
|
||||
genui_prompt=GENUI_SYSTEM_PROMPT if GENUI_ENABLED else None,
|
||||
)
|
||||
system_prompt = ChatMessageSimple(
|
||||
message=system_prompt_str,
|
||||
@@ -795,7 +863,6 @@ def run_llm_loop(
|
||||
final_documents=gathered_documents,
|
||||
user_identity=user_identity,
|
||||
pre_answer_processing_time=pre_answer_processing_time,
|
||||
use_genui=GENUI_ENABLED,
|
||||
)
|
||||
if has_reasoned:
|
||||
reasoning_cycles += 1
|
||||
@@ -1088,12 +1155,18 @@ def run_llm_loop(
|
||||
# As long as 1 tool with citeable documents is called at any point, we ask the LLM to try to cite
|
||||
should_cite_documents = True
|
||||
|
||||
if not llm_step_result or not llm_step_result.answer:
|
||||
if not llm_step_result.answer and not llm_step_result.tool_calls:
|
||||
raise _build_empty_llm_response_error(
|
||||
llm=llm,
|
||||
llm_step_result=llm_step_result,
|
||||
tool_choice=tool_choice,
|
||||
)
|
||||
|
||||
if not llm_step_result.answer:
|
||||
raise RuntimeError(
|
||||
"The LLM did not return an answer. "
|
||||
"Typically this is an issue with LLMs that do not support tool calling natively, "
|
||||
"or the model serving API is not configured correctly. "
|
||||
"This may also happen with models that are lower quality outputting invalid tool calls."
|
||||
"The LLM did not return a final answer after tool execution. "
|
||||
"Typically this indicates invalid tool-call output, a model/provider mismatch, "
|
||||
"or serving API misconfiguration."
|
||||
)
|
||||
|
||||
emitter.emit(
|
||||
|
||||
@@ -48,8 +48,6 @@ from onyx.server.query_and_chat.placement import Placement
|
||||
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
|
||||
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
|
||||
from onyx.server.query_and_chat.streaming_models import CitationInfo
|
||||
from onyx.server.query_and_chat.streaming_models import GenUIDelta
|
||||
from onyx.server.query_and_chat.streaming_models import GenUIStart
|
||||
from onyx.server.query_and_chat.streaming_models import Packet
|
||||
from onyx.server.query_and_chat.streaming_models import ReasoningDelta
|
||||
from onyx.server.query_and_chat.streaming_models import ReasoningDone
|
||||
@@ -933,7 +931,6 @@ def run_llm_step_pkt_generator(
|
||||
is_deep_research: bool = False,
|
||||
pre_answer_processing_time: float | None = None,
|
||||
timeout_override: int | None = None,
|
||||
use_genui: bool = False,
|
||||
) -> Generator[Packet, None, tuple[LlmStepResult, bool]]:
|
||||
"""Run an LLM step and stream the response as packets.
|
||||
NOTE: DO NOT TOUCH THIS FUNCTION BEFORE ASKING YUHONG, this is very finicky and
|
||||
@@ -969,8 +966,6 @@ def run_llm_step_pkt_generator(
|
||||
pre_answer_processing_time: Optional time spent processing before the
|
||||
answer started, recorded in state_container for analytics.
|
||||
timeout_override: Optional timeout override for the LLM call.
|
||||
use_genui: If True, emit GenUIStart/GenUIDelta packets instead of
|
||||
AgentResponseStart/AgentResponseDelta.
|
||||
|
||||
Yields:
|
||||
Packet: Streaming packets containing:
|
||||
@@ -1018,6 +1013,10 @@ def run_llm_step_pkt_generator(
|
||||
accumulated_reasoning = ""
|
||||
accumulated_answer = ""
|
||||
accumulated_raw_answer = ""
|
||||
stream_chunk_count = 0
|
||||
actionable_chunk_count = 0
|
||||
empty_chunk_count = 0
|
||||
finish_reasons: set[str] = set()
|
||||
xml_tool_call_content_filter = _XmlToolCallContentFilter()
|
||||
|
||||
processor_state: Any = None
|
||||
@@ -1117,7 +1116,6 @@ def run_llm_step_pkt_generator(
|
||||
pre_answer_processing_time
|
||||
)
|
||||
|
||||
# Always emit AgentResponseStart for text rendering
|
||||
yield Packet(
|
||||
placement=_current_placement(),
|
||||
obj=AgentResponseStart(
|
||||
@@ -1125,30 +1123,9 @@ def run_llm_step_pkt_generator(
|
||||
pre_answer_processing_seconds=pre_answer_processing_time,
|
||||
),
|
||||
)
|
||||
# When GenUI is enabled, also emit GenUIStart so the
|
||||
# frontend can offer both text and structured views.
|
||||
if use_genui:
|
||||
yield Packet(
|
||||
placement=_current_placement(),
|
||||
obj=GenUIStart(),
|
||||
)
|
||||
answer_start = True
|
||||
|
||||
if use_genui:
|
||||
accumulated_answer += content_chunk
|
||||
if state_container:
|
||||
state_container.set_answer_tokens(accumulated_answer)
|
||||
# Emit both text and GenUI deltas so the frontend can
|
||||
# toggle between plain text and structured rendering.
|
||||
yield Packet(
|
||||
placement=_current_placement(),
|
||||
obj=AgentResponseDelta(content=content_chunk),
|
||||
)
|
||||
yield Packet(
|
||||
placement=_current_placement(),
|
||||
obj=GenUIDelta(content=content_chunk),
|
||||
)
|
||||
elif citation_processor:
|
||||
if citation_processor:
|
||||
yield from _emit_citation_results(
|
||||
citation_processor.process_token(content_chunk)
|
||||
)
|
||||
@@ -1172,6 +1149,7 @@ def run_llm_step_pkt_generator(
|
||||
user_identity=user_identity,
|
||||
timeout_override=timeout_override,
|
||||
):
|
||||
stream_chunk_count += 1
|
||||
if packet.usage:
|
||||
usage = packet.usage
|
||||
span_generation.span_data.usage = {
|
||||
@@ -1181,16 +1159,21 @@ def run_llm_step_pkt_generator(
|
||||
"cache_creation_input_tokens": usage.cache_creation_input_tokens,
|
||||
}
|
||||
# Note: LLM cost tracking is now handled in multi_llm.py
|
||||
finish_reason = packet.choice.finish_reason
|
||||
if finish_reason:
|
||||
finish_reasons.add(str(finish_reason))
|
||||
delta = packet.choice.delta
|
||||
|
||||
# Weird behavior from some model providers, just log and ignore for now
|
||||
if (
|
||||
delta.content is None
|
||||
not delta.content
|
||||
and delta.reasoning_content is None
|
||||
and delta.tool_calls is None
|
||||
and not delta.tool_calls
|
||||
):
|
||||
empty_chunk_count += 1
|
||||
logger.warning(
|
||||
f"LLM packet is empty (no contents, reasoning or tool calls). Skipping: {packet}"
|
||||
"LLM packet is empty (no content, reasoning, or tool calls). "
|
||||
f"finish_reason={finish_reason}. Skipping: {packet}"
|
||||
)
|
||||
continue
|
||||
|
||||
@@ -1199,6 +1182,8 @@ def run_llm_step_pkt_generator(
|
||||
time.monotonic() - stream_start_time
|
||||
)
|
||||
first_action_recorded = True
|
||||
if _delta_has_action(delta):
|
||||
actionable_chunk_count += 1
|
||||
|
||||
if custom_token_processor:
|
||||
# The custom token processor can modify the deltas for specific custom logic
|
||||
@@ -1334,6 +1319,15 @@ def run_llm_step_pkt_generator(
|
||||
else:
|
||||
logger.debug("Tool calls: []")
|
||||
|
||||
if actionable_chunk_count == 0:
|
||||
logger.warning(
|
||||
"LLM stream completed with no actionable deltas. "
|
||||
f"chunks={stream_chunk_count}, empty_chunks={empty_chunk_count}, "
|
||||
f"finish_reasons={sorted(finish_reasons)}, "
|
||||
f"provider={llm.config.model_provider}, model={llm.config.model_name}, "
|
||||
f"tool_choice={tool_choice}, tools_sent={len(tool_definitions)}"
|
||||
)
|
||||
|
||||
return (
|
||||
LlmStepResult(
|
||||
reasoning=accumulated_reasoning if accumulated_reasoning else None,
|
||||
@@ -1365,7 +1359,6 @@ def run_llm_step(
|
||||
is_deep_research: bool = False,
|
||||
pre_answer_processing_time: float | None = None,
|
||||
timeout_override: int | None = None,
|
||||
use_genui: bool = False,
|
||||
) -> tuple[LlmStepResult, bool]:
|
||||
"""Wrapper around run_llm_step_pkt_generator that consumes packets and emits them.
|
||||
|
||||
@@ -1389,7 +1382,6 @@ def run_llm_step(
|
||||
is_deep_research=is_deep_research,
|
||||
pre_answer_processing_time=pre_answer_processing_time,
|
||||
timeout_override=timeout_override,
|
||||
use_genui=use_genui,
|
||||
)
|
||||
|
||||
while True:
|
||||
|
||||
@@ -29,6 +29,7 @@ from onyx.chat.compression import compress_chat_history
|
||||
from onyx.chat.compression import find_summary_for_branch
|
||||
from onyx.chat.compression import get_compression_params
|
||||
from onyx.chat.emitter import get_default_emitter
|
||||
from onyx.chat.llm_loop import EmptyLLMResponseError
|
||||
from onyx.chat.llm_loop import run_llm_loop
|
||||
from onyx.chat.models import AnswerStream
|
||||
from onyx.chat.models import ChatBasicResponse
|
||||
@@ -490,13 +491,13 @@ def handle_stream_message_objects(
|
||||
# Milestone tracking, most devs using the API don't need to understand this
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=user.email if not user.is_anonymous else tenant_id,
|
||||
distinct_id=str(user.id) if not user.is_anonymous else tenant_id,
|
||||
event=MilestoneRecordType.MULTIPLE_ASSISTANTS,
|
||||
)
|
||||
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=user.email if not user.is_anonymous else tenant_id,
|
||||
distinct_id=str(user.id) if not user.is_anonymous else tenant_id,
|
||||
event=MilestoneRecordType.USER_MESSAGE_SENT,
|
||||
properties={
|
||||
"origin": new_msg_req.origin.value,
|
||||
@@ -925,9 +926,28 @@ def handle_stream_message_objects(
|
||||
db_session.rollback()
|
||||
return
|
||||
|
||||
except EmptyLLMResponseError as e:
|
||||
stack_trace = traceback.format_exc()
|
||||
|
||||
logger.warning(
|
||||
"LLM returned an empty response "
|
||||
f"(provider={e.provider}, model={e.model}, tool_choice={e.tool_choice})"
|
||||
)
|
||||
|
||||
yield StreamingError(
|
||||
error=e.client_error_msg,
|
||||
stack_trace=stack_trace,
|
||||
error_code=e.error_code,
|
||||
is_retryable=e.is_retryable,
|
||||
details={
|
||||
"model": e.model,
|
||||
"provider": e.provider,
|
||||
"tool_choice": e.tool_choice.value,
|
||||
},
|
||||
)
|
||||
db_session.rollback()
|
||||
except Exception as e:
|
||||
logger.exception(f"Failed to process chat message due to {e}")
|
||||
error_msg = str(e)
|
||||
stack_trace = traceback.format_exc()
|
||||
|
||||
if llm:
|
||||
@@ -1046,10 +1066,46 @@ def llm_loop_completion_handle(
|
||||
)
|
||||
|
||||
|
||||
def remove_answer_citations(answer: str) -> str:
|
||||
pattern = r"\s*\[\[\d+\]\]\(http[s]?://[^\s]+\)"
|
||||
_CITATION_LINK_START_PATTERN = re.compile(r"\s*\[\[\d+\]\]\(")
|
||||
|
||||
return re.sub(pattern, "", answer)
|
||||
|
||||
def _find_markdown_link_end(text: str, destination_start: int) -> int | None:
|
||||
depth = 0
|
||||
i = destination_start
|
||||
|
||||
while i < len(text):
|
||||
curr = text[i]
|
||||
if curr == "\\":
|
||||
i += 2
|
||||
continue
|
||||
|
||||
if curr == "(":
|
||||
depth += 1
|
||||
elif curr == ")":
|
||||
if depth == 0:
|
||||
return i
|
||||
depth -= 1
|
||||
|
||||
i += 1
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def remove_answer_citations(answer: str) -> str:
|
||||
stripped_parts: list[str] = []
|
||||
cursor = 0
|
||||
|
||||
while match := _CITATION_LINK_START_PATTERN.search(answer, cursor):
|
||||
stripped_parts.append(answer[cursor : match.start()])
|
||||
link_end = _find_markdown_link_end(answer, match.end())
|
||||
if link_end is None:
|
||||
stripped_parts.append(answer[match.start() :])
|
||||
return "".join(stripped_parts)
|
||||
|
||||
cursor = link_end + 1
|
||||
|
||||
stripped_parts.append(answer[cursor:])
|
||||
return "".join(stripped_parts)
|
||||
|
||||
|
||||
@log_function_time()
|
||||
@@ -1087,8 +1143,11 @@ def gather_stream(
|
||||
raise ValueError("Message ID is required")
|
||||
|
||||
if answer is None:
|
||||
# This should never be the case as these non-streamed flows do not have a stop-generation signal
|
||||
raise RuntimeError("Answer was not generated")
|
||||
if error_msg is not None:
|
||||
answer = ""
|
||||
else:
|
||||
# This should never be the case as these non-streamed flows do not have a stop-generation signal
|
||||
raise RuntimeError("Answer was not generated")
|
||||
|
||||
return ChatBasicResponse(
|
||||
answer=answer,
|
||||
|
||||
@@ -200,7 +200,6 @@ def build_system_prompt(
|
||||
tools: Sequence[Tool] | None = None,
|
||||
should_cite_documents: bool = False,
|
||||
include_all_guidance: bool = False,
|
||||
genui_prompt: str | None = None,
|
||||
) -> str:
|
||||
"""Should only be called with the default behavior system prompt.
|
||||
If the user has replaced the default behavior prompt with their custom agent prompt, do not call this function.
|
||||
@@ -289,7 +288,4 @@ def build_system_prompt(
|
||||
if tool_guidance_sections:
|
||||
system_prompt += TOOL_SECTION_HEADER + "\n".join(tool_guidance_sections)
|
||||
|
||||
if genui_prompt:
|
||||
system_prompt += "\n\n" + genui_prompt
|
||||
|
||||
return system_prompt
|
||||
|
||||
@@ -318,9 +318,6 @@ VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT = (
|
||||
OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE = int(
|
||||
os.environ.get("OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE") or 500
|
||||
)
|
||||
OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES = int(
|
||||
os.environ.get("OPENSEARCH_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES") or 0
|
||||
)
|
||||
|
||||
VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost"
|
||||
# NOTE: this is used if and only if the vespa config server is accessible via a
|
||||
@@ -957,7 +954,7 @@ ENTERPRISE_EDITION_ENABLED = (
|
||||
#####
|
||||
# Image Generation Configuration (DEPRECATED)
|
||||
# These environment variables will be deprecated soon.
|
||||
# To configure image generation, please visit the Image Generation page in the Admin Settings.
|
||||
# To configure image generation, please visit the Image Generation page in the Admin Panel.
|
||||
#####
|
||||
# Azure Image Configurations
|
||||
AZURE_IMAGE_API_VERSION = os.environ.get("AZURE_IMAGE_API_VERSION") or os.environ.get(
|
||||
@@ -1046,13 +1043,9 @@ POD_NAMESPACE = os.environ.get("POD_NAMESPACE")
|
||||
|
||||
DEV_MODE = os.environ.get("DEV_MODE", "").lower() == "true"
|
||||
|
||||
INTEGRATION_TESTS_MODE = os.environ.get("INTEGRATION_TESTS_MODE", "").lower() == "true"
|
||||
HOOK_ENABLED = os.environ.get("HOOK_ENABLED", "").lower() == "true"
|
||||
|
||||
#####
|
||||
# GenUI Configuration
|
||||
#####
|
||||
# Enable GenUI structured UI rendering in chat responses
|
||||
GENUI_ENABLED = os.environ.get("GENUI_ENABLED", "").lower() == "true"
|
||||
INTEGRATION_TESTS_MODE = os.environ.get("INTEGRATION_TESTS_MODE", "").lower() == "true"
|
||||
|
||||
#####
|
||||
# Captcha Configuration (for cloud signup protection)
|
||||
|
||||
@@ -511,7 +511,7 @@ def add_credential_to_connector(
|
||||
user: User,
|
||||
connector_id: int,
|
||||
credential_id: int,
|
||||
cc_pair_name: str | None,
|
||||
cc_pair_name: str,
|
||||
access_type: AccessType,
|
||||
groups: list[int] | None,
|
||||
auto_sync_options: dict | None = None,
|
||||
|
||||
@@ -304,3 +304,13 @@ class LLMModelFlowType(str, PyEnum):
|
||||
CHAT = "chat"
|
||||
VISION = "vision"
|
||||
CONTEXTUAL_RAG = "contextual_rag"
|
||||
|
||||
|
||||
class HookPoint(str, PyEnum):
|
||||
DOCUMENT_INGESTION = "document_ingestion"
|
||||
QUERY_PROCESSING = "query_processing"
|
||||
|
||||
|
||||
class HookFailStrategy(str, PyEnum):
|
||||
HARD = "hard" # exception propagates, pipeline aborts
|
||||
SOFT = "soft" # log error, return original input, pipeline continues
|
||||
|
||||
233
backend/onyx/db/hook.py
Normal file
233
backend/onyx/db/hook.py
Normal file
@@ -0,0 +1,233 @@
|
||||
import datetime
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import delete
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.engine import CursorResult
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.orm import selectinload
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.db.constants import UNSET
|
||||
from onyx.db.constants import UnsetType
|
||||
from onyx.db.enums import HookFailStrategy
|
||||
from onyx.db.enums import HookPoint
|
||||
from onyx.db.models import Hook
|
||||
from onyx.db.models import HookExecutionLog
|
||||
from onyx.error_handling.error_codes import OnyxErrorCode
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
|
||||
|
||||
# ── Hook CRUD ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def get_hook_by_id(
|
||||
*,
|
||||
db_session: Session,
|
||||
hook_id: int,
|
||||
include_deleted: bool = False,
|
||||
include_creator: bool = False,
|
||||
) -> Hook | None:
|
||||
stmt = select(Hook).where(Hook.id == hook_id)
|
||||
if not include_deleted:
|
||||
stmt = stmt.where(Hook.deleted.is_(False))
|
||||
if include_creator:
|
||||
stmt = stmt.options(selectinload(Hook.creator))
|
||||
return db_session.scalar(stmt)
|
||||
|
||||
|
||||
def get_non_deleted_hook_by_hook_point(
|
||||
*,
|
||||
db_session: Session,
|
||||
hook_point: HookPoint,
|
||||
include_creator: bool = False,
|
||||
) -> Hook | None:
|
||||
stmt = (
|
||||
select(Hook).where(Hook.hook_point == hook_point).where(Hook.deleted.is_(False))
|
||||
)
|
||||
if include_creator:
|
||||
stmt = stmt.options(selectinload(Hook.creator))
|
||||
return db_session.scalar(stmt)
|
||||
|
||||
|
||||
def get_hooks(
|
||||
*,
|
||||
db_session: Session,
|
||||
include_deleted: bool = False,
|
||||
include_creator: bool = False,
|
||||
) -> list[Hook]:
|
||||
stmt = select(Hook)
|
||||
if not include_deleted:
|
||||
stmt = stmt.where(Hook.deleted.is_(False))
|
||||
if include_creator:
|
||||
stmt = stmt.options(selectinload(Hook.creator))
|
||||
stmt = stmt.order_by(Hook.hook_point, Hook.created_at.desc())
|
||||
return list(db_session.scalars(stmt).all())
|
||||
|
||||
|
||||
def create_hook__no_commit(
|
||||
*,
|
||||
db_session: Session,
|
||||
name: str,
|
||||
hook_point: HookPoint,
|
||||
endpoint_url: str | None = None,
|
||||
api_key: str | None = None,
|
||||
fail_strategy: HookFailStrategy,
|
||||
timeout_seconds: float,
|
||||
is_active: bool = False,
|
||||
creator_id: UUID | None = None,
|
||||
) -> Hook:
|
||||
"""Create a new hook for the given hook point.
|
||||
|
||||
At most one non-deleted hook per hook point is allowed. Raises
|
||||
OnyxError(CONFLICT) if a hook already exists, including under concurrent
|
||||
duplicate creates where the partial unique index fires an IntegrityError.
|
||||
"""
|
||||
existing = get_non_deleted_hook_by_hook_point(
|
||||
db_session=db_session, hook_point=hook_point
|
||||
)
|
||||
if existing:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.CONFLICT,
|
||||
f"A hook for '{hook_point.value}' already exists (id={existing.id}).",
|
||||
)
|
||||
|
||||
hook = Hook(
|
||||
name=name,
|
||||
hook_point=hook_point,
|
||||
endpoint_url=endpoint_url,
|
||||
api_key=api_key,
|
||||
fail_strategy=fail_strategy,
|
||||
timeout_seconds=timeout_seconds,
|
||||
is_active=is_active,
|
||||
creator_id=creator_id,
|
||||
)
|
||||
# Use a savepoint so that a failed insert only rolls back this operation,
|
||||
# not the entire outer transaction.
|
||||
savepoint = db_session.begin_nested()
|
||||
try:
|
||||
db_session.add(hook)
|
||||
savepoint.commit()
|
||||
except IntegrityError as exc:
|
||||
savepoint.rollback()
|
||||
if "ix_hook_one_non_deleted_per_point" in str(exc.orig):
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.CONFLICT,
|
||||
f"A hook for '{hook_point.value}' already exists.",
|
||||
)
|
||||
raise # re-raise unrelated integrity errors (FK violations, etc.)
|
||||
return hook
|
||||
|
||||
|
||||
def update_hook__no_commit(
|
||||
*,
|
||||
db_session: Session,
|
||||
hook_id: int,
|
||||
name: str | None = None,
|
||||
endpoint_url: str | None | UnsetType = UNSET,
|
||||
api_key: str | None | UnsetType = UNSET,
|
||||
fail_strategy: HookFailStrategy | None = None,
|
||||
timeout_seconds: float | None = None,
|
||||
is_active: bool | None = None,
|
||||
is_reachable: bool | None = None,
|
||||
include_creator: bool = False,
|
||||
) -> Hook:
|
||||
"""Update hook fields.
|
||||
|
||||
Sentinel conventions:
|
||||
- endpoint_url, api_key: pass UNSET to leave unchanged; pass None to clear.
|
||||
- name, fail_strategy, timeout_seconds, is_active, is_reachable: pass None to leave unchanged.
|
||||
"""
|
||||
hook = get_hook_by_id(
|
||||
db_session=db_session, hook_id=hook_id, include_creator=include_creator
|
||||
)
|
||||
if hook is None:
|
||||
raise OnyxError(OnyxErrorCode.NOT_FOUND, f"Hook with id {hook_id} not found.")
|
||||
|
||||
if name is not None:
|
||||
hook.name = name
|
||||
if not isinstance(endpoint_url, UnsetType):
|
||||
hook.endpoint_url = endpoint_url
|
||||
if not isinstance(api_key, UnsetType):
|
||||
hook.api_key = api_key # type: ignore[assignment] # EncryptedString coerces str → SensitiveValue at the ORM level
|
||||
if fail_strategy is not None:
|
||||
hook.fail_strategy = fail_strategy
|
||||
if timeout_seconds is not None:
|
||||
hook.timeout_seconds = timeout_seconds
|
||||
if is_active is not None:
|
||||
hook.is_active = is_active
|
||||
if is_reachable is not None:
|
||||
hook.is_reachable = is_reachable
|
||||
|
||||
db_session.flush()
|
||||
return hook
|
||||
|
||||
|
||||
def delete_hook__no_commit(
|
||||
*,
|
||||
db_session: Session,
|
||||
hook_id: int,
|
||||
) -> None:
|
||||
hook = get_hook_by_id(db_session=db_session, hook_id=hook_id)
|
||||
if hook is None:
|
||||
raise OnyxError(OnyxErrorCode.NOT_FOUND, f"Hook with id {hook_id} not found.")
|
||||
|
||||
hook.deleted = True
|
||||
hook.is_active = False
|
||||
db_session.flush()
|
||||
|
||||
|
||||
# ── HookExecutionLog CRUD ────────────────────────────────────────────────
|
||||
|
||||
|
||||
def create_hook_execution_log__no_commit(
|
||||
*,
|
||||
db_session: Session,
|
||||
hook_id: int,
|
||||
is_success: bool,
|
||||
error_message: str | None = None,
|
||||
status_code: int | None = None,
|
||||
duration_ms: int | None = None,
|
||||
) -> HookExecutionLog:
|
||||
log = HookExecutionLog(
|
||||
hook_id=hook_id,
|
||||
is_success=is_success,
|
||||
error_message=error_message,
|
||||
status_code=status_code,
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
db_session.add(log)
|
||||
db_session.flush()
|
||||
return log
|
||||
|
||||
|
||||
def get_hook_execution_logs(
|
||||
*,
|
||||
db_session: Session,
|
||||
hook_id: int,
|
||||
limit: int,
|
||||
) -> list[HookExecutionLog]:
|
||||
stmt = (
|
||||
select(HookExecutionLog)
|
||||
.where(HookExecutionLog.hook_id == hook_id)
|
||||
.order_by(HookExecutionLog.created_at.desc())
|
||||
.limit(limit)
|
||||
)
|
||||
return list(db_session.scalars(stmt).all())
|
||||
|
||||
|
||||
def cleanup_old_execution_logs__no_commit(
|
||||
*,
|
||||
db_session: Session,
|
||||
max_age_days: int,
|
||||
) -> int:
|
||||
"""Delete execution logs older than max_age_days. Returns the number of rows deleted."""
|
||||
cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
|
||||
days=max_age_days
|
||||
)
|
||||
result: CursorResult = db_session.execute( # type: ignore[assignment]
|
||||
delete(HookExecutionLog)
|
||||
.where(HookExecutionLog.created_at < cutoff)
|
||||
.execution_options(synchronize_session=False)
|
||||
)
|
||||
return result.rowcount
|
||||
@@ -64,6 +64,8 @@ from onyx.db.enums import (
|
||||
BuildSessionStatus,
|
||||
EmbeddingPrecision,
|
||||
HierarchyNodeType,
|
||||
HookFailStrategy,
|
||||
HookPoint,
|
||||
IndexingMode,
|
||||
OpenSearchDocumentMigrationStatus,
|
||||
OpenSearchTenantMigrationStatus,
|
||||
@@ -5178,3 +5180,90 @@ class CacheStore(Base):
|
||||
expires_at: Mapped[datetime.datetime | None] = mapped_column(
|
||||
DateTime(timezone=True), nullable=True
|
||||
)
|
||||
|
||||
|
||||
class Hook(Base):
|
||||
"""Pairs a HookPoint with a customer-provided API endpoint.
|
||||
|
||||
At most one non-deleted Hook per HookPoint is allowed, enforced by a
|
||||
partial unique index on (hook_point) where deleted=false.
|
||||
"""
|
||||
|
||||
__tablename__ = "hook"
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True)
|
||||
name: Mapped[str] = mapped_column(String, nullable=False)
|
||||
hook_point: Mapped[HookPoint] = mapped_column(
|
||||
Enum(HookPoint, native_enum=False), nullable=False
|
||||
)
|
||||
endpoint_url: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
api_key: Mapped[SensitiveValue[str] | None] = mapped_column(
|
||||
EncryptedString(), nullable=True
|
||||
)
|
||||
is_reachable: Mapped[bool | None] = mapped_column(
|
||||
Boolean, nullable=True, default=None
|
||||
) # null = never validated, true = last check passed, false = last check failed
|
||||
fail_strategy: Mapped[HookFailStrategy] = mapped_column(
|
||||
Enum(HookFailStrategy, native_enum=False),
|
||||
nullable=False,
|
||||
default=HookFailStrategy.HARD,
|
||||
)
|
||||
timeout_seconds: Mapped[float] = mapped_column(Float, nullable=False, default=30.0)
|
||||
is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
deleted: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
creator_id: Mapped[UUID | None] = mapped_column(
|
||||
PGUUID(as_uuid=True),
|
||||
ForeignKey("user.id", ondelete="SET NULL"),
|
||||
nullable=True,
|
||||
)
|
||||
created_at: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now(), nullable=False
|
||||
)
|
||||
updated_at: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
server_default=func.now(),
|
||||
onupdate=func.now(),
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
creator: Mapped["User | None"] = relationship("User", foreign_keys=[creator_id])
|
||||
execution_logs: Mapped[list["HookExecutionLog"]] = relationship(
|
||||
"HookExecutionLog", back_populates="hook", cascade="all, delete-orphan"
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
Index(
|
||||
"ix_hook_one_non_deleted_per_point",
|
||||
"hook_point",
|
||||
unique=True,
|
||||
postgresql_where=(deleted == False), # noqa: E712
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class HookExecutionLog(Base):
|
||||
"""Records hook executions for health monitoring and debugging.
|
||||
|
||||
Currently only failures are logged; the is_success column exists so
|
||||
success logging can be added later without a schema change.
|
||||
Retention: rows older than 30 days are deleted by a nightly Celery task.
|
||||
"""
|
||||
|
||||
__tablename__ = "hook_execution_log"
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True)
|
||||
hook_id: Mapped[int] = mapped_column(
|
||||
Integer,
|
||||
ForeignKey("hook.id", ondelete="CASCADE"),
|
||||
nullable=False,
|
||||
index=True,
|
||||
)
|
||||
is_success: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
status_code: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
||||
duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
||||
created_at: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now(), nullable=False, index=True
|
||||
)
|
||||
|
||||
hook: Mapped["Hook"] = relationship("Hook", back_populates="execution_logs")
|
||||
|
||||
@@ -1,12 +1,23 @@
|
||||
# Default value for the maximum number of tokens a chunk can hold, if none is
|
||||
# specified when creating an index.
|
||||
from onyx.configs.app_configs import (
|
||||
OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES,
|
||||
)
|
||||
import os
|
||||
from enum import Enum
|
||||
|
||||
|
||||
DEFAULT_MAX_CHUNK_SIZE = 512
|
||||
|
||||
|
||||
# By default OpenSearch will only return a maximum of this many results in a
|
||||
# given search. This value is configurable in the index settings.
|
||||
DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW = 10_000
|
||||
|
||||
|
||||
# For documents which do not have a value for LAST_UPDATED_FIELD_NAME, we assume
|
||||
# that the document was last updated this many days ago for the purpose of time
|
||||
# cutoff filtering during retrieval.
|
||||
ASSUMED_DOCUMENT_AGE_DAYS = 90
|
||||
|
||||
|
||||
# Size of the dynamic list used to consider elements during kNN graph creation.
|
||||
# Higher values improve search quality but increase indexing time. Values
|
||||
# typically range between 100 - 512.
|
||||
@@ -26,10 +37,10 @@ M = 32 # Set relatively high for better accuracy.
|
||||
# we have a much higher chance of all 10 of the final desired docs showing up
|
||||
# and getting scored. In worse situations, the final 10 docs don't even show up
|
||||
# as the final 10 (worse than just a miss at the reranking step).
|
||||
DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES = (
|
||||
OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES
|
||||
if OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES > 0
|
||||
else 750
|
||||
# Defaults to 100 for now. Initially this defaulted to 750 but we were seeing
|
||||
# poor search performance.
|
||||
DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES = int(
|
||||
os.environ.get("DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES", 100)
|
||||
)
|
||||
|
||||
# Number of vectors to examine to decide the top k neighbors for the HNSW
|
||||
@@ -39,23 +50,43 @@ DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES = (
|
||||
# larger than k, you can provide the size parameter to limit the final number of
|
||||
# results to k." from
|
||||
# https://docs.opensearch.org/latest/query-dsl/specialized/k-nn/index/#ef_search
|
||||
EF_SEARCH = DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES
|
||||
EF_SEARCH = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES
|
||||
|
||||
# Since the titles are included in the contents, the embedding matches are
|
||||
# heavily downweighted as they act as a boost rather than an independent scoring
|
||||
# component.
|
||||
SEARCH_TITLE_VECTOR_WEIGHT = 0.1
|
||||
SEARCH_CONTENT_VECTOR_WEIGHT = 0.45
|
||||
# Single keyword weight for both title and content (merged from former title
|
||||
# keyword + content keyword).
|
||||
SEARCH_KEYWORD_WEIGHT = 0.45
|
||||
|
||||
# NOTE: It is critical that the order of these weights matches the order of the
|
||||
# sub-queries in the hybrid search.
|
||||
HYBRID_SEARCH_NORMALIZATION_WEIGHTS = [
|
||||
SEARCH_TITLE_VECTOR_WEIGHT,
|
||||
SEARCH_CONTENT_VECTOR_WEIGHT,
|
||||
SEARCH_KEYWORD_WEIGHT,
|
||||
]
|
||||
class HybridSearchSubqueryConfiguration(Enum):
|
||||
TITLE_VECTOR_CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD = 1
|
||||
# Current default.
|
||||
CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD = 2
|
||||
|
||||
assert sum(HYBRID_SEARCH_NORMALIZATION_WEIGHTS) == 1.0
|
||||
|
||||
# Will raise and block application start if HYBRID_SEARCH_SUBQUERY_CONFIGURATION
|
||||
# is set but not a valid value. If not set, defaults to
|
||||
# CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD.
|
||||
HYBRID_SEARCH_SUBQUERY_CONFIGURATION: HybridSearchSubqueryConfiguration = (
|
||||
HybridSearchSubqueryConfiguration(
|
||||
int(os.environ["HYBRID_SEARCH_SUBQUERY_CONFIGURATION"])
|
||||
)
|
||||
if os.environ.get("HYBRID_SEARCH_SUBQUERY_CONFIGURATION", None) is not None
|
||||
else HybridSearchSubqueryConfiguration.CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD
|
||||
)
|
||||
|
||||
|
||||
class HybridSearchNormalizationPipeline(Enum):
|
||||
# Current default.
|
||||
MIN_MAX = 1
|
||||
# NOTE: Using z-score normalization is better for hybrid search from a
|
||||
# theoretical standpoint. Empirically on a small dataset of up to 10K docs,
|
||||
# it's not very different. Likely more impactful at scale.
|
||||
# https://opensearch.org/blog/introducing-the-z-score-normalization-technique-for-hybrid-search/
|
||||
ZSCORE = 2
|
||||
|
||||
|
||||
# Will raise and block application start if HYBRID_SEARCH_NORMALIZATION_PIPELINE
|
||||
# is set but not a valid value. If not set, defaults to MIN_MAX.
|
||||
HYBRID_SEARCH_NORMALIZATION_PIPELINE: HybridSearchNormalizationPipeline = (
|
||||
HybridSearchNormalizationPipeline(
|
||||
int(os.environ["HYBRID_SEARCH_NORMALIZATION_PIPELINE"])
|
||||
)
|
||||
if os.environ.get("HYBRID_SEARCH_NORMALIZATION_PIPELINE", None) is not None
|
||||
else HybridSearchNormalizationPipeline.MIN_MAX
|
||||
)
|
||||
|
||||
@@ -55,16 +55,13 @@ from onyx.document_index.opensearch.schema import PERSONAS_FIELD_NAME
|
||||
from onyx.document_index.opensearch.schema import USER_PROJECTS_FIELD_NAME
|
||||
from onyx.document_index.opensearch.search import DocumentQuery
|
||||
from onyx.document_index.opensearch.search import (
|
||||
MIN_MAX_NORMALIZATION_PIPELINE_CONFIG,
|
||||
get_min_max_normalization_pipeline_name_and_config,
|
||||
)
|
||||
from onyx.document_index.opensearch.search import (
|
||||
MIN_MAX_NORMALIZATION_PIPELINE_NAME,
|
||||
get_normalization_pipeline_name_and_config,
|
||||
)
|
||||
from onyx.document_index.opensearch.search import (
|
||||
ZSCORE_NORMALIZATION_PIPELINE_CONFIG,
|
||||
)
|
||||
from onyx.document_index.opensearch.search import (
|
||||
ZSCORE_NORMALIZATION_PIPELINE_NAME,
|
||||
get_zscore_normalization_pipeline_name_and_config,
|
||||
)
|
||||
from onyx.indexing.models import DocMetadataAwareIndexChunk
|
||||
from onyx.indexing.models import Document
|
||||
@@ -103,13 +100,19 @@ def set_cluster_state(client: OpenSearchClient) -> None:
|
||||
"is not the first time running Onyx against this instance of OpenSearch, these "
|
||||
"settings have likely already been set. Not taking any further action..."
|
||||
)
|
||||
client.create_search_pipeline(
|
||||
pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME,
|
||||
pipeline_body=MIN_MAX_NORMALIZATION_PIPELINE_CONFIG,
|
||||
min_max_normalization_pipeline_name, min_max_normalization_pipeline_config = (
|
||||
get_min_max_normalization_pipeline_name_and_config()
|
||||
)
|
||||
zscore_normalization_pipeline_name, zscore_normalization_pipeline_config = (
|
||||
get_zscore_normalization_pipeline_name_and_config()
|
||||
)
|
||||
client.create_search_pipeline(
|
||||
pipeline_id=ZSCORE_NORMALIZATION_PIPELINE_NAME,
|
||||
pipeline_body=ZSCORE_NORMALIZATION_PIPELINE_CONFIG,
|
||||
pipeline_id=min_max_normalization_pipeline_name,
|
||||
pipeline_body=min_max_normalization_pipeline_config,
|
||||
)
|
||||
client.create_search_pipeline(
|
||||
pipeline_id=zscore_normalization_pipeline_name,
|
||||
pipeline_body=zscore_normalization_pipeline_config,
|
||||
)
|
||||
|
||||
|
||||
@@ -940,14 +943,10 @@ class OpenSearchDocumentIndex(DocumentIndex):
|
||||
index_filters=filters,
|
||||
include_hidden=False,
|
||||
)
|
||||
# NOTE: Using z-score normalization here because it's better for hybrid
|
||||
# search from a theoretical standpoint. Empirically on a small dataset
|
||||
# of up to 10K docs, it's not very different. Likely more impactful at
|
||||
# scale.
|
||||
# https://opensearch.org/blog/introducing-the-z-score-normalization-technique-for-hybrid-search/
|
||||
normalization_pipeline_name, _ = get_normalization_pipeline_name_and_config()
|
||||
search_hits: list[SearchHit[DocumentChunk]] = self._client.search(
|
||||
body=query_body,
|
||||
search_pipeline_id=ZSCORE_NORMALIZATION_PIPELINE_NAME,
|
||||
search_pipeline_id=normalization_pipeline_name,
|
||||
)
|
||||
|
||||
# Good place for a breakpoint to inspect the search hits if you have "explain" enabled.
|
||||
|
||||
@@ -13,10 +13,21 @@ from onyx.configs.constants import INDEX_SEPARATOR
|
||||
from onyx.context.search.models import IndexFilters
|
||||
from onyx.context.search.models import Tag
|
||||
from onyx.document_index.interfaces_new import TenantState
|
||||
from onyx.document_index.opensearch.constants import ASSUMED_DOCUMENT_AGE_DAYS
|
||||
from onyx.document_index.opensearch.constants import (
|
||||
DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES,
|
||||
DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES,
|
||||
)
|
||||
from onyx.document_index.opensearch.constants import HYBRID_SEARCH_NORMALIZATION_WEIGHTS
|
||||
from onyx.document_index.opensearch.constants import (
|
||||
DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW,
|
||||
)
|
||||
from onyx.document_index.opensearch.constants import (
|
||||
HYBRID_SEARCH_NORMALIZATION_PIPELINE,
|
||||
)
|
||||
from onyx.document_index.opensearch.constants import (
|
||||
HYBRID_SEARCH_SUBQUERY_CONFIGURATION,
|
||||
)
|
||||
from onyx.document_index.opensearch.constants import HybridSearchNormalizationPipeline
|
||||
from onyx.document_index.opensearch.constants import HybridSearchSubqueryConfiguration
|
||||
from onyx.document_index.opensearch.schema import ACCESS_CONTROL_LIST_FIELD_NAME
|
||||
from onyx.document_index.opensearch.schema import ANCESTOR_HIERARCHY_NODE_IDS_FIELD_NAME
|
||||
from onyx.document_index.opensearch.schema import CHUNK_INDEX_FIELD_NAME
|
||||
@@ -43,49 +54,113 @@ from onyx.document_index.opensearch.schema import USER_PROJECTS_FIELD_NAME
|
||||
|
||||
# TODO(andrei): Turn all magic dictionaries to pydantic models.
|
||||
|
||||
MIN_MAX_NORMALIZATION_PIPELINE_NAME = "normalization_pipeline_min_max"
|
||||
MIN_MAX_NORMALIZATION_PIPELINE_CONFIG: dict[str, Any] = {
|
||||
"description": "Normalization for keyword and vector scores using min-max",
|
||||
"phase_results_processors": [
|
||||
{
|
||||
# https://docs.opensearch.org/latest/search-plugins/search-pipelines/normalization-processor/
|
||||
"normalization-processor": {
|
||||
"normalization": {"technique": "min_max"},
|
||||
"combination": {
|
||||
"technique": "arithmetic_mean",
|
||||
"parameters": {"weights": HYBRID_SEARCH_NORMALIZATION_WEIGHTS},
|
||||
},
|
||||
|
||||
def _get_hybrid_search_normalization_weights() -> list[float]:
|
||||
if (
|
||||
HYBRID_SEARCH_SUBQUERY_CONFIGURATION
|
||||
is HybridSearchSubqueryConfiguration.TITLE_VECTOR_CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD
|
||||
):
|
||||
# Since the titles are included in the contents, the embedding matches
|
||||
# are heavily downweighted as they act as a boost rather than an
|
||||
# independent scoring component.
|
||||
search_title_vector_weight = 0.1
|
||||
search_content_vector_weight = 0.45
|
||||
# Single keyword weight for both title and content (merged from former
|
||||
# title keyword + content keyword).
|
||||
search_keyword_weight = 0.45
|
||||
|
||||
# NOTE: It is critical that the order of these weights matches the order
|
||||
# of the sub-queries in the hybrid search.
|
||||
hybrid_search_normalization_weights = [
|
||||
search_title_vector_weight,
|
||||
search_content_vector_weight,
|
||||
search_keyword_weight,
|
||||
]
|
||||
elif (
|
||||
HYBRID_SEARCH_SUBQUERY_CONFIGURATION
|
||||
is HybridSearchSubqueryConfiguration.CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD
|
||||
):
|
||||
search_content_vector_weight = 0.5
|
||||
# Single keyword weight for both title and content (merged from former
|
||||
# title keyword + content keyword).
|
||||
search_keyword_weight = 0.5
|
||||
|
||||
# NOTE: It is critical that the order of these weights matches the order
|
||||
# of the sub-queries in the hybrid search.
|
||||
hybrid_search_normalization_weights = [
|
||||
search_content_vector_weight,
|
||||
search_keyword_weight,
|
||||
]
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Bug: Unhandled hybrid search subquery configuration: {HYBRID_SEARCH_SUBQUERY_CONFIGURATION}."
|
||||
)
|
||||
|
||||
assert (
|
||||
sum(hybrid_search_normalization_weights) == 1.0
|
||||
), "Bug: Hybrid search normalization weights do not sum to 1.0."
|
||||
|
||||
return hybrid_search_normalization_weights
|
||||
|
||||
|
||||
def get_min_max_normalization_pipeline_name_and_config() -> tuple[str, dict[str, Any]]:
|
||||
min_max_normalization_pipeline_name = "normalization_pipeline_min_max"
|
||||
min_max_normalization_pipeline_config: dict[str, Any] = {
|
||||
"description": "Normalization for keyword and vector scores using min-max",
|
||||
"phase_results_processors": [
|
||||
{
|
||||
# https://docs.opensearch.org/latest/search-plugins/search-pipelines/normalization-processor/
|
||||
"normalization-processor": {
|
||||
"normalization": {"technique": "min_max"},
|
||||
"combination": {
|
||||
"technique": "arithmetic_mean",
|
||||
"parameters": {
|
||||
"weights": _get_hybrid_search_normalization_weights()
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
}
|
||||
],
|
||||
}
|
||||
return min_max_normalization_pipeline_name, min_max_normalization_pipeline_config
|
||||
|
||||
ZSCORE_NORMALIZATION_PIPELINE_NAME = "normalization_pipeline_zscore"
|
||||
ZSCORE_NORMALIZATION_PIPELINE_CONFIG: dict[str, Any] = {
|
||||
"description": "Normalization for keyword and vector scores using z-score",
|
||||
"phase_results_processors": [
|
||||
{
|
||||
# https://docs.opensearch.org/latest/search-plugins/search-pipelines/normalization-processor/
|
||||
"normalization-processor": {
|
||||
"normalization": {"technique": "z_score"},
|
||||
"combination": {
|
||||
"technique": "arithmetic_mean",
|
||||
"parameters": {"weights": HYBRID_SEARCH_NORMALIZATION_WEIGHTS},
|
||||
},
|
||||
|
||||
def get_zscore_normalization_pipeline_name_and_config() -> tuple[str, dict[str, Any]]:
|
||||
zscore_normalization_pipeline_name = "normalization_pipeline_zscore"
|
||||
zscore_normalization_pipeline_config: dict[str, Any] = {
|
||||
"description": "Normalization for keyword and vector scores using z-score",
|
||||
"phase_results_processors": [
|
||||
{
|
||||
# https://docs.opensearch.org/latest/search-plugins/search-pipelines/normalization-processor/
|
||||
"normalization-processor": {
|
||||
"normalization": {"technique": "z_score"},
|
||||
"combination": {
|
||||
"technique": "arithmetic_mean",
|
||||
"parameters": {
|
||||
"weights": _get_hybrid_search_normalization_weights()
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
}
|
||||
],
|
||||
}
|
||||
return zscore_normalization_pipeline_name, zscore_normalization_pipeline_config
|
||||
|
||||
|
||||
# By default OpenSearch will only return a maximum of this many results in a
|
||||
# given search. This value is configurable in the index settings.
|
||||
DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW = 10_000
|
||||
|
||||
# For documents which do not have a value for LAST_UPDATED_FIELD_NAME, we assume
|
||||
# that the document was last updated this many days ago for the purpose of time
|
||||
# cutoff filtering during retrieval.
|
||||
ASSUMED_DOCUMENT_AGE_DAYS = 90
|
||||
def get_normalization_pipeline_name_and_config() -> tuple[str, dict[str, Any]]:
|
||||
if (
|
||||
HYBRID_SEARCH_NORMALIZATION_PIPELINE
|
||||
is HybridSearchNormalizationPipeline.MIN_MAX
|
||||
):
|
||||
return get_min_max_normalization_pipeline_name_and_config()
|
||||
elif (
|
||||
HYBRID_SEARCH_NORMALIZATION_PIPELINE is HybridSearchNormalizationPipeline.ZSCORE
|
||||
):
|
||||
return get_zscore_normalization_pipeline_name_and_config()
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Bug: Unhandled hybrid search normalization pipeline: {HYBRID_SEARCH_NORMALIZATION_PIPELINE}."
|
||||
)
|
||||
|
||||
|
||||
class DocumentQuery:
|
||||
@@ -257,7 +332,7 @@ class DocumentQuery:
|
||||
|
||||
# TODO(andrei, yuhong): We can tune this more dynamically based on
|
||||
# num_hits.
|
||||
max_results_per_subquery = DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES
|
||||
max_results_per_subquery = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES
|
||||
|
||||
hybrid_search_subqueries = DocumentQuery._get_hybrid_search_subqueries(
|
||||
query_text, query_vector, vector_candidates=max_results_per_subquery
|
||||
@@ -385,7 +460,7 @@ class DocumentQuery:
|
||||
# search. This is higher than the number of results because the scoring
|
||||
# is hybrid. For a detailed breakdown, see where the default value is
|
||||
# set.
|
||||
vector_candidates: int = DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES,
|
||||
vector_candidates: int = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Returns subqueries for hybrid search.
|
||||
|
||||
@@ -395,20 +470,18 @@ class DocumentQuery:
|
||||
The return of this function is not sufficient to be directly supplied to
|
||||
the OpenSearch client. See get_hybrid_search_query.
|
||||
|
||||
Matches:
|
||||
- Title vector
|
||||
- Content vector
|
||||
- Keyword (title + content, match and phrase)
|
||||
|
||||
Normalization is not performed here.
|
||||
The weights of each of these subqueries should be configured in a search
|
||||
pipeline.
|
||||
|
||||
The exact subqueries executed depend on the
|
||||
HYBRID_SEARCH_SUBQUERY_CONFIGURATION setting.
|
||||
|
||||
NOTE: For OpenSearch, 5 is the maximum number of query clauses allowed
|
||||
in a single hybrid query. Source:
|
||||
https://docs.opensearch.org/latest/query-dsl/compound/hybrid/
|
||||
|
||||
NOTE: Each query is independent during the search phase, there is no
|
||||
NOTE: Each query is independent during the search phase; there is no
|
||||
backfilling of scores for missing query components. What this means is
|
||||
that if a document was a good vector match but did not show up for
|
||||
keyword, it gets a score of 0 for the keyword component of the hybrid
|
||||
@@ -437,74 +510,115 @@ class DocumentQuery:
|
||||
similarity search.
|
||||
"""
|
||||
# Build sub-queries for hybrid search. Order must match normalization
|
||||
# pipeline weights: title vector, content vector, keyword (title + content).
|
||||
hybrid_search_queries: list[dict[str, Any]] = [
|
||||
# 1. Title vector search
|
||||
{
|
||||
"knn": {
|
||||
TITLE_VECTOR_FIELD_NAME: {
|
||||
"vector": query_vector,
|
||||
"k": vector_candidates,
|
||||
}
|
||||
}
|
||||
},
|
||||
# 2. Content vector search
|
||||
{
|
||||
"knn": {
|
||||
CONTENT_VECTOR_FIELD_NAME: {
|
||||
"vector": query_vector,
|
||||
"k": vector_candidates,
|
||||
}
|
||||
}
|
||||
},
|
||||
# 3. Keyword (title + content) match and phrase search.
|
||||
{
|
||||
"bool": {
|
||||
"should": [
|
||||
{
|
||||
"match": {
|
||||
TITLE_FIELD_NAME: {
|
||||
"query": query_text,
|
||||
"operator": "or",
|
||||
# The title fields are strongly discounted as they are included in the content.
|
||||
# It just acts as a minor boost
|
||||
"boost": 0.1,
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match_phrase": {
|
||||
TITLE_FIELD_NAME: {
|
||||
"query": query_text,
|
||||
"slop": 1,
|
||||
"boost": 0.2,
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": {
|
||||
CONTENT_FIELD_NAME: {
|
||||
"query": query_text,
|
||||
"operator": "or",
|
||||
"boost": 1.0,
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match_phrase": {
|
||||
CONTENT_FIELD_NAME: {
|
||||
"query": query_text,
|
||||
"slop": 1,
|
||||
"boost": 1.5,
|
||||
}
|
||||
}
|
||||
},
|
||||
]
|
||||
}
|
||||
},
|
||||
]
|
||||
# pipeline weights.
|
||||
if (
|
||||
HYBRID_SEARCH_SUBQUERY_CONFIGURATION
|
||||
is HybridSearchSubqueryConfiguration.TITLE_VECTOR_CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD
|
||||
):
|
||||
return [
|
||||
DocumentQuery._get_title_vector_similarity_search_query(
|
||||
query_vector, vector_candidates
|
||||
),
|
||||
DocumentQuery._get_content_vector_similarity_search_query(
|
||||
query_vector, vector_candidates
|
||||
),
|
||||
DocumentQuery._get_title_content_combined_keyword_search_query(
|
||||
query_text
|
||||
),
|
||||
]
|
||||
elif (
|
||||
HYBRID_SEARCH_SUBQUERY_CONFIGURATION
|
||||
is HybridSearchSubqueryConfiguration.CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD
|
||||
):
|
||||
return [
|
||||
DocumentQuery._get_content_vector_similarity_search_query(
|
||||
query_vector, vector_candidates
|
||||
),
|
||||
DocumentQuery._get_title_content_combined_keyword_search_query(
|
||||
query_text
|
||||
),
|
||||
]
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Bug: Unhandled hybrid search subquery configuration: {HYBRID_SEARCH_SUBQUERY_CONFIGURATION}"
|
||||
)
|
||||
|
||||
return hybrid_search_queries
|
||||
@staticmethod
|
||||
def _get_title_vector_similarity_search_query(
|
||||
query_vector: list[float],
|
||||
vector_candidates: int = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"knn": {
|
||||
TITLE_VECTOR_FIELD_NAME: {
|
||||
"vector": query_vector,
|
||||
"k": vector_candidates,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _get_content_vector_similarity_search_query(
|
||||
query_vector: list[float],
|
||||
vector_candidates: int = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"knn": {
|
||||
CONTENT_VECTOR_FIELD_NAME: {
|
||||
"vector": query_vector,
|
||||
"k": vector_candidates,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _get_title_content_combined_keyword_search_query(
|
||||
query_text: str,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"bool": {
|
||||
"should": [
|
||||
{
|
||||
"match": {
|
||||
TITLE_FIELD_NAME: {
|
||||
"query": query_text,
|
||||
"operator": "or",
|
||||
# The title fields are strongly discounted as they are included in the content.
|
||||
# It just acts as a minor boost
|
||||
"boost": 0.1,
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match_phrase": {
|
||||
TITLE_FIELD_NAME: {
|
||||
"query": query_text,
|
||||
"slop": 1,
|
||||
"boost": 0.2,
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": {
|
||||
CONTENT_FIELD_NAME: {
|
||||
"query": query_text,
|
||||
"operator": "or",
|
||||
"boost": 1.0,
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match_phrase": {
|
||||
CONTENT_FIELD_NAME: {
|
||||
"query": query_text,
|
||||
"slop": 1,
|
||||
"boost": 1.5,
|
||||
}
|
||||
}
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _get_search_filters(
|
||||
|
||||
@@ -501,20 +501,31 @@ def query_vespa(
|
||||
response = http_client.post(SEARCH_ENDPOINT, json=params)
|
||||
response.raise_for_status()
|
||||
except httpx.HTTPError as e:
|
||||
error_base = "Failed to query Vespa"
|
||||
logger.error(
|
||||
f"{error_base}:\n"
|
||||
f"Request URL: {e.request.url}\n"
|
||||
f"Request Headers: {e.request.headers}\n"
|
||||
f"Request Payload: {params}\n"
|
||||
f"Exception: {str(e)}"
|
||||
+ (
|
||||
f"\nResponse: {e.response.text}"
|
||||
if isinstance(e, httpx.HTTPStatusError)
|
||||
else ""
|
||||
)
|
||||
response_text = (
|
||||
e.response.text if isinstance(e, httpx.HTTPStatusError) else None
|
||||
)
|
||||
raise httpx.HTTPError(error_base) from e
|
||||
status_code = (
|
||||
e.response.status_code if isinstance(e, httpx.HTTPStatusError) else None
|
||||
)
|
||||
yql_value = params.get("yql", "")
|
||||
yql_length = len(str(yql_value))
|
||||
|
||||
# Log each detail on its own line so log collectors capture them
|
||||
# as separate entries rather than truncating a single multiline msg
|
||||
logger.error(
|
||||
f"Failed to query Vespa | "
|
||||
f"status={status_code} | "
|
||||
f"yql_length={yql_length} | "
|
||||
f"exception={str(e)}"
|
||||
)
|
||||
if response_text:
|
||||
logger.error(f"Vespa error response: {response_text[:1000]}")
|
||||
logger.error(f"Vespa request URL: {e.request.url}")
|
||||
|
||||
# Re-raise with diagnostics so callers see what actually went wrong
|
||||
raise httpx.HTTPError(
|
||||
f"Failed to query Vespa (status={status_code}, " f"yql_length={yql_length})"
|
||||
) from e
|
||||
|
||||
response_json: dict[str, Any] = response.json()
|
||||
|
||||
|
||||
@@ -43,6 +43,22 @@ def build_vespa_filters(
|
||||
return ""
|
||||
return f"({' or '.join(eq_elems)})"
|
||||
|
||||
def _build_weighted_set_filter(key: str, vals: list[str] | None) -> str:
|
||||
"""Build a Vespa weightedSet filter for large value lists.
|
||||
|
||||
Uses Vespa's native weightedSet() operator instead of OR-chained
|
||||
'contains' clauses. This is critical for fields like
|
||||
access_control_list where a single user may have tens of thousands
|
||||
of ACL entries — OR clauses at that scale cause Vespa to reject
|
||||
the query with HTTP 400."""
|
||||
if not key or not vals:
|
||||
return ""
|
||||
filtered = [val for val in vals if val]
|
||||
if not filtered:
|
||||
return ""
|
||||
items = ", ".join(f'"{val}":1' for val in filtered)
|
||||
return f"weightedSet({key}, {{{items}}})"
|
||||
|
||||
def _build_int_or_filters(key: str, vals: list[int] | None) -> str:
|
||||
"""For an integer field filter.
|
||||
Returns a bare clause or ""."""
|
||||
@@ -157,11 +173,16 @@ def build_vespa_filters(
|
||||
if filters.tenant_id and MULTI_TENANT:
|
||||
filter_parts.append(build_tenant_id_filter(filters.tenant_id))
|
||||
|
||||
# ACL filters
|
||||
# ACL filters — use weightedSet for efficient matching against the
|
||||
# access_control_list weightedset<string> field. OR-chaining thousands
|
||||
# of 'contains' clauses causes Vespa to reject the query (HTTP 400)
|
||||
# for users with large numbers of external permission groups.
|
||||
if filters.access_control_list is not None:
|
||||
_append(
|
||||
filter_parts,
|
||||
_build_or_filters(ACCESS_CONTROL_LIST, filters.access_control_list),
|
||||
_build_weighted_set_filter(
|
||||
ACCESS_CONTROL_LIST, filters.access_control_list
|
||||
),
|
||||
)
|
||||
|
||||
# Source type filters
|
||||
|
||||
@@ -35,6 +35,8 @@ class OnyxErrorCode(Enum):
|
||||
INSUFFICIENT_PERMISSIONS = ("INSUFFICIENT_PERMISSIONS", 403)
|
||||
ADMIN_ONLY = ("ADMIN_ONLY", 403)
|
||||
EE_REQUIRED = ("EE_REQUIRED", 403)
|
||||
SINGLE_TENANT_ONLY = ("SINGLE_TENANT_ONLY", 403)
|
||||
ENV_VAR_GATED = ("ENV_VAR_GATED", 403)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Validation / Bad Request (400)
|
||||
|
||||
0
backend/onyx/hooks/__init__.py
Normal file
0
backend/onyx/hooks/__init__.py
Normal file
26
backend/onyx/hooks/api_dependencies.py
Normal file
26
backend/onyx/hooks/api_dependencies.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from onyx.configs.app_configs import HOOK_ENABLED
|
||||
from onyx.error_handling.error_codes import OnyxErrorCode
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
|
||||
|
||||
def require_hook_enabled() -> None:
|
||||
"""FastAPI dependency that gates all hook management endpoints.
|
||||
|
||||
Hooks are only available in single-tenant / self-hosted deployments with
|
||||
HOOK_ENABLED=true explicitly set. Two layers of protection:
|
||||
1. MULTI_TENANT check — rejects even if HOOK_ENABLED is accidentally set true
|
||||
2. HOOK_ENABLED flag — explicit opt-in by the operator
|
||||
|
||||
Use as: Depends(require_hook_enabled)
|
||||
"""
|
||||
if MULTI_TENANT:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.SINGLE_TENANT_ONLY,
|
||||
"Hooks are not available in multi-tenant deployments",
|
||||
)
|
||||
if not HOOK_ENABLED:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.ENV_VAR_GATED,
|
||||
"Hooks are not enabled. Set HOOK_ENABLED=true to enable.",
|
||||
)
|
||||
@@ -219,13 +219,26 @@ def litellm_exception_to_error_msg(
|
||||
"ratelimiterror"
|
||||
):
|
||||
upstream_detail = upstream_detail.split(":", 1)[1].strip()
|
||||
error_msg = (
|
||||
f"{provider_name} rate limit: {upstream_detail}"
|
||||
if upstream_detail
|
||||
else f"{provider_name} rate limit exceeded: Please slow down your requests and try again later."
|
||||
)
|
||||
error_code = "RATE_LIMIT"
|
||||
is_retryable = True
|
||||
upstream_detail_lower = upstream_detail.lower()
|
||||
if (
|
||||
"insufficient_quota" in upstream_detail_lower
|
||||
or "exceeded your current quota" in upstream_detail_lower
|
||||
):
|
||||
error_msg = (
|
||||
f"{provider_name} quota exceeded: {upstream_detail}"
|
||||
if upstream_detail
|
||||
else f"{provider_name} quota exceeded: Verify billing and quota for this API key."
|
||||
)
|
||||
error_code = "BUDGET_EXCEEDED"
|
||||
is_retryable = False
|
||||
else:
|
||||
error_msg = (
|
||||
f"{provider_name} rate limit: {upstream_detail}"
|
||||
if upstream_detail
|
||||
else f"{provider_name} rate limit exceeded: Please slow down your requests and try again later."
|
||||
)
|
||||
error_code = "RATE_LIMIT"
|
||||
is_retryable = True
|
||||
elif isinstance(core_exception, ServiceUnavailableError):
|
||||
provider_name = (
|
||||
llm.config.model_provider
|
||||
|
||||
@@ -1319,7 +1319,7 @@ def get_connector_indexing_status(
|
||||
# Track admin page visit for analytics
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=user.email,
|
||||
distinct_id=str(user.id),
|
||||
event=MilestoneRecordType.VISITED_ADMIN_PAGE,
|
||||
)
|
||||
|
||||
@@ -1533,7 +1533,7 @@ def create_connector_from_model(
|
||||
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=user.email,
|
||||
distinct_id=str(user.id),
|
||||
event=MilestoneRecordType.CREATED_CONNECTOR,
|
||||
)
|
||||
|
||||
@@ -1611,7 +1611,7 @@ def create_connector_with_mock_credential(
|
||||
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=user.email,
|
||||
distinct_id=str(user.id),
|
||||
event=MilestoneRecordType.CREATED_CONNECTOR,
|
||||
)
|
||||
return response
|
||||
@@ -1915,9 +1915,7 @@ def submit_connector_request(
|
||||
if not connector_name:
|
||||
raise HTTPException(status_code=400, detail="Connector name cannot be empty")
|
||||
|
||||
# Get user identifier for telemetry
|
||||
user_email = user.email
|
||||
distinct_id = user_email or tenant_id
|
||||
|
||||
# Track connector request via PostHog telemetry (Cloud only)
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
@@ -1925,11 +1923,11 @@ def submit_connector_request(
|
||||
if MULTI_TENANT:
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=distinct_id,
|
||||
distinct_id=str(user.id),
|
||||
event=MilestoneRecordType.REQUESTED_CONNECTOR,
|
||||
properties={
|
||||
"connector_name": connector_name,
|
||||
"user_email": user_email,
|
||||
"user_email": user.email,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@@ -408,7 +408,7 @@ class FailedConnectorIndexingStatus(BaseModel):
|
||||
"""Simplified version of ConnectorIndexingStatus for failed indexing attempts"""
|
||||
|
||||
cc_pair_id: int
|
||||
name: str | None
|
||||
name: str
|
||||
error_msg: str | None
|
||||
is_deletable: bool
|
||||
connector_id: int
|
||||
@@ -422,7 +422,7 @@ class ConnectorStatus(BaseModel):
|
||||
"""
|
||||
|
||||
cc_pair_id: int
|
||||
name: str | None
|
||||
name: str
|
||||
connector: ConnectorSnapshot
|
||||
credential: CredentialSnapshot
|
||||
access_type: AccessType
|
||||
@@ -453,7 +453,7 @@ class DocsCountOperator(str, Enum):
|
||||
|
||||
class ConnectorIndexingStatusLite(BaseModel):
|
||||
cc_pair_id: int
|
||||
name: str | None
|
||||
name: str
|
||||
source: DocumentSource
|
||||
access_type: AccessType
|
||||
cc_pair_status: ConnectorCredentialPairStatus
|
||||
@@ -488,7 +488,7 @@ class ConnectorCredentialPairIdentifier(BaseModel):
|
||||
|
||||
|
||||
class ConnectorCredentialPairMetadata(BaseModel):
|
||||
name: str | None = None
|
||||
name: str
|
||||
access_type: AccessType
|
||||
auto_sync_options: dict[str, Any] | None = None
|
||||
groups: list[int] = Field(default_factory=list)
|
||||
@@ -501,7 +501,7 @@ class CCStatusUpdateRequest(BaseModel):
|
||||
|
||||
class ConnectorCredentialPairDescriptor(BaseModel):
|
||||
id: int
|
||||
name: str | None = None
|
||||
name: str
|
||||
connector: ConnectorSnapshot
|
||||
credential: CredentialSnapshot
|
||||
access_type: AccessType
|
||||
@@ -511,7 +511,7 @@ class CCPairSummary(BaseModel):
|
||||
"""Simplified connector-credential pair information with just essential data"""
|
||||
|
||||
id: int
|
||||
name: str | None
|
||||
name: str
|
||||
source: DocumentSource
|
||||
access_type: AccessType
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
"date-fns": "^4.1.0",
|
||||
"embla-carousel-react": "^8.6.0",
|
||||
"lucide-react": "^0.562.0",
|
||||
"next": "16.1.5",
|
||||
"next": "16.1.7",
|
||||
"next-themes": "^0.4.6",
|
||||
"radix-ui": "^1.4.3",
|
||||
"react": "19.2.3",
|
||||
@@ -1711,9 +1711,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/env": {
|
||||
"version": "16.1.5",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.5.tgz",
|
||||
"integrity": "sha512-CRSCPJiSZoi4Pn69RYBDI9R7YK2g59vLexPQFXY0eyw+ILevIenCywzg+DqmlBik9zszEnw2HLFOUlLAcJbL7g==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.7.tgz",
|
||||
"integrity": "sha512-rJJbIdJB/RQr2F1nylZr/PJzamvNNhfr3brdKP6s/GW850jbtR70QlSfFselvIBbcPUOlQwBakexjFzqLzF6pg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@next/eslint-plugin-next": {
|
||||
@@ -1727,9 +1727,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "16.1.5",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.5.tgz",
|
||||
"integrity": "sha512-eK7Wdm3Hjy/SCL7TevlH0C9chrpeOYWx2iR7guJDaz4zEQKWcS1IMVfMb9UKBFMg1XgzcPTYPIp1Vcpukkjg6Q==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.7.tgz",
|
||||
"integrity": "sha512-b2wWIE8sABdyafc4IM8r5Y/dS6kD80JRtOGrUiKTsACFQfWWgUQ2NwoUX1yjFMXVsAwcQeNpnucF2ZrujsBBPg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -1743,9 +1743,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "16.1.5",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.5.tgz",
|
||||
"integrity": "sha512-foQscSHD1dCuxBmGkbIr6ScAUF6pRoDZP6czajyvmXPAOFNnQUJu2Os1SGELODjKp/ULa4fulnBWoHV3XdPLfA==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.7.tgz",
|
||||
"integrity": "sha512-zcnVaaZulS1WL0Ss38R5Q6D2gz7MtBu8GZLPfK+73D/hp4GFMrC2sudLky1QibfV7h6RJBJs/gOFvYP0X7UVlQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -1759,9 +1759,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "16.1.5",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.5.tgz",
|
||||
"integrity": "sha512-qNIb42o3C02ccIeSeKjacF3HXotGsxh/FMk/rSRmCzOVMtoWH88odn2uZqF8RLsSUWHcAqTgYmPD3pZ03L9ZAA==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.7.tgz",
|
||||
"integrity": "sha512-2ant89Lux/Q3VyC8vNVg7uBaFVP9SwoK2jJOOR0L8TQnX8CAYnh4uctAScy2Hwj2dgjVHqHLORQZJ2wH6VxhSQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -1775,9 +1775,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "16.1.5",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.5.tgz",
|
||||
"integrity": "sha512-U+kBxGUY1xMAzDTXmuVMfhaWUZQAwzRaHJ/I6ihtR5SbTVUEaDRiEU9YMjy1obBWpdOBuk1bcm+tsmifYSygfw==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.7.tgz",
|
||||
"integrity": "sha512-uufcze7LYv0FQg9GnNeZ3/whYfo+1Q3HnQpm16o6Uyi0OVzLlk2ZWoY7j07KADZFY8qwDbsmFnMQP3p3+Ftprw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -1791,9 +1791,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "16.1.5",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.5.tgz",
|
||||
"integrity": "sha512-gq2UtoCpN7Ke/7tKaU7i/1L7eFLfhMbXjNghSv0MVGF1dmuoaPeEVDvkDuO/9LVa44h5gqpWeJ4mRRznjDv7LA==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.7.tgz",
|
||||
"integrity": "sha512-KWVf2gxYvHtvuT+c4MBOGxuse5TD7DsMFYSxVxRBnOzok/xryNeQSjXgxSv9QpIVlaGzEn/pIuI6Koosx8CGWA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -1807,9 +1807,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "16.1.5",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.5.tgz",
|
||||
"integrity": "sha512-bQWSE729PbXT6mMklWLf8dotislPle2L70E9q6iwETYEOt092GDn0c+TTNj26AjmeceSsC4ndyGsK5nKqHYXjQ==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.7.tgz",
|
||||
"integrity": "sha512-HguhaGwsGr1YAGs68uRKc4aGWxLET+NevJskOcCAwXbwj0fYX0RgZW2gsOCzr9S11CSQPIkxmoSbuVaBp4Z3dA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -1823,9 +1823,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "16.1.5",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.5.tgz",
|
||||
"integrity": "sha512-LZli0anutkIllMtTAWZlDqdfvjWX/ch8AFK5WgkNTvaqwlouiD1oHM+WW8RXMiL0+vAkAJyAGEzPPjO+hnrSNQ==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.7.tgz",
|
||||
"integrity": "sha512-S0n3KrDJokKTeFyM/vGGGR8+pCmXYrjNTk2ZozOL1C/JFdfUIL9O1ATaJOl5r2POe56iRChbsszrjMAdWSv7kQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -1839,9 +1839,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-x64-msvc": {
|
||||
"version": "16.1.5",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.5.tgz",
|
||||
"integrity": "sha512-7is37HJTNQGhjPpQbkKjKEboHYQnCgpVt/4rBrrln0D9nderNxZ8ZWs8w1fAtzUx7wEyYjQ+/13myFgFj6K2Ng==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.7.tgz",
|
||||
"integrity": "sha512-mwgtg8CNZGYm06LeEd+bNnOUfwOyNem/rOiP14Lsz+AnUY92Zq/LXwtebtUiaeVkhbroRCQ0c8GlR4UT1U+0yg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -4971,12 +4971,15 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/baseline-browser-mapping": {
|
||||
"version": "2.9.17",
|
||||
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.17.tgz",
|
||||
"integrity": "sha512-agD0MgJFUP/4nvjqzIB29zRPUuCF7Ge6mEv9s8dHrtYD7QWXRcx75rOADE/d5ah1NI+0vkDl0yorDd5U852IQQ==",
|
||||
"version": "2.10.8",
|
||||
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.8.tgz",
|
||||
"integrity": "sha512-PCLz/LXGBsNTErbtB6i5u4eLpHeMfi93aUv5duMmj6caNu6IphS4q6UevDnL36sZQv9lrP11dbPKGMaXPwMKfQ==",
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"baseline-browser-mapping": "dist/cli.js"
|
||||
"baseline-browser-mapping": "dist/cli.cjs"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/body-parser": {
|
||||
@@ -8975,14 +8978,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/next": {
|
||||
"version": "16.1.5",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-16.1.5.tgz",
|
||||
"integrity": "sha512-f+wE+NSbiQgh3DSAlTaw2FwY5yGdVViAtp8TotNQj4kk4Q8Bh1sC/aL9aH+Rg1YAVn18OYXsRDT7U/079jgP7w==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-16.1.7.tgz",
|
||||
"integrity": "sha512-WM0L7WrSvKwoLegLYr6V+mz+RIofqQgVAfHhMp9a88ms0cFX8iX9ew+snpWlSBwpkURJOUdvCEt3uLl3NNzvWg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@next/env": "16.1.5",
|
||||
"@next/env": "16.1.7",
|
||||
"@swc/helpers": "0.5.15",
|
||||
"baseline-browser-mapping": "^2.8.3",
|
||||
"baseline-browser-mapping": "^2.9.19",
|
||||
"caniuse-lite": "^1.0.30001579",
|
||||
"postcss": "8.4.31",
|
||||
"styled-jsx": "5.1.6"
|
||||
@@ -8994,14 +8997,14 @@
|
||||
"node": ">=20.9.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@next/swc-darwin-arm64": "16.1.5",
|
||||
"@next/swc-darwin-x64": "16.1.5",
|
||||
"@next/swc-linux-arm64-gnu": "16.1.5",
|
||||
"@next/swc-linux-arm64-musl": "16.1.5",
|
||||
"@next/swc-linux-x64-gnu": "16.1.5",
|
||||
"@next/swc-linux-x64-musl": "16.1.5",
|
||||
"@next/swc-win32-arm64-msvc": "16.1.5",
|
||||
"@next/swc-win32-x64-msvc": "16.1.5",
|
||||
"@next/swc-darwin-arm64": "16.1.7",
|
||||
"@next/swc-darwin-x64": "16.1.7",
|
||||
"@next/swc-linux-arm64-gnu": "16.1.7",
|
||||
"@next/swc-linux-arm64-musl": "16.1.7",
|
||||
"@next/swc-linux-x64-gnu": "16.1.7",
|
||||
"@next/swc-linux-x64-musl": "16.1.7",
|
||||
"@next/swc-win32-arm64-msvc": "16.1.7",
|
||||
"@next/swc-win32-x64-msvc": "16.1.7",
|
||||
"sharp": "^0.34.4"
|
||||
},
|
||||
"peerDependencies": {
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
"date-fns": "^4.1.0",
|
||||
"embla-carousel-react": "^8.6.0",
|
||||
"lucide-react": "^0.562.0",
|
||||
"next": "16.1.5",
|
||||
"next": "16.1.7",
|
||||
"next-themes": "^0.4.6",
|
||||
"radix-ui": "^1.4.3",
|
||||
"react": "19.2.3",
|
||||
|
||||
@@ -314,7 +314,7 @@ def create_persona(
|
||||
)
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=user.email,
|
||||
distinct_id=str(user.id),
|
||||
event=MilestoneRecordType.CREATED_ASSISTANT,
|
||||
)
|
||||
|
||||
|
||||
@@ -81,6 +81,7 @@ from onyx.server.manage.llm.models import VisionProviderResponse
|
||||
from onyx.server.manage.llm.utils import generate_bedrock_display_name
|
||||
from onyx.server.manage.llm.utils import generate_ollama_display_name
|
||||
from onyx.server.manage.llm.utils import infer_vision_support
|
||||
from onyx.server.manage.llm.utils import is_embedding_model
|
||||
from onyx.server.manage.llm.utils import is_reasoning_model
|
||||
from onyx.server.manage.llm.utils import is_valid_bedrock_model
|
||||
from onyx.server.manage.llm.utils import ModelMetadata
|
||||
@@ -1374,6 +1375,10 @@ def get_litellm_available_models(
|
||||
try:
|
||||
model_details = LitellmModelDetails.model_validate(model)
|
||||
|
||||
# Skip embedding models
|
||||
if is_embedding_model(model_details.id):
|
||||
continue
|
||||
|
||||
results.append(
|
||||
LitellmFinalModelResponse(
|
||||
provider_name=model_details.owned_by,
|
||||
|
||||
@@ -366,3 +366,18 @@ def extract_vendor_from_model_name(model_name: str, provider: str) -> str | None
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def is_embedding_model(model_name: str) -> bool:
|
||||
"""Checks for if a model is an embedding model"""
|
||||
from litellm import get_model_info
|
||||
|
||||
try:
|
||||
# get_model_info raises on unknown models
|
||||
# default to False
|
||||
model_info = get_model_info(model_name)
|
||||
except Exception:
|
||||
return False
|
||||
is_embedding_mode = model_info.get("mode") == "embedding"
|
||||
|
||||
return is_embedding_mode
|
||||
|
||||
@@ -561,7 +561,7 @@ def handle_send_chat_message(
|
||||
tenant_id = get_current_tenant_id()
|
||||
mt_cloud_telemetry(
|
||||
tenant_id=tenant_id,
|
||||
distinct_id=tenant_id if user.is_anonymous else user.email,
|
||||
distinct_id=tenant_id if user.is_anonymous else str(user.id),
|
||||
event=MilestoneRecordType.RAN_QUERY,
|
||||
)
|
||||
|
||||
|
||||
@@ -9,7 +9,6 @@ from pydantic import ValidationError
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.chat.citation_utils import extract_citation_order_from_text
|
||||
from onyx.configs.app_configs import GENUI_ENABLED
|
||||
from onyx.configs.constants import MessageType
|
||||
from onyx.context.search.models import SavedSearchDoc
|
||||
from onyx.context.search.models import SearchDoc
|
||||
@@ -30,8 +29,6 @@ from onyx.server.query_and_chat.streaming_models import CustomToolStart
|
||||
from onyx.server.query_and_chat.streaming_models import FileReaderResult
|
||||
from onyx.server.query_and_chat.streaming_models import FileReaderStart
|
||||
from onyx.server.query_and_chat.streaming_models import GeneratedImage
|
||||
from onyx.server.query_and_chat.streaming_models import GenUIDelta
|
||||
from onyx.server.query_and_chat.streaming_models import GenUIStart
|
||||
from onyx.server.query_and_chat.streaming_models import ImageGenerationFinal
|
||||
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
|
||||
from onyx.server.query_and_chat.streaming_models import IntermediateReportDelta
|
||||
@@ -92,16 +89,6 @@ def create_message_packets(
|
||||
)
|
||||
)
|
||||
|
||||
# When GenUI is enabled, also emit GenUIStart so the frontend
|
||||
# can offer both text and structured views for old conversations.
|
||||
if GENUI_ENABLED:
|
||||
packets.append(
|
||||
Packet(
|
||||
placement=Placement(turn_index=turn_index),
|
||||
obj=GenUIStart(),
|
||||
)
|
||||
)
|
||||
|
||||
packets.append(
|
||||
Packet(
|
||||
placement=Placement(turn_index=turn_index),
|
||||
@@ -111,16 +98,6 @@ def create_message_packets(
|
||||
),
|
||||
)
|
||||
|
||||
if GENUI_ENABLED:
|
||||
packets.append(
|
||||
Packet(
|
||||
placement=Placement(turn_index=turn_index),
|
||||
obj=GenUIDelta(
|
||||
content=message_text,
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
packets.append(
|
||||
Packet(
|
||||
placement=Placement(turn_index=turn_index),
|
||||
|
||||
@@ -55,9 +55,6 @@ class StreamingType(Enum):
|
||||
INTERMEDIATE_REPORT_DELTA = "intermediate_report_delta"
|
||||
INTERMEDIATE_REPORT_CITED_DOCS = "intermediate_report_cited_docs"
|
||||
|
||||
GENUI_START = "genui_start"
|
||||
GENUI_DELTA = "genui_delta"
|
||||
|
||||
|
||||
class BaseObj(BaseModel):
|
||||
type: str = ""
|
||||
@@ -370,18 +367,6 @@ class IntermediateReportCitedDocs(BaseObj):
|
||||
cited_docs: list[SearchDoc] | None = None
|
||||
|
||||
|
||||
################################################
|
||||
# GenUI Packets
|
||||
################################################
|
||||
class GenUIStart(BaseObj):
|
||||
type: Literal["genui_start"] = StreamingType.GENUI_START.value
|
||||
|
||||
|
||||
class GenUIDelta(BaseObj):
|
||||
type: Literal["genui_delta"] = StreamingType.GENUI_DELTA.value
|
||||
content: str
|
||||
|
||||
|
||||
################################################
|
||||
# Packet Object
|
||||
################################################
|
||||
@@ -430,9 +415,6 @@ PacketObj = Union[
|
||||
IntermediateReportStart,
|
||||
IntermediateReportDelta,
|
||||
IntermediateReportCitedDocs,
|
||||
# GenUI Packets
|
||||
GenUIStart,
|
||||
GenUIDelta,
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -1,239 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from onyx.context.search.models import SavedSearchDoc
|
||||
from onyx.context.search.models import SearchDoc
|
||||
from onyx.server.query_and_chat.placement import Placement
|
||||
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
|
||||
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
|
||||
from onyx.server.query_and_chat.streaming_models import CitationInfo
|
||||
from onyx.server.query_and_chat.streaming_models import GeneratedImage
|
||||
from onyx.server.query_and_chat.streaming_models import ImageGenerationFinal
|
||||
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
|
||||
from onyx.server.query_and_chat.streaming_models import OpenUrlDocuments
|
||||
from onyx.server.query_and_chat.streaming_models import OpenUrlStart
|
||||
from onyx.server.query_and_chat.streaming_models import OpenUrlUrls
|
||||
from onyx.server.query_and_chat.streaming_models import Packet
|
||||
from onyx.server.query_and_chat.streaming_models import ReasoningDelta
|
||||
from onyx.server.query_and_chat.streaming_models import ReasoningStart
|
||||
from onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta
|
||||
from onyx.server.query_and_chat.streaming_models import SearchToolQueriesDelta
|
||||
from onyx.server.query_and_chat.streaming_models import SearchToolStart
|
||||
from onyx.server.query_and_chat.streaming_models import SectionEnd
|
||||
|
||||
|
||||
_CANNOT_SHOW_STEP_RESULTS_STR = "[Cannot display step results]"
|
||||
|
||||
|
||||
def _adjust_message_text_for_agent_search_results(
|
||||
adjusted_message_text: str,
|
||||
final_documents: list[SavedSearchDoc], # noqa: ARG001
|
||||
) -> str:
|
||||
# Remove all [Q<integer>] patterns (sub-question citations)
|
||||
return re.sub(r"\[Q\d+\]", "", adjusted_message_text)
|
||||
|
||||
|
||||
def _replace_d_citations_with_links(
|
||||
message_text: str, final_documents: list[SavedSearchDoc]
|
||||
) -> str:
|
||||
def replace_citation(match: re.Match[str]) -> str:
|
||||
d_number = match.group(1)
|
||||
try:
|
||||
doc_index = int(d_number) - 1
|
||||
if 0 <= doc_index < len(final_documents):
|
||||
doc = final_documents[doc_index]
|
||||
link = doc.link if doc.link else ""
|
||||
return f"[[{d_number}]]({link})"
|
||||
return match.group(0)
|
||||
except (ValueError, IndexError):
|
||||
return match.group(0)
|
||||
|
||||
return re.sub(r"\[D(\d+)\]", replace_citation, message_text)
|
||||
|
||||
|
||||
def create_message_packets(
    message_text: str,
    final_documents: list[SavedSearchDoc] | None,
    turn_index: int,
    is_legacy_agentic: bool = False,
) -> list[Packet]:
    """Build the packet sequence for a complete assistant message.

    The sequence is: an AgentResponseStart carrying the final documents, a
    single AgentResponseDelta carrying the (possibly adjusted) message text,
    and a SectionEnd. For legacy agentic messages, sub-question citations
    ([Q<n>]) are stripped and document citations ([D<n>]) are rewritten as
    markdown links when documents are available.
    """
    cleaned_text = message_text
    if is_legacy_agentic:
        if final_documents is not None:
            cleaned_text = _adjust_message_text_for_agent_search_results(
                message_text, final_documents
            )
            cleaned_text = _replace_d_citations_with_links(
                cleaned_text, final_documents
            )
        else:
            # No documents to link against: only strip sub-question citations.
            cleaned_text = re.sub(r"\[Q\d+\]", "", message_text)

    return [
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=AgentResponseStart(
                final_documents=SearchDoc.from_saved_search_docs(
                    final_documents or []
                ),
            ),
        ),
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=AgentResponseDelta(
                content=cleaned_text,
            ),
        ),
        Packet(placement=Placement(turn_index=turn_index), obj=SectionEnd()),
    ]
def create_citation_packets(
    citation_info_list: list[CitationInfo], turn_index: int
) -> list[Packet]:
    """Wrap each citation in its own packet, then close with a SectionEnd."""
    citation_packets = [
        Packet(placement=Placement(turn_index=turn_index), obj=citation_info)
        for citation_info in citation_info_list
    ]
    citation_packets.append(
        Packet(placement=Placement(turn_index=turn_index), obj=SectionEnd())
    )
    return citation_packets
def create_reasoning_packets(reasoning_text: str, turn_index: int) -> list[Packet]:
    """Build the ReasoningStart / ReasoningDelta / SectionEnd packet triple."""
    return [
        Packet(placement=Placement(turn_index=turn_index), obj=ReasoningStart()),
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=ReasoningDelta(
                reasoning=reasoning_text,
            ),
        ),
        Packet(placement=Placement(turn_index=turn_index), obj=SectionEnd()),
    ]
def create_image_generation_packets(
    images: list[GeneratedImage], turn_index: int
) -> list[Packet]:
    """Build start / final / end packets for an image-generation tool call."""
    return [
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=ImageGenerationToolStart(),
        ),
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=ImageGenerationFinal(images=images),
        ),
        Packet(placement=Placement(turn_index=turn_index), obj=SectionEnd()),
    ]
def create_fetch_packets(
    fetch_docs: list[SavedSearchDoc],
    urls: list[str],
    turn_index: int,
) -> list[Packet]:
    """Build the packet sequence for an open-URL (fetch) tool call.

    Order: OpenUrlStart, the URLs that were opened, the documents produced
    from those URLs, and a terminating SectionEnd.
    """
    return [
        Packet(placement=Placement(turn_index=turn_index), obj=OpenUrlStart()),
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=OpenUrlUrls(urls=urls),
        ),
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=OpenUrlDocuments(
                documents=SearchDoc.from_saved_search_docs(fetch_docs)
            ),
        ),
        Packet(placement=Placement(turn_index=turn_index), obj=SectionEnd()),
    ]
def create_search_packets(
    search_queries: list[str],
    saved_search_docs: list[SavedSearchDoc],
    is_internet_search: bool,
    turn_index: int,
) -> list[Packet]:
    """Build the packet sequence for a search tool call (internal or internet).

    Queries and documents are emitted only when non-empty; the sequence always
    opens with SearchToolStart and closes with SectionEnd.
    """
    emitted: list[Packet] = [
        Packet(
            placement=Placement(turn_index=turn_index),
            obj=SearchToolStart(
                is_internet_search=is_internet_search,
            ),
        )
    ]

    if search_queries:
        emitted.append(
            Packet(
                placement=Placement(turn_index=turn_index),
                obj=SearchToolQueriesDelta(queries=search_queries),
            )
        )

    if saved_search_docs:
        emitted.append(
            Packet(
                placement=Placement(turn_index=turn_index),
                obj=SearchToolDocumentsDelta(
                    documents=SearchDoc.from_saved_search_docs(saved_search_docs)
                ),
            )
        )

    emitted.append(Packet(placement=Placement(turn_index=turn_index), obj=SectionEnd()))
    return emitted
@@ -2,6 +2,7 @@ import contextvars
|
||||
import threading
|
||||
import uuid
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
@@ -152,7 +153,7 @@ def mt_cloud_telemetry(
|
||||
tenant_id: str,
|
||||
distinct_id: str,
|
||||
event: MilestoneRecordType,
|
||||
properties: dict | None = None,
|
||||
properties: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
if not MULTI_TENANT:
|
||||
return
|
||||
@@ -173,3 +174,18 @@ def mt_cloud_telemetry(
|
||||
attribute="event_telemetry",
|
||||
fallback=noop_fallback,
|
||||
)(distinct_id, event, all_properties)
|
||||
|
||||
|
||||
def mt_cloud_identify(
    distinct_id: str,
    properties: dict[str, Any] | None = None,
) -> None:
    """Create/update a PostHog person profile (Cloud only).

    No-op when MULTI_TENANT is disabled. Dispatches to the versioned
    `identify_user` implementation, falling back to a no-op if the versioned
    module is unavailable.
    """
    if not MULTI_TENANT:
        return

    fetch_versioned_implementation_with_fallback(
        module="onyx.utils.telemetry",
        attribute="identify_user",
        fallback=noop_fallback,
    )(distinct_id, properties)
||||
|
||||
@@ -65,7 +65,7 @@ attrs==25.4.0
|
||||
# jsonschema
|
||||
# referencing
|
||||
# zeep
|
||||
authlib==1.6.7
|
||||
authlib==1.6.9
|
||||
# via fastmcp
|
||||
azure-cognitiveservices-speech==1.38.0
|
||||
# via onyx
|
||||
@@ -698,7 +698,7 @@ py-key-value-aio==0.4.4
|
||||
# via fastmcp
|
||||
pyairtable==3.0.1
|
||||
# via onyx
|
||||
pyasn1==0.6.2
|
||||
pyasn1==0.6.3
|
||||
# via
|
||||
# pyasn1-modules
|
||||
# rsa
|
||||
@@ -737,7 +737,7 @@ pygithub==2.5.0
|
||||
# via onyx
|
||||
pygments==2.19.2
|
||||
# via rich
|
||||
pyjwt==2.11.0
|
||||
pyjwt==2.12.0
|
||||
# via
|
||||
# fastapi-users
|
||||
# mcp
|
||||
|
||||
@@ -263,7 +263,7 @@ oauthlib==3.2.2
|
||||
# via
|
||||
# kubernetes
|
||||
# requests-oauthlib
|
||||
onyx-devtools==0.7.0
|
||||
onyx-devtools==0.7.1
|
||||
# via onyx
|
||||
openai==2.14.0
|
||||
# via
|
||||
@@ -326,7 +326,7 @@ pure-eval==0.2.3
|
||||
# via stack-data
|
||||
py==1.11.0
|
||||
# via retry
|
||||
pyasn1==0.6.2
|
||||
pyasn1==0.6.3
|
||||
# via
|
||||
# pyasn1-modules
|
||||
# rsa
|
||||
@@ -353,7 +353,7 @@ pygments==2.19.2
|
||||
# via
|
||||
# ipython
|
||||
# ipython-pygments-lexers
|
||||
pyjwt==2.11.0
|
||||
pyjwt==2.12.0
|
||||
# via mcp
|
||||
pyparsing==3.2.5
|
||||
# via matplotlib
|
||||
|
||||
@@ -195,7 +195,7 @@ propcache==0.4.1
|
||||
# yarl
|
||||
py==1.11.0
|
||||
# via retry
|
||||
pyasn1==0.6.2
|
||||
pyasn1==0.6.3
|
||||
# via
|
||||
# pyasn1-modules
|
||||
# rsa
|
||||
@@ -218,7 +218,7 @@ pydantic-core==2.33.2
|
||||
# via pydantic
|
||||
pydantic-settings==2.12.0
|
||||
# via mcp
|
||||
pyjwt==2.11.0
|
||||
pyjwt==2.12.0
|
||||
# via mcp
|
||||
python-dateutil==2.8.2
|
||||
# via
|
||||
|
||||
@@ -285,7 +285,7 @@ psutil==7.1.3
|
||||
# via accelerate
|
||||
py==1.11.0
|
||||
# via retry
|
||||
pyasn1==0.6.2
|
||||
pyasn1==0.6.3
|
||||
# via
|
||||
# pyasn1-modules
|
||||
# rsa
|
||||
@@ -308,7 +308,7 @@ pydantic-core==2.33.2
|
||||
# via pydantic
|
||||
pydantic-settings==2.12.0
|
||||
# via mcp
|
||||
pyjwt==2.11.0
|
||||
pyjwt==2.12.0
|
||||
# via mcp
|
||||
python-dateutil==2.8.2
|
||||
# via
|
||||
|
||||
471
backend/scripts/run_industryrag_bench_questions.py
Normal file
471
backend/scripts/run_industryrag_bench_questions.py
Normal file
@@ -0,0 +1,471 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from dataclasses import asdict
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from typing import TypedDict
|
||||
from typing import TypeGuard
|
||||
|
||||
import aiohttp
|
||||
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s %(levelname)s %(message)s",
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
DEFAULT_API_BASE = "http://localhost:3000"
|
||||
INTERNAL_SEARCH_TOOL_NAME = "internal_search"
|
||||
INTERNAL_SEARCH_IN_CODE_TOOL_ID = "SearchTool"
|
||||
MAX_REQUEST_ATTEMPTS = 5
|
||||
RETRIABLE_STATUS_CODES = {429, 500, 502, 503, 504}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class QuestionRecord:
    """A single benchmark question parsed from the input JSONL file."""

    # Stable identifier used to join answers back to questions.
    question_id: str
    # Natural-language question text sent to the chat endpoint.
    question: str
@dataclass(frozen=True)
class AnswerRecord:
    """A successfully answered question, written as one JSONL output line."""

    question_id: str
    # Answer text (citationless variant when available).
    answer: str
    # Cited document ids, ordered by citation number, deduplicated.
    document_ids: list[str]
@dataclass(frozen=True)
class FailedQuestionRecord:
    """A question that failed after all retries, kept for the final summary."""

    question_id: str
    # Stringified exception explaining why the question failed.
    error: str
class Citation(TypedDict, total=False):
    """Shape of one entry in the chat response's `citation_info` list."""

    # Citation marker number as shown in the answer text.
    citation_number: int
    # Id of the document backing this citation.
    document_id: str
def parse_args() -> argparse.Namespace:
    """Define and parse the command-line interface for this benchmark script."""
    parser = argparse.ArgumentParser(
        description=(
            "Submit questions to Onyx chat with internal search forced and write "
            "answers to a JSONL file."
        )
    )
    parser.add_argument(
        "--questions-file",
        type=Path,
        required=True,
        help="Path to the input questions JSONL file.",
    )
    parser.add_argument(
        "--output-file",
        type=Path,
        required=True,
        help="Path to the output answers JSONL file.",
    )
    parser.add_argument(
        "--api-key",
        type=str,
        required=True,
        help="API key used to authenticate against Onyx.",
    )
    parser.add_argument(
        "--api-base",
        type=str,
        default=DEFAULT_API_BASE,
        help=(
            "Frontend base URL for Onyx. If `/api` is omitted, it will be added "
            f"automatically. Default: {DEFAULT_API_BASE}"
        ),
    )
    parser.add_argument(
        "--parallelism",
        type=int,
        default=1,
        help="Number of questions to process in parallel. Default: 1.",
    )
    parser.add_argument(
        "--max-questions",
        type=int,
        default=None,
        help="Optional cap on how many questions to process. Defaults to all.",
    )
    return parser.parse_args()
def normalize_api_base(api_base: str) -> str:
    """Return `api_base` without trailing slashes and guaranteed to end in `/api`."""
    trimmed = api_base.rstrip("/")
    return trimmed if trimmed.endswith("/api") else f"{trimmed}/api"
def load_questions(questions_file: Path) -> list[QuestionRecord]:
    """Parse the input JSONL file into validated QuestionRecords.

    Blank lines are skipped. Raises FileNotFoundError when the file is
    missing, and ValueError for malformed JSON or missing/empty fields.
    """
    if not questions_file.exists():
        raise FileNotFoundError(f"Questions file not found: {questions_file}")

    records: list[QuestionRecord] = []
    with questions_file.open("r", encoding="utf-8") as file:
        for line_number, line in enumerate(file, start=1):
            raw_line = line.strip()
            if not raw_line:
                continue

            try:
                payload = json.loads(raw_line)
            except json.JSONDecodeError as exc:
                raise ValueError(
                    f"Invalid JSON on line {line_number} of {questions_file}"
                ) from exc

            question_id = payload.get("question_id")
            question = payload.get("question")

            if not isinstance(question_id, str) or not question_id:
                raise ValueError(
                    f"Line {line_number} is missing a non-empty `question_id`."
                )
            if not isinstance(question, str) or not question:
                raise ValueError(
                    f"Line {line_number} is missing a non-empty `question`."
                )

            records.append(QuestionRecord(question_id=question_id, question=question))

    return records
async def read_json_response(
    response: aiohttp.ClientResponse,
) -> dict[str, Any] | list[dict[str, Any]]:
    """Read and validate an aiohttp response body as JSON.

    Raises RuntimeError for HTTP errors (status >= 400), non-JSON bodies, and
    payloads that are neither a dict nor a list.
    """
    response_text = await response.text()
    if response.status >= 400:
        raise RuntimeError(
            f"Request to {response.url} failed with {response.status}: {response_text}"
        )

    try:
        payload = json.loads(response_text)
    except json.JSONDecodeError as exc:
        raise RuntimeError(
            f"Request to {response.url} returned non-JSON content: {response_text}"
        ) from exc

    if not isinstance(payload, (dict, list)):
        raise RuntimeError(
            f"Unexpected response payload type from {response.url}: {type(payload)}"
        )

    return payload
async def request_json_with_retries(
    session: aiohttp.ClientSession,
    method: str,
    url: str,
    headers: dict[str, str],
    json_payload: dict[str, Any] | None = None,
) -> dict[str, Any] | list[dict[str, Any]]:
    """Issue an HTTP request, retrying transient failures with exponential backoff.

    Retries up to MAX_REQUEST_ATTEMPTS times on retriable status codes
    (RETRIABLE_STATUS_CODES) and on client/timeout errors, doubling the delay
    after each failed attempt. Raises RuntimeError once attempts are exhausted.
    """
    backoff_seconds = 1.0

    for attempt in range(1, MAX_REQUEST_ATTEMPTS + 1):
        try:
            async with session.request(
                method=method,
                url=url,
                headers=headers,
                json=json_payload,
            ) as response:
                # Retry on transient server-side statuses, but let the final
                # attempt fall through to read_json_response so the caller
                # gets the real error message.
                if (
                    response.status in RETRIABLE_STATUS_CODES
                    and attempt < MAX_REQUEST_ATTEMPTS
                ):
                    response_text = await response.text()
                    logger.warning(
                        "Retryable response from %s on attempt %s/%s: %s %s",
                        url,
                        attempt,
                        MAX_REQUEST_ATTEMPTS,
                        response.status,
                        response_text,
                    )
                    await asyncio.sleep(backoff_seconds)
                    backoff_seconds *= 2
                    continue

                return await read_json_response(response)
        except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
            if attempt == MAX_REQUEST_ATTEMPTS:
                raise RuntimeError(
                    f"Request to {url} failed after {MAX_REQUEST_ATTEMPTS} attempts."
                ) from exc

            logger.warning(
                "Request to %s failed on attempt %s/%s: %s",
                url,
                attempt,
                MAX_REQUEST_ATTEMPTS,
                exc,
            )
            await asyncio.sleep(backoff_seconds)
            backoff_seconds *= 2

    # Unreachable in practice: the loop above either returns or raises.
    raise RuntimeError(f"Request to {url} failed unexpectedly.")
def extract_document_ids(citation_info: object) -> list[str]:
    """Return cited document ids ordered by citation number, without duplicates.

    Accepts the raw `citation_info` payload from the chat response; anything
    that is not a list of well-formed citation dicts contributes nothing.
    """
    if not isinstance(citation_info, list):
        return []

    valid_citations = [c for c in citation_info if _is_valid_citation(c)]
    valid_citations.sort(key=_citation_sort_key)

    ordered_ids: list[str] = []
    seen: set[str] = set()
    for citation in valid_citations:
        doc_id = citation["document_id"]
        if doc_id in seen:
            continue
        seen.add(doc_id)
        ordered_ids.append(doc_id)

    return ordered_ids
def _is_valid_citation(citation: object) -> TypeGuard[Citation]:
|
||||
return (
|
||||
isinstance(citation, dict)
|
||||
and isinstance(citation.get("document_id"), str)
|
||||
and bool(citation["document_id"])
|
||||
)
|
||||
|
||||
|
||||
def _citation_sort_key(citation: Citation) -> int:
|
||||
citation_number = citation.get("citation_number")
|
||||
if isinstance(citation_number, int):
|
||||
return citation_number
|
||||
return sys.maxsize
|
||||
|
||||
|
||||
async def fetch_internal_search_tool_id(
    session: aiohttp.ClientSession,
    api_base: str,
    headers: dict[str, str],
) -> int:
    """Look up the numeric id of the internal search tool via `/tool`.

    Prefers matching on `in_code_tool_id`; falls back to matching the tool
    name. Raises RuntimeError when no usable tool is found.
    """
    payload = await request_json_with_retries(
        session=session,
        method="GET",
        url=f"{api_base}/tool",
        headers=headers,
    )

    if not isinstance(payload, list):
        raise RuntimeError("Expected `/tool` to return a list.")

    def _scan_for_tool_id(field: str, expected: str) -> int | None:
        # Only entries that are dicts with an integer `id` are usable.
        for tool in payload:
            if not isinstance(tool, dict):
                continue
            if tool.get(field) == expected:
                tool_id = tool.get("id")
                if isinstance(tool_id, int):
                    return tool_id
        return None

    matched_id = _scan_for_tool_id("in_code_tool_id", INTERNAL_SEARCH_IN_CODE_TOOL_ID)
    if matched_id is None:
        matched_id = _scan_for_tool_id("name", INTERNAL_SEARCH_TOOL_NAME)
    if matched_id is not None:
        return matched_id

    raise RuntimeError(
        "Could not find the internal search tool in `/tool`. "
        "Make sure SearchTool is available for this environment."
    )
async def submit_question(
    session: aiohttp.ClientSession,
    api_base: str,
    headers: dict[str, str],
    internal_search_tool_id: int,
    question_record: QuestionRecord,
) -> AnswerRecord:
    """Send one question through the non-streaming chat endpoint.

    Forces the internal search tool, then extracts the answer (preferring the
    citationless variant) and the cited document ids from the response.
    Raises RuntimeError when the response shape is unexpected.
    """
    payload = {
        "message": question_record.question,
        "chat_session_info": {"persona_id": 0},
        "parent_message_id": None,
        "file_descriptors": [],
        "allowed_tool_ids": [internal_search_tool_id],
        "forced_tool_id": internal_search_tool_id,
        "stream": False,
    }

    response_payload = await request_json_with_retries(
        session=session,
        method="POST",
        url=f"{api_base}/chat/send-chat-message",
        headers=headers,
        json_payload=payload,
    )

    if not isinstance(response_payload, dict):
        raise RuntimeError(
            "Expected `/chat/send-chat-message` to return an object when `stream=false`."
        )

    # Prefer the citationless answer; fall back to the cited one.
    answer = response_payload.get("answer_citationless")
    if not isinstance(answer, str):
        answer = response_payload.get("answer")

    if not isinstance(answer, str):
        raise RuntimeError(
            f"Response for question {question_record.question_id} is missing `answer`."
        )

    return AnswerRecord(
        question_id=question_record.question_id,
        answer=answer,
        document_ids=extract_document_ids(response_payload.get("citation_info")),
    )
async def generate_answers(
    questions: list[QuestionRecord],
    output_file: Path,
    api_base: str,
    api_key: str,
    parallelism: int,
) -> None:
    """Answer all questions against the Onyx API, appending results to a JSONL file.

    Questions are processed with at most `parallelism` concurrent requests.
    Successful answers are appended (and flushed) to `output_file` as they
    complete; failures are collected and summarized at the end rather than
    aborting the whole run.
    """
    if parallelism < 1:
        raise ValueError("`--parallelism` must be at least 1.")

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # No overall timeout: a full run may be long, but connects and individual
    # socket reads are bounded.
    timeout = aiohttp.ClientTimeout(
        total=None,
        connect=30,
        sock_connect=30,
        sock_read=600,
    )
    connector = aiohttp.TCPConnector(limit=parallelism)

    output_file.parent.mkdir(parents=True, exist_ok=True)
    with output_file.open("a", encoding="utf-8") as file:
        async with aiohttp.ClientSession(
            timeout=timeout, connector=connector
        ) as session:
            internal_search_tool_id = await fetch_internal_search_tool_id(
                session=session,
                api_base=api_base,
                headers=headers,
            )
            logger.info("Using internal search tool id %s", internal_search_tool_id)

            # Concurrency controls: the semaphore bounds in-flight requests;
            # the locks serialize progress accounting and file writes.
            semaphore = asyncio.Semaphore(parallelism)
            progress_lock = asyncio.Lock()
            write_lock = asyncio.Lock()
            completed = 0
            successful = 0
            failed_questions: list[FailedQuestionRecord] = []
            total = len(questions)

            async def process_question(question_record: QuestionRecord) -> None:
                nonlocal completed
                nonlocal successful

                try:
                    async with semaphore:
                        result = await submit_question(
                            session=session,
                            api_base=api_base,
                            headers=headers,
                            internal_search_tool_id=internal_search_tool_id,
                            question_record=question_record,
                        )
                except Exception as exc:
                    # Record the failure and keep going with other questions.
                    async with progress_lock:
                        completed += 1
                        failed_questions.append(
                            FailedQuestionRecord(
                                question_id=question_record.question_id,
                                error=str(exc),
                            )
                        )
                        logger.exception(
                            "Failed question %s (%s/%s)",
                            question_record.question_id,
                            completed,
                            total,
                        )
                    return

                # Write each answer as soon as it is ready so progress
                # survives an interrupted run.
                async with write_lock:
                    file.write(json.dumps(asdict(result), ensure_ascii=False))
                    file.write("\n")
                    file.flush()

                async with progress_lock:
                    completed += 1
                    successful += 1
                    logger.info("Processed %s/%s questions", completed, total)

            await asyncio.gather(
                *(process_question(question_record) for question_record in questions)
            )

    if failed_questions:
        logger.warning(
            "Completed with %s failed questions and %s successful questions.",
            len(failed_questions),
            successful,
        )
        for failed_question in failed_questions:
            logger.warning(
                "Failed question %s: %s",
                failed_question.question_id,
                failed_question.error,
            )
def main() -> None:
    """CLI entry point: load questions, then run the async answering pipeline."""
    args = parse_args()
    questions = load_questions(args.questions_file)
    api_base = normalize_api_base(args.api_base)

    # Optionally cap the number of questions processed.
    if args.max_questions is not None:
        if args.max_questions < 1:
            raise ValueError("`--max-questions` must be at least 1 when provided.")
        questions = questions[: args.max_questions]

    logger.info("Loaded %s questions from %s", len(questions), args.questions_file)
    logger.info("Writing answers to %s", args.output_file)

    asyncio.run(
        generate_answers(
            questions=questions,
            output_file=args.output_file,
            api_base=api_base,
            api_key=args.api_key,
            parallelism=args.parallelism,
        )
    )
if __name__ == "__main__":
|
||||
main()
|
||||
291
backend/scripts/upload_files_as_connectors.py
Normal file
291
backend/scripts/upload_files_as_connectors.py
Normal file
@@ -0,0 +1,291 @@
|
||||
"""
|
||||
Script to upload files from a directory as individual file connectors in Onyx.
|
||||
Each file gets its own connector named after the file.
|
||||
|
||||
Usage:
|
||||
python upload_files_as_connectors.py --data-dir /path/to/files --api-key YOUR_KEY
|
||||
python upload_files_as_connectors.py --data-dir /path/to/files --api-key YOUR_KEY --api-base http://onyxserver:3000
|
||||
python upload_files_as_connectors.py --data-dir /path/to/files --api-key YOUR_KEY --file-glob '*.zip'
|
||||
|
||||
Requires:
|
||||
pip install requests
|
||||
"""
|
||||
|
||||
import argparse
import fnmatch
import os
import sys
import threading
import time
from collections.abc import Callable

import requests
||||
REQUEST_TIMEOUT = 900 # 15 minutes
|
||||
|
||||
|
||||
def _elapsed_printer(label: str, stop_event: threading.Event) -> None:
|
||||
"""Print a live elapsed-time counter until stop_event is set."""
|
||||
start = time.monotonic()
|
||||
while not stop_event.wait(timeout=1):
|
||||
elapsed = int(time.monotonic() - start)
|
||||
m, s = divmod(elapsed, 60)
|
||||
print(f"\r {label} ... {m:02d}:{s:02d}", end="", flush=True)
|
||||
elapsed = int(time.monotonic() - start)
|
||||
m, s = divmod(elapsed, 60)
|
||||
print(f"\r {label} ... {m:02d}:{s:02d} done")
|
||||
|
||||
|
||||
def _timed_request(
    label: str, fn: Callable[[], requests.Response]
) -> requests.Response:
    """Run a zero-argument request callable while displaying a live elapsed timer.

    Fix: `fn` was typed as `object`, which forced a `# type: ignore[operator]`
    at the call site; it is now typed as the callable it actually is.
    The timer thread is always stopped and joined, even if `fn` raises.
    """
    stop = threading.Event()
    timer_thread = threading.Thread(
        target=_elapsed_printer, args=(label, stop), daemon=True
    )
    timer_thread.start()
    try:
        return fn()
    finally:
        stop.set()
        timer_thread.join()
def upload_file(
    session: requests.Session, base_url: str, file_path: str
) -> dict | None:
    """Upload a single file and return the response with file_paths and file_names.

    Returns None (after printing the error) when the upload fails.
    """
    # Keep the file handle open for the duration of the timed upload request.
    with open(file_path, "rb") as f:
        resp = _timed_request(
            "Uploading",
            lambda: session.post(
                f"{base_url}/api/manage/admin/connector/file/upload",
                files={"files": (os.path.basename(file_path), f)},
                timeout=REQUEST_TIMEOUT,
            ),
        )
    if not resp.ok:
        print(f" ERROR uploading: {resp.text}")
        return None
    return resp.json()
def create_connector(
    session: requests.Session,
    base_url: str,
    name: str,
    file_paths: list[str],
    file_names: list[str],
    zip_metadata_file_id: str | None,
) -> int | None:
    """Create a file connector and return its ID.

    Returns None (after printing the error) when the API call fails.
    """
    resp = _timed_request(
        "Creating connector",
        lambda: session.post(
            f"{base_url}/api/manage/admin/connector",
            json={
                "name": name,
                "source": "file",
                "input_type": "load_state",
                "connector_specific_config": {
                    "file_locations": file_paths,
                    "file_names": file_names,
                    "zip_metadata_file_id": zip_metadata_file_id,
                },
                # One-shot ingestion: no scheduled refresh or pruning.
                "refresh_freq": None,
                "prune_freq": None,
                "indexing_start": None,
                "access_type": "public",
                "groups": [],
            },
            timeout=REQUEST_TIMEOUT,
        ),
    )
    if not resp.ok:
        print(f" ERROR creating connector: {resp.text}")
        return None
    return resp.json()["id"]
def create_credential(
    session: requests.Session, base_url: str, name: str
) -> int | None:
    """Create a dummy credential for the file connector.

    File connectors need no real secrets, so `credential_json` is empty.
    Returns the credential id, or None (after printing the error) on failure.
    """
    resp = session.post(
        f"{base_url}/api/manage/credential",
        json={
            "credential_json": {},
            "admin_public": True,
            "source": "file",
            "curator_public": True,
            "groups": [],
            "name": name,
        },
        timeout=REQUEST_TIMEOUT,
    )
    if not resp.ok:
        print(f" ERROR creating credential: {resp.text}")
        return None
    return resp.json()["id"]
def link_credential(
    session: requests.Session,
    base_url: str,
    connector_id: int,
    credential_id: int,
    name: str,
) -> bool:
    """Link the connector to the credential (create CC pair).

    Returns True on success; False (after printing the error) on failure.
    """
    resp = session.put(
        f"{base_url}/api/manage/connector/{connector_id}/credential/{credential_id}",
        json={
            "name": name,
            "access_type": "public",
            "groups": [],
            "auto_sync_options": None,
            "processing_mode": "REGULAR",
        },
        timeout=REQUEST_TIMEOUT,
    )
    if not resp.ok:
        print(f" ERROR linking credential: {resp.text}")
        return False
    return True
def run_connector(
    session: requests.Session,
    base_url: str,
    connector_id: int,
    credential_id: int,
) -> bool:
    """Trigger the connector to start indexing.

    Returns True on success; False (after printing the error) on failure.
    """
    resp = session.post(
        f"{base_url}/api/manage/admin/connector/run-once",
        json={
            "connector_id": connector_id,
            # NOTE(review): key is camelCase, unlike its siblings — presumably
            # what this endpoint expects; confirm before changing.
            "credentialIds": [credential_id],
            "from_beginning": False,
        },
        timeout=REQUEST_TIMEOUT,
    )
    if not resp.ok:
        print(f" ERROR running connector: {resp.text}")
        return False
    return True
def process_file(session: requests.Session, base_url: str, file_path: str) -> bool:
    """Process a single file through the full connector creation flow.

    Steps: upload the file, create a connector pointing at it, create a dummy
    credential, link them into a CC pair, and trigger a one-off indexing run.
    Returns False as soon as any step fails (the step prints its own error).
    """
    file_name = os.path.basename(file_path)
    connector_name = file_name
    print(f"Processing: {file_name}")

    # Step 1: Upload
    upload_resp = upload_file(session, base_url, file_path)
    if not upload_resp:
        return False

    # Step 2: Create connector
    connector_id = create_connector(
        session,
        base_url,
        name=f"FileConnector-{connector_name}",
        file_paths=upload_resp["file_paths"],
        file_names=upload_resp["file_names"],
        zip_metadata_file_id=upload_resp.get("zip_metadata_file_id"),
    )
    if connector_id is None:
        return False

    # Step 3: Create credential
    credential_id = create_credential(session, base_url, name=connector_name)
    if credential_id is None:
        return False

    # Step 4: Link connector to credential
    if not link_credential(
        session, base_url, connector_id, credential_id, connector_name
    ):
        return False

    # Step 5: Trigger indexing
    if not run_connector(session, base_url, connector_id, credential_id):
        return False

    print(f" OK (connector_id={connector_id})")
    return True
def get_authenticated_session(api_key: str) -> requests.Session:
    """Build a requests session that sends the API key as a bearer token."""
    authed_session = requests.Session()
    authed_session.headers["Authorization"] = f"Bearer {api_key}"
    return authed_session
def main() -> None:
    """CLI entry point: upload every file in a directory as its own connector."""
    parser = argparse.ArgumentParser(
        description="Upload files as individual Onyx file connectors."
    )
    parser.add_argument(
        "--data-dir",
        required=True,
        help="Directory containing files to upload.",
    )
    parser.add_argument(
        "--api-base",
        default="http://localhost:3000",
        help="Base URL for the Onyx API (default: http://localhost:3000).",
    )
    parser.add_argument(
        "--api-key",
        required=True,
        help="API key for authentication.",
    )
    parser.add_argument(
        "--file-glob",
        default=None,
        help="Glob pattern to filter files (e.g. '*.json', '*.zip').",
    )
    args = parser.parse_args()

    data_dir = args.data_dir
    base_url = args.api_base.rstrip("/")
    api_key = args.api_key
    file_glob = args.file_glob

    if not os.path.isdir(data_dir):
        print(f"Error: {data_dir} is not a directory")
        sys.exit(1)

    # Collect regular files, excluding this script itself (in case it lives in
    # data_dir) and anything not matching the optional glob.
    script_path = os.path.realpath(__file__)
    files = sorted(
        os.path.join(data_dir, f)
        for f in os.listdir(data_dir)
        if os.path.isfile(os.path.join(data_dir, f))
        and os.path.realpath(os.path.join(data_dir, f)) != script_path
        and (file_glob is None or fnmatch.fnmatch(f, file_glob))
    )

    if not files:
        print(f"No files found in {data_dir}")
        sys.exit(1)

    print(f"Found {len(files)} file(s) in {data_dir}\n")

    session = get_authenticated_session(api_key)

    success = 0
    failed = 0
    for file_path in files:
        if process_file(session, base_url, file_path):
            success += 1
        else:
            failed += 1
        # Small delay to avoid overwhelming the server
        time.sleep(0.5)

    print(f"\nDone: {success} succeeded, {failed} failed out of {len(files)} files.")
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -45,6 +45,21 @@ npx playwright test <TEST_NAME>
|
||||
Shared fixtures live in `backend/tests/conftest.py`. Test subdirectories can define
|
||||
their own `conftest.py` for directory-scoped fixtures.
|
||||
|
||||
## Running Tests Repeatedly (`pytest-repeat`)
|
||||
|
||||
Use `pytest-repeat` to catch flaky tests by running them multiple times:
|
||||
|
||||
```bash
|
||||
# Run a specific test 50 times
|
||||
pytest --count=50 backend/tests/unit/path/to/test.py::test_name
|
||||
|
||||
# Stop on first failure with -x
|
||||
pytest --count=50 -x backend/tests/unit/path/to/test.py::test_name
|
||||
|
||||
# Repeat an entire test file
|
||||
pytest --count=10 backend/tests/unit/path/to/test_file.py
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### Use `enable_ee` fixture instead of inlining
|
||||
|
||||
@@ -297,6 +297,10 @@ def index_batch_params(
|
||||
class TestDocumentIndexOld:
|
||||
"""Tests the old DocumentIndex interface."""
|
||||
|
||||
# TODO(ENG-3864)(andrei): Re-enable this test.
|
||||
@pytest.mark.xfail(
|
||||
reason="Flaky test: Retrieved chunks vary non-deterministically before and after changing user projects and personas. Likely a timing issue with the index being updated."
|
||||
)
|
||||
def test_update_single_can_clear_user_projects_and_personas(
|
||||
self,
|
||||
document_indices: list[DocumentIndex],
|
||||
|
||||
@@ -22,6 +22,8 @@ from onyx.document_index.interfaces_new import TenantState
|
||||
from onyx.document_index.opensearch.client import OpenSearchIndexClient
|
||||
from onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout
|
||||
from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
|
||||
from onyx.document_index.opensearch.constants import HybridSearchNormalizationPipeline
|
||||
from onyx.document_index.opensearch.constants import HybridSearchSubqueryConfiguration
|
||||
from onyx.document_index.opensearch.opensearch_document_index import (
|
||||
generate_opensearch_filtered_access_control_list,
|
||||
)
|
||||
@@ -31,9 +33,14 @@ from onyx.document_index.opensearch.schema import DocumentSchema
|
||||
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
|
||||
from onyx.document_index.opensearch.search import DocumentQuery
|
||||
from onyx.document_index.opensearch.search import (
|
||||
MIN_MAX_NORMALIZATION_PIPELINE_CONFIG,
|
||||
get_min_max_normalization_pipeline_name_and_config,
|
||||
)
|
||||
from onyx.document_index.opensearch.search import (
|
||||
get_normalization_pipeline_name_and_config,
|
||||
)
|
||||
from onyx.document_index.opensearch.search import (
|
||||
get_zscore_normalization_pipeline_name_and_config,
|
||||
)
|
||||
from onyx.document_index.opensearch.search import MIN_MAX_NORMALIZATION_PIPELINE_NAME
|
||||
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
|
||||
|
||||
|
||||
@@ -49,6 +56,46 @@ def _patch_global_tenant_state(monkeypatch: pytest.MonkeyPatch, state: bool) ->
|
||||
monkeypatch.setattr("onyx.document_index.opensearch.schema.MULTI_TENANT", state)
|
||||
|
||||
|
||||
def _patch_hybrid_search_subquery_configuration(
|
||||
monkeypatch: pytest.MonkeyPatch, configuration: HybridSearchSubqueryConfiguration
|
||||
) -> None:
|
||||
"""
|
||||
Patches HYBRID_SEARCH_SUBQUERY_CONFIGURATION wherever necessary for this
|
||||
test file.
|
||||
|
||||
Args:
|
||||
monkeypatch: The test instance's monkeypatch instance, used for
|
||||
patching.
|
||||
configuration: The intended state of
|
||||
HYBRID_SEARCH_SUBQUERY_CONFIGURATION.
|
||||
"""
|
||||
monkeypatch.setattr(
|
||||
"onyx.document_index.opensearch.constants.HYBRID_SEARCH_SUBQUERY_CONFIGURATION",
|
||||
configuration,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"onyx.document_index.opensearch.search.HYBRID_SEARCH_SUBQUERY_CONFIGURATION",
|
||||
configuration,
|
||||
)
|
||||
|
||||
|
||||
def _patch_hybrid_search_normalization_pipeline(
|
||||
monkeypatch: pytest.MonkeyPatch, pipeline: HybridSearchNormalizationPipeline
|
||||
) -> None:
|
||||
"""
|
||||
Patches HYBRID_SEARCH_NORMALIZATION_PIPELINE wherever necessary for this
|
||||
test file.
|
||||
"""
|
||||
monkeypatch.setattr(
|
||||
"onyx.document_index.opensearch.constants.HYBRID_SEARCH_NORMALIZATION_PIPELINE",
|
||||
pipeline,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"onyx.document_index.opensearch.search.HYBRID_SEARCH_NORMALIZATION_PIPELINE",
|
||||
pipeline,
|
||||
)
|
||||
|
||||
|
||||
def _create_test_document_chunk(
|
||||
document_id: str,
|
||||
content: str,
|
||||
@@ -144,14 +191,27 @@ def test_client(
|
||||
@pytest.fixture(scope="function")
|
||||
def search_pipeline(test_client: OpenSearchIndexClient) -> Generator[None, None, None]:
|
||||
"""Creates a search pipeline for testing with automatic cleanup."""
|
||||
min_max_normalization_pipeline_name, min_max_normalization_pipeline_config = (
|
||||
get_min_max_normalization_pipeline_name_and_config()
|
||||
)
|
||||
zscore_normalization_pipeline_name, zscore_normalization_pipeline_config = (
|
||||
get_zscore_normalization_pipeline_name_and_config()
|
||||
)
|
||||
test_client.create_search_pipeline(
|
||||
pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME,
|
||||
pipeline_body=MIN_MAX_NORMALIZATION_PIPELINE_CONFIG,
|
||||
pipeline_id=min_max_normalization_pipeline_name,
|
||||
pipeline_body=min_max_normalization_pipeline_config,
|
||||
)
|
||||
test_client.create_search_pipeline(
|
||||
pipeline_id=zscore_normalization_pipeline_name,
|
||||
pipeline_body=zscore_normalization_pipeline_config,
|
||||
)
|
||||
yield # Test runs here.
|
||||
try:
|
||||
test_client.delete_search_pipeline(
|
||||
pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME,
|
||||
pipeline_id=min_max_normalization_pipeline_name,
|
||||
)
|
||||
test_client.delete_search_pipeline(
|
||||
pipeline_id=zscore_normalization_pipeline_name,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
@@ -377,18 +437,19 @@ class TestOpenSearchClient:
|
||||
self, test_client: OpenSearchIndexClient
|
||||
) -> None:
|
||||
"""Tests creating and deleting a search pipeline."""
|
||||
# Precondition.
|
||||
pipeline_name, pipeline_config = get_normalization_pipeline_name_and_config()
|
||||
|
||||
# Under test and postcondition.
|
||||
# Should not raise.
|
||||
test_client.create_search_pipeline(
|
||||
pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME,
|
||||
pipeline_body=MIN_MAX_NORMALIZATION_PIPELINE_CONFIG,
|
||||
pipeline_id=pipeline_name,
|
||||
pipeline_body=pipeline_config,
|
||||
)
|
||||
|
||||
# Under test and postcondition.
|
||||
# Should not raise.
|
||||
test_client.delete_search_pipeline(
|
||||
pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME
|
||||
)
|
||||
test_client.delete_search_pipeline(pipeline_id=pipeline_name)
|
||||
|
||||
def test_index_document(
|
||||
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
|
||||
@@ -734,13 +795,13 @@ class TestOpenSearchClient:
|
||||
properties_to_update={"hidden": True},
|
||||
)
|
||||
|
||||
def test_hybrid_search_with_pipeline(
|
||||
def test_hybrid_search_configurations_and_pipelines(
|
||||
self,
|
||||
test_client: OpenSearchIndexClient,
|
||||
search_pipeline: None, # noqa: ARG002
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""Tests hybrid search with a normalization pipeline."""
|
||||
"""Tests all hybrid search configurations and pipelines."""
|
||||
# Precondition.
|
||||
_patch_global_tenant_state(monkeypatch, False)
|
||||
tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
|
||||
@@ -749,7 +810,6 @@ class TestOpenSearchClient:
|
||||
)
|
||||
settings = DocumentSchema.get_index_settings()
|
||||
test_client.create_index(mappings=mappings, settings=settings)
|
||||
|
||||
# Index documents.
|
||||
docs = {
|
||||
"doc-1": _create_test_document_chunk(
|
||||
@@ -780,40 +840,58 @@ class TestOpenSearchClient:
|
||||
# Refresh index to make documents searchable.
|
||||
test_client.refresh_index()
|
||||
|
||||
# Search query.
|
||||
query_text = "Python programming"
|
||||
query_vector = _generate_test_vector(0.12)
|
||||
search_body = DocumentQuery.get_hybrid_search_query(
|
||||
query_text=query_text,
|
||||
query_vector=query_vector,
|
||||
num_hits=5,
|
||||
tenant_state=tenant_state,
|
||||
# We're not worried about filtering here. tenant_id in this object
|
||||
# is not relevant.
|
||||
index_filters=IndexFilters(access_control_list=None, tenant_id=None),
|
||||
include_hidden=False,
|
||||
)
|
||||
for configuration in HybridSearchSubqueryConfiguration:
|
||||
_patch_hybrid_search_subquery_configuration(monkeypatch, configuration)
|
||||
for pipeline in HybridSearchNormalizationPipeline:
|
||||
_patch_hybrid_search_normalization_pipeline(monkeypatch, pipeline)
|
||||
pipeline_name, pipeline_config = (
|
||||
get_normalization_pipeline_name_and_config()
|
||||
)
|
||||
test_client.create_search_pipeline(
|
||||
pipeline_id=pipeline_name,
|
||||
pipeline_body=pipeline_config,
|
||||
)
|
||||
|
||||
# Under test.
|
||||
results = test_client.search(
|
||||
body=search_body, search_pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME
|
||||
)
|
||||
# Search query.
|
||||
query_text = "Python programming"
|
||||
query_vector = _generate_test_vector(0.12)
|
||||
search_body = DocumentQuery.get_hybrid_search_query(
|
||||
query_text=query_text,
|
||||
query_vector=query_vector,
|
||||
num_hits=5,
|
||||
tenant_state=tenant_state,
|
||||
# We're not worried about filtering here. tenant_id in this object
|
||||
# is not relevant.
|
||||
index_filters=IndexFilters(
|
||||
access_control_list=None, tenant_id=None
|
||||
),
|
||||
include_hidden=False,
|
||||
)
|
||||
|
||||
# Postcondition.
|
||||
assert len(results) == len(docs)
|
||||
# Assert that all the chunks above are present.
|
||||
assert all(chunk.document_chunk.document_id in docs.keys() for chunk in results)
|
||||
# Make sure the chunk contents are preserved.
|
||||
for i, chunk in enumerate(results):
|
||||
assert chunk.document_chunk == docs[chunk.document_chunk.document_id]
|
||||
# Make sure score reporting seems reasonable (it should not be None
|
||||
# or 0).
|
||||
assert chunk.score
|
||||
# Make sure there is some kind of match highlight only for the first
|
||||
# result. The other results are so bad they're not expected to have
|
||||
# match highlights.
|
||||
if i == 0:
|
||||
assert chunk.match_highlights.get(CONTENT_FIELD_NAME, [])
|
||||
# Under test.
|
||||
results = test_client.search(
|
||||
body=search_body, search_pipeline_id=pipeline_name
|
||||
)
|
||||
|
||||
# Postcondition.
|
||||
assert len(results) == len(docs)
|
||||
# Assert that all the chunks above are present.
|
||||
assert all(
|
||||
chunk.document_chunk.document_id in docs.keys() for chunk in results
|
||||
)
|
||||
# Make sure the chunk contents are preserved.
|
||||
for i, chunk in enumerate(results):
|
||||
assert (
|
||||
chunk.document_chunk == docs[chunk.document_chunk.document_id]
|
||||
)
|
||||
# Make sure score reporting seems reasonable (it should not be None
|
||||
# or 0).
|
||||
assert chunk.score
|
||||
# Make sure there is some kind of match highlight only for the first
|
||||
# result. The other results are so bad they're not expected to have
|
||||
# match highlights.
|
||||
if i == 0:
|
||||
assert chunk.match_highlights.get(CONTENT_FIELD_NAME, [])
|
||||
|
||||
def test_search_empty_index(
|
||||
self,
|
||||
@@ -845,11 +923,10 @@ class TestOpenSearchClient:
|
||||
index_filters=IndexFilters(access_control_list=None, tenant_id=None),
|
||||
include_hidden=False,
|
||||
)
|
||||
pipeline_name, _ = get_normalization_pipeline_name_and_config()
|
||||
|
||||
# Under test.
|
||||
results = test_client.search(
|
||||
body=search_body, search_pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME
|
||||
)
|
||||
results = test_client.search(body=search_body, search_pipeline_id=pipeline_name)
|
||||
|
||||
# Postcondition.
|
||||
assert len(results) == 0
|
||||
@@ -948,11 +1025,10 @@ class TestOpenSearchClient:
|
||||
),
|
||||
include_hidden=False,
|
||||
)
|
||||
pipeline_name, _ = get_normalization_pipeline_name_and_config()
|
||||
|
||||
# Under test.
|
||||
results = test_client.search(
|
||||
body=search_body, search_pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME
|
||||
)
|
||||
results = test_client.search(body=search_body, search_pipeline_id=pipeline_name)
|
||||
|
||||
# Postcondition.
|
||||
# Should only get the public, non-hidden document, and the private
|
||||
@@ -1067,11 +1143,10 @@ class TestOpenSearchClient:
|
||||
index_filters=IndexFilters(access_control_list=[], tenant_id=None),
|
||||
include_hidden=False,
|
||||
)
|
||||
pipeline_name, _ = get_normalization_pipeline_name_and_config()
|
||||
|
||||
# Under test.
|
||||
results = test_client.search(
|
||||
body=search_body, search_pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME
|
||||
)
|
||||
results = test_client.search(body=search_body, search_pipeline_id=pipeline_name)
|
||||
|
||||
# Postcondition.
|
||||
# Should only get public, non-hidden documents (3 out of 5).
|
||||
@@ -1441,15 +1516,16 @@ class TestOpenSearchClient:
|
||||
),
|
||||
include_hidden=False,
|
||||
)
|
||||
pipeline_name, _ = get_normalization_pipeline_name_and_config()
|
||||
|
||||
# Under test.
|
||||
last_week_results = test_client.search(
|
||||
body=last_week_search_body,
|
||||
search_pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME,
|
||||
search_pipeline_id=pipeline_name,
|
||||
)
|
||||
last_six_months_results = test_client.search(
|
||||
body=last_six_months_search_body,
|
||||
search_pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME,
|
||||
search_pipeline_id=pipeline_name,
|
||||
)
|
||||
|
||||
# Postcondition.
|
||||
|
||||
120
backend/tests/unit/onyx/auth/test_is_same_origin.py
Normal file
120
backend/tests/unit/onyx/auth/test_is_same_origin.py
Normal file
@@ -0,0 +1,120 @@
|
||||
import pytest
|
||||
|
||||
from onyx.auth.users import _is_same_origin
|
||||
|
||||
|
||||
class TestExactMatch:
|
||||
"""Origins that are textually identical should always match."""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"origin",
|
||||
[
|
||||
"http://localhost:3000",
|
||||
"https://app.example.com",
|
||||
"https://app.example.com:8443",
|
||||
"http://127.0.0.1:8080",
|
||||
],
|
||||
)
|
||||
def test_identical_origins(self, origin: str) -> None:
|
||||
assert _is_same_origin(origin, origin)
|
||||
|
||||
|
||||
class TestLoopbackPortRelaxation:
|
||||
"""On loopback addresses, port differences should be ignored."""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"actual,expected",
|
||||
[
|
||||
("http://localhost:3001", "http://localhost:3000"),
|
||||
("http://localhost:8080", "http://localhost:3000"),
|
||||
("http://localhost", "http://localhost:3000"),
|
||||
("http://127.0.0.1:3001", "http://127.0.0.1:3000"),
|
||||
("http://[::1]:3001", "http://[::1]:3000"),
|
||||
],
|
||||
)
|
||||
def test_loopback_different_ports_accepted(
|
||||
self, actual: str, expected: str
|
||||
) -> None:
|
||||
assert _is_same_origin(actual, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"actual,expected",
|
||||
[
|
||||
("https://localhost:3001", "http://localhost:3000"),
|
||||
("http://localhost:3001", "https://localhost:3000"),
|
||||
],
|
||||
)
|
||||
def test_loopback_different_scheme_rejected(
|
||||
self, actual: str, expected: str
|
||||
) -> None:
|
||||
assert not _is_same_origin(actual, expected)
|
||||
|
||||
def test_loopback_hostname_mismatch_rejected(self) -> None:
|
||||
assert not _is_same_origin("http://localhost:3001", "http://127.0.0.1:3000")
|
||||
|
||||
|
||||
class TestNonLoopbackStrictPort:
|
||||
"""Non-loopback origins must match scheme, hostname, AND port."""
|
||||
|
||||
def test_different_port_rejected(self) -> None:
|
||||
assert not _is_same_origin(
|
||||
"https://app.example.com:8443", "https://app.example.com"
|
||||
)
|
||||
|
||||
def test_different_hostname_rejected(self) -> None:
|
||||
assert not _is_same_origin("https://evil.com", "https://app.example.com")
|
||||
|
||||
def test_different_scheme_rejected(self) -> None:
|
||||
assert not _is_same_origin("http://app.example.com", "https://app.example.com")
|
||||
|
||||
def test_same_port_explicit(self) -> None:
|
||||
assert _is_same_origin(
|
||||
"https://app.example.com:443", "https://app.example.com:443"
|
||||
)
|
||||
|
||||
|
||||
class TestDefaultPortNormalization:
|
||||
"""Port should be normalized so that omitted default port == explicit default port."""
|
||||
|
||||
def test_http_implicit_vs_explicit_80(self) -> None:
|
||||
assert _is_same_origin("http://example.com", "http://example.com:80")
|
||||
|
||||
def test_http_explicit_80_vs_implicit(self) -> None:
|
||||
assert _is_same_origin("http://example.com:80", "http://example.com")
|
||||
|
||||
def test_https_implicit_vs_explicit_443(self) -> None:
|
||||
assert _is_same_origin("https://example.com", "https://example.com:443")
|
||||
|
||||
def test_https_explicit_443_vs_implicit(self) -> None:
|
||||
assert _is_same_origin("https://example.com:443", "https://example.com")
|
||||
|
||||
def test_http_non_default_port_vs_implicit_rejected(self) -> None:
|
||||
assert not _is_same_origin("http://example.com:8080", "http://example.com")
|
||||
|
||||
|
||||
class TestTrailingSlash:
|
||||
"""Trailing slashes should not affect comparison."""
|
||||
|
||||
def test_trailing_slash_on_actual(self) -> None:
|
||||
assert _is_same_origin("https://app.example.com/", "https://app.example.com")
|
||||
|
||||
def test_trailing_slash_on_expected(self) -> None:
|
||||
assert _is_same_origin("https://app.example.com", "https://app.example.com/")
|
||||
|
||||
def test_trailing_slash_on_both(self) -> None:
|
||||
assert _is_same_origin("https://app.example.com/", "https://app.example.com/")
|
||||
|
||||
|
||||
class TestCSWSHScenarios:
|
||||
"""Realistic attack scenarios that must be rejected."""
|
||||
|
||||
def test_remote_attacker_rejected(self) -> None:
|
||||
assert not _is_same_origin("https://evil.com", "http://localhost:3000")
|
||||
|
||||
def test_remote_attacker_same_port_rejected(self) -> None:
|
||||
assert not _is_same_origin("http://evil.com:3000", "http://localhost:3000")
|
||||
|
||||
def test_remote_attacker_matching_hostname_different_port(self) -> None:
|
||||
assert not _is_same_origin(
|
||||
"https://app.example.com:9999", "https://app.example.com"
|
||||
)
|
||||
@@ -0,0 +1,194 @@
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
from onyx.background.celery.tasks.hierarchyfetching.tasks import (
|
||||
_connector_supports_hierarchy_fetching,
|
||||
)
|
||||
from onyx.background.celery.tasks.hierarchyfetching.tasks import (
|
||||
check_for_hierarchy_fetching,
|
||||
)
|
||||
from onyx.connectors.factory import ConnectorMissingException
|
||||
from onyx.connectors.interfaces import BaseConnector
|
||||
from onyx.connectors.interfaces import HierarchyConnector
|
||||
from onyx.connectors.interfaces import HierarchyOutput
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
|
||||
TASKS_MODULE = "onyx.background.celery.tasks.hierarchyfetching.tasks"
|
||||
|
||||
|
||||
class _NonHierarchyConnector(BaseConnector):
|
||||
def load_credentials(self, credentials: dict) -> dict | None: # noqa: ARG002
|
||||
return None
|
||||
|
||||
|
||||
class _HierarchyCapableConnector(HierarchyConnector):
|
||||
def load_credentials(self, credentials: dict) -> dict | None: # noqa: ARG002
|
||||
return None
|
||||
|
||||
def load_hierarchy(
|
||||
self,
|
||||
start: SecondsSinceUnixEpoch, # noqa: ARG002
|
||||
end: SecondsSinceUnixEpoch, # noqa: ARG002
|
||||
) -> HierarchyOutput:
|
||||
return
|
||||
yield
|
||||
|
||||
|
||||
def _build_cc_pair_mock() -> MagicMock:
|
||||
cc_pair = MagicMock()
|
||||
cc_pair.connector.source = "mock-source"
|
||||
cc_pair.connector.input_type = "mock-input-type"
|
||||
return cc_pair
|
||||
|
||||
|
||||
def _build_redis_mock_with_lock() -> tuple[MagicMock, MagicMock]:
|
||||
redis_client = MagicMock()
|
||||
lock = MagicMock()
|
||||
lock.acquire.return_value = True
|
||||
lock.owned.return_value = True
|
||||
redis_client.lock.return_value = lock
|
||||
return redis_client, lock
|
||||
|
||||
|
||||
@patch(f"{TASKS_MODULE}.identify_connector_class")
|
||||
def test_connector_supports_hierarchy_fetching_false_for_non_hierarchy_connector(
|
||||
mock_identify_connector_class: MagicMock,
|
||||
) -> None:
|
||||
mock_identify_connector_class.return_value = _NonHierarchyConnector
|
||||
|
||||
assert _connector_supports_hierarchy_fetching(_build_cc_pair_mock()) is False
|
||||
mock_identify_connector_class.assert_called_once_with("mock-source")
|
||||
|
||||
|
||||
@patch(f"{TASKS_MODULE}.task_logger.warning")
|
||||
@patch(f"{TASKS_MODULE}.identify_connector_class")
|
||||
def test_connector_supports_hierarchy_fetching_false_when_class_missing(
|
||||
mock_identify_connector_class: MagicMock,
|
||||
mock_warning: MagicMock,
|
||||
) -> None:
|
||||
mock_identify_connector_class.side_effect = ConnectorMissingException("missing")
|
||||
|
||||
assert _connector_supports_hierarchy_fetching(_build_cc_pair_mock()) is False
|
||||
mock_warning.assert_called_once()
|
||||
|
||||
|
||||
@patch(f"{TASKS_MODULE}.identify_connector_class")
|
||||
def test_connector_supports_hierarchy_fetching_true_for_supported_connector(
|
||||
mock_identify_connector_class: MagicMock,
|
||||
) -> None:
|
||||
mock_identify_connector_class.return_value = _HierarchyCapableConnector
|
||||
|
||||
assert _connector_supports_hierarchy_fetching(_build_cc_pair_mock()) is True
|
||||
mock_identify_connector_class.assert_called_once_with("mock-source")
|
||||
|
||||
|
||||
@patch(f"{TASKS_MODULE}._try_creating_hierarchy_fetching_task")
|
||||
@patch(f"{TASKS_MODULE}._is_hierarchy_fetching_due")
|
||||
@patch(f"{TASKS_MODULE}.get_connector_credential_pair_from_id")
|
||||
@patch(f"{TASKS_MODULE}.fetch_indexable_standard_connector_credential_pair_ids")
|
||||
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
|
||||
@patch(f"{TASKS_MODULE}.get_redis_client")
|
||||
@patch(f"{TASKS_MODULE}._connector_supports_hierarchy_fetching")
|
||||
def test_check_for_hierarchy_fetching_skips_unsupported_connectors(
|
||||
mock_supports_hierarchy_fetching: MagicMock,
|
||||
mock_get_redis_client: MagicMock,
|
||||
mock_get_session: MagicMock,
|
||||
mock_fetch_cc_pair_ids: MagicMock,
|
||||
mock_get_cc_pair: MagicMock,
|
||||
mock_is_due: MagicMock,
|
||||
mock_try_create_task: MagicMock,
|
||||
) -> None:
|
||||
redis_client, lock = _build_redis_mock_with_lock()
|
||||
mock_get_redis_client.return_value = redis_client
|
||||
mock_get_session.return_value.__enter__.return_value = MagicMock()
|
||||
mock_fetch_cc_pair_ids.return_value = [123]
|
||||
mock_get_cc_pair.return_value = _build_cc_pair_mock()
|
||||
mock_supports_hierarchy_fetching.return_value = False
|
||||
mock_is_due.return_value = True
|
||||
|
||||
task_app = MagicMock()
|
||||
with patch.object(check_for_hierarchy_fetching, "app", task_app):
|
||||
result = check_for_hierarchy_fetching.run(tenant_id="test-tenant")
|
||||
|
||||
assert result == 0
|
||||
mock_is_due.assert_not_called()
|
||||
mock_try_create_task.assert_not_called()
|
||||
lock.release.assert_called_once()
|
||||
|
||||
|
||||
@patch(f"{TASKS_MODULE}._try_creating_hierarchy_fetching_task")
|
||||
@patch(f"{TASKS_MODULE}._is_hierarchy_fetching_due")
|
||||
@patch(f"{TASKS_MODULE}.get_connector_credential_pair_from_id")
|
||||
@patch(f"{TASKS_MODULE}.fetch_indexable_standard_connector_credential_pair_ids")
|
||||
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
|
||||
@patch(f"{TASKS_MODULE}.get_redis_client")
|
||||
@patch(f"{TASKS_MODULE}._connector_supports_hierarchy_fetching")
|
||||
def test_check_for_hierarchy_fetching_creates_task_for_supported_due_connector(
|
||||
mock_supports_hierarchy_fetching: MagicMock,
|
||||
mock_get_redis_client: MagicMock,
|
||||
mock_get_session: MagicMock,
|
||||
mock_fetch_cc_pair_ids: MagicMock,
|
||||
mock_get_cc_pair: MagicMock,
|
||||
mock_is_due: MagicMock,
|
||||
mock_try_create_task: MagicMock,
|
||||
) -> None:
|
||||
redis_client, lock = _build_redis_mock_with_lock()
|
||||
cc_pair = _build_cc_pair_mock()
|
||||
db_session = MagicMock()
|
||||
mock_get_redis_client.return_value = redis_client
|
||||
mock_get_session.return_value.__enter__.return_value = db_session
|
||||
mock_fetch_cc_pair_ids.return_value = [123]
|
||||
mock_get_cc_pair.return_value = cc_pair
|
||||
mock_supports_hierarchy_fetching.return_value = True
|
||||
mock_is_due.return_value = True
|
||||
mock_try_create_task.return_value = "task-id"
|
||||
|
||||
task_app = MagicMock()
|
||||
with patch.object(check_for_hierarchy_fetching, "app", task_app):
|
||||
result = check_for_hierarchy_fetching.run(tenant_id="test-tenant")
|
||||
|
||||
assert result == 1
|
||||
mock_is_due.assert_called_once_with(cc_pair)
|
||||
mock_try_create_task.assert_called_once_with(
|
||||
celery_app=task_app,
|
||||
cc_pair=cc_pair,
|
||||
db_session=db_session,
|
||||
r=redis_client,
|
||||
tenant_id="test-tenant",
|
||||
)
|
||||
lock.release.assert_called_once()
|
||||
|
||||
|
||||
@patch(f"{TASKS_MODULE}._try_creating_hierarchy_fetching_task")
|
||||
@patch(f"{TASKS_MODULE}._is_hierarchy_fetching_due")
|
||||
@patch(f"{TASKS_MODULE}.get_connector_credential_pair_from_id")
|
||||
@patch(f"{TASKS_MODULE}.fetch_indexable_standard_connector_credential_pair_ids")
|
||||
@patch(f"{TASKS_MODULE}.get_session_with_current_tenant")
|
||||
@patch(f"{TASKS_MODULE}.get_redis_client")
|
||||
@patch(f"{TASKS_MODULE}._connector_supports_hierarchy_fetching")
|
||||
def test_check_for_hierarchy_fetching_skips_supported_connector_when_not_due(
|
||||
mock_supports_hierarchy_fetching: MagicMock,
|
||||
mock_get_redis_client: MagicMock,
|
||||
mock_get_session: MagicMock,
|
||||
mock_fetch_cc_pair_ids: MagicMock,
|
||||
mock_get_cc_pair: MagicMock,
|
||||
mock_is_due: MagicMock,
|
||||
mock_try_create_task: MagicMock,
|
||||
) -> None:
|
||||
redis_client, lock = _build_redis_mock_with_lock()
|
||||
cc_pair = _build_cc_pair_mock()
|
||||
mock_get_redis_client.return_value = redis_client
|
||||
mock_get_session.return_value.__enter__.return_value = MagicMock()
|
||||
mock_fetch_cc_pair_ids.return_value = [123]
|
||||
mock_get_cc_pair.return_value = cc_pair
|
||||
mock_supports_hierarchy_fetching.return_value = True
|
||||
mock_is_due.return_value = False
|
||||
|
||||
task_app = MagicMock()
|
||||
with patch.object(check_for_hierarchy_fetching, "app", task_app):
|
||||
result = check_for_hierarchy_fetching.run(tenant_id="test-tenant")
|
||||
|
||||
assert result == 0
|
||||
mock_is_due.assert_called_once_with(cc_pair)
|
||||
mock_try_create_task.assert_not_called()
|
||||
lock.release.assert_called_once()
|
||||
@@ -1,9 +1,13 @@
|
||||
"""Tests for llm_loop.py, specifically the construct_message_history function."""
|
||||
"""Tests for llm_loop.py, including history construction and empty-response paths."""
|
||||
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.chat.llm_loop import _build_empty_llm_response_error
|
||||
from onyx.chat.llm_loop import _try_fallback_tool_extraction
|
||||
from onyx.chat.llm_loop import construct_message_history
|
||||
from onyx.chat.llm_loop import EmptyLLMResponseError
|
||||
from onyx.chat.models import ChatLoadedFile
|
||||
from onyx.chat.models import ChatMessageSimple
|
||||
from onyx.chat.models import ContextFileMetadata
|
||||
@@ -13,6 +17,7 @@ from onyx.chat.models import LlmStepResult
|
||||
from onyx.chat.models import ToolCallSimple
|
||||
from onyx.configs.constants import MessageType
|
||||
from onyx.file_store.models import ChatFileType
|
||||
from onyx.llm.interfaces import LLMConfig
|
||||
from onyx.llm.interfaces import ToolChoiceOptions
|
||||
from onyx.server.query_and_chat.placement import Placement
|
||||
from onyx.tools.models import ToolCallKickoff
|
||||
@@ -1167,3 +1172,57 @@ class TestFallbackToolExtraction:
|
||||
|
||||
assert result is llm_step_result
|
||||
assert attempted is False
|
||||
|
||||
|
||||
class TestEmptyLlmResponseClassification:
|
||||
def _make_llm(self, provider: str = "openai", model: str = "gpt-5.2") -> Mock:
|
||||
llm = Mock()
|
||||
llm.config = LLMConfig(
|
||||
model_provider=provider,
|
||||
model_name=model,
|
||||
temperature=0.0,
|
||||
max_input_tokens=4096,
|
||||
)
|
||||
return llm
|
||||
|
||||
def test_openai_empty_stream_is_classified_as_budget_exceeded(
|
||||
self, monkeypatch: pytest.MonkeyPatch
|
||||
) -> None:
|
||||
monkeypatch.setattr("onyx.chat.llm_loop.is_true_openai_model", lambda *_: True)
|
||||
|
||||
err = _build_empty_llm_response_error(
|
||||
llm=self._make_llm(),
|
||||
llm_step_result=LlmStepResult(
|
||||
reasoning=None,
|
||||
answer=None,
|
||||
tool_calls=None,
|
||||
raw_answer=None,
|
||||
),
|
||||
tool_choice=ToolChoiceOptions.AUTO,
|
||||
)
|
||||
|
||||
assert isinstance(err, EmptyLLMResponseError)
|
||||
assert err.error_code == "BUDGET_EXCEEDED"
|
||||
assert err.is_retryable is False
|
||||
assert "quota" in err.client_error_msg.lower()
|
||||
|
||||
def test_reasoning_only_response_uses_generic_empty_response_error(
|
||||
self, monkeypatch: pytest.MonkeyPatch
|
||||
) -> None:
|
||||
monkeypatch.setattr("onyx.chat.llm_loop.is_true_openai_model", lambda *_: True)
|
||||
|
||||
err = _build_empty_llm_response_error(
|
||||
llm=self._make_llm(),
|
||||
llm_step_result=LlmStepResult(
|
||||
reasoning="scratchpad only",
|
||||
answer=None,
|
||||
tool_calls=None,
|
||||
raw_answer=None,
|
||||
),
|
||||
tool_choice=ToolChoiceOptions.AUTO,
|
||||
)
|
||||
|
||||
assert isinstance(err, EmptyLLMResponseError)
|
||||
assert err.error_code == "EMPTY_LLM_RESPONSE"
|
||||
assert err.is_retryable is True
|
||||
assert "quota" not in err.client_error_msg.lower()
|
||||
|
||||
34
backend/tests/unit/onyx/chat/test_process_message.py
Normal file
34
backend/tests/unit/onyx/chat/test_process_message.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from onyx.chat.process_message import remove_answer_citations
|
||||
|
||||
|
||||
def test_remove_answer_citations_strips_http_markdown_citation() -> None:
|
||||
answer = "The answer is Paris [[1]](https://example.com/doc)."
|
||||
|
||||
assert remove_answer_citations(answer) == "The answer is Paris."
|
||||
|
||||
|
||||
def test_remove_answer_citations_strips_empty_markdown_citation() -> None:
|
||||
answer = "The answer is Paris [[1]]()."
|
||||
|
||||
assert remove_answer_citations(answer) == "The answer is Paris."
|
||||
|
||||
|
||||
def test_remove_answer_citations_strips_citation_with_parentheses_in_url() -> None:
|
||||
answer = (
|
||||
"The answer is Paris "
|
||||
"[[1]](https://en.wikipedia.org/wiki/Function_(mathematics))."
|
||||
)
|
||||
|
||||
assert remove_answer_citations(answer) == "The answer is Paris."
|
||||
|
||||
|
||||
def test_remove_answer_citations_preserves_non_citation_markdown_links() -> None:
|
||||
answer = (
|
||||
"See [reference](https://example.com/Function_(mathematics)) "
|
||||
"for context [[1]](https://en.wikipedia.org/wiki/Function_(mathematics))."
|
||||
)
|
||||
|
||||
assert (
|
||||
remove_answer_citations(answer)
|
||||
== "See [reference](https://example.com/Function_(mathematics)) for context."
|
||||
)
|
||||
@@ -3,7 +3,10 @@ from unittest.mock import Mock
|
||||
import pytest
|
||||
|
||||
from onyx.chat import process_message
|
||||
from onyx.chat.models import AnswerStream
|
||||
from onyx.chat.models import StreamingError
|
||||
from onyx.configs import app_configs
|
||||
from onyx.server.query_and_chat.models import MessageResponseIDInfo
|
||||
from onyx.server.query_and_chat.models import SendMessageRequest
|
||||
|
||||
|
||||
@@ -35,3 +38,26 @@ def test_mock_llm_response_requires_integration_mode() -> None:
|
||||
db_session=Mock(),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def test_gather_stream_returns_empty_answer_when_streaming_error_only() -> None:
|
||||
packets: AnswerStream = iter(
|
||||
[
|
||||
MessageResponseIDInfo(
|
||||
user_message_id=None,
|
||||
reserved_assistant_message_id=42,
|
||||
),
|
||||
StreamingError(
|
||||
error="OpenAI quota exceeded",
|
||||
error_code="BUDGET_EXCEEDED",
|
||||
is_retryable=False,
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
result = process_message.gather_stream(packets)
|
||||
|
||||
assert result.answer == ""
|
||||
assert result.answer_citationless == ""
|
||||
assert result.error_msg == "OpenAI quota exceeded"
|
||||
assert result.message_id == 42
|
||||
|
||||
0
backend/tests/unit/onyx/hooks/__init__.py
Normal file
0
backend/tests/unit/onyx/hooks/__init__.py
Normal file
40
backend/tests/unit/onyx/hooks/test_api_dependencies.py
Normal file
40
backend/tests/unit/onyx/hooks/test_api_dependencies.py
Normal file
@@ -0,0 +1,40 @@
|
||||
"""Unit tests for the hooks feature gate."""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.error_handling.error_codes import OnyxErrorCode
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
from onyx.hooks.api_dependencies import require_hook_enabled
|
||||
|
||||
|
||||
class TestRequireHookEnabled:
|
||||
def test_raises_when_multi_tenant(self) -> None:
|
||||
with (
|
||||
patch("onyx.hooks.api_dependencies.MULTI_TENANT", True),
|
||||
patch("onyx.hooks.api_dependencies.HOOK_ENABLED", True),
|
||||
):
|
||||
with pytest.raises(OnyxError) as exc_info:
|
||||
require_hook_enabled()
|
||||
assert exc_info.value.error_code is OnyxErrorCode.SINGLE_TENANT_ONLY
|
||||
assert exc_info.value.status_code == 403
|
||||
assert "multi-tenant" in exc_info.value.detail
|
||||
|
||||
def test_raises_when_flag_disabled(self) -> None:
|
||||
with (
|
||||
patch("onyx.hooks.api_dependencies.MULTI_TENANT", False),
|
||||
patch("onyx.hooks.api_dependencies.HOOK_ENABLED", False),
|
||||
):
|
||||
with pytest.raises(OnyxError) as exc_info:
|
||||
require_hook_enabled()
|
||||
assert exc_info.value.error_code is OnyxErrorCode.ENV_VAR_GATED
|
||||
assert exc_info.value.status_code == 403
|
||||
assert "HOOK_ENABLED" in exc_info.value.detail
|
||||
|
||||
def test_passes_when_enabled_single_tenant(self) -> None:
|
||||
with (
|
||||
patch("onyx.hooks.api_dependencies.MULTI_TENANT", False),
|
||||
patch("onyx.hooks.api_dependencies.HOOK_ENABLED", True),
|
||||
):
|
||||
require_hook_enabled() # must not raise
|
||||
@@ -3,6 +3,7 @@
|
||||
from onyx.server.manage.llm.utils import generate_bedrock_display_name
|
||||
from onyx.server.manage.llm.utils import generate_ollama_display_name
|
||||
from onyx.server.manage.llm.utils import infer_vision_support
|
||||
from onyx.server.manage.llm.utils import is_embedding_model
|
||||
from onyx.server.manage.llm.utils import is_reasoning_model
|
||||
from onyx.server.manage.llm.utils import is_valid_bedrock_model
|
||||
from onyx.server.manage.llm.utils import strip_openrouter_vendor_prefix
|
||||
@@ -209,3 +210,35 @@ class TestIsReasoningModel:
|
||||
is_reasoning_model("anthropic/claude-3-5-sonnet", "Claude 3.5 Sonnet")
|
||||
is False
|
||||
)
|
||||
|
||||
|
||||
class TestIsEmbeddingModel:
|
||||
"""Tests for embedding model detection."""
|
||||
|
||||
def test_openai_embedding_ada(self) -> None:
|
||||
assert is_embedding_model("text-embedding-ada-002") is True
|
||||
|
||||
def test_openai_embedding_3_small(self) -> None:
|
||||
assert is_embedding_model("text-embedding-3-small") is True
|
||||
|
||||
def test_openai_embedding_3_large(self) -> None:
|
||||
assert is_embedding_model("text-embedding-3-large") is True
|
||||
|
||||
def test_cohere_embed_model(self) -> None:
|
||||
assert is_embedding_model("embed-english-v3.0") is True
|
||||
|
||||
def test_bedrock_titan_embed(self) -> None:
|
||||
assert is_embedding_model("amazon.titan-embed-text-v1") is True
|
||||
|
||||
def test_gpt4o_not_embedding(self) -> None:
|
||||
assert is_embedding_model("gpt-4o") is False
|
||||
|
||||
def test_gpt4_not_embedding(self) -> None:
|
||||
assert is_embedding_model("gpt-4") is False
|
||||
|
||||
def test_dall_e_not_embedding(self) -> None:
|
||||
assert is_embedding_model("dall-e-3") is False
|
||||
|
||||
def test_unknown_custom_model_not_embedding(self) -> None:
|
||||
"""Custom/local models not in litellm's model DB should default to False."""
|
||||
assert is_embedding_model("my-custom-local-model-v1") is False
|
||||
|
||||
@@ -17,7 +17,7 @@ def test_mt_cloud_telemetry_noop_when_not_multi_tenant(monkeypatch: Any) -> None
|
||||
|
||||
telemetry_utils.mt_cloud_telemetry(
|
||||
tenant_id="tenant-1",
|
||||
distinct_id="user@example.com",
|
||||
distinct_id="12345678-1234-1234-1234-123456789abc",
|
||||
event=MilestoneRecordType.USER_MESSAGE_SENT,
|
||||
properties={"origin": "web"},
|
||||
)
|
||||
@@ -40,7 +40,7 @@ def test_mt_cloud_telemetry_calls_event_telemetry_when_multi_tenant(
|
||||
|
||||
telemetry_utils.mt_cloud_telemetry(
|
||||
tenant_id="tenant-1",
|
||||
distinct_id="user@example.com",
|
||||
distinct_id="12345678-1234-1234-1234-123456789abc",
|
||||
event=MilestoneRecordType.USER_MESSAGE_SENT,
|
||||
properties={"origin": "web"},
|
||||
)
|
||||
@@ -51,7 +51,52 @@ def test_mt_cloud_telemetry_calls_event_telemetry_when_multi_tenant(
|
||||
fallback=telemetry_utils.noop_fallback,
|
||||
)
|
||||
event_telemetry.assert_called_once_with(
|
||||
"user@example.com",
|
||||
"12345678-1234-1234-1234-123456789abc",
|
||||
MilestoneRecordType.USER_MESSAGE_SENT,
|
||||
{"origin": "web", "tenant_id": "tenant-1"},
|
||||
)
|
||||
|
||||
|
||||
def test_mt_cloud_identify_noop_when_not_multi_tenant(monkeypatch: Any) -> None:
|
||||
fetch_impl = Mock()
|
||||
monkeypatch.setattr(
|
||||
telemetry_utils,
|
||||
"fetch_versioned_implementation_with_fallback",
|
||||
fetch_impl,
|
||||
)
|
||||
monkeypatch.setattr("onyx.utils.telemetry.MULTI_TENANT", False)
|
||||
|
||||
telemetry_utils.mt_cloud_identify(
|
||||
distinct_id="12345678-1234-1234-1234-123456789abc",
|
||||
properties={"email": "user@example.com"},
|
||||
)
|
||||
|
||||
fetch_impl.assert_not_called()
|
||||
|
||||
|
||||
def test_mt_cloud_identify_calls_identify_user_when_multi_tenant(
|
||||
monkeypatch: Any,
|
||||
) -> None:
|
||||
identify_user = Mock()
|
||||
fetch_impl = Mock(return_value=identify_user)
|
||||
monkeypatch.setattr(
|
||||
telemetry_utils,
|
||||
"fetch_versioned_implementation_with_fallback",
|
||||
fetch_impl,
|
||||
)
|
||||
monkeypatch.setattr("onyx.utils.telemetry.MULTI_TENANT", True)
|
||||
|
||||
telemetry_utils.mt_cloud_identify(
|
||||
distinct_id="12345678-1234-1234-1234-123456789abc",
|
||||
properties={"email": "user@example.com"},
|
||||
)
|
||||
|
||||
fetch_impl.assert_called_once_with(
|
||||
module="onyx.utils.telemetry",
|
||||
attribute="identify_user",
|
||||
fallback=telemetry_utils.noop_fallback,
|
||||
)
|
||||
identify_user.assert_called_once_with(
|
||||
"12345678-1234-1234-1234-123456789abc",
|
||||
{"email": "user@example.com"},
|
||||
)
|
||||
|
||||
@@ -32,15 +32,17 @@ def test_run_with_timeout_raises_on_timeout(slow: float, timeout: float) -> None
|
||||
"""Test that a function that exceeds timeout raises TimeoutError"""
|
||||
|
||||
def slow_function() -> None:
|
||||
time.sleep(slow) # Sleep for 2 seconds
|
||||
time.sleep(slow)
|
||||
|
||||
start = time.monotonic()
|
||||
with pytest.raises(TimeoutError) as exc_info:
|
||||
start = time.time()
|
||||
run_with_timeout(timeout, slow_function) # Set timeout to 0.1 seconds
|
||||
end = time.time()
|
||||
assert end - start >= timeout
|
||||
assert end - start < (slow + timeout) / 2
|
||||
run_with_timeout(timeout, slow_function)
|
||||
elapsed = time.monotonic() - start
|
||||
|
||||
assert f"timed out after {timeout} seconds" in str(exc_info.value)
|
||||
assert elapsed >= timeout
|
||||
# Should return around the timeout duration, not the full sleep duration
|
||||
assert elapsed == pytest.approx(timeout, abs=0.8)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::pytest.PytestUnhandledThreadExceptionWarning")
|
||||
|
||||
@@ -44,13 +44,13 @@ class TestBuildVespaFilters:
|
||||
assert result == f'({SOURCE_TYPE} contains "web") and '
|
||||
|
||||
def test_acl(self) -> None:
|
||||
"""Test with acls."""
|
||||
"""Test with acls — uses weightedSet operator for efficient matching."""
|
||||
# Single ACL
|
||||
filters = IndexFilters(access_control_list=["user1"])
|
||||
result = build_vespa_filters(filters)
|
||||
assert (
|
||||
result
|
||||
== f'!({HIDDEN}=true) and (access_control_list contains "user1") and '
|
||||
== f'!({HIDDEN}=true) and weightedSet(access_control_list, {{"user1":1}}) and '
|
||||
)
|
||||
|
||||
# Multiple ACL's
|
||||
@@ -58,7 +58,7 @@ class TestBuildVespaFilters:
|
||||
result = build_vespa_filters(filters)
|
||||
assert (
|
||||
result
|
||||
== f'!({HIDDEN}=true) and (access_control_list contains "user2" or access_control_list contains "group2") and '
|
||||
== f'!({HIDDEN}=true) and weightedSet(access_control_list, {{"user2":1, "group2":1}}) and '
|
||||
)
|
||||
|
||||
def test_tenant_filter(self) -> None:
|
||||
@@ -250,7 +250,7 @@ class TestBuildVespaFilters:
|
||||
result = build_vespa_filters(filters)
|
||||
|
||||
expected = f"!({HIDDEN}=true) and "
|
||||
expected += '(access_control_list contains "user1" or access_control_list contains "group1") and '
|
||||
expected += 'weightedSet(access_control_list, {"user1":1, "group1":1}) and '
|
||||
expected += f'({SOURCE_TYPE} contains "web") and '
|
||||
expected += f'({METADATA_LIST} contains "color{INDEX_SEPARATOR}red") and '
|
||||
# Knowledge scope filters are OR'd together
|
||||
@@ -290,6 +290,38 @@ class TestBuildVespaFilters:
|
||||
expected = f'!({HIDDEN}=true) and (({DOCUMENT_SETS} contains "engineering") or ({DOCUMENT_ID} contains "{str(id1)}")) and '
|
||||
assert expected == result
|
||||
|
||||
def test_acl_large_list_uses_weighted_set(self) -> None:
|
||||
"""Verify that large ACL lists produce a weightedSet clause
|
||||
instead of OR-chained contains — this is what prevents Vespa
|
||||
HTTP 400 errors for users with thousands of permission groups."""
|
||||
acl = [f"external_group:google_drive_{i}" for i in range(10_000)]
|
||||
acl += ["user_email:user@example.com", "__PUBLIC__"]
|
||||
filters = IndexFilters(access_control_list=acl)
|
||||
result = build_vespa_filters(filters)
|
||||
|
||||
assert "weightedSet(access_control_list, {" in result
|
||||
# Must NOT contain OR-chained contains clauses
|
||||
assert "access_control_list contains" not in result
|
||||
# All entries should be present
|
||||
assert '"external_group:google_drive_0":1' in result
|
||||
assert '"external_group:google_drive_9999":1' in result
|
||||
assert '"user_email:user@example.com":1' in result
|
||||
assert '"__PUBLIC__":1' in result
|
||||
|
||||
def test_acl_empty_strings_filtered(self) -> None:
|
||||
"""Empty strings in the ACL list should be filtered out."""
|
||||
filters = IndexFilters(access_control_list=["user1", "", "group1"])
|
||||
result = build_vespa_filters(filters)
|
||||
assert (
|
||||
result
|
||||
== f'!({HIDDEN}=true) and weightedSet(access_control_list, {{"user1":1, "group1":1}}) and '
|
||||
)
|
||||
|
||||
# All empty
|
||||
filters = IndexFilters(access_control_list=["", ""])
|
||||
result = build_vespa_filters(filters)
|
||||
assert result == f"!({HIDDEN}=true) and "
|
||||
|
||||
def test_empty_or_none_values(self) -> None:
|
||||
"""Test with empty or None values in filter lists."""
|
||||
# Empty strings in document set
|
||||
|
||||
@@ -12,6 +12,11 @@ services:
|
||||
api_server:
|
||||
ports:
|
||||
- "8080:8080"
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: "${API_SERVER_CPU_LIMIT:-0}"
|
||||
memory: "${API_SERVER_MEM_LIMIT:-0}"
|
||||
|
||||
# Uncomment the block below to enable the MCP server for Onyx.
|
||||
# mcp_server:
|
||||
|
||||
@@ -15,8 +15,9 @@
|
||||
# -f docker-compose.dev.yml up -d --wait
|
||||
#
|
||||
# This overlay:
|
||||
# - Moves Vespa (index), both model servers, code-interpreter, Redis (cache),
|
||||
# and the background worker to profiles so they do not start by default
|
||||
# - Moves Vespa (index), both model servers, OpenSearch, MinIO,
|
||||
# Redis (cache), and the background worker to profiles so they do
|
||||
# not start by default
|
||||
# - Makes depends_on references to removed services optional
|
||||
# - Sets DISABLE_VECTOR_DB=true on the api_server
|
||||
# - Uses PostgreSQL for caching and auth instead of Redis
|
||||
@@ -27,7 +28,8 @@
|
||||
# --profile inference Inference model server
|
||||
# --profile background Background worker (Celery) — also needs redis
|
||||
# --profile redis Redis cache
|
||||
# --profile code-interpreter Code interpreter
|
||||
# --profile opensearch OpenSearch
|
||||
# --profile s3-filestore MinIO (S3-compatible file store)
|
||||
# =============================================================================
|
||||
|
||||
name: onyx
|
||||
@@ -38,6 +40,9 @@ services:
|
||||
index:
|
||||
condition: service_started
|
||||
required: false
|
||||
opensearch:
|
||||
condition: service_started
|
||||
required: false
|
||||
cache:
|
||||
condition: service_started
|
||||
required: false
|
||||
@@ -84,4 +89,10 @@ services:
|
||||
inference_model_server:
|
||||
profiles: ["inference"]
|
||||
|
||||
code-interpreter: {}
|
||||
# OpenSearch is not needed in lite mode (no indexing).
|
||||
opensearch:
|
||||
profiles: ["opensearch"]
|
||||
|
||||
# MinIO is not needed in lite mode (Postgres handles file storage).
|
||||
minio:
|
||||
profiles: ["s3-filestore"]
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
set -euo pipefail
|
||||
|
||||
# Expected resource requirements
|
||||
# Expected resource requirements (overridden below if --lite)
|
||||
EXPECTED_DOCKER_RAM_GB=10
|
||||
EXPECTED_DISK_GB=32
|
||||
|
||||
@@ -10,6 +10,11 @@ EXPECTED_DISK_GB=32
|
||||
SHUTDOWN_MODE=false
|
||||
DELETE_DATA_MODE=false
|
||||
INCLUDE_CRAFT=false # Disabled by default, use --include-craft to enable
|
||||
LITE_MODE=false # Disabled by default, use --lite to enable
|
||||
USE_LOCAL_FILES=false # Disabled by default, use --local to skip downloading config files
|
||||
NO_PROMPT=false
|
||||
DRY_RUN=false
|
||||
VERBOSE=false
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
@@ -25,6 +30,26 @@ while [[ $# -gt 0 ]]; do
|
||||
INCLUDE_CRAFT=true
|
||||
shift
|
||||
;;
|
||||
--lite)
|
||||
LITE_MODE=true
|
||||
shift
|
||||
;;
|
||||
--local)
|
||||
USE_LOCAL_FILES=true
|
||||
shift
|
||||
;;
|
||||
--no-prompt)
|
||||
NO_PROMPT=true
|
||||
shift
|
||||
;;
|
||||
--dry-run)
|
||||
DRY_RUN=true
|
||||
shift
|
||||
;;
|
||||
--verbose)
|
||||
VERBOSE=true
|
||||
shift
|
||||
;;
|
||||
--help|-h)
|
||||
echo "Onyx Installation Script"
|
||||
echo ""
|
||||
@@ -32,15 +57,23 @@ while [[ $# -gt 0 ]]; do
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " --include-craft Enable Onyx Craft (AI-powered web app building)"
|
||||
echo " --lite Deploy Onyx Lite (no Vespa, Redis, or model servers)"
|
||||
echo " --local Use existing config files instead of downloading from GitHub"
|
||||
echo " --shutdown Stop (pause) Onyx containers"
|
||||
echo " --delete-data Remove all Onyx data (containers, volumes, and files)"
|
||||
echo " --no-prompt Run non-interactively with defaults (for CI/automation)"
|
||||
echo " --dry-run Show what would be done without making changes"
|
||||
echo " --verbose Show detailed output for debugging"
|
||||
echo " --help, -h Show this help message"
|
||||
echo ""
|
||||
echo "Examples:"
|
||||
echo " $0 # Install Onyx"
|
||||
echo " $0 --lite # Install Onyx Lite (minimal deployment)"
|
||||
echo " $0 --include-craft # Install Onyx with Craft enabled"
|
||||
echo " $0 --shutdown # Pause Onyx services"
|
||||
echo " $0 --delete-data # Completely remove Onyx and all data"
|
||||
echo " $0 --local # Re-run using existing config files on disk"
|
||||
echo " $0 --no-prompt # Non-interactive install with defaults"
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
@@ -51,8 +84,129 @@ while [[ $# -gt 0 ]]; do
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ "$VERBOSE" = true ]]; then
|
||||
set -x
|
||||
fi
|
||||
|
||||
if [[ "$LITE_MODE" = true ]] && [[ "$INCLUDE_CRAFT" = true ]]; then
|
||||
echo "ERROR: --lite and --include-craft cannot be used together."
|
||||
echo "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# When --lite is passed as a flag, lower resource thresholds early (before the
|
||||
# resource check). When lite is chosen interactively, the thresholds are adjusted
|
||||
# inside the new-deployment flow, after the resource check has already passed
|
||||
# with the standard thresholds — which is the safer direction.
|
||||
if [[ "$LITE_MODE" = true ]]; then
|
||||
EXPECTED_DOCKER_RAM_GB=4
|
||||
EXPECTED_DISK_GB=16
|
||||
fi
|
||||
|
||||
INSTALL_ROOT="${INSTALL_PREFIX:-onyx_data}"
|
||||
|
||||
LITE_COMPOSE_FILE="docker-compose.onyx-lite.yml"
|
||||
|
||||
# Build the -f flags for docker compose.
|
||||
# Pass "true" as $1 to auto-detect a previously-downloaded lite overlay
|
||||
# (used by shutdown/delete-data so users don't need to remember --lite).
|
||||
# Without the argument, the lite overlay is only included when --lite was
|
||||
# explicitly passed — preventing install/start from silently staying in
|
||||
# lite mode just because the file exists on disk from a prior run.
|
||||
compose_file_args() {
|
||||
local auto_detect="${1:-false}"
|
||||
local args="-f docker-compose.yml"
|
||||
if [[ "$LITE_MODE" = true ]] || { [[ "$auto_detect" = true ]] && [[ -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" ]]; }; then
|
||||
args="$args -f ${LITE_COMPOSE_FILE}"
|
||||
fi
|
||||
echo "$args"
|
||||
}
|
||||
|
||||
# --- Downloader detection (curl with wget fallback) ---
|
||||
DOWNLOADER=""
|
||||
detect_downloader() {
|
||||
if command -v curl &> /dev/null; then
|
||||
DOWNLOADER="curl"
|
||||
return 0
|
||||
fi
|
||||
if command -v wget &> /dev/null; then
|
||||
DOWNLOADER="wget"
|
||||
return 0
|
||||
fi
|
||||
echo "ERROR: Neither curl nor wget found. Please install one and retry."
|
||||
exit 1
|
||||
}
|
||||
detect_downloader
|
||||
|
||||
download_file() {
|
||||
local url="$1"
|
||||
local output="$2"
|
||||
if [[ "$DOWNLOADER" == "curl" ]]; then
|
||||
curl -fsSL --retry 3 --retry-delay 2 --retry-connrefused -o "$output" "$url"
|
||||
else
|
||||
wget -q --tries=3 --timeout=20 -O "$output" "$url"
|
||||
fi
|
||||
}
|
||||
|
||||
# Ensures a required file is present. With --local, verifies the file exists on
|
||||
# disk. Otherwise, downloads it from the given URL. Returns 0 on success, 1 on
|
||||
# failure (caller should handle the exit).
|
||||
ensure_file() {
|
||||
local path="$1"
|
||||
local url="$2"
|
||||
local desc="$3"
|
||||
|
||||
if [[ "$USE_LOCAL_FILES" = true ]]; then
|
||||
if [[ -f "$path" ]]; then
|
||||
print_success "Using existing ${desc}"
|
||||
return 0
|
||||
fi
|
||||
print_error "Required file missing: ${desc} (${path})"
|
||||
return 1
|
||||
fi
|
||||
|
||||
print_info "Downloading ${desc}..."
|
||||
if download_file "$url" "$path" 2>/dev/null; then
|
||||
print_success "${desc} downloaded"
|
||||
return 0
|
||||
fi
|
||||
print_error "Failed to download ${desc}"
|
||||
print_info "Please ensure you have internet connection and try again"
|
||||
return 1
|
||||
}
|
||||
|
||||
# --- Interactive prompt helpers ---
|
||||
is_interactive() {
|
||||
[[ "$NO_PROMPT" = false ]] && [[ -t 0 ]]
|
||||
}
|
||||
|
||||
prompt_or_default() {
|
||||
local prompt_text="$1"
|
||||
local default_value="$2"
|
||||
if is_interactive; then
|
||||
read -p "$prompt_text" -r REPLY
|
||||
if [[ -z "$REPLY" ]]; then
|
||||
REPLY="$default_value"
|
||||
fi
|
||||
else
|
||||
REPLY="$default_value"
|
||||
fi
|
||||
}
|
||||
|
||||
prompt_yn_or_default() {
|
||||
local prompt_text="$1"
|
||||
local default_value="$2"
|
||||
if is_interactive; then
|
||||
read -p "$prompt_text" -n 1 -r
|
||||
echo ""
|
||||
if [[ -z "$REPLY" ]]; then
|
||||
REPLY="$default_value"
|
||||
fi
|
||||
else
|
||||
REPLY="$default_value"
|
||||
fi
|
||||
}
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
@@ -111,7 +265,7 @@ if [ "$SHUTDOWN_MODE" = true ]; then
|
||||
fi
|
||||
|
||||
# Stop containers (without removing them)
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml stop)
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args true) stop)
|
||||
if [ $? -eq 0 ]; then
|
||||
print_success "Onyx containers stopped (paused)"
|
||||
else
|
||||
@@ -140,12 +294,17 @@ if [ "$DELETE_DATA_MODE" = true ]; then
|
||||
echo " • All downloaded files and configurations"
|
||||
echo " • All user data and documents"
|
||||
echo ""
|
||||
read -p "Are you sure you want to continue? Type 'DELETE' to confirm: " -r
|
||||
echo ""
|
||||
|
||||
if [ "$REPLY" != "DELETE" ]; then
|
||||
print_info "Operation cancelled."
|
||||
exit 0
|
||||
if is_interactive; then
|
||||
read -p "Are you sure you want to continue? Type 'DELETE' to confirm: " -r
|
||||
echo ""
|
||||
if [ "$REPLY" != "DELETE" ]; then
|
||||
print_info "Operation cancelled."
|
||||
exit 0
|
||||
fi
|
||||
else
|
||||
print_error "Cannot confirm destructive operation in non-interactive mode."
|
||||
print_info "Run interactively or remove the ${INSTALL_ROOT} directory manually."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
print_info "Removing Onyx containers and volumes..."
|
||||
@@ -164,7 +323,7 @@ if [ "$DELETE_DATA_MODE" = true ]; then
|
||||
fi
|
||||
|
||||
# Stop and remove containers with volumes
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml down -v)
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args true) down -v)
|
||||
if [ $? -eq 0 ]; then
|
||||
print_success "Onyx containers and volumes removed"
|
||||
else
|
||||
@@ -186,6 +345,117 @@ if [ "$DELETE_DATA_MODE" = true ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# --- Auto-install Docker (Linux only) ---
|
||||
# Runs before the banner so a group-based re-exec doesn't repeat it.
|
||||
install_docker_linux() {
|
||||
local distro_id=""
|
||||
if [[ -f /etc/os-release ]]; then
|
||||
distro_id="$(. /etc/os-release && echo "${ID:-}")"
|
||||
fi
|
||||
|
||||
case "$distro_id" in
|
||||
amzn)
|
||||
print_info "Detected Amazon Linux — installing Docker via package manager..."
|
||||
if command -v dnf &> /dev/null; then
|
||||
sudo dnf install -y docker
|
||||
else
|
||||
sudo yum install -y docker
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
print_info "Installing Docker via get.docker.com..."
|
||||
download_file "https://get.docker.com" /tmp/get-docker.sh
|
||||
sudo sh /tmp/get-docker.sh
|
||||
rm -f /tmp/get-docker.sh
|
||||
;;
|
||||
esac
|
||||
|
||||
sudo systemctl start docker 2>/dev/null || sudo service docker start 2>/dev/null || true
|
||||
sudo systemctl enable docker 2>/dev/null || true
|
||||
}
|
||||
|
||||
# Detect OS (including WSL)
|
||||
IS_WSL=false
|
||||
if [[ -n "${WSL_DISTRO_NAME:-}" ]] || grep -qi microsoft /proc/version 2>/dev/null; then
|
||||
IS_WSL=true
|
||||
fi
|
||||
|
||||
# Dry-run: show plan and exit
|
||||
if [[ "$DRY_RUN" = true ]]; then
|
||||
print_info "Dry run mode — showing what would happen:"
|
||||
echo " • Install root: ${INSTALL_ROOT}"
|
||||
echo " • Lite mode: ${LITE_MODE}"
|
||||
echo " • Include Craft: ${INCLUDE_CRAFT}"
|
||||
echo " • OS type: ${OSTYPE:-unknown} (WSL: ${IS_WSL})"
|
||||
echo " • Downloader: ${DOWNLOADER}"
|
||||
echo ""
|
||||
print_success "Dry run complete (no changes made)"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if ! command -v docker &> /dev/null; then
|
||||
if [[ "$OSTYPE" == "linux-gnu"* ]] || [[ -n "${WSL_DISTRO_NAME:-}" ]]; then
|
||||
install_docker_linux
|
||||
if ! command -v docker &> /dev/null; then
|
||||
print_error "Docker installation failed."
|
||||
echo " Visit: https://docs.docker.com/get-docker/"
|
||||
exit 1
|
||||
fi
|
||||
print_success "Docker installed successfully"
|
||||
fi
|
||||
fi
|
||||
|
||||
# --- Auto-install Docker Compose plugin (Linux only) ---
|
||||
if command -v docker &> /dev/null \
|
||||
&& ! docker compose version &> /dev/null \
|
||||
&& ! command -v docker-compose &> /dev/null \
|
||||
&& { [[ "$OSTYPE" == "linux-gnu"* ]] || [[ -n "${WSL_DISTRO_NAME:-}" ]]; }; then
|
||||
|
||||
print_info "Docker Compose not found — installing plugin..."
|
||||
COMPOSE_ARCH="$(uname -m)"
|
||||
COMPOSE_URL="https://github.com/docker/compose/releases/latest/download/docker-compose-linux-${COMPOSE_ARCH}"
|
||||
COMPOSE_DIR="/usr/local/lib/docker/cli-plugins"
|
||||
COMPOSE_TMP="$(mktemp)"
|
||||
sudo mkdir -p "$COMPOSE_DIR"
|
||||
if download_file "$COMPOSE_URL" "$COMPOSE_TMP"; then
|
||||
sudo mv "$COMPOSE_TMP" "$COMPOSE_DIR/docker-compose"
|
||||
sudo chmod +x "$COMPOSE_DIR/docker-compose"
|
||||
if docker compose version &> /dev/null; then
|
||||
print_success "Docker Compose plugin installed"
|
||||
else
|
||||
print_error "Docker Compose plugin installed but not detected."
|
||||
echo " Visit: https://docs.docker.com/compose/install/"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
rm -f "$COMPOSE_TMP"
|
||||
print_error "Failed to download Docker Compose plugin."
|
||||
echo " Visit: https://docs.docker.com/compose/install/"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# On Linux, ensure the current user can talk to the Docker daemon without
|
||||
# sudo. If necessary, add them to the "docker" group and re-exec the
|
||||
# script under that group so the rest of the install proceeds normally.
|
||||
if command -v docker &> /dev/null \
|
||||
&& { [[ "$OSTYPE" == "linux-gnu"* ]] || [[ -n "${WSL_DISTRO_NAME:-}" ]]; } \
|
||||
&& [[ "$(id -u)" -ne 0 ]] \
|
||||
&& ! docker info &> /dev/null; then
|
||||
if [[ "${_ONYX_REEXEC:-}" = "1" ]]; then
|
||||
print_error "Cannot connect to Docker after group re-exec."
|
||||
print_info "Log out and back in, then run the script again."
|
||||
exit 1
|
||||
fi
|
||||
if ! getent group docker &> /dev/null; then
|
||||
sudo groupadd docker
|
||||
fi
|
||||
print_info "Adding $USER to the docker group..."
|
||||
sudo usermod -aG docker "$USER"
|
||||
print_info "Re-launching with docker group active..."
|
||||
exec sg docker -c "_ONYX_REEXEC=1 bash $(printf '%q ' "$0" "$@")"
|
||||
fi
|
||||
|
||||
# ASCII Art Banner
|
||||
echo ""
|
||||
echo -e "${BLUE}${BOLD}"
|
||||
@@ -209,8 +479,7 @@ echo "2. Check your system resources (Docker, memory, disk space)"
|
||||
echo "3. Guide you through deployment options (version, authentication)"
|
||||
echo ""
|
||||
|
||||
# Only prompt for acknowledgment if running interactively
|
||||
if [ -t 0 ]; then
|
||||
if is_interactive; then
|
||||
echo -e "${YELLOW}${BOLD}Please acknowledge and press Enter to continue...${NC}"
|
||||
read -r
|
||||
echo ""
|
||||
@@ -260,41 +529,35 @@ else
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Function to compare version numbers
|
||||
# Returns 0 if $1 <= $2, 1 if $1 > $2
|
||||
# Handles missing or non-numeric parts gracefully (treats them as 0)
|
||||
version_compare() {
|
||||
# Returns 0 if $1 <= $2, 1 if $1 > $2
|
||||
local version1=$1
|
||||
local version2=$2
|
||||
local version1="${1:-0.0.0}"
|
||||
local version2="${2:-0.0.0}"
|
||||
|
||||
# Split versions into components
|
||||
local v1_major=$(echo $version1 | cut -d. -f1)
|
||||
local v1_minor=$(echo $version1 | cut -d. -f2)
|
||||
local v1_patch=$(echo $version1 | cut -d. -f3)
|
||||
local v1_major v1_minor v1_patch v2_major v2_minor v2_patch
|
||||
v1_major=$(echo "$version1" | cut -d. -f1)
|
||||
v1_minor=$(echo "$version1" | cut -d. -f2)
|
||||
v1_patch=$(echo "$version1" | cut -d. -f3)
|
||||
v2_major=$(echo "$version2" | cut -d. -f1)
|
||||
v2_minor=$(echo "$version2" | cut -d. -f2)
|
||||
v2_patch=$(echo "$version2" | cut -d. -f3)
|
||||
|
||||
local v2_major=$(echo $version2 | cut -d. -f1)
|
||||
local v2_minor=$(echo $version2 | cut -d. -f2)
|
||||
local v2_patch=$(echo $version2 | cut -d. -f3)
|
||||
# Default non-numeric or empty parts to 0
|
||||
[[ "$v1_major" =~ ^[0-9]+$ ]] || v1_major=0
|
||||
[[ "$v1_minor" =~ ^[0-9]+$ ]] || v1_minor=0
|
||||
[[ "$v1_patch" =~ ^[0-9]+$ ]] || v1_patch=0
|
||||
[[ "$v2_major" =~ ^[0-9]+$ ]] || v2_major=0
|
||||
[[ "$v2_minor" =~ ^[0-9]+$ ]] || v2_minor=0
|
||||
[[ "$v2_patch" =~ ^[0-9]+$ ]] || v2_patch=0
|
||||
|
||||
# Compare major version
|
||||
if [ "$v1_major" -lt "$v2_major" ]; then
|
||||
return 0
|
||||
elif [ "$v1_major" -gt "$v2_major" ]; then
|
||||
return 1
|
||||
fi
|
||||
if [ "$v1_major" -lt "$v2_major" ]; then return 0
|
||||
elif [ "$v1_major" -gt "$v2_major" ]; then return 1; fi
|
||||
|
||||
# Compare minor version
|
||||
if [ "$v1_minor" -lt "$v2_minor" ]; then
|
||||
return 0
|
||||
elif [ "$v1_minor" -gt "$v2_minor" ]; then
|
||||
return 1
|
||||
fi
|
||||
if [ "$v1_minor" -lt "$v2_minor" ]; then return 0
|
||||
elif [ "$v1_minor" -gt "$v2_minor" ]; then return 1; fi
|
||||
|
||||
# Compare patch version
|
||||
if [ "$v1_patch" -le "$v2_patch" ]; then
|
||||
return 0
|
||||
else
|
||||
return 1
|
||||
fi
|
||||
[ "$v1_patch" -le "$v2_patch" ]
|
||||
}
|
||||
|
||||
# Check Docker daemon
|
||||
@@ -336,10 +599,20 @@ fi
|
||||
|
||||
# Convert to GB for display
|
||||
if [ "$MEMORY_MB" -gt 0 ]; then
|
||||
MEMORY_GB=$((MEMORY_MB / 1024))
|
||||
print_info "Docker memory allocation: ~${MEMORY_GB}GB"
|
||||
MEMORY_GB=$(awk "BEGIN {printf \"%.1f\", $MEMORY_MB / 1024}")
|
||||
if [ "$(awk "BEGIN {print ($MEMORY_MB >= 1024)}")" = "1" ]; then
|
||||
MEMORY_DISPLAY="~${MEMORY_GB}GB"
|
||||
else
|
||||
MEMORY_DISPLAY="${MEMORY_MB}MB"
|
||||
fi
|
||||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||
print_info "Docker memory allocation: ${MEMORY_DISPLAY}"
|
||||
else
|
||||
print_info "System memory: ${MEMORY_DISPLAY} (Docker uses host memory directly)"
|
||||
fi
|
||||
else
|
||||
print_warning "Could not determine Docker memory allocation"
|
||||
print_warning "Could not determine memory allocation"
|
||||
MEMORY_DISPLAY="unknown"
|
||||
MEMORY_MB=0
|
||||
fi
|
||||
|
||||
@@ -358,7 +631,7 @@ RESOURCE_WARNING=false
|
||||
EXPECTED_RAM_MB=$((EXPECTED_DOCKER_RAM_GB * 1024))
|
||||
|
||||
if [ "$MEMORY_MB" -gt 0 ] && [ "$MEMORY_MB" -lt "$EXPECTED_RAM_MB" ]; then
|
||||
print_warning "Docker has less than ${EXPECTED_DOCKER_RAM_GB}GB RAM allocated (found: ~${MEMORY_GB}GB)"
|
||||
print_warning "Less than ${EXPECTED_DOCKER_RAM_GB}GB RAM available (found: ${MEMORY_DISPLAY})"
|
||||
RESOURCE_WARNING=true
|
||||
fi
|
||||
|
||||
@@ -369,10 +642,10 @@ fi
|
||||
|
||||
if [ "$RESOURCE_WARNING" = true ]; then
|
||||
echo ""
|
||||
print_warning "Onyx recommends at least ${EXPECTED_DOCKER_RAM_GB}GB RAM and ${EXPECTED_DISK_GB}GB disk space for optimal performance."
|
||||
echo ""
|
||||
read -p "Do you want to continue anyway? (y/N): " -n 1 -r
|
||||
print_warning "Onyx recommends at least ${EXPECTED_DOCKER_RAM_GB}GB RAM and ${EXPECTED_DISK_GB}GB disk space for optimal performance in standard mode."
|
||||
print_warning "Lite mode requires less resources (1-4GB RAM, 8-16GB disk depending on usage), but does not include a vector database."
|
||||
echo ""
|
||||
prompt_yn_or_default "Do you want to continue anyway? (Y/n): " "y"
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
|
||||
print_info "Installation cancelled. Please allocate more resources and try again."
|
||||
exit 1
|
||||
@@ -385,117 +658,89 @@ print_step "Creating directory structure"
|
||||
if [ -d "${INSTALL_ROOT}" ]; then
|
||||
print_info "Directory structure already exists"
|
||||
print_success "Using existing ${INSTALL_ROOT} directory"
|
||||
else
|
||||
mkdir -p "${INSTALL_ROOT}/deployment"
|
||||
mkdir -p "${INSTALL_ROOT}/data/nginx/local"
|
||||
print_success "Directory structure created"
|
||||
fi
|
||||
mkdir -p "${INSTALL_ROOT}/deployment"
|
||||
mkdir -p "${INSTALL_ROOT}/data/nginx/local"
|
||||
print_success "Directory structure created"
|
||||
|
||||
# Download all required files
|
||||
print_step "Downloading Onyx configuration files"
|
||||
print_info "This step downloads all necessary configuration files from GitHub..."
|
||||
echo ""
|
||||
print_info "Downloading the following files:"
|
||||
echo " • docker-compose.yml - Main Docker Compose configuration"
|
||||
echo " • env.template - Environment variables template"
|
||||
echo " • nginx/app.conf.template - Nginx web server configuration"
|
||||
echo " • nginx/run-nginx.sh - Nginx startup script"
|
||||
echo " • README.md - Documentation and setup instructions"
|
||||
echo ""
|
||||
|
||||
# Download Docker Compose file
|
||||
COMPOSE_FILE="${INSTALL_ROOT}/deployment/docker-compose.yml"
|
||||
print_info "Downloading docker-compose.yml..."
|
||||
if curl -fsSL -o "$COMPOSE_FILE" "${GITHUB_RAW_URL}/docker-compose.yml" 2>/dev/null; then
|
||||
print_success "Docker Compose file downloaded successfully"
|
||||
|
||||
# Check if Docker Compose version is older than 2.24.0 and show warning
|
||||
# Skip check for dev builds (assume they're recent enough)
|
||||
if [ "$COMPOSE_VERSION" != "dev" ] && version_compare "$COMPOSE_VERSION" "2.24.0"; then
|
||||
print_warning "Docker Compose version $COMPOSE_VERSION is older than 2.24.0"
|
||||
echo ""
|
||||
print_warning "The docker-compose.yml file uses the newer env_file format that requires Docker Compose 2.24.0 or later."
|
||||
echo ""
|
||||
print_info "To use this configuration with your current Docker Compose version, you have two options:"
|
||||
echo ""
|
||||
echo "1. Upgrade Docker Compose to version 2.24.0 or later (recommended)"
|
||||
echo " Visit: https://docs.docker.com/compose/install/"
|
||||
echo ""
|
||||
echo "2. Manually replace all env_file sections in docker-compose.yml"
|
||||
echo " Change from:"
|
||||
echo " env_file:"
|
||||
echo " - path: .env"
|
||||
echo " required: false"
|
||||
echo " To:"
|
||||
echo " env_file: .env"
|
||||
echo ""
|
||||
print_warning "The installation will continue, but may fail if Docker Compose cannot parse the file."
|
||||
echo ""
|
||||
read -p "Do you want to continue anyway? (y/N): " -n 1 -r
|
||||
echo ""
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
|
||||
print_info "Installation cancelled. Please upgrade Docker Compose or manually edit the docker-compose.yml file."
|
||||
exit 1
|
||||
fi
|
||||
print_info "Proceeding with installation despite Docker Compose version compatibility issues..."
|
||||
fi
|
||||
else
|
||||
print_error "Failed to download Docker Compose file"
|
||||
print_info "Please ensure you have internet connection and try again"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Download env.template file
|
||||
ENV_TEMPLATE="${INSTALL_ROOT}/deployment/env.template"
|
||||
print_info "Downloading env.template..."
|
||||
if curl -fsSL -o "$ENV_TEMPLATE" "${GITHUB_RAW_URL}/env.template" 2>/dev/null; then
|
||||
print_success "Environment template downloaded successfully"
|
||||
else
|
||||
print_error "Failed to download env.template"
|
||||
print_info "Please ensure you have internet connection and try again"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Download nginx config files
|
||||
# Ensure all required configuration files are present
|
||||
NGINX_BASE_URL="https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/data/nginx"
|
||||
|
||||
# Download app.conf.template
|
||||
NGINX_CONFIG="${INSTALL_ROOT}/data/nginx/app.conf.template"
|
||||
print_info "Downloading nginx configuration template..."
|
||||
if curl -fsSL -o "$NGINX_CONFIG" "$NGINX_BASE_URL/app.conf.template" 2>/dev/null; then
|
||||
print_success "Nginx configuration template downloaded"
|
||||
if [[ "$USE_LOCAL_FILES" = true ]]; then
|
||||
print_step "Verifying existing configuration files"
|
||||
else
|
||||
print_error "Failed to download nginx configuration template"
|
||||
print_info "Please ensure you have internet connection and try again"
|
||||
exit 1
|
||||
print_step "Downloading Onyx configuration files"
|
||||
print_info "This step downloads all necessary configuration files from GitHub..."
|
||||
fi
|
||||
|
||||
# Download run-nginx.sh script
|
||||
NGINX_RUN_SCRIPT="${INSTALL_ROOT}/data/nginx/run-nginx.sh"
|
||||
print_info "Downloading nginx startup script..."
|
||||
if curl -fsSL -o "$NGINX_RUN_SCRIPT" "$NGINX_BASE_URL/run-nginx.sh" 2>/dev/null; then
|
||||
chmod +x "$NGINX_RUN_SCRIPT"
|
||||
print_success "Nginx startup script downloaded and made executable"
|
||||
else
|
||||
print_error "Failed to download nginx startup script"
|
||||
print_info "Please ensure you have internet connection and try again"
|
||||
exit 1
|
||||
ensure_file "${INSTALL_ROOT}/deployment/docker-compose.yml" \
|
||||
"${GITHUB_RAW_URL}/docker-compose.yml" "docker-compose.yml" || exit 1
|
||||
|
||||
# Check Docker Compose version compatibility after obtaining docker-compose.yml
|
||||
if [ "$COMPOSE_VERSION" != "dev" ] && version_compare "$COMPOSE_VERSION" "2.24.0"; then
|
||||
print_warning "Docker Compose version $COMPOSE_VERSION is older than 2.24.0"
|
||||
echo ""
|
||||
print_warning "The docker-compose.yml file uses the newer env_file format that requires Docker Compose 2.24.0 or later."
|
||||
echo ""
|
||||
print_info "To use this configuration with your current Docker Compose version, you have two options:"
|
||||
echo ""
|
||||
echo "1. Upgrade Docker Compose to version 2.24.0 or later (recommended)"
|
||||
echo " Visit: https://docs.docker.com/compose/install/"
|
||||
echo ""
|
||||
echo "2. Manually replace all env_file sections in docker-compose.yml"
|
||||
echo " Change from:"
|
||||
echo " env_file:"
|
||||
echo " - path: .env"
|
||||
echo " required: false"
|
||||
echo " To:"
|
||||
echo " env_file: .env"
|
||||
echo ""
|
||||
print_warning "The installation will continue, but may fail if Docker Compose cannot parse the file."
|
||||
echo ""
|
||||
prompt_yn_or_default "Do you want to continue anyway? (Y/n): " "y"
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
|
||||
print_info "Installation cancelled. Please upgrade Docker Compose or manually edit the docker-compose.yml file."
|
||||
exit 1
|
||||
fi
|
||||
print_info "Proceeding with installation despite Docker Compose version compatibility issues..."
|
||||
fi
|
||||
|
||||
# Download README file
|
||||
README_FILE="${INSTALL_ROOT}/README.md"
|
||||
print_info "Downloading README.md..."
|
||||
if curl -fsSL -o "$README_FILE" "${GITHUB_RAW_URL}/README.md" 2>/dev/null; then
|
||||
print_success "README.md downloaded successfully"
|
||||
else
|
||||
print_error "Failed to download README.md"
|
||||
print_info "Please ensure you have internet connection and try again"
|
||||
exit 1
|
||||
# Handle lite overlay: ensure it if --lite, clean up stale copies otherwise
|
||||
if [[ "$LITE_MODE" = true ]]; then
|
||||
ensure_file "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" \
|
||||
"${GITHUB_RAW_URL}/${LITE_COMPOSE_FILE}" "${LITE_COMPOSE_FILE}" || exit 1
|
||||
elif [[ -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" ]]; then
|
||||
if [[ -f "${INSTALL_ROOT}/deployment/.env" ]]; then
|
||||
print_warning "Existing lite overlay found but --lite was not passed."
|
||||
prompt_yn_or_default "Remove lite overlay and switch to standard mode? (y/N): " "n"
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
|
||||
print_info "Keeping existing lite overlay. Pass --lite to keep using lite mode."
|
||||
LITE_MODE=true
|
||||
else
|
||||
rm -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}"
|
||||
print_info "Removed lite overlay (switching to standard mode)"
|
||||
fi
|
||||
else
|
||||
rm -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}"
|
||||
print_info "Removed previous lite overlay (switching to standard mode)"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Create empty local directory marker (if needed)
|
||||
ensure_file "${INSTALL_ROOT}/deployment/env.template" \
|
||||
"${GITHUB_RAW_URL}/env.template" "env.template" || exit 1
|
||||
|
||||
ensure_file "${INSTALL_ROOT}/data/nginx/app.conf.template" \
|
||||
"$NGINX_BASE_URL/app.conf.template" "nginx/app.conf.template" || exit 1
|
||||
|
||||
ensure_file "${INSTALL_ROOT}/data/nginx/run-nginx.sh" \
|
||||
"$NGINX_BASE_URL/run-nginx.sh" "nginx/run-nginx.sh" || exit 1
|
||||
chmod +x "${INSTALL_ROOT}/data/nginx/run-nginx.sh"
|
||||
|
||||
ensure_file "${INSTALL_ROOT}/README.md" \
|
||||
"${GITHUB_RAW_URL}/README.md" "README.md" || exit 1
|
||||
|
||||
touch "${INSTALL_ROOT}/data/nginx/local/.gitkeep"
|
||||
print_success "All configuration files downloaded successfully"
|
||||
print_success "All configuration files ready"
|
||||
|
||||
# Set up deployment configuration
|
||||
print_step "Setting up deployment configs"
|
||||
@@ -513,7 +758,7 @@ if [ -d "${INSTALL_ROOT}/deployment" ] && [ -f "${INSTALL_ROOT}/deployment/docke
|
||||
|
||||
if [ -n "$COMPOSE_CMD" ]; then
|
||||
# Check if any containers are running
|
||||
RUNNING_CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml ps -q 2>/dev/null | wc -l)
|
||||
RUNNING_CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args true) ps -q 2>/dev/null | wc -l)
|
||||
if [ "$RUNNING_CONTAINERS" -gt 0 ]; then
|
||||
print_error "Onyx services are currently running!"
|
||||
echo ""
|
||||
@@ -534,7 +779,7 @@ if [ -f "$ENV_FILE" ]; then
|
||||
echo "• Press Enter to restart with current configuration"
|
||||
echo "• Type 'update' to update to a newer version"
|
||||
echo ""
|
||||
read -p "Choose an option [default: restart]: " -r
|
||||
prompt_or_default "Choose an option [default: restart]: " ""
|
||||
echo ""
|
||||
|
||||
if [ "$REPLY" = "update" ]; then
|
||||
@@ -543,26 +788,30 @@ if [ -f "$ENV_FILE" ]; then
|
||||
echo "• Press Enter for latest (recommended)"
|
||||
echo "• Type a specific tag (e.g., v0.1.0)"
|
||||
echo ""
|
||||
# If --include-craft was passed, default to craft-latest
|
||||
if [ "$INCLUDE_CRAFT" = true ]; then
|
||||
read -p "Enter tag [default: craft-latest]: " -r VERSION
|
||||
prompt_or_default "Enter tag [default: craft-latest]: " "craft-latest"
|
||||
VERSION="$REPLY"
|
||||
else
|
||||
read -p "Enter tag [default: latest]: " -r VERSION
|
||||
prompt_or_default "Enter tag [default: latest]: " "latest"
|
||||
VERSION="$REPLY"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
if [ -z "$VERSION" ]; then
|
||||
if [ "$INCLUDE_CRAFT" = true ]; then
|
||||
VERSION="craft-latest"
|
||||
print_info "Selected: craft-latest (Craft enabled)"
|
||||
else
|
||||
VERSION="latest"
|
||||
print_info "Selected: Latest version"
|
||||
fi
|
||||
if [ "$INCLUDE_CRAFT" = true ] && [ "$VERSION" = "craft-latest" ]; then
|
||||
print_info "Selected: craft-latest (Craft enabled)"
|
||||
elif [ "$VERSION" = "latest" ]; then
|
||||
print_info "Selected: Latest version"
|
||||
else
|
||||
print_info "Selected: $VERSION"
|
||||
fi
|
||||
|
||||
# Reject craft image tags when running in lite mode
|
||||
if [[ "$LITE_MODE" = true ]] && [[ "${VERSION:-}" == craft-* ]]; then
|
||||
print_error "Cannot use a craft image tag (${VERSION}) with --lite."
|
||||
print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Update .env file with new version
|
||||
print_info "Updating configuration for version $VERSION..."
|
||||
if grep -q "^IMAGE_TAG=" "$ENV_FILE"; then
|
||||
@@ -581,13 +830,67 @@ if [ -f "$ENV_FILE" ]; then
|
||||
fi
|
||||
print_success "Configuration updated for upgrade"
|
||||
else
|
||||
# Reject restarting a craft deployment in lite mode
|
||||
EXISTING_TAG=$(grep "^IMAGE_TAG=" "$ENV_FILE" | head -1 | cut -d'=' -f2 | tr -d ' "'"'"'')
|
||||
if [[ "$LITE_MODE" = true ]] && [[ "${EXISTING_TAG:-}" == craft-* ]]; then
|
||||
print_error "Cannot restart a craft deployment (${EXISTING_TAG}) with --lite."
|
||||
print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
print_info "Keeping existing configuration..."
|
||||
print_success "Will restart with current settings"
|
||||
fi
|
||||
|
||||
# Ensure COMPOSE_PROFILES is cleared when running in lite mode on an
|
||||
# existing .env (the template ships with s3-filestore enabled).
|
||||
if [[ "$LITE_MODE" = true ]] && grep -q "^COMPOSE_PROFILES=.*s3-filestore" "$ENV_FILE" 2>/dev/null; then
|
||||
sed -i.bak 's/^COMPOSE_PROFILES=.*/COMPOSE_PROFILES=/' "$ENV_FILE" 2>/dev/null || true
|
||||
print_success "Cleared COMPOSE_PROFILES for lite mode"
|
||||
fi
|
||||
else
|
||||
print_info "No existing .env file found. Setting up new deployment..."
|
||||
echo ""
|
||||
|
||||
# Ask for deployment mode (standard vs lite) unless already set via --lite flag
|
||||
if [[ "$LITE_MODE" = false ]]; then
|
||||
print_info "Which deployment mode would you like?"
|
||||
echo ""
|
||||
echo " 1) Standard - Full deployment with search, connectors, and RAG"
|
||||
echo " 2) Lite - Minimal deployment (no Vespa, Redis, or model servers)"
|
||||
echo " LLM chat, tools, file uploads, and Projects still work"
|
||||
echo ""
|
||||
prompt_or_default "Choose a mode (1 or 2) [default: 1]: " "1"
|
||||
echo ""
|
||||
|
||||
case "$REPLY" in
|
||||
2)
|
||||
LITE_MODE=true
|
||||
print_info "Selected: Lite mode"
|
||||
ensure_file "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" \
|
||||
"${GITHUB_RAW_URL}/${LITE_COMPOSE_FILE}" "${LITE_COMPOSE_FILE}" || exit 1
|
||||
;;
|
||||
*)
|
||||
print_info "Selected: Standard mode"
|
||||
;;
|
||||
esac
|
||||
else
|
||||
print_info "Deployment mode: Lite (set via --lite flag)"
|
||||
fi
|
||||
|
||||
# Validate lite + craft combination (could now be set interactively)
|
||||
if [[ "$LITE_MODE" = true ]] && [[ "$INCLUDE_CRAFT" = true ]]; then
|
||||
print_error "--include-craft cannot be used with Lite mode."
|
||||
print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Adjust resource expectations for lite mode
|
||||
if [[ "$LITE_MODE" = true ]]; then
|
||||
EXPECTED_DOCKER_RAM_GB=4
|
||||
EXPECTED_DISK_GB=16
|
||||
fi
|
||||
|
||||
# Ask for version
|
||||
print_info "Which tag would you like to deploy?"
|
||||
echo ""
|
||||
@@ -595,23 +898,21 @@ else
|
||||
echo "• Press Enter for craft-latest (recommended for Craft)"
|
||||
echo "• Type a specific tag (e.g., craft-v1.0.0)"
|
||||
echo ""
|
||||
read -p "Enter tag [default: craft-latest]: " -r VERSION
|
||||
prompt_or_default "Enter tag [default: craft-latest]: " "craft-latest"
|
||||
VERSION="$REPLY"
|
||||
else
|
||||
echo "• Press Enter for latest (recommended)"
|
||||
echo "• Type a specific tag (e.g., v0.1.0)"
|
||||
echo ""
|
||||
read -p "Enter tag [default: latest]: " -r VERSION
|
||||
prompt_or_default "Enter tag [default: latest]: " "latest"
|
||||
VERSION="$REPLY"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
if [ -z "$VERSION" ]; then
|
||||
if [ "$INCLUDE_CRAFT" = true ]; then
|
||||
VERSION="craft-latest"
|
||||
print_info "Selected: craft-latest (Craft enabled)"
|
||||
else
|
||||
VERSION="latest"
|
||||
print_info "Selected: Latest tag"
|
||||
fi
|
||||
if [ "$INCLUDE_CRAFT" = true ] && [ "$VERSION" = "craft-latest" ]; then
|
||||
print_info "Selected: craft-latest (Craft enabled)"
|
||||
elif [ "$VERSION" = "latest" ]; then
|
||||
print_info "Selected: Latest tag"
|
||||
else
|
||||
print_info "Selected: $VERSION"
|
||||
fi
|
||||
@@ -645,6 +946,13 @@ else
|
||||
# Use basic auth by default
|
||||
AUTH_SCHEMA="basic"
|
||||
|
||||
# Reject craft image tags when running in lite mode (must check before writing .env)
|
||||
if [[ "$LITE_MODE" = true ]] && [[ "${VERSION:-}" == craft-* ]]; then
|
||||
print_error "Cannot use a craft image tag (${VERSION}) with --lite."
|
||||
print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Create .env file from template
|
||||
print_info "Creating .env file with your selections..."
|
||||
cp "$ENV_TEMPLATE" "$ENV_FILE"
|
||||
@@ -654,6 +962,13 @@ else
|
||||
sed -i.bak "s/^IMAGE_TAG=.*/IMAGE_TAG=$VERSION/" "$ENV_FILE"
|
||||
print_success "IMAGE_TAG set to $VERSION"
|
||||
|
||||
# In lite mode, clear COMPOSE_PROFILES so profiled services (MinIO, etc.)
|
||||
# stay disabled — the template ships with s3-filestore enabled by default.
|
||||
if [[ "$LITE_MODE" = true ]]; then
|
||||
sed -i.bak 's/^COMPOSE_PROFILES=.*/COMPOSE_PROFILES=/' "$ENV_FILE" 2>/dev/null || true
|
||||
print_success "Cleared COMPOSE_PROFILES for lite mode"
|
||||
fi
|
||||
|
||||
# Configure basic authentication (default)
|
||||
sed -i.bak 's/^AUTH_TYPE=.*/AUTH_TYPE=basic/' "$ENV_FILE" 2>/dev/null || true
|
||||
print_success "Basic authentication enabled in configuration"
|
||||
@@ -774,7 +1089,7 @@ print_step "Pulling Docker images"
|
||||
print_info "This may take several minutes depending on your internet connection..."
|
||||
echo ""
|
||||
print_info "Downloading Docker images (this may take a while)..."
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml pull --quiet)
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) pull --quiet)
|
||||
if [ $? -eq 0 ]; then
|
||||
print_success "Docker images downloaded successfully"
|
||||
else
|
||||
@@ -788,9 +1103,9 @@ print_info "Launching containers..."
|
||||
echo ""
|
||||
if [ "$USE_LATEST" = true ]; then
|
||||
print_info "Force pulling latest images and recreating containers..."
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml up -d --pull always --force-recreate)
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) up -d --pull always --force-recreate)
|
||||
else
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml up -d)
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) up -d)
|
||||
fi
|
||||
if [ $? -ne 0 ]; then
|
||||
print_error "Failed to start Onyx services"
|
||||
@@ -812,7 +1127,7 @@ echo ""
|
||||
# Check for restart loops
|
||||
print_info "Checking container health status..."
|
||||
RESTART_ISSUES=false
|
||||
CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml ps -q 2>/dev/null)
|
||||
CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) ps -q 2>/dev/null)
|
||||
|
||||
for CONTAINER in $CONTAINERS; do
|
||||
PROJECT_NAME="$(basename "$INSTALL_ROOT")_deployment_"
|
||||
@@ -841,7 +1156,7 @@ if [ "$RESTART_ISSUES" = true ]; then
|
||||
print_error "Some containers are experiencing issues!"
|
||||
echo ""
|
||||
print_info "Please check the logs for more information:"
|
||||
echo " (cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD -f docker-compose.yml logs)"
|
||||
echo " (cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD $(compose_file_args) logs)"
|
||||
|
||||
echo ""
|
||||
print_info "If the issue persists, please contact: founders@onyx.app"
|
||||
@@ -860,8 +1175,12 @@ check_onyx_health() {
|
||||
echo ""
|
||||
|
||||
while [ $attempt -le $max_attempts ]; do
|
||||
# Check for successful HTTP responses (200, 301, 302, etc.)
|
||||
local http_code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$port")
|
||||
local http_code=""
|
||||
if [[ "$DOWNLOADER" == "curl" ]]; then
|
||||
http_code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$port" 2>/dev/null || echo "000")
|
||||
else
|
||||
http_code=$(wget -q --spider -S "http://localhost:$port" 2>&1 | grep "HTTP/" | tail -1 | awk '{print $2}' || echo "000")
|
||||
fi
|
||||
if echo "$http_code" | grep -qE "^(200|301|302|303|307|308)$"; then
|
||||
return 0
|
||||
fi
|
||||
@@ -917,6 +1236,18 @@ print_info "If authentication is enabled, you can create your admin account here
|
||||
echo " • Visit http://localhost:${HOST_PORT}/auth/signup to create your admin account"
|
||||
echo " • The first user created will automatically have admin privileges"
|
||||
echo ""
|
||||
if [[ "$LITE_MODE" = true ]]; then
|
||||
echo ""
|
||||
print_info "Running in Lite mode — the following services are NOT started:"
|
||||
echo " • Vespa (vector database)"
|
||||
echo " • Redis (cache)"
|
||||
echo " • Model servers (embedding/inference)"
|
||||
echo " • Background workers (Celery)"
|
||||
echo ""
|
||||
print_info "Connectors and RAG search are disabled. LLM chat, tools, user file"
|
||||
print_info "uploads, Projects, Agent knowledge, and code interpreter still work."
|
||||
fi
|
||||
echo ""
|
||||
print_info "Refer to the README in the ${INSTALL_ROOT} directory for more information."
|
||||
echo ""
|
||||
print_info "For help or issues, contact: founders@onyx.app"
|
||||
|
||||
@@ -19,12 +19,13 @@
|
||||
|
||||
## Output template to file and inspect
|
||||
* cd charts/onyx
|
||||
* helm template test-output . > test-output.yaml
|
||||
* helm template test-output . --set auth.opensearch.values.opensearch_admin_password='StrongPassword123!' > test-output.yaml
|
||||
|
||||
## Test the entire cluster manually
|
||||
* cd charts/onyx
|
||||
* helm install onyx . -n onyx --set postgresql.primary.persistence.enabled=false
|
||||
* helm install onyx . -n onyx --set postgresql.primary.persistence.enabled=false --set auth.opensearch.values.opensearch_admin_password='StrongPassword123!'
|
||||
* the postgres flag is to keep the storage ephemeral for testing. You probably don't want to set that in prod.
|
||||
* the OpenSearch admin password must be set on first install unless you are supplying `auth.opensearch.existingSecret`.
|
||||
* no flag for ephemeral vespa storage yet, might be good for testing
|
||||
* kubectl -n onyx port-forward service/onyx-nginx 8080:80
|
||||
* this will forward the local port 8080 to the installed chart for you to run tests, etc.
|
||||
|
||||
@@ -5,7 +5,7 @@ home: https://www.onyx.app/
|
||||
sources:
|
||||
- "https://github.com/onyx-dot-app/onyx"
|
||||
type: application
|
||||
version: 0.4.35
|
||||
version: 0.4.36
|
||||
appVersion: latest
|
||||
annotations:
|
||||
category: Productivity
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
# Values for chart-testing (ct lint/install)
|
||||
# This file is automatically used by ct when running lint and install commands
|
||||
auth:
|
||||
opensearch:
|
||||
values:
|
||||
opensearch_admin_password: "placeholder-OpenSearch1!"
|
||||
userauth:
|
||||
values:
|
||||
user_auth_secret: "placeholder-for-ci-testing"
|
||||
|
||||
@@ -1177,12 +1177,17 @@ auth:
|
||||
# Secrets values IF existingSecret is empty. Key here must match the value
|
||||
# in secretKeys to be used. Values will be base64 encoded in the k8s
|
||||
# cluster.
|
||||
# For the bundled OpenSearch chart, the admin password is consumed during
|
||||
# initial cluster setup. Changing this value later will update Onyx's
|
||||
# client credentials, but will not rotate the OpenSearch admin password.
|
||||
# Set this before first install or use existingSecret to preserve the
|
||||
# current secret on upgrade.
|
||||
# Password must meet OpenSearch complexity requirements:
|
||||
# min 8 chars, uppercase, lowercase, digit, and special character.
|
||||
# CHANGE THIS FOR PRODUCTION.
|
||||
# Required when auth.opensearch.enabled=true and no existing secret exists.
|
||||
values:
|
||||
opensearch_admin_username: "admin"
|
||||
opensearch_admin_password: "OnyxDev1!"
|
||||
opensearch_admin_password: ""
|
||||
userauth:
|
||||
# -- Used for password reset / verification tokens and OAuth/OIDC state signing.
|
||||
# Disabled by default to preserve upgrade compatibility for existing Helm customers.
|
||||
|
||||
@@ -144,7 +144,7 @@ dev = [
|
||||
"matplotlib==3.10.8",
|
||||
"mypy-extensions==1.0.0",
|
||||
"mypy==1.13.0",
|
||||
"onyx-devtools==0.7.0",
|
||||
"onyx-devtools==0.7.1",
|
||||
"openapi-generator-cli==7.17.0",
|
||||
"pandas-stubs~=2.3.3",
|
||||
"pre-commit==3.2.2",
|
||||
|
||||
@@ -4,9 +4,11 @@ import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
@@ -92,12 +94,57 @@ Examples:
|
||||
return cmd
|
||||
}
|
||||
|
||||
func isPortAvailable(port int) bool {
|
||||
ln, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
_ = ln.Close()
|
||||
return true
|
||||
}
|
||||
|
||||
func getProcessOnPort(port int) string {
|
||||
out, err := exec.Command("lsof", "-i", fmt.Sprintf(":%d", port), "-t").Output()
|
||||
if err != nil || len(strings.TrimSpace(string(out))) == 0 {
|
||||
return "an unknown process"
|
||||
}
|
||||
pid := strings.Split(strings.TrimSpace(string(out)), "\n")[0]
|
||||
nameOut, err := exec.Command("ps", "-p", pid, "-o", "comm=").Output()
|
||||
if err != nil || len(strings.TrimSpace(string(nameOut))) == 0 {
|
||||
return fmt.Sprintf("process (PID %s)", pid)
|
||||
}
|
||||
return fmt.Sprintf("%s (PID %s)", strings.TrimSpace(string(nameOut)), pid)
|
||||
}
|
||||
|
||||
func resolvePort(port string) string {
|
||||
portNum, err := strconv.Atoi(port)
|
||||
if err != nil {
|
||||
log.Fatalf("Invalid port %q: %v", port, err)
|
||||
}
|
||||
if isPortAvailable(portNum) {
|
||||
return port
|
||||
}
|
||||
proc := getProcessOnPort(portNum)
|
||||
candidate := portNum + 1
|
||||
for candidate <= 65535 {
|
||||
if isPortAvailable(candidate) {
|
||||
log.Warnf("⚠ Port %d is in use by %s, using available port %d instead.", portNum, proc, candidate)
|
||||
return strconv.Itoa(candidate)
|
||||
}
|
||||
candidate++
|
||||
}
|
||||
log.Fatalf("No available ports found starting from %d", portNum)
|
||||
return port
|
||||
}
|
||||
|
||||
func runBackendService(name, module, port string, opts *BackendOptions) {
|
||||
root, err := paths.GitRoot()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to find git root: %v", err)
|
||||
}
|
||||
|
||||
port = resolvePort(port)
|
||||
|
||||
envFile := ensureBackendEnvFile(root)
|
||||
fileVars := loadBackendEnvFile(envFile)
|
||||
|
||||
|
||||
34
uv.lock
generated
34
uv.lock
generated
@@ -453,14 +453,14 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "authlib"
|
||||
version = "1.6.7"
|
||||
version = "1.6.9"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "cryptography" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/49/dc/ed1681bf1339dd6ea1ce56136bad4baabc6f7ad466e375810702b0237047/authlib-1.6.7.tar.gz", hash = "sha256:dbf10100011d1e1b34048c9d120e83f13b35d69a826ae762b93d2fb5aafc337b", size = 164950, upload-time = "2026-02-06T14:04:14.171Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/af/98/00d3dd826d46959ad8e32af2dbb2398868fd9fd0683c26e56d0789bd0e68/authlib-1.6.9.tar.gz", hash = "sha256:d8f2421e7e5980cc1ddb4e32d3f5fa659cfaf60d8eaf3281ebed192e4ab74f04", size = 165134, upload-time = "2026-03-02T07:44:01.998Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f8/00/3ed12264094ec91f534fae429945efbaa9f8c666f3aa7061cc3b2a26a0cd/authlib-1.6.7-py2.py3-none-any.whl", hash = "sha256:c637340d9a02789d2efa1d003a7437d10d3e565237bcb5fcbc6c134c7b95bab0", size = 244115, upload-time = "2026-02-06T14:04:12.141Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/53/23/b65f568ed0c22f1efacb744d2db1a33c8068f384b8c9b482b52ebdbc3ef6/authlib-1.6.9-py2.py3-none-any.whl", hash = "sha256:f08b4c14e08f0861dc18a32357b33fbcfd2ea86cfe3fe149484b4d764c4a0ac3", size = 244197, upload-time = "2026-03-02T07:44:00.307Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4458,7 +4458,7 @@ requires-dist = [
|
||||
{ name = "numpy", marker = "extra == 'model-server'", specifier = "==2.4.1" },
|
||||
{ name = "oauthlib", marker = "extra == 'backend'", specifier = "==3.2.2" },
|
||||
{ name = "office365-rest-python-client", marker = "extra == 'backend'", specifier = "==2.6.2" },
|
||||
{ name = "onyx-devtools", marker = "extra == 'dev'", specifier = "==0.7.0" },
|
||||
{ name = "onyx-devtools", marker = "extra == 'dev'", specifier = "==0.7.1" },
|
||||
{ name = "openai", specifier = "==2.14.0" },
|
||||
{ name = "openapi-generator-cli", marker = "extra == 'dev'", specifier = "==7.17.0" },
|
||||
{ name = "openinference-instrumentation", marker = "extra == 'backend'", specifier = "==0.1.42" },
|
||||
@@ -4563,19 +4563,19 @@ requires-dist = [{ name = "onyx", extras = ["backend", "dev", "ee"], editable =
|
||||
|
||||
[[package]]
|
||||
name = "onyx-devtools"
|
||||
version = "0.7.0"
|
||||
version = "0.7.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "fastapi" },
|
||||
{ name = "openapi-generator-cli" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/22/9e/6957b11555da57d9e97092f4cd8ac09a86666264b0c9491838f4b27db5dc/onyx_devtools-0.7.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ad962a168d46ea11dcde9fa3b37e4f12ec520b4a4cb4d49d8732de110d46c4b6", size = 3998057, upload-time = "2026-03-12T03:09:11.585Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cd/90/c72f3d06ba677012d77c77de36195b6a32a15c755c79ba0282be74e3c366/onyx_devtools-0.7.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e46d252e2b048ff053b03519c3a875998780738d7c334eaa1c9a32ff445e3e1a", size = 3687753, upload-time = "2026-03-12T03:09:11.742Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/10/42/4e9fe36eccf9f76d67ba8f4ff6539196a09cd60351fb63f5865e1544cbfa/onyx_devtools-0.7.0-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:f280bc9320e1cc310e7d753a371009bfaab02cc0e0cfd78559663b15655b5a50", size = 3560144, upload-time = "2026-03-12T03:12:24.02Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/76/40/36dc12d99760b358c7f39b27361cb18fa9681ffe194107f982d0e1a74016/onyx_devtools-0.7.0-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:e31df751c7540ae7e70a7fe8e1153c79c31c2254af6aa4c72c0dd54fa381d2ab", size = 3964387, upload-time = "2026-03-12T03:09:11.356Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/34/18/74744230c3820a5a7687335507ca5f1dbebab2c5325805041c1cd5703e6a/onyx_devtools-0.7.0-py3-none-win_amd64.whl", hash = "sha256:541bfd347c2d5b11e7f63ab5001d2594df91d215ad9d07b1562f5e715700f7e6", size = 4068030, upload-time = "2026-03-12T03:09:12.98Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8c/78/1320436607d3ffcb321ba7b064556c020ea15843a7e7d903fbb7529a71f5/onyx_devtools-0.7.0-py3-none-win_arm64.whl", hash = "sha256:83016330a9d39712431916cc25b2fb2cfcaa0112a55cc4f919d545da3a8974f9", size = 3626409, upload-time = "2026-03-12T03:09:10.222Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/65/9d/74bcd02583706bdf90c8ac9084eb60bd71d0671392152410ab21b7b68ea1/onyx_devtools-0.7.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:178385dce0b413fd2a1f761055a99f556ec536ef5c32963fc273e751813621eb", size = 4007974, upload-time = "2026-03-17T21:10:39.267Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f0/f8/d8ddb32120428c083c60eb07244479da6e07eaebd31847658a049ab33815/onyx_devtools-0.7.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7960ae6e440ebf1584e02d9e1d0c9ef543b1d54c2584cdcace15695aec3121b2", size = 3696924, upload-time = "2026-03-17T21:10:50.716Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/87/21/1e427280066db42ff9dd5f34c70b9dca5d9781f96d0d9a88aaa454fdb432/onyx_devtools-0.7.1-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:6785dda88ca0a3d8464a9bfab76a253ed90da89d53a9c4a67227980f37df1ccf", size = 3568300, upload-time = "2026-03-17T21:10:41.997Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0e/0e/afbbe1164b3d016ddb5352353cb2541eef5a8b2c04e8f02d5d1319cb8b8c/onyx_devtools-0.7.1-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:9e77f2b725c0c00061a3dda5eba199404b51638cec0bf54fc7611fee1f26db34", size = 3974668, upload-time = "2026-03-17T21:10:43.879Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8a/a5/22840643289ef4ca83931b7a79fba8f1db7e626b4b870d4b4f8206c4ff5f/onyx_devtools-0.7.1-py3-none-win_amd64.whl", hash = "sha256:de37daa0e4db9b5dccf94408a3422be4f821e380ab70081bd1032cec1e3c91e6", size = 4078640, upload-time = "2026-03-17T21:10:40.275Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1e/c1/a0295506a521d9942b0f55523781a113e4555420d800a386d5a2eb46a7ad/onyx_devtools-0.7.1-py3-none-win_arm64.whl", hash = "sha256:ab88c53ebda6dff27350316b4ac9bd5f258cd586c2109971a9d976411e1e22ea", size = 3636787, upload-time = "2026-03-17T21:10:37.492Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -5458,11 +5458,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "pyasn1"
|
||||
version = "0.6.2"
|
||||
version = "0.6.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/fe/b6/6e630dff89739fcd427e3f72b3d905ce0acb85a45d4ec3e2678718a3487f/pyasn1-0.6.2.tar.gz", hash = "sha256:9b59a2b25ba7e4f8197db7686c09fb33e658b98339fadb826e9512629017833b", size = 146586, upload-time = "2026-01-16T18:04:18.534Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/44/b5/a96872e5184f354da9c84ae119971a0a4c221fe9b27a4d94bd43f2596727/pyasn1-0.6.2-py3-none-any.whl", hash = "sha256:1eb26d860996a18e9b6ed05e7aae0e9fc21619fcee6af91cca9bad4fbea224bf", size = 83371, upload-time = "2026-01-16T18:04:17.174Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -5643,11 +5643,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "pyjwt"
|
||||
version = "2.11.0"
|
||||
version = "2.12.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/5c/5a/b46fa56bf322901eee5b0454a34343cdbdae202cd421775a8ee4e42fd519/pyjwt-2.11.0.tar.gz", hash = "sha256:35f95c1f0fbe5d5ba6e43f00271c275f7a1a4db1dab27bf708073b75318ea623", size = 98019, upload-time = "2026-01-30T19:59:55.694Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a8/10/e8192be5f38f3e8e7e046716de4cae33d56fd5ae08927a823bb916be36c1/pyjwt-2.12.0.tar.gz", hash = "sha256:2f62390b667cd8257de560b850bb5a883102a388829274147f1d724453f8fb02", size = 102511, upload-time = "2026-03-12T17:15:30.831Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/6f/01/c26ce75ba460d5cd503da9e13b21a33804d38c2165dec7b716d06b13010c/pyjwt-2.11.0-py3-none-any.whl", hash = "sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469", size = 28224, upload-time = "2026-01-30T19:59:54.539Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/15/70/70f895f404d363d291dcf62c12c85fdd47619ad9674ac0f53364d035925a/pyjwt-2.12.0-py3-none-any.whl", hash = "sha256:9bb459d1bdd0387967d287f5656bf7ec2b9a26645d1961628cda1764e087fd6e", size = 29700, upload-time = "2026-03-12T17:15:29.257Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
|
||||
1838
web/lib/genui-core/package-lock.json
generated
1838
web/lib/genui-core/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -1,12 +0,0 @@
|
||||
{
|
||||
"name": "@onyx/genui",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"description": "Framework-agnostic structured UI rendering — parser, registry, prompt generation",
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./src/index.ts",
|
||||
"default": "./src/index.ts"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
import { z } from "zod";
|
||||
import type { ComponentDef } from "./types";
|
||||
|
||||
interface DefineComponentConfig<T extends z.ZodObject<z.ZodRawShape>> {
|
||||
name: string;
|
||||
description: string;
|
||||
props: T;
|
||||
component: unknown;
|
||||
group?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Define a GenUI component with typed props via Zod schema.
|
||||
* The `component` field is framework-agnostic (typed as `unknown` in core).
|
||||
* React bindings narrow this to `React.FC`.
|
||||
*/
|
||||
export function defineComponent<T extends z.ZodObject<z.ZodRawShape>>(
|
||||
config: DefineComponentConfig<T>
|
||||
): ComponentDef<T> {
|
||||
if (!/^[A-Z][a-zA-Z0-9]*$/.test(config.name)) {
|
||||
throw new Error(
|
||||
`Component name "${config.name}" must be PascalCase (start with uppercase, alphanumeric only)`
|
||||
);
|
||||
}
|
||||
|
||||
return {
|
||||
name: config.name,
|
||||
description: config.description,
|
||||
props: config.props,
|
||||
component: config.component,
|
||||
group: config.group,
|
||||
};
|
||||
}
|
||||
@@ -1,74 +0,0 @@
|
||||
// ── Types ──
|
||||
export type {
|
||||
Token,
|
||||
ASTNode,
|
||||
ComponentNode,
|
||||
ArgumentNode,
|
||||
ArrayNode,
|
||||
ObjectNode,
|
||||
LiteralNode,
|
||||
ReferenceNode,
|
||||
ElementNode,
|
||||
TextElementNode,
|
||||
ResolvedNode,
|
||||
Statement,
|
||||
ParseError,
|
||||
ParseResult,
|
||||
ComponentDef,
|
||||
ParamDef,
|
||||
ParamMap,
|
||||
Library,
|
||||
PromptOptions,
|
||||
ActionEvent,
|
||||
} from "./types";
|
||||
export { TokenType } from "./types";
|
||||
|
||||
// ── Component & Library ──
|
||||
export { defineComponent } from "./component";
|
||||
export { createLibrary } from "./library";
|
||||
|
||||
// ── Parser ──
|
||||
export { Tokenizer } from "./parser/tokenizer";
|
||||
export { Parser } from "./parser/parser";
|
||||
export { autoClose } from "./parser/autoclose";
|
||||
export { resolveReferences } from "./parser/resolver";
|
||||
export { validateAndTransform } from "./parser/validator";
|
||||
export { createStreamingParser } from "./parser/streaming";
|
||||
export type { StreamParser } from "./parser/streaming";
|
||||
|
||||
// ── Prompt ──
|
||||
export { generatePrompt } from "./prompt/generator";
|
||||
export { zodToTypeString, schemaToSignature } from "./prompt/introspector";
|
||||
|
||||
// ── Convenience: one-shot parse ──
|
||||
import type { Library, ParseResult, ElementNode, ASTNode } from "./types";
|
||||
import { Parser } from "./parser/parser";
|
||||
import { resolveReferences } from "./parser/resolver";
|
||||
import { validateAndTransform } from "./parser/validator";
|
||||
|
||||
/**
|
||||
* One-shot parse: tokenize → parse → resolve → validate.
|
||||
*/
|
||||
export function parse(input: string, library: Library): ParseResult {
|
||||
const parser = Parser.fromSource(input);
|
||||
const { statements, errors: parseErrors } = parser.parse();
|
||||
const { root, errors: resolveErrors } = resolveReferences(statements);
|
||||
|
||||
const allErrors = [...parseErrors, ...resolveErrors];
|
||||
|
||||
let rootElement: ElementNode | null = null;
|
||||
if (root) {
|
||||
const { element, errors: validateErrors } = validateAndTransform(
|
||||
root,
|
||||
library
|
||||
);
|
||||
rootElement = element;
|
||||
allErrors.push(...validateErrors);
|
||||
}
|
||||
|
||||
return {
|
||||
statements,
|
||||
root: rootElement as ASTNode | null,
|
||||
errors: allErrors,
|
||||
};
|
||||
}
|
||||
@@ -1,208 +0,0 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { z } from "zod";
|
||||
import { defineComponent, createLibrary, parse } from "./index";
|
||||
|
||||
/**
|
||||
* Integration tests: end-to-end from source → parsed element tree.
|
||||
*/
|
||||
|
||||
function makeTestLibrary() {
|
||||
return createLibrary([
|
||||
defineComponent({
|
||||
name: "Text",
|
||||
description: "Displays text",
|
||||
props: z.object({
|
||||
children: z.string(),
|
||||
headingH2: z.boolean().optional(),
|
||||
}),
|
||||
component: null,
|
||||
group: "Content",
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Button",
|
||||
description: "Interactive button",
|
||||
props: z.object({
|
||||
children: z.string(),
|
||||
main: z.boolean().optional(),
|
||||
primary: z.boolean().optional(),
|
||||
actionId: z.string().optional(),
|
||||
}),
|
||||
component: null,
|
||||
group: "Interactive",
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Tag",
|
||||
description: "Label tag",
|
||||
props: z.object({
|
||||
title: z.string(),
|
||||
color: z.enum(["green", "purple", "blue", "gray", "amber"]).optional(),
|
||||
}),
|
||||
component: null,
|
||||
group: "Content",
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Table",
|
||||
description: "Data table",
|
||||
props: z.object({
|
||||
columns: z.array(z.string()),
|
||||
rows: z.array(z.array(z.unknown())),
|
||||
}),
|
||||
component: null,
|
||||
group: "Content",
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Stack",
|
||||
description: "Vertical layout",
|
||||
props: z.object({
|
||||
children: z.array(z.unknown()).optional(),
|
||||
gap: z.enum(["none", "xs", "sm", "md", "lg", "xl"]).optional(),
|
||||
}),
|
||||
component: null,
|
||||
group: "Layout",
|
||||
}),
|
||||
]);
|
||||
}
|
||||
|
||||
describe("Integration: parse()", () => {
|
||||
it("parses the spec example end-to-end", () => {
|
||||
const lib = makeTestLibrary();
|
||||
const input = `title = Text("Search Results", headingH2: true)
|
||||
row1 = ["Onyx Docs", Tag("PDF", color: "blue"), "2024-01-15"]
|
||||
row2 = ["API Guide", Tag("MD", color: "green"), "2024-02-01"]
|
||||
results = Table(["Name", "Type", "Date"], [row1, row2])
|
||||
action = Button("View All", main: true, primary: true, actionId: "viewAll")
|
||||
root = Stack([title, results, action], gap: "md")`;
|
||||
|
||||
const result = parse(input, lib);
|
||||
expect(result.root).not.toBeNull();
|
||||
// Root should be a Stack element
|
||||
if (result.root && "component" in result.root) {
|
||||
expect((result.root as any).component).toBe("Stack");
|
||||
}
|
||||
});
|
||||
|
||||
it("parses a single component", () => {
|
||||
const lib = makeTestLibrary();
|
||||
const result = parse('x = Text("Hello World")', lib);
|
||||
expect(result.root).not.toBeNull();
|
||||
expect(
|
||||
result.errors.filter((e) => !e.message.includes("Unknown"))
|
||||
).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("handles unknown components gracefully", () => {
|
||||
const lib = makeTestLibrary();
|
||||
const result = parse('x = UnknownWidget("test")', lib);
|
||||
expect(result.root).not.toBeNull();
|
||||
expect(
|
||||
result.errors.some((e) => e.message.includes("Unknown component"))
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("handles empty input", () => {
|
||||
const lib = makeTestLibrary();
|
||||
const result = parse("", lib);
|
||||
expect(result.root).toBeNull();
|
||||
expect(result.errors).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Integration: library.prompt()", () => {
|
||||
it("generates a prompt with component signatures", () => {
|
||||
const lib = makeTestLibrary();
|
||||
const prompt = lib.prompt();
|
||||
|
||||
expect(prompt).toContain("GenUI Lang");
|
||||
expect(prompt).toContain("Text(");
|
||||
expect(prompt).toContain("Button(");
|
||||
expect(prompt).toContain("Tag(");
|
||||
expect(prompt).toContain("Table(");
|
||||
expect(prompt).toContain("Stack(");
|
||||
});
|
||||
|
||||
it("includes syntax rules", () => {
|
||||
const lib = makeTestLibrary();
|
||||
const prompt = lib.prompt();
|
||||
|
||||
expect(prompt).toContain("PascalCase");
|
||||
expect(prompt).toContain("camelCase");
|
||||
expect(prompt).toContain("positional");
|
||||
});
|
||||
|
||||
it("includes streaming guidelines by default", () => {
|
||||
const lib = makeTestLibrary();
|
||||
const prompt = lib.prompt();
|
||||
|
||||
expect(prompt).toContain("Streaming");
|
||||
});
|
||||
|
||||
it("can disable streaming guidelines", () => {
|
||||
const lib = makeTestLibrary();
|
||||
const prompt = lib.prompt({ streaming: false });
|
||||
|
||||
expect(prompt).not.toContain("Streaming Guidelines");
|
||||
});
|
||||
|
||||
it("includes custom examples", () => {
|
||||
const lib = makeTestLibrary();
|
||||
const prompt = lib.prompt({
|
||||
examples: [{ description: "Test example", code: 'x = Text("test")' }],
|
||||
});
|
||||
|
||||
expect(prompt).toContain("Test example");
|
||||
expect(prompt).toContain('x = Text("test")');
|
||||
});
|
||||
});
|
||||
|
||||
describe("Integration: defineComponent", () => {
|
||||
it("rejects non-PascalCase names", () => {
|
||||
expect(() =>
|
||||
defineComponent({
|
||||
name: "button",
|
||||
description: "Invalid",
|
||||
props: z.object({}),
|
||||
component: null,
|
||||
})
|
||||
).toThrow("PascalCase");
|
||||
});
|
||||
|
||||
it("accepts valid PascalCase names", () => {
|
||||
expect(() =>
|
||||
defineComponent({
|
||||
name: "MyWidget",
|
||||
description: "Valid",
|
||||
props: z.object({}),
|
||||
component: null,
|
||||
})
|
||||
).not.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Integration: createLibrary", () => {
|
||||
it("rejects duplicate component names", () => {
|
||||
const comp = defineComponent({
|
||||
name: "Foo",
|
||||
description: "Foo",
|
||||
props: z.object({}),
|
||||
component: null,
|
||||
});
|
||||
|
||||
expect(() => createLibrary([comp, comp])).toThrow("Duplicate");
|
||||
});
|
||||
|
||||
it("resolves components by name", () => {
|
||||
const lib = makeTestLibrary();
|
||||
expect(lib.resolve("Text")).toBeDefined();
|
||||
expect(lib.resolve("NonExistent")).toBeUndefined();
|
||||
});
|
||||
|
||||
it("generates param map", () => {
|
||||
const lib = makeTestLibrary();
|
||||
const paramMap = lib.paramMap();
|
||||
|
||||
const textParams = paramMap.get("Text");
|
||||
expect(textParams).toBeDefined();
|
||||
expect(textParams![0]!.name).toBe("children");
|
||||
expect(textParams![0]!.required).toBe(true);
|
||||
});
|
||||
});
|
||||
@@ -1,84 +0,0 @@
|
||||
import { z } from "zod";
|
||||
import type {
|
||||
ComponentDef,
|
||||
Library,
|
||||
ParamDef,
|
||||
ParamMap,
|
||||
PromptOptions,
|
||||
} from "./types";
|
||||
import { generatePrompt } from "./prompt/generator";
|
||||
|
||||
/**
|
||||
* Build ordered param definitions from a Zod object schema.
|
||||
* Ordering matches the shape key order (which is insertion order in JS objects).
|
||||
*/
|
||||
function buildParamDefs(schema: z.ZodObject<z.ZodRawShape>): ParamDef[] {
|
||||
const shape = schema.shape;
|
||||
return Object.entries(shape).map(([name, zodType]) => {
|
||||
const unwrapped = zodType as z.ZodTypeAny;
|
||||
const isOptional = unwrapped.isOptional();
|
||||
|
||||
return {
|
||||
name,
|
||||
required: !isOptional,
|
||||
zodType: unwrapped,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
interface CreateLibraryOptions {
|
||||
/** Default prompt options merged with per-call options */
|
||||
defaultPromptOptions?: PromptOptions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a component library from an array of component definitions.
|
||||
*/
|
||||
export function createLibrary(
|
||||
components: ComponentDef[],
|
||||
options?: CreateLibraryOptions
|
||||
): Library {
|
||||
const map = new Map<string, ComponentDef>();
|
||||
|
||||
for (const comp of components) {
|
||||
if (map.has(comp.name)) {
|
||||
throw new Error(`Duplicate component name: "${comp.name}"`);
|
||||
}
|
||||
map.set(comp.name, comp);
|
||||
}
|
||||
|
||||
const cachedParamMap = new Map<string, ParamDef[]>();
|
||||
|
||||
return {
|
||||
components: map,
|
||||
|
||||
resolve(name: string): ComponentDef | undefined {
|
||||
return map.get(name);
|
||||
},
|
||||
|
||||
prompt(promptOptions?: PromptOptions): string {
|
||||
const merged: PromptOptions = {
|
||||
...options?.defaultPromptOptions,
|
||||
...promptOptions,
|
||||
additionalRules: [
|
||||
...(options?.defaultPromptOptions?.additionalRules ?? []),
|
||||
...(promptOptions?.additionalRules ?? []),
|
||||
],
|
||||
examples: [
|
||||
...(options?.defaultPromptOptions?.examples ?? []),
|
||||
...(promptOptions?.examples ?? []),
|
||||
],
|
||||
};
|
||||
return generatePrompt(this, merged);
|
||||
},
|
||||
|
||||
paramMap(): ParamMap {
|
||||
if (cachedParamMap.size === 0) {
|
||||
for (const [name, comp] of map) {
|
||||
cachedParamMap.set(name, buildParamDefs(comp.props));
|
||||
}
|
||||
}
|
||||
return cachedParamMap;
|
||||
},
|
||||
};
|
||||
}
|
||||
@@ -1,321 +0,0 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { z } from "zod";
|
||||
import { defineComponent, createLibrary, parse } from "./index";
|
||||
|
||||
/**
|
||||
* Smoke test that mirrors the Onyx library assembly.
|
||||
* Verifies all 16 component definitions register without errors
|
||||
* and the library generates a valid prompt.
|
||||
*/
|
||||
describe("Onyx Library Assembly (smoke test)", () => {
|
||||
// Re-define all components exactly as onyx/src/components/ does,
|
||||
// to verify the schemas are valid without needing the onyx package import.
|
||||
|
||||
const components = [
|
||||
defineComponent({
|
||||
name: "Stack",
|
||||
description: "Vertical stack layout",
|
||||
group: "Layout",
|
||||
props: z.object({
|
||||
children: z.array(z.unknown()).optional(),
|
||||
gap: z.enum(["none", "xs", "sm", "md", "lg", "xl"]).optional(),
|
||||
align: z.enum(["start", "center", "end", "stretch"]).optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Row",
|
||||
description: "Horizontal row layout",
|
||||
group: "Layout",
|
||||
props: z.object({
|
||||
children: z.array(z.unknown()).optional(),
|
||||
gap: z.enum(["none", "xs", "sm", "md", "lg", "xl"]).optional(),
|
||||
align: z.enum(["start", "center", "end", "stretch"]).optional(),
|
||||
wrap: z.boolean().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Column",
|
||||
description: "A column within a Row",
|
||||
group: "Layout",
|
||||
props: z.object({
|
||||
children: z.array(z.unknown()).optional(),
|
||||
width: z.string().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Card",
|
||||
description: "Container card",
|
||||
group: "Layout",
|
||||
props: z.object({
|
||||
title: z.string().optional(),
|
||||
padding: z.enum(["none", "sm", "md", "lg"]).optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Divider",
|
||||
description: "Horizontal separator",
|
||||
group: "Layout",
|
||||
props: z.object({
|
||||
spacing: z.enum(["sm", "md", "lg"]).optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Text",
|
||||
description: "Displays text with typography variants",
|
||||
group: "Content",
|
||||
props: z.object({
|
||||
children: z.string(),
|
||||
headingH1: z.boolean().optional(),
|
||||
headingH2: z.boolean().optional(),
|
||||
headingH3: z.boolean().optional(),
|
||||
muted: z.boolean().optional(),
|
||||
mono: z.boolean().optional(),
|
||||
bold: z.boolean().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Tag",
|
||||
description: "Label tag with color",
|
||||
group: "Content",
|
||||
props: z.object({
|
||||
title: z.string(),
|
||||
color: z.enum(["green", "purple", "blue", "gray", "amber"]).optional(),
|
||||
size: z.enum(["sm", "md"]).optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Table",
|
||||
description: "Data table",
|
||||
group: "Content",
|
||||
props: z.object({
|
||||
columns: z.array(z.string()),
|
||||
rows: z.array(z.array(z.unknown())),
|
||||
compact: z.boolean().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Code",
|
||||
description: "Code block",
|
||||
group: "Content",
|
||||
props: z.object({
|
||||
children: z.string(),
|
||||
language: z.string().optional(),
|
||||
showCopyButton: z.boolean().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Image",
|
||||
description: "Displays an image",
|
||||
group: "Content",
|
||||
props: z.object({
|
||||
src: z.string(),
|
||||
alt: z.string().optional(),
|
||||
width: z.string().optional(),
|
||||
height: z.string().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Link",
|
||||
description: "Hyperlink",
|
||||
group: "Content",
|
||||
props: z.object({
|
||||
children: z.string(),
|
||||
href: z.string(),
|
||||
external: z.boolean().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "List",
|
||||
description: "Ordered or unordered list",
|
||||
group: "Content",
|
||||
props: z.object({
|
||||
items: z.array(z.string()),
|
||||
ordered: z.boolean().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Button",
|
||||
description: "Interactive button",
|
||||
group: "Interactive",
|
||||
props: z.object({
|
||||
children: z.string(),
|
||||
main: z.boolean().optional(),
|
||||
action: z.boolean().optional(),
|
||||
danger: z.boolean().optional(),
|
||||
primary: z.boolean().optional(),
|
||||
secondary: z.boolean().optional(),
|
||||
tertiary: z.boolean().optional(),
|
||||
size: z.enum(["lg", "md"]).optional(),
|
||||
actionId: z.string().optional(),
|
||||
disabled: z.boolean().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "IconButton",
|
||||
description: "Icon button with tooltip",
|
||||
group: "Interactive",
|
||||
props: z.object({
|
||||
icon: z.string(),
|
||||
tooltip: z.string().optional(),
|
||||
main: z.boolean().optional(),
|
||||
action: z.boolean().optional(),
|
||||
danger: z.boolean().optional(),
|
||||
primary: z.boolean().optional(),
|
||||
secondary: z.boolean().optional(),
|
||||
actionId: z.string().optional(),
|
||||
disabled: z.boolean().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Input",
|
||||
description: "Text input field",
|
||||
group: "Interactive",
|
||||
props: z.object({
|
||||
placeholder: z.string().optional(),
|
||||
value: z.string().optional(),
|
||||
actionId: z.string().optional(),
|
||||
readOnly: z.boolean().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Alert",
|
||||
description: "Status message banner",
|
||||
group: "Feedback",
|
||||
props: z.object({
|
||||
text: z.string(),
|
||||
description: z.string().optional(),
|
||||
level: z
|
||||
.enum(["default", "info", "success", "warning", "error"])
|
||||
.optional(),
|
||||
showIcon: z.boolean().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
];
|
||||
|
||||
it("registers all 16 components without errors", () => {
|
||||
expect(() => createLibrary(components)).not.toThrow();
|
||||
});
|
||||
|
||||
it("creates a library with exactly 16 components", () => {
|
||||
const lib = createLibrary(components);
|
||||
expect(lib.components.size).toBe(16);
|
||||
});
|
||||
|
||||
it("resolves every component by name", () => {
|
||||
const lib = createLibrary(components);
|
||||
const names = [
|
||||
"Stack",
|
||||
"Row",
|
||||
"Column",
|
||||
"Card",
|
||||
"Divider",
|
||||
"Text",
|
||||
"Tag",
|
||||
"Table",
|
||||
"Code",
|
||||
"Image",
|
||||
"Link",
|
||||
"List",
|
||||
"Button",
|
||||
"IconButton",
|
||||
"Input",
|
||||
"Alert",
|
||||
];
|
||||
for (const name of names) {
|
||||
expect(lib.resolve(name)).toBeDefined();
|
||||
}
|
||||
});
|
||||
|
||||
it("generates param map for all components", () => {
|
||||
const lib = createLibrary(components);
|
||||
const paramMap = lib.paramMap();
|
||||
expect(paramMap.size).toBe(16);
|
||||
|
||||
// Verify a few specific param orderings
|
||||
const textParams = paramMap.get("Text")!;
|
||||
expect(textParams[0]!.name).toBe("children");
|
||||
expect(textParams[0]!.required).toBe(true);
|
||||
|
||||
const buttonParams = paramMap.get("Button")!;
|
||||
expect(buttonParams[0]!.name).toBe("children");
|
||||
expect(buttonParams.find((p) => p.name === "actionId")).toBeDefined();
|
||||
|
||||
const tagParams = paramMap.get("Tag")!;
|
||||
expect(tagParams[0]!.name).toBe("title");
|
||||
expect(tagParams[0]!.required).toBe(true);
|
||||
});
|
||||
|
||||
it("generates a prompt containing all component signatures", () => {
|
||||
const lib = createLibrary(components);
|
||||
const prompt = lib.prompt();
|
||||
|
||||
// Every component name should appear
|
||||
for (const [name] of lib.components) {
|
||||
expect(prompt).toContain(name);
|
||||
}
|
||||
|
||||
// Should contain group headers
|
||||
expect(prompt).toContain("Layout");
|
||||
expect(prompt).toContain("Content");
|
||||
expect(prompt).toContain("Interactive");
|
||||
expect(prompt).toContain("Feedback");
|
||||
|
||||
// Should have syntax section
|
||||
expect(prompt).toContain("Syntax");
|
||||
expect(prompt).toContain("Streaming");
|
||||
});
|
||||
|
||||
it("generates a prompt with correct Button signature", () => {
|
||||
const lib = createLibrary(components);
|
||||
const prompt = lib.prompt();
|
||||
|
||||
// Button should show its required `children` param and optional params
|
||||
expect(prompt).toContain("Button(children: string");
|
||||
expect(prompt).toContain("actionId?");
|
||||
});
|
||||
|
||||
it("parses a complex GenUI input using the full library", () => {
|
||||
const lib = createLibrary(components);
|
||||
|
||||
const input = `heading = Text("Dashboard", headingH1: true)
|
||||
status = Alert("All systems operational", level: "success")
|
||||
row1 = ["API Server", Tag("Running", color: "green"), "99.9%"]
|
||||
row2 = ["Database", Tag("Running", color: "green"), "99.8%"]
|
||||
row3 = ["Cache", Tag("Warning", color: "amber"), "95.2%"]
|
||||
table = Table(["Service", "Status", "Uptime"], [row1, row2, row3])
|
||||
actions = Row([
|
||||
Button("Refresh", main: true, primary: true, actionId: "refresh"),
|
||||
Button("Settings", action: true, secondary: true, actionId: "settings")
|
||||
], gap: "sm")
|
||||
divider = Divider(spacing: "md")
|
||||
code = Code("curl https://api.example.com/health", language: "bash")
|
||||
root = Stack([heading, status, table, divider, actions, code], gap: "md")`;
|
||||
|
||||
const result = parse(input, lib);
|
||||
|
||||
expect(result.root).not.toBeNull();
|
||||
expect(result.statements.length).toBeGreaterThanOrEqual(9);
|
||||
|
||||
// Should have no critical errors (unknown components, etc.)
|
||||
const criticalErrors = result.errors.filter(
|
||||
(e: { message: string }) => !e.message.includes("Unknown")
|
||||
);
|
||||
expect(criticalErrors).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
@@ -1,42 +0,0 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { autoClose } from "./autoclose";
|
||||
|
||||
describe("autoClose", () => {
|
||||
it("closes unmatched parentheses", () => {
|
||||
expect(autoClose('Button("hello"')).toBe('Button("hello")');
|
||||
});
|
||||
|
||||
it("closes unmatched brackets", () => {
|
||||
expect(autoClose('["a", "b"')).toBe('["a", "b"]');
|
||||
});
|
||||
|
||||
it("closes unmatched braces", () => {
|
||||
expect(autoClose('{name: "test"')).toBe('{name: "test"}');
|
||||
});
|
||||
|
||||
it("closes unmatched strings", () => {
|
||||
expect(autoClose('"hello')).toBe('"hello"');
|
||||
});
|
||||
|
||||
it("closes nested brackets", () => {
|
||||
expect(autoClose("Foo([1, 2")).toBe("Foo([1, 2])");
|
||||
});
|
||||
|
||||
it("handles already closed input", () => {
|
||||
expect(autoClose('Button("ok")')).toBe('Button("ok")');
|
||||
});
|
||||
|
||||
it("handles empty input", () => {
|
||||
expect(autoClose("")).toBe("");
|
||||
});
|
||||
|
||||
it("handles escaped quotes inside strings", () => {
|
||||
expect(autoClose('"hello \\"world')).toBe('"hello \\"world"');
|
||||
});
|
||||
|
||||
it("handles deeply nested structures", () => {
|
||||
expect(autoClose('Stack([Row([Text("hi"')).toBe(
|
||||
'Stack([Row([Text("hi")])])'
|
||||
);
|
||||
});
|
||||
});
|
||||
@@ -1,63 +0,0 @@
|
||||
/**
|
||||
* Auto-close unmatched brackets and strings for streaming partial input.
|
||||
*
|
||||
* When the LLM is mid-stream, the last line may be incomplete — e.g. an
|
||||
* unclosed `(`, `[`, `{`, or string. We append the matching closers so the
|
||||
* parser can produce a valid (partial) tree from what we have so far.
|
||||
*/
|
||||
export function autoClose(input: string): string {
|
||||
const closers: string[] = [];
|
||||
let inString: string | null = null;
|
||||
let escaped = false;
|
||||
|
||||
for (let i = 0; i < input.length; i++) {
|
||||
const ch = input[i]!;
|
||||
|
||||
if (escaped) {
|
||||
escaped = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === "\\") {
|
||||
escaped = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inString !== null) {
|
||||
if (ch === inString) {
|
||||
inString = null;
|
||||
closers.pop(); // remove the string closer
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === '"' || ch === "'") {
|
||||
inString = ch;
|
||||
closers.push(ch);
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (ch) {
|
||||
case "(":
|
||||
closers.push(")");
|
||||
break;
|
||||
case "[":
|
||||
closers.push("]");
|
||||
break;
|
||||
case "{":
|
||||
closers.push("}");
|
||||
break;
|
||||
case ")":
|
||||
case "]":
|
||||
case "}":
|
||||
// Pop the matching opener if present
|
||||
if (closers.length > 0 && closers[closers.length - 1] === ch) {
|
||||
closers.pop();
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Append closers in reverse order
|
||||
return input + closers.reverse().join("");
|
||||
}
|
||||
@@ -1,542 +0,0 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { Tokenizer } from "./tokenizer";
|
||||
import { Parser } from "./parser";
|
||||
import { TokenType } from "../types";
|
||||
|
||||
// ── Helpers ──
|
||||
|
||||
function tokenize(input: string) {
|
||||
return new Tokenizer(input).tokenize();
|
||||
}
|
||||
|
||||
function tokenTypes(input: string): TokenType[] {
|
||||
return tokenize(input).map((t) => t.type);
|
||||
}
|
||||
|
||||
function tokenValues(input: string): string[] {
|
||||
return tokenize(input).map((t) => t.value);
|
||||
}
|
||||
|
||||
function parseStatements(input: string) {
|
||||
return Parser.fromSource(input).parse();
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────────────────────
|
||||
// Tokenizer edge cases
|
||||
// ────────────────────────────────────────────────────────────
|
||||
|
||||
// Exhaustive edge-case coverage for the tokenizer: empty/whitespace-only
// input, unicode, long literals, bracket handling, quoting, comments,
// escapes, error tolerance, and position tracking.
describe("Tokenizer edge cases", () => {
  it("handles empty string", () => {
    const tokens = tokenize("");
    // Even empty input yields exactly one token: EOF.
    expect(tokens).toHaveLength(1);
    expect(tokens[0]!.type).toBe(TokenType.EOF);
  });

  it("handles only whitespace (spaces and tabs)", () => {
    const tokens = tokenize(" \t\t ");
    expect(tokens).toHaveLength(1);
    expect(tokens[0]!.type).toBe(TokenType.EOF);
  });

  it("handles only newlines", () => {
    const types = tokenTypes("\n\n\n");
    // Each newline at bracket depth 0 produces a Newline token
    expect(types.filter((t) => t === TokenType.Newline).length).toBe(3);
    expect(types[types.length - 1]).toBe(TokenType.EOF);
  });

  it("handles unicode in string literals (emoji)", () => {
    const tokens = tokenize('"hello \u{1F680}\u{1F525}"');
    const str = tokens.find((t) => t.type === TokenType.String);
    expect(str).toBeDefined();
    expect(str!.value).toBe("hello \u{1F680}\u{1F525}");
  });

  it("handles unicode in string literals (CJK characters)", () => {
    const tokens = tokenize('"\u4F60\u597D\u4E16\u754C"');
    const str = tokens.find((t) => t.type === TokenType.String);
    expect(str!.value).toBe("\u4F60\u597D\u4E16\u754C");
  });

  it("handles very long string literals (1000+ chars)", () => {
    const longContent = "a".repeat(2000);
    const tokens = tokenize(`"${longContent}"`);
    const str = tokens.find((t) => t.type === TokenType.String);
    expect(str!.value).toBe(longContent);
    expect(str!.value.length).toBe(2000);
  });

  it("handles deeply nested brackets (10+ levels)", () => {
    const open = "(".repeat(15);
    const close = ")".repeat(15);
    const input = `Foo${open}${close}`;
    const tokens = tokenize(input);
    const lParens = tokens.filter((t) => t.type === TokenType.LParen);
    const rParens = tokens.filter((t) => t.type === TokenType.RParen);
    expect(lParens).toHaveLength(15);
    expect(rParens).toHaveLength(15);
  });

  it("suppresses newlines inside brackets", () => {
    const input = '(\n\n"hello"\n\n)';
    const types = tokenTypes(input);
    // Newlines inside brackets should be suppressed
    expect(types).not.toContain(TokenType.Newline);
    expect(types).toContain(TokenType.LParen);
    expect(types).toContain(TokenType.String);
    expect(types).toContain(TokenType.RParen);
  });

  it("handles single-quoted strings", () => {
    const tokens = tokenize("'hello world'");
    const str = tokens.find((t) => t.type === TokenType.String);
    expect(str!.value).toBe("hello world");
  });

  it("handles double-quoted strings", () => {
    const tokens = tokenize('"hello world"');
    const str = tokens.find((t) => t.type === TokenType.String);
    expect(str!.value).toBe("hello world");
  });

  it("handles single quotes inside double-quoted strings without escaping", () => {
    const tokens = tokenize('"it\'s fine"');
    // The apostrophe is a raw ' inside a double-quoted string — the \' above
    // is only JS source-level escaping; the tokenizer sees an unescaped '.
    const str = tokens.find((t) => t.type === TokenType.String);
    expect(str!.value).toBe("it's fine");
  });

  it("handles negative decimals (-3.14)", () => {
    const tokens = tokenize("-3.14");
    const num = tokens.find((t) => t.type === TokenType.Number);
    expect(num).toBeDefined();
    expect(num!.value).toBe("-3.14");
  });

  it("handles negative integers", () => {
    const tokens = tokenize("-42");
    const num = tokens.find((t) => t.type === TokenType.Number);
    expect(num!.value).toBe("-42");
  });

  it("handles multiple consecutive comments", () => {
    const input = "// comment 1\n// comment 2\n// comment 3\nx";
    const tokens = tokenize(input);
    // Comments are skipped; we should get newlines and the identifier
    const identifiers = tokens.filter((t) => t.type === TokenType.Identifier);
    expect(identifiers).toHaveLength(1);
    expect(identifiers[0]!.value).toBe("x");
  });

  it("handles comment at end of file (no trailing newline)", () => {
    const input = "x = 1\n// trailing comment";
    const tokens = tokenize(input);
    // Should not crash, last token is EOF
    expect(tokens[tokens.length - 1]!.type).toBe(TokenType.EOF);
    // The identifier and number should be present
    expect(
      tokens.some((t) => t.type === TokenType.Identifier && t.value === "x")
    ).toBe(true);
    expect(
      tokens.some((t) => t.type === TokenType.Number && t.value === "1")
    ).toBe(true);
  });

  it("handles all escape sequences in strings", () => {
    // Source string is '"\n\t\\\"\'"' at the language level: every
    // supported escape in one literal.
    const input = '"\\n\\t\\\\\\"\\\'"';
    const tokens = tokenize(input);
    const str = tokens.find((t) => t.type === TokenType.String);
    expect(str!.value).toBe("\n\t\\\"'");
  });

  it("handles unknown escape sequences by preserving the escaped char", () => {
    const tokens = tokenize('"\\x"');
    const str = tokens.find((t) => t.type === TokenType.String);
    expect(str!.value).toBe("x");
  });

  it("handles unterminated string (EOF inside string)", () => {
    // Should not throw; tokenizer consumes until EOF
    const tokens = tokenize('"unterminated');
    const str = tokens.find((t) => t.type === TokenType.String);
    expect(str).toBeDefined();
    expect(str!.value).toBe("unterminated");
  });

  it("handles bracket depth never going below zero on unmatched closing brackets", () => {
    // Extra closing parens should not crash
    const tokens = tokenize(")))]]]");
    expect(tokens[tokens.length - 1]!.type).toBe(TokenType.EOF);
  });

  it("skips unknown characters silently", () => {
    const tokens = tokenize("@ # $ %");
    // All unknown chars are skipped, only EOF remains
    expect(tokens).toHaveLength(1);
    expect(tokens[0]!.type).toBe(TokenType.EOF);
  });

  it("tracks line and column correctly across newlines", () => {
    const tokens = tokenize("x\ny");
    const x = tokens.find((t) => t.value === "x");
    const y = tokens.find((t) => t.value === "y");
    // Positions are 1-based: y starts a new line at column 1.
    expect(x!.line).toBe(1);
    expect(x!.column).toBe(1);
    expect(y!.line).toBe(2);
    expect(y!.column).toBe(1);
  });

  it("treats identifier starting with underscore as valid", () => {
    const tokens = tokenize("_foo _bar123");
    const idents = tokens.filter((t) => t.type === TokenType.Identifier);
    expect(idents).toHaveLength(2);
    expect(idents[0]!.value).toBe("_foo");
    expect(idents[1]!.value).toBe("_bar123");
  });

  it("tokenizes number with trailing dot as number then unknown", () => {
    // "42." => number "42." (the dot is consumed as part of a decimal), then EOF
    const tokens = tokenize("42.");
    const num = tokens.find((t) => t.type === TokenType.Number);
    expect(num!.value).toBe("42.");
  });
});
|
||||
|
||||
// ────────────────────────────────────────────────────────────
|
||||
// Parser edge cases
|
||||
// ────────────────────────────────────────────────────────────
|
||||
|
||||
// Edge-case coverage for the parser: empty/partial input, error recovery,
// nesting, trailing commas, PascalCase-vs-keyword disambiguation, and
// comment handling.
describe("Parser edge cases", () => {
  it("handles empty input", () => {
    const { statements, errors } = parseStatements("");
    expect(statements).toHaveLength(0);
    expect(errors).toHaveLength(0);
  });

  it("handles single identifier with no assignment (error recovery)", () => {
    const { statements, errors } = parseStatements("foo");
    // Should produce an error because it expects `=` after identifier
    expect(errors.length).toBeGreaterThan(0);
    expect(errors[0]!.message).toContain("Expected Equals");
  });

  it("handles assignment with no value (error recovery)", () => {
    const { statements, errors } = parseStatements("x =");
    // Should produce an error because there's no expression after `=`
    expect(errors.length).toBeGreaterThan(0);
  });

  it("parses component with 0 args: Foo()", () => {
    const { statements, errors } = parseStatements("x = Foo()");
    expect(errors).toHaveLength(0);
    expect(statements).toHaveLength(1);
    const node = statements[0]!.value;
    expect(node.kind).toBe("component");
    if (node.kind === "component") {
      expect(node.name).toBe("Foo");
      expect(node.args).toHaveLength(0);
    }
  });

  it("parses component with only named args", () => {
    const { statements, errors } = parseStatements("x = Foo(a: 1, b: 2)");
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    if (node.kind === "component") {
      expect(node.args).toHaveLength(2);
      expect(node.args[0]!.key).toBe("a");
      expect(node.args[0]!.value).toEqual({ kind: "literal", value: 1 });
      expect(node.args[1]!.key).toBe("b");
      expect(node.args[1]!.value).toEqual({ kind: "literal", value: 2 });
    }
  });

  it("parses deeply nested components", () => {
    // Walk four levels of nesting, narrowing the union type at each step.
    const { statements, errors } = parseStatements('x = A(B(C(D("deep"))))');
    expect(errors).toHaveLength(0);
    const a = statements[0]!.value;
    expect(a.kind).toBe("component");
    if (a.kind === "component") {
      expect(a.name).toBe("A");
      const b = a.args[0]!.value;
      expect(b.kind).toBe("component");
      if (b.kind === "component") {
        expect(b.name).toBe("B");
        const c = b.args[0]!.value;
        expect(c.kind).toBe("component");
        if (c.kind === "component") {
          expect(c.name).toBe("C");
          const d = c.args[0]!.value;
          expect(d.kind).toBe("component");
          if (d.kind === "component") {
            expect(d.name).toBe("D");
            expect(d.args[0]!.value).toEqual({
              kind: "literal",
              value: "deep",
            });
          }
        }
      }
    }
  });

  it("parses array of arrays", () => {
    const { statements, errors } = parseStatements("x = [[1, 2], [3, 4]]");
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    expect(node.kind).toBe("array");
    if (node.kind === "array") {
      expect(node.elements).toHaveLength(2);
      const first = node.elements[0]!;
      expect(first.kind).toBe("array");
      if (first.kind === "array") {
        expect(first.elements).toEqual([
          { kind: "literal", value: 1 },
          { kind: "literal", value: 2 },
        ]);
      }
      const second = node.elements[1]!;
      if (second.kind === "array") {
        expect(second.elements).toEqual([
          { kind: "literal", value: 3 },
          { kind: "literal", value: 4 },
        ]);
      }
    }
  });

  it("parses object with string keys (including spaces)", () => {
    const { statements, errors } = parseStatements(
      'x = {"key with spaces": 1, "another key": 2}'
    );
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    expect(node.kind).toBe("object");
    if (node.kind === "object") {
      expect(node.entries).toHaveLength(2);
      expect(node.entries[0]!.key).toBe("key with spaces");
      expect(node.entries[0]!.value).toEqual({ kind: "literal", value: 1 });
      expect(node.entries[1]!.key).toBe("another key");
    }
  });

  it("handles trailing newlines gracefully", () => {
    const { statements, errors } = parseStatements('x = "hello"\n\n\n');
    expect(errors).toHaveLength(0);
    expect(statements).toHaveLength(1);
  });

  it("handles leading newlines gracefully", () => {
    const { statements, errors } = parseStatements('\n\n\nx = "hello"');
    expect(errors).toHaveLength(0);
    expect(statements).toHaveLength(1);
    expect(statements[0]!.name).toBe("x");
  });

  it("handles multiple empty lines between statements", () => {
    const { statements, errors } = parseStatements("x = 1\n\n\n\n\ny = 2");
    expect(errors).toHaveLength(0);
    expect(statements).toHaveLength(2);
    expect(statements[0]!.name).toBe("x");
    expect(statements[1]!.name).toBe("y");
  });

  it("treats PascalCase identifiers as components, not keywords: True", () => {
    // `True` is PascalCase, so it should be parsed as a component call (not boolean)
    // when followed by parens
    const { statements, errors } = parseStatements("x = True()");
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    expect(node.kind).toBe("component");
    if (node.kind === "component") {
      expect(node.name).toBe("True");
    }
  });

  it("treats PascalCase identifiers as components: Null", () => {
    const { statements, errors } = parseStatements("x = Null()");
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    expect(node.kind).toBe("component");
    if (node.kind === "component") {
      expect(node.name).toBe("Null");
    }
  });

  it("treats lowercase 'true' as boolean literal, not reference", () => {
    const { statements } = parseStatements("x = true");
    expect(statements[0]!.value).toEqual({ kind: "literal", value: true });
  });

  it("treats lowercase 'null' as null literal, not reference", () => {
    const { statements } = parseStatements("x = null");
    expect(statements[0]!.value).toEqual({ kind: "literal", value: null });
  });

  it("handles very long identifier names", () => {
    const longName = "a".repeat(500);
    const { statements, errors } = parseStatements(`${longName} = 42`);
    expect(errors).toHaveLength(0);
    expect(statements[0]!.name).toBe(longName);
  });

  it("parses mixed named and positional args", () => {
    // Positional args have key === null; named args carry their key.
    const { statements, errors } = parseStatements(
      'x = Foo("pos", named: "val", "pos2")'
    );
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    if (node.kind === "component") {
      expect(node.args).toHaveLength(3);
      // First: positional
      expect(node.args[0]!.key).toBeNull();
      expect(node.args[0]!.value).toEqual({ kind: "literal", value: "pos" });
      // Second: named
      expect(node.args[1]!.key).toBe("named");
      expect(node.args[1]!.value).toEqual({ kind: "literal", value: "val" });
      // Third: positional
      expect(node.args[2]!.key).toBeNull();
      expect(node.args[2]!.value).toEqual({ kind: "literal", value: "pos2" });
    }
  });

  it("handles trailing comma in component args", () => {
    const { statements, errors } = parseStatements("x = Foo(1, 2,)");
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    if (node.kind === "component") {
      expect(node.args).toHaveLength(2);
    }
  });

  it("handles trailing comma in arrays", () => {
    const { statements, errors } = parseStatements("x = [1, 2, 3,]");
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    if (node.kind === "array") {
      expect(node.elements).toHaveLength(3);
    }
  });

  it("handles trailing comma in objects", () => {
    const { statements, errors } = parseStatements("x = {a: 1, b: 2,}");
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    if (node.kind === "object") {
      expect(node.entries).toHaveLength(2);
    }
  });

  it("recovers from error and parses subsequent statements", () => {
    const { statements, errors } = parseStatements("bad\ny = 42");
    // First statement is invalid (no `=`), second is valid
    expect(errors.length).toBeGreaterThan(0);
    expect(statements).toHaveLength(1);
    expect(statements[0]!.name).toBe("y");
    expect(statements[0]!.value).toEqual({ kind: "literal", value: 42 });
  });

  it("parses camelCase identifier as reference", () => {
    const { statements, errors } = parseStatements("x = myRef");
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    expect(node.kind).toBe("reference");
    if (node.kind === "reference") {
      expect(node.name).toBe("myRef");
    }
  });

  it("parses PascalCase identifier without parens as reference", () => {
    // PascalCase but no `(` following => treated as a reference, not component
    const { statements, errors } = parseStatements("x = MyComponent");
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    expect(node.kind).toBe("reference");
    if (node.kind === "reference") {
      expect(node.name).toBe("MyComponent");
    }
  });

  it("parses empty array", () => {
    const { statements, errors } = parseStatements("x = []");
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    expect(node.kind).toBe("array");
    if (node.kind === "array") {
      expect(node.elements).toHaveLength(0);
    }
  });

  it("parses empty object", () => {
    const { statements, errors } = parseStatements("x = {}");
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    expect(node.kind).toBe("object");
    if (node.kind === "object") {
      expect(node.entries).toHaveLength(0);
    }
  });

  it("parses component as named arg value", () => {
    const { statements, errors } = parseStatements(
      'x = Layout(header: Header("Title"))'
    );
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    if (node.kind === "component") {
      expect(node.name).toBe("Layout");
      expect(node.args[0]!.key).toBe("header");
      const headerVal = node.args[0]!.value;
      expect(headerVal.kind).toBe("component");
      if (headerVal.kind === "component") {
        expect(headerVal.name).toBe("Header");
      }
    }
  });

  it("parses negative number in expression position", () => {
    const { statements, errors } = parseStatements("x = -3.14");
    expect(errors).toHaveLength(0);
    expect(statements[0]!.value).toEqual({ kind: "literal", value: -3.14 });
  });

  it("parses component with array arg", () => {
    const { statements, errors } = parseStatements(
      "x = List(items: [1, 2, 3])"
    );
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    if (node.kind === "component") {
      expect(node.args[0]!.key).toBe("items");
      expect(node.args[0]!.value.kind).toBe("array");
    }
  });

  it("handles comments between statements", () => {
    const input = "x = 1\n// comment\ny = 2";
    const { statements, errors } = parseStatements(input);
    expect(errors).toHaveLength(0);
    expect(statements).toHaveLength(2);
  });

  it("handles comment on the same line as a statement", () => {
    // A // comment consumes the rest of its line, so the statement must
    // live on its own line after the header comment.
    const input = "// header comment\nx = 1";
    const { statements, errors } = parseStatements(input);
    expect(errors).toHaveLength(0);
    expect(statements).toHaveLength(1);
    expect(statements[0]!.name).toBe("x");
  });

  it("parses object with mixed identifier and string keys", () => {
    const { statements, errors } = parseStatements(
      'x = {name: "Alice", "full name": "Alice B"}'
    );
    expect(errors).toHaveLength(0);
    const node = statements[0]!.value;
    if (node.kind === "object") {
      expect(node.entries[0]!.key).toBe("name");
      expect(node.entries[1]!.key).toBe("full name");
    }
  });
});
|
||||
@@ -1,7 +0,0 @@
|
||||
// Public API barrel for the GenUI Lang parser package: re-exports each
// pipeline stage (tokenize -> parse -> auto-close -> resolve -> validate)
// plus the streaming entry point and its type.
export { Tokenizer } from "./tokenizer";
export { Parser } from "./parser";
export { autoClose } from "./autoclose";
export { resolveReferences } from "./resolver";
export { validateAndTransform } from "./validator";
export { createStreamingParser } from "./streaming";
export type { StreamParser } from "./streaming";
|
||||
@@ -1,132 +0,0 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { Parser } from "./parser";
|
||||
|
||||
// Happy-path coverage for the Parser: literals, component calls,
// collections, references, multi-statement programs, and error recovery.
describe("Parser", () => {
  // Convenience wrapper: tokenize + parse `input` in one call.
  function parseStatements(input: string) {
    const parser = Parser.fromSource(input);
    return parser.parse();
  }

  it("parses a simple literal assignment", () => {
    const { statements, errors } = parseStatements('x = "hello"');
    expect(errors).toHaveLength(0);
    expect(statements).toHaveLength(1);
    expect(statements[0]!.name).toBe("x");
    expect(statements[0]!.value).toEqual({ kind: "literal", value: "hello" });
  });

  it("parses number literals", () => {
    const { statements } = parseStatements("n = 42");
    expect(statements[0]!.value).toEqual({ kind: "literal", value: 42 });
  });

  it("parses boolean literals", () => {
    const { statements } = parseStatements("b = true");
    expect(statements[0]!.value).toEqual({ kind: "literal", value: true });
  });

  it("parses null", () => {
    const { statements } = parseStatements("x = null");
    expect(statements[0]!.value).toEqual({ kind: "literal", value: null });
  });

  it("parses a component call with positional args", () => {
    const { statements, errors } = parseStatements('btn = Button("Click me")');
    expect(errors).toHaveLength(0);
    expect(statements[0]!.value).toEqual({
      kind: "component",
      name: "Button",
      args: [{ key: null, value: { kind: "literal", value: "Click me" } }],
    });
  });

  it("parses a component call with named args", () => {
    const { statements } = parseStatements('t = Tag("PDF", color: "blue")');
    const comp = statements[0]!.value;
    expect(comp.kind).toBe("component");
    if (comp.kind === "component") {
      expect(comp.args).toHaveLength(2);
      expect(comp.args[0]!.key).toBeNull();
      expect(comp.args[1]!.key).toBe("color");
    }
  });

  it("parses nested components", () => {
    const { statements, errors } = parseStatements(
      'row = Row([Button("A"), Button("B")])'
    );
    expect(errors).toHaveLength(0);
    const comp = statements[0]!.value;
    expect(comp.kind).toBe("component");
  });

  it("parses arrays", () => {
    const { statements } = parseStatements('items = ["a", "b", "c"]');
    expect(statements[0]!.value).toEqual({
      kind: "array",
      elements: [
        { kind: "literal", value: "a" },
        { kind: "literal", value: "b" },
        { kind: "literal", value: "c" },
      ],
    });
  });

  it("parses objects", () => {
    const { statements } = parseStatements('opts = {name: "test", count: 5}');
    expect(statements[0]!.value).toEqual({
      kind: "object",
      entries: [
        { key: "name", value: { kind: "literal", value: "test" } },
        { key: "count", value: { kind: "literal", value: 5 } },
      ],
    });
  });

  it("parses variable references", () => {
    const { statements } = parseStatements("ref = myVar");
    expect(statements[0]!.value).toEqual({ kind: "reference", name: "myVar" });
  });

  it("parses multiple statements", () => {
    const { statements, errors } = parseStatements(
      'title = Text("Hello")\nbtn = Button("Click")'
    );
    expect(errors).toHaveLength(0);
    expect(statements).toHaveLength(2);
    expect(statements[0]!.name).toBe("title");
    expect(statements[1]!.name).toBe("btn");
  });

  it("handles trailing commas", () => {
    const { statements, errors } = parseStatements('x = Button("a", "b",)');
    expect(errors).toHaveLength(0);
    const comp = statements[0]!.value;
    if (comp.kind === "component") {
      expect(comp.args).toHaveLength(2);
    }
  });

  it("recovers from parse errors", () => {
    const { statements, errors } = parseStatements(
      '!!invalid!!\ny = Text("valid")'
    );
    expect(errors.length).toBeGreaterThan(0);
    // Should still parse the valid second line
    expect(statements.length).toBeGreaterThanOrEqual(1);
  });

  it("parses the full example from the spec", () => {
    const input = `title = Text("Search Results", headingH2: true)
row1 = ["Onyx Docs", Tag("PDF", color: "blue"), "2024-01-15"]
row2 = ["API Guide", Tag("MD", color: "green"), "2024-02-01"]
results = Table(["Name", "Type", "Date"], [row1, row2])
action = Button("View All", main: true, primary: true, actionId: "viewAll")
root = Stack([title, results, action], gap: "md")`;

    const { statements, errors } = parseStatements(input);
    expect(errors).toHaveLength(0);
    expect(statements).toHaveLength(6);
    expect(statements[5]!.name).toBe("root");
  });
});
|
||||
@@ -1,305 +0,0 @@
|
||||
import type {
|
||||
ASTNode,
|
||||
ArgumentNode,
|
||||
Statement,
|
||||
ParseError,
|
||||
Token,
|
||||
} from "../types";
|
||||
import { TokenType } from "../types";
|
||||
import { Tokenizer } from "./tokenizer";
|
||||
|
||||
/**
|
||||
* Recursive descent parser for GenUI Lang.
|
||||
*
|
||||
* Grammar:
|
||||
* program = statement*
|
||||
* statement = identifier "=" expression NEWLINE
|
||||
* expression = component | array | object | literal | reference
|
||||
* component = PascalCase "(" arglist? ")"
|
||||
* arglist = arg ("," arg)*
|
||||
* arg = namedArg | expression
|
||||
* namedArg = identifier ":" expression
|
||||
* array = "[" (expression ("," expression)*)? "]"
|
||||
* object = "{" (pair ("," pair)*)? "}"
|
||||
* pair = (identifier | string) ":" expression
|
||||
* literal = string | number | boolean | null
|
||||
* reference = camelCase identifier (doesn't start with uppercase)
|
||||
*/
|
||||
export class Parser {
|
||||
private tokens: Token[];
|
||||
private pos = 0;
|
||||
private errors: ParseError[] = [];
|
||||
|
||||
constructor(tokens: Token[]) {
|
||||
this.tokens = tokens;
|
||||
}
|
||||
|
||||
static fromSource(source: string): Parser {
|
||||
const tokenizer = new Tokenizer(source);
|
||||
return new Parser(tokenizer.tokenize());
|
||||
}
|
||||
|
||||
parse(): { statements: Statement[]; errors: ParseError[] } {
|
||||
const statements: Statement[] = [];
|
||||
|
||||
this.skipNewlines();
|
||||
|
||||
while (!this.isAtEnd()) {
|
||||
try {
|
||||
const stmt = this.parseStatement();
|
||||
if (stmt) {
|
||||
statements.push(stmt);
|
||||
}
|
||||
} catch (e) {
|
||||
if (e instanceof ParseErrorException) {
|
||||
this.errors.push(e.toParseError());
|
||||
}
|
||||
// Skip to next line to recover
|
||||
this.skipToNextStatement();
|
||||
}
|
||||
this.skipNewlines();
|
||||
}
|
||||
|
||||
return { statements, errors: this.errors };
|
||||
}
|
||||
|
||||
private parseStatement(): Statement | null {
|
||||
if (this.isAtEnd()) return null;
|
||||
|
||||
const ident = this.expect(TokenType.Identifier);
|
||||
this.expect(TokenType.Equals);
|
||||
const value = this.parseExpression();
|
||||
|
||||
return { name: ident.value, value };
|
||||
}
|
||||
|
||||
private parseExpression(): ASTNode {
|
||||
const token = this.current();
|
||||
|
||||
if (token.type === TokenType.LBracket) {
|
||||
return this.parseArray();
|
||||
}
|
||||
|
||||
if (token.type === TokenType.LBrace) {
|
||||
return this.parseObject();
|
||||
}
|
||||
|
||||
if (token.type === TokenType.String) {
|
||||
this.advance();
|
||||
return { kind: "literal", value: token.value };
|
||||
}
|
||||
|
||||
if (token.type === TokenType.Number) {
|
||||
this.advance();
|
||||
return { kind: "literal", value: Number(token.value) };
|
||||
}
|
||||
|
||||
if (token.type === TokenType.Boolean) {
|
||||
this.advance();
|
||||
return { kind: "literal", value: token.value === "true" };
|
||||
}
|
||||
|
||||
if (token.type === TokenType.Null) {
|
||||
this.advance();
|
||||
return { kind: "literal", value: null };
|
||||
}
|
||||
|
||||
if (token.type === TokenType.Identifier) {
|
||||
const isPascalCase = /^[A-Z]/.test(token.value);
|
||||
|
||||
if (isPascalCase && this.peek()?.type === TokenType.LParen) {
|
||||
return this.parseComponent();
|
||||
}
|
||||
|
||||
// camelCase identifier = variable reference
|
||||
this.advance();
|
||||
return { kind: "reference", name: token.value };
|
||||
}
|
||||
|
||||
throw this.error(`Unexpected token: ${token.type} "${token.value}"`);
|
||||
}
|
||||
|
||||
private parseComponent(): ASTNode {
|
||||
const name = this.expect(TokenType.Identifier);
|
||||
this.expect(TokenType.LParen);
|
||||
|
||||
const args: ArgumentNode[] = [];
|
||||
|
||||
if (this.current().type !== TokenType.RParen) {
|
||||
args.push(this.parseArg());
|
||||
|
||||
while (this.current().type === TokenType.Comma) {
|
||||
this.advance(); // skip comma
|
||||
if (this.current().type === TokenType.RParen) break; // trailing comma
|
||||
args.push(this.parseArg());
|
||||
}
|
||||
}
|
||||
|
||||
this.expect(TokenType.RParen);
|
||||
|
||||
return { kind: "component", name: name.value, args };
|
||||
}
|
||||
|
||||
private parseArg(): ArgumentNode {
|
||||
// Look ahead: if we see `identifier ":"`, it's a named arg
|
||||
if (
|
||||
this.current().type === TokenType.Identifier &&
|
||||
this.peek()?.type === TokenType.Colon
|
||||
) {
|
||||
// But only if the identifier is NOT PascalCase (which would be a component)
|
||||
const isPascalCase = /^[A-Z]/.test(this.current().value);
|
||||
if (!isPascalCase) {
|
||||
const key = this.current().value;
|
||||
this.advance(); // identifier
|
||||
this.advance(); // colon
|
||||
const value = this.parseExpression();
|
||||
return { key, value };
|
||||
}
|
||||
}
|
||||
|
||||
// Positional argument
|
||||
const value = this.parseExpression();
|
||||
return { key: null, value };
|
||||
}
|
||||
|
||||
private parseArray(): ASTNode {
|
||||
this.expect(TokenType.LBracket);
|
||||
|
||||
const elements: ASTNode[] = [];
|
||||
|
||||
if (this.current().type !== TokenType.RBracket) {
|
||||
elements.push(this.parseExpression());
|
||||
|
||||
while (this.current().type === TokenType.Comma) {
|
||||
this.advance();
|
||||
if (this.current().type === TokenType.RBracket) break;
|
||||
elements.push(this.parseExpression());
|
||||
}
|
||||
}
|
||||
|
||||
this.expect(TokenType.RBracket);
|
||||
|
||||
return { kind: "array", elements };
|
||||
}
|
||||
|
||||
private parseObject(): ASTNode {
|
||||
this.expect(TokenType.LBrace);
|
||||
|
||||
const entries: { key: string; value: ASTNode }[] = [];
|
||||
|
||||
if (this.current().type !== TokenType.RBrace) {
|
||||
entries.push(this.parseObjectEntry());
|
||||
|
||||
while (this.current().type === TokenType.Comma) {
|
||||
this.advance();
|
||||
if (this.current().type === TokenType.RBrace) break;
|
||||
entries.push(this.parseObjectEntry());
|
||||
}
|
||||
}
|
||||
|
||||
this.expect(TokenType.RBrace);
|
||||
|
||||
return { kind: "object", entries };
|
||||
}
|
||||
|
||||
private parseObjectEntry(): { key: string; value: ASTNode } {
|
||||
let key: string;
|
||||
|
||||
if (this.current().type === TokenType.String) {
|
||||
key = this.current().value;
|
||||
this.advance();
|
||||
} else if (this.current().type === TokenType.Identifier) {
|
||||
key = this.current().value;
|
||||
this.advance();
|
||||
} else {
|
||||
throw this.error(`Expected object key, got ${this.current().type}`);
|
||||
}
|
||||
|
||||
this.expect(TokenType.Colon);
|
||||
const value = this.parseExpression();
|
||||
|
||||
return { key, value };
|
||||
}
|
||||
|
||||
// ── Helpers ──
|
||||
|
||||
private current(): Token {
|
||||
return (
|
||||
this.tokens[this.pos] ?? {
|
||||
type: TokenType.EOF,
|
||||
value: "",
|
||||
offset: -1,
|
||||
line: -1,
|
||||
column: -1,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
private peek(): Token | undefined {
|
||||
return this.tokens[this.pos + 1];
|
||||
}
|
||||
|
||||
private advance(): Token {
|
||||
const token = this.current();
|
||||
if (this.pos < this.tokens.length) this.pos++;
|
||||
return token;
|
||||
}
|
||||
|
||||
private expect(type: TokenType): Token {
|
||||
const token = this.current();
|
||||
if (token.type !== type) {
|
||||
throw this.error(`Expected ${type}, got ${token.type} "${token.value}"`);
|
||||
}
|
||||
this.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
private isAtEnd(): boolean {
|
||||
return this.current().type === TokenType.EOF;
|
||||
}
|
||||
|
||||
private skipNewlines(): void {
|
||||
while (this.current().type === TokenType.Newline) {
|
||||
this.advance();
|
||||
}
|
||||
}
|
||||
|
||||
private skipToNextStatement(): void {
|
||||
while (!this.isAtEnd() && this.current().type !== TokenType.Newline) {
|
||||
this.advance();
|
||||
}
|
||||
this.skipNewlines();
|
||||
}
|
||||
|
||||
/** Build a ParseErrorException positioned at the current token. */
private error(message: string): ParseErrorException {
  const { line, column, offset } = this.current();
  return new ParseErrorException(message, line, column, offset);
}
|
||||
}
|
||||
|
||||
class ParseErrorException extends Error {
|
||||
line: number;
|
||||
column: number;
|
||||
offset: number;
|
||||
|
||||
constructor(message: string, line: number, column: number, offset: number) {
|
||||
super(message);
|
||||
this.line = line;
|
||||
this.column = column;
|
||||
this.offset = offset;
|
||||
}
|
||||
|
||||
toParseError(): ParseError {
|
||||
return {
|
||||
message: this.message,
|
||||
line: this.line,
|
||||
column: this.column,
|
||||
offset: this.offset,
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -1,100 +0,0 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { resolveReferences } from "./resolver";
|
||||
import type { Statement } from "../types";
|
||||
|
||||
describe("resolveReferences", () => {
  // A reference node is replaced by the subtree it points to.
  it("resolves simple variable references", () => {
    const statements: Statement[] = [
      { name: "a", value: { kind: "literal", value: "hello" } },
      { name: "b", value: { kind: "reference", name: "a" } },
    ];

    const { resolved, errors } = resolveReferences(statements);
    expect(errors).toHaveLength(0);
    expect(resolved.get("b")).toEqual({ kind: "literal", value: "hello" });
  });

  // References nested inside component arguments are resolved in place.
  it("resolves nested references in components", () => {
    const statements: Statement[] = [
      { name: "label", value: { kind: "literal", value: "Click me" } },
      {
        name: "btn",
        value: {
          kind: "component",
          name: "Button",
          args: [{ key: null, value: { kind: "reference", name: "label" } }],
        },
      },
    ];

    const { resolved, errors } = resolveReferences(statements);
    expect(errors).toHaveLength(0);
    const btn = resolved.get("btn");
    expect(btn?.kind).toBe("component");
    if (btn?.kind === "component") {
      expect(btn.args[0]!.value).toEqual({
        kind: "literal",
        value: "Click me",
      });
    }
  });

  // Mutually recursive definitions must surface a "Circular" error
  // rather than recurse forever.
  it("detects circular references", () => {
    const statements: Statement[] = [
      { name: "a", value: { kind: "reference", name: "b" } },
      { name: "b", value: { kind: "reference", name: "a" } },
    ];

    const { errors } = resolveReferences(statements);
    expect(errors.some((e) => e.message.includes("Circular"))).toBe(true);
  });

  // Unknown names are not an error (during streaming they may simply not
  // have been defined yet), so the reference node is kept untouched.
  it("leaves unknown references as-is", () => {
    const statements: Statement[] = [
      { name: "x", value: { kind: "reference", name: "unknown" } },
    ];

    const { resolved, errors } = resolveReferences(statements);
    expect(errors).toHaveLength(0);
    expect(resolved.get("x")).toEqual({ kind: "reference", name: "unknown" });
  });

  // Without an explicit "root" binding, the last statement wins.
  it("uses last statement as root by default", () => {
    const statements: Statement[] = [
      { name: "a", value: { kind: "literal", value: 1 } },
      { name: "b", value: { kind: "literal", value: 2 } },
    ];

    const { root } = resolveReferences(statements);
    expect(root).toEqual({ kind: "literal", value: 2 });
  });

  // An explicit "root" binding takes precedence over statement order.
  it("uses statement named 'root' as root", () => {
    const statements: Statement[] = [
      { name: "root", value: { kind: "literal", value: "I am root" } },
      { name: "other", value: { kind: "literal", value: "not root" } },
    ];

    const { root } = resolveReferences(statements);
    expect(root).toEqual({ kind: "literal", value: "I am root" });
  });

  // Array elements are traversed and resolved like any other subtree.
  it("resolves references in arrays", () => {
    const statements: Statement[] = [
      { name: "item", value: { kind: "literal", value: "hello" } },
      {
        name: "list",
        value: {
          kind: "array",
          elements: [{ kind: "reference", name: "item" }],
        },
      },
    ];

    const { resolved } = resolveReferences(statements);
    const list = resolved.get("list");
    if (list?.kind === "array") {
      expect(list.elements[0]).toEqual({ kind: "literal", value: "hello" });
    }
  });
});
|
||||
@@ -1,135 +0,0 @@
|
||||
import type { ASTNode, Statement, ParseError } from "../types";
|
||||
|
||||
/**
|
||||
* Resolve variable references in the AST.
|
||||
*
|
||||
* Each statement defines `name = expression`. Later expressions can reference
|
||||
* earlier variable names. This pass replaces ReferenceNodes with the actual
|
||||
* subtree they point to, detecting cycles.
|
||||
*/
|
||||
export function resolveReferences(statements: Statement[]): {
|
||||
resolved: Map<string, ASTNode>;
|
||||
root: ASTNode | null;
|
||||
errors: ParseError[];
|
||||
} {
|
||||
const definitions = new Map<string, ASTNode>();
|
||||
const resolved = new Map<string, ASTNode>();
|
||||
const errors: ParseError[] = [];
|
||||
|
||||
// Build definition map
|
||||
for (const stmt of statements) {
|
||||
definitions.set(stmt.name, stmt.value);
|
||||
}
|
||||
|
||||
// Resolve each statement
|
||||
for (const stmt of statements) {
|
||||
const resolving = new Set<string>();
|
||||
const result = resolveNode(
|
||||
stmt.value,
|
||||
definitions,
|
||||
resolved,
|
||||
resolving,
|
||||
errors
|
||||
);
|
||||
resolved.set(stmt.name, result);
|
||||
}
|
||||
|
||||
// Root is the last statement or the one named "root"
|
||||
let root: ASTNode | null = null;
|
||||
if (resolved.has("root")) {
|
||||
root = resolved.get("root")!;
|
||||
} else if (statements.length > 0) {
|
||||
const lastStmt = statements[statements.length - 1]!;
|
||||
root = resolved.get(lastStmt.name) ?? null;
|
||||
}
|
||||
|
||||
return { resolved, root, errors };
|
||||
}
|
||||
|
||||
function resolveNode(
|
||||
node: ASTNode,
|
||||
definitions: Map<string, ASTNode>,
|
||||
resolved: Map<string, ASTNode>,
|
||||
resolving: Set<string>,
|
||||
errors: ParseError[]
|
||||
): ASTNode {
|
||||
switch (node.kind) {
|
||||
case "reference": {
|
||||
const { name } = node;
|
||||
|
||||
// Already resolved
|
||||
if (resolved.has(name)) {
|
||||
return resolved.get(name)!;
|
||||
}
|
||||
|
||||
// Cycle detection
|
||||
if (resolving.has(name)) {
|
||||
errors.push({
|
||||
message: `Circular reference detected: "${name}"`,
|
||||
line: 0,
|
||||
column: 0,
|
||||
});
|
||||
return { kind: "literal", value: null };
|
||||
}
|
||||
|
||||
// Unknown reference — leave as-is (may be defined later in streaming)
|
||||
const definition = definitions.get(name);
|
||||
if (!definition) {
|
||||
return node; // keep as unresolved reference
|
||||
}
|
||||
|
||||
resolving.add(name);
|
||||
const result = resolveNode(
|
||||
definition,
|
||||
definitions,
|
||||
resolved,
|
||||
resolving,
|
||||
errors
|
||||
);
|
||||
resolving.delete(name);
|
||||
resolved.set(name, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
case "component":
|
||||
return {
|
||||
...node,
|
||||
args: node.args.map((arg) => ({
|
||||
...arg,
|
||||
value: resolveNode(
|
||||
arg.value,
|
||||
definitions,
|
||||
resolved,
|
||||
resolving,
|
||||
errors
|
||||
),
|
||||
})),
|
||||
};
|
||||
|
||||
case "array":
|
||||
return {
|
||||
...node,
|
||||
elements: node.elements.map((el) =>
|
||||
resolveNode(el, definitions, resolved, resolving, errors)
|
||||
),
|
||||
};
|
||||
|
||||
case "object":
|
||||
return {
|
||||
...node,
|
||||
entries: node.entries.map((entry) => ({
|
||||
...entry,
|
||||
value: resolveNode(
|
||||
entry.value,
|
||||
definitions,
|
||||
resolved,
|
||||
resolving,
|
||||
errors
|
||||
),
|
||||
})),
|
||||
};
|
||||
|
||||
case "literal":
|
||||
return node;
|
||||
}
|
||||
}
|
||||
@@ -1,280 +0,0 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { z } from "zod";
|
||||
import { createStreamingParser } from "./streaming";
|
||||
import { createLibrary } from "../library";
|
||||
import { defineComponent } from "../component";
|
||||
import { autoClose } from "./autoclose";
|
||||
|
||||
function makeTestLibrary() {
|
||||
return createLibrary([
|
||||
defineComponent({
|
||||
name: "Text",
|
||||
description: "Displays text",
|
||||
props: z.object({ children: z.string() }),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Button",
|
||||
description: "Clickable button",
|
||||
props: z.object({
|
||||
children: z.string(),
|
||||
main: z.boolean().optional(),
|
||||
actionId: z.string().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Stack",
|
||||
description: "Vertical stack layout",
|
||||
props: z.object({
|
||||
children: z.array(z.unknown()).optional(),
|
||||
gap: z.string().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
]);
|
||||
}
|
||||
|
||||
describe("Streaming edge cases", () => {
  // Worst-case chunking: every character arrives in its own push.
  it("single character at a time streaming", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    const input = 'title = Text("Hello")\n';
    let result;
    for (const ch of input) {
      result = parser.push(ch);
    }

    expect(result!.statements).toHaveLength(1);
    expect(result!.statements[0]!.name).toBe("title");
    expect(result!.root).not.toBeNull();
  });

  it("token split across chunks — component name", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    // "Text" split as "Tex" + "t"
    parser.push("a = Tex");
    const result = parser.push('t("hello")\n');

    expect(result.statements).toHaveLength(1);
    expect(result.statements[0]!.value).toMatchObject({
      kind: "component",
      name: "Text",
    });
  });

  it("string split mid-escape sequence", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    // Split right before the escaped quote
    parser.push('a = Text("hel');
    const result = parser.push('lo \\"world\\"")\n');

    expect(result.statements).toHaveLength(1);
    expect(result.root).not.toBeNull();
  });

  it("multi-line component split across chunks", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    // The streaming parser splits on newlines, so multi-line expressions
    // need to be on a single line or use variables. Test that a long
    // single-line expression streamed in chunks works correctly.
    parser.push('root = Stack([Text("line 1"), Text("line');
    const result = parser.push(' 2")])\n');

    expect(result.statements).toHaveLength(1);
    expect(result.root).not.toBeNull();
  });

  // Empty pushes must be no-ops on the parser's internal buffer.
  it("empty chunks do not corrupt state", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    parser.push("");
    parser.push('a = Text("hi")');
    parser.push("");
    parser.push("");
    const result = parser.push("\n");

    expect(result.statements).toHaveLength(1);
    expect(result.statements[0]!.name).toBe("a");
  });

  // Many complete statements delivered at once in a single push.
  it("very large single chunk with multiple complete statements", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    const lines =
      Array.from({ length: 50 }, (_, i) => `v${i} = Text("item ${i}")`).join(
        "\n"
      ) + "\n";

    const result = parser.push(lines);

    expect(result.statements).toHaveLength(50);
    expect(result.root).not.toBeNull();
  });

  it("interleaved complete and partial lines", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    // Complete line followed by partial
    parser.push('a = Text("done")\nb = Text("part');
    let result = parser.result();

    // "a" is cached complete, "b" is partial but auto-closed
    expect(result.statements.length).toBeGreaterThanOrEqual(1);

    // Now finish the partial and add another complete
    result = parser.push('ial")\nc = Text("also done")\n');

    expect(result.statements).toHaveLength(3);
    expect(result.statements.map((s) => s.name)).toEqual(["a", "b", "c"]);
  });

  it("variable reference before definition — streaming order matters", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    // Reference "label" before it's defined
    parser.push("root = Stack([label])\n");
    let result = parser.result();

    // At this point "label" is an unresolved reference — should not crash
    expect(result.statements).toHaveLength(1);
    expect(result.errors.length).toBeGreaterThanOrEqual(0);

    // Now define it
    result = parser.push('label = Text("Hi")\n');

    // After defining, root should pick it up via resolution
    expect(result.statements).toHaveLength(2);
  });

  it("repeated push after complete response is idempotent", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    const full = 'a = Text("done")\n';
    const first = parser.push(full);

    // Push empty strings — result should remain stable
    const second = parser.push("");
    const third = parser.push("");

    expect(second.statements).toEqual(first.statements);
    expect(third.statements).toEqual(first.statements);
    expect(third.root).toEqual(first.root);
  });

  it("unicode characters split across chunk boundaries", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    // JS strings are UTF-16, so multi-byte chars like emoji are fine as
    // string splits — but let's verify the parser handles them gracefully
    parser.push('a = Text("hello ');
    parser.push("🌍");
    parser.push(" world");
    const result = parser.push('")\n');

    expect(result.statements).toHaveLength(1);
    expect(result.root).not.toBeNull();
  });

  // Non-Latin text delivered one character per push.
  it("unicode CJK characters streamed char by char", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    const input = 'a = Text("你好世界")\n';
    let result;
    for (const ch of input) {
      result = parser.push(ch);
    }

    expect(result!.statements).toHaveLength(1);
    expect(result!.root).not.toBeNull();
  });
});
|
||||
|
||||
// autoClose balances unterminated brackets/strings in a partial line so the
// parser can process it; these tests pin its edge-case behavior.
describe("autoClose additional edge cases", () => {
  it("mixed bracket types — ([{", () => {
    const result = autoClose("([{");
    expect(result).toBe("([{}])");
  });

  it("string containing bracket chars is not counted", () => {
    // The ( inside the string should not produce a closer
    const result = autoClose('"hello (world"');
    // String is closed, paren inside string is ignored — no extra closers
    expect(result).toBe('"hello (world"');
  });

  it("unclosed string containing bracket chars", () => {
    // Unclosed string with brackets inside — brackets are ignored, string gets closed
    const result = autoClose('"hello (world');
    expect(result).toBe('"hello (world"');
  });

  it("only opening brackets — (((", () => {
    const result = autoClose("(((");
    expect(result).toBe("((()))");
  });

  it("alternating open/close with extras — (()(", () => {
    const result = autoClose("(()(");
    // Stack: push ( → push ( → pop for ) → push ( → closers left: (, (
    expect(result).toBe("(()())");
  });

  it("all bracket types deeply nested", () => {
    const result = autoClose("({[");
    expect(result).toBe("({[]})");
  });

  it("partial close leaves remaining openers", () => {
    // ( [ ] — bracket closed, paren still open
    const result = autoClose("([]");
    expect(result).toBe("([])");
  });

  it("escaped quote at end of string does not close it", () => {
    // The backslash escapes the quote, so the string is still open
    const result = autoClose('"hello\\');
    // escaped flag is set, next char would be escaped — string still open
    expect(result).toBe('"hello\\"');
  });

  it("single quotes work the same as double quotes", () => {
    const result = autoClose("'hello");
    expect(result).toBe("'hello'");
  });

  it("mixed string types — only the active one matters", () => {
    // Double-quoted string containing a single quote — single quote is literal
    const result = autoClose("\"it's");
    expect(result).toBe('"it\'s"');
  });

  it("empty string input returns empty", () => {
    expect(autoClose("")).toBe("");
  });

  it("already balanced input returns unchanged", () => {
    expect(autoClose("({[]})")).toBe("({[]})");
  });

  it("mismatched close bracket is tolerated", () => {
    // A ] with no matching [ — should not crash, just ignored
    const result = autoClose("(]");
    // The ] doesn't match (, so it's ignored — ( still needs closing
    expect(result).toBe("(])");
  });
});
|
||||
@@ -1,101 +0,0 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { z } from "zod";
|
||||
import { createStreamingParser } from "./streaming";
|
||||
import { createLibrary } from "../library";
|
||||
import { defineComponent } from "../component";
|
||||
|
||||
function makeTestLibrary() {
|
||||
return createLibrary([
|
||||
defineComponent({
|
||||
name: "Text",
|
||||
description: "Text",
|
||||
props: z.object({ children: z.string() }),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Button",
|
||||
description: "Button",
|
||||
props: z.object({
|
||||
children: z.string(),
|
||||
main: z.boolean().optional(),
|
||||
actionId: z.string().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
defineComponent({
|
||||
name: "Stack",
|
||||
description: "Stack",
|
||||
props: z.object({
|
||||
children: z.array(z.unknown()).optional(),
|
||||
gap: z.string().optional(),
|
||||
}),
|
||||
component: null,
|
||||
}),
|
||||
]);
|
||||
}
|
||||
|
||||
describe("StreamingParser", () => {
  // A full statement delivered in one push parses immediately.
  it("parses a complete response", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    const result = parser.push('title = Text("Hello World")\n');
    expect(result.statements).toHaveLength(1);
    expect(result.root).not.toBeNull();
  });

  it("handles incremental streaming", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    // First chunk — partial line
    let result = parser.push('title = Text("He');
    expect(result.statements.length).toBeGreaterThanOrEqual(0);

    // Complete the line
    result = parser.push('llo World")\n');
    expect(result.statements).toHaveLength(1);
  });

  // Statements accumulate across pushes.
  it("handles multi-line streaming", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    parser.push('a = Text("Line 1")\n');
    const result = parser.push('b = Text("Line 2")\n');
    expect(result.statements).toHaveLength(2);
  });

  it("caches complete lines and only re-parses partial", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    // First complete line
    parser.push('a = Text("First")\n');

    // Partial second line — should still have first line cached
    const result = parser.push('b = Text("Sec');
    expect(result.statements.length).toBeGreaterThanOrEqual(1);
  });

  // reset() discards all buffered statements.
  it("resets on shorter input", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    parser.push('a = Text("Hello")\n');
    parser.reset();

    const result = parser.push('x = Text("Fresh")\n');
    expect(result.statements).toHaveLength(1);
    expect(result.statements[0]!.name).toBe("x");
  });

  it("result() returns last parse result", () => {
    const lib = makeTestLibrary();
    const parser = createStreamingParser(lib);

    parser.push('a = Text("Hello")\n');
    const result = parser.result();
    expect(result.statements).toHaveLength(1);
  });
});
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user