Compare commits


1 commit

Author     SHA1        Message  Date
pablonyx   9523df353d  update   2025-04-03 12:45:25 -07:00
1121 changed files with 14011 additions and 53190 deletions


@@ -25,10 +25,6 @@ inputs:
tags:
description: 'Image tags'
required: true
no-cache:
description: 'Read from cache'
required: false
default: 'false'
cache-from:
description: 'Cache sources'
required: false
@@ -59,7 +55,6 @@ runs:
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
no-cache: ${{ inputs.no-cache }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
@@ -82,7 +77,6 @@ runs:
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
no-cache: ${{ inputs.no-cache }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
@@ -105,7 +99,6 @@ runs:
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
no-cache: ${{ inputs.no-cache }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
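Reviewer note: the runs: steps above appear to forward these inputs to Docker's build-push action, which itself wraps docker buildx. A roughly equivalent raw invocation — a sketch only, with placeholder prefix/region/bucket values (the real ones come from the RUNS_ON_* variables in the workflows below) — would be:

  # placeholders: prefix, region, and bucket are supplied by the calling workflow
  docker buildx build ./backend \
    --file ./backend/Dockerfile \
    --tag onyxdotapp/onyx-backend:test \
    --load \
    --cache-from type=s3,prefix=cache/onyx/backend/,region=us-east-1,bucket=build-cache \
    --cache-to type=s3,prefix=cache/onyx/backend/,region=us-east-1,bucket=build-cache,mode=max
    # mode=max also caches intermediate layers, not just the final image layers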


@@ -7,47 +7,18 @@ on:
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
# don't tag cloud images with "latest"
LATEST_TAG: ${{ contains(github.ref_name, 'latest') && !contains(github.ref_name, 'cloud') }}
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
jobs:
build-and-push:
# TODO: investigate a matrix build like the web container
# See https://runs-on.com/runners/linux/
runs-on:
- runs-on
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
- run-id=${{ github.run_id }}
- tag=platform-${{ matrix.platform }}
strategy:
fail-fast: false
matrix:
platform:
- linux/amd64
- linux/arm64
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
steps:
- name: Prepare
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Checkout code
uses: actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -63,80 +34,18 @@ jobs:
sudo apt-get install -y build-essential
- name: Backend Image Docker Build and Push
id: build
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: ./backend
file: ./backend/Dockerfile
platforms: ${{ matrix.platform }}
platforms: linux/amd64,linux/arm64
push: true
tags: |
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
build-args: |
ONYX_VERSION=${{ github.ref_name }}
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: backend-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge:
runs-on: ubuntu-latest
needs:
- build-and-push
steps:
# Needed for trivyignore
- name: Checkout
uses: actions/checkout@v4
- name: Download digests
uses: actions/download-artifact@v4
with:
path: /tmp/digests
pattern: backend-digests-*-${{ github.run_id }}
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
@@ -147,8 +56,6 @@ jobs:
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
with:
# To run locally: trivy image --severity HIGH,CRITICAL onyxdotapp/onyx-backend
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
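Reviewer note: in the matrix version being removed, each platform job pushes its image by digest only, and the merge job stitches those digests into one multi-arch tag. Expanded with hypothetical digests, the imagetools command above amounts to:

  # tags come from $DOCKER_METADATA_OUTPUT_JSON; the digests are the per-platform build outputs
  docker buildx imagetools create \
    -t onyxdotapp/onyx-backend:v1.2.3 \
    onyxdotapp/onyx-backend@sha256:aaaa... \
    onyxdotapp/onyx-backend@sha256:bbbb...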


@@ -4,12 +4,12 @@ name: Build and Push Cloud Web Image on Tag
on:
push:
tags:
- "*cloud*"
- "*"
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
DEPLOYMENT: cloud
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
jobs:
build:
runs-on:
@@ -38,10 +38,9 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -54,7 +53,7 @@ jobs:
- name: Build and push by digest
id: build
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: ./web
file: ./web/Dockerfile
@@ -71,12 +70,10 @@ jobs:
NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
NODE_OPTIONS=--max-old-space-size=8192
# needed due to weird interactions with the builds for different platforms
no-cache: true
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# no-cache needed due to weird interactions with the builds for different platforms
# NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
- name: Export digest
run: |
@@ -87,7 +84,7 @@ jobs:
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: cloudweb-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
name: digests-${{ env.PLATFORM_PAIR }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
@@ -101,7 +98,7 @@ jobs:
uses: actions/download-artifact@v4
with:
path: /tmp/digests
pattern: cloudweb-digests-*-${{ github.run_id }}
pattern: digests-*
merge-multiple: true
- name: Set up Docker Buildx
@@ -112,10 +109,6 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
- name: Login to Docker Hub
uses: docker/login-action@v3
@@ -143,8 +136,6 @@ jobs:
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
with:
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: "CRITICAL,HIGH"
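Reviewer note: the TRIVY_DB_REPOSITORY overrides pin the vulnerability databases to public ECR mirrors to dodge Docker Hub rate limits (per the comment in the backend workflow). A local reproduction of this scan, with a placeholder tag, looks like:

  TRIVY_DB_REPOSITORY=public.ecr.aws/aquasecurity/trivy-db:2 \
  TRIVY_JAVA_DB_REPOSITORY=public.ecr.aws/aquasecurity/trivy-java-db:1 \
  trivy image --severity CRITICAL,HIGH docker.io/onyxdotapp/onyx-web-server-cloud:v1.2.3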


@@ -7,13 +7,10 @@ on:
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
DOCKER_BUILDKIT: 1
BUILDKIT_PROGRESS: plain
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
# don't tag cloud images with "latest"
LATEST_TAG: ${{ contains(github.ref_name, 'latest') && !contains(github.ref_name, 'cloud') }}
jobs:
# Bypassing this for now as the idea of not building is glitching
@@ -54,8 +51,6 @@ jobs:
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on:
[runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"]
env:
PLATFORM_PAIR: linux-amd64
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -80,7 +75,7 @@ jobs:
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and Push AMD64
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: ./backend
file: ./backend/Dockerfile.model_server
@@ -91,17 +86,12 @@ jobs:
DANSWER_VERSION=${{ github.ref_name }}
outputs: type=registry
provenance: false
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# no-cache: true
build-arm64:
needs: [check_model_server_changes]
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on:
[runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-arm64"]
env:
PLATFORM_PAIR: linux-arm64
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -126,7 +116,7 @@ jobs:
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and Push ARM64
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: ./backend
file: ./backend/Dockerfile.model_server
@@ -137,8 +127,6 @@ jobs:
DANSWER_VERSION=${{ github.ref_name }}
outputs: type=registry
provenance: false
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
merge-and-scan:
needs: [build-amd64, build-arm64, check_model_server_changes]
@@ -168,8 +156,6 @@ jobs:
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
with:
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: "CRITICAL,HIGH"
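Reviewer note: with amd64 and arm64 built and pushed by separate jobs here, the merge-and-scan job produces the final multi-arch tag. A quick local check that a tag really carries both platforms, with a placeholder tag (this mirrors the inspect step in the backend workflow):

  docker buildx imagetools inspect onyxdotapp/onyx-model-server:v1.2.3
  # output lists one manifest per platform, e.g. linux/amd64 and linux/arm64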


@@ -8,25 +8,9 @@ on:
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
DEPLOYMENT: standalone
jobs:
precheck:
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
outputs:
should-run: ${{ steps.set-output.outputs.should-run }}
steps:
- name: Check if tag contains "cloud"
id: set-output
run: |
if [[ "${{ github.ref_name }}" == *cloud* ]]; then
echo "should-run=false" >> "$GITHUB_OUTPUT"
else
echo "should-run=true" >> "$GITHUB_OUTPUT"
fi
build:
needs: precheck
if: needs.precheck.outputs.should-run == 'true'
runs-on:
- runs-on
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
@@ -53,11 +37,9 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -70,7 +52,7 @@ jobs:
- name: Build and push by digest
id: build
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: ./web
file: ./web/Dockerfile
@@ -80,13 +62,11 @@ jobs:
ONYX_VERSION=${{ github.ref_name }}
NODE_OPTIONS=--max-old-space-size=8192
# needed due to weird interactions with the builds for different platforms
no-cache: true
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# no-cache needed due to weird interactions with the builds for different platforms
# NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
- name: Export digest
run: |
mkdir -p /tmp/digests
@@ -96,22 +76,21 @@ jobs:
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: web-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
name: digests-${{ env.PLATFORM_PAIR }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge:
runs-on: ubuntu-latest
needs:
- build
if: needs.precheck.outputs.should-run == 'true'
runs-on: ubuntu-latest
steps:
- name: Download digests
uses: actions/download-artifact@v4
with:
path: /tmp/digests
pattern: web-digests-*-${{ github.run_id }}
pattern: digests-*
merge-multiple: true
- name: Set up Docker Buildx
@@ -122,11 +101,6 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@v3
@@ -154,8 +128,6 @@ jobs:
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
with:
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: "CRITICAL,HIGH"


@@ -37,11 +37,6 @@ jobs:
echo "changed=true" >> "$GITHUB_OUTPUT"
fi
# uncomment to force run chart-testing
# - name: Force run chart-testing (list-changed)
# id: list-changed
# run: echo "changed=true" >> $GITHUB_OUTPUT
# lint all charts if any changes were detected
- name: Run chart-testing (lint)
if: steps.list-changed.outputs.changed == 'true'


@@ -16,55 +16,15 @@ env:
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
PLATFORM_PAIR: linux-amd64
jobs:
integration-tests:
# See https://runs-on.com/runners/linux/
runs-on:
[
runs-on,
runner=32cpu-linux-x64,
disk=large,
"run-id=${{ github.run_id }}",
]
runs-on: [runs-on, runner=32cpu-linux-x64, "run-id=${{ github.run_id }}"]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
cache: "pip"
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/ee.txt
- run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/ee.txt
- name: Generate OpenAPI schema
working-directory: ./backend
env:
PYTHONPATH: "."
run: |
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
- name: Generate OpenAPI Python client
working-directory: ./backend
run: |
docker run --rm \
-v "${{ github.workspace }}/backend/generated:/local" \
openapitools/openapi-generator-cli generate \
-i /local/openapi.json \
-g python \
-o /local/onyx_openapi_client \
--package-name onyx_openapi_client
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -101,8 +61,8 @@ jobs:
tags: onyxdotapp/onyx-backend:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build Model Server Docker image
uses: ./.github/actions/custom-build-and-push
@@ -113,8 +73,8 @@ jobs:
tags: onyxdotapp/onyx-model-server:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build integration test Docker image
uses: ./.github/actions/custom-build-and-push
@@ -125,8 +85,8 @@ jobs:
tags: onyxdotapp/onyx-integration:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# Start containers for multi-tenant tests
- name: Start Docker containers for multi-tenant tests
@@ -153,8 +113,6 @@ jobs:
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e DB_READONLY_USER=db_readonly_user \
-e DB_READONLY_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e POSTGRES_USE_NULL_POOL=true \
-e VESPA_HOST=index \
@@ -200,7 +158,6 @@ jobs:
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
INTEGRATION_TESTS_MODE=true \
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
docker compose -f docker-compose.dev.yml -p onyx-stack up -d
id: start_docker
@@ -253,8 +210,6 @@ jobs:
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e DB_READONLY_USER=db_readonly_user \
-e DB_READONLY_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e POSTGRES_POOL_PRE_PING=true \
-e POSTGRES_USE_NULL_POOL=true \


@@ -16,52 +16,15 @@ env:
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
PLATFORM_PAIR: linux-amd64
jobs:
integration-tests-mit:
# See https://runs-on.com/runners/linux/
runs-on:
[
runs-on,
runner=32cpu-linux-x64,
disk=large,
"run-id=${{ github.run_id }}",
]
runs-on: [runs-on, runner=32cpu-linux-x64, "run-id=${{ github.run_id }}"]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
cache: "pip"
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
- run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
- name: Generate OpenAPI schema
working-directory: ./backend
env:
PYTHONPATH: "."
run: |
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
- name: Generate OpenAPI Python client
working-directory: ./backend
run: |
docker run --rm \
-v "${{ github.workspace }}/backend/generated:/local" \
openapitools/openapi-generator-cli generate \
-i /local/openapi.json \
-g python \
-o /local/onyx_openapi_client \
--package-name onyx_openapi_client
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -98,8 +61,8 @@ jobs:
tags: onyxdotapp/onyx-backend:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build Model Server Docker image
uses: ./.github/actions/custom-build-and-push
@@ -110,8 +73,8 @@ jobs:
tags: onyxdotapp/onyx-model-server:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build integration test Docker image
uses: ./.github/actions/custom-build-and-push
@@ -122,8 +85,8 @@ jobs:
tags: onyxdotapp/onyx-integration:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
- name: Start Docker containers
@@ -189,8 +152,6 @@ jobs:
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e DB_READONLY_USER=db_readonly_user \
-e DB_READONLY_PASSWORD=password \
-e POSTGRES_POOL_PRE_PING=true \
-e POSTGRES_USE_NULL_POOL=true \
-e VESPA_HOST=index \


@@ -10,7 +10,6 @@ env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
GEN_AI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
MOCK_LLM_RESPONSE: true
PYTEST_PLAYWRIGHT_SKIP_INITIAL_RESET: true
jobs:
playwright-tests:


@@ -31,33 +31,20 @@ jobs:
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
- name: Generate OpenAPI schema
working-directory: ./backend
env:
PYTHONPATH: "."
run: |
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
- name: Generate OpenAPI Python client
working-directory: ./backend
run: |
docker run --rm \
-v "${{ github.workspace }}/backend/generated:/local" \
openapitools/openapi-generator-cli generate \
-i /local/openapi.json \
-g python \
-o /local/onyx_openapi_client \
--package-name onyx_openapi_client
- name: Run MyPy
run: |
cd backend
mypy .
- name: Run ruff
run: |
cd backend
ruff .
- name: Check import order with reorder-python-imports
run: |
cd backend
find ./onyx -name "*.py" | xargs reorder-python-imports --py311-plus
find ./danswer -name "*.py" | xargs reorder-python-imports --py311-plus
- name: Check code formatting with Black
run: |


@@ -12,7 +12,7 @@ env:
# AWS
AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS }}
AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS }}
# Confluence
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_TEST_SPACE: ${{ secrets.CONFLUENCE_TEST_SPACE }}
@@ -20,72 +20,46 @@ env:
CONFLUENCE_TEST_PAGE_ID: ${{ secrets.CONFLUENCE_TEST_PAGE_ID }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
# Jira
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
# Gong
GONG_ACCESS_KEY: ${{ secrets.GONG_ACCESS_KEY }}
GONG_ACCESS_KEY_SECRET: ${{ secrets.GONG_ACCESS_KEY_SECRET }}
# Google
GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR }}
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1 }}
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR }}
GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR }}
GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}
# Slab
SLAB_BOT_TOKEN: ${{ secrets.SLAB_BOT_TOKEN }}
# Zendesk
ZENDESK_SUBDOMAIN: ${{ secrets.ZENDESK_SUBDOMAIN }}
ZENDESK_EMAIL: ${{ secrets.ZENDESK_EMAIL }}
ZENDESK_TOKEN: ${{ secrets.ZENDESK_TOKEN }}
# Salesforce
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
# Airtable
AIRTABLE_TEST_BASE_ID: ${{ secrets.AIRTABLE_TEST_BASE_ID }}
AIRTABLE_TEST_TABLE_ID: ${{ secrets.AIRTABLE_TEST_TABLE_ID }}
AIRTABLE_TEST_TABLE_NAME: ${{ secrets.AIRTABLE_TEST_TABLE_NAME }}
AIRTABLE_ACCESS_TOKEN: ${{ secrets.AIRTABLE_ACCESS_TOKEN }}
# Sharepoint
SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }}
SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ secrets.SHAREPOINT_CLIENT_DIRECTORY_ID }}
SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
# Github
ACCESS_TOKEN_GITHUB: ${{ secrets.ACCESS_TOKEN_GITHUB }}
# Gitlab
GITLAB_ACCESS_TOKEN: ${{ secrets.GITLAB_ACCESS_TOKEN }}
# Gitbook
GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}
# Notion
NOTION_INTEGRATION_TOKEN: ${{ secrets.NOTION_INTEGRATION_TOKEN }}
# Highspot
HIGHSPOT_KEY: ${{ secrets.HIGHSPOT_KEY }}
HIGHSPOT_SECRET: ${{ secrets.HIGHSPOT_SECRET }}
# Slack
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
# Teams
TEAMS_APPLICATION_ID: ${{ secrets.TEAMS_APPLICATION_ID }}
TEAMS_DIRECTORY_ID: ${{ secrets.TEAMS_DIRECTORY_ID }}
TEAMS_SECRET: ${{ secrets.TEAMS_SECRET }}
jobs:
connectors-check:
# See https://runs-on.com/runners/linux/
@@ -117,15 +91,7 @@ jobs:
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
py.test \
-n 8 \
--dist loadfile \
--durations=8 \
-o junit_family=xunit2 \
-xv \
--ff \
backend/tests/daily/connectors
run: py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors
- name: Alert on Failure
if: failure() && github.event_name == 'schedule'
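Reviewer note on the flags dropped from the test invocation: -n and --dist come from the pytest-xdist plugin, so the old command fanned the suite out across 8 workers while keeping each file's tests on one worker; the new command runs everything in a single process:

  # before: 8 xdist workers, tests grouped by file (--dist loadfile), report the 8 slowest tests
  py.test -n 8 --dist loadfile --durations=8 -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors
  # after: serial run; -x stops on first failure, --ff runs previously failed tests first
  py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors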


@@ -15,9 +15,6 @@ jobs:
env:
PYTHONPATH: ./backend
REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
steps:
- name: Checkout code

.gitignore

@@ -1,28 +1,12 @@
# editors
.vscode
.zed
# macos
.env
.DS_store
# python
.venv
.mypy_cache
.idea
# testing
/deployment/data/nginx/app.conf
.vscode/
*.sw?
/backend/tests/regression/answer_quality/search_test_config.yaml
/web/test-results/
backend/onyx/agent_search/main/test_data.json
backend/tests/regression/answer_quality/test_data.json
backend/tests/regression/search_quality/eval-*
backend/tests/regression/search_quality/search_eval_config.yaml
backend/tests/regression/search_quality/*.json
# secret files
.env
jira_test_env
# others
/deployment/data/nginx/app.conf
*.sw?
/backend/tests/regression/answer_quality/search_test_config.yaml


@@ -1,13 +1,12 @@
repos:
- repo: https://github.com/psf/black
rev: 25.1.0
rev: 23.3.0
hooks:
- id: black
language_version: python3.11
# this is a fork which keeps compatibility with black
- repo: https://github.com/wimglenn/reorder-python-imports-black
rev: v3.14.0
- repo: https://github.com/asottile/reorder_python_imports
rev: v3.9.0
hooks:
- id: reorder-python-imports
args: ['--py311-plus', '--application-directories=backend/']
@@ -19,14 +18,14 @@ repos:
# These settings will remove unused imports with side effects
# Note: The repo currently does not and should not have imports with side effects
- repo: https://github.com/PyCQA/autoflake
rev: v2.3.1
rev: v2.2.0
hooks:
- id: autoflake
args: [ '--remove-all-unused-imports', '--remove-unused-variables', '--in-place' , '--recursive']
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.11.4
rev: v0.0.286
hooks:
- id: ruff
- repo: https://github.com/pre-commit/mirrors-prettier
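Reviewer note: after changing hook revisions like this, the standard way to exercise the new pins across the whole tree (pre-commit rebuilds a hook's environment automatically when its rev changes):

  pre-commit run --all-files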


@@ -412,46 +412,6 @@
"group": "3"
}
},
{
// script to generate the openapi schema
"name": "Onyx OpenAPI Schema Generator",
"type": "debugpy",
"request": "launch",
"program": "scripts/onyx_openapi_schema.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"env": {
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"--filename",
"generated/openapi.json",
]
},
{
// script to debug multi tenant db issues
"name": "Onyx DB Manager (Top Chunks)",
"type": "debugpy",
"request": "launch",
"program": "scripts/debugging/onyx_db.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"env": {
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"--password",
"your_password_here",
"--port",
"5433",
"--report",
"top-chunks",
"--filename",
"generated/tenants_by_num_docs.csv"
]
},
{
"name": "Debug React Web App in Chrome",
"type": "chrome",


@@ -1,101 +0,0 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "austin",
"label": "Profile celery beat",
"envFile": "${workspaceFolder}/.env",
"options": {
"cwd": "${workspaceFolder}/backend"
},
"command": [
"sudo",
"-E"
],
"args": [
"celery",
"-A",
"onyx.background.celery.versioned_apps.beat",
"beat",
"--loglevel=INFO"
]
},
{
"type": "shell",
"label": "Generate Onyx OpenAPI Python client",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"options": {
"cwd": "${workspaceFolder}/backend"
},
"command": [
"openapi-generator"
],
"args": [
"generate",
"-i",
"generated/openapi.json",
"-g",
"python",
"-o",
"generated/onyx_openapi_client",
"--package-name",
"onyx_openapi_client",
]
},
{
"type": "shell",
"label": "Generate Typescript Fetch client (openapi-generator)",
"envFile": "${workspaceFolder}/.env",
"options": {
"cwd": "${workspaceFolder}"
},
"command": [
"openapi-generator"
],
"args": [
"generate",
"-i",
"backend/generated/openapi.json",
"-g",
"typescript-fetch",
"-o",
"${workspaceFolder}/web/src/lib/generated/onyx_api",
"--additional-properties=disallowAdditionalPropertiesIfNotPresent=false,legacyDiscriminatorBehavior=false,supportsES6=true",
]
},
{
"type": "shell",
"label": "Generate TypeScript Client (openapi-ts)",
"envFile": "${workspaceFolder}/.env",
"options": {
"cwd": "${workspaceFolder}/web"
},
"command": [
"npx"
],
"args": [
"openapi-typescript",
"../backend/generated/openapi.json",
"--output",
"./src/lib/generated/onyx-schema.ts",
]
},
{
"type": "shell",
"label": "Generate TypeScript Client (orval)",
"envFile": "${workspaceFolder}/.env",
"options": {
"cwd": "${workspaceFolder}/web"
},
"command": [
"npx"
],
"args": [
"orval",
"--config",
"orval.config.js",
]
}
]
}
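Reviewer note: the removed tasks wrap stock generator CLIs; with the same arguments as the task definitions above, the equivalent shell commands (run from backend/ and web/ respectively) are:

  # Python client (the openapi-generator task)
  openapi-generator generate -i generated/openapi.json -g python \
    -o generated/onyx_openapi_client --package-name onyx_openapi_client
  # TypeScript types (the openapi-typescript task)
  npx openapi-typescript ../backend/generated/openapi.json --output ./src/lib/generated/onyx-schema.ts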


@@ -1,4 +1,4 @@
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
# Contributing to Onyx
@@ -12,8 +12,8 @@ As an open source project in a rapidly changing space, we welcome all contributi
The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to start for contribution ideas.
To ensure that your contribution is aligned with the project's direction, please reach out to any maintainer on the Onyx team
via [Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-34lu4m7xg-TsKGO6h8PDvR5W27zTdyhA) /
To ensure that your contribution is aligned with the project's direction, please reach out to Hagen (or any other maintainer) on the Onyx team
via [Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA) /
[Discord](https://discord.gg/TDJ59cGV2X) or [email](mailto:founders@onyx.app).
Issues that have been explicitly approved by the maintainers (aligned with the direction of the project)
@@ -28,7 +28,7 @@ Your input is vital to making sure that Onyx moves in the right direction.
Before starting on implementation, please raise a GitHub issue.
Also, always feel free to message the founders (Chris Weaver / Yuhong Sun) on
[Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-34lu4m7xg-TsKGO6h8PDvR5W27zTdyhA) /
[Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA) /
[Discord](https://discord.gg/TDJ59cGV2X) directly about anything at all.
### Contributing Code


@@ -1,4 +1,4 @@
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/README.md"} -->
<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/README.md"} -->
<a name="readme-top"></a>
@@ -13,7 +13,7 @@
<a href="https://docs.onyx.app/" target="_blank">
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
</a>
<a href="https://join.slack.com/t/onyx-dot-app/shared_invite/zt-34lu4m7xg-TsKGO6h8PDvR5W27zTdyhA" target="_blank">
<a href="https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA" target="_blank">
<img src="https://img.shields.io/badge/slack-join-blue.svg?logo=slack" alt="Slack">
</a>
<a href="https://discord.gg/TDJ59cGV2X" target="_blank">

backend/.gitignore

@@ -9,6 +9,4 @@ api_keys.py
vespa-app.zip
dynamic_config_storage/
celerybeat-schedule*
onyx/connectors/salesforce/data/
.test.env
/generated
onyx/connectors/salesforce/data/


@@ -37,8 +37,7 @@ RUN apt-get update && \
pkg-config \
gcc \
nano \
vim \
postgresql-client && \
vim && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
@@ -86,7 +85,7 @@ Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('punkt_tab', quiet=True);"
nltk.download('punkt', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
# Set up application files


@@ -1,4 +1,4 @@
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/backend/alembic/README.md"} -->
<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/backend/alembic/README.md"} -->
# Alembic DB Migrations


@@ -24,7 +24,6 @@ from onyx.configs.constants import SSL_CERT_FILE
from shared_configs.configs import MULTI_TENANT, POSTGRES_DEFAULT_SCHEMA
from onyx.db.models import Base
from celery.backends.database.session import ResultModelBase # type: ignore
from onyx.db.engine import SqlEngine
# Make sure in alembic.ini [logger_root] level=INFO is set or most logging will be
# hidden! (defaults to level=WARN)
@@ -148,9 +147,6 @@ async def run_async_migrations() -> None:
continue_on_error,
) = get_schema_options()
# without init_engine, subsequent engine calls fail hard intentionally
SqlEngine.init_engine(pool_size=20, max_overflow=5)
engine = create_async_engine(
build_connection_string(),
poolclass=pool.NullPool,
@@ -184,10 +180,10 @@ async def run_async_migrations() -> None:
except Exception as e:
logger.error(f"Error migrating schema {schema}: {e}")
if not continue_on_error:
logger.error("--continue=true is not set, raising exception!")
logger.error("--continue is not set, raising exception!")
raise
logger.warning("--continue=true is set, continuing to next schema.")
logger.warning("--continue is set, continuing to next schema.")
else:
try:
@@ -206,21 +202,10 @@ async def run_async_migrations() -> None:
def run_migrations_offline() -> None:
"""
NOTE(rkuo): This generates a sql script that can be used to migrate the database ...
instead of migrating the db live via an open connection
Not clear on when this would be used by us or if it even works.
If it is offline, then why are there calls to the db engine?
This doesn't really get used when we migrate in the cloud."""
"""This doesn't really get used when we migrate in the cloud."""
logger.info("run_migrations_offline starting.")
# without init_engine, subsequent engine calls fail hard intentionally
SqlEngine.init_engine(pool_size=20, max_overflow=5)
schema_name, _, upgrade_all_tenants, continue_on_error = get_schema_options()
url = build_connection_string()
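Reviewer note on the --continue messages above: get_schema_options() reads per-invocation options, which alembic passes through with its -x flag, so — assuming the key matches the log text, which is an inference, not confirmed by this diff — a multi-tenant upgrade that skips failing schemas would be invoked roughly as:

  # hypothetical invocation; the exact -x keys are defined by get_schema_options()
  alembic -x continue=true upgrade head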


@@ -5,7 +5,6 @@ Revises: 6fc7886d665d
Create Date: 2025-01-14 12:14:00.814390
"""
from alembic import op
import sqlalchemy as sa


@@ -1,121 +0,0 @@
"""rework-kg-config
Revision ID: 03bf8be6b53a
Revises: 65bc6e0f8500
Create Date: 2025-06-16 10:52:34.815335
"""
import json
from datetime import datetime
from datetime import timedelta
from sqlalchemy.dialects import postgresql
from sqlalchemy import text
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "03bf8be6b53a"
down_revision = "65bc6e0f8500"
branch_labels = None
depends_on = None
def upgrade() -> None:
# get current config
current_configs = (
op.get_bind()
.execute(text("SELECT kg_variable_name, kg_variable_values FROM kg_config"))
.all()
)
current_config_dict = {
config.kg_variable_name: (
config.kg_variable_values[0]
if config.kg_variable_name
not in ("KG_VENDOR_DOMAINS", "KG_IGNORE_EMAIL_DOMAINS")
else config.kg_variable_values
)
for config in current_configs
if config.kg_variable_values
}
# not using the KGConfigSettings model here in case it changes in the future
kg_config_settings = json.dumps(
{
"KG_EXPOSED": current_config_dict.get("KG_EXPOSED", False),
"KG_ENABLED": current_config_dict.get("KG_ENABLED", False),
"KG_VENDOR": current_config_dict.get("KG_VENDOR", None),
"KG_VENDOR_DOMAINS": current_config_dict.get("KG_VENDOR_DOMAINS", []),
"KG_IGNORE_EMAIL_DOMAINS": current_config_dict.get(
"KG_IGNORE_EMAIL_DOMAINS", []
),
"KG_COVERAGE_START": current_config_dict.get(
"KG_COVERAGE_START",
(datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d"),
),
"KG_MAX_COVERAGE_DAYS": current_config_dict.get("KG_MAX_COVERAGE_DAYS", 90),
"KG_MAX_PARENT_RECURSION_DEPTH": current_config_dict.get(
"KG_MAX_PARENT_RECURSION_DEPTH", 2
),
"KG_BETA_PERSONA_ID": current_config_dict.get("KG_BETA_PERSONA_ID", None),
}
)
op.execute(
f"INSERT INTO key_value_store (key, value) VALUES ('kg_config', '{kg_config_settings}')"
)
# drop kg config table
op.drop_table("kg_config")
def downgrade() -> None:
# get current config
current_config_dict = {
"KG_EXPOSED": False,
"KG_ENABLED": False,
"KG_VENDOR": [],
"KG_VENDOR_DOMAINS": [],
"KG_IGNORE_EMAIL_DOMAINS": [],
"KG_COVERAGE_START": (datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d"),
"KG_MAX_COVERAGE_DAYS": 90,
"KG_MAX_PARENT_RECURSION_DEPTH": 2,
}
current_configs = (
op.get_bind()
.execute(text("SELECT value FROM key_value_store WHERE key = 'kg_config'"))
.one_or_none()
)
if current_configs is not None:
current_config_dict.update(current_configs[0])
insert_values = [
{
"kg_variable_name": name,
"kg_variable_values": (
[str(val).lower() if isinstance(val, bool) else str(val)]
if not isinstance(val, list)
else val
),
}
for name, val in current_config_dict.items()
]
op.create_table(
"kg_config",
sa.Column("id", sa.Integer(), primary_key=True, nullable=False, index=True),
sa.Column("kg_variable_name", sa.String(), nullable=False, index=True),
sa.Column("kg_variable_values", postgresql.ARRAY(sa.String()), nullable=False),
sa.UniqueConstraint("kg_variable_name", name="uq_kg_config_variable_name"),
)
op.bulk_insert(
sa.table(
"kg_config",
sa.column("kg_variable_name", sa.String),
sa.column("kg_variable_values", postgresql.ARRAY(sa.String)),
),
insert_values,
)
op.execute("DELETE FROM key_value_store WHERE key = 'kg_config'")


@@ -5,7 +5,6 @@ Revises: 8a87bd6ec550
Create Date: 2024-07-23 11:12:39.462397
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 5f4b8568a221
Create Date: 2024-03-02 23:23:49.960309
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: 570282d33c49
Create Date: 2024-05-05 19:30:34.317972
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import table


@@ -5,7 +5,6 @@ Revises: 52a219fb5233
Create Date: 2024-09-10 15:03:48.233926
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 369644546676
Create Date: 2025-01-10 14:01:14.067144
"""
from alembic import op
# revision identifiers, used by Alembic.


@@ -5,7 +5,6 @@ Revises: 77d07dffae64
Create Date: 2023-11-11 20:51:24.228999
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: e50154680a5c
Create Date: 2024-03-19 15:30:44.425436
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: 4ee1287bd26a
Create Date: 2024-11-21 11:49:04.488677
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: 9c00a2bccb83
Create Date: 2025-02-18 10:45:13.957807
"""
from alembic import op
# revision identifiers, used by Alembic.


@@ -5,7 +5,6 @@ Revises: 6756efa39ada
Create Date: 2024-10-15 19:26:44.071259
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: 35e6853a51d5
Create Date: 2024-09-18 11:48:59.418726
"""
from alembic import op


@@ -5,7 +5,6 @@ Revises: 5fc1f54cc252
Create Date: 2024-08-10 11:13:36.070790
"""
from alembic import op
import sqlalchemy as sa


@@ -1,45 +0,0 @@
"""Add foreign key to user__external_user_group_id
Revision ID: 238b84885828
Revises: a7688ab35c45
Create Date: 2025-05-19 17:15:33.424584
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "238b84885828"
down_revision = "a7688ab35c45"
branch_labels = None
depends_on = None
def upgrade() -> None:
# First, clean up any entries that don't have a valid cc_pair_id
op.execute(
"""
DELETE FROM user__external_user_group_id
WHERE cc_pair_id NOT IN (SELECT id FROM connector_credential_pair)
"""
)
# Add foreign key constraint with cascade delete
op.create_foreign_key(
"fk_user__external_user_group_id_cc_pair_id",
"user__external_user_group_id",
"connector_credential_pair",
["cc_pair_id"],
["id"],
ondelete="CASCADE",
)
def downgrade() -> None:
# Drop the foreign key constraint
op.drop_constraint(
"fk_user__external_user_group_id_cc_pair_id",
"user__external_user_group_id",
type_="foreignkey",
)


@@ -5,7 +5,6 @@ Revises: bc9771dccadf
Create Date: 2024-06-27 16:04:51.480437
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 6d387b3196c2
Create Date: 2023-05-05 15:49:35.716016
"""
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from alembic import op


@@ -5,7 +5,6 @@ Revises: 2daa494a0851
Create Date: 2024-11-12 13:23:29.858995
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: 2666d766cb9b
Create Date: 2023-05-24 18:45:17.244495
"""
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from alembic import op


@@ -5,7 +5,6 @@ Revises: c0aab6edb6dd
Create Date: 2025-01-04 11:39:43.268612
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: f5437cc136c5
Create Date: 2025-02-11 14:57:51.308775
"""
from alembic import op


@@ -5,7 +5,6 @@ Revises: 4b08d97e175a
Create Date: 2024-08-21 19:15:15.762948
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: c0fd6e4da83a
Create Date: 2024-11-11 10:57:22.991157
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 33ea50e88f24
Create Date: 2025-01-31 10:30:27.289646
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 7f99be1cb9f5
Create Date: 2023-10-16 23:21:01.283424
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 91ffac7e65b3
Create Date: 2024-07-24 21:29:31.784562
"""
import random
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 5b29123cd710
Create Date: 2024-11-01 12:51:01.535003
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: a6df6b88ef81
Create Date: 2025-01-29 10:54:22.141765
"""
from alembic import op


@@ -5,7 +5,6 @@ Revises: ee3f4b47fad5
Create Date: 2024-08-15 22:37:08.397052
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 91a0a4d62b14
Create Date: 2024-09-20 21:24:04.891018
"""
from alembic import op


@@ -5,7 +5,6 @@ Revises: c99d76fcd298
Create Date: 2024-09-13 13:20:32.885317
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: 2955778aa44c
Create Date: 2025-01-08 15:38:17.224380
"""
from alembic import op
from sqlalchemy import text


@@ -5,7 +5,6 @@ Revises: df46c75b714e
Create Date: 2025-03-10 10:02:30.586666
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: f1c6478c3fd8
Create Date: 2024-05-11 16:11:23.718084
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 776b3bbe9092
Create Date: 2024-03-27 19:41:29.073594
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: b7c2b63c4a03
Create Date: 2025-03-05 10:50:30.516962
"""
from alembic import op
import sqlalchemy as sa
import json


@@ -5,7 +5,6 @@ Revises: e0a68a81d434
Create Date: 2023-10-05 18:47:09.582849
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 8f43500ee275
Create Date: 2025-02-26 13:07:56.217791
"""
from alembic import op


@@ -5,7 +5,6 @@ Revises: 27c6ecc08586
Create Date: 2023-06-14 23:45:51.760440
"""
import sqlalchemy as sa
from alembic import op


@@ -5,7 +5,6 @@ Revises: aeda5f2df4f6
Create Date: 2025-01-13 12:49:51.705235
"""
from alembic import op
import sqlalchemy as sa
import fastapi_users_db_sqlalchemy


@@ -5,7 +5,6 @@ Revises: 703313b75876
Create Date: 2024-04-13 18:07:29.153817
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: e1392f05e840
Create Date: 2024-08-01 12:38:54.466081
"""
from alembic import op
# revision identifiers, used by Alembic.


@@ -5,7 +5,6 @@ Revises: d716b0791ddd
Create Date: 2024-06-28 20:01:05.927647
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: c18cdf4b497e
Create Date: 2024-06-18 20:46:09.095034
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 3c5e35aa9af0
Create Date: 2023-07-18 17:33:40.365034
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 9d97fecfab7f
Create Date: 2023-10-27 11:38:33.803145
"""
from alembic import op
from sqlalchemy import String


@@ -5,7 +5,6 @@ Revises: f32615f71aeb
Create Date: 2024-09-23 12:58:03.894038
"""
from alembic import op
# revision identifiers, used by Alembic.


@@ -5,7 +5,6 @@ Revises: e91df4e935ef
Create Date: 2024-03-20 18:53:32.461518
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises:
Create Date: 2023-05-04 00:55:32.971991
"""
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: ecab2b3f1a3b
Create Date: 2024-04-11 11:05:18.414438
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: f7505c5b0284
Create Date: 2025-04-02 11:26:36.180328
"""
from alembic import op
import sqlalchemy as sa


@@ -1,150 +0,0 @@
"""Fix invalid model-configurations state
Revision ID: 47a07e1a38f1
Revises: 7a70b7664e37
Create Date: 2025-04-23 15:39:43.159504
"""
from alembic import op
from pydantic import BaseModel, ConfigDict
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from onyx.llm.llm_provider_options import (
fetch_model_names_for_provider_as_set,
fetch_visible_model_names_for_provider_as_set,
)
# revision identifiers, used by Alembic.
revision = "47a07e1a38f1"
down_revision = "7a70b7664e37"
branch_labels = None
depends_on = None
class _SimpleModelConfiguration(BaseModel):
# Configure model to read from attributes
model_config = ConfigDict(from_attributes=True)
id: int
llm_provider_id: int
name: str
is_visible: bool
max_input_tokens: int | None
def upgrade() -> None:
llm_provider_table = sa.sql.table(
"llm_provider",
sa.column("id", sa.Integer),
sa.column("provider", sa.String),
sa.column("model_names", postgresql.ARRAY(sa.String)),
sa.column("display_model_names", postgresql.ARRAY(sa.String)),
sa.column("default_model_name", sa.String),
sa.column("fast_default_model_name", sa.String),
)
model_configuration_table = sa.sql.table(
"model_configuration",
sa.column("id", sa.Integer),
sa.column("llm_provider_id", sa.Integer),
sa.column("name", sa.String),
sa.column("is_visible", sa.Boolean),
sa.column("max_input_tokens", sa.Integer),
)
connection = op.get_bind()
llm_providers = connection.execute(
sa.select(
llm_provider_table.c.id,
llm_provider_table.c.provider,
)
).fetchall()
for llm_provider in llm_providers:
llm_provider_id, provider_name = llm_provider
default_models = fetch_model_names_for_provider_as_set(provider_name)
display_models = fetch_visible_model_names_for_provider_as_set(
provider_name=provider_name
)
# if `fetch_model_names_for_provider_as_set` returns `None`, then
# that means that `provider_name` is not a well-known llm provider.
if not default_models:
continue
if not display_models:
raise RuntimeError(
"If `default_models` is non-None, `display_models` must be non-None too."
)
model_configurations = [
_SimpleModelConfiguration.model_validate(model_configuration)
for model_configuration in connection.execute(
sa.select(
model_configuration_table.c.id,
model_configuration_table.c.llm_provider_id,
model_configuration_table.c.name,
model_configuration_table.c.is_visible,
model_configuration_table.c.max_input_tokens,
).where(model_configuration_table.c.llm_provider_id == llm_provider_id)
).fetchall()
]
if model_configurations:
at_least_one_is_visible = any(
[
model_configuration.is_visible
for model_configuration in model_configurations
]
)
# If there is at least one model which is public, this is a valid state.
# Therefore, don't touch it and move on to the next one.
if at_least_one_is_visible:
continue
existing_visible_model_names: set[str] = set(
[
model_configuration.name
for model_configuration in model_configurations
if model_configuration.is_visible
]
)
difference = display_models.difference(existing_visible_model_names)
for model_name in difference:
if not model_name:
continue
insert_statement = postgresql.insert(model_configuration_table).values(
llm_provider_id=llm_provider_id,
name=model_name,
is_visible=True,
max_input_tokens=None,
)
connection.execute(
insert_statement.on_conflict_do_update(
index_elements=["llm_provider_id", "name"],
set_={"is_visible": insert_statement.excluded.is_visible},
)
)
else:
for model_name in default_models:
connection.execute(
model_configuration_table.insert().values(
llm_provider_id=llm_provider_id,
name=model_name,
is_visible=model_name in display_models,
max_input_tokens=None,
)
)
def downgrade() -> None:
pass


@@ -5,7 +5,6 @@ Revises: dfbe9e93d3c7
Create Date: 2024-11-05 18:55:02.221064
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: b85f02ec1308
Create Date: 2024-06-09 14:58:19.946509
"""
from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa


@@ -1,682 +0,0 @@
"""create knowledge graph tables
Revision ID: 495cb26ce93e
Revises: ca04500b9ee8
Create Date: 2025-03-19 08:51:14.341989
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy import text
from datetime import datetime, timedelta
from onyx.configs.app_configs import DB_READONLY_USER
from onyx.configs.app_configs import DB_READONLY_PASSWORD
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE
# revision identifiers, used by Alembic.
revision = "495cb26ce93e"
down_revision = "ca04500b9ee8"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create a new permission-less user to be later used for knowledge graph queries.
# The user will later get temporary read privileges for a specific view that will be
# ad hoc generated specific to a knowledge graph query.
#
# Note: in order for the migration to run, the DB_READONLY_USER and DB_READONLY_PASSWORD
# environment variables MUST be set. Otherwise, an exception will be raised.
if not MULTI_TENANT:
# Enable pg_trgm extension if not already enabled
op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")
# Create read-only db user here only in single tenant mode. For multi-tenant mode,
# the user is created in the alembic_tenants migration.
if not (DB_READONLY_USER and DB_READONLY_PASSWORD):
raise Exception("DB_READONLY_USER or DB_READONLY_PASSWORD is not set")
op.execute(
text(
f"""
DO $$
BEGIN
-- Check if the read-only user already exists
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN
-- Create the read-only user with the specified password
EXECUTE format('CREATE USER %I WITH PASSWORD %L', '{DB_READONLY_USER}', '{DB_READONLY_PASSWORD}');
-- First revoke all privileges to ensure a clean slate
EXECUTE format('REVOKE ALL ON DATABASE %I FROM %I', current_database(), '{DB_READONLY_USER}');
-- Grant only the CONNECT privilege to allow the user to connect to the database
-- but not perform any operations without additional specific grants
EXECUTE format('GRANT CONNECT ON DATABASE %I TO %I', current_database(), '{DB_READONLY_USER}');
END IF;
END
$$;
"""
)
)
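    # Note on the DO block above: format('%I', ...) quote-escapes identifiers and
    # format('%L', ...) quote-escapes literals, so (with hypothetical values)
    #   format('CREATE USER %I WITH PASSWORD %L', 'ro_user', 's3cret')
    # renders as: CREATE USER "ro_user" WITH PASSWORD 's3cret'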
# Grant usage on current schema to readonly user
op.execute(
text(
f"""
DO $$
BEGIN
IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN
EXECUTE format('GRANT USAGE ON SCHEMA %I TO %I', current_schema(), '{DB_READONLY_USER}');
END IF;
END
$$;
"""
)
)
op.create_table(
"kg_config",
sa.Column("id", sa.Integer(), primary_key=True, nullable=False, index=True),
sa.Column("kg_variable_name", sa.String(), nullable=False, index=True),
sa.Column("kg_variable_values", postgresql.ARRAY(sa.String()), nullable=False),
sa.UniqueConstraint("kg_variable_name", name="uq_kg_config_variable_name"),
)
# Insert initial data into kg_config table
op.bulk_insert(
sa.table(
"kg_config",
sa.column("kg_variable_name", sa.String),
sa.column("kg_variable_values", postgresql.ARRAY(sa.String)),
),
[
{"kg_variable_name": "KG_EXPOSED", "kg_variable_values": ["false"]},
{"kg_variable_name": "KG_ENABLED", "kg_variable_values": ["false"]},
{"kg_variable_name": "KG_VENDOR", "kg_variable_values": []},
{"kg_variable_name": "KG_VENDOR_DOMAINS", "kg_variable_values": []},
{"kg_variable_name": "KG_IGNORE_EMAIL_DOMAINS", "kg_variable_values": []},
{
"kg_variable_name": "KG_EXTRACTION_IN_PROGRESS",
"kg_variable_values": ["false"],
},
{
"kg_variable_name": "KG_CLUSTERING_IN_PROGRESS",
"kg_variable_values": ["false"],
},
{
"kg_variable_name": "KG_COVERAGE_START",
"kg_variable_values": [
(datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d")
],
},
{"kg_variable_name": "KG_MAX_COVERAGE_DAYS", "kg_variable_values": ["90"]},
{
"kg_variable_name": "KG_MAX_PARENT_RECURSION_DEPTH",
"kg_variable_values": ["2"],
},
],
)
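    # Note: kg_config stores every value as an array of strings; scalar settings
    # such as KG_ENABLED use a single-element array (e.g. ["false"]).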
op.create_table(
"kg_entity_type",
sa.Column("id_name", sa.String(), primary_key=True, nullable=False, index=True),
sa.Column("description", sa.String(), nullable=True),
sa.Column("grounding", sa.String(), nullable=False),
sa.Column(
"attributes",
postgresql.JSONB,
nullable=False,
server_default="{}",
),
sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
sa.Column("active", sa.Boolean(), nullable=False, default=False),
sa.Column("deep_extraction", sa.Boolean(), nullable=False, default=False),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
onupdate=sa.text("now()"),
),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
sa.Column("grounded_source_name", sa.String(), nullable=True),
sa.Column("entity_values", postgresql.ARRAY(sa.String()), nullable=True),
sa.Column(
"clustering",
postgresql.JSONB,
nullable=False,
server_default="{}",
),
)
# Create KGRelationshipType table
op.create_table(
"kg_relationship_type",
sa.Column("id_name", sa.String(), primary_key=True, nullable=False, index=True),
sa.Column("name", sa.String(), nullable=False, index=True),
sa.Column(
"source_entity_type_id_name", sa.String(), nullable=False, index=True
),
sa.Column(
"target_entity_type_id_name", sa.String(), nullable=False, index=True
),
sa.Column("definition", sa.Boolean(), nullable=False, default=False),
sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
sa.Column("type", sa.String(), nullable=False, index=True),
sa.Column("active", sa.Boolean(), nullable=False, default=True),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
onupdate=sa.text("now()"),
),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
sa.Column(
"clustering",
postgresql.JSONB,
nullable=False,
server_default="{}",
),
sa.ForeignKeyConstraint(
["source_entity_type_id_name"], ["kg_entity_type.id_name"]
),
sa.ForeignKeyConstraint(
["target_entity_type_id_name"], ["kg_entity_type.id_name"]
),
)
# Create KGRelationshipTypeExtractionStaging table
op.create_table(
"kg_relationship_type_extraction_staging",
sa.Column("id_name", sa.String(), primary_key=True, nullable=False, index=True),
sa.Column("name", sa.String(), nullable=False, index=True),
sa.Column(
"source_entity_type_id_name", sa.String(), nullable=False, index=True
),
sa.Column(
"target_entity_type_id_name", sa.String(), nullable=False, index=True
),
sa.Column("definition", sa.Boolean(), nullable=False, default=False),
sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
sa.Column("type", sa.String(), nullable=False, index=True),
sa.Column("active", sa.Boolean(), nullable=False, default=True),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
sa.Column(
"clustering",
postgresql.JSONB,
nullable=False,
server_default="{}",
),
sa.Column("transferred", sa.Boolean(), nullable=False, server_default="false"),
sa.ForeignKeyConstraint(
["source_entity_type_id_name"], ["kg_entity_type.id_name"]
),
sa.ForeignKeyConstraint(
["target_entity_type_id_name"], ["kg_entity_type.id_name"]
),
)
# Create KGEntity table
op.create_table(
"kg_entity",
sa.Column("id_name", sa.String(), primary_key=True, nullable=False, index=True),
sa.Column("name", sa.String(), nullable=False, index=True),
sa.Column("entity_class", sa.String(), nullable=True, index=True),
sa.Column("entity_subtype", sa.String(), nullable=True, index=True),
sa.Column("entity_key", sa.String(), nullable=True, index=True),
sa.Column("name_trigrams", postgresql.ARRAY(sa.String(3)), nullable=True),
sa.Column("document_id", sa.String(), nullable=True, index=True),
sa.Column(
"alternative_names",
postgresql.ARRAY(sa.String()),
nullable=False,
server_default="{}",
),
sa.Column("entity_type_id_name", sa.String(), nullable=False, index=True),
sa.Column("description", sa.String(), nullable=True),
sa.Column(
"keywords",
postgresql.ARRAY(sa.String()),
nullable=False,
server_default="{}",
),
sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
sa.Column(
"acl", postgresql.ARRAY(sa.String()), nullable=False, server_default="{}"
),
sa.Column("boosts", postgresql.JSONB, nullable=False, server_default="{}"),
sa.Column("attributes", postgresql.JSONB, nullable=False, server_default="{}"),
sa.Column("event_time", sa.DateTime(timezone=True), nullable=True),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
onupdate=sa.text("now()"),
),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
sa.ForeignKeyConstraint(["entity_type_id_name"], ["kg_entity_type.id_name"]),
sa.ForeignKeyConstraint(["document_id"], ["document.id"]),
sa.UniqueConstraint(
"name",
"entity_type_id_name",
"document_id",
name="uq_kg_entity_name_type_doc",
),
)
op.create_index("ix_entity_type_acl", "kg_entity", ["entity_type_id_name", "acl"])
op.create_index(
"ix_entity_name_search", "kg_entity", ["name", "entity_type_id_name"]
)
# Create KGEntityExtractionStaging table
op.create_table(
"kg_entity_extraction_staging",
sa.Column("id_name", sa.String(), primary_key=True, nullable=False, index=True),
sa.Column("name", sa.String(), nullable=False, index=True),
sa.Column("document_id", sa.String(), nullable=True, index=True),
sa.Column(
"alternative_names",
postgresql.ARRAY(sa.String()),
nullable=False,
server_default="{}",
),
sa.Column("entity_type_id_name", sa.String(), nullable=False, index=True),
sa.Column("description", sa.String(), nullable=True),
sa.Column(
"keywords",
postgresql.ARRAY(sa.String()),
nullable=False,
server_default="{}",
),
sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
sa.Column(
"acl", postgresql.ARRAY(sa.String()), nullable=False, server_default="{}"
),
sa.Column("boosts", postgresql.JSONB, nullable=False, server_default="{}"),
sa.Column("attributes", postgresql.JSONB, nullable=False, server_default="{}"),
sa.Column("transferred_id_name", sa.String(), nullable=True, default=None),
sa.Column("entity_class", sa.String(), nullable=True, index=True),
sa.Column("entity_key", sa.String(), nullable=True, index=True),
sa.Column("entity_subtype", sa.String(), nullable=True, index=True),
sa.Column("parent_key", sa.String(), nullable=True, index=True),
sa.Column("event_time", sa.DateTime(timezone=True), nullable=True),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
sa.ForeignKeyConstraint(["entity_type_id_name"], ["kg_entity_type.id_name"]),
sa.ForeignKeyConstraint(["document_id"], ["document.id"]),
)
op.create_index(
"ix_entity_extraction_staging_acl",
"kg_entity_extraction_staging",
["entity_type_id_name", "acl"],
)
op.create_index(
"ix_entity_extraction_staging_name_search",
"kg_entity_extraction_staging",
["name", "entity_type_id_name"],
)
# Create KGRelationship table
op.create_table(
"kg_relationship",
sa.Column("id_name", sa.String(), nullable=False, index=True),
sa.Column("source_node", sa.String(), nullable=False, index=True),
sa.Column("target_node", sa.String(), nullable=False, index=True),
sa.Column("source_node_type", sa.String(), nullable=False, index=True),
sa.Column("target_node_type", sa.String(), nullable=False, index=True),
sa.Column("source_document", sa.String(), nullable=True, index=True),
sa.Column("type", sa.String(), nullable=False, index=True),
sa.Column("relationship_type_id_name", sa.String(), nullable=False, index=True),
sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
onupdate=sa.text("now()"),
),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
sa.ForeignKeyConstraint(["source_node"], ["kg_entity.id_name"]),
sa.ForeignKeyConstraint(["target_node"], ["kg_entity.id_name"]),
sa.ForeignKeyConstraint(["source_node_type"], ["kg_entity_type.id_name"]),
sa.ForeignKeyConstraint(["target_node_type"], ["kg_entity_type.id_name"]),
sa.ForeignKeyConstraint(["source_document"], ["document.id"]),
sa.ForeignKeyConstraint(
["relationship_type_id_name"], ["kg_relationship_type.id_name"]
),
sa.UniqueConstraint(
"source_node",
"target_node",
"type",
name="uq_kg_relationship_source_target_type",
),
sa.PrimaryKeyConstraint("id_name", "source_document"),
)
op.create_index(
"ix_kg_relationship_nodes", "kg_relationship", ["source_node", "target_node"]
)
# Create KGRelationshipExtractionStaging table
op.create_table(
"kg_relationship_extraction_staging",
sa.Column("id_name", sa.String(), nullable=False, index=True),
sa.Column("source_node", sa.String(), nullable=False, index=True),
sa.Column("target_node", sa.String(), nullable=False, index=True),
sa.Column("source_node_type", sa.String(), nullable=False, index=True),
sa.Column("target_node_type", sa.String(), nullable=False, index=True),
sa.Column("source_document", sa.String(), nullable=True, index=True),
sa.Column("type", sa.String(), nullable=False, index=True),
sa.Column("relationship_type_id_name", sa.String(), nullable=False, index=True),
sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
sa.Column("transferred", sa.Boolean(), nullable=False, server_default="false"),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
sa.ForeignKeyConstraint(
["source_node"], ["kg_entity_extraction_staging.id_name"]
),
sa.ForeignKeyConstraint(
["target_node"], ["kg_entity_extraction_staging.id_name"]
),
sa.ForeignKeyConstraint(["source_node_type"], ["kg_entity_type.id_name"]),
sa.ForeignKeyConstraint(["target_node_type"], ["kg_entity_type.id_name"]),
sa.ForeignKeyConstraint(["source_document"], ["document.id"]),
sa.ForeignKeyConstraint(
["relationship_type_id_name"],
["kg_relationship_type_extraction_staging.id_name"],
),
sa.UniqueConstraint(
"source_node",
"target_node",
"type",
name="uq_kg_relationship_extraction_staging_source_target_type",
),
sa.PrimaryKeyConstraint("id_name", "source_document"),
)
op.create_index(
"ix_kg_relationship_extraction_staging_nodes",
"kg_relationship_extraction_staging",
["source_node", "target_node"],
)
# Create KGTerm table
op.create_table(
"kg_term",
sa.Column("id_term", sa.String(), primary_key=True, nullable=False, index=True),
sa.Column(
"entity_types",
postgresql.ARRAY(sa.String()),
nullable=False,
server_default="{}",
),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
onupdate=sa.text("now()"),
),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
)
op.create_index("ix_search_term_entities", "kg_term", ["entity_types"])
op.create_index("ix_search_term_term", "kg_term", ["id_term"])
op.add_column(
"document",
sa.Column("kg_stage", sa.String(), nullable=True, index=True),
)
op.add_column(
"document",
sa.Column("kg_processing_time", sa.DateTime(timezone=True), nullable=True),
)
op.add_column(
"connector",
sa.Column(
"kg_processing_enabled",
sa.Boolean(),
nullable=True,
server_default="false",
),
)
op.add_column(
"connector",
sa.Column(
"kg_coverage_days",
sa.Integer(),
nullable=True,
server_default=None,
),
)
# Create GIN index for clustering and normalization
op.execute("COMMIT")  # CREATE INDEX CONCURRENTLY cannot run inside a transaction block (mirrors downgrade)
op.execute(
    "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_kg_entity_clustering_trigrams "
    f"ON kg_entity USING GIN (name {POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE}.gin_trgm_ops)"
)
op.execute(
"CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_kg_entity_normalization_trigrams "
"ON kg_entity USING GIN (name_trigrams)"
)
# Create kg_entity trigger to update kg_entity.name and its trigrams
alphanum_pattern = r"[^a-z0-9]+"
truncate_length = 1000
function = "update_kg_entity_name"
op.execute(
text(
f"""
CREATE OR REPLACE FUNCTION {function}()
RETURNS TRIGGER AS $$
DECLARE
name text;
cleaned_name text;
BEGIN
-- Set name to semantic_id if document_id is not NULL
IF NEW.document_id IS NOT NULL THEN
SELECT lower(semantic_id) INTO name
FROM document
WHERE id = NEW.document_id;
ELSE
name = lower(NEW.name);
END IF;
-- Clean name and truncate if too long
cleaned_name = regexp_replace(
name,
'{alphanum_pattern}', '', 'g'
);
IF length(cleaned_name) > {truncate_length} THEN
cleaned_name = left(cleaned_name, {truncate_length});
END IF;
-- Set name and name trigrams
NEW.name = name;
NEW.name_trigrams = {POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE}.show_trgm(cleaned_name);
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
"""
)
)
trigger = f"{function}_trigger"
op.execute(f"DROP TRIGGER IF EXISTS {trigger} ON kg_entity")
op.execute(
f"""
CREATE TRIGGER {trigger}
BEFORE INSERT OR UPDATE OF name
ON kg_entity
FOR EACH ROW
EXECUTE FUNCTION {function}();
"""
)
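    # Note: the trigger above fires only on inserts and on updates of
    # kg_entity.name itself; renames arriving via document.semantic_id are
    # propagated by the document-side trigger created below.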
# Create document trigger to propagate semantic_id changes to kg_entity.name and its trigrams
function = "update_kg_entity_name_from_doc"
op.execute(
text(
f"""
CREATE OR REPLACE FUNCTION {function}()
RETURNS TRIGGER AS $$
DECLARE
doc_name text;
cleaned_name text;
BEGIN
doc_name = lower(NEW.semantic_id);
-- Clean name and truncate if too long
cleaned_name = regexp_replace(
doc_name,
'{alphanum_pattern}', '', 'g'
);
IF length(cleaned_name) > {truncate_length} THEN
cleaned_name = left(cleaned_name, {truncate_length});
END IF;
-- Set name and name trigrams for all entities referencing this document
UPDATE kg_entity
SET
name = doc_name,
name_trigrams = {POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE}.show_trgm(cleaned_name)
WHERE document_id = NEW.id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
"""
)
)
trigger = f"{function}_trigger"
op.execute(f"DROP TRIGGER IF EXISTS {trigger} ON document")
op.execute(
f"""
CREATE TRIGGER {trigger}
AFTER UPDATE OF semantic_id
ON document
FOR EACH ROW
EXECUTE FUNCTION {function}();
"""
)
def downgrade() -> None:
# Drop all dynamically created views named kg_relationships_with_access*
op.execute(
"""
DO $$
DECLARE
view_name text;
BEGIN
FOR view_name IN
SELECT c.relname
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relkind = 'v'
AND n.nspname = current_schema()
AND c.relname LIKE 'kg_relationships_with_access%'
LOOP
EXECUTE 'DROP VIEW IF EXISTS ' || quote_ident(view_name);
END LOOP;
END $$;
"""
)
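# Drop all dynamically created views named allowed_docs*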
op.execute(
"""
DO $$
DECLARE
view_name text;
BEGIN
FOR view_name IN
SELECT c.relname
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relkind = 'v'
AND n.nspname = current_schema()
AND c.relname LIKE 'allowed_docs%'
LOOP
EXECUTE 'DROP VIEW IF EXISTS ' || quote_ident(view_name);
END LOOP;
END $$;
"""
)
for table, function in (
("kg_entity", "update_kg_entity_name"),
("document", "update_kg_entity_name_from_doc"),
):
op.execute(f"DROP TRIGGER IF EXISTS {function}_trigger ON {table}")
op.execute(f"DROP FUNCTION IF EXISTS {function}()")
# Drop index
op.execute("COMMIT") # Commit to allow CONCURRENTLY
op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_kg_entity_clustering_trigrams")
op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_kg_entity_normalization_trigrams")
# Drop tables in dependency order (referencing tables before the tables they reference)
op.drop_table("kg_term")
op.drop_table("kg_relationship")
op.drop_table("kg_entity")
op.drop_table("kg_relationship_type")
op.drop_table("kg_relationship_extraction_staging")
op.drop_table("kg_relationship_type_extraction_staging")
op.drop_table("kg_entity_extraction_staging")
op.drop_table("kg_entity_type")
op.drop_column("connector", "kg_processing_enabled")
op.drop_column("connector", "kg_coverage_days")
op.drop_column("document", "kg_stage")
op.drop_column("document", "kg_processing_time")
op.drop_table("kg_config")
# Revoke all privileges on the current schema from the readonly user
op.execute(
text(
f"""
DO $$
BEGIN
IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN
EXECUTE format('REVOKE ALL ON SCHEMA %I FROM %I', current_schema(), '{DB_READONLY_USER}');
END IF;
END
$$;
"""
)
)
if not MULTI_TENANT:
# Drop read-only db user here only in single tenant mode. For multi-tenant mode,
# the user is dropped in the alembic_tenants migration.
op.execute(
text(
f"""
DO $$
BEGIN
IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN
-- First revoke all privileges from the database
EXECUTE format('REVOKE ALL ON DATABASE %I FROM %I', current_database(), '{DB_READONLY_USER}');
-- Then drop the user
EXECUTE format('DROP USER %I', '{DB_READONLY_USER}');
END IF;
END
$$;
"""
)
)
op.execute(text("DROP EXTENSION IF EXISTS pg_trgm"))
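As a usage sketch separate from the migration itself: the trigram GIN index on kg_entity.name enables fuzzy entity lookup through pg_trgm's % similarity operator. The query below is hypothetical (connection URL and search string are placeholders), and whether the planner actually uses the index depends on the pg_trgm similarity threshold and table statistics.

from sqlalchemy import column, create_engine, func, select, table

# Lightweight handle on the kg_entity table created above.
kg_entity = table("kg_entity", column("id_name"), column("name"))

search = "acme corp"  # hypothetical search string
stmt = (
    select(
        kg_entity.c.id_name,
        kg_entity.c.name,
        func.similarity(kg_entity.c.name, search).label("score"),
    )
    # pg_trgm's % operator filters by trigram similarity and can be served
    # by the GIN (gin_trgm_ops) index this migration creates.
    .where(kg_entity.c.name.op("%")(search))
    .order_by(func.similarity(kg_entity.c.name, search).desc())
    .limit(10)
)

engine = create_engine("postgresql://user:pass@localhost/onyx")  # hypothetical DSN
with engine.connect() as conn:
    for row in conn.execute(stmt):
        print(row.id_name, row.score)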

View File

@@ -5,7 +5,6 @@ Revises: 7477a5f5d728
Create Date: 2024-08-10 19:20:34.527559
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: d9ec13955951
Create Date: 2024-08-20 15:28:52.993827
"""
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -5,7 +5,6 @@ Revises: f1ca58b2f2ec
Create Date: 2025-01-29 07:48:46.784041
"""
import logging
from typing import cast
from alembic import op

View File

@@ -5,7 +5,6 @@ Revises: 47e5bef3a1d7
Create Date: 2024-11-06 13:15:53.302644
"""
from typing import cast
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: 7da0ae5ad583
Create Date: 2023-11-27 17:23:29.668422
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: f7e58d357687
Create Date: 2024-08-28 17:40:46.077470
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import func

View File

@@ -5,7 +5,6 @@ Revises: 94dc3d0236f8
Create Date: 2024-12-11 18:05:05.490737
"""
from alembic import op

View File

@@ -5,7 +5,6 @@ Revises: 61ff3651add4
Create Date: 2024-09-18 17:00:23.755399
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,7 +5,6 @@ Revises: 7547d982db8f
Create Date: 2024-05-04 17:49:28.568109
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: 800f48024ae9
Create Date: 2023-09-20 16:59:39.097177
"""
from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: d929f0c1c6af
Create Date: 2023-09-04 15:29:44.002164
"""
import fastapi_users_db_sqlalchemy
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: 949b4a92a401
Create Date: 2024-10-30 19:37:59.630704
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -1,24 +0,0 @@
"""Add content type to UserFile
Revision ID: 5c448911b12f
Revises: 47a07e1a38f1
Create Date: 2025-04-25 16:59:48.182672
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "5c448911b12f"
down_revision = "47a07e1a38f1"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column("user_file", sa.Column("content_type", sa.String(), nullable=True))
def downgrade() -> None:
op.drop_column("user_file", "content_type")

View File

@@ -5,7 +5,6 @@ Revises: efb35676026c
Create Date: 2024-09-13 18:52:59.256478
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: e4334d5b33ba
Create Date: 2024-10-08 15:56:07.975636
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: e6a4bbc13fe4
Create Date: 2023-08-10 21:43:09.069523
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: dbaa756c2ccf
Create Date: 2024-02-16 15:02:03.319907
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: 1d6ad76d1f37
Create Date: 2024-08-06 15:35:40.278485
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: 1b8206b29c5d
Create Date: 2024-09-05 13:57:11.770413
"""
import fastapi_users_db_sqlalchemy
from alembic import op

View File

@@ -5,7 +5,6 @@ Revises: 0a98909f2757
Create Date: 2024-05-07 14:54:55.493100
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -1,41 +0,0 @@
"""remove kg subtype from db
Revision ID: 65bc6e0f8500
Revises: cec7ec36c505
Create Date: 2025-06-13 10:04:27.705976
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "65bc6e0f8500"
down_revision = "cec7ec36c505"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_column("kg_entity", "entity_class")
op.drop_column("kg_entity", "entity_subtype")
op.drop_column("kg_entity_extraction_staging", "entity_class")
op.drop_column("kg_entity_extraction_staging", "entity_subtype")
def downgrade() -> None:
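    # Schema-only restore: the data from the dropped columns is not recoverable.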
op.add_column(
"kg_entity_extraction_staging",
sa.Column("entity_subtype", sa.String(), nullable=True, index=True),
)
op.add_column(
"kg_entity_extraction_staging",
sa.Column("entity_class", sa.String(), nullable=True, index=True),
)
op.add_column(
"kg_entity", sa.Column("entity_subtype", sa.String(), nullable=True, index=True)
)
op.add_column(
"kg_entity", sa.Column("entity_class", sa.String(), nullable=True, index=True)
)

View File

@@ -5,7 +5,6 @@ Revises: 5d12a446f5c0
Create Date: 2024-10-15 17:47:44.108537
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,11 @@ Revises: 8e1ac4f39a9f
Create Date: 2025-04-01 07:26:10.539362
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import inspect
import datetime
# revision identifiers, used by Alembic.
revision = "6a804aeb4830"
@@ -13,10 +18,99 @@ branch_labels = None
depends_on = None
# Leaving this around only because some people might be on this migration;
# it was originally a duplicate of the user files migration.
def upgrade() -> None:
pass
# Check if user_file table already exists
conn = op.get_bind()
inspector = inspect(conn)
if not inspector.has_table("user_file"):
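        # If the table already exists, the duplicate migration already did this work.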
# Create user_folder table without parent_id
op.create_table(
"user_folder",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
sa.Column("name", sa.String(length=255), nullable=True),
sa.Column("description", sa.String(length=255), nullable=True),
sa.Column("display_priority", sa.Integer(), nullable=True, default=0),
sa.Column(
"created_at", sa.DateTime(timezone=True), server_default=sa.func.now()
),
)
# Create user_file table with folder_id instead of parent_folder_id
op.create_table(
"user_file",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
sa.Column(
"folder_id",
sa.Integer(),
sa.ForeignKey("user_folder.id"),
nullable=True,
),
sa.Column("link_url", sa.String(), nullable=True),
sa.Column("token_count", sa.Integer(), nullable=True),
sa.Column("file_type", sa.String(), nullable=True),
sa.Column("file_id", sa.String(length=255), nullable=False),
sa.Column("document_id", sa.String(length=255), nullable=False),
sa.Column("name", sa.String(length=255), nullable=False),
sa.Column(
"created_at",
sa.DateTime(),
default=datetime.datetime.utcnow,
),
sa.Column(
"cc_pair_id",
sa.Integer(),
sa.ForeignKey("connector_credential_pair.id"),
nullable=True,
unique=True,
),
)
# Create persona__user_file table
op.create_table(
"persona__user_file",
sa.Column(
"persona_id",
sa.Integer(),
sa.ForeignKey("persona.id"),
primary_key=True,
),
sa.Column(
"user_file_id",
sa.Integer(),
sa.ForeignKey("user_file.id"),
primary_key=True,
),
)
# Create persona__user_folder table
op.create_table(
"persona__user_folder",
sa.Column(
"persona_id",
sa.Integer(),
sa.ForeignKey("persona.id"),
primary_key=True,
),
sa.Column(
"user_folder_id",
sa.Integer(),
sa.ForeignKey("user_folder.id"),
primary_key=True,
),
)
op.add_column(
"connector_credential_pair",
sa.Column("is_user_file", sa.Boolean(), nullable=True, default=False),
)
# Update existing records to have is_user_file=False instead of NULL
op.execute(
"UPDATE connector_credential_pair SET is_user_file = FALSE WHERE is_user_file IS NULL"
)
def downgrade() -> None:

Some files were not shown because too many files have changed in this diff.