Compare commits


6 Commits

Author SHA1 Message Date
pablonyx
39f98e288c quick nit 2025-03-31 16:28:38 -07:00
rkuo-danswer
9e2784e6e6 also set permission upsert to medium priority (#4405)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-31 16:28:38 -07:00
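For context on the priority change above: Onyx runs its background work through Celery, and a task's priority can be chosen when it is enqueued. The sketch below only illustrates that mechanism; the task name, broker URL, and MEDIUM_PRIORITY constant are placeholders, not the actual Onyx code.

from celery import Celery

app = Celery("background", broker="redis://localhost:6379/0")  # placeholder broker URL

MEDIUM_PRIORITY = 5  # placeholder: midpoint of Celery's 0-9 priority range


@app.task(name="upsert_document_permissions")  # hypothetical task name
def upsert_document_permissions(document_id: str) -> None:
    ...


# Enqueue the permission upsert at medium priority instead of the default.
upsert_document_permissions.apply_async(args=("doc-123",), priority=MEDIUM_PRIORITY)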
rkuo-danswer
80ca1f90c2 Bugfix/slack rate limiting (#4386)
* use slack's built in rate limit handler for the bot

* WIP

* fix the slack rate limit handler

* change default to 8

* cleanup

* try catch int conversion just in case

* linearize this logic better

* code review comments

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-31 16:28:38 -07:00
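On the Slack rate-limiting bullets above: slack_sdk ships a built-in RateLimitErrorRetryHandler that retries calls which hit HTTP 429 and waits for the interval Slack advertises, so the bot no longer needs hand-rolled backoff. A minimal sketch, assuming the "change default to 8" note refers to the retry count (the token is a placeholder):

from slack_sdk import WebClient
from slack_sdk.http_retry.builtin_handlers import RateLimitErrorRetryHandler

client = WebClient(token="xoxb-placeholder")

# Retry up to 8 times when Slack returns 429 Too Many Requests,
# sleeping for the duration Slack advertises before each retry.
client.retry_handlers.append(RateLimitErrorRetryHandler(max_retry_count=8))

response = client.chat_postMessage(channel="#general", text="hello")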
evan-danswer
cff7a854bf minor improvement to fireflies connector (#4383)
* minor improvement to fireflies connector

* reduce time diff
2025-03-31 16:28:38 -07:00
evan-danswer
ab99f9d3c0 ensure bedrock model contains API key (#4396)
* ensure bedrock model contains API key

* fix storing bug
2025-03-31 16:28:38 -07:00
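The Bedrock commit above is summarized only by its message; a guard along the lines below conveys the idea of refusing to build or store a Bedrock model configuration without a credential. This is purely illustrative, not the actual Onyx helper.

def ensure_bedrock_api_key(api_key: str | None) -> str:
    # Hypothetical guard: fail fast instead of persisting a Bedrock model
    # configuration that is missing its API key.
    if not api_key or not api_key.strip():
        raise ValueError("Bedrock model configuration requires an API key")
    return api_key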
pablonyx
a49b4dd531 add user files 2025-03-31 13:23:28 -07:00
8 changed files with 6040 additions and 5884 deletions

View File

@@ -1,117 +0,0 @@
"""duplicated no-harm user file migration
Revision ID: 6a804aeb4830
Revises: 8e1ac4f39a9f
Create Date: 2025-04-01 07:26:10.539362
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import inspect
import datetime
# revision identifiers, used by Alembic.
revision = "6a804aeb4830"
down_revision = "8e1ac4f39a9f"
branch_labels = None
depends_on = None
def upgrade() -> None:
    # Check if user_file table already exists
    conn = op.get_bind()
    inspector = inspect(conn)

    if not inspector.has_table("user_file"):
        # Create user_folder table without parent_id
        op.create_table(
            "user_folder",
            sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
            sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
            sa.Column("name", sa.String(length=255), nullable=True),
            sa.Column("description", sa.String(length=255), nullable=True),
            sa.Column("display_priority", sa.Integer(), nullable=True, default=0),
            sa.Column(
                "created_at", sa.DateTime(timezone=True), server_default=sa.func.now()
            ),
        )

        # Create user_file table with folder_id instead of parent_folder_id
        op.create_table(
            "user_file",
            sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
            sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
            sa.Column(
                "folder_id",
                sa.Integer(),
                sa.ForeignKey("user_folder.id"),
                nullable=True,
            ),
            sa.Column("link_url", sa.String(), nullable=True),
            sa.Column("token_count", sa.Integer(), nullable=True),
            sa.Column("file_type", sa.String(), nullable=True),
            sa.Column("file_id", sa.String(length=255), nullable=False),
            sa.Column("document_id", sa.String(length=255), nullable=False),
            sa.Column("name", sa.String(length=255), nullable=False),
            sa.Column(
                "created_at",
                sa.DateTime(),
                default=datetime.datetime.utcnow,
            ),
            sa.Column(
                "cc_pair_id",
                sa.Integer(),
                sa.ForeignKey("connector_credential_pair.id"),
                nullable=True,
                unique=True,
            ),
        )

        # Create persona__user_file table
        op.create_table(
            "persona__user_file",
            sa.Column(
                "persona_id",
                sa.Integer(),
                sa.ForeignKey("persona.id"),
                primary_key=True,
            ),
            sa.Column(
                "user_file_id",
                sa.Integer(),
                sa.ForeignKey("user_file.id"),
                primary_key=True,
            ),
        )

        # Create persona__user_folder table
        op.create_table(
            "persona__user_folder",
            sa.Column(
                "persona_id",
                sa.Integer(),
                sa.ForeignKey("persona.id"),
                primary_key=True,
            ),
            sa.Column(
                "user_folder_id",
                sa.Integer(),
                sa.ForeignKey("user_folder.id"),
                primary_key=True,
            ),
        )

        op.add_column(
            "connector_credential_pair",
            sa.Column("is_user_file", sa.Boolean(), nullable=True, default=False),
        )

        # Update existing records to have is_user_file=False instead of NULL
        op.execute(
            "UPDATE connector_credential_pair SET is_user_file = FALSE WHERE is_user_file IS NULL"
        )


def downgrade() -> None:
    pass

View File

@@ -5,9 +5,9 @@ Revises: 3781a5eb12cb
Create Date: 2025-01-26 16:08:21.551022
"""
from alembic import op
import sqlalchemy as sa
import datetime
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -1,52 +0,0 @@
"""max_length_for_instruction_system_prompt
Revision ID: e995bdf0d6f7
Revises: 8e1ac4f39a9f
Create Date: 2025-04-01 18:32:45.123456
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "e995bdf0d6f7"
down_revision = "8e1ac4f39a9f"
branch_labels = None
depends_on = None
def upgrade() -> None:
    # Alter system_prompt and task_prompt columns to have a maximum length of 8000 characters
    op.alter_column(
        "prompt",
        "system_prompt",
        existing_type=sa.Text(),
        type_=sa.String(8000),
        existing_nullable=False,
    )
    op.alter_column(
        "prompt",
        "task_prompt",
        existing_type=sa.Text(),
        type_=sa.String(8000),
        existing_nullable=False,
    )


def downgrade() -> None:
    # Revert system_prompt and task_prompt columns back to Text type
    op.alter_column(
        "prompt",
        "system_prompt",
        existing_type=sa.String(8000),
        type_=sa.Text(),
        existing_nullable=False,
    )
    op.alter_column(
        "prompt",
        "task_prompt",
        existing_type=sa.String(8000),
        type_=sa.Text(),
        existing_nullable=False,
    )

View File

@@ -1,4 +1,5 @@
import asyncio
import concurrent.futures
import logging
import uuid
@@ -506,11 +507,24 @@ async def setup_tenant(tenant_id: str) -> None:
    try:
        token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

        # Run Alembic migrations in a way that isolates it from the current event loop
        # Create a new event loop for this synchronous operation
        loop = asyncio.get_event_loop()
        # Use run_in_executor which properly isolates the thread execution
        await loop.run_in_executor(None, lambda: run_alembic_migrations(tenant_id))

        # Run Alembic migrations in a completely isolated way
        # This function runs in a separate thread with its own event loop context
        def isolated_migration_runner():
            # Reset contextvar to prevent it from being shared with the parent thread
            # This ensures no shared asyncio primitives between threads
            token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
            try:
                # Run migrations in this isolated thread
                run_alembic_migrations(tenant_id)
            finally:
                # Clean up the contextvar
                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

        # Use a dedicated ThreadPoolExecutor for complete isolation
        # This prevents asyncio loop/lock sharing issues
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(executor, isolated_migration_runner)

        # Configure the tenant with default settings
        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
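The hunk above swaps a run_in_executor call on the default executor for a dedicated single-worker ThreadPoolExecutor plus a per-thread contextvar set/reset. A stripped-down sketch of the same pattern outside Onyx (function and variable names here are illustrative, not the real ones):

import asyncio
import concurrent.futures
import contextvars

TENANT_ID: contextvars.ContextVar[str] = contextvars.ContextVar("tenant_id")


def run_blocking_migrations(tenant_id: str) -> None:
    # Set the contextvar inside the worker thread so nothing is shared with
    # the event-loop thread, then clean it up when the work finishes.
    token = TENANT_ID.set(tenant_id)
    try:
        print(f"running migrations for {TENANT_ID.get()}")
    finally:
        TENANT_ID.reset(token)


async def setup(tenant_id: str) -> None:
    loop = asyncio.get_running_loop()
    # A dedicated executor keeps the blocking call off the shared default pool
    # and away from any asyncio primitives owned by the current loop.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        await loop.run_in_executor(executor, run_blocking_migrations, tenant_id)


asyncio.run(setup("tenant-123"))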

View File

@@ -42,7 +42,6 @@ from onyx.context.search.retrieval.search_runner import (
from onyx.db.engine import get_all_tenant_ids
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_tenant
from onyx.db.engine import SqlEngine
from onyx.db.models import SlackBot
from onyx.db.search_settings import get_current_search_settings
from onyx.db.slack_bot import fetch_slack_bot
@@ -973,9 +972,6 @@ def _get_socket_client(
if __name__ == "__main__":
    # Initialize the SqlEngine
    SqlEngine.init_engine(pool_size=20, max_overflow=5)

    # Initialize the tenant handler which will manage tenant connections
    logger.info("Starting SlackbotHandler")
    tenant_handler = SlackbotHandler()

View File

@@ -2,8 +2,6 @@ import json
import os
import time
from pathlib import Path
from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
@@ -42,13 +40,10 @@ def highspot_connector() -> HighspotConnector:
    return connector


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
@pytest.mark.xfail(
    reason="Accessing postgres that isn't available in connector only tests",
)
def test_highspot_connector_basic(
    mock_get_api_key: MagicMock, highspot_connector: HighspotConnector
) -> None:
def test_highspot_connector_basic(highspot_connector: HighspotConnector) -> None:
    """Test basic functionality of the Highspot connector."""
    all_docs: list[Document] = []
    test_data = load_test_data()
@@ -81,13 +76,10 @@ def test_highspot_connector_basic(
    assert len(section.text) > 0


@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
@pytest.mark.xfail(
    reason="Possibly accessing postgres that isn't available in connector only tests",
)
def test_highspot_connector_slim(
    mock_get_api_key: MagicMock, highspot_connector: HighspotConnector
) -> None:
def test_highspot_connector_slim(highspot_connector: HighspotConnector) -> None:
    """Test slim document retrieval."""
    # Get all doc IDs from the full connector
    all_full_doc_ids = set()

web/package-lock.json (generated): 11695 changed lines

File diff suppressed because it is too large

View File

@@ -469,14 +469,8 @@ export function AssistantEditor({
        description: Yup.string().required(
          "Must provide a description for the Assistant"
        ),
        system_prompt: Yup.string().max(
          8000,
          "Instructions must be less than 8000 characters"
        ),
        task_prompt: Yup.string().max(
          8000,
          "Reminders must be less than 8000 characters"
        ),
        system_prompt: Yup.string(),
        task_prompt: Yup.string(),
        is_public: Yup.boolean().required(),
        document_set_ids: Yup.array().of(Yup.number()),
        num_chunks: Yup.number().nullable(),