Compare commits

..

9 Commits

Author SHA1 Message Date
pablonyx
5c33f33f6e k 2025-04-01 07:35:20 -07:00
pablonyx
a8f87588ff update 2025-04-01 07:34:18 -07:00
SubashMohan
4bae1318bb refactor tests for Highspot connector to use mocking for API key retrieval (#4346) 2025-04-01 02:39:05 +00:00
Weves
11c3f44c76 Init engine in slackbot 2025-03-31 17:04:20 -07:00
rkuo-danswer
cb38ac8a97 also set permission upsert to medium priority (#4405)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-31 14:59:31 -07:00
pablonyx
b2120b9f39 add user files (#4152) 2025-03-31 21:06:59 +00:00
rkuo-danswer
ccd372cc4a Bugfix/slack rate limiting (#4386)
* use slack's built in rate limit handler for the bot

* WIP

* fix the slack rate limit handler

* change default to 8

* cleanup

* try catch int conversion just in case

* linearize this logic better

* code review comments

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-31 21:00:26 +00:00
evan-danswer
ea30f1de1e minor improvement to fireflies connector (#4383)
* minor improvement to fireflies connector

* reduce time diff
2025-03-31 20:00:52 +00:00
evan-danswer
a7130681d9 ensure bedrock model contains API key (#4396)
* ensure bedrock model contains API key

* fix storing bug
2025-03-31 19:58:53 +00:00
5 changed files with 141 additions and 26 deletions

View File

@@ -0,0 +1,117 @@
"""duplicated no-harm user file migration
Revision ID: 6a804aeb4830
Revises: 8e1ac4f39a9f
Create Date: 2025-04-01 07:26:10.539362
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import inspect
import datetime
# revision identifiers, used by Alembic.
revision = "6a804aeb4830"
down_revision = "8e1ac4f39a9f"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Check if user_file table already exists
conn = op.get_bind()
inspector = inspect(conn)
if not inspector.has_table("user_file"):
# Create user_folder table without parent_id
op.create_table(
"user_folder",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
sa.Column("name", sa.String(length=255), nullable=True),
sa.Column("description", sa.String(length=255), nullable=True),
sa.Column("display_priority", sa.Integer(), nullable=True, default=0),
sa.Column(
"created_at", sa.DateTime(timezone=True), server_default=sa.func.now()
),
)
# Create user_file table with folder_id instead of parent_folder_id
op.create_table(
"user_file",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
sa.Column(
"folder_id",
sa.Integer(),
sa.ForeignKey("user_folder.id"),
nullable=True,
),
sa.Column("link_url", sa.String(), nullable=True),
sa.Column("token_count", sa.Integer(), nullable=True),
sa.Column("file_type", sa.String(), nullable=True),
sa.Column("file_id", sa.String(length=255), nullable=False),
sa.Column("document_id", sa.String(length=255), nullable=False),
sa.Column("name", sa.String(length=255), nullable=False),
sa.Column(
"created_at",
sa.DateTime(),
default=datetime.datetime.utcnow,
),
sa.Column(
"cc_pair_id",
sa.Integer(),
sa.ForeignKey("connector_credential_pair.id"),
nullable=True,
unique=True,
),
)
# Create persona__user_file table
op.create_table(
"persona__user_file",
sa.Column(
"persona_id",
sa.Integer(),
sa.ForeignKey("persona.id"),
primary_key=True,
),
sa.Column(
"user_file_id",
sa.Integer(),
sa.ForeignKey("user_file.id"),
primary_key=True,
),
)
# Create persona__user_folder table
op.create_table(
"persona__user_folder",
sa.Column(
"persona_id",
sa.Integer(),
sa.ForeignKey("persona.id"),
primary_key=True,
),
sa.Column(
"user_folder_id",
sa.Integer(),
sa.ForeignKey("user_folder.id"),
primary_key=True,
),
)
op.add_column(
"connector_credential_pair",
sa.Column("is_user_file", sa.Boolean(), nullable=True, default=False),
)
# Update existing records to have is_user_file=False instead of NULL
op.execute(
"UPDATE connector_credential_pair SET is_user_file = FALSE WHERE is_user_file IS NULL"
)
def downgrade() -> None:
pass

View File

@@ -5,9 +5,9 @@ Revises: 3781a5eb12cb
Create Date: 2025-01-26 16:08:21.551022
"""
from alembic import op
import sqlalchemy as sa
import datetime
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -1,5 +1,4 @@
import asyncio
import concurrent.futures
import logging
import uuid
@@ -507,24 +506,11 @@ async def setup_tenant(tenant_id: str) -> None:
try:
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
# Run Alembic migrations in a completely isolated way
# This function runs in a separate thread with its own event loop context
def isolated_migration_runner():
# Reset contextvar to prevent it from being shared with the parent thread
# This ensures no shared asyncio primitives between threads
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
try:
# Run migrations in this isolated thread
run_alembic_migrations(tenant_id)
finally:
# Clean up the contextvar
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
# Use a dedicated ThreadPoolExecutor for complete isolation
# This prevents asyncio loop/lock sharing issues
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
loop = asyncio.get_running_loop()
await loop.run_in_executor(executor, isolated_migration_runner)
# Run Alembic migrations in a way that isolates it from the current event loop
# Create a new event loop for this synchronous operation
loop = asyncio.get_event_loop()
# Use run_in_executor which properly isolates the thread execution
await loop.run_in_executor(None, lambda: run_alembic_migrations(tenant_id))
# Configure the tenant with default settings
with get_session_with_tenant(tenant_id=tenant_id) as db_session:

View File

@@ -42,6 +42,7 @@ from onyx.context.search.retrieval.search_runner import (
from onyx.db.engine import get_all_tenant_ids
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_tenant
from onyx.db.engine import SqlEngine
from onyx.db.models import SlackBot
from onyx.db.search_settings import get_current_search_settings
from onyx.db.slack_bot import fetch_slack_bot
@@ -972,6 +973,9 @@ def _get_socket_client(
if __name__ == "__main__":
# Initialize the SqlEngine
SqlEngine.init_engine(pool_size=20, max_overflow=5)
# Initialize the tenant handler which will manage tenant connections
logger.info("Starting SlackbotHandler")
tenant_handler = SlackbotHandler()

View File

@@ -2,6 +2,8 @@ import json
import os
import time
from pathlib import Path
from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
@@ -40,10 +42,13 @@ def highspot_connector() -> HighspotConnector:
return connector
@pytest.mark.xfail(
reason="Accessing postgres that isn't available in connector only tests",
@patch(
"onyx.file_processing.extract_file_text.get_unstructured_api_key",
return_value=None,
)
def test_highspot_connector_basic(highspot_connector: HighspotConnector) -> None:
def test_highspot_connector_basic(
mock_get_api_key: MagicMock, highspot_connector: HighspotConnector
) -> None:
"""Test basic functionality of the Highspot connector."""
all_docs: list[Document] = []
test_data = load_test_data()
@@ -76,10 +81,13 @@ def test_highspot_connector_basic(highspot_connector: HighspotConnector) -> None
assert len(section.text) > 0
@pytest.mark.xfail(
reason="Possibly accessing postgres that isn't available in connector only tests",
@patch(
"onyx.file_processing.extract_file_text.get_unstructured_api_key",
return_value=None,
)
def test_highspot_connector_slim(highspot_connector: HighspotConnector) -> None:
def test_highspot_connector_slim(
mock_get_api_key: MagicMock, highspot_connector: HighspotConnector
) -> None:
"""Test slim document retrieval."""
# Get all doc IDs from the full connector
all_full_doc_ids = set()