Mirror of https://github.com/onyx-dot-app/onyx.git
Synced 2026-02-17 15:55:45 +00:00

Compare commits: additional ... github_lis

17 Commits

| SHA1 |
|---|
| 9087320a06 |
| b0af1458c0 |
| bb67a7a122 |
| e239dc31c1 |
| 027128502c |
| a7a374dc81 |
| facc8cc2fa |
| 2c0af0a0ca |
| bfbc1cd954 |
| 626da583aa |
| 92faca139d |
| cec05c5ee9 |
| eaf054ef06 |
| a7a1a24658 |
| 687122911d |
| 40953bd4fe |
| a7acc07e79 |
@@ -0,0 +1,125 @@
"""Update GitHub connector repo_name to repositories

Revision ID: 3934b1bc7b62
Revises: b7c2b63c4a03
Create Date: 2025-03-05 10:50:30.516962

"""
from alembic import op
import sqlalchemy as sa
import json
import logging

# revision identifiers, used by Alembic.
revision = "3934b1bc7b62"
down_revision = "b7c2b63c4a03"
branch_labels = None
depends_on = None

logger = logging.getLogger("alembic.runtime.migration")


def upgrade() -> None:
    # Get all GitHub connectors
    conn = op.get_bind()

    # First get all GitHub connectors
    github_connectors = conn.execute(
        sa.text(
            """
            SELECT id, connector_specific_config
            FROM connector
            WHERE source = 'GITHUB'
            """
        )
    ).fetchall()

    # Update each connector's config
    updated_count = 0
    for connector_id, config in github_connectors:
        try:
            if not config:
                logger.warning(f"Connector {connector_id} has no config, skipping")
                continue

            # Parse the config if it's a string
            if isinstance(config, str):
                config = json.loads(config)

            if "repo_name" not in config:
                continue

            # Create new config with repositories instead of repo_name
            new_config = dict(config)
            repo_name_value = new_config.pop("repo_name")
            new_config["repositories"] = repo_name_value

            # Update the connector with the new config
            conn.execute(
                sa.text(
                    """
                    UPDATE connector
                    SET connector_specific_config = :new_config
                    WHERE id = :connector_id
                    """
                ),
                {"connector_id": connector_id, "new_config": json.dumps(new_config)},
            )
            updated_count += 1
        except Exception as e:
            logger.error(f"Error updating connector {connector_id}: {str(e)}")


def downgrade() -> None:
    # Get all GitHub connectors
    conn = op.get_bind()

    logger.debug(
        "Starting rollback of GitHub connectors from repositories to repo_name"
    )

    github_connectors = conn.execute(
        sa.text(
            """
            SELECT id, connector_specific_config
            FROM connector
            WHERE source = 'GITHUB'
            """
        )
    ).fetchall()

    logger.debug(f"Found {len(github_connectors)} GitHub connectors to rollback")

    # Revert each GitHub connector to use repo_name instead of repositories
    reverted_count = 0
    for connector_id, config in github_connectors:
        try:
            if not config:
                continue

            # Parse the config if it's a string
            if isinstance(config, str):
                config = json.loads(config)

            if "repositories" not in config:
                continue

            # Create new config with repo_name instead of repositories
            new_config = dict(config)
            repositories_value = new_config.pop("repositories")
            new_config["repo_name"] = repositories_value

            # Update the connector with the new config
            conn.execute(
                sa.text(
                    """
                    UPDATE connector
                    SET connector_specific_config = :new_config
                    WHERE id = :connector_id
                    """
                ),
                {"new_config": json.dumps(new_config), "connector_id": connector_id},
            )
            reverted_count += 1
        except Exception as e:
            logger.error(f"Error reverting connector {connector_id}: {str(e)}")
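For illustration, a minimal standalone sketch of the key rename the upgrade performs on one stored config; the sample values are hypothetical, not taken from a real connector row:

import json

# Hypothetical connector_specific_config as stored before the migration
old_config = {"repo_owner": "onyx-dot-app", "repo_name": "onyx", "include_prs": True}

# Same rename the upgrade applies: drop repo_name, carry its value over to repositories
new_config = dict(old_config)
new_config["repositories"] = new_config.pop("repo_name")

print(json.dumps(new_config))
# {"repo_owner": "onyx-dot-app", "include_prs": true, "repositories": "onyx"}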
@@ -134,7 +134,9 @@ def fetch_chat_sessions_eagerly_by_time(
    limit: int | None = 500,
    initial_time: datetime | None = None,
) -> list[ChatSession]:
    time_order: UnaryExpression = desc(ChatSession.time_created)
    """Sorted by oldest to newest, then by message id"""

    asc_time_order: UnaryExpression = asc(ChatSession.time_created)
    message_order: UnaryExpression = asc(ChatMessage.id)

    filters: list[ColumnElement | BinaryExpression] = [
@@ -147,8 +149,7 @@ def fetch_chat_sessions_eagerly_by_time(
    subquery = (
        db_session.query(ChatSession.id, ChatSession.time_created)
        .filter(*filters)
        .order_by(ChatSession.id, time_order)
        .distinct(ChatSession.id)
        .order_by(asc_time_order)
        .limit(limit)
        .subquery()
    )
@@ -164,7 +165,7 @@ def fetch_chat_sessions_eagerly_by_time(
                ChatMessage.chat_message_feedbacks
            ),
        )
        .order_by(time_order, message_order)
        .order_by(asc_time_order, message_order)
    )

    chat_sessions = query.all()
@@ -16,13 +16,18 @@ from onyx.db.models import UsageReport
from onyx.file_store.file_store import get_default_file_store


# Gets skeletons of all message
# Gets skeletons of all messages in the given range
def get_empty_chat_messages_entries__paginated(
    db_session: Session,
    period: tuple[datetime, datetime],
    limit: int | None = 500,
    initial_time: datetime | None = None,
) -> tuple[Optional[datetime], list[ChatMessageSkeleton]]:
    """Returns a tuple where:
    first element is the most recent timestamp out of the sessions iterated
    - this timestamp can be used to paginate forward in time
    second element is a list of messages belonging to all the sessions iterated
    """
    chat_sessions = fetch_chat_sessions_eagerly_by_time(
        start=period[0],
        end=period[1],
@@ -52,18 +57,17 @@ def get_empty_chat_messages_entries__paginated(
    if len(chat_sessions) == 0:
        return None, []

    return chat_sessions[0].time_created, message_skeletons
    return chat_sessions[-1].time_created, message_skeletons


def get_all_empty_chat_message_entries(
    db_session: Session,
    period: tuple[datetime, datetime],
) -> Generator[list[ChatMessageSkeleton], None, None]:
    """period is the range of time over which to fetch messages."""
    initial_time: Optional[datetime] = period[0]
    ind = 0
    while True:
        ind += 1

        # iterate from oldest to newest
        time_created, message_skeletons = get_empty_chat_messages_entries__paginated(
            db_session,
            period,
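The switch to ascending order and to returning chat_sessions[-1].time_created is what lets callers walk forward through time. A minimal sketch of that pattern with an in-memory stand-in for the paginated query; the names and data below are hypothetical, not the real API:

from datetime import datetime, timedelta

session_times = [datetime(2025, 1, 1) + timedelta(days=i) for i in range(10)]

def fetch_page(after: datetime | None, limit: int = 4) -> tuple[datetime | None, list[datetime]]:
    # oldest-to-newest, resuming strictly after the cursor
    batch = [t for t in sorted(session_times) if after is None or t > after][:limit]
    if not batch:
        return None, []
    return batch[-1], batch  # newest timestamp in the batch becomes the next cursor

cursor: datetime | None = None
while True:
    cursor, batch = fetch_page(cursor)
    if not batch:
        break
    print(f"fetched {len(batch)} sessions up to {cursor}")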
@@ -15,7 +15,7 @@ from ee.onyx.server.enterprise_settings.api import (
)
from ee.onyx.server.manage.standard_answer import router as standard_answer_router
from ee.onyx.server.middleware.tenant_tracking import add_tenant_id_middleware
from ee.onyx.server.oauth.api import router as oauth_router
from ee.onyx.server.oauth.api import router as ee_oauth_router
from ee.onyx.server.query_and_chat.chat_backend import (
    router as chat_router,
)
@@ -128,7 +128,7 @@ def get_application() -> FastAPI:
    include_router_with_global_prefix_prepended(application, query_router)
    include_router_with_global_prefix_prepended(application, chat_router)
    include_router_with_global_prefix_prepended(application, standard_answer_router)
    include_router_with_global_prefix_prepended(application, oauth_router)
    include_router_with_global_prefix_prepended(application, ee_oauth_router)

    # Enterprise-only global settings
    include_router_with_global_prefix_prepended(
@@ -80,6 +80,7 @@ class ConfluenceCloudOAuth:
        "search:confluence%20"
        # granular scope
        "read:attachment:confluence%20"  # possibly unneeded unless calling v2 attachments api
        "read:content-details:confluence%20"  # for permission sync
        "offline_access"
    )
@@ -48,4 +48,5 @@ def store_product_gating(tenant_id: str, application_status: ApplicationStatus)

def get_gated_tenants() -> set[str]:
    redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
    return cast(set[str], redis_client.smembers(GATED_TENANTS_KEY))
    gated_tenants_bytes = cast(set[bytes], redis_client.smembers(GATED_TENANTS_KEY))
    return {tenant_id.decode("utf-8") for tenant_id in gated_tenants_bytes}
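A quick illustration of why the decode step matters: a binary-safe redis-py client returns SMEMBERS results as bytes, which never compare equal to tenant-ID strings. The set literal below stands in for the Redis reply; no real connection is involved:

# Stand-in for redis_client.smembers(GATED_TENANTS_KEY) without decode_responses=True
gated_tenants_bytes = {b"tenant_a", b"tenant_b"}

gated_tenants = {tenant_id.decode("utf-8") for tenant_id in gated_tenants_bytes}
print("tenant_a" in gated_tenants_bytes)  # False: bytes members do not match str
print("tenant_a" in gated_tenants)        # True after decoding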
@@ -55,7 +55,11 @@ logger = logging.getLogger(__name__)
async def get_or_provision_tenant(
    email: str, referral_source: str | None = None, request: Request | None = None
) -> str:
    """Get existing tenant ID for an email or create a new tenant if none exists."""
    """
    Get existing tenant ID for an email or create a new tenant if none exists.
    This function should only be called after we have verified we want this user's tenant to exist.
    It returns the tenant ID associated with the email, creating a new tenant if necessary.
    """
    if not MULTI_TENANT:
        return POSTGRES_DEFAULT_SCHEMA
@@ -587,14 +587,20 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
    ) -> Optional[User]:
        email = credentials.username

        # Get tenant_id from mapping table
        tenant_id = await fetch_ee_implementation_or_noop(
            "onyx.server.tenants.provisioning",
            "get_or_provision_tenant",
            async_return_default_schema,
        )(
            email=email,
        )
        tenant_id: str | None = None
        try:
            tenant_id = fetch_ee_implementation_or_noop(
                "onyx.server.tenants.provisioning",
                "get_tenant_id_for_email",
                None,
            )(
                email=email,
            )
        except Exception as e:
            logger.warning(
                f"User attempted to login with invalid credentials: {str(e)}"
            )

        if not tenant_id:
            # User not found in mapping
            self.password_helper.hash(credentials.password)
@@ -240,7 +240,7 @@ class ConfluenceConnector(
        # Extract basic page information
        page_id = page["id"]
        page_title = page["title"]
        page_url = f"{self.wiki_base}/wiki{page['_links']['webui']}"
        page_url = f"{self.wiki_base}{page['_links']['webui']}"

        # Get the page content
        page_content = extract_text_from_confluence_html(
@@ -144,6 +144,12 @@ class OnyxConfluence:
            self.static_credentials = credential_json
            return credential_json, False

        if not OAUTH_CONFLUENCE_CLOUD_CLIENT_ID:
            raise RuntimeError("OAUTH_CONFLUENCE_CLOUD_CLIENT_ID must be set!")

        if not OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET:
            raise RuntimeError("OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET must be set!")

        # check if we should refresh tokens. we're deciding to refresh halfway
        # to expiration
        now = datetime.now(timezone.utc)
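The "refresh halfway to expiration" comment boils down to a simple time comparison. A sketch with hypothetical token metadata; the actual credential fields are not shown in this hunk:

from datetime import datetime, timedelta, timezone

# Hypothetical values; the real credential_json layout is not part of this diff
issued_at = datetime.now(timezone.utc) - timedelta(minutes=40)
lifetime = timedelta(hours=1)

now = datetime.now(timezone.utc)
should_refresh = now >= issued_at + lifetime / 2  # refresh once half the lifetime has elapsed
print(should_refresh)  # True: 40 of 60 minutes have passed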
@@ -124,14 +124,14 @@ class GithubConnector(LoadConnector, PollConnector):
    def __init__(
        self,
        repo_owner: str,
        repo_name: str | None = None,
        repositories: str | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
        state_filter: str = "all",
        include_prs: bool = True,
        include_issues: bool = False,
    ) -> None:
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.repositories = repositories
        self.batch_size = batch_size
        self.state_filter = state_filter
        self.include_prs = include_prs
@@ -157,11 +157,42 @@ class GithubConnector(LoadConnector, PollConnector):
            )

        try:
            return github_client.get_repo(f"{self.repo_owner}/{self.repo_name}")
            return github_client.get_repo(f"{self.repo_owner}/{self.repositories}")
        except RateLimitExceededException:
            _sleep_after_rate_limit_exception(github_client)
            return self._get_github_repo(github_client, attempt_num + 1)

    def _get_github_repos(
        self, github_client: Github, attempt_num: int = 0
    ) -> list[Repository.Repository]:
        """Get specific repositories based on comma-separated repo_name string."""
        if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
            raise RuntimeError(
                "Re-tried fetching repos too many times. Something is going wrong with fetching objects from Github"
            )

        try:
            repos = []
            # Split repo_name by comma and strip whitespace
            repo_names = [
                name.strip() for name in (cast(str, self.repositories)).split(",")
            ]

            for repo_name in repo_names:
                if repo_name:  # Skip empty strings
                    try:
                        repo = github_client.get_repo(f"{self.repo_owner}/{repo_name}")
                        repos.append(repo)
                    except GithubException as e:
                        logger.warning(
                            f"Could not fetch repo {self.repo_owner}/{repo_name}: {e}"
                        )

            return repos
        except RateLimitExceededException:
            _sleep_after_rate_limit_exception(github_client)
            return self._get_github_repos(github_client, attempt_num + 1)

    def _get_all_repos(
        self, github_client: Github, attempt_num: int = 0
    ) -> list[Repository.Repository]:
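The comma-splitting in _get_github_repos is easy to sanity-check on its own; a standalone sketch of the same parsing, using a made-up setting value:

repositories = "onyx, docs ,,internal-tools"  # hypothetical connector setting

repo_names = [name.strip() for name in repositories.split(",")]
# empty entries (here from the double comma) are skipped, mirroring the `if repo_name:` guard
repo_names = [name for name in repo_names if name]
print(repo_names)  # ['onyx', 'docs', 'internal-tools']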
@@ -189,11 +220,17 @@ class GithubConnector(LoadConnector, PollConnector):
        if self.github_client is None:
            raise ConnectorMissingCredentialError("GitHub")

        repos = (
            [self._get_github_repo(self.github_client)]
            if self.repo_name
            else self._get_all_repos(self.github_client)
        )
        repos = []
        if self.repositories:
            if "," in self.repositories:
                # Multiple repositories specified
                repos = self._get_github_repos(self.github_client)
            else:
                # Single repository (backward compatibility)
                repos = [self._get_github_repo(self.github_client)]
        else:
            # All repositories
            repos = self._get_all_repos(self.github_client)

        for repo in repos:
            if self.include_prs:
@@ -268,11 +305,48 @@ class GithubConnector(LoadConnector, PollConnector):
            )

        try:
            if self.repo_name:
                test_repo = self.github_client.get_repo(
                    f"{self.repo_owner}/{self.repo_name}"
                )
                test_repo.get_contents("")
            if self.repositories:
                if "," in self.repositories:
                    # Multiple repositories specified
                    repo_names = [name.strip() for name in self.repositories.split(",")]
                    if not repo_names:
                        raise ConnectorValidationError(
                            "Invalid connector settings: No valid repository names provided."
                        )

                    # Validate at least one repository exists and is accessible
                    valid_repos = False
                    validation_errors = []

                    for repo_name in repo_names:
                        if not repo_name:
                            continue

                        try:
                            test_repo = self.github_client.get_repo(
                                f"{self.repo_owner}/{repo_name}"
                            )
                            test_repo.get_contents("")
                            valid_repos = True
                            # If at least one repo is valid, we can proceed
                            break
                        except GithubException as e:
                            validation_errors.append(
                                f"Repository '{repo_name}': {e.data.get('message', str(e))}"
                            )

                    if not valid_repos:
                        error_msg = (
                            "None of the specified repositories could be accessed: "
                        )
                        error_msg += ", ".join(validation_errors)
                        raise ConnectorValidationError(error_msg)
                else:
                    # Single repository (backward compatibility)
                    test_repo = self.github_client.get_repo(
                        f"{self.repo_owner}/{self.repositories}"
                    )
                    test_repo.get_contents("")
            else:
                # Try to get organization first
                try:
@@ -298,10 +372,15 @@ class GithubConnector(LoadConnector, PollConnector):
                    "Your GitHub token does not have sufficient permissions for this repository (HTTP 403)."
                )
            elif e.status == 404:
                if self.repo_name:
                    raise ConnectorValidationError(
                        f"GitHub repository not found with name: {self.repo_owner}/{self.repo_name}"
                    )
                if self.repositories:
                    if "," in self.repositories:
                        raise ConnectorValidationError(
                            f"None of the specified GitHub repositories could be found for owner: {self.repo_owner}"
                        )
                    else:
                        raise ConnectorValidationError(
                            f"GitHub repository not found with name: {self.repo_owner}/{self.repositories}"
                        )
                else:
                    raise ConnectorValidationError(
                        f"GitHub user or organization not found: {self.repo_owner}"
@@ -310,6 +389,7 @@ class GithubConnector(LoadConnector, PollConnector):
                raise ConnectorValidationError(
                    f"Unexpected GitHub error (status={e.status}): {e.data}"
                )

        except Exception as exc:
            raise Exception(
                f"Unexpected error during GitHub settings validation: {exc}"
@@ -321,7 +401,7 @@ if __name__ == "__main__":

    connector = GithubConnector(
        repo_owner=os.environ["REPO_OWNER"],
        repo_name=os.environ["REPO_NAME"],
        repositories=os.environ["REPOSITORIES"],
    )
    connector.load_credentials(
        {"github_access_token": os.environ["GITHUB_ACCESS_TOKEN"]}
backend/onyx/db/seeding/chat_history_seeding.py (new file, 53 lines)
@@ -0,0 +1,53 @@
import random
from datetime import datetime
from datetime import timedelta

from onyx.configs.constants import MessageType
from onyx.db.chat import create_chat_session
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_or_create_root_message
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.models import ChatSession


def seed_chat_history(num_sessions: int, num_messages: int, days: int) -> None:
    """Utility function to seed chat history for testing.

    num_sessions: the number of sessions to seed
    num_messages: the number of messages to seed per sessions
    days: the number of days looking backwards from the current time over which to randomize
        the times.
    """
    with get_session_with_current_tenant() as db_session:
        for y in range(0, num_sessions):
            create_chat_session(db_session, f"pytest_session_{y}", None, None)

        # randomize all session times
        rows = db_session.query(ChatSession).all()
        for row in rows:
            row.time_created = datetime.utcnow() - timedelta(
                days=random.randint(0, days)
            )
            row.time_updated = row.time_created + timedelta(
                minutes=random.randint(0, 10)
            )

            root_message = get_or_create_root_message(row.id, db_session)

            for x in range(0, num_messages):
                chat_message = create_new_chat_message(
                    row.id,
                    root_message,
                    f"pytest_message_{x}",
                    None,
                    0,
                    MessageType.USER,
                    db_session,
                )

                chat_message.time_sent = row.time_created + timedelta(
                    minutes=random.randint(0, 10)
                )
                db_session.commit()

        db_session.commit()
@@ -464,12 +464,29 @@ def index_doc_batch(
        ),
    )

    successful_doc_ids = {record.document_id for record in insertion_records}
    if successful_doc_ids != set(updatable_ids):
    all_returned_doc_ids = (
        {record.document_id for record in insertion_records}
        .union(
            {
                record.failed_document.document_id
                for record in vector_db_write_failures
                if record.failed_document
            }
        )
        .union(
            {
                record.failed_document.document_id
                for record in embedding_failures
                if record.failed_document
            }
        )
    )
    if all_returned_doc_ids != set(updatable_ids):
        raise RuntimeError(
            f"Some documents were not successfully indexed. "
            f"Updatable IDs: {updatable_ids}, "
            f"Successful IDs: {successful_doc_ids}"
            f"Returned IDs: {all_returned_doc_ids}. "
            "This should never happen."
        )

    last_modified_ids = []
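The reworked check treats a document as accounted for when it appears either among the insertion records or among the embedding / vector-DB failures; only documents that appear in none of those sets now trigger the RuntimeError. A small sketch of the set logic with made-up IDs:

updatable_ids = {"doc-1", "doc-2", "doc-3"}

successful_doc_ids = {"doc-1"}       # stand-in for insertion_records
failed_doc_ids = {"doc-2", "doc-3"}  # stand-in for embedding and vector-DB failures

all_returned_doc_ids = successful_doc_ids | failed_doc_ids
if all_returned_doc_ids != updatable_ids:
    raise RuntimeError("Some documents were not successfully indexed.")
print("every updatable document was either indexed or reported as a failure")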
@@ -51,6 +51,7 @@ from onyx.server.documents.cc_pair import router as cc_pair_router
from onyx.server.documents.connector import router as connector_router
from onyx.server.documents.credential import router as credential_router
from onyx.server.documents.document import router as document_router
from onyx.server.documents.standard_oauth import router as standard_oauth_router
from onyx.server.features.document_set.api import router as document_set_router
from onyx.server.features.folder.api import router as folder_router
from onyx.server.features.input_prompt.api import (
@@ -322,6 +323,7 @@ def get_application() -> FastAPI:
    )
    include_router_with_global_prefix_prepended(application, long_term_logs_router)
    include_router_with_global_prefix_prepended(application, api_key_router)
    include_router_with_global_prefix_prepended(application, standard_oauth_router)

    if AUTH_TYPE == AuthType.DISABLED:
        # Server logs this during auth setup verification step
backend/scripts/chat_history_seeding.py (new file, 45 lines)
@@ -0,0 +1,45 @@
import argparse
import logging
from logging import getLogger

from onyx.db.seeding.chat_history_seeding import seed_chat_history

# Configure the logger
logging.basicConfig(
    level=logging.INFO,  # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",  # Log format
    handlers=[logging.StreamHandler()],  # Output logs to console
)

logger = getLogger(__name__)


def go_main(num_sessions: int, num_messages: int, num_days: int) -> None:
    seed_chat_history(num_sessions, num_messages, num_days)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Seed chat history")
    parser.add_argument(
        "--sessions",
        type=int,
        default=2048,
        help="Number of chat sessions to seed",
    )

    parser.add_argument(
        "--messages",
        type=int,
        default=4,
        help="Number of chat messages to seed per session",
    )

    parser.add_argument(
        "--days",
        type=int,
        default=90,
        help="Number of days looking backwards over which to seed the timestamps with",
    )

    args = parser.parse_args()
    go_main(args.sessions, args.messages, args.days)
@@ -45,7 +45,7 @@ def test_confluence_connector_basic(
    with pytest.raises(StopIteration):
        next(doc_batch_generator)

    assert len(doc_batch) == 3
    assert len(doc_batch) == 2

    page_within_a_page_doc: Document | None = None
    page_doc: Document | None = None
@@ -0,0 +1,46 @@
from datetime import datetime
from datetime import timedelta
from datetime import timezone

from ee.onyx.db.usage_export import get_all_empty_chat_message_entries
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.seeding.chat_history_seeding import seed_chat_history


def test_usage_reports(reset: None) -> None:
    EXPECTED_SESSIONS = 2048
    MESSAGES_PER_SESSION = 4
    EXPECTED_MESSAGES = EXPECTED_SESSIONS * MESSAGES_PER_SESSION

    seed_chat_history(EXPECTED_SESSIONS, MESSAGES_PER_SESSION, 90)

    with get_session_with_current_tenant() as db_session:
        # count of all entries should be exact
        period = (
            datetime.fromtimestamp(0, tz=timezone.utc),
            datetime.now(tz=timezone.utc),
        )

        count = 0
        for entry_batch in get_all_empty_chat_message_entries(db_session, period):
            for entry in entry_batch:
                count += 1

        assert count == EXPECTED_MESSAGES

        # count in a one month time range should be within a certain range statistically
        # this can be improved if we seed the chat history data deterministically
        period = (
            datetime.now(tz=timezone.utc) - timedelta(days=30),
            datetime.now(tz=timezone.utc),
        )

        count = 0
        for entry_batch in get_all_empty_chat_message_entries(db_session, period):
            for entry in entry_batch:
                count += 1

        lower = EXPECTED_MESSAGES // 3 - (EXPECTED_MESSAGES // (3 * 3))
        upper = EXPECTED_MESSAGES // 3 + (EXPECTED_MESSAGES // (3 * 3))
        assert count > lower
        assert count < upper
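With the seeded defaults the tolerance band in the one-month assertion works out as follows; a quick check of the arithmetic:

EXPECTED_MESSAGES = 2048 * 4  # 8192 messages spread roughly uniformly over ~90 days

expected_in_month = EXPECTED_MESSAGES // 3      # about 30 of the 90 days
tolerance = EXPECTED_MESSAGES // (3 * 3)        # one third of that expectation
print(expected_in_month - tolerance, expected_in_month + tolerance)  # 1820 3640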
@@ -80,3 +80,13 @@ prod cluster**
- `kubectl delete -f .`
- To keep the persistent volumes (Document indexes and Users), list the specific `.yaml` files to delete instead of
  `.`, leaving out persistent-volumes.yaml.

### Using Helm to deploy to an existing cluster

Onyx ships a Helm chart that makes it convenient to install all services into an existing Kubernetes cluster. To install:

* The Helm chart is not yet published, so clone the repo to install it.
* Configure access to the cluster via kubectl, and make sure the kubectl context points at the cluster you want to use.
* The default secrets, environment variables and other service-level configuration live in `deployment/helm/charts/onyx/values.yaml`. Put any changes in a separate `override.yaml`.
* `cd deployment/helm/charts/onyx` and run `helm install onyx -n onyx -f override.yaml .`. This installs Onyx on the cluster under the `onyx` namespace.
* Check the status of the deployment with `kubectl get pods -n onyx`.
deployment/helm/charts/onyx/templates/ingress-api.yaml (new file, 27 lines)
@@ -0,0 +1,27 @@
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: {{ include "onyx-stack.fullname" . }}-ingress-api
  annotations:
    kubernetes.io/ingress.class: nginx
    nginx.ingress.kubernetes.io/rewrite-target: /$2
    nginx.ingress.kubernetes.io/use-regex: "true"
    cert-manager.io/cluster-issuer: {{ include "onyx-stack.fullname" . }}-letsencrypt
spec:
  rules:
    - host: {{ .Values.ingress.api.host }}
      http:
        paths:
          - path: /api(/|$)(.*)
            pathType: Prefix
            backend:
              service:
                name: {{ include "onyx-stack.fullname" . }}-api-service
                port:
                  number: {{ .Values.api.service.servicePort }}
  tls:
    - hosts:
        - {{ .Values.ingress.api.host }}
      secretName: {{ include "onyx-stack.fullname" . }}-ingress-api-tls
{{- end }}
deployment/helm/charts/onyx/templates/ingress-webserver.yaml (new file, 26 lines)
@@ -0,0 +1,26 @@
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: {{ include "onyx-stack.fullname" . }}-ingress-webserver
  annotations:
    kubernetes.io/ingress.class: nginx
    cert-manager.io/cluster-issuer: {{ include "onyx-stack.fullname" . }}-letsencrypt
    kubernetes.io/tls-acme: "true"
spec:
  rules:
    - host: {{ .Values.ingress.webserver.host }}
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: {{ include "onyx-stack.fullname" . }}-webserver
                port:
                  number: {{ .Values.webserver.service.servicePort }}
  tls:
    - hosts:
        - {{ .Values.ingress.webserver.host }}
      secretName: {{ include "onyx-stack.fullname" . }}-ingress-webserver-tls
{{- end }}
deployment/helm/charts/onyx/templates/lets-encrypt.yaml (new file, 20 lines)
@@ -0,0 +1,20 @@
{{- if .Values.letsencrypt.enabled -}}
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: {{ include "onyx-stack.fullname" . }}-letsencrypt
spec:
  acme:
    # The ACME server URL
    server: https://acme-v02.api.letsencrypt.org/directory
    # Email address used for ACME registration
    email: {{ .Values.letsencrypt.email }}
    # Name of a secret used to store the ACME account private key
    privateKeySecretRef:
      name: {{ include "onyx-stack.fullname" . }}-letsencrypt
    # Enable the HTTP-01 challenge provider
    solvers:
      - http01:
          ingress:
            class: nginx
{{- end }}
@@ -376,22 +376,17 @@ redis:
    existingSecret: onyx-secrets
    existingSecretPasswordKey: redis_password

# ingress:
#   enabled: false
#   className: ""
#   annotations: {}
#     # kubernetes.io/ingress.class: nginx
#     # kubernetes.io/tls-acme: "true"
#   hosts:
#     - host: chart-example.local
#       paths:
#         - path: /
#           pathType: ImplementationSpecific
#   tls: []
#   #  - secretName: chart-example-tls
#   #    hosts:
#   #      - chart-example.local
ingress:
  enabled: false
  className: ""
  api:
    host: onyx.local
  webserver:
    host: onyx.local

letsencrypt:
  enabled: false
  email: "abc@abc.com"

auth:
  # existingSecret onyx-secret for storing smtp, oauth, slack, and other secrets
@@ -290,21 +290,24 @@ export function SettingsForm() {
            id="chatRetentionInput"
            placeholder="Infinite Retention"
          />
          <Button
            onClick={handleSetChatRetention}
            variant="submit"
            size="sm"
            className="mr-3"
          >
            Set Retention Limit
          </Button>
          <Button
            onClick={handleClearChatRetention}
            variant="default"
            size="sm"
          >
            Retain All
          </Button>
          <div className="mr-auto flex gap-2">
            <Button
              onClick={handleSetChatRetention}
              variant="submit"
              size="sm"
              className="mr-auto"
            >
              Set Retention Limit
            </Button>
            <Button
              onClick={handleClearChatRetention}
              variant="default"
              size="sm"
              className="mr-auto"
            >
              Retain All
            </Button>
          </div>
        </>
      )}
@@ -61,6 +61,7 @@ export function EmailPasswordForm({

      if (!response.ok) {
        setIsWorking(false);

        const errorDetail = (await response.json()).detail;
        let errorMsg = "Unknown error";
        if (typeof errorDetail === "object" && errorDetail.reason) {
@@ -96,12 +97,13 @@ export function EmailPasswordForm({
      } else {
        setIsWorking(false);
        const errorDetail = (await loginResponse.json()).detail;

        let errorMsg = "Unknown error";
        if (errorDetail === "LOGIN_BAD_CREDENTIALS") {
          errorMsg = "Invalid email or password";
        } else if (errorDetail === "NO_WEB_LOGIN_AND_HAS_NO_PASSWORD") {
          errorMsg = "Create an account to set a password";
        } else if (typeof errorDetail === "string") {
          errorMsg = errorDetail;
        }
        if (loginResponse.status === 429) {
          errorMsg = "Too many requests. Please try again later.";
@@ -191,6 +191,7 @@ export const FolderDropdown = forwardRef<HTMLDivElement, FolderDropdownProps>(
                onChange={(e) => setNewFolderName(e.target.value)}
                className="text-sm font-medium bg-transparent outline-none w-full pb-1 border-b border-background-500 transition-colors duration-200"
                onKeyDown={(e) => {
                  e.stopPropagation();
                  if (e.key === "Enter") {
                    handleEdit();
                  }
@@ -303,7 +303,6 @@ const FolderItem = ({
                  key={chatSession.id}
                  chatSession={chatSession}
                  isSelected={chatSession.id === currentChatId}
                  skipGradient={isDragOver}
                  showShareModal={showShareModal}
                  showDeleteModal={showDeleteModal}
                />
@@ -32,21 +32,17 @@ export function ChatSessionDisplay({
  chatSession,
  search,
  isSelected,
  skipGradient,
  closeSidebar,
  showShareModal,
  showDeleteModal,
  foldersExisting,
  isDragging,
}: {
  chatSession: ChatSession;
  isSelected: boolean;
  search?: boolean;
  skipGradient?: boolean;
  closeSidebar?: () => void;
  showShareModal?: (chatSession: ChatSession) => void;
  showDeleteModal?: (chatSession: ChatSession) => void;
  foldersExisting?: boolean;
  isDragging?: boolean;
}) {
  const router = useRouter();
@@ -238,8 +234,12 @@ export function ChatSessionDisplay({
                    e.preventDefault();
                    e.stopPropagation();
                  }}
                  onChange={(e) => setChatName(e.target.value)}
                  onChange={(e) => {
                    setChatName(e.target.value);
                  }}
                  onKeyDown={(event) => {
                    event.stopPropagation();

                    if (event.key === "Enter") {
                      onRename();
                      event.preventDefault();
@@ -264,7 +264,6 @@ export function PagesTab({
                        >
                          <ChatSessionDisplay
                            chatSession={chat}
                            foldersExisting={foldersExisting}
                            isSelected={currentChatId === chat.id}
                            showShareModal={showShareModal}
                            showDeleteModal={showDeleteModal}
@@ -40,8 +40,12 @@ export const ConnectorTitle = ({
    const typedConnector = connector as Connector<GithubConfig>;
    additionalMetadata.set(
      "Repo",
      typedConnector.connector_specific_config.repo_name
        ? `${typedConnector.connector_specific_config.repo_owner}/${typedConnector.connector_specific_config.repo_name}`
      typedConnector.connector_specific_config.repositories
        ? `${typedConnector.connector_specific_config.repo_owner}/${
            typedConnector.connector_specific_config.repositories.includes(",")
              ? "multiple repos"
              : typedConnector.connector_specific_config.repositories
          }`
        : `${typedConnector.connector_specific_config.repo_owner}/*`
    );
  } else if (connector.source === "gitlab") {
@@ -190,10 +190,12 @@ export const connectorConfigs: Record<
    fields: [
      {
        type: "text",
        query: "Enter the repository name:",
        label: "Repository Name",
        name: "repo_name",
        query: "Enter the repository name(s):",
        label: "Repository Name(s)",
        name: "repositories",
        optional: false,
        description:
          "For multiple repositories, enter comma-separated names (e.g., repo1,repo2,repo3)",
      },
    ],
  },
@@ -1358,7 +1360,7 @@ export interface WebConfig {

export interface GithubConfig {
  repo_owner: string;
  repo_name: string;
  repositories: string; // Comma-separated list of repository names
  include_prs: boolean;
  include_issues: boolean;
}