Compare commits

..

7 Commits

Author SHA1 Message Date
pablonyx
575cfa5224 fix 2025-03-27 11:16:58 -07:00
pablonyx
0d5497bb6b Add multi-tenant user invitation flow test (#4360) 2025-03-27 09:53:15 -07:00
Chris Weaver
7648627503 Save all logs + add log persistence to most Onyx-owned containers (#4368)
* Save all logs + add log persistence to most Onyx-owned containers

* Separate volumes for each container

* Small fixes
2025-03-26 22:25:39 -07:00
pablonyx
927554d5ca slight robustification (#4367) 2025-03-27 03:23:36 +00:00
pablonyx
7dcec6caf5 Fix session touching (#4363)
* fix session touching

* Revert "fix session touching"

This reverts commit c473d5c9a2.

* Revert "Revert "fix session touching""

This reverts commit 26a71d40b6.

* update

* quick nit
2025-03-27 01:18:46 +00:00
rkuo-danswer
036648146d possible fix for confluence query filter (#4280)
* possible fix for confluence query filter

* nuke the attachment filter query ... it doesn't work!

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-27 00:35:14 +00:00
rkuo-danswer
2aa4697ac8 permission sync runs so often that it starves out other tasks if run at high priority (#4364)
Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-03-27 00:22:53 +00:00
20 changed files with 246 additions and 44 deletions

View File

@@ -505,8 +505,11 @@ async def setup_tenant(tenant_id: str) -> None:
try:
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
# Run Alembic migrations
await asyncio.to_thread(run_alembic_migrations, tenant_id)
# Run Alembic migrations in a way that isolates it from the current event loop
# Create a new event loop for this synchronous operation
loop = asyncio.get_event_loop()
# Use run_in_executor which properly isolates the thread execution
await loop.run_in_executor(None, lambda: run_alembic_migrations(tenant_id))
# Configure the tenant with default settings
with get_session_with_tenant(tenant_id=tenant_id) as db_session:

View File

@@ -70,6 +70,7 @@ def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:
"""
Add users to a tenant with proper transaction handling.
Checks if users already have a tenant mapping to avoid duplicates.
If a user already has an active mapping to any tenant, the new mapping will be added as inactive.
"""
with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:
try:
@@ -88,9 +89,25 @@ def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:
.first()
)
# If user already has an active mapping, add this one as inactive
if not existing_mapping:
# Only add if mapping doesn't exist
db_session.add(UserTenantMapping(email=email, tenant_id=tenant_id))
# Check if the user already has an active mapping to any tenant
has_active_mapping = (
db_session.query(UserTenantMapping)
.filter(
UserTenantMapping.email == email,
UserTenantMapping.active == True, # noqa: E712
)
.first()
)
db_session.add(
UserTenantMapping(
email=email,
tenant_id=tenant_id,
active=False if has_active_mapping else True,
)
)
# Commit the transaction
db_session.commit()

View File

@@ -14,7 +14,7 @@ logger = setup_logger()
# Only set up memory monitoring in container environment
if is_running_in_container():
# Set up a dedicated memory monitoring logger
MEMORY_LOG_DIR = "/var/log/persisted-logs/memory"
MEMORY_LOG_DIR = "/var/log/memory"
MEMORY_LOG_FILE = os.path.join(MEMORY_LOG_DIR, "memory_usage.log")
MEMORY_LOG_MAX_BYTES = 10 * 1024 * 1024 # 10MB
MEMORY_LOG_BACKUP_COUNT = 5 # Keep 5 backup files

View File

@@ -286,7 +286,7 @@ def try_creating_permissions_sync_task(
),
queue=OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC,
task_id=custom_task_id,
priority=OnyxCeleryPriority.HIGH,
priority=OnyxCeleryPriority.MEDIUM,
)
# fill in the celery task id

View File

@@ -271,7 +271,7 @@ def try_creating_external_group_sync_task(
),
queue=OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC,
task_id=custom_task_id,
priority=OnyxCeleryPriority.HIGH,
priority=OnyxCeleryPriority.MEDIUM,
)
payload.celery_task_id = result.id

View File

@@ -73,6 +73,7 @@ from onyx.db.chat import get_or_create_root_message
from onyx.db.chat import reserve_message_id
from onyx.db.chat import translate_db_message_to_chat_message_detail
from onyx.db.chat import translate_db_search_doc_to_server_search_doc
from onyx.db.chat import update_chat_session_updated_at_timestamp
from onyx.db.engine import get_session_context_manager
from onyx.db.milestone import check_multi_assistant_milestone
from onyx.db.milestone import create_milestone_if_not_exists
@@ -1069,6 +1070,8 @@ def stream_chat_message_objects(
prev_message = next_answer_message
logger.debug("Committing messages")
# Explicitly update the timestamp on the chat session
update_chat_session_updated_at_timestamp(chat_session_id, db_session)
db_session.commit() # actually save user / assistant message
yield AgenticMessageResponseIDInfo(agentic_message_ids=agentic_message_ids)

View File

@@ -65,20 +65,6 @@ _RESTRICTIONS_EXPANSION_FIELDS = [
_SLIM_DOC_BATCH_SIZE = 5000
_ATTACHMENT_EXTENSIONS_TO_FILTER_OUT = [
"gif",
"mp4",
"mov",
"mp3",
"wav",
]
_FULL_EXTENSION_FILTER_STRING = "".join(
[
f" and title!~'*.{extension}'"
for extension in _ATTACHMENT_EXTENSIONS_TO_FILTER_OUT
]
)
ONE_HOUR = 3600
@@ -209,7 +195,6 @@ class ConfluenceConnector(
def _construct_attachment_query(self, confluence_page_id: str) -> str:
attachment_query = f"type=attachment and container='{confluence_page_id}'"
attachment_query += self.cql_label_filter
attachment_query += _FULL_EXTENSION_FILTER_STRING
return attachment_query
def _get_comment_string_for_page_id(self, page_id: str) -> str:
@@ -374,11 +359,13 @@ class ConfluenceConnector(
if not validate_attachment_filetype(
attachment,
):
logger.info(f"Skipping attachment: {attachment['title']}")
continue
logger.info(f"Processing attachment: {attachment['title']}")
# Attempt to get textual content or image summarization:
try:
logger.info(f"Processing attachment: {attachment['title']}")
response = convert_attachment_to_content(
confluence_client=self.confluence_client,
attachment=attachment,

View File

@@ -1089,3 +1089,20 @@ def log_agent_sub_question_results(
db_session.commit()
return None
def update_chat_session_updated_at_timestamp(
chat_session_id: UUID, db_session: Session
) -> None:
"""
Explicitly update the timestamp on a chat session without modifying other fields.
This is useful when adding messages to a chat session to reflect recent activity.
"""
# Direct SQL update to avoid loading the entire object if it's not already loaded
db_session.execute(
update(ChatSession)
.where(ChatSession.id == chat_session_id)
.values(time_updated=func.now())
)
# No commit - the caller is responsible for committing the transaction

View File

@@ -15,6 +15,7 @@ EXCLUDED_IMAGE_TYPES = [
"image/tiff",
"image/gif",
"image/svg+xml",
"image/avif",
]

View File

@@ -313,7 +313,7 @@ def bulk_invite_users(
detail=f"Invalid email address: {email} - {str(e)}",
)
if MULTI_TENANT and not DEV_MODE:
if MULTI_TENANT:
try:
fetch_ee_implementation_or_noop(
"onyx.server.tenants.provisioning", "add_users_to_tenant", None
@@ -335,7 +335,7 @@ def bulk_invite_users(
except Exception as e:
logger.error(f"Error sending email invite to invited users: {e}")
if not MULTI_TENANT:
if not MULTI_TENANT or DEV_MODE:
return number_of_invited_users
# for billing purposes, write to the control plane about the number of new users
@@ -376,7 +376,7 @@ def remove_invited_user(
number_of_invited_users = write_invited_users(remaining_users)
try:
if MULTI_TENANT:
if MULTI_TENANT and not DEV_MODE:
fetch_ee_implementation_or_noop(
"onyx.server.tenants.billing", "register_tenant_users", None
)(tenant_id, get_total_users_count(db_session))

View File

@@ -9,7 +9,9 @@ from requests import HTTPError
from onyx.auth.schemas import UserRole
from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
from onyx.server.documents.models import PaginatedReturn
from onyx.server.manage.models import UserInfo
from onyx.server.models import FullUserSnapshot
from onyx.server.models import InvitedUserSnapshot
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import GENERAL_HEADERS
from tests.integration.common_utils.test_models import DATestUser
@@ -245,3 +247,69 @@ class UserManager:
total_items=data["total_items"],
)
return paginated_result
@staticmethod
def invite_user(
user_to_invite_email: str, user_performing_action: DATestUser
) -> None:
"""Invite a user by email to join the organization.
Args:
user_to_invite_email: Email of the user to invite
user_performing_action: User with admin permissions performing the invitation
"""
response = requests.put(
url=f"{API_SERVER_URL}/manage/admin/users",
headers=user_performing_action.headers,
json={"emails": [user_to_invite_email]},
)
response.raise_for_status()
@staticmethod
def accept_invitation(tenant_id: str, user_performing_action: DATestUser) -> None:
"""Accept an invitation to join the organization.
Args:
tenant_id: ID of the tenant/organization to accept invitation for
user_performing_action: User accepting the invitation
"""
response = requests.post(
url=f"{API_SERVER_URL}/tenants/users/invite/accept",
headers=user_performing_action.headers,
json={"tenant_id": tenant_id},
)
response.raise_for_status()
@staticmethod
def get_invited_users(
user_performing_action: DATestUser,
) -> list[InvitedUserSnapshot]:
"""Get a list of all invited users.
Args:
user_performing_action: User with admin permissions performing the action
Returns:
List of invited user snapshots
"""
response = requests.get(
url=f"{API_SERVER_URL}/manage/users/invited",
headers=user_performing_action.headers,
)
response.raise_for_status()
return [InvitedUserSnapshot(**user) for user in response.json()]
@staticmethod
def get_user_info(user_performing_action: DATestUser) -> UserInfo:
"""Get user info for the current user.
Args:
user_performing_action: User performing the action
"""
response = requests.get(
url=f"{API_SERVER_URL}/me",
headers=user_performing_action.headers,
)
response.raise_for_status()
return UserInfo(**response.json())

View File

@@ -0,0 +1,70 @@
from onyx.db.models import UserRole
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser
INVITED_BASIC_USER = "basic_user"
INVITED_BASIC_USER_EMAIL = "basic_user@test.com"
def test_user_invitation_flow(reset_multitenant: None) -> None:
# Create first user (admin)
admin_user: DATestUser = UserManager.create(name="admin")
assert UserManager.is_role(admin_user, UserRole.ADMIN)
# Create second user
invited_user: DATestUser = UserManager.create(name="admin_invited")
assert UserManager.is_role(invited_user, UserRole.ADMIN)
# Admin user invites the previously registered and non-registered user
UserManager.invite_user(invited_user.email, admin_user)
UserManager.invite_user(INVITED_BASIC_USER_EMAIL, admin_user)
invited_basic_user: DATestUser = UserManager.create(
name=INVITED_BASIC_USER, email=INVITED_BASIC_USER_EMAIL
)
assert UserManager.is_role(invited_basic_user, UserRole.BASIC)
# Verify the user is in the invited users list
invited_users = UserManager.get_invited_users(admin_user)
assert invited_user.email in [
user.email for user in invited_users
], f"User {invited_user.email} not found in invited users list"
# Get user info to check tenant information
user_info = UserManager.get_user_info(invited_user)
# Extract the tenant_id from the invitation
invited_tenant_id = (
user_info.tenant_info.invitation.tenant_id
if user_info.tenant_info and user_info.tenant_info.invitation
else None
)
assert invited_tenant_id is not None, "Expected to find an invitation tenant_id"
UserManager.accept_invitation(invited_tenant_id, invited_user)
# Get updated user info after accepting invitation
updated_user_info = UserManager.get_user_info(invited_user)
# Verify the user is no longer in the invited users list
updated_invited_users = UserManager.get_invited_users(admin_user)
assert invited_user.email not in [
user.email for user in updated_invited_users
], f"User {invited_user.email} should not be in invited users list after accepting"
# Verify the user has BASIC role in the organization
assert (
updated_user_info.role == UserRole.BASIC
), f"Expected user to have BASIC role, but got {updated_user_info.role}"
# Verify user is in the organization
user_page = UserManager.get_user_page(
user_performing_action=admin_user, role_filter=[UserRole.BASIC]
)
# Check if the invited user is in the list of users with BASIC role
invited_user_emails = [user.email for user in user_page.items]
assert invited_user.email in invited_user_emails, (
f"User {invited_user.email} not found in the list of basic users "
f"in the organization. Available users: {invited_user_emails}"
)

View File

@@ -129,6 +129,9 @@ services:
options:
max-size: "50m"
max-file: "6"
# optional, only for debugging purposes
volumes:
- api_server_logs:/var/log
background:
image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
@@ -256,7 +259,7 @@ services:
- "host.docker.internal:host-gateway"
# optional, only for debugging purposes
volumes:
- log_store:/var/log/persisted-logs
- background_logs:/var/log
logging:
driver: json-file
options:
@@ -325,6 +328,8 @@ services:
volumes:
# Not necessary, this is just to reduce download time during startup
- model_cache_huggingface:/root/.cache/huggingface/
# optional, only for debugging purposes
- inference_model_server_logs:/var/log
logging:
driver: json-file
options:
@@ -357,6 +362,8 @@ services:
volumes:
# Not necessary, this is just to reduce download time during startup
- indexing_huggingface_model_cache:/root/.cache/huggingface/
# optional, only for debugging purposes
- indexing_model_server_logs:/var/log
logging:
driver: json-file
options:
@@ -434,4 +441,8 @@ volumes:
model_cache_huggingface:
indexing_huggingface_model_cache:
log_store: # for logs that we don't want to lose on container restarts
# for logs that we don't want to lose on container restarts
api_server_logs:
background_logs:
inference_model_server_logs:
indexing_model_server_logs:

View File

@@ -106,6 +106,9 @@ services:
options:
max-size: "50m"
max-file: "6"
volumes:
# optional, only for debugging purposes
- api_server_logs:/var/log
background:
image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
@@ -211,7 +214,7 @@ services:
- "host.docker.internal:host-gateway"
# optional, only for debugging purposes
volumes:
- log_store:/var/log/persisted-logs
- background_logs:/var/log
logging:
driver: json-file
options:
@@ -273,6 +276,8 @@ services:
volumes:
# Not necessary, this is just to reduce download time during startup
- model_cache_huggingface:/root/.cache/huggingface/
# optional, only for debugging purposes
- inference_model_server_logs:/var/log
logging:
driver: json-file
options:
@@ -310,6 +315,8 @@ services:
volumes:
# Not necessary, this is just to reduce download time during startup
- indexing_huggingface_model_cache:/root/.cache/huggingface/
# optional, only for debugging purposes
- indexing_model_server_logs:/var/log
logging:
driver: json-file
options:
@@ -387,4 +394,8 @@ volumes:
# Created by the container itself
model_cache_huggingface:
indexing_huggingface_model_cache:
log_store: # for logs that we don't want to lose on container restarts
# for logs that we don't want to lose on container restarts
api_server_logs:
background_logs:
inference_model_server_logs:
indexing_model_server_logs:

View File

@@ -244,8 +244,6 @@ services:
# - ./bundle.pem:/app/bundle.pem:ro
extra_hosts:
- "host.docker.internal:host-gateway"
volumes:
- log_store:/var/log/persisted-logs
logging:
driver: json-file
options:
@@ -423,4 +421,3 @@ volumes:
model_cache_huggingface:
indexing_huggingface_model_cache:
log_store: # for logs that we don't want to lose on container restarts

View File

@@ -54,9 +54,6 @@ services:
- INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
extra_hosts:
- "host.docker.internal:host-gateway"
# optional, only for debugging purposes
volumes:
- log_store:/var/log/persisted-logs
logging:
driver: json-file
options:
@@ -236,4 +233,3 @@ volumes:
# Created by the container itself
model_cache_huggingface:
indexing_huggingface_model_cache:
log_store: # for logs that we don't want to lose on container restarts

View File

@@ -36,6 +36,10 @@ services:
options:
max-size: "50m"
max-file: "6"
volumes:
# optional, only for debugging purposes
- api_server_logs:/var/log
background:
image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
@@ -69,7 +73,7 @@ services:
extra_hosts:
- "host.docker.internal:host-gateway"
volumes:
- log_store:/var/log/persisted-logs
- background_logs:/var/log
logging:
driver: json-file
options:
@@ -122,6 +126,8 @@ services:
volumes:
# Not necessary, this is just to reduce download time during startup
- model_cache_huggingface:/root/.cache/huggingface/
# optional, only for debugging purposes
- inference_model_server_logs:/var/log
logging:
driver: json-file
options:
@@ -150,6 +156,8 @@ services:
volumes:
# Not necessary, this is just to reduce download time during startup
- indexing_huggingface_model_cache:/root/.cache/huggingface/
# optional, only for debugging purposes
- indexing_model_server_logs:/var/log
logging:
driver: json-file
options:
@@ -231,4 +239,8 @@ volumes:
# Created by the container itself
model_cache_huggingface:
indexing_huggingface_model_cache:
log_store: # for logs that we don't want to lose on container restarts
# for logs that we don't want to lose on container restarts
api_server_logs:
background_logs:
inference_model_server_logs:
indexing_model_server_logs:

View File

@@ -32,13 +32,14 @@ services:
# - ./bundle.pem:/app/bundle.pem:ro
extra_hosts:
- "host.docker.internal:host-gateway"
volumes:
- log_store:/var/log/persisted-logs
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
volumes:
- api_server_logs:/var/log
background:
image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
build:
@@ -76,7 +77,7 @@ services:
extra_hosts:
- "host.docker.internal:host-gateway"
volumes:
- log_store:/var/log/persisted-logs
- background_logs:/var/log
logging:
driver: json-file
options:
@@ -152,6 +153,8 @@ services:
volumes:
# Not necessary, this is just to reduce download time during startup
- model_cache_huggingface:/root/.cache/huggingface/
# optional, only for debugging purposes
- inference_model_server_logs:/var/log
logging:
driver: json-file
options:
@@ -180,6 +183,8 @@ services:
volumes:
# Not necessary, this is just to reduce download time during startup
- indexing_huggingface_model_cache:/root/.cache/huggingface/
# optional, only for debugging purposes
- indexing_model_server_logs:/var/log
logging:
driver: json-file
options:
@@ -264,4 +269,8 @@ volumes:
# Created by the container itself
model_cache_huggingface:
indexing_huggingface_model_cache:
log_store: # for logs that we don't want to lose on container restarts
# for logs that we don't want to lose on container restarts
api_server_logs:
background_logs:
inference_model_server_logs:
indexing_model_server_logs:

View File

@@ -63,7 +63,7 @@ services:
extra_hosts:
- "host.docker.internal:host-gateway"
volumes:
- log_store:/var/log/persisted-logs
- log_store:/var/log
logging:
driver: json-file
options: