Compare commits

..

6 Commits

Author SHA1 Message Date
pablonyx
176a7a8150 k 2025-03-31 10:28:23 -07:00
pablonyx
0cb81539ab update 2025-03-28 10:56:02 -07:00
pablonyx
13c95014f2 update 2025-03-28 10:39:21 -07:00
pablonyx
d8bbe8d99d k 2025-03-28 10:13:13 -07:00
evan-danswer
a123661c92 fixed shared folder issue (#4371)
* fixed shared folder issue

* fix existing tests

* default allow files shared with me for service account
2025-03-27 23:39:52 +00:00
pablonyx
c554889baf Fix actions link (#4374) 2025-03-27 16:39:35 -07:00
26 changed files with 491 additions and 161 deletions

View File

@@ -6,6 +6,7 @@ from onyx.configs.constants import NotificationType
from onyx.db.models import Persona__User
from onyx.db.models import Persona__UserGroup
from onyx.db.notification import create_notification
from onyx.db.users import add_belongs_user_if_not_exists
from onyx.server.features.persona.models import PersonaSharedNotificationData
@@ -14,6 +15,7 @@ def make_persona_private(
user_ids: list[UUID] | None,
group_ids: list[int] | None,
db_session: Session,
new_user_emails: list[str] | None = None,
) -> None:
"""NOTE(rkuo): This function batches all updates into a single commit. If we don't
dedupe the inputs, the commit will exception."""
@@ -39,6 +41,27 @@ def make_persona_private(
).model_dump(),
)
# Handle new user emails (create users that don't exist yet)
if new_user_emails is not None and len(new_user_emails) > 0:
for email in new_user_emails:
# Create a new user with BELONGS_TO_GROUP role if not exists
user = add_belongs_user_if_not_exists(db_session=db_session, email=email)
# Add user to persona
db_session.add(Persona__User(persona_id=persona_id, user_id=user.id))
# Create notification
create_notification(
user_id=user.id,
notif_type=NotificationType.PERSONA_SHARED,
db_session=db_session,
additional_data=PersonaSharedNotificationData(
persona_id=persona_id,
).model_dump(),
)
db_session.commit()
if group_ids:
group_ids_set = set(group_ids)
for group_id in group_ids_set:

View File

@@ -374,25 +374,35 @@ def _add_user_group__cc_pair_relationships__no_commit(
def insert_user_group(db_session: Session, user_group: UserGroupCreate) -> UserGroup:
db_user_group = UserGroup(
name=user_group.name, time_last_modified_by_user=func.now()
)
db_session.add(db_user_group)
db_session.flush() # give the group an ID
# create the user group
new_group = UserGroup(name=user_group.name)
db_session.add(new_group)
db_session.flush()
_add_user__user_group_relationships__no_commit(
db_session=db_session,
user_group_id=db_user_group.id,
user_ids=user_group.user_ids,
)
_add_user_group__cc_pair_relationships__no_commit(
db_session=db_session,
user_group_id=db_user_group.id,
cc_pair_ids=user_group.cc_pair_ids,
)
user_ids = list(user_group.user_ids)
# Handle new user emails by creating users with BELONGS role
if user_group.new_user_emails and len(user_group.new_user_emails) > 0:
from onyx.db.users import add_belongs_user_if_not_exists
for email in user_group.new_user_emails:
user = add_belongs_user_if_not_exists(db_session=db_session, email=email)
user_ids.append(user.id)
if user_ids:
_add_user__user_group_relationships__no_commit(
db_session=db_session, user_group_id=new_group.id, user_ids=user_ids
)
if user_group.cc_pair_ids:
_add_user_group__cc_pair_relationships__no_commit(
db_session=db_session,
user_group_id=new_group.id,
cc_pair_ids=user_group.cc_pair_ids,
)
db_session.commit()
return db_user_group
return new_group
def _mark_user_group__cc_pair_relationships_outdated__no_commit(
@@ -585,8 +595,18 @@ def update_user_group(
_check_user_group_is_modifiable(db_user_group)
user_ids = list(user_group_update.user_ids)
# Handle new user emails by creating users with BELONGS role
if user_group_update.new_user_emails and len(user_group_update.new_user_emails) > 0:
from onyx.db.users import add_belongs_user_if_not_exists
for email in user_group_update.new_user_emails:
user = add_belongs_user_if_not_exists(db_session=db_session, email=email)
user_ids.append(user.id)
current_user_ids = set([user.id for user in db_user_group.users])
updated_user_ids = set(user_group_update.user_ids)
updated_user_ids = set(user_ids)
added_user_ids = list(updated_user_ids - current_user_ids)
removed_user_ids = list(current_user_ids - updated_user_ids)

View File

@@ -36,8 +36,13 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
router = APIRouter(prefix="/auth/saml")
# Define non-authenticated user roles that should be re-created during SAML login
NON_AUTHENTICATED_ROLES = {UserRole.SLACK_USER, UserRole.EXT_PERM_USER}
# Define user roles that should not be re-created during SAML login
AUTHENTICATED_ROLES = {
UserRole.GLOBAL_CURATOR,
UserRole.BASIC,
UserRole.ADMIN,
UserRole.CURATOR,
}
async def upsert_saml_user(email: str) -> User:
@@ -54,7 +59,7 @@ async def upsert_saml_user(email: str) -> User:
try:
user = await user_manager.get_by_email(email)
# If user has a non-authenticated role, treat as non-existent
if user.role in NON_AUTHENTICATED_ROLES:
if user.role not in AUTHENTICATED_ROLES:
raise exceptions.UserNotExists()
return user
except exceptions.UserNotExists:

View File

@@ -1,6 +1,7 @@
from uuid import UUID
from pydantic import BaseModel
from pydantic import Field
from onyx.db.models import UserGroup as UserGroupModel
from onyx.server.documents.models import ConnectorCredentialPairDescriptor
@@ -79,11 +80,13 @@ class UserGroup(BaseModel):
class UserGroupCreate(BaseModel):
    """Request payload for creating a user group."""

    name: str
    user_ids: list[UUID]
    # Emails of users that don't exist yet; they are created during group
    # creation and added to the group alongside `user_ids`.
    new_user_emails: list[str] = Field(default_factory=list)
    cc_pair_ids: list[int]


class UserGroupUpdate(BaseModel):
    """Request payload for updating an existing user group's membership."""

    user_ids: list[UUID]
    # Emails of users that don't exist yet; they are created during the
    # update and added to the group alongside `user_ids`.
    new_user_emails: list[str] = Field(default_factory=list)
    cc_pair_ids: list[int]

View File

@@ -16,6 +16,7 @@ class UserRole(str, Enum):
- Limited can access a limited set of basic api endpoints
- Slack are users that have used onyx via slack but don't have a web login
- External permissioned users that have been picked up during the external permissions sync process but don't have a web login
- Belongs to group are users that have been added to a group but don't have a web login yet
"""
LIMITED = "limited"
@@ -25,11 +26,13 @@ class UserRole(str, Enum):
GLOBAL_CURATOR = "global_curator"
SLACK_USER = "slack_user"
EXT_PERM_USER = "ext_perm_user"
BELONGS = "belongs"
def is_web_login(self) -> bool:
return self not in [
UserRole.SLACK_USER,
UserRole.EXT_PERM_USER,
UserRole.BELONGS,
]

View File

@@ -28,7 +28,9 @@ from onyx.connectors.google_drive.doc_conversion import (
)
from onyx.connectors.google_drive.file_retrieval import crawl_folders_for_files
from onyx.connectors.google_drive.file_retrieval import get_all_files_for_oauth
from onyx.connectors.google_drive.file_retrieval import get_all_files_in_my_drive
from onyx.connectors.google_drive.file_retrieval import (
get_all_files_in_my_drive_and_shared,
)
from onyx.connectors.google_drive.file_retrieval import get_files_in_shared_drive
from onyx.connectors.google_drive.file_retrieval import get_root_folder_id
from onyx.connectors.google_drive.models import DriveRetrievalStage
@@ -86,13 +88,18 @@ def _extract_ids_from_urls(urls: list[str]) -> list[str]:
def _convert_single_file(
creds: Any,
primary_admin_email: str,
allow_images: bool,
size_threshold: int,
retriever_email: str,
file: dict[str, Any],
) -> Document | ConnectorFailure | None:
user_email = file.get("owners", [{}])[0].get("emailAddress") or primary_admin_email
# We used to always get the user email from the file owners when available,
# but this was causing issues with shared folders where the owner was not included in the service account
# now we use the email of the account that successfully listed the file. Leaving this in case we end up
# wanting to retry with file owners and/or admin email at some point.
# user_email = file.get("owners", [{}])[0].get("emailAddress") or primary_admin_email
user_email = retriever_email
# Only construct these services when needed
user_drive_service = lazy_eval(
lambda: get_drive_service(creds, user_email=user_email)
@@ -450,10 +457,11 @@ class GoogleDriveConnector(SlimConnector, CheckpointConnector[GoogleDriveCheckpo
logger.info(f"Getting all files in my drive as '{user_email}'")
yield from add_retrieval_info(
get_all_files_in_my_drive(
get_all_files_in_my_drive_and_shared(
service=drive_service,
update_traversed_ids_func=self._update_traversed_parent_ids,
is_slim=is_slim,
include_shared_with_me=self.include_files_shared_with_me,
start=curr_stage.completed_until if resuming else start,
end=end,
),
@@ -916,20 +924,28 @@ class GoogleDriveConnector(SlimConnector, CheckpointConnector[GoogleDriveCheckpo
convert_func = partial(
_convert_single_file,
self.creds,
self.primary_admin_email,
self.allow_images,
self.size_threshold,
)
# Fetch files in batches
batches_complete = 0
files_batch: list[GoogleDriveFileType] = []
files_batch: list[RetrievedDriveFile] = []
def _yield_batch(
files_batch: list[GoogleDriveFileType],
files_batch: list[RetrievedDriveFile],
) -> Iterator[Document | ConnectorFailure]:
nonlocal batches_complete
# Process the batch using run_functions_tuples_in_parallel
func_with_args = [(convert_func, (file,)) for file in files_batch]
func_with_args = [
(
convert_func,
(
file.user_email,
file.drive_file,
),
)
for file in files_batch
]
results = cast(
list[Document | ConnectorFailure | None],
run_functions_tuples_in_parallel(func_with_args, max_workers=8),
@@ -967,7 +983,7 @@ class GoogleDriveConnector(SlimConnector, CheckpointConnector[GoogleDriveCheckpo
)
continue
files_batch.append(retrieved_file.drive_file)
files_batch.append(retrieved_file)
if len(files_batch) < self.batch_size:
continue

View File

@@ -87,35 +87,17 @@ def _download_and_extract_sections_basic(
mime_type = file["mimeType"]
link = file.get("webViewLink", "")
try:
# skip images if not explicitly enabled
if not allow_images and is_gdrive_image_mime_type(mime_type):
return []
# skip images if not explicitly enabled
if not allow_images and is_gdrive_image_mime_type(mime_type):
return []
# For Google Docs, Sheets, and Slides, export as plain text
if mime_type in GOOGLE_MIME_TYPES_TO_EXPORT:
export_mime_type = GOOGLE_MIME_TYPES_TO_EXPORT[mime_type]
# Use the correct API call for exporting files
request = service.files().export_media(
fileId=file_id, mimeType=export_mime_type
)
response_bytes = io.BytesIO()
downloader = MediaIoBaseDownload(response_bytes, request)
done = False
while not done:
_, done = downloader.next_chunk()
response = response_bytes.getvalue()
if not response:
logger.warning(f"Failed to export {file_name} as {export_mime_type}")
return []
text = response.decode("utf-8")
return [TextSection(link=link, text=text)]
# For other file types, download the file
# Use the correct API call for downloading files
request = service.files().get_media(fileId=file_id)
# For Google Docs, Sheets, and Slides, export as plain text
if mime_type in GOOGLE_MIME_TYPES_TO_EXPORT:
export_mime_type = GOOGLE_MIME_TYPES_TO_EXPORT[mime_type]
# Use the correct API call for exporting files
request = service.files().export_media(
fileId=file_id, mimeType=export_mime_type
)
response_bytes = io.BytesIO()
downloader = MediaIoBaseDownload(response_bytes, request)
done = False
@@ -124,88 +106,100 @@ def _download_and_extract_sections_basic(
response = response_bytes.getvalue()
if not response:
logger.warning(f"Failed to download {file_name}")
logger.warning(f"Failed to export {file_name} as {export_mime_type}")
return []
# Process based on mime type
if mime_type == "text/plain":
text = response.decode("utf-8")
return [TextSection(link=link, text=text)]
text = response.decode("utf-8")
return [TextSection(link=link, text=text)]
elif (
mime_type
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
):
text, _ = docx_to_text_and_images(io.BytesIO(response))
return [TextSection(link=link, text=text)]
# For other file types, download the file
# Use the correct API call for downloading files
request = service.files().get_media(fileId=file_id)
response_bytes = io.BytesIO()
downloader = MediaIoBaseDownload(response_bytes, request)
done = False
while not done:
_, done = downloader.next_chunk()
elif (
mime_type
== "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
):
text = xlsx_to_text(io.BytesIO(response))
return [TextSection(link=link, text=text)]
response = response_bytes.getvalue()
if not response:
logger.warning(f"Failed to download {file_name}")
return []
elif (
mime_type
== "application/vnd.openxmlformats-officedocument.presentationml.presentation"
):
text = pptx_to_text(io.BytesIO(response))
return [TextSection(link=link, text=text)]
# Process based on mime type
if mime_type == "text/plain":
text = response.decode("utf-8")
return [TextSection(link=link, text=text)]
elif is_gdrive_image_mime_type(mime_type):
# For images, store them for later processing
sections: list[TextSection | ImageSection] = []
try:
with get_session_with_current_tenant() as db_session:
elif (
mime_type
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
):
text, _ = docx_to_text_and_images(io.BytesIO(response))
return [TextSection(link=link, text=text)]
elif (
mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
):
text = xlsx_to_text(io.BytesIO(response))
return [TextSection(link=link, text=text)]
elif (
mime_type
== "application/vnd.openxmlformats-officedocument.presentationml.presentation"
):
text = pptx_to_text(io.BytesIO(response))
return [TextSection(link=link, text=text)]
elif is_gdrive_image_mime_type(mime_type):
# For images, store them for later processing
sections: list[TextSection | ImageSection] = []
try:
with get_session_with_current_tenant() as db_session:
section, embedded_id = store_image_and_create_section(
db_session=db_session,
image_data=response,
file_name=file_id,
display_name=file_name,
media_type=mime_type,
file_origin=FileOrigin.CONNECTOR,
link=link,
)
sections.append(section)
except Exception as e:
logger.error(f"Failed to process image {file_name}: {e}")
return sections
elif mime_type == "application/pdf":
text, _pdf_meta, images = read_pdf_file(io.BytesIO(response))
pdf_sections: list[TextSection | ImageSection] = [
TextSection(link=link, text=text)
]
# Process embedded images in the PDF
try:
with get_session_with_current_tenant() as db_session:
for idx, (img_data, img_name) in enumerate(images):
section, embedded_id = store_image_and_create_section(
db_session=db_session,
image_data=response,
file_name=file_id,
display_name=file_name,
media_type=mime_type,
image_data=img_data,
file_name=f"{file_id}_img_{idx}",
display_name=img_name or f"{file_name} - image {idx}",
file_origin=FileOrigin.CONNECTOR,
link=link,
)
sections.append(section)
except Exception as e:
logger.error(f"Failed to process image {file_name}: {e}")
return sections
pdf_sections.append(section)
except Exception as e:
logger.error(f"Failed to process PDF images in {file_name}: {e}")
return pdf_sections
elif mime_type == "application/pdf":
text, _pdf_meta, images = read_pdf_file(io.BytesIO(response))
pdf_sections: list[TextSection | ImageSection] = [
TextSection(link=link, text=text)
]
# Process embedded images in the PDF
try:
with get_session_with_current_tenant() as db_session:
for idx, (img_data, img_name) in enumerate(images):
section, embedded_id = store_image_and_create_section(
db_session=db_session,
image_data=img_data,
file_name=f"{file_id}_img_{idx}",
display_name=img_name or f"{file_name} - image {idx}",
file_origin=FileOrigin.CONNECTOR,
)
pdf_sections.append(section)
except Exception as e:
logger.error(f"Failed to process PDF images in {file_name}: {e}")
return pdf_sections
else:
# For unsupported file types, try to extract text
try:
text = extract_file_text(io.BytesIO(response), file_name)
return [TextSection(link=link, text=text)]
except Exception as e:
logger.warning(f"Failed to extract text from {file_name}: {e}")
return []
except Exception as e:
logger.error(f"Error processing file {file_name}: {e}")
return []
else:
# For unsupported file types, try to extract text
try:
text = extract_file_text(io.BytesIO(response), file_name)
return [TextSection(link=link, text=text)]
except Exception as e:
logger.warning(f"Failed to extract text from {file_name}: {e}")
return []
def convert_drive_item_to_document(

View File

@@ -214,10 +214,11 @@ def get_files_in_shared_drive(
yield file
def get_all_files_in_my_drive(
def get_all_files_in_my_drive_and_shared(
service: GoogleDriveService,
update_traversed_ids_func: Callable,
is_slim: bool,
include_shared_with_me: bool,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
) -> Iterator[GoogleDriveFileType]:
@@ -229,7 +230,8 @@ def get_all_files_in_my_drive(
# Get all folders being queried and add them to the traversed set
folder_query = f"mimeType = '{DRIVE_FOLDER_TYPE}'"
folder_query += " and trashed = false"
folder_query += " and 'me' in owners"
if not include_shared_with_me:
folder_query += " and 'me' in owners"
found_folders = False
for file in execute_paginated_retrieval(
retrieval_function=service.files().list,
@@ -246,7 +248,8 @@ def get_all_files_in_my_drive(
# Then get the files
file_query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
file_query += " and trashed = false"
file_query += " and 'me' in owners"
if not include_shared_with_me:
file_query += " and 'me' in owners"
file_query += _generate_time_range_filter(start, end)
yield from execute_paginated_retrieval(
retrieval_function=service.files().list,

View File

@@ -35,6 +35,7 @@ from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup
from onyx.db.notification import create_notification
from onyx.db.users import add_belongs_user_if_not_exists
from onyx.server.features.persona.models import PersonaSharedNotificationData
from onyx.server.features.persona.models import PersonaSnapshot
from onyx.server.features.persona.models import PersonaUpsertRequest
@@ -169,6 +170,7 @@ def make_persona_private(
user_ids: list[UUID] | None,
group_ids: list[int] | None,
db_session: Session,
new_user_emails: list[str] | None = None,
) -> None:
if user_ids is not None:
db_session.query(Persona__User).filter(
@@ -189,6 +191,27 @@ def make_persona_private(
db_session.commit()
# Handle new user emails (create users that don't exist yet)
if new_user_emails is not None and len(new_user_emails) > 0:
for email in new_user_emails:
# Create a new user with BELONGS_TO_GROUP role if not exists
user = add_belongs_user_if_not_exists(db_session=db_session, email=email)
# Add user to persona
db_session.add(Persona__User(persona_id=persona_id, user_id=user.id))
# Create notification
create_notification(
user_id=user.id,
notif_type=NotificationType.PERSONA_SHARED,
db_session=db_session,
additional_data=PersonaSharedNotificationData(
persona_id=persona_id,
).model_dump(),
)
db_session.commit()
# May cause error if someone switches down to MIT from EE
if group_ids:
raise NotImplementedError("Onyx MIT does not support private Personas")
@@ -262,6 +285,7 @@ def create_update_persona(
user_ids=create_persona_request.users,
group_ids=create_persona_request.groups,
db_session=db_session,
new_user_emails=create_persona_request.new_user_emails,
)
except ValueError as e:

View File

@@ -346,3 +346,31 @@ def delete_user_from_db(
if remaining_user_email != user_to_delete.email
]
write_invited_users(remaining_users)
def _generate_belongs_user(email: str) -> User:
    """Build (but do not persist) a ``User`` with the BELONGS role.

    The account is given a randomly generated password so it has a valid
    hashed_password row; BELONGS users have no web login, so the password
    is never handed to anyone.
    """
    fastapi_users_pw_helper = PasswordHelper()
    password = fastapi_users_pw_helper.generate()
    hashed_pass = fastapi_users_pw_helper.hash(password)
    return User(
        email=email,
        hashed_password=hashed_pass,
        role=UserRole.BELONGS,
    )
def add_belongs_user_if_not_exists(db_session: Session, email: str) -> User:
    """Return the user with ``email``, creating a BELONGS-role user if absent.

    The email is normalized to lowercase before lookup. If an existing user
    has a non-web-login role (e.g. SLACK_USER / EXT_PERM_USER), it is
    upgraded to BELONGS. Commits the session when it changes anything.
    """
    email = email.lower()
    user = get_user_by_email(email, db_session)
    if user is not None:
        # If the user is an external permissioned user, we update it to a
        # belongs-to-group user.
        # TODO: clarify the hierarchy of roles here
        if not user.role.is_web_login():
            user.role = UserRole.BELONGS
            db_session.commit()
        return user

    user = _generate_belongs_user(email=email)
    db_session.add(user)
    db_session.commit()
    return user

View File

@@ -821,30 +821,26 @@ class VespaIndex(DocumentIndex):
num_to_retrieve: int = NUM_RETURNED_HITS,
offset: int = 0,
) -> list[InferenceChunkUncleaned]:
vespa_where_clauses = build_vespa_filters(
filters, include_hidden=True, remove_trailing_and=True
vespa_where_clauses = build_vespa_filters(filters, include_hidden=True)
yql = (
YQL_BASE.format(index_name=self.index_name)
+ vespa_where_clauses
+ '({grammar: "weakAnd"}userInput(@query) '
# `({defaultIndex: "content_summary"}userInput(@query))` section is
# needed for highlighting while the N-gram highlighting is broken /
# not working as desired
+ f'or ({{defaultIndex: "{CONTENT_SUMMARY}"}}userInput(@query)))'
)
yql = YQL_BASE.format(index_name=self.index_name) + vespa_where_clauses
params: dict[str, str | int] = {
"yql": yql,
"query": query,
"hits": num_to_retrieve,
"offset": 0,
"ranking.profile": "admin_search",
"timeout": VESPA_TIMEOUT,
}
if len(query.strip()) > 0:
yql += (
' and ({grammar: "weakAnd"}userInput(@query) '
# `({defaultIndex: "content_summary"}userInput(@query))` section is
# needed for highlighting while the N-gram highlighting is broken /
# not working as desired
+ f'or ({{defaultIndex: "{CONTENT_SUMMARY}"}}userInput(@query)))'
)
params["yql"] = yql
params["query"] = query
return query_vespa(params)
# Retrieves chunk information for a document:

View File

@@ -74,6 +74,9 @@ class PersonaUpsertRequest(BaseModel):
starter_messages: list[StarterMessage] | None = None
# For Private Personas, who should be able to access these
users: list[UUID] = Field(default_factory=list)
new_user_emails: list[str] = Field(
default_factory=list
) # New field for adding users by email that don't exist yet
groups: list[int] = Field(default_factory=list)
# e.g. ID of SearchTool or ImageGenerationTool or <USER_DEFINED_TOOL>
tool_ids: list[int]

View File

@@ -58,6 +58,16 @@ SECTIONS_FOLDER_URL = (
"https://drive.google.com/drive/u/5/folders/1loe6XJ-pJxu9YYPv7cF3Hmz296VNzA33"
)
EXTERNAL_SHARED_FOLDER_URL = (
"https://drive.google.com/drive/folders/1sWC7Oi0aQGgifLiMnhTjvkhRWVeDa-XS"
)
EXTERNAL_SHARED_DOCS_IN_FOLDER = [
"https://docs.google.com/document/d/1Sywmv1-H6ENk2GcgieKou3kQHR_0te1mhIUcq8XlcdY"
]
EXTERNAL_SHARED_DOC_SINGLETON = (
"https://docs.google.com/document/d/11kmisDfdvNcw5LYZbkdPVjTOdj-Uc5ma6Jep68xzeeA"
)
SHARED_DRIVE_3_URL = "https://drive.google.com/drive/folders/0AJYm2K_I_vtNUk9PVA"
ADMIN_EMAIL = "admin@onyx-test.com"

View File

@@ -1,6 +1,7 @@
from collections.abc import Callable
from unittest.mock import MagicMock
from unittest.mock import patch
from urllib.parse import urlparse
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
@@ -9,6 +10,15 @@ from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_
from tests.daily.connectors.google_drive.consts_and_utils import (
assert_expected_docs_in_retrieved_docs,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
EXTERNAL_SHARED_DOC_SINGLETON,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
EXTERNAL_SHARED_DOCS_IN_FOLDER,
)
from tests.daily.connectors.google_drive.consts_and_utils import (
EXTERNAL_SHARED_FOLDER_URL,
)
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_URL
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_FILE_IDS
@@ -100,7 +110,8 @@ def test_include_shared_drives_only_with_size_threshold(
retrieved_docs = load_all_docs(connector)
assert len(retrieved_docs) == 50
# 2 extra files from shared drive owned by non-admin and not shared with admin
assert len(retrieved_docs) == 52
@patch(
@@ -137,7 +148,8 @@ def test_include_shared_drives_only(
+ SECTIONS_FILE_IDS
)
assert len(retrieved_docs) == 51
# 2 extra files from shared drive owned by non-admin and not shared with admin
assert len(retrieved_docs) == 53
assert_expected_docs_in_retrieved_docs(
retrieved_docs=retrieved_docs,
@@ -294,6 +306,64 @@ def test_folders_only(
)
def test_shared_folder_owned_by_external_user(
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    """A service-account connector should index docs inside a folder that is
    shared with the admin but owned by an external user (regression test for
    the shared-folder ownership issue)."""
    print("\n\nRunning test_shared_folder_owned_by_external_user")
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_drive_urls=None,
        # NOTE(review): this passes a single URL string — confirm the factory
        # accepts a bare string rather than a list here.
        shared_folder_urls=EXTERNAL_SHARED_FOLDER_URL,
        my_drive_emails=None,
    )

    retrieved_docs = load_all_docs(connector)

    expected_docs = EXTERNAL_SHARED_DOCS_IN_FOLDER
    assert len(retrieved_docs) == len(expected_docs)  # 1 for now
    # Document ids are URLs; the expected doc URL should be a prefix/substring
    # of the retrieved doc id.
    assert expected_docs[0] in retrieved_docs[0].id
def test_shared_with_me(
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    """With ``include_files_shared_with_me=True``, the my-drive retrieval
    should return both drive-owned files and externally shared docs."""
    print("\n\nRunning test_shared_with_me")
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=True,
        include_files_shared_with_me=True,
        shared_drive_urls=None,
        shared_folder_urls=None,
        my_drive_emails=None,
    )

    retrieved_docs = load_all_docs(connector)
    print(retrieved_docs)
    expected_file_ids = (
        ADMIN_FILE_IDS
        + ADMIN_FOLDER_3_FILE_IDS
        + TEST_USER_1_FILE_IDS
        + TEST_USER_2_FILE_IDS
        + TEST_USER_3_FILE_IDS
    )
    assert_expected_docs_in_retrieved_docs(
        retrieved_docs=retrieved_docs,
        expected_file_ids=expected_file_ids,
    )

    # Extract the file id as the second-to-last URL path segment — assumes
    # doc ids look like .../document/d/<id>/edit (TODO confirm).
    retrieved_ids = {urlparse(doc.id).path.split("/")[-2] for doc in retrieved_docs}
    for id in retrieved_ids:
        print(id)
    # The externally shared singleton doc and the doc inside the shared
    # folder must both be picked up.
    assert EXTERNAL_SHARED_DOC_SINGLETON.split("/")[-1] in retrieved_ids
    assert EXTERNAL_SHARED_DOCS_IN_FOLDER[0].split("/")[-1] in retrieved_ids
@patch(
"onyx.file_processing.extract_file_text.get_unstructured_api_key",
return_value=None,

View File

@@ -45,7 +45,7 @@ export function ActionsTable({ tools }: { tools: ToolSnapshot[] }) {
className="mr-1 my-auto cursor-pointer"
onClick={() =>
router.push(
`/admin/tools/edit/${tool.id}?u=${Date.now()}`
`/admin/actions/edit/${tool.id}?u=${Date.now()}`
)
}
/>

View File

@@ -505,16 +505,30 @@ export function AssistantEditor({
// don't set groups if marked as public
const groups = values.is_public ? [] : values.selectedGroups;
// Extract new user emails from users with isNew flag
const newUserEmails = values.selectedUsers
.filter(
(u: MinimalUserSnapshot & { isNew?: boolean }) =>
u.isNew && u.id === null
)
.map((u: MinimalUserSnapshot) => u.email);
const submissionData: PersonaUpsertParameters = {
...values,
existing_prompt_id: existingPrompt?.id ?? null,
starter_messages: starterMessages,
groups: groups,
new_user_emails: newUserEmails,
users: values.is_public
? undefined
: [
...(user && !checkUserIsNoAuthUser(user.id) ? [user.id] : []),
...values.selectedUsers.map((u: MinimalUserSnapshot) => u.id),
...values.selectedUsers
.filter(
(u: MinimalUserSnapshot & { isNew?: boolean }) => !u.isNew
)
.map((u: MinimalUserSnapshot) => u.id),
],
tool_ids: enabledTools,
remove_image: removePersonaImage,
@@ -1110,6 +1124,14 @@ export function AssistantEditor({
</Label>
<SearchMultiSelectDropdown
allowCustomValues
customValueValidator={(value) => {
// Simple email validation regex
const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
return emailRegex.test(value);
}}
customValueErrorMessage="Please enter a valid email address"
placeholder="Search users and groups or enter an email address"
options={[
...(Array.isArray(users) ? users : [])
.filter(
@@ -1155,6 +1177,13 @@ export function AssistantEditor({
]);
}
}}
onCustomValueSelect={(email: string) => {
// Add the email as a user that doesn't exist yet
setFieldValue("selectedUsers", [
...values.selectedUsers,
{ email: email, id: null, isNew: true },
]);
}}
/>
</div>
<div className="flex flex-wrap gap-2 mt-2">

View File

@@ -19,6 +19,7 @@ interface PersonaUpsertRequest {
llm_model_version_override: string | null;
starter_messages: StarterMessage[] | null;
users?: string[];
new_user_emails?: string[];
groups: number[];
tool_ids: number[];
icon_color: string | null;
@@ -47,6 +48,7 @@ export interface PersonaUpsertParameters {
llm_model_version_override: string | null;
starter_messages: StarterMessage[] | null;
users?: string[];
new_user_emails?: string[];
groups: number[];
tool_ids: number[];
icon_color: string | null;
@@ -109,6 +111,7 @@ function buildPersonaUpsertRequest(
existing_prompt_id,
datetime_aware,
users,
new_user_emails,
tool_ids,
icon_color,
icon_shape,
@@ -127,6 +130,7 @@ function buildPersonaUpsertRequest(
uploaded_image_id,
groups,
users,
new_user_emails,
tool_ids,
icon_color,
icon_shape,

View File

@@ -281,7 +281,7 @@ export default function AddConnector({
return (
<Formik
initialValues={{
...createConnectorInitialValues(connector),
...createConnectorInitialValues(connector, currentCredential),
...Object.fromEntries(
connectorConfigs[connector].advanced_values.map((field) => [
field.name,

View File

@@ -148,8 +148,7 @@ export function Explorer({
clearTimeout(timeoutId);
}
let doSearch = true;
if (doSearch) {
if (query && query.trim() !== "") {
router.replace(
`/admin/documents/explorer?query=${encodeURIComponent(query)}`
);

View File

@@ -2,7 +2,7 @@ import { User } from "@/lib/types";
import { FiPlus, FiX } from "react-icons/fi";
import { SearchMultiSelectDropdown } from "@/components/Dropdown";
import { UsersIcon } from "@/components/icons/icons";
import { Button } from "@/components/Button";
import { Button } from "@/components/ui/button";
interface UserEditorProps {
selectedUserIds: string[];
@@ -10,6 +10,8 @@ interface UserEditorProps {
allUsers: User[];
existingUsers: User[];
onSubmit?: (users: User[]) => void;
newUserEmails?: string[];
setNewUserEmails?: (emails: string[]) => void;
}
export const UserEditor = ({
@@ -18,6 +20,8 @@ export const UserEditor = ({
allUsers,
existingUsers,
onSubmit,
newUserEmails = [],
setNewUserEmails = () => {},
}: UserEditorProps) => {
const selectedUsers = allUsers.filter((user) =>
selectedUserIds.includes(user.id)
@@ -48,10 +52,39 @@ export const UserEditor = ({
{selectedUser.email} <FiX className="ml-1 my-auto" />
</div>
))}
{newUserEmails.length > 0 &&
newUserEmails.map((email) => (
<div
key={email}
onClick={() => {
setNewUserEmails(newUserEmails.filter((e) => e !== email));
}}
className={`
flex
rounded-lg
px-2
py-1
border
border-border
hover:bg-accent-background
cursor-pointer`}
>
{email} (new) <FiX className="ml-1 my-auto" />
</div>
))}
</div>
<div className="flex">
<SearchMultiSelectDropdown
allowCustomValues
customValueValidator={(value) => {
// Simple email validation regex
const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
return emailRegex.test(value);
}}
customValueErrorMessage="Please enter a valid email address"
placeholder="Search users or enter an email address"
options={allUsers
.filter(
(user) =>
@@ -71,6 +104,12 @@ export const UserEditor = ({
),
]);
}}
onCustomValueSelect={(email: string) => {
// Make sure it's not already in the list
if (!newUserEmails.includes(email)) {
setNewUserEmails([...newUserEmails, email]);
}
}}
itemComponent={({ option }) => (
<div className="flex px-4 py-2.5 cursor-pointer hover:bg-accent-background-hovered">
<UsersIcon className="mr-2 my-auto" />

View File

@@ -9,6 +9,7 @@ import { ConnectorEditor } from "./ConnectorEditor";
import { Modal } from "@/components/Modal";
import { Button } from "@/components/ui/button";
import { Separator } from "@/components/ui/separator";
import { useState } from "react";
interface UserGroupCreationFormProps {
onClose: () => void;
@@ -26,6 +27,7 @@ export const UserGroupCreationForm = ({
existingUserGroup,
}: UserGroupCreationFormProps) => {
const isUpdate = existingUserGroup !== undefined;
const [newUserEmails, setNewUserEmails] = useState<string[]>([]);
// Filter out ccPairs that aren't access_type "private"
const privateCcPairs = ccPairs.filter(
@@ -33,7 +35,7 @@ export const UserGroupCreationForm = ({
);
return (
<Modal className="w-fit" onOutsideClick={onClose}>
<Modal className="w-fit overflow-visible" onOutsideClick={onClose}>
<>
<h2 className="text-xl font-bold flex">
{isUpdate ? "Update a User Group" : "Create a new User Group"}
@@ -55,7 +57,10 @@ export const UserGroupCreationForm = ({
onSubmit={async (values, formikHelpers) => {
formikHelpers.setSubmitting(true);
let response;
response = await createUserGroup(values);
response = await createUserGroup({
...values,
new_user_emails: newUserEmails,
});
formikHelpers.setSubmitting(false);
if (response.ok) {
setPopup({
@@ -123,6 +128,8 @@ export const UserGroupCreationForm = ({
}
allUsers={users}
existingUsers={[]}
newUserEmails={newUserEmails}
setNewUserEmails={setNewUserEmails}
/>
</div>
<div className="flex">

View File

@@ -19,10 +19,11 @@ export const AddMemberForm: React.FC<AddMemberFormProps> = ({
setPopup,
}) => {
const [selectedUserIds, setSelectedUserIds] = useState<string[]>([]);
const [newUserEmails, setNewUserEmails] = useState<string[]>([]);
return (
<Modal
className="max-w-xl"
className="max-w-xl overflow-visible"
title="Add New User"
onOutsideClick={() => onClose()}
>
@@ -32,6 +33,8 @@ export const AddMemberForm: React.FC<AddMemberFormProps> = ({
setSelectedUserIds={setSelectedUserIds}
allUsers={users}
existingUsers={userGroup.users}
newUserEmails={newUserEmails}
setNewUserEmails={setNewUserEmails}
onSubmit={async (selectedUsers) => {
const newUserIds = [
...Array.from(
@@ -45,6 +48,7 @@ export const AddMemberForm: React.FC<AddMemberFormProps> = ({
const response = await updateUserGroup(userGroup.id, {
user_ids: newUserIds,
cc_pair_ids: userGroup.cc_pairs.map((ccPair) => ccPair.id),
new_user_emails: newUserEmails,
});
if (response.ok) {
setPopup({

View File

@@ -1,5 +1,6 @@
// Request payload for updating an existing user group's membership/connectors.
export interface UserGroupUpdate {
// Full set of user IDs that should belong to the group after the update.
user_ids: string[];
// Emails of not-yet-registered users to add; the backend creates these
// accounts if they don't exist yet (see new_user_emails handling server-side).
new_user_emails?: string[];
// Connector-credential pair IDs assigned to this group.
cc_pair_ids: number[];
}
@@ -11,5 +12,6 @@ export interface SetCuratorRequest {
// Request payload for creating a new user group.
export interface UserGroupCreation {
// Display name of the group.
name: string;
// IDs of existing users to include as initial members.
user_ids: string[];
// Emails of not-yet-registered users to add; the backend creates these
// accounts if they don't exist yet (see new_user_emails handling server-side).
new_user_emails?: string[];
// Connector-credential pair IDs assigned to this group.
cc_pair_ids: number[];
}

View File

@@ -55,6 +55,10 @@ export function SearchMultiSelectDropdown({
onSearchTermChange,
initialSearchTerm = "",
allowCustomValues = false,
customValueValidator,
customValueErrorMessage,
onCustomValueSelect,
placeholder,
}: {
options: StringOrNumberOption[];
onSelect: (selected: StringOrNumberOption) => void;
@@ -64,15 +68,21 @@ export function SearchMultiSelectDropdown({
onSearchTermChange?: (term: string) => void;
initialSearchTerm?: string;
allowCustomValues?: boolean;
customValueValidator?: (value: string) => boolean;
customValueErrorMessage?: string;
onCustomValueSelect?: (value: string) => void;
placeholder?: string;
}) {
const [isOpen, setIsOpen] = useState(false);
const [searchTerm, setSearchTerm] = useState(initialSearchTerm);
const [validationError, setValidationError] = useState<string | null>(null);
const dropdownRef = useRef<HTMLDivElement>(null);
// Commit a picked option via the onSelect callback, close the dropdown,
// and reset transient input state so the next open starts clean.
const handleSelect = (option: StringOrNumberOption) => {
    onSelect(option);
    setIsOpen(false);
    setSearchTerm(""); // Clear search term after selection
    setValidationError(null); // Any previous custom-value error is now stale
  };
const filteredOptions = options.filter((option) =>
@@ -82,12 +92,30 @@ export function SearchMultiSelectDropdown({
// Handle selecting a custom value not in the options list
// Accept the free-typed search term as a custom value. Only runs when
// allowCustomValues is enabled and the input is non-empty. Validation
// failures set an inline error and leave the dropdown open.
const handleCustomValueSelect = () => {
if (allowCustomValues && searchTerm.trim() !== "") {
// If a validator is provided, reject invalid input with an inline error
if (customValueValidator && !customValueValidator(searchTerm)) {
setValidationError(customValueErrorMessage || "Invalid value");
return;
}
// If an onCustomValueSelect callback is provided, delegate to it
// instead of funneling the value through the regular onSelect path
if (onCustomValueSelect) {
onCustomValueSelect(searchTerm);
setIsOpen(false);
setSearchTerm("");
setValidationError(null);
return;
}
// Default behavior: wrap the raw term as an option and reuse onSelect
const customOption: StringOrNumberOption = {
name: searchTerm,
value: searchTerm,
};
onSelect(customOption);
setIsOpen(false);
setSearchTerm("");
setValidationError(null);
}
};
@@ -122,12 +150,16 @@ export function SearchMultiSelectDropdown({
<input
type="text"
placeholder={
allowCustomValues ? "Search or enter custom value..." : "Search..."
placeholder ||
(allowCustomValues
? "Search or enter custom value..."
: "Search...")
}
value={searchTerm}
onChange={(e: ChangeEvent<HTMLInputElement>) => {
const newValue = e.target.value;
setSearchTerm(newValue);
setValidationError(null);
if (onSearchTermChange) {
onSearchTermChange(newValue);
}
@@ -148,7 +180,9 @@ export function SearchMultiSelectDropdown({
handleCustomValueSelect();
}
}}
className="inline-flex justify-between w-full px-4 py-2 text-sm bg-white dark:bg-transparent text-text-800 border border-background-300 rounded-md shadow-sm"
className={`inline-flex justify-between w-full px-4 py-2 text-sm bg-white dark:bg-transparent text-text-800 border ${
validationError ? "border-red-500" : "border-background-300"
} rounded-md shadow-sm`}
/>
<button
type="button"
@@ -161,6 +195,10 @@ export function SearchMultiSelectDropdown({
</button>
</div>
{validationError && (
<div className="text-red-500 text-xs mt-1">{validationError}</div>
)}
{isOpen && (
<div className="absolute z-10 mt-1 w-full rounded-md shadow-lg bg-white border border-background-300 max-h-60 overflow-y-auto">
<div

View File

@@ -102,7 +102,7 @@ export function UserProvider({
};
// Use the custom token refresh hook
// useTokenRefresh(upToDateUser, fetchUser);
useTokenRefresh(upToDateUser, fetchUser);
const updateUserTemperatureOverrideEnabled = async (enabled: boolean) => {
try {

View File

@@ -1292,7 +1292,8 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
},
};
export function createConnectorInitialValues(
connector: ConfigurableSources
connector: ConfigurableSources,
currentCredential: Credential<any> | null = null
): Record<string, any> & AccessTypeGroupSelectorFormType {
const configuration = connectorConfigs[connector];
@@ -1307,7 +1308,16 @@ export function createConnectorInitialValues(
} else if (field.type === "list") {
acc[field.name] = field.default || [];
} else if (field.type === "checkbox") {
acc[field.name] = field.default || false;
// Special case for include_files_shared_with_me when using service account
if (
field.name === "include_files_shared_with_me" &&
currentCredential &&
!currentCredential.credential_json?.google_tokens
) {
acc[field.name] = true;
} else {
acc[field.name] = field.default || false;
}
} else if (field.default !== undefined) {
acc[field.name] = field.default;
}