mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-03-02 14:15:44 +00:00
Compare commits
10 Commits
embed_imag
...
v0.24.0-cl
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
60fb21ddca | ||
|
|
fc89d745c1 | ||
|
|
a748bb28a7 | ||
|
|
2dd304739d | ||
|
|
778a7eeb5a | ||
|
|
0b39a45ae7 | ||
|
|
a540038660 | ||
|
|
1c4ceb36be | ||
|
|
f3e2ab25f8 | ||
|
|
9b91babed1 |
@@ -0,0 +1,32 @@
|
||||
"""Add public_external_user_group table
|
||||
|
||||
Revision ID: a7688ab35c45
|
||||
Revises: 5c448911b12f
|
||||
Create Date: 2025-05-06 20:55:12.747875
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "a7688ab35c45"
|
||||
down_revision = "5c448911b12f"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the `public_external_user_group` table.

    Rows are keyed by the (external_user_group_id, cc_pair_id) pair, and the
    foreign key to `connector_credential_pair.id` cascades on delete.
    """
    columns = (
        sa.Column("external_user_group_id", sa.String(), nullable=False),
        sa.Column("cc_pair_id", sa.Integer(), nullable=False),
    )
    constraints = (
        sa.PrimaryKeyConstraint("external_user_group_id", "cc_pair_id"),
        sa.ForeignKeyConstraint(
            ["cc_pair_id"], ["connector_credential_pair.id"], ondelete="CASCADE"
        ),
    )
    op.create_table("public_external_user_group", *columns, *constraints)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the `public_external_user_group` table created by `upgrade`."""
    op.drop_table(table_name="public_external_user_group")
|
||||
@@ -1,6 +1,7 @@
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ee.onyx.db.external_perm import fetch_external_groups_for_user
|
||||
from ee.onyx.db.external_perm import fetch_public_external_group_ids
|
||||
from ee.onyx.db.user_group import fetch_user_groups_for_documents
|
||||
from ee.onyx.db.user_group import fetch_user_groups_for_user
|
||||
from ee.onyx.external_permissions.post_query_censoring import (
|
||||
@@ -63,6 +64,8 @@ def _get_access_for_documents(
|
||||
document_ids=document_ids,
|
||||
)
|
||||
|
||||
all_public_ext_u_group_ids = set(fetch_public_external_group_ids(db_session))
|
||||
|
||||
access_map = {}
|
||||
for document_id, non_ee_access in non_ee_access_dict.items():
|
||||
document = doc_id_map[document_id]
|
||||
@@ -89,7 +92,10 @@ def _get_access_for_documents(
|
||||
# If it's censored, then it's public anywhere during the search and then permissions are
|
||||
# applied after the search
|
||||
is_public_anywhere = (
|
||||
document.is_public or non_ee_access.is_public or is_only_censored
|
||||
document.is_public
|
||||
or non_ee_access.is_public
|
||||
or is_only_censored
|
||||
or any(u_group in all_public_ext_u_group_ids for u_group in ext_u_groups)
|
||||
)
|
||||
|
||||
# To avoid collisions of group namings between connectors, they need to be prefixed
|
||||
|
||||
@@ -8,6 +8,7 @@ from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.access.utils import build_ext_group_name_for_onyx
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.db.models import PublicExternalUserGroup
|
||||
from onyx.db.models import User
|
||||
from onyx.db.models import User__ExternalUserGroupId
|
||||
from onyx.db.users import batch_add_ext_perm_user_if_not_exists
|
||||
@@ -20,6 +21,12 @@ logger = setup_logger()
|
||||
class ExternalUserGroup(BaseModel):
    # Group identifier as reported by the external source
    id: str
    # Emails of the users that belong to this group
    user_emails: list[str]
    # `True` for cases like a Google Drive folder that grants domain-wide or
    # "Anyone with link" access to every file inside it.
    # When set, `user_emails` is effectively irrelevant, and this object does not
    # represent an actual "group" in the source.
    gives_anyone_access: bool = False
|
||||
|
||||
|
||||
def delete_user__ext_group_for_user__no_commit(
|
||||
@@ -44,6 +51,17 @@ def delete_user__ext_group_for_cc_pair__no_commit(
|
||||
)
|
||||
|
||||
|
||||
def delete_public_external_group_for_cc_pair__no_commit(
    db_session: Session,
    cc_pair_id: int,
) -> None:
    """Delete every `PublicExternalUserGroup` row belonging to `cc_pair_id`.

    The delete is executed on `db_session` but is not committed here.
    """
    delete_stmt = delete(PublicExternalUserGroup).where(
        PublicExternalUserGroup.cc_pair_id == cc_pair_id
    )
    db_session.execute(delete_stmt)
|
||||
|
||||
|
||||
def replace_user__ext_group_for_cc_pair(
|
||||
db_session: Session,
|
||||
cc_pair_id: int,
|
||||
@@ -72,13 +90,22 @@ def replace_user__ext_group_for_cc_pair(
|
||||
db_session=db_session,
|
||||
cc_pair_id=cc_pair_id,
|
||||
)
|
||||
delete_public_external_group_for_cc_pair__no_commit(
|
||||
db_session=db_session,
|
||||
cc_pair_id=cc_pair_id,
|
||||
)
|
||||
|
||||
# map emails to ids
|
||||
email_id_map = {user.email: user.id for user in all_group_members}
|
||||
|
||||
# use these ids to create new external user group relations relating group_id to user_ids
|
||||
new_external_permissions = []
|
||||
new_external_permissions: list[User__ExternalUserGroupId] = []
|
||||
new_public_external_groups: list[PublicExternalUserGroup] = []
|
||||
for external_group in group_defs:
|
||||
external_group_id = build_ext_group_name_for_onyx(
|
||||
ext_group_name=external_group.id,
|
||||
source=source,
|
||||
)
|
||||
for user_email in external_group.user_emails:
|
||||
user_id = email_id_map.get(user_email.lower())
|
||||
if user_id is None:
|
||||
@@ -87,10 +114,6 @@ def replace_user__ext_group_for_cc_pair(
|
||||
f" with email {user_email} not found"
|
||||
)
|
||||
continue
|
||||
external_group_id = build_ext_group_name_for_onyx(
|
||||
ext_group_name=external_group.id,
|
||||
source=source,
|
||||
)
|
||||
new_external_permissions.append(
|
||||
User__ExternalUserGroupId(
|
||||
user_id=user_id,
|
||||
@@ -99,7 +122,16 @@ def replace_user__ext_group_for_cc_pair(
|
||||
)
|
||||
)
|
||||
|
||||
if external_group.gives_anyone_access:
|
||||
new_public_external_groups.append(
|
||||
PublicExternalUserGroup(
|
||||
external_user_group_id=external_group_id,
|
||||
cc_pair_id=cc_pair_id,
|
||||
)
|
||||
)
|
||||
|
||||
db_session.add_all(new_external_permissions)
|
||||
db_session.add_all(new_public_external_groups)
|
||||
db_session.commit()
|
||||
|
||||
|
||||
@@ -130,3 +162,11 @@ def fetch_external_groups_for_user_email_and_group_ids(
|
||||
)
|
||||
).all()
|
||||
return list(user_ext_groups)
|
||||
|
||||
|
||||
def fetch_public_external_group_ids(
    db_session: Session,
) -> list[str]:
    """Return the `external_user_group_id` of every `PublicExternalUserGroup` row.

    No filtering or de-duplication is applied; the values are returned as the
    query yields them.
    """
    stmt = select(PublicExternalUserGroup.external_user_group_id)
    group_ids = db_session.scalars(stmt).all()
    return list(group_ids)
|
||||
|
||||
@@ -3,11 +3,15 @@ from datetime import datetime
|
||||
from datetime import timezone
|
||||
from typing import Any
|
||||
|
||||
from ee.onyx.external_permissions.google_drive.models import GoogleDrivePermission
|
||||
from ee.onyx.external_permissions.google_drive.models import PermissionType
|
||||
from ee.onyx.external_permissions.google_drive.permission_retrieval import (
|
||||
get_permissions_by_ids,
|
||||
)
|
||||
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
|
||||
from onyx.access.models import DocExternalAccess
|
||||
from onyx.access.models import ExternalAccess
|
||||
from onyx.connectors.google_drive.connector import GoogleDriveConnector
|
||||
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
|
||||
from onyx.connectors.google_utils.resources import get_drive_service
|
||||
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
|
||||
from onyx.connectors.models import SlimDocument
|
||||
@@ -17,8 +21,6 @@ from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
_PERMISSION_ID_PERMISSION_MAP: dict[str, dict[str, Any]] = {}
|
||||
|
||||
|
||||
def _get_slim_doc_generator(
|
||||
cc_pair: ConnectorCredentialPair,
|
||||
@@ -41,46 +43,28 @@ def _get_slim_doc_generator(
|
||||
|
||||
def _fetch_permissions_for_permission_ids(
|
||||
google_drive_connector: GoogleDriveConnector,
|
||||
permission_ids: list[str],
|
||||
permission_info: dict[str, Any],
|
||||
) -> list[dict[str, Any]]:
|
||||
) -> list[GoogleDrivePermission]:
|
||||
doc_id = permission_info.get("doc_id")
|
||||
if not permission_info or not doc_id:
|
||||
return []
|
||||
|
||||
permissions = [
|
||||
_PERMISSION_ID_PERMISSION_MAP[pid]
|
||||
for pid in permission_ids
|
||||
if pid in _PERMISSION_ID_PERMISSION_MAP
|
||||
]
|
||||
|
||||
if len(permissions) == len(permission_ids):
|
||||
return permissions
|
||||
|
||||
owner_email = permission_info.get("owner_email")
|
||||
permission_ids = permission_info.get("permission_ids", [])
|
||||
if not permission_ids:
|
||||
return []
|
||||
|
||||
drive_service = get_drive_service(
|
||||
creds=google_drive_connector.creds,
|
||||
user_email=(owner_email or google_drive_connector.primary_admin_email),
|
||||
)
|
||||
|
||||
# We continue on 404 or 403 because the document may not exist or the user may not have access to it
|
||||
fetched_permissions = execute_paginated_retrieval(
|
||||
retrieval_function=drive_service.permissions().list,
|
||||
list_key="permissions",
|
||||
fileId=doc_id,
|
||||
fields="permissions(id, emailAddress, type, domain),nextPageToken",
|
||||
supportsAllDrives=True,
|
||||
continue_on_404_or_403=True,
|
||||
return get_permissions_by_ids(
|
||||
drive_service=drive_service,
|
||||
doc_id=doc_id,
|
||||
permission_ids=permission_ids,
|
||||
)
|
||||
|
||||
permissions_for_doc_id = []
|
||||
for permission in fetched_permissions:
|
||||
permissions_for_doc_id.append(permission)
|
||||
_PERMISSION_ID_PERMISSION_MAP[permission["id"]] = permission
|
||||
|
||||
return permissions_for_doc_id
|
||||
|
||||
|
||||
def _get_permissions_from_slim_doc(
|
||||
google_drive_connector: GoogleDriveConnector,
|
||||
@@ -88,14 +72,13 @@ def _get_permissions_from_slim_doc(
|
||||
) -> ExternalAccess:
|
||||
permission_info = slim_doc.perm_sync_data or {}
|
||||
|
||||
permissions_list = permission_info.get("permissions", [])
|
||||
if not permissions_list:
|
||||
if permission_ids := permission_info.get("permission_ids"):
|
||||
permissions_list = _fetch_permissions_for_permission_ids(
|
||||
google_drive_connector=google_drive_connector,
|
||||
permission_ids=permission_ids,
|
||||
permission_info=permission_info,
|
||||
)
|
||||
permissions_list: list[GoogleDrivePermission] = []
|
||||
raw_permissions_list = permission_info.get("permissions", [])
|
||||
if not raw_permissions_list:
|
||||
permissions_list = _fetch_permissions_for_permission_ids(
|
||||
google_drive_connector=google_drive_connector,
|
||||
permission_info=permission_info,
|
||||
)
|
||||
if not permissions_list:
|
||||
logger.warning(f"No permissions found for document {slim_doc.id}")
|
||||
return ExternalAccess(
|
||||
@@ -103,41 +86,71 @@ def _get_permissions_from_slim_doc(
|
||||
external_user_group_ids=set(),
|
||||
is_public=False,
|
||||
)
|
||||
else:
|
||||
permissions_list = [
|
||||
GoogleDrivePermission.from_drive_permission(p) for p in raw_permissions_list
|
||||
]
|
||||
|
||||
company_domain = google_drive_connector.google_domain
|
||||
folder_ids_to_inherit_permissions_from: set[str] = set()
|
||||
user_emails: set[str] = set()
|
||||
group_emails: set[str] = set()
|
||||
public = False
|
||||
skipped_permissions = 0
|
||||
|
||||
for permission in permissions_list:
|
||||
if not permission:
|
||||
skipped_permissions += 1
|
||||
continue
|
||||
# if the permission is inherited, do not add it directly to the file
|
||||
# instead, add the folder ID as a group that has access to the file
|
||||
# we will then handle mapping that folder to the list of Onyx users
|
||||
# in the group sync job
|
||||
# NOTE: this doesn't handle the case where a folder initially has no
|
||||
# permissioning, but then later that folder is shared with a user or group.
|
||||
# We could fetch all ancestors of the file to get the list of folders that
|
||||
# might affect the permissions of the file, but this will get replaced with
|
||||
# an audit-log based approach in the future so not doing it now.
|
||||
if (
|
||||
permission.permission_details
|
||||
and permission.permission_details.inherited_from
|
||||
):
|
||||
folder_ids_to_inherit_permissions_from.add(
|
||||
permission.permission_details.inherited_from
|
||||
)
|
||||
|
||||
permission_type = permission["type"]
|
||||
if permission_type == "user":
|
||||
user_emails.add(permission["emailAddress"])
|
||||
elif permission_type == "group":
|
||||
group_emails.add(permission["emailAddress"])
|
||||
elif permission_type == "domain" and company_domain:
|
||||
if permission.get("domain") == company_domain:
|
||||
if permission.type == PermissionType.USER:
|
||||
if permission.email_address:
|
||||
user_emails.add(permission.email_address)
|
||||
else:
|
||||
logger.error(
|
||||
"Permission is type `user` but no email address is "
|
||||
f"provided for document {slim_doc.id}"
|
||||
f"\n {permission}"
|
||||
)
|
||||
elif permission.type == PermissionType.GROUP:
|
||||
# groups are represented as email addresses within Drive
|
||||
if permission.email_address:
|
||||
group_emails.add(permission.email_address)
|
||||
else:
|
||||
logger.error(
|
||||
"Permission is type `group` but no email address is "
|
||||
f"provided for document {slim_doc.id}"
|
||||
f"\n {permission}"
|
||||
)
|
||||
elif permission.type == PermissionType.DOMAIN and company_domain:
|
||||
if permission.domain == company_domain:
|
||||
public = True
|
||||
else:
|
||||
logger.warning(
|
||||
"Permission is type domain but does not match company domain:"
|
||||
f"\n {permission}"
|
||||
)
|
||||
elif permission_type == "anyone":
|
||||
elif permission.type == PermissionType.ANYONE:
|
||||
public = True
|
||||
|
||||
if skipped_permissions > 0:
|
||||
logger.warning(
|
||||
f"Skipped {skipped_permissions} permissions of {len(permissions_list)} for document {slim_doc.id}"
|
||||
)
|
||||
|
||||
drive_id = permission_info.get("drive_id")
|
||||
group_ids = group_emails | ({drive_id} if drive_id is not None else set())
|
||||
group_ids = (
|
||||
group_emails
|
||||
| folder_ids_to_inherit_permissions_from
|
||||
| ({drive_id} if drive_id is not None else set())
|
||||
)
|
||||
|
||||
return ExternalAccess(
|
||||
external_user_emails=user_emails,
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
from collections.abc import Iterator
|
||||
|
||||
from googleapiclient.discovery import Resource # type: ignore
|
||||
|
||||
from ee.onyx.external_permissions.google_drive.models import GoogleDrivePermission
|
||||
from ee.onyx.external_permissions.google_drive.permission_retrieval import (
|
||||
get_permissions_by_ids,
|
||||
)
|
||||
from onyx.connectors.google_drive.constants import DRIVE_FOLDER_TYPE
|
||||
from onyx.connectors.google_drive.file_retrieval import generate_time_range_filter
|
||||
from onyx.connectors.google_drive.models import GoogleDriveFileType
|
||||
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
# Only include fields we need - folder ID and permissions
|
||||
# IMPORTANT: must fetch permissionIds, since sometimes the drive API
|
||||
# seems to miss permissions when requesting them directly
|
||||
FOLDER_PERMISSION_FIELDS = (
|
||||
"nextPageToken, files(id, name, permissionIds, "
|
||||
"permissions(id, emailAddress, type, domain, permissionDetails))"
|
||||
)
|
||||
|
||||
|
||||
def get_folder_permissions_by_ids(
    service: Resource,
    folder_id: str,
    permission_ids: list[str],
) -> list[GoogleDrivePermission]:
    """
    Look up a folder's permissions, keeping only the requested permission IDs.

    Args:
        service: The Google Drive service instance
        folder_id: The ID of the folder whose permissions are fetched
        permission_ids: The permission IDs to keep

    Returns:
        The permissions of the folder whose IDs are in `permission_ids`
    """
    # The folder ID is passed as the `doc_id` of the shared by-ID lookup.
    folder_permissions = get_permissions_by_ids(
        drive_service=service,
        doc_id=folder_id,
        permission_ids=permission_ids,
    )
    return folder_permissions
|
||||
|
||||
|
||||
def get_modified_folders(
    service: Resource,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
) -> Iterator[GoogleDriveFileType]:
    """
    Yield every non-trashed folder matching the optional time range.

    Only the fields listed in `FOLDER_PERMISSION_FIELDS` are requested, so each
    yielded entry carries folder ID, name and permission data, not the files
    contained in the folder.

    Args:
        service: The Google Drive service instance
        start: The start time as seconds since Unix epoch
        end: The end time as seconds since Unix epoch

    Returns:
        An iterator yielding folder information including ID and permissions
    """
    time_filter = generate_time_range_filter(start, end)
    folder_query = (
        f"mimeType = '{DRIVE_FOLDER_TYPE}'" + " and trashed = false" + time_filter
    )

    # Delegate straight to the paginated retrieval generator
    yield from execute_paginated_retrieval(
        retrieval_function=service.files().list,
        list_key="files",
        continue_on_404_or_403=True,
        corpora="allDrives",
        supportsAllDrives=True,
        includeItemsFromAllDrives=True,
        includePermissionsForView="published",
        fields=FOLDER_PERMISSION_FIELDS,
        q=folder_query,
    )
|
||||
@@ -1,6 +1,15 @@
|
||||
from googleapiclient.errors import HttpError # type: ignore
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ee.onyx.db.external_perm import ExternalUserGroup
|
||||
from ee.onyx.external_permissions.google_drive.folder_retrieval import (
|
||||
get_folder_permissions_by_ids,
|
||||
)
|
||||
from ee.onyx.external_permissions.google_drive.folder_retrieval import (
|
||||
get_modified_folders,
|
||||
)
|
||||
from ee.onyx.external_permissions.google_drive.models import GoogleDrivePermission
|
||||
from ee.onyx.external_permissions.google_drive.models import PermissionType
|
||||
from onyx.connectors.google_drive.connector import GoogleDriveConnector
|
||||
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
|
||||
from onyx.connectors.google_utils.resources import AdminService
|
||||
@@ -12,6 +21,72 @@ from onyx.utils.logger import setup_logger
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
"""
|
||||
Folder Permission Sync.
|
||||
|
||||
Each folder is treated as a group. Each file has all ancestor folders
|
||||
as groups.
|
||||
"""
|
||||
|
||||
|
||||
class FolderInfo(BaseModel):
    # Google Drive ID of the folder
    id: str
    # Permissions found on the folder, already parsed into the typed model
    permissions: list[GoogleDrivePermission]
|
||||
|
||||
|
||||
def _get_all_folders(google_drive_connector: GoogleDriveConnector) -> list[FolderInfo]:
    """Collect every folder (with its permissions) visible to any known user.

    All folders are fetched on every run because the group syncing system
    assumes the complete set of groups is returned each time.

    TODO: tweak things so we can fetch deltas.
    """
    # Insertion-ordered dict: de-duplicates by folder ID while keeping the
    # order in which folders were first encountered.
    folders_by_id: dict[str, FolderInfo] = {}

    for user_email in google_drive_connector._get_all_user_emails():
        drive_service = get_drive_service(
            google_drive_connector.creds,
            user_email,
        )

        for folder in get_modified_folders(service=drive_service):
            folder_id = folder["id"]
            if folder_id in folders_by_id:
                logger.debug(f"Folder {folder_id} has already been seen. Skipping.")
                continue

            permission_ids = folder.get("permissionIds", [])
            raw_permissions = folder.get("permissions", [])

            if raw_permissions or not permission_ids:
                # Inline permissions are present (or there is nothing to look up)
                permissions = [
                    GoogleDrivePermission.from_drive_permission(raw_permission)
                    for raw_permission in raw_permissions
                ]
            else:
                # Folder lists permission IDs but no permission bodies:
                # fetch them explicitly by ID
                permissions = get_folder_permissions_by_ids(
                    drive_service, folder_id, permission_ids
                )

            folders_by_id[folder_id] = FolderInfo(
                id=folder_id,
                permissions=permissions,
            )

    return list(folders_by_id.values())
|
||||
|
||||
|
||||
"""Individual Shared Drive / My Drive Permission Sync"""
|
||||
|
||||
|
||||
def _get_drive_members(
|
||||
google_drive_connector: GoogleDriveConnector,
|
||||
admin_service: AdminService,
|
||||
@@ -57,9 +132,11 @@ def _get_drive_members(
|
||||
# is an admin
|
||||
useDomainAdminAccess=is_admin,
|
||||
):
|
||||
if permission["type"] == "group":
|
||||
# NOTE: don't need to check for PermissionType.ANYONE since
|
||||
# you can't share a drive with the internet
|
||||
if permission["type"] == PermissionType.GROUP:
|
||||
group_emails.add(permission["emailAddress"])
|
||||
elif permission["type"] == "user":
|
||||
elif permission["type"] == PermissionType.USER:
|
||||
user_emails.add(permission["emailAddress"])
|
||||
except HttpError as e:
|
||||
if e.status_code == 404:
|
||||
@@ -118,6 +195,7 @@ def _map_group_email_to_member_emails(
|
||||
def _build_onyx_groups(
|
||||
drive_id_to_members_map: dict[str, tuple[set[str], set[str]]],
|
||||
group_email_to_member_emails_map: dict[str, set[str]],
|
||||
folder_info: list[FolderInfo],
|
||||
) -> list[ExternalUserGroup]:
|
||||
onyx_groups: list[ExternalUserGroup] = []
|
||||
|
||||
@@ -125,18 +203,52 @@ def _build_onyx_groups(
|
||||
# This is because having drive level access means you have
|
||||
# irrevocable access to all the files in the drive.
|
||||
for drive_id, (group_emails, user_emails) in drive_id_to_members_map.items():
|
||||
all_member_emails: set[str] = user_emails
|
||||
drive_member_emails: set[str] = user_emails
|
||||
for group_email in group_emails:
|
||||
if group_email not in group_email_to_member_emails_map:
|
||||
logger.warning(
|
||||
f"Group email {group_email} not found in group_email_to_member_emails_map"
|
||||
f"Group email {group_email} for drive {drive_id} not found in "
|
||||
"group_email_to_member_emails_map"
|
||||
)
|
||||
continue
|
||||
all_member_emails.update(group_email_to_member_emails_map[group_email])
|
||||
drive_member_emails.update(group_email_to_member_emails_map[group_email])
|
||||
onyx_groups.append(
|
||||
ExternalUserGroup(
|
||||
id=drive_id,
|
||||
user_emails=list(all_member_emails),
|
||||
user_emails=list(drive_member_emails),
|
||||
)
|
||||
)
|
||||
|
||||
# Convert all folder permissions to onyx groups
|
||||
for folder in folder_info:
|
||||
anyone_can_access = False
|
||||
folder_member_emails: set[str] = set()
|
||||
for permission in folder.permissions:
|
||||
if permission.type == PermissionType.USER:
|
||||
if permission.email_address is None:
|
||||
logger.warning(
|
||||
f"User email is None for folder {folder.id} permission {permission}"
|
||||
)
|
||||
continue
|
||||
folder_member_emails.add(permission.email_address)
|
||||
elif permission.type == PermissionType.GROUP:
|
||||
if permission.email_address not in group_email_to_member_emails_map:
|
||||
logger.warning(
|
||||
f"Group email {permission.email_address} for folder {folder.id} "
|
||||
"not found in group_email_to_member_emails_map"
|
||||
)
|
||||
continue
|
||||
folder_member_emails.update(
|
||||
group_email_to_member_emails_map[permission.email_address]
|
||||
)
|
||||
elif permission.type == PermissionType.ANYONE:
|
||||
anyone_can_access = True
|
||||
|
||||
onyx_groups.append(
|
||||
ExternalUserGroup(
|
||||
id=folder.id,
|
||||
user_emails=list(folder_member_emails),
|
||||
gives_anyone_access=anyone_can_access,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -173,6 +285,9 @@ def gdrive_group_sync(
|
||||
admin_service, google_drive_connector.google_domain
|
||||
)
|
||||
|
||||
# Get all folder permissions
|
||||
folder_info = _get_all_folders(google_drive_connector)
|
||||
|
||||
# Map group emails to their members
|
||||
group_email_to_member_emails_map = _map_group_email_to_member_emails(
|
||||
admin_service, all_group_emails
|
||||
@@ -182,6 +297,7 @@ def gdrive_group_sync(
|
||||
onyx_groups = _build_onyx_groups(
|
||||
drive_id_to_members_map=drive_id_to_members_map,
|
||||
group_email_to_member_emails_map=group_email_to_member_emails_map,
|
||||
folder_info=folder_info,
|
||||
)
|
||||
|
||||
return onyx_groups
|
||||
|
||||
59
backend/ee/onyx/external_permissions/google_drive/models.py
Normal file
59
backend/ee/onyx/external_permissions/google_drive/models.py
Normal file
@@ -0,0 +1,59 @@
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class PermissionType(str, Enum):
    """Values of the `type` field on a Google Drive permission.

    Members are also `str` instances, so they compare equal to the raw strings.
    """

    USER = "user"
    GROUP = "group"
    DOMAIN = "domain"
    ANYONE = "anyone"
|
||||
|
||||
|
||||
class GoogleDrivePermissionDetails(BaseModel):
    # Drive's `permissionDetails[].permissionType`: "file", "member", etc.
    # Different from the `type` field within `GoogleDrivePermission`.
    # None when the detail entry does not carry the field.
    permission_type: str | None
    # this is "reader", "writer", "owner", etc.
    role: str
    # Drive's `permissionDetails[].inheritedFrom`: ID of the item this
    # permission is inherited from; None when the entry is not inherited
    inherited_from: str | None


class GoogleDrivePermission(BaseModel):
    id: str
    # groups are also represented as email addresses within Drive
    # will be None for domain/global permissions
    email_address: str | None
    type: PermissionType
    domain: str | None  # only applies to domain permissions
    permission_details: GoogleDrivePermissionDetails | None

    @classmethod
    def from_drive_permission(
        cls, drive_permission: dict[str, Any]
    ) -> "GoogleDrivePermission":
        """Build a `GoogleDrivePermission` from a raw Drive API permission dict.

        Args:
            drive_permission: One permission entry as returned by the Drive API.
                `id` and `type` are required; `emailAddress`, `domain` and
                `permissionDetails` are optional.

        Returns:
            The parsed permission. `permission_details` is None when the raw
            permission has no (or an empty) `permissionDetails` list.

        Raises:
            KeyError: if `id` or `type` is missing from `drive_permission`.
            ValueError: if `type` is not one of the `PermissionType` values.
        """
        # we seem to only get details for permissions that are inherited
        # we can get multiple details if a permission is inherited from multiple
        # parents; only the first entry is used
        permission_details_list = drive_permission.get("permissionDetails", [])
        raw_details: dict[str, Any] | None = (
            permission_details_list[0] if permission_details_list else None
        )

        parsed_details: GoogleDrivePermissionDetails | None = None
        if raw_details:
            parsed_details = GoogleDrivePermissionDetails(
                # The Drive API names this field `permissionType`; `type` is
                # kept only as a fallback for dicts built with the old key.
                permission_type=raw_details.get(
                    "permissionType", raw_details.get("type")
                ),
                role=raw_details.get("role", ""),
                inherited_from=raw_details.get("inheritedFrom"),
            )

        return cls(
            id=drive_permission["id"],
            email_address=drive_permission.get("emailAddress"),
            type=PermissionType(drive_permission["type"]),
            domain=drive_permission.get("domain"),
            permission_details=parsed_details,
        )
|
||||
@@ -0,0 +1,60 @@
|
||||
from googleapiclient.discovery import Resource # type: ignore
|
||||
|
||||
from ee.onyx.external_permissions.google_drive.models import GoogleDrivePermission
|
||||
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def get_permissions_by_ids(
    drive_service: Resource,
    doc_id: str,
    permission_ids: list[str],
) -> list[GoogleDrivePermission]:
    """
    Fetches permissions for a document based on a list of permission IDs.

    All permissions of the document are listed, then filtered down to the
    requested IDs. A warning is logged when any requested ID is not found.

    Args:
        drive_service: The Google Drive service instance
        doc_id: The ID of the document to fetch permissions for
        permission_ids: A list of permission IDs to filter by

    Returns:
        A list of GoogleDrivePermission objects matching the provided permission IDs
        (empty when `permission_ids` is empty; no API call is made in that case)
    """
    if not permission_ids:
        return []

    # Create a set for faster lookup
    permission_id_set = set(permission_ids)

    # Fetch all permissions for the document
    fetched_permissions = execute_paginated_retrieval(
        retrieval_function=drive_service.permissions().list,
        list_key="permissions",
        fileId=doc_id,
        fields="permissions(id, emailAddress, type, domain, permissionDetails),nextPageToken",
        supportsAllDrives=True,
        continue_on_404_or_403=True,
    )

    # Filter permissions by ID and convert to GoogleDrivePermission objects
    filtered_permissions = [
        GoogleDrivePermission.from_drive_permission(permission)
        for permission in fetched_permissions
        if permission.get("id") in permission_id_set
    ]

    # Compare sets of IDs rather than list lengths: duplicate requested IDs
    # must not trigger a warning with an empty "missing" set, and duplicate
    # fetched entries must not hide an ID that is actually missing.
    missing_ids = permission_id_set - {p.id for p in filtered_permissions}
    if missing_ids:
        logger.warning(
            f"Could not find all requested permission IDs for document {doc_id}. "
            f"Missing IDs: {missing_ids}"
        )

    return filtered_permissions
|
||||
@@ -21,18 +21,21 @@ from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
PERMISSION_FULL_DESCRIPTION = (
|
||||
"permissions(id, emailAddress, type, domain, permissionDetails)"
|
||||
)
|
||||
FILE_FIELDS = (
|
||||
"nextPageToken, files(mimeType, id, name, permissions, modifiedTime, webViewLink, "
|
||||
"shortcutDetails, owners(emailAddress), size)"
|
||||
)
|
||||
SLIM_FILE_FIELDS = (
|
||||
"nextPageToken, files(mimeType, driveId, id, name, permissions(emailAddress, type, domain), "
|
||||
f"nextPageToken, files(mimeType, driveId, id, name, {PERMISSION_FULL_DESCRIPTION}, "
|
||||
"permissionIds, webViewLink, owners(emailAddress))"
|
||||
)
|
||||
FOLDER_FIELDS = "nextPageToken, files(id, name, permissions, modifiedTime, webViewLink, shortcutDetails)"
|
||||
|
||||
|
||||
def _generate_time_range_filter(
|
||||
def generate_time_range_filter(
|
||||
start: SecondsSinceUnixEpoch | None = None,
|
||||
end: SecondsSinceUnixEpoch | None = None,
|
||||
) -> str:
|
||||
@@ -81,7 +84,7 @@ def _get_files_in_parent(
|
||||
) -> Iterator[GoogleDriveFileType]:
|
||||
query = f"mimeType != '{DRIVE_FOLDER_TYPE}' and '{parent_id}' in parents"
|
||||
query += " and trashed = false"
|
||||
query += _generate_time_range_filter(start, end)
|
||||
query += generate_time_range_filter(start, end)
|
||||
|
||||
for file in execute_paginated_retrieval(
|
||||
retrieval_function=service.files().list,
|
||||
@@ -204,7 +207,7 @@ def get_files_in_shared_drive(
|
||||
# Get all files in the shared drive
|
||||
file_query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
|
||||
file_query += " and trashed = false"
|
||||
file_query += _generate_time_range_filter(start, end)
|
||||
file_query += generate_time_range_filter(start, end)
|
||||
|
||||
for file in execute_paginated_retrieval(
|
||||
retrieval_function=service.files().list,
|
||||
@@ -264,7 +267,7 @@ def get_all_files_in_my_drive_and_shared(
|
||||
file_query += " and trashed = false"
|
||||
if not include_shared_with_me:
|
||||
file_query += " and 'me' in owners"
|
||||
file_query += _generate_time_range_filter(start, end)
|
||||
file_query += generate_time_range_filter(start, end)
|
||||
yield from execute_paginated_retrieval(
|
||||
retrieval_function=service.files().list,
|
||||
list_key="files",
|
||||
@@ -297,7 +300,7 @@ def get_all_files_for_oauth(
|
||||
|
||||
file_query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
|
||||
file_query += " and trashed = false"
|
||||
file_query += _generate_time_range_filter(start, end)
|
||||
file_query += generate_time_range_filter(start, end)
|
||||
|
||||
if not should_get_all:
|
||||
if include_files_shared_with_me and not include_my_drives:
|
||||
|
||||
@@ -2368,6 +2368,21 @@ class User__ExternalUserGroupId(Base):
|
||||
)
|
||||
|
||||
|
||||
class PublicExternalUserGroup(Base):
    """One row per external user "group" that is public.

    Example: a Google Drive folder shared as `Anyone with the link` or
    `Anyone in the domain`.
    """

    __tablename__ = "public_external_user_group"

    # Composite primary key: (external_user_group_id, cc_pair_id)
    external_user_group_id: Mapped[str] = mapped_column(String, primary_key=True)
    # Rows are removed together with their connector_credential_pair (CASCADE)
    cc_pair_id: Mapped[int] = mapped_column(
        ForeignKey("connector_credential_pair.id", ondelete="CASCADE"), primary_key=True
    )
|
||||
|
||||
|
||||
class UsageReport(Base):
|
||||
"""This stores metadata about usage reports generated by admin including user who generated
|
||||
them as well as the period they cover. The actual zip file of the report is stored as a lo
|
||||
|
||||
@@ -62,6 +62,28 @@ def parse_credentials(env_str: str) -> dict:
|
||||
return json.loads(unescaped)
|
||||
|
||||
|
||||
def get_credentials_from_env(email: str, oauth: bool) -> dict:
    """Build the credentials dict for `email` from environment variables.

    Args:
        email: Account whose credentials should be loaded. It is also stored
            as the primary admin in the returned dict.
        oauth: When True, read the OAuth credential env var and store it under
            the token key; otherwise read the service-account env var and
            store it under the service-account key.

    Returns:
        A dict holding the re-serialized credential JSON string, the primary
        admin email, and the "uploaded" authentication method.

    Raises:
        KeyError: If `email` has no entry in the relevant map, or the mapped
            environment variable is not set.
    """
    if oauth:
        env_var_name = _USER_TO_OAUTH_CREDENTIALS_MAP[email]
        cred_key = DB_CREDENTIALS_DICT_TOKEN_KEY
    else:
        env_var_name = _USER_TO_SERVICE_ACCOUNT_CREDENTIALS_MAP[email]
        cred_key = DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY

    # Round-trip through parse_credentials so the stored value is a normalized
    # JSON string rather than the raw (escaped) env value.
    parsed_credentials = parse_credentials(os.environ[env_var_name])

    return {
        cred_key: json.dumps(parsed_credentials),
        DB_CREDENTIALS_PRIMARY_ADMIN_KEY: email,
        DB_CREDENTIALS_AUTHENTICATION_METHOD: GoogleOAuthAuthenticationMethod.UPLOADED.value,
    }
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def google_drive_oauth_uploaded_connector_factory() -> (
|
||||
Callable[..., GoogleDriveConnector]
|
||||
@@ -85,13 +107,7 @@ def google_drive_oauth_uploaded_connector_factory() -> (
|
||||
shared_folder_urls=shared_folder_urls,
|
||||
)
|
||||
|
||||
json_string = os.environ[_USER_TO_OAUTH_CREDENTIALS_MAP[primary_admin_email]]
|
||||
refried_json_string = json.dumps(parse_credentials(json_string))
|
||||
credentials_json = {
|
||||
DB_CREDENTIALS_DICT_TOKEN_KEY: refried_json_string,
|
||||
DB_CREDENTIALS_PRIMARY_ADMIN_KEY: primary_admin_email,
|
||||
DB_CREDENTIALS_AUTHENTICATION_METHOD: GoogleOAuthAuthenticationMethod.UPLOADED.value,
|
||||
}
|
||||
credentials_json = get_credentials_from_env(primary_admin_email, oauth=True)
|
||||
connector.load_credentials(credentials_json)
|
||||
return connector
|
||||
|
||||
@@ -123,19 +139,11 @@ def google_drive_service_acct_connector_factory() -> (
|
||||
specific_user_emails=specific_user_emails,
|
||||
)
|
||||
|
||||
json_string = os.environ[
|
||||
_USER_TO_SERVICE_ACCOUNT_CREDENTIALS_MAP[primary_admin_email]
|
||||
]
|
||||
refried_json_string = json.dumps(parse_credentials(json_string))
|
||||
|
||||
# Load Service Account Credentials
|
||||
connector.load_credentials(
|
||||
{
|
||||
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: refried_json_string,
|
||||
DB_CREDENTIALS_PRIMARY_ADMIN_KEY: primary_admin_email,
|
||||
DB_CREDENTIALS_AUTHENTICATION_METHOD: GoogleOAuthAuthenticationMethod.UPLOADED.value,
|
||||
}
|
||||
credentials_json = get_credentials_from_env(
|
||||
email=primary_admin_email, oauth=False
|
||||
)
|
||||
connector.load_credentials(credentials_json)
|
||||
return connector
|
||||
|
||||
return _connector_factory
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"12": "https://drive.google.com/file/d/1u7nynrG4WuFZeuZs8yyhqJF_lbo-op-m/view?usp=drivesdk",
|
||||
"10": "https://drive.google.com/file/d/1LFcVuXuXIdNJ7hkL0C40eYn_cQtryUVQ/view?usp=drivesdk",
|
||||
"13": "https://drive.google.com/file/d/1muQMyYAJe0_F-HiDFIfFMt-4qsgMlREM/view?usp=drivesdk",
|
||||
"11": "https://drive.google.com/file/d/1oHNtlsdJJtk7dE10NgH83Kn5_f2L-Su1/view?usp=drivesdk",
|
||||
"14": "https://drive.google.com/file/d/1sAw-DrsqpnqLF5A8P59BZwIpt9-LrlaL/view?usp=drivesdk",
|
||||
"18": "https://drive.google.com/file/d/1qqKH3esasdqV6ryEhdoSQezDPlKj11At/view?usp=drivesdk",
|
||||
"17": "https://drive.google.com/file/d/1z08VsrCUTozpc5Quzb7mEDUwNkXU3foT/view?usp=drivesdk",
|
||||
"15": "https://drive.google.com/file/d/1QQ6ZGyYP49IJNeGKNmqZISyVLzTOtK4v/view?usp=drivesdk",
|
||||
"19": "https://drive.google.com/file/d/172as_pb7E15bXUd63mIIBRotk_tT7h56/view?usp=drivesdk",
|
||||
"16": "https://drive.google.com/file/d/1552S6HEjJ81q8JXr46BtixQiVq9xlW_I/view?usp=drivesdk",
|
||||
"5": "https://drive.google.com/file/d/1sv9epxLcNlgM6C-oPDeD_heFw7AIZMgp/view?usp=drivesdk",
|
||||
"7": "https://drive.google.com/file/d/1S_S0LpQW90EUPPPjJX4jfu5p9gOQjiQF/view?usp=drivesdk",
|
||||
"9": "https://drive.google.com/file/d/1wH2dBrWzmiGJ88ySHWu6srb7Jsj7qYbA/view?usp=drivesdk",
|
||||
"8": "https://drive.google.com/file/d/14URUm6RKSZziH1lUtT6gs-xnCTWkXpSn/view?usp=drivesdk",
|
||||
"6": "https://drive.google.com/file/d/1LBKBuTMRSss-kVw8ut3rMk51wSbTM95j/view?usp=drivesdk",
|
||||
"3": "https://drive.google.com/file/d/1nNazkPrkuRXHFOl8gdA68pU2g8cy-h6n/view?usp=drivesdk",
|
||||
"2": "https://drive.google.com/file/d/1miG_QpqXe2QIMApcrlNzaB6fsXW5WMFX/view?usp=drivesdk",
|
||||
"4": "https://drive.google.com/file/d/1o-i8can6ciL1XXzy2pVUPHZEXEjBJi6C/view?usp=drivesdk",
|
||||
"0": "https://drive.google.com/file/d/1d3Y59Sns8I0FIW9CtOAjVVLE2MEe_3nP/view?usp=drivesdk",
|
||||
"1": "https://drive.google.com/file/d/1ipSqxJajs_NkfSKFxgltIMNc0ffdt-NX/view?usp=drivesdk",
|
||||
"68": "https://drive.google.com/file/d/1rCBZsbhQ-ULWGztiKB0JYhFth9EChiSZ/view?usp=drivesdk",
|
||||
"66": "https://drive.google.com/file/d/1WVAlbWcu9-Braa0aG6w3cShrY5dbIYcY/view?usp=drivesdk",
|
||||
"67": "https://drive.google.com/file/d/1p44poOCdNLnVYMxTL9b3h-BXsOQ2RDgM/view?usp=drivesdk",
|
||||
"69": "https://drive.google.com/file/d/1HFYsaqC14aE-EaobQdwkw0FOlAYMYqkV/view?usp=drivesdk",
|
||||
"65": "https://drive.google.com/file/d/1RyE07CpTIDYMO3b-atwjWH6ZHFDjyoCl/view?usp=drivesdk",
|
||||
"32": "https://drive.google.com/file/d/17egJ5W-0bvS2akLBqvxylTIViN0d9nG7/view?usp=drivesdk",
|
||||
"28": "https://drive.google.com/file/d/1HNqSM2XGqgHnyNYT5wp8hyski18HMcfO/view?usp=drivesdk",
|
||||
"37": "https://drive.google.com/file/d/16Tdu3gveWkFL0VBUzYSzKxFO4ffv-8h7/view?usp=drivesdk",
|
||||
"30": "https://drive.google.com/file/d/1uj69jGyYnNOXXqKmLNIp-4KKrVC1qaPy/view?usp=drivesdk",
|
||||
"25": "https://drive.google.com/file/d/1bw6NFlR4ZxOV6reQK1Oqeq_UaYFVpNV6/view?usp=drivesdk",
|
||||
"33": "https://drive.google.com/file/d/1FkmXBkt__lOFXg_uhxLI0QIuxWbIGySL/view?usp=drivesdk",
|
||||
"20": "https://drive.google.com/file/d/1r77uBVOHkuiDQFa9iz9FU8QbfjImOAjF/view?usp=drivesdk",
|
||||
"24": "https://drive.google.com/file/d/1kwLrdhTgCdjNrOcSwRI14K3gXnS48xne/view?usp=drivesdk",
|
||||
"39": "https://drive.google.com/file/d/1V3av9F47t44Nf3jcO12U6OIsjsX-B7L1/view?usp=drivesdk",
|
||||
"29": "https://drive.google.com/file/d/172dCAUNaaoZX0RHqEi7Ev12eV930LtTa/view?usp=drivesdk",
|
||||
"31": "https://drive.google.com/file/d/17zzfgMSWBVebWGnpSHKd6g1LFN4vn-YP/view?usp=drivesdk",
|
||||
"38": "https://drive.google.com/file/d/1xOQvIBlBJ2swTGp78WkCZJUQ-d1F8pVu/view?usp=drivesdk",
|
||||
"23": "https://drive.google.com/file/d/1X89y_CoTWWjh3BWq0ZgeGydCvg3gMZeJ/view?usp=drivesdk",
|
||||
"34": "https://drive.google.com/file/d/1VNDhcbA_-Ckjp084hKyl9bwP4E3l9K_2/view?usp=drivesdk",
|
||||
"47": "https://drive.google.com/file/d/1O8E7haA8WcJIma0iKcvebd4_dlC5Zr7S/view?usp=drivesdk",
|
||||
"52": "https://drive.google.com/file/d/1o-ateliXHj4TyugOxb9zYYXwrkhFl4FX/view?usp=drivesdk",
|
||||
"27": "https://drive.google.com/file/d/1aZ1CwNVWJt_OtIBVO-9zv1UUqXTDlM1F/view?usp=drivesdk",
|
||||
"26": "https://drive.google.com/file/d/1qegrc27hYeECs0KexnEuuG0WQm-8Y9oZ/view?usp=drivesdk",
|
||||
"59": "https://drive.google.com/file/d/1L9oWKHMTjQreGW_k8rNy7kBQ7c0FuXFm/view?usp=drivesdk",
|
||||
"35": "https://drive.google.com/file/d/1NewjF092B9KKDBs-dpnZ9dzVl2GAs2LW/view?usp=drivesdk",
|
||||
"49": "https://drive.google.com/file/d/1TsUrBlr2nxJtH122nKQ_GzdMc0DFFERB/view?usp=drivesdk",
|
||||
"41": "https://drive.google.com/file/d/1gc2Vo3HZF-Bm_WhZ0zyFedWNfVL2BEol/view?usp=drivesdk",
|
||||
"22": "https://drive.google.com/file/d/1iPfQeganYriuqHO2e5npUPeuX5VIbhG3/view?usp=drivesdk",
|
||||
"36": "https://drive.google.com/file/d/1KyNoHRTfGMNR15dCRpcVW74l2z-wVm0V/view?usp=drivesdk",
|
||||
"44": "https://drive.google.com/file/d/1PDuxwmrD20s54FHQIhXn3ucdFmXSX5kS/view?usp=drivesdk",
|
||||
"21": "https://drive.google.com/file/d/1ZwO5cCfBJgGpZTIpoi8p2js8zuHT_qxe/view?usp=drivesdk",
|
||||
"53": "https://drive.google.com/file/d/140NZAuAOoiqrNVqWmF4TPNv6njd_guwE/view?usp=drivesdk",
|
||||
"50": "https://drive.google.com/file/d/1MBmy7nQi7pMwwIPZHJjB_iuQeO07QWsN/view?usp=drivesdk",
|
||||
"54": "https://drive.google.com/file/d/1TtIJ-ULYWyv0yUvUVdfTPuBNlBt_j1Yd/view?usp=drivesdk",
|
||||
"57": "https://drive.google.com/file/d/19V5d3NcR029AhGiRibk2nlTmFNCVGBgO/view?usp=drivesdk",
|
||||
"43": "https://drive.google.com/file/d/1kLChcxIWZS_kHLEHThLcm7ekcgwYP0jF/view?usp=drivesdk",
|
||||
"42": "https://drive.google.com/file/d/1HKW3C1B5vFYUuXmFieMKYAfq4CwtnEZ_/view?usp=drivesdk",
|
||||
"48": "https://drive.google.com/file/d/1EJGd47XpWZDXJKWU0CGp84Hm7K47GNVt/view?usp=drivesdk",
|
||||
"40": "https://drive.google.com/file/d/1Fr4dVKdOvth_O-Td8PTwgNGzZz8ridAl/view?usp=drivesdk",
|
||||
"58": "https://drive.google.com/file/d/1lUFpiwE7ISzLbowHvCtEUj4sfG4w0Gst/view?usp=drivesdk",
|
||||
"51": "https://drive.google.com/file/d/1V6fOoKgA8QSTJYWPP5GVHz8WFAQIRLNB/view?usp=drivesdk",
|
||||
"45": "https://drive.google.com/file/d/1hSrPOwyxFEth4GWWN1e4BjBftmnKa8px/view?usp=drivesdk",
|
||||
"46": "https://drive.google.com/file/d/1jCynzDt1r0EISpwcrFuk3RlKWHM9u7Mj/view?usp=drivesdk",
|
||||
"55": "https://drive.google.com/file/d/1Db01f4I_Xn8Bs9piQgZU59ZWAeC2MaQm/view?usp=drivesdk",
|
||||
"56": "https://drive.google.com/file/d/1NxVfwIxm6FVVR1XnxQNMWWbQEVX66cQm/view?usp=drivesdk",
|
||||
"61": "https://docs.google.com/document/d/1eAaZJAqjXMZ2VvG_r04EGtn6EGcYycofdNUkDHEA8vY/edit?usp=drivesdk"
|
||||
}
|
||||
@@ -0,0 +1,157 @@
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
from collections import defaultdict
|
||||
from collections.abc import Callable
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
from ee.onyx.external_permissions.google_drive.doc_sync import gdrive_doc_sync
|
||||
from ee.onyx.external_permissions.google_drive.group_sync import gdrive_group_sync
|
||||
from onyx.connectors.google_drive.connector import GoogleDriveConnector
|
||||
from onyx.db.models import ConnectorCredentialPair
|
||||
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import ACCESS_MAPPING
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import PUBLIC_RANGE
|
||||
|
||||
|
||||
def _build_connector(
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> GoogleDriveConnector:
    """Create a service-account connector for the admin user.

    The connector covers shared drives and My Drives (but not files merely
    shared with the user), with no folder/drive/email restrictions.

    Args:
        google_drive_service_acct_connector_factory: Fixture-provided factory
            that builds a connector and loads its credentials.

    Returns:
        The connector, with `load_credentials` replaced by a mock.
    """
    factory_kwargs = {
        "primary_admin_email": ADMIN_EMAIL,
        "include_shared_drives": True,
        "include_my_drives": True,
        "include_files_shared_with_me": False,
        "shared_folder_urls": None,
        "shared_drive_urls": None,
        "my_drive_emails": None,
    }
    built_connector = google_drive_service_acct_connector_factory(**factory_kwargs)

    # The factory has already loaded credentials; stub the method out so any
    # later call to it becomes a no-op.
    built_connector.load_credentials = MagicMock()  # type: ignore
    return built_connector
|
||||
|
||||
|
||||
def test_gdrive_perm_sync_with_real_data(
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    """
    Test gdrive_doc_sync and gdrive_group_sync with real data from the test drive.

    This test uses the real connector to make actual API calls to Google Drive
    and verifies the permission structure returned.

    Documents are matched against drive_id_mapping.json (test file ID -> document
    ID/URL). Files in PUBLIC_RANGE must be reported as public; every other mapped
    file must be accessible to exactly the users listed for it in ACCESS_MAPPING,
    after group memberships have been flattened into emails.
    """
    # Create a mock cc_pair that will use our real connector
    mock_cc_pair = MagicMock(spec=ConnectorCredentialPair)
    mock_cc_pair.connector = MagicMock()
    mock_cc_pair.connector.connector_specific_config = {}
    mock_cc_pair.credential_id = 1
    mock_cc_pair.credential.credential_json = {}
    mock_cc_pair.last_time_perm_sync = None
    mock_cc_pair.last_time_external_group_sync = None

    # Create a mock heartbeat that never asks the sync to stop
    mock_heartbeat = MagicMock(spec=IndexingHeartbeatInterface)
    mock_heartbeat.should_stop.return_value = False

    # Load drive_id_mapping.json
    with open(
        os.path.join(os.path.dirname(__file__), "drive_id_mapping.json"), "r"
    ) as f:
        drive_id_mapping = json.load(f)

    # Invert the mapping to get URL -> numeric test file ID
    url_to_id_mapping: dict[str, int] = {
        url: int(test_id) for test_id, url in drive_id_mapping.items()
    }

    # Use the connector directly without mocking Google Drive API calls;
    # only the connector construction inside doc_sync is patched.
    with patch(
        "ee.onyx.external_permissions.google_drive.doc_sync.GoogleDriveConnector",
        return_value=_build_connector(google_drive_service_acct_connector_factory),
    ):
        # Call the function under test
        doc_access_generator = gdrive_doc_sync(mock_cc_pair, lambda: [], mock_heartbeat)
        doc_access_list = list(doc_access_generator)

    # create new connector for the group sync
    with patch(
        "ee.onyx.external_permissions.google_drive.group_sync.GoogleDriveConnector",
        return_value=_build_connector(google_drive_service_acct_connector_factory),
    ):
        external_user_groups = gdrive_group_sync("test_tenant", mock_cc_pair)

    # Verify we got some results
    assert len(doc_access_list) > 0
    print(f"Found {len(doc_access_list)} documents with permissions")

    # map group ids to emails, and remember which groups grant access to anyone
    group_id_to_email_mapping: dict[str, set[str]] = defaultdict(set)
    groups_with_anyone_access: set[str] = set()
    for group in external_user_groups:
        group_id_to_email_mapping[group.id].update(group.user_emails)
        if group.gives_anyone_access:
            groups_with_anyone_access.add(group.id)

    # Map documents to their permissions (flattening groups)
    doc_to_email_mapping: dict[str, set[str]] = {}
    doc_to_raw_result_mapping: dict[str, set[str]] = {}
    public_doc_ids: set[str] = set()

    for doc_access in doc_access_list:
        doc_id = doc_access.doc_id
        external_access = doc_access.external_access

        # make sure they are new sets to avoid mutating the original
        doc_to_email_mapping[doc_id] = copy.deepcopy(
            external_access.external_user_emails
        )
        doc_to_raw_result_mapping[doc_id] = copy.deepcopy(
            external_access.external_user_emails
        )

        for group_id in external_access.external_user_group_ids:
            doc_to_email_mapping[doc_id].update(group_id_to_email_mapping[group_id])
            doc_to_raw_result_mapping[doc_id].add(group_id)

        # A doc counts as public if it is flagged public directly or belongs
        # to at least one group that gives anyone access.
        if external_access.is_public or not groups_with_anyone_access.isdisjoint(
            external_access.external_user_group_ids
        ):
            public_doc_ids.add(doc_id)

    # Check permissions based on drive_id_mapping.json and ACCESS_MAPPING
    # For each document URL that exists in our mapping
    # NOTE(review): nothing asserts checked_files > 0, so if no returned doc_id
    # matches the mapping the per-file checks are silently skipped — confirm
    # whether that should be a failure.
    checked_files = 0
    for doc_id, emails_with_access in doc_to_email_mapping.items():
        # Skip URLs that aren't in our mapping, we don't want new stuff to interfere
        # with the test.
        file_numeric_id = url_to_id_mapping.get(doc_id)
        if file_numeric_id is None:
            continue

        checked_files += 1

        # Check which users should have access to this file according to ACCESS_MAPPING
        expected_users = {
            user_email
            for user_email, file_ids in ACCESS_MAPPING.items()
            if file_numeric_id in file_ids
        }

        # Verify the permissions match
        if file_numeric_id in PUBLIC_RANGE:
            assert (
                doc_id in public_doc_ids
            ), f"File {doc_id} (ID: {file_numeric_id}) should be public but is not in the public_doc_ids set"
        else:
            assert expected_users == emails_with_access, (
                f"File {doc_id} (ID: {file_numeric_id}) should be accessible to users {expected_users} "
                f"but is accessible to {emails_with_access}. Raw result: {doc_to_raw_result_mapping[doc_id]} "
            )

    print(f"Checked permissions for {checked_files} files from drive_id_mapping.json")
|
||||
130
backend/tests/daily/connectors/google_drive/test_map_test_ids.py
Normal file
130
backend/tests/daily/connectors/google_drive/test_map_test_ids.py
Normal file
@@ -0,0 +1,130 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.connectors.google_drive.connector import GoogleDriveConnector
|
||||
from tests.daily.connectors.google_drive.conftest import get_credentials_from_env
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import file_name_template
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_1_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_2_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import load_all_docs
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_2_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_2_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_3_FILE_IDS
|
||||
|
||||
|
||||
def generate_test_id_to_drive_id_mapping() -> dict[int, str]:
    """
    Generate a mapping from test file IDs to the IDs of the indexed Drive documents.

    This is useful for writing tests that need to verify specific files
    are accessible to specific users.

    The stored value is the connector document's `id`, taken as-is (in the
    committed drive_id_mapping.json these values are Google Drive/Docs URLs).
    As a side effect, the mapping for every known test file ID range is printed.

    Returns:
        dict: Mapping from test file ID (int) to document ID (str)
    """
    # Set up the connector with real credentials
    connector = GoogleDriveConnector(
        include_shared_drives=True,
        include_my_drives=True,
        include_files_shared_with_me=False,
    )

    # Load credentials
    connector.load_credentials(get_credentials_from_env(email=ADMIN_EMAIL, oauth=False))

    # Get all documents from the connector
    docs = load_all_docs(connector)

    # Prefix shared by all test files (the part of the name template before the
    # first "_"); computed once instead of once per document.
    test_file_prefix = file_name_template.format("").split("_")[0]

    # Create a mapping from test file ID to the document ID
    test_id_to_drive_id: dict[int, str] = {}

    # Process all documents retrieved from Drive
    for doc in docs:
        # Check if this document's name matches our test file naming pattern (file_X.txt)
        if not doc.semantic_identifier.startswith(test_file_prefix):
            continue

        try:
            # Extract the test file ID from the filename (file_X.txt -> X)
            file_id_str = doc.semantic_identifier.split("_")[1].split(".")[0]
            test_file_id = int(file_id_str)
        except (ValueError, IndexError):
            # Skip files that don't follow our naming convention
            continue

        # Store the document ID unchanged; no extraction from the URL is done
        test_id_to_drive_id[test_file_id] = doc.id

    # All defined test file ID ranges, keyed by constant name for the printout
    all_test_ranges = {
        "ADMIN_FILE_IDS": ADMIN_FILE_IDS,
        "TEST_USER_1_FILE_IDS": TEST_USER_1_FILE_IDS,
        "TEST_USER_2_FILE_IDS": TEST_USER_2_FILE_IDS,
        "TEST_USER_3_FILE_IDS": TEST_USER_3_FILE_IDS,
        "SHARED_DRIVE_1_FILE_IDS": SHARED_DRIVE_1_FILE_IDS,
        "SHARED_DRIVE_2_FILE_IDS": SHARED_DRIVE_2_FILE_IDS,
        "FOLDER_1_FILE_IDS": FOLDER_1_FILE_IDS,
        "FOLDER_1_1_FILE_IDS": FOLDER_1_1_FILE_IDS,
        "FOLDER_1_2_FILE_IDS": FOLDER_1_2_FILE_IDS,
        "FOLDER_2_FILE_IDS": FOLDER_2_FILE_IDS,
        "FOLDER_2_1_FILE_IDS": FOLDER_2_1_FILE_IDS,
        "FOLDER_2_2_FILE_IDS": FOLDER_2_2_FILE_IDS,
        "FOLDER_3_FILE_IDS": FOLDER_3_FILE_IDS,
    }

    # Print the mapping for each test range
    for range_name, file_ids in all_test_ranges.items():
        print(f"\n{range_name}:")
        for test_id in file_ids:
            drive_id = test_id_to_drive_id.get(test_id, "NOT_FOUND")
            print(f"  {test_id} -> {drive_id}")

    return test_id_to_drive_id
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not os.getenv("RUN_MANUAL_TESTS"),
    reason="This test maps test IDs to actual Google Drive IDs. Set RUN_MANUAL_TESTS=1 to run.",
)
def test_generate_drive_id_mapping() -> None:
    """Regenerate drive_id_mapping.json next to this file.

    Skipped unless the RUN_MANUAL_TESTS environment variable is set to a
    non-empty value, since it needs real Google Drive credentials and exists
    mainly to produce the mapping consumed by other tests.

    Run with:

        RUN_MANUAL_TESTS=true pytest -xvs tests/daily/connectors/google_drive/test_map_test_ids.py::test_generate_drive_id_mapping

    NOTE(review): the function always ends by raising RuntimeError after the
    file is written, so a run is reported as an error rather than a pass —
    this looks deliberate; confirm before changing.
    """
    mapping = generate_test_id_to_drive_id_mapping()
    assert mapping, "Failed to generate any test ID to drive ID mappings"

    # The output file lives in the same directory as this test module
    mapping_file = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "drive_id_mapping.json"
    )

    # JSON object keys must be strings, so stringify the int test IDs
    serializable_mapping = {
        str(test_id): drive_id for test_id, drive_id in mapping.items()
    }

    with open(mapping_file, "w") as f:
        json.dump(serializable_mapping, f, indent=2)

    print(f"\nMapping written to: {mapping_file}")
    raise RuntimeError("Mapping written to file, test complete")
|
||||
@@ -1,209 +0,0 @@
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
from ee.onyx.external_permissions.google_drive.doc_sync import (
|
||||
_get_permissions_from_slim_doc,
|
||||
)
|
||||
from onyx.access.models import ExternalAccess
|
||||
from onyx.connectors.google_drive.connector import GoogleDriveConnector
|
||||
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
|
||||
from onyx.connectors.google_utils.resources import get_admin_service
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import ACCESS_MAPPING
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import file_name_template
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import filter_invalid_prefixes
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_1_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_2_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import print_discrepancies
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import PUBLIC_RANGE
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_2_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_EMAIL
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_2_EMAIL
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_2_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_3_EMAIL
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_3_FILE_IDS
|
||||
|
||||
|
||||
def get_keys_available_to_user_from_access_map(
    user_email: str,
    group_map: dict[str, list[str]],
    access_map: dict[str, ExternalAccess],
) -> list[str]:
    """
    Return the file names in `access_map` that `user_email` can access.

    A file is accessible when it is public, when the user's email is listed
    directly, or when one of the groups the user belongs to (per `group_map`,
    group email -> member emails) is listed on the file. Names are returned in
    `access_map` iteration order.
    """
    user_group_emails = [
        group_email
        for group_email, member_emails in group_map.items()
        if user_email in member_emails
    ]

    def _user_can_access(access: ExternalAccess) -> bool:
        # Same precedence as an if/elif chain: public, direct email, then groups
        if access.is_public or user_email in access.external_user_emails:
            return True
        return any(
            group_email in access.external_user_group_ids
            for group_email in user_group_emails
        )

    return [
        file_name
        for file_name, access in access_map.items()
        if _user_can_access(access)
    ]
|
||||
|
||||
|
||||
def assert_correct_access_for_user(
    user_email: str,
    expected_access_ids: list[int],
    group_map: dict[str, list[str]],
    retrieved_access_map: dict[str, ExternalAccess],
) -> None:
    """
    Assert that the files retrieved as accessible to `user_email` are exactly
    the expected ones.

    The expected set is built from `expected_access_ids` plus PUBLIC_RANGE,
    rendered through the file name template. The retrieved names are passed
    through filter_invalid_prefixes before comparison, and any discrepancies
    are printed first.
    """
    retrieved_file_names = set(
        get_keys_available_to_user_from_access_map(
            user_email, group_map, retrieved_access_map
        )
    )

    # Public files are accessible to everyone, so they are always expected
    expected_file_names = {
        file_name_template.format(file_id)
        for file_id in expected_access_ids + PUBLIC_RANGE
    }

    relevant_retrieved_names = filter_invalid_prefixes(retrieved_file_names)
    print_discrepancies(expected_file_names, relevant_retrieved_names)

    assert expected_file_names == relevant_retrieved_names
|
||||
|
||||
|
||||
# This function is supposed to map to the group_sync.py file for the google drive connector
|
||||
# TODO: Call it directly
|
||||
def get_group_map(google_drive_connector: GoogleDriveConnector) -> dict[str, list[str]]:
    """Return a mapping of group email -> member emails for the connector's domain.

    Groups and their members are listed through the admin service built from
    the connector's credentials and primary admin email.
    """
    admin_service = get_admin_service(
        creds=google_drive_connector.creds,
        user_email=google_drive_connector.primary_admin_email,
    )

    all_groups = execute_paginated_retrieval(
        admin_service.groups().list,
        list_key="groups",
        domain=google_drive_connector.google_domain,
        fields="groups(email),nextPageToken",
    )

    members_by_group: dict[str, list[str]] = {}
    for group in all_groups:
        # The group email doubles as the group id
        group_email = group["email"]
        members_by_group[group_email] = [
            member["email"]
            for member in execute_paginated_retrieval(
                admin_service.members().list,
                list_key="members",
                groupKey=group_email,
                fields="members(email),nextPageToken",
            )
        ]
    return members_by_group
|
||||
|
||||
|
||||
@patch(
    "onyx.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_all_permissions(
    mock_get_api_key: MagicMock,
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    """Check that all expected files are retrieved and each user sees the right ones.

    Slim documents are pulled with the admin service-account connector, their
    permissions resolved per document, and the resulting access map compared
    against ACCESS_MAPPING for the admin and the three test users.
    """
    google_drive_connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=True,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )

    # file name -> permissions resolved for that file
    access_map: dict[str, ExternalAccess] = {}
    for slim_doc_batch in google_drive_connector.retrieve_all_slim_documents(
        0, time.time()
    ):
        for slim_doc in slim_doc_batch:
            name = (slim_doc.perm_sync_data or {})["name"]
            access_map[name] = _get_permissions_from_slim_doc(
                google_drive_connector=google_drive_connector,
                slim_doc=slim_doc,
            )
    # Every retrieved name is a key of access_map
    found_file_names = set(access_map)

    for file_name, external_access in access_map.items():
        print(file_name, external_access)

    expected_id_groups = [
        ADMIN_FILE_IDS,  # Admin's My Drive
        ADMIN_FOLDER_3_FILE_IDS,  # Admin's Folder 3
        TEST_USER_1_FILE_IDS,  # TEST_USER_1's My Drive
        TEST_USER_2_FILE_IDS,  # TEST_USER_2's My Drive
        TEST_USER_3_FILE_IDS,  # TEST_USER_3's My Drive
        SHARED_DRIVE_1_FILE_IDS,  # Shared Drive 1
        FOLDER_1_FILE_IDS,  # Folder 1
        FOLDER_1_1_FILE_IDS,  # Folder 1_1
        FOLDER_1_2_FILE_IDS,  # Folder 1_2
        SHARED_DRIVE_2_FILE_IDS,  # Shared Drive 2
        FOLDER_2_FILE_IDS,  # Folder 2
        FOLDER_2_1_FILE_IDS,  # Folder 2_1
        FOLDER_2_2_FILE_IDS,  # Folder 2_2
        SECTIONS_FILE_IDS,  # Sections
    ]
    expected_file_names = {
        file_name_template.format(file_id)
        for id_group in expected_id_groups
        for file_id in id_group
    }

    # Should get everything
    filtered_retrieved_file_names = filter_invalid_prefixes(found_file_names)
    print_discrepancies(expected_file_names, filtered_retrieved_file_names)
    assert expected_file_names == filtered_retrieved_file_names

    group_map = get_group_map(google_drive_connector)

    print("groups:\n", group_map)

    # Same users, same order as checking each one individually
    for user_email in (
        ADMIN_EMAIL,
        TEST_USER_1_EMAIL,
        TEST_USER_2_EMAIL,
        TEST_USER_3_EMAIL,
    ):
        assert_correct_access_for_user(
            user_email=user_email,
            expected_access_ids=ACCESS_MAPPING[user_email],
            group_map=group_map,
            retrieved_access_map=access_map,
        )
|
||||
Reference in New Issue
Block a user