Compare commits

...

4 Commits

Author SHA1 Message Date
Weves
9645972b03 . 2025-11-14 18:16:37 -08:00
Weves
09695130de . 2025-11-14 16:37:51 -08:00
Weves
3f8560e9ea . 2025-11-14 16:37:36 -08:00
Weves
c365c3ef88 chore: more logging in confluence perm sync 2025-11-14 16:34:02 -08:00
6 changed files with 63 additions and 14 deletions

View File

@@ -10,6 +10,7 @@ from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFun
from ee.onyx.external_permissions.utils import generic_doc_sync
from onyx.access.models import DocExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.access import get_all_space_permissions
from onyx.connectors.confluence.connector import ConfluenceConnector
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
from onyx.db.models import ConnectorCredentialPair
@@ -34,8 +35,25 @@ def confluence_doc_sync(
Compares fetched documents against existing documents in the DB for the connector.
If a document exists in the DB but not in the Confluence fetch, it's marked as restricted.
"""
# Fetch space-level access info up front; fail fast below if none can be retrieved
confluence_client_for_space_level_access = ConfluenceConnector(
**cc_pair.connector.connector_specific_config,
)
space_level_access_info = get_all_space_permissions(
confluence_client_for_space_level_access.confluence_client,
confluence_client_for_space_level_access.is_cloud,
)
if not space_level_access_info:
raise ValueError(
"No space level access info found. Likely missing "
"permissions to retrieve spaces/space permissions."
)
# Build the connector used for doc-level access info, seeded with the space-level access info
confluence_connector = ConfluenceConnector(
**cc_pair.connector.connector_specific_config
**cc_pair.connector.connector_specific_config,
space_level_access_info=space_level_access_info,
)
provider = OnyxDBCredentialsProvider(

View File

@@ -19,7 +19,7 @@ def _build_group_member_email_map(
) -> dict[str, set[str]]:
group_member_emails: dict[str, set[str]] = {}
for user in confluence_client.paginated_cql_user_retrieval():
logger.debug(f"Processing groups for user: {user}")
logger.info(f"Processing groups for user: {user}")
email = user.email
if not email:
@@ -31,6 +31,8 @@ def _build_group_member_email_map(
confluence_client=confluence_client,
user_name=user_name,
)
else:
logger.error(f"user result missing username field: {user}")
if not email:
# If we still don't have an email, skip this user
@@ -54,7 +56,7 @@ def _build_group_member_email_map(
emit_background_error(msg, cc_pair_id=cc_pair_id)
logger.error(msg)
else:
logger.debug(f"Found groups {all_users_groups} for user with email {email}")
logger.info(f"Found groups {all_users_groups} for user with email {email}")
if not group_member_emails:
msg = "No groups found for any users."

View File

@@ -148,12 +148,14 @@ def get_all_space_permissions(
]
# Gets the permissions for each space
logger.debug(f"Got {len(all_space_keys)} spaces from confluence")
space_permissions_by_space_key: dict[str, ExternalAccess] = {}
for space_key in all_space_keys:
logger.info(f"Getting space permissions for {space_key}")
space_permissions = get_space_permission(confluence_client, space_key, is_cloud)
# Stores the permissions for each space
space_permissions_by_space_key[space_key] = space_permissions
logger.info(f"Got space permissions for {space_key}: {space_permissions}")
return space_permissions_by_space_key

View File

@@ -15,7 +15,6 @@ from onyx.configs.app_configs import CONFLUENCE_TIMEZONE_OFFSET
from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.access import get_all_space_permissions
from onyx.connectors.confluence.access import get_page_restrictions
from onyx.connectors.confluence.onyx_confluence import extract_text_from_confluence_html
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
@@ -111,6 +110,7 @@ class ConfluenceConnector(
labels_to_skip: list[str] = CONFLUENCE_CONNECTOR_LABELS_TO_SKIP,
timezone_offset: float = CONFLUENCE_TIMEZONE_OFFSET,
scoped_token: bool = False,
space_level_access_info: dict[str, ExternalAccess] | None = None,
) -> None:
self.wiki_base = wiki_base
self.is_cloud = is_cloud
@@ -122,6 +122,7 @@ class ConfluenceConnector(
self.labels_to_skip = labels_to_skip
self.timezone_offset = timezone_offset
self.scoped_token = scoped_token
self.space_level_access_info = space_level_access_info or {}
self._confluence_client: OnyxConfluence | None = None
self._low_timeout_confluence_client: OnyxConfluence | None = None
self._fetched_titles: set[str] = set()
@@ -650,18 +651,16 @@ class ConfluenceConnector(
doc_metadata_list: list[SlimDocument] = []
restrictions_expand = ",".join(_RESTRICTIONS_EXPANSION_FIELDS)
space_level_access_info: dict[str, ExternalAccess] = {}
if include_permissions:
space_level_access_info = get_all_space_permissions(
self.confluence_client, self.is_cloud
)
def get_external_access(
doc_id: str, restrictions: dict[str, Any], ancestors: list[dict[str, Any]]
) -> ExternalAccess | None:
return get_page_restrictions(
self.confluence_client, doc_id, restrictions, ancestors
) or space_level_access_info.get(page_space_key)
space_level_access = self.space_level_access_info.get(page_space_key)
return (
get_page_restrictions(
self.confluence_client, doc_id, restrictions, ancestors
)
or space_level_access
)
# Query pages
page_query = self.base_cql_page_query + self.cql_label_filter

View File

@@ -956,6 +956,11 @@ def get_user_email_from_username__server(
try:
response = confluence_client.get_mobile_parameters(user_name)
email = response.get("email")
if not email:
logger.warning(
f"failed to get confluence email for {user_name}, "
f"missing email field in response. Got response: {response}."
)
except Exception:
logger.warning(f"failed to get confluence email for {user_name}")
# For now, we'll just return None and log a warning. This means

View File

@@ -318,6 +318,29 @@ def test_retrieve_all_slim_docs_perm_sync(
MagicMock(json=lambda: {"results": []}),
]
confluence_client.get_space = MagicMock(
return_value={
"permissions": [
{"subjects": {"user": {"results": [{"email": "test@example.com"}]}}}
]
}
)
confluence_client.get_mobile_parameters = MagicMock(
# real response
return_value={
"userName": "admin",
"fullName": "Admin Test",
"avatarUrl": "/images/icons/profilepics/default.svg",
"url": "/display/~admin",
"email": "admin@onyx-test.com",
"userPreferences": {"watchOwnContent": True},
"unknownUser": False,
"about": "",
"anonymous": False,
}
)
# Call retrieve_all_slim_docs_perm_sync
batches = list(confluence_connector.retrieve_all_slim_docs_perm_sync(0, 100))
assert get_mock.call_count == 4