Compare commits

...

3 Commits

Author SHA1 Message Date
hagen-danswer
00584643a2 mypy fixes 2024-12-03 15:07:00 -08:00
hagen-danswer
725b23d9b0 test finished 2024-11-28 13:28:23 -08:00
hagen-danswer
c13c7bd73a Added jira permission sync and tests 2024-11-26 12:43:28 -08:00
15 changed files with 286 additions and 26 deletions

View File

@@ -318,12 +318,14 @@ def build_confluence_client(
credentials: dict[str, Any],
is_cloud: bool,
wiki_base: str,
should_validate: bool = True,
) -> OnyxConfluence:
_validate_connector_configuration(
credentials=credentials,
is_cloud=is_cloud,
wiki_base=wiki_base,
)
if should_validate:
_validate_connector_configuration(
credentials=credentials,
is_cloud=is_cloud,
wiki_base=wiki_base,
)
return OnyxConfluence(
api_version="cloud" if is_cloud else "latest",
# Remove trailing slash from wiki_base if present

View File

@@ -35,8 +35,8 @@ from danswer.utils.logger import setup_logger
logger = setup_logger()
JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2"
_JIRA_SLIM_PAGE_SIZE = 500
_JIRA_FULL_PAGE_SIZE = 50
_JIRA_SLIM_PAGE_SIZE = 1000
_JIRA_FULL_PAGE_SIZE = 100
def _paginate_jql_search(
@@ -70,7 +70,7 @@ def _paginate_jql_search(
start += max_results
def fetch_jira_issues_batch(
def _fetch_jira_issues_as_docs(
jira_client: JIRA,
jql: str,
batch_size: int,
@@ -111,7 +111,7 @@ def fetch_jira_issues_batch(
)
continue
page_url = f"{jira_client.client_info()}/browse/{issue.key}"
page_url = build_jira_url(jira_client, issue.key)
people = set()
try:
@@ -196,7 +196,7 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):
jql = f"project = {self.quoted_jira_project}"
document_batch = []
for doc in fetch_jira_issues_batch(
for doc in _fetch_jira_issues_as_docs(
jira_client=self.jira_client,
jql=jql,
batch_size=_JIRA_FULL_PAGE_SIZE,
@@ -227,7 +227,7 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):
)
document_batch = []
for doc in fetch_jira_issues_batch(
for doc in _fetch_jira_issues_as_docs(
jira_client=self.jira_client,
jql=jql,
batch_size=_JIRA_FULL_PAGE_SIZE,

View File

@@ -39,13 +39,37 @@ def best_effort_basic_expert_info(obj: Any) -> BasicExpertInfo | None:
def best_effort_get_field_from_issue(jira_issue: Issue, field: str) -> Any:
if hasattr(jira_issue.fields, field):
return getattr(jira_issue.fields, field)
"""
Try to get a field from the issue in the following order:
1. jira_issue.fields.field
2. jira_issue.raw["fields"][field]
3. jira_issue.field
4. jira_issue.raw[field]
"""
try:
if hasattr(jira_issue.fields, field):
return getattr(jira_issue.fields, field)
except Exception:
pass
try:
return jira_issue.raw["fields"][field]
except Exception:
return None
pass
try:
if hasattr(jira_issue, field):
return getattr(jira_issue, field)
except Exception:
pass
try:
return jira_issue.raw[field]
except Exception:
pass
return None
def extract_text_from_adf(adf: dict | None) -> str:

View File

@@ -258,14 +258,11 @@ def confluence_doc_sync(
**cc_pair.connector.connector_specific_config
)
confluence_connector.load_credentials(cc_pair.credential.credential_json)
if confluence_connector.confluence_client is None:
raise ValueError("Failed to load credentials")
confluence_client = confluence_connector.confluence_client
is_cloud = cc_pair.connector.connector_specific_config.get("is_cloud", False)
space_permissions_by_space_key = _get_space_permissions(
confluence_client=confluence_client,
confluence_client=confluence_connector.confluence_client,
is_cloud=is_cloud,
)
@@ -274,7 +271,7 @@ def confluence_doc_sync(
slim_docs.extend(doc_batch)
return _fetch_all_page_restrictions_for_space(
confluence_client=confluence_client,
confluence_client=confluence_connector.confluence_client,
slim_docs=slim_docs,
space_permissions_by_space_key=space_permissions_by_space_key,
)

View File

@@ -9,7 +9,7 @@ from ee.danswer.db.external_perm import ExternalUserGroup
logger = setup_logger()
def _build_group_member_email_map(
def build_group_member_email_map(
confluence_client: OnyxConfluence,
) -> dict[str, set[str]]:
group_member_emails: dict[str, set[str]] = {}
@@ -46,7 +46,7 @@ def confluence_group_sync(
wiki_base=cc_pair.connector.connector_specific_config["wiki_base"],
)
group_member_email_map = _build_group_member_email_map(
group_member_email_map = build_group_member_email_map(
confluence_client=confluence_client,
)
danswer_groups: list[ExternalUserGroup] = []

View File

@@ -0,0 +1,73 @@
from jira import JIRA
from danswer.access.models import DocExternalAccess
from danswer.access.models import ExternalAccess
from danswer.connectors.danswer_jira.connector import JiraConnector
from danswer.connectors.danswer_jira.utils import extract_jira_project
from danswer.db.models import ConnectorCredentialPair
from danswer.utils.logger import setup_logger
logger = setup_logger()
# Number of users requested per call (sent as "maxResults") when paging through
# the Jira user search in _get_project_permissions.
# Max is 1k (presumably the largest page size the endpoint accepts -- TODO confirm)
_PAGE_SIZE = 1000
def _get_project_permissions(
    jira_client: JIRA,
    jira_project_key: str,
) -> ExternalAccess:
    """
    Collect the email addresses returned by the Jira "/user/viewissue/search"
    endpoint for the given project key and wrap them in an ExternalAccess.

    The endpoint is queried one page at a time, _PAGE_SIZE results per
    request. Paging stops after the first page that holds fewer than
    _PAGE_SIZE entries. Entries without a truthy "emailAddress" are ignored.
    """
    search_params = {
        "query": "*",
        "projectKey": jira_project_key,
        "maxResults": _PAGE_SIZE,
    }
    emails = set()
    offset = 0
    more_pages = True
    while more_pages:
        search_params["startAt"] = offset
        page = jira_client._get_json(
            path="/user/viewissue/search", params=search_params
        )
        for entry in page:
            address = entry.get("emailAddress")
            if address:
                emails.add(address)

        # A page that is not full is treated as the last one
        more_pages = len(page) >= _PAGE_SIZE
        offset += _PAGE_SIZE

    return ExternalAccess(
        external_user_emails=emails,
        # No groups are attached here; access is expressed purely as user emails
        external_user_group_ids=set(),
        is_public=False,
    )
def jira_doc_sync(
    cc_pair: ConnectorCredentialPair,
) -> list[DocExternalAccess]:
    """
    Build one DocExternalAccess per slim document of a Jira connector.

    We assume each Jira connector has a 1-1 relationship with a Jira project,
    so every document of the connector is given the permissions looked up for
    that project.
    """
    connector_config = cc_pair.connector.connector_specific_config
    jira_connector = JiraConnector(**connector_config)
    jira_connector.load_credentials(cc_pair.credential.credential_json)

    # Only the project key half of the parsed project URL is used here
    _base_url, project_key = extract_jira_project(
        connector_config["jira_project_url"]
    )

    shared_access = _get_project_permissions(
        jira_client=jira_connector.jira_client,
        jira_project_key=project_key,
    )

    # The same ExternalAccess object is attached to every document
    return [
        DocExternalAccess(doc_id=slim_doc.id, external_access=shared_access)
        for batch in jira_connector.retrieve_all_slim_documents()
        for slim_doc in batch
    ]

View File

@@ -0,0 +1,81 @@
from typing import Any
from danswer.connectors.confluence.onyx_confluence import build_confluence_client
from danswer.connectors.danswer_jira.utils import extract_jira_project
from danswer.db.models import ConnectorCredentialPair
from danswer.utils.logger import setup_logger
from ee.danswer.db.external_perm import ExternalUserGroup
from ee.danswer.external_permissions.confluence.group_sync import (
build_group_member_email_map,
)
logger = setup_logger()
def _convert_jira_credentials_to_confluence_credentials(
jira_credentials: dict[str, Any]
) -> dict[str, Any]:
return {
# This one is optional in jira connector
# (and probably should be optional in conflunece setup as well)
"confluence_username": jira_credentials.get("jira_user_email"),
# This one is not optional
"confluence_access_token": jira_credentials["jira_api_token"],
}
_POTENTIAL_CLOUD_DOMAINS = ["atlassian.net", "jira.com"]
def _determine_if_config_is_cloud(
credentials: dict[str, Any],
given_jira_url: str,
) -> bool:
"""
This may not work if someone has a Jira Server instance that contains atlassian.net or jira.com
in the URL.
Or if someone has atlassian cloud instance that doesn't contain atlassian.net or jira.com
"""
if not credentials.get("jira_user_email"):
return False
return any(domain in given_jira_url for domain in _POTENTIAL_CLOUD_DOMAINS)
def jira_group_sync(
    cc_pair: ConnectorCredentialPair,
) -> list[ExternalUserGroup]:
    """
    Build the external user groups for a Jira connector.

    We use the existing confluence group sync helper functions because
    atlassian groups are shared between confluence and jira. A Confluence
    client is built from the Jira credentials and the Jira base URL, and the
    group -> member-email map it yields is turned into ExternalUserGroup
    objects.
    """
    project_url = cc_pair.connector.connector_specific_config["jira_project_url"]
    jira_base_url, _project_key = extract_jira_project(project_url)

    jira_credentials = cc_pair.credential.credential_json
    confluence_credentials = _convert_jira_credentials_to_confluence_credentials(
        jira_credentials
    )
    is_cloud = _determine_if_config_is_cloud(
        credentials=jira_credentials,
        given_jira_url=jira_base_url,
    )

    # The Jira base URL is passed as wiki_base and connector validation is
    # switched off for this client
    confluence_client = build_confluence_client(
        credentials=confluence_credentials,
        is_cloud=is_cloud,
        wiki_base=jira_base_url,
        should_validate=False,
    )

    members_by_group = build_group_member_email_map(confluence_client)
    return [
        ExternalUserGroup(id=group_id, user_emails=list(member_emails))
        for group_id, member_emails in members_by_group.items()
    ]

View File

@@ -9,6 +9,7 @@ from ee.danswer.external_permissions.confluence.group_sync import confluence_gro
from ee.danswer.external_permissions.gmail.doc_sync import gmail_doc_sync
from ee.danswer.external_permissions.google_drive.doc_sync import gdrive_doc_sync
from ee.danswer.external_permissions.google_drive.group_sync import gdrive_group_sync
from ee.danswer.external_permissions.jira.doc_sync import jira_doc_sync
from ee.danswer.external_permissions.jira.group_sync import jira_group_sync
from ee.danswer.external_permissions.slack.doc_sync import slack_doc_sync
# Defining the input/output types for the sync functions
@@ -36,6 +37,7 @@ DOC_PERMISSIONS_FUNC_MAP: dict[DocumentSource, DocSyncFuncType] = {
DocumentSource.CONFLUENCE: confluence_doc_sync,
DocumentSource.SLACK: slack_doc_sync,
DocumentSource.GMAIL: gmail_doc_sync,
DocumentSource.JIRA: jira_doc_sync,
}
# These functions update:
@@ -45,6 +47,7 @@ DOC_PERMISSIONS_FUNC_MAP: dict[DocumentSource, DocSyncFuncType] = {
GROUP_PERMISSIONS_FUNC_MAP: dict[DocumentSource, GroupSyncFuncType] = {
    DocumentSource.GOOGLE_DRIVE: gdrive_group_sync,
    DocumentSource.CONFLUENCE: confluence_group_sync,
    # Jira needs its own entry point: confluence_group_sync reads the
    # Confluence-only "wiki_base" config key, while jira_group_sync derives
    # the base URL from "jira_project_url" and converts the Jira credentials
    # before reusing the Confluence group helpers
    DocumentSource.JIRA: jira_group_sync,
}

View File

@@ -0,0 +1,36 @@
import os
from unittest.mock import MagicMock
import pytest
from danswer.access.models import DocExternalAccess
from danswer.db.models import ConnectorCredentialPair
from ee.danswer.external_permissions.jira.doc_sync import jira_doc_sync
@pytest.fixture
def mock_jira_cc_pair() -> ConnectorCredentialPair:
    """
    Mocked ConnectorCredentialPair for the danswerai Jira project URL below.

    Credentials are read from the JIRA_USER_EMAIL and JIRA_API_TOKEN
    environment variables (KeyError if either is unset).
    """
    connector_config = {
        "jira_project_url": "https://danswerai.atlassian.net/jira/software/c/projects/AS/boards/6"
    }
    credential_json = {
        "jira_user_email": os.environ["JIRA_USER_EMAIL"],
        "jira_api_token": os.environ["JIRA_API_TOKEN"],
    }

    cc_pair = MagicMock(spec=ConnectorCredentialPair)
    cc_pair.connector.connector_specific_config = connector_config
    cc_pair.credential.credential_json = credential_json
    return cc_pair
# remove this once it's setup for our test accounts
@pytest.mark.xfail(reason="This is set up to our dev instance which may cause flakes")
def test_jira_doc_sync(mock_jira_cc_pair: ConnectorCredentialPair) -> None:
    """jira_doc_sync should yield exactly one document carrying the expected emails."""
    expected_doc_id = "https://danswerai.atlassian.net/browse/AS-2"
    expected_emails = {
        "chris@danswer.ai",
        "hagen@danswer.ai",
    }

    synced_docs: list[DocExternalAccess] = jira_doc_sync(mock_jira_cc_pair)

    assert len(synced_docs) == 1
    only_doc = synced_docs[0]
    assert only_doc.doc_id == expected_doc_id
    assert only_doc.external_access.external_user_emails == expected_emails

View File

@@ -0,0 +1,42 @@
import os
from unittest.mock import MagicMock
import pytest
from danswer.db.models import ConnectorCredentialPair
from ee.danswer.db.external_perm import ExternalUserGroup
from ee.danswer.external_permissions.jira.group_sync import jira_group_sync
@pytest.fixture
def mock_jira_cc_pair() -> ConnectorCredentialPair:
    """
    Mocked ConnectorCredentialPair for the danswerai Jira project URL below.

    Credentials are read from the JIRA_USER_EMAIL and JIRA_API_TOKEN
    environment variables (KeyError if either is unset).
    """
    connector_config = {
        "jira_project_url": "https://danswerai.atlassian.net/jira/software/c/projects/AS/boards/6"
    }
    credential_json = {
        "jira_user_email": os.environ["JIRA_USER_EMAIL"],
        "jira_api_token": os.environ["JIRA_API_TOKEN"],
    }

    cc_pair = MagicMock(spec=ConnectorCredentialPair)
    cc_pair.connector.connector_specific_config = connector_config
    cc_pair.credential.credential_json = credential_json
    return cc_pair
# remove this once it's setup for our test accounts
@pytest.mark.xfail(reason="This is set up to our dev instance which may cause flakes")
def test_jira_group_sync(mock_jira_cc_pair: ConnectorCredentialPair) -> None:
    """jira_group_sync should return the six known groups with their member emails."""
    members_by_group: dict[str, set[str]] = {
        "org-admins": {"chris@danswer.ai"},
        "jira-users-danswerai": {"chris@danswer.ai", "hagen@danswer.ai"},
        "jira-admins-danswerai": {"hagen@danswer.ai"},
        "confluence-user-access-admins-danswerai": {"hagen@danswer.ai"},
        "jira-user-access-admins-danswerai": {"hagen@danswer.ai"},
        "confluence-users-danswerai": {"chris@danswer.ai", "hagen@danswer.ai"},
    }

    synced_groups: list[ExternalUserGroup] = jira_group_sync(mock_jira_cc_pair)

    assert len(synced_groups) == 6
    for synced_group in synced_groups:
        # Every returned group must be known and have exactly the expected members
        assert synced_group.id in members_by_group
        assert set(synced_group.user_emails) == members_by_group[synced_group.id]

View File

@@ -7,7 +7,7 @@ import pytest
from jira.resources import Issue
from pytest_mock import MockFixture
from danswer.connectors.danswer_jira.connector import fetch_jira_issues_batch
from danswer.connectors.danswer_jira.connector import _fetch_jira_issues_as_docs
@pytest.fixture
@@ -79,7 +79,7 @@ def test_fetch_jira_issues_batch_small_ticket(
) -> None:
mock_jira_client.search_issues.return_value = [mock_issue_small]
docs = list(fetch_jira_issues_batch(mock_jira_client, "project = TEST", 50))
docs = list(_fetch_jira_issues_as_docs(mock_jira_client, "project = TEST", 50))
assert len(docs) == 1
assert docs[0].id.endswith("/SMALL-1")
@@ -95,7 +95,7 @@ def test_fetch_jira_issues_batch_large_ticket(
) -> None:
mock_jira_client.search_issues.return_value = [mock_issue_large]
docs = list(fetch_jira_issues_batch(mock_jira_client, "project = TEST", 50))
docs = list(_fetch_jira_issues_as_docs(mock_jira_client, "project = TEST", 50))
assert len(docs) == 0 # The large ticket should be skipped
@@ -108,7 +108,7 @@ def test_fetch_jira_issues_batch_mixed_tickets(
) -> None:
mock_jira_client.search_issues.return_value = [mock_issue_small, mock_issue_large]
docs = list(fetch_jira_issues_batch(mock_jira_client, "project = TEST", 50))
docs = list(_fetch_jira_issues_as_docs(mock_jira_client, "project = TEST", 50))
assert len(docs) == 1 # Only the small ticket should be included
assert docs[0].id.endswith("/SMALL-1")
@@ -123,6 +123,6 @@ def test_fetch_jira_issues_batch_custom_size_limit(
) -> None:
mock_jira_client.search_issues.return_value = [mock_issue_small, mock_issue_large]
docs = list(fetch_jira_issues_batch(mock_jira_client, "project = TEST", 50))
docs = list(_fetch_jira_issues_as_docs(mock_jira_client, "project = TEST", 50))
assert len(docs) == 0 # Both tickets should be skipped due to the low size limit

0
jira/client.py Normal file
View File

View File

@@ -15,4 +15,5 @@ export const autoSyncConfigBySource: Record<
google_drive: {},
gmail: {},
slack: {},
jira: {},
};

View File

@@ -311,5 +311,6 @@ export const validAutoSyncSources = [
"google_drive",
"gmail",
"slack",
"jira",
] as const;
export type ValidAutoSyncSources = (typeof validAutoSyncSources)[number];