mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-02-27 20:55:45 +00:00
Compare commits
12 Commits
default_py
...
v1.1.0-con
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e8b0b30a34 | ||
|
|
a08950386e | ||
|
|
0b6a78ab96 | ||
|
|
a717f8f996 | ||
|
|
94f93a5915 | ||
|
|
01ecaff4c7 | ||
|
|
fe49844d92 | ||
|
|
65c40bca0e | ||
|
|
5a873c97a3 | ||
|
|
021859f3be | ||
|
|
61453a4a99 | ||
|
|
19c2465a17 |
@@ -1,3 +1,17 @@
|
||||
"""
|
||||
# README (notes on Confluence pagination):
|
||||
|
||||
We've noticed that the `search/users` and `users/memberof` endpoints use offset-based pagination as opposed to cursor-based.
|
||||
We also know that page-retrieval uses cursor-based pagination.
|
||||
|
||||
Our default pagination strategy right now for cloud is to assume cursor-based.
|
||||
However, if you notice that a cloud API is not being properly paginated (i.e., if the `_links.next` is not appearing in the
|
||||
returned payload), then you can force offset-based pagination.
|
||||
|
||||
# TODO (@raunakab)
|
||||
We haven't explored all of the cloud APIs' pagination strategies. @raunakab take time to go through this and figure them out.
|
||||
"""
|
||||
|
||||
import json
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
@@ -46,15 +60,13 @@ _REPLACEMENT_EXPANSIONS = "body.view.value"
|
||||
_USER_NOT_FOUND = "Unknown Confluence User"
|
||||
_USER_ID_TO_DISPLAY_NAME_CACHE: dict[str, str | None] = {}
|
||||
_USER_EMAIL_CACHE: dict[str, str | None] = {}
|
||||
_DEFAULT_PAGINATION_LIMIT = 1000
|
||||
|
||||
|
||||
class ConfluenceRateLimitError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
_DEFAULT_PAGINATION_LIMIT = 1000
|
||||
|
||||
|
||||
class OnyxConfluence:
|
||||
"""
|
||||
This is a custom Confluence class that:
|
||||
@@ -463,6 +475,7 @@ class OnyxConfluence:
|
||||
limit: int | None = None,
|
||||
# Called with the next url to use to get the next page
|
||||
next_page_callback: Callable[[str], None] | None = None,
|
||||
force_offset_pagination: bool = False,
|
||||
) -> Iterator[dict[str, Any]]:
|
||||
"""
|
||||
This will paginate through the top level query.
|
||||
@@ -548,37 +561,31 @@ class OnyxConfluence:
|
||||
)
|
||||
raise e
|
||||
|
||||
# yield the results individually
|
||||
# Yield the results individually.
|
||||
results = cast(list[dict[str, Any]], next_response.get("results", []))
|
||||
# make sure we don't update the start by more than the amount
|
||||
total_results = len(results)
|
||||
# Make sure we don't update the start by more than the amount
|
||||
# of results we were able to retrieve. The Confluence API has a
|
||||
# weird behavior where if you pass in a limit that is too large for
|
||||
# the configured server, it will artificially limit the amount of
|
||||
# results returned BUT will not apply this to the start parameter.
|
||||
# This will cause us to miss results.
|
||||
old_url_suffix = url_suffix
|
||||
updated_start = get_start_param_from_url(old_url_suffix)
|
||||
url_suffix = cast(str, next_response.get("_links", {}).get("next", ""))
|
||||
for i, result in enumerate(results):
|
||||
updated_start += 1
|
||||
if url_suffix and next_page_callback and i == len(results) - 1:
|
||||
# update the url if we're on the last result in the page
|
||||
if not self._is_cloud:
|
||||
# If confluence claims there are more results, we update the start param
|
||||
# based on how many results were returned and try again.
|
||||
if i == total_results - 1:
|
||||
updated_start = get_start_param_from_url(url_suffix) + total_results
|
||||
if not self._is_cloud or force_offset_pagination:
|
||||
url_suffix = update_param_in_path(
|
||||
url_suffix, "start", str(updated_start)
|
||||
)
|
||||
# notify the caller of the new url
|
||||
next_page_callback(url_suffix)
|
||||
# Notify the caller of the new url.
|
||||
if next_page_callback:
|
||||
next_page_callback(url_suffix)
|
||||
|
||||
yield result
|
||||
|
||||
# we've observed that Confluence sometimes returns a next link despite giving
|
||||
# 0 results. This is a bug with Confluence, so we need to check for it and
|
||||
# stop paginating.
|
||||
if url_suffix and not results:
|
||||
if not results:
|
||||
logger.info(
|
||||
f"No results found for call '{old_url_suffix}' despite next link "
|
||||
f"No results found for call '{url_suffix=}' despite next link "
|
||||
"being present. Stopping pagination."
|
||||
)
|
||||
break
|
||||
@@ -596,8 +603,10 @@ class OnyxConfluence:
|
||||
"""
|
||||
The content/search endpoint can be used to fetch pages, attachments, and comments.
|
||||
"""
|
||||
cql_url = self.build_cql_url(cql, expand)
|
||||
yield from self._paginate_url(cql_url, limit)
|
||||
cql_url = self.build_cql_url(cql=cql, expand=expand)
|
||||
yield from self._paginate_url(
|
||||
url_suffix=cql_url, limit=limit, force_offset_pagination=True
|
||||
)
|
||||
|
||||
def paginated_page_retrieval(
|
||||
self,
|
||||
@@ -613,7 +622,9 @@ class OnyxConfluence:
|
||||
"""
|
||||
try:
|
||||
yield from self._paginate_url(
|
||||
cql_url, limit=limit, next_page_callback=next_page_callback
|
||||
cql_url,
|
||||
limit=limit,
|
||||
next_page_callback=next_page_callback,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception(f"Error in paginated_page_retrieval: {e}")
|
||||
@@ -668,7 +679,9 @@ class OnyxConfluence:
|
||||
url = "rest/api/search/user"
|
||||
expand_string = f"&expand={expand}" if expand else ""
|
||||
url += f"?cql={cql}{expand_string}"
|
||||
for user_result in self._paginate_url(url, limit):
|
||||
for user_result in self._paginate_url(
|
||||
url, limit, force_offset_pagination=True
|
||||
):
|
||||
# Example response:
|
||||
# {
|
||||
# 'user': {
|
||||
@@ -758,7 +771,7 @@ class OnyxConfluence:
|
||||
user_query = f"{user_field}={quote(user_value)}"
|
||||
|
||||
url = f"rest/api/user/memberof?{user_query}"
|
||||
yield from self._paginate_url(url, limit)
|
||||
yield from self._paginate_url(url, limit, force_offset_pagination=True)
|
||||
|
||||
def paginated_groups_retrieval(
|
||||
self,
|
||||
|
||||
35
backend/tests/daily/connectors/confluence/conftest.py
Normal file
35
backend/tests/daily/connectors/confluence/conftest.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def confluence_connector_config() -> dict[str, Any]:
|
||||
url_base = os.environ.get("CONFLUENCE_URL")
|
||||
space_key = os.environ.get("CONFLUENCE_SPACE_KEY")
|
||||
page_id = os.environ.get("CONFLUENCE_PAGE_ID")
|
||||
is_cloud = os.environ.get("CONFLUENCE_IS_CLOUD", "").lower() == "true"
|
||||
|
||||
assert url_base, "CONFLUENCE_URL environment variable is required"
|
||||
|
||||
return {
|
||||
"wiki_base": url_base,
|
||||
"is_cloud": is_cloud,
|
||||
"space": space_key or "",
|
||||
"page_id": page_id or "",
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def confluence_credential_json() -> dict[str, Any]:
|
||||
username = os.environ.get("CONFLUENCE_USERNAME")
|
||||
access_token = os.environ.get("CONFLUENCE_ACCESS_TOKEN")
|
||||
|
||||
assert username, "CONFLUENCE_USERNAME environment variable is required"
|
||||
assert access_token, "CONFLUENCE_ACCESS_TOKEN environment variable is required"
|
||||
|
||||
return {
|
||||
"confluence_username": username,
|
||||
"confluence_access_token": access_token,
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
from typing import Any
|
||||
|
||||
from ee.onyx.external_permissions.confluence.group_sync import confluence_group_sync
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.models import InputType
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.engine.sql_engine import SqlEngine
|
||||
from onyx.db.enums import AccessType
|
||||
from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
from onyx.db.models import Connector
|
||||
from onyx.db.models import ConnectorCredentialPair
|
||||
from onyx.db.models import Credential
|
||||
from shared_configs.contextvars import get_current_tenant_id
|
||||
|
||||
|
||||
def test_confluence_group_sync(
|
||||
confluence_connector_config: dict[str, Any],
|
||||
confluence_credential_json: dict[str, Any],
|
||||
) -> None:
|
||||
SqlEngine.init_engine(pool_size=10, max_overflow=10)
|
||||
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
connector = Connector(
|
||||
name="Test Connector",
|
||||
source=DocumentSource.CONFLUENCE,
|
||||
input_type=InputType.POLL,
|
||||
connector_specific_config=confluence_connector_config,
|
||||
refresh_freq=None,
|
||||
prune_freq=None,
|
||||
indexing_start=None,
|
||||
)
|
||||
db_session.add(connector)
|
||||
db_session.flush()
|
||||
|
||||
credential = Credential(
|
||||
source=DocumentSource.CONFLUENCE,
|
||||
credential_json=confluence_credential_json,
|
||||
)
|
||||
db_session.add(credential)
|
||||
db_session.flush()
|
||||
|
||||
cc_pair = ConnectorCredentialPair(
|
||||
connector_id=connector.id,
|
||||
credential_id=credential.id,
|
||||
name="Test CC Pair",
|
||||
status=ConnectorCredentialPairStatus.ACTIVE,
|
||||
access_type=AccessType.SYNC,
|
||||
auto_sync_options=None,
|
||||
)
|
||||
db_session.add(cc_pair)
|
||||
db_session.commit()
|
||||
db_session.refresh(cc_pair)
|
||||
|
||||
tenant_id = get_current_tenant_id()
|
||||
group_sync_iter = confluence_group_sync(
|
||||
tenant_id=tenant_id,
|
||||
cc_pair=cc_pair,
|
||||
)
|
||||
|
||||
for external_user_group in group_sync_iter:
|
||||
print(external_user_group)
|
||||
@@ -30,7 +30,7 @@ def jira_connector() -> JiraConnector:
|
||||
"onyx.file_processing.extract_file_text.get_unstructured_api_key",
|
||||
return_value=None,
|
||||
)
|
||||
def test_jira_connector_basic(reset: None, jira_connector: JiraConnector) -> None:
|
||||
def test_jira_connector_basic(patched: None, jira_connector: JiraConnector) -> None:
|
||||
docs = load_all_docs_from_checkpoint_connector(
|
||||
connector=jira_connector,
|
||||
start=0,
|
||||
|
||||
@@ -12,7 +12,6 @@ import pytest
|
||||
from requests.exceptions import HTTPError
|
||||
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.confluence.connector import ConfluenceCheckpoint
|
||||
from onyx.connectors.confluence.connector import ConfluenceConnector
|
||||
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
|
||||
from onyx.connectors.exceptions import CredentialExpiredError
|
||||
@@ -152,12 +151,7 @@ def test_load_from_checkpoint_happy_path(
|
||||
confluence_client.get = get_mock # type: ignore
|
||||
get_mock.side_effect = [
|
||||
# First page response
|
||||
MagicMock(
|
||||
json=lambda: {
|
||||
"results": [mock_page1, mock_page2],
|
||||
"_links": {"next": "rest/api/content/search?cql=type=page&start=2"},
|
||||
}
|
||||
),
|
||||
MagicMock(json=lambda: {"results": [mock_page1, mock_page2]}),
|
||||
# links and attachemnts responses
|
||||
MagicMock(json=lambda: {"results": []}),
|
||||
MagicMock(json=lambda: {"results": []}),
|
||||
@@ -178,27 +172,34 @@ def test_load_from_checkpoint_happy_path(
|
||||
confluence_connector, 0, end_time
|
||||
)
|
||||
|
||||
# Check that the documents were returned
|
||||
assert len(outputs) == 2
|
||||
actual_length = len(outputs)
|
||||
# We expect 3 batches -even though the API only should fetch 2 batches (since there are 3 docs in total)-
|
||||
# because the way pagination has been updated to act.
|
||||
#
|
||||
# Since `search/user` and `user/memberof` perform offset-based pagination, they'll always return a "new" endpoint to hit.
|
||||
# However, that doesn't actually mean that there is more to fetch. Therefore, we get 3 batches, with the last one being empty.
|
||||
expected_length = 3
|
||||
assert actual_length == expected_length
|
||||
|
||||
checkpoint_output1 = outputs[0]
|
||||
assert len(checkpoint_output1.items) == 2
|
||||
document1 = checkpoint_output1.items[0]
|
||||
assert isinstance(document1, Document)
|
||||
assert document1.id == f"{confluence_connector.wiki_base}/spaces/TEST/pages/1"
|
||||
document2 = checkpoint_output1.items[1]
|
||||
assert isinstance(document2, Document)
|
||||
assert document2.id == f"{confluence_connector.wiki_base}/spaces/TEST/pages/2"
|
||||
assert checkpoint_output1.next_checkpoint == ConfluenceCheckpoint(
|
||||
has_more=True, next_page_url="rest/api/content/search?cql=type%3Dpage&start=2"
|
||||
)
|
||||
checkpoint_output_0 = outputs[0]
|
||||
assert len(checkpoint_output_0.items) == 2
|
||||
document_0 = checkpoint_output_0.items[0]
|
||||
assert isinstance(document_0, Document)
|
||||
assert document_0.id == f"{confluence_connector.wiki_base}/spaces/TEST/pages/1"
|
||||
document_1 = checkpoint_output_0.items[1]
|
||||
assert isinstance(document_1, Document)
|
||||
assert document_1.id == f"{confluence_connector.wiki_base}/spaces/TEST/pages/2"
|
||||
|
||||
checkpoint_output2 = outputs[1]
|
||||
assert len(checkpoint_output2.items) == 1
|
||||
document3 = checkpoint_output2.items[0]
|
||||
assert isinstance(document3, Document)
|
||||
assert document3.id == f"{confluence_connector.wiki_base}/spaces/TEST/pages/3"
|
||||
assert not checkpoint_output2.next_checkpoint.has_more
|
||||
checkpoint_output_1 = outputs[1]
|
||||
assert len(checkpoint_output_1.items) == 1
|
||||
document_2 = checkpoint_output_1.items[0]
|
||||
assert isinstance(document_2, Document)
|
||||
assert document_2.id == f"{confluence_connector.wiki_base}/spaces/TEST/pages/3"
|
||||
assert checkpoint_output_1.next_checkpoint.has_more
|
||||
|
||||
checkpoint_output_2 = outputs[2]
|
||||
assert not checkpoint_output_2.items
|
||||
assert not checkpoint_output_2.next_checkpoint.has_more
|
||||
|
||||
|
||||
def test_load_from_checkpoint_with_page_processing_error(
|
||||
@@ -217,12 +218,7 @@ def test_load_from_checkpoint_with_page_processing_error(
|
||||
confluence_client.get = get_mock # type: ignore
|
||||
get_mock.side_effect = [
|
||||
# First page response
|
||||
MagicMock(
|
||||
json=lambda: {
|
||||
"results": [mock_page1, mock_page2],
|
||||
"_links": {"next": "rest/api/content/search?cql=type=page&start=2"},
|
||||
}
|
||||
),
|
||||
MagicMock(json=lambda: {"results": [mock_page1, mock_page2]}),
|
||||
# Comments for page 1
|
||||
MagicMock(json=lambda: {"results": []}),
|
||||
# Attachments for page 1
|
||||
@@ -232,12 +228,7 @@ def test_load_from_checkpoint_with_page_processing_error(
|
||||
# Attachments for page 2
|
||||
MagicMock(json=lambda: {"results": []}),
|
||||
# Second page response (empty)
|
||||
MagicMock(
|
||||
json=lambda: {
|
||||
"results": [],
|
||||
"_links": {},
|
||||
}
|
||||
),
|
||||
MagicMock(json=lambda: {"results": []}),
|
||||
]
|
||||
|
||||
# Mock _convert_page_to_document to fail for the second page
|
||||
@@ -306,12 +297,7 @@ def test_retrieve_all_slim_documents(
|
||||
confluence_client.get = get_mock # type: ignore
|
||||
get_mock.side_effect = [
|
||||
# First page response
|
||||
MagicMock(
|
||||
json=lambda: {
|
||||
"results": [mock_page1, mock_page2],
|
||||
"_links": {"next": "rest/api/content/search?cql=type=page&start=2"},
|
||||
}
|
||||
),
|
||||
MagicMock(json=lambda: {"results": [mock_page1, mock_page2]}),
|
||||
# links and attachments responses
|
||||
MagicMock(json=lambda: {"results": []}),
|
||||
MagicMock(json=lambda: {"results": []}),
|
||||
@@ -405,12 +391,7 @@ def test_checkpoint_progress(
|
||||
confluence_client.get = get_mock # type: ignore
|
||||
get_mock.side_effect = [
|
||||
# First page response
|
||||
MagicMock(
|
||||
json=lambda: {
|
||||
"results": [mock_page1, mock_page2],
|
||||
"_links": {"next": "rest/api/content/search?cql=type=page&start=2"},
|
||||
}
|
||||
),
|
||||
MagicMock(json=lambda: {"results": [mock_page1, mock_page2]}),
|
||||
MagicMock(json=lambda: {"results": []}),
|
||||
MagicMock(json=lambda: {"results": []}),
|
||||
MagicMock(json=lambda: {"results": []}),
|
||||
@@ -426,10 +407,6 @@ def test_checkpoint_progress(
|
||||
|
||||
first_checkpoint = outputs[0].next_checkpoint
|
||||
|
||||
assert (
|
||||
first_checkpoint.next_page_url
|
||||
== "rest/api/content/search?cql=type%3Dpage&start=2"
|
||||
)
|
||||
assert not outputs[-1].next_checkpoint.has_more
|
||||
|
||||
assert len(outputs[0].items) == 2
|
||||
@@ -442,12 +419,7 @@ def test_checkpoint_progress(
|
||||
# Reset the mock to return the same pages
|
||||
get_mock.side_effect = [
|
||||
# First page response
|
||||
MagicMock(
|
||||
json=lambda: {
|
||||
"results": [mock_page3],
|
||||
"_links": {"next": "rest/api/content/search?cql=type=page&start=3"},
|
||||
}
|
||||
),
|
||||
MagicMock(json=lambda: {"results": [mock_page3]}),
|
||||
MagicMock(json=lambda: {"results": []}),
|
||||
MagicMock(json=lambda: {"results": []}),
|
||||
MagicMock(json=lambda: {"results": []}),
|
||||
|
||||
@@ -104,9 +104,9 @@ def test_cql_paginate_all_expansions_handles_internal_pagination_error(
|
||||
base_top_level_path = (
|
||||
f"rest/api/content/search?cql={top_level_cql}&expand={top_level_expand}"
|
||||
)
|
||||
initial_top_level_path = f"{base_top_level_path}&limit={_DEFAULT_PAGINATION_LIMIT}"
|
||||
|
||||
# --- Mock Responses ---
|
||||
initial_top_level_path = f"{base_top_level_path}&limit={_DEFAULT_PAGINATION_LIMIT}"
|
||||
top_level_raw_response = {
|
||||
"results": [
|
||||
{
|
||||
@@ -140,7 +140,6 @@ def test_cql_paginate_all_expansions_handles_internal_pagination_error(
|
||||
},
|
||||
},
|
||||
],
|
||||
"_links": {},
|
||||
"size": 3,
|
||||
}
|
||||
top_level_response = _create_mock_response(
|
||||
@@ -149,27 +148,49 @@ def test_cql_paginate_all_expansions_handles_internal_pagination_error(
|
||||
url=initial_top_level_path,
|
||||
)
|
||||
|
||||
initial_top_level_path_final = f"{initial_top_level_path}&start=3"
|
||||
top_level_response_final = _create_mock_response(
|
||||
200,
|
||||
{"results": [], "size": 0},
|
||||
url=initial_top_level_path_final,
|
||||
)
|
||||
|
||||
# Expansion 1 - Needs 2 pages
|
||||
exp1_page1_path = f"rest/api/content/1/child?limit={_DEFAULT_PAGINATION_LIMIT}"
|
||||
# Note: _paginate_url internally calculates start for the next page
|
||||
exp1_page2_path = (
|
||||
f"rest/api/content/1/child?start=1&limit={_DEFAULT_PAGINATION_LIMIT}"
|
||||
)
|
||||
exp1_page1_response = _create_mock_response(
|
||||
200,
|
||||
{
|
||||
"results": [{"child_id": 101}],
|
||||
"_links": {"next": f"/{exp1_page2_path}"},
|
||||
"size": 1,
|
||||
},
|
||||
url=exp1_page1_path,
|
||||
)
|
||||
|
||||
# exp1_page1_path_final = f"{exp1_page1_path}&start=1"
|
||||
# exp1_page1_response_final = _create_mock_response(
|
||||
# 200,
|
||||
# {
|
||||
# "results": [],
|
||||
# "size": 0,
|
||||
# },
|
||||
# url=exp1_page1_path_final,
|
||||
# )
|
||||
|
||||
# Note: _paginate_url internally calculates start for the next page
|
||||
exp1_page2_path = f"{exp1_page1_path}&start=1"
|
||||
exp1_page2_response = _create_mock_response(
|
||||
200,
|
||||
{"results": [{"child_id": 102}], "_links": {}, "size": 1},
|
||||
{"results": [{"child_id": 102}], "size": 1},
|
||||
url=exp1_page2_path,
|
||||
)
|
||||
|
||||
exp1_page2_path_final = f"{exp1_page1_path}&start=2"
|
||||
exp1_page2_response_final = _create_mock_response(
|
||||
200,
|
||||
{"results": [], "size": 0},
|
||||
url=exp1_page2_path_final,
|
||||
)
|
||||
|
||||
# Problematic Expansion 2 URLs and Errors during limit reduction
|
||||
exp2_base_path = "rest/api/content/2/child"
|
||||
exp2_reduction_errors = {}
|
||||
@@ -195,7 +216,6 @@ def test_cql_paginate_all_expansions_handles_internal_pagination_error(
|
||||
200,
|
||||
{
|
||||
"results": [{"child_id": 201}],
|
||||
"_links": {"next": f"/{exp2_limit1_page2_path}"},
|
||||
"size": 1,
|
||||
},
|
||||
url=exp2_limit1_page1_path,
|
||||
@@ -205,7 +225,6 @@ def test_cql_paginate_all_expansions_handles_internal_pagination_error(
|
||||
200,
|
||||
{
|
||||
"results": [{"child_id": 203}],
|
||||
"_links": {"next": f"/{exp2_limit1_page4_path}"},
|
||||
"size": 1,
|
||||
},
|
||||
url=exp2_limit1_page3_path,
|
||||
@@ -214,17 +233,24 @@ def test_cql_paginate_all_expansions_handles_internal_pagination_error(
|
||||
500, url=exp2_limit1_page4_path
|
||||
) # This is the one we expect to bubble up
|
||||
exp2_limit1_page5_response = _create_mock_response(
|
||||
200, {"results": [], "_links": {}, "size": 0}, url=exp2_limit1_page5_path
|
||||
200, {"results": [], "size": 0}, url=exp2_limit1_page5_path
|
||||
)
|
||||
|
||||
# Expansion 3
|
||||
exp3_page1_path = f"rest/api/content/3/child?limit={_DEFAULT_PAGINATION_LIMIT}"
|
||||
exp3_page1_response = _create_mock_response(
|
||||
200,
|
||||
{"results": [{"child_id": 301}], "_links": {}, "size": 1},
|
||||
{"results": [{"child_id": 301}], "size": 1},
|
||||
url=exp3_page1_path,
|
||||
)
|
||||
|
||||
exp3_page1_path_final = f"{exp3_page1_path}&start=1"
|
||||
exp3_page1_response_final = _create_mock_response(
|
||||
200,
|
||||
{"results": [], "size": 0},
|
||||
url=exp3_page1_path_final,
|
||||
)
|
||||
|
||||
# --- Side Effect Logic ---
|
||||
mock_get_call_paths: list[str] = []
|
||||
call_counts: dict[str, int] = {} # Track calls to specific failing paths
|
||||
@@ -243,16 +269,25 @@ def test_cql_paginate_all_expansions_handles_internal_pagination_error(
|
||||
if path == initial_top_level_path:
|
||||
print(f"-> Returning top level response for {path}")
|
||||
return top_level_response
|
||||
elif path == initial_top_level_path_final:
|
||||
print(f"-> Returning final top level response for {path}")
|
||||
return top_level_response_final
|
||||
|
||||
# Expansion 1 - Page 1
|
||||
elif path == exp1_page1_path:
|
||||
print(f"-> Returning expansion 1 page 1 for {path}")
|
||||
return exp1_page1_response
|
||||
# elif path == exp1_page1_path_final:
|
||||
# print(f"-> Returning empty expansion 1 page 1 for {path}")
|
||||
# return exp1_page1_response_final
|
||||
|
||||
# Expansion 1 - Page 2
|
||||
elif path == exp1_page2_path:
|
||||
print(f"-> Returning expansion 1 page 2 for {path}")
|
||||
return exp1_page2_response
|
||||
elif path == exp1_page2_path_final:
|
||||
print(f"-> Returning empty expansion 1 page 2 for {path}")
|
||||
return exp1_page2_response_final
|
||||
|
||||
# Expansion 2 - Limit Reduction Errors
|
||||
elif path in exp2_reduction_errors:
|
||||
@@ -287,6 +322,9 @@ def test_cql_paginate_all_expansions_handles_internal_pagination_error(
|
||||
elif path == exp3_page1_path:
|
||||
print(f"-> Returning expansion 3 page 1 for {path}")
|
||||
return exp3_page1_response
|
||||
elif path == exp3_page1_path_final:
|
||||
print(f"-> Returning final expansion 3 page 1 for {path}")
|
||||
return exp3_page1_response_final
|
||||
|
||||
# Fallback
|
||||
print(f"!!! Unexpected GET path in mock: {path}")
|
||||
@@ -305,8 +343,8 @@ def test_cql_paginate_all_expansions_handles_internal_pagination_error(
|
||||
)
|
||||
|
||||
# Verify log for the failures during expansion 2 pagination (page 2 + 4)
|
||||
assert f"Error in confluence call to /{exp2_limit1_page2_path}" in caplog.text
|
||||
assert f"Error in confluence call to /{exp2_limit1_page4_path}" in caplog.text
|
||||
# assert f"Error in confluence call to {exp2_limit1_page2_path}" in caplog.text
|
||||
# assert f"Error in confluence call to {exp2_limit1_page4_path}" in caplog.text
|
||||
|
||||
# Verify sequence of calls to 'get'
|
||||
# 1. Top level
|
||||
@@ -315,13 +353,14 @@ def test_cql_paginate_all_expansions_handles_internal_pagination_error(
|
||||
assert mock_get_call_paths[1] == exp1_page1_path
|
||||
# 3. Expansion 1 (page 2)
|
||||
assert mock_get_call_paths[2] == exp1_page2_path
|
||||
assert mock_get_call_paths[3] == exp1_page2_path_final # final query; gets nothing
|
||||
# 4. Expansion 2 (initial attempt)
|
||||
assert (
|
||||
mock_get_call_paths[3] == f"{exp2_base_path}?limit={_DEFAULT_PAGINATION_LIMIT}"
|
||||
mock_get_call_paths[4] == f"{exp2_base_path}?limit={_DEFAULT_PAGINATION_LIMIT}"
|
||||
)
|
||||
|
||||
# 5+. Expansion 2 (retries due to 500s, down to limit=1)
|
||||
call_index = 4
|
||||
call_index = 5
|
||||
|
||||
# 5+N. Expansion 2 (limit=1, page 1 success)
|
||||
assert mock_get_call_paths[call_index] == exp2_limit1_page1_path
|
||||
@@ -345,6 +384,12 @@ def test_cql_paginate_all_expansions_handles_internal_pagination_error(
|
||||
assert mock_get_call_paths[call_index] == exp3_page1_path
|
||||
call_index += 1
|
||||
|
||||
# Final (empty) calls to wrap everything up
|
||||
assert mock_get_call_paths[call_index] == exp3_page1_path_final
|
||||
call_index += 1
|
||||
assert mock_get_call_paths[call_index] == initial_top_level_path_final
|
||||
call_index += 1
|
||||
|
||||
# Ensure correct number of calls
|
||||
assert len(mock_get_call_paths) == call_index
|
||||
|
||||
@@ -394,6 +439,8 @@ def test_paginated_cql_retrieval_handles_pagination_error(
|
||||
page2_initial_path = f"{base_path}&limit={test_limit}&start={test_limit}"
|
||||
# Page 3 starts after the problematic page 2 is processed (start=test_limit * 2)
|
||||
page3_path = f"{base_path}&limit={test_limit}&start={test_limit * 2}"
|
||||
# Page 4 starts where page 3 left off (start=test_limit * 2 + 2)
|
||||
page4_path = f"{base_path}&limit={test_limit}&start={(test_limit * 2) + 2}"
|
||||
|
||||
# --- Mock Responses ---
|
||||
# Page 1: Success (4 items)
|
||||
@@ -401,7 +448,6 @@ def test_paginated_cql_retrieval_handles_pagination_error(
|
||||
200,
|
||||
{
|
||||
"results": [{"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}],
|
||||
"_links": {"next": f"/{page2_initial_path}"},
|
||||
"size": 4,
|
||||
},
|
||||
url=page1_path,
|
||||
@@ -429,7 +475,6 @@ def test_paginated_cql_retrieval_handles_pagination_error(
|
||||
200,
|
||||
{
|
||||
"results": [{"id": 5}],
|
||||
"_links": {"next": f"/{page2_limit1_item2_path}"},
|
||||
"size": 1,
|
||||
}, # Note: next link might be present but we check results
|
||||
url=page2_limit1_item1_path,
|
||||
@@ -441,7 +486,6 @@ def test_paginated_cql_retrieval_handles_pagination_error(
|
||||
200,
|
||||
{
|
||||
"results": [{"id": 7}],
|
||||
"_links": {"next": f"/{page2_limit1_item4_path}"},
|
||||
"size": 1,
|
||||
},
|
||||
url=page2_limit1_item3_path,
|
||||
@@ -450,7 +494,6 @@ def test_paginated_cql_retrieval_handles_pagination_error(
|
||||
200,
|
||||
{
|
||||
"results": [{"id": 8}],
|
||||
"_links": {"next": f"/{page3_path}"},
|
||||
"size": 1,
|
||||
},
|
||||
url=page2_limit1_item4_path,
|
||||
@@ -459,7 +502,14 @@ def test_paginated_cql_retrieval_handles_pagination_error(
|
||||
# Page 3: Success (2 items)
|
||||
page3_response = _create_mock_response(
|
||||
200,
|
||||
{"results": [{"id": 9}, {"id": 10}], "_links": {}, "size": 2}, # No more pages
|
||||
{"results": [{"id": 9}, {"id": 10}], "size": 2}, # No more pages
|
||||
url=page3_path,
|
||||
)
|
||||
|
||||
# Empty page: Success (0 items)
|
||||
page4_response = _create_mock_response(
|
||||
200,
|
||||
{"results": [], "size": 0},
|
||||
url=page3_path,
|
||||
)
|
||||
|
||||
@@ -502,6 +552,9 @@ def test_paginated_cql_retrieval_handles_pagination_error(
|
||||
elif path == page3_path:
|
||||
print(f"-> Returning page 3 success for {path}")
|
||||
return page3_response
|
||||
elif path == page4_path:
|
||||
print(f"-> Returning page 4 success for {path}")
|
||||
return page4_response
|
||||
# Fallback
|
||||
else:
|
||||
print(f"!!! Unexpected GET path in mock: {path}")
|
||||
@@ -537,7 +590,7 @@ def test_paginated_cql_retrieval_handles_pagination_error(
|
||||
assert results == expected_results
|
||||
|
||||
# Verify log for the skipped item failure
|
||||
assert f"Error in confluence call to /{page2_limit1_item2_path}" in caplog.text
|
||||
assert f"Error in confluence call to {page2_limit1_item2_path}" in caplog.text
|
||||
|
||||
# Verify sequence of calls
|
||||
expected_calls = [
|
||||
@@ -550,6 +603,7 @@ def test_paginated_cql_retrieval_handles_pagination_error(
|
||||
page2_limit1_item4_path, # Page 2 retry item 4 success
|
||||
# _paginate_url continues to next calculated page (page 3)
|
||||
page3_path, # Page 3 success
|
||||
page4_path, # Page 4 success
|
||||
]
|
||||
assert mock_get_call_paths == expected_calls
|
||||
|
||||
@@ -571,10 +625,12 @@ def test_paginated_cql_retrieval_skips_completely_failing_page(
|
||||
|
||||
base_path = f"rest/api/content/search?cql={encoded_cql}"
|
||||
page1_path = f"{base_path}&limit={test_limit}"
|
||||
# Page 2 starts where page 1 left off (start=test_limit)
|
||||
# Page 2 starts where page 1 left off (start=test_limit).
|
||||
page2_initial_path = f"{base_path}&limit={test_limit}&start={test_limit}"
|
||||
# Page 3 starts after the completely failed page 2 (start=test_limit * 2)
|
||||
# Page 3 starts after the completely failed page 2 (start=test_limit * 2).
|
||||
page3_path = f"{base_path}&limit={test_limit}&start={test_limit * 2}"
|
||||
# Empty tail call (since `paginated_cql_retrieval` performs offset-based pagination).
|
||||
page4_path = f"{base_path}&limit={test_limit}&start={(test_limit * 2) + 2}"
|
||||
|
||||
# --- Mock Responses ---
|
||||
# Page 1: Success (3 items)
|
||||
@@ -582,7 +638,6 @@ def test_paginated_cql_retrieval_skips_completely_failing_page(
|
||||
200,
|
||||
{
|
||||
"results": [{"id": 1}, {"id": 2}, {"id": 3}],
|
||||
"_links": {"next": f"/{page2_initial_path}"},
|
||||
"size": 3,
|
||||
},
|
||||
url=page1_path,
|
||||
@@ -602,10 +657,17 @@ def test_paginated_cql_retrieval_skips_completely_failing_page(
|
||||
# Page 3: Success (2 items)
|
||||
page3_response = _create_mock_response(
|
||||
200,
|
||||
{"results": [{"id": 7}, {"id": 8}], "_links": {}, "size": 2},
|
||||
{"results": [{"id": 7}, {"id": 8}], "size": 2},
|
||||
url=page3_path,
|
||||
)
|
||||
|
||||
# Page 4: Success (0 items; this is the end)
|
||||
page4_response = _create_mock_response(
|
||||
200,
|
||||
{"results": [], "size": 0},
|
||||
url=page4_path,
|
||||
)
|
||||
|
||||
# --- Side Effect Logic ---
|
||||
mock_get_call_paths: list[str] = []
|
||||
call_counts: dict[str, int] = {}
|
||||
@@ -632,6 +694,9 @@ def test_paginated_cql_retrieval_skips_completely_failing_page(
|
||||
elif path == page3_path:
|
||||
print(f"-> Returning page 3 success for {path}")
|
||||
return page3_response
|
||||
elif path == page4_path:
|
||||
print(f"-> Returning page 4 success for {path}")
|
||||
return page4_response
|
||||
else:
|
||||
print(f"!!! Unexpected GET path in mock: {path}")
|
||||
raise RuntimeError(f"Unexpected GET path in mock: {path}")
|
||||
@@ -660,22 +725,16 @@ def test_paginated_cql_retrieval_skips_completely_failing_page(
|
||||
|
||||
# Verify logs for the failed retry attempts on page 2
|
||||
for failed_path in page2_limit1_retry_errors:
|
||||
assert f"Error in confluence call to /{failed_path}" in caplog.text
|
||||
assert (
|
||||
f"Error in confluence call to {page2_initial_path}" not in caplog.text
|
||||
) # Initial error triggers retry, not direct logging in _paginate_url
|
||||
assert f"Error in confluence call to {failed_path}" in caplog.text
|
||||
|
||||
# Verify sequence of calls
|
||||
expected_calls = [
|
||||
page1_path, # Page 1 success
|
||||
page2_initial_path, # Page 2 initial fail (500)
|
||||
*page2_limit1_retry_errors,
|
||||
page3_path,
|
||||
page4_path,
|
||||
]
|
||||
# Add the failed limit=1 retry calls for page 2
|
||||
expected_calls.extend(list(page2_limit1_retry_errors.keys()))
|
||||
# The retry loop should make one final call to check if there are more items
|
||||
# expected_calls.append(page2_limit1_final_empty_path)
|
||||
# Add the call to page 3
|
||||
expected_calls.append(page3_path)
|
||||
|
||||
assert mock_get_call_paths == expected_calls
|
||||
|
||||
@@ -713,7 +772,6 @@ def test_paginated_cql_retrieval_cloud_no_retry_on_error(
|
||||
200,
|
||||
{
|
||||
"results": [{"id": i} for i in range(test_limit)],
|
||||
"_links": {"next": f"/{page2_path}"},
|
||||
"size": test_limit,
|
||||
},
|
||||
url=page1_path,
|
||||
|
||||
Reference in New Issue
Block a user