Compare commits

..

6 Commits

Author SHA1 Message Date
pablonyx
9087320a06 fix 2025-03-06 14:46:20 -08:00
pablonyx
b0af1458c0 ensure checks pass 2025-03-06 14:46:20 -08:00
pablonyx
bb67a7a122 remove unnecessary logs 2025-03-06 14:46:20 -08:00
pablonyx
e239dc31c1 rename 2025-03-06 14:46:19 -08:00
pablonyx
027128502c add csl 2025-03-06 14:46:19 -08:00
Chris Weaver
a7a374dc81 Confluence fixes (#4220)
* Confluence fixes

* Small tweak

* Address greptile comments
2025-03-06 20:57:07 +00:00
10 changed files with 307 additions and 54 deletions

View File

@@ -0,0 +1,125 @@
"""Update GitHub connector repo_name to repositories
Revision ID: 3934b1bc7b62
Revises: b7c2b63c4a03
Create Date: 2025-03-05 10:50:30.516962
"""
from alembic import op
import sqlalchemy as sa
import json
import logging
# revision identifiers, used by Alembic.
revision = "3934b1bc7b62"
down_revision = "b7c2b63c4a03"
branch_labels = None
depends_on = None
logger = logging.getLogger("alembic.runtime.migration")
def upgrade() -> None:
# Get all GitHub connectors
conn = op.get_bind()
# First get all GitHub connectors
github_connectors = conn.execute(
sa.text(
"""
SELECT id, connector_specific_config
FROM connector
WHERE source = 'GITHUB'
"""
)
).fetchall()
# Update each connector's config
updated_count = 0
for connector_id, config in github_connectors:
try:
if not config:
logger.warning(f"Connector {connector_id} has no config, skipping")
continue
# Parse the config if it's a string
if isinstance(config, str):
config = json.loads(config)
if "repo_name" not in config:
continue
# Create new config with repositories instead of repo_name
new_config = dict(config)
repo_name_value = new_config.pop("repo_name")
new_config["repositories"] = repo_name_value
# Update the connector with the new config
conn.execute(
sa.text(
"""
UPDATE connector
SET connector_specific_config = :new_config
WHERE id = :connector_id
"""
),
{"connector_id": connector_id, "new_config": json.dumps(new_config)},
)
updated_count += 1
except Exception as e:
logger.error(f"Error updating connector {connector_id}: {str(e)}")
def downgrade() -> None:
# Get all GitHub connectors
conn = op.get_bind()
logger.debug(
"Starting rollback of GitHub connectors from repositories to repo_name"
)
github_connectors = conn.execute(
sa.text(
"""
SELECT id, connector_specific_config
FROM connector
WHERE source = 'GITHUB'
"""
)
).fetchall()
logger.debug(f"Found {len(github_connectors)} GitHub connectors to rollback")
# Revert each GitHub connector to use repo_name instead of repositories
reverted_count = 0
for connector_id, config in github_connectors:
try:
if not config:
continue
# Parse the config if it's a string
if isinstance(config, str):
config = json.loads(config)
if "repositories" not in config:
continue
# Create new config with repo_name instead of repositories
new_config = dict(config)
repositories_value = new_config.pop("repositories")
new_config["repo_name"] = repositories_value
# Update the connector with the new config
conn.execute(
sa.text(
"""
UPDATE connector
SET connector_specific_config = :new_config
WHERE id = :connector_id
"""
),
{"new_config": json.dumps(new_config), "connector_id": connector_id},
)
reverted_count += 1
except Exception as e:
logger.error(f"Error reverting connector {connector_id}: {str(e)}")

View File

@@ -240,7 +240,7 @@ class ConfluenceConnector(
# Extract basic page information
page_id = page["id"]
page_title = page["title"]
page_url = f"{self.wiki_base}/wiki{page['_links']['webui']}"
page_url = f"{self.wiki_base}{page['_links']['webui']}"
# Get the page content
page_content = extract_text_from_confluence_html(

View File

@@ -124,14 +124,14 @@ class GithubConnector(LoadConnector, PollConnector):
def __init__(
self,
repo_owner: str,
repo_name: str | None = None,
repositories: str | None = None,
batch_size: int = INDEX_BATCH_SIZE,
state_filter: str = "all",
include_prs: bool = True,
include_issues: bool = False,
) -> None:
self.repo_owner = repo_owner
self.repo_name = repo_name
self.repositories = repositories
self.batch_size = batch_size
self.state_filter = state_filter
self.include_prs = include_prs
@@ -157,11 +157,42 @@ class GithubConnector(LoadConnector, PollConnector):
)
try:
return github_client.get_repo(f"{self.repo_owner}/{self.repo_name}")
return github_client.get_repo(f"{self.repo_owner}/{self.repositories}")
except RateLimitExceededException:
_sleep_after_rate_limit_exception(github_client)
return self._get_github_repo(github_client, attempt_num + 1)
def _get_github_repos(
self, github_client: Github, attempt_num: int = 0
) -> list[Repository.Repository]:
"""Get specific repositories based on comma-separated repo_name string."""
if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
raise RuntimeError(
"Re-tried fetching repos too many times. Something is going wrong with fetching objects from Github"
)
try:
repos = []
# Split repo_name by comma and strip whitespace
repo_names = [
name.strip() for name in (cast(str, self.repositories)).split(",")
]
for repo_name in repo_names:
if repo_name: # Skip empty strings
try:
repo = github_client.get_repo(f"{self.repo_owner}/{repo_name}")
repos.append(repo)
except GithubException as e:
logger.warning(
f"Could not fetch repo {self.repo_owner}/{repo_name}: {e}"
)
return repos
except RateLimitExceededException:
_sleep_after_rate_limit_exception(github_client)
return self._get_github_repos(github_client, attempt_num + 1)
def _get_all_repos(
self, github_client: Github, attempt_num: int = 0
) -> list[Repository.Repository]:
@@ -189,11 +220,17 @@ class GithubConnector(LoadConnector, PollConnector):
if self.github_client is None:
raise ConnectorMissingCredentialError("GitHub")
repos = (
[self._get_github_repo(self.github_client)]
if self.repo_name
else self._get_all_repos(self.github_client)
)
repos = []
if self.repositories:
if "," in self.repositories:
# Multiple repositories specified
repos = self._get_github_repos(self.github_client)
else:
# Single repository (backward compatibility)
repos = [self._get_github_repo(self.github_client)]
else:
# All repositories
repos = self._get_all_repos(self.github_client)
for repo in repos:
if self.include_prs:
@@ -268,11 +305,48 @@ class GithubConnector(LoadConnector, PollConnector):
)
try:
if self.repo_name:
test_repo = self.github_client.get_repo(
f"{self.repo_owner}/{self.repo_name}"
)
test_repo.get_contents("")
if self.repositories:
if "," in self.repositories:
# Multiple repositories specified
repo_names = [name.strip() for name in self.repositories.split(",")]
if not repo_names:
raise ConnectorValidationError(
"Invalid connector settings: No valid repository names provided."
)
# Validate at least one repository exists and is accessible
valid_repos = False
validation_errors = []
for repo_name in repo_names:
if not repo_name:
continue
try:
test_repo = self.github_client.get_repo(
f"{self.repo_owner}/{repo_name}"
)
test_repo.get_contents("")
valid_repos = True
# If at least one repo is valid, we can proceed
break
except GithubException as e:
validation_errors.append(
f"Repository '{repo_name}': {e.data.get('message', str(e))}"
)
if not valid_repos:
error_msg = (
"None of the specified repositories could be accessed: "
)
error_msg += ", ".join(validation_errors)
raise ConnectorValidationError(error_msg)
else:
# Single repository (backward compatibility)
test_repo = self.github_client.get_repo(
f"{self.repo_owner}/{self.repositories}"
)
test_repo.get_contents("")
else:
# Try to get organization first
try:
@@ -298,10 +372,15 @@ class GithubConnector(LoadConnector, PollConnector):
"Your GitHub token does not have sufficient permissions for this repository (HTTP 403)."
)
elif e.status == 404:
if self.repo_name:
raise ConnectorValidationError(
f"GitHub repository not found with name: {self.repo_owner}/{self.repo_name}"
)
if self.repositories:
if "," in self.repositories:
raise ConnectorValidationError(
f"None of the specified GitHub repositories could be found for owner: {self.repo_owner}"
)
else:
raise ConnectorValidationError(
f"GitHub repository not found with name: {self.repo_owner}/{self.repositories}"
)
else:
raise ConnectorValidationError(
f"GitHub user or organization not found: {self.repo_owner}"
@@ -310,6 +389,7 @@ class GithubConnector(LoadConnector, PollConnector):
raise ConnectorValidationError(
f"Unexpected GitHub error (status={e.status}): {e.data}"
)
except Exception as exc:
raise Exception(
f"Unexpected error during GitHub settings validation: {exc}"
@@ -321,7 +401,7 @@ if __name__ == "__main__":
connector = GithubConnector(
repo_owner=os.environ["REPO_OWNER"],
repo_name=os.environ["REPO_NAME"],
repositories=os.environ["REPOSITORIES"],
)
connector.load_credentials(
{"github_access_token": os.environ["GITHUB_ACCESS_TOKEN"]}

View File

@@ -464,12 +464,29 @@ def index_doc_batch(
),
)
successful_doc_ids = {record.document_id for record in insertion_records}
if successful_doc_ids != set(updatable_ids):
all_returned_doc_ids = (
{record.document_id for record in insertion_records}
.union(
{
record.failed_document.document_id
for record in vector_db_write_failures
if record.failed_document
}
)
.union(
{
record.failed_document.document_id
for record in embedding_failures
if record.failed_document
}
)
)
if all_returned_doc_ids != set(updatable_ids):
raise RuntimeError(
f"Some documents were not successfully indexed. "
f"Updatable IDs: {updatable_ids}, "
f"Successful IDs: {successful_doc_ids}"
f"Returned IDs: {all_returned_doc_ids}. "
"This should never happen."
)
last_modified_ids = []

View File

@@ -45,7 +45,7 @@ def test_confluence_connector_basic(
with pytest.raises(StopIteration):
next(doc_batch_generator)
assert len(doc_batch) == 3
assert len(doc_batch) == 2
page_within_a_page_doc: Document | None = None
page_doc: Document | None = None

View File

@@ -1,14 +1,17 @@
import {
AnthropicIcon,
AmazonIcon,
AWSIcon,
AzureIcon,
CPUIcon,
MicrosoftIconSVG,
MistralIcon,
MetaIcon,
GeminiIcon,
AnthropicSVG,
IconProps,
OpenAIISVG,
DeepseekIcon,
OpenAISVG,
} from "@/components/icons/icons";
export interface CustomConfigKey {
@@ -71,7 +74,7 @@ export interface LLMProviderDescriptor {
}
export const getProviderIcon = (providerName: string, modelName?: string) => {
const iconMap: Record<
const modelIconMap: Record<
string,
({ size, className }: IconProps) => JSX.Element
> = {
@@ -83,30 +86,34 @@ export const getProviderIcon = (providerName: string, modelName?: string) => {
gemini: GeminiIcon,
deepseek: DeepseekIcon,
claude: AnthropicIcon,
anthropic: AnthropicIcon,
openai: OpenAISVG,
microsoft: MicrosoftIconSVG,
meta: MetaIcon,
google: GeminiIcon,
};
// First check if provider name directly matches an icon
if (providerName.toLowerCase() in iconMap) {
return iconMap[providerName.toLowerCase()];
}
// Then check if model name contains any of the keys
if (modelName) {
const lowerModelName = modelName.toLowerCase();
for (const [key, icon] of Object.entries(iconMap)) {
if (lowerModelName.includes(key)) {
const modelNameToIcon = (
modelName: string,
fallbackIcon: ({ size, className }: IconProps) => JSX.Element
): (({ size, className }: IconProps) => JSX.Element) => {
const lowerModelName = modelName?.toLowerCase();
for (const [key, icon] of Object.entries(modelIconMap)) {
if (lowerModelName?.includes(key)) {
return icon;
}
}
}
return fallbackIcon;
};
// Fallback to CPU icon if no matches
return CPUIcon;
switch (providerName) {
case "openai":
// Special cases for openai based on modelName
return modelNameToIcon(modelName || "", OpenAIISVG);
case "anthropic":
return AnthropicSVG;
case "bedrock":
return AWSIcon;
case "azure":
return AzureIcon;
default:
return modelNameToIcon(modelName || "", CPUIcon);
}
};
export const isAnthropic = (provider: string, modelName: string) =>

View File

@@ -185,10 +185,7 @@ export const FilterComponent = forwardRef<
hasActiveFilters ? "border-primary bg-primary/5" : ""
}`}
>
<SortIcon
size={20}
className="text-neutral-800 dark:text-neutral-200"
/>
<SortIcon size={20} className="text-neutral-800" />
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent
@@ -368,7 +365,7 @@ export const FilterComponent = forwardRef<
{hasActiveFilters && (
<div className="absolute -top-1 -right-1">
<Badge className="h-2 !bg-red-400 !border-red-400 w-2 p-0 border-2 flex items-center justify-center" />
<Badge className="h-2 bg-red-400 border-red-400 w-2 p-0 border-2 flex items-center justify-center" />
</div>
)}
</div>

View File

@@ -40,8 +40,12 @@ export const ConnectorTitle = ({
const typedConnector = connector as Connector<GithubConfig>;
additionalMetadata.set(
"Repo",
typedConnector.connector_specific_config.repo_name
? `${typedConnector.connector_specific_config.repo_owner}/${typedConnector.connector_specific_config.repo_name}`
typedConnector.connector_specific_config.repositories
? `${typedConnector.connector_specific_config.repo_owner}/${
typedConnector.connector_specific_config.repositories.includes(",")
? "multiple repos"
: typedConnector.connector_specific_config.repositories
}`
: `${typedConnector.connector_specific_config.repo_owner}/*`
);
} else if (connector.source === "gitlab") {

View File

@@ -3102,6 +3102,27 @@ export const OpenAISVG = ({
);
};
export const AnthropicSVG = ({
size = 16,
className = defaultTailwindCSS,
}: IconProps) => {
return (
<svg
style={{ width: `${size}px`, height: `${size}px` }}
className={`w-[${size}px] h-[${size}px] ` + className}
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 92.2 65"
xmlSpace="preserve"
fill="currentColor"
>
<path
fill="currentColor"
d="M66.5,0H52.4l25.7,65h14.1L66.5,0z M25.7,0L0,65h14.4l5.3-13.6h26.9L51.8,65h14.4L40.5,0C40.5,0,25.7,0,25.7,0z M24.3,39.3l8.8-22.8l8.8,22.8H24.3z"
/>
</svg>
);
};
export const SourcesIcon = ({
size = 16,
className = defaultTailwindCSS,

View File

@@ -190,10 +190,12 @@ export const connectorConfigs: Record<
fields: [
{
type: "text",
query: "Enter the repository name:",
label: "Repository Name",
name: "repo_name",
query: "Enter the repository name(s):",
label: "Repository Name(s)",
name: "repositories",
optional: false,
description:
"For multiple repositories, enter comma-separated names (e.g., repo1,repo2,repo3)",
},
],
},
@@ -1358,7 +1360,7 @@ export interface WebConfig {
export interface GithubConfig {
repo_owner: string;
repo_name: string;
repositories: string; // Comma-separated list of repository names
include_prs: boolean;
include_issues: boolean;
}