mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-02-26 04:05:48 +00:00
Compare commits
6 Commits
consistent
...
github_lis
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9087320a06 | ||
|
|
b0af1458c0 | ||
|
|
bb67a7a122 | ||
|
|
e239dc31c1 | ||
|
|
027128502c | ||
|
|
a7a374dc81 |
@@ -0,0 +1,125 @@
|
||||
"""Update GitHub connector repo_name to repositories
|
||||
|
||||
Revision ID: 3934b1bc7b62
|
||||
Revises: b7c2b63c4a03
|
||||
Create Date: 2025-03-05 10:50:30.516962
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
import json
|
||||
import logging
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "3934b1bc7b62"
|
||||
down_revision = "b7c2b63c4a03"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
logger = logging.getLogger("alembic.runtime.migration")
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Get all GitHub connectors
|
||||
conn = op.get_bind()
|
||||
|
||||
# First get all GitHub connectors
|
||||
github_connectors = conn.execute(
|
||||
sa.text(
|
||||
"""
|
||||
SELECT id, connector_specific_config
|
||||
FROM connector
|
||||
WHERE source = 'GITHUB'
|
||||
"""
|
||||
)
|
||||
).fetchall()
|
||||
|
||||
# Update each connector's config
|
||||
updated_count = 0
|
||||
for connector_id, config in github_connectors:
|
||||
try:
|
||||
if not config:
|
||||
logger.warning(f"Connector {connector_id} has no config, skipping")
|
||||
continue
|
||||
|
||||
# Parse the config if it's a string
|
||||
if isinstance(config, str):
|
||||
config = json.loads(config)
|
||||
|
||||
if "repo_name" not in config:
|
||||
continue
|
||||
|
||||
# Create new config with repositories instead of repo_name
|
||||
new_config = dict(config)
|
||||
repo_name_value = new_config.pop("repo_name")
|
||||
new_config["repositories"] = repo_name_value
|
||||
|
||||
# Update the connector with the new config
|
||||
conn.execute(
|
||||
sa.text(
|
||||
"""
|
||||
UPDATE connector
|
||||
SET connector_specific_config = :new_config
|
||||
WHERE id = :connector_id
|
||||
"""
|
||||
),
|
||||
{"connector_id": connector_id, "new_config": json.dumps(new_config)},
|
||||
)
|
||||
updated_count += 1
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating connector {connector_id}: {str(e)}")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Get all GitHub connectors
|
||||
conn = op.get_bind()
|
||||
|
||||
logger.debug(
|
||||
"Starting rollback of GitHub connectors from repositories to repo_name"
|
||||
)
|
||||
|
||||
github_connectors = conn.execute(
|
||||
sa.text(
|
||||
"""
|
||||
SELECT id, connector_specific_config
|
||||
FROM connector
|
||||
WHERE source = 'GITHUB'
|
||||
"""
|
||||
)
|
||||
).fetchall()
|
||||
|
||||
logger.debug(f"Found {len(github_connectors)} GitHub connectors to rollback")
|
||||
|
||||
# Revert each GitHub connector to use repo_name instead of repositories
|
||||
reverted_count = 0
|
||||
for connector_id, config in github_connectors:
|
||||
try:
|
||||
if not config:
|
||||
continue
|
||||
|
||||
# Parse the config if it's a string
|
||||
if isinstance(config, str):
|
||||
config = json.loads(config)
|
||||
|
||||
if "repositories" not in config:
|
||||
continue
|
||||
|
||||
# Create new config with repo_name instead of repositories
|
||||
new_config = dict(config)
|
||||
repositories_value = new_config.pop("repositories")
|
||||
new_config["repo_name"] = repositories_value
|
||||
|
||||
# Update the connector with the new config
|
||||
conn.execute(
|
||||
sa.text(
|
||||
"""
|
||||
UPDATE connector
|
||||
SET connector_specific_config = :new_config
|
||||
WHERE id = :connector_id
|
||||
"""
|
||||
),
|
||||
{"new_config": json.dumps(new_config), "connector_id": connector_id},
|
||||
)
|
||||
reverted_count += 1
|
||||
except Exception as e:
|
||||
logger.error(f"Error reverting connector {connector_id}: {str(e)}")
|
||||
@@ -240,7 +240,7 @@ class ConfluenceConnector(
|
||||
# Extract basic page information
|
||||
page_id = page["id"]
|
||||
page_title = page["title"]
|
||||
page_url = f"{self.wiki_base}/wiki{page['_links']['webui']}"
|
||||
page_url = f"{self.wiki_base}{page['_links']['webui']}"
|
||||
|
||||
# Get the page content
|
||||
page_content = extract_text_from_confluence_html(
|
||||
|
||||
@@ -124,14 +124,14 @@ class GithubConnector(LoadConnector, PollConnector):
|
||||
def __init__(
|
||||
self,
|
||||
repo_owner: str,
|
||||
repo_name: str | None = None,
|
||||
repositories: str | None = None,
|
||||
batch_size: int = INDEX_BATCH_SIZE,
|
||||
state_filter: str = "all",
|
||||
include_prs: bool = True,
|
||||
include_issues: bool = False,
|
||||
) -> None:
|
||||
self.repo_owner = repo_owner
|
||||
self.repo_name = repo_name
|
||||
self.repositories = repositories
|
||||
self.batch_size = batch_size
|
||||
self.state_filter = state_filter
|
||||
self.include_prs = include_prs
|
||||
@@ -157,11 +157,42 @@ class GithubConnector(LoadConnector, PollConnector):
|
||||
)
|
||||
|
||||
try:
|
||||
return github_client.get_repo(f"{self.repo_owner}/{self.repo_name}")
|
||||
return github_client.get_repo(f"{self.repo_owner}/{self.repositories}")
|
||||
except RateLimitExceededException:
|
||||
_sleep_after_rate_limit_exception(github_client)
|
||||
return self._get_github_repo(github_client, attempt_num + 1)
|
||||
|
||||
def _get_github_repos(
|
||||
self, github_client: Github, attempt_num: int = 0
|
||||
) -> list[Repository.Repository]:
|
||||
"""Get specific repositories based on comma-separated repo_name string."""
|
||||
if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
|
||||
raise RuntimeError(
|
||||
"Re-tried fetching repos too many times. Something is going wrong with fetching objects from Github"
|
||||
)
|
||||
|
||||
try:
|
||||
repos = []
|
||||
# Split repo_name by comma and strip whitespace
|
||||
repo_names = [
|
||||
name.strip() for name in (cast(str, self.repositories)).split(",")
|
||||
]
|
||||
|
||||
for repo_name in repo_names:
|
||||
if repo_name: # Skip empty strings
|
||||
try:
|
||||
repo = github_client.get_repo(f"{self.repo_owner}/{repo_name}")
|
||||
repos.append(repo)
|
||||
except GithubException as e:
|
||||
logger.warning(
|
||||
f"Could not fetch repo {self.repo_owner}/{repo_name}: {e}"
|
||||
)
|
||||
|
||||
return repos
|
||||
except RateLimitExceededException:
|
||||
_sleep_after_rate_limit_exception(github_client)
|
||||
return self._get_github_repos(github_client, attempt_num + 1)
|
||||
|
||||
def _get_all_repos(
|
||||
self, github_client: Github, attempt_num: int = 0
|
||||
) -> list[Repository.Repository]:
|
||||
@@ -189,11 +220,17 @@ class GithubConnector(LoadConnector, PollConnector):
|
||||
if self.github_client is None:
|
||||
raise ConnectorMissingCredentialError("GitHub")
|
||||
|
||||
repos = (
|
||||
[self._get_github_repo(self.github_client)]
|
||||
if self.repo_name
|
||||
else self._get_all_repos(self.github_client)
|
||||
)
|
||||
repos = []
|
||||
if self.repositories:
|
||||
if "," in self.repositories:
|
||||
# Multiple repositories specified
|
||||
repos = self._get_github_repos(self.github_client)
|
||||
else:
|
||||
# Single repository (backward compatibility)
|
||||
repos = [self._get_github_repo(self.github_client)]
|
||||
else:
|
||||
# All repositories
|
||||
repos = self._get_all_repos(self.github_client)
|
||||
|
||||
for repo in repos:
|
||||
if self.include_prs:
|
||||
@@ -268,11 +305,48 @@ class GithubConnector(LoadConnector, PollConnector):
|
||||
)
|
||||
|
||||
try:
|
||||
if self.repo_name:
|
||||
test_repo = self.github_client.get_repo(
|
||||
f"{self.repo_owner}/{self.repo_name}"
|
||||
)
|
||||
test_repo.get_contents("")
|
||||
if self.repositories:
|
||||
if "," in self.repositories:
|
||||
# Multiple repositories specified
|
||||
repo_names = [name.strip() for name in self.repositories.split(",")]
|
||||
if not repo_names:
|
||||
raise ConnectorValidationError(
|
||||
"Invalid connector settings: No valid repository names provided."
|
||||
)
|
||||
|
||||
# Validate at least one repository exists and is accessible
|
||||
valid_repos = False
|
||||
validation_errors = []
|
||||
|
||||
for repo_name in repo_names:
|
||||
if not repo_name:
|
||||
continue
|
||||
|
||||
try:
|
||||
test_repo = self.github_client.get_repo(
|
||||
f"{self.repo_owner}/{repo_name}"
|
||||
)
|
||||
test_repo.get_contents("")
|
||||
valid_repos = True
|
||||
# If at least one repo is valid, we can proceed
|
||||
break
|
||||
except GithubException as e:
|
||||
validation_errors.append(
|
||||
f"Repository '{repo_name}': {e.data.get('message', str(e))}"
|
||||
)
|
||||
|
||||
if not valid_repos:
|
||||
error_msg = (
|
||||
"None of the specified repositories could be accessed: "
|
||||
)
|
||||
error_msg += ", ".join(validation_errors)
|
||||
raise ConnectorValidationError(error_msg)
|
||||
else:
|
||||
# Single repository (backward compatibility)
|
||||
test_repo = self.github_client.get_repo(
|
||||
f"{self.repo_owner}/{self.repositories}"
|
||||
)
|
||||
test_repo.get_contents("")
|
||||
else:
|
||||
# Try to get organization first
|
||||
try:
|
||||
@@ -298,10 +372,15 @@ class GithubConnector(LoadConnector, PollConnector):
|
||||
"Your GitHub token does not have sufficient permissions for this repository (HTTP 403)."
|
||||
)
|
||||
elif e.status == 404:
|
||||
if self.repo_name:
|
||||
raise ConnectorValidationError(
|
||||
f"GitHub repository not found with name: {self.repo_owner}/{self.repo_name}"
|
||||
)
|
||||
if self.repositories:
|
||||
if "," in self.repositories:
|
||||
raise ConnectorValidationError(
|
||||
f"None of the specified GitHub repositories could be found for owner: {self.repo_owner}"
|
||||
)
|
||||
else:
|
||||
raise ConnectorValidationError(
|
||||
f"GitHub repository not found with name: {self.repo_owner}/{self.repositories}"
|
||||
)
|
||||
else:
|
||||
raise ConnectorValidationError(
|
||||
f"GitHub user or organization not found: {self.repo_owner}"
|
||||
@@ -310,6 +389,7 @@ class GithubConnector(LoadConnector, PollConnector):
|
||||
raise ConnectorValidationError(
|
||||
f"Unexpected GitHub error (status={e.status}): {e.data}"
|
||||
)
|
||||
|
||||
except Exception as exc:
|
||||
raise Exception(
|
||||
f"Unexpected error during GitHub settings validation: {exc}"
|
||||
@@ -321,7 +401,7 @@ if __name__ == "__main__":
|
||||
|
||||
connector = GithubConnector(
|
||||
repo_owner=os.environ["REPO_OWNER"],
|
||||
repo_name=os.environ["REPO_NAME"],
|
||||
repositories=os.environ["REPOSITORIES"],
|
||||
)
|
||||
connector.load_credentials(
|
||||
{"github_access_token": os.environ["GITHUB_ACCESS_TOKEN"]}
|
||||
|
||||
@@ -464,12 +464,29 @@ def index_doc_batch(
|
||||
),
|
||||
)
|
||||
|
||||
successful_doc_ids = {record.document_id for record in insertion_records}
|
||||
if successful_doc_ids != set(updatable_ids):
|
||||
all_returned_doc_ids = (
|
||||
{record.document_id for record in insertion_records}
|
||||
.union(
|
||||
{
|
||||
record.failed_document.document_id
|
||||
for record in vector_db_write_failures
|
||||
if record.failed_document
|
||||
}
|
||||
)
|
||||
.union(
|
||||
{
|
||||
record.failed_document.document_id
|
||||
for record in embedding_failures
|
||||
if record.failed_document
|
||||
}
|
||||
)
|
||||
)
|
||||
if all_returned_doc_ids != set(updatable_ids):
|
||||
raise RuntimeError(
|
||||
f"Some documents were not successfully indexed. "
|
||||
f"Updatable IDs: {updatable_ids}, "
|
||||
f"Successful IDs: {successful_doc_ids}"
|
||||
f"Returned IDs: {all_returned_doc_ids}. "
|
||||
"This should never happen."
|
||||
)
|
||||
|
||||
last_modified_ids = []
|
||||
|
||||
@@ -45,7 +45,7 @@ def test_confluence_connector_basic(
|
||||
with pytest.raises(StopIteration):
|
||||
next(doc_batch_generator)
|
||||
|
||||
assert len(doc_batch) == 3
|
||||
assert len(doc_batch) == 2
|
||||
|
||||
page_within_a_page_doc: Document | None = None
|
||||
page_doc: Document | None = None
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
import {
|
||||
AnthropicIcon,
|
||||
AmazonIcon,
|
||||
AWSIcon,
|
||||
AzureIcon,
|
||||
CPUIcon,
|
||||
MicrosoftIconSVG,
|
||||
MistralIcon,
|
||||
MetaIcon,
|
||||
GeminiIcon,
|
||||
AnthropicSVG,
|
||||
IconProps,
|
||||
OpenAIISVG,
|
||||
DeepseekIcon,
|
||||
OpenAISVG,
|
||||
} from "@/components/icons/icons";
|
||||
|
||||
export interface CustomConfigKey {
|
||||
@@ -71,7 +74,7 @@ export interface LLMProviderDescriptor {
|
||||
}
|
||||
|
||||
export const getProviderIcon = (providerName: string, modelName?: string) => {
|
||||
const iconMap: Record<
|
||||
const modelIconMap: Record<
|
||||
string,
|
||||
({ size, className }: IconProps) => JSX.Element
|
||||
> = {
|
||||
@@ -83,30 +86,34 @@ export const getProviderIcon = (providerName: string, modelName?: string) => {
|
||||
gemini: GeminiIcon,
|
||||
deepseek: DeepseekIcon,
|
||||
claude: AnthropicIcon,
|
||||
anthropic: AnthropicIcon,
|
||||
openai: OpenAISVG,
|
||||
microsoft: MicrosoftIconSVG,
|
||||
meta: MetaIcon,
|
||||
google: GeminiIcon,
|
||||
};
|
||||
|
||||
// First check if provider name directly matches an icon
|
||||
if (providerName.toLowerCase() in iconMap) {
|
||||
return iconMap[providerName.toLowerCase()];
|
||||
}
|
||||
|
||||
// Then check if model name contains any of the keys
|
||||
if (modelName) {
|
||||
const lowerModelName = modelName.toLowerCase();
|
||||
for (const [key, icon] of Object.entries(iconMap)) {
|
||||
if (lowerModelName.includes(key)) {
|
||||
const modelNameToIcon = (
|
||||
modelName: string,
|
||||
fallbackIcon: ({ size, className }: IconProps) => JSX.Element
|
||||
): (({ size, className }: IconProps) => JSX.Element) => {
|
||||
const lowerModelName = modelName?.toLowerCase();
|
||||
for (const [key, icon] of Object.entries(modelIconMap)) {
|
||||
if (lowerModelName?.includes(key)) {
|
||||
return icon;
|
||||
}
|
||||
}
|
||||
}
|
||||
return fallbackIcon;
|
||||
};
|
||||
|
||||
// Fallback to CPU icon if no matches
|
||||
return CPUIcon;
|
||||
switch (providerName) {
|
||||
case "openai":
|
||||
// Special cases for openai based on modelName
|
||||
return modelNameToIcon(modelName || "", OpenAIISVG);
|
||||
case "anthropic":
|
||||
return AnthropicSVG;
|
||||
case "bedrock":
|
||||
return AWSIcon;
|
||||
case "azure":
|
||||
return AzureIcon;
|
||||
default:
|
||||
return modelNameToIcon(modelName || "", CPUIcon);
|
||||
}
|
||||
};
|
||||
|
||||
export const isAnthropic = (provider: string, modelName: string) =>
|
||||
|
||||
@@ -185,10 +185,7 @@ export const FilterComponent = forwardRef<
|
||||
hasActiveFilters ? "border-primary bg-primary/5" : ""
|
||||
}`}
|
||||
>
|
||||
<SortIcon
|
||||
size={20}
|
||||
className="text-neutral-800 dark:text-neutral-200"
|
||||
/>
|
||||
<SortIcon size={20} className="text-neutral-800" />
|
||||
</Button>
|
||||
</DropdownMenuTrigger>
|
||||
<DropdownMenuContent
|
||||
@@ -368,7 +365,7 @@ export const FilterComponent = forwardRef<
|
||||
|
||||
{hasActiveFilters && (
|
||||
<div className="absolute -top-1 -right-1">
|
||||
<Badge className="h-2 !bg-red-400 !border-red-400 w-2 p-0 border-2 flex items-center justify-center" />
|
||||
<Badge className="h-2 bg-red-400 border-red-400 w-2 p-0 border-2 flex items-center justify-center" />
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -40,8 +40,12 @@ export const ConnectorTitle = ({
|
||||
const typedConnector = connector as Connector<GithubConfig>;
|
||||
additionalMetadata.set(
|
||||
"Repo",
|
||||
typedConnector.connector_specific_config.repo_name
|
||||
? `${typedConnector.connector_specific_config.repo_owner}/${typedConnector.connector_specific_config.repo_name}`
|
||||
typedConnector.connector_specific_config.repositories
|
||||
? `${typedConnector.connector_specific_config.repo_owner}/${
|
||||
typedConnector.connector_specific_config.repositories.includes(",")
|
||||
? "multiple repos"
|
||||
: typedConnector.connector_specific_config.repositories
|
||||
}`
|
||||
: `${typedConnector.connector_specific_config.repo_owner}/*`
|
||||
);
|
||||
} else if (connector.source === "gitlab") {
|
||||
|
||||
@@ -3102,6 +3102,27 @@ export const OpenAISVG = ({
|
||||
);
|
||||
};
|
||||
|
||||
export const AnthropicSVG = ({
|
||||
size = 16,
|
||||
className = defaultTailwindCSS,
|
||||
}: IconProps) => {
|
||||
return (
|
||||
<svg
|
||||
style={{ width: `${size}px`, height: `${size}px` }}
|
||||
className={`w-[${size}px] h-[${size}px] ` + className}
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 92.2 65"
|
||||
xmlSpace="preserve"
|
||||
fill="currentColor"
|
||||
>
|
||||
<path
|
||||
fill="currentColor"
|
||||
d="M66.5,0H52.4l25.7,65h14.1L66.5,0z M25.7,0L0,65h14.4l5.3-13.6h26.9L51.8,65h14.4L40.5,0C40.5,0,25.7,0,25.7,0z M24.3,39.3l8.8-22.8l8.8,22.8H24.3z"
|
||||
/>
|
||||
</svg>
|
||||
);
|
||||
};
|
||||
|
||||
export const SourcesIcon = ({
|
||||
size = 16,
|
||||
className = defaultTailwindCSS,
|
||||
|
||||
@@ -190,10 +190,12 @@ export const connectorConfigs: Record<
|
||||
fields: [
|
||||
{
|
||||
type: "text",
|
||||
query: "Enter the repository name:",
|
||||
label: "Repository Name",
|
||||
name: "repo_name",
|
||||
query: "Enter the repository name(s):",
|
||||
label: "Repository Name(s)",
|
||||
name: "repositories",
|
||||
optional: false,
|
||||
description:
|
||||
"For multiple repositories, enter comma-separated names (e.g., repo1,repo2,repo3)",
|
||||
},
|
||||
],
|
||||
},
|
||||
@@ -1358,7 +1360,7 @@ export interface WebConfig {
|
||||
|
||||
export interface GithubConfig {
|
||||
repo_owner: string;
|
||||
repo_name: string;
|
||||
repositories: string; // Comma-separated list of repository names
|
||||
include_prs: boolean;
|
||||
include_issues: boolean;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user