Compare commits

..

1 Commits

Author SHA1 Message Date
pablonyx
252cd06fcf k 2025-03-06 10:07:45 -08:00
17 changed files with 55 additions and 375 deletions

View File

@@ -1,125 +0,0 @@
"""Update GitHub connector repo_name to repositories
Revision ID: 3934b1bc7b62
Revises: b7c2b63c4a03
Create Date: 2025-03-05 10:50:30.516962
"""
from alembic import op
import sqlalchemy as sa
import json
import logging
# revision identifiers, used by Alembic.
revision = "3934b1bc7b62"
down_revision = "b7c2b63c4a03"
branch_labels = None
depends_on = None
logger = logging.getLogger("alembic.runtime.migration")
def upgrade() -> None:
# Get all GitHub connectors
conn = op.get_bind()
# First get all GitHub connectors
github_connectors = conn.execute(
sa.text(
"""
SELECT id, connector_specific_config
FROM connector
WHERE source = 'GITHUB'
"""
)
).fetchall()
# Update each connector's config
updated_count = 0
for connector_id, config in github_connectors:
try:
if not config:
logger.warning(f"Connector {connector_id} has no config, skipping")
continue
# Parse the config if it's a string
if isinstance(config, str):
config = json.loads(config)
if "repo_name" not in config:
continue
# Create new config with repositories instead of repo_name
new_config = dict(config)
repo_name_value = new_config.pop("repo_name")
new_config["repositories"] = repo_name_value
# Update the connector with the new config
conn.execute(
sa.text(
"""
UPDATE connector
SET connector_specific_config = :new_config
WHERE id = :connector_id
"""
),
{"connector_id": connector_id, "new_config": json.dumps(new_config)},
)
updated_count += 1
except Exception as e:
logger.error(f"Error updating connector {connector_id}: {str(e)}")
def downgrade() -> None:
# Get all GitHub connectors
conn = op.get_bind()
logger.debug(
"Starting rollback of GitHub connectors from repositories to repo_name"
)
github_connectors = conn.execute(
sa.text(
"""
SELECT id, connector_specific_config
FROM connector
WHERE source = 'GITHUB'
"""
)
).fetchall()
logger.debug(f"Found {len(github_connectors)} GitHub connectors to rollback")
# Revert each GitHub connector to use repo_name instead of repositories
reverted_count = 0
for connector_id, config in github_connectors:
try:
if not config:
continue
# Parse the config if it's a string
if isinstance(config, str):
config = json.loads(config)
if "repositories" not in config:
continue
# Create new config with repo_name instead of repositories
new_config = dict(config)
repositories_value = new_config.pop("repositories")
new_config["repo_name"] = repositories_value
# Update the connector with the new config
conn.execute(
sa.text(
"""
UPDATE connector
SET connector_specific_config = :new_config
WHERE id = :connector_id
"""
),
{"new_config": json.dumps(new_config), "connector_id": connector_id},
)
reverted_count += 1
except Exception as e:
logger.error(f"Error reverting connector {connector_id}: {str(e)}")

View File

@@ -80,7 +80,6 @@ class ConfluenceCloudOAuth:
"search:confluence%20"
# granular scope
"read:attachment:confluence%20" # possibly unneeded unless calling v2 attachments api
"read:content-details:confluence%20" # for permission sync
"offline_access"
)

View File

@@ -48,5 +48,4 @@ def store_product_gating(tenant_id: str, application_status: ApplicationStatus)
def get_gated_tenants() -> set[str]:
redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
gated_tenants_bytes = cast(set[bytes], redis_client.smembers(GATED_TENANTS_KEY))
return {tenant_id.decode("utf-8") for tenant_id in gated_tenants_bytes}
return cast(set[str], redis_client.smembers(GATED_TENANTS_KEY))

View File

@@ -55,11 +55,7 @@ logger = logging.getLogger(__name__)
async def get_or_provision_tenant(
email: str, referral_source: str | None = None, request: Request | None = None
) -> str:
"""
Get existing tenant ID for an email or create a new tenant if none exists.
This function should only be called after we have verified we want this user's tenant to exist.
It returns the tenant ID associated with the email, creating a new tenant if necessary.
"""
"""Get existing tenant ID for an email or create a new tenant if none exists."""
if not MULTI_TENANT:
return POSTGRES_DEFAULT_SCHEMA

View File

@@ -587,20 +587,14 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
) -> Optional[User]:
email = credentials.username
tenant_id: str | None = None
try:
tenant_id = fetch_ee_implementation_or_noop(
"onyx.server.tenants.provisioning",
"get_tenant_id_for_email",
None,
)(
email=email,
)
except Exception as e:
logger.warning(
f"User attempted to login with invalid credentials: {str(e)}"
)
# Get tenant_id from mapping table
tenant_id = await fetch_ee_implementation_or_noop(
"onyx.server.tenants.provisioning",
"get_or_provision_tenant",
async_return_default_schema,
)(
email=email,
)
if not tenant_id:
# User not found in mapping
self.password_helper.hash(credentials.password)

View File

@@ -240,7 +240,7 @@ class ConfluenceConnector(
# Extract basic page information
page_id = page["id"]
page_title = page["title"]
page_url = f"{self.wiki_base}{page['_links']['webui']}"
page_url = f"{self.wiki_base}/wiki{page['_links']['webui']}"
# Get the page content
page_content = extract_text_from_confluence_html(

View File

@@ -124,14 +124,14 @@ class GithubConnector(LoadConnector, PollConnector):
def __init__(
self,
repo_owner: str,
repositories: str | None = None,
repo_name: str | None = None,
batch_size: int = INDEX_BATCH_SIZE,
state_filter: str = "all",
include_prs: bool = True,
include_issues: bool = False,
) -> None:
self.repo_owner = repo_owner
self.repositories = repositories
self.repo_name = repo_name
self.batch_size = batch_size
self.state_filter = state_filter
self.include_prs = include_prs
@@ -157,42 +157,11 @@ class GithubConnector(LoadConnector, PollConnector):
)
try:
return github_client.get_repo(f"{self.repo_owner}/{self.repositories}")
return github_client.get_repo(f"{self.repo_owner}/{self.repo_name}")
except RateLimitExceededException:
_sleep_after_rate_limit_exception(github_client)
return self._get_github_repo(github_client, attempt_num + 1)
def _get_github_repos(
self, github_client: Github, attempt_num: int = 0
) -> list[Repository.Repository]:
"""Get specific repositories based on comma-separated repo_name string."""
if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
raise RuntimeError(
"Re-tried fetching repos too many times. Something is going wrong with fetching objects from Github"
)
try:
repos = []
# Split repo_name by comma and strip whitespace
repo_names = [
name.strip() for name in (cast(str, self.repositories)).split(",")
]
for repo_name in repo_names:
if repo_name: # Skip empty strings
try:
repo = github_client.get_repo(f"{self.repo_owner}/{repo_name}")
repos.append(repo)
except GithubException as e:
logger.warning(
f"Could not fetch repo {self.repo_owner}/{repo_name}: {e}"
)
return repos
except RateLimitExceededException:
_sleep_after_rate_limit_exception(github_client)
return self._get_github_repos(github_client, attempt_num + 1)
def _get_all_repos(
self, github_client: Github, attempt_num: int = 0
) -> list[Repository.Repository]:
@@ -220,17 +189,11 @@ class GithubConnector(LoadConnector, PollConnector):
if self.github_client is None:
raise ConnectorMissingCredentialError("GitHub")
repos = []
if self.repositories:
if "," in self.repositories:
# Multiple repositories specified
repos = self._get_github_repos(self.github_client)
else:
# Single repository (backward compatibility)
repos = [self._get_github_repo(self.github_client)]
else:
# All repositories
repos = self._get_all_repos(self.github_client)
repos = (
[self._get_github_repo(self.github_client)]
if self.repo_name
else self._get_all_repos(self.github_client)
)
for repo in repos:
if self.include_prs:
@@ -305,48 +268,11 @@ class GithubConnector(LoadConnector, PollConnector):
)
try:
if self.repositories:
if "," in self.repositories:
# Multiple repositories specified
repo_names = [name.strip() for name in self.repositories.split(",")]
if not repo_names:
raise ConnectorValidationError(
"Invalid connector settings: No valid repository names provided."
)
# Validate at least one repository exists and is accessible
valid_repos = False
validation_errors = []
for repo_name in repo_names:
if not repo_name:
continue
try:
test_repo = self.github_client.get_repo(
f"{self.repo_owner}/{repo_name}"
)
test_repo.get_contents("")
valid_repos = True
# If at least one repo is valid, we can proceed
break
except GithubException as e:
validation_errors.append(
f"Repository '{repo_name}': {e.data.get('message', str(e))}"
)
if not valid_repos:
error_msg = (
"None of the specified repositories could be accessed: "
)
error_msg += ", ".join(validation_errors)
raise ConnectorValidationError(error_msg)
else:
# Single repository (backward compatibility)
test_repo = self.github_client.get_repo(
f"{self.repo_owner}/{self.repositories}"
)
test_repo.get_contents("")
if self.repo_name:
test_repo = self.github_client.get_repo(
f"{self.repo_owner}/{self.repo_name}"
)
test_repo.get_contents("")
else:
# Try to get organization first
try:
@@ -372,15 +298,10 @@ class GithubConnector(LoadConnector, PollConnector):
"Your GitHub token does not have sufficient permissions for this repository (HTTP 403)."
)
elif e.status == 404:
if self.repositories:
if "," in self.repositories:
raise ConnectorValidationError(
f"None of the specified GitHub repositories could be found for owner: {self.repo_owner}"
)
else:
raise ConnectorValidationError(
f"GitHub repository not found with name: {self.repo_owner}/{self.repositories}"
)
if self.repo_name:
raise ConnectorValidationError(
f"GitHub repository not found with name: {self.repo_owner}/{self.repo_name}"
)
else:
raise ConnectorValidationError(
f"GitHub user or organization not found: {self.repo_owner}"
@@ -389,7 +310,6 @@ class GithubConnector(LoadConnector, PollConnector):
raise ConnectorValidationError(
f"Unexpected GitHub error (status={e.status}): {e.data}"
)
except Exception as exc:
raise Exception(
f"Unexpected error during GitHub settings validation: {exc}"
@@ -401,7 +321,7 @@ if __name__ == "__main__":
connector = GithubConnector(
repo_owner=os.environ["REPO_OWNER"],
repositories=os.environ["REPOSITORIES"],
repo_name=os.environ["REPO_NAME"],
)
connector.load_credentials(
{"github_access_token": os.environ["GITHUB_ACCESS_TOKEN"]}

View File

@@ -464,29 +464,12 @@ def index_doc_batch(
),
)
all_returned_doc_ids = (
{record.document_id for record in insertion_records}
.union(
{
record.failed_document.document_id
for record in vector_db_write_failures
if record.failed_document
}
)
.union(
{
record.failed_document.document_id
for record in embedding_failures
if record.failed_document
}
)
)
if all_returned_doc_ids != set(updatable_ids):
successful_doc_ids = {record.document_id for record in insertion_records}
if successful_doc_ids != set(updatable_ids):
raise RuntimeError(
f"Some documents were not successfully indexed. "
f"Updatable IDs: {updatable_ids}, "
f"Returned IDs: {all_returned_doc_ids}. "
"This should never happen."
f"Successful IDs: {successful_doc_ids}"
)
last_modified_ids = []

View File

@@ -45,7 +45,7 @@ def test_confluence_connector_basic(
with pytest.raises(StopIteration):
next(doc_batch_generator)
assert len(doc_batch) == 2
assert len(doc_batch) == 3
page_within_a_page_doc: Document | None = None
page_doc: Document | None = None

View File

@@ -80,13 +80,3 @@ prod cluster**
- `kubectl delete -f .`
- To not delete the persistent volumes (Document indexes and Users), specify the specific `.yaml` files instead of
`.` without specifying delete on persistent-volumes.yaml.
### Using Helm to deploy to an existing cluster
Onyx has a helm chart that is convenient to install all services to an existing Kubernetes cluster. To install:
* Currently the helm chart is not published so to install, clone the repo.
* Configure access to the cluster via kubectl. Ensure the kubectl context is set to the cluster that you want to use
* The default secrets, environment variables and other service level configuration are stored in `deployment/helm/charts/onyx/values.yml`. You may create another `override.yml`
* `cd deployment/helm/charts/onyx` and run `helm install onyx -n onyx -f override.yaml .`. This will install onyx on the cluster under the `onyx` namespace.
* Check the status of the deploy using `kubectl get pods -n onyx`

View File

@@ -1,27 +0,0 @@
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: {{ include "onyx-stack.fullname" . }}-ingress-api
annotations:
kubernetes.io/ingress.class: nginx
nginx.ingress.kubernetes.io/rewrite-target: /$2
nginx.ingress.kubernetes.io/use-regex: "true"
cert-manager.io/cluster-issuer: {{ include "onyx-stack.fullname" . }}-letsencrypt
spec:
rules:
- host: {{ .Values.ingress.api.host }}
http:
paths:
- path: /api(/|$)(.*)
pathType: Prefix
backend:
service:
name: {{ include "onyx-stack.fullname" . }}-api-service
port:
number: {{ .Values.api.service.servicePort }}
tls:
- hosts:
- {{ .Values.ingress.api.host }}
secretName: {{ include "onyx-stack.fullname" . }}-ingress-api-tls
{{- end }}

View File

@@ -1,26 +0,0 @@
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: {{ include "onyx-stack.fullname" . }}-ingress-webserver
annotations:
kubernetes.io/ingress.class: nginx
cert-manager.io/cluster-issuer: {{ include "onyx-stack.fullname" . }}-letsencrypt
kubernetes.io/tls-acme: "true"
spec:
rules:
- host: {{ .Values.ingress.webserver.host }}
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: {{ include "onyx-stack.fullname" . }}-webserver
port:
number: {{ .Values.webserver.service.servicePort }}
tls:
- hosts:
- {{ .Values.ingress.webserver.host }}
secretName: {{ include "onyx-stack.fullname" . }}-ingress-webserver-tls
{{- end }}

View File

@@ -1,20 +0,0 @@
{{- if .Values.letsencrypt.enabled -}}
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: {{ include "onyx-stack.fullname" . }}-letsencrypt
spec:
acme:
# The ACME server URL
server: https://acme-v02.api.letsencrypt.org/directory
# Email address used for ACME registration
email: {{ .Values.letsencrypt.email }}
# Name of a secret used to store the ACME account private key
privateKeySecretRef:
name: {{ include "onyx-stack.fullname" . }}-letsencrypt
# Enable the HTTP-01 challenge provider
solvers:
- http01:
ingress:
class: nginx
{{- end }}

View File

@@ -376,17 +376,22 @@ redis:
existingSecret: onyx-secrets
existingSecretPasswordKey: redis_password
ingress:
enabled: false
className: ""
api:
host: onyx.local
webserver:
host: onyx.local
# ingress:
# enabled: false
# className: ""
# annotations: {}
# # kubernetes.io/ingress.class: nginx
# # kubernetes.io/tls-acme: "true"
# hosts:
# - host: chart-example.local
# paths:
# - path: /
# pathType: ImplementationSpecific
# tls: []
# # - secretName: chart-example-tls
# # hosts:
# # - chart-example.local
letsencrypt:
enabled: false
email: "abc@abc.com"
auth:
# existingSecret onyx-secret for storing smtp, oauth, slack, and other secrets

View File

@@ -61,7 +61,6 @@ export function EmailPasswordForm({
if (!response.ok) {
setIsWorking(false);
const errorDetail = (await response.json()).detail;
let errorMsg = "Unknown error";
if (typeof errorDetail === "object" && errorDetail.reason) {
@@ -97,13 +96,12 @@ export function EmailPasswordForm({
} else {
setIsWorking(false);
const errorDetail = (await loginResponse.json()).detail;
let errorMsg = "Unknown error";
if (errorDetail === "LOGIN_BAD_CREDENTIALS") {
errorMsg = "Invalid email or password";
} else if (errorDetail === "NO_WEB_LOGIN_AND_HAS_NO_PASSWORD") {
errorMsg = "Create an account to set a password";
} else if (typeof errorDetail === "string") {
errorMsg = errorDetail;
}
if (loginResponse.status === 429) {
errorMsg = "Too many requests. Please try again later.";

View File

@@ -40,12 +40,8 @@ export const ConnectorTitle = ({
const typedConnector = connector as Connector<GithubConfig>;
additionalMetadata.set(
"Repo",
typedConnector.connector_specific_config.repositories
? `${typedConnector.connector_specific_config.repo_owner}/${
typedConnector.connector_specific_config.repositories.includes(",")
? "multiple repos"
: typedConnector.connector_specific_config.repositories
}`
typedConnector.connector_specific_config.repo_name
? `${typedConnector.connector_specific_config.repo_owner}/${typedConnector.connector_specific_config.repo_name}`
: `${typedConnector.connector_specific_config.repo_owner}/*`
);
} else if (connector.source === "gitlab") {

View File

@@ -190,12 +190,10 @@ export const connectorConfigs: Record<
fields: [
{
type: "text",
query: "Enter the repository name(s):",
label: "Repository Name(s)",
name: "repositories",
query: "Enter the repository name:",
label: "Repository Name",
name: "repo_name",
optional: false,
description:
"For multiple repositories, enter comma-separated names (e.g., repo1,repo2,repo3)",
},
],
},
@@ -1360,7 +1358,7 @@ export interface WebConfig {
export interface GithubConfig {
repo_owner: string;
repositories: string; // Comma-separated list of repository names
repo_name: string;
include_prs: boolean;
include_issues: boolean;
}