mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-03-12 03:02:43 +00:00
Compare commits
4 Commits
chore/upda
...
xlsx-parse
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
83558ae04c | ||
|
|
005009602c | ||
|
|
b93875353b | ||
|
|
2290141b53 |
@@ -1,8 +1,6 @@
|
||||
from collections.abc import Generator
|
||||
from typing import Any
|
||||
|
||||
from jira import JIRA
|
||||
from jira.exceptions import JIRAError
|
||||
|
||||
from ee.onyx.db.external_perm import ExternalUserGroup
|
||||
from onyx.connectors.jira.utils import build_jira_client
|
||||
@@ -11,102 +9,107 @@ from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
_ATLASSIAN_ACCOUNT_TYPE = "atlassian"
|
||||
_GROUP_MEMBER_PAGE_SIZE = 50
|
||||
|
||||
# The GET /group/member endpoint was introduced in Jira 6.0.
|
||||
# Jira versions older than 6.0 do not have group management REST APIs at all.
|
||||
_MIN_JIRA_VERSION_FOR_GROUP_MEMBER = "6.0"
|
||||
|
||||
|
||||
def _fetch_group_member_page(
|
||||
def _get_jira_group_members_email(
|
||||
jira_client: JIRA,
|
||||
group_name: str,
|
||||
start_at: int,
|
||||
) -> dict[str, Any]:
|
||||
"""Fetch a single page from the non-deprecated GET /group/member endpoint.
|
||||
) -> list[str]:
|
||||
"""Get all member emails for a Jira group.
|
||||
|
||||
The old GET /group endpoint (used by jira_client.group_members()) is deprecated
|
||||
and decommissioned in Jira Server 10.3+. This uses the replacement endpoint
|
||||
directly via the library's internal _get_json helper, following the same pattern
|
||||
as enhanced_search_ids / bulk_fetch_issues in connector.py.
|
||||
|
||||
There is an open PR to the library to switch to this endpoint since last year:
|
||||
https://github.com/pycontribs/jira/pull/2356
|
||||
so once it is merged and released, we can switch to using the library function.
|
||||
Filters out app accounts (bots, integrations) and only returns real user emails.
|
||||
"""
|
||||
emails: list[str] = []
|
||||
|
||||
try:
|
||||
return jira_client._get_json(
|
||||
"group/member",
|
||||
params={
|
||||
"groupname": group_name,
|
||||
"includeInactiveUsers": "false",
|
||||
"startAt": start_at,
|
||||
"maxResults": _GROUP_MEMBER_PAGE_SIZE,
|
||||
},
|
||||
)
|
||||
except JIRAError as e:
|
||||
if e.status_code == 404:
|
||||
raise RuntimeError(
|
||||
f"GET /group/member returned 404 for group '{group_name}'. "
|
||||
f"This endpoint requires Jira {_MIN_JIRA_VERSION_FOR_GROUP_MEMBER}+. "
|
||||
f"If you are running a self-hosted Jira instance, please upgrade "
|
||||
f"to at least Jira {_MIN_JIRA_VERSION_FOR_GROUP_MEMBER}."
|
||||
) from e
|
||||
raise
|
||||
# group_members returns an OrderedDict of account_id -> member_info
|
||||
members = jira_client.group_members(group=group_name)
|
||||
|
||||
if not members:
|
||||
logger.warning(f"No members found for group {group_name}")
|
||||
return emails
|
||||
|
||||
def _get_group_member_emails(
|
||||
jira_client: JIRA,
|
||||
group_name: str,
|
||||
) -> set[str]:
|
||||
"""Get all member emails for a single Jira group.
|
||||
for account_id, member_info in members.items():
|
||||
# member_info is a dict with keys like 'fullname', 'email', 'active'
|
||||
email = member_info.get("email")
|
||||
|
||||
Uses the non-deprecated GET /group/member endpoint which returns full user
|
||||
objects including accountType, so we can filter out app/customer accounts
|
||||
without making separate user() calls.
|
||||
"""
|
||||
emails: set[str] = set()
|
||||
start_at = 0
|
||||
|
||||
while True:
|
||||
try:
|
||||
page = _fetch_group_member_page(jira_client, group_name, start_at)
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching members for group {group_name}: {e}")
|
||||
raise
|
||||
|
||||
members: list[dict[str, Any]] = page.get("values", [])
|
||||
for member in members:
|
||||
account_type = member.get("accountType")
|
||||
# On Jira DC < 9.0, accountType is absent; include those users.
|
||||
# On Cloud / DC 9.0+, filter to real user accounts only.
|
||||
if account_type is not None and account_type != _ATLASSIAN_ACCOUNT_TYPE:
|
||||
continue
|
||||
|
||||
email = member.get("emailAddress")
|
||||
if email:
|
||||
emails.add(email)
|
||||
# Skip "hidden" emails - these are typically app accounts
|
||||
if email and email != "hidden":
|
||||
emails.append(email)
|
||||
else:
|
||||
logger.warning(
|
||||
f"Atlassian user {member.get('accountId', 'unknown')} "
|
||||
f"in group {group_name} has no visible email address"
|
||||
)
|
||||
# For cloud, we might need to fetch user details separately
|
||||
try:
|
||||
user = jira_client.user(id=account_id)
|
||||
|
||||
if page.get("isLast", True) or not members:
|
||||
break
|
||||
start_at += len(members)
|
||||
# Skip app accounts (bots, integrations, etc.)
|
||||
if hasattr(user, "accountType") and user.accountType == "app":
|
||||
logger.info(
|
||||
f"Skipping app account {account_id} for group {group_name}"
|
||||
)
|
||||
continue
|
||||
|
||||
if hasattr(user, "emailAddress") and user.emailAddress:
|
||||
emails.append(user.emailAddress)
|
||||
else:
|
||||
logger.warning(f"User {account_id} has no email address")
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Could not fetch email for user {account_id} in group {group_name}: {e}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching members for group {group_name}: {e}")
|
||||
|
||||
return emails
|
||||
|
||||
|
||||
def _build_group_member_email_map(
    jira_client: JIRA,
) -> dict[str, set[str]]:
    """Build a map of group names to member emails.

    This is best-effort: any exception raised while listing groups or while
    fetching a group's members is logged, and whatever has been collected up
    to that point is returned instead of propagating the error.

    Args:
        jira_client: Jira client used to list the groups and, through
            ``_get_jira_group_members_email``, their members.

    Returns:
        Mapping of group name to the set of its member emails. Groups with a
        falsy name and groups for which no emails were found are omitted.
    """
    group_member_emails: dict[str, set[str]] = {}

    try:
        # Get all groups from Jira - returns a list of group name strings
        group_names = jira_client.groups()

        if not group_names:
            logger.warning("No groups found in Jira")
            return group_member_emails

        logger.info(f"Found {len(group_names)} groups in Jira")

        for group_name in group_names:
            if not group_name:
                continue

            member_emails = _get_jira_group_members_email(
                jira_client=jira_client,
                group_name=group_name,
            )

            if member_emails:
                # Stored as a set, so duplicate emails collapse to one entry.
                group_member_emails[group_name] = set(member_emails)
                logger.debug(
                    f"Found {len(member_emails)} members for group {group_name}"
                )
            else:
                logger.debug(f"No members found for group {group_name}")

    except Exception as e:
        # Deliberately swallowed: the partial map built so far is returned.
        logger.error(f"Error building group member email map: {e}")

    return group_member_emails
|
||||
|
||||
|
||||
def jira_group_sync(
|
||||
tenant_id: str, # noqa: ARG001
|
||||
cc_pair: ConnectorCredentialPair,
|
||||
) -> Generator[ExternalUserGroup, None, None]:
|
||||
"""Sync Jira groups and their members, yielding one group at a time.
|
||||
"""
|
||||
Sync Jira groups and their members.
|
||||
|
||||
Streams group-by-group rather than accumulating all groups in memory.
|
||||
This function fetches all groups from Jira and yields ExternalUserGroup
|
||||
objects containing the group ID and member emails.
|
||||
"""
|
||||
jira_base_url = cc_pair.connector.connector_specific_config.get("jira_base_url", "")
|
||||
scoped_token = cc_pair.connector.connector_specific_config.get(
|
||||
@@ -127,26 +130,12 @@ def jira_group_sync(
|
||||
scoped_token=scoped_token,
|
||||
)
|
||||
|
||||
group_names = jira_client.groups()
|
||||
if not group_names:
|
||||
raise ValueError(f"No groups found for cc_pair_id={cc_pair.id}")
|
||||
group_member_email_map = _build_group_member_email_map(jira_client=jira_client)
|
||||
if not group_member_email_map:
|
||||
raise ValueError(f"No groups with members found for cc_pair_id={cc_pair.id}")
|
||||
|
||||
logger.info(f"Found {len(group_names)} groups in Jira")
|
||||
|
||||
for group_name in group_names:
|
||||
if not group_name:
|
||||
continue
|
||||
|
||||
member_emails = _get_group_member_emails(
|
||||
jira_client=jira_client,
|
||||
group_name=group_name,
|
||||
)
|
||||
if not member_emails:
|
||||
logger.debug(f"No members found for group {group_name}")
|
||||
continue
|
||||
|
||||
logger.debug(f"Found {len(member_emails)} members for group {group_name}")
|
||||
for group_id, group_member_emails in group_member_email_map.items():
|
||||
yield ExternalUserGroup(
|
||||
id=group_name,
|
||||
user_emails=list(member_emails),
|
||||
id=group_id,
|
||||
user_emails=list(group_member_emails),
|
||||
)
|
||||
|
||||
@@ -11,9 +11,6 @@
|
||||
# lock after its cleanup which happens at most after its soft timeout.
|
||||
|
||||
# Constants corresponding to migrate_documents_from_vespa_to_opensearch_task.
|
||||
from onyx.configs.app_configs import OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE
|
||||
|
||||
|
||||
MIGRATION_TASK_SOFT_TIME_LIMIT_S = 60 * 5 # 5 minutes.
|
||||
MIGRATION_TASK_TIME_LIMIT_S = 60 * 6 # 6 minutes.
|
||||
# The maximum time the lock can be held for. Will automatically be released
|
||||
@@ -47,7 +44,7 @@ TOTAL_ALLOWABLE_DOC_MIGRATION_ATTEMPTS_BEFORE_PERMANENT_FAILURE = 15
|
||||
|
||||
# WARNING: Do not change these values without knowing what changes also need to
|
||||
# be made to OpenSearchTenantMigrationRecord.
|
||||
GET_VESPA_CHUNKS_PAGE_SIZE = OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE
|
||||
GET_VESPA_CHUNKS_PAGE_SIZE = 500
|
||||
GET_VESPA_CHUNKS_SLICE_COUNT = 4
|
||||
|
||||
# String used to indicate in the vespa_visit_continuation_token mapping that the
|
||||
|
||||
@@ -311,12 +311,6 @@ VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT = (
|
||||
os.environ.get("VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT", "true").lower()
|
||||
== "true"
|
||||
)
|
||||
OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE = int(
|
||||
os.environ.get("OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE") or 500
|
||||
)
|
||||
OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES = int(
|
||||
os.environ.get("OPENSEARCH_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES") or 0
|
||||
)
|
||||
|
||||
VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost"
|
||||
# NOTE: this is used if and only if the vespa config server is accessible via a
|
||||
|
||||
@@ -1,10 +1,5 @@
|
||||
# Default value for the maximum number of tokens a chunk can hold, if none is
|
||||
# specified when creating an index.
|
||||
from onyx.configs.app_configs import (
|
||||
OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES,
|
||||
)
|
||||
|
||||
|
||||
DEFAULT_MAX_CHUNK_SIZE = 512
|
||||
|
||||
# Size of the dynamic list used to consider elements during kNN graph creation.
|
||||
@@ -15,43 +10,27 @@ EF_CONSTRUCTION = 256
|
||||
# quality but increase memory footprint. Values typically range between 12 - 48.
|
||||
M = 32 # Set relatively high for better accuracy.
|
||||
|
||||
# When performing hybrid search, we need to consider more candidates than the
|
||||
# number of results to be returned. This is because the scoring is hybrid and
|
||||
# the results are reordered due to the hybrid scoring. Higher = more candidates
|
||||
# for hybrid fusion = better retrieval accuracy, but results in more computation
|
||||
# per query. Imagine a simple case with a single keyword query and a single
|
||||
# vector query and we want 10 final docs. If we only fetch 10 candidates from
|
||||
# each of keyword and vector, they would have to have perfect overlap to get a
|
||||
# good hybrid ranking for the 10 results. If we fetch 1000 candidates from each,
|
||||
# we have a much higher chance of all 10 of the final desired docs showing up
|
||||
# and getting scored. In worse situations, the final 10 docs don't even show up
|
||||
# as the final 10 (worse than just a miss at the reranking step).
|
||||
DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES = (
|
||||
OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES
|
||||
if OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES > 0
|
||||
else 750
|
||||
)
|
||||
# When performing hybrid search, we need to consider more candidates than the number of results to be returned.
|
||||
# This is because the scoring is hybrid and the results are reordered due to the hybrid scoring.
|
||||
# Higher = more candidates for hybrid fusion = better retrieval accuracy, but results in more computation per query.
|
||||
# Imagine a simple case with a single keyword query and a single vector query and we want 10 final docs.
|
||||
# If we only fetch 10 candidates from each of keyword and vector, they would have to have perfect overlap to get a good hybrid
|
||||
# ranking for the 10 results. If we fetch 1000 candidates from each, we have a much higher chance of all 10 of the final desired
|
||||
# docs showing up and getting scored. In worse situations, the final 10 docs don't even show up as the final 10 (worse than just
|
||||
# a miss at the reranking step).
|
||||
DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES = 750
|
||||
|
||||
# Number of vectors to examine to decide the top k neighbors for the HNSW
|
||||
# method.
|
||||
# NOTE: "When creating a search query, you must specify k. If you provide both k
|
||||
# and ef_search, then the larger value is passed to the engine. If ef_search is
|
||||
# larger than k, you can provide the size parameter to limit the final number of
|
||||
# results to k." from
|
||||
# https://docs.opensearch.org/latest/query-dsl/specialized/k-nn/index/#ef_search
|
||||
# Number of vectors to examine for top k neighbors for the HNSW method.
|
||||
EF_SEARCH = DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES
|
||||
|
||||
# Since the titles are included in the contents, the embedding matches are
|
||||
# heavily downweighted as they act as a boost rather than an independent scoring
|
||||
# component.
|
||||
# Since the titles are included in the contents, they are heavily downweighted as they act as a boost
|
||||
# rather than an independent scoring component.
|
||||
SEARCH_TITLE_VECTOR_WEIGHT = 0.1
|
||||
SEARCH_CONTENT_VECTOR_WEIGHT = 0.45
|
||||
# Single keyword weight for both title and content (merged from former title
|
||||
# keyword + content keyword).
|
||||
# Single keyword weight for both title and content (merged from former title keyword + content keyword).
|
||||
SEARCH_KEYWORD_WEIGHT = 0.45
|
||||
|
||||
# NOTE: It is critical that the order of these weights matches the order of the
|
||||
# sub-queries in the hybrid search.
|
||||
# NOTE: it is critical that the order of these weights matches the order of the sub-queries in the hybrid search.
|
||||
HYBRID_SEARCH_NORMALIZATION_WEIGHTS = [
|
||||
SEARCH_TITLE_VECTOR_WEIGHT,
|
||||
SEARCH_CONTENT_VECTOR_WEIGHT,
|
||||
|
||||
@@ -285,16 +285,13 @@ class DocumentQuery:
|
||||
hybrid_search_query: dict[str, Any] = {
|
||||
"hybrid": {
|
||||
"queries": hybrid_search_subqueries,
|
||||
# Max results per subquery per shard before aggregation. Ensures
|
||||
# keyword and vector subqueries contribute equally to the
|
||||
# candidate pool for hybrid fusion.
|
||||
# Max results per subquery per shard before aggregation. Ensures keyword and vector
|
||||
# subqueries contribute equally to the candidate pool for hybrid fusion.
|
||||
# Sources:
|
||||
# https://docs.opensearch.org/latest/vector-search/ai-search/hybrid-search/pagination/
|
||||
# https://opensearch.org/blog/navigating-pagination-in-hybrid-queries-with-the-pagination_depth-parameter/
|
||||
"pagination_depth": DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES,
|
||||
# Applied to all the sub-queries independently (this avoids
|
||||
# subqueries having a lot of results thrown out during
|
||||
# aggregation).
|
||||
# Applied to all the sub-queries independently (this avoids subqueries having a lot of results thrown out).
|
||||
# Sources:
|
||||
# https://docs.opensearch.org/latest/query-dsl/compound/hybrid/
|
||||
# https://opensearch.org/blog/introducing-common-filter-support-for-hybrid-search-queries
|
||||
@@ -377,10 +374,9 @@ class DocumentQuery:
|
||||
def _get_hybrid_search_subqueries(
|
||||
query_text: str,
|
||||
query_vector: list[float],
|
||||
# The default number of neighbors to consider for knn vector similarity
|
||||
# search. This is higher than the number of results because the scoring
|
||||
# is hybrid. For a detailed breakdown, see where the default value is
|
||||
# set.
|
||||
# The default number of neighbors to consider for knn vector similarity search.
|
||||
# This is higher than the number of results because the scoring is hybrid.
|
||||
# For a detailed breakdown, see where the default value is set.
|
||||
vector_candidates: int = DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Returns subqueries for hybrid search.
|
||||
@@ -404,27 +400,20 @@ class DocumentQuery:
|
||||
in a single hybrid query. Source:
|
||||
https://docs.opensearch.org/latest/query-dsl/compound/hybrid/
|
||||
|
||||
NOTE: Each query is independent during the search phase, there is no
|
||||
backfilling of scores for missing query components. What this means is
|
||||
that if a document was a good vector match but did not show up for
|
||||
keyword, it gets a score of 0 for the keyword component of the hybrid
|
||||
scoring. This is not as bad as just disregarding a score though as there
|
||||
is normalization applied after. So really it is "increasing" the missing
|
||||
score compared to if it was included and the range was renormalized.
|
||||
This does however mean that between docs that have high scores for say
|
||||
the vector field, the keyword scores between them are completely ignored
|
||||
unless they also showed up in the keyword query as a reasonably high
|
||||
match. TLDR, this is a bit of unique funky behavior but it seems ok.
|
||||
NOTE: Each query is independent during the search phase, there is no backfilling of scores for missing query components.
|
||||
What this means is that if a document was a good vector match but did not show up for keyword, it gets a score of 0 for
|
||||
the keyword component of the hybrid scoring. This is not as bad as just disregarding a score though as there is
|
||||
normalization applied after. So really it is "increasing" the missing score compared to if it was included and the range
|
||||
was renormalized. This does however mean that between docs that have high scores for say the vector field, the keyword
|
||||
scores between them are completely ignored unless they also showed up in the keyword query as a reasonably high match.
|
||||
TLDR, this is a bit of unique funky behavior but it seems ok.
|
||||
|
||||
NOTE: Options considered and rejected:
|
||||
- minimum_should_match: Since it's hybrid search and users often provide
|
||||
semantic queries, there is often a lot of terms, and very low number
|
||||
of meaningful keywords (and a low ratio of keywords).
|
||||
- fuzziness AUTO: Typo tolerance (0/1/2 edit distance by term length).
|
||||
It's mostly for typos as the analyzer ("english" by default) already
|
||||
does some stemming and tokenization. In testing datasets, this makes
|
||||
recall slightly worse. It also is less performant so not really any
|
||||
reason to do it.
|
||||
- minimum_should_match: Since it's hybrid search and users often provide semantic queries, there is often a lot of terms,
|
||||
and very low number of meaningful keywords (and a low ratio of keywords).
|
||||
- fuzziness AUTO: typo tolerance (0/1/2 edit distance by term length). It's mostly for typos as the analyzer ("english" by
|
||||
default) already does some stemming and tokenization. In testing datasets, this makes recall slightly worse. It also is
|
||||
less performant so not really any reason to do it.
|
||||
|
||||
Args:
|
||||
query_text: The text of the query to search for.
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import csv
|
||||
import gc
|
||||
import io
|
||||
import json
|
||||
@@ -19,6 +20,7 @@ from zipfile import BadZipFile
|
||||
|
||||
import chardet
|
||||
import openpyxl
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
from PIL import Image
|
||||
|
||||
from onyx.configs.constants import ONYX_METADATA_FILENAME
|
||||
@@ -352,6 +354,65 @@ def pptx_to_text(file: IO[Any], file_name: str = "") -> str:
|
||||
return presentation.markdown
|
||||
|
||||
|
||||
def _worksheet_to_matrix(
    worksheet: Worksheet,
) -> list[list[str]]:
    """Convert a single worksheet into a matrix of cell strings.

    Rows are read in order starting at row 1, using cell values only. A
    ``None`` cell becomes ``""``; every other value is converted with ``str``.

    Args:
        worksheet: The worksheet to read. Only ``iter_rows`` is called on it.

    Returns:
        One inner list per worksheet row, holding one string per cell.
    """
    # Test against None explicitly (rather than truthiness) so that falsy but
    # meaningful values such as 0 or False are kept as "0" / "False".
    return [
        ["" if cell is None else str(cell) for cell in worksheet_row]
        for worksheet_row in worksheet.iter_rows(min_row=1, values_only=True)
    ]
|
||||
|
||||
|
||||
def _clean_worksheet_matrix(matrix: list[list[str]]) -> list[list[str]]:
    """Trim long runs of empty rows and empty columns from a worksheet matrix.

    Rows are cleaned first with ``_remove_empty_runs``. The matrix is then
    transposed so the same helper can be applied to the columns, and
    transposed back.

    Args:
        matrix: Worksheet cells as rows of strings.

    Returns:
        The cleaned matrix, or ``[]`` when no rows (or no columns) remain.
    """
    # Limits passed to _remove_empty_runs as max_empty: runs longer than this
    # are capped to max_empty; shorter runs are preserved as-is.
    MAX_EMPTY_ROWS = 2
    MAX_EMPTY_COLS = 2

    # Row cleanup
    matrix = _remove_empty_runs(matrix, max_empty=MAX_EMPTY_ROWS)

    # Column cleanup (transpose, clean, transpose back).
    # NOTE: zip() stops at the shortest row, so this assumes every row has the
    # same number of cells.
    transposed = [list(column) for column in zip(*matrix)] if matrix else []
    transposed = _remove_empty_runs(transposed, max_empty=MAX_EMPTY_COLS)
    matrix = [list(row) for row in zip(*transposed)] if transposed else []

    return matrix
|
||||
|
||||
|
||||
def _remove_empty_runs(
    rows: list[list[str]],
    max_empty: int,
) -> list[list[str]]:
    """Cap every run of consecutive empty rows at ``max_empty`` rows.

    A row is empty when none of its cells is truthy (for string cells: every
    cell is ``""``). A run of empty rows that is followed by a non-empty row
    keeps only its first ``max_empty`` rows; runs no longer than ``max_empty``
    are preserved as-is. This applies to a leading run as well as to interior
    runs. Trailing empty rows are always dropped, because buffered empty rows
    are only emitted once a later non-empty row is seen.

    Args:
        rows: Matrix to clean. The list itself is not modified.
        max_empty: Maximum number of consecutive empty rows to keep.

    Returns:
        A new list with the retained rows in their original order.
    """
    result: list[list[str]] = []
    empty_buffer: list[list[str]] = []

    for row in rows:
        if not any(row):
            # Hold empty rows back until we know a non-empty row follows them.
            empty_buffer.append(row)
            continue

        # Emit up to max_empty of the buffered empty rows - that's what we
        # allow - followed by the new non-empty row.
        result.extend(empty_buffer[:max_empty])
        result.append(row)
        empty_buffer = []

    return result
|
||||
|
||||
|
||||
def xlsx_to_text(file: IO[Any], file_name: str = "") -> str:
|
||||
# TODO: switch back to this approach in a few months when markitdown
|
||||
# fixes their handling of excel files
|
||||
@@ -390,30 +451,15 @@ def xlsx_to_text(file: IO[Any], file_name: str = "") -> str:
|
||||
f"Failed to extract text from {file_name or 'xlsx file'}. This happens due to a bug in openpyxl. {e}"
|
||||
)
|
||||
return ""
|
||||
raise e
|
||||
raise
|
||||
|
||||
text_content = []
|
||||
for sheet in workbook.worksheets:
|
||||
rows = []
|
||||
num_empty_consecutive_rows = 0
|
||||
for row in sheet.iter_rows(min_row=1, values_only=True):
|
||||
row_str = ",".join(str(cell or "") for cell in row)
|
||||
|
||||
# Only add the row if there are any values in the cells
|
||||
if len(row_str) >= len(row):
|
||||
rows.append(row_str)
|
||||
num_empty_consecutive_rows = 0
|
||||
else:
|
||||
num_empty_consecutive_rows += 1
|
||||
|
||||
if num_empty_consecutive_rows > 100:
|
||||
# handle massive excel sheets with mostly empty cells
|
||||
logger.warning(
|
||||
f"Found {num_empty_consecutive_rows} empty rows in {file_name}, skipping rest of file"
|
||||
)
|
||||
break
|
||||
sheet_str = "\n".join(rows)
|
||||
text_content.append(sheet_str)
|
||||
sheet_matrix = _clean_worksheet_matrix(_worksheet_to_matrix(sheet))
|
||||
buf = io.StringIO()
|
||||
writer = csv.writer(buf, lineterminator="\n")
|
||||
writer.writerows(sheet_matrix)
|
||||
text_content.append(buf.getvalue().rstrip("\n"))
|
||||
return TEXT_SECTION_SEPARATOR.join(text_content)
|
||||
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ from fastapi.testclient import TestClient
|
||||
from onyx.auth.users import current_admin_user
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.models import UserRole
|
||||
from onyx.main import get_application
|
||||
from onyx.main import fetch_versioned_implementation
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
@@ -51,8 +51,11 @@ def client() -> Generator[TestClient, None, None]:
|
||||
# Patch out prometheus metrics setup to avoid "Duplicated timeseries in
|
||||
# CollectorRegistry" errors when multiple tests each create a new app
|
||||
# (prometheus registers metrics globally and rejects duplicate names).
|
||||
get_app = fetch_versioned_implementation(
|
||||
module="onyx.main", attribute="get_application"
|
||||
)
|
||||
with patch("onyx.main.setup_prometheus_metrics"):
|
||||
app: FastAPI = get_application(lifespan_override=test_lifespan)
|
||||
app: FastAPI = get_app(lifespan_override=test_lifespan)
|
||||
|
||||
# Override the database session dependency with a mock
|
||||
# (these tests don't actually need DB access)
|
||||
|
||||
@@ -17,9 +17,6 @@ from unittest.mock import patch
|
||||
import pytest
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.background.celery.tasks.opensearch_migration.constants import (
|
||||
GET_VESPA_CHUNKS_SLICE_COUNT,
|
||||
)
|
||||
from onyx.background.celery.tasks.opensearch_migration.tasks import (
|
||||
is_continuation_token_done_for_all_slices,
|
||||
)
|
||||
@@ -323,15 +320,9 @@ def test_embedding_dimension(db_session: Session) -> Generator[int, None, None]:
|
||||
@pytest.fixture(scope="function")
|
||||
def patch_get_vespa_chunks_page_size() -> Generator[int, None, None]:
|
||||
test_page_size = 5
|
||||
with (
|
||||
patch(
|
||||
"onyx.background.celery.tasks.opensearch_migration.tasks.GET_VESPA_CHUNKS_PAGE_SIZE",
|
||||
test_page_size,
|
||||
),
|
||||
patch(
|
||||
"onyx.background.celery.tasks.opensearch_migration.constants.GET_VESPA_CHUNKS_PAGE_SIZE",
|
||||
test_page_size,
|
||||
),
|
||||
with patch(
|
||||
"onyx.background.celery.tasks.opensearch_migration.tasks.GET_VESPA_CHUNKS_PAGE_SIZE",
|
||||
test_page_size,
|
||||
):
|
||||
yield test_page_size # Test runs here.
|
||||
|
||||
@@ -591,175 +582,6 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
|
||||
document_chunks[document.id][opensearch_chunk.chunk_index],
|
||||
)
|
||||
|
||||
def test_chunk_migration_visits_all_chunks_even_when_batch_size_varies(
|
||||
self,
|
||||
db_session: Session,
|
||||
test_documents: list[Document],
|
||||
vespa_document_index: VespaDocumentIndex,
|
||||
opensearch_client: OpenSearchIndexClient,
|
||||
test_embedding_dimension: int,
|
||||
clean_migration_tables: None, # noqa: ARG002
|
||||
enable_opensearch_indexing_for_onyx: None, # noqa: ARG002
|
||||
) -> None:
|
||||
"""
|
||||
Tests that chunk migration works correctly even when the batch size
|
||||
changes halfway through a migration.
|
||||
|
||||
Simulates task time running out by mocking the locking behavior.
|
||||
"""
|
||||
# Precondition.
|
||||
# Index chunks into Vespa.
|
||||
document_chunks: dict[str, list[dict[str, Any]]] = {
|
||||
document.id: [
|
||||
_create_raw_document_chunk(
|
||||
document_id=document.id,
|
||||
chunk_index=i,
|
||||
content=f"Test content {i} for {document.id}",
|
||||
embedding=_generate_test_vector(test_embedding_dimension),
|
||||
now=datetime.now(),
|
||||
title=f"Test title {document.id}",
|
||||
title_embedding=_generate_test_vector(test_embedding_dimension),
|
||||
)
|
||||
for i in range(CHUNK_COUNT)
|
||||
]
|
||||
for document in test_documents
|
||||
}
|
||||
all_chunks: list[dict[str, Any]] = []
|
||||
for chunks in document_chunks.values():
|
||||
all_chunks.extend(chunks)
|
||||
vespa_document_index.index_raw_chunks(all_chunks)
|
||||
|
||||
# Run the initial batch. To simulate partial progress we will mock the
|
||||
# redis lock to return True for the first invocation of .owned() and
|
||||
# False subsequently.
|
||||
# NOTE: The batch size is currently set to 5 in
|
||||
# patch_get_vespa_chunks_page_size.
|
||||
mock_redis_client = Mock()
|
||||
mock_lock = Mock()
|
||||
mock_lock.owned.side_effect = [True, False, False]
|
||||
mock_lock.acquire.return_value = True
|
||||
mock_redis_client.lock.return_value = mock_lock
|
||||
with patch(
|
||||
"onyx.background.celery.tasks.opensearch_migration.tasks.get_redis_client",
|
||||
return_value=mock_redis_client,
|
||||
):
|
||||
result_1 = migrate_chunks_from_vespa_to_opensearch_task(
|
||||
tenant_id=get_current_tenant_id()
|
||||
)
|
||||
|
||||
assert result_1 is True
|
||||
# Expire the session cache to see the committed changes from the task.
|
||||
db_session.expire_all()
|
||||
|
||||
# Verify partial progress was saved.
|
||||
tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
|
||||
assert tenant_record is not None
|
||||
partial_chunks_migrated = tenant_record.total_chunks_migrated
|
||||
assert partial_chunks_migrated > 0
|
||||
# page_size applies per slice, so one iteration can fetch up to
|
||||
# page_size * GET_VESPA_CHUNKS_SLICE_COUNT chunks total.
|
||||
assert partial_chunks_migrated <= 5 * GET_VESPA_CHUNKS_SLICE_COUNT
|
||||
assert tenant_record.vespa_visit_continuation_token is not None
|
||||
# Slices are not necessarily evenly distributed across all document
|
||||
# chunks so we can't test that every token is non-None, but certainly at
|
||||
# least one must be.
|
||||
assert any(json.loads(tenant_record.vespa_visit_continuation_token).values())
|
||||
assert tenant_record.migration_completed_at is None
|
||||
assert tenant_record.approx_chunk_count_in_vespa is not None
|
||||
|
||||
# Under test.
|
||||
# Now patch the batch size to be some other number, like 2.
|
||||
mock_redis_client = Mock()
|
||||
mock_lock = Mock()
|
||||
mock_lock.owned.side_effect = [True, False, False]
|
||||
mock_lock.acquire.return_value = True
|
||||
mock_redis_client.lock.return_value = mock_lock
|
||||
with (
|
||||
patch(
|
||||
"onyx.background.celery.tasks.opensearch_migration.tasks.GET_VESPA_CHUNKS_PAGE_SIZE",
|
||||
2,
|
||||
),
|
||||
patch(
|
||||
"onyx.background.celery.tasks.opensearch_migration.constants.GET_VESPA_CHUNKS_PAGE_SIZE",
|
||||
2,
|
||||
),
|
||||
patch(
|
||||
"onyx.background.celery.tasks.opensearch_migration.tasks.get_redis_client",
|
||||
return_value=mock_redis_client,
|
||||
),
|
||||
):
|
||||
result_2 = migrate_chunks_from_vespa_to_opensearch_task(
|
||||
tenant_id=get_current_tenant_id()
|
||||
)
|
||||
|
||||
# Postcondition.
|
||||
assert result_2 is True
|
||||
# Expire the session cache to see the committed changes from the task.
|
||||
db_session.expire_all()
|
||||
|
||||
# Verify next partial progress was saved.
|
||||
tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
|
||||
assert tenant_record is not None
|
||||
new_partial_chunks_migrated = tenant_record.total_chunks_migrated
|
||||
assert new_partial_chunks_migrated > partial_chunks_migrated
|
||||
# page_size applies per slice, so one iteration can fetch up to
|
||||
# page_size * GET_VESPA_CHUNKS_SLICE_COUNT chunks total.
|
||||
assert new_partial_chunks_migrated <= (5 + 2) * GET_VESPA_CHUNKS_SLICE_COUNT
|
||||
assert tenant_record.vespa_visit_continuation_token is not None
|
||||
# Slices are not necessarily evenly distributed across all document
|
||||
# chunks so we can't test that every token is non-None, but certainly at
|
||||
# least one must be.
|
||||
assert any(json.loads(tenant_record.vespa_visit_continuation_token).values())
|
||||
assert tenant_record.migration_completed_at is None
|
||||
assert tenant_record.approx_chunk_count_in_vespa is not None
|
||||
|
||||
# Under test.
|
||||
# Run the remainder of the migration.
|
||||
with (
|
||||
patch(
|
||||
"onyx.background.celery.tasks.opensearch_migration.tasks.GET_VESPA_CHUNKS_PAGE_SIZE",
|
||||
2,
|
||||
),
|
||||
patch(
|
||||
"onyx.background.celery.tasks.opensearch_migration.constants.GET_VESPA_CHUNKS_PAGE_SIZE",
|
||||
2,
|
||||
),
|
||||
):
|
||||
result_3 = migrate_chunks_from_vespa_to_opensearch_task(
|
||||
tenant_id=get_current_tenant_id()
|
||||
)
|
||||
|
||||
# Postcondition.
|
||||
assert result_3 is True
|
||||
# Expire the session cache to see the committed changes from the task.
|
||||
db_session.expire_all()
|
||||
|
||||
# Verify completion.
|
||||
tenant_record = db_session.query(OpenSearchTenantMigrationRecord).first()
|
||||
assert tenant_record is not None
|
||||
assert tenant_record.total_chunks_migrated > new_partial_chunks_migrated
|
||||
assert tenant_record.total_chunks_migrated == len(all_chunks)
|
||||
# Visit is complete, so the continuation token should be present and marked done for all slices.
|
||||
assert tenant_record.vespa_visit_continuation_token is not None
|
||||
assert is_continuation_token_done_for_all_slices(
|
||||
json.loads(tenant_record.vespa_visit_continuation_token)
|
||||
)
|
||||
assert tenant_record.migration_completed_at is not None
|
||||
assert tenant_record.approx_chunk_count_in_vespa == len(all_chunks)
|
||||
|
||||
# Verify chunks were indexed in OpenSearch.
|
||||
for document in test_documents:
|
||||
opensearch_chunks = _get_document_chunks_from_opensearch(
|
||||
opensearch_client, document.id, get_current_tenant_id()
|
||||
)
|
||||
assert len(opensearch_chunks) == CHUNK_COUNT
|
||||
opensearch_chunks.sort(key=lambda x: x.chunk_index)
|
||||
for opensearch_chunk in opensearch_chunks:
|
||||
_assert_chunk_matches_vespa_chunk(
|
||||
opensearch_chunk,
|
||||
document_chunks[document.id][opensearch_chunk.chunk_index],
|
||||
)
|
||||
|
||||
def test_chunk_migration_empty_vespa(
|
||||
self,
|
||||
db_session: Session,
|
||||
|
||||
0
backend/tests/unit/onyx/file_processing/__init__.py
Normal file
0
backend/tests/unit/onyx/file_processing/__init__.py
Normal file
196
backend/tests/unit/onyx/file_processing/test_xlsx_to_text.py
Normal file
196
backend/tests/unit/onyx/file_processing/test_xlsx_to_text.py
Normal file
@@ -0,0 +1,196 @@
|
||||
import io
|
||||
|
||||
import openpyxl
|
||||
|
||||
from onyx.file_processing.extract_file_text import xlsx_to_text
|
||||
|
||||
|
||||
def _make_xlsx(sheets: dict[str, list[list[str]]]) -> io.BytesIO:
    """Build an in-memory xlsx workbook from a mapping of sheet name -> rows.

    Each key becomes a worksheet title and each inner list is appended to that
    worksheet as one row, in order. The workbook's initially active sheet (if
    any) is removed first so only the requested sheets remain. The returned
    buffer is rewound to position 0 so it can be read immediately.
    """
    workbook = openpyxl.Workbook()
    default_sheet = workbook.active
    if default_sheet is not None:
        workbook.remove(default_sheet)

    for title, matrix in sheets.items():
        worksheet = workbook.create_sheet(title=title)
        for cells in matrix:
            worksheet.append(cells)

    stream = io.BytesIO()
    workbook.save(stream)
    stream.seek(0)
    return stream
|
||||
|
||||
|
||||
class TestXlsxToText:
    """Checks on the text that ``xlsx_to_text`` produces for small workbooks."""

    @staticmethod
    def _non_blank_lines(text: str) -> list[str]:
        """Split *text* on newlines and drop lines that are only whitespace."""
        return [row for row in text.strip().split("\n") if row.strip()]

    def test_single_sheet_basic(self) -> None:
        """A 3-row sheet yields 3 non-blank lines with the cell values in order."""
        table = [
            ["Name", "Age"],
            ["Alice", "30"],
            ["Bob", "25"],
        ]
        output = xlsx_to_text(_make_xlsx({"Sheet1": table}))
        rows_out = self._non_blank_lines(output)
        assert len(rows_out) == 3
        assert "Name" in rows_out[0]
        assert "Age" in rows_out[0]
        assert "Alice" in rows_out[1]
        assert "30" in rows_out[1]
        assert "Bob" in rows_out[2]

    def test_multiple_sheets_separated(self) -> None:
        """Content of two sheets ends up in blank-line separated sections."""
        workbook = _make_xlsx(
            {
                "Sheet1": [["a", "b"]],
                "Sheet2": [["c", "d"]],
            }
        )
        output = xlsx_to_text(workbook)
        # TEXT_SECTION_SEPARATOR is "\n\n"
        assert "\n\n" in output
        sections = output.split("\n\n")
        assert any("a" in section for section in sections)
        assert any("c" in section for section in sections)

    def test_empty_cells(self) -> None:
        """Rows that mix empty and filled cells still produce one line each."""
        table = [
            ["a", "", "b"],
            ["", "c", ""],
        ]
        output = xlsx_to_text(_make_xlsx({"Sheet1": table}))
        assert len(self._non_blank_lines(output)) == 2

    def test_commas_in_cells_are_quoted(self) -> None:
        """Cells containing commas should be quoted in CSV output."""
        workbook = _make_xlsx({"Sheet1": [["hello, world", "normal"]]})
        output = xlsx_to_text(workbook)
        assert '"hello, world"' in output

    def test_empty_workbook(self) -> None:
        """A sheet with no rows yields only whitespace (or nothing)."""
        output = xlsx_to_text(_make_xlsx({"Sheet1": []}))
        assert output.strip() == ""

    def test_long_empty_row_run_capped(self) -> None:
        """Runs of >2 empty rows should be capped to 2."""
        table = [
            ["header"],
            [""],
            [""],
            [""],
            [""],
            ["data"],
        ]
        output = xlsx_to_text(_make_xlsx({"Sheet1": table}))
        rows_out = self._non_blank_lines(output)
        # 4 empty rows capped to 2, so: header + 2 empty + data = 4 lines
        assert len(rows_out) == 4
        assert "header" in rows_out[0]
        assert "data" in rows_out[-1]

    def test_long_empty_col_run_capped(self) -> None:
        """Runs of >2 empty columns should be capped to 2."""
        table = [
            ["a", "", "", "", "b"],
            ["c", "", "", "", "d"],
        ]
        output = xlsx_to_text(_make_xlsx({"Sheet1": table}))
        rows_out = self._non_blank_lines(output)
        assert len(rows_out) == 2
        # Each row should have 4 fields (a + 2 empty + b), not 5,
        # i.e. csv format "a,,,b": 3 commas = 4 fields.
        leading_row = rows_out[0].strip()
        assert leading_row.count(",") == 3

    def test_short_empty_runs_kept(self) -> None:
        """Runs of <=2 empty rows/cols should be preserved."""
        table = [
            ["a", "b"],
            ["", ""],
            ["", ""],
            ["c", "d"],
        ]
        output = xlsx_to_text(_make_xlsx({"Sheet1": table}))
        # All 4 rows preserved (2 empty rows <= threshold)
        assert len(self._non_blank_lines(output)) == 4

    def test_bad_zip_file_returns_empty(self) -> None:
        """Bytes that are not a zip archive produce an empty string."""
        not_a_workbook = io.BytesIO(b"not a zip file")
        assert xlsx_to_text(not_a_workbook, file_name="test.xlsx") == ""

    def test_bad_zip_tilde_file_returns_empty(self) -> None:
        """Same as above, but with a ``~$``-prefixed file name."""
        not_a_workbook = io.BytesIO(b"not a zip file")
        assert xlsx_to_text(not_a_workbook, file_name="~$temp.xlsx") == ""

    def test_large_sparse_sheet(self) -> None:
        """A sheet with data, a big empty gap, and more data — gap is capped to 2."""
        table: list[list[str]] = [
            ["row1_data"],
            *([""] for _ in range(10)),
            ["row2_data"],
        ]
        output = xlsx_to_text(_make_xlsx({"Sheet1": table}))
        rows_out = self._non_blank_lines(output)
        # 10 empty rows capped to 2: row1_data + 2 empty + row2_data = 4
        assert len(rows_out) == 4
        assert "row1_data" in rows_out[0]
        assert "row2_data" in rows_out[-1]

    def test_quotes_in_cells(self) -> None:
        """Cells containing quotes should be properly escaped."""
        workbook = _make_xlsx({"Sheet1": [['say "hello"', "normal"]]})
        output = xlsx_to_text(workbook)
        # csv.writer escapes quotes by doubling them
        assert '""hello""' in output

    def test_each_row_is_separate_line(self) -> None:
        """Each row should produce its own line (regression for writerow vs writerows)."""
        table = [
            ["r1c1", "r1c2"],
            ["r2c1", "r2c2"],
            ["r3c1", "r3c2"],
        ]
        output = xlsx_to_text(_make_xlsx({"Sheet1": table}))
        rows_out = self._non_blank_lines(output)
        assert len(rows_out) == 3
        for line, (first_cell, second_cell) in zip(rows_out, table):
            assert first_cell in line and second_cell in line
|
||||
@@ -15,9 +15,8 @@
|
||||
# -f docker-compose.dev.yml up -d --wait
|
||||
#
|
||||
# This overlay:
|
||||
# - Moves Vespa (index), both model servers, OpenSearch, MinIO,
|
||||
# Redis (cache), and the background worker to profiles so they do
|
||||
# not start by default
|
||||
# - Moves Vespa (index), both model servers, code-interpreter, Redis (cache),
|
||||
# and the background worker to profiles so they do not start by default
|
||||
# - Makes depends_on references to removed services optional
|
||||
# - Sets DISABLE_VECTOR_DB=true on the api_server
|
||||
# - Uses PostgreSQL for caching and auth instead of Redis
|
||||
@@ -28,8 +27,7 @@
|
||||
# --profile inference Inference model server
|
||||
# --profile background Background worker (Celery) — also needs redis
|
||||
# --profile redis Redis cache
|
||||
# --profile opensearch OpenSearch
|
||||
# --profile s3-filestore MinIO (S3-compatible file store)
|
||||
# --profile code-interpreter Code interpreter
|
||||
# =============================================================================
|
||||
|
||||
name: onyx
|
||||
@@ -40,9 +38,6 @@ services:
|
||||
index:
|
||||
condition: service_started
|
||||
required: false
|
||||
opensearch:
|
||||
condition: service_started
|
||||
required: false
|
||||
cache:
|
||||
condition: service_started
|
||||
required: false
|
||||
@@ -89,10 +84,4 @@ services:
|
||||
inference_model_server:
|
||||
profiles: ["inference"]
|
||||
|
||||
# OpenSearch is not needed in lite mode (no indexing).
|
||||
opensearch:
|
||||
profiles: ["opensearch"]
|
||||
|
||||
# MinIO is not needed in lite mode (Postgres handles file storage).
|
||||
minio:
|
||||
profiles: ["s3-filestore"]
|
||||
code-interpreter: {}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -euo pipefail
|
||||
set -e
|
||||
|
||||
# Expected resource requirements (overridden below if --lite)
|
||||
# Expected resource requirements
|
||||
EXPECTED_DOCKER_RAM_GB=10
|
||||
EXPECTED_DISK_GB=32
|
||||
|
||||
@@ -10,11 +10,6 @@ EXPECTED_DISK_GB=32
|
||||
SHUTDOWN_MODE=false
|
||||
DELETE_DATA_MODE=false
|
||||
INCLUDE_CRAFT=false # Disabled by default, use --include-craft to enable
|
||||
LITE_MODE=false # Disabled by default, use --lite to enable
|
||||
USE_LOCAL_FILES=false # Disabled by default, use --local to skip downloading config files
|
||||
NO_PROMPT=false
|
||||
DRY_RUN=false
|
||||
VERBOSE=false
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
@@ -30,26 +25,6 @@ while [[ $# -gt 0 ]]; do
|
||||
INCLUDE_CRAFT=true
|
||||
shift
|
||||
;;
|
||||
--lite)
|
||||
LITE_MODE=true
|
||||
shift
|
||||
;;
|
||||
--local)
|
||||
USE_LOCAL_FILES=true
|
||||
shift
|
||||
;;
|
||||
--no-prompt)
|
||||
NO_PROMPT=true
|
||||
shift
|
||||
;;
|
||||
--dry-run)
|
||||
DRY_RUN=true
|
||||
shift
|
||||
;;
|
||||
--verbose)
|
||||
VERBOSE=true
|
||||
shift
|
||||
;;
|
||||
--help|-h)
|
||||
echo "Onyx Installation Script"
|
||||
echo ""
|
||||
@@ -57,23 +32,15 @@ while [[ $# -gt 0 ]]; do
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " --include-craft Enable Onyx Craft (AI-powered web app building)"
|
||||
echo " --lite Deploy Onyx Lite (no Vespa, Redis, or model servers)"
|
||||
echo " --local Use existing config files instead of downloading from GitHub"
|
||||
echo " --shutdown Stop (pause) Onyx containers"
|
||||
echo " --delete-data Remove all Onyx data (containers, volumes, and files)"
|
||||
echo " --no-prompt Run non-interactively with defaults (for CI/automation)"
|
||||
echo " --dry-run Show what would be done without making changes"
|
||||
echo " --verbose Show detailed output for debugging"
|
||||
echo " --help, -h Show this help message"
|
||||
echo ""
|
||||
echo "Examples:"
|
||||
echo " $0 # Install Onyx"
|
||||
echo " $0 --lite # Install Onyx Lite (minimal deployment)"
|
||||
echo " $0 --include-craft # Install Onyx with Craft enabled"
|
||||
echo " $0 --shutdown # Pause Onyx services"
|
||||
echo " $0 --delete-data # Completely remove Onyx and all data"
|
||||
echo " $0 --local # Re-run using existing config files on disk"
|
||||
echo " $0 --no-prompt # Non-interactive install with defaults"
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
@@ -84,129 +51,8 @@ while [[ $# -gt 0 ]]; do
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ "$VERBOSE" = true ]]; then
|
||||
set -x
|
||||
fi
|
||||
|
||||
if [[ "$LITE_MODE" = true ]] && [[ "$INCLUDE_CRAFT" = true ]]; then
|
||||
echo "ERROR: --lite and --include-craft cannot be used together."
|
||||
echo "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# When --lite is passed as a flag, lower resource thresholds early (before the
|
||||
# resource check). When lite is chosen interactively, the thresholds are adjusted
|
||||
# inside the new-deployment flow, after the resource check has already passed
|
||||
# with the standard thresholds — which is the safer direction.
|
||||
if [[ "$LITE_MODE" = true ]]; then
|
||||
EXPECTED_DOCKER_RAM_GB=4
|
||||
EXPECTED_DISK_GB=16
|
||||
fi
|
||||
|
||||
INSTALL_ROOT="${INSTALL_PREFIX:-onyx_data}"
|
||||
|
||||
LITE_COMPOSE_FILE="docker-compose.onyx-lite.yml"
|
||||
|
||||
# Print the "-f <file>" arguments to hand to docker compose.
# The base docker-compose.yml is always listed. The lite overlay is appended
# when LITE_MODE is true, or when "true" is passed as $1 and a previously
# downloaded overlay exists under ${INSTALL_ROOT}/deployment (auto-detect,
# used by shutdown/delete-data so users don't need to remember --lite).
# Without $1, an overlay left on disk by a prior run is ignored, so
# install/start never silently stays in lite mode.
compose_file_args() {
    local auto_detect="${1:-false}"
    local use_lite=false

    if [[ "$LITE_MODE" = true ]]; then
        use_lite=true
    elif [[ "$auto_detect" = true && -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" ]]; then
        use_lite=true
    fi

    if [[ "$use_lite" = true ]]; then
        echo "-f docker-compose.yml -f ${LITE_COMPOSE_FILE}"
    else
        echo "-f docker-compose.yml"
    fi
}
|
||||
|
||||
# --- Downloader detection (curl with wget fallback) ---
|
||||
DOWNLOADER=""
|
||||
detect_downloader() {
    # Record the first available tool in DOWNLOADER: curl is preferred,
    # wget is the fallback. Exits the script with status 1 if neither exists.
    local tool
    for tool in curl wget; do
        if command -v "$tool" &> /dev/null; then
            DOWNLOADER="$tool"
            return 0
        fi
    done
    echo "ERROR: Neither curl nor wget found. Please install one and retry."
    exit 1
}
|
||||
detect_downloader
|
||||
|
||||
# Fetch URL $1 into file $2 with the tool named in DOWNLOADER.
# curl is used when DOWNLOADER is "curl"; any other value falls through to wget.
# The exit status is that of the underlying command.
download_file() {
    local url="$1"
    local output="$2"
    case "$DOWNLOADER" in
        curl)
            curl -fsSL --retry 3 --retry-delay 2 --retry-connrefused -o "$output" "$url"
            ;;
        *)
            wget -q --tries=3 --timeout=20 -O "$output" "$url"
            ;;
    esac
}
|
||||
|
||||
# Ensures a required file is present.
#   $1  destination path on disk
#   $2  URL to download from
#   $3  short description used in log messages
# With --local (USE_LOCAL_FILES=true), only verifies the file exists on disk.
# Otherwise downloads it from the given URL. Returns 0 on success, 1 on
# failure (caller should handle the exit).
#
# The download goes to a temporary sibling file and is moved into place only
# on success. Writing straight to the destination would let a failed or
# partial download (wget -O truncates its target up front) destroy a
# previously good copy, which a later --local run would then accept as valid.
ensure_file() {
    local path="$1"
    local url="$2"
    local desc="$3"
    local tmp_path="${path}.tmp.$$"

    if [[ "$USE_LOCAL_FILES" = true ]]; then
        if [[ -f "$path" ]]; then
            print_success "Using existing ${desc}"
            return 0
        fi
        print_error "Required file missing: ${desc} (${path})"
        return 1
    fi

    print_info "Downloading ${desc}..."
    if download_file "$url" "$tmp_path" 2>/dev/null && mv -f "$tmp_path" "$path"; then
        print_success "${desc} downloaded"
        return 0
    fi

    # Drop whatever partial content the failed attempt left behind.
    rm -f "$tmp_path"
    print_error "Failed to download ${desc}"
    print_info "Please ensure you have internet connection and try again"
    return 1
}
|
||||
|
||||
# --- Interactive prompt helpers ---
|
||||
is_interactive() {
    # Succeeds only when prompting is allowed (NO_PROMPT is false) and
    # stdin is attached to a terminal.
    [[ "$NO_PROMPT" = false && -t 0 ]]
}
|
||||
|
||||
# Ask a free-form question and leave the answer in REPLY.
#   $1  prompt text
#   $2  value used when the session is not interactive or the answer is empty
prompt_or_default() {
    local prompt_text="$1"
    local default_value="$2"

    if ! is_interactive; then
        REPLY="$default_value"
        return 0
    fi

    read -p "$prompt_text" -r REPLY
    REPLY="${REPLY:-$default_value}"
}
|
||||
|
||||
# Ask a single-keystroke question and leave the answer in REPLY.
#   $1  prompt text
#   $2  value used when the session is not interactive or no character is given
prompt_yn_or_default() {
    local prompt_text="$1"
    local default_value="$2"

    if ! is_interactive; then
        REPLY="$default_value"
        return 0
    fi

    # -n 1 returns after one character, so emit the newline ourselves.
    read -p "$prompt_text" -n 1 -r
    echo ""
    REPLY="${REPLY:-$default_value}"
}
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
@@ -265,7 +111,7 @@ if [ "$SHUTDOWN_MODE" = true ]; then
|
||||
fi
|
||||
|
||||
# Stop containers (without removing them)
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args true) stop)
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml stop)
|
||||
if [ $? -eq 0 ]; then
|
||||
print_success "Onyx containers stopped (paused)"
|
||||
else
|
||||
@@ -294,17 +140,12 @@ if [ "$DELETE_DATA_MODE" = true ]; then
|
||||
echo " • All downloaded files and configurations"
|
||||
echo " • All user data and documents"
|
||||
echo ""
|
||||
if is_interactive; then
|
||||
read -p "Are you sure you want to continue? Type 'DELETE' to confirm: " -r
|
||||
echo ""
|
||||
if [ "$REPLY" != "DELETE" ]; then
|
||||
print_info "Operation cancelled."
|
||||
exit 0
|
||||
fi
|
||||
else
|
||||
print_error "Cannot confirm destructive operation in non-interactive mode."
|
||||
print_info "Run interactively or remove the ${INSTALL_ROOT} directory manually."
|
||||
exit 1
|
||||
read -p "Are you sure you want to continue? Type 'DELETE' to confirm: " -r
|
||||
echo ""
|
||||
|
||||
if [ "$REPLY" != "DELETE" ]; then
|
||||
print_info "Operation cancelled."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
print_info "Removing Onyx containers and volumes..."
|
||||
@@ -323,7 +164,7 @@ if [ "$DELETE_DATA_MODE" = true ]; then
|
||||
fi
|
||||
|
||||
# Stop and remove containers with volumes
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args true) down -v)
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml down -v)
|
||||
if [ $? -eq 0 ]; then
|
||||
print_success "Onyx containers and volumes removed"
|
||||
else
|
||||
@@ -345,117 +186,6 @@ if [ "$DELETE_DATA_MODE" = true ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# --- Auto-install Docker (Linux only) ---
|
||||
# Runs before the banner so a group-based re-exec doesn't repeat it.
|
||||
install_docker_linux() {
    # Install Docker using the distro ID read from /etc/os-release:
    # Amazon Linux ("amzn") uses dnf (or yum when dnf is absent); every other
    # distro runs the get.docker.com convenience script. Afterwards the
    # docker service is started and enabled on a best-effort basis.
    # Returns 1 if the convenience script cannot be downloaded or fails.
    local distro_id=""
    local installer=""
    if [[ -f /etc/os-release ]]; then
        # Source in a subshell so os-release variables don't leak into this script.
        distro_id="$(. /etc/os-release && echo "${ID:-}")"
    fi

    case "$distro_id" in
        amzn)
            print_info "Detected Amazon Linux — installing Docker via package manager..."
            if command -v dnf &> /dev/null; then
                sudo dnf install -y docker
            else
                sudo yum install -y docker
            fi
            ;;
        *)
            print_info "Installing Docker via get.docker.com..."
            # Use a private, unpredictable temp file: the script is executed
            # with sudo, so a fixed path in /tmp could be pre-created or
            # swapped by another local user.
            installer="$(mktemp)"
            if download_file "https://get.docker.com" "$installer" && sudo sh "$installer"; then
                rm -f "$installer"
            else
                rm -f "$installer"
                print_error "Docker install script failed to download or run."
                return 1
            fi
            ;;
    esac

    # Service management differs between init systems; failures here are not fatal.
    sudo systemctl start docker 2>/dev/null || sudo service docker start 2>/dev/null || true
    sudo systemctl enable docker 2>/dev/null || true
}
|
||||
|
||||
# Detect OS (including WSL)
|
||||
IS_WSL=false
|
||||
if [[ -n "${WSL_DISTRO_NAME:-}" ]] || grep -qi microsoft /proc/version 2>/dev/null; then
|
||||
IS_WSL=true
|
||||
fi
|
||||
|
||||
# Dry-run: show plan and exit
|
||||
if [[ "$DRY_RUN" = true ]]; then
|
||||
print_info "Dry run mode — showing what would happen:"
|
||||
echo " • Install root: ${INSTALL_ROOT}"
|
||||
echo " • Lite mode: ${LITE_MODE}"
|
||||
echo " • Include Craft: ${INCLUDE_CRAFT}"
|
||||
echo " • OS type: ${OSTYPE:-unknown} (WSL: ${IS_WSL})"
|
||||
echo " • Downloader: ${DOWNLOADER}"
|
||||
echo ""
|
||||
print_success "Dry run complete (no changes made)"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if ! command -v docker &> /dev/null; then
|
||||
if [[ "$OSTYPE" == "linux-gnu"* ]] || [[ -n "${WSL_DISTRO_NAME:-}" ]]; then
|
||||
install_docker_linux
|
||||
if ! command -v docker &> /dev/null; then
|
||||
print_error "Docker installation failed."
|
||||
echo " Visit: https://docs.docker.com/get-docker/"
|
||||
exit 1
|
||||
fi
|
||||
print_success "Docker installed successfully"
|
||||
fi
|
||||
fi
|
||||
|
||||
# --- Auto-install Docker Compose plugin (Linux only) ---
|
||||
if command -v docker &> /dev/null \
|
||||
&& ! docker compose version &> /dev/null \
|
||||
&& ! command -v docker-compose &> /dev/null \
|
||||
&& { [[ "$OSTYPE" == "linux-gnu"* ]] || [[ -n "${WSL_DISTRO_NAME:-}" ]]; }; then
|
||||
|
||||
print_info "Docker Compose not found — installing plugin..."
|
||||
COMPOSE_ARCH="$(uname -m)"
|
||||
COMPOSE_URL="https://github.com/docker/compose/releases/latest/download/docker-compose-linux-${COMPOSE_ARCH}"
|
||||
COMPOSE_DIR="/usr/local/lib/docker/cli-plugins"
|
||||
COMPOSE_TMP="$(mktemp)"
|
||||
sudo mkdir -p "$COMPOSE_DIR"
|
||||
if download_file "$COMPOSE_URL" "$COMPOSE_TMP"; then
|
||||
sudo mv "$COMPOSE_TMP" "$COMPOSE_DIR/docker-compose"
|
||||
sudo chmod +x "$COMPOSE_DIR/docker-compose"
|
||||
if docker compose version &> /dev/null; then
|
||||
print_success "Docker Compose plugin installed"
|
||||
else
|
||||
print_error "Docker Compose plugin installed but not detected."
|
||||
echo " Visit: https://docs.docker.com/compose/install/"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
rm -f "$COMPOSE_TMP"
|
||||
print_error "Failed to download Docker Compose plugin."
|
||||
echo " Visit: https://docs.docker.com/compose/install/"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# On Linux, ensure the current user can talk to the Docker daemon without
|
||||
# sudo. If necessary, add them to the "docker" group and re-exec the
|
||||
# script under that group so the rest of the install proceeds normally.
|
||||
if command -v docker &> /dev/null \
|
||||
&& { [[ "$OSTYPE" == "linux-gnu"* ]] || [[ -n "${WSL_DISTRO_NAME:-}" ]]; } \
|
||||
&& [[ "$(id -u)" -ne 0 ]] \
|
||||
&& ! docker info &> /dev/null; then
|
||||
if [[ "${_ONYX_REEXEC:-}" = "1" ]]; then
|
||||
print_error "Cannot connect to Docker after group re-exec."
|
||||
print_info "Log out and back in, then run the script again."
|
||||
exit 1
|
||||
fi
|
||||
if ! getent group docker &> /dev/null; then
|
||||
sudo groupadd docker
|
||||
fi
|
||||
print_info "Adding $USER to the docker group..."
|
||||
sudo usermod -aG docker "$USER"
|
||||
print_info "Re-launching with docker group active..."
|
||||
exec sg docker -c "_ONYX_REEXEC=1 bash $(printf '%q ' "$0" "$@")"
|
||||
fi
|
||||
|
||||
# ASCII Art Banner
|
||||
echo ""
|
||||
echo -e "${BLUE}${BOLD}"
|
||||
@@ -479,7 +209,8 @@ echo "2. Check your system resources (Docker, memory, disk space)"
|
||||
echo "3. Guide you through deployment options (version, authentication)"
|
||||
echo ""
|
||||
|
||||
if is_interactive; then
|
||||
# Only prompt for acknowledgment if running interactively
|
||||
if [ -t 0 ]; then
|
||||
echo -e "${YELLOW}${BOLD}Please acknowledge and press Enter to continue...${NC}"
|
||||
read -r
|
||||
echo ""
|
||||
@@ -529,35 +260,41 @@ else
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Returns 0 if $1 <= $2, 1 if $1 > $2
|
||||
# Handles missing or non-numeric parts gracefully (treats them as 0)
|
||||
# Function to compare version numbers
|
||||
version_compare() {
|
||||
local version1="${1:-0.0.0}"
|
||||
local version2="${2:-0.0.0}"
|
||||
# Returns 0 if $1 <= $2, 1 if $1 > $2
|
||||
local version1=$1
|
||||
local version2=$2
|
||||
|
||||
local v1_major v1_minor v1_patch v2_major v2_minor v2_patch
|
||||
v1_major=$(echo "$version1" | cut -d. -f1)
|
||||
v1_minor=$(echo "$version1" | cut -d. -f2)
|
||||
v1_patch=$(echo "$version1" | cut -d. -f3)
|
||||
v2_major=$(echo "$version2" | cut -d. -f1)
|
||||
v2_minor=$(echo "$version2" | cut -d. -f2)
|
||||
v2_patch=$(echo "$version2" | cut -d. -f3)
|
||||
# Split versions into components
|
||||
local v1_major=$(echo $version1 | cut -d. -f1)
|
||||
local v1_minor=$(echo $version1 | cut -d. -f2)
|
||||
local v1_patch=$(echo $version1 | cut -d. -f3)
|
||||
|
||||
# Default non-numeric or empty parts to 0
|
||||
[[ "$v1_major" =~ ^[0-9]+$ ]] || v1_major=0
|
||||
[[ "$v1_minor" =~ ^[0-9]+$ ]] || v1_minor=0
|
||||
[[ "$v1_patch" =~ ^[0-9]+$ ]] || v1_patch=0
|
||||
[[ "$v2_major" =~ ^[0-9]+$ ]] || v2_major=0
|
||||
[[ "$v2_minor" =~ ^[0-9]+$ ]] || v2_minor=0
|
||||
[[ "$v2_patch" =~ ^[0-9]+$ ]] || v2_patch=0
|
||||
local v2_major=$(echo $version2 | cut -d. -f1)
|
||||
local v2_minor=$(echo $version2 | cut -d. -f2)
|
||||
local v2_patch=$(echo $version2 | cut -d. -f3)
|
||||
|
||||
if [ "$v1_major" -lt "$v2_major" ]; then return 0
|
||||
elif [ "$v1_major" -gt "$v2_major" ]; then return 1; fi
|
||||
# Compare major version
|
||||
if [ "$v1_major" -lt "$v2_major" ]; then
|
||||
return 0
|
||||
elif [ "$v1_major" -gt "$v2_major" ]; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
if [ "$v1_minor" -lt "$v2_minor" ]; then return 0
|
||||
elif [ "$v1_minor" -gt "$v2_minor" ]; then return 1; fi
|
||||
# Compare minor version
|
||||
if [ "$v1_minor" -lt "$v2_minor" ]; then
|
||||
return 0
|
||||
elif [ "$v1_minor" -gt "$v2_minor" ]; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
[ "$v1_patch" -le "$v2_patch" ]
|
||||
# Compare patch version
|
||||
if [ "$v1_patch" -le "$v2_patch" ]; then
|
||||
return 0
|
||||
else
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Check Docker daemon
|
||||
@@ -599,20 +336,10 @@ fi
|
||||
|
||||
# Convert to GB for display
|
||||
if [ "$MEMORY_MB" -gt 0 ]; then
|
||||
MEMORY_GB=$(awk "BEGIN {printf \"%.1f\", $MEMORY_MB / 1024}")
|
||||
if [ "$(awk "BEGIN {print ($MEMORY_MB >= 1024)}")" = "1" ]; then
|
||||
MEMORY_DISPLAY="~${MEMORY_GB}GB"
|
||||
else
|
||||
MEMORY_DISPLAY="${MEMORY_MB}MB"
|
||||
fi
|
||||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||
print_info "Docker memory allocation: ${MEMORY_DISPLAY}"
|
||||
else
|
||||
print_info "System memory: ${MEMORY_DISPLAY} (Docker uses host memory directly)"
|
||||
fi
|
||||
MEMORY_GB=$((MEMORY_MB / 1024))
|
||||
print_info "Docker memory allocation: ~${MEMORY_GB}GB"
|
||||
else
|
||||
print_warning "Could not determine memory allocation"
|
||||
MEMORY_DISPLAY="unknown"
|
||||
print_warning "Could not determine Docker memory allocation"
|
||||
MEMORY_MB=0
|
||||
fi
|
||||
|
||||
@@ -631,7 +358,7 @@ RESOURCE_WARNING=false
|
||||
EXPECTED_RAM_MB=$((EXPECTED_DOCKER_RAM_GB * 1024))
|
||||
|
||||
if [ "$MEMORY_MB" -gt 0 ] && [ "$MEMORY_MB" -lt "$EXPECTED_RAM_MB" ]; then
|
||||
print_warning "Less than ${EXPECTED_DOCKER_RAM_GB}GB RAM available (found: ${MEMORY_DISPLAY})"
|
||||
print_warning "Docker has less than ${EXPECTED_DOCKER_RAM_GB}GB RAM allocated (found: ~${MEMORY_GB}GB)"
|
||||
RESOURCE_WARNING=true
|
||||
fi
|
||||
|
||||
@@ -642,10 +369,10 @@ fi
|
||||
|
||||
if [ "$RESOURCE_WARNING" = true ]; then
|
||||
echo ""
|
||||
print_warning "Onyx recommends at least ${EXPECTED_DOCKER_RAM_GB}GB RAM and ${EXPECTED_DISK_GB}GB disk space for optimal performance in standard mode."
|
||||
print_warning "Lite mode requires less resources (1-4GB RAM, 8-16GB disk depending on usage), but does not include a vector database."
|
||||
print_warning "Onyx recommends at least ${EXPECTED_DOCKER_RAM_GB}GB RAM and ${EXPECTED_DISK_GB}GB disk space for optimal performance."
|
||||
echo ""
|
||||
read -p "Do you want to continue anyway? (y/N): " -n 1 -r
|
||||
echo ""
|
||||
prompt_yn_or_default "Do you want to continue anyway? (Y/n): " "y"
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
|
||||
print_info "Installation cancelled. Please allocate more resources and try again."
|
||||
exit 1
|
||||
@@ -658,89 +385,117 @@ print_step "Creating directory structure"
|
||||
if [ -d "${INSTALL_ROOT}" ]; then
|
||||
print_info "Directory structure already exists"
|
||||
print_success "Using existing ${INSTALL_ROOT} directory"
|
||||
else
|
||||
mkdir -p "${INSTALL_ROOT}/deployment"
|
||||
mkdir -p "${INSTALL_ROOT}/data/nginx/local"
|
||||
print_success "Directory structure created"
|
||||
fi
|
||||
mkdir -p "${INSTALL_ROOT}/deployment"
|
||||
mkdir -p "${INSTALL_ROOT}/data/nginx/local"
|
||||
print_success "Directory structure created"
|
||||
|
||||
# Ensure all required configuration files are present
|
||||
# Download all required files
|
||||
print_step "Downloading Onyx configuration files"
|
||||
print_info "This step downloads all necessary configuration files from GitHub..."
|
||||
echo ""
|
||||
print_info "Downloading the following files:"
|
||||
echo " • docker-compose.yml - Main Docker Compose configuration"
|
||||
echo " • env.template - Environment variables template"
|
||||
echo " • nginx/app.conf.template - Nginx web server configuration"
|
||||
echo " • nginx/run-nginx.sh - Nginx startup script"
|
||||
echo " • README.md - Documentation and setup instructions"
|
||||
echo ""
|
||||
|
||||
# Download Docker Compose file
|
||||
COMPOSE_FILE="${INSTALL_ROOT}/deployment/docker-compose.yml"
|
||||
print_info "Downloading docker-compose.yml..."
|
||||
if curl -fsSL -o "$COMPOSE_FILE" "${GITHUB_RAW_URL}/docker-compose.yml" 2>/dev/null; then
|
||||
print_success "Docker Compose file downloaded successfully"
|
||||
|
||||
# Check if Docker Compose version is older than 2.24.0 and show warning
|
||||
# Skip check for dev builds (assume they're recent enough)
|
||||
if [ "$COMPOSE_VERSION" != "dev" ] && version_compare "$COMPOSE_VERSION" "2.24.0"; then
|
||||
print_warning "Docker Compose version $COMPOSE_VERSION is older than 2.24.0"
|
||||
echo ""
|
||||
print_warning "The docker-compose.yml file uses the newer env_file format that requires Docker Compose 2.24.0 or later."
|
||||
echo ""
|
||||
print_info "To use this configuration with your current Docker Compose version, you have two options:"
|
||||
echo ""
|
||||
echo "1. Upgrade Docker Compose to version 2.24.0 or later (recommended)"
|
||||
echo " Visit: https://docs.docker.com/compose/install/"
|
||||
echo ""
|
||||
echo "2. Manually replace all env_file sections in docker-compose.yml"
|
||||
echo " Change from:"
|
||||
echo " env_file:"
|
||||
echo " - path: .env"
|
||||
echo " required: false"
|
||||
echo " To:"
|
||||
echo " env_file: .env"
|
||||
echo ""
|
||||
print_warning "The installation will continue, but may fail if Docker Compose cannot parse the file."
|
||||
echo ""
|
||||
read -p "Do you want to continue anyway? (y/N): " -n 1 -r
|
||||
echo ""
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
|
||||
print_info "Installation cancelled. Please upgrade Docker Compose or manually edit the docker-compose.yml file."
|
||||
exit 1
|
||||
fi
|
||||
print_info "Proceeding with installation despite Docker Compose version compatibility issues..."
|
||||
fi
|
||||
else
|
||||
print_error "Failed to download Docker Compose file"
|
||||
print_info "Please ensure you have internet connection and try again"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Download env.template file
|
||||
ENV_TEMPLATE="${INSTALL_ROOT}/deployment/env.template"
|
||||
print_info "Downloading env.template..."
|
||||
if curl -fsSL -o "$ENV_TEMPLATE" "${GITHUB_RAW_URL}/env.template" 2>/dev/null; then
|
||||
print_success "Environment template downloaded successfully"
|
||||
else
|
||||
print_error "Failed to download env.template"
|
||||
print_info "Please ensure you have internet connection and try again"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Download nginx config files
|
||||
NGINX_BASE_URL="https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/data/nginx"
|
||||
|
||||
if [[ "$USE_LOCAL_FILES" = true ]]; then
|
||||
print_step "Verifying existing configuration files"
|
||||
# Download app.conf.template
|
||||
NGINX_CONFIG="${INSTALL_ROOT}/data/nginx/app.conf.template"
|
||||
print_info "Downloading nginx configuration template..."
|
||||
if curl -fsSL -o "$NGINX_CONFIG" "$NGINX_BASE_URL/app.conf.template" 2>/dev/null; then
|
||||
print_success "Nginx configuration template downloaded"
|
||||
else
|
||||
print_step "Downloading Onyx configuration files"
|
||||
print_info "This step downloads all necessary configuration files from GitHub..."
|
||||
print_error "Failed to download nginx configuration template"
|
||||
print_info "Please ensure you have internet connection and try again"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
ensure_file "${INSTALL_ROOT}/deployment/docker-compose.yml" \
|
||||
"${GITHUB_RAW_URL}/docker-compose.yml" "docker-compose.yml" || exit 1
|
||||
|
||||
# Check Docker Compose version compatibility after obtaining docker-compose.yml
|
||||
if [ "$COMPOSE_VERSION" != "dev" ] && version_compare "$COMPOSE_VERSION" "2.24.0"; then
|
||||
print_warning "Docker Compose version $COMPOSE_VERSION is older than 2.24.0"
|
||||
echo ""
|
||||
print_warning "The docker-compose.yml file uses the newer env_file format that requires Docker Compose 2.24.0 or later."
|
||||
echo ""
|
||||
print_info "To use this configuration with your current Docker Compose version, you have two options:"
|
||||
echo ""
|
||||
echo "1. Upgrade Docker Compose to version 2.24.0 or later (recommended)"
|
||||
echo " Visit: https://docs.docker.com/compose/install/"
|
||||
echo ""
|
||||
echo "2. Manually replace all env_file sections in docker-compose.yml"
|
||||
echo " Change from:"
|
||||
echo " env_file:"
|
||||
echo " - path: .env"
|
||||
echo " required: false"
|
||||
echo " To:"
|
||||
echo " env_file: .env"
|
||||
echo ""
|
||||
print_warning "The installation will continue, but may fail if Docker Compose cannot parse the file."
|
||||
echo ""
|
||||
prompt_yn_or_default "Do you want to continue anyway? (Y/n): " "y"
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
|
||||
print_info "Installation cancelled. Please upgrade Docker Compose or manually edit the docker-compose.yml file."
|
||||
exit 1
|
||||
fi
|
||||
print_info "Proceeding with installation despite Docker Compose version compatibility issues..."
|
||||
# Download run-nginx.sh script
|
||||
NGINX_RUN_SCRIPT="${INSTALL_ROOT}/data/nginx/run-nginx.sh"
|
||||
print_info "Downloading nginx startup script..."
|
||||
if curl -fsSL -o "$NGINX_RUN_SCRIPT" "$NGINX_BASE_URL/run-nginx.sh" 2>/dev/null; then
|
||||
chmod +x "$NGINX_RUN_SCRIPT"
|
||||
print_success "Nginx startup script downloaded and made executable"
|
||||
else
|
||||
print_error "Failed to download nginx startup script"
|
||||
print_info "Please ensure you have internet connection and try again"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Handle lite overlay: ensure it if --lite, clean up stale copies otherwise
|
||||
if [[ "$LITE_MODE" = true ]]; then
|
||||
ensure_file "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" \
|
||||
"${GITHUB_RAW_URL}/${LITE_COMPOSE_FILE}" "${LITE_COMPOSE_FILE}" || exit 1
|
||||
elif [[ -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" ]]; then
|
||||
if [[ -f "${INSTALL_ROOT}/deployment/.env" ]]; then
|
||||
print_warning "Existing lite overlay found but --lite was not passed."
|
||||
prompt_yn_or_default "Remove lite overlay and switch to standard mode? (y/N): " "n"
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
|
||||
print_info "Keeping existing lite overlay. Pass --lite to keep using lite mode."
|
||||
LITE_MODE=true
|
||||
else
|
||||
rm -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}"
|
||||
print_info "Removed lite overlay (switching to standard mode)"
|
||||
fi
|
||||
else
|
||||
rm -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}"
|
||||
print_info "Removed previous lite overlay (switching to standard mode)"
|
||||
fi
|
||||
# Download README file
|
||||
README_FILE="${INSTALL_ROOT}/README.md"
|
||||
print_info "Downloading README.md..."
|
||||
if curl -fsSL -o "$README_FILE" "${GITHUB_RAW_URL}/README.md" 2>/dev/null; then
|
||||
print_success "README.md downloaded successfully"
|
||||
else
|
||||
print_error "Failed to download README.md"
|
||||
print_info "Please ensure you have internet connection and try again"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
ensure_file "${INSTALL_ROOT}/deployment/env.template" \
|
||||
"${GITHUB_RAW_URL}/env.template" "env.template" || exit 1
|
||||
|
||||
ensure_file "${INSTALL_ROOT}/data/nginx/app.conf.template" \
|
||||
"$NGINX_BASE_URL/app.conf.template" "nginx/app.conf.template" || exit 1
|
||||
|
||||
ensure_file "${INSTALL_ROOT}/data/nginx/run-nginx.sh" \
|
||||
"$NGINX_BASE_URL/run-nginx.sh" "nginx/run-nginx.sh" || exit 1
|
||||
chmod +x "${INSTALL_ROOT}/data/nginx/run-nginx.sh"
|
||||
|
||||
ensure_file "${INSTALL_ROOT}/README.md" \
|
||||
"${GITHUB_RAW_URL}/README.md" "README.md" || exit 1
|
||||
|
||||
# Create empty local directory marker (if needed)
|
||||
touch "${INSTALL_ROOT}/data/nginx/local/.gitkeep"
|
||||
print_success "All configuration files ready"
|
||||
print_success "All configuration files downloaded successfully"
|
||||
|
||||
# Set up deployment configuration
|
||||
print_step "Setting up deployment configs"
|
||||
@@ -758,7 +513,7 @@ if [ -d "${INSTALL_ROOT}/deployment" ] && [ -f "${INSTALL_ROOT}/deployment/docke
|
||||
|
||||
if [ -n "$COMPOSE_CMD" ]; then
|
||||
# Check if any containers are running
|
||||
RUNNING_CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args true) ps -q 2>/dev/null | wc -l)
|
||||
RUNNING_CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml ps -q 2>/dev/null | wc -l)
|
||||
if [ "$RUNNING_CONTAINERS" -gt 0 ]; then
|
||||
print_error "Onyx services are currently running!"
|
||||
echo ""
|
||||
@@ -779,7 +534,7 @@ if [ -f "$ENV_FILE" ]; then
|
||||
echo "• Press Enter to restart with current configuration"
|
||||
echo "• Type 'update' to update to a newer version"
|
||||
echo ""
|
||||
prompt_or_default "Choose an option [default: restart]: " ""
|
||||
read -p "Choose an option [default: restart]: " -r
|
||||
echo ""
|
||||
|
||||
if [ "$REPLY" = "update" ]; then
|
||||
@@ -788,30 +543,26 @@ if [ -f "$ENV_FILE" ]; then
|
||||
echo "• Press Enter for latest (recommended)"
|
||||
echo "• Type a specific tag (e.g., v0.1.0)"
|
||||
echo ""
|
||||
# If --include-craft was passed, default to craft-latest
|
||||
if [ "$INCLUDE_CRAFT" = true ]; then
|
||||
prompt_or_default "Enter tag [default: craft-latest]: " "craft-latest"
|
||||
VERSION="$REPLY"
|
||||
read -p "Enter tag [default: craft-latest]: " -r VERSION
|
||||
else
|
||||
prompt_or_default "Enter tag [default: latest]: " "latest"
|
||||
VERSION="$REPLY"
|
||||
read -p "Enter tag [default: latest]: " -r VERSION
|
||||
fi
|
||||
echo ""
|
||||
|
||||
if [ "$INCLUDE_CRAFT" = true ] && [ "$VERSION" = "craft-latest" ]; then
|
||||
print_info "Selected: craft-latest (Craft enabled)"
|
||||
elif [ "$VERSION" = "latest" ]; then
|
||||
print_info "Selected: Latest version"
|
||||
if [ -z "$VERSION" ]; then
|
||||
if [ "$INCLUDE_CRAFT" = true ]; then
|
||||
VERSION="craft-latest"
|
||||
print_info "Selected: craft-latest (Craft enabled)"
|
||||
else
|
||||
VERSION="latest"
|
||||
print_info "Selected: Latest version"
|
||||
fi
|
||||
else
|
||||
print_info "Selected: $VERSION"
|
||||
fi
|
||||
|
||||
# Reject craft image tags when running in lite mode
|
||||
if [[ "$LITE_MODE" = true ]] && [[ "${VERSION:-}" == craft-* ]]; then
|
||||
print_error "Cannot use a craft image tag (${VERSION}) with --lite."
|
||||
print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Update .env file with new version
|
||||
print_info "Updating configuration for version $VERSION..."
|
||||
if grep -q "^IMAGE_TAG=" "$ENV_FILE"; then
|
||||
@@ -830,67 +581,13 @@ if [ -f "$ENV_FILE" ]; then
|
||||
fi
|
||||
print_success "Configuration updated for upgrade"
|
||||
else
|
||||
# Reject restarting a craft deployment in lite mode
|
||||
EXISTING_TAG=$(grep "^IMAGE_TAG=" "$ENV_FILE" | head -1 | cut -d'=' -f2 | tr -d ' "'"'"'')
|
||||
if [[ "$LITE_MODE" = true ]] && [[ "${EXISTING_TAG:-}" == craft-* ]]; then
|
||||
print_error "Cannot restart a craft deployment (${EXISTING_TAG}) with --lite."
|
||||
print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
print_info "Keeping existing configuration..."
|
||||
print_success "Will restart with current settings"
|
||||
fi
|
||||
|
||||
# Ensure COMPOSE_PROFILES is cleared when running in lite mode on an
|
||||
# existing .env (the template ships with s3-filestore enabled).
|
||||
if [[ "$LITE_MODE" = true ]] && grep -q "^COMPOSE_PROFILES=.*s3-filestore" "$ENV_FILE" 2>/dev/null; then
|
||||
sed -i.bak 's/^COMPOSE_PROFILES=.*/COMPOSE_PROFILES=/' "$ENV_FILE" 2>/dev/null || true
|
||||
print_success "Cleared COMPOSE_PROFILES for lite mode"
|
||||
fi
|
||||
else
|
||||
print_info "No existing .env file found. Setting up new deployment..."
|
||||
echo ""
|
||||
|
||||
# Ask for deployment mode (standard vs lite) unless already set via --lite flag
|
||||
if [[ "$LITE_MODE" = false ]]; then
|
||||
print_info "Which deployment mode would you like?"
|
||||
echo ""
|
||||
echo " 1) Standard - Full deployment with search, connectors, and RAG"
|
||||
echo " 2) Lite - Minimal deployment (no Vespa, Redis, or model servers)"
|
||||
echo " LLM chat, tools, file uploads, and Projects still work"
|
||||
echo ""
|
||||
prompt_or_default "Choose a mode (1 or 2) [default: 1]: " "1"
|
||||
echo ""
|
||||
|
||||
case "$REPLY" in
|
||||
2)
|
||||
LITE_MODE=true
|
||||
print_info "Selected: Lite mode"
|
||||
ensure_file "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" \
|
||||
"${GITHUB_RAW_URL}/${LITE_COMPOSE_FILE}" "${LITE_COMPOSE_FILE}" || exit 1
|
||||
;;
|
||||
*)
|
||||
print_info "Selected: Standard mode"
|
||||
;;
|
||||
esac
|
||||
else
|
||||
print_info "Deployment mode: Lite (set via --lite flag)"
|
||||
fi
|
||||
|
||||
# Validate lite + craft combination (could now be set interactively)
|
||||
if [[ "$LITE_MODE" = true ]] && [[ "$INCLUDE_CRAFT" = true ]]; then
|
||||
print_error "--include-craft cannot be used with Lite mode."
|
||||
print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Adjust resource expectations for lite mode
|
||||
if [[ "$LITE_MODE" = true ]]; then
|
||||
EXPECTED_DOCKER_RAM_GB=4
|
||||
EXPECTED_DISK_GB=16
|
||||
fi
|
||||
|
||||
# Ask for version
|
||||
print_info "Which tag would you like to deploy?"
|
||||
echo ""
|
||||
@@ -898,21 +595,23 @@ else
|
||||
echo "• Press Enter for craft-latest (recommended for Craft)"
|
||||
echo "• Type a specific tag (e.g., craft-v1.0.0)"
|
||||
echo ""
|
||||
prompt_or_default "Enter tag [default: craft-latest]: " "craft-latest"
|
||||
VERSION="$REPLY"
|
||||
read -p "Enter tag [default: craft-latest]: " -r VERSION
|
||||
else
|
||||
echo "• Press Enter for latest (recommended)"
|
||||
echo "• Type a specific tag (e.g., v0.1.0)"
|
||||
echo ""
|
||||
prompt_or_default "Enter tag [default: latest]: " "latest"
|
||||
VERSION="$REPLY"
|
||||
read -p "Enter tag [default: latest]: " -r VERSION
|
||||
fi
|
||||
echo ""
|
||||
|
||||
if [ "$INCLUDE_CRAFT" = true ] && [ "$VERSION" = "craft-latest" ]; then
|
||||
print_info "Selected: craft-latest (Craft enabled)"
|
||||
elif [ "$VERSION" = "latest" ]; then
|
||||
print_info "Selected: Latest tag"
|
||||
if [ -z "$VERSION" ]; then
|
||||
if [ "$INCLUDE_CRAFT" = true ]; then
|
||||
VERSION="craft-latest"
|
||||
print_info "Selected: craft-latest (Craft enabled)"
|
||||
else
|
||||
VERSION="latest"
|
||||
print_info "Selected: Latest tag"
|
||||
fi
|
||||
else
|
||||
print_info "Selected: $VERSION"
|
||||
fi
|
||||
@@ -946,13 +645,6 @@ else
|
||||
# Use basic auth by default
|
||||
AUTH_SCHEMA="basic"
|
||||
|
||||
# Reject craft image tags when running in lite mode (must check before writing .env)
|
||||
if [[ "$LITE_MODE" = true ]] && [[ "${VERSION:-}" == craft-* ]]; then
|
||||
print_error "Cannot use a craft image tag (${VERSION}) with --lite."
|
||||
print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Create .env file from template
|
||||
print_info "Creating .env file with your selections..."
|
||||
cp "$ENV_TEMPLATE" "$ENV_FILE"
|
||||
@@ -962,13 +654,6 @@ else
|
||||
sed -i.bak "s/^IMAGE_TAG=.*/IMAGE_TAG=$VERSION/" "$ENV_FILE"
|
||||
print_success "IMAGE_TAG set to $VERSION"
|
||||
|
||||
# In lite mode, clear COMPOSE_PROFILES so profiled services (MinIO, etc.)
|
||||
# stay disabled — the template ships with s3-filestore enabled by default.
|
||||
if [[ "$LITE_MODE" = true ]]; then
|
||||
sed -i.bak 's/^COMPOSE_PROFILES=.*/COMPOSE_PROFILES=/' "$ENV_FILE" 2>/dev/null || true
|
||||
print_success "Cleared COMPOSE_PROFILES for lite mode"
|
||||
fi
|
||||
|
||||
# Configure basic authentication (default)
|
||||
sed -i.bak 's/^AUTH_TYPE=.*/AUTH_TYPE=basic/' "$ENV_FILE" 2>/dev/null || true
|
||||
print_success "Basic authentication enabled in configuration"
|
||||
@@ -1089,7 +774,7 @@ print_step "Pulling Docker images"
|
||||
print_info "This may take several minutes depending on your internet connection..."
|
||||
echo ""
|
||||
print_info "Downloading Docker images (this may take a while)..."
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) pull --quiet)
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml pull --quiet)
|
||||
if [ $? -eq 0 ]; then
|
||||
print_success "Docker images downloaded successfully"
|
||||
else
|
||||
@@ -1103,9 +788,9 @@ print_info "Launching containers..."
|
||||
echo ""
|
||||
if [ "$USE_LATEST" = true ]; then
|
||||
print_info "Force pulling latest images and recreating containers..."
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) up -d --pull always --force-recreate)
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml up -d --pull always --force-recreate)
|
||||
else
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) up -d)
|
||||
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml up -d)
|
||||
fi
|
||||
if [ $? -ne 0 ]; then
|
||||
print_error "Failed to start Onyx services"
|
||||
@@ -1127,7 +812,7 @@ echo ""
|
||||
# Check for restart loops
|
||||
print_info "Checking container health status..."
|
||||
RESTART_ISSUES=false
|
||||
CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) ps -q 2>/dev/null)
|
||||
CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml ps -q 2>/dev/null)
|
||||
|
||||
for CONTAINER in $CONTAINERS; do
|
||||
PROJECT_NAME="$(basename "$INSTALL_ROOT")_deployment_"
|
||||
@@ -1156,7 +841,7 @@ if [ "$RESTART_ISSUES" = true ]; then
|
||||
print_error "Some containers are experiencing issues!"
|
||||
echo ""
|
||||
print_info "Please check the logs for more information:"
|
||||
echo " (cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD $(compose_file_args) logs)"
|
||||
echo " (cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD -f docker-compose.yml logs)"
|
||||
|
||||
echo ""
|
||||
print_info "If the issue persists, please contact: founders@onyx.app"
|
||||
@@ -1175,12 +860,8 @@ check_onyx_health() {
|
||||
echo ""
|
||||
|
||||
while [ $attempt -le $max_attempts ]; do
|
||||
local http_code=""
|
||||
if [[ "$DOWNLOADER" == "curl" ]]; then
|
||||
http_code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$port" 2>/dev/null || echo "000")
|
||||
else
|
||||
http_code=$(wget -q --spider -S "http://localhost:$port" 2>&1 | grep "HTTP/" | tail -1 | awk '{print $2}' || echo "000")
|
||||
fi
|
||||
# Check for successful HTTP responses (200, 301, 302, etc.)
|
||||
local http_code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$port")
|
||||
if echo "$http_code" | grep -qE "^(200|301|302|303|307|308)$"; then
|
||||
return 0
|
||||
fi
|
||||
@@ -1236,18 +917,6 @@ print_info "If authentication is enabled, you can create your admin account here
|
||||
echo " • Visit http://localhost:${HOST_PORT}/auth/signup to create your admin account"
|
||||
echo " • The first user created will automatically have admin privileges"
|
||||
echo ""
|
||||
if [[ "$LITE_MODE" = true ]]; then
|
||||
echo ""
|
||||
print_info "Running in Lite mode — the following services are NOT started:"
|
||||
echo " • Vespa (vector database)"
|
||||
echo " • Redis (cache)"
|
||||
echo " • Model servers (embedding/inference)"
|
||||
echo " • Background workers (Celery)"
|
||||
echo ""
|
||||
print_info "Connectors and RAG search are disabled. LLM chat, tools, user file"
|
||||
print_info "uploads, Projects, Agent knowledge, and code interpreter still work."
|
||||
fi
|
||||
echo ""
|
||||
print_info "Refer to the README in the ${INSTALL_ROOT} directory for more information."
|
||||
echo ""
|
||||
print_info "For help or issues, contact: founders@onyx.app"
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
"use client";
|
||||
|
||||
import { ReactNode, useState } from "react";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { useState } from "react";
|
||||
import { ChatFileType, FileDescriptor } from "@/app/app/interfaces";
|
||||
import Attachment from "@/refresh-components/Attachment";
|
||||
import { InMessageImage } from "@/app/app/components/files/images/InMessageImage";
|
||||
@@ -10,27 +9,10 @@ import PreviewModal from "@/sections/modals/PreviewModal";
|
||||
import { MinimalOnyxDocument } from "@/lib/search/interfaces";
|
||||
import ExpandableContentWrapper from "@/components/tools/ExpandableContentWrapper";
|
||||
|
||||
interface FileContainerProps {
|
||||
children: ReactNode;
|
||||
className?: string;
|
||||
id?: string;
|
||||
}
|
||||
|
||||
interface FileDisplayProps {
|
||||
files: FileDescriptor[];
|
||||
}
|
||||
|
||||
function FileContainer({ children, className, id }: FileContainerProps) {
|
||||
return (
|
||||
<div
|
||||
id={id}
|
||||
className={cn("flex w-full flex-col items-end gap-2 py-2", className)}
|
||||
>
|
||||
{children}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default function FileDisplay({ files }: FileDisplayProps) {
|
||||
const [close, setClose] = useState(true);
|
||||
const [previewingFile, setPreviewingFile] = useState<FileDescriptor | null>(
|
||||
@@ -59,7 +41,7 @@ export default function FileDisplay({ files }: FileDisplayProps) {
|
||||
)}
|
||||
|
||||
{textFiles.length > 0 && (
|
||||
<FileContainer id="onyx-file">
|
||||
<div id="onyx-file" className="flex flex-col items-end gap-2 py-2">
|
||||
{textFiles.map((file) => (
|
||||
<Attachment
|
||||
key={file.id}
|
||||
@@ -67,36 +49,40 @@ export default function FileDisplay({ files }: FileDisplayProps) {
|
||||
open={() => setPreviewingFile(file)}
|
||||
/>
|
||||
))}
|
||||
</FileContainer>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{imageFiles.length > 0 && (
|
||||
<FileContainer id="onyx-image">
|
||||
<div id="onyx-image" className="flex flex-col items-end gap-2 py-2">
|
||||
{imageFiles.map((file) => (
|
||||
<InMessageImage key={file.id} fileId={file.id} />
|
||||
))}
|
||||
</FileContainer>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{csvFiles.length > 0 && (
|
||||
<FileContainer className="overflow-auto">
|
||||
{csvFiles.map((file) =>
|
||||
close ? (
|
||||
<ExpandableContentWrapper
|
||||
key={file.id}
|
||||
fileDescriptor={file}
|
||||
close={() => setClose(false)}
|
||||
ContentComponent={CsvContent}
|
||||
/>
|
||||
) : (
|
||||
<Attachment
|
||||
key={file.id}
|
||||
open={() => setClose(true)}
|
||||
fileName={file.name || file.id}
|
||||
/>
|
||||
)
|
||||
)}
|
||||
</FileContainer>
|
||||
<div className="flex flex-col items-end gap-2 py-2">
|
||||
{csvFiles.map((file) => {
|
||||
return (
|
||||
<div key={file.id} className="w-fit">
|
||||
{close ? (
|
||||
<>
|
||||
<ExpandableContentWrapper
|
||||
fileDescriptor={file}
|
||||
close={() => setClose(false)}
|
||||
ContentComponent={CsvContent}
|
||||
/>
|
||||
</>
|
||||
) : (
|
||||
<Attachment
|
||||
open={() => setClose(true)}
|
||||
fileName={file.name || file.id}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
);
|
||||
|
||||
@@ -40,7 +40,12 @@ export default function ExpandableContentWrapper({
|
||||
};
|
||||
|
||||
const Content = (
|
||||
<div className="w-message-default max-w-full !rounded-lg overflow-y-hidden h-full">
|
||||
<div
|
||||
className={cn(
|
||||
!expanded ? "w-message-default" : "w-full",
|
||||
"!rounded !rounded-lg overflow-y-hidden h-full"
|
||||
)}
|
||||
>
|
||||
<CardHeader className="w-full bg-background-tint-02 top-0 p-3">
|
||||
<div className="flex justify-between items-center">
|
||||
<Text className="text-ellipsis line-clamp-1" text03 mainUiAction>
|
||||
@@ -78,10 +83,12 @@ export default function ExpandableContentWrapper({
|
||||
)}
|
||||
>
|
||||
<CardContent className="p-0">
|
||||
<ContentComponent
|
||||
fileDescriptor={fileDescriptor}
|
||||
expanded={expanded}
|
||||
/>
|
||||
{!expanded && (
|
||||
<ContentComponent
|
||||
fileDescriptor={fileDescriptor}
|
||||
expanded={expanded}
|
||||
/>
|
||||
)}
|
||||
</CardContent>
|
||||
</Card>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user