chore(ruff): enable flake8's unused arg rules (#8206)

Author: Jamison Lahman
Date: 2026-02-05 15:25:07 -08:00
Committed by: GitHub
Parent: ef85a14b6e
Commit: d8fd6d398e
287 changed files with 1081 additions and 883 deletions
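
For context, the ARG rules enabled here come from ruff's flake8-unused-arguments plugin: ARG001 flags unused function arguments, ARG002 unused method arguments, ARG003 unused classmethod arguments, ARG004 unused staticmethod arguments, and ARG005 unused lambda arguments. Turning them on typically means adding "ARG" to the lint selection in the project's ruff configuration (for example, extend-select = ["ARG"] under [tool.ruff.lint] in pyproject.toml); that configuration change is presumably among the files not shown in this truncated diff. The minimal sketch below, with hypothetical names rather than code from this repository, shows which rule each kind of added # noqa suppression targets:

# Illustrative sketch only -- hypothetical names, not code from this commit.
from typing import Any


def handle_event(event: dict[str, Any], context: Any) -> None:  # noqa: ARG001
    # ARG001: unused *function* argument ("context" is accepted to satisfy a
    # callback signature but never read).
    print(event["id"])


class Worker:
    def progress(self, tag: str, amount: int) -> None:  # noqa: ARG002
        # ARG002: unused *method* argument ("amount" is part of the interface
        # but ignored by this implementation).
        print(f"progress: {tag}")

    @classmethod
    def from_config(cls, config: dict[str, Any], strict: bool = True) -> "Worker":  # noqa: ARG003
        # ARG003: unused *classmethod* argument ("strict" is ignored here).
        print(config)
        return cls()


# ARG005: unused *lambda* argument, e.g. a no-op default callback.
noop_callback = lambda token: None  # noqa: ARG005

Most of the suppressions in this diff appear to keep parameters required by external interfaces (Celery signal handlers, SQLAlchemy event hooks, FastAPI dependencies, connector protocols), so silencing the rule inline is preferred over changing the signatures.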


@@ -57,7 +57,7 @@ if USE_IAM_AUTH:
def include_object(
object: SchemaItem,
object: SchemaItem, # noqa: ARG001
name: str | None,
type_: Literal[
"schema",
@@ -67,8 +67,8 @@ def include_object(
"unique_constraint",
"foreign_key_constraint",
],
reflected: bool,
compare_to: SchemaItem | None,
reflected: bool, # noqa: ARG001
compare_to: SchemaItem | None, # noqa: ARG001
) -> bool:
if type_ == "table" and name in EXCLUDE_TABLES:
return False
@@ -244,7 +244,7 @@ def do_run_migrations(
def provide_iam_token_for_alembic(
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any # noqa: ARG001
) -> None:
if USE_IAM_AUTH:
# Database connection settings


@@ -39,7 +39,7 @@ EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}
def include_object(
object: SchemaItem,
object: SchemaItem, # noqa: ARG001
name: str | None,
type_: Literal[
"schema",
@@ -49,8 +49,8 @@ def include_object(
"unique_constraint",
"foreign_key_constraint",
],
reflected: bool,
compare_to: SchemaItem | None,
reflected: bool, # noqa: ARG001
compare_to: SchemaItem | None, # noqa: ARG001
) -> bool:
if type_ == "table" and name in EXCLUDE_TABLES:
return False


@@ -951,7 +951,7 @@ class PermissionSyncCallback(IndexingHeartbeatInterface):
return False
def progress(self, tag: str, amount: int) -> None:
def progress(self, tag: str, amount: int) -> None: # noqa: ARG002
try:
self.redis_connector.permissions.set_active()
@@ -982,7 +982,7 @@ class PermissionSyncCallback(IndexingHeartbeatInterface):
def monitor_ccpair_permissions_taskset(
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session # noqa: ARG001
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)


@@ -259,7 +259,7 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str) -> bool | None:
def try_creating_external_group_sync_task(
app: Celery,
cc_pair_id: int,
r: Redis,
r: Redis, # noqa: ARG001
tenant_id: str,
) -> str | None:
"""Returns an int if syncing is needed. The int represents the number of sync tasks generated.
@@ -344,7 +344,7 @@ def try_creating_external_group_sync_task(
bind=True,
)
def connector_external_group_sync_generator_task(
self: Task,
self: Task, # noqa: ARG001
cc_pair_id: int,
tenant_id: str,
) -> None:
@@ -590,8 +590,8 @@ def _perform_external_group_sync(
def validate_external_group_sync_fences(
tenant_id: str,
celery_app: Celery,
r: Redis,
celery_app: Celery, # noqa: ARG001
r: Redis, # noqa: ARG001
r_replica: Redis,
r_celery: Redis,
lock_beat: RedisLock,


@@ -40,7 +40,7 @@ def export_query_history_task(
end: datetime,
start_time: datetime,
# Need to include the tenant_id since the TenantAwareTask needs this
tenant_id: str,
tenant_id: str, # noqa: ARG001
) -> None:
if not self.request.id:
raise RuntimeError("No task id defined for this task; cannot identify it")


@@ -43,7 +43,7 @@ _TENANT_PROVISIONING_TIME_LIMIT = 60 * 10 # 10 minutes
trail=False,
bind=True,
)
def check_available_tenants(self: Task) -> None:
def check_available_tenants(self: Task) -> None: # noqa: ARG001
"""
Check if we have enough pre-provisioned tenants available.
If not, trigger the pre-provisioning of new tenants.


@@ -21,9 +21,9 @@ logger = setup_logger()
trail=False,
)
def generate_usage_report_task(
self: Task,
self: Task, # noqa: ARG001
*,
tenant_id: str,
tenant_id: str, # noqa: ARG001
user_id: str | None = None,
period_from: str | None = None,
period_to: str | None = None,


@@ -7,7 +7,7 @@ QUERY_HISTORY_TASK_NAME_PREFIX = OnyxCeleryTask.EXPORT_QUERY_HISTORY_TASK
def name_chat_ttl_task(
retention_limit_days: float, tenant_id: str | None = None
retention_limit_days: float, tenant_id: str | None = None # noqa: ARG001
) -> str:
return f"chat_ttl_{retention_limit_days}_days"


@@ -54,7 +54,7 @@ def delete_document_set_privacy__no_commit(
def fetch_document_sets(
user_id: UUID | None,
db_session: Session,
include_outdated: bool = True, # Parameter only for versioned implementation, unused
include_outdated: bool = True, # Parameter only for versioned implementation, unused # noqa: ARG001
) -> list[tuple[DocumentSet, list[ConnectorCredentialPair]]]:
assert user_id is not None


@@ -643,7 +643,7 @@ def add_users_to_user_group(
def update_user_group(
db_session: Session,
user: User,
user: User, # noqa: ARG001
user_group_id: int,
user_group_update: UserGroupUpdate,
) -> UserGroup:


@@ -25,7 +25,7 @@ CONFLUENCE_DOC_SYNC_LABEL = "confluence_doc_sync"
def confluence_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:


@@ -34,7 +34,7 @@ GITHUB_DOC_SYNC_LABEL = "github_doc_sync"
def github_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[DocExternalAccess, None, None]:
"""


@@ -12,7 +12,7 @@ logger = setup_logger()
def github_group_sync(
tenant_id: str,
tenant_id: str, # noqa: ARG001
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
github_connector: GithubConnector = GithubConnector(


@@ -91,7 +91,7 @@ class TeamInfo(BaseModel):
def _fetch_organization_members(
github_client: Github, org_name: str, retry_count: int = 0
github_client: Github, org_name: str, retry_count: int = 0 # noqa: ARG001
) -> List[UserInfo]:
"""Fetch all organization members including owners and regular members."""
org_members: List[UserInfo] = []
@@ -124,7 +124,7 @@ def _fetch_organization_members(
def _fetch_repository_teams_detailed(
repo: Repository, github_client: Github, retry_count: int = 0
repo: Repository, github_client: Github, retry_count: int = 0 # noqa: ARG001
) -> List[TeamInfo]:
"""Fetch teams with access to the repository and their members."""
teams_data: List[TeamInfo] = []
@@ -167,7 +167,7 @@ def _fetch_repository_teams_detailed(
def fetch_repository_team_slugs(
repo: Repository, github_client: Github, retry_count: int = 0
repo: Repository, github_client: Github, retry_count: int = 0 # noqa: ARG001
) -> List[str]:
"""Fetch team slugs with access to the repository."""
logger.info(f"Fetching team slugs for repository {repo.full_name}")


@@ -39,8 +39,8 @@ def _get_slim_doc_generator(
def gmail_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
"""


@@ -282,8 +282,8 @@ def get_external_access_for_folder(
def gdrive_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
"""


@@ -384,7 +384,7 @@ def _build_onyx_groups(
def gdrive_group_sync(
tenant_id: str,
tenant_id: str, # noqa: ARG001
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
# Initialize connector and build credential/service objects


@@ -17,7 +17,7 @@ JIRA_DOC_SYNC_TAG = "jira_doc_sync"
def jira_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[ElementExternalAccess, None, None]:


@@ -102,7 +102,7 @@ def _build_group_member_email_map(
def jira_group_sync(
tenant_id: str,
tenant_id: str, # noqa: ARG001
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
"""


@@ -23,7 +23,7 @@ ContentRange = tuple[int, int | None] # (start_index, end_index) None means to
# NOTE: Used for testing timing
def _get_dummy_object_access_map(
object_ids: set[str], user_email: str, chunks: list[InferenceChunk]
object_ids: set[str], user_email: str, chunks: list[InferenceChunk] # noqa: ARG001
) -> dict[str, bool]:
time.sleep(0.15)
# return {object_id: True for object_id in object_ids}


@@ -17,7 +17,7 @@ SHAREPOINT_DOC_SYNC_TAG = "sharepoint_doc_sync"
def sharepoint_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[ElementExternalAccess, None, None]:


@@ -15,7 +15,7 @@ logger = setup_logger()
def sharepoint_group_sync(
tenant_id: str,
tenant_id: str, # noqa: ARG001
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
"""Sync SharePoint groups and their members"""


@@ -103,7 +103,7 @@ def _fetch_channel_permissions(
def _get_slack_document_access(
slack_connector: SlackConnector,
channel_permissions: dict[str, ExternalAccess],
channel_permissions: dict[str, ExternalAccess], # noqa: ARG001
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
slim_doc_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
@@ -136,8 +136,8 @@ def _get_slack_document_access(
def slack_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
"""


@@ -72,10 +72,10 @@ class SyncConfig(BaseModel):
# Mock doc sync function for testing (no-op)
def mock_doc_sync(
cc_pair: "ConnectorCredentialPair",
fetch_all_docs_fn: FetchAllDocumentsFunction,
fetch_all_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: Optional["IndexingHeartbeatInterface"],
cc_pair: "ConnectorCredentialPair", # noqa: ARG001
fetch_all_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
callback: Optional["IndexingHeartbeatInterface"], # noqa: ARG001
) -> Generator["DocExternalAccess", None, None]:
"""Mock doc sync function for testing - returns empty list since permissions are fetched during indexing"""
yield from []


@@ -18,7 +18,7 @@ TEAMS_DOC_SYNC_LABEL = "teams_doc_sync"
def teams_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:


@@ -139,7 +139,7 @@ def put_logo(
upload_logo(file=file, is_logotype=is_logotype)
def fetch_logo_helper(db_session: Session) -> Response:
def fetch_logo_helper(db_session: Session) -> Response: # noqa: ARG001
try:
file_store = get_default_file_store()
onyx_file = file_store.get_file_with_mime_type(get_logo_filename())
@@ -155,7 +155,7 @@ def fetch_logo_helper(db_session: Session) -> Response:
return Response(content=onyx_file.data, media_type=onyx_file.mime_type)
def fetch_logotype_helper(db_session: Session) -> Response:
def fetch_logotype_helper(db_session: Session) -> Response: # noqa: ARG001
try:
file_store = get_default_file_store()
onyx_file = file_store.get_file_with_mime_type(get_logotype_filename())


@@ -17,7 +17,7 @@ router = APIRouter(prefix="/evals")
@router.post("/eval_run", response_model=EvalRunAck)
def eval_run(
request: EvalConfigurationOptions,
user: User = Depends(current_cloud_superuser),
user: User = Depends(current_cloud_superuser), # noqa: ARG001
) -> EvalRunAck:
"""
Run an evaluation with the given message and optional dataset.


@@ -260,7 +260,7 @@ def confluence_oauth_accessible_resources(
credential_id: int,
user: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str | None = Depends(get_current_tenant_id),
tenant_id: str | None = Depends(get_current_tenant_id), # noqa: ARG001
) -> JSONResponse:
"""Atlassian's API is weird and does not supply us with enough info to be in a
usable state after authorizing. All API's require a cloud id. We have to list
@@ -323,7 +323,7 @@ def confluence_oauth_finalize(
cloud_url: str,
user: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str | None = Depends(get_current_tenant_id),
tenant_id: str | None = Depends(get_current_tenant_id), # noqa: ARG001
) -> JSONResponse:
"""Saves the info for the selected cloud site to the credential.
This is the final step in the confluence oauth flow where after the traditional


@@ -78,7 +78,7 @@ def fetch_and_process_chat_session_history(
db_session: Session,
start: datetime,
end: datetime,
limit: int | None = 500,
limit: int | None = 500, # noqa: ARG001
) -> Generator[ChatSessionSnapshot]:
PAGE_SIZE = 100


@@ -59,7 +59,7 @@ def generate_report(
def read_usage_report(
report_name: str,
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
db_session: Session = Depends(get_session), # noqa: ARG001
) -> Response:
try:
file = get_usage_report_data(report_name)


@@ -121,7 +121,9 @@ async def get_or_provision_tenant(
)
async def create_tenant(email: str, referral_source: str | None = None) -> str:
async def create_tenant(
email: str, referral_source: str | None = None # noqa: ARG001
) -> str:
"""
Create a new tenant on-demand when no pre-provisioned tenants are available.
This is the fallback method when we can't use a pre-provisioned tenant.
@@ -675,7 +677,7 @@ async def setup_tenant(tenant_id: str) -> None:
async def assign_tenant_to_user(
tenant_id: str, email: str, referral_source: str | None = None
tenant_id: str, email: str, referral_source: str | None = None # noqa: ARG001
) -> None:
"""
Assign a tenant to a user and perform necessary operations.


@@ -96,7 +96,7 @@ def get_access_for_documents(
return versioned_get_access_for_documents_fn(document_ids, db_session)
def _get_acl_for_user(user: User, db_session: Session) -> set[str]:
def _get_acl_for_user(user: User, db_session: Session) -> set[str]: # noqa: ARG001
"""Returns a list of ACL entries that the user has access to. This is meant to be
used downstream to filter out documents that the user does not have access to. The
user should have access to a document if at least one entry in the document's ACL


@@ -4,7 +4,9 @@ from onyx.db.models import User
from onyx.utils.variable_functionality import fetch_versioned_implementation
def _get_user_external_group_ids(db_session: Session, user: User) -> list[str]:
def _get_user_external_group_ids(
db_session: Session, user: User # noqa: ARG001
) -> list[str]:
return []


@@ -30,7 +30,7 @@ REFRESH_ENDPOINTS = {
async def _test_expire_oauth_token(
user: User,
oauth_account: OAuthAccount,
db_session: AsyncSession,
db_session: AsyncSession, # noqa: ARG001
user_manager: BaseUserManager[User, Any],
expire_in_seconds: int = 10,
) -> bool:
@@ -59,7 +59,7 @@ async def _test_expire_oauth_token(
async def refresh_oauth_token(
user: User,
oauth_account: OAuthAccount,
db_session: AsyncSession,
db_session: AsyncSession, # noqa: ARG001
user_manager: BaseUserManager[User, Any],
) -> bool:
"""
@@ -182,7 +182,7 @@ async def check_and_refresh_oauth_tokens(
async def check_oauth_account_has_refresh_token(
user: User,
user: User, # noqa: ARG001
oauth_account: OAuthAccount,
) -> bool:
"""


@@ -780,7 +780,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
)
async def on_after_forgot_password(
self, user: User, token: str, request: Optional[Request] = None
self, user: User, token: str, request: Optional[Request] = None # noqa: ARG002
) -> None:
if not EMAIL_CONFIGURED:
logger.error(
@@ -799,7 +799,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
send_forgot_password_email(user.email, tenant_id=tenant_id, token=token)
async def on_after_request_verify(
self, user: User, token: str, request: Optional[Request] = None
self, user: User, token: str, request: Optional[Request] = None # noqa: ARG002
) -> None:
verify_email_domain(user.email)
@@ -983,7 +983,7 @@ class TenantAwareRedisStrategy(RedisStrategy[User, uuid.UUID]):
except (exceptions.UserNotExists, exceptions.InvalidID, KeyError):
return None
async def destroy_token(self, token: str, user: User) -> None:
async def destroy_token(self, token: str, user: User) -> None: # noqa: ARG002
"""Properly delete the token from async redis."""
redis = await get_async_redis_connection()
await redis.delete(f"{self.key_prefix}{token}")


@@ -93,12 +93,12 @@ class TenantAwareTask(Task):
@task_prerun.connect
def on_task_prerun(
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple[Any, ...] | None = None,
kwargs: dict[str, Any] | None = None,
**other_kwargs: Any,
sender: Any | None = None, # noqa: ARG001
task_id: str | None = None, # noqa: ARG001
task: Task | None = None, # noqa: ARG001
args: tuple[Any, ...] | None = None, # noqa: ARG001
kwargs: dict[str, Any] | None = None, # noqa: ARG001
**other_kwargs: Any, # noqa: ARG001
) -> None:
# Reset any per-task logging context so that prefixes (e.g. pruning_ctx)
# from a previous task executed in the same worker process do not leak
@@ -110,14 +110,14 @@ def on_task_prerun(
def on_task_postrun(
sender: Any | None = None,
sender: Any | None = None, # noqa: ARG001
task_id: str | None = None,
task: Task | None = None,
args: tuple | None = None,
args: tuple | None = None, # noqa: ARG001
kwargs: dict[str, Any] | None = None,
retval: Any | None = None,
retval: Any | None = None, # noqa: ARG001
state: str | None = None,
**kwds: Any,
**kwds: Any, # noqa: ARG001
) -> None:
"""We handle this signal in order to remove completed tasks
from their respective tasksets. This allows us to track the progress of document set
@@ -209,7 +209,9 @@ def on_task_postrun(
return
def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
def on_celeryd_init(
sender: str, conf: Any = None, **kwargs: Any # noqa: ARG001
) -> None:
"""The first signal sent on celery worker startup"""
# NOTE(rkuo): start method "fork" is unsafe and we really need it to be "spawn"
@@ -242,7 +244,7 @@ def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
setup_braintrust_if_creds_available()
def wait_for_redis(sender: Any, **kwargs: Any) -> None:
def wait_for_redis(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
"""Waits for redis to become ready subject to a hardcoded timeout.
Will raise WorkerShutdown to kill the celery worker if the timeout
is reached."""
@@ -285,7 +287,7 @@ def wait_for_redis(sender: Any, **kwargs: Any) -> None:
return
def wait_for_db(sender: Any, **kwargs: Any) -> None:
def wait_for_db(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
"""Waits for the db to become ready subject to a hardcoded timeout.
Will raise WorkerShutdown to kill the celery worker if the timeout is reached."""
@@ -327,7 +329,7 @@ def wait_for_db(sender: Any, **kwargs: Any) -> None:
return
def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None:
def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
logger.info(f"Running as a secondary celery worker: pid={os.getpid()}")
# Set up variables for waiting on primary worker
@@ -359,7 +361,7 @@ def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None:
return
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
def on_worker_ready(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
task_logger.info("worker_ready signal received.")
# file based way to do readiness/liveness probes
@@ -372,7 +374,7 @@ def on_worker_ready(sender: Any, **kwargs: Any) -> None:
logger.info(f"Readiness signal touched at {path}.")
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
HttpxPool.close_all()
hostname: str = cast(str, sender.hostname)
@@ -405,9 +407,9 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
def on_setup_logging(
loglevel: int,
logfile: str | None,
format: str,
colorize: bool,
**kwargs: Any,
format: str, # noqa: ARG001
colorize: bool, # noqa: ARG001
**kwargs: Any, # noqa: ARG001
) -> None:
# TODO: could unhardcode format and colorize and accept these as options from
# celery's config
@@ -508,18 +510,18 @@ class TenantContextFilter(logging.Filter):
@task_postrun.connect
def reset_tenant_id(
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple[Any, ...] | None = None,
kwargs: dict[str, Any] | None = None,
**other_kwargs: Any,
sender: Any | None = None, # noqa: ARG001
task_id: str | None = None, # noqa: ARG001
task: Task | None = None, # noqa: ARG001
args: tuple[Any, ...] | None = None, # noqa: ARG001
kwargs: dict[str, Any] | None = None, # noqa: ARG001
**other_kwargs: Any, # noqa: ARG001
) -> None:
"""Signal handler to reset tenant ID in context var after task ends."""
CURRENT_TENANT_ID_CONTEXTVAR.set(POSTGRES_DEFAULT_SCHEMA)
def wait_for_vespa_or_shutdown(sender: Any, **kwargs: Any) -> None:
def wait_for_vespa_or_shutdown(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
"""Waits for Vespa to become ready subject to a timeout.
Raises WorkerShutdown if the timeout is reached."""
@@ -553,12 +555,12 @@ class LivenessProbe(bootsteps.StartStopStep):
priority=10,
)
def stop(self, worker: Any) -> None:
def stop(self, worker: Any) -> None: # noqa: ARG002
self.path.unlink(missing_ok=True)
if self.task_tref:
self.task_tref.cancel()
def update_liveness_file(self, worker: Any) -> None:
def update_liveness_file(self, worker: Any) -> None: # noqa: ARG002
self.path.touch()


@@ -102,7 +102,7 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
@worker_process_init.connect
def init_worker(**kwargs: Any) -> None:
def init_worker(**kwargs: Any) -> None: # noqa: ARG001
SqlEngine.reset_engine()


@@ -91,7 +91,7 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
@worker_process_init.connect
def init_worker(**kwargs: Any) -> None:
def init_worker(**kwargs: Any) -> None: # noqa: ARG001
SqlEngine.reset_engine()


@@ -244,7 +244,7 @@ class HubPeriodicTask(bootsteps.StartStopStep):
# it's unclear to me whether using the hub's timer or the bootstep timer is better
requires = {"celery.worker.components:Hub"}
def __init__(self, worker: Any, **kwargs: Any) -> None:
def __init__(self, worker: Any, **kwargs: Any) -> None: # noqa: ARG002
self.interval = CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 8 # Interval in seconds
self.task_tref = None
@@ -300,7 +300,7 @@ class HubPeriodicTask(bootsteps.StartStopStep):
except Exception:
task_logger.exception("Periodic task failed.")
def stop(self, worker: Any) -> None:
def stop(self, worker: Any) -> None: # noqa: ARG002
# Cancel the scheduled task when the worker stops
if self.task_tref:
self.task_tref.cancel()


@@ -91,7 +91,7 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
@worker_process_init.connect
def init_worker(**kwargs: Any) -> None:
def init_worker(**kwargs: Any) -> None: # noqa: ARG001
SqlEngine.reset_engine()


@@ -366,7 +366,7 @@ def try_generate_document_cc_pair_cleanup_tasks(
def monitor_connector_deletion_taskset(
tenant_id: str, key_bytes: bytes, r: Redis
tenant_id: str, key_bytes: bytes, r: Redis # noqa: ARG001
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)


@@ -1071,7 +1071,7 @@ def check_for_checkpoint_cleanup(self: Task, *, tenant_id: str) -> None:
bind=True,
)
def cleanup_checkpoint_task(
self: Task, *, index_attempt_id: int, tenant_id: str | None
self: Task, *, index_attempt_id: int, tenant_id: str | None # noqa: ARG001
) -> None:
"""Clean up a checkpoint for a given index attempt"""
@@ -1160,7 +1160,7 @@ def check_for_index_attempt_cleanup(self: Task, *, tenant_id: str) -> None:
bind=True,
)
def cleanup_index_attempt_task(
self: Task, *, index_attempt_ids: list[int], tenant_id: str
self: Task, *, index_attempt_ids: list[int], tenant_id: str # noqa: ARG001
) -> None:
"""Clean up an index attempt"""
start = time.monotonic()
@@ -1266,7 +1266,7 @@ def _resolve_indexing_document_errors(
bind=True,
)
def docprocessing_task(
self: Task,
self: Task, # noqa: ARG001
index_attempt_id: int,
cc_pair_id: int,
tenant_id: str,


@@ -57,7 +57,7 @@ class IndexingCallbackBase(IndexingHeartbeatInterface):
# TODO: Pass index_attempt_id to the callback and check cancellation using the db
return bool(self.redis_connector.stop.fenced)
def progress(self, tag: str, amount: int) -> None:
def progress(self, tag: str, amount: int) -> None: # noqa: ARG002
"""Amount isn't used yet."""
# rkuo: this shouldn't be necessary yet because we spawn the process this runs inside


@@ -26,7 +26,7 @@ logger = setup_logger()
trail=False,
)
def eval_run_task(
self: Task,
self: Task, # noqa: ARG001
*,
configuration_dict: dict[str, Any],
) -> None:
@@ -48,7 +48,7 @@ def eval_run_task(
bind=True,
trail=False,
)
def scheduled_eval_task(self: Task, **kwargs: Any) -> None:
def scheduled_eval_task(self: Task, **kwargs: Any) -> None: # noqa: ARG001
"""
Scheduled task to run evaluations on configured datasets.
Runs weekly on Sunday at midnight UTC.


@@ -322,7 +322,7 @@ def _run_hierarchy_extraction(
bind=True,
)
def connector_hierarchy_fetching_task(
self: Task,
self: Task, # noqa: ARG001
*,
cc_pair_id: int,
tenant_id: str,


@@ -17,7 +17,9 @@ from onyx.llm.well_known_providers.auto_update_service import (
trail=False,
bind=True,
)
def check_for_auto_llm_updates(self: Task, *, tenant_id: str) -> bool | None:
def check_for_auto_llm_updates(
self: Task, *, tenant_id: str # noqa: ARG001
) -> bool | None:
"""Periodic task to fetch LLM model updates from GitHub
and sync them to providers in Auto mode.


@@ -871,7 +871,7 @@ def cloud_monitor_celery_queues(
@shared_task(name=OnyxCeleryTask.MONITOR_CELERY_QUEUES, ignore_result=True, bind=True)
def monitor_celery_queues(self: Task, *, tenant_id: str) -> None:
def monitor_celery_queues(self: Task, *, tenant_id: str) -> None: # noqa: ARG001
return monitor_celery_queues_helper(self)
@@ -952,7 +952,7 @@ def _get_cmdline_for_process(process: psutil.Process) -> str | None:
queue=OnyxCeleryQueues.MONITORING,
bind=True,
)
def monitor_process_memory(self: Task, *, tenant_id: str) -> None:
def monitor_process_memory(self: Task, *, tenant_id: str) -> None: # noqa: ARG001
"""
Task to monitor memory usage of supervisor-managed processes.
This periodically checks the memory usage of processes and logs information


@@ -100,7 +100,7 @@ def _migrate_single_document(
bind=True,
)
def check_for_documents_for_opensearch_migration_task(
self: Task, *, tenant_id: str
self: Task, *, tenant_id: str # noqa: ARG001
) -> bool | None:
"""
Periodic task to check for and add documents to the OpenSearch migration
@@ -211,7 +211,7 @@ def check_for_documents_for_opensearch_migration_task(
bind=True,
)
def migrate_documents_from_vespa_to_opensearch_task(
self: Task,
self: Task, # noqa: ARG001
*,
tenant_id: str,
) -> bool | None:


@@ -24,7 +24,7 @@ from onyx.db.engine.sql_engine import get_session_with_current_tenant
bind=True,
base=AbortableTask,
)
def kombu_message_cleanup_task(self: Any, tenant_id: str) -> int:
def kombu_message_cleanup_task(self: Any, tenant_id: str) -> int: # noqa: ARG001
"""Runs periodically to clean up the kombu_message table"""
# we will select messages older than this amount to clean up


@@ -587,7 +587,7 @@ def connector_pruning_generator_task(
def monitor_ccpair_pruning_taskset(
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session # noqa: ARG001
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)


@@ -276,7 +276,7 @@ def document_by_cc_pair_cleanup_task(
@shared_task(name=OnyxCeleryTask.CELERY_BEAT_HEARTBEAT, ignore_result=True, bind=True)
def celery_beat_heartbeat(self: Task, *, tenant_id: str) -> None:
def celery_beat_heartbeat(self: Task, *, tenant_id: str) -> None: # noqa: ARG001
"""When this task runs, it writes a key to Redis with a TTL.
An external observer can check this key to figure out if the celery beat is still running.


@@ -168,7 +168,9 @@ def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
bind=True,
ignore_result=True,
)
def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -> None:
def process_single_user_file(
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
) -> None:
task_logger.info(f"process_single_user_file - Starting id={user_file_id}")
start = time.monotonic()
@@ -391,7 +393,7 @@ def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
ignore_result=True,
)
def process_single_user_file_delete(
self: Task, *, user_file_id: str, tenant_id: str
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
) -> None:
"""Process a single user file delete."""
task_logger.info(f"process_single_user_file_delete - Starting id={user_file_id}")
@@ -542,7 +544,7 @@ def check_for_user_file_project_sync(self: Task, *, tenant_id: str) -> None:
ignore_result=True,
)
def process_single_user_file_project_sync(
self: Task, *, user_file_id: str, tenant_id: str
self: Task, *, user_file_id: str, tenant_id: str # noqa: ARG001
) -> None:
"""Process a single user file project sync."""
task_logger.info(


@@ -174,7 +174,9 @@ class SimpleJobClient:
logger.debug(f"Cleaning up job with id: '{job.id}'")
del self.jobs[job.id]
def submit(self, func: Callable, *args: Any, pure: bool = True) -> SimpleJob | None:
def submit(
self, func: Callable, *args: Any, pure: bool = True # noqa: ARG002
) -> SimpleJob | None:
"""NOTE: `pure` arg is needed so this can be a drop in replacement for Dask"""
self._cleanup_completed_jobs()
if len(self.jobs) >= self.n_workers:


@@ -341,7 +341,7 @@ def create_temporary_persona(
def process_kg_commands(
message: str, persona_name: str, tenant_id: str, db_session: Session
message: str, persona_name: str, tenant_id: str, db_session: Session # noqa: ARG001
) -> None:
# Temporarily, until we have a draft UI for the KG Operations/Management
# TODO: move to api endpoint once we get frontend


@@ -331,7 +331,7 @@ def construct_message_history(
def _create_project_files_message(
project_files: ExtractedProjectFiles,
token_counter: Callable[[str], int] | None,
token_counter: Callable[[str], int] | None, # noqa: ARG001
) -> ChatMessageSimple:
"""Convert project files to a ChatMessageSimple message.


@@ -741,7 +741,7 @@ def llm_loop_completion_handle(
assistant_message: ChatMessage,
llm: LLM,
reserved_tokens: int,
processing_start_time: float | None = None,
processing_start_time: float | None = None, # noqa: ARG001
) -> None:
chat_session_id = assistant_message.chat_session_id


@@ -54,7 +54,9 @@ class AsanaConnector(LoadConnector, PollConnector):
return None
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch | None
self,
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch | None, # noqa: ARG002
) -> GenerateDocumentsOutput:
start_time = datetime.datetime.fromtimestamp(start).isoformat()
logger.info(f"Starting Asana poll from {start_time}")


@@ -891,8 +891,8 @@ class ConfluenceConnector(
def _retrieve_all_slim_docs(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
start: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
end: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
callback: IndexingHeartbeatInterface | None = None,
include_permissions: bool = True,
) -> GenerateSlimDocumentOutput:


@@ -213,7 +213,7 @@ def process_attachment(
def _process_image_attachment(
confluence_client: "OnyxConfluence",
confluence_client: "OnyxConfluence", # noqa: ARG001
attachment: dict[str, Any],
raw_bytes: bytes,
media_type: str,


@@ -239,7 +239,7 @@ class LocalFileConnector(LoadConnector):
def __init__(
self,
file_locations: list[Path | str],
file_names: list[str] | None = None,
file_names: list[str] | None = None, # noqa: ARG002
zip_metadata: dict[str, Any] | None = None,
batch_size: int = INDEX_BATCH_SIZE,
) -> None:


@@ -195,7 +195,7 @@ class FreshdeskConnector(PollConnector, LoadConnector):
self.domain = domain
def _fetch_tickets(
self, start: datetime | None = None, end: datetime | None = None
self, start: datetime | None = None, end: datetime | None = None # noqa: ARG002
) -> Iterator[List[dict]]:
"""
'end' is not currently used, so we may double fetch tickets created after the indexing


@@ -321,7 +321,7 @@ def _full_thread_from_id(
def _slim_thread_from_id(
thread_id: str,
user_email: str,
gmail_service: GmailService,
gmail_service: GmailService, # noqa: ARG001
) -> SlimDocument:
return SlimDocument(
id=thread_id,
@@ -432,7 +432,7 @@ class GmailConnector(
time_range_end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
page_token: str | None = None,
set_page_token: Callable[[str | None], None] = lambda x: None,
set_page_token: Callable[[str | None], None] = lambda x: None, # noqa: ARG005
is_slim: bool = False,
) -> Iterator[Document | ConnectorFailure] | GenerateSlimDocumentOutput:
query = _build_time_range_query(time_range_start, time_range_end)
@@ -504,7 +504,7 @@ class GmailConnector(
self,
user_email: str,
page_token: str | None = None,
set_page_token: Callable[[str | None], None] = lambda x: None,
set_page_token: Callable[[str | None], None] = lambda x: None, # noqa: ARG005
time_range_start: SecondsSinceUnixEpoch | None = None,
time_range_end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
@@ -526,7 +526,7 @@ class GmailConnector(
self,
user_email: str,
page_token: str | None = None,
set_page_token: Callable[[str | None], None] = lambda x: None,
set_page_token: Callable[[str | None], None] = lambda x: None, # noqa: ARG005
time_range_start: SecondsSinceUnixEpoch | None = None,
time_range_end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,


@@ -217,7 +217,7 @@ class GoogleDriveConnector(
shared_folder_urls: str | None = None,
specific_user_emails: str | None = None,
exclude_domain_link_only: bool = False,
batch_size: int = INDEX_BATCH_SIZE,
batch_size: int = INDEX_BATCH_SIZE, # noqa: ARG002
# OLD PARAMETERS
folder_paths: list[str] | None = None,
include_shared: bool | None = None,


@@ -361,9 +361,9 @@ class HighspotConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync)
def retrieve_all_slim_docs_perm_sync(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
start: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
end: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
callback: IndexingHeartbeatInterface | None = None, # noqa: ARG002
) -> GenerateSlimDocumentOutput:
"""
Retrieve all document IDs from the configured spots.


@@ -89,7 +89,7 @@ class BaseConnector(abc.ABC, Generic[CT]):
based on the application level image analysis setting."""
@classmethod
def normalize_url(cls, url: str) -> "NormalizationResult":
def normalize_url(cls, url: str) -> "NormalizationResult": # noqa: ARG003
"""Normalize a URL to match the canonical Document.id format used during ingestion.
Connectors that use URLs as document IDs should override this method.


@@ -831,7 +831,7 @@ class JiraConnector(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
callback: IndexingHeartbeatInterface | None = None, # noqa: ARG002
) -> GenerateSlimDocumentOutput:
one_day = timedelta(hours=24).total_seconds()


@@ -84,7 +84,10 @@ class LinearConnector(LoadConnector, PollConnector, OAuthConnector):
@classmethod
def oauth_authorization_url(
cls, base_domain: str, state: str, additional_kwargs: dict[str, str]
cls,
base_domain: str,
state: str,
additional_kwargs: dict[str, str], # noqa: ARG003
) -> str:
if not LINEAR_CLIENT_ID:
raise ValueError("LINEAR_CLIENT_ID environment variable must be set")
@@ -102,7 +105,10 @@ class LinearConnector(LoadConnector, PollConnector, OAuthConnector):
@classmethod
def oauth_code_to_token(
cls, base_domain: str, code: str, additional_kwargs: dict[str, str]
cls,
base_domain: str,
code: str,
additional_kwargs: dict[str, str], # noqa: ARG003
) -> dict[str, Any]:
data = {
"code": code,


@@ -70,7 +70,7 @@ class FamilyFileGeneratorInMemory(generate_family_file.FamilyFileGenerator):
"""
return True
def writefile(self, verify: Any) -> None:
def writefile(self, verify: Any) -> None: # noqa: ARG002
"""Write the family file.
This overrides the method in the parent class to write the family definition to memory instead of to disk.


@@ -146,7 +146,9 @@ class MediaWikiConnector(LoadConnector, PollConnector):
continue
self.pages.append(pywikibot.Page(self.site, page))
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
def load_credentials(
self, credentials: dict[str, Any] # noqa: ARG002
) -> dict[str, Any] | None:
"""Load credentials for a MediaWiki site.
Note:


@@ -45,7 +45,9 @@ class MockConnector(CheckpointedConnectorWithPermSync[MockConnectorCheckpoint]):
self.connector_yields: list[SingleConnectorYield] | None = None
self.current_yield_index: int = 0
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
def load_credentials(
self, credentials: dict[str, Any] # noqa: ARG002
) -> dict[str, Any] | None:
response = self.client.get(self._get_mock_server_url("get-documents"))
response.raise_for_status()
data = response.json()
@@ -67,8 +69,8 @@ class MockConnector(CheckpointedConnectorWithPermSync[MockConnectorCheckpoint]):
def _load_from_checkpoint_common(
self,
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch,
start: SecondsSinceUnixEpoch, # noqa: ARG002
end: SecondsSinceUnixEpoch, # noqa: ARG002
checkpoint: MockConnectorCheckpoint,
include_permissions: bool = False,
) -> CheckpointOutput[MockConnectorCheckpoint]:


@@ -1134,9 +1134,9 @@ class SalesforceConnector(LoadConnector, PollConnector, SlimConnectorWithPermSyn
def retrieve_all_slim_docs_perm_sync(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
start: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
end: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
callback: IndexingHeartbeatInterface | None = None, # noqa: ARG002
) -> GenerateSlimDocumentOutput:
doc_metadata_list: list[SlimDocument | HierarchyNode] = []
for parent_object_type in self.parent_object_list:


@@ -1944,9 +1944,9 @@ class SharepointConnector(
def retrieve_all_slim_docs_perm_sync(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
start: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
end: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
callback: IndexingHeartbeatInterface | None = None, # noqa: ARG002
) -> GenerateSlimDocumentOutput:
yield from self._fetch_slim_documents_from_sharepoint()


@@ -22,7 +22,7 @@ def get_sharepoint_external_access(
raise ValueError("DriveItem ID is required")
# Get external access using the EE implementation
def noop_fallback(*args: Any, **kwargs: Any) -> ExternalAccess:
def noop_fallback(*args: Any, **kwargs: Any) -> ExternalAccess: # noqa: ARG001
return ExternalAccess.empty()
get_external_access_func = fetch_versioned_implementation_with_fallback(


@@ -242,9 +242,9 @@ class SlabConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
def retrieve_all_slim_docs_perm_sync(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
start: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
end: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
callback: IndexingHeartbeatInterface | None = None, # noqa: ARG002
) -> GenerateSlimDocumentOutput:
slim_doc_batch: list[SlimDocument | HierarchyNode] = []
for post_id in get_all_post_ids(self.slab_bot_token):


@@ -828,8 +828,8 @@ class SlackConnector(
def retrieve_all_slim_docs_perm_sync(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
start: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
end: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
if self.client is None:


@@ -48,10 +48,10 @@ class OnyxRedisSlackRetryHandler(RetryHandler):
def _can_retry(
self,
*,
state: RetryState,
request: HttpRequest,
state: RetryState, # noqa: ARG002
request: HttpRequest, # noqa: ARG002
response: Optional[HttpResponse] = None,
error: Optional[Exception] = None,
error: Optional[Exception] = None, # noqa: ARG002
) -> bool:
return response is not None and response.status_code == 429
@@ -59,7 +59,7 @@ class OnyxRedisSlackRetryHandler(RetryHandler):
self,
*,
state: RetryState,
request: HttpRequest,
request: HttpRequest, # noqa: ARG002
response: Optional[HttpResponse] = None,
error: Optional[Exception] = None,
) -> None:


@@ -186,7 +186,7 @@ class TeamsConnector(
def load_from_checkpoint(
self,
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch, # noqa: ARG002
checkpoint: TeamsCheckpoint,
) -> CheckpointOutput[TeamsCheckpoint]:
if self.graph_client is None:


@@ -349,7 +349,7 @@ class TestRailConnector(LoadConnector, PollConnector):
if len(cases) < limit:
break
def _build_case_link(self, project_id: int, case_id: int) -> str:
def _build_case_link(self, project_id: int, case_id: int) -> str: # noqa: ARG002
# Standard UI link to a case
return f"{self.base_url}/index.php?/cases/view/{case_id}"
@@ -357,7 +357,7 @@ class TestRailConnector(LoadConnector, PollConnector):
self,
project: dict[str, Any],
case: dict[str, Any],
suite: dict[str, Any] | None = None,
suite: dict[str, Any] | None = None, # noqa: ARG002
) -> Document | None:
project_id = project.get("id")
if not isinstance(project_id, int):


@@ -450,7 +450,7 @@ class WebConnector(LoadConnector):
mintlify_cleanup: bool = True, # Mostly ok to apply to other websites as well
batch_size: int = INDEX_BATCH_SIZE,
scroll_before_scraping: bool = False,
**kwargs: Any,
**kwargs: Any, # noqa: ARG002
) -> None:
self.mintlify_cleanup = mintlify_cleanup
self.batch_size = batch_size


@@ -569,8 +569,8 @@ class ZendeskConnector(
def retrieve_all_slim_docs_perm_sync(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
end: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
callback: IndexingHeartbeatInterface | None = None, # noqa: ARG002
) -> GenerateSlimDocumentOutput:
slim_doc_batch: list[SlimDocument | HierarchyNode] = []
if self.content_type == "articles":


@@ -192,7 +192,9 @@ class ZulipConnector(LoadConnector, PollConnector):
anchor = str(message.id)
def _poll_source(
self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
self,
start: SecondsSinceUnixEpoch | None,
end: SecondsSinceUnixEpoch | None, # noqa: ARG002
) -> GenerateDocumentsOutput:
# Since Zulip doesn't support searching by timestamp,
# we have to always start from the newest message


@@ -906,7 +906,7 @@ def slack_retrieval(
query: ChunkIndexRequest,
access_token: str,
db_session: Session,
connector: FederatedConnectorDetail | None = None,
connector: FederatedConnectorDetail | None = None, # noqa: ARG001
entities: dict[str, Any] | None = None,
limit: int | None = None,
slack_event_context: SlackContext | None = None,


@@ -170,10 +170,10 @@ def get_document_sets_by_ids(
def make_doc_set_private(
document_set_id: int,
document_set_id: int, # noqa: ARG001
user_ids: list[UUID] | None,
group_ids: list[int] | None,
db_session: Session,
db_session: Session, # noqa: ARG001
) -> None:
# May cause error if someone switches down to MIT from EE
if user_ids or group_ids:
@@ -491,7 +491,9 @@ def delete_document_set_cc_pair_relationship__no_commit(
def fetch_document_sets(
user_id: UUID | None, db_session: Session, include_outdated: bool = False
user_id: UUID | None, # noqa: ARG001
db_session: Session,
include_outdated: bool = False,
) -> list[tuple[DocumentSetDBModel, list[ConnectorCredentialPair]]]:
"""Return is a list where each element contains a tuple of:
1. The document set itself


@@ -89,7 +89,7 @@ def get_sqlalchemy_async_engine() -> AsyncEngine:
@event.listens_for(_ASYNC_ENGINE.sync_engine, "do_connect")
def provide_iam_token_async(
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any # noqa: ARG001
) -> None:
# For async engine using asyncpg, we still need to set the IAM token here.
host = POSTGRES_HOST


@@ -36,7 +36,9 @@ def configure_psycopg2_iam_auth(
cparams["sslrootcert"] = SSL_CERT_FILE
def provide_iam_token(dialect: Any, conn_rec: Any, cargs: Any, cparams: Any) -> None:
def provide_iam_token(
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any # noqa: ARG001
) -> None:
if USE_IAM_AUTH:
host = POSTGRES_HOST
port = POSTGRES_PORT


@@ -66,7 +66,7 @@ def build_connection_string(
db: str = POSTGRES_DB,
app_name: str | None = None,
use_iam_auth: bool = USE_IAM_AUTH,
region: str = "us-west-2",
region: str = "us-west-2", # noqa: ARG001
) -> str:
if use_iam_auth:
base_conn_str = f"postgresql+{db_api}://{user}@{host}:{port}/{db}"
@@ -86,13 +86,13 @@ if LOG_POSTGRES_LATENCY:
@event.listens_for(Engine, "before_cursor_execute")
def before_cursor_execute( # type: ignore
conn, cursor, statement, parameters, context, executemany
conn, cursor, statement, parameters, context, executemany # noqa: ARG001
):
conn.info["query_start_time"] = time.time()
@event.listens_for(Engine, "after_cursor_execute")
def after_cursor_execute( # type: ignore
conn, cursor, statement, parameters, context, executemany
conn, cursor, statement, parameters, context, executemany # noqa: ARG001
):
total_time = time.time() - conn.info["query_start_time"]
if total_time > 0.1:
@@ -106,7 +106,7 @@ if LOG_POSTGRES_CONN_COUNTS:
checkin_count = 0
@event.listens_for(Engine, "checkout")
def log_checkout(dbapi_connection, connection_record, connection_proxy): # type: ignore
def log_checkout(dbapi_connection, connection_record, connection_proxy): # type: ignore # noqa: ARG001
global checkout_count
checkout_count += 1
@@ -122,7 +122,7 @@ if LOG_POSTGRES_CONN_COUNTS:
)
@event.listens_for(Engine, "checkin")
def log_checkin(dbapi_connection, connection_record): # type: ignore
def log_checkin(dbapi_connection, connection_record): # type: ignore # noqa: ARG001
global checkin_count
checkin_count += 1
logger.debug(f"Total connection checkins: {checkin_count}")
@@ -141,7 +141,7 @@ class SqlEngine:
pool_size: int,
# is really `pool_max_overflow`, but calling it `max_overflow` to stay consistent with SQLAlchemy
max_overflow: int,
app_name: str | None = None,
app_name: str | None = None, # noqa: ARG003
db_api: str = SYNC_DB_API,
use_iam: bool = USE_IAM_AUTH,
connection_string: str | None = None,


@@ -456,8 +456,8 @@ def get_all_hierarchy_nodes_for_source(
def _get_accessible_hierarchy_nodes_for_source(
db_session: Session,
source: DocumentSource,
user_email: str | None,
external_group_ids: list[str],
user_email: str | None, # noqa: ARG001
external_group_ids: list[str], # noqa: ARG001
) -> list[HierarchyNode]:
"""
MIT version: Returns all hierarchy nodes for the source without permission filtering.


@@ -49,7 +49,7 @@ def get_mcp_servers_by_owner(owner_email: str, db_session: Session) -> list[MCPS
def get_mcp_servers_for_persona(
persona_id: int, db_session: Session, user: User
persona_id: int, db_session: Session, user: User # noqa: ARG001
) -> list[MCPServer]:
"""Get all MCP servers associated with a persona via its tools"""
# Get the persona and its tools


@@ -120,12 +120,16 @@ class EncryptedString(TypeDecorator):
# This type's behavior is fully deterministic and doesn't depend on any external factors.
cache_ok = True
def process_bind_param(self, value: str | None, dialect: Dialect) -> bytes | None:
def process_bind_param(
self, value: str | None, dialect: Dialect # noqa: ARG002
) -> bytes | None:
if value is not None:
return encrypt_string_to_bytes(value)
return value
def process_result_value(self, value: bytes | None, dialect: Dialect) -> str | None:
def process_result_value(
self, value: bytes | None, dialect: Dialect # noqa: ARG002
) -> str | None:
if value is not None:
return decrypt_bytes_to_string(value)
return value
@@ -136,14 +140,16 @@ class EncryptedJson(TypeDecorator):
# This type's behavior is fully deterministic and doesn't depend on any external factors.
cache_ok = True
def process_bind_param(self, value: dict | None, dialect: Dialect) -> bytes | None:
def process_bind_param(
self, value: dict | None, dialect: Dialect # noqa: ARG002
) -> bytes | None:
if value is not None:
json_str = json.dumps(value)
return encrypt_string_to_bytes(json_str)
return value
def process_result_value(
self, value: bytes | None, dialect: Dialect
self, value: bytes | None, dialect: Dialect # noqa: ARG002
) -> dict | None:
if value is not None:
json_str = decrypt_bytes_to_string(value)
@@ -156,13 +162,17 @@ class NullFilteredString(TypeDecorator):
# This type's behavior is fully deterministic and doesn't depend on any external factors.
cache_ok = True
def process_bind_param(self, value: str | None, dialect: Dialect) -> str | None:
def process_bind_param(
self, value: str | None, dialect: Dialect # noqa: ARG002
) -> str | None:
if value is not None and "\x00" in value:
logger.warning(f"NUL characters found in value: {value}")
return value.replace("\x00", "")
return value
def process_result_value(self, value: str | None, dialect: Dialect) -> str | None:
def process_result_value(
self, value: str | None, dialect: Dialect # noqa: ARG002
) -> str | None:
return value
@@ -273,7 +283,7 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
)
@validates("email")
def validate_email(self, key: str, value: str) -> str:
def validate_email(self, key: str, value: str) -> str: # noqa: ARG002
return value.lower() if value else value
@property
@@ -4173,7 +4183,7 @@ class UserTenantMapping(Base):
active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
@validates("email")
def validate_email(self, key: str, value: str) -> str:
def validate_email(self, key: str, value: str) -> str: # noqa: ARG002
return value.lower() if value else value


@@ -18,14 +18,14 @@ class PydanticType(TypeDecorator):
self.pydantic_model = pydantic_model
def process_bind_param(
self, value: Optional[BaseModel], dialect: Any
self, value: Optional[BaseModel], dialect: Any # noqa: ARG002
) -> Optional[dict]:
if value is not None:
return json.loads(value.json())
return None
def process_result_value(
self, value: Optional[dict], dialect: Any
self, value: Optional[dict], dialect: Any # noqa: ARG002
) -> Optional[BaseModel]:
if value is not None:
return self.pydantic_model.parse_obj(value)
@@ -42,14 +42,14 @@ class PydanticListType(TypeDecorator):
self.pydantic_model = pydantic_model
def process_bind_param(
self, value: Optional[list[BaseModel]], dialect: Any
self, value: Optional[list[BaseModel]], dialect: Any # noqa: ARG002
) -> Optional[list[dict]]:
if value is not None:
return [json.loads(item.model_dump_json()) for item in value]
return None
def process_result_value(
self, value: Optional[list[dict]], dialect: Any
self, value: Optional[list[dict]], dialect: Any # noqa: ARG002
) -> Optional[list[BaseModel]]:
if value is not None:
return [self.pydantic_model.model_validate(item) for item in value]


@@ -91,7 +91,9 @@ def create_slack_channel_persona(
return persona
def _no_ee_standard_answer_categories(*args: Any, **kwargs: Any) -> list:
def _no_ee_standard_answer_categories(
*args: Any, **kwargs: Any # noqa: ARG001
) -> list:
return []
@@ -162,7 +164,7 @@ def update_slack_channel_config(
channel_config: ChannelConfig,
standard_answer_category_ids: list[int],
enable_auto_filters: bool,
disabled: bool,
disabled: bool, # noqa: ARG001
) -> SlackChannelConfig:
slack_channel_config = db_session.scalar(
select(SlackChannelConfig).where(


@@ -187,7 +187,7 @@ def run_deep_research_llm_loop(
state_container: ChatStateContainer,
simple_chat_history: list[ChatMessageSimple],
tools: list[Tool],
custom_agent_prompt: str | None,
custom_agent_prompt: str | None, # noqa: ARG001
llm: LLM,
token_counter: Callable[[str], int],
db_session: Session,


@@ -240,10 +240,10 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
self,
index_name: str,
secondary_index_name: str | None,
large_chunks_enabled: bool,
secondary_large_chunks_enabled: bool | None,
large_chunks_enabled: bool, # noqa: ARG002
secondary_large_chunks_enabled: bool | None, # noqa: ARG002
multitenant: bool = False,
httpx_client: httpx.Client | None = None,
httpx_client: httpx.Client | None = None, # noqa: ARG002
) -> None:
super().__init__(
index_name=index_name,
@@ -279,8 +279,8 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
self,
primary_embedding_dim: int,
primary_embedding_precision: EmbeddingPrecision,
secondary_index_embedding_dim: int | None,
secondary_index_embedding_precision: EmbeddingPrecision | None,
secondary_index_embedding_dim: int | None, # noqa: ARG002
secondary_index_embedding_precision: EmbeddingPrecision | None, # noqa: ARG002
) -> None:
# Only handle primary index for now, ignore secondary.
return self._real_index.verify_and_create_index_if_necessary(
@@ -320,7 +320,7 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
self,
doc_id: str,
*,
tenant_id: str,
tenant_id: str, # noqa: ARG002
chunk_count: int | None,
) -> int:
return self._real_index.delete(doc_id, chunk_count)
@@ -329,7 +329,7 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
self,
doc_id: str,
*,
tenant_id: str,
tenant_id: str, # noqa: ARG002
chunk_count: int | None,
fields: VespaDocumentFields | None,
user_fields: VespaDocumentUserFields | None,
@@ -364,7 +364,7 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
chunk_requests: list[VespaChunkRequest],
filters: IndexFilters,
batch_retrieval: bool = False,
get_large_chunks: bool = False,
get_large_chunks: bool = False, # noqa: ARG002
) -> list[InferenceChunk]:
section_requests = [
DocumentSectionRequest(
@@ -386,10 +386,10 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
final_keywords: list[str] | None,
filters: IndexFilters,
hybrid_alpha: float,
time_decay_multiplier: float,
time_decay_multiplier: float, # noqa: ARG002
num_to_retrieve: int,
ranking_profile_type: QueryExpansionType = QueryExpansionType.SEMANTIC,
title_content_ratio: float | None = TITLE_CONTENT_RATIO,
ranking_profile_type: QueryExpansionType = QueryExpansionType.SEMANTIC, # noqa: ARG002
title_content_ratio: float | None = TITLE_CONTENT_RATIO, # noqa: ARG002
) -> list[InferenceChunk]:
# Determine query type based on hybrid_alpha.
if hybrid_alpha >= 0.8:
@@ -458,7 +458,9 @@ class OpenSearchDocumentIndex(DocumentIndex):
self._os_client = OpenSearchClient(index_name=self._index_name)
def verify_and_create_index_if_necessary(
self, embedding_dim: int, embedding_precision: EmbeddingPrecision
self,
embedding_dim: int,
embedding_precision: EmbeddingPrecision, # noqa: ARG002
) -> None:
"""Verifies and creates the index if necessary.
@@ -518,7 +520,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
def index(
self,
chunks: list[DocMetadataAwareIndexChunk],
indexing_metadata: IndexingMetadata,
indexing_metadata: IndexingMetadata, # noqa: ARG002
) -> list[DocumentInsertionRecord]:
logger.debug(
f"[OpenSearchDocumentIndex] Indexing {len(chunks)} chunks for index {self._index_name}."
@@ -566,7 +568,9 @@ class OpenSearchDocumentIndex(DocumentIndex):
return document_indexing_results
def delete(self, document_id: str, chunk_count: int | None = None) -> int:
def delete(
self, document_id: str, chunk_count: int | None = None # noqa: ARG002
) -> int:
"""Deletes all chunks for a given document.
Does nothing if the specified document ID does not exist.
@@ -701,7 +705,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
filters: IndexFilters,
# TODO(andrei): Remove this from the new interface at some point; we
# should not be exposing this.
batch_retrieval: bool = False,
batch_retrieval: bool = False, # noqa: ARG002
# TODO(andrei): Add a param for whether to retrieve hidden docs.
) -> list[InferenceChunk]:
"""
@@ -751,7 +755,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
query_embedding: Embedding,
# TODO(andrei): This param is not great design, get rid of it.
final_keywords: list[str] | None,
query_type: QueryType,
query_type: QueryType, # noqa: ARG002
filters: IndexFilters,
num_to_retrieve: int,
) -> list[InferenceChunk]:
@@ -796,7 +800,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
self,
filters: IndexFilters,
num_to_retrieve: int = 10,
dirty: bool | None = None,
dirty: bool | None = None, # noqa: ARG002
) -> list[InferenceChunk]:
logger.debug(
f"[OpenSearchDocumentIndex] Randomly retrieving {num_to_retrieve} chunks for index {self._index_name}."


@@ -197,7 +197,9 @@ class DocumentChunk(BaseModel):
@field_serializer("last_updated", mode="wrap")
def serialize_datetime_fields_to_epoch_seconds(
self, value: datetime | None, handler: SerializerFunctionWrapHandler
self,
value: datetime | None,
handler: SerializerFunctionWrapHandler, # noqa: ARG002
) -> int | None:
"""
Serializes datetime fields to seconds since the Unix epoch.
@@ -231,7 +233,7 @@ class DocumentChunk(BaseModel):
@field_serializer("tenant_id", mode="wrap")
def serialize_tenant_state(
self, value: TenantState, handler: SerializerFunctionWrapHandler
self, value: TenantState, handler: SerializerFunctionWrapHandler # noqa: ARG002
) -> str | None:
"""
Serializes tenant_state to the tenant str if multitenant, or None if


@@ -735,7 +735,7 @@ class VespaIndex(DocumentIndex):
chunk_requests: list[VespaChunkRequest],
filters: IndexFilters,
batch_retrieval: bool = False,
get_large_chunks: bool = False,
get_large_chunks: bool = False, # noqa: ARG002
) -> list[InferenceChunk]:
tenant_state = TenantState(
tenant_id=get_current_tenant_id(),
@@ -769,11 +769,11 @@ class VespaIndex(DocumentIndex):
query_embedding: Embedding,
final_keywords: list[str] | None,
filters: IndexFilters,
hybrid_alpha: float,
time_decay_multiplier: float,
hybrid_alpha: float, # noqa: ARG002
time_decay_multiplier: float, # noqa: ARG002
num_to_retrieve: int,
ranking_profile_type: QueryExpansionType = QueryExpansionType.SEMANTIC,
title_content_ratio: float | None = TITLE_CONTENT_RATIO,
title_content_ratio: float | None = TITLE_CONTENT_RATIO, # noqa: ARG002
) -> list[InferenceChunk]:
tenant_state = TenantState(
tenant_id=get_current_tenant_id(),
@@ -809,7 +809,7 @@ class VespaIndex(DocumentIndex):
def admin_retrieval(
self,
query: str,
query_embedding: Embedding,
query_embedding: Embedding, # noqa: ARG002
filters: IndexFilters,
num_to_retrieve: int = NUM_RETURNED_HITS,
) -> list[InferenceChunk]:


@@ -604,7 +604,7 @@ class VespaDocumentIndex(DocumentIndex):
self,
filters: IndexFilters,
num_to_retrieve: int = 100,
dirty: bool | None = None,
dirty: bool | None = None, # noqa: ARG002
) -> list[InferenceChunk]:
vespa_where_clauses = build_vespa_filters(filters, remove_trailing_and=True)


@@ -73,7 +73,7 @@ class LocalEvalProvider(EvalProvider):
def eval(
self,
task: Callable[[dict[str, Any]], EvalToolResult],
configuration: EvalConfigurationOptions,
configuration: EvalConfigurationOptions, # noqa: ARG002
data: list[dict[str, Any]] | None = None,
remote_dataset_name: str | None = None,
multi_turn_task: Callable[[dict[str, Any]], MultiTurnEvalResult] | None = None,


@@ -62,9 +62,9 @@ class NoOpFeatureFlagProvider(FeatureFlagProvider):
def feature_enabled(
self,
flag_key: str,
user_id: UUID,
user_properties: dict[str, Any] | None = None,
flag_key: str, # noqa: ARG002
user_id: UUID, # noqa: ARG002
user_properties: dict[str, Any] | None = None, # noqa: ARG002
) -> bool:
environment = ENVIRONMENT
if environment == "local":


@@ -388,7 +388,7 @@ class S3BackedFileStore(FileStore):
def read_file(
self,
file_id: str,
mode: str | None = None,
mode: str | None = None, # noqa: ARG002
use_tempfile: bool = False,
db_session: Session | None = None,
) -> IO[bytes]:


@@ -63,7 +63,7 @@ class UserFileIndexingAdapter:
self.db_session = db_session
def prepare(
self, documents: list[Document], ignore_time_skip: bool
self, documents: list[Document], ignore_time_skip: bool # noqa: ARG002
) -> DocumentBatchPrepareContext:
return DocumentBatchPrepareContext(
updatable_docs=documents, id_to_boost_map={} # TODO(subash): add boost map
@@ -237,8 +237,8 @@ class UserFileIndexingAdapter:
def post_index(
self,
context: DocumentBatchPrepareContext,
updatable_chunk_data: list[UpdatableChunkData],
filtered_documents: list[Document],
updatable_chunk_data: list[UpdatableChunkData], # noqa: ARG002
filtered_documents: list[Document], # noqa: ARG002
result: BuildMetadataAwareChunksResult,
) -> None:
user_file_ids = [doc.id for doc in context.updatable_docs]

Some files were not shown because too many files have changed in this diff.