Source code for datamasque.client.models.discovery

"""Typed request and response shapes for schema-discovery and ruleset-generation endpoints."""

from typing import Any, Optional, Union

from pydantic import BaseModel, ConfigDict, Field, field_validator

from datamasque.client.models.connection import ConnectionConfig, ConnectionId, unwrap_connection_id
from datamasque.client.models.data_selection import HashColumnsTableConfig, Locator, UserSelection
from datamasque.client.models.pagination import Page


class InDataDiscoveryRule(BaseModel):
    """
    One rule entry for in-data discovery.

    `pattern` is mandatory; `name` may be left out and then defaults to `None`.
    """

    # Unknown keys are rejected rather than silently accepted.
    model_config = ConfigDict(extra="forbid")

    name: Optional[str] = None
    pattern: str
class InDataDiscoveryConfig(BaseModel):
    """
    Settings block for in-data discovery.

    Sent nested under `SchemaDiscoveryRequest.in_data_discovery`.
    Every field is optional and defaults to `None`.
    """

    # Unknown keys are rejected rather than silently accepted.
    model_config = ConfigDict(extra="forbid")

    enabled: Optional[bool] = None
    row_sample_size: Optional[int] = None
    custom_rules: Optional[list[InDataDiscoveryRule]] = None
    non_sensitive_rules: Optional[list[InDataDiscoveryRule]] = None
    force: Optional[bool] = None
class SchemaDiscoveryRequest(BaseModel):
    """
    Payload for `POST /api/schema-discovery/`.

    `connection` may be given as a `ConnectionId`
    or as a full `ConnectionConfig` returned by an earlier client call.
    Every other field uses the server's default value when omitted.
    """

    # Unknown keys are rejected rather than silently accepted.
    model_config = ConfigDict(extra="forbid")

    connection: Union[ConnectionId, ConnectionConfig]

    # Keyword and schema lists; each instance gets its own fresh empty list.
    custom_keywords: list[str] = Field(default_factory=list)
    ignored_keywords: list[str] = Field(default_factory=list)
    schemas: list[str] = Field(default_factory=list)

    in_data_discovery: Optional[InDataDiscoveryConfig] = None

    # Opt-out switches, all off by default.
    disable_built_in_keywords: bool = False
    disable_global_custom_keywords: bool = False
    disable_global_ignored_keywords: bool = False

    @field_validator("connection", mode="before")
    @classmethod
    def _unwrap_connection(cls, raw: Any) -> Any:
        """Pass the raw `connection` input through `unwrap_connection_id` before field validation."""
        return unwrap_connection_id(raw)
class RulesetGenerationRequest(BaseModel):
    """
    Payload for `POST /api/generate-ruleset/v2/`.

    `connection` may be given as a `ConnectionId`
    or as a full `ConnectionConfig` returned by an earlier client call.

    `selected_columns` is the same nested `schema -> table -> [column, ...]` mapping
    used by `SelectedColumns.columns`,
    and `hash_columns` follows the `HashColumnsTableConfig` shape.
    """

    # Unknown keys are rejected rather than silently accepted.
    model_config = ConfigDict(extra="forbid")

    connection: Union[ConnectionId, ConnectionConfig]
    selected_columns: dict[str, dict[str, list[str]]]
    hash_columns: Optional[dict[str, dict[str, HashColumnsTableConfig]]] = None

    @field_validator("connection", mode="before")
    @classmethod
    def _unwrap_connection(cls, raw: Any) -> Any:
        """Pass the raw `connection` input through `unwrap_connection_id` before field validation."""
        return unwrap_connection_id(raw)
class FileRulesetGenerationRequest(BaseModel):
    """
    Payload for `POST /api/generate-file-ruleset/`.

    `connection` may be given as a `ConnectionId`
    or as a full `ConnectionConfig` returned by an earlier client call.
    """

    # Unknown keys are rejected rather than silently accepted.
    model_config = ConfigDict(extra="forbid")

    connection: Union[ConnectionId, ConnectionConfig]
    selected_data: list[UserSelection]

    @field_validator("connection", mode="before")
    @classmethod
    def _unwrap_connection(cls, raw: Any) -> Any:
        """Pass the raw `connection` input through `unwrap_connection_id` before field validation."""
        return unwrap_connection_id(raw)
class DiscoveryMatch(BaseModel):
    """One match reported by schema or file discovery."""

    # Keys not declared below are kept on the instance instead of being rejected.
    model_config = ConfigDict(extra="allow")

    label: str
    categories: list[str]
    flagged_by: str
    description: str
    # None for metadata matches; a percentage in the range 0-100 for IDD matches.
    hit_ratio: Optional[int] = None
class ForeignKeyRef(BaseModel):
    """A foreign key declared on a column, naming the column it references."""

    # Keys not declared below are kept on the instance instead of being rejected.
    model_config = ConfigDict(extra="allow")

    name: str
    # Dotted path of the target column: "schema.table.column".
    referenced_column: str
class ReferencingForeignKey(BaseModel):
    """A foreign key declared on a different column that points *at* this column."""

    # Keys not declared below are kept on the instance instead of being rejected.
    model_config = ConfigDict(extra="allow")

    name: str
    # Dotted path of the column that holds the key: "schema.table.column".
    referencing_column: str
class SchemaDiscoveryColumn(BaseModel):
    """
    Per-column details carried by a schema discovery result.

    The list fields and `constraint` have no default and must be present in the input;
    the remaining fields default to `None`.
    """

    # Keys not declared below are kept on the instance instead of being rejected.
    model_config = ConfigDict(extra="allow")

    data_type: Optional[str] = None
    max_length: Optional[int] = None
    foreign_keys: list[ForeignKeyRef]
    discovery_matches: list[DiscoveryMatch]
    numeric_precision: Optional[int] = None
    numeric_scale: Optional[int] = None
    constraint_columns: list[str]
    pk_constraint_name: Optional[str] = None
    uk_constraint_name: Optional[str] = None
    unique_index_names: list[str]
    referencing_foreign_keys: list[ReferencingForeignKey]
    # "Primary" or "Unique", or an empty string if the column does not participate in a PK/UK.
    constraint: str
class SchemaDiscoveryResult(BaseModel):
    """One row of the v2 schema discovery results."""

    # Undeclared keys are kept; `populate_by_name` lets input use either
    # the alias ("schema") or the attribute name ("schema_name").
    model_config = ConfigDict(extra="allow", populate_by_name=True)

    id: int
    column: str
    table: str
    # Aliased to "schema": a field literally named `schema` would shadow
    # an attribute that `BaseModel` already defines.
    schema_name: Optional[str] = Field(default=None, alias="schema")
    data: SchemaDiscoveryColumn
class ConstraintColumns(BaseModel):
    """The list of column names belonging to one constraint in table metadata."""

    # Keys not declared below are kept on the instance instead of being rejected.
    model_config = ConfigDict(extra="allow")

    columns: list[str]
class TableConstraints(BaseModel):
    """
    Constraint metadata describing a single table.

    Each constraint kind is optional and defaults to `None`.
    """

    # Keys not declared below are kept on the instance instead of being rejected.
    model_config = ConfigDict(extra="allow")

    primary_keys: Optional[list[ConstraintColumns]] = None
    unique_keys: Optional[list[ConstraintColumns]] = None
    foreign_keys: Optional[list[ConstraintColumns]] = None
class SchemaDiscoveryPage(Page[SchemaDiscoveryResult]):
    """
    Admin-server envelope returned by `GET /api/schema-discovery/v2/{run_id}/`.

    A standard `Page` of `SchemaDiscoveryResult` rows
    with one additional field, `table_metadata`.
    """

    # Two-level mapping ending in `TableConstraints`.
    # NOTE(review): presumably keyed by schema, then table -- confirm against the server response.
    table_metadata: Optional[dict[str, dict[str, TableConstraints]]] = None
class FileDiscoveryMatch(BaseModel):
    """
    One match attached to a file discovery locator.

    Every field is optional and defaults to `None`.
    """

    # Keys not declared below are kept on the instance instead of being rejected.
    model_config = ConfigDict(extra="allow")

    categories: Optional[list[str]] = None
    flagged_by: Optional[str] = None
    description: Optional[str] = None
    label: Optional[str] = None
    hit_ratio: Optional[int] = None
class FileDiscoveryLocatorResult(BaseModel):
    """
    A locator (column/path) inside a discovered file, with the matches found for it.

    Every field is optional and defaults to `None`.
    """

    # Keys not declared below are kept on the instance instead of being rejected.
    model_config = ConfigDict(extra="allow")

    locator: Optional[Locator] = None
    matches: Optional[list[FileDiscoveryMatch]] = None
    data_types: Optional[list[str]] = None
class FileDiscoveryFile(BaseModel):
    """
    One file entry listed in a file discovery result.

    Every field is optional and defaults to `None`.
    """

    # Keys not declared below are kept on the instance instead of being rejected.
    model_config = ConfigDict(extra="allow")

    path: Optional[str] = None
    file_type: Optional[str] = None
    delimiter: Optional[str] = None
    encoding: Optional[str] = None
class FileDiscoveryResult(BaseModel):
    """
    One record returned by `GET /api/runs/{run_id}/file-discovery-results/`.

    Every field is optional and defaults to `None`.
    """

    # Keys not declared below are kept on the instance instead of being rejected.
    model_config = ConfigDict(extra="allow")

    id: Optional[int] = None
    # Left untyped (`Any`); the shape of this value is not pinned down by this model.
    connection: Optional[Any] = None
    file_type: Optional[str] = None
    files: Optional[list[FileDiscoveryFile]] = None
    results: Optional[list[FileDiscoveryLocatorResult]] = None