Module preprocessor.package
A Package is a single file used to hold one or more files of data.
The Package is essentially a .zip archive with several specific files inside it
to define metadata about the package. Each package contains a file named
".meta.json" as well as the data itself.
Functions
def connect_sqlalchemy(engine, max_retries=4)
-
Connect to a SQLAlchemy engine with retries and exponential backoff.
Args
engine : sa.engine.Engine - The engine to connect to
max_retries : int - The maximum number of times to retry the connection
Returns
sa.engine.Connection - The connection
Classes
class Meta (version: MetaVersion, last_modified: datetime.datetime, record: Union[MetaRecordCsv, MetaRecordSql, MetaRecordMongo, MetaRecordModel, MetaRecordJson, MetaRecordWordEmbedding, MetaRecordAWSs3BucketStorage, MetaRecordAzureDataLakeStorage, MetaRecordDatabaseReport, MetaRecordReportResult, MetaRecordModelFolder])
-
Meta(version: preprocessor.package.MetaVersion, last_modified: datetime.datetime, record: Union[preprocessor.package.MetaRecordCsv, preprocessor.package.MetaRecordSql, preprocessor.package.MetaRecordMongo, preprocessor.package.MetaRecordModel, preprocessor.package.MetaRecordJson, preprocessor.package.MetaRecordWordEmbedding, preprocessor.package.MetaRecordAWSs3BucketStorage, preprocessor.package.MetaRecordAzureDataLakeStorage, preprocessor.package.MetaRecordDatabaseReport, preprocessor.package.MetaRecordReportResult, preprocessor.package.MetaRecordModelFolder])
Ancestors
- IsDict
- abc.ABC
Class variables
var SCHEMA_V1
var SCHEMA_V2
var SCHEMA_VERSION
var last_modified : datetime.datetime
var record : Union[MetaRecordCsv, MetaRecordSql, MetaRecordMongo, MetaRecordModel, MetaRecordJson, MetaRecordWordEmbedding, MetaRecordAWSs3BucketStorage, MetaRecordAzureDataLakeStorage, MetaRecordDatabaseReport, MetaRecordReportResult, MetaRecordModelFolder]
var version : MetaVersion
Static methods
def from_record(valid: dict) -> Union[MetaRecordCsv, MetaRecordSql, MetaRecordMongo, MetaRecordModel, MetaRecordJson, MetaRecordWordEmbedding, MetaRecordAWSs3BucketStorage, MetaRecordAzureDataLakeStorage, MetaRecordDatabaseReport, MetaRecordReportResult, MetaRecordModelFolder]
def promote_v2(valid: dict) -> dict
def validate(input: dict) -> dict
Inherited members
class MetaRecordAWSs3BucketStorage (bucket_name: str, region: str, object_name: str, aws_access_key_id: str, aws_secret_access_key: str)
-
MetaRecordAWSs3BucketStorage(bucket_name: str, region: str, object_name: str, aws_access_key_id: str, aws_secret_access_key: str)
Class variables
var aws_access_key_id : str
var aws_secret_access_key : str
var bucket_name : str
var object_name : str
var region : str
Static methods
def from_dict(input: dict) -> MetaRecordAWSs3BucketStorage
Methods
def to_dict(self) -> dict
class MetaRecordAzureDataLakeStorage (storage_account_name: str, storage_key: str, file_system: str, path: str)
-
MetaRecordAzureDataLakeStorage(storage_account_name: str, storage_key: str, file_system: str, path: str)
Class variables
var file_system : str
var path : str
var storage_account_name : str
var storage_key : str
Static methods
def from_dict(input: dict) -> MetaRecordAzureDataLakeStorage
Methods
def get_file_client(self) -> azure.storage.filedatalake._data_lake_file_client.DataLakeFileClient
def to_dict(self) -> dict
class MetaRecordCsv (sheet_path: str, path_column: str, synthesizer_path: Optional[str] = None, synthesizer_type: Optional[str] = None)
-
MetaRecordCsv(sheet_path: str, path_column: str, synthesizer_path: Optional[str] = None, synthesizer_type: Optional[str] = None)
Class variables
var path_column : str
var sheet_path : str
var synthesizer_path : Optional[str]
var synthesizer_type : Optional[str]
Static methods
def from_dict(input: dict) -> MetaRecordCsv
Methods
def to_dict(self) -> dict
class MetaRecordDatabaseReport (query_template: str, params: Dict[str, ReportParameter], connection: Optional[str] = None, connection_opts: Optional[dict] = None, credentials_info: Optional[dict] = None, federation_group: Optional[str] = None, aggregation_template: Optional[dict] = None, post_processing_script: Optional[str] = None, name: Optional[str] = None, description: Optional[str] = None)
-
MetaRecordDatabaseReport(query_template: str, params: Dict[str, preprocessor.report_parameters.ReportParameter], connection: Optional[str] = None, connection_opts: Optional[dict] = None, credentials_info: Optional[dict] = None, federation_group: Optional[str] = None, aggregation_template: Optional[dict] = None, post_processing_script: Optional[str] = None, name: Optional[str] = None, description: Optional[str] = None)
Class variables
var aggregation_template : Optional[dict]
var connection : Optional[str]
var connection_opts : Optional[dict]
var credentials_info : Optional[dict]
var description : Optional[str]
var federation_group : Optional[str]
var name : Optional[str]
var params : Dict[str, ReportParameter]
var post_processing_script : Optional[str]
var query_template : str
Static methods
def from_dict(input: dict) -> MetaRecordDatabaseReport
Methods
def to_dict(self) -> dict
class MetaRecordJson (data: str)
-
MetaRecordJson(data: str)
Class variables
var data : str
Static methods
def from_dict(input: dict) -> MetaRecordJson
Methods
def to_dict(self) -> dict
class MetaRecordModel (model_path: str, model_type: str, unrestricted_data: list, reports: ModelReportRecord, logs: list, data_transformers_path: List = None, target_transformers_path: List = None, vocab_path: str = None, target_map_path: str = None)
-
MetaRecordModel(model_path: str, model_type: str, unrestricted_data: list, reports: preprocessor.package.ModelReportRecord, logs: list, data_transformers_path: List = None, target_transformers_path: List = None, vocab_path: str = None, target_map_path: str = None)
Class variables
var data_transformers_path : List
var logs : list
var model_path : str
var model_type : str
var reports : ModelReportRecord
var target_map_path : str
var target_transformers_path : List
var unrestricted_data : list
var vocab_path : str
Static methods
def from_dict(input: dict) -> MetaRecordModel
Methods
def to_dict(self) -> dict
class MetaRecordModelFolder (manifest: dict = None)
-
MetaRecordModelFolder(manifest: dict = None)
Class variables
var manifest : dict
Methods
def from_dict(input: dict) -> MetaRecordModelFolder
def to_dict(self) -> dict
class MetaRecordMongo (query: str, connection: str, database: str, collection: str, projection: str, synthesizer_path: Optional[str] = None, synthesizer_type: Optional[str] = None, limit: Optional[int] = None, sort: Optional[List] = None)
-
MetaRecordMongo(query: str, connection: str, database: str, collection: str, projection: str, synthesizer_path: Optional[str] = None, synthesizer_type: Optional[str] = None, limit: Optional[int] = None, sort: Optional[List] = None)
Class variables
var collection : str
var connection : str
var database : str
var limit : Optional[int]
var projection : str
var query : str
var sort : Optional[List]
var synthesizer_path : Optional[str]
var synthesizer_type : Optional[str]
Static methods
def from_dict(input: dict) -> MetaRecordMongo
Methods
def to_dict(self) -> dict
class MetaRecordReportResult (manifest: dict = None)
-
MetaRecordReportResult(manifest: dict = None)
Class variables
var manifest : dict
Methods
def from_dict(input: dict) -> MetaRecordReportResult
def to_dict(self) -> dict
class MetaRecordSql (query: str, connection: str, options: Optional[dict] = None, credentials_info: Optional[dict] = None, synthesizer_path: Optional[str] = None, synthesizer_type: Optional[str] = None)
-
MetaRecordSql(query: str, connection: str, options: Optional[dict] = None, credentials_info: Optional[dict] = None, synthesizer_path: Optional[str] = None, synthesizer_type: Optional[str] = None)
Class variables
var connection : str
var credentials_info : Optional[dict]
var options : Optional[dict]
var query : str
var synthesizer_path : Optional[str]
var synthesizer_type : Optional[str]
Static methods
def from_dict(input: dict) -> MetaRecordSql
Methods
def to_dict(self) -> dict
class MetaRecordWordEmbedding (vocab_path: str, embedding_path: str)
-
MetaRecordWordEmbedding(vocab_path: str, embedding_path: str)
Class variables
var embedding_path : str
var vocab_path : str
Static methods
def from_dict(input: dict) -> MetaRecordWordEmbedding
Methods
def to_dict(self) -> dict
class MetaVersion (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration of the Meta versions (V1 and V2); used as the type of Meta.version.
Ancestors
- enum.Enum
Class variables
var V1
var V2
class ModelReportRecord (input_shape: List[Union[List[int], int]] = None, output_shape: List[int] = None, model_summary: List[str] = None, library_version: str = None)
-
ModelReportRecord(input_shape: List[Union[List[int], int]] = None, output_shape: List[int] = None, model_summary: List[str] = None, library_version: str = None)
Class variables
var input_shape : List[Union[List[int], int]]
var library_version : str
var model_summary : List[str]
var output_shape : List[int]
Static methods
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) -> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Methods
def to_dict(self, encode_json=False) -> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str