Functions

Classes
m load()

Module preprocessor.package

A Package is a single file used to hold one or more files of data. The Package is essentially a .zip archive with several specific files inside it to define metadata about the package. Each package contains a file named ".meta.json" as well as the data itself.

Functions

def connect_sqlalchemy(engine, max_retries=4)

Connect to a SQLAlchemy engine with retries and exponential backoff

Args

engine : sa.engine.Engine
The engine to connect to
max_retries : int
The maximum number of times to retry the connection

Returns

sa.engine.Connection
The connection

Classes

class Meta (version: MetaVersion, last_modified: datetime.datetime, record: Union[MetaRecordCsv, MetaRecordSql, MetaRecordMongo, MetaRecordModel, MetaRecordJson, MetaRecordWordEmbedding, MetaRecordAWSs3BucketStorage, MetaRecordAzureDataLakeStorage, MetaRecordDatabaseReport, MetaRecordReportResult, MetaRecordModelFolder])

Meta(version: preprocessor.package.MetaVersion, last_modified: datetime.datetime, record: Union[preprocessor.package.MetaRecordCsv, preprocessor.package.MetaRecordSql, preprocessor.package.MetaRecordMongo, preprocessor.package.MetaRecordModel, preprocessor.package.MetaRecordJson, preprocessor.package.MetaRecordWordEmbedding, preprocessor.package.MetaRecordAWSs3BucketStorage, preprocessor.package.MetaRecordAzureDataLakeStorage, preprocessor.package.MetaRecordDatabaseReport, preprocessor.package.MetaRecordReportResult, preprocessor.package.MetaRecordModelFolder])

Ancestors

Class variables

var SCHEMA_V1
var SCHEMA_V2
var SCHEMA_VERSION
var last_modified : datetime.datetime
var record : Union[MetaRecordCsv, MetaRecordSql, MetaRecordMongo, MetaRecordModel, MetaRecordJson, MetaRecordWordEmbedding, MetaRecordAWSs3BucketStorage, MetaRecordAzureDataLakeStorage, MetaRecordDatabaseReport, MetaRecordReportResult, MetaRecordModelFolder]
var version : MetaVersion

Static methods

def from_record(valid: dict) -> Union[MetaRecordCsv, MetaRecordSql, MetaRecordMongo, MetaRecordModel, MetaRecordJson, MetaRecordWordEmbedding, MetaRecordAWSs3BucketStorage, MetaRecordAzureDataLakeStorage, MetaRecordDatabaseReport, MetaRecordReportResult, MetaRecordModelFolder]
def promote_v2(valid: dict) -> dict
def validate(input: dict) -> dict

Inherited members

class MetaRecordAWSs3BucketStorage (bucket_name: str, region: str, object_name: str, aws_access_key_id: str, aws_secret_access_key: str)

MetaRecordAWSs3BucketStorage(bucket_name: str, region: str, object_name: str, aws_access_key_id: str, aws_secret_access_key: str)

Class variables

var aws_access_key_id : str
var aws_secret_access_key : str
var bucket_name : str
var object_name : str
var region : str

Static methods

def from_dict(input: dict) -> MetaRecordAWSs3BucketStorage

Methods

def to_dict(self) -> dict
class MetaRecordAzureDataLakeStorage (storage_account_name: str, storage_key: str, file_system: str, path: str)

MetaRecordAzureDataLakeStorage(storage_account_name: str, storage_key: str, file_system: str, path: str)

Class variables

var file_system : str
var path : str
var storage_account_name : str
var storage_key : str

Static methods

def from_dict(input: dict) -> MetaRecordAzureDataLakeStorage

Methods

def get_file_client(self) -> azure.storage.filedatalake._data_lake_file_client.DataLakeFileClient
def to_dict(self) -> dict
class MetaRecordCsv (sheet_path: str, path_column: str, synthesizer_path: Optional[str] = None, synthesizer_type: Optional[str] = None)

MetaRecordCsv(sheet_path: str, path_column: str, synthesizer_path: Optional[str] = None, synthesizer_type: Optional[str] = None)

Class variables

var path_column : str
var sheet_path : str
var synthesizer_path : Optional[str]
var synthesizer_type : Optional[str]

Static methods

def from_dict(input: dict) -> MetaRecordCsv

Methods

def to_dict(self) -> dict
class MetaRecordDatabaseReport (query_template: str, params: Dict[str, ReportParameter], connection: Optional[str] = None, connection_opts: Optional[dict] = None, credentials_info: Optional[dict] = None, federation_group: Optional[str] = None, aggregation_template: Optional[dict] = None, post_processing_script: Optional[str] = None, name: Optional[str] = None, description: Optional[str] = None)

MetaRecordDatabaseReport(query_template: str, params: Dict[str, preprocessor.report_parameters.ReportParameter], connection: Optional[str] = None, connection_opts: Optional[dict] = None, credentials_info: Optional[dict] = None, federation_group: Optional[str] = None, aggregation_template: Optional[dict] = None, post_processing_script: Optional[str] = None, name: Optional[str] = None, description: Optional[str] = None)

Class variables

var aggregation_template : Optional[dict]
var connection : Optional[str]
var connection_opts : Optional[dict]
var credentials_info : Optional[dict]
var description : Optional[str]
var federation_group : Optional[str]
var name : Optional[str]
var params : Dict[str, ReportParameter]
var post_processing_script : Optional[str]
var query_template : str

Static methods

def from_dict(input: dict) -> MetaRecordDatabaseReport

Methods

def to_dict(self) -> dict
class MetaRecordJson (data: str)

MetaRecordJson(data: str)

Class variables

var data : str

Static methods

def from_dict(input: dict) -> MetaRecordJson

Methods

def to_dict(self) -> dict
class MetaRecordModel (model_path: str, model_type: str, unrestricted_data: list, reports: ModelReportRecord, logs: list, data_transformers_path: List = None, target_transformers_path: List = None, vocab_path: str = None, target_map_path: str = None)

MetaRecordModel(model_path: str, model_type: str, unrestricted_data: list, reports: preprocessor.package.ModelReportRecord, logs: list, data_transformers_path: List = None, target_transformers_path: List = None, vocab_path: str = None, target_map_path: str = None)

Class variables

var data_transformers_path : List
var logs : list
var model_path : str
var model_type : str
var reports : ModelReportRecord
var target_map_path : str
var target_transformers_path : List
var unrestricted_data : list
var vocab_path : str

Static methods

def from_dict(input: dict) -> MetaRecordModel

Methods

def to_dict(self) -> dict
class MetaRecordModelFolder (manifest: dict = None)

MetaRecordModelFolder(manifest: dict = None)

Class variables

var manifest : dict

Methods

def from_dict(input: dict) -> MetaRecordModelFolder
def to_dict(self) -> dict
class MetaRecordMongo (query: str, connection: str, database: str, collection: str, projection: str, synthesizer_path: Optional[str] = None, synthesizer_type: Optional[str] = None, limit: Optional[int] = None, sort: Optional[List] = None)

MetaRecordMongo(query: str, connection: str, database: str, collection: str, projection: str, synthesizer_path: Optional[str] = None, synthesizer_type: Optional[str] = None, limit: Optional[int] = None, sort: Optional[List] = None)

Class variables

var collection : str
var connection : str
var database : str
var limit : Optional[int]
var projection : str
var query : str
var sort : Optional[List]
var synthesizer_path : Optional[str]
var synthesizer_type : Optional[str]

Static methods

def from_dict(input: dict) -> MetaRecordMongo

Methods

def to_dict(self) -> dict
class MetaRecordReportResult (manifest: dict = None)

MetaRecordReportResult(manifest: dict = None)

Class variables

var manifest : dict

Methods

def from_dict(input: dict) -> MetaRecordReportResult
def to_dict(self) -> dict
class MetaRecordSql (query: str, connection: str, options: Optional[dict] = None, credentials_info: Optional[dict] = None, synthesizer_path: Optional[str] = None, synthesizer_type: Optional[str] = None)

MetaRecordSql(query: str, connection: str, options: Optional[dict] = None, credentials_info: Optional[dict] = None, synthesizer_path: Optional[str] = None, synthesizer_type: Optional[str] = None)

Class variables

var connection : str
var credentials_info : Optional[dict]
var options : Optional[dict]
var query : str
var synthesizer_path : Optional[str]
var synthesizer_type : Optional[str]

Static methods

def from_dict(input: dict) -> MetaRecordSql

Methods

def to_dict(self) -> dict
class MetaRecordWordEmbedding (vocab_path: str, embedding_path: str)

MetaRecordWordEmbedding(vocab_path: str, embedding_path: str)

Class variables

var embedding_path : str
var vocab_path : str

Static methods

def from_dict(input: dict) -> MetaRecordWordEmbedding

Methods

def to_dict(self) -> dict
class MetaVersion (value, names=None, *, module=None, qualname=None, type=None, start=1)

An enumeration.

Ancestors

  • enum.Enum

Class variables

var V1
var V2
class ModelReportRecord (input_shape: List[Union[List[int], int]] = None, output_shape: List[int] = None, model_summary: List[str] = None, library_version: str = None)

ModelReportRecord(input_shape: List[Union[List[int], int]] = None, output_shape: List[int] = None, model_summary: List[str] = None, library_version: str = None)

Class variables

var input_shape : List[Union[List[int], int]]
var library_version : str
var model_summary : List[str]
var output_shape : List[int]

Static methods

def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) -> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) -> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str