Source code for preprocessor.standardizer.standardizer

# -*- coding: utf-8 -*-
"""
This file contains the Standardizer class. To run this script, uncomment or add the following lines
to the [options.entry_points] section of setup.cfg:

    console_scripts =
        standardizer = standardizer.__main__:main

Then run `python setup.py install` which will install the command `standardizer`
inside your current environment.

"""

import argparse
import sys
import logging
import numpy as np
from sklearn import preprocessing
from preprocessor.preprocessor import Preprocessor
from itertools import zip_longest 
from joblib import dump, load

__author__ = "Harvey Bastidas"
__copyright__ = "Harvey Bastidas"
__license__ = "mit"

_logger = logging.getLogger(__name__)


class Standardizer(Preprocessor):
    """Preprocessor that standardizes a dataset with scikit-learn's ``StandardScaler``.

    The fitted scaler can be persisted with joblib (``output_config_file``) and
    re-applied later to another dataset (``input_config_file``).

    NOTE(review): ``input_ds``, ``output_file``, ``output_config_file`` and
    ``input_config_file`` are read here but never assigned in this class;
    presumably the ``Preprocessor`` base class sets them (e.g. through
    ``assign_arguments``) -- confirm against the base class.
    """

    def __init__(self, conf):
        """Initialize the instance by forwarding ``conf`` to the base class constructor."""
        super().__init__(conf)

    def parse_args(self, args):
        """Parse command line parameters and store them on the instance.

        Adds the ``--no_config`` flag, lets ``self.parse_cmd`` extend the
        parser, parses ``args`` and passes the resulting namespace to
        ``self.assign_arguments``. Also sets ``self.no_config``.

        Args:
            args ([str]): command line parameters as list of strings

        Returns:
            None. The parsed values are stored on the instance, not returned.
        """
        parser = argparse.ArgumentParser(
            description="Dataset Standardizer: standarizes a dataset."
        )
        parser.add_argument(
            "--no_config",
            help="Do not generate an output configuration file.",
            action="store_true",
            default=False,
        )
        parser = self.parse_cmd(parser)
        pargs = parser.parse_args(args)
        self.assign_arguments(pargs)
        # Fall back to False if the parser returned by parse_cmd() does not
        # carry the --no_config option.
        self.no_config = getattr(pargs, "no_config", False)

    def core(self):
        """Run the core preprocessor task.

        If ``input_config_file`` is set (present and not ``None``), a stored
        scaler is loaded and applied via ``load_from_config``; otherwise a new
        scaler is fitted via ``standardize``.
        """
        if getattr(self, "input_config_file", None) is not None:
            self.load_from_config()
        else:
            self.standardize()

    def standardize(self):
        """Fit a ``StandardScaler`` on ``input_ds`` and store the result in ``output_ds``.

        The fitted scaler is dumped to ``output_config_file`` unless
        ``no_config`` is set to a true value. A missing ``no_config``
        attribute is treated as ``False``, so the scaler is dumped.
        """
        pt = preprocessing.StandardScaler()
        pt.fit(self.input_ds)
        self.output_ds = pt.transform(self.input_ds)
        if not getattr(self, "no_config", False):
            dump(pt, self.output_config_file)

    def load_from_config(self):
        """Standardize ``input_ds`` with a scaler loaded from ``input_config_file``.

        The transformed data is stored in ``output_ds``.
        """
        # SECURITY: joblib.load unpickles the file, which can execute arbitrary
        # code. Only use configuration files from trusted sources.
        pt = load(self.input_config_file)
        self.output_ds = pt.transform(self.input_ds)

    def store(self):
        """Write ``output_ds`` to ``output_file`` as comma-separated text.

        Values are written with six decimal places. Only the dataset is
        written here; the scaler configuration is saved by ``standardize``.
        """
        # Lazy %-formatting: works for non-str paths and skips the formatting
        # when debug logging is disabled.
        _logger.debug("output_file = %s", self.output_file)
        np.savetxt(self.output_file, self.output_ds, delimiter=",", fmt='%1.6f')
def run(args):
    """Console-script entry point.

    Builds a ``Standardizer`` with a ``None`` configuration and hands ``args``
    to its ``main`` method unchanged.

    Args:
        args ([str]): argument list forwarded to ``Standardizer.main``.
    """
    Standardizer(None).main(args)
# Script entry point: when executed directly, forward the full argument
# vector (sys.argv) to run().
if __name__ == "__main__":
    run(sys.argv)