Module insanonym_utils.runner

Expand source code
from os import path
from typing import List
from pandas import read_json, read_csv
from pandas.core.frame import DataFrame
from .models import Column, FileConfigModel, FileModel, CustomAlgorithm
from .algorithms import *
import importlib

class Runner:
    """
    Class used to create the dataframe and execute the configured algorithms
    """
    def __init__(self, model: FileConfigModel):
        _file = FileModel(name=model.name, path=path.join(model.path, model.name))
        self.model = model
        """`FileConfigModel` to use in the runner"""
        self.dataframe: DataFrame = DataFrame()
        """The resulting `DataFrame`"""
        # Verifications before creating DataFrame
        self._verifyRows(_file.path)
        column_names = list(map(lambda x: x.name, model.columns))
        self._verifyColumnsInAlgorithms(model.algorithms, column_names)

        if model.file_type == 'json':
            self.dataframe = read_json(path_or_buf=_file.path, orient='index')
        elif model.file_type == 'csv':
            self.dataframe = read_csv(filepath_or_buffer=_file.path, sep=model.columns_delimiter, header=None)
        self.dataframe.columns = column_names
        self._typeChecking(model.columns)

    def _verifyRows(self, file):
        with open(file, 'r') as csv:
            first_line = csv.readline()
        if first_line.count(self.model.columns_delimiter) + 1 != len(self.model.columns):
            raise Exception('Number of columns in the file does not match the configuration')

    def _typeChecking(self, columns: List[Column]):
        for column in columns:
            if column.column_type == "datetime64[ns]":
                self.dataframe[column.name] = self.dataframe[column.name].astype(column.column_type)
            type = self.dataframe[column.name].dtype
            if type != column.column_type:
                raise Exception(f"Error in column types: column {column.name} if of type {type}, not {column.column_type}")

    def _verifyColumnsInAlgorithms(self, algorithms, column_names):
        for algo in algorithms:
            if hasattr(algo, 'columns'):
                for col in algo.columns:
                    if col not in column_names:
                        raise Exception('Please check the column names in your algorithms')
            elif hasattr(algo, 'column'):
                if algo.column not in column_names:
                    raise Exception('Please check the column names in your algorithms')

    def execute(self):
        """
        Execute the algorithms specified in the model on the dataframe
        """
        for algo in self.model.algorithms:
            if not isinstance(algo, CustomAlgorithm):
                globals()[algo.name](self.dataframe, algo.options)
            else:
                mod = importlib.import_module(algo.name)
                mod.main(self.dataframe, algo.options)
        if self.model.export: self.save()

    def save(self):
        """
        Save the resulting dataframe to the location specified in the model
        """
        exporter = self.model.export_rules
        if exporter.output_format == 'csv':
            self.dataframe.to_csv(path_or_buf=exporter.output_name, sep=self.model.columns_delimiter, index=False, header=False)
        elif exporter.output_format == 'json':
            # index=False requires orient 'split' or 'table'; orient='records' omits the index
            self.dataframe.to_json(exporter.output_name, orient='records')
        else: raise NotImplementedError

Classes

class Runner (model: FileConfigModel)

Class used to create the dataframe and execute the configured algorithms
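
A minimal usage sketch (illustrative only: how a FileConfigModel instance is built depends on insanonym_utils.models; the fields the runner reads from it are name, path, file_type, columns_delimiter, columns, algorithms, export and export_rules):

from insanonym_utils.runner import Runner

# `config` is assumed to be an already-built FileConfigModel instance.
runner = Runner(config)         # reads and validates the input file into runner.dataframe
runner.execute()                # applies the configured algorithms; exports if config.export is set
print(runner.dataframe.head())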

Expand source code
class Runner:
    """
    Class used to create the dataframe and execute the configured algorithms
    """
    def __init__(self, model: FileConfigModel):
        _file = FileModel(name=model.name, path=path.join(model.path, model.name))
        self.model = model
        """`FileConfigModel` to use in the runner"""
        self.dataframe: DataFrame = DataFrame()
        """The resulting `DataFrame`"""
        # Verifications before creating DataFrame
        self._verifyRows(_file.path)
        column_names = list(map(lambda x: x.name, model.columns))
        self._verifyColumnsInAlgorithms(model.algorithms, column_names)

        if model.file_type == 'json':
            self.dataframe = read_json(path_or_buf=_file.path, orient='index')
        elif model.file_type == 'csv':
            self.dataframe = read_csv(filepath_or_buffer=_file.path, sep=model.columns_delimiter, header=None)
        self.dataframe.columns = column_names
        self._typeChecking(model.columns)

    def _verifyRows(self, file):
        with open(file, 'r') as csv:
            first_line = csv.readline()
        if first_line.count(self.model.columns_delimiter) + 1 != len(self.model.columns):
            raise Exception('Number of columns in the file does not match the configuration')

    def _typeChecking(self, columns: List[Column]):
        for column in columns:
            if column.column_type == "datetime64[ns]":
                self.dataframe[column.name] = self.dataframe[column.name].astype(column.column_type)
            type = self.dataframe[column.name].dtype
            if type != column.column_type:
                raise Exception(f"Error in column types: column {column.name} if of type {type}, not {column.column_type}")

    def _verifyColumnsInAlgorithms(self, algorithms, column_names):
        for algo in algorithms:
            if hasattr(algo, 'columns'):
                for col in algo.columns:
                    if col not in column_names:
                        raise Exception('Please check the column names in your algorithms')
            elif hasattr(algo, 'column'):
                if algo.column not in column_names:
                    raise Exception('Please check the column names in your algorithms')

    def execute(self):
        """
        Execute the algorithms specified in the model on the dataframe
        """
        for algo in self.model.algorithms:
            if not isinstance(algo, CustomAlgorithm):
                globals()[algo.name](self.dataframe, algo.options)
            else:
                mod = importlib.import_module(algo.name)
                mod.main(self.dataframe, algo.options)
        if self.model.export: self.save()

    def save(self):
        """
        Save the resulting dataframe to the location specified in the model
        """
        exporter = self.model.export_rules
        if exporter.output_format == 'csv':
            self.dataframe.to_csv(path_or_buf=exporter.output_name, sep=self.model.columns_delimiter, index=False, header=False)
        elif exporter.output_format == 'json':
            # index=False requires orient 'split' or 'table'; orient='records' omits the index
            self.dataframe.to_json(exporter.output_name, orient='records')
        else: raise NotImplementedError

Instance variables

var dataframe

The resulting DataFrame

var model

FileConfigModel to use in the runner

Methods

def execute(self)

Execute the algorithms specified in the model on the dataframe
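
For a CustomAlgorithm entry, execute() imports the module named by algo.name and calls its main(dataframe, options) function; other algorithms are looked up by name among the functions star-imported from insanonym_utils.algorithms and called with the same two arguments. A hypothetical custom algorithm module could therefore look like this (module and column names are illustrative; the return value is ignored, so the dataframe must be modified in place):

# my_custom_algorithm.py -- hypothetical module, referenced by name in the config
from pandas import DataFrame

def main(dataframe: DataFrame, options):
    # The runner ignores the return value, so modify `dataframe` in place.
    # Illustrative transformation only: blank out a hypothetical "name" column.
    if "name" in dataframe.columns:
        dataframe["name"] = "REDACTED"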

Expand source code
def execute(self):
    """
    Execute the algorithms specified in the model on the dataframe
    """
    for algo in self.model.algorithms:
        if not isinstance(algo, CustomAlgorithm):
            globals()[algo.name](self.dataframe, algo.options)
        else:
            mod = importlib.import_module(algo.name)
            mod.main(self.dataframe, algo.options)
    if self.model.export: self.save()

def save(self)

Save the resulting dataframe to the location specified in the model
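
The CSV branch writes the file with the model's columns_delimiter and without a header or index; the JSON branch writes to the same output_name. A quick, illustrative way to check a CSV export (assuming a runner that has already executed; output_name and the column names come from the model):

from pandas import read_csv

rules = runner.model.export_rules
check = read_csv(rules.output_name, sep=runner.model.columns_delimiter, header=None)
check.columns = [c.name for c in runner.model.columns]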

Expand source code
def save(self):
    """
    Save the resulting dataframe to the location specified in the model
    """
    exporter = self.model.export_rules
    if exporter.output_format == 'csv':
        self.dataframe.to_csv(path_or_buf=exporter.output_name, sep=self.model.columns_delimiter, index=False, header=False)
    elif exporter.output_format == 'json':
        # index=False requires orient 'split' or 'table'; orient='records' omits the index
        self.dataframe.to_json(exporter.output_name, orient='records')
    else: raise NotImplementedError