Module insanonym_utils.runner
Expand source code
from os import path
from typing import List
from pandas import read_json, read_csv
from pandas.core.frame import DataFrame
from .models import Column, FileConfigModel, FileModel, CustomAlgorithm
from .algorithms import *
import importlib
class Runner:
    """
    Load a data file into a `DataFrame` and run the configured algorithms on it.

    The file location, its format, the expected columns and the list of
    algorithms all come from the `FileConfigModel` given to the constructor.
    """

    def __init__(self, model: "FileConfigModel"):
        """
        Validate the configuration against the file, then load the file.

        Args:
            model: Configuration providing `path`, `name`, `file_type`
                ('json' or 'csv'), `columns_delimiter`, `columns` and
                `algorithms`.

        Raises:
            ValueError: If the first CSV line does not have one field per
                configured column, if an algorithm references an unknown
                column, if a loaded column has an unexpected dtype, or if
                `file_type` is neither 'json' nor 'csv'.
        """
        _file = FileModel(name=model.name, path=path.join(model.path, model.name))
        self.model = model
        """`FileConfigModel` to use in the runner"""
        self.dataframe: DataFrame = DataFrame()
        """The resulting `DataFrame`"""

        # Cheap verifications first, so a bad configuration fails before the
        # (potentially large) file is parsed.
        self._verifyRows(_file.path)
        column_names = [column.name for column in model.columns]
        self._verifyColumnsInAlgorithms(model.algorithms, column_names)

        if model.file_type == 'json':
            self.dataframe = read_json(path_or_buf=_file.path, orient='index')
        elif model.file_type == 'csv':
            # header=None: the file carries no header row, names come from the model.
            self.dataframe = read_csv(filepath_or_buffer=_file.path, sep=model.columns_delimiter, header=None)
        else:
            # Fail with a clear message instead of an opaque length-mismatch
            # error when assigning column names to an empty frame.
            raise ValueError(f"Unsupported file type: {model.file_type!r}")
        self.dataframe.columns = column_names
        self._typeChecking(model.columns)

    def _verifyRows(self, file):
        """
        Check that the first line of a CSV file has one field per configured column.

        The check counts delimiter occurrences, which only makes sense for
        delimiter-separated files; for any other `file_type` it is skipped.

        Args:
            file: Path of the file to inspect.

        Raises:
            ValueError: If the field count of the first line differs from the
                number of configured columns.
        """
        if self.model.file_type != 'csv':
            return
        with open(file, 'r') as handle:
            first_line = handle.readline()
        # NOTE: a plain count does not honour quoting; a delimiter inside a
        # quoted field is counted as a separator.
        field_count = first_line.count(self.model.columns_delimiter) + 1
        if field_count != len(self.model.columns):
            raise ValueError('Number of columns differ')

    def _typeChecking(self, columns: List["Column"]):
        """
        Verify that every loaded column has the dtype declared in the model.

        Columns declared as "datetime64[ns]" are converted to that dtype
        first, then checked like the others.

        Args:
            columns: Column descriptions exposing `name` and `column_type`.

        Raises:
            ValueError: If a column's dtype differs from its declared type.
        """
        for column in columns:
            if column.column_type == "datetime64[ns]":
                self.dataframe[column.name] = self.dataframe[column.name].astype(column.column_type)
            actual_type = self.dataframe[column.name].dtype
            if actual_type != column.column_type:
                raise ValueError(f"Error in column types: column {column.name} is of type {actual_type}, not {column.column_type}")

    def _verifyColumnsInAlgorithms(self, algorithms, column_names):
        """
        Ensure every column referenced by an algorithm exists in the model.

        An algorithm may reference several columns through a `columns`
        attribute or a single one through a `column` attribute; `columns`
        takes precedence, and algorithms with neither are ignored.

        Args:
            algorithms: Algorithm descriptions to inspect.
            column_names: Names of the columns declared in the model.

        Raises:
            ValueError: If a referenced column is not in `column_names`.
        """
        for algo in algorithms:
            if hasattr(algo, 'columns'):
                referenced = algo.columns
            elif hasattr(algo, 'column'):
                referenced = [algo.column]
            else:
                continue
            if any(col not in column_names for col in referenced):
                raise ValueError('Please check the column names in your algorithms')

    def execute(self):
        """
        Execute algorithms specified in model on dataframe

        Each algorithm is called with the dataframe and the algorithm's
        options; its return value is ignored. When the model's `export` flag
        is set, the dataframe is saved once all algorithms have run.
        """
        for algo in self.model.algorithms:
            if isinstance(algo, CustomAlgorithm):
                # Custom algorithms are importable modules exposing `main`.
                run = importlib.import_module(algo.name).main
            else:
                # Other algorithms are looked up by name in this module's globals.
                # NOTE(review): any module-level name can be invoked this way;
                # `algo.name` should come from validated configuration.
                run = globals()[algo.name]
            run(self.dataframe, algo.options)
        if self.model.export:
            self.save()

    def save(self):
        """
        Save the resulting dataframe to the specified location in model

        Raises:
            NotImplementedError: If the export format is neither 'csv' nor 'json'.
        """
        rules = self.model.export_rules
        if rules.output_format == 'csv':
            # Written like the input is read: no header row, no index column.
            self.dataframe.to_csv(path_or_buf=rules.output_name, sep=self.model.columns_delimiter, index=False, header=False)
            return
        if rules.output_format == 'json':
            # NOTE(review): pandas versions differ on whether index=False is
            # accepted with the default orient - confirm against the pinned version.
            self.dataframe.to_json(rules.output_name, index=False)
            return
        raise NotImplementedError
Classes
class Runner (model: FileConfigModel)
-
Class used to create dataframe and execute algorithms
Expand source code
class Runner:
    """
    Load a data file into a `DataFrame` and run the configured algorithms on it.

    The file location, its format, the expected columns and the list of
    algorithms all come from the `FileConfigModel` given to the constructor.
    """

    def __init__(self, model: "FileConfigModel"):
        """
        Validate the configuration against the file, then load the file.

        Args:
            model: Configuration providing `path`, `name`, `file_type`
                ('json' or 'csv'), `columns_delimiter`, `columns` and
                `algorithms`.

        Raises:
            ValueError: If the first CSV line does not have one field per
                configured column, if an algorithm references an unknown
                column, if a loaded column has an unexpected dtype, or if
                `file_type` is neither 'json' nor 'csv'.
        """
        _file = FileModel(name=model.name, path=path.join(model.path, model.name))
        self.model = model
        """`FileConfigModel` to use in the runner"""
        self.dataframe: DataFrame = DataFrame()
        """The resulting `DataFrame`"""

        # Cheap verifications first, so a bad configuration fails before the
        # (potentially large) file is parsed.
        self._verifyRows(_file.path)
        column_names = [column.name for column in model.columns]
        self._verifyColumnsInAlgorithms(model.algorithms, column_names)

        if model.file_type == 'json':
            self.dataframe = read_json(path_or_buf=_file.path, orient='index')
        elif model.file_type == 'csv':
            # header=None: the file carries no header row, names come from the model.
            self.dataframe = read_csv(filepath_or_buffer=_file.path, sep=model.columns_delimiter, header=None)
        else:
            # Fail with a clear message instead of an opaque length-mismatch
            # error when assigning column names to an empty frame.
            raise ValueError(f"Unsupported file type: {model.file_type!r}")
        self.dataframe.columns = column_names
        self._typeChecking(model.columns)

    def _verifyRows(self, file):
        """
        Check that the first line of a CSV file has one field per configured column.

        The check counts delimiter occurrences, which only makes sense for
        delimiter-separated files; for any other `file_type` it is skipped.

        Args:
            file: Path of the file to inspect.

        Raises:
            ValueError: If the field count of the first line differs from the
                number of configured columns.
        """
        if self.model.file_type != 'csv':
            return
        with open(file, 'r') as handle:
            first_line = handle.readline()
        # NOTE: a plain count does not honour quoting; a delimiter inside a
        # quoted field is counted as a separator.
        field_count = first_line.count(self.model.columns_delimiter) + 1
        if field_count != len(self.model.columns):
            raise ValueError('Number of columns differ')

    def _typeChecking(self, columns: List["Column"]):
        """
        Verify that every loaded column has the dtype declared in the model.

        Columns declared as "datetime64[ns]" are converted to that dtype
        first, then checked like the others.

        Args:
            columns: Column descriptions exposing `name` and `column_type`.

        Raises:
            ValueError: If a column's dtype differs from its declared type.
        """
        for column in columns:
            if column.column_type == "datetime64[ns]":
                self.dataframe[column.name] = self.dataframe[column.name].astype(column.column_type)
            actual_type = self.dataframe[column.name].dtype
            if actual_type != column.column_type:
                raise ValueError(f"Error in column types: column {column.name} is of type {actual_type}, not {column.column_type}")

    def _verifyColumnsInAlgorithms(self, algorithms, column_names):
        """
        Ensure every column referenced by an algorithm exists in the model.

        An algorithm may reference several columns through a `columns`
        attribute or a single one through a `column` attribute; `columns`
        takes precedence, and algorithms with neither are ignored.

        Args:
            algorithms: Algorithm descriptions to inspect.
            column_names: Names of the columns declared in the model.

        Raises:
            ValueError: If a referenced column is not in `column_names`.
        """
        for algo in algorithms:
            if hasattr(algo, 'columns'):
                referenced = algo.columns
            elif hasattr(algo, 'column'):
                referenced = [algo.column]
            else:
                continue
            if any(col not in column_names for col in referenced):
                raise ValueError('Please check the column names in your algorithms')

    def execute(self):
        """
        Execute algorithms specified in model on dataframe

        Each algorithm is called with the dataframe and the algorithm's
        options; its return value is ignored. When the model's `export` flag
        is set, the dataframe is saved once all algorithms have run.
        """
        for algo in self.model.algorithms:
            if isinstance(algo, CustomAlgorithm):
                # Custom algorithms are importable modules exposing `main`.
                run = importlib.import_module(algo.name).main
            else:
                # Other algorithms are looked up by name in this module's globals.
                # NOTE(review): any module-level name can be invoked this way;
                # `algo.name` should come from validated configuration.
                run = globals()[algo.name]
            run(self.dataframe, algo.options)
        if self.model.export:
            self.save()

    def save(self):
        """
        Save the resulting dataframe to the specified location in model

        Raises:
            NotImplementedError: If the export format is neither 'csv' nor 'json'.
        """
        rules = self.model.export_rules
        if rules.output_format == 'csv':
            # Written like the input is read: no header row, no index column.
            self.dataframe.to_csv(path_or_buf=rules.output_name, sep=self.model.columns_delimiter, index=False, header=False)
            return
        if rules.output_format == 'json':
            # NOTE(review): pandas versions differ on whether index=False is
            # accepted with the default orient - confirm against the pinned version.
            self.dataframe.to_json(rules.output_name, index=False)
            return
        raise NotImplementedError
Instance variables
var dataframe
-
The resulting
DataFrame
var model
-
FileConfigModel
to use in the runner
Methods
def execute(self)
-
Execute algorithms specified in model on dataframe
Expand source code
def execute(self):
    """
    Execute algorithms specified in model on dataframe

    Each algorithm is called with the dataframe and the algorithm's
    options; its return value is ignored. When the model's `export` flag
    is set, the dataframe is saved once all algorithms have run.
    """
    for algo in self.model.algorithms:
        if isinstance(algo, CustomAlgorithm):
            # Custom algorithms are importable modules exposing `main`.
            run = importlib.import_module(algo.name).main
        else:
            # Other algorithms are looked up by name in this module's globals.
            # NOTE(review): any module-level name can be invoked this way;
            # `algo.name` should come from validated configuration.
            run = globals()[algo.name]
        run(self.dataframe, algo.options)
    if self.model.export:
        self.save()
def save(self)
-
Save the resulting dataframe to the specified location in model
Expand source code
def save(self):
    """
    Save the resulting dataframe to the specified location in model

    Raises:
        NotImplementedError: If the export format is neither 'csv' nor 'json'.
    """
    rules = self.model.export_rules
    if rules.output_format == 'csv':
        # No header row and no index column are written.
        self.dataframe.to_csv(path_or_buf=rules.output_name, sep=self.model.columns_delimiter, index=False, header=False)
        return
    if rules.output_format == 'json':
        # NOTE(review): pandas versions differ on whether index=False is
        # accepted with the default orient - confirm against the pinned version.
        self.dataframe.to_json(rules.output_name, index=False)
        return
    raise NotImplementedError