This tutorial explains the steps to create custom blocks that load training data from project space and train a logistic regression model, which is one of the system-provided blocks.
Create a folder named titanic in project-space and upload the titanic training and test data as train.csv and test.csv.
from razor.marketplace.blocks.rzt.ML_Blocks import LogisticRegression
from razor.api import project_space_path
import razor.flow as rf
import pandas as pd
@rf.block
class CsvReader:
    """Stream a CSV file from project space as a series of DataFrames.

    The file named by ``filename`` is resolved with ``project_space_path``
    and read in chunks of 10 rows; each chunk is pushed to ``output``.
    """

    # Path of the CSV file, relative to the project space.
    filename: str
    # Series output that receives one DataFrame per chunk.
    output: rf.SeriesOutput[pd.DataFrame]

    def run(self):
        """Read the CSV chunk by chunk and emit every chunk on ``output``."""
        reader = pd.read_csv(
            project_space_path(self.filename),
            chunksize=10,
            nrows=None,
            delimiter=None,
        )
        for frame in reader:
            self.output.put(frame)
@rf.block
class DfFilterNan():
    """Drop rows containing NaN from a stream of DataFrames and merge them.

    Every chunk arriving on ``df_chunks`` has its NaN-containing rows
    removed; the cleaned chunks are concatenated into a single DataFrame
    that is put on ``output`` once.
    """

    # Series input yielding DataFrame chunks.
    df_chunks: rf.SeriesInput[pd.DataFrame]
    # Single DataFrame holding all NaN-free rows.
    output: rf.Output[pd.DataFrame]

    def run(self):
        """Clean every incoming chunk and emit the concatenated result."""
        # Non-inplace dropna leaves the upstream chunk objects untouched.
        cleaned = [chunk.dropna(axis=0) for chunk in self.df_chunks]
        # Concatenate once instead of re-copying the accumulated frame for
        # every chunk. pd.concat raises ValueError on an empty list, so an
        # empty stream still yields an empty DataFrame as before.
        merged = pd.concat(cleaned) if cleaned else pd.DataFrame()
        self.output.put(merged)
@rf.block
class DfCategorical():
    """Replace the listed columns of a DataFrame with integer category codes.

    Each column named in ``columns`` is cast to the pandas ``category``
    dtype and then overwritten with its ``cat.codes``. The input frame is
    modified in place and then put on ``output``.
    """

    # Names of the columns to encode.
    columns: list
    # DataFrame whose columns are encoded (mutated in place).
    df: pd.DataFrame
    # The same DataFrame after encoding.
    output: rf.Output[pd.DataFrame]

    def run(self):
        """Encode every requested column, then emit the frame."""
        for name in self.columns:
            self.df[name] = self.df[name].astype('category').cat.codes
        self.output.put(self.df)
import numpy as np
@rf.block
class Get_data():
    """Split a DataFrame into feature and (optionally) target arrays.

    ``out_x`` always receives the values of ``x_columns``. ``out_y``
    receives the squeezed values of ``y_column`` only when ``y_column`` is
    neither ``None`` nor empty; otherwise nothing is put on ``out_y``.
    """

    # Names of the feature columns.
    x_columns: list
    # Names of the target column(s); may be None or empty.
    y_column: list
    # Source DataFrame.
    df: pd.DataFrame
    # Feature values as a NumPy array.
    out_x: rf.Output[np.ndarray]
    # Target values as a NumPy array (only written when a target is given).
    out_y: rf.Output[np.ndarray]

    def run(self):
        """Extract the feature array and, if requested, the target array."""
        features = self.df[self.x_columns].values

        has_target = self.y_column is not None and len(self.y_column) > 0
        if not has_target:
            self.out_x.put(features)
            return

        # np.squeeze drops length-1 axes, e.g. (n, 1) -> (n,).
        target = np.squeeze(self.df[self.y_column].values)
        self.out_x.put(features)
        self.out_y.put(target)
# --- Training pipeline -------------------------------------------------
# Read the training CSV from project space as a stream of chunks.
atomic_csv_reader = CsvReader(filename="titanic/train.csv")

# Drop rows with missing values and merge the chunks into one frame.
df_filter = DfFilterNan(df_chunks=atomic_csv_reader.output)

# Encode the string-valued columns as integer category codes.
df_cat = DfCategorical(
    columns=["Sex", "Cabin", "Embarked"],
    df=df_filter.output,
)

# Split the frame into a feature matrix and a target vector.
train_data = Get_data(
    x_columns=[
        'PassengerId', 'Pclass', 'Sex', 'Age',
        'SibSp', 'Fare', 'Cabin', 'Embarked',
    ],
    y_column=['Survived'],
    df=df_cat.output,
)

# NOTE(review): presumably fits the model and saves it to `path` because
# save=True -- confirm against the LogisticRegression block documentation.
lr_model_train = LogisticRegression(
    operation="fit",
    x_data=train_data.out_x,
    y_data=train_data.out_y,
    path="lr_m1.sav",
    save=True,
)

# Build the pipeline from its final block, display it, and run it.
pipeline = rf.Pipeline(targets=[lr_model_train])
pipeline.show()
pipeline.execute()
<razor_tools.backend.ipython.mime.run_monitor.RunMonitor at 0x7f2976d9bb90>
<Process(Pipeline Manager('Pipeline_17'), stopped)>
import pandas as pd
import numpy as np
@rf.block
class NumpyToCsv():
    """Write a NumPy array to a CSV file in project space.

    The array is wrapped in a DataFrame with a single ``Predictions``
    column and written to the location that ``project_space_path``
    resolves for ``output_path``.
    """

    # Array to write; it must fit a single-column DataFrame.
    numpy_array: np.ndarray
    # Destination path, relative to the project space.
    output_path: str

    def run(self):
        """Convert the array to a DataFrame and save it as CSV."""
        frame = pd.DataFrame(self.numpy_array, columns=['Predictions'])
        destination = project_space_path(self.output_path)
        frame.to_csv(destination)
# --- Prediction pipeline -----------------------------------------------
# NOTE(review): predictions are made on train_data.out_x, i.e. the training
# features; presumably a reader over titanic/test.csv was intended -- confirm.
# NOTE(review): load=True presumably restores the model saved at `path`;
# the role of attribute="classes_" for a predict operation is unclear --
# confirm against the LogisticRegression block documentation.
lr_model_predict = LogisticRegression(
    operation="predict",
    x_data=train_data.out_x,
    attribute="classes_",
    path="lr_m1.sav",
    load=True,
)

# Persist the predictions as a CSV file in project space.
csv_writer = NumpyToCsv(
    output_path="lr_pred_1.csv",
    numpy_array=lr_model_predict.predictions,
)

# Build the pipeline from its final block, display it, and run it.
predict_pipeline = rf.Pipeline(targets=[csv_writer])
predict_pipeline.show()
predict_pipeline.execute()
<razor_tools.backend.ipython.mime.run_monitor.RunMonitor at 0x7f2974167e90>
<Process(Pipeline Manager('Pipeline_24'), stopped)>