I am working on a loan prediction problem, the dataset includes categorical and numerical features. I am writing a custom transformer that will process both numerical and categorical attributes. Funny enough I am getting a syntax error and I can't see the problem. It's my first time writing custom transformers so practical tips and correction will be appreciated
from sklearn.base import BaseEstimator, TransformerMixin
class data_cleaner(BaseEstimator, TransformerMixin):
def __init__(self, cat_data, num_data):
self.cat_data = cat_data
self.num_data = num_data
def process_data(self, cat_data, num_data):
cat_data = []
num_data = []
for i,c in enumerate(X.dtypes):
if c == object:
cat_data.append(X.iloc[:, i])
else :
num_data.append(X.iloc[:, i]
cat_data = pd.DataFrame(cat_data).transpose()
num_data = pd.DataFrame(num_data).transpose()
#filling the missing categorical data
cat_data = cat_data.apply(lambda value:value.fillna(value.value_counts().index[0]))
#mapping the data on the loan_status
target = {'Y': '1', 'N': '0'}
loan_status = cat_data['Loan_Status']
#dropping the loan_status from cat_data
cat_data.drop('Loan_Status', inplace=True, axis=1)
#mapping loan_status wrt target
loan_status = loan_status.map(target)
#changing the cat_data into numerical values
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
for value in cat_data:
cat_data[value] = encoder.fit_transform(cat_data[value])
# Numerical data
# Fill every missing value with their previous value in the same column.
num_data.fillna(method='bfill', inplace=True)
#joining the cat_data and num_data
def transform(self, X, y=None):
X = pd.concat([cat_data, num_data, loan_status], axis=1)
def fit(self, *_):
return self
question from:
https://stackoverflow.com/questions/65874152/using-sklearn-base-estimator-and-transformermixin 与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…