Pipeline, обучение модели

Есть код, но выдает ошибку, не могу понять где ошибка?


df = pd.read_csv('homework.csv')

X = df.drop(['price_category'], axis=1)
Y = df['price_category']

def filter_data(X):
    columns_to_drop = [
       'id',
       'url',
       'region',
       'region_url',
       'price',
       'manufacturer',
       'image_url',
       'description',
       'posting_date',
       'lat',
       'long']
    return X.drop(columns_to_drop, axis=1)

def calculate_outliers(X):
    q25 = df['year'].quantile(0.25)
    q75 = df['year'].quantile(0.75)
    iqr = q75 - q25
    X.loc[X['year'] < (q25 - 1.5 * iqr), 'year'] = round(q25 - 1.5 * iqr)
    X.loc[X['year'] > (q75 + 1.5 * iqr), 'year'] = round(q25 + 1.5 * iqr)
    return X['year']

numerical_features = X.select_dtypes(include=['int64', 'float64']).columns
categorical_features = X.select_dtypes(include=['object']).columns

imputer_transformer = ColumnTransformer(transformers=[
        ('imputer_num', SimpleImputer(strategy='median'), numerical_features),
        ('imputer_cat',SimpleImputer(strategy='most_frequent'), categorical_features)
])

encode_scale_transformer = ColumnTransformer(transformers=[
        ('scale', StandardScaler(), numerical_features),
        ('encode',OneHotEncoder(handle_unknown='ignore'),categorical_features)
])


def new_predictor(X):
    X.loc[:, 'age_category'] = X['year'].apply(lambda x: 'new' if x > 2013 else ('old' if x < 2006 else 'average'))
    X.loc[:, 'short_model'] = X.apply(lambda x: x.model.split(' ')[0], axis=1)
    return X


def filter_data2(X):
    columns_to_drop = [
        'year',
        'model',
        'fuel',
        'odometer',
        'title_status',
        'transmission',
        'short_model',
        'state',
        'age_category']
    return X.drop(columns_to_drop, axis=1)


models = (
    LogisticRegression(solver='liblinear'),
    RandomForestClassifier(),
    SVC()
)


preprocessor = Pipeline(steps=[
        ('filter', FunctionTransformer(filter_data)),
        ('outliers', FunctionTransformer(calculate_outliers)),
        ('imputer_transformer', imputer_transformer),
        ('predictors', FunctionTransformer(new_predictor)),
        ('encode_scale_transformer', encode_scale_transformer),
        ('filter_2', FunctionTransformer(filter_data2))
])

for model in models:
    pipe = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', model)
    ])

    score = cross_val_score(pipe, X, Y, cv=4, scoring='accuracy')
    print(f'model:{type(model).__name__}, acc_mean: {score.mean():.4f}, acc_std:{score.std():.4f}'

Ответы (0 шт):