

# Fraud classifier: oversample the minority class with SMOTE, then fit a
# logistic regression, with both steps chained in a single pipeline.
# NOTE(review): SMOTE is a sampler rather than a transformer, so `Pipeline`
# is presumably imblearn.pipeline.Pipeline -- confirm against the imports.
resampling = SMOTE(random_state=27, sampling_strategy="minority")
model = LogisticRegression(solver="liblinear")
pipeline = Pipeline([("SMOTE", resampling), ("Logistic Regression", model)])

# Separate the target column from the features and hold out 20% of the rows
# as a test set; the fixed random_state keeps the split reproducible.
# NOTE(review): the split is not stratified. If positives are very rare,
# consider passing stratify=y so both partitions keep the same fraud rate
# (this would change the reported numbers).
y = PartB_encoded["Is_fraud"]
X = PartB_encoded.drop(["Is_fraud"], axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=27
)

# Fit on the training partition only, then predict the held-out rows.
pipeline.fit(X_train, y_train)
predicted = pipeline.predict(X_test)

# Headline metrics computed on the test partition.
print("Accuracy score: ", accuracy_score(y_true=y_test, y_pred=predicted))
print("Precision score: ", precision_score(y_true=y_test, y_pred=predicted))
print("Recall score: ", recall_score(y_true=y_test, y_pred=predicted))

# Per-class report and confusion matrix. The labels end with a real newline
# ("\n") so the multi-line table/matrix starts on its own line.
print("Classification report:\n", classification_report(y_test, predicted))
conf_mat = confusion_matrix(y_true=y_test, y_pred=predicted)
print("Confusion matrix:\n", conf_mat)


Accuracy score:  0.9333130935552119
Precision score:  2.3716352424997034e-05
Recall score:  0.09090909090909091
Classification report:
              precision    recall  f1-score   support

       False       1.00      0.93      0.97    632407
        True       0.00      0.09      0.00        11

    accuracy                           0.93    632418
   macro avg       0.50      0.51      0.48    632418
weighted avg       1.00      0.93      0.97    632418
Confusion matrix:
[[590243  42164]
 [    10      1]]


