# runner.py -- command-line entry point for ExpOut (process fairness for classification).
import argparse
import datetime

from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble._bagging import BaggingClassifier
from sklearn.ensemble._forest import RandomForestClassifier
from sklearn.ensemble._gb import GradientBoostingClassifier
from sklearn.linear_model._logistic import LogisticRegression
from sklearn.mixture._gaussian_mixture import GaussianMixture
from sklearn.neural_network._multilayer_perceptron import MLPClassifier
from sklearn.svm._classes import SVC
from sklearn.model_selection import train_test_split
import anchor_global
import lime_global

# from anchor_global import features_contributions
from core import load_data, train_classifier, Model, evaluation, \
    find_threshold, ensemble_out


def main(source_name, train_size, to_drop, all_categorical_features, max_features, algo, exp):
    """Train a model, assess its fairness and, if needed, build the ExpOut ensemble.

    The data set is loaded with ``load_data`` and split with
    ``train_test_split`` using a fixed ``random_state`` of 2.  One model is
    trained with ``train_classifier`` and wrapped in ``Model``; a threshold
    is derived from its training-set probabilities with ``find_threshold``
    and the result of ``evaluation`` on the test set is printed.  ``exp`` is
    then called on the wrapped model.  If it reports the model as fair, that
    model is returned.  Otherwise an ensemble is built with ``ensemble_out``,
    evaluated and printed the same way, passed to ``exp`` once more, and
    returned.

    Args:
        source_name: Data-set identifier forwarded to ``load_data``.
        train_size: Training share handed to ``train_test_split``; the test
            share is computed as ``1 - train_size``, so a fraction between
            0 and 1 is expected.
        to_drop: Forwarded to ``exp`` and ``ensemble_out`` (presumably the
            features whose influence should be removed -- confirm against
            ``core``).
        all_categorical_features: Forwarded to ``load_data``,
            ``train_classifier``, ``ensemble_out`` and ``exp``.
        max_features: Forwarded to ``exp``.
        algo: Learning algorithm forwarded to ``train_classifier`` and
            ``ensemble_out``.
        exp: Callable invoked as ``exp(model, train, max_features, to_drop,
            feature_names, class_names, all_categorical_features,
            categorical_names)``; it must return a 2-tuple whose first item
            is truthy when the model is considered fair.

    Returns:
        The wrapped original model when ``exp`` reports it as fair,
        otherwise the ensemble returned by ``ensemble_out``.

    Raises:
        TypeError: If ``exp`` is not callable.
    """
    # Fail before loading data and training: a non-callable explainer would
    # otherwise only surface as a TypeError after the model has been fitted.
    if not callable(exp):
        raise TypeError(
            "exp must be a callable fairness evaluator, got {!r}".format(exp))

    data, labels, class_names, feature_names, categorical_names = load_data(
        source_name, all_categorical_features)
    train, test, labels_train, labels_test = train_test_split(
        data, labels, train_size=train_size, test_size=1-train_size, random_state=2)

    print("###########\nOriginal model \n###########")

    model, encoder = train_classifier(algo, train, test, labels_train, [], all_categorical_features)
    original_model = Model([model], [encoder], [[]])

    # The threshold comes from the training split and is applied to the test split.
    threshold_1 = find_threshold(original_model.prob(train), labels_train)
    accuracy = evaluation(original_model.prob(test), labels_test, threshold_1)
    print(accuracy)

    is_fair, _ = exp(original_model, train, max_features, to_drop, feature_names,
                     class_names, all_categorical_features, categorical_names)

    if is_fair:
        print("Original model is fair")
        return original_model

    print("Original model is NOT fair")

    print("###########\nExpOut ensemble's model \n###########")
    ensemble = ensemble_out(algo, to_drop, train, test, labels_train, all_categorical_features)

    threshold_2 = find_threshold(ensemble.prob(train), labels_train)
    accuracy = evaluation(ensemble.prob(test), labels_test, threshold_2)
    print(accuracy)

    # The verdict on the ensemble is not used here; the evaluator is still
    # called (presumably for the report it produces -- confirm in the
    # explainer modules).
    exp(ensemble, train, max_features, to_drop, feature_names,
        class_names, all_categorical_features, categorical_names)
    return ensemble


def algo_parser(algo_str):
    """Map a command-line algorithm name to the matching scikit-learn class.

    The lookup is case-insensitive.

    Args:
        algo_str: Algorithm name; one of ``mlp``, ``logreg``, ``rf``,
            ``ada``, ``bagging``, ``gaussianmixture``, ``gradient`` or
            ``svm``.  May be ``None`` or empty (e.g. when the command-line
            option was omitted).

    Returns:
        The estimator class (not an instance) registered under the name, or
        ``None`` when the name is missing or not recognised.
    """
    # A missing option reaches this function as None; report it the same way
    # as an unknown name instead of failing on ``.lower()``.
    if not algo_str:
        return None

    # NOTE(review): GaussianMixture is a mixture model rather than a
    # classifier -- confirm that the training code supports it.
    estimators = {
        "mlp": MLPClassifier,
        "logreg": LogisticRegression,
        "rf": RandomForestClassifier,
        "ada": AdaBoostClassifier,
        "bagging": BaggingClassifier,
        "gaussianmixture": GaussianMixture,
        "gradient": GradientBoostingClassifier,
        "svm": SVC,
    }
    return estimators.get(algo_str.lower())


def exp_parser(algo_str):
    """Map a command-line explainer name to its fairness-evaluation function.

    The lookup is case-insensitive.

    Args:
        algo_str: Explainer name, ``lime`` or ``anchors``.  May be ``None``
            or empty (e.g. when the command-line option was omitted).

    Returns:
        ``lime_global.fairness_eval`` or ``anchor_global.fairness_eval``, or
        ``None`` when the name is missing, not recognised, or ``shap``
        (which has no evaluator wired up here).
    """
    # A missing option reaches this function as None; report it the same way
    # as an unknown name instead of failing on ``.lower()``.
    if not algo_str:
        return None

    name = algo_str.lower()

    if name == "lime":
        return lime_global.fairness_eval
    if name == "anchors":
        return anchor_global.fairness_eval

    # "shap" falls through on purpose: no evaluator is available for it.
    return None


if __name__ == "__main__":
    # Command-line entry point: parse the options, resolve the algorithm and
    # explainer names, print a start timestamp and run the experiment.

    parser = argparse.ArgumentParser(description='ExpOut: process fairness for classification')
    parser.add_argument('--data')
    # The next three options are required: main() computes 1 - trainsize, and
    # the algorithm/explainer names must resolve to usable objects.
    parser.add_argument('--trainsize', type=float, required=True)
    parser.add_argument('--algo', required=True)
    parser.add_argument('--exp', required=True)
    parser.add_argument('--max_features', type=int)
    # NOTE(review): both help texts mention "-i", which is not an option of
    # this parser -- they look like leftovers and should be rewritten.
    parser.add_argument('-cat_features', '--cat_features', action='store', dest='cat_features_list', type=int, nargs='*', default=[], help="Examples: -i ")
    parser.add_argument('-drop', '--drop', action='store', dest='drop_list', type=int, nargs='*', default=[], help="Examples: -i ")

    args = parser.parse_args()

    # Reject unknown names here, with a usage message, rather than passing
    # None into main() and failing somewhere inside the experiment.
    classifier = algo_parser(args.algo)
    if classifier is None:
        parser.error("unknown --algo value: {!r}".format(args.algo))

    explainer = exp_parser(args.exp)
    if explainer is None:
        parser.error("unknown or unsupported --exp value: {!r}".format(args.exp))

    now = datetime.datetime.now()
    print(now.year,'-', now.month,'-', now.day,',', now.hour,':', now.minute,':', now.second,sep='')

    main(args.data, args.trainsize, args.drop_list, args.cat_features_list, args.max_features, classifier, explainer)