# compas.py
1
2
import sys; sys.path.extend(['..'])

3
4
5
6
import datetime

from sklearn.model_selection import train_test_split

7
8
from fixout.anchor_global import fairness_eval
from fixout.core import load_data, Model, ensemble_out, train_classifier
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from sklearn.svm import SVC

def main(seed):
    """Run one fairness experiment on the COMPAS data with an SVC classifier.

    The data set is read from ``compas.data``, split 80/20 with ``seed`` as
    the random state, and two models are fitted on the training part: one
    through ``train_classifier`` and one through ``ensemble_out``.  Each is
    handed to ``fairness_eval`` together with the sensitive column indices.
    Progress messages are printed; nothing is returned.

    Args:
        seed: value forwarded as ``random_state`` to ``train_test_split``.
    """
    classifier_cls = SVC
    print(classifier_cls.__name__)

    # Experiment configuration. Index lists refer to positions in ``columns``.
    dataset_file = 'compas.data'
    columns = [
        "juv_fel_count", "juv_misd_count", "juv_other_count", "priors_count",
        "age", "c_charge_degree", "c_charge_desc", "age_cat", "sex", "race",
        "is_recid",
    ]
    sensitive_idx = [4, 7, 8, 9]  # age, age_cat, sex, race
    categorical_idx = [5, 6, 7, 8, 9]
    train_fraction = 0.8
    feature_budget = 10  # forwarded to fairness_eval as its max_features argument

    loaded = load_data(
        dataset_file,
        categorical_idx,
        delimiter=",",
        target_features=columns,
    )
    data, labels, class_names, feature_names, categorical_names = loaded

    # The held-out labels are not used here: accuracy evaluation
    # (find_threshold / evaluation) is disabled in this script.
    train, test, labels_train, _labels_test = train_test_split(
        data,
        labels,
        train_size=train_fraction,
        test_size=1 - train_fraction,
        random_state=seed,
    )

    def report_fairness(candidate):
        # Both models are assessed with exactly the same settings.
        fairness_eval(
            candidate,
            train,
            feature_budget,
            sensitive_idx,
            feature_names,
            class_names,
            categorical_idx,
            categorical_names,
        )

    # Model fitted via train_classifier with an empty list as fifth argument
    # (presumably "no features removed" -- confirm against fixout.core).
    fitted, fitted_encoder = train_classifier(
        classifier_cls, train, test, labels_train, [], categorical_idx
    )
    original_model = Model([fitted], [fitted_encoder], [[]])
    print("Original model OK")
    report_fairness(original_model)

    # Ensemble built by fixout from the sensitive column indices.
    ensemble = ensemble_out(
        classifier_cls, sensitive_idx, train, test, labels_train, categorical_idx
    )
    print("ExpOut ensemble's model OK")
    report_fairness(ensemble)
    

    
if __name__ == "__main__":
    # Banner: data set name followed by an unpadded local timestamp.
    started = datetime.datetime.now()
    date_fields = (started.year, '-', started.month, '-', started.day)
    time_fields = (started.hour, ':', started.minute, ':', started.second)
    print('compas\n', *date_fields, ',', *time_fields, sep='')

    # One run; the run index doubles as the random seed passed to main().
    n_runs = 1
    for run_index in range(n_runs):
        print("experiment i=", run_index)
        main(run_index)