import argparse
import datetime

from sklearn.ensemble import (
    AdaBoostClassifier,
    BaggingClassifier,
    GradientBoostingClassifier,
    RandomForestClassifier,
)
from sklearn.linear_model import LogisticRegression
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

import anchor_global
import lime_global
# from anchor_global import features_contributions
from core import load_data, train_classifier, Model, evaluation, \
    find_threshold, ensemble_out


def main(source_name, train_size, to_drop, all_categorical_features, max_features, algo, exp):
    data, labels, class_names, feature_names, categorical_names = load_data(
        source_name, all_categorical_features)
    train, test, labels_train, labels_test = train_test_split(
        data, labels, train_size=train_size, test_size=1 - train_size, random_state=2)

    # Train and evaluate the original (single) model.
    print("###########\nOriginal model\n###########")
    model, encoder = train_classifier(algo, train, test, labels_train, [], all_categorical_features)
    original_model = Model([model], [encoder], [[]])
    threshold_1 = find_threshold(original_model.prob(train), labels_train)
    accuracy = evaluation(original_model.prob(test), labels_test, threshold_1)
    print(accuracy)

    # Check process fairness of the original model with the chosen explainer.
    is_fair, _ = exp(original_model, train, max_features, to_drop, feature_names,
                     class_names, all_categorical_features, categorical_names)

    if not is_fair:
        print("Original model is NOT fair")

        # Train the ExpOut ensemble, which leaves out the features to drop.
        print("###########\nExpOut ensemble's model\n###########")
        ensemble = ensemble_out(algo, to_drop, train, test, labels_train, all_categorical_features)
        threshold_2 = find_threshold(ensemble.prob(train), labels_train)
        accuracy = evaluation(ensemble.prob(test), labels_test, threshold_2)
        print(accuracy)
        is_fair, _ = exp(ensemble, train, max_features, to_drop, feature_names,
                         class_names, all_categorical_features, categorical_names)
        return ensemble
    else:
        print("Original model is fair")
        return original_model


def algo_parser(algo_str):
    """Map a command-line algorithm name to its scikit-learn estimator class."""
    algo = algo_str.lower()
    if algo == "mlp":
        return MLPClassifier
    elif algo == "logreg":
        return LogisticRegression
    elif algo == "rf":
        return RandomForestClassifier
    elif algo == "ada":
        return AdaBoostClassifier
    elif algo == "bagging":
        return BaggingClassifier
    elif algo == "gaussianmixture":
        return GaussianMixture
    elif algo == "gradient":
        return GradientBoostingClassifier
    elif algo == "svm":
        return SVC
    else:
        return None


def exp_parser(exp_str):
    """Map a command-line explainer name to its fairness-evaluation function."""
    exp = exp_str.lower()
    if exp == "lime":
        return lime_global.fairness_eval
    elif exp == "anchors":
        return anchor_global.fairness_eval
    elif exp == "shap":
        return None
    else:
        return None


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='ExpOut: process fairness for classification')
    parser.add_argument('--data')
    parser.add_argument('--trainsize', type=float)
    parser.add_argument('--algo')
    parser.add_argument('--exp')
    parser.add_argument('--max_features', type=int)
    parser.add_argument('-cat_features', '--cat_features', action='store', dest='cat_features_list',
                        type=int, nargs='*', default=[],
                        help="Indices of the categorical features, e.g. -cat_features 1 3 5")
    parser.add_argument('-drop', '--drop', action='store', dest='drop_list',
                        type=int, nargs='*', default=[],
                        help="Indices of the features to drop, e.g. -drop 8")
    args = parser.parse_args()

    now = datetime.datetime.now()
    print(now.year, '-', now.month, '-', now.day, ',',
          now.hour, ':', now.minute, ':', now.second, sep='')

    main(args.data, args.trainsize, args.drop_list, args.cat_features_list,
         args.max_features, algo_parser(args.algo), exp_parser(args.exp))
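
# Example invocation (a sketch only: the script filename, dataset name, and the
# column indices below are assumptions, not taken from this repository):
#
#   python main.py --data adult --trainsize 0.8 --algo rf --exp lime \
#       --max_features 10 -cat_features 1 3 5 -drop 8
#
# This trains a RandomForestClassifier, evaluates its process fairness with the
# LIME-based explainer, and falls back to the ExpOut ensemble if the original
# model relies on the dropped (sensitive) features.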