Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
ALVES Guilherme
fixout
Commits
17135b6c
Commit
17135b6c
authored
Jul 15, 2020
by
ALVES Guilherme
Browse files
New argument added to indicate which explainer should be employed
parent
f8975b96
Changes
6
Hide whitespace changes
Inline
Side-by-side
README.md
View file @
17135b6c
...
...
@@ -2,6 +2,23 @@
This project is an extension of LimeOut[1]. It aims at tackling process fairness for classification, while keeping (or improving) the accuracy level.
More precisely, ExpOut incorporates different explainers.
Classifiers available:
*
Multilayer Perceptron
*
Logistic Regression
*
Random Forest
*
Bagging
*
AdaBoost
*
Gaussian Mixture
*
Gradient Boosting
Explainers
*
LIME
*
Anchors
# Example
`runner --data german.data --trainsize 0.8 --algo mlp --cat_features 0 2 3 5 6 8 9 11 13 14 16 18 19 --drop 8 18 19`
# References
[1] Vaishnavi Bhargava, Miguel Couceiro, Amedeo Napoli. LimeOut: An Ensemble Approach To Improve Process Fairness. 2020. ⟨hal-02864059v2⟩
...
...
anchor_global.py
View file @
17135b6c
...
...
@@ -3,22 +3,43 @@ from collections import Counter
from
anchor
import
anchor_tabular
import
lime_global
import
pandas
as
pd
import
numpy
as
np
def fairness_eval(model, train, max_features, sensitive_features, feature_names,
                  class_names, categorical_features, categorical_names):
    """Assess the process fairness of *model* using Anchors explanations.

    Instances to explain are chosen by LIME's submodular pick
    (``lime_global.features_contributions``); each picked instance is then
    explained with an ``AnchorTabularExplainer`` and the features appearing
    in non-empty anchors are counted.  The model is flagged unfair when a
    sensitive feature ranks among the ``max_features`` most frequent anchor
    features.

    Args:
        model: trained classifier wrapper exposing ``prob`` (probability
            function handed to LIME) and ``predict`` (label function handed
            to Anchors).
        train: training data array, indexable by row.
        max_features: number of top-ranked features inspected for fairness.
        sensitive_features: collection of feature indices deemed sensitive.
        feature_names: feature index -> display name.
        class_names: class labels for the explainer.
        categorical_features: indices of categorical features.
        categorical_names: index -> category value names.

    Returns:
        tuple ``(is_fair, ans_data)`` where ``is_fair`` is a bool and
        ``ans_data`` is a list of ``[feature_name, relative_frequency]``
        rows ordered by decreasing anchor frequency.
    """
    # Submodular pick selects a representative subset of instances.
    _, sp_obj = lime_global.features_contributions(
        model.prob, train, feature_names, max_features, class_names,
        categorical_features, categorical_names)
    indices = sp_obj.indices

    a_explainer = anchor_tabular.AnchorTabularExplainer(
        class_names, feature_names, train, categorical_names=categorical_names)

    non_empty_anchors = 0
    counter = Counter()
    for i in indices:
        exp = a_explainer.explain_instance(train[i], model.predict,
                                           threshold=0.95)
        print(i, '%.2f' % exp.precision(), ' %.2f' % exp.coverage(),
              ' (class %s)' % exp.exp_map['prediction'],
              '%s' % (' AND '.join(exp.names())))
        features = exp.exp_map['feature']
        # Only anchors that actually name features contribute to the ranking.
        if len(features) > 0:
            non_empty_anchors += 1
            counter.update(Counter(features))

    is_fair = True
    rank = 0
    ans_data = []
    # Guard against division by zero when every anchor came back empty.
    denominator = non_empty_anchors if non_empty_anchors > 0 else 1
    # Features sorted by decreasing frequency across the anchors.
    for key, value in sorted(counter.items(), key=lambda x: x[1],
                             reverse=True):
        print(feature_names[key], "\t", value)
        ans_data.append([feature_names[key], value / denominator])
        # A sensitive feature inside the top `max_features` ranks => unfair.
        if rank < max_features and key in sensitive_features:
            is_fair = False
        rank += 1

    df = pd.DataFrame(ans_data, columns=["Feature", "Frequency"])
    print(df.iloc[(-np.abs(df['Frequency'].values)).argsort()])
    return is_fair, ans_data
core.py
View file @
17135b6c
...
...
@@ -107,8 +107,6 @@ class Model:
self
.
encoders
=
encoders
self
.
features_to_remove
=
features_to_remove
# TODO :
"""
Args:
models: a list of trained classifiers
...
...
german.py
View file @
17135b6c
...
...
@@ -7,7 +7,7 @@ from sklearn.model_selection import train_test_split
from
sklearn.neural_network._multilayer_perceptron
import
MLPClassifier
from
sklearn.tree
import
DecisionTreeClassifier
from
anchor_global
import
f
eatures_contributions
from
anchor_global
import
f
airness_eval
from
core
import
load_data
,
Model
,
evaluation
,
\
find_threshold
,
remove
,
ensemble_out
,
train_classifier
...
...
@@ -20,6 +20,7 @@ from core import load_data, Model, evaluation, \
def
main
():
train_size
=
0.8
test_size
=
0.2
max_features
=
10
algo
=
MLPClassifier
print
(
algo
.
__name__
)
...
...
@@ -39,7 +40,7 @@ def main():
accuracy
=
evaluation
(
original_model
.
prob
(
test
),
labels_test
,
threshold_1
)
print
(
accuracy
)
f
eatures_contributions
(
original_model
,
train
,
feature_names
,
class_names
,
all_categorical_features
,
categorical_names
)
f
airness_eval
(
original_model
,
train
,
max_features
,
to_drop
,
feature_names
,
class_names
,
all_categorical_features
,
categorical_names
)
print
(
"###########
\n
ExpOut ensemble's model
\n
###########"
)
ensemble
=
ensemble_out
(
algo
,
to_drop
,
train
,
labels_train
,
all_categorical_features
)
...
...
@@ -49,7 +50,7 @@ def main():
accuracy
=
evaluation
(
ensemble
.
prob
(
test
),
labels_test
,
threshold_2
)
print
(
accuracy
)
f
eatures_contributions
(
ensemble
,
train
,
feature_names
,
class_names
,
all_categorical_features
,
categorical_names
)
f
airness_eval
(
ensemble
,
train
,
max_features
,
to_drop
,
feature_names
,
class_names
,
all_categorical_features
,
categorical_names
)
if
__name__
==
"__main__"
:
...
...
lime_global.py
View file @
17135b6c
...
...
@@ -8,11 +8,17 @@ import numpy as np
from
lime
import
lime_tabular
,
submodular_pick
def
features_contributions
(
model
,
train
,
feature_names
,
class_names
,
categorical_features
,
categorical_names
,
kernel_width
=
3
):
def
features_contributions
(
predict_fn
,
train
,
feature_names
,
max_features
,
class_names
,
categorical_features
,
categorical_names
,
kernel_width
=
3
):
explainer
=
lime_tabular
.
LimeTabularExplainer
(
train
,
feature_names
=
feature_names
,
class_names
=
class_names
,
categorical_features
=
categorical_features
,
categorical_names
=
categorical_names
,
kernel_width
=
kernel_width
)
sp_obj
=
submodular_pick
.
SubmodularPick
(
explainer
,
train
,
model
,
sample_size
=
5
,
num_features
=
5
,
num_exps_desired
=
5
)
sp_obj
=
submodular_pick
.
SubmodularPick
(
explainer
,
train
,
predict_fn
,
sample_size
=
500
,
num_features
=
max_features
,
num_exps_desired
=
10
)
return
explainer
,
sp_obj
def
fairness_eval
(
model
,
train
,
max_features
,
sensitive_features
,
feature_names
,
class_names
,
categorical_features
,
categorical_names
):
_
,
sp_obj
=
features_contributions
(
model
.
prob
,
train
,
feature_names
,
max_features
,
class_names
,
categorical_features
,
categorical_names
)
a
=
Counter
()
for
i
in
sp_obj
.
V
:
...
...
@@ -20,16 +26,22 @@ def features_contributions(model, train, feature_names, class_names, categorical
a1
=
Counter
(
dict
(
exp
.
local_exp
[
1
]))
a
.
update
(
a1
)
is_fair
=
True
counter
=
0
ans_data
=
[]
for
key
in
a
:
ans_data1
=
[]
ans_data1
.
append
(
feature_names
[
key
])
ans_data1
.
append
(
a
[
key
])
ans_data1
=
[
feature_names
[
key
],
a
[
key
]]
ans_data
.
append
(
ans_data1
)
if
counter
<
max_features
and
key
in
sensitive_features
:
is_fair
=
False
counter
+=
1
# print(feature_names[key] )
df
=
pd
.
DataFrame
(
ans_data
,
columns
=
[
"Feature"
,
"Contribution"
])
sumdf
=
df
[
'Contribution'
].
sum
()
df
[
'Contribution'
]
=
df
[
'Contribution'
]
#
sumdf = df['Contribution'].sum()
#
df['Contribution'] = df['Contribution']
print
(
df
.
iloc
[(
-
np
.
abs
(
df
[
'Contribution'
].
values
)).
argsort
()])
return
explainer
,
sp_obj
\ No newline at end of file
return
is_fair
,
ans_data
\ No newline at end of file
runner.py
View file @
17135b6c
...
...
@@ -7,16 +7,18 @@ from sklearn.ensemble._forest import RandomForestClassifier
from
sklearn.ensemble._gb
import
GradientBoostingClassifier
from
sklearn.linear_model._logistic
import
LogisticRegression
from
sklearn.mixture._gaussian_mixture
import
GaussianMixture
from
sklearn.model_selection
import
train_test_split
from
sklearn.neural_network._multilayer_perceptron
import
MLPClassifier
from
sklearn.svm._classes
import
SVC
from
sklearn.model_selection
import
train_test_split
import
anchor_global
import
lime_global
from
anchor_global
import
features_contributions
#
from anchor_global import features_contributions
from
core
import
load_data
,
train_classifier
,
Model
,
evaluation
,
\
find_threshold
,
remove
,
ensemble_out
find_threshold
,
ensemble_out
def
main
(
source_name
,
train_size
,
to_drop
,
all_categorical_features
,
algo
):
def
main
(
source_name
,
train_size
,
to_drop
,
all_categorical_features
,
max_features
,
algo
,
exp
):
data
,
labels
,
class_names
,
feature_names
,
categorical_names
=
load_data
(
source_name
,
all_categorical_features
)
train
,
test
,
labels_train
,
labels_test
=
train_test_split
(
data
,
labels
,
train_size
=
train_size
,
test_size
=
1
-
train_size
,
random_state
=
2
)
...
...
@@ -30,17 +32,19 @@ def main(source_name, train_size, to_drop, all_categorical_features, algo):
accuracy
=
evaluation
(
original_model
.
prob
(
test
),
labels_test
,
threshold_1
)
print
(
accuracy
)
features_contributions
(
original_model
,
train
,
feature_names
,
class_names
,
all_categorical_features
,
categorical_names
)
print
(
"###########
\n
ExpOut ensemble's model
\n
###########"
)
ensemble
=
ensemble_out
(
algo
,
to_drop
,
train
,
labels_train
,
all_categorical_features
)
threshold_2
=
find_threshold
(
ensemble
.
prob
(
train
),
labels_train
)
accuracy
=
evaluation
(
ensemble
.
prob
(
test
),
labels_test
,
threshold_2
)
print
(
accuracy
)
features_contributions
(
ensemble
,
train
,
feature_names
,
class_names
,
all_categorical_features
,
categorical_names
)
is_fair
,
_
=
exp
(
original_model
,
train
,
max_features
,
to_drop
,
feature_names
,
class_names
,
all_categorical_features
,
categorical_names
)
if
not
is_fair
:
print
(
"###########
\n
ExpOut ensemble's model
\n
###########"
)
ensemble
=
ensemble_out
(
algo
,
to_drop
,
train
,
labels_train
,
all_categorical_features
)
threshold_2
=
find_threshold
(
ensemble
.
prob
(
train
),
labels_train
)
accuracy
=
evaluation
(
ensemble
.
prob
(
test
),
labels_test
,
threshold_2
)
print
(
accuracy
)
is_fair
,
_
=
exp
(
ensemble
,
train
,
max_features
,
to_drop
,
feature_names
,
class_names
,
all_categorical_features
,
categorical_names
)
def
algo_parser
(
algo_str
):
...
...
@@ -65,61 +69,35 @@ def algo_parser(algo_str):
return
SVC
else
:
return
None
# AdaBoostClassifier
# BaggingClassifier
# BayesianGaussianMixture
# BernoulliNB
# CalibratedClassifierCV
# CategoricalNB
# ClassifierChain
# ComplementNB
# DecisionTreeClassifier
# DummyClassifier
# ExtraTreeClassifier
# ExtraTreesClassifier
# GaussianMixture
# GaussianNB
# GaussianProcessClassifier
# GradientBoostingClassifier
# GridSearchCV
# HistGradientBoostingClassifier
# KNeighborsClassifier
# LabelPropagation
# LabelSpreading
# LinearDiscriminantAnalysis
# LogisticRegression
# LogisticRegressionCV
# MLPClassifier
# MultiOutputClassifier
# MultinomialNB
# NuSVC
# OneVsRestClassifier
# Pipeline
# QuadraticDiscriminantAnalysis
# RFE
# RFECV
# RadiusNeighborsClassifier
# RandomForestClassifier
# RandomizedSearchCV
# SGDClassifier
# SVC
# StackingClassifier
# VotingClassifier
def exp_parser(algo_str):
    """Map a CLI explainer name to the matching ``fairness_eval`` callable.

    Args:
        algo_str: explainer name from ``--exp``; matched case-insensitively.

    Returns:
        ``lime_global.fairness_eval`` for ``"lime"``,
        ``anchor_global.fairness_eval`` for ``"anchors"``,
        otherwise ``None`` (``"shap"`` is recognised but not yet implemented).
    """
    algo = algo_str.lower()
    if algo == "lime":
        return lime_global.fairness_eval
    if algo == "anchors":
        return anchor_global.fairness_eval
    # "shap" is accepted on the CLI but has no implementation yet.
    return None
if __name__ == "__main__":
    # CLI entry point: parse arguments, log a timestamp, run the experiment.
    parser = argparse.ArgumentParser(description='ExpOut')
    parser.add_argument('--data')
    parser.add_argument('--trainsize', type=float)
    parser.add_argument('--algo')
    # New in this revision: which explainer to employ (lime / anchors / shap).
    parser.add_argument('--exp')
    parser.add_argument('--max_features', type=int)
    parser.add_argument('-cat_features', '--cat_features', action='store',
                        dest='cat_features_list', type=int, nargs='*',
                        default=[], help="Examples: -i ")
    parser.add_argument('-drop', '--drop', action='store', dest='drop_list',
                        type=int, nargs='*', default=[], help="Examples: -i ")
    args = parser.parse_args()

    now = datetime.datetime.now()
    # sep='' keeps the timestamp compact, e.g. 2020-7-15,10:3:42.
    print(now.year, '-', now.month, '-', now.day, ',',
          now.hour, ':', now.minute, ':', now.second, sep='')

    main(args.data, args.trainsize, args.drop_list, args.cat_features_list,
         args.max_features, algo_parser(args.algo), exp_parser(args.exp))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment