Commit 35a58651 authored by BERNIER Fabien

[~] restructured EnsembleOut Python model + requirements update

parent 0f79e17c
@@ -94,8 +94,7 @@ print("Accuracy:", accuracy_score(y_test, pred))
print(class_names[model.predict(["piece of cake", "piece of shit"])])
# explaining the model
# vocab = list(model[0].vocabulary_.keys())
# fixout = FixOutText(X, y, vocab, to_drop=["black", "white", "bitch"], algo=model, max_features=50)
# fixout = FixOutText(X, y, sensitives=["black", "white", "bitch"], max_features=-1)
# t0 = time()
# actual_sensitive, is_fair_flag, ans_data, accuracy, threshold = fixout.is_fair()
# fair_flag, words_weights, actual_sensitive, explainer = fixout.is_fair(model)
# print("took", time()-t0, "seconds")
\ No newline at end of file
@@ -12,7 +12,9 @@ from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from time import time
from fixout.core_text import FixOutText
from fixout.core_text import FixOutText, EnsembleOutText
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
hsdata = pd.read_csv("datasets/hate_speech.csv")
class_names = np.array(["ok", "hate speech"])
@@ -95,7 +97,7 @@ print("Accuracy:", accuracy_score(y_test, pred))
# ~~~~~~~~~~~~~~~~~~~~~~~~~~ checking parity ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def get_dataset_predictions(dataset_filename) :
def get_dataset_predictions(model, dataset_filename) :
variant = pd.read_csv(dataset_filename)
tweets = variant["tweet"].to_numpy()
    for i in range(tweets.shape[0]): tweets[i] = tweets[i][1:-1] # strip the leading/trailing character (surrounding quotes, presumably)
@@ -115,16 +117,22 @@ def check_parity(preds, tweets, groups, word=None):
print(f"[{word}] P(hate_speech | AA) = {hate_rate_aa}")
print(f"[{word}] P(hate_speech | SA) = {hate_rate_sa}")
preds, tweets, groups = get_dataset_predictions("datasets/english_variant.csv")
preds, tweets, groups = get_dataset_predictions(model, "datasets/english_variant.csv")
check_parity(preds, tweets, groups)
btch_preds, btch_tweets, btch_groups = get_dataset_predictions("datasets/english_variant_btch.csv")
btch_preds, btch_tweets, btch_groups = get_dataset_predictions(model, "datasets/english_variant_btch.csv")
check_parity(btch_preds, btch_tweets, btch_groups)
ngga_preds, ngga_tweets, ngga_groups = get_dataset_predictions("datasets/english_variant_ngga.csv")
ngga_preds, ngga_tweets, ngga_groups = get_dataset_predictions(model, "datasets/english_variant_ngga.csv")
check_parity(ngga_preds, ngga_tweets, ngga_groups)
# explaining the model
vocab = list(model[0].vocabulary_.keys())
fixout = FixOutText(X, y, vocab, to_drop=["black", "white", "bitch"], algo=model, max_features=-1)
t0 = time()
actual_sensitive, is_fair_flag, ans_data, accuracy, threshold = fixout.is_fair()
print("took", time()-t0, "seconds")
\ No newline at end of file
# ~~~~~~~~~~~~~~~~~~~~~~~~~~ Applying EnsembleOut ~~~~~~~~~~~~~~~~~~~~~~~~~
sensitive_words = ['lol', 'amp', 'haha', 'bout', 'im', 'u', 'tho', 'yea', 'lmao', 'finna', 'honestly']
ensemble = EnsembleOutText(model, sensitive_words)
ensemble.fit(X_train, y_train)
print("Ensemble accuracy:", accuracy_score(y_test, ensemble.predict(X_test)))
preds, tweets, groups = get_dataset_predictions(ensemble, "datasets/english_variant.csv")
check_parity(preds, tweets, groups)
btch_preds, btch_tweets, btch_groups = get_dataset_predictions(ensemble, "datasets/english_variant_btch.csv")
check_parity(btch_preds, btch_tweets, btch_groups)
ngga_preds, ngga_tweets, ngga_groups = get_dataset_predictions(ensemble, "datasets/english_variant_ngga.csv")
check_parity(ngga_preds, ngga_tweets, ngga_groups)
\ No newline at end of file
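For orientation, a minimal sketch of the parity check printed above; the group labels "AA"/"SA" and the exact rate computation are assumptions inferred from the printed output, not code from this commit:

import numpy as np

def parity_gap(preds, groups):
    # Demographic parity: compare hate-speech prediction rates across dialect groups.
    preds = np.asarray(preds, dtype=float)
    groups = np.asarray(groups)
    rate_aa = preds[groups == "AA"].mean()  # P(hate_speech | AA)
    rate_sa = preds[groups == "SA"].mean()  # P(hate_speech | SA)
    return rate_aa - rate_sa  # 0.0 would be perfect demographic parity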
@@ -6,7 +6,7 @@ from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from fixout.core_text import FixOutText
from fixout.core_text import FixOutText, EnsembleOutText
from time import time
@@ -14,30 +14,32 @@ from time import time
categories = ['alt.atheism', 'soc.religion.christian']
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories)
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories)
X_train = newsgroups_train.data
y_train = newsgroups_train.target
X_test = newsgroups_test.data
y_test = newsgroups_test.target
class_names = ['atheism', 'christian']
# creating a model pipeline and training it
vectorizer = TfidfVectorizer(lowercase=True)
rf = RandomForestClassifier(n_estimators=500)
model = make_pipeline(vectorizer, rf)
model.fit(newsgroups_train.data, newsgroups_train.target)
model.fit(X_train, y_train)
# evaluating our model
pred = model.predict(newsgroups_test.data)
print("Accuracy:", accuracy_score(newsgroups_test.target, pred))
print("Accuracy:", model.score(X_test, y_test))
# explaining our model
X = newsgroups_train.data + newsgroups_test.data
y = np.concatenate([newsgroups_train.target, newsgroups_test.target])
vocab = list(model[0].vocabulary_.keys())
fixout = FixOutText(X, y, vocab, to_drop=["com", "nigel", "of", "host", "library", "canrem", "symposium", "desks"], algo=model)
t0 = time()
actual_sensitive, is_fair_flag, ans_data, accuracy, threshold = fixout.is_fair()
print("took", time()-t0, "seconds")
sensitive_words = ["com", "nigel", "of", "host", "library", "canrem", "symposium", "desks"]
fixout = FixOutText(X_train, y_train, sensitives=sensitive_words)
fair_flag, words_weights, actual_sensitive, explainer = fixout.is_fair(model)
# correcting fairness if necessary
# if is_fair_flag :
# print("The model is fair ! \o/")
# else :
# print("Model not fair, " + " ".join(actual_sensitive) + " in the main features...")
# is_fair2, contributions_ensemble, accuracy_ensemble, threshold_ens = fixout.ensemble_out(actual_sensitive)
\ No newline at end of file
if fair_flag :
    print("Model is fair! \\o/")
else :
    print("Model is not fair: " + " ".join(actual_sensitive) + " among the main features...")
ensemble = EnsembleOutText(model, actual_sensitive)
ensemble.fit(X_train, y_train)
print("Ensemble accuracy:", ensemble.score(X_test, y_test))
fair_flag, words_weights, actual_sensitive, explainer = fixout.is_fair(ensemble)
\ No newline at end of file
# from .core import FixOut
from .core_text import FixOutText
\ No newline at end of file
from .core_text import FixOutText
from .core_text import EnsembleOutText
\ No newline at end of file
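With both classes re-exported at the package root, downstream scripts can presumably shorten their imports (a sketch, assuming no other name shadowing in the package):

from fixout import FixOutText, EnsembleOutText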
@@ -2,10 +2,11 @@
Implements the main procedures to build fairer ensembles, e.g. feature dropout, model training, ensemble building
"""
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
from sklearn.metrics import accuracy_score
from sklearn.model_selection._split import train_test_split
from sklearn.preprocessing._label import LabelEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.base import ClassifierMixin
from fixout.lime_text_global import fairness_eval
from copy import deepcopy
@@ -13,86 +14,30 @@ import numpy as np
import re
os.environ['KMP_DUPLICATE_LIB_OK']='True'
class FixOutText:
def __init__(self, X, y, vocabulary, to_drop, algo, train_size=0.7, exp=fairness_eval, max_features=10, sampling_size=None, seed=None, threshold=None):
def __init__(self, X, y, sensitives, exp=fairness_eval, max_features=10, sampling_size=None):
self.exp = exp
self.algo = deepcopy(algo)
self.seed = seed
self.threshold = threshold
self.data = X
self.X = X
        le = LabelEncoder()
self.labels = le.fit_transform(y)
self.y = le.fit_transform(y)
self.class_names = le.classes_
self.feature_names = vocabulary
self.train, self.test, self.train_labels, self.test_labels = train_test_split(self.data, self.labels, train_size=train_size, random_state=self.seed)
self.sensitive_f = to_drop
self.sensitive_f = sensitives
self.max_features = max_features
if sampling_size is None :
self.sampling_size = len(X)//10 # by default, we set the sampling_size for SubmodularPick to 10% of the total number of instances
else :
self.sampling_size = sampling_size
def is_fair(self):
model = train_classifier(self.algo, self.train, self.train_labels)
self.original_model = Model([model], [None])
accuracy, threshold = evaluation(self.original_model.predict_proba(self.test), self.test_labels)
actual_sensitive, is_fair_flag, ans_data, explainer = self.exp(self.original_model, self.train, self.max_features, self.sensitive_f, self.feature_names, self.class_names, self.sampling_size, self.threshold)
return actual_sensitive, is_fair_flag, ans_data, accuracy, threshold
def ensemble_out(self, actual_sensitive, ignorecase=True, token_pattern=r"\b\w+\b"):
"""
Implements ENSEMBLE_Out
Parameters
algo: class of a classification algorithm
to_drop: list of features that must be dropped
train: X
labels_train: y
all_categorical_features: list of indices of categorical features
"""
models, removers = [], []
words_to_drop = actual_sensitive
flags = re.IGNORECASE if ignorecase else 0
for word in words_to_drop: # getting the word to drop
rep = re.compile(token_pattern.replace("\\w", '('+word+')'), flags=flags) # compiling a regex to find the word
train = list(map(lambda text: rep.sub("", text), self.train)) # removing the word
model = train_classifier(self.algo, train, self.train_labels)
models.append(model)
removers.append(rep)
# finally, remove all actual sensitive words
rep_all = re.compile(token_pattern.replace("\\w", '(' + '|'.join(words_to_drop) + ')'), flags=flags)
train = list(map(lambda text: rep_all.sub("", text), self.train))
model_all = train_classifier(self.algo, train, self.train_labels)
models.append(model_all)
removers.append(rep_all)
self.ensemble = Model(models, removers)
accuracy, threshold = evaluation(self.ensemble.predict_proba(self.test), self.test_labels)
_, is_fair_flag, ans_data, explainer = self.exp(self.ensemble, self.train, self.max_features, actual_sensitive, self.feature_names, self.class_names, self.sampling_size, self.threshold)
return is_fair_flag, ans_data, accuracy, threshold
    def is_fair(self, model):
        # Global LIME audit: self.exp defaults to fairness_eval, which returns
        # (detected sensitive words, fairness flag, word contributions, explainer).
        actual_sensitive, fair_flag, words_weights, explainer = self.exp(model, self.X, self.max_features, self.sensitive_f, self.class_names, self.sampling_size)
        return fair_flag, words_weights, actual_sensitive, explainer
class EnsembleOut:
class EnsembleOutText(ClassifierMixin):
"""Class for ensemble models
Saves a list of trained classifiers and their respective encoders and deleted features
@@ -101,15 +46,48 @@ class EnsembleOut:
models: a list of trained classifiers
removers: a list of compiled regex from the re module, for each model
"""
def __init__(self, models, removers):
self.models = models
self.removers = removers
def __init__(self, base_model, sensitive_words=(), ignorecase=True, token_pattern=r"\b\w+\b", auto_threshold=False):
self.models = [base_model]
self.removers = []
self.threshold = 0.5
self.dtype = np.uint8
self.auto_threshold = auto_threshold
        flags = re.IGNORECASE if ignorecase else 0
        for word in sensitive_words: # build one remover per sensitive word
            rep = re.compile(token_pattern.replace("\\w", '('+word+')'), flags=flags) # regex matching the word as a full token
            self.removers.append(rep)
        # one extra remover that strips all sensitive words at once
        rep_all = re.compile(token_pattern.replace("\\w", '(' + '|'.join(sensitive_words) + ')'), flags=flags)
        self.removers.append(rep_all)
    def fit(self, X, y):
        try:
            self.dtype = y.dtype
        except AttributeError: # y may be a plain Python list
            pass
        base_model = self.models.pop(0)
        for rep in self.removers:
            X_train = list(map(lambda text: rep.sub("", text), X)) # drop the sensitive word(s)
            model = deepcopy(base_model)
            model.fit(X_train, y)
            self.models.append(model)
        if self.auto_threshold:
            self.adjust_threshold(X, y)
        return self # scikit-learn convention: fit returns the estimator
def adjust_threshold(self, X, y):
thresholds = np.arange(0, 1, 0.001)
scores = [accuracy_score(y, self.predict(X, threshold=t)) for t in thresholds] # score each threshold
i = np.argmax(scores)
self.threshold = thresholds[i]
def predict_proba(self, X):
"""
Returns probability for each class label.
"""
probs = []
n_models = len(self.models)
@@ -118,38 +96,11 @@
rep = self.removers[i]
if rep is not None :
X = list(map(lambda s: rep.sub("", s), X))
comp = model.predict_proba(X).astype(float)
probs.append(comp)
res = sum(probs)/n_models
return res
def train_classifier(algo, train, train_labels):
model = deepcopy(algo)
model.fit(train, train_labels)
return model
            p = model.predict_proba(X) # per-model class probabilities
            probs.append(p)
def to_labels(pos_probs, threshold):
return (pos_probs >= threshold).astype('int')
return np.array(probs).mean(axis=0)
def evaluation(probs, true_labels):
probs = probs[:, 1]
thresholds = np.arange(0, 1, 0.001) # define thresholds
scores = [accuracy_score(true_labels, to_labels(probs, t)) for t in thresholds] # evaluate each threshold
ix = np.argmax(scores) # get best threshold
# print('Threshold=%.3f, F-Score=%.5f' % (thresholds[ix], scores[ix]))
accuracy = accuracy_score(true_labels, to_labels(probs, thresholds[ix]))
return accuracy, thresholds[ix]
def evaluation_fixed_threshold(probs, true_labels, threshold):
probs = probs[:, 1]
return accuracy_score(true_labels, to_labels(probs, threshold))
\ No newline at end of file
    def predict(self, X, threshold=None):
        if threshold is None: threshold = self.threshold
        # binary setting: threshold the positive-class probability
        return (self.predict_proba(X)[:,1] > threshold).astype(self.dtype)
\ No newline at end of file
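For reference, a minimal usage sketch of the EnsembleOutText API defined above; the toy corpus and the LogisticRegression pipeline are illustrative assumptions, not part of this commit:

from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from fixout.core_text import EnsembleOutText

X = ["good movie", "bad movie", "nice game", "awful game"]
y = [1, 0, 1, 0]
base = make_pipeline(TfidfVectorizer(), LogisticRegression())
# fit() trains one copy of `base` per sensitive word (that word removed), plus
# one with all sensitive words removed; predict_proba() averages their outputs.
ensemble = EnsembleOutText(base, sensitive_words=["movie", "game"], auto_threshold=True)
ensemble.fit(X, y)
print(ensemble.predict(["a good movie"]))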
......@@ -7,7 +7,21 @@ import numpy as np
from fixout.lime.lime_text import LimeTextExplainer
clusters = 50
def fairness_eval(model, train, max_features, sensitive_features, class_names, sample_size):
explainer, explanations = features_contributions(model.predict_proba, train, class_names, sample_size)
contributions = Counter()
for exp in explanations:
vocab = exp.domain_mapper.indexed_string.inverse_vocab
words_weights = {vocab[i]: weight for i, weight in exp.local_exp[1]}
a1 = Counter(words_weights)
contributions.update(a1)
actual_sensitive, is_fair, df = fairness_valid_top(contributions, sensitive_features, max_features)
return actual_sensitive, is_fair, df, explainer
def features_contributions(predict_fn, train, class_names, sample_size, kernel_width=5):
@@ -24,26 +38,7 @@ def features_contributions(predict_fn, train, class_names, sample_size, kernel_width=5):
return explainer, explanations
def fairness_eval(model, train, max_features, sensitive_features, feature_names, class_names, sample_size, threshold=None):
explainer, explanations = features_contributions(model.predict_proba, train, class_names, sample_size)
contributions = Counter()
for exp in explanations:
vocab = exp.domain_mapper.indexed_string.inverse_vocab
words_weights = {vocab[i]: weight for i, weight in exp.local_exp[1]}
a1 = Counter(words_weights)
contributions.update(a1)
if threshold != None and threshold > 0:
actual_sensitive, is_fair, df = fairness_valid_threshold(contributions, feature_names, sensitive_features, threshold)
else:
actual_sensitive, is_fair, df = fairness_valid_top(contributions, feature_names, sensitive_features, max_features)
return actual_sensitive, is_fair, df, explainer
def fairness_valid_top(contributions, feature_names, sensitive_features, max_features):
def fairness_valid_top(contributions, sensitive_features, max_features):
actual_sensitive = []
ans_data = []
@@ -60,44 +55,4 @@ def fairness_valid_top(contributions, feature_names, sensitive_features, max_features):
actual_sensitive.append(feature)
df = pd.DataFrame(ans_data, columns=["Index", "Word", "Contribution"])
return actual_sensitive, len(actual_sensitive) < 2, df
def fairness_valid_threshold(contributions, feature_names, sensitive_features, threshold):
actual_sensitive = []
ans_data = []
n_contributions = normalize(contributions)
sorted_dict = sorted(n_contributions.items(), key=lambda x: abs(x[1]), reverse=True)
for key,value in sorted_dict:
if abs(value) < threshold:
break
ans_data.append([key,feature_names[key],value])
for pair in ans_data:
key = pair[0]
if key in sensitive_features:
actual_sensitive.append(key)
df = pd.DataFrame(ans_data, columns = ["Index", "Feature", "Contribution"])
return actual_sensitive, len(actual_sensitive) < 2, df
def normalize(b):
a = b.copy()
values = [abs(x[1]) for x in a.items()]
# values = list(map(abs, a.items()))
minv = np.min(values)
maxv = np.max(values)
for key in a.keys():
v = a[key]
normalized = (abs(v) - minv) / (maxv - minv)
a[key] = normalized if v >= 0 else -normalized
return a
\ No newline at end of file
return actual_sensitive, len(actual_sensitive) < 2, df
\ No newline at end of file
numpy
pandas
aif360
imblearn
oapackage
shap
numpy~=1.18.1
pandas~=1.1.4
aif360~=0.3.0
imblearn~=0.0
oapackage~=2.6.6
shap~=0.37.0
scikit-learn~=0.23.2
scipy~=1.4.1
matplotlib~=3.3.2
nltk~=3.4.5
fairmodels~=0.1.3
\ No newline at end of file
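The new pins use PEP 440 compatible-release specifiers: numpy~=1.18.1, for instance, allows any 1.18.x at or above 1.18.1 but not 1.19. Installing them is the usual (command assumed, not part of the commit):

pip install -r requirements.txt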