Commit 9e960077 authored by BERNIER Fabien

[~] conflict merge

parents 202cd51d 1908d071
@@ -5,6 +5,7 @@ from nltk.stem.porter import *
 from nltk import pos_tag
 from sklearn.metrics import accuracy_score
 from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LogisticRegression
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.linear_model import LogisticRegression
 from sklearn.pipeline import make_pipeline
@@ -81,6 +82,7 @@ vectorizer = TfidfVectorizer(
 )
 lr = LogisticRegression(class_weight='balanced')
+# rf = RandomForestClassifier(n_estimators=100)
 # training the model
 model = make_pipeline(vectorizer, lr)
...
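This hunk keeps the RandomForestClassifier alternative around as a comment while the active model remains TF-IDF features feeding a class-balanced logistic regression. A minimal, self-contained sketch of that pattern follows; the texts and labels are placeholder data and the real TfidfVectorizer arguments are outside this diff.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

texts = ["good service", "terrible product", "great value", "awful support"]  # placeholder corpus
labels = [1, 0, 1, 0]                                                         # placeholder labels

x_train, x_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.5, random_state=0, stratify=labels)

# TF-IDF features piped into a logistic regression that reweights imbalanced classes
model = make_pipeline(TfidfVectorizer(), LogisticRegression(class_weight='balanced'))
model.fit(x_train, y_train)
print(accuracy_score(y_test, model.predict(x_test)))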
@@ -423,7 +423,7 @@ class LimeTextExplainer(object):
             ret_exp.predict_proba = classifier_fn([text_instance])[0]
             ret_exp.local_pred = [ret_exp.predict_proba[1]]
             ret_exp.intercept[1] = ret_exp.predict_proba[1]
-            ret_exp.local_exp[1] = [(0,0)]
+            ret_exp.local_exp[1] = []
             return ret_exp
         if self.class_names is None:
             self.class_names = [str(x) for x in range(yss[0].shape[0])]
...
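The lime_text.py change touches only this early-return path: ret_exp.local_exp maps a label to a list of (word_index, weight) pairs, so leaving it as an empty list says "no features to report" instead of fabricating a zero-weight entry for word index 0. A standalone sketch of how that structure is consumed, written against the upstream LIME API with a dummy probability function (purely illustrative, not the repository's classifier):

import numpy as np
from lime.lime_text import LimeTextExplainer

def predict_fn(texts):
    # stand-in for model.predict_proba; returns fixed two-class probabilities
    return np.array([[0.5, 0.5] for _ in texts])

explainer = LimeTextExplainer(class_names=["neg", "pos"])
exp = explainer.explain_instance("a short example text", predict_fn, num_features=10)

# local_exp[1] holds (word_index, weight) pairs for label 1; when the patched
# early return leaves it empty, this loop simply does nothing.
for word_index, weight in exp.local_exp[1]:
    print(word_index, weight)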
@@ -11,13 +11,13 @@ from lime.lime_text import LimeTextExplainer
 clusters = 50
-def features_contributions(predict_fn, train, class_names, sample_size, kernel_width=3):
+def features_contributions(predict_fn, train, class_names, sample_size, kernel_width=5):
     explainer = LimeTextExplainer(class_names=class_names, kernel_width=kernel_width)
     if sample_size > len(train):
         sample_size = len(train)
     indexes = np.random.choice(range(sample_size), sample_size)
-    explanations = [explainer.explain_instance(train[i], predict_fn, num_features=1000) for i in indexes]
+    explanations = [explainer.explain_instance(train[i].lower(), predict_fn, num_features=1000) for i in indexes]
     # sp_obj = submodular_pick.SubmodularPick(explainer, train, predict_fn, sample_size=sample_size,
     #                                         num_features=1000, clusters=clusters)
     # explanations = sp_obj.sp_explanations
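features_contributions now lower-cases each sampled instance before explaining it and widens the LIME kernel from 3 to 5. A hypothetical call is sketched below; model and train are placeholders for the surrounding script, the class names are made up, and whatever the function returns is outside this hunk.

# Hypothetical usage; `model`, `train` and the class names are assumptions
# not shown in this diff, and the return value is not shown either.
features_contributions(model.predict_proba, train, class_names=["class_0", "class_1"], sample_size=50)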
@@ -31,7 +31,9 @@ def fairness_eval(model, train, max_features, sensitive_features, feature_names,
     contributions = Counter()
     for exp in explanations:
-        a1 = Counter(dict(exp.local_exp[1]))
+        vocab = exp.domain_mapper.indexed_string.inverse_vocab
+        words_weights = {vocab[i]: weight for i, weight in exp.local_exp[1]}
+        a1 = Counter(words_weights)
         contributions.update(a1)
     if threshold != None and threshold > 0:
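The added lines translate per-instance word indexes into the words themselves before aggregating: exp.local_exp[1] stores (word_index, weight) pairs whose indexes only have meaning within one instance, and inverse_vocab (the per-instance word list kept by LIME's IndexedString) resolves them so contributions from different instances can be summed in a single Counter keyed by word. Assuming the upstream LIME API, the same aggregation can also be written with the public as_list helper; a sketch, with explanations standing for the list built by features_contributions above:

from collections import Counter

contributions = Counter()
for exp in explanations:
    # as_list(label=1) returns (word, weight) pairs already mapped through the
    # explanation's domain mapper, so no manual inverse_vocab lookup is needed
    contributions.update(dict(exp.as_list(label=1)))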
@@ -52,14 +54,13 @@ def fairness_valid_top(contributions, feature_names, sensitive_features, max_fea
         max_features = len(sorted_dict)
     for i in range(max_features):
-        key, value = sorted_dict[i]
-        feature = feature_names[key]
-        ans_data.append([key, feature, value])
+        feature, value = sorted_dict[i]
+        ans_data.append([i, feature, value])
         if feature in sensitive_features:
             actual_sensitive.append(feature)
-    df = pd.DataFrame(ans_data, columns = ["Index", "Word", "Contribution"])
+    df = pd.DataFrame(ans_data, columns=["Index", "Word", "Contribution"])
     return actual_sensitive, len(actual_sensitive) < 2, df
...
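Because contributions is now keyed by words, sorted_dict holds (word, weight) pairs directly: the feature_names lookup disappears and the DataFrame's "Index" column becomes the rank in the sorted list rather than a vocabulary id. An illustrative sketch of the resulting table, with made-up words and weights:

import pandas as pd

sorted_dict = [("good", 0.42), ("boring", -0.31), ("movie", 0.05)]  # made-up values
ans_data = [[i, word, weight] for i, (word, weight) in enumerate(sorted_dict)]
df = pd.DataFrame(ans_data, columns=["Index", "Word", "Contribution"])
print(df)  # three rows: rank, word, contribution weight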