Attention : une mise à jour du service Gitlab sera effectuée le mardi 30 novembre entre 17h30 et 18h00. Cette mise à jour générera une interruption du service dont nous ne maîtrisons pas complètement la durée, mais qui ne devrait pas excéder quelques minutes. Cette mise à jour intermédiaire en version 14.0.12 nous permettra de mettre rapidement à votre disposition une version plus récente.

Commit 9e960077 authored by BERNIER Fabien's avatar BERNIER Fabien
Browse files

[~] conflict merge

parents 202cd51d 1908d071
......@@ -5,6 +5,7 @@ from nltk.stem.porter import *
from nltk import pos_tag
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
......@@ -81,6 +82,7 @@ vectorizer = TfidfVectorizer(
)
lr = LogisticRegression(class_weight='balanced')
# rf = RandomForestClassifier(n_estimators=100)
# training the model
model = make_pipeline(vectorizer, lr)
......
......@@ -423,7 +423,7 @@ class LimeTextExplainer(object):
ret_exp.predict_proba = classifier_fn([text_instance])[0]
ret_exp.local_pred = [ret_exp.predict_proba[1]]
ret_exp.intercept[1] = ret_exp.predict_proba[1]
ret_exp.local_exp[1] = [(0,0)]
ret_exp.local_exp[1] = []
return ret_exp
if self.class_names is None:
self.class_names = [str(x) for x in range(yss[0].shape[0])]
......
......@@ -11,13 +11,13 @@ from lime.lime_text import LimeTextExplainer
clusters = 50
def features_contributions(predict_fn, train, class_names, sample_size, kernel_width=3):
def features_contributions(predict_fn, train, class_names, sample_size, kernel_width=5):
explainer = LimeTextExplainer(class_names=class_names, kernel_width=kernel_width)
if sample_size > len(train):
sample_size = len(train)
indexes = np.random.choice(range(sample_size), sample_size)
explanations = [explainer.explain_instance(train[i], predict_fn, num_features=1000) for i in indexes]
explanations = [explainer.explain_instance(train[i].lower(), predict_fn, num_features=1000) for i in indexes]
# sp_obj = submodular_pick.SubmodularPick(explainer, train, predict_fn, sample_size=sample_size,
# num_features=1000, clusters=clusters)
# explanations = sp_obj.sp_explanations
......@@ -31,7 +31,9 @@ def fairness_eval(model, train, max_features, sensitive_features, feature_names,
contributions = Counter()
for exp in explanations:
a1 = Counter(dict(exp.local_exp[1]))
vocab = exp.domain_mapper.indexed_string.inverse_vocab
words_weights = {vocab[i]: weight for i, weight in exp.local_exp[1]}
a1 = Counter(words_weights)
contributions.update(a1)
if threshold != None and threshold > 0:
......@@ -52,14 +54,13 @@ def fairness_valid_top(contributions, feature_names, sensitive_features, max_fea
max_features = len(sorted_dict)
for i in range(max_features):
key, value = sorted_dict[i]
feature = feature_names[key]
ans_data.append([key, feature, value])
feature, value = sorted_dict[i]
ans_data.append([i, feature, value])
if feature in sensitive_features:
actual_sensitive.append(feature)
df = pd.DataFrame(ans_data, columns = ["Index", "Word", "Contribution"])
df = pd.DataFrame(ans_data, columns=["Index", "Word", "Contribution"])
return actual_sensitive, len(actual_sensitive) < 2, df
......
Markdown is supported
0% uploaded — or attach a file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.