
Commit 797929d2 authored by BERNIER Fabien

[+] FixOut for hate speech experiment

parent 553773c2
@@ -3,13 +3,17 @@ import pandas as pd
from nltk.corpus import stopwords
from nltk.stem.porter import *
from nltk import pos_tag
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from time import time
from core_text import FixOutText
hsdata = pd.read_csv("datasets/hate_speech.csv")
class_names = ["hate speech", "offensive", "neither"]
class_names = np.array(["ok", "hate speech"])
stopwords = stopwords.words("english")
stopwords.extend(["#ff", "ff", "rt"])
@@ -57,8 +61,8 @@ for tweet in hsdata.tweet:
tag_str = " ".join(tag_list)
tweet_tags.append(tag_str)
X = hsdata.tweet
y = (hsdata["class"] != 2).astype(np.uint8)
X = hsdata.tweet.to_numpy()
y = (hsdata["class"] != 2).to_numpy(dtype=np.uint8)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)
vectorizer = TfidfVectorizer(
@@ -76,7 +80,20 @@ vectorizer = TfidfVectorizer(
)
rf = RandomForestClassifier(n_estimators=500)
# training the model
model = make_pipeline(vectorizer, rf)
model.fit(X_train, y_train)
print(model.predict(["piece of cake", "piece of shit"]))
\ No newline at end of file
# evaluating the model
pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, pred))
print(class_names[model.predict(["piece of cake", "piece of shit"])])
# explaining the model
vocab = list(model[0].vocabulary_.keys())
fixout = FixOutText(X, y, vocab, to_drop=["black", "white", "bitch"], algo=model)
t0 = time()
actual_sensitive, is_fair_flag, ans_data, accuracy, threshold = fixout.is_fair()
print("took", time()-t0, "seconds")
\ No newline at end of file
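
A side note on the class_names change above: switching from a plain list to a NumPy array lets the predicted 0/1 labels be mapped to names by fancy indexing, as in the print(class_names[model.predict(...)]) line. A minimal illustration (the toy predictions below are mine, not from the commit):

# Why class_names is now a NumPy array: indexing it with an array of
# predicted labels maps each 0/1 to its name in one step (fancy indexing).
import numpy as np

class_names = np.array(["ok", "hate speech"])
preds = np.array([0, 1, 1])        # stand-in for model.predict(...)
print(class_names[preds])          # ['ok' 'hate speech' 'hate speech']
# Indexing a plain Python list with an integer array would raise TypeError.
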
@@ -3,33 +3,29 @@ Implements LIME_Global. Verifies if sensitive features have high contributions.
"""
from collections import Counter
import pandas as pd
import numpy as np
import sys
import numpy as np
from lime import lime_tabular, submodular_pick
from lime.lime_text import LimeTextExplainer
from scipy import stats
clusters = 50
def features_contributions(predict_fn, train, class_names, sample_size, kernel_width=3):
    explainer = LimeTextExplainer(class_names=class_names, kernel_width=kernel_width)
    # TODO: temporarily replace submodular_pick by explanation on a random subset of samples
    sp_obj = submodular_pick.SubmodularPick(explainer, train, predict_fn, sample_size=sample_size, num_features=1000, clusters=clusters)
    if sample_size > len(train):
        sample_size = len(train)
    indexes = np.random.choice(range(sample_size), sample_size)
    explanations = [explainer.explain_instance(train[i], predict_fn, num_features=1000) for i in indexes]
    return explainer, sp_obj
    return explainer, explanations
def fairness_eval(model, train, max_features, sensitive_features, feature_names, class_names, sample_size, threshold=None):
    explainer, sp_obj = features_contributions(model.prob, train, class_names, sample_size)
    explainer, explanations = features_contributions(model.prob, train, class_names, sample_size)
    contributions = Counter()
    for i in sp_obj.V:
        exp = sp_obj.explanations[i]
    for exp in explanations:
        a1 = Counter(dict(exp.local_exp[1]))
        contributions.update(a1)
@@ -55,7 +51,7 @@ def fairness_valid_top(contributions, feature_names, sensitive_features, max_fea
        if feature in sensitive_features:
            actual_sensitive.append(feature)
    df = pd.DataFrame(ans_data, columns = ["Index", "Feature", "Contribution"])
    df = pd.DataFrame(ans_data, columns = ["Index", "Word", "Contribution"])
    return actual_sensitive, len(actual_sensitive) < 2, df
......
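
The TODO above replaces LIME's SubmodularPick with per-instance explanations computed on a random subset of documents and aggregated into global word contributions. A self-contained sketch of that pattern follows; the toy corpus, the LogisticRegression model and the use of exp.as_list() are illustrative choices of mine, not code from this repository.

# Sketch: approximate global word importance by summing per-instance
# LIME text explanations over a random subset of documents.
from collections import Counter

import numpy as np
from lime.lime_text import LimeTextExplainer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

texts = np.array(["great movie", "terrible movie", "great day", "terrible day"] * 10)
labels = np.array([1, 0, 1, 0] * 10)
model = make_pipeline(TfidfVectorizer(), LogisticRegression()).fit(texts, labels)

explainer = LimeTextExplainer(class_names=["neg", "pos"])
sample_size = min(10, len(texts))
indexes = np.random.choice(len(texts), sample_size, replace=False)

contributions = Counter()          # word -> summed contribution to class 1
for i in indexes:
    exp = explainer.explain_instance(texts[i], model.predict_proba, num_features=10)
    contributions.update(dict(exp.as_list(label=1)))   # (word, weight) pairs

print(contributions.most_common(5))
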
@@ -6,6 +6,8 @@ from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from core_text import FixOutText
from time import time
# loading data
categories = ['alt.atheism', 'soc.religion.christian']
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories)
@@ -27,7 +29,9 @@ X = newsgroups_train.data + newsgroups_test.data
y = np.concatenate([newsgroups_train.target, newsgroups_test.target])
vocab = list(model[0].vocabulary_.keys())
fixout = FixOutText(X, y, vocab, to_drop=["com", "nigel", "of", "host", "library", "canrem", "symposium", "desks"], algo=model)
t0 = time()
actual_sensitive, is_fair_flag, ans_data, accuracy, threshold = fixout.is_fair()
print("took", time()-t0, "seconds")
# correcting fairness if necessary
# if is_fair_flag :
......
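
On the commented-out stub above: one plausible follow-up, sketched here under my own assumptions rather than taken from the repository, is to refit the pipeline with the words flagged in actual_sensitive excluded from the vocabulary, for instance via the vectorizer's stop_words parameter.

# Sketch only: react to an unfair verdict by refitting with the flagged
# words dropped from the vocabulary. Reuses X, y, actual_sensitive and
# is_fair_flag from the script above; the classifier settings are assumed.
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline

if not is_fair_flag:
    corrected_model = make_pipeline(
        TfidfVectorizer(stop_words=list(actual_sensitive)),   # drop flagged words
        RandomForestClassifier(n_estimators=100),
    )
    corrected_model.fit(X, y)
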