
Commit 943f6879 authored by BERNIER Fabien

other text experiments

parent 42076923
@@ -11,9 +11,11 @@ from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from fixout.core_text import FixOutText
from time import time
# Import data
df = pd.read_csv("examples/datasets/english_variant.csv")
df = pd.read_csv("datasets/english_variant.csv")
class_names = np.array(["SA", "AA"])
@@ -36,9 +38,12 @@ def preprocess(text_string):
    giant_url_regex = ('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|'
                       '[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    mention_regex = '@[\w\-]+'
    emoji_regex = '\\\\u[0-9a-fA-F]{4}'
    parsed_text = re.sub(space_pattern, ' ', text_string)
    parsed_text = re.sub(giant_url_regex, '', parsed_text)
    parsed_text = re.sub(mention_regex, '', parsed_text)
    parsed_text = re.sub(emoji_regex, '', parsed_text)
    parsed_text = parsed_text.replace("&", "")
    return parsed_text
@@ -63,7 +68,7 @@ for tweet in df.tweet:
    tag_str = " ".join(tag_list)
    tweet_tags.append(tag_str)
X = df.tweet.to_numpy()
X = np.array(list(map(lambda text : " ".join(tokenize(preprocess(text))), df.tweet.to_numpy())))
y = (df["group"] == "AA").to_numpy(dtype=np.uint8)
print(len(X))
# X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)
@@ -98,7 +103,8 @@ print(class_names[model.predict(["piece of cake", "piece of shit"])])
# explaining the model
# fixout = FixOutText(X, y, sensitives=["black", "white", "bitch"], max_features=-1)
# t0 = time()
# fair_flag, words_weights, actual_sensitive, explainer = fixout.is_fair(model)
# print("took", time()-t0, "seconds")
\ No newline at end of file
fixout = FixOutText(X, y, sensitives=["black", "white", "bitch"], max_features=-1)
t0 = time()
fair_flag, words_weights, actual_sensitive, explainer = fixout.is_fair(model)
words_weights.to_csv("/home/fabien/Documents/Orpa/explanations/english_variant_avg.csv", index=False)
print("took", time()-t0, "seconds")
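# as used above, is_fair returns a boolean fairness flag, a DataFrame of word
# weights (dumped to CSV here), the sensitive words actually found among the
# top features, and the underlying explainer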
import sys; sys.path.extend(['..'])
import re  # used by preprocess/tokenize below; don't rely on the nltk star-import
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem.porter import *
from nltk import pos_tag
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from random import randint, choice
from fixout.core_text import FixOutText, EnsembleOutText
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
hsdata = pd.read_csv("datasets/hate_speech.csv").sample(frac=1)
engroup = pd.read_csv("datasets/english_variant.csv")
class_names = np.array(["ok", "hate speech"])
stopwords = stopwords.words("english")
stopwords.extend(["#ff", "ff", "rt"])
stemmer = PorterStemmer()
def preprocess(text_string):
    """
    Accepts a text string and:
    1) replaces runs of whitespace with a single space
    2) strips urls and @-mentions
    3) strips escaped \uXXXX emoji sequences and "&"
    This standardizes the text without caring about the specific
    urls or people mentioned.
    """
    space_pattern = '\s+'
    giant_url_regex = ('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|'
                       '[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    mention_regex = '@[\w\-]+'
    emoji_regex = '\\\\u[0-9a-fA-F]{4}'  # matches literal "\uXXXX" escapes left in the raw text
    parsed_text = re.sub(space_pattern, ' ', text_string)
    parsed_text = re.sub(giant_url_regex, '', parsed_text)
    parsed_text = re.sub(mention_regex, '', parsed_text)
    parsed_text = re.sub(emoji_regex, '', parsed_text)
    parsed_text = parsed_text.replace("&", "")
    return parsed_text
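# e.g. preprocess("RT @user: so   true http://t.co/abc") -> "RT : so true "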
def tokenize(tweet):
    """Removes punctuation & excess whitespace, sets to lowercase,
    and stems tweets. Returns a list of stemmed tokens."""
    # split on runs of non-letters; "+" rather than "*", since zero-width
    # matches make re.split cut between every character on Python 3.7+
    tweet = " ".join(re.split("[^a-zA-Z]+", tweet.lower())).strip()
    tokens = [stemmer.stem(t) for t in tweet.split()]
    return tokens
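# e.g. tokenize("Running dogs!!") -> ['run', 'dog']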
def basic_tokenize(tweet):
    """Same as tokenize but without the stemming"""
    tweet = " ".join(re.split("[^a-zA-Z.,!?]+", tweet.lower())).strip()
    return tweet.split()
tweet_tags = []
for tweet in hsdata.tweet:
    tokens = basic_tokenize(preprocess(tweet))
    tags = pos_tag(tokens)
    tag_list = [x[1] for x in tags]
    tag_str = " ".join(tag_list)
    tweet_tags.append(tag_str)
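# (tweet_tags is built but never used later in this script)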
vectorizer = TfidfVectorizer(
    tokenizer=tokenize,
    ngram_range=(1, 3),
    stop_words=stopwords,
    preprocessor=preprocess,
    use_idf=True,
    smooth_idf=False,
    norm=None,
    decode_error='replace',
    max_features=10000,
    min_df=5,
    max_df=0.75
)
base_words = """nigga
bitch
""".split('\n')
init_groups = [['lol', 'damn', 'lmao', 'actin', 'swerv', 'tho', 'naw', 'yassss', 'smh', 'dumb'],
['like', 'call', 'good'],
['shit', 'nigga', 'nicca', 'real', 'money', 'niggah', 'nothin', 'bout', 'noth', 'hoe'],
['tho', 'fuc', 'neva', 'naw', 'trippin', 'problem', 'gtfoh', 'eater', 'gone', 'ju'],
['got', 'love', 'had', 'wit', 'swear', 'aint', 'niggga', 'give', 'told', 'gotta'],
['nigga', 'shit', 'real', 'iffi', 'aint', 'money', 'niggah', 'wit', 'frontin', 'broke'],
['dont', 'wanna', 'mad', 'caus', 'realli', 'care', 'trust', 'worri', 'even', 'never'],
['ass', 'anonym', 'shut', 'yo', 'wit', 'up', 'dick', 'fuck', 'lil', 'dumb'],
['gotta', 'cuz', 'owe', 'lie', 'gone', 'gon', 'pow', 'wanna', 'knw', 'believ'],
['much', 'gonna', 'mani', 'someth', 'wonder', 'funni', 'brag', 'everyth', 'arrog', 'mayb']]
X = hsdata["tweet"]
y = (hsdata["class"] != 2).to_numpy(dtype=np.uint8)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)
sa_tweets = engroup["tweet"][engroup["group"] == "SA"]
aa_tweets = engroup["tweet"][engroup["group"] == "AA"]
vectorizer.fit(X_train)
lr = LogisticRegression(class_weight='balanced')
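# fitness for the genetic search below: the absolute demographic-parity gap
# |P(pred=1 | SA tweets) - P(pred=1 | AA tweets)|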
def error(model) :
    sa_preds = model.predict(sa_tweets)
    aa_preds = model.predict(aa_tweets)
    sa_count = len(sa_preds[sa_preds == 1]) / sa_preds.shape[0]
    aa_count = len(aa_preds[aa_preds == 1]) / aa_preds.shape[0]
    return abs(sa_count - aa_count)
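# uniform crossover: each word group is inherited from one parent at random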
def merge(model1, model2) :
    groups1 = model1.groups
    groups2 = model2.groups
    assert len(groups1) == len(groups2), "Models should have the same number of groups"
    new_groups = []
    for g1, g2 in zip(groups1, groups2) :
        if randint(0, 1) :
            new_groups.append(g1)
        else :
            new_groups.append(g2)
    return new_groups
def mutation(groups) :
    new_groups = []
    for g in groups :
        ng = []
        for w in g :
            if randint(0, 9) > 0 :  # keep each word with probability 0.9
                ng.append(w)
            if randint(0, 9) == 0 :  # add a random base word with probability 0.1
                ng.append(choice(base_words))
        new_groups.append(ng)
    return EnsembleOutText(lr, new_groups, tokenizer=vectorizer.transform)
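# simple genetic loop: fit all 50 candidates, keep the two with the smallest
# parity gap as parents, and rebuild the population via crossover + mutation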
population = [mutation(init_groups) for _ in range(50)]
for epoch in range(50) :
    print(f"Epoch {epoch}")
    for model in population :
        model.fit(X_train, y_train)
    rank = sorted(population, key=lambda m: error(m))
    parent1 = rank[0]
    parent2 = rank[1]
    print("Error:", error(parent1))
    print("Accuracy:", parent1.score(X_test, y_test))
    population = [mutation(merge(parent1, parent2)) for _ in range(50)]
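# note: selection optimizes the parity gap only; test accuracy is merely
# monitored, so it can degrade if fairness and accuracy trade off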
\ No newline at end of file
@@ -10,9 +10,13 @@ from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
print("Training embeddings...")
from examples.word_clustering import word_clustering_aa, word_clustering_sa
print("Ok let's go")
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
# print("Training embeddings...")
# from examples.word_clustering import word_clustering_aa, word_clustering_sa
from examples.word_clustering import get_most_similar, embedding
from fairmodels import ModelProb, FairnessObject
# print("Ok let's go")
from copy import deepcopy
from time import time
@@ -20,68 +24,67 @@ from fixout.core_text import FixOutText, EnsembleOutText
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
hsdata = pd.read_csv("datasets/hate_speech_eg.csv")
hsdata = pd.read_csv("datasets/hate_speech_eg.csv").sample(frac=1)
class_names = np.array(["ok", "hate speech"])
stopwords = stopwords.words("english")
stopwords.extend(["#ff", "ff", "rt"])
stemmer = PorterStemmer()
#
# def preprocess(text_string):
# """
# Accepts a text string and replaces:
# 1) urls with URLHERE
# 2) lots of whitespace with one instance
# 3) mentions with MENTIONHERE
#
# This allows us to get standardized counts of urls and mentions
# Without caring about specific people mentioned
# """
# space_pattern = '\s+'
# giant_url_regex = ('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|'
# '[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
# mention_regex = '@[\w\-]+'
# parsed_text = re.sub(space_pattern, ' ', text_string)
# parsed_text = re.sub(giant_url_regex, '', parsed_text)
# parsed_text = re.sub(mention_regex, '', parsed_text)
# return parsed_text
#
#
def preprocess(text_string):
    """
    Accepts a text string and:
    1) replaces runs of whitespace with a single space
    2) strips urls and @-mentions
    3) strips escaped \uXXXX emoji sequences and "&"
    This standardizes the text without caring about the specific
    urls or people mentioned.
    """
    space_pattern = '\s+'
    giant_url_regex = ('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|'
                       '[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    mention_regex = '@[\w\-]+'
    emoji_regex = '\\\\u[0-9a-fA-F]{4}'  # matches literal "\uXXXX" escapes left in the raw text
    parsed_text = re.sub(space_pattern, ' ', text_string)
    parsed_text = re.sub(giant_url_regex, '', parsed_text)
    parsed_text = re.sub(mention_regex, '', parsed_text)
    parsed_text = re.sub(emoji_regex, '', parsed_text)
    parsed_text = parsed_text.replace("&", "")
    return parsed_text
def tokenize(tweet):
    """Removes punctuation & excess whitespace, sets to lowercase,
    and stems tweets. Returns a list of stemmed tokens."""
    # "+" rather than "*": zero-width matches split every character on Python 3.7+
    tweet = " ".join(re.split("[^a-zA-Z]+", tweet.lower())).strip()
    tokens = [stemmer.stem(t) for t in tweet.split()]
    return tokens
#
#
# def basic_tokenize(tweet):
# """Same as tokenize but without the stemming"""
# tweet = " ".join(re.split("[^a-zA-Z.,!?]*", tweet.lower())).strip()
# return tweet.split()
#
# tweet_tags = []
# for tweet in hsdata.tweet:
# tokens = basic_tokenize(preprocess(tweet))
# tags = pos_tag(tokens)
# tag_list = [x[1] for x in tags]
# tag_str = " ".join(tag_list)
# tweet_tags.append(tag_str)
def basic_tokenize(tweet):
    """Same as tokenize but without the stemming"""
    tweet = " ".join(re.split("[^a-zA-Z.,!?]+", tweet.lower())).strip()
    return tweet.split()
X = hsdata.tweet.to_numpy()
tweet_tags = []
for tweet in hsdata.tweet:
    tokens = basic_tokenize(preprocess(tweet))
    tags = pos_tag(tokens)
    tag_list = [x[1] for x in tags]
    tag_str = " ".join(tag_list)
    tweet_tags.append(tag_str)
X = np.array(list(map(lambda text : " ".join(tokenize(preprocess(text))), hsdata.tweet.to_numpy())))
y = (hsdata["class"] != 2).to_numpy(dtype=np.uint8)
ev = hsdata["english_variant"]
print("Clustering data...")
X[ev == "SA"] = list(map(word_clustering_sa, X[ev == "SA"]))
X[ev == "AA"] = list(map(word_clustering_aa, X[ev == "AA"]))
print("Data is clustered!")
# X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)
X_train, X_test, y_train, y_test = X[:19000], X[19000:], y[:19000], y[19000:]
ev_train, ev_test = ev[:19000], ev[19000:]
X[ev == "SA"] = list(map(preprocess, X[ev == "SA"]))
X[ev == "AA"] = list(map(preprocess, X[ev == "AA"]))
X_train, X_test, y_train, y_test, ev_train, ev_test = train_test_split(X, y, ev, train_size=0.8)
vectorizer = TfidfVectorizer(
tokenizer=tokenize,
@@ -96,25 +99,33 @@ vectorizer = TfidfVectorizer(
max_df=0.75
)
# vectors = vectorizer.fit_transform(X_train)
# vectors = vectors.toarray()
# pca = PCA(n_components=2)
# vectors2d = pca.fit_transform(vectors)
# plt.plot(vectors2d[:,0], vectors2d[:,1])
# plt.show()
lr = LogisticRegression(class_weight='balanced')
# rf = RandomForestClassifier(n_estimators=100)
# training the model
model = make_pipeline(vectorizer, lr)
model_sa = deepcopy(model)
model_aa = deepcopy(model)
# model_sa = deepcopy(model)
# model_aa = deepcopy(model)
model.fit(X_train, y_train)
model_sa.fit(X_train[ev_train == "SA"], y_train[ev_train == "SA"])
model_aa.fit(X_train[ev_train == "AA"], y_train[ev_train == "AA"])
# model_sa.fit(X_train[ev_train == "SA"], y_train[ev_train == "SA"])
# model_aa.fit(X_train[ev_train == "AA"], y_train[ev_train == "AA"])
# evaluating the model
pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, pred))
print("SA Accuracy:", model_sa.score(X_test[ev_test == "SA"], y_test[ev_test == "SA"]))
print("AA Accuracy:", model_aa.score(X_test[ev_test == "AA"], y_test[ev_test == "AA"]))
# print("SA Accuracy:", model_sa.score(X_test[ev_test == "SA"], y_test[ev_test == "SA"]))
# print("AA Accuracy:", model_aa.score(X_test[ev_test == "AA"], y_test[ev_test == "AA"]))
"""
# ~~~~~~~~~~~~~~~~~~~~~~~~~~ checking parity ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def get_dataset_predictions(model_sa, model_aa, dataset_filename) :
@@ -131,32 +142,98 @@ def check_parity(preds_sa, preds_aa, word=None):
    hate_rate_aa = len(preds_aa[preds_aa == 1]) / len(preds_aa)
    print(f"[{word}] P(hate_speech | SA) = {hate_rate_sa}")
    print(f"[{word}] P(hate_speech | AA) = {hate_rate_aa}")
    return hate_rate_sa, hate_rate_aa
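# statistical parity check: compare positive ("hate speech") prediction rates
# between the SA and AA subsets of a dataset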
print("Original model")
preds_sa, preds_aa = get_dataset_predictions(model, model, "datasets/english_variant.csv")
check_parity(preds_sa, preds_aa, None)
check_parity(preds_sa, preds_aa, "")
preds_sa, preds_aa = get_dataset_predictions(model, model, "datasets/english_variant_btch.csv")
check_parity(preds_sa, preds_aa, "b*tch")
preds_sa, preds_aa = get_dataset_predictions(model, model, "datasets/english_variant_ngga.csv")
check_parity(preds_sa, preds_aa, "n*gga")
"""
# # difference per word
# sensitive_words = ['pandora', 'glad', 'niggah', 'half', 'yu', 'year', 'ude14', 'b', 'night', 'niggas', 'why', 'motherfuckers', 'white', 'go', 'stfu', 'right', 'asap', 'name', 'fairy', 'yall', 'listen', 'any', 'side', 'gon', 'niggers', 'nigger', 'emotions', 'continue', 'shit', 'rated', 'via', 'im', 'wen', 'fucked', 'friends', 'afraid', 'pussy', 'take', 'let', 'smh', 'dope', 'life', 'fb', 'booty', 'oomf', 'yankees', 'queer', 'sex', 'damn', 'fuckin', 'fck', 'amp', 'think', 'hoe', 'need', 'never', 'lmao', 'hoes', 'tl', 'dnt', 'sorry', 'yeah', 'love', 'fa', 'wtf', 'big', 'like', 'udc4c', 'wanna', 'gone', 'ud83d', 'dont', 'fucking', 'know', 'bird', 'af', 'thug', 'nicca', 'god', 'loyal', 'fucks', 'dis', 'little', 'n', 'retarded', 'automatically', 'totally', 'ipad', 'ctfu', 'females', 'visit', 'call', 'faggot', 'happy', 'one', 'everyone', 'hahaha', 'burger', 'slut', 'american', 'thank', 'wet', 'pussies', 'king', 'ud83c', 'lol', 'cunt', 'game', 'wow', 'niccas', 'bout', 'haha', 'ass', 'hate', 'yellow', 'iphone', 'bitch', 'niggahs', 'good', 'got', 'class', 'list', 'home', 'nigs', 'da', 'bitches', 'told', 'business', 'could', 'stop', 'twitter', 'park', 'man', 'tell', 'fuck', 'nobody', 'make', 'ya', 'yo', 'lil', 'faggots', 'gay', 'se', 'coffee', 'best', 'kill', 'dick', 'nig', 'job', 'though', 'birds', 'tho', 'dyke', 'thanks', 'yea', 'retard', 'alright', 'found', 'android', 'cc', 'much', 'nigga', 'ude0d', 'u', 'gt', 'back', 'fags', 'wit', 'em', 'honkies', 'imma', 'gotta', 'above', 'cause', 'slope', 'stupid', 'book', 'fag']
# # words in the top-100 lime global for the hate speech model, and for the english variant classifier
# results = []
# for w in sensitive_words :
# print(w)
# score_dict = {"word": w}
# ensemble = EnsembleOutText(model, [w])
# ensemble.fit(X_train, y_train)
# score_dict["accuracy"] = ensemble.score(X_test, y_test)
# preds_sa, preds_aa = get_dataset_predictions(ensemble, ensemble, "datasets/english_variant.csv")
# hate_rate_sa, hate_rate_aa = check_parity(preds_sa, preds_aa, "")
# score_dict["hate_speech | SA"] = hate_rate_sa
# score_dict["hate_speech | AA"] = hate_rate_aa
# score_dict["hate_speech (difference)"] = abs(hate_rate_sa - hate_rate_aa)
# preds_sa, preds_aa = get_dataset_predictions(ensemble, ensemble, "datasets/english_variant_btch.csv")
# hate_rate_sa, hate_rate_aa = check_parity(preds_sa, preds_aa, "b*tch")
# score_dict["[b*tch] hate_speech | SA"] = hate_rate_sa
# score_dict["[b*tch] hate_speech | AA"] = hate_rate_aa
# score_dict["[b*tch] hate_speech (difference)"] = abs(hate_rate_sa - hate_rate_aa)
# preds_sa, preds_aa = get_dataset_predictions(ensemble, ensemble, "datasets/english_variant_ngga.csv")
# hate_rate_sa, hate_rate_aa = check_parity(preds_sa, preds_aa, "n*gga")
# score_dict["[n*gga] hate_speech | SA"] = hate_rate_sa
# score_dict["[n*gga] hate_speech | AA"] = hate_rate_aa
# score_dict["[n*gga] hate_speech (difference)"] = abs(hate_rate_sa - hate_rate_aa)
# results.append(score_dict)
#
# results_df = pd.DataFrame(results)
# print("Contextual model")
# preds_sa, preds_aa = get_dataset_predictions(model_sa, model_aa, "datasets/english_variant.csv")
# check_parity(preds_sa, preds_aa, None)
# preds_sa, preds_aa = get_dataset_predictions(model_sa, model_aa, "datasets/english_variant_btch.csv")
# check_parity(preds_sa, preds_aa, "b*tch")
# preds_sa, preds_aa = get_dataset_predictions(model_sa, model_aa, "datasets/english_variant_ngga.csv")
# check_parity(preds_sa, preds_aa, "n*gga")
# explaining the model
# fixout = FixOutText(X, y, sensitives=["black", "white", "bitch"], max_features=-1)
# t0 = time()
# fair_flag, words_weights, actual_sensitive, explainer = fixout.is_fair(model)
# words_weights.to_csv("/home/fabien/Documents/Orpa/explanations/lime_mean.csv", index=False)
# print("took", time()-t0, "seconds")
print("Contextual model")
preds_sa, preds_aa = get_dataset_predictions(model_sa, model_aa, "datasets/english_variant.csv")
check_parity(preds_sa, preds_aa, None)
preds_sa, preds_aa = get_dataset_predictions(model_sa, model_aa, "datasets/english_variant_btch.csv")
# ~~~~~~~~~~~~~~~~~~~~~~~~~~ Applying EnsembleOut ~~~~~~~~~~~~~~~~~~~~~~~~~
# top_words = ["lol", "like", "shit", "tho", "got", "nigga"] #, "dont", "ass", "gotta", "much", "end", "lmao", "haha", "thank", "call", "bout", "ctfu", "also", "littl", "finna"]
# top_words = [tokenize(preprocess(w))[0] for w in top_words]
# sensitive_groups = [get_most_similar(w, embedding, min_similarity=0.9) for w in top_words]
sensitive_groups = [["nigga", "niggah", "niggas", "niggahs"], ["nigger", "niggers"], ["nig", "nigs"], ["nicca", "niccas"]]
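# each inner list groups surface variants of one term; EnsembleOutText
# presumably treats every group as a single sensitive feature to remove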
print(sensitive_groups)
ensemble = EnsembleOutText(model, sensitive_groups)
ensemble.fit(X_train, y_train)
ens_pred = ensemble.predict(X_test)
print("Ensemble accuracy:", accuracy_score(y_test, ens_pred))
fixout = FixOutText(X_train, y_train, sensitives=sensitive_groups, max_features=-1)
fair_flag_ens, words_weights_ens, actual_sensitive_ens, explainer_ens = fixout.is_fair(ensemble)
"""
preds_sa, preds_aa = get_dataset_predictions(ensemble, ensemble, "datasets/english_variant.csv")
check_parity(preds_sa, preds_aa, "")
preds_sa, preds_aa = get_dataset_predictions(ensemble, ensemble, "datasets/english_variant_btch.csv")
check_parity(preds_sa, preds_aa, "b*tch")
preds_sa, preds_aa = get_dataset_predictions(model_sa, model_aa, "datasets/english_variant_ngga.csv")
preds_sa, preds_aa = get_dataset_predictions(ensemble, ensemble, "datasets/english_variant_ngga.csv")
check_parity(preds_sa, preds_aa, "n*gga")
# ~~~~~~~~~~~~~~~~~~~~~~~~~~ Applying EnsembleOut ~~~~~~~~~~~~~~~~~~~~~~~~~
# sensitive_words = ['lol', 'amp', 'haha', 'bout', 'im', 'u', 'tho', 'yea', 'lmao', 'finna', 'honestly']
# ensemble = EnsembleOutText(model, sensitive_words)
# ensemble.fit(X_train, y_train)
# print("Ensemble accuracy:", accuracy_score(y_test, ensemble.predict(X_test)))
"""
# df = pd.read_csv("datasets/english_variant.csv")
# pred = model.predict_proba(df["tweet"])[:,1]
# ens_pred = ensemble.predict_proba(df["tweet"])[:,1]
# nigga_group = sensitive_groups[-1]
# contains_nigga = np.array(["none", "nigga"])[[min(sum(int(word in tweet) for word in nigga_group), 1) for tweet in X_test]]
# contains_nigga = np.array(["none", "nigga"])[[int("nigga" in tweet) for tweet in X_test]]
#
# preds, tweets, groups = get_dataset_predictions(ensemble, "datasets/english_variant.csv")
# check_parity(preds, tweets, groups)
# btch_preds, btch_tweets, btch_groups = get_dataset_predictions(ensemble, "datasets/english_variant_btch.csv")
# check_parity(btch_preds, btch_tweets, btch_groups)
# ngga_preds, ngga_tweets, ngga_groups = get_dataset_predictions(ensemble, "datasets/english_variant_ngga.csv")
# check_parity(ngga_preds, ngga_tweets, ngga_groups)
\ No newline at end of file
# lm = ModelProb(preds=pred, threshold=0.5, name="Original model")
# ens = ModelProb(preds=ens_pred, threshold=0.5, name="EnsembleOut")
# fobject = FairnessObject(
# model_probs=[lm, ens],
# y=y_test,
# protected=contains_nigga,
# privileged="none"
# )
# plt = fobject.plot(fairness_metrics=['acc', 'tpr', "tnr", 'ppv', "npv", 'fpr', "fnr", "stp", "fdr", "for", "ts", "f1"])
# plt.show()
@@ -4,6 +4,7 @@ import numpy as np
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from fixout.core_text import FixOutText, EnsembleOutText
@@ -22,24 +23,31 @@ class_names = ['atheism', 'christian']
# creating a model pipeline and training it
vectorizer = TfidfVectorizer(lowercase=True)
rf = RandomForestClassifier(n_estimators=500)
model = make_pipeline(vectorizer, rf)
lr = LogisticRegression()
model = make_pipeline(vectorizer, lr)
model.fit(X_train, y_train)
# evaluating our model
print("Accuracy:", model.score(X_test, y_test))
"""
# explaining our model
sensitive_words = ["com", "nigel", "of", "host", "library", "canrem", "symposium", "desks"]
fixout = FixOutText(X_train, y_train, sensitives=sensitive_words)
sensitive_words = list(map(lambda x: [x], ["com", "nigel", "of", "host", "library", "canrem", "symposium", "desks", "edu"]))
fixout = FixOutText(X_train, y_train, sensitives=sensitive_words, max_features=-1)
fair_flag, words_weights, actual_sensitive, explainer = fixout.is_fair(model)
# correcting fairness if necessary
if fair_flag :
    print("Model is fair ! \o/")
else :
    print("Model not fair, " + " ".join(actual_sensitive) + " in the main features...")
ensemble = EnsembleOutText(model, actual_sensitive)
ensemble.fit(X_train, y_train)
print("Ensemble accuracy:", ensemble.score(X_test, y_test))
fair_flag, words_weights, actual_sensitive, explainer = fixout.is_fair(ensemble)
\ No newline at end of file
# if fair_flag :
# print("Model is fair ! \o/")
# else :
# print("Model not fair, " + " ".join(actual_sensitive) + " in the main features...")
ensemble = EnsembleOutText(model, sensitive_words)
ensemble.fit(X_train, y_train)
print("Ensemble accuracy:", ensemble.score(X_test, y_test))
fair_flag_ens, words_weights_ens, actual_sensitive_ens, explainer_ens = fixout.is_fair(ensemble)
ensemble2 = EnsembleOutText(rf, sensitive_words, tokenizer=vectorizer.transform)
ensemble2.fit(X_train, y_train)
print("Ensemble accuracy:", ensemble2.score(X_test, y_test))
fair_flag_ens2, words_weights_ens2, actual_sensitive_ens2, explainer_ens2 = fixout.is_fair(ensemble2)
"""
\ No newline at end of file
from gensim.models import Word2Vec
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans, AgglomerativeClustering, AffinityPropagation
from sklearn.cluster import KMeans, DBSCAN
import nltk
from nltk.stem.porter import *
@@ -16,7 +16,8 @@ warnings.simplefilter(action='ignore', category=FutureWarning)
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
hsdata = pd.read_csv("datasets/english_variant_100k.csv")
hsdata = pd.read_csv("datasets/hate_speech.csv")
# hsdata = pd.read_csv("datasets/english_variant_100k.csv")
class_names = np.array(["ok", "hate speech"])
stopwords = nltk.corpus.stopwords.words("english")
@@ -39,9 +40,11 @@ def preprocess(text_string):
    giant_url_regex = ('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|'
                       '[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    mention_regex = '@[\w\-]+'
    emoji_regex = '\\\\u[0-9a-fA-F]{4}'
    parsed_text = re.sub(space_pattern, ' ', text_string)
    parsed_text = re.sub(giant_url_regex, '', parsed_text)
    parsed_text = re.sub(mention_regex, '', parsed_text)
    parsed_text = re.sub(emoji_regex, '', parsed_text)
    parsed_text = parsed_text.replace("&", "")
    return parsed_text
@@ -68,10 +71,13 @@ for tweet in hsdata.tweet:
    tag_str = " ".join(tag_list)
    tweet_tags.append(tag_str)
tweets_aa = list(map(lambda s: tokenize(preprocess(s)), hsdata.tweet[hsdata["group"] == "AA"]))
tweets_sa = list(map(lambda s: tokenize(preprocess(s)), hsdata.tweet[hsdata["group"] == "SA"]))
embedding_aa = Word2Vec(sentences=tweets_aa)
embedding_sa = Word2Vec(sentences=tweets_sa)
tweets = list(map(lambda s: tokenize(preprocess(s)), hsdata.tweet))
# tweets_aa = list(map(lambda s: tokenize(preprocess(s)), hsdata.tweet[hsdata["group"] == "AA"]))
# tweets_sa = list(map(lambda s: tokenize(preprocess(s)), hsdata.tweet[hsdata["group"] == "SA"]))
embedding = Word2Vec(sentences=tweets)
# embedding_aa = Word2Vec(sentences=tweets_aa)
# embedding_sa = Word2Vec(sentences=tweets_sa)
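# one Word2Vec model over all preprocessed tweets (gensim defaults:
# vector_size=100, window=5, min_count=5)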
def get_vocab(embedding):
@@ -87,15 +93,16 @@ def get_vocab(embedding):
def get_clustering_function(embedding):
    vocab, vectors = get_vocab(embedding)
    kmeans = AffinityPropagation(damping=0.5)
    kmeans.fit(vectors)
    kmeans.n_clusters = len(kmeans.cluster_centers_)
    cl = DBSCAN(eps=0.2)  # dbscan is useless here!
    cl.fit(vectors)
    cl.clusters_ids = set(cl.labels_)  # note: includes -1, DBSCAN's noise label, treated as a cluster
    cl.n_clusters = len(cl.clusters_ids)
    mapping = []
    for i in range(kmeans.n_clusters):
        center = kmeans.cluster_centers_[i]
    for i in cl.clusters_ids:
        center = vectors[cl.labels_ == i][0]
        word = embedding.wv.similar_by_vector(center)[0][0]
        mapping.append((set(vocab[kmeans.labels_ == i]), word))