import re
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk import pos_tag
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from time import time

from core_text import FixOutText

hsdata = pd.read_csv("datasets/hate_speech.csv")
class_names = np.array(["ok", "hate speech"])

stopwords = stopwords.words("english")
# add Twitter-specific tokens that carry no content for this task
stopwords.extend(["#ff", "ff", "rt"])

stemmer = PorterStemmer()

def preprocess(text_string):
    """
    Accepts a text string and:
    1) collapses runs of whitespace into a single space
    2) strips URLs
    3) strips @mentions

    This standardizes tweets without caring about the specific URLs
    or people mentioned.
    """
    space_pattern = r'\s+'
    giant_url_regex = (r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|'
                       r'[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    mention_regex = r'@[\w\-]+'
    parsed_text = re.sub(space_pattern, ' ', text_string)
    parsed_text = re.sub(giant_url_regex, '', parsed_text)
    parsed_text = re.sub(mention_regex, '', parsed_text)
    return parsed_text
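
# Illustrative example (made-up tweet): URLs and @mentions are stripped, and runs
# of whitespace are collapsed first, so removed pieces can leave double spaces:
#   preprocess("RT @user: so true https://t.co/abc    lol")  ->  "RT : so true  lol"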


def tokenize(tweet):
    """Removes punctuation & excess whitespace, sets to lowercase,
    and stems tweets. Returns a list of stemmed tokens."""
    # split on runs of one or more non-letter characters
    tweet = " ".join(re.split("[^a-zA-Z]+", tweet.lower())).strip()
    tokens = [stemmer.stem(t) for t in tweet.split()]
    return tokens


def basic_tokenize(tweet):
    """Same as tokenize but without the stemming (basic punctuation is kept)."""
    tweet = " ".join(re.split("[^a-zA-Z.,!?]+", tweet.lower())).strip()
    return tweet.split()
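
# Rough illustration on a made-up tweet (exact stems depend on the NLTK version):
#   tokenize("She is running fast")        -> ["she", "is", "run", "fast"]
#   basic_tokenize("She is running fast")  -> ["she", "is", "running", "fast"]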

# POS-tag each preprocessed tweet and keep the tag sequence as a string
# (collected here but not used further down in this script)
tweet_tags = []
for tweet in hsdata.tweet:
    tokens = basic_tokenize(preprocess(tweet))
    tags = pos_tag(tokens)
    tag_list = [x[1] for x in tags]
    tag_str = " ".join(tag_list)
    tweet_tags.append(tag_str)

X = hsdata.tweet.to_numpy()
# binary target: class 2 becomes 0 ("ok"), any other class becomes 1 ("hate speech")
y = (hsdata["class"] != 2).to_numpy(dtype=np.uint8)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)
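# Note: no random_state is given, so the split (and the scores below) vary between
# runs; a fixed seed could be passed for reproducibility, e.g.
#   train_test_split(X, y, train_size=0.8, random_state=42, stratify=y)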

vectorizer = TfidfVectorizer(
    tokenizer=tokenize,
    preprocessor=preprocess,
    ngram_range=(1, 3),
    stop_words=stopwords,
    use_idf=True,
    smooth_idf=False,
    norm=None,
    decode_error='replace',
    max_features=10000,
    min_df=5,
    max_df=0.75
)

lr = LogisticRegression(class_weight='balanced')

# training the model
model = make_pipeline(vectorizer, lr)
model.fit(X_train, y_train)

# evaluating the model
pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, pred))


# ~~~~~~~~~~~~~~~~~~~~~~~~~~ checking parity ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

def get_dataset_predictions(dataset_filename):
    variant = pd.read_csv(dataset_filename)
    tweets = variant["tweet"].to_numpy()
    # drop the leading and trailing character of each stored tweet
    for i in range(tweets.shape[0]):
        tweets[i] = tweets[i][1:-1]
    groups = variant["group"]
    return model.predict(tweets), tweets, groups


def check_parity(preds, tweets, groups, word=None):
    # optionally restrict the comparison to tweets containing a given word
    if word is not None:
        mask = list(map(lambda t: word in t, tweets))
        groups = groups[mask]
        preds = preds[mask]
    preds_aa = preds[groups == "AA"]
    preds_sa = preds[groups == "SA"]
    hate_rate_aa = len(preds_aa[preds_aa == 1]) / len(preds_aa)
    hate_rate_sa = len(preds_sa[preds_sa == 1]) / len(preds_sa)
    print(f"[{word}] P(hate_speech | AA) = {hate_rate_aa}")
    print(f"[{word}] P(hate_speech | SA) = {hate_rate_sa}")

preds, tweets, groups = get_dataset_predictions("datasets/english_variant.csv")
check_parity(preds, tweets, groups)
btch_preds, btch_tweets, btch_groups = get_dataset_predictions("datasets/english_variant_btch.csv")
check_parity(btch_preds, btch_tweets, btch_groups)
ngga_preds, ngga_tweets, ngga_groups = get_dataset_predictions("datasets/english_variant_ngga.csv")
check_parity(ngga_preds, ngga_tweets, ngga_groups)
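
# Optional summary (illustrative, not part of the original script): the demographic
# parity gap is the absolute difference between the two rates printed above.
def parity_gap(preds, groups):
    rate_aa = preds[np.asarray(groups) == "AA"].mean()
    rate_sa = preds[np.asarray(groups) == "SA"].mean()
    return abs(rate_aa - rate_sa)

print("Parity gap (full variant set):", parity_gap(preds, groups))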

# explaining the model with FixOut
vocab = list(model[0].vocabulary_.keys())  # vocabulary learned by the TfidfVectorizer step
fixout = FixOutText(X, y, vocab, to_drop=["black", "white", "bitch"], algo=model, max_features=-1)
t0 = time()
actual_sensitive, is_fair_flag, ans_data, accuracy, threshold = fixout.is_fair()
print("took", time()-t0, "seconds")