Commit fdb6b441 authored by BERNIER Fabien

[+] Column transformer managed by EnsembleOut

parent 00a47a91
@@ -2,7 +2,7 @@ import sys; sys.path.extend(['..'])
import pandas as pd
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
@@ -11,7 +11,7 @@ from fixout.core_tabular import EnsembleOutTabular
from fixout.utils import columns_preprocessers, transform_categorical_names
# load the data and convert it to numpy
data = pd.read_csv("/home/fabien/Documents/Orpa/fixout-demo/datasets/adult.data")
data = pd.read_csv("/home/fabien/Documents/Orpa/fixout-pkdd/datasets/adult2.data")
features = data.columns[:-1]
X = data.drop(columns="Target").to_numpy()
y = data["Target"].to_numpy()
@@ -38,7 +38,7 @@ for i, contrib in explainer_original.get_top_k(k=10) :
print(features[i], '\t', contrib)
# make an ensemble
ensemble = EnsembleOutTabular(model, sensitive_features=(5, 8, 9))
ensemble = EnsembleOutTabular(lr, ct, sensitive_features=(5, 8, 9))
ensemble.fit(X_train, y_train)
print("Ensemble score:", ensemble.score(X_test, y_test))
@@ -46,5 +46,5 @@ print("Ensemble score:", ensemble.score(X_test, y_test))
explainer_ensemble = TabularExplainer(ensemble.predict_proba, X_train, categorical_features=categorical)
explainer_ensemble.global_explanation(n_samples=200)
for i, contrib in explainer_original.get_top_k(k=10) :
for i, contrib in explainer_ensemble.get_top_k(k=10) :
print(features[i], '\t', contrib)
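
Note on the example change above: the ensemble is now built from the raw estimator (lr) and the column transformer (ct) passed separately, rather than from a single pre-built model. Below is a minimal sketch of how such an example could be wired up, assuming sklearn's ColumnTransformer with a OneHotEncoder over the categorical columns; the dataset path, column indices, and variable names are illustrative, not taken from the repository.

```python
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from fixout.core_tabular import EnsembleOutTabular

# illustrative path and column indices (the demo script's real values may differ)
data = pd.read_csv("datasets/adult2.data")
X = data.drop(columns="Target").to_numpy()
y = data["Target"].to_numpy()

categorical = [1, 3, 5, 6, 7, 8, 9, 13]
ct = ColumnTransformer(
    [("onehot", OneHotEncoder(handle_unknown="ignore"), categorical)],
    remainder="passthrough",
)
lr = LogisticRegression(max_iter=1000)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# base estimator and untouched column transformer are passed separately;
# EnsembleOutTabular re-wires the transformer for each sensitive-feature drop
ensemble = EnsembleOutTabular(lr, ct, sensitive_features=(5, 8, 9))
ensemble.fit(X_train, y_train)
print("Ensemble score:", ensemble.score(X_test, y_test))
```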
@@ -2,6 +2,9 @@
Implements the main procedures to build fairer ensembles, e.g. feature drop out, model training, ensemble building
"""
import os
from sklearn.pipeline import make_pipeline
os.environ['KMP_DUPLICATE_LIB_OK']='True'
from sklearn.metrics import accuracy_score
@@ -11,42 +14,6 @@ from copy import deepcopy
import numpy as np
class FixOutTabular:
def __init__(self, model, X, sensitives, explainer="lime", top_k=10, sampling_size=None):
if type(explainer) == str :
if explainer == "lime" :
self.explainer = TabularExplainer(model.predict_proba, X)
else :
self.explainer = explainer
self.model = model
self.X = X
self.sensitives = sensitives
self.actual_sensitive = []
self.top_k = top_k
if sampling_size is None :
self.sampling_size = len(X)//10
else :
self.sampling_size = sampling_size
def explain(self, X):
exp = self.explainer.global_explanation(X)
top = sorted(enumerate(exp), key=lambda x:x[1], reverse=True)[:self.top_k]
top_features = set(map(lambda x:x[0], top))
actual_sensitive = top_features & set(self.sensitives)
self.actual_sensitive = list(actual_sensitive)
self.is_fair = len(actual_sensitive) < 2
return top
def get_ensemble(self):
if len(self.actual_sensitive) < 2 :
return None
return EnsembleOutTabular(self.model, sensitive_features=self.actual_sensitive)
class EnsembleOutTabular(ClassifierMixin):
"""Class for ensemble models
@@ -57,13 +24,16 @@ class EnsembleOutTabular(ClassifierMixin):
"""
def __init__(self,
base_model,
column_transformer=None,
sensitive_features=(),
auto_threshold=False):
self.models = [base_model]
self.column_transformer = column_transformer
self.sensitives = [[i] for i in sensitive_features] \
+ ([[sensitive_features]] if len(sensitive_features) > 1 else [])
+ ([list(sensitive_features)] if len(sensitive_features) > 1 else [])
self.threshold = 0.5
self.dtype = np.uint8
self.classes_ = np.array([False, True])
self.auto_threshold = auto_threshold
def fit(self, X, y):
@@ -72,14 +42,26 @@ class EnsembleOutTabular(ClassifierMixin):
except:
pass
base_model = self.models.pop(0)
base_ct = self.column_transformer
for features in self.sensitives:
# feature drop
X_train = np.array(X)
X_train[:,features] = 0
model = deepcopy(base_model)
for i in sorted(features, reverse=True):
X_train = np.delete(X_train, i, axis=1)
# categorical preprocessing
if base_ct is not None:
ct = deepcopy(base_ct)
ct.transformers = self._drop_transformers_columns(ct.transformers, features)
ct.set_params()
model = make_pipeline(ct, deepcopy(base_model))
else:
model = deepcopy(base_model)
model.fit(X_train, y)
self.models.append(model)
self.classes_ = self.models[-1].classes_
if self.auto_threshold:
self._adjust_threshold(X, y)
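
For each sensitive-feature group, fit() now deletes the corresponding columns from a copy of the training matrix, deep-copies the provided ColumnTransformer, re-maps its column indices to the reduced matrix, and chains it with a fresh copy of the base estimator via make_pipeline. A rough sketch of what one ensemble member amounts to, reusing the hypothetical X, y, ct and lr from the sketch above and dropping feature 5 only:

```python
import numpy as np
from copy import deepcopy
from sklearn.pipeline import make_pipeline

features = [5]                                   # one sensitive group
X_member = np.array(X)
for i in sorted(features, reverse=True):
    X_member = np.delete(X_member, i, axis=1)    # drop the sensitive column(s)

ct_member = deepcopy(ct)
# shift each transformer's column indices so they point into the reduced matrix
ct_member.transformers = [
    (name, prep, [c - sum(f < c for f in features) for c in cols if c not in features])
    for name, prep, cols in ct_member.transformers
]
member = make_pipeline(ct_member, deepcopy(lr))
member.fit(X_member, y)
```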
@@ -100,7 +82,8 @@
model = self.models[i]
features = self.sensitives[i]
X_ = np.array(X)
X_[:,features] = 0
for i in sorted(features, reverse=True):
X_ = np.delete(X_, i, axis=1)
y = model.predict_proba(X_)
probs.append(y)
@@ -108,4 +91,25 @@
def predict(self, X, threshold=None):
if threshold is None: threshold = self.threshold
return (self.predict_proba(X)[:,1] > threshold).astype(self.dtype)
\ No newline at end of file
return self.classes_[(self.predict_proba(X)[:,1] > threshold).astype(np.uint8)]
def _column_drop(self, columns, i):
new_columns = []
for j in columns:
if j < i:
new_columns.append(j)
elif j > i:
new_columns.append(j-1)
return new_columns
def _column_drop_many(self, columns, indexes):
for i in sorted(indexes, reverse=True):
columns = self._column_drop(columns, i)
return columns
def _drop_transformers_columns(self, transformers, indexes):
new_transformers = []
for name, preprocesser, columns in transformers:
columns = self._column_drop_many(columns, indexes)
new_transformers.append((name, preprocesser, columns))
return new_transformers
\ No newline at end of file
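
The helpers added at the end re-index a ColumnTransformer's column lists after sensitive columns have been deleted: each dropped index disappears and every surviving index is shifted down by the number of dropped columns below it, so the transformer stays attached to the same logical columns. A standalone illustration of that arithmetic (free functions mirroring the private helpers, for demonstration only):

```python
def column_drop(columns, i):
    """Remove index i and shift indices above it down by one."""
    return [j if j < i else j - 1 for j in columns if j != i]

def column_drop_many(columns, indexes):
    """Apply column_drop for every dropped index, highest first."""
    for i in sorted(indexes, reverse=True):
        columns = column_drop(columns, i)
    return columns

# a OneHotEncoder attached to columns [1, 3, 5] should point at [1, 4]
# once column 3 has been deleted from the design matrix
print(column_drop_many([1, 3, 5], [3]))     # [1, 4]
print(column_drop_many([1, 6, 7], [3, 5]))  # [1, 4, 5]
```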