Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 6813c9a1 authored by vitalii's avatar vitalii
Browse files

fix README and minor bugs

parent f111fe57
No related branches found
No related tags found
No related merge requests found
# implicit_variance
\ No newline at end of file
# Code from the paper "On the Effect of Positive Discrimination on Selection Problems in the Presence of Implicit Variance"
The figures are generated by the following Jupyter notebooks (`.ipynb` files):
* Sec. 2-3: figures-model.ipynb
* Sec. 3-4: figures1-stage-concave.ipynb, figures-1-2-stage.ipynb
* Sec. 5.1: experiment-pareto.ipynb
* Sec. 5.2: experiment-jee.ipynb
* Sec. 5.3: experiment-finite.ipynb
* Appendix A: appendix-different-prior.ipynb
The code for finite-size selection is in the `finite` folder; the different distributions are implemented in `distribution`.
The output data is stored in the `data` folder.
# Requirements
* python3
* numpy
* pandas
* scipy
* matplotlib
* jupyter-notebook
This diff is collapsed.
Source diff could not be displayed: it is too large. Options to address this: view the blob.
Source diff could not be displayed: it is too large. Options to address this: view the blob.
......@@ -14,7 +14,7 @@ def beta(x, q, g, a1, a2, beta=0.8):
nA = g.sum()
nB= n - nA
# perform greedy
I1, _ = greedy(x, q, g, n, a1, a2)
I1, _ = greedy(x, q, g, a1, a2)
n1A = g[I1].sum()
n1B = n1 - n1A
......
......@@ -38,7 +38,7 @@ def curves_one_stage(data, a1s, alg, T=10):
for a1 in a1s:
nA = np.sum(g)
I1, _ = alg(x, q, g, n, a1, a2=a1)
I1, _ = alg(x, q, g, a1, a2=a1)
pA_exp.append(g[I1].sum() / nA)
q_exp.append(q[I1].mean())
return a1s, np.array(q_exp), np.array(pA_exp)
......@@ -58,7 +58,7 @@ def curves_two_stage(data, a1s, alg, T=10):
for a1 in a1s:
nA = np.sum(g)
I1, I2 = alg(x, q, g, n, a1, a2)
I1, I2 = alg(x, q, g, a1, a2)
pA_exp.append(g[I1].sum() / nA)
q_exp.append(q[I2].sum() / n2)
return a1s, np.array(q_exp), np.array(pA_exp)
File deleted
from .first_stage import DecisionMaker
from .second_stage import *
import numpy as np
import scipy.stats as st
from scipy.optimize import newton, bisect
class DecisionMaker:
    """First-stage selection quantities for groups with Gaussian scores.

    Every constructor argument is converted with ``np.array`` and the
    attributes are combined element-wise, so they are expected to be
    per-group sequences that broadcast against each other.
    ``group_oblivious_threshold`` and ``opt1_threshold`` are written for
    exactly two groups (they build ``[x, x]`` / ``np.ones(2)`` vectors).

    NOTE(review): the interpretation of the parameters (quality mean, quality
    spread, group-dependent noise, group share, bias) is inferred from how
    they are used below -- confirm against the accompanying paper.

    Attributes:
        mu: per-group value added to ``beta`` to locate the score distribution.
        sigma: per-group standard deviation (first variance component).
        sigmag: per-group standard deviation (second variance component).
        beta: per-group shift added to ``mu``.
        pg: per-group weights used when summing over groups.
        sigmasum: ``sqrt(sigma**2 + sigmag**2)``, the score standard deviation.
    """

    def __init__(self, mu, sigma, sigmag, pg, beta):
        self.mu = np.array(mu)
        self.sigma = np.array(sigma)
        self.sigmag = np.array(sigmag)
        self.beta = np.array(beta)
        self.pg = np.array(pg)
        self.sigmasum = np.sqrt(self.sigma**2 + self.sigmag**2)

    def budget1(self, theta1):
        """Return the ``pg``-weighted sum of per-group selection probabilities."""
        theta1 = np.array(theta1)
        return np.sum(self.pg * self.selection_proba1(theta1))

    def selection_proba1(self, theta1):
        """Return, per group, ``P(score > theta1)`` for score ~ N(mu + beta, sigmasum)."""
        theta1 = np.array(theta1)
        return 1 - st.norm(self.mu + self.beta, self.sigmasum).cdf(theta1)

    def utility1(self, theta1, a1):
        """Return the first-stage utility for thresholds ``theta1``.

        Computes ``sum_g pg * (sigma^2 * pdf(d) + mu * cdf(d)) / a1`` where
        ``d = mu + beta - theta1`` and pdf/cdf are those of N(0, sigmasum).
        """
        theta1 = np.array(theta1)
        centred = st.norm(0, self.sigmasum)
        margin = self.mu + self.beta - theta1
        per_group = self.sigma**2 * centred.pdf(margin) + self.mu * centred.cdf(margin)
        return np.sum(per_group * self.pg) / a1

    def group_oblivious_threshold(self, a1):
        """Return a single threshold, repeated for two groups, with ``budget1 == a1``.

        The root is found with ``scipy.optimize.newton`` started at 0 (no
        derivative is supplied, so SciPy uses its secant iteration).
        """
        theta1 = newton(lambda x: self.budget1([x, x]) - a1, x0=0)
        return theta1 * np.ones(2)

    def dp_threshold(self, a1):
        """Return per-group thresholds giving every group selection probability ``a1``."""
        quantile = st.norm().ppf(1 - a1)
        return self.sigmasum * quantile + self.mu + self.beta

    def _theta_from_level(self, level):
        """Map a scalar level ``C`` to per-group thresholds.

        ``theta = (C - mu) * sigmasum^2 / sigma^2 + mu + beta``.
        """
        return (level - self.mu) * self.sigmasum**2 / self.sigma**2 + self.mu + self.beta

    def opt1_threshold(self, a1, method='newton'):
        """Return per-group thresholds of the form ``_theta_from_level(C)`` meeting budget ``a1``.

        The scalar ``C`` is solved from ``budget1(theta(C)) == a1``.  The
        starting points are the levels at which each group's threshold equals
        the group-oblivious threshold.

        Args:
            a1: target value of ``budget1``.
            method: ``'newton'`` (secant iteration from the two starting
                levels) or ``'bisect'`` (bisection using them as bracket).

        Raises:
            ValueError: if ``method`` is not one of the supported solvers.
                Previously an unknown method silently returned thresholds
                built from the unoptimised group-oblivious level.
        """
        if method not in ('newton', 'bisect'):
            raise ValueError(
                "unknown method %r; expected 'newton' or 'bisect'" % (method,)
            )

        oblivious = self.group_oblivious_threshold(a1)[0]
        # Levels for which each group's own threshold equals `oblivious`.
        Cinit = (oblivious - self.mu - self.beta) * self.sigma**2 / self.sigmasum**2 + self.mu

        def excess(level):
            return self.budget1(self._theta_from_level(level)) - a1

        if method == 'newton':
            C = newton(excess, x0=Cinit[0] + 1e-6, x1=Cinit[1])
        else:
            C = bisect(excess, a=Cinit[0] - 1e-6, b=Cinit[1])
        return self._theta_from_level(C)
import numpy as np
from scipy import integrate
from scipy import stats as st
def budget2(theta1, theta2, param, upper=10):
    """Return 2nd stage selection size.

    Integrates over ``x2`` from ``theta2`` to ``upper`` the ``pg``-weighted sum
    of ``pdf(x2) * cdf(x2 - theta1)``, where the pdf is that of
    N(0, sqrt(sigma^2 + sigmaq^2)) and the cdf that of N(0, sigmag).

    Args:
        theta1: first-stage threshold(s); converted with ``np.array``.
        theta2: lower integration limit.
        param: object exposing ``pg``, ``sigma``, ``sigmaq`` and ``sigmag``.
        upper: upper integration limit.  The default of 10 reproduces the
            previously hard-coded bound.

    Returns:
        The integral estimate returned by ``scipy.integrate.quad``.
    """
    theta1 = np.array(theta1)
    sigma_sum = np.sqrt(param.sigma ** 2 + param.sigmaq ** 2)
    # Build the frozen distributions once instead of on every integrand call.
    total_dist = st.norm(0, sigma_sum)
    group_dist = st.norm(0, param.sigmag)

    def integrand(x2):
        return (param.pg * total_dist.pdf(x2) * group_dist.cdf(-theta1 + x2)).sum()

    return integrate.quad(integrand, a=theta2, b=upper)[0]
def eq2(theta1, theta2, param, upper=10):
    """Return 2nd stage selection utility.

    Integrates over ``x2`` from ``theta2`` to ``upper`` the ``pg``-weighted sum
    of ``x2 * sigmaq^2 / (sigma^2 + sigmaq^2) * pdf(x2) * cdf(x2 - theta1)``,
    divided by ``param.alpha2``.  The pdf is that of
    N(0, sqrt(sigma^2 + sigmaq^2)) and the cdf that of N(0, sigmag).

    Args:
        theta1: first-stage threshold(s); converted with ``np.array``.
        theta2: lower integration limit.
        param: object exposing ``pg``, ``sigma``, ``sigmaq``, ``sigmag`` and
            ``alpha2``.
        upper: upper integration limit.  The default of 10 reproduces the
            previously hard-coded bound.

    Returns:
        The integral estimate returned by ``scipy.integrate.quad``.
    """
    theta1 = np.array(theta1)
    sigma_sum = np.sqrt(param.sigma ** 2 + param.sigmaq ** 2)
    # Build the frozen distributions once instead of on every integrand call.
    total_dist = st.norm(0, sigma_sum)
    group_dist = st.norm(0, param.sigmag)

    def integrand(x2):
        weighted = (param.pg * x2 * param.sigmaq ** 2 / sigma_sum ** 2
                    * total_dist.pdf(x2) * group_dist.cdf(-theta1 + x2))
        return weighted.sum() / param.alpha2

    return integrate.quad(integrand, a=theta2, b=upper)[0]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment