fix README and minor bugs

6813c9a1 · vitalii · f111fe57 · 6813c9a1 · 6813c9a1 · 6813c9a1
Commit 6813c9a1 authored 5 years ago by vitalii
--- a/README.md
+++ b/README.md
-# implicit_variance
\ No newline at end of file
+# Code from the paper "On the Effect of Positive Discrimination on Selection Problems in the Presence of Implicit Variance"
+
+To generate the figures, the following ipynb files are used:
+* Sec. 2-3: figures-model.ipynb
+* Sec. 3-4: figures1-stage-concave.ipynb, figures-1-2-stages.ipynb
+* Sec. 5.1: experiment-pareto.ipynb
+* Sec. 5.2: experiment-jee.ipynb
+* Sec. 5.3: experiment-finite.ipynb
+* Appendix A: appendix-different-prior.ipynb
+
+The code for finite-size selection is in the `finite` folder; the different distributions are implemented in `distribution`.
+The output data is stored in the `data` folder.
+
+# Requirements
+
+* python3
+* numpy
+* pandas
+* scipy
+* matplotlib
+* jupyter-notebook
--- a/code/appendix-different-prior.ipynb
+++ b/code/appendix-different-prior.ipynb
--- a/code/figures-1-2-stages.ipynb
+++ b/code/figures-1-2-stages.ipynb
--- a/code/figures-model.ipynb
+++ b/code/figures-model.ipynb
--- a/code/finite/beta.py
+++ b/code/finite/beta.py
@@ -14,7 +14,7 @@ def beta(x, q, g, a1, a2, beta=0.8):
    nA = g.sum()
    nB= n - nA
    # perform greedy
-    I1, _ = greedy(x, q, g, n, a1, a2)
+    I1, _ = greedy(x, q, g, a1, a2)
    n1A = g[I1].sum()
    n1B = n1 - n1A


--- a/code/finite/util.py
+++ b/code/finite/util.py
@@ -38,7 +38,7 @@ def curves_one_stage(data, a1s, alg, T=10):
    
    for a1 in a1s:
        nA = np.sum(g)
-        I1, _ = alg(x, q, g, n, a1, a2=a1)
+        I1, _ = alg(x, q, g, a1, a2=a1)
        pA_exp.append(g[I1].sum() / nA)
        q_exp.append(q[I1].mean())
    return a1s, np.array(q_exp), np.array(pA_exp)
@@ -58,7 +58,7 @@ def curves_two_stage(data, a1s,  alg, T=10):
    
    for a1 in a1s:
        nA = np.sum(g)
-        I1, I2 = alg(x, q, g, n, a1, a2)
+        I1, I2 = alg(x, q, g, a1, a2)
        pA_exp.append(g[I1].sum() / nA)
        q_exp.append(q[I2].sum() / n2)
    return a1s, np.array(q_exp), np.array(pA_exp)
--- a/code/normal_prior/.DS_Store
+++ b/code/normal_prior/.DS_Store
--- a/code/normal_prior/__init__.py
+++ b/code/normal_prior/__init__.py
-from .first_stage import DecisionMaker
-from .second_stage import *
--- a/code/normal_prior/first_stage.py
+++ b/code/normal_prior/first_stage.py
-import numpy as np
-import scipy.stats as st
-from scipy.optimize import newton,  bisect
-
-
-class DecisionMaker(object):
-
-    def __init__(self, mu, sigma, sigmag, pg, beta):
-        self.mu = np.array(mu)
-        self.sigma = np.array(sigma)
-        self.sigmag = np.array(sigmag)
-        self.beta = np.array(beta)
-        self.pg = np.array(pg)
-        self.sigmasum = np.sqrt(self.sigma**2 + self.sigmag**2) 
-
-    def budget1(self, theta1):
-        theta1 = np.array(theta1)
-        return np.sum(self.pg * self.selection_proba1(theta1))
-
-    def selection_proba1(self, theta1):
-        theta1 = np.array(theta1)
-        return 1 - st.norm(self.mu + self.beta, self.sigmasum).cdf(theta1)
-
-    def utility1(self, theta1, a1):
-        theta1 = np.array(theta1)
-        return np.sum((self.sigma**2 * st.norm(0, self.sigmasum).pdf(self.mu + self.beta - theta1)\
-         + self.mu * st.norm(0, self.sigmasum).cdf(self.mu + self.beta - theta1)) * self.pg) / a1
-
-    def group_oblivious_threshold(self, a1):
-        theta1 = newton(lambda x: self.budget1([x,x]) - a1, x0=0)
-        return theta1 * np.ones(2)
-
-    def dp_threshold(self, a1):
-        quantile = st.norm().ppf(1 - a1)
-        theta1 = self.sigmasum * quantile + self.mu + self.beta
-        return theta1
-
-    def opt1_threshold(self, a1, method='newton'):
-        C = self.group_oblivious_threshold(a1)[0]
-        Cinit = (C - self.mu - self.beta) * self.sigma**2 / self.sigmasum** 2 + self.mu
-        if method == 'newton':
-            C = newton(lambda C: self.budget1((np.array([C, C])  - self.mu)* self.sigmasum**2 / self.sigma**2 + self.mu + self.beta) - a1,\
-            x0=Cinit[0]+1e-6, x1=Cinit[1])
-        elif method == 'bisect':
-            C = bisect(lambda C: self.budget1((np.array([C, C])  - self.mu)* self.sigmasum**2 / self.sigma**2 + self.mu + self.beta) - a1,\
-             a=Cinit[0]-1e-6, b=Cinit[1])
-        theta1 = (C  - self.mu)* self.sigmasum**2 / self.sigma**2 + self.mu + self.beta
-        return theta1
--- a/code/normal_prior/second_stage.py
+++ b/code/normal_prior/second_stage.py
-import numpy as np
-from scipy import integrate
-from scipy import stats as st
-
-
-def budget2(theta1, theta2, param):
-    '''
-    Return 2nd stage selection size.
-    '''
-    sigma_sum = np.sqrt(param.sigma ** 2 + param.sigmaq ** 2)
-    theta1 = np.array(theta1)
-    def fun(x2):
-        return (param.pg * st.norm(0, sigma_sum).pdf(x2) * st.norm(0, param.sigmag).cdf(-theta1 + x2)).sum()
-
-    return integrate.quad(fun, a=theta2, b=10)[0]
-
-
-def eq2(theta1, theta2, param):
-    '''
-    Return 2nd stage selection utility.
-    '''
-    sigma_sum = np.sqrt(param.sigma ** 2 + param.sigmaq ** 2)
-    theta1 = np.array(theta1)
-
-    def fun(x2):
-        return (param.pg * x2 * param.sigmaq ** 2 / sigma_sum ** 2 * st.norm(0, sigma_sum).pdf(x2) * \
-                st.norm(0, param.sigmag).cdf(-theta1 + x2)).sum() / param.alpha2
-
-    return integrate.quad(fun, a=theta2, b=10)[0]