diff --git a/GPRL/algorithms.py b/GPRL/algorithms.py index 6f110dad9519ed5eea21b9da4a2235dfdbba6a12..9555737762d4ee8ec10659064d950b83e5e8be88 100644 --- a/GPRL/algorithms.py +++ b/GPRL/algorithms.py @@ -10,7 +10,7 @@ from timeit import default_timer as timer from .UCB import UCBFitness, HeapWithKey -#stochastique objective must be placed first ! +#/!\ stochastic objective must be placed first! def eaMuPlusLambdaUCB(population, toolbox, simulation_budget, parallel_update, mu, lambda_, cxpb, mutpb, ngen, select=False, stats=None, halloffame=None, verbose=__debug__, budget_scheduler=None, iteration_callback=None): assert all([isinstance(ind.fitness, UCBFitness) for ind in population]) @@ -41,6 +41,8 @@ def eaMuPlusLambdaUCB(population, toolbox, simulation_budget, parallel_update, m popoff.push(ind) tmp+=1 + population = toolbox.select(population, mu) + if halloffame is not None: halloffame.update(population) @@ -166,7 +168,7 @@ def regularized_evolution(population, toolbox, mu, lambda_, cxpb, mutpb, cycles return history, logbook -#objectif at zero +#objective is at fitness.fit[0]; fitness.fit[1:] is for features. def qdLambda(init_batch, toolbox, container, batch_size, ngen, lambda_, cxpb = 0.0, mutpb = 1.0, stats = None, halloffame = None, verbose = False, show_warnings = False, start_time = None, iteration_callback = None): """The simplest QD algorithm using DEAP. :param init_batch: Sequence of individuals used as initial batch. 
diff --git a/GPRL/genetic_programming/linearGP.py b/GPRL/genetic_programming/linearGP.py index 6565d9807d345fe2799e710d8d1f453ead8ed761..412be7c48915195a4f9bb8ecdc844e24a4bdeed2 100644 --- a/GPRL/genetic_programming/linearGP.py +++ b/GPRL/genetic_programming/linearGP.py @@ -30,11 +30,11 @@ class Interpreter(ABC):# Abstract class to define custom interpreter pass @abstractmethod - def to_string(opcode): + def toString(opcode): pass -class BasicInterpreter: +class BasicInterpreter(Interpreter): data_type="float" def __init__(self, mask=np.ones(NUM_OPS, dtype=bool)): self.num_ops = NUM_OPS @@ -483,7 +483,7 @@ from deap import tools from operator import attrgetter from copy import deepcopy #Diversity tournament -> book Chapter 9 : CONTROL OF DIVERSITY AND VARIATION STEP SIZE -def selDoubleTournament(individuals, k, fitness_size, diversity_size, fitness_first=True, fit_attr="fitness", effective=None): +def selDiversityTournament(individuals, k, fitness_size, diversity_size, fitness_first=True, fit_attr="fitness", effective=None): assert (1 <= diversity_size <= 2), "Parsimony tournament size has to be in the range [1, 2]." def _editDistTournament(individuals, k, select): diff --git a/GPRL/utils/gp_utils.py b/GPRL/utils/gp_utils.py index 9c666bc833f1bb7a4de7fb424b61d4c6f37d8a8e..1541984d9ebb3e592e6454c3edaacd1e85c2c16d 100644 --- a/GPRL/utils/gp_utils.py +++ b/GPRL/utils/gp_utils.py @@ -6,7 +6,7 @@ import numpy as np import random from deap import gp -import heapq +from operator import attrgetter operator_complexity = { 'add':1 , 'substract':1, 'const':1, 'multiply':1, 'divide':2, 'abs':2, 'or_':4, 'and_':4, 'gt':4, 'if_then_else':5, 'tanh':4, 'tan':4, diff --git a/README.md b/README.md index 92c41d04d70daebc78fe31d2bc2855e46c7c374c..afe069ab086a094572d6f125f12dd25ac5f8e74d 100644 --- a/README.md +++ b/README.md @@ -75,17 +75,24 @@ seed: #set seed for random ## See the result Once an experiment is finished, you can see inspect results like in `tutorial.ipynb`. 
This notebook show how to see and run an individual from a saved population. +## Examples of best policies found +The notebook `best_policy.ipynb` shows the best policy found by each method and demonstrates their portability and efficiency. A Google Colab version of this notebook can be found here: <https://colab.research.google.com/drive/11DdE4i2kY6dPXWtQ7Iwq4hejMXJ1XmNX?usp=sharing> + ## Environments -| **Environment** | **Name** | -|-----------------------|-------------------------------------| -| Cartpole | CartPole-v1 | -| Acrobot | Acrobot-v1 | -| MountainCar | MountainCarContinuous-v0 | -| Pendulum | Pendulum-v0 | -| InvDoublePend | InvertedDoublePendulumBulletEnv-v0 | -| InvPendSwingUp | InvertedPendulumSwingupBulletEnv-v0 | -| LunarLander | LunarLanderContinuous-v2 | -| BipedalWalker | BipedalWalker-v3 | -| BipedalWalkerHardCore | BipedalWalkerHardcore-v3 | -| Hopper | HopperBulletEnv-v0 | +\# | **Environment** | **Name** | +--|-----------------------|-------------------------------------| +1 | Cartpole | CartPole-v1 | +2 | Acrobot | Acrobot-v1 | +3 | MountainCar | MountainCarContinuous-v0 | +4 | Pendulum | Pendulum-v0 | +5 | InvDoublePend | InvertedDoublePendulumBulletEnv-v0 | +6 | InvPendSwingUp | InvertedPendulumSwingupBulletEnv-v0 | +7 | LunarLander | LunarLanderContinuous-v2 | +8 | BipedalWalker | BipedalWalker-v3 | +9 | BipedalWalkerHardCore | BipedalWalkerHardcore-v3 | +10| Hopper | HopperBulletEnv-v0 | + +`conf_gp_#` and `conf_lingp_#` are the configurations used for the environment numbers listed above (e.g. `conf_gp_124` is for the environments Cartpole, Acrobot and Pendulum; just modify `env` in `params`). + +<!--- You can reproduce the results for gp or lingp by running either `reproduce_results_gp.sh` or `reproduce_results_lingp.sh`. 
---> \ No newline at end of file diff --git a/best_policy.ipynb b/best_policy.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..a9c85975b1450275674b32878936053c58165dad --- /dev/null +++ b/best_policy.ipynb @@ -0,0 +1,196 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "9ihRVZVfoHsZ" + }, + "source": [ + "# Policies definition\n", + "Define all policy as a lambda function in dictionnaries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MTgB24OuBJWC", + "outputId": "49e47788-72c7-4b63-f95f-f32890f66c5a" + }, + "outputs": [], + "source": [ + "import gym\n", + "import pybullet_envs\n", + "import numpy as np\n", + "\n", + "def if_then_else(cond, true, false):\n", + " return true if cond else false\n", + "\n", + "GP_policy = {\n", + " \"CartPole-v1\": lambda s: int(s[3]> -8.44*s[2]),\n", + " \"Acrobot-v1\": lambda s: np.argmax([s[4], s[4], s[5]]),\n", + " \"MountainCarContinuous-v0\": lambda s:[if_then_else(abs(s[1])> 0.001, 4.861434963392261, 120.22370232914776)*s[1]],# Version that exploit proteced opérator ( in particular div(s[1],s[1]) used like a bit like Dirac delta function)\n", + " #\"MountainCarContinuous-v0\": lambda s: [(-36.1658411106967*s[1] + 11.14148115974943)*s[1]],# Version that don't use protected operator as non linearity.\n", + " \"Pendulum-v0\": lambda s: [s[0] - (9.16*s[2] + 40.14*s[1])/s[0]],\n", + " \"InvertedDoublePendulumBulletEnv-v0\": lambda s: [-10.7*s[7]],\n", + " \"InvertedPendulumSwingupBulletEnv-v0\": lambda s: [s[4] + s[3]*6.614633680991087 - s[2] + np.exp(if_then_else(s[3]>-0.7571072906634332, s[1], 15.013603569678889*s[1]))],\n", + " \"LunarLanderContinuous-v2\": lambda s: [-(2*s[3]+s[1]) - 0.20, -19.77*(s[2]-s[5]*(s[1]+1))],\n", + " \"BipedalWalker-v3\": lambda s: [s[6], s[7]/s[19], s[22]/s[7] - s[9] + s[0], s[0] - s[11]],\n", + " \"HopperBulletEnv-v0\": lambda s: 
[(s[9]+s[7])/(-25.552410848838754*s[5]-11.694385276665386), s[2] - s[10] + (np.sin(s[13]) - s[12])*s[14] - np.log(s[2])-s[11], -3.173856320619809*s[12]]\n", + "}\n", + "\n", + "LinGP_policy = {\n", + " \"CartPole-v1\": lambda s: int(s[3]> -s[2]-s[1]),\n", + " \"Acrobot-v1\": lambda s: np.argmax([0, if_then_else(5.79>s[5], s[5], -s[5]), if_then_else(s[4]<s[5], s[5] + 5.79, 0.0)]),\n", + " \"MountainCarContinuous-v0\": lambda s: [32.6*s[1]],\n", + " \"Pendulum-v0\": lambda s: [0.14 - 4.05*s[0]*(4.05*s[1] + s[2])],\n", + " \"InvertedDoublePendulumBulletEnv-v0\": lambda s: [-11.4*s[7]],\n", + " \"InvertedPendulumSwingupBulletEnv-v0\": lambda s: [4.80317773*(9.4457828593*s[3] + 2*s[4] + s[1])],\n", + " \"LunarLanderContinuous-v2\": lambda s: [if_then_else(s[1]>0, -0.5*s[1]-s[3]+0.11, 0), 4*(4*(s[4]-s[0])-s[2])],\n", + " \"BipedalWalker-v3\": lambda s: [if_then_else(s[3]<s[0], s[15]*s[21], 0.0), if_then_else(s[2]>s[11], np.sin(s[9])/-19.89, s[12]*s[6]), s[17]*s[6], if_then_else(s[18]<s[7], 1.0,if_then_else(0<s[17]*s[6], s[12]/s[16], 0))],\n", + " \"HopperBulletEnv-v0\": lambda s: [if_then_else(s[13]>0, 0.6146104060912387, 0), s[2]-s[10]-s[11], if_then_else(s[0]-s[3]< s[12], s[0]-s[6]-s[3]-s[7], -3.78765294*s[3]-s[7])]\n", + "}\n", + "\n", + "NMCS_policy = {\n", + " \"CartPole-v1\": lambda s: int(8.0/(s[3]+s[2])> s[2]),\n", + " \"Acrobot-v1\": lambda s: np.argmax([1, 0, 9.0/s[5]]),\n", + " \"MountainCarContinuous-v0\": lambda s: [24*s[1]],\n", + " \"Pendulum-v0\": lambda s: [s[1] - 9.0*s[0]/(s[1]+s[2])],\n", + "}\n", + "\n", + "\n", + "QD_GP_policy = {\n", + " \"BipedalWalker-v3\": lambda s : [s[17]*s[18], s[15] - s[6], (s[17]/-4.829122484272009) - s[9], (-0.9309851003020525/(-29.687535240681534*s[1])) - s[11]],\n", + " \"HopperBulletEnv-v0\": lambda s: [np.sin(np.exp(s[8])), -6.257060739725605*(s[7] + np.sin(s[3]+s[7])), np.sin(np.sin(s[7])-np.sin(s[8])-s[10]*(s[1]-np.log(abs(s[8]*s[3])+0.0001) - 5.860219777510614))]\n", + "}\n", + "\n", + "QD_LinGP_policy = {\n", + " 
\"BipedalWalker-v3\": lambda s: [if_then_else(s[7]>-0.62, s[4]/-0.62, if_then_else(s[18]<s[5], s[2]*s[20], 0)), s[20]/(s[1]+0.001), s[18]-s[11], 0.3-s[11]],\n", + " \"HopperBulletEnv-v0\": lambda s: [0.3568142180554972, if_then_else(s[11]>0, if_then_else(s[0]<0, - np.sin(s[7]), 0.0), s[12] - s[10]) - s[11], if_then_else(s[0]>s[7], 0.3492908874652392, -4.118712031661802*s[3])]\n", + "}\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qmzixwsae3vi" + }, + "source": [ + "### Policies description\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_xmpLt4laxwn" + }, + "source": [ + "__Tree GP and LinearGP policies :__ \n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s1EZGBWgfTDM" + }, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SPhZzjn8n2VS" + }, + "source": [ + "__QD Policies :__\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P6LZPtZeaY_U" + }, + "source": [ + "## Simulate a policy on the environment" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vMMV7NmFBZfd", + "outputId": "bbfa4615-97ca-4ac2-d0dc-c6b6303b4795" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done : True cumulative reward : 310.7869826499602 Number of steps : 1392\n" + ] + } + ], + "source": [ + "name = \"BipedalWalker-v3\"#Environment to test\n", + "\n", + "env = gym.make(name)\n", + "#env.seed()\n", + "\n", + "agent = QD_GP_policy[name]#Take the policy from a specific approach\n", + "\n", + "obs = env.reset()\n", + "\n", + "r = 0\n", + "for i in range(2000):\n", + " action = agent(obs)\n", + " env.render()\n", + " obs, reward, done, info = env.step(action)\n", + " r+=reward\n", + "\n", + " if done:\n", + " break\n", + " \n", + "env.close()\n", + "\n", + "print(\"Done : \", done, \"cumulative reward : \", r, \"Number of steps : \", 
i+1)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "lY_pFlRoaAbO" + ], + "name": "Symbolic_policy.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/conf/conf_gp.yml b/conf/conf_gp.yml deleted file mode 100644 index b5d71c0e743fc9ab711a05f7eeac5dbbb37279bb..0000000000000000000000000000000000000000 --- a/conf/conf_gp.yml +++ /dev/null @@ -1,26 +0,0 @@ -algorithm: - name: algorithms.eaSimple - args: - ngen: 100 - cxpb: 0.1 - mutpb: 0.9 - -population: - init_size: 100 - -selection: - name: selNSGA2 - args: - -individual: Tree - -params: - env: "MountainCarContinuous-v0" - function_set: small - c: 0.0 - n_episodes: 1 - n_steps: 100 - gamma: 1.0 - n_thread: 1 - -seed: 42 \ No newline at end of file diff --git a/conf/conf_gpUCB_124.yml b/conf/conf_gpUCB_124.yml deleted file mode 100644 index a414c29cb672e1c9130d833a79e7f0218da92739..0000000000000000000000000000000000000000 --- a/conf/conf_gpUCB_124.yml +++ /dev/null @@ -1,32 +0,0 @@ -algorithm: - name: algo.eaMuPlusLambdaUCB - args: - mu: 100 - lambda_: 100 - simulation_budget: 5 - parallel_update: 16 - save_every: 10 - ngen: 200 - cxpb: 0.1 - mutpb: 0.9 - budget_scheduler: [[50, 10], [100, 20], [190, 50]] - -population: - init_size: 100 - -selection: - name: selNSGA2 - args: - -individual: Tree - -params: - env: "CartPole-v1" - function_set: small - c: 0.0 - n_episodes: 1 - n_steps: 500 - gamma: 1.0 - n_thread: 16 - -seed: 42 \ No newline at end of file diff --git a/conf/conf_gpUCB.yml b/conf/conf_gp_124.yml similarity index 94% rename from conf/conf_gpUCB.yml rename to conf/conf_gp_124.yml index 
a414c29cb672e1c9130d833a79e7f0218da92739..f41103acfff707b6b539efb62838782ed85449fb 100644 --- a/conf/conf_gpUCB.yml +++ b/conf/conf_gp_124.yml @@ -27,6 +27,4 @@ params: n_episodes: 1 n_steps: 500 gamma: 1.0 - n_thread: 16 - -seed: 42 \ No newline at end of file + n_thread: 16 \ No newline at end of file diff --git a/conf/conf_gpUCB_3.yml b/conf/conf_gp_3.yml similarity index 86% rename from conf/conf_gpUCB_3.yml rename to conf/conf_gp_3.yml index 2cc45982bde04b71d8da28530cf1774ac148909c..5f50423a0cfe237ccd369544f3188d60718a1722 100644 --- a/conf/conf_gpUCB_3.yml +++ b/conf/conf_gp_3.yml @@ -9,7 +9,7 @@ algorithm: ngen: 100 cxpb: 0.1 mutpb: 0.9 - budget_scheduler: [[25, 50], [50, 200], [90, 500]] + budget_scheduler: [[25, 50], [50, 100], [90, 250]] population: init_size: 500 @@ -29,5 +29,3 @@ params: gamma: 1.0 n_thread: 16 max_tree_size: 4 - -seed: 42 diff --git a/conf/conf_gpUCB_5678910.yml b/conf/conf_gp_5678910.yml similarity index 79% rename from conf/conf_gpUCB_5678910.yml rename to conf/conf_gp_5678910.yml index 11e22e065580a99a3abc8ba01b6c496b9b8b972e..774db1d3378fe04680b4d7212f639370fe038c89 100644 --- a/conf/conf_gpUCB_5678910.yml +++ b/conf/conf_gp_5678910.yml @@ -9,7 +9,7 @@ algorithm: ngen: 2000 cxpb: 0.1 mutpb: 0.9 - budget_scheduler: [[500, 50], [1000, 75], [1500, 150], [1950, 250]] + budget_scheduler: [[500, 25], [1000, 50], [1500, 100], [1950, 250]] population: init_size: 1000 @@ -21,12 +21,10 @@ selection: individual: Tree params: - env: "CartPole-v1" + env: "InvertedPendulumBulletEnv-v0" function_set: extended c: 0.0 n_episodes: 1 n_steps: 5000 gamma: 1.0 n_thread: 16 - -seed: 42 diff --git a/conf/conf_lingp.yml b/conf/conf_lingp_1234.yml similarity index 82% rename from conf/conf_lingp.yml rename to conf/conf_lingp_1234.yml index 9b31e24296a4c697d9fc9b368507486fcfce0cde..5fd112f936c556e1a312c06dc8a4ae6a35b2ee0a 100644 --- a/conf/conf_lingp.yml +++ b/conf/conf_lingp_1234.yml @@ -9,7 +9,7 @@ algorithm: ngen: 200 cxpb: 0.0 mutpb: 1.0 - 
budget_scheduler: [[50, 10], [100, 20], [190, 100]] + budget_scheduler: [[50, 10], [100, 20], [190, 50]] population: init_size: 100 @@ -22,7 +22,7 @@ selection: individual: Linear params: - env: "MountainCarContinuous-v0" + env: "CartPole-v1" function_set: small c: 0.0 n_episodes: 1 @@ -38,6 +38,4 @@ params: pDel: 0.6 pSwap: 0.1 pMut: 0.5 - n_thread: 16 - -seed: 42 \ No newline at end of file + n_thread: 16 \ No newline at end of file diff --git a/conf/conf_lingp_5678910.yml b/conf/conf_lingp_5678910.yml new file mode 100644 index 0000000000000000000000000000000000000000..c814fe4bc88c428cd4122be61bd0498d9556824d --- /dev/null +++ b/conf/conf_lingp_5678910.yml @@ -0,0 +1,41 @@ +algorithm: + name: algo.eaMuPlusLambdaUCB + args: + mu: 500 + lambda_: 500 + simulation_budget: 5 + parallel_update: 16 + save_every: 10 + ngen: 2000 + cxpb: 0.0 + mutpb: 1.0 + budget_scheduler: [[500, 25], [1000, 50], [1500, 100], [1950, 250]] + +population: + init_size: 100 + +selection: + name: selTournament + args: + tournsize: 5 + +individual: Linear + +params: + env: "InvertedPendulumBulletEnv-v0" + function_set: extended + c: 0.0 + n_episodes: 1 + n_steps: 2000 + gamma: 1.0 + regCalcSize: 4 + regConstSize: 10 + init_size_min: 2 + init_size_max: 5 + pConst: 0.3 + pBranch: 0.3 + pIns: 0.3 + pDel: 0.6 + pSwap: 0.1 + pMut: 0.5 + n_thread: 16 \ No newline at end of file diff --git a/conf/conf_qdgp-BipedalWalker.yml b/conf/conf_qdgp-BipedalWalker.yml index ef39b9288b4b4ac6f6e81744c502e6082cdfcd73..31180f245939b2883ce64f54986484b5b26e9e74 100644 --- a/conf/conf_qdgp-BipedalWalker.yml +++ b/conf/conf_qdgp-BipedalWalker.yml @@ -4,7 +4,7 @@ algorithm: save_every: 10 batch_size: 100 lambda_: 500 - ngen: 5000 + ngen: 2000 cxpb: 0.0 mutpb: 1.0 show_warnings: True @@ -19,7 +19,7 @@ population: fitness_domain: [[-200_000.0, 350.0],] selection: - name: selRandom + name: selQDRandom args: individual: Tree @@ -28,11 +28,9 @@ params: env: BipedalWalker-v3 function_set: extended c: 0.0 - n_episodes: 3 - 
n_steps: 500 + n_episodes: 1 + n_steps: 2000 gamma: 1.0 features_kept: [False, False, True, False, True, False, False, False] tree_max_depth: 10 - n_thread: 16 - -seed: 42 \ No newline at end of file + n_thread: 16 \ No newline at end of file diff --git a/conf/conf_qdgp-Hopper.yml b/conf/conf_qdgp-Hopper.yml index 58d632c16dd3f5b630fed18684020d943b92816a..7a47bc2ac2a71a0c86c63c472dd9436194a219ac 100644 --- a/conf/conf_qdgp-Hopper.yml +++ b/conf/conf_qdgp-Hopper.yml @@ -4,7 +4,7 @@ algorithm: save_every: 10 batch_size: 100 lambda_: 500 - ngen: 5000 + ngen: 2000 cxpb: 0.0 mutpb: 1.0 show_warnings: True @@ -14,13 +14,13 @@ algorithm: population: init_size: 1000 args: - shape: [10, 10, 10] - max_items_per_bin: 5 + shape: [10, 10] + max_items_per_bin: 10 features_domain: [[0, 100], [0., 20.], [0.0, 1.2], [0.0, 1.2], [0.0, 1.2], [0.0, 1.0]] fitness_domain: [[-200_000.0, 2000.0],] selection: - name: selRandom + name: selQDRandom args: individual: Tree @@ -29,11 +29,9 @@ params: env: HopperBulletEnv-v0 function_set: extended c: 0.0 - n_episodes: 3 - n_steps: 500 + n_episodes: 1 + n_steps: 2000 gamma: 1.0 - features_kept: [False, False, False, True, True, True] + features_kept: [False, False, False, False, True, True] tree_max_depth: 10 - n_thread: 16 - -seed: 42 \ No newline at end of file + n_thread: 16 \ No newline at end of file diff --git a/conf/conf_qdlingp-BipedalWalker.yml b/conf/conf_qdlingp-BipedalWalker.yml index 2edc0e8ab9322160096fcb993396f19fa46c5c06..60399fae802ac8910b0d620cfa9e246843896aa6 100644 --- a/conf/conf_qdlingp-BipedalWalker.yml +++ b/conf/conf_qdlingp-BipedalWalker.yml @@ -4,7 +4,7 @@ algorithm: save_every: 10 batch_size: 100 lambda_: 500 - ngen: 5000 + ngen: 2000 cxpb: 0.0 mutpb: 1.0 show_warnings: True @@ -20,7 +20,7 @@ population: fitness_domain: [[-200_000.0, 350.0],] selection: - name: selRandom + name: selQDRandom args: individual: Linear @@ -29,8 +29,8 @@ params: env: BipedalWalker-v3 function_set: extended c: 0.0 - n_episodes: 3 - n_steps: 
500 + n_episodes: 1 + n_steps: 2000 gamma: 1.0 features_kept: [False, False, True, False, True, False, False, False] regCalcSize: 16 @@ -43,6 +43,4 @@ params: pDel: 0.6 pSwap: 0.1 pMut: 0.5 - n_thread: 16 - -seed: 42 \ No newline at end of file + n_thread: 16 \ No newline at end of file diff --git a/conf/conf_qdlingp-Hopper.yml b/conf/conf_qdlingp-Hopper.yml index 3936eeb9c28504963e5320ae290443bc0f88f2b6..3f968c093f923c96ef7429f49ca6c8cfca2e70c4 100644 --- a/conf/conf_qdlingp-Hopper.yml +++ b/conf/conf_qdlingp-Hopper.yml @@ -4,7 +4,7 @@ algorithm: save_every: 10 batch_size: 100 lambda_: 500 - ngen: 5000 + ngen: 2000 cxpb: 0.0 mutpb: 1.0 show_warnings: True @@ -14,13 +14,13 @@ algorithm: population: init_size: 1000 args: - shape: [10, 10, 10] - max_items_per_bin: 5 + shape: [10, 10] + max_items_per_bin: 10 features_domain: [[0, 100], [0., 20.], [0.0, 1.2], [0.0, 1.2], [0.0, 1.2], [0.0, 1.0]] fitness_domain: [[-200_000.0, 2000.0],] selection: - name: selRandom + name: selQDRandom args: individual: Linear @@ -29,10 +29,10 @@ params: env: HopperBulletEnv-v0 function_set: extended c: 0.0 - n_episodes: 3 - n_steps: 500 + n_episodes: 1 + n_steps: 2000 gamma: 1.0 - features_kept: [False, False, False, True, True, True] + features_kept: [False, False, False, False, True, True] regCalcSize: 16 regConstSize: 20 init_size_min: 2 @@ -43,6 +43,4 @@ params: pDel: 0.6 pSwap: 0.1 pMut: 0.5 - n_thread: 16 - -seed: 42 \ No newline at end of file + n_thread: 16 \ No newline at end of file diff --git a/evolve.py b/evolve.py index 2ff9ff3f662d608e8b3757df02ffadda35ded38b..19b1d72f818984f863075dc4b6ef8ab986a03237 100644 --- a/evolve.py +++ b/evolve.py @@ -1,14 +1,12 @@ from GPRL.utils.utils import basic_budget_scheduler, save_each_generation -import pandas as pd import numpy as np import random -from deap import tools if "__main__" == __name__: import yaml import argparse import multiprocessing - from deap import algorithms + from deap import gp, algorithms from GPRL import algorithms as 
algo from GPRL.utils.utils import convert_logbook_to_dataframe from GPRL.UCB import UpdateFitnessHof, UpdateFitnessParetoFront @@ -16,7 +14,7 @@ if "__main__" == __name__: import ntpath from shutil import copyfile import pickle - import time + from datetime import datetime parser = argparse.ArgumentParser(description='Main programm to launch experiments from yaml configuration file') parser.add_argument("--conf", required=True, help="configuration file path", type=str) @@ -27,6 +25,18 @@ if "__main__" == __name__: with open(args.conf) as f: conf = yaml.load(f, Loader=yaml.SafeLoader) + if conf.get("seed", None): + random.seed(conf["seed"]) + np.random.seed(random.randint(0, 4294967295)) + + if conf.get("selection", None): + if conf["selection"]["name"] in ["selQDRandom", "selFitProp"]: + conf["selection"]["name"] = "gp_utils." + conf["selection"]["name"] + elif conf["selection"] == "selDiversityTournament": + conf["selection"]["name"] = "linGP." + conf["selection"]["name"] + else: + conf["selection"]["name"] = "tools." 
+ conf["selection"]["name"] + conf["params"]["selection"] = conf["selection"] if conf["individual"] == "Tree": import experiments.gp as evoTool @@ -43,11 +53,6 @@ if "__main__" == __name__: pool = multiprocessing.Pool(conf["params"]["n_thread"], initializer=factory.init_global_var) evoTool.toolbox.register("map", pool.map) - - - if conf.get("seed", None): - np.random.seed(conf["seed"]) - random.seed(conf["seed"]) if "qd" in conf["algorithm"]["name"]: from GPRL.containers.grid import FixGrid as Grid @@ -67,7 +72,7 @@ if "__main__" == __name__: hof = UpdateFitnessHof(10, maxsize_arm=10) #hof = UpdateFitnessParetoFront() - dir = os.path.join(args.path, "log-"+ conf["params"]["env"] + "-"+ ntpath.basename(args.conf)[:-4] +"-"+str(time.time())) + dir = os.path.join(args.path, "log-"+ conf["params"]["env"] + "-"+ ntpath.basename(args.conf)[:-4] +"--"+datetime.today().strftime('%Y-%m-%d-%H_%M_%S')) if not os.path.exists(dir): os.mkdir(dir) copyfile(args.conf, os.path.join(dir, "conf.yml")) @@ -78,15 +83,39 @@ if "__main__" == __name__: conf["algorithm"]["args"]["iteration_callback"] = save_each_generation(dir, modulo=conf["algorithm"]["args"]["save_every"]) del conf["algorithm"]["args"]["save_every"] - algorithm = eval(conf["algorithm"]["name"])#/!\ not good from a security point of view but flexible + algorithm = eval(conf["algorithm"]["name"])#/!\ not good from a security point of view but flexible... 
pop, log = algorithm(pop, evoTool.toolbox, halloffame=hof, stats=mstats, **conf["algorithm"]["args"]) + df = convert_logbook_to_dataframe(log) + + if conf["algorithm"]["name"] != "algo.qdLambda": + print("Re-evaluating best individual on 1000 episodes for unbiased result...") + print("But maybe a better one could be found manually in the population.") + print() + max_eval = len(max(pop, key=lambda ind: len(ind.fitness.rewards)).fitness.rewards) + best = min([ind for ind in pop if len(ind.fitness.rewards) >= 0.8*max_eval], key=lambda ind: ind.fitness.values[1]) + + results = np.array(evoTool.toolbox.map(evoTool.toolbox.evaluate, [best for _ in range(1000)])) + + if isinstance(best[0], gp.PrimitiveTree): + for tree in best: + print(tree) + else: + print(best.to_effective(list(range(evoTool.OUTPUT)))[0]) + print(f"cumulative rewards = {results[:,0].mean()} +- {results[:,0].std()}") + + row = df.iloc[-1,:].copy() + row["fitness_max"] = results[:,0].mean() + row.iloc[0] += 1 + df = df.append(row) # adding unbiased fitness on last row and fitness max column + with open(os.path.join(dir, "pop-final.pkl"), 'wb') as output: pickle.dump(list(pop), output, pickle.HIGHEST_PROTOCOL) with open(os.path.join(dir, "hof-final.pkl"), 'wb') as output: pickle.dump(list(hof), output, pickle.HIGHEST_PROTOCOL) - convert_logbook_to_dataframe(log).to_csv(os.path.join(dir, "log.csv"), index=False) + df.to_csv(os.path.join(dir, "log.csv"), index=False) + print("Experiment is saved at : ", dir) factory.close() diff --git a/experiments/gp.py b/experiments/gp.py index 17d6d8b4aff2eee89ad87c6d8d6eebc88e234181..d6ecdc57833056c5f2b4c3c1d5d5df0996efa9ff 100644 --- a/experiments/gp.py +++ b/experiments/gp.py @@ -103,8 +103,11 @@ class Factory(EvolveFactory): toolbox.register("evaluate", MC_fitness, n_steps=self.conf["n_steps"], num_episodes=self.conf["n_episodes"], gamma=self.conf["gamma"]) - if self.conf.get("selection", False): - toolbox.register("select", eval(self.conf["selection"]["name"]), 
**self["selection"]["args"]) + if self.conf.get("selection", False): + if self.conf["selection"]["args"]: + toolbox.register("select", eval(self.conf["selection"]["name"]), **self.conf["selection"]["args"]) + else: + toolbox.register("select", eval(self.conf["selection"]["name"])) else: toolbox.register("select", tools.selNSGA2) @@ -124,7 +127,7 @@ class Factory(EvolveFactory): return toolbox, creator def get_stats(self): - stats_fit = tools.Statistics(lambda ind: ind.fitness.values[0]) + stats_fit = tools.Statistics(lambda ind: sum(ind.fitness.rewards)/len(ind.fitness.rewards) if ind.fitness.rewards else ind.fitness.values[0]) stats_complexity = tools.Statistics(lambda ind: team.team_complexity(ind, gp_utils.complexity)) stats_size = tools.Statistics(len) #stats_bandit = tools.Statistics(lambda ind: len(ind.fitness.rewards)) diff --git a/experiments/linGP.py b/experiments/linGP.py index 349e83f0bd12c7964daa55b7cd3d19e269d4766e..b451ccd81e265bec9d0a24cf30e5849fcb7974fe 100644 --- a/experiments/linGP.py +++ b/experiments/linGP.py @@ -17,6 +17,7 @@ except ImportError: from deap import creator, tools, base from GPRL.genetic_programming import linearGP as linGP from GPRL.UCB import UCBFitness +from GPRL.utils import gp_utils from GPRL.factory import EvolveFactory from GPRL.utils.utils import convert_logbook_to_dataframe, save_each_generation @@ -77,7 +78,7 @@ class Factory(EvolveFactory): toolbox, creator = self.make_toolbox() def make_toolbox(self): - creator.create("FitnessMax", UCBFitness, weights=(1.0, -1.0)) + creator.create("FitnessMax", UCBFitness, weights=(1.0, -1.0), c=self.conf["c"], sigma=1) creator.create("Individual", linGP.Program, fitness=creator.FitnessMax) if self.conf['function_set']=="small": @@ -94,8 +95,11 @@ class Factory(EvolveFactory): toolbox.register("evaluate", MC_fitness, n_steps=self.conf["n_steps"], num_episodes=self.conf["n_episodes"], gamma=self.conf["gamma"]) - if self.conf.get("selection", False): - toolbox.register("select", 
eval(self.conf["selection"]["name"]), **self["selection"]["args"]) + if self.conf.get("selection", False): + if self.conf["selection"]["args"]: + toolbox.register("select", eval(self.conf["selection"]["name"]), **self.conf["selection"]["args"]) + else: + toolbox.register("select", eval(self.conf["selection"]["name"])) else: toolbox.register("select", tools.selTournament, tournsize=5) @@ -105,7 +109,7 @@ class Factory(EvolveFactory): return toolbox, creator def get_stats(self): - stats_fit = tools.Statistics(lambda ind: ind.fitness.values[0]) + stats_fit = tools.Statistics(lambda ind: sum(ind.fitness.rewards)/len(ind.fitness.rewards) if ind.fitness.rewards else ind.fitness.values[0]) stats_complexity = tools.Statistics(lambda ind: sum(map(lambda x: linGP.opcode_complexity[x.opcode], ind.to_effective(list(range(OUTPUT)))[0]))) stats_eff = tools.Statistics(lambda ind: len(ind.to_effective(list(range(OUTPUT)))[0])) stats_size = tools.Statistics(len) diff --git a/reproduce_results.sh b/reproduce_results_gp.sh old mode 100755 new mode 100644 similarity index 89% rename from reproduce_results.sh rename to reproduce_results_gp.sh index bb632c2861857db6e35200fd7e31797feff56fe6..d7fb2b394ad59ef217522de1bda39039c0dec964 --- a/reproduce_results.sh +++ b/reproduce_results_gp.sh @@ -16,13 +16,13 @@ do ( ( if [[ "$pb" == *"Mountain"* ]]; then - conf="conf/conf_gpUCB_3.yml" + conf="conf/conf_gp_3.yml" elif [[ "$pb" == *"CartPole"* ]] || [[ "$pb" == *"Acrobot"* ]] || [[ "$pb" == "Pendulum-v0" ]]; then - conf="conf/conf_gpUCB_124.yml" + conf="conf/conf_gp_124.yml" elif [[ "$pb" == *""* ]]; then - conf="conf/conf_gpUCB_5678910.yml" + conf="conf/conf_gp_5678910.yml" fi - filename="conf/conf_gpUCB_${pb}_${stamp}.yml" + filename="conf/conf_gp_${pb}_${stamp}.yml" cp $conf $filename sed -i "s/env:.*/env: $pb/g" $filename python evolve.py --conf $filename & diff --git a/reproduce_results_lingp.sh b/reproduce_results_lingp.sh new file mode 100644 index 
0000000000000000000000000000000000000000..6d857b5af1f372a28314fa44a8799dc6b058c7e1 --- /dev/null +++ b/reproduce_results_lingp.sh @@ -0,0 +1,35 @@ + + +#for pb in CartPole Acrobot MountainCar Pendulum BipedalWalker BipedalWalkerHardcore +#do +#python -c "import pybullet ; import gym ; import pybullet ; gym.make('$pb-v2')" 2>&1 | tail -n 1 | sed 's/...$//g' | sed "s/.*'//g" +# +#done +#echo HopperBulletEnv-v0 +#echo InvertedDoublePendulum-v2 +#echo 'Not found: InvPendulumSwingUp ? Not pendulum ?' +#echo LunarLander-v2 + +stamp=STAMP${RANDOM}_${RANDOM}_`date | sed 's/ /_/g'` +for pb in `cat listpb.txt` +do +( +( + if [[ "$pb" == *"CartPole"* ]] || [[ "$pb" == *"Acrobot"* ]] || [[ "$pb" == "Pendulum-v0" ]] || [[ "$pb" == "MountainCarContinuous-v0" ]]; then + conf="conf/conf_lingp_1234.yml" + elif [[ "$pb" == *""* ]]; then + conf="conf/conf_lingp_5678910.yml" + fi + filename="conf/conf_lingp_${pb}_${stamp}.yml" + cp $conf $filename + sed -i "s/env:.*/env: $pb/g" $filename + python evolve.py --conf $filename & + wait +) | tee run_$stamp +) & +done + +wait + +#conf_gp.yml conf_gpUCB_124.yml conf_gpUCB_5678910.yml conf_qdgp-BipedalWalker.yml conf_qdlingp-BipedalWalker.yml +#conf_gpUCB.yml conf_gpUCB_3.yml conf_lingp.yml conf_qdgp-Hopper.yml conf_qdlingp-Hopper.yml