Improve 'mnist_quickrun' example documentation and add torch variant.

cf016b5f · ANDREY Paul · 7166e0a6 · cf016b5f · cf016b5f · cf016b5f
Verified Commit cf016b5f authored 1 year ago by ANDREY Paul
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -288,7 +288,10 @@ where to report it. An example:

 ```python
 [experiment]
-metrics=[["multi-classif",{labels = [0,1,2,3,4,5,6,7,8,9]}]] # Accuracy metric
+metrics = [
+    # Multi-label Accuracy, Precision, Recall and F1-Score.
+    ["multi-classif", {labels = [0,1,2,3,4,5,6,7,8,9]}]
+]
 checkpoint = "./result_custom" # Custom location for results
 ```


--- a/examples/mnist_quickrun/config.toml
+++ b/examples/mnist_quickrun/config.toml
 # This is a minimal TOML file for the MNIST example
 # It contains the bare minimum to make the experiment run.
-# See quickstart for more details. 
+# See quickstart for more details.

 # The TOML is parsed by Python as a dictionary with each `[header]`
 # as a key. Note the "=" sign and the absence of quotes around keys.
@@ -12,7 +12,7 @@
    port = 8765 # Port used, works as-is on most set ups

 [data] # Where to find your data
-    data_folder = "examples/mnist_quickrun/data_iid" 
+    data_folder = "examples/mnist_quickrun/data_iid"

 [optim] # Optimization options for both client and server
    aggregator = "averaging" # Server aggregation strategy
@@ -37,5 +37,7 @@
    batch_size = 128 # Evaluation batch size

 [experiment] # What to report during the experiment and where to report it
-    metrics=[["multi-classif",{labels = [0,1,2,3,4,5,6,7,8,9]}]] # Accuracy metric
-
+    metrics = [
+        # Multi-label Accuracy, Precision, Recall and F1-Score.
+        ["multi-classif", {labels = [0,1,2,3,4,5,6,7,8,9]}]
+    ]
--- a/examples/mnist_quickrun/mnist.ipynb
+++ b/examples/mnist_quickrun/mnist.ipynb
@@ -23,7 +23,9 @@
    "id": "Clzf4NTja121"
   },
   "source": [
-    "We first clone the repo, to have both the package itself and the `examples` folder we will use in this tutorial, then naviguate to the package directory, and finally install the required dependencies"
+    "We first clone the repo, to have both the package itself and the `examples` folder we will use in this tutorial, then naviguate to the package directory, and finally install the required dependencies.\n",
+    "\n",
+    "**If you have already cloned the repository and/or installed declearn, you may skip the following commands.** Simply make sure to set your current working directory to the folder under which the `examples/mnist_quickrun` subfolder may be found (as cloned or downloaded from the repo)."
   ]
  },
  {
@@ -100,7 +102,9 @@
   "source": [
    "## The model\n",
    "\n",
-    "To do this, we will use a simple CNN, defined in `examples/mnist_quickrun/model.py`"
+    "To do this, we will use a simple CNN, defined in `examples/mnist_quickrun/model.py`.\n",
+    "\n",
+    "Here, the model is implemented in TensorFlow, which is merely an implementation detail. If you update the `config.toml` file to use the `examples/mnist_quickrun/model_torch.py`, you will train a model with the same architecture, but implemented with Torch."
   ]
  },
  {
@@ -147,7 +151,8 @@
   ],
   "source": [
    "from examples.mnist_quickrun.model import network\n",
-    "network.summary()"
+    "\n",
+    "network.summary()  # network is a `tensorflow.keras.Model` instance"
   ]
  },
  {

 %% Cell type:markdown id: tags:

 This notebook is meant to be run in Google Colab. You can import your local copy of the file from the [Colab welcome page](https://colab.research.google.com/).

 %% Cell type:markdown id: tags:

 # Setting up your declearn

 %% Cell type:markdown id: tags:

-We first clone the repo, to have both the package itself and the `examples` folder we will use in this tutorial, then naviguate to the package directory, and finally install the required dependencies
+We first clone the repo, to have both the package itself and the `examples` folder we will use in this tutorial, then navigate to the package directory, and finally install the required dependencies.
+
+**If you have already cloned the repository and/or installed declearn, you may skip the following commands.** Simply make sure to set your current working directory to the folder under which the `examples/mnist_quickrun` subfolder may be found (as cloned or downloaded from the repo).

 %% Cell type:code id: tags:

 ``` python
 # you may want to specify a release branch or tag
 !git clone https://gitlab.inria.fr/magnet/declearn/declearn2
 ```

 %% Cell type:code id: tags:

 ``` python
 cd declearn2
 ```

 %% Cell type:code id: tags:

 ``` python
 # Install the package, with TensorFlow and Websockets extra dependencies.
 # You may want to work in a dedicated virtual environment.
 !pip install .[tensorflow,websockets]
 ```

 %% Cell type:markdown id: tags:

 # Running your first experiment

 %% Cell type:markdown id: tags:

 We are going to train a common model between three simulated clients on the classic [MNIST dataset](http://yann.lecun.com/exdb/mnist/). The input of the model is a set of images of handwritten digits, and the model needs to determine which number between 0 and 9 each image corresponds to.

 %% Cell type:markdown id: tags:

 ## The model

-To do this, we will use a simple CNN, defined in `examples/mnist_quickrun/model.py`
+To do this, we will use a simple CNN, defined in `examples/mnist_quickrun/model.py`.
+
+Here, the model is implemented in TensorFlow, which is merely an implementation detail. If you update the `config.toml` file to use the `examples/mnist_quickrun/model_torch.py`, you will train a model with the same architecture, but implemented with Torch.

 %% Cell type:code id: tags:

 ``` python
 from examples.mnist_quickrun.model import network
-network.summary()
+
+network.summary()  # network is a `tensorflow.keras.Model` instance
 ```

 %% Output

    Model: "sequential"
    _________________________________________________________________
     Layer (type)                Output Shape              Param #
    =================================================================
     conv2d (Conv2D)             (None, 26, 26, 8)         80
    
     max_pooling2d (MaxPooling2D  (None, 13, 13, 8)        0
     )
    
     dropout (Dropout)           (None, 13, 13, 8)         0
    
     flatten (Flatten)           (None, 1352)              0
    
     dense (Dense)               (None, 64)                86592
    
     dropout_1 (Dropout)         (None, 64)                0
    
     dense_1 (Dense)             (None, 10)                650
    
    =================================================================
    Total params: 87,322
    Trainable params: 87,322
    Non-trainable params: 0
    _________________________________________________________________

 %% Cell type:markdown id: tags:

 ## The data

 We start by splitting the MNIST dataset between 3 clients and storing the output in the `examples/mnist_quickrun` folder. For this we use an experimental utility provided by `declearn`.

 %% Cell type:code id: tags:

 ``` python
 from declearn.dataset import split_data

 split_data(folder="examples/mnist_quickrun")
 ```

 %% Output

    Downloading MNIST source file train-images-idx3-ubyte.gz.
    Downloading MNIST source file train-labels-idx1-ubyte.gz.
    Splitting data into 3 shards using the 'iid' scheme.

 %% Cell type:markdown id: tags:

 The python code above is equivalent to running `declearn-split examples/mnist_quickrun/` in a shell command-line.

 %% Cell type:markdown id: tags:

 Here is what the first image of the first client looks like:

 %% Cell type:code id: tags:

 ``` python
 import matplotlib.pyplot as plt
 import numpy as np

 images = np.load("examples/mnist_quickrun/data_iid/client_0/train_data.npy")
 sample_img = images[0]
 sample_fig = plt.imshow(sample_img,cmap='Greys')
 ```

 %% Output



 %% Cell type:markdown id: tags:

 For more information on how the `split_data` function works, you can look at the documentation.

 %% Cell type:code id: tags:

 ``` python
 print(split_data.__doc__)
 ```

 %% Output

    Randomly split a dataset into shards.
    
        The resulting folder structure is :
            folder/
            └─── data*/
                └─── client*/
                │      train_data.* - training data
                │      train_target.* - training labels
                │      valid_data.* - validation data
                │      valid_target.* - validation labels
                └─── client*/
                │    ...
    
        Parameters
        ----------
        folder: str, default = "."
            Path to the folder where to add a data folder
            holding output shard-wise files
        data_file: str or None, default=None
            Optional path to a folder where to find the data.
            If None, default to the MNIST example.
        target_file: str or int or None, default=None
            If str, path to the labels file to import, or name of a `data`
            column to use as labels (only if `data` points to a csv file).
            If int, index of a `data` column of to use as labels).
            Required if data is not None, ignored if data is None.
        n_shards: int
            Number of shards between which to split the data.
        scheme: {"iid", "labels", "biased"}, default="iid"
            Splitting scheme(s) to use. In all cases, shards contain mutually-
            exclusive samples and cover the full raw training data.
            - If "iid", split the dataset through iid random sampling.
            - If "labels", split into shards that hold all samples associated
            with mutually-exclusive target classes.
            - If "biased", split the dataset through random sampling according
            to a shard-specific random labels distribution.
        perc_train: float, default= 0.8
            Train/validation split in each client dataset, must be in the
            ]0,1] range.
        seed: int or None, default=None
            Optional seed to the RNG used for all sampling operations.
    

 %% Cell type:markdown id: tags:

 ## Quickrun

 We can now run our experiment. As explained in the section 2.1 of the [quickstart documentation](https://magnet.gitlabpages.inria.fr/declearn/docs/latest/quickstart), using the `declearn-quickrun` entry-point requires a configuration file, some data, and a model:

 * A TOML file, to store your experiment configurations. Here:
 `examples/mnist_quickrun/config.toml`.
 * A folder with your data, split by client. Here: `examples/mnist_quickrun/data_iid`
 * A model python file, to declare your model wrapped in a `declearn` object. Here: `examples/mnist_quickrun/model.py`.

 We then only have to run the `quickrun` coroutine with the path to the TOML file:

 %% Cell type:code id: tags:

 ``` python
 from declearn.quickrun import quickrun

 await quickrun(config="examples/mnist_quickrun/config.toml")
 ```

 %% Cell type:markdown id: tags:

 The python code above is equivalent to running `declearn-quickrun examples/mnist_quickrun/config.toml` in a shell command-line.

 %% Cell type:markdown id: tags:

 The output obtained is the combination of the CLI output of our server and our clients, going through:

 * `INFO:Server:Starting clients registration process.` : a first registration step, where clients register with the server
 * `INFO:Server:Sending initialization requests to clients.`: the initialization of the objects needed for training on both the server and client sides.
 * `Server:INFO: Initiating training round 1`: the training starts, where each client makes its local update(s) and sends the result to the server, which aggregates them
 * `INFO: Initiating evaluation round 1`: the model is evaluated at each round
 * `Server:INFO: Stopping training`: the training is finalized

 %% Cell type:markdown id: tags:

 ## Results

 You can have a look at the results in the `examples/mnist_quickrun/result_*` folder, including the metrics evolution during training.

 %% Cell type:code id: tags:

 ``` python
 # Load the server-side metrics written by the quickrun and plot them.
 import pandas as pd
 import glob
 import os

 # List the result folder(s) found under the example directory.
 # NOTE(review): `glob.glob` returns matches in arbitrary order, so if several
 # `result*` folders exist, `res_file[0]` is not guaranteed to be the latest run.
 res_file = glob.glob('examples/mnist_quickrun/result*')
 res = pd.read_csv(os.path.join(res_file[0],'server/metrics.csv'))
 # Plot the columns of the metrics table.
 res_fig = res.plot()
 ```

 %% Cell type:markdown id: tags:

 # Experiment further


 You can change the TOML config file to experiment with different strategies.

 %% Cell type:markdown id: tags:

 For instance, try splitting the data in a very heterogeneous way, by distributing digits in a mutually exclusive way between clients.

 %% Cell type:code id: tags:

 ``` python
 split_data(folder="examples/mnist_quickrun",scheme='labels')
 ```

 %% Cell type:markdown id: tags:

 And change the `examples/mnist_quickrun/config.toml` file with:

 ```
 [data]
    data_folder = "examples/mnist_quickrun/data_labels"
 ```

 %% Cell type:markdown id: tags:

 If you run the model as is, you should see a drop in performance.


 %% Cell type:code id: tags:

 ``` python
 quickrun(config="examples/mnist_quickrun/config.toml")
 ```

 %% Cell type:markdown id: tags:

 Now try modifying the `examples/mnist_quickrun/config.toml` file like this, to implement the [scaffold algorithm](https://arxiv.org/abs/1910.06378), and run the experiment again.

 ```
  [optim]

      [optim.client_opt]
      lrate = 0.005
      modules = ["scaffold-client"]

      [optim.server_opt]
      lrate = 1.0
       modules = ["scaffold-server"]
 ```

 %% Cell type:code id: tags:

 ``` python
 quickrun(config="examples/mnist_quickrun/config.toml")
 ```

--- a/examples/mnist_quickrun/model.py
+++ b/examples/mnist_quickrun/model.py
+# coding: utf-8
+
+# Copyright 2023 Inria (Institut National de Recherche en Informatique
+# et Automatique)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 """Simple TensorFlow-backed CNN model for the MNIST quickrun example."""

 import tensorflow as tf

 from declearn.model.tensorflow import TensorflowModel

+
 stack = [
    tf.keras.layers.InputLayer(input_shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(8, 3, 1, activation="relu"),

--- a/examples/mnist_quickrun/model_torch.py
+++ b/examples/mnist_quickrun/model_torch.py
+# coding: utf-8
+
+# Copyright 2023 Inria (Institut National de Recherche en Informatique
+# et Automatique)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Simple Torch-backed CNN model for the MNIST quickrun example."""
+
+import torch
+
+from declearn.model.torch import TorchModel
+
+
+# Layers of the CNN, in forward order. The sizes match the TensorFlow
+# variant of this example (8 conv filters, 1352 flattened units, 64, 10).
+stack = [
+    # Split dim 0 into (-1, 1), i.e. insert a size-1 axis after it.
+    # Presumably inputs are (batch, 28, 28) images and this adds the
+    # channel axis expected by Conv2d -- TODO confirm against the data loader.
+    torch.nn.Unflatten(dim=0, unflattened_size=(-1, 1)),
+    torch.nn.Conv2d(1, 8, 3, 1),  # 1 -> 8 channels, kernel size 3, stride 1
+    torch.nn.ReLU(),
+    torch.nn.MaxPool2d(2),
+    torch.nn.Dropout(0.25),
+    torch.nn.Flatten(),
+    torch.nn.Linear(1352, 64),  # 1352 = 8 * 13 * 13 for 28x28 inputs
+    torch.nn.ReLU(),
+    torch.nn.Dropout(0.5),
+    torch.nn.Linear(64, 10),  # one output unit per digit class
+    # NOTE(review): `torch.nn.CrossEntropyLoss` (used below) expects raw,
+    # unnormalized logits; with this Softmax layer the outputs end up being
+    # normalized twice when computing the loss. Confirm this is intended.
+    torch.nn.Softmax(dim=-1),
+]
+network = torch.nn.Sequential(*stack)
+
+# This needs to be called "model"; otherwise, a different name must be
+# specified via the experiment's TOML configuration file.
+model = TorchModel(network, loss=torch.nn.CrossEntropyLoss())