diff --git a/test/functional/test_toy_clf_fairness.py b/test/functional/test_toy_clf_fairness.py
index 41df97a8f2275f090099720de905c7e8b74cbe9d..b94b4f5b8995860c21da5a304d9e27abb5c25c0d 100644
--- a/test/functional/test_toy_clf_fairness.py
+++ b/test/functional/test_toy_clf_fairness.py
@@ -147,6 +147,7 @@ async def server_routine(
         rounds=5,
         register={"min_clients": n_clients, "timeout": 2},
         training={"n_epoch": 1, "batch_size": 10},
+        evaluate={"frequency": 5},  # only evaluate the last model
         fairness={"batch_size": 50},
     )
     with warnings.catch_warnings():
diff --git a/test/functional/test_toy_clf_secagg.py b/test/functional/test_toy_clf_secagg.py
index 3599d22f0c677c577c1bb7ad876141d7dee79cc1..429284ce1de66de4feede8165716607e5a07814a 100644
--- a/test/functional/test_toy_clf_secagg.py
+++ b/test/functional/test_toy_clf_secagg.py
@@ -143,9 +143,10 @@ async def async_run_server(
     )
     # Set up hyper-parameters and run training.
     config = FLRunConfig.from_params(
-        rounds=10,
+        rounds=8,
         register={"min_clients": n_clients, "timeout": 2},
         training={"n_epoch": 1, "batch_size": 1, "drop_remainder": False},
+        evaluate={"frequency": 8},  # only evaluate the last model
     )
     await server.async_run(config)
 
diff --git a/test/functional/test_toy_reg.py b/test/functional/test_toy_reg.py
index 515e182e5899cea3fdbab1da8c9ae9cb1bdaae7a..9b5514d920cee9047be43fe4aeccfa96b9326d7d 100644
--- a/test/functional/test_toy_reg.py
+++ b/test/functional/test_toy_reg.py
@@ -411,6 +411,7 @@ async def async_run_server(
             "batch_size": b_size,
             "drop_remainder": False,
         },
+        evaluate={"frequency": 10},  # only evaluate the last model
     )
     await server.async_run(config)