Mentions légales du service

Skip to content
Snippets Groups Projects
Commit e92fdd62 authored by GICQUEL Antoine's avatar GICQUEL Antoine :zzz:
Browse files

Update unit tests for the CUDA version of the direct computation

parent a5297401
No related tags found
No related merge requests found
......@@ -39,7 +39,9 @@ if(${CMAKE_PROJECT_NAME}_BUILD_PBC)
endif()
if(${CMAKE_PROJECT_NAME}_USE_CUDA)
list(APPEND source_tests_files direct/direct_cuda.cu)
list(APPEND source_tests_files
gpu/full_direct_gpu.cu
gpu/full_direct_gpu_source_target.cu)
endif()
set(TEST_DATA_FILES_PATH ${CMAKE_SOURCE_DIR}/data/units/)
......
This diff is collapsed.
/*
* full_direct_gpu.cu
*
* Created on: 13 March 2024
* Author: Antoine Gicquel
*/
// Unit test for the CUDA version of the direct computation (standard case)
// ---------------------------------------
// @FUSE_LIBCUDACXX
#define CATCH_CONFIG_RUNNER
#include <catch2/catch.hpp>
#include "scalfmm/matrix_kernels/laplace.hpp"
#include "scalfmm/matrix_kernels/scalar_kernels.hpp"
#include "unit_full_direct.hpp"
#include <iostream>
/**
guix shell --pure gcc-toolchain@11 cuda-toolkit@12 openblas pkg-config fftw fftwf bash \
coreutils ncurses cmake make -- bash --norc
export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libcuda.so:/usr/lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so
cmake -B build -DBLA_VENDOR=OpenBLAS -Dscalfmm_USE_CUDA=ON -Dscalfmm_BUILD_UNITS=ON
cmake --build build --target unit.full_direct_gpu
ctest --test-dir build --tests-regex full_direct_gpu --verbose
**/
/// @brief Sweep CUDA launch configurations for the single-set direct computation.
///
/// Calls `run` with 2000 particles for every (blocks-per-grid, threads-per-block)
/// pair taken from {16, 32, 64, 128, 256} and requires each call to succeed.
///
/// @tparam Dimension space dimension of the particles.
/// @tparam ValueType floating-point type forwarded to `run`.
/// @tparam MatrixKernelType matrix kernel forwarded to `run`.
template<std::size_t Dimension, typename ValueType, typename MatrixKernelType>
auto run_test() -> void
{
    static constexpr std::size_t dimension = Dimension;
    using value_type = ValueType;
    using matrix_kernel_type = MatrixKernelType;

    // Powers of two from 16 up to (but excluding) 512.
    constexpr std::size_t launch_sizes[]{16, 32, 64, 128, 256};

    const std::size_t number_of_particles{2000};
    const value_type eps{10 * std::numeric_limits<value_type>::epsilon()};

    for(const std::size_t blocks_per_grid: launch_sizes)
    {
        for(const std::size_t threads_per_block: launch_sizes)
        {
            REQUIRE(run<dimension, value_type, matrix_kernel_type>(number_of_particles, threads_per_block,
                                                                   blocks_per_grid, eps));
        }
    }
}
// Single-set GPU direct computation with 1-D particles: every listed kernel is
// exercised once in single precision and once in double precision.
TEMPLATE_TEST_CASE("full-direct-gpu-1d", "[full-direct-gpu-1d]",
                   scalfmm::matrix_kernels::laplace::one_over_r,
                   scalfmm::matrix_kernels::laplace::like_mrhs,
                   scalfmm::matrix_kernels::laplace::grad_one_over_r<1>,
                   scalfmm::matrix_kernels::laplace::val_grad_one_over_r<1>,
                   scalfmm::matrix_kernels::others::one_over_r2,
                   scalfmm::matrix_kernels::others::grad_one_over_r2<1>)
{
    SECTION("single precision", "[single-precision]") { run_test<1, float, TestType>(); }
    SECTION("double precision", "[double-precision]") { run_test<1, double, TestType>(); }
}
// Single-set GPU direct computation with 2-D particles: every listed kernel is
// exercised once in single precision and once in double precision.
TEMPLATE_TEST_CASE("full-direct-gpu-2d", "[full-direct-gpu-2d]",
                   scalfmm::matrix_kernels::laplace::one_over_r,
                   scalfmm::matrix_kernels::laplace::like_mrhs,
                   scalfmm::matrix_kernels::laplace::grad_one_over_r<2>,
                   scalfmm::matrix_kernels::laplace::val_grad_one_over_r<2>,
                   scalfmm::matrix_kernels::others::one_over_r2,
                   scalfmm::matrix_kernels::others::grad_one_over_r2<2>)
{
    SECTION("single precision", "[single-precision]") { run_test<2, float, TestType>(); }
    SECTION("double precision", "[double-precision]") { run_test<2, double, TestType>(); }
}
// Single-set GPU direct computation with 3-D particles: every listed kernel is
// exercised once in single precision and once in double precision.
TEMPLATE_TEST_CASE("full-direct-gpu-3d", "[full-direct-gpu-3d]",
                   scalfmm::matrix_kernels::laplace::one_over_r,
                   scalfmm::matrix_kernels::laplace::like_mrhs,
                   scalfmm::matrix_kernels::laplace::grad_one_over_r<3>,
                   scalfmm::matrix_kernels::laplace::val_grad_one_over_r<3>,
                   scalfmm::matrix_kernels::others::one_over_r2,
                   scalfmm::matrix_kernels::others::grad_one_over_r2<3>)
{
    SECTION("single precision", "[single-precision]") { run_test<3, float, TestType>(); }
    SECTION("double precision", "[double-precision]") { run_test<3, double, TestType>(); }
}
// Single-set GPU direct computation with 4-D particles: every listed kernel is
// exercised once in single precision and once in double precision.
TEMPLATE_TEST_CASE("full-direct-gpu-4d", "[full-direct-gpu-4d]",
                   scalfmm::matrix_kernels::laplace::one_over_r,
                   scalfmm::matrix_kernels::laplace::like_mrhs,
                   scalfmm::matrix_kernels::laplace::grad_one_over_r<4>,
                   scalfmm::matrix_kernels::laplace::val_grad_one_over_r<4>,
                   scalfmm::matrix_kernels::others::one_over_r2,
                   scalfmm::matrix_kernels::others::grad_one_over_r2<4>)
{
    SECTION("single precision", "[single-precision]") { run_test<4, float, TestType>(); }
    SECTION("double precision", "[double-precision]") { run_test<4, double, TestType>(); }
}
/// @brief Test-runner entry point (the file defines CATCH_CONFIG_RUNNER).
///
/// Both arguments are forwarded to the Catch2 session, so they are not marked
/// `[[maybe_unused]]`.
///
/// @param argc number of command-line arguments.
/// @param argv command-line arguments.
/// @return the value returned by `Catch::Session::run`.
auto main(int argc, char* argv[]) -> int
{
    return Catch::Session().run(argc, argv);
}
/*
* full_direct_gpu_source_target.cu
*
* Created on: 13 March 2024
* Author: Antoine Gicquel
*/
// Unit test for the CUDA version of the direct computation (source-target case)
// ----------------------------------------------------------------------------
// @FUSE_LIBCUDACXX
#define CATCH_CONFIG_RUNNER
#include <catch2/catch.hpp>
#include "scalfmm/matrix_kernels/laplace.hpp"
#include "scalfmm/matrix_kernels/scalar_kernels.hpp"
#include "unit_full_direct.hpp"
#include <iostream>
/**
guix shell --pure gcc-toolchain@11 cuda-toolkit@12 openblas pkg-config fftw fftwf bash \
coreutils ncurses cmake make -- bash --norc
export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libcuda.so:/usr/lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so
cmake -B build -DBLA_VENDOR=OpenBLAS -Dscalfmm_USE_CUDA=ON -Dscalfmm_BUILD_UNITS=ON
cmake --build build --target unit.full_direct_gpu_source_target
ctest --test-dir build --tests-regex full_direct_gpu_source_target --verbose
**/
/// @brief Sweep CUDA launch configurations for the source/target direct computation.
///
/// For every (blocks-per-grid, threads-per-block) pair taken from
/// {16, 32, 64, 128, 256}, `run` is required to succeed twice: first with 1000
/// sources and 2000 targets, then with 2000 sources and 1000 targets.
///
/// @tparam Dimension space dimension of the particles.
/// @tparam ValueType floating-point type forwarded to `run`.
/// @tparam MatrixKernelType matrix kernel forwarded to `run`.
template<std::size_t Dimension, typename ValueType, typename MatrixKernelType>
auto run_test() -> void
{
    static constexpr std::size_t dimension = Dimension;
    using value_type = ValueType;
    using matrix_kernel_type = MatrixKernelType;

    // Powers of two from 16 up to (but excluding) 512.
    constexpr std::size_t launch_sizes[]{16, 32, 64, 128, 256};

    const std::size_t number_of_particles{1000};
    const value_type eps{10 * std::numeric_limits<value_type>::epsilon()};

    for(const std::size_t blocks_per_grid: launch_sizes)
    {
        for(const std::size_t threads_per_block: launch_sizes)
        {
            // Fewer sources than targets ...
            REQUIRE(run<dimension, value_type, matrix_kernel_type>(number_of_particles, 2 * number_of_particles,
                                                                   threads_per_block, blocks_per_grid, eps));
            // ... then more sources than targets.
            REQUIRE(run<dimension, value_type, matrix_kernel_type>(2 * number_of_particles, number_of_particles,
                                                                   threads_per_block, blocks_per_grid, eps));
        }
    }
}
// Source/target GPU direct computation with 1-D particles: every listed kernel is
// exercised once in single precision and once in double precision.
TEMPLATE_TEST_CASE("full-direct-gpu-source-target-1d", "[full-direct-gpu-source-target-1d]",
                   scalfmm::matrix_kernels::laplace::one_over_r,
                   scalfmm::matrix_kernels::laplace::like_mrhs,
                   scalfmm::matrix_kernels::laplace::grad_one_over_r<1>,
                   scalfmm::matrix_kernels::laplace::val_grad_one_over_r<1>,
                   scalfmm::matrix_kernels::others::one_over_r2,
                   scalfmm::matrix_kernels::others::grad_one_over_r2<1>)
{
    SECTION("single precision", "[single-precision]") { run_test<1, float, TestType>(); }
    SECTION("double precision", "[double-precision]") { run_test<1, double, TestType>(); }
}
// Source/target GPU direct computation with 2-D particles: every listed kernel is
// exercised once in single precision and once in double precision.
TEMPLATE_TEST_CASE("full-direct-gpu-source-target-2d", "[full-direct-gpu-source-target-2d]",
                   scalfmm::matrix_kernels::laplace::one_over_r,
                   scalfmm::matrix_kernels::laplace::like_mrhs,
                   scalfmm::matrix_kernels::laplace::grad_one_over_r<2>,
                   scalfmm::matrix_kernels::laplace::val_grad_one_over_r<2>,
                   scalfmm::matrix_kernels::others::one_over_r2,
                   scalfmm::matrix_kernels::others::grad_one_over_r2<2>)
{
    SECTION("single precision", "[single-precision]") { run_test<2, float, TestType>(); }
    SECTION("double precision", "[double-precision]") { run_test<2, double, TestType>(); }
}
// Source/target GPU direct computation with 3-D particles: every listed kernel is
// exercised once in single precision and once in double precision.
TEMPLATE_TEST_CASE("full-direct-gpu-source-target-3d", "[full-direct-gpu-source-target-3d]",
                   scalfmm::matrix_kernels::laplace::one_over_r,
                   scalfmm::matrix_kernels::laplace::like_mrhs,
                   scalfmm::matrix_kernels::laplace::grad_one_over_r<3>,
                   scalfmm::matrix_kernels::laplace::val_grad_one_over_r<3>,
                   scalfmm::matrix_kernels::others::one_over_r2,
                   scalfmm::matrix_kernels::others::grad_one_over_r2<3>)
{
    SECTION("single precision", "[single-precision]") { run_test<3, float, TestType>(); }
    SECTION("double precision", "[double-precision]") { run_test<3, double, TestType>(); }
}
// Source/target GPU direct computation with 4-D particles: every listed kernel is
// exercised once in single precision and once in double precision.
TEMPLATE_TEST_CASE("full-direct-gpu-source-target-4d", "[full-direct-gpu-source-target-4d]",
                   scalfmm::matrix_kernels::laplace::one_over_r,
                   scalfmm::matrix_kernels::laplace::like_mrhs,
                   scalfmm::matrix_kernels::laplace::grad_one_over_r<4>,
                   scalfmm::matrix_kernels::laplace::val_grad_one_over_r<4>,
                   scalfmm::matrix_kernels::others::one_over_r2,
                   scalfmm::matrix_kernels::others::grad_one_over_r2<4>)
{
    SECTION("single precision", "[single-precision]") { run_test<4, float, TestType>(); }
    SECTION("double precision", "[double-precision]") { run_test<4, double, TestType>(); }
}
/// @brief Test-runner entry point (the file defines CATCH_CONFIG_RUNNER).
/// @param argc number of command-line arguments, forwarded to Catch2.
/// @param argv command-line arguments, forwarded to Catch2.
/// @return the value returned by `Catch::Session::run`.
int main(int argc, char* argv[])
{
    // Hand the command line over to a temporary Catch2 session and report its status.
    return Catch::Session().run(argc, argv);
}
#pragma once
#include "scalfmm/algorithms/full_direct.hpp"
#include "scalfmm/container/particle.hpp"
#include "scalfmm/container/point.hpp"
#include "scalfmm/matrix_kernels/gaussian.hpp"
#include "scalfmm/matrix_kernels/laplace.hpp"
#include "scalfmm/matrix_kernels/scalar_kernels.hpp"
#include "scalfmm/meta/utils.hpp"
#include "scalfmm/setup/scalfmm_cuda_compatibility.hpp"
#include "scalfmm/tree/box.hpp"
#include "scalfmm/tree/for_each.hpp"
#include "scalfmm/utils/accurater.hpp"
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <iomanip>
#include <iostream>
#include <limits>
#include <random>
#include <tuple>
#include <type_traits>
#include <vector>
#include <cpp_tools/colors/colorized.hpp>
#include <cpp_tools/timers/simple_timer.hpp>
/// @brief Fill every particle of a container with pseudo-random data.
///
/// Particles are visited in index order. For each one, every position coordinate
/// and then every input value is drawn from a uniform distribution built with
/// bounds (0, 2); every output is reset to zero; finally the particle's index in
/// the container is stored through `variables(...)`.
///
/// @tparam ParticleType particle type exposing `position_value_type`, `position()`,
///         `inputs()`, `outputs()` and `variables(...)`.
/// @param container particles to initialise in place (its size is not modified).
/// @param seed seed given to the `std::mt19937` generator.
template<typename ParticleType>
auto fill_container(std::vector<ParticleType>& container, const std::size_t seed = 123) -> void
{
    using value_type = typename ParticleType::position_value_type;

    std::mt19937 generator(seed);
    std::uniform_real_distribution<value_type> distribution(0., 2.);

    std::size_t particle_index{0};
    for(auto& particle: container)
    {
        // The draw order (positions first, then inputs) fixes the generated data set.
        for(auto& coordinate: particle.position())
        {
            coordinate = distribution(generator);
        }
        for(auto& input: particle.inputs())
        {
            input = distribution(generator);
        }
        for(auto& output: particle.outputs())
        {
            output = value_type(0.);
        }
        // Remember where the particle sits in the container.
        particle.variables(particle_index);
        ++particle_index;
    }
}
/// @brief Accumulate the differences between the outputs of two particle containers.
///
/// Each particle of `container1` is paired with the particle of `container2` whose
/// position in that container equals the first variable stored in the particle
/// (`fill_container` stores the particle index there). Every pair of output
/// components is then added to a `scalfmm::utils::accurater`.
///
/// @tparam ParticleType particle type exposing `outputs_value_type`, `outputs_size`,
///         `outputs()` and `variables()`.
/// @param container1 first set of particles; drives the iteration.
/// @param container2 second set of particles, accessed with unchecked indexing — the
///        indices stored in `container1` must be valid for it.
/// @return the accurater holding the accumulated (output1, output2) pairs.
template<typename ParticleType>
auto compute_error(std::vector<ParticleType> const& container1, std::vector<ParticleType> const& container2)
{
    using particle_type = ParticleType;
    using value_type = typename particle_type::outputs_value_type;
    constexpr std::size_t nb_out{particle_type::outputs_size};

    scalfmm::utils::accurater<value_type> error;
    for(auto const& particle1: container1)
    {
        // Copy the index instead of binding a reference: should `variables()` return
        // its tuple by value, a reference obtained through std::get would dangle.
        const auto idx1 = std::get<0>(particle1.variables());
        auto output1 = particle1.outputs();
        auto output2 = container2[idx1].outputs();
        for(std::size_t i{0}; i < nb_out; ++i)
        {
            error.add(output1.at(i), output2.at(i));
        }
    }
    return error;
}
/// @brief Compare the CPU and the CUDA direct computations on a single set of particles.
///
/// `N` particles are generated with a fixed seed and duplicated. One copy is processed
/// by `scalfmm::algorithms::full_direct`, the other by
/// `scalfmm::algorithms::full_direct_cuda_shared` with the given launch configuration.
/// The outputs of both copies are then compared with `compute_error`.
///
/// @tparam Dimension space dimension of the particles.
/// @tparam ValueType floating-point type used for positions, inputs and outputs.
/// @tparam MatrixKernelType matrix kernel; its `km` / `kn` members give the number of
///         inputs / outputs per particle.
/// @param N number of particles.
/// @param threads_per_block forwarded to `full_direct_cuda_shared`.
/// @param blocks_per_grid forwarded to `full_direct_cuda_shared`.
/// @param eps requested tolerance. It is only reported in the log: the pass/fail
///        decision uses a fixed tolerance of 100 machine epsilons of `ValueType`.
/// @return `true` when the relative L2-norm error is not NaN and does not exceed the
///         applied tolerance, `false` otherwise.
template<std::size_t Dimension, typename ValueType, typename MatrixKernelType>
auto run(const std::size_t N, const std::size_t threads_per_block, const std::size_t blocks_per_grid,
         const ValueType eps = 1.0) -> bool
{
    static constexpr std::size_t dimension = Dimension;
    using value_type = ValueType;
    using matrix_kernel_type = MatrixKernelType;

    // number of inputs and outputs
    static constexpr std::size_t nb_inputs_near{matrix_kernel_type::km};
    static constexpr std::size_t nb_outputs_near{matrix_kernel_type::kn};

    // particle and container aliases
    using particle_type = scalfmm::container::particle<value_type, dimension, value_type, nb_inputs_near, value_type,
                                                       nb_outputs_near, std::size_t>;
    using container_type = std::vector<particle_type>;

    // time measurement
    using duration_type = std::chrono::nanoseconds;
    using timer_type = cpp_tools::timers::timer<duration_type>;

    // error measurement
    using accurater_type = scalfmm::utils::accurater<value_type>;

    // NOTE(review): the decision threshold is hard-coded and independent of `eps`;
    // confirm whether `eps` was meant to drive it.
    const value_type tolerance{1e2 * std::numeric_limits<value_type>::epsilon()};

    matrix_kernel_type matrix_kernel{};

    // print config
    std::cout << cpp_tools::colors::blue << std::endl;
    std::cout << "[param] N = " << N << std::endl;
    std::cout << "[param] matrix kernel = " << matrix_kernel.name() << std::endl;
    std::cout << "[param] dimension = " << dimension << std::endl;
    std::cout << "[param] threads-per-block = " << threads_per_block << std::endl;
    std::cout << "[param] blocks-per-grid = " << blocks_per_grid << std::endl;
    std::cout << "[param] requested eps = " << eps << std::endl;
    // Report the threshold that is really applied below.
    std::cout << "[param] tolerance = " << tolerance << std::endl;
    if constexpr(std::is_same_v<value_type, float>)
    {
        std::cout << "[param] value type = float" << std::endl;
    }
    else
    {
        std::cout << "[param] value type = double" << std::endl;
    }
    std::cout << cpp_tools::colors::reset << std::endl;

    timer_type common_timer{};

    // init containers: both computations start from the same particles
    container_type container_cpu(N), container_gpu_shared(N);
    fill_container(container_cpu, 123456789);
    std::copy(container_cpu.begin(), container_cpu.end(), container_gpu_shared.begin());

    // direct computation on the CPU
    common_timer.tic();
    scalfmm::algorithms::full_direct(container_cpu, matrix_kernel);
    common_timer.tac();
    std::cout << "[time] cpu = " << common_timer.elapsed() / 1e9 << " s" << std::endl;

    // direct computation on the GPU
    common_timer.tic();
    scalfmm::algorithms::full_direct_cuda_shared(container_gpu_shared, matrix_kernel, threads_per_block,
                                                 blocks_per_grid);
    common_timer.tac();
    std::cout << "[time] cuda shared = " << common_timer.elapsed() / 1e9 << " s" << std::endl;

    // compute relative error
    accurater_type error = compute_error(container_cpu, container_gpu_shared);
    std::cout << cpp_tools::colors::red << std::endl;
    std::cout << error << std::endl;
    std::cout << cpp_tools::colors::reset;

    // final test: a NaN error must fail explicitly since it compares false with everything
    const value_type relative_l2_norm_error = error.get_relative_l2_norm();
    if(relative_l2_norm_error > tolerance || std::isnan(relative_l2_norm_error))
    {
        std::cout << cpp_tools::colors::red << std::endl;
        std::cout << "[test] FAILED: Tolerance not reached!" << std::endl;
        std::cout << cpp_tools::colors::reset << std::endl;
        return false;
    }
    std::cout << cpp_tools::colors::green << std::endl;
    std::cout << "[test] OK: Tolerance reached!" << std::endl;
    std::cout << cpp_tools::colors::reset << std::endl;
    return true;
}
/// @brief Compare the CPU and the CUDA direct computations in the source/target case.
///
/// `source_N` source particles and `target_N` target particles are generated with two
/// fixed seeds and duplicated. One pair of containers is processed by
/// `scalfmm::algorithms::full_direct`, the other by
/// `scalfmm::algorithms::full_direct_cuda_shared` with the given launch configuration.
/// Only the outputs of the target containers are compared, through `compute_error`.
///
/// @tparam Dimension space dimension of the particles.
/// @tparam ValueType floating-point type used for positions, inputs and outputs.
/// @tparam MatrixKernelType matrix kernel; its `km` / `kn` members give the number of
///         inputs / outputs per particle.
/// @param source_N number of source particles.
/// @param target_N number of target particles.
/// @param threads_per_block forwarded to `full_direct_cuda_shared`.
/// @param blocks_per_grid forwarded to `full_direct_cuda_shared`.
/// @param eps requested tolerance. It is only reported in the log: the pass/fail
///        decision uses a fixed tolerance of 100 machine epsilons of `ValueType`.
/// @return `true` when the relative L2-norm error is not NaN and does not exceed the
///         applied tolerance, `false` otherwise.
template<std::size_t Dimension, typename ValueType, typename MatrixKernelType>
auto run(const std::size_t source_N, const std::size_t target_N, const std::size_t threads_per_block,
         const std::size_t blocks_per_grid, const ValueType eps = 1.0) -> bool
{
    static constexpr std::size_t dimension = Dimension;
    using value_type = ValueType;
    using matrix_kernel_type = MatrixKernelType;

    // number of inputs and outputs
    static constexpr std::size_t nb_inputs_near{matrix_kernel_type::km};
    static constexpr std::size_t nb_outputs_near{matrix_kernel_type::kn};

    // particle and container aliases
    using particle_type = scalfmm::container::particle<value_type, dimension, value_type, nb_inputs_near, value_type,
                                                       nb_outputs_near, std::size_t>;
    using container_type = std::vector<particle_type>;

    // time measurement
    using duration_type = std::chrono::nanoseconds;
    using timer_type = cpp_tools::timers::timer<duration_type>;

    // error computation
    using accurater_type = scalfmm::utils::accurater<value_type>;

    // NOTE(review): the decision threshold is hard-coded and independent of `eps`;
    // confirm whether `eps` was meant to drive it.
    const value_type tolerance{1e2 * std::numeric_limits<value_type>::epsilon()};

    matrix_kernel_type matrix_kernel{};

    // print config
    std::cout << cpp_tools::colors::blue << std::endl;
    std::cout << "[param] source-N = " << source_N << std::endl;
    std::cout << "[param] target-N = " << target_N << std::endl;
    std::cout << "[param] matrix kernel = " << matrix_kernel.name() << std::endl;
    std::cout << "[param] dimension = " << dimension << std::endl;
    std::cout << "[param] threads-per-block = " << threads_per_block << std::endl;
    std::cout << "[param] blocks-per-grid = " << blocks_per_grid << std::endl;
    std::cout << "[param] requested eps = " << eps << std::endl;
    // Report the threshold that is really applied below.
    std::cout << "[param] tolerance = " << tolerance << std::endl;
    if constexpr(std::is_same_v<value_type, float>)
    {
        std::cout << "[param] value type = float" << std::endl;
    }
    else
    {
        std::cout << "[param] value type = double" << std::endl;
    }
    std::cout << cpp_tools::colors::reset << std::endl;

    timer_type common_timer{};

    // init containers: sources and targets use different seeds, and both computations
    // start from the same particles
    container_type source_container_cpu(source_N), target_container_cpu(target_N),
      source_container_gpu_shared(source_N), target_container_gpu_shared(target_N);
    fill_container(source_container_cpu, 123456789);
    fill_container(target_container_cpu, 987654321);
    std::copy(source_container_cpu.begin(), source_container_cpu.end(), source_container_gpu_shared.begin());
    std::copy(target_container_cpu.begin(), target_container_cpu.end(), target_container_gpu_shared.begin());

    // direct computation on the CPU
    common_timer.tic();
    scalfmm::algorithms::full_direct(source_container_cpu, target_container_cpu, matrix_kernel);
    common_timer.tac();
    std::cout << "[time] cpu = " << common_timer.elapsed() / 1e9 << " s" << std::endl;

    // direct computation on the GPU
    common_timer.tic();
    scalfmm::algorithms::full_direct_cuda_shared(source_container_gpu_shared, target_container_gpu_shared,
                                                 matrix_kernel, threads_per_block, blocks_per_grid);
    common_timer.tac();
    std::cout << "[time] cuda shared = " << common_timer.elapsed() / 1e9 << " s" << std::endl;

    // compute relative error (targets only)
    accurater_type error = compute_error(target_container_cpu, target_container_gpu_shared);
    std::cout << cpp_tools::colors::red << std::endl;
    std::cout << error << std::endl;
    std::cout << cpp_tools::colors::reset;

    // final test: a NaN error must fail explicitly since it compares false with everything
    const value_type relative_l2_norm_error = error.get_relative_l2_norm();
    if(relative_l2_norm_error > tolerance || std::isnan(relative_l2_norm_error))
    {
        std::cout << cpp_tools::colors::red << std::endl;
        std::cout << "[test] FAILED: Tolerance not reached!" << std::endl;
        std::cout << cpp_tools::colors::reset << std::endl;
        return false;
    }
    std::cout << cpp_tools::colors::green << std::endl;
    std::cout << "[test] OK: Tolerance reached!" << std::endl;
    std::cout << cpp_tools::colors::reset << std::endl;
    return true;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment