Commit 8a8fde43 authored by Nathalie Furmento's avatar Nathalie Furmento
Browse files

website: 2016-06-PATC tutorial

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/starpu/website@17459 176f6dd6-97d6-42f4-bd05-d3db9ad07c7a
parent 476f9e14
# Build flags for the StarPU-MPI 1.2 examples, queried from pkg-config.
CFLAGS += $(shell pkg-config --cflags starpumpi-1.2)
LDFLAGS += $(shell pkg-config --libs starpumpi-1.2)
# Compile and link with the MPI wrapper compiler.
CC=mpicc
# Each program is linked from its single object file via make's built-in rules.
ring_async_implicit: ring_async_implicit.o
stencil5: stencil5.o
clean:
rm -f ring_async_implicit stencil5 *.o
#!/bin/bash
# Batch job description (LoadLeveler-style "# @" directives): run the
# StarPU-MPI ring example on the GPU class, 10 minute wall-clock limit.
# NOTE(review): directive values assumed site-specific — confirm against
# the training cluster's scheduler documentation.
# @ class = clgpu
# @ job_name = job_ring
# @ total_tasks = 10
# @ node = 1
# @ wall_clock_limit = 00:10:00
# @ output = $(HOME)/starpu/$(job_name).$(jobid).out
# @ error = $(HOME)/starpu/$(job_name).$(jobid).err
# @ job_type = mpich
# @ queue
# Load the StarPU environment installed for the June 2016 training.
source /gpfslocal/pub/training/runtime_june2016/starpu_env.sh
make ring_async_implicit
# The ring example requires at least 2 MPI processes.
mpirun -np 2 $PWD/ring_async_implicit
/* StarPU --- Runtime system for heterogeneous multicore architectures.
*
* Copyright (C) 2010 Université de Bordeaux 1
* Copyright (C) 2010, 2011, 2012, 2013, 2014 Centre National de la Recherche Scientifique
*
* StarPU is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or (at
* your option) any later version.
*
* StarPU is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* See the GNU Lesser General Public License in COPYING.LGPL for more details.
*/
#include <starpu_mpi.h>
/* Number of trips the token makes around the MPI ring. */
#define NITER 32
/* The value passed around the ring; incremented once per hop (see increment_cpu). */
int token = 42;
/* StarPU data handle registered on top of 'token' in main(). */
starpu_data_handle_t token_handle;
/* CPU kernel: increment the token in place.
 * descr[0] is the one-element vector holding the token. */
void increment_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
{
	int *value = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
	*value = *value + 1;
}
/* Codelet wrapping increment_cpu: a single CPU implementation operating
 * on one buffer (the token) in read-write mode. */
static struct starpu_codelet increment_cl =
{
.cpu_funcs = {increment_cpu, NULL},
.nbuffers = 1,
.modes = {STARPU_RW}
};
/* Submit one asynchronous task that increments the shared token.
 * The task is not waited for here; ordering comes from StarPU's
 * implicit data dependencies on token_handle. */
void increment_token(void)
{
	struct starpu_task *t = starpu_task_create();

	t->cl = &increment_cl;
	t->handles[0] = token_handle;

	int rc = starpu_task_submit(t);
	STARPU_CHECK_RETURN_VALUE(rc, "starpu_task_submit");
}
/* Ring example: a token travels NITER times around the MPI processes.
 * Each hop receives the token from the previous rank, increments it with
 * a StarPU task, and forwards it to the next rank.  Communications are
 * detached and ordering is enforced by implicit data dependencies. */
int main(int argc, char **argv)
{
	int ret, rank, size;

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	ret = starpu_mpi_init(NULL, NULL, 1);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	if (size < 2)
	{
		if (rank == 0)
			fprintf(stderr, "We need at least 2 processes.\n");
		/* starpu_mpi_init() was called with initialize_mpi == 1, so
		 * StarPU owns the MPI runtime: tear it down through
		 * starpu_mpi_shutdown() (which finalizes MPI) rather than
		 * calling MPI_Finalize() directly. */
		starpu_mpi_shutdown();
		starpu_shutdown();
		return 77;
	}

	/* Register the token as a one-element vector. */
	starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(token));

	int nloops = NITER;
	int loop;
	int last_loop = nloops - 1;
	int last_rank = size - 1;

	for (loop = 0; loop < nloops; loop++)
	{
		/* Unique tag for the message received at this (loop, rank) step. */
		int tag = loop*size + rank;

		if (loop == 0 && rank == 0)
		{
			/* Rank 0 starts the ring with a fresh token. */
			token = 0;
			/* %d, not %u: token is a (signed) int. */
			fprintf(stdout, "Start with token value %d\n", token);
		}
		else
		{
			/* Receive the token from the previous rank in the ring. */
			starpu_mpi_irecv_detached(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, NULL, NULL);
		}

		increment_token();

		if (loop == last_loop && rank == last_rank)
		{
			/* Last hop: read the final value on the host.  Acquire
			 * waits for the pending increment task to complete. */
			starpu_data_acquire(token_handle, STARPU_R);
			fprintf(stdout, "Finished : token value %d\n", token);
			starpu_data_release(token_handle);
		}
		else
		{
			/* Forward the token to the next rank. */
			starpu_mpi_isend_detached(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD, NULL, NULL);
		}
	}

	starpu_task_wait_for_all();
	starpu_data_unregister(token_handle);
	starpu_mpi_shutdown();
	starpu_shutdown();

	if (rank == last_rank)
	{
		/* The token was incremented once per rank per loop. */
		fprintf(stderr, "[%d] token = %d == %d * %d ?\n", rank, token, nloops, size);
		STARPU_ASSERT(token == nloops*size);
	}

	return 0;
}
/* StarPU --- Runtime system for heterogeneous multicore architectures.
*
* Copyright (C) 2011, 2013, 2015 Université Bordeaux
* Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016 CNRS
*
* StarPU is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or (at
* your option) any later version.
*
* StarPU is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* See the GNU Lesser General Public License in COPYING.LGPL for more details.
*/
#include <starpu_mpi.h>
#include <math.h>
/* Print to 'ofile' unless the STARPU_SSILENT environment variable is set. */
#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
/* Rank-prefixed variant of FPRINTF.  Note: no trailing semicolon after
 * while(0) — the caller supplies it, so the macro expands to a single
 * statement and stays safe inside unbraced if/else bodies. */
#define FPRINTF_MPI(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) { \
int _disp_rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &_disp_rank); \
fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \
fflush(ofile); }} while(0)
/* CPU kernel for the 5-point stencil: replace the centre value with the
 * average of itself and its four neighbours.  descr[0] is read-write,
 * descr[1..4] are read-only (see stencil5_cl). */
void stencil5_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
{
	float *centre = (float *)STARPU_VARIABLE_GET_PTR(descr[0]);
	float *xprev  = (float *)STARPU_VARIABLE_GET_PTR(descr[1]);
	float *xnext  = (float *)STARPU_VARIABLE_GET_PTR(descr[2]);
	float *yprev  = (float *)STARPU_VARIABLE_GET_PTR(descr[3]);
	float *ynext  = (float *)STARPU_VARIABLE_GET_PTR(descr[4]);

	/* Same summation order as the original to keep float results identical. */
	*centre = (*centre + *xprev + *xnext + *yprev + *ynext) / 5;
}
/* Codelet for the 5-point stencil kernel: the centre element is accessed
 * read-write, the four neighbours read-only. */
struct starpu_codelet stencil5_cl =
{
.cpu_funcs = {stencil5_cpu},
.nbuffers = 5,
.modes = {STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R}
};
/* Default number of stencil iterations (overridable with -iter, see parse_args). */
#define NITER_DEF 10
/* Matrix dimensions. */
#define X 5
#define Y 5
/* Set to 1 by the -display option: print the matrix values. */
int display = 0;
/* Number of stencil iterations actually run. */
int niter = NITER_DEF;
/* Returns the MPI node number where data indexes index is */
int my_distrib(int x, int y, int nb_nodes)
{
/* Block distrib */
return ((int)(x / sqrt(nb_nodes) + (y / sqrt(nb_nodes)) * sqrt(nb_nodes))) % nb_nodes;
}
/* Shifted distribution, used by the migration example: every element
 * moves to the rank following its my_distrib() owner. */
int my_distrib2(int x, int y, int nb_nodes)
{
	int base = my_distrib(x, y, nb_nodes);
	return (base + 1) % nb_nodes;
}
/* Parse command-line options:
 *   -iter N    set the number of stencil iterations (global 'niter')
 *   -display   print the matrix values (global 'display')
 * Unknown arguments are silently ignored. */
static void parse_args(int argc, char **argv)
{
	int i;
	for (i = 1; i < argc; i++)
	{
		if (strcmp(argv[i], "-iter") == 0)
		{
			/* Bounds check: without it, "-iter" as the last argument
			 * would read argv[argc+1], past the end of argv. */
			if (i + 1 < argc)
			{
				char *argptr;
				niter = strtol(argv[++i], &argptr, 10);
			}
			else
			{
				fprintf(stderr, "Option -iter requires an argument\n");
			}
		}
		else if (strcmp(argv[i], "-display") == 0)
		{
			display = 1;
		}
	}
}
/* Stencil example over an X x Y matrix distributed across MPI nodes.
 * Runs 'niter' stencil sweeps under the my_distrib() distribution, then
 * migrates the data to the shifted my_distrib2() distribution and runs
 * 'niter' more sweeps, before gathering everything back. */
int main(int argc, char **argv)
{
	int my_rank, size, x, y, loop;
	float mean=0;
	float matrix[X][Y];
	starpu_data_handle_t data_handles[X][Y];

	int ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	/* Check starpu_mpi_init() too, like the call above (its return value
	 * was previously ignored). */
	ret = starpu_mpi_init(&argc, &argv, 1);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
	starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank);
	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);

	if (starpu_cpu_worker_get_count() == 0)
	{
		FPRINTF(stderr, "We need at least 1 CPU worker.\n");
		starpu_mpi_shutdown();
		starpu_shutdown();
		return 77;
	}

	parse_args(argc, argv);

	/* Initial data values */
	starpu_srand48((long int)time(NULL));
	for(x = 0; x < X; x++)
	{
		for (y = 0; y < Y; y++)
		{
			matrix[x][y] = (float)starpu_drand48();
			mean += matrix[x][y];
		}
	}
	mean /= (X*Y);

	if (display)
	{
		FPRINTF_MPI(stdout, "mean=%2.2f\n", mean);
		for(x = 0; x < X; x++)
		{
			fprintf(stdout, "[%d] ", my_rank);
			for (y = 0; y < Y; y++)
			{
				fprintf(stdout, "%2.2f ", matrix[x][y]);
			}
			fprintf(stdout, "\n");
		}
	}

	/* Initial distribution: each node registers the data it owns plus the
	 * neighbour elements it will read, and tells StarPU which rank owns
	 * each handle. */
	for(x = 0; x < X; x++)
	{
		for (y = 0; y < Y; y++)
		{
			int mpi_rank = my_distrib(x, y, size);
			if (mpi_rank == my_rank)
			{
				/* I own this element: register the local buffer. */
				starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(float));
			}
			else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
				 || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))
			{
				/* I don't own that index, but will need it for my computations */
				starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float));
			}
			else
			{
				/* I know it's useless to allocate anything for this */
				data_handles[x][y] = NULL;
			}
			if (data_handles[x][y])
			{
				/* Tag (y*X)+x uniquely identifies the element. */
				starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank);
			}
		}
	}

	/* First computation with initial distribution.  StarPU infers the MPI
	 * transfers from the task data dependencies. */
	for(loop=0 ; loop<niter; loop++)
	{
		for (x = 1; x < X-1; x++)
		{
			for (y = 1; y < Y-1; y++)
			{
				starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y],
						       STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y],
						       STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1],
						       0);
			}
		}
	}
	FPRINTF(stderr, "Waiting ...\n");
	starpu_task_wait_for_all();

	/* Now migrate data to a new distribution */
	/* First register newly needed data */
	for(x = 0; x < X; x++)
	{
		for (y = 0; y < Y; y++)
		{
			int mpi_rank = my_distrib2(x, y, size);
			if (!data_handles[x][y] && (mpi_rank == my_rank
						    || my_rank == my_distrib2(x+1, y, size) || my_rank == my_distrib2(x-1, y, size)
						    || my_rank == my_distrib2(x, y+1, size) || my_rank == my_distrib2(x, y-1, size)))
			{
				/* Register newly-needed data */
				starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float));
				starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank);
			}
			if (data_handles[x][y] && mpi_rank != starpu_mpi_data_get_rank(data_handles[x][y]))
			{
				/* Migrate the data */
				starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL);
				/* And register new rank of the matrix */
				starpu_mpi_data_set_rank(data_handles[x][y], mpi_rank);
			}
		}
	}

	/* Second computation with new distribution */
	for(loop=0 ; loop<niter; loop++)
	{
		for (x = 1; x < X-1; x++)
		{
			for (y = 1; y < Y-1; y++)
			{
				starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y],
						       STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y],
						       STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1],
						       0);
			}
		}
	}
	FPRINTF(stderr, "Waiting ...\n");
	starpu_task_wait_for_all();

	/* Unregister data: move each element back to its original owner so the
	 * user-provided 'matrix' buffer holds the final values. */
	for(x = 0; x < X; x++)
	{
		for (y = 0; y < Y; y++)
		{
			if (data_handles[x][y])
			{
				int mpi_rank = my_distrib(x, y, size);
				/* Get back data to original place where the user-provided buffer is. */
				starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL);
				/* Register original rank of the matrix (although useless) */
				starpu_mpi_data_set_rank(data_handles[x][y], mpi_rank);
				/* And unregister it */
				starpu_data_unregister(data_handles[x][y]);
			}
		}
	}

	starpu_mpi_shutdown();
	starpu_shutdown();

	if (display)
	{
		FPRINTF(stdout, "[%d] mean=%2.2f\n", my_rank, mean);
		for(x = 0; x < X; x++)
		{
			FPRINTF(stdout, "[%d] ", my_rank);
			for (y = 0; y < Y; y++)
			{
				FPRINTF(stdout, "%2.2f ", matrix[x][y]);
			}
			FPRINTF(stdout, "\n");
		}
	}
	return 0;
}
#!/bin/bash
# Batch job description (LoadLeveler-style "# @" directives): run the
# StarPU-MPI stencil example on the GPU class, 10 minute wall-clock limit.
# NOTE(review): directive values assumed site-specific — confirm against
# the training cluster's scheduler documentation.
# @ class = clgpu
# @ job_name = job_stencil
# @ total_tasks = 10
# @ node = 1
# @ wall_clock_limit = 00:10:00
# @ output = $(HOME)/starpu/$(job_name).$(jobid).out
# @ error = $(HOME)/starpu/$(job_name).$(jobid).err
# @ job_type = mpich
# @ queue
# Load the StarPU environment installed for the June 2016 training.
source /gpfslocal/pub/training/runtime_june2016/starpu_env.sh
make stencil5
# Run on 2 MPI processes and print the matrix before/after.
mpirun -np 2 $PWD/stencil5 -display
......@@ -607,14 +607,13 @@ which will emit a gnuplot file in the current directory.
</div>
</div>
<!--
<div class="section">
<h2>Sessions Part 3: MPI Support</h2>
<p>
StarPU provides support for MPI communications. It does so two ways. Either the
StarPU provides support for MPI communications. It does so in two ways. Either the
application specifies MPI transfers by hand, or it lets StarPU infer them from
data dependencies
data dependencies.
</p>
<div class="section">
......@@ -644,11 +643,18 @@ complete.
<tt>
<pre>
#how many nodes and cores
#PBS -W x=NACCESSPOLICY:SINGLEJOB -l nodes=1:ppn=12 -q formation_gpu
#!/bin/bash
# @ class = clgpu
# @ job_name = job_ring
# @ total_tasks = 10
# @ node = 1
# @ wall_clock_limit = 00:10:00
# @ output = $(HOME)/starpu/$(job_name).$(jobid).out
# @ error = $(HOME)/starpu/$(job_name).$(jobid).err
# @ job_type = mpich
# @ queue
# go in the directory from which the submission was made
cd $PBS_O_WORKDIR
source /gpfslocal/pub/training/runtime_june2016/starpu_env.sh
make ring_async_implicit
mpirun -np 2 $PWD/ring_async_implicit
......@@ -668,9 +674,28 @@ It also shows how data can be migrated to a
new distribution.
</p>
<tt>
<pre>
#!/bin/bash
# @ class = clgpu
# @ job_name = job_stencil
# @ total_tasks = 10
# @ node = 1
# @ wall_clock_limit = 00:10:00
# @ output = $(HOME)/starpu/$(job_name).$(jobid).out
# @ error = $(HOME)/starpu/$(job_name).$(jobid).err
# @ job_type = mpich
# @ queue
source /gpfslocal/pub/training/runtime_june2016/starpu_env.sh
make stencil5
mpirun -np 2 $PWD/stencil5 -display
</pre>
</tt>
</div>
</div>
-->
<div class="section" id="contact">
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment