Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 74d7c3ae authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

Merge branch 'starpu/recursive' into 'master'

StarPU: Prepare the field for recursive algorithms

See merge request !269
parents 22c8a3c5 ec9ce5c0
Branches
Tags
1 merge request !269 StarPU: Prepare the field for recursive algorithms
Showing
with 127 additions and 28 deletions
...@@ -181,7 +181,7 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym ...@@ -181,7 +181,7 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym
/* U is of size A->m by min(A->m, A->n) */ /* U is of size A->m by min(A->m, A->n) */
chameleon_zdesc_copy_and_restrict( A, &descU, A->m, minmn ); chameleon_zdesc_copy_and_restrict( A, &descU, A->m, minmn );
chameleon_pzplrnt( &descU, seed, sequence, request ); chameleon_pzplrnt( &descU, descU.m, 0, 0, seed, sequence, request );
/* Shift the seed to generate the next random unitary matrix */ /* Shift the seed to generate the next random unitary matrix */
#if !defined(CHAMELEON_SIMULATION) #if !defined(CHAMELEON_SIMULATION)
...@@ -257,7 +257,7 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym ...@@ -257,7 +257,7 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym
/* V is of size min(A->m, A->n) by A->n */ /* V is of size min(A->m, A->n) by A->n */
chameleon_zdesc_copy_and_restrict( A, &descV, minmn, A->n ); chameleon_zdesc_copy_and_restrict( A, &descV, minmn, A->n );
chameleon_pzplrnt( &descV, seed, sequence, request ); chameleon_pzplrnt( &descV, descV.m, 0, 0, seed, sequence, request );
/* Apply a QR factorization */ /* Apply a QR factorization */
mat.mt = descV.mt; mat.mt = descV.mt;
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
* chameleon_pzplghe - Generate a random hermitian (positive definite if 'bump' is large enough) half-matrix by tiles. * chameleon_pzplghe - Generate a random hermitian (positive definite if 'bump' is large enough) half-matrix by tiles.
*/ */
void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A, void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
unsigned long long int seed, int bigM, int m0, int n0, unsigned long long int seed,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{ {
CHAM_context_t *chamctxt; CHAM_context_t *chamctxt;
...@@ -56,7 +56,7 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -56,7 +56,7 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
INSERT_TASK_zplghe( INSERT_TASK_zplghe(
&options, &options,
bump, tempmm, tempnn, A(m, n), bump, tempmm, tempnn, A(m, n),
A->m, m*A->mb, n*A->nb, seed ); bigM, m*A->mb + m0, n*A->nb + n0, seed );
} }
} }
break; break;
...@@ -72,7 +72,7 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -72,7 +72,7 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
INSERT_TASK_zplghe( INSERT_TASK_zplghe(
&options, &options,
bump, tempmm, tempnn, A(m, n), bump, tempmm, tempnn, A(m, n),
A->m, m*A->mb, n*A->nb, seed ); bigM, m*A->mb + m0, n*A->nb + n0, seed );
} }
} }
break; break;
...@@ -89,7 +89,7 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A, ...@@ -89,7 +89,7 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
INSERT_TASK_zplghe( INSERT_TASK_zplghe(
&options, &options,
bump, tempmm, tempnn, A(m, n), bump, tempmm, tempnn, A(m, n),
A->m, m*A->mb, n*A->nb, seed ); bigM, m*A->mb + m0, n*A->nb + n0, seed );
} }
} }
} }
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
* chameleon_pzplgsy - Generate a random symmetric (positive definite if 'bump' is large enough) half-matrix by tiles. * chameleon_pzplgsy - Generate a random symmetric (positive definite if 'bump' is large enough) half-matrix by tiles.
*/ */
void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A,
unsigned long long int seed, int bigM, int m0, int n0, unsigned long long int seed,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{ {
CHAM_context_t *chamctxt; CHAM_context_t *chamctxt;
...@@ -56,7 +56,7 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_ ...@@ -56,7 +56,7 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_
INSERT_TASK_zplgsy( INSERT_TASK_zplgsy(
&options, &options,
bump, tempmm, tempnn, A(m, n), bump, tempmm, tempnn, A(m, n),
A->m, m*A->mb, n*A->nb, seed ); bigM, m*A->mb + m0, n*A->nb + n0, seed );
} }
} }
break; break;
...@@ -72,7 +72,7 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_ ...@@ -72,7 +72,7 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_
INSERT_TASK_zplgsy( INSERT_TASK_zplgsy(
&options, &options,
bump, tempmm, tempnn, A(m, n), bump, tempmm, tempnn, A(m, n),
A->m, m*A->mb, n*A->nb, seed ); bigM, m*A->mb + m0, n*A->nb + n0, seed );
} }
} }
break; break;
...@@ -89,7 +89,7 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_ ...@@ -89,7 +89,7 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_
INSERT_TASK_zplgsy( INSERT_TASK_zplgsy(
&options, &options,
bump, tempmm, tempnn, A(m, n), bump, tempmm, tempnn, A(m, n),
A->m, m*A->mb, n*A->nb, seed ); bigM, m*A->mb + m0, n*A->nb + n0, seed );
} }
} }
} }
......
...@@ -28,8 +28,9 @@ ...@@ -28,8 +28,9 @@
/** /**
* chameleon_pzplrnt - Generate a random matrix by tiles. * chameleon_pzplrnt - Generate a random matrix by tiles.
*/ */
void chameleon_pzplrnt( CHAM_desc_t *A, unsigned long long int seed, void chameleon_pzplrnt( CHAM_desc_t *A,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) int bigM, int m0, int n0, unsigned long long int seed,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{ {
CHAM_context_t *chamctxt; CHAM_context_t *chamctxt;
RUNTIME_option_t options; RUNTIME_option_t options;
...@@ -52,7 +53,7 @@ void chameleon_pzplrnt( CHAM_desc_t *A, unsigned long long int seed, ...@@ -52,7 +53,7 @@ void chameleon_pzplrnt( CHAM_desc_t *A, unsigned long long int seed,
INSERT_TASK_zplrnt( INSERT_TASK_zplrnt(
&options, &options,
tempmm, tempnn, A(m, n), tempmm, tempnn, A(m, n),
A->m, m*A->mb, n*A->nb, seed ); bigM, m*A->mb + m0, n*A->nb + n0, seed );
} }
} }
RUNTIME_options_finalize(&options, chamctxt); RUNTIME_options_finalize(&options, chamctxt);
......
...@@ -270,7 +270,7 @@ int CHAMELEON_zplghe_Tile_Async( double bump, ...@@ -270,7 +270,7 @@ int CHAMELEON_zplghe_Tile_Async( double bump,
if (chameleon_min( A->m, A->n ) == 0) if (chameleon_min( A->m, A->n ) == 0)
return CHAMELEON_SUCCESS; return CHAMELEON_SUCCESS;
chameleon_pzplghe( bump, uplo, A, seed, sequence, request ); chameleon_pzplghe( bump, uplo, A, A->m, A->i, A->j, seed, sequence, request );
return CHAMELEON_SUCCESS; return CHAMELEON_SUCCESS;
} }
...@@ -272,7 +272,7 @@ int CHAMELEON_zplgsy_Tile_Async( CHAMELEON_Complex64_t bump, ...@@ -272,7 +272,7 @@ int CHAMELEON_zplgsy_Tile_Async( CHAMELEON_Complex64_t bump,
if (chameleon_min( A->m, A->n ) == 0) if (chameleon_min( A->m, A->n ) == 0)
return CHAMELEON_SUCCESS; return CHAMELEON_SUCCESS;
chameleon_pzplgsy( bump, uplo, A, seed, sequence, request ); chameleon_pzplgsy( bump, uplo, A, A->m, A->i, A->j, seed, sequence, request );
return CHAMELEON_SUCCESS; return CHAMELEON_SUCCESS;
} }
...@@ -261,7 +261,7 @@ int CHAMELEON_zplrnt_Tile_Async( CHAM_desc_t *A, ...@@ -261,7 +261,7 @@ int CHAMELEON_zplrnt_Tile_Async( CHAM_desc_t *A,
if (chameleon_min( A->m, A->n ) == 0) if (chameleon_min( A->m, A->n ) == 0)
return CHAMELEON_SUCCESS; return CHAMELEON_SUCCESS;
chameleon_pzplrnt( A, seed, sequence, request ); chameleon_pzplrnt( A, A->m, A->i, A->j, seed, sequence, request );
return CHAMELEON_SUCCESS; return CHAMELEON_SUCCESS;
} }
...@@ -97,9 +97,9 @@ void chameleon_pzlaswp(CHAM_desc_t *B, int *IPIV, int inc, RUNTIME_sequence_t *s ...@@ -97,9 +97,9 @@ void chameleon_pzlaswp(CHAM_desc_t *B, int *IPIV, int inc, RUNTIME_sequence_t *s
void chameleon_pzlaswpc(CHAM_desc_t *B, int *IPIV, int inc, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzlaswpc(CHAM_desc_t *B, int *IPIV, int inc, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzplghe(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzplghe(double bump, cham_uplo_t uplo, CHAM_desc_t *A, int bigM, int m0, int n0, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
void chameleon_pzplgsy(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzplgsy(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, int bigM, int m0, int n0, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
void chameleon_pzplrnt(CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzplrnt(CHAM_desc_t *A, int bigM, int m0, int n0, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
void chameleon_pzplrnk(int K, CHAM_desc_t *C, unsigned long long int seedA, unsigned long long int seedB, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzplrnk(int K, CHAM_desc_t *C, unsigned long long int seedA, unsigned long long int seedB, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *C, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *C, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
......
...@@ -17,10 +17,7 @@ ...@@ -17,10 +17,7 @@
* @date 2020-03-03 * @date 2020-03-03
* *
*/ */
#define _GNU_SOURCE 1
#include "control/common.h" #include "control/common.h"
#include <stdlib.h>
#include <stdio.h>
#include "chameleon/runtime.h" #include "chameleon/runtime.h"
static int static int
......
...@@ -158,6 +158,20 @@ CHAM_tile_get_ptr( const CHAM_tile_t *tile ) ...@@ -158,6 +158,20 @@ CHAM_tile_get_ptr( const CHAM_tile_t *tile )
return tile->mat; return tile->mat;
} }
/**
 * Give a short printable label for the kind of tile, based on the flag bits
 * set in tile->format.
 *
 * The flags are tested in a fixed order: CHAMELEON_TILE_DESC first, then
 * CHAMELEON_TILE_HMAT. Any tile with neither bit set is reported as "Full".
 *
 * @param[in] tile
 *          The tile whose format field is inspected. Must not be NULL.
 *
 * @return A string literal ("Desc", "HMat" or "Full"); the caller must not
 *         modify or free it.
 */
static inline const char *
CHAM_tile_get_typestr( const CHAM_tile_t *tile )
{
    /* Default label, used when no specific format bit matches */
    const char *label = "Full";

    if ( tile->format & CHAMELEON_TILE_DESC ) {
        label = "Desc";
    }
    else if ( tile->format & CHAMELEON_TILE_HMAT ) {
        label = "HMat";
    }

    return label;
}
END_C_DECLS END_C_DECLS
#endif /* _chameleon_struct_h_ */ #endif /* _chameleon_struct_h_ */
...@@ -153,6 +153,14 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options, ...@@ -153,6 +153,14 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
/* Reduce the C access if needed */ /* Reduce the C access if needed */
accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
#if defined(CHAMELEON_KERNELS_TRACE)
{
char *cl_fullname;
chameleon_asprintf( &cl_fullname, "%s( %s, %s, %s )", cl_name, clargs->tileA->name, clargs->tileB->name, clargs->tileC->name );
cl_name = cl_fullname;
}
#endif
/* Insert the task */ /* Insert the task */
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zgemm, &cl_zgemm,
......
...@@ -135,6 +135,14 @@ void INSERT_TASK_zherk( const RUNTIME_option_t *options, ...@@ -135,6 +135,14 @@ void INSERT_TASK_zherk( const RUNTIME_option_t *options,
/* Reduce the C access if needed */ /* Reduce the C access if needed */
accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
#if defined(CHAMELEON_KERNELS_TRACE)
{
char *cl_fullname;
chameleon_asprintf( &cl_fullname, "%s( %s, %s )", cl_name, clargs->tileA->name, clargs->tileC->name );
cl_name = cl_fullname;
}
#endif
/* Insert the task */ /* Insert the task */
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zherk, &cl_zherk,
......
...@@ -82,6 +82,14 @@ void INSERT_TASK_zlauum( const RUNTIME_option_t *options, ...@@ -82,6 +82,14 @@ void INSERT_TASK_zlauum( const RUNTIME_option_t *options,
/* Fix the worker id */ /* Fix the worker id */
workerid = (schedopt == NULL) ? -1 : schedopt->workerid; workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
#if defined(CHAMELEON_KERNELS_TRACE)
{
char *cl_fullname;
chameleon_asprintf( &cl_fullname, "%s( %s )", cl_name, clargs->tileA->name );
cl_name = cl_fullname;
}
#endif
/* Insert the task */ /* Insert the task */
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zlauum, &cl_zlauum,
......
...@@ -98,6 +98,14 @@ void INSERT_TASK_zpotrf( const RUNTIME_option_t *options, ...@@ -98,6 +98,14 @@ void INSERT_TASK_zpotrf( const RUNTIME_option_t *options,
/* Fix the worker id */ /* Fix the worker id */
workerid = (schedopt == NULL) ? -1 : schedopt->workerid; workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
#if defined(CHAMELEON_KERNELS_TRACE)
{
char *cl_fullname;
chameleon_asprintf( &cl_fullname, "%s( %s )", cl_name, clargs->tileA->name );
cl_name = cl_fullname;
}
#endif
/* Insert the task */ /* Insert the task */
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zpotrf, &cl_zpotrf,
......
...@@ -135,6 +135,14 @@ void INSERT_TASK_zsyrk( const RUNTIME_option_t *options, ...@@ -135,6 +135,14 @@ void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
/* Reduce the C access if needed */ /* Reduce the C access if needed */
accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
#if defined(CHAMELEON_KERNELS_TRACE)
{
char *cl_fullname;
chameleon_asprintf( &cl_fullname, "%s( %s, %s )", cl_name, clargs->tileA->name, clargs->tileC->name );
cl_name = cl_fullname;
}
#endif
/* Insert the task */ /* Insert the task */
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_zsyrk, &cl_zsyrk,
......
...@@ -128,6 +128,14 @@ void INSERT_TASK_ztrmm( const RUNTIME_option_t *options, ...@@ -128,6 +128,14 @@ void INSERT_TASK_ztrmm( const RUNTIME_option_t *options,
/* Fix the worker id */ /* Fix the worker id */
workerid = (schedopt == NULL) ? -1 : schedopt->workerid; workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
#if defined(CHAMELEON_KERNELS_TRACE)
{
char *cl_fullname;
chameleon_asprintf( &cl_fullname, "%s( %s, %s )", cl_name, clargs->tileA->name, clargs->tileB->name );
cl_name = cl_fullname;
}
#endif
/* Insert the task */ /* Insert the task */
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_ztrmm, &cl_ztrmm,
......
...@@ -129,6 +129,14 @@ void INSERT_TASK_ztrsm( const RUNTIME_option_t *options, ...@@ -129,6 +129,14 @@ void INSERT_TASK_ztrsm( const RUNTIME_option_t *options,
/* Fix the worker id */ /* Fix the worker id */
workerid = (schedopt == NULL) ? -1 : schedopt->workerid; workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
#if defined(CHAMELEON_KERNELS_TRACE)
{
char *cl_fullname;
chameleon_asprintf( &cl_fullname, "%s( %s, %s )", cl_name, clargs->tileA->name, clargs->tileB->name );
cl_name = cl_fullname;
}
#endif
/* Insert the task */ /* Insert the task */
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_ztrsm, &cl_ztrsm,
......
...@@ -96,6 +96,14 @@ void INSERT_TASK_ztrtri( const RUNTIME_option_t *options, ...@@ -96,6 +96,14 @@ void INSERT_TASK_ztrtri( const RUNTIME_option_t *options,
/* Fix the worker id */ /* Fix the worker id */
workerid = (schedopt == NULL) ? -1 : schedopt->workerid; workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
#if defined(CHAMELEON_KERNELS_TRACE)
{
char *cl_fullname;
chameleon_asprintf( &cl_fullname, "%s( %s )", cl_name, clargs->tileA->name );
cl_name = cl_fullname;
}
#endif
/* Insert the task */ /* Insert the task */
rt_starpu_insert_task( rt_starpu_insert_task(
&cl_ztrtri, &cl_ztrtri,
......
...@@ -536,10 +536,18 @@ static starpu_ssize_t ...@@ -536,10 +536,18 @@ static starpu_ssize_t
cti_describe( void *data_interface, char *buf, size_t size ) cti_describe( void *data_interface, char *buf, size_t size )
{ {
starpu_cham_tile_interface_t *cham_tile_interface = (starpu_cham_tile_interface_t *) data_interface; starpu_cham_tile_interface_t *cham_tile_interface = (starpu_cham_tile_interface_t *) data_interface;
#if defined(CHAMELEON_KERNELS_TRACE)
return snprintf( buf, size, "M%ux%ux%u %s",
(unsigned) cham_tile_interface->tile.m,
(unsigned) cham_tile_interface->tile.n,
(unsigned) cham_tile_interface->flttype,
cham_tile_interface->tile.name);
#else
return snprintf( buf, size, "M%ux%ux%u", return snprintf( buf, size, "M%ux%ux%u",
(unsigned) cham_tile_interface->tile.m, (unsigned) cham_tile_interface->tile.m,
(unsigned) cham_tile_interface->tile.n, (unsigned) cham_tile_interface->tile.n,
(unsigned) cham_tile_interface->flttype ); (unsigned) cham_tile_interface->flttype );
#endif
} }
static int cti_copy_any_to_any( void *src_interface, unsigned src_node, static int cti_copy_any_to_any( void *src_interface, unsigned src_node,
...@@ -554,11 +562,23 @@ static int cti_copy_any_to_any( void *src_interface, unsigned src_node, ...@@ -554,11 +562,23 @@ static int cti_copy_any_to_any( void *src_interface, unsigned src_node,
size_t ld_dst = cham_tile_dst->tile.ld; size_t ld_dst = cham_tile_dst->tile.ld;
int ret = 0; int ret = 0;
void *src_mat = CHAM_tile_get_ptr( &(cham_tile_src->tile) );
void *dst_mat = CHAM_tile_get_ptr( &(cham_tile_dst->tile) );
#if defined(CHAMELEON_KERNELS_TRACE)
fprintf( stderr,
"[ANY->ANY] src(%s, type:%s, m=%d, n=%d, ld=%d, ptr:%p) dest(%s, type:%s, m=%d, n=%d, ld=%d, ptr:%p)\n",
cham_tile_src->tile.name, CHAM_tile_get_typestr( &(cham_tile_src->tile) ),
cham_tile_src->tile.m, cham_tile_src->tile.n, cham_tile_src->tile.ld, src_mat,
cham_tile_dst->tile.name, CHAM_tile_get_typestr( &(cham_tile_dst->tile) ),
cham_tile_dst->tile.m, cham_tile_dst->tile.n, cham_tile_dst->tile.ld, dst_mat );
#endif
#if defined(HAVE_STARPU_INTERFACE_COPY2D) #if defined(HAVE_STARPU_INTERFACE_COPY2D)
ld_src *= elemsize; ld_src *= elemsize;
ld_dst *= elemsize; ld_dst *= elemsize;
if (starpu_interface_copy2d( (uintptr_t) cham_tile_src->tile.mat, 0, src_node, if (starpu_interface_copy2d( (uintptr_t) src_mat, 0, src_node,
(uintptr_t) cham_tile_dst->tile.mat, 0, dst_node, (uintptr_t) dst_mat, 0, dst_node,
m * elemsize, n, ld_src, ld_dst, async_data ) ) { m * elemsize, n, ld_src, ld_dst, async_data ) ) {
ret = -EAGAIN; ret = -EAGAIN;
} }
...@@ -566,8 +586,8 @@ static int cti_copy_any_to_any( void *src_interface, unsigned src_node, ...@@ -566,8 +586,8 @@ static int cti_copy_any_to_any( void *src_interface, unsigned src_node,
if ( (ld_src == m) && (ld_dst == m) ) if ( (ld_src == m) && (ld_dst == m) )
{ {
/* Optimize unpartitioned and y-partitioned cases */ /* Optimize unpartitioned and y-partitioned cases */
if ( starpu_interface_copy( (uintptr_t) cham_tile_src->tile.mat, 0, src_node, if ( starpu_interface_copy( (uintptr_t) src_mat, 0, src_node,
(uintptr_t) cham_tile_dst->tile.mat, 0, dst_node, (uintptr_t) dst_mat, 0, dst_node,
m * n * elemsize, async_data ) ) m * n * elemsize, async_data ) )
{ {
ret = -EAGAIN; ret = -EAGAIN;
...@@ -584,8 +604,8 @@ static int cti_copy_any_to_any( void *src_interface, unsigned src_node, ...@@ -584,8 +604,8 @@ static int cti_copy_any_to_any( void *src_interface, unsigned src_node,
uint32_t src_offset = y * ld_src; uint32_t src_offset = y * ld_src;
uint32_t dst_offset = y * ld_dst; uint32_t dst_offset = y * ld_dst;
if ( starpu_interface_copy( (uintptr_t) cham_tile_src->tile.mat, src_offset, src_node, if ( starpu_interface_copy( (uintptr_t) srcmat, src_offset, src_node,
(uintptr_t) cham_tile_dst->tile.mat, dst_offset, dst_node, (uintptr_t) dstmat, dst_offset, dst_node,
m * elemsize, async_data ) ) m * elemsize, async_data ) )
{ {
ret = -EAGAIN; ret = -EAGAIN;
...@@ -651,6 +671,9 @@ starpu_cham_tile_register( starpu_data_handle_t *handleptr, ...@@ -651,6 +671,9 @@ starpu_cham_tile_register( starpu_data_handle_t *handleptr,
if ( tile->format & CHAMELEON_TILE_FULLRANK ) { if ( tile->format & CHAMELEON_TILE_FULLRANK ) {
cham_tile_interface.allocsize = tile->m * tile->n * elemsize; cham_tile_interface.allocsize = tile->m * tile->n * elemsize;
} }
else if ( tile->format & CHAMELEON_TILE_DESC ) { /* Needed in case starpu ask for it */
cham_tile_interface.allocsize = tile->m * tile->n * elemsize;
}
else if ( tile->format & CHAMELEON_TILE_HMAT ) { else if ( tile->format & CHAMELEON_TILE_HMAT ) {
/* For hmat, allocated data will be handled by hmat library. StarPU cannot allocate it for the library */ /* For hmat, allocated data will be handled by hmat library. StarPU cannot allocate it for the library */
cham_tile_interface.allocsize = 0; cham_tile_interface.allocsize = 0;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment