Mentions légales du service

Skip to content
Snippets Groups Projects
Commit f43cd603 authored by LISITO Alycia
Browse files

zgetrf: correct tile size

parent 690738da
No related branches found
No related tags found
1 merge request: !453 "Ctest getrf"
...@@ -118,7 +118,7 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws, ...@@ -118,7 +118,7 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
RUNTIME_option_t *options ) RUNTIME_option_t *options )
{ {
int m, h; int m, h;
int tempkm, tempkn, minmn; int tempkm, tempkn, tempmm, minmn;
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
...@@ -133,14 +133,15 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws, ...@@ -133,14 +133,15 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws,
for (h=0; h<=minmn; h++){ for (h=0; h<=minmn; h++){
INSERT_TASK_zgetrf_percol_diag( INSERT_TASK_zgetrf_percol_diag(
options, options,
h, k * A->mb, tempkm, tempkn, h, k * A->mb,
A(k, k), A(k, k),
ipiv ); ipiv );
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb;
INSERT_TASK_zgetrf_percol_offdiag( INSERT_TASK_zgetrf_percol_offdiag(
options, options,
h, m * A->mb, tempmm, tempkn, h, m * A->mb,
A(m, k), A(m, k),
ipiv ); ipiv );
} }
...@@ -164,7 +165,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, ...@@ -164,7 +165,7 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
RUNTIME_option_t *options ) RUNTIME_option_t *options )
{ {
int m, h, b, nbblock; int m, h, b, nbblock;
int tempkm, tempkn, minmn; int tempkm, tempkn, tempmm, minmn;
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
...@@ -185,14 +186,15 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, ...@@ -185,14 +186,15 @@ chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws,
INSERT_TASK_zgetrf_blocked_diag( INSERT_TASK_zgetrf_blocked_diag(
options, options,
j, k * A->mb, ws->ib, tempkm, tempkn, j, k * A->mb, ws->ib,
A(k, k), Up(k, k), A(k, k), Up(k, k),
ipiv ); ipiv );
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb;
INSERT_TASK_zgetrf_blocked_offdiag( INSERT_TASK_zgetrf_blocked_offdiag(
options, options,
j, m * A->mb, ws->ib, tempmm, tempkn, j, m * A->mb, ws->ib,
A(m, k), Up(k, k), A(m, k), Up(k, k),
ipiv ); ipiv );
} }
......
...@@ -508,23 +508,23 @@ void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options, ...@@ -508,23 +508,23 @@ void INSERT_TASK_zgetrf_nopiv_percol_trsm( const RUNTIME_option_t *options,
const CHAM_desc_t *U, int Um, int Un ); const CHAM_desc_t *U, int Um, int Un );
void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
int h, int m0, int m, int n, int h, int m0,
CHAM_desc_t *A, int Am, int An, CHAM_desc_t *A, int Am, int An,
CHAM_ipiv_t *ws ); CHAM_ipiv_t *ws );
void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
int h, int m0, int m, int n, int h, int m0,
CHAM_desc_t *A, int Am, int An, CHAM_desc_t *A, int Am, int An,
CHAM_ipiv_t *ws ); CHAM_ipiv_t *ws );
void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
int h, int m0, int ib, int m, int n, int h, int m0, int ib,
CHAM_desc_t *A, int Am, int An, CHAM_desc_t *A, int Am, int An,
CHAM_desc_t *U, int Um, int Un, CHAM_desc_t *U, int Um, int Un,
CHAM_ipiv_t *ws ); CHAM_ipiv_t *ws );
void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
int h, int m0, int ib, int m, int n, int h, int m0, int ib,
CHAM_desc_t *A, int Am, int An, CHAM_desc_t *A, int Am, int An,
CHAM_desc_t *U, int Um, int Un, CHAM_desc_t *U, int Um, int Un,
CHAM_ipiv_t *ws ); CHAM_ipiv_t *ws );
......
...@@ -29,7 +29,7 @@ CHAMELEON_CL_CB( zgetrf_blocked_trsm, cti_handle_get_m(task->handles[0]), 0, ...@@ -29,7 +29,7 @@ CHAMELEON_CL_CB( zgetrf_blocked_trsm, cti_handle_get_m(task->handles[0]), 0,
#if !defined(CHAMELEON_SIMULATION) #if !defined(CHAMELEON_SIMULATION)
static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg) static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
{ {
int h, m0, ib; int m, n, h, m0, ib;
RUNTIME_sequence_t *sequence; RUNTIME_sequence_t *sequence;
RUNTIME_request_t *request; RUNTIME_request_t *request;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
...@@ -40,7 +40,7 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg) ...@@ -40,7 +40,7 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
CHAMELEON_Complex64_t *U = NULL; CHAMELEON_Complex64_t *U = NULL;
int ldu = -1;; int ldu = -1;;
starpu_codelet_unpack_args( cl_arg, &h, &m0, &ib, starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &ib,
&sequence, &request ); &sequence, &request );
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
...@@ -67,7 +67,7 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg) ...@@ -67,7 +67,7 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
nextpiv->h = h; nextpiv->h = h;
nextpiv->has_diag = 1; nextpiv->has_diag = 1;
CORE_zgetrf_panel_diag( tileA->m, tileA->n, h, m0, ib, CORE_zgetrf_panel_diag( m, n, h, m0, ib,
CHAM_tile_get_ptr( tileA ), tileA->ld, CHAM_tile_get_ptr( tileA ), tileA->ld,
U, ldu, U, ldu,
ipiv, &(nextpiv->pivot), &(prevpiv->pivot) ); ipiv, &(nextpiv->pivot), &(prevpiv->pivot) );
...@@ -87,7 +87,7 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg) ...@@ -87,7 +87,7 @@ static void cl_zgetrf_blocked_diag_cpu_func(void *descr[], void *cl_arg)
CODELETS_CPU( zgetrf_blocked_diag, cl_zgetrf_blocked_diag_cpu_func ); CODELETS_CPU( zgetrf_blocked_diag, cl_zgetrf_blocked_diag_cpu_func );
void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
int h, int m0, int ib, int m, int n, int h, int m0, int ib,
CHAM_desc_t *A, int Am, int An, CHAM_desc_t *A, int Am, int An,
CHAM_desc_t *U, int Um, int Un, CHAM_desc_t *U, int Um, int Un,
CHAM_ipiv_t *ipiv ) CHAM_ipiv_t *ipiv )
...@@ -123,6 +123,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, ...@@ -123,6 +123,8 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
codelet, codelet,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &h, sizeof(int), STARPU_VALUE, &h, sizeof(int),
STARPU_VALUE, &m0, sizeof(int), STARPU_VALUE, &m0, sizeof(int),
STARPU_VALUE, &ib, sizeof(int), STARPU_VALUE, &ib, sizeof(int),
...@@ -146,7 +148,7 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options, ...@@ -146,7 +148,7 @@ void INSERT_TASK_zgetrf_blocked_diag( const RUNTIME_option_t *options,
#if !defined(CHAMELEON_SIMULATION) #if !defined(CHAMELEON_SIMULATION)
static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg) static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
{ {
int h, m0, ib; int m, n, h, m0, ib;
RUNTIME_sequence_t *sequence; RUNTIME_sequence_t *sequence;
RUNTIME_request_t *request; RUNTIME_request_t *request;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
...@@ -156,7 +158,7 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg) ...@@ -156,7 +158,7 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
CHAMELEON_Complex64_t *U = NULL; CHAMELEON_Complex64_t *U = NULL;
int ldu = -1;; int ldu = -1;;
starpu_codelet_unpack_args( cl_arg, &h, &m0, &ib, &sequence, &request ); starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &ib, &sequence, &request );
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
nextpiv = (cppi_interface_t*) descr[1]; nextpiv = (cppi_interface_t*) descr[1];
...@@ -169,7 +171,7 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg) ...@@ -169,7 +171,7 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
nextpiv->h = h; /* Initialize in case it uses a copy */ nextpiv->h = h; /* Initialize in case it uses a copy */
CORE_zgetrf_panel_offdiag( tileA->m, tileA->n, h, m0, ib, CORE_zgetrf_panel_offdiag( m, n, h, m0, ib,
CHAM_tile_get_ptr(tileA), tileA->ld, CHAM_tile_get_ptr(tileA), tileA->ld,
U, ldu, U, ldu,
&(nextpiv->pivot), &(prevpiv->pivot) ); &(nextpiv->pivot), &(prevpiv->pivot) );
...@@ -182,7 +184,7 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg) ...@@ -182,7 +184,7 @@ static void cl_zgetrf_blocked_offdiag_cpu_func(void *descr[], void *cl_arg)
CODELETS_CPU(zgetrf_blocked_offdiag, cl_zgetrf_blocked_offdiag_cpu_func) CODELETS_CPU(zgetrf_blocked_offdiag, cl_zgetrf_blocked_offdiag_cpu_func)
void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
int h, int m0, int ib, int m, int n, int h, int m0, int ib,
CHAM_desc_t *A, int Am, int An, CHAM_desc_t *A, int Am, int An,
CHAM_desc_t *U, int Um, int Un, CHAM_desc_t *U, int Um, int Un,
CHAM_ipiv_t *ipiv ) CHAM_ipiv_t *ipiv )
...@@ -206,6 +208,8 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options, ...@@ -206,6 +208,8 @@ void INSERT_TASK_zgetrf_blocked_offdiag( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
codelet, codelet,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &h, sizeof(int), STARPU_VALUE, &h, sizeof(int),
STARPU_VALUE, &m0, sizeof(int), STARPU_VALUE, &m0, sizeof(int),
STARPU_VALUE, &ib, sizeof(int), STARPU_VALUE, &ib, sizeof(int),
......
...@@ -28,7 +28,7 @@ CHAMELEON_CL_CB( zgetrf_percol_offdiag, cti_handle_get_m(task->handles[0]), 0, 0 ...@@ -28,7 +28,7 @@ CHAMELEON_CL_CB( zgetrf_percol_offdiag, cti_handle_get_m(task->handles[0]), 0, 0
#if !defined(CHAMELEON_SIMULATION) #if !defined(CHAMELEON_SIMULATION)
static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg) static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
{ {
int h, m0; int m, n, h, m0;
RUNTIME_sequence_t *sequence; RUNTIME_sequence_t *sequence;
RUNTIME_request_t *request; RUNTIME_request_t *request;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
...@@ -36,8 +36,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg) ...@@ -36,8 +36,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
cppi_interface_t *nextpiv; cppi_interface_t *nextpiv;
cppi_interface_t *prevpiv; cppi_interface_t *prevpiv;
starpu_codelet_unpack_args( cl_arg, &h, &m0, starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &sequence, &request );
&sequence, &request );
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
ipiv = (int *)STARPU_VECTOR_GET_PTR(descr[1]); ipiv = (int *)STARPU_VECTOR_GET_PTR(descr[1]);
...@@ -58,7 +57,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg) ...@@ -58,7 +57,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
nextpiv->h = h; nextpiv->h = h;
nextpiv->has_diag = 1; nextpiv->has_diag = 1;
CORE_zgetrf_panel_diag( tileA->m, tileA->n, h, m0, tileA->n, CORE_zgetrf_panel_diag( m, n, h, m0, tileA->n,
CHAM_tile_get_ptr( tileA ), tileA->ld, CHAM_tile_get_ptr( tileA ), tileA->ld,
NULL, -1, NULL, -1,
ipiv, &(nextpiv->pivot), &(prevpiv->pivot) ); ipiv, &(nextpiv->pivot), &(prevpiv->pivot) );
...@@ -66,7 +65,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg) ...@@ -66,7 +65,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
if ( h > 0 ) { if ( h > 0 ) {
cppi_display_dbg( prevpiv, stderr, "Prevpiv after call: " ); cppi_display_dbg( prevpiv, stderr, "Prevpiv after call: " );
} }
if ( h < tileA->n ) { if ( h < n ) {
cppi_display_dbg( nextpiv, stderr, "Nextpiv after call: " ); cppi_display_dbg( nextpiv, stderr, "Nextpiv after call: " );
} }
} }
...@@ -78,7 +77,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg) ...@@ -78,7 +77,7 @@ static void cl_zgetrf_percol_diag_cpu_func(void *descr[], void *cl_arg)
CODELETS_CPU( zgetrf_percol_diag, cl_zgetrf_percol_diag_cpu_func ); CODELETS_CPU( zgetrf_percol_diag, cl_zgetrf_percol_diag_cpu_func );
void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
int h, int m0, int m, int n, int h, int m0,
CHAM_desc_t *A, int Am, int An, CHAM_desc_t *A, int Am, int An,
CHAM_ipiv_t *ipiv ) CHAM_ipiv_t *ipiv )
{ {
...@@ -101,6 +100,8 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, ...@@ -101,6 +100,8 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
codelet, codelet,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &h, sizeof(int), STARPU_VALUE, &h, sizeof(int),
STARPU_VALUE, &m0, sizeof(int), STARPU_VALUE, &m0, sizeof(int),
STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t*), STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t*),
...@@ -122,14 +123,14 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options, ...@@ -122,14 +123,14 @@ void INSERT_TASK_zgetrf_percol_diag( const RUNTIME_option_t *options,
#if !defined(CHAMELEON_SIMULATION) #if !defined(CHAMELEON_SIMULATION)
static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg) static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg)
{ {
int h, m0; int m, n, h, m0;
RUNTIME_sequence_t *sequence; RUNTIME_sequence_t *sequence;
RUNTIME_request_t *request; RUNTIME_request_t *request;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
cppi_interface_t *nextpiv; cppi_interface_t *nextpiv;
cppi_interface_t *prevpiv; cppi_interface_t *prevpiv;
starpu_codelet_unpack_args( cl_arg, &h, &m0, &sequence, &request ); starpu_codelet_unpack_args( cl_arg, &m, &n, &h, &m0, &sequence, &request );
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
nextpiv = (cppi_interface_t*) descr[1]; nextpiv = (cppi_interface_t*) descr[1];
...@@ -137,7 +138,7 @@ static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg) ...@@ -137,7 +138,7 @@ static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg)
nextpiv->h = h; /* Initialize in case it uses a copy */ nextpiv->h = h; /* Initialize in case it uses a copy */
CORE_zgetrf_panel_offdiag( tileA->m, tileA->n, h, m0, tileA->n, CORE_zgetrf_panel_offdiag( m, n, h, m0, tileA->n,
CHAM_tile_get_ptr(tileA), tileA->ld, CHAM_tile_get_ptr(tileA), tileA->ld,
NULL, -1, NULL, -1,
&(nextpiv->pivot), &(prevpiv->pivot) ); &(nextpiv->pivot), &(prevpiv->pivot) );
...@@ -150,7 +151,7 @@ static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg) ...@@ -150,7 +151,7 @@ static void cl_zgetrf_percol_offdiag_cpu_func(void *descr[], void *cl_arg)
CODELETS_CPU(zgetrf_percol_offdiag, cl_zgetrf_percol_offdiag_cpu_func) CODELETS_CPU(zgetrf_percol_offdiag, cl_zgetrf_percol_offdiag_cpu_func)
void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
int h, int m0, int m, int n, int h, int m0,
CHAM_desc_t *A, int Am, int An, CHAM_desc_t *A, int Am, int An,
CHAM_ipiv_t *ipiv ) CHAM_ipiv_t *ipiv )
{ {
...@@ -170,6 +171,8 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options, ...@@ -170,6 +171,8 @@ void INSERT_TASK_zgetrf_percol_offdiag( const RUNTIME_option_t *options,
rt_starpu_insert_task( rt_starpu_insert_task(
codelet, codelet,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &h, sizeof(int), STARPU_VALUE, &h, sizeof(int),
STARPU_VALUE, &m0, sizeof(int), STARPU_VALUE, &m0, sizeof(int),
STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t *), STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t *),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment