Commit b1186634 authored by PRUVOST Florent
Browse files

do not check and register field mat of the descriptor if not used

parent 71807fa7
......@@ -63,8 +63,10 @@ MORSE_desc_t morse_desc_init(MORSE_enum dtyp, int mb, int nb, int bsiz,
desc.mt = (m == 0) ? 0 : (i+m-1)/mb - i/mb + 1;
desc.nt = (n == 0) ? 0 : (j+n-1)/nb - j/nb + 1;
desc.occurences = 0;
desc.id = nbdesc; nbdesc++;
desc.occurences = 0;
desc.use_mat = 1;
desc.register_mat = 1;
#if defined(CHAMELEON_USE_MPI)
MPI_Comm_rank( MPI_COMM_WORLD, &(desc.myrank) );
......@@ -152,8 +154,10 @@ MORSE_desc_t morse_desc_init_user(MORSE_enum dtyp, int mb, int nb, int bsiz,
desc.mt = (m == 0) ? 0 : (i+m-1)/mb - i/mb + 1;
desc.nt = (n == 0) ? 0 : (j+n-1)/nb - j/nb + 1;
desc.occurences = 0;
desc.id = nbdesc; nbdesc++;
desc.occurences = 0;
desc.use_mat = 1;
desc.register_mat = 1;
#if defined(CHAMELEON_USE_MPI)
MPI_Comm_rank( MPI_COMM_WORLD, &(desc.myrank) );
......@@ -248,7 +252,7 @@ int morse_desc_check(MORSE_desc_t *desc)
morse_error("morse_desc_check", "NULL descriptor");
return MORSE_ERR_NOT_INITIALIZED;
}
if (desc->mat == NULL) {
if (desc->mat == NULL && desc->use_mat == 1) {
morse_error("morse_desc_check", "NULL matrix pointer");
return MORSE_ERR_UNALLOCATED;
}
......@@ -312,7 +316,7 @@ int morse_desc_mat_free( MORSE_desc_t *desc )
RUNTIME_desc_destroy( desc );
if (desc->mat != NULL) {
if (desc->mat != NULL && desc->use_mat == 1) {
#ifndef CHAMELEON_SIMULATION
free(desc->mat);
#endif
......@@ -503,6 +507,13 @@ int MORSE_Desc_Create_User(MORSE_desc_t **desc, void *mat, MORSE_enum dtyp, int
**desc = morse_desc_init_user(dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q,
get_blkaddr, get_blkldd, get_rankof);
/* if the user gives a pointer to the overall data (tiles) we can use it */
(**desc).use_mat = (mat == NULL) ? 0 : 1;
/* user data can take multiple forms: let the user register the tiles */
(**desc).register_mat = 0;
(**desc).mat = mat;
/* Create scheduler structure like registering data */
......
......@@ -188,7 +188,7 @@ int MORSE_Dealloc_Workspace(MORSE_desc_t **desc)
morse_error("MORSE_Dealloc_Workspace", "attempting to deallocate a NULL descriptor");
return MORSE_ERR_UNALLOCATED;
}
if ((*desc)->mat == NULL) {
if ((*desc)->mat == NULL && (*desc)->use_mat == 1) {
morse_error("MORSE_Dealloc_Worspace", "attempting to deallocate a NULL pointer");
return MORSE_ERR_UNALLOCATED;
}
......
......@@ -63,41 +63,48 @@ struct morse_desc_s;
typedef struct morse_desc_s MORSE_desc_t;
// Descriptor of a tile matrix: tile accessor callbacks, storage information,
// full-matrix and submatrix dimensions, distribution parameters and
// runtime bookkeeping fields.
struct morse_desc_s {
// function to get matrix tiles address
void *(*get_blkaddr)( const MORSE_desc_t*, int, int );
// function to get matrix tiles leading dimension
int (*get_blkldd )( const MORSE_desc_t*, int );
// function to get matrix tiles MPI rank
int (*get_rankof) ( const MORSE_desc_t*, int, int );
void *mat; // pointer to the beginning of the matrix
size_t A21; // pointer to the beginning of the matrix A21
size_t A12; // pointer to the beginning of the matrix A12
size_t A22; // pointer to the beginning of the matrix A22
MORSE_enum styp; // storage layout of the matrix
MORSE_enum dtyp; // precision of the matrix
int mb; // number of rows in a tile
int nb; // number of columns in a tile
int bsiz; // size in elements including padding
int lm; // number of rows of the entire matrix
int ln; // number of columns of the entire matrix
int lmt; // number of tile rows of the entire matrix - derived parameter
int lnt; // number of tile columns of the entire matrix - derived parameter
int i; // row index to the beginning of the submatrix
int j; // column index to the beginning of the submatrix
int m; // number of rows of the submatrix
int n; // number of columns of the submatrix
int mt; // number of tile rows of the submatrix - derived parameter
int nt; // number of tile columns of the submatrix - derived parameter
// Data for distributed cases
int p; // number of rows of the 2D distribution grid
int q; // number of columns of the 2D distribution grid
int llm; // number of rows of the 2D distribution grid
int lln; // number of columns of the 2D distribution grid
int llm1; // number of tile rows of the A11 matrix - derived parameter
int lln1; // number of tile columns of the A11 matrix - derived parameter
int llmt; // number of tile rows of the local (to a node) matrix
int llnt; // number of tile columns of the local (to a node) matrix
int id;
int occurences;
int myrank;
void *schedopt;
void *mat; // pointer to the beginning of the matrix
size_t A21; // location of the beginning of the matrix A21 (stored as size_t; presumably an offset from mat - TODO confirm)
size_t A12; // location of the beginning of the matrix A12 (stored as size_t; presumably an offset from mat - TODO confirm)
size_t A22; // location of the beginning of the matrix A22 (stored as size_t; presumably an offset from mat - TODO confirm)
MORSE_enum styp; // storage layout of the matrix
MORSE_enum dtyp; // precision of the matrix
int mb; // number of rows in a tile
int nb; // number of columns in a tile
int bsiz; // size in elements including padding
int lm; // number of rows of the entire matrix
int ln; // number of columns of the entire matrix
int lmt; // number of tile rows of the entire matrix - derived parameter
int lnt; // number of tile columns of the entire matrix - derived parameter
int i; // row index to the beginning of the submatrix
int j; // column index to the beginning of the submatrix
int m; // number of rows of the submatrix
int n; // number of columns of the submatrix
int mt; // number of tile rows of the submatrix - derived parameter
int nt; // number of tile columns of the submatrix - derived parameter
// Data for distributed cases
int p; // number of rows of the 2D distribution grid
int q; // number of columns of the 2D distribution grid
int llm; // NOTE(review): presumably number of rows of the local (to a node) matrix, not of the grid (that is p) - confirm
int lln; // NOTE(review): presumably number of columns of the local (to a node) matrix, not of the grid (that is q) - confirm
int llm1; // number of tile rows of the A11 matrix - derived parameter
int lln1; // number of tile columns of the A11 matrix - derived parameter
int llmt; // number of tile rows of the local (to a node) matrix
int llnt; // number of tile columns of the local (to a node) matrix
int id; // identification number of the descriptor
int occurences; // identify main matrix desc (occurences=1) or
// submatrix desc (occurences>1) to avoid unregistering
// GPU data twice
int use_mat; // 1 if we have a pointer to the overall data mat - else 0
int register_mat; // 1 if we have to register mat - else 0 (handled by the application)
int myrank; // MPI rank of the descriptor
void *schedopt; // scheduler (QUARK|StarPU) specific structure
};
......
......@@ -53,29 +53,31 @@ void RUNTIME_desc_create( MORSE_desc_t *desc )
desc->occurences = 1;
/*
* Allocate starpu_handle_t array (handlers are initialized on the fly when discovered by any algorithm to save space)
* Allocate starpu_handle_t array (handlers are initialized on the fly when
* discovered by any algorithm to save space)
*/
desc->schedopt = (void*)calloc(lnt*lmt,sizeof(starpu_data_handle_t));
assert(desc->schedopt);
tiles = (starpu_data_handle_t*)(desc->schedopt);
#if defined(CHAMELEON_USE_CUDA)
/*
* Register allocated memory as CUDA pinned memory
*/
{
int64_t eltsze = morse_element_size(desc->dtyp);
size_t size = (size_t)(desc->llm) * (size_t)(desc->lln) * eltsze;
/* Register the matrix as pinned memory */
if ( cudaHostRegister( desc->mat, size, cudaHostRegisterPortable ) != cudaSuccess )
if (desc->use_mat == 1 && desc->register_mat == 1){
/*
* Register allocated memory as CUDA pinned memory
*/
{
morse_warning("RUNTIME_desc_create(StarPU)", "cudaHostRegister failed to register the matrix as pinned memory");
int64_t eltsze = morse_element_size(desc->dtyp);
size_t size = (size_t)(desc->llm) * (size_t)(desc->lln) * eltsze;
/* Register the matrix as pinned memory */
if ( cudaHostRegister( desc->mat, size, cudaHostRegisterPortable ) != cudaSuccess )
{
morse_warning("RUNTIME_desc_create(StarPU)", "cudaHostRegister failed to register the matrix as pinned memory");
}
}
}
#endif
#if defined(CHAMELEON_USE_MPI)
/*
* Check that we are not going over MPI tag limitations
......@@ -131,7 +133,7 @@ void RUNTIME_desc_destroy( MORSE_desc_t *desc )
/*
* If this is the last descriptor using the matrix, we release the handle
* and deregister the GPU data
* and unregister the GPU data
*/
if ( desc->occurences == 0 ) {
starpu_data_handle_t *handle = (starpu_data_handle_t*)(desc->schedopt);
......@@ -154,7 +156,7 @@ void RUNTIME_desc_destroy( MORSE_desc_t *desc )
}
#if defined(CHAMELEON_USE_CUDA)
{
if (desc->use_mat == 1 && desc->register_mat == 1){
int64_t eltsze = morse_element_size(desc->dtyp);
size_t size = (size_t)(desc->llm) * (size_t)(desc->lln) * eltsze;
......@@ -162,7 +164,8 @@ void RUNTIME_desc_destroy( MORSE_desc_t *desc )
if (cudaHostUnregister(desc->mat) != cudaSuccess)
{
morse_warning("RUNTIME_desc_destroy(StarPU)",
"cudaHostUnregister failed to unregister the pinned memory associated to the matrix");
"cudaHostUnregister failed to unregister the "
"pinned memory associated to the matrix");
}
}
#endif /* defined(CHAMELEON_USE_CUDA) */
......@@ -183,7 +186,8 @@ void RUNTIME_desc_submatrix( MORSE_desc_t *desc )
return;
}
/* TODO: Acquire/Release/GetonCPU need to be studied carefully and fixed because we are not using them correctly */
/* TODO: Acquire/Release/GetonCPU need to be studied carefully and fixed
* because we are not using them correctly */
int RUNTIME_desc_acquire( MORSE_desc_t *desc )
{
starpu_data_handle_t *handle = (starpu_data_handle_t*)(desc->schedopt);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment