diff --git a/CMakeLists.txt b/CMakeLists.txt index 21caac4ee5dcc3c861e917b2b25660fd20cd08aa..64804a36e642eee68ad063184d372411a12e1fc5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,8 @@ include(RulesPrecisions) set(generated_sources "") set(generated_headers "") +include_directories("${CMAKE_CURRENT_SOURCE_DIR}/drivers") + set(HEADERS z_spm.h ) @@ -15,19 +17,25 @@ precisions_rules_py(generated_headers set(SOURCES z_spm.c + z_spm_2dense.c + z_spm_convert_col_row_major.c z_spm_convert_to_csc.c z_spm_convert_to_csr.c z_spm_convert_to_ijv.c + z_spm_dofs2flat.c + z_spm_expand.c z_spm_genrhs.c + z_spm_integer.c + z_spm_laplacian.c z_spm_matrixvector.c z_spm_norm.c - z_spm_2dense.c z_spm_print.c ) set(spm_headers ${generated_headers} spm.h + spm_drivers.h ) ### Generate the sources in all precisions @@ -39,17 +47,35 @@ set(spm_sources ${generated_sources} spm.c spm_io.c + spm_integers.c + spm_expand.c + spm_read_driver.c + drivers/skitf.f + drivers/iohb.c + drivers/mmio.c + drivers/laplacian.c + drivers/readhb.c + drivers/readijv.c + drivers/readmm.c + drivers/readrsa.c ) add_custom_target(spm_headers_tgt DEPENDS ${spm_headers} ) -set_source_files_properties( - spm.c - PROPERTIES DEPENDS spm_headers_tgt - ) - -set(PASTIX_LIB_SRCS - ${PASTIX_LIB_SRCS} +add_library(pastix_spm ${spm_sources} ) + +add_dependencies(pastix_spm + spm_headers_tgt +) + +### Generate the lib +if (MPI_FOUND) + set_target_properties(pastix_spm PROPERTIES COMPILE_FLAGS "${MPI_COMPILE_FLAGS}") +endif (MPI_FOUND) + +install(TARGETS pastix_spm + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) diff --git a/drivers/iohb.c b/drivers/iohb.c new file mode 100644 index 0000000000000000000000000000000000000000..bc0cb95bbefe46ad1667d51974fd66d3da70b428 --- /dev/null +++ b/drivers/iohb.c @@ -0,0 +1,1623 @@ +/* + Fri Aug 15 16:29:47 EDT 1997 + + Harwell-Boeing File I/O in C + V. 1.0 + + National Institute of Standards and Technology, MD. + K.A. Remington + + ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + NOTICE + + Permission to use, copy, modify, and distribute this software and + its documentation for any purpose and without fee is hereby granted + provided that the above copyright notice appear in all copies and + that both the copyright notice and this permission notice appear in + supporting documentation. + + Neither the Author nor the Institution (National Institute of Standards + and Technology) make any representations about the suitability of this + software for any purpose. This software is provided "as is" without + expressed or implied warranty. + ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + --------------------- + INTERFACE DESCRIPTION + --------------------- + --------------- + QUERY FUNCTIONS + --------------- + + FUNCTION: + + int readHB_info(const char *filename, int *M, int *N, int *nz, + char **Type, int *Nrhs) + + DESCRIPTION: + + The readHB_info function opens and reads the header information from + the specified Harwell-Boeing file, and reports back the number of rows + and columns in the stored matrix (M and N), the number of nonzeros in + the matrix (nz), the 3-character matrix type(Type), and the number of + right-hand-sides stored along with the matrix (Nrhs). This function + is designed to retrieve basic size information which can be used to + allocate arrays. + + FUNCTION: + + int readHB_header(FILE* in_file, char* Title, char* Key, char* Type, + int* Nrow, int* Ncol, int* Nnzero, int* Nrhs, + char* Ptrfmt, char* Indfmt, char* Valfmt, char* Rhsfmt, + int* Ptrcrd, int* Indcrd, int* Valcrd, int* Rhscrd, + char *Rhstype) + + DESCRIPTION: + + More detailed than the readHB_info function, readHB_header() reads from + the specified Harwell-Boeing file all of the header information. + + + ------------------------------ + DOUBLE PRECISION I/O FUNCTIONS + ------------------------------ + FUNCTION: + + int readHB_newmat_double(const char *filename, int *M, int *N, *int nz, + int **colptr, int **rowind, double**val) + + int readHB_mat_double(const char *filename, int *colptr, int *rowind, + double*val) + + + DESCRIPTION: + + This function opens and reads the specified file, interpreting its + contents as a sparse matrix stored in the Harwell/Boeing standard + format. (See readHB_aux_double to read auxillary vectors.) + -- Values are interpreted as double precision numbers. -- + + The "mat" function uses _pre-allocated_ vectors to hold the index and + nonzero value information. + + The "newmat" function allocates vectors to hold the index and nonzero + value information, and returns pointers to these vectors along with + matrix dimension and number of nonzeros. + + FUNCTION: + + int readHB_aux_double(const char* filename, const char AuxType, double b[]) + + int readHB_newaux_double(const char* filename, const char AuxType, double** b) + + DESCRIPTION: + + This function opens and reads from the specified file auxillary vector(s). + The char argument Auxtype determines which type of auxillary vector(s) + will be read (if present in the file). + + AuxType = 'F' right-hand-side + AuxType = 'G' initial estimate (Guess) + AuxType = 'X' eXact solution + + If Nrhs > 1, all of the Nrhs vectors of the given type are read and + stored in column-major order in the vector b. + + The "newaux" function allocates a vector to hold the values retrieved. + The "mat" function uses a _pre-allocated_ vector to hold the values. + + FUNCTION: + + int writeHB_mat_double(const char* filename, int M, int N, + int nz, const int colptr[], const int rowind[], + const double val[], int Nrhs, const double rhs[], + const double guess[], const double exact[], + const char* Title, const char* Key, const char* Type, + char* Ptrfmt, char* Indfmt, char* Valfmt, char* Rhsfmt, + const char* Rhstype) + + DESCRIPTION: + + The writeHB_mat_double function opens the named file and writes the specified + matrix and optional auxillary vector(s) to that file in Harwell-Boeing + format. The format arguments (Ptrfmt,Indfmt,Valfmt, and Rhsfmt) are + character strings specifying "Fortran-style" output formats -- as they + would appear in a Harwell-Boeing file. They are used to produce output + which is as close as possible to what would be produced by Fortran code, + but note that "D" and "P" edit descriptors are not supported. + If NULL, the following defaults will be used: + Ptrfmt = Indfmt = "(8I10)" + Valfmt = Rhsfmt = "(4E20.13)" + + ----------------------- + CHARACTER I/O FUNCTIONS + ----------------------- + FUNCTION: + + int readHB_mat_char(const char* filename, int colptr[], int rowind[], + char val[], char* Valfmt) + int readHB_newmat_char(const char* filename, int* M, int* N, int* nonzeros, + int** colptr, int** rowind, char** val, char** Valfmt) + + DESCRIPTION: + + This function opens and reads the specified file, interpreting its + contents as a sparse matrix stored in the Harwell/Boeing standard + format. (See readHB_aux_char to read auxillary vectors.) + -- Values are interpreted as char strings. -- + (Used to translate exact values from the file into a new storage format.) + + The "mat" function uses _pre-allocated_ arrays to hold the index and + nonzero value information. + + The "newmat" function allocates char arrays to hold the index + and nonzero value information, and returns pointers to these arrays + along with matrix dimension and number of nonzeros. + + FUNCTION: + + int readHB_aux_char(const char* filename, const char AuxType, char b[]) + int readHB_newaux_char(const char* filename, const char AuxType, char** b, + char** Rhsfmt) + + DESCRIPTION: + + This function opens and reads from the specified file auxillary vector(s). + The char argument Auxtype determines which type of auxillary vector(s) + will be read (if present in the file). + + AuxType = 'F' right-hand-side + AuxType = 'G' initial estimate (Guess) + AuxType = 'X' eXact solution + + If Nrhs > 1, all of the Nrhs vectors of the given type are read and + stored in column-major order in the vector b. + + The "newaux" function allocates a character array to hold the values + retrieved. + The "mat" function uses a _pre-allocated_ array to hold the values. + + FUNCTION: + + int writeHB_mat_char(const char* filename, int M, int N, + int nz, const int colptr[], const int rowind[], + const char val[], int Nrhs, const char rhs[], + const char guess[], const char exact[], + const char* Title, const char* Key, const char* Type, + char* Ptrfmt, char* Indfmt, char* Valfmt, char* Rhsfmt, + const char* Rhstype) + + DESCRIPTION: + + The writeHB_mat_char function opens the named file and writes the specified + matrix and optional auxillary vector(s) to that file in Harwell-Boeing + format. The format arguments (Ptrfmt,Indfmt,Valfmt, and Rhsfmt) are + character strings specifying "Fortran-style" output formats -- as they + would appear in a Harwell-Boeing file. Valfmt and Rhsfmt must accurately + represent the character representation of the values stored in val[] + and rhs[]. + + If NULL, the following defaults will be used for the integer vectors: + Ptrfmt = Indfmt = "(8I10)" + Valfmt = Rhsfmt = "(4E20.13)" + + + */ + +/*---------------------------------------------------------------------*/ +/* If zero-based indexing is desired, _SP_base should be set to 0 */ +/* This will cause indices read from H-B files to be decremented by 1 */ +/* and indices written to H-B files to be incremented by 1 */ +/* <<< Standard usage is _SP_base = 1 >>> */ +#ifndef _SP_base +#define _SP_base 1 +#endif +/*---------------------------------------------------------------------*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "common.h" +#include "spm_drivers.h" +#include "drivers/iohb.h" + +#define STR_SIZE 256 +#define FGETS(line, BUFSIZ, infile) \ + { \ + if (NULL == fgets(line, BUFSIZ, infile)) \ + { \ + fprintf(stderr, "ERROR: %s:%d fgets\n", __FILE__, __LINE__); \ + exit(1); \ + } \ + } + +char* substr(const char* S, const int pos, const int len); +void upcase(char* S); +void IOHBTerminate(char* message); + +int readHB_info(const char* filename, int* M, int* N, int* nz, char** Type, + int* Nrhs) +{ + /****************************************************************************/ + /* The readHB_info function opens and reads the header information from */ + /* the specified Harwell-Boeing file, and reports back the number of rows */ + /* and columns in the stored matrix (M and N), the number of nonzeros in */ + /* the matrix (nz), and the number of right-hand-sides stored along with */ + /* the matrix (Nrhs). */ + /* */ + /* For a description of the Harwell Boeing standard, see: */ + /* Duff, et al., ACM TOMS Vol.15, No.1, March 1989 */ + /* */ + /* ---------- */ + /* **CAVEAT** */ + /* ---------- */ + /* ** If the input file does not adhere to the H/B format, the ** */ + /* ** results will be unpredictable. ** */ + /* */ + /****************************************************************************/ + FILE *in_file; + int Ptrcrd, Indcrd, Valcrd, Rhscrd; + int Nrow, Ncol, Nnzero; + char Title[73], Key[9], Rhstype[4]; + char Ptrfmt[17], Indfmt[17], Valfmt[21], Rhsfmt[21]; + + if ( *Type == NULL ) IOHBTerminate("Type must be allocated"); + + if ( (in_file = fopen( filename, "r")) == NULL ) { + fprintf(stderr,"Error: Cannot open file: %s\n",filename); + return 0; + } + + readHB_header(in_file, Title, Key, *Type, &Nrow, &Ncol, &Nnzero, Nrhs, + Ptrfmt, Indfmt, Valfmt, Rhsfmt, + &Ptrcrd, &Indcrd, &Valcrd, &Rhscrd, Rhstype); + fclose(in_file); + *M = Nrow; + *N = Ncol; + *nz = Nnzero; + if (Rhscrd == 0) {*Nrhs = 0;} + + /* In verbose mode, print some of the header information: */ + /* + if (verbose == 1) + { + printf("Reading from Harwell-Boeing file %s (verbose on)...\n",filename); + printf(" Title: %s\n",Title); + printf(" Key: %s\n",Key); + printf(" The stored matrix is %i by %i with %i nonzeros.\n", + *M, *N, *nz ); + printf(" %i right-hand--side(s) stored.\n",*Nrhs); + } + */ + + return 1; +} + +int readHB_header(FILE* in_file, char* Title, char* Key, char* Type, + int* Nrow, int* Ncol, int* Nnzero, int* Nrhs, + char* Ptrfmt, char* Indfmt, char* Valfmt, char* Rhsfmt, + int* Ptrcrd, int* Indcrd, int* Valcrd, int* Rhscrd, + char *Rhstype) +{ + /*************************************************************************/ + /* Read header information from the named H/B file... */ + /*************************************************************************/ + int Totcrd,Neltvl,Nrhsix; + char line[BUFSIZ]; + + /* First line: */ + FGETS(line, BUFSIZ, in_file); + if ( sscanf(line,"%*s") < 0 ) + IOHBTerminate("iohb.c: Null (or blank) first line of HB file.\n"); + (void) sscanf(line, "%72c%8[^\n]", Title, Key); + *(Key+8) = '\0'; + *(Title+72) = '\0'; + + /* Second line: */ + FGETS(line, BUFSIZ, in_file); + if ( sscanf(line,"%*s") < 0 ) + IOHBTerminate("iohb.c: Null (or blank) second line of HB file.\n"); + if ( sscanf(line,"%i",&Totcrd) != 1) Totcrd = 0; + if ( sscanf(line,"%*i%i",Ptrcrd) != 1) *Ptrcrd = 0; + if ( sscanf(line,"%*i%*i%i",Indcrd) != 1) *Indcrd = 0; + if ( sscanf(line,"%*i%*i%*i%i",Valcrd) != 1) *Valcrd = 0; + if ( sscanf(line,"%*i%*i%*i%*i%i",Rhscrd) != 1) *Rhscrd = 0; + + /* Third line: */ + FGETS(line, BUFSIZ, in_file); + if ( sscanf(line,"%*s") < 0 ) + IOHBTerminate("iohb.c: Null (or blank) third line of HB file.\n"); + if ( sscanf(line, "%3c", Type) != 1) + IOHBTerminate("iohb.c: Invalid Type info, line 3 of Harwell-Boeing file.\n"); + Type[3] = '\0'; + upcase(Type); + if ( sscanf(line,"%*3c%i",Nrow) != 1) *Nrow = 0 ; + if ( sscanf(line,"%*3c%*i%i",Ncol) != 1) *Ncol = 0 ; + if ( sscanf(line,"%*3c%*i%*i%i",Nnzero) != 1) *Nnzero = 0 ; + if ( sscanf(line,"%*3c%*i%*i%*i%i",&Neltvl) != 1) Neltvl = 0 ; + + /* Fourth line: */ + FGETS(line, BUFSIZ, in_file); + if ( sscanf(line,"%*s") < 0 ) + IOHBTerminate("iohb.c: Null (or blank) fourth line of HB file.\n"); + if ( sscanf(line, "%16c",Ptrfmt) != 1) + IOHBTerminate("iohb.c: Invalid format info, line 4 of Harwell-Boeing file.\n"); + if ( sscanf(line, "%*16c%16c",Indfmt) != 1) + IOHBTerminate("iohb.c: Invalid format info, line 4 of Harwell-Boeing file.\n"); + if ( sscanf(line, "%*16c%*16c%20c",Valfmt) != 1) + IOHBTerminate("iohb.c: Invalid format info, line 4 of Harwell-Boeing file.\n"); + sscanf(line, "%*16c%*16c%*20c%20c",Rhsfmt); + *(Ptrfmt+16) = '\0'; + *(Indfmt+16) = '\0'; + *(Valfmt+20) = '\0'; + *(Rhsfmt+20) = '\0'; + + /* (Optional) Fifth line: */ + if (*Rhscrd != 0 ) + { + FGETS(line, BUFSIZ, in_file); + if ( sscanf(line,"%*s") < 0 ) + IOHBTerminate("iohb.c: Null (or blank) fifth line of HB file.\n"); + if ( sscanf(line, "%3c", Rhstype) != 1) + IOHBTerminate("iohb.c: Invalid RHS type information, line 5 of Harwell-Boeing file.\n"); + if ( sscanf(line, "%*3c%i", Nrhs) != 1) *Nrhs = 0; + if ( sscanf(line, "%*3c%*i%i", &Nrhsix) != 1) Nrhsix = 0; + } + return 1; +} + + +int readHB_mat_double(const char* filename, int colptr[], int rowind[], + double val[]) +{ + /****************************************************************************/ + /* This function opens and reads the specified file, interpreting its */ + /* contents as a sparse matrix stored in the Harwell/Boeing standard */ + /* format and creating compressed column storage scheme vectors to hold */ + /* the index and nonzero value information. */ + /* */ + /* ---------- */ + /* **CAVEAT** */ + /* ---------- */ + /* Parsing real formats from Fortran is tricky, and this file reader */ + /* does not claim to be foolproof. It has been tested for cases when */ + /* the real values are printed consistently and evenly spaced on each */ + /* line, with Fixed (F), and Exponential (E or D) formats. */ + /* */ + /* ** If the input file does not adhere to the H/B format, the ** */ + /* ** results will be unpredictable. ** */ + /* */ + /****************************************************************************/ + FILE *in_file; + int i,j,ind,col,offset,count,last,Nrhs; + int Ptrcrd, Indcrd, Valcrd, Rhscrd; + int Nrow, Ncol, Nnzero, Nentries; + int Ptrperline, Ptrwidth, Indperline, Indwidth; + int Valperline, Valwidth, Valprec; + char Valflag; /* Indicates 'E','D', or 'F' float format */ + char* ThisElement; + char Title[73], Key[8], Type[4], Rhstype[4]; + char Ptrfmt[17], Indfmt[17], Valfmt[21], Rhsfmt[21]; + char line[BUFSIZ]; + + if ( (in_file = fopen( filename, "r")) == NULL ) { + fprintf(stderr,"Error: Cannot open file: %s\n",filename); + return 0; + } + + readHB_header(in_file, Title, Key, Type, &Nrow, &Ncol, &Nnzero, &Nrhs, + Ptrfmt, Indfmt, Valfmt, Rhsfmt, + &Ptrcrd, &Indcrd, &Valcrd, &Rhscrd, Rhstype); + + /* Parse the array input formats from Line 3 of HB file */ + ParseIfmt(Ptrfmt,&Ptrperline,&Ptrwidth); + ParseIfmt(Indfmt,&Indperline,&Indwidth); + if ( Type[0] != 'P' ) { /* Skip if pattern only */ + ParseRfmt(Valfmt,&Valperline,&Valwidth,&Valprec,&Valflag); + } + + /* Read column pointer array: */ + + offset = 1-_SP_base; /* if base 0 storage is declared (via macro definition), */ + /* then storage entries are offset by 1 */ + + ThisElement = (char *) malloc(Ptrwidth+1); + if ( ThisElement == NULL ) IOHBTerminate("Insufficient memory for ThisElement."); + *(ThisElement+Ptrwidth) = '\0'; + count=0; + for (i=0;i<Ptrcrd;i++) + { + FGETS(line, BUFSIZ, in_file); + if ( sscanf(line,"%*s") < 0 ) + IOHBTerminate("iohb.c: Null (or blank) line in pointer data region of HB file.\n"); + col = 0; + for (ind = 0;ind<Ptrperline;ind++) + { + if (count > Ncol) break; + strncpy(ThisElement,line+col,Ptrwidth); + /* ThisElement = substr(line,col,Ptrwidth); */ + colptr[count] = atoi(ThisElement)-offset; + count++; col += Ptrwidth; + } + } + free(ThisElement); + + /* Read row index array: */ + + ThisElement = (char *) malloc(Indwidth+1); + if ( ThisElement == NULL ) IOHBTerminate("Insufficient memory for ThisElement."); + *(ThisElement+Indwidth) = '\0'; + count = 0; + for (i=0;i<Indcrd;i++) + { + FGETS(line, BUFSIZ, in_file); + if ( sscanf(line,"%*s") < 0 ) + IOHBTerminate("iohb.c: Null (or blank) line in index data region of HB file.\n"); + col = 0; + for (ind = 0;ind<Indperline;ind++) + { + if (count == Nnzero) break; + strncpy(ThisElement,line+col,Indwidth); + /* ThisElement = substr(line,col,Indwidth); */ + rowind[count] = atoi(ThisElement)-offset; + count++; col += Indwidth; + } + } + free(ThisElement); + + /* Read array of values: */ + + if ( Type[0] != 'P' ) { /* Skip if pattern only */ + + if ( Type[0] == 'C' ) Nentries = 2*Nnzero; + else Nentries = Nnzero; + + ThisElement = (char *) malloc(Valwidth+1); + if ( ThisElement == NULL ) IOHBTerminate("Insufficient memory for ThisElement."); + *(ThisElement+Valwidth) = '\0'; + count = 0; + for (i=0;i<Valcrd;i++) + { + FGETS(line, BUFSIZ, in_file); + if ( sscanf(line,"%*s") < 0 ) + IOHBTerminate("iohb.c: Null (or blank) line in value data region of HB file.\n"); + if (Valflag == 'D') { + while( strchr(line,'D') ) *strchr(line,'D') = 'E'; + /* *strchr(Valfmt,'D') = 'E'; */ + } + col = 0; + for (ind = 0;ind<Valperline;ind++) + { + if (count == Nentries) break; + strncpy(ThisElement,line+col,Valwidth); + /*ThisElement = substr(line,col,Valwidth);*/ + if ( Valflag != 'F' && strchr(ThisElement,'E') == NULL ) { + /* insert a char prefix for exp */ + last = strlen(ThisElement); + for (j=last+1;j>=0;j--) { + ThisElement[j] = ThisElement[j-1]; + if ( ThisElement[j] == '+' || ThisElement[j] == '-' ) { + ThisElement[j-1] = Valflag; + break; + } + } + } + val[count] = atof(ThisElement); + count++; col += Valwidth; + } + } + free(ThisElement); + } + + fclose(in_file); + return 1; +} + +int readHB_newmat_double(const char* filename, int* M, int* N, int* nonzeros, + int** colptr, int** rowind, double** val) +{ + int Nrhs; + char *Type = malloc(sizeof(char)*4); + + readHB_info(filename, M, N, nonzeros, &Type, &Nrhs); + + *colptr = (int *)malloc((*N+1)*sizeof(int)); + if ( *colptr == NULL ) IOHBTerminate("Insufficient memory for colptr.\n"); + *rowind = (int *)malloc(*nonzeros*sizeof(int)); + if ( *rowind == NULL ) IOHBTerminate("Insufficient memory for rowind.\n"); + if ( Type[0] == 'C' ) { + /* + fprintf(stderr, "Warning: Reading complex data from HB file %s.\n",filename); + fprintf(stderr, " Real and imaginary parts will be interlaced in val[].\n"); + */ + /* Malloc enough space for real AND imaginary parts of val[] */ + *val = (double *)malloc(*nonzeros*sizeof(double)*2); + if ( *val == NULL ) IOHBTerminate("Insufficient memory for val.\n"); + } else { + if ( Type[0] != 'P' ) { + /* Malloc enough space for real array val[] */ + *val = (double *)malloc(*nonzeros*sizeof(double)); + if ( *val == NULL ) IOHBTerminate("Insufficient memory for val.\n"); + } + } /* No val[] space needed if pattern only */ + free(Type); + return readHB_mat_double(filename, *colptr, *rowind, *val); + +} + +int readHB_aux_double(const char* filename, const char AuxType, double b[]) +{ + /****************************************************************************/ + /* This function opens and reads the specified file, placing auxillary */ + /* vector(s) of the given type (if available) in b. */ + /* Return value is the number of vectors successfully read. */ + /* */ + /* AuxType = 'F' full right-hand-side vector(s) */ + /* AuxType = 'G' initial Guess vector(s) */ + /* AuxType = 'X' eXact solution vector(s) */ + /* */ + /* ---------- */ + /* **CAVEAT** */ + /* ---------- */ + /* Parsing real formats from Fortran is tricky, and this file reader */ + /* does not claim to be foolproof. It has been tested for cases when */ + /* the real values are printed consistently and evenly spaced on each */ + /* line, with Fixed (F), and Exponential (E or D) formats. */ + /* */ + /* ** If the input file does not adhere to the H/B format, the ** */ + /* ** results will be unpredictable. ** */ + /* */ + /****************************************************************************/ + FILE *in_file; + int i,j,n,maxcol,start,stride,col,last,linel; + int Ptrcrd, Indcrd, Valcrd, Rhscrd; + int Nrow, Ncol, Nnzero, Nentries; + int Nrhs, nvecs, rhsi; + int Rhsperline, Rhswidth, Rhsprec; + char Rhsflag; + char *ThisElement; + char Title[73], Key[9], Type[4], Rhstype[4]; + char Ptrfmt[17], Indfmt[17], Valfmt[21], Rhsfmt[21]; + char line[BUFSIZ]; + + if ((in_file = fopen( filename, "r")) == NULL) { + fprintf(stderr,"Error: Cannot open file: %s\n",filename); + return 0; + } + + readHB_header(in_file, Title, Key, Type, &Nrow, &Ncol, &Nnzero, &Nrhs, + Ptrfmt, Indfmt, Valfmt, Rhsfmt, + &Ptrcrd, &Indcrd, &Valcrd, &Rhscrd, Rhstype); + + if (Nrhs <= 0) + { + fprintf(stderr, "Warn: Attempt to read auxillary vector(s) when none are present.\n"); + return 0; + } + if (Rhstype[0] != 'F' ) + { + fprintf(stderr,"Warn: Attempt to read auxillary vector(s) which are not stored in Full form.\n"); + fprintf(stderr," Rhs must be specified as full. \n"); + return 0; + } + + /* If reading complex data, allow for interleaved real and imaginary values. */ + if ( Type[0] == 'C' ) { + Nentries = 2*Nrow; + } else { + Nentries = Nrow; + } + + nvecs = 1; + + if ( Rhstype[1] == 'G' ) nvecs++; + if ( Rhstype[2] == 'X' ) nvecs++; + + if ( AuxType == 'G' && Rhstype[1] != 'G' ) { + fprintf(stderr, "Warn: Attempt to read auxillary Guess vector(s) when none are present.\n"); + return 0; + } + if ( AuxType == 'X' && Rhstype[2] != 'X' ) { + fprintf(stderr, "Warn: Attempt to read auxillary eXact solution vector(s) when none are present.\n"); + return 0; + } + + ParseRfmt(Rhsfmt, &Rhsperline, &Rhswidth, &Rhsprec,&Rhsflag); + maxcol = Rhsperline*Rhswidth; + + /* Lines to skip before starting to read RHS values... */ + n = Ptrcrd + Indcrd + Valcrd; + + for (i = 0; i < n; i++) + FGETS(line, BUFSIZ, in_file); + + /* start - number of initial aux vector entries to skip */ + /* to reach first vector requested */ + /* stride - number of aux vector entries to skip between */ + /* requested vectors */ + if ( AuxType == 'F' ) start = 0; + else if ( AuxType == 'G' ) start = Nentries; + else start = (nvecs-1)*Nentries; + stride = (nvecs-1)*Nentries; + + FGETS(line, BUFSIZ, in_file); + linel= strchr(line,'\n')-line; + col = 0; + /* Skip to initial offset */ + + for (i=0;i<start;i++) { + if ( col >= ( maxcol<linel?maxcol:linel ) ) { + FGETS(line, BUFSIZ, in_file); + linel= strchr(line,'\n')-line; + col = 0; + } + col += Rhswidth; + } + if (Rhsflag == 'D') { + while( strchr(line,'D') ) *strchr(line,'D') = 'E'; + } + + /* Read a vector of desired type, then skip to next */ + /* repeating to fill Nrhs vectors */ + + ThisElement = (char *) malloc(Rhswidth+1); + if ( ThisElement == NULL ) IOHBTerminate("Insufficient memory for ThisElement."); + *(ThisElement+Rhswidth) = '\0'; + for (rhsi=0;rhsi<Nrhs;rhsi++) { + + for (i=0;i<Nentries;i++) { + if ( col >= ( maxcol<linel?maxcol:linel ) ) { + FGETS(line, BUFSIZ, in_file); + linel= strchr(line,'\n')-line; + if (Rhsflag == 'D') { + while( strchr(line,'D') ) *strchr(line,'D') = 'E'; + } + col = 0; + } + strncpy(ThisElement,line+col,Rhswidth); + /*ThisElement = substr(line, col, Rhswidth);*/ + if ( Rhsflag != 'F' && strchr(ThisElement,'E') == NULL ) { + /* insert a char prefix for exp */ + last = strlen(ThisElement); + for (j=last+1;j>=0;j--) { + ThisElement[j] = ThisElement[j-1]; + if ( ThisElement[j] == '+' || ThisElement[j] == '-' ) { + ThisElement[j-1] = Rhsflag; + break; + } + } + } + b[i] = atof(ThisElement); + col += Rhswidth; + } + + /* Skip any interleaved Guess/eXact vectors */ + + for (i=0;i<stride;i++) { + if ( col >= ( maxcol<linel?maxcol:linel ) ) { + FGETS(line, BUFSIZ, in_file); + linel= strchr(line,'\n')-line; + col = 0; + } + col += Rhswidth; + } + + } + free(ThisElement); + + + fclose(in_file); + return Nrhs; +} + +int readHB_newaux_double(const char* filename, const char AuxType, double** b) +{ + int Nrhs = 0; + int M = 0; + int N = 0; + int nonzeros = 0; + char *Type = NULL; + + readHB_info(filename, &M, &N, &nonzeros, &Type, &Nrhs); + if ( Nrhs <= 0 ) { + fprintf(stderr,"Warn: Requested read of aux vector(s) when none are present.\n"); + return 0; + } else { + if ( Type[0] == 'C' ) { + fprintf(stderr, "Warning: Reading complex aux vector(s) from HB file %s.",filename); + fprintf(stderr, " Real and imaginary parts will be interlaced in b[]."); + *b = (double *)malloc(M*Nrhs*sizeof(double)*2); + if ( *b == NULL ) IOHBTerminate("Insufficient memory for rhs.\n"); + return readHB_aux_double(filename, AuxType, *b); + } else { + *b = (double *)malloc(M*Nrhs*sizeof(double)); + if ( *b == NULL ) IOHBTerminate("Insufficient memory for rhs.\n"); + return readHB_aux_double(filename, AuxType, *b); + } + } +} + +int writeHB_mat_double(const char* filename, int M, int N, + int nz, const int colptr[], const int rowind[], + const double val[], int Nrhs, const double rhs[], + const double guess[], const double exact[], + const char* Title, const char* Key, const char* Type, + char* Ptrfmt, char* Indfmt, char* Valfmt, char* Rhsfmt, + const char* Rhstype) +{ + /****************************************************************************/ + /* The writeHB function opens the named file and writes the specified */ + /* matrix and optional right-hand-side(s) to that file in Harwell-Boeing */ + /* format. */ + /* */ + /* For a description of the Harwell Boeing standard, see: */ + /* Duff, et al., ACM TOMS Vol.15, No.1, March 1989 */ + /* */ + /****************************************************************************/ + FILE *out_file; + int i,j,entry,offset,acount,linemod; + int totcrd, ptrcrd, indcrd, valcrd, rhscrd; + int nvalentries, nrhsentries; + int Ptrperline, Ptrwidth, Indperline, Indwidth; + int Rhsperline, Rhswidth, Rhsprec; + char Rhsflag; + int Valperline, Valwidth, Valprec; + char Valflag; /* Indicates 'E','D', or 'F' float format */ + char pformat[16],iformat[16],vformat[19],rformat[19]; + + if ( Type[0] == 'C' ) { + nvalentries = 2*nz; + nrhsentries = 2*M; + } else { + nvalentries = nz; + nrhsentries = M; + } + + if ( filename != NULL ) { + if ( (out_file = fopen( filename, "w")) == NULL ) { + fprintf(stderr,"Error: Cannot open file: %s\n",filename); + return 0; + } + } else out_file = stdout; + + if ( Ptrfmt == NULL ) Ptrfmt = "(8I10)"; + ParseIfmt(Ptrfmt,&Ptrperline,&Ptrwidth); + sprintf(pformat,"%%%dd",Ptrwidth); + ptrcrd = (N+1)/Ptrperline; + if ( (N+1)%Ptrperline != 0) ptrcrd++; + + if ( Indfmt == NULL ) Indfmt = Ptrfmt; + ParseIfmt(Indfmt,&Indperline,&Indwidth); + sprintf(iformat,"%%%dd",Indwidth); + indcrd = nz/Indperline; + if ( nz%Indperline != 0) indcrd++; + + if ( Type[0] != 'P' ) { /* Skip if pattern only */ + if ( Valfmt == NULL ) Valfmt = "(4E20.13)"; + ParseRfmt(Valfmt,&Valperline,&Valwidth,&Valprec,&Valflag); + if (Valflag == 'D') *strchr(Valfmt,'D') = 'E'; + if (Valflag == 'F') + sprintf(vformat,"%% %d.%df",Valwidth,Valprec); + else + sprintf(vformat,"%% %d.%dE",Valwidth,Valprec); + valcrd = nvalentries/Valperline; + if ( nvalentries%Valperline != 0) valcrd++; + } else valcrd = 0; + + if ( Nrhs > 0 ) { + if ( Rhsfmt == NULL ) Rhsfmt = Valfmt; + ParseRfmt(Rhsfmt,&Rhsperline,&Rhswidth,&Rhsprec, &Rhsflag); + if (Rhsflag == 'F') + sprintf(rformat,"%% %d.%df",Rhswidth,Rhsprec); + else + sprintf(rformat,"%% %d.%dE",Rhswidth,Rhsprec); + if (Rhsflag == 'D') *strchr(Rhsfmt,'D') = 'E'; + rhscrd = nrhsentries/Rhsperline; + if ( nrhsentries%Rhsperline != 0) rhscrd++; + if ( Rhstype[1] == 'G' ) rhscrd+=rhscrd; + if ( Rhstype[2] == 'X' ) rhscrd+=rhscrd; + rhscrd*=Nrhs; + } else rhscrd = 0; + + totcrd = 4+ptrcrd+indcrd+valcrd+rhscrd; + + + /* Print header information: */ + + fprintf(out_file,"%-72s%-8s\n%14d%14d%14d%14d%14d\n",Title, Key, totcrd, + ptrcrd, indcrd, valcrd, rhscrd); + fprintf(out_file,"%3s%11s%14d%14d%14d\n",Type," ", M, N, nz); + fprintf(out_file,"%-16s%-16s%-20s", Ptrfmt, Indfmt, Valfmt); + if ( Nrhs != 0 ) { + /* Print Rhsfmt on fourth line and */ + /* optional fifth header line for auxillary vector information: */ + fprintf(out_file,"%-20s\n%-14s%d\n",Rhsfmt,Rhstype,Nrhs); + } else fprintf(out_file,"\n"); + + offset = 1-_SP_base; /* if base 0 storage is declared (via macro definition), */ + /* then storage entries are offset by 1 */ + + /* Print column pointers: */ + for (i=0;i<N+1;i++) + { + entry = colptr[i]+offset; + fprintf(out_file,pformat,entry); + if ( (i+1)%Ptrperline == 0 ) fprintf(out_file,"\n"); + } + + if ( (N+1) % Ptrperline != 0 ) fprintf(out_file,"\n"); + + /* Print row indices: */ + for (i=0;i<nz;i++) + { + entry = rowind[i]+offset; + fprintf(out_file,iformat,entry); + if ( (i+1)%Indperline == 0 ) fprintf(out_file,"\n"); + } + + if ( nz % Indperline != 0 ) fprintf(out_file,"\n"); + + /* Print values: */ + + if ( Type[0] != 'P' ) { /* Skip if pattern only */ + + for (i=0;i<nvalentries;i++) + { + fprintf(out_file,vformat,val[i]); + if ( (i+1)%Valperline == 0 ) fprintf(out_file,"\n"); + } + + if ( nvalentries % Valperline != 0 ) fprintf(out_file,"\n"); + + /* If available, print right hand sides, + guess vectors and exact solution vectors: */ + acount = 1; + linemod = 0; + if ( Nrhs > 0 ) { + for (i=0;i<Nrhs;i++) + { + for ( j=0;j<nrhsentries;j++ ) { + fprintf(out_file,rformat,rhs[j]); + if ( acount++%Rhsperline == linemod ) fprintf(out_file,"\n"); + } + if ( acount%Rhsperline != linemod ) { + fprintf(out_file,"\n"); + linemod = (acount-1)%Rhsperline; + } + rhs += nrhsentries; + if ( Rhstype[1] == 'G' ) { + for ( j=0;j<nrhsentries;j++ ) { + fprintf(out_file,rformat,guess[j]); + if ( acount++%Rhsperline == linemod ) fprintf(out_file,"\n"); + } + if ( acount%Rhsperline != linemod ) { + fprintf(out_file,"\n"); + linemod = (acount-1)%Rhsperline; + } + guess += nrhsentries; + } + if ( Rhstype[2] == 'X' ) { + for ( j=0;j<nrhsentries;j++ ) { + fprintf(out_file,rformat,exact[j]); + if ( acount++%Rhsperline == linemod ) fprintf(out_file,"\n"); + } + if ( acount%Rhsperline != linemod ) { + fprintf(out_file,"\n"); + linemod = (acount-1)%Rhsperline; + } + exact += nrhsentries; + } + } + } + + } + + if ( fclose(out_file) != 0){ + fprintf(stderr,"Error closing file in writeHB_mat_double().\n"); + return 0; + } else return 1; + +} + +int readHB_mat_char(const char* filename, int colptr[], int rowind[], + char val[], char* Valfmt) +{ + /****************************************************************************/ + /* This function opens and reads the specified file, interpreting its */ + /* contents as a sparse matrix stored in the Harwell/Boeing standard */ + /* format and creating compressed column storage scheme vectors to hold */ + /* the index and nonzero value information. */ + /* */ + /* ---------- */ + /* **CAVEAT** */ + /* ---------- */ + /* Parsing real formats from Fortran is tricky, and this file reader */ + /* does not claim to be foolproof. It has been tested for cases when */ + /* the real values are printed consistently and evenly spaced on each */ + /* line, with Fixed (F), and Exponential (E or D) formats. */ + /* */ + /* ** If the input file does not adhere to the H/B format, the ** */ + /* ** results will be unpredictable. ** */ + /* */ + /****************************************************************************/ + FILE *in_file; + int i,j,ind,col,offset,count,last; + int Nrow,Ncol,Nnzero,Nentries,Nrhs; + int Ptrcrd, Indcrd, Valcrd, Rhscrd; + int Ptrperline, Ptrwidth, Indperline, Indwidth; + int Valperline, Valwidth, Valprec; + char Valflag; /* Indicates 'E','D', or 'F' float format */ + char* ThisElement; + char line[BUFSIZ]; + char Title[73], Key[8], Type[4], Rhstype[4]; + char Ptrfmt[17], Indfmt[17], Rhsfmt[21]; + + if ( (in_file = fopen( filename, "r")) == NULL ) { + fprintf(stderr,"Error: Cannot open file: %s\n",filename); + return 0; + } + + readHB_header(in_file, Title, Key, Type, &Nrow, &Ncol, &Nnzero, &Nrhs, + Ptrfmt, Indfmt, Valfmt, Rhsfmt, + &Ptrcrd, &Indcrd, &Valcrd, &Rhscrd, Rhstype); + + /* Parse the array input formats from Line 3 of HB file */ + ParseIfmt(Ptrfmt,&Ptrperline,&Ptrwidth); + ParseIfmt(Indfmt,&Indperline,&Indwidth); + if ( Type[0] != 'P' ) { /* Skip if pattern only */ + ParseRfmt(Valfmt,&Valperline,&Valwidth,&Valprec,&Valflag); + if (Valflag == 'D') { + *strchr(Valfmt,'D') = 'E'; + } + } + + /* Read column pointer array: */ + + offset = 1-_SP_base; /* if base 0 storage is declared (via macro definition), */ + /* then storage entries are offset by 1 */ + + ThisElement = (char *) malloc(Ptrwidth+1); + if ( ThisElement == NULL ) IOHBTerminate("Insufficient memory for ThisElement."); + *(ThisElement+Ptrwidth) = '\0'; + count=0; + for (i=0;i<Ptrcrd;i++) + { + FGETS(line, BUFSIZ, in_file); + if ( sscanf(line,"%*s") < 0 ) + IOHBTerminate("iohb.c: Null (or blank) line in pointer data region of HB file.\n"); + col = 0; + for (ind = 0;ind<Ptrperline;ind++) + { + if (count > Ncol) break; + strncpy(ThisElement,line+col,Ptrwidth); + /*ThisElement = substr(line,col,Ptrwidth);*/ + colptr[count] = atoi(ThisElement)-offset; + count++; col += Ptrwidth; + } + } + free(ThisElement); + + /* Read row index array: */ + + ThisElement = (char *) malloc(Indwidth+1); + if ( ThisElement == NULL ) IOHBTerminate("Insufficient memory for ThisElement."); + *(ThisElement+Indwidth) = '\0'; + count = 0; + for (i=0;i<Indcrd;i++) + { + FGETS(line, BUFSIZ, in_file); + if ( sscanf(line,"%*s") < 0 ) + IOHBTerminate("iohb.c: Null (or blank) line in index data region of HB file.\n"); + col = 0; + for (ind = 0;ind<Indperline;ind++) + { + if (count == Nnzero) break; + strncpy(ThisElement,line+col,Indwidth); + /*ThisElement = substr(line,col,Indwidth);*/ + rowind[count] = atoi(ThisElement)-offset; + count++; col += Indwidth; + } + } + free(ThisElement); + + /* Read array of values: AS CHARACTERS*/ + + if ( Type[0] != 'P' ) { /* Skip if pattern only */ + + if ( Type[0] == 'C' ) Nentries = 2*Nnzero; + else Nentries = Nnzero; + + ThisElement = (char *) malloc(Valwidth+1); + if ( ThisElement == NULL ) IOHBTerminate("Insufficient memory for ThisElement."); + *(ThisElement+Valwidth) = '\0'; + count = 0; + for (i=0;i<Valcrd;i++) + { + FGETS(line, BUFSIZ, in_file); + if ( sscanf(line,"%*s") < 0 ) + IOHBTerminate("iohb.c: Null (or blank) line in value data region of HB file.\n"); + if (Valflag == 'D') { + while( strchr(line,'D') ) *strchr(line,'D') = 'E'; + } + col = 0; + for (ind = 0;ind<Valperline;ind++) + { + if (count == Nentries) break; + ThisElement = &val[count*Valwidth]; + strncpy(ThisElement,line+col,Valwidth); + /*strncpy(ThisElement,substr(line,col,Valwidth),Valwidth);*/ + if ( Valflag != 'F' && strchr(ThisElement,'E') == NULL ) { + /* insert a char prefix for exp */ + last = strlen(ThisElement); + for (j=last+1;j>=0;j--) { + ThisElement[j] = ThisElement[j-1]; + if ( ThisElement[j] == '+' || ThisElement[j] == '-' ) { + ThisElement[j-1] = Valflag; + break; + } + } + } + count++; col += Valwidth; + } + } + } + + return 1; +} + +int readHB_newmat_char(const char* filename, int* M, int* N, int* nonzeros, int** colptr, + int** rowind, char** val, char** Valfmt) +{ + FILE *in_file; + int Nrhs; + int Ptrcrd, Indcrd, Valcrd, Rhscrd; + int Valperline, Valwidth, Valprec; + char Valflag; /* Indicates 'E','D', or 'F' float format */ + char Title[73], Key[9], Type[4], Rhstype[4]; + char Ptrfmt[17], Indfmt[17], Rhsfmt[21]; + + if ((in_file = fopen( filename, "r")) == NULL) { + fprintf(stderr,"Error: Cannot open file: %s\n",filename); + return 0; + } + + *Valfmt = (char *)malloc(21*sizeof(char)); + if ( *Valfmt == NULL ) IOHBTerminate("Insufficient memory for Valfmt."); + readHB_header(in_file, Title, Key, Type, M, N, nonzeros, &Nrhs, + Ptrfmt, Indfmt, (*Valfmt), Rhsfmt, + &Ptrcrd, &Indcrd, &Valcrd, &Rhscrd, Rhstype); + fclose(in_file); + ParseRfmt(*Valfmt,&Valperline,&Valwidth,&Valprec,&Valflag); + + *colptr = (int *)malloc((*N+1)*sizeof(int)); + if ( *colptr == NULL ) IOHBTerminate("Insufficient memory for colptr.\n"); + *rowind = (int *)malloc(*nonzeros*sizeof(int)); + if ( *rowind == NULL ) IOHBTerminate("Insufficient memory for rowind.\n"); + if ( Type[0] == 'C' ) { + /* + fprintf(stderr, "Warning: Reading complex data from HB file %s.\n",filename); + fprintf(stderr, " Real and imaginary parts will be interlaced in val[].\n"); + */ + /* Malloc enough space for real AND imaginary parts of val[] */ + *val = (char *)malloc(*nonzeros*Valwidth*sizeof(char)*2); + if ( *val == NULL ) IOHBTerminate("Insufficient memory for val.\n"); + } else { + if ( Type[0] != 'P' ) { + /* Malloc enough space for real array val[] */ + *val = (char *)malloc(*nonzeros*Valwidth*sizeof(char)); + if ( *val == NULL ) IOHBTerminate("Insufficient memory for val.\n"); + } + } /* No val[] space needed if pattern only */ + return readHB_mat_char(filename, *colptr, *rowind, *val, *Valfmt); + +} + +int readHB_aux_char(const char* filename, const char AuxType, char b[]) +{ + /****************************************************************************/ + /* This function opens and reads the specified file, placing auxilary */ + /* vector(s) of the given type (if available) in b : */ + /* Return value is the number of vectors successfully read. */ + /* */ + /* AuxType = 'F' full right-hand-side vector(s) */ + /* AuxType = 'G' initial Guess vector(s) */ + /* AuxType = 'X' eXact solution vector(s) */ + /* */ + /* ---------- */ + /* **CAVEAT** */ + /* ---------- */ + /* Parsing real formats from Fortran is tricky, and this file reader */ + /* does not claim to be foolproof. It has been tested for cases when */ + /* the real values are printed consistently and evenly spaced on each */ + /* line, with Fixed (F), and Exponential (E or D) formats. */ + /* */ + /* ** If the input file does not adhere to the H/B format, the ** */ + /* ** results will be unpredictable. ** */ + /* */ + /****************************************************************************/ + FILE *in_file; + int i,j,n,maxcol,start,stride,col,last,linel,nvecs,rhsi; + int Nrow, Ncol, Nnzero, Nentries,Nrhs; + int Ptrcrd, Indcrd, Valcrd, Rhscrd; + int Rhsperline, Rhswidth, Rhsprec; + char Rhsflag; + char Title[73], Key[9], Type[4], Rhstype[4]; + char Ptrfmt[17], Indfmt[17], Valfmt[21], Rhsfmt[21]; + char line[BUFSIZ]; + char *ThisElement; + + if ((in_file = fopen( filename, "r")) == NULL) { + fprintf(stderr,"Error: Cannot open file: %s\n",filename); + return 0; + } + + readHB_header(in_file, Title, Key, Type, &Nrow, &Ncol, &Nnzero, &Nrhs, + Ptrfmt, Indfmt, Valfmt, Rhsfmt, + &Ptrcrd, &Indcrd, &Valcrd, &Rhscrd, Rhstype); + + if (Nrhs <= 0) + { + fprintf(stderr, "Warn: Attempt to read auxillary vector(s) when none are present.\n"); + return 0; + } + if (Rhstype[0] != 'F' ) + { + fprintf(stderr,"Warn: Attempt to read auxillary vector(s) which are not stored in Full form.\n"); + fprintf(stderr," Rhs must be specified as full. \n"); + return 0; + } + + /* If reading complex data, allow for interleaved real and imaginary values. */ + if ( Type[0] == 'C' ) { + Nentries = 2*Nrow; + } else { + Nentries = Nrow; + } + + nvecs = 1; + + if ( Rhstype[1] == 'G' ) nvecs++; + if ( Rhstype[2] == 'X' ) nvecs++; + + if ( AuxType == 'G' && Rhstype[1] != 'G' ) { + fprintf(stderr, "Warn: Attempt to read auxillary Guess vector(s) when none are present.\n"); + return 0; + } + if ( AuxType == 'X' && Rhstype[2] != 'X' ) { + fprintf(stderr, "Warn: Attempt to read auxillary eXact solution vector(s) when none are present.\n"); + return 0; + } + + ParseRfmt(Rhsfmt, &Rhsperline, &Rhswidth, &Rhsprec,&Rhsflag); + maxcol = Rhsperline*Rhswidth; + + /* Lines to skip before starting to read RHS values... */ + n = Ptrcrd + Indcrd + Valcrd; + + for (i = 0; i < n; i++) + FGETS(line, BUFSIZ, in_file); + + /* start - number of initial aux vector entries to skip */ + /* to reach first vector requested */ + /* stride - number of aux vector entries to skip between */ + /* requested vectors */ + if ( AuxType == 'F' ) start = 0; + else if ( AuxType == 'G' ) start = Nentries; + else start = (nvecs-1)*Nentries; + stride = (nvecs-1)*Nentries; + + FGETS(line, BUFSIZ, in_file); + linel= strchr(line,'\n')-line; + if ( sscanf(line,"%*s") < 0 ) + IOHBTerminate("iohb.c: Null (or blank) line in auxillary vector data region of HB file.\n"); + col = 0; + /* Skip to initial offset */ + + for (i=0;i<start;i++) { + col += Rhswidth; + if ( col >= ( maxcol<linel?maxcol:linel ) ) { + FGETS(line, BUFSIZ, in_file); + linel= strchr(line,'\n')-line; + if ( sscanf(line,"%*s") < 0 ) + IOHBTerminate("iohb.c: Null (or blank) line in auxillary vector data region of HB file.\n"); + col = 0; + } + } + + if (Rhsflag == 'D') { + while( strchr(line,'D') ) *strchr(line,'D') = 'E'; + } + /* Read a vector of desired type, then skip to next */ + /* repeating to fill Nrhs vectors */ + + for (rhsi=0;rhsi<Nrhs;rhsi++) { + + for (i=0;i<Nentries;i++) { + if ( col >= ( maxcol<linel?maxcol:linel ) ) { + FGETS(line, BUFSIZ, in_file); + linel= strchr(line,'\n')-line; + if ( sscanf(line,"%*s") < 0 ) + IOHBTerminate("iohb.c: Null (or blank) line in auxillary vector data region of HB file.\n"); + if (Rhsflag == 'D') { + while( strchr(line,'D') ) *strchr(line,'D') = 'E'; + } + col = 0; + } + ThisElement = &b[i*Rhswidth]; + strncpy(ThisElement,line+col,Rhswidth); + if ( Rhsflag != 'F' && strchr(ThisElement,'E') == NULL ) { + /* insert a char prefix for exp */ + last = strlen(ThisElement); + for (j=last+1;j>=0;j--) { + ThisElement[j] = ThisElement[j-1]; + if ( ThisElement[j] == '+' || ThisElement[j] == '-' ) { + ThisElement[j-1] = Rhsflag; + break; + } + } + } + col += Rhswidth; + } + b+=Nentries*Rhswidth; + + /* Skip any interleaved Guess/eXact vectors */ + + for (i=0;i<stride;i++) { + col += Rhswidth; + if ( col >= ( maxcol<linel?maxcol:linel ) ) { + FGETS(line, BUFSIZ, in_file); + linel= strchr(line,'\n')-line; + if ( sscanf(line,"%*s") < 0 ) + IOHBTerminate("iohb.c: Null (or blank) line in auxillary vector data region of HB file.\n"); + col = 0; + } + } + + } + + + fclose(in_file); + return Nrhs; +} + +int readHB_newaux_char(const char* filename, const char AuxType, char** b, char** Rhsfmt) +{ + FILE *in_file; + int Ptrcrd, Indcrd, Valcrd, Rhscrd; + int Nrow,Ncol,Nnzero,Nrhs; + int Rhsperline, Rhswidth, Rhsprec; + char Rhsflag; + char Title[73], Key[9], Type[4], Rhstype[4]; + char Ptrfmt[17], Indfmt[17], Valfmt[21]; + + if ((in_file = fopen( filename, "r")) == NULL) { + fprintf(stderr,"Error: Cannot open file: %s\n",filename); + return 0; + } + + *Rhsfmt = (char *)malloc(21*sizeof(char)); + if ( *Rhsfmt == NULL ) IOHBTerminate("Insufficient memory for Rhsfmt."); + readHB_header(in_file, Title, Key, Type, &Nrow, &Ncol, &Nnzero, &Nrhs, + Ptrfmt, Indfmt, Valfmt, (*Rhsfmt), + &Ptrcrd, &Indcrd, &Valcrd, &Rhscrd, Rhstype); + fclose(in_file); + if ( Nrhs == 0 ) { + fprintf(stderr,"Warn: Requested read of aux vector(s) when none are present.\n"); + return 0; + } else { + ParseRfmt(*Rhsfmt,&Rhsperline,&Rhswidth,&Rhsprec,&Rhsflag); + if ( Type[0] == 'C' ) { + fprintf(stderr, "Warning: Reading complex aux vector(s) from HB file %s.",filename); + fprintf(stderr, " Real and imaginary parts will be interlaced in b[]."); + *b = (char *)malloc(Nrow*Nrhs*Rhswidth*sizeof(char)*2); + if ( *b == NULL ) IOHBTerminate("Insufficient memory for rhs.\n"); + return readHB_aux_char(filename, AuxType, *b); + } else { + *b = (char *)malloc(Nrow*Nrhs*Rhswidth*sizeof(char)); + if ( *b == NULL ) IOHBTerminate("Insufficient memory for rhs.\n"); + return readHB_aux_char(filename, AuxType, *b); + } + } +} + +int writeHB_mat_char(const char* filename, int M, int N, + int nz, const int colptr[], const int rowind[], + const char val[], int Nrhs, const char rhs[], + const char guess[], const char exact[], + const char* Title, const char* Key, const char* Type, + char* Ptrfmt, char* Indfmt, char* Valfmt, char* Rhsfmt, + const char* Rhstype) +{ + /****************************************************************************/ + /* The writeHB function opens the named file and writes the specified */ + /* matrix and optional right-hand-side(s) to that file in Harwell-Boeing */ + /* format. */ + /* */ + /* For a description of the Harwell Boeing standard, see: */ + /* Duff, et al., ACM TOMS Vol.15, No.1, March 1989 */ + /* */ + /****************************************************************************/ + FILE *out_file; + int i,j,acount,linemod,entry,offset; + int totcrd, ptrcrd, indcrd, valcrd, rhscrd; + int nvalentries, nrhsentries; + int Ptrperline, Ptrwidth, Indperline, Indwidth; + int Rhsperline, Rhswidth, Rhsprec; + char Rhsflag; + int Valperline, Valwidth, Valprec; + char Valflag; /* Indicates 'E','D', or 'F' float format */ + char pformat[16],iformat[16],vformat[19],rformat[19]; + + if ( Type[0] == 'C' ) { + nvalentries = 2*nz; + nrhsentries = 2*M; + } else { + nvalentries = nz; + nrhsentries = M; + } + + if ( filename != NULL ) { + if ( (out_file = fopen( filename, "w")) == NULL ) { + fprintf(stderr,"Error: Cannot open file: %s\n",filename); + return 0; + } + } else out_file = stdout; + + if ( Ptrfmt == NULL ) Ptrfmt = "(8I10)"; + ParseIfmt(Ptrfmt,&Ptrperline,&Ptrwidth); + sprintf(pformat,"%%%dd",Ptrwidth); + + if ( Indfmt == NULL ) Indfmt = Ptrfmt; + ParseIfmt(Indfmt,&Indperline,&Indwidth); + sprintf(iformat,"%%%dd",Indwidth); + + if ( Type[0] != 'P' ) { /* Skip if pattern only */ + if ( Valfmt == NULL ) Valfmt = "(4E20.13)"; + ParseRfmt(Valfmt,&Valperline,&Valwidth,&Valprec,&Valflag); + sprintf(vformat,"%%%ds",Valwidth); + } + + ptrcrd = (N+1)/Ptrperline; + if ( (N+1)%Ptrperline != 0) ptrcrd++; + + indcrd = nz/Indperline; + if ( nz%Indperline != 0) indcrd++; + + valcrd = nvalentries/Valperline; + if ( nvalentries%Valperline != 0) valcrd++; + + if ( Nrhs > 0 ) { + if ( Rhsfmt == NULL ) Rhsfmt = Valfmt; + ParseRfmt(Rhsfmt,&Rhsperline,&Rhswidth,&Rhsprec, &Rhsflag); + sprintf(rformat,"%%%ds",Rhswidth); + rhscrd = nrhsentries/Rhsperline; + if ( nrhsentries%Rhsperline != 0) rhscrd++; + if ( Rhstype[1] == 'G' ) rhscrd+=rhscrd; + if ( Rhstype[2] == 'X' ) rhscrd+=rhscrd; + rhscrd*=Nrhs; + } else rhscrd = 0; + + totcrd = 4+ptrcrd+indcrd+valcrd+rhscrd; + + + /* Print header information: */ + + fprintf(out_file,"%-72s%-8s\n%14d%14d%14d%14d%14d\n",Title, Key, totcrd, + ptrcrd, indcrd, valcrd, rhscrd); + fprintf(out_file,"%3s%11s%14d%14d%14d\n",Type," ", M, N, nz); + fprintf(out_file,"%-16s%-16s%-20s", Ptrfmt, Indfmt, Valfmt); + if ( Nrhs != 0 ) { + /* Print Rhsfmt on fourth line and */ + /* optional fifth header line for auxillary vector information: */ + fprintf(out_file,"%-20s\n%-14s%d\n",Rhsfmt,Rhstype,Nrhs); + } else fprintf(out_file,"\n"); + + offset = 1-_SP_base; /* if base 0 storage is declared (via macro definition), */ + /* then storage entries are offset by 1 */ + + /* Print column pointers: */ + for (i=0;i<N+1;i++) + { + entry = colptr[i]+offset; + fprintf(out_file,pformat,entry); + if ( (i+1)%Ptrperline == 0 ) fprintf(out_file,"\n"); + } + + if ( (N+1) % Ptrperline != 0 ) fprintf(out_file,"\n"); + + /* Print row indices: */ + for (i=0;i<nz;i++) + { + entry = rowind[i]+offset; + fprintf(out_file,iformat,entry); + if ( (i+1)%Indperline == 0 ) fprintf(out_file,"\n"); + } + + if ( nz % Indperline != 0 ) fprintf(out_file,"\n"); + + /* Print values: */ + + if ( Type[0] != 'P' ) { /* Skip if pattern only */ + for (i=0;i<nvalentries;i++) + { + fprintf(out_file,vformat,val+i*Valwidth); + if ( (i+1)%Valperline == 0 ) fprintf(out_file,"\n"); + } + + if ( nvalentries % Valperline != 0 ) fprintf(out_file,"\n"); + + /* Print right hand sides: */ + acount = 1; + linemod=0; + if ( Nrhs > 0 ) { + for (j=0;j<Nrhs;j++) { + for (i=0;i<nrhsentries;i++) + { + fprintf(out_file,rformat,rhs+i*Rhswidth); + if ( acount++%Rhsperline == linemod ) fprintf(out_file,"\n"); + } + if ( acount%Rhsperline != linemod ) { + fprintf(out_file,"\n"); + linemod = (acount-1)%Rhsperline; + } + if ( Rhstype[1] == 'G' ) { + for (i=0;i<nrhsentries;i++) + { + fprintf(out_file,rformat,guess+i*Rhswidth); + if ( acount++%Rhsperline == linemod ) fprintf(out_file,"\n"); + } + if ( acount%Rhsperline != linemod ) { + fprintf(out_file,"\n"); + linemod = (acount-1)%Rhsperline; + } + } + if ( Rhstype[2] == 'X' ) { + for (i=0;i<nrhsentries;i++) + { + fprintf(out_file,rformat,exact+i*Rhswidth); + if ( acount++%Rhsperline == linemod ) fprintf(out_file,"\n"); + } + if ( acount%Rhsperline != linemod ) { + fprintf(out_file,"\n"); + linemod = (acount-1)%Rhsperline; + } + } + } + } + + } + + if ( fclose(out_file) != 0){ + fprintf(stderr,"Error closing file in writeHB_mat_char().\n"); + return 0; + } else return 1; + +} + +int ParseIfmt(char* fmt, int* perline, int* width) +{ + /*************************************************/ + /* Parse an *integer* format field to determine */ + /* width and number of elements per line. */ + /*************************************************/ + char *tmp; + if (fmt == NULL ) { + *perline = 0; *width = 0; return 0; + } + upcase(fmt); + tmp = strchr(fmt,'('); + tmp = substr(fmt,tmp - fmt + 1, strchr(fmt,'I') - tmp - 1); + *perline = atoi(tmp); + if (tmp) free(tmp); + + tmp = strchr(fmt,'I'); + tmp = substr(fmt,tmp - fmt + 1, strchr(fmt,')') - tmp - 1); + *width = atoi(tmp); + if (tmp) free(tmp); + + return *width; +} + +int ParseRfmt(char* fmt, int* perline, int* width, int* prec, char* flag) +{ + /*************************************************/ + /* Parse a *real* format field to determine */ + /* width and number of elements per line. */ + /* Also sets flag indicating 'E' 'F' 'P' or 'D' */ + /* format. */ + /*************************************************/ + char* tmp; + char* tmp2; + char* tmp3; + int len; + + if (fmt == NULL ) { + *perline = 0; + *width = 0; + flag = NULL; + return 0; + } + + upcase(fmt); + if (strchr(fmt,'(') != NULL) fmt = strchr(fmt,'('); + if (strchr(fmt,')') != NULL) { + tmp2 = strchr(fmt,')'); + while ( strchr(tmp2+1,')') != NULL ) { + tmp2 = strchr(tmp2+1,')'); + } + *(tmp2+1) = '\0'; + } + if (strchr(fmt,'P') != NULL) /* Remove any scaling factor, which */ + { /* affects output only, not input */ + if (strchr(fmt,'(') != NULL) { + tmp = strchr(fmt,'P'); + if ( *(++tmp) == ',' ) tmp++; + tmp3 = strchr(fmt,'(')+1; + len = tmp-tmp3; + tmp2 = tmp3; + while ( *(tmp2+len) != '\0' ) { + *tmp2=*(tmp2+len); + tmp2++; + } + *(strchr(fmt,')')+1) = '\0'; + } + } + if (strchr(fmt,'E') != NULL) { + *flag = 'E'; + } else if (strchr(fmt,'D') != NULL) { + *flag = 'D'; + } else if (strchr(fmt,'F') != NULL) { + *flag = 'F'; + } else { + fprintf(stderr,"Real format %s in H/B file not supported.\n",fmt); + return 0; + } + tmp = strchr(fmt,'('); + tmp = substr(fmt,tmp - fmt + 1, strchr(fmt,*flag) - tmp - 1); + *perline = atoi(tmp); + free(tmp); + tmp = strchr(fmt,*flag); + if ( strchr(fmt,'.') ) { + char *tmp2 = substr( fmt, strchr(fmt,'.') - fmt + 1, strchr(fmt,')') - strchr(fmt,'.')-1); + *prec = atoi( tmp2 ); + if (tmp2) free(tmp2); + tmp = substr(fmt,tmp - fmt + 1, strchr(fmt,'.') - tmp - 1); + } else { + tmp = substr(fmt,tmp - fmt + 1, strchr(fmt,')') - tmp - 1); + } + *width = atoi(tmp); + + if (tmp) free(tmp); + return *width; +} + +char* substr(const char* S, const int pos, const int len) +{ + int i; + char *SubS; + if ( (pos+len) <= (int)strlen(S)) { + SubS = (char *)malloc(len+1); + if ( SubS == NULL ) IOHBTerminate("Insufficient memory for SubS."); + for (i=0;i<len;i++) SubS[i] = S[pos+i]; + SubS[len] = '\0'; + } else { + SubS = NULL; + } + return SubS; +} + +#include<ctype.h> +void upcase(char* S) +{ + /* Convert S to uppercase */ + int i,len; + len = strlen(S); + for (i=0;i< len;i++) + S[i] = (char)toupper((int)S[i]); +} + +void IOHBTerminate(char* message) +{ + fprintf(stderr,"%s", message); + exit(1); +} diff --git a/drivers/iohb.h b/drivers/iohb.h new file mode 100644 index 0000000000000000000000000000000000000000..191659a62944862c397af0ceb27f59ca9f42c2b8 --- /dev/null +++ b/drivers/iohb.h @@ -0,0 +1,55 @@ +#ifndef IOHB_H +#define IOHB_H + +int readHB_info(const char* filename, int* M, int* N, int* nz, char** Type, + int* Nrhs); + +int readHB_header(FILE* in_file, char* Title, char* Key, char* Type, + int* Nrow, int* Ncol, int* Nnzero, int* Nrhs, + char* Ptrfmt, char* Indfmt, char* Valfmt, char* Rhsfmt, + int* Ptrcrd, int* Indcrd, int* Valcrd, int* Rhscrd, + char *Rhstype); + +int readHB_mat_double(const char* filename, int colptr[], int rowind[], + double val[]); + +int readHB_newmat_double(const char* filename, int* M, int* N, int* nonzeros, + int** colptr, int** rowind, double** val); + +int readHB_aux_double(const char* filename, const char AuxType, double b[]); + +int readHB_newaux_double(const char* filename, const char AuxType, double** b); + +int writeHB_mat_double(const char* filename, int M, int N, + int nz, const int colptr[], const int rowind[], + const double val[], int Nrhs, const double rhs[], + const double guess[], const double exact[], + const char* Title, const char* Key, const char* Type, + char* Ptrfmt, char* Indfmt, char* Valfmt, char* Rhsfmt, + const char* Rhstype); + +int readHB_mat_char(const char* filename, int colptr[], int rowind[], + char val[], char* Valfmt); + +int readHB_newmat_char(const char* filename, int* M, int* N, int* nonzeros, int** colptr, + int** rowind, char** val, char** Valfmt); + +int readHB_aux_char(const char* filename, const char AuxType, char b[]); + +int readHB_newaux_char(const char* filename, const char AuxType, char** b, char** Rhsfmt); + +int writeHB_mat_char(const char* filename, int M, int N, + int nz, const int colptr[], const int rowind[], + const char val[], int Nrhs, const char rhs[], + const char guess[], const char exact[], + const char* Title, const char* Key, const char* Type, + char* Ptrfmt, char* Indfmt, char* Valfmt, char* Rhsfmt, + const char* Rhstype); + +int ParseIfmt(char* fmt, int* perline, int* width); + +int ParseRfmt(char* fmt, int* perline, int* width, int* prec, char* flag); + +void IOHBTerminate(char* message); + +#endif diff --git a/drivers/laplacian.c b/drivers/laplacian.c new file mode 100644 index 0000000000000000000000000000000000000000..4ad213ae94cb709c6120dba254fa2eb5cadfd85f --- /dev/null +++ b/drivers/laplacian.c @@ -0,0 +1,345 @@ +/** + * @file laplacian.c + * + * This file contains the user routine to generate 1, 2 and 3 dimensional Laplacian + * matrices. + * $COPYRIGHTS$ + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Pierre Ramet + * @author Xavier Lacoste + * @author Theophile Terraz + * @date 2011-11-11 + * + **/ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "common.h" +#include "spm_drivers.h" +#include "drivers/laplacian.h" + +/** + ******************************************************************************* + * + * @ingroup pastix_csc_driver + * + * laplacian_usage - Print the usage information to generate correct Laplacian + * matrices. + * + *******************************************************************************/ +static inline void +laplacian_usage(void) +{ + fprintf(stderr, + "Usage: genLaplacian( \"[<type>:]<dim1>[:<dim2>[:<dim3>]]\" )\n" + " <type> p = pattern only\n" + " s = real simple\n" + " d = real double [default]\n" + " c = complex simple\n" + " z = complex double\n" + " <dim1> size of the first dimension of the 1D|2D|3D laplacian\n" + " <dim2> size of the second dimension of the 2D|3D laplacian\n" + " <dim3> size of the third dimension of the 3D laplacian\n" + " Example:\n" + " genLaplacian( \"z:10:20\" ) generates a 2D complex laplacian matrix of size 200.\n" + ); +} + +/** + ******************************************************************************* + * + * @ingroup pastix_csc_driver + * + * laplacian_parse_info - Parse information given through the filename string to + * configure the laplacian matrix to generate. + * + ******************************************************************************* + * + * @param[in] filename + * Configuration string of the Laplacian. See laplacian_usage() for + * more information. + * + * @param[in,out] csc + * At start, an allocated csc structure that will store the Lapalcian + * matrix. + * At exit, the fields of the csc are initialized and especially the + * type, symmetry and number of unknows are setup. + * + ******************************************************************************* + * + * @return + * \retval PASTIX_SUCCESS if the matrix has been generated successfully + * \retval PASTIX_ERR_BADPARAMETER if the configuration string is incorrect + * + *******************************************************************************/ +static inline int +laplacian_parse_info( const char *filename, + pastix_csc_t *csc, + pastix_int_t *dim1, + pastix_int_t *dim2, + pastix_int_t *dim3 ) +{ + long tmp1, tmp2, tmp3; + csc->colptr = NULL; + csc->rowptr = NULL; + csc->values = NULL; + csc->loc2glob = NULL; + + /* Look for the datatype */ + { + char flt; + char *tmpf = strdup( filename ); + + if ( sscanf( filename, "%c:%s", &flt, tmpf ) == 2 ) { + filename += 2; + switch( flt ){ + case 'Z': + case 'z': + csc->flttype = PastixComplex64; + break; + + case 'C': + case 'c': + csc->flttype = PastixComplex32; + break; + + case 'D': + case 'd': + csc->flttype = PastixDouble; + break; + + case 'S': + case 's': + csc->flttype = PastixFloat; + break; + + case 'P': + case 'p': + csc->flttype = PastixPattern; + break; + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + csc->flttype = PastixDouble; + /* + * The first dimension is only one character long so we come + * back to the beginning of the string + */ + filename -= 2; + break; + + default: + laplacian_usage(); + return PASTIX_ERR_BADPARAMETER; + } + } + else { + csc->flttype = PastixDouble; + } + + free(tmpf); + } + + /* Scan the dimensions */ + *dim1 = *dim2 = *dim3 = 0; + + if ( sscanf( filename, "%ld:%ld:%ld", &tmp1, &tmp2, &tmp3 ) == 3 ) { + *dim1 = (pastix_int_t)tmp1; + *dim2 = (pastix_int_t)tmp2; + *dim3 = (pastix_int_t)tmp3; + csc->gN = (*dim1)*(*dim2)*(*dim3); + } + else if ( sscanf( filename, "%ld:%ld", &tmp1, &tmp2 ) == 2 ) { + *dim1 = (pastix_int_t)tmp1; + *dim2 = (pastix_int_t)tmp2; + csc->gN = (*dim1)*(*dim2); + } + else if ( sscanf( filename, "%ld", &tmp1 ) == 1 ) { + *dim1 = (pastix_int_t)tmp1; + csc->gN = *dim1; + } + else { + laplacian_usage(); + return PASTIX_ERR_BADPARAMETER; + } + + /* One of the dimension was set to 0 */ + if ( csc->gN == 0 ) { + laplacian_usage(); + return PASTIX_ERR_BADPARAMETER; + } + + csc->n = csc->gN; + return PASTIX_SUCCESS; +} + +static void (*laplacian_table1D[6])(pastix_csc_t*, pastix_int_t) = +{ + p_spmLaplacian1D, + NULL, + s_spmLaplacian1D, + d_spmLaplacian1D, + c_spmLaplacian1D, + z_spmLaplacian1D +}; + +static void (*laplacian_table2D[6])(pastix_csc_t*, pastix_int_t, pastix_int_t) = +{ + p_spmLaplacian2D, + NULL, + s_spmLaplacian2D, + d_spmLaplacian2D, + c_spmLaplacian2D, + z_spmLaplacian2D +}; + +static void (*laplacian_table3D[6])(pastix_csc_t*, pastix_int_t, pastix_int_t, pastix_int_t) = +{ + p_spmLaplacian3D, + NULL, + s_spmLaplacian3D, + d_spmLaplacian3D, + c_spmLaplacian3D, + z_spmLaplacian3D +}; + +static void (*extended_laplacian_table2D[6])(pastix_csc_t*, pastix_int_t, pastix_int_t) = +{ + p_spmLaplacian2D, + NULL, + s_spmExtendedLaplacian2D, + d_spmExtendedLaplacian2D, + c_spmExtendedLaplacian2D, + z_spmExtendedLaplacian2D +}; + +static void (*extended_laplacian_table3D[6])(pastix_csc_t*, pastix_int_t, pastix_int_t, pastix_int_t) = +{ + p_spmExtendedLaplacian3D, + NULL, + s_spmExtendedLaplacian3D, + d_spmExtendedLaplacian3D, + c_spmExtendedLaplacian3D, + z_spmExtendedLaplacian3D +}; + +/** + ******************************************************************************* + * + * @ingroup pastix_csc_driver + * + * genLaplacian - Generate a Laplacian of size csc->n + * + ******************************************************************************* + * + * @param[in] filename + * Configuration string of the Laplacian. + * [<type>:]<dim1>[:<dim2>[:<dim3>]] + * <type> p = pattern only\n" + * s = real simple\n" + * d = real double [default]\n" + * c = complex simple\n" + * z = complex double\n" + * <dim1> size of the first dimension of the 1D|2D|3D laplacian\n" + * <dim2> size of the second dimension of the 2D|3D laplacian\n" + * <dim3> size of the third dimension of the 3D laplacian\n" + * + * @param[in,out] csc + * At start, an allocated csc structure. + * At exit, contains a laplacian matrix in the csc format. + * + ******************************************************************************* + * + * @return + * \retval PASTIX_SUCCESS if the matrix has been generated successfully + * \retval PASTIX_ERR_BADPARAMETER if the configuration string is incorrect + * + *******************************************************************************/ +int +genLaplacian( const char *filename, + pastix_csc_t *csc ) +{ + pastix_int_t dim1, dim2, dim3; + int rc; + + rc = laplacian_parse_info(filename, csc, &dim1, &dim2, &dim3); + if (rc != PASTIX_SUCCESS) + return rc; + + if( dim3 > 0 ) { + laplacian_table3D[csc->flttype](csc, dim1, dim2, dim3); + } + else if (dim2 > 0) { + laplacian_table2D[csc->flttype](csc, dim1, dim2); + } + else { + laplacian_table1D[csc->flttype](csc, dim1); + } + + return PASTIX_SUCCESS; +} + +/** + ******************************************************************************* + * + * @ingroup pastix_csc_driver + * + * genExtendedLaplacian - Generate a extended Laplacian of size csc->n + * + ******************************************************************************* + * + * @param[in] filename + * Configuration string of the Laplacian. + * [<type>:]<dim1>[:<dim2>[:<dim3>]] + * <type> p = pattern only + * s = real simple + * d = real double [default] + * c = complex simple + * z = complex double + * <dim1> size of the first dimension of the 1D|2D|3D laplacian + * <dim2> size of the second dimension of the 2D|3D laplacian + * <dim3> size of the third dimension of the 3D laplacian + * + * @param[in,out] csc + * At start, an allocated csc structure. + * At exit, contains a laplacian matrix in the csc format. + * + ******************************************************************************* + * + * @return + * \retval PASTIX_SUCCESS if the matrix has been generated successfully + * \retval PASTIX_ERR_BADPARAMETER if the configuration string is incorrect + * + *******************************************************************************/ +int +genExtendedLaplacian( const char *filename, + pastix_csc_t *csc ) +{ + pastix_int_t dim1, dim2, dim3; + int rc; + + rc = laplacian_parse_info(filename, csc, &dim1, &dim2, &dim3); + if (rc != PASTIX_SUCCESS) + return rc; + + if( dim3 > 0 ) { + extended_laplacian_table3D[csc->flttype](csc, dim1, dim2, dim3); + } + else if (dim2 > 0) { + extended_laplacian_table2D[csc->flttype](csc, dim1, dim2); + } + else { + laplacian_table1D[csc->flttype](csc, dim1); + } + + return PASTIX_SUCCESS; +} diff --git a/drivers/laplacian.h b/drivers/laplacian.h new file mode 100644 index 0000000000000000000000000000000000000000..c64d4fa983735441ebe41ff4e6e2ba4229452c87 --- /dev/null +++ b/drivers/laplacian.h @@ -0,0 +1,45 @@ +/** + * @file laplacian.h + * + * $COPYRIGHTS$ + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Theophile Terraz + * @date 2011-11-11 + * + **/ +#ifndef _LAPLACIAN_H_ +#define _LAPLACIAN_H_ + +void z_spmLaplacian1D( pastix_csc_t *csc, pastix_int_t dim1 ); +void c_spmLaplacian1D( pastix_csc_t *csc, pastix_int_t dim1 ); +void d_spmLaplacian1D( pastix_csc_t *csc, pastix_int_t dim1 ); +void s_spmLaplacian1D( pastix_csc_t *csc, pastix_int_t dim1 ); +void p_spmLaplacian1D( pastix_csc_t *csc, pastix_int_t dim1 ); + +void z_spmLaplacian2D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2 ); +void c_spmLaplacian2D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2 ); +void d_spmLaplacian2D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2 ); +void s_spmLaplacian2D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2 ); +void p_spmLaplacian2D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2 ); + +void z_spmLaplacian3D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2, pastix_int_t dim3 ); +void c_spmLaplacian3D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2, pastix_int_t dim3 ); +void d_spmLaplacian3D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2, pastix_int_t dim3 ); +void s_spmLaplacian3D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2, pastix_int_t dim3 ); +void p_spmLaplacian3D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2, pastix_int_t dim3 ); + +void z_spmExtendedLaplacian2D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2 ); +void c_spmExtendedLaplacian2D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2 ); +void d_spmExtendedLaplacian2D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2 ); +void s_spmExtendedLaplacian2D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2 ); +void p_spmExtendedLaplacian2D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2 ); + +void z_spmExtendedLaplacian3D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2, pastix_int_t dim3 ); +void c_spmExtendedLaplacian3D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2, pastix_int_t dim3 ); +void d_spmExtendedLaplacian3D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2, pastix_int_t dim3 ); +void s_spmExtendedLaplacian3D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2, pastix_int_t dim3 ); +void p_spmExtendedLaplacian3D( pastix_csc_t *csc, pastix_int_t dim1, pastix_int_t dim2, pastix_int_t dim3 ); + +#endif /* _LAPLACIAN_H_ */ diff --git a/drivers/mmio.c b/drivers/mmio.c new file mode 100644 index 0000000000000000000000000000000000000000..809b86dac008eeba6c8b089d52c2e5c2d482a11c --- /dev/null +++ b/drivers/mmio.c @@ -0,0 +1,516 @@ +/* +* Matrix Market I/O library for ANSI C +* +* See http://math.nist.gov/MatrixMarket for details. +* +* +*/ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <ctype.h> + +#include "common.h" +#include "spm_drivers.h" +#include "drivers/mmio.h" + +int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, + double **val_, int **row_, int **col_) +{ + FILE *f; + MM_typecode matcode; + int M, N, nz; + int i; + double *val; + int *row, *col; + + if ((f = fopen(fname, "r")) == NULL) + return -1; + + + if (mm_read_banner(f, &matcode) != 0) + { + printf("mm_read_unsymetric: Could not process Matrix Market banner "); + printf(" in file [%s]\n", fname); + return -1; + } + + + + if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) && + mm_is_sparse(matcode))) + { + fprintf(stderr, "Sorry, this application does not support "); + fprintf(stderr, "Market Market type: [%s]\n", + mm_typecode_to_str(matcode)); + return -1; + } + + /* find out size of sparse matrix: M, N, nz .... */ + + if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0) + { + fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n"); + return -1; + } + + *M_ = M; + *N_ = N; + *nz_ = nz; + + /* reseve memory for matrices */ + + row = (int *) malloc(nz * sizeof(int)); + col = (int *) malloc(nz * sizeof(int)); + val = (double *) malloc(nz * sizeof(double)); + + *val_ = val; + *row_ = row; + *col_ = col; + + /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ + /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ + /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ + + for (i=0; i<nz; i++) + { + if (3 != fscanf(f, "%d %d %lg\n", &row[i], &col[i], &val[i])) + { + fprintf(stderr, "Error: reading matrix\n"); + return 1; + } + row[i]--; /* adjust from 1-based to 0-based */ + col[i]--; + } + fclose(f); + + return 0; +} + +int mm_is_valid(MM_typecode matcode) +{ + if (!mm_is_matrix(matcode)) return 0; + if (mm_is_dense(matcode) && mm_is_pattern(matcode)) return 0; + if (mm_is_real(matcode) && mm_is_hermitian(matcode)) return 0; + if (mm_is_pattern(matcode) && (mm_is_hermitian(matcode) || + mm_is_skew(matcode))) return 0; + return 1; +} + +int mm_read_banner(FILE *f, MM_typecode *matcode) +{ + char line[MM_MAX_LINE_LENGTH]; + char banner[MM_MAX_TOKEN_LENGTH]; + char mtx[MM_MAX_TOKEN_LENGTH]; + char crd[MM_MAX_TOKEN_LENGTH]; + char data_type[MM_MAX_TOKEN_LENGTH]; + char storage_scheme[MM_MAX_TOKEN_LENGTH]; + char *p; + + + mm_clear_typecode(matcode); + + if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL) + return MM_PREMATURE_EOF; + + if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type, + storage_scheme) != 5) + return MM_PREMATURE_EOF; + + for (p=mtx; *p!='\0'; *p=(char)tolower((int)*p),p++); /* convert to lower case */ + for (p=crd; *p!='\0'; *p=(char)tolower((int)*p),p++); + for (p=data_type; *p!='\0'; *p=(char)tolower((int)*p),p++); + for (p=storage_scheme; *p!='\0'; *p=(char)tolower((int)*p),p++); + + /* check for banner */ + if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0) + return MM_NO_HEADER; + + /* first field should be "mtx" */ + if (strcmp(mtx, MM_MTX_STR) != 0) + return MM_UNSUPPORTED_TYPE; + mm_set_matrix(matcode); + + + /* second field describes whether this is a sparse matrix (in coordinate + storgae) or a dense array */ + + + if (strcmp(crd, MM_SPARSE_STR) == 0) + mm_set_sparse(matcode); + else + if (strcmp(crd, MM_DENSE_STR) == 0) + mm_set_dense(matcode); + else + return MM_UNSUPPORTED_TYPE; + + + /* third field */ + + if (strcmp(data_type, MM_REAL_STR) == 0) + mm_set_real(matcode); + else + if (strcmp(data_type, MM_COMPLEX_STR) == 0) + mm_set_complex(matcode); + else + if (strcmp(data_type, MM_PATTERN_STR) == 0) + mm_set_pattern(matcode); + else + if (strcmp(data_type, MM_INT_STR) == 0) + mm_set_integer(matcode); + else + return MM_UNSUPPORTED_TYPE; + + + /* fourth field */ + + if (strcmp(storage_scheme, MM_GENERAL_STR) == 0) + mm_set_general(matcode); + else + if (strcmp(storage_scheme, MM_SYMM_STR) == 0) + mm_set_symmetric(matcode); + else + if (strcmp(storage_scheme, MM_HERM_STR) == 0) + mm_set_hermitian(matcode); + else + if (strcmp(storage_scheme, MM_SKEW_STR) == 0) + mm_set_skew(matcode); + else + return MM_UNSUPPORTED_TYPE; + + + return 0; +} + +int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz) +{ + if (fprintf(f, "%d %d %d\n", M, N, nz) != 3) + return MM_COULD_NOT_WRITE_FILE; + else + return 0; +} + +int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz ) +{ + char line[MM_MAX_LINE_LENGTH]; + int num_items_read; + + /* set return null parameter values, in case we exit with errors */ + *M = *N = *nz = 0; + + /* now continue scanning until you reach the end-of-comments */ + do + { + if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL) + return MM_PREMATURE_EOF; + }while (line[0] == '%'); + + /* line[] is either blank or has M,N, nz */ + if (sscanf(line, "%d %d %d", M, N, nz) == 3) + return 0; + + else + do + { + num_items_read = fscanf(f, "%d %d %d", M, N, nz); + if (num_items_read == EOF) return MM_PREMATURE_EOF; + } + while (num_items_read != 3); + + return 0; +} + + +int mm_read_mtx_array_size(FILE *f, int *M, int *N) +{ + char line[MM_MAX_LINE_LENGTH]; + int num_items_read; + /* set return null parameter values, in case we exit with errors */ + *M = *N = 0; + + /* now continue scanning until you reach the end-of-comments */ + do + { + if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL) + return MM_PREMATURE_EOF; + }while (line[0] == '%'); + + /* line[] is either blank or has M,N, nz */ + if (sscanf(line, "%d %d", M, N) == 2) + return 0; + + else /* we have a blank line */ + do + { + num_items_read = fscanf(f, "%d %d", M, N); + if (num_items_read == EOF) return MM_PREMATURE_EOF; + } + while (num_items_read != 2); + + return 0; +} + +int mm_write_mtx_array_size(FILE *f, int M, int N) +{ + if (fprintf(f, "%d %d\n", M, N) != 2) + return MM_COULD_NOT_WRITE_FILE; + else + return 0; +} + + + +/*-------------------------------------------------------------------------*/ + +/******************************************************************/ +/* use when row[], col[], and val[]J, and val[] are already allocated */ +/******************************************************************/ + +int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int row[], int col[], + double val[], MM_typecode matcode) +{ + int i; + (void)M; (void)N; + + if (mm_is_complex(matcode)) + { + for (i=0; i<nz; i++) + if (fscanf(f, "%d %d %lg %lg", &row[i], &col[i], &val[2*i], &val[2*i+1]) + != 4) return MM_PREMATURE_EOF; + } + else if (mm_is_real(matcode)) + { + for (i=0; i<nz; i++) + { + if (fscanf(f, "%d %d %lg\n", &row[i], &col[i], &val[i]) + != 3) return MM_PREMATURE_EOF; + + } + } + + else if (mm_is_pattern(matcode)) + { + for (i=0; i<nz; i++) + if (fscanf(f, "%d %d", &row[i], &col[i]) + != 2) return MM_PREMATURE_EOF; + } + else + return MM_UNSUPPORTED_TYPE; + + return 0; + +} + +int mm_read_mtx_crd_entry(FILE *f, int *row, int *col, + double *real, double *imag, MM_typecode matcode) +{ + if (mm_is_complex(matcode)) + { + if (fscanf(f, "%d %d %lg %lg", row, col, real, imag) + != 4) return MM_PREMATURE_EOF; + } + else if (mm_is_real(matcode)) + { + if (fscanf(f, "%d %d %lg\n", row, col, real) + != 3) return MM_PREMATURE_EOF; + + } + + else if (mm_is_pattern(matcode)) + { + if (fscanf(f, "%d %d", row, col) != 2) return MM_PREMATURE_EOF; + } + else + return MM_UNSUPPORTED_TYPE; + + return 0; + +} + + +/************************************************************************ + mm_read_mtx_crd() fills M, N, nz, array of values, and return + type code, e.g. 'MCRS' + + if matrix is complex, values[] is of size 2*nz, + (nz pairs of real/imaginary values) +************************************************************************/ + +int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **row, int **col, + double **val, MM_typecode *matcode) +{ + int ret_code; + FILE *f; + + if (strcmp(fname, "stdin") == 0) f=stdin; + else + if ((f = fopen(fname, "r")) == NULL) + return MM_COULD_NOT_READ_FILE; + + + if ((ret_code = mm_read_banner(f, matcode)) != 0) + return ret_code; + + if (!(mm_is_valid(*matcode) && mm_is_sparse(*matcode) && + mm_is_matrix(*matcode))) + return MM_UNSUPPORTED_TYPE; + + if ((ret_code = mm_read_mtx_crd_size(f, M, N, nz)) != 0) + return ret_code; + + + *row = (int *) malloc(*nz * sizeof(int)); + *col = (int *) malloc(*nz * sizeof(int)); + *val = NULL; + + if (mm_is_complex(*matcode)) + { + *val = (double *) malloc(*nz * 2 * sizeof(double)); + ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *row, *col, *val, + *matcode); + if (ret_code != 0) return ret_code; + } + else if (mm_is_real(*matcode)) + { + *val = (double *) malloc(*nz * sizeof(double)); + ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *row, *col, *val, + *matcode); + if (ret_code != 0) return ret_code; + } + + else if (mm_is_pattern(*matcode)) + { + ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *row, *col, *val, + *matcode); + if (ret_code != 0) return ret_code; + } + + if (f != stdin) fclose(f); + return 0; +} + +int mm_write_banner(FILE *f, MM_typecode matcode) +{ + char *str = mm_typecode_to_str(matcode); + int ret_code; + + ret_code = fprintf(f, "%s %s\n", MatrixMarketBanner, str); + free(str); + if (ret_code !=2 ) + return MM_COULD_NOT_WRITE_FILE; + else + return 0; +} + +int mm_write_mtx_crd(char fname[], int M, int N, int nz, int row[], int col[], + double val[], MM_typecode matcode) +{ + FILE *f; + int i; + + if (strcmp(fname, "stdout") == 0) + f = stdout; + else + if ((f = fopen(fname, "w")) == NULL) + return MM_COULD_NOT_WRITE_FILE; + + /* print banner followed by typecode */ + fprintf(f, "%s ", MatrixMarketBanner); + fprintf(f, "%s\n", mm_typecode_to_str(matcode)); + + /* print matrix sizes and nonzeros */ + fprintf(f, "%d %d %d\n", M, N, nz); + + /* print values */ + if (mm_is_pattern(matcode)) + for (i=0; i<nz; i++) + fprintf(f, "%d %d\n", row[i], col[i]); + else + if (mm_is_real(matcode)) + for (i=0; i<nz; i++) + fprintf(f, "%d %d %20.16g\n", row[i], col[i], val[i]); + else + if (mm_is_complex(matcode)) + for (i=0; i<nz; i++) + fprintf(f, "%d %d %20.16g %20.16g\n", row[i], col[i], val[2*i], + val[2*i+1]); + else + { + if (f != stdout) fclose(f); + return MM_UNSUPPORTED_TYPE; + } + + if (f !=stdout) fclose(f); + + return 0; +} + + +/** +* Create a new copy of a string s. mm_strdup() is a common routine, but +* not part of ANSI C, so it is included here. Used by mm_typecode_to_str(). +* +*/ +char *mm_strdup(const char *s) +{ + int len = strlen(s); + char *s2 = (char *) malloc((len+1)*sizeof(char)); + return strcpy(s2, s); +} + +char *mm_typecode_to_str(MM_typecode matcode) +{ + char buffer[MM_MAX_LINE_LENGTH]; + char *types[4]; + char *mm_strdup(const char *); + + /* check for MTX type */ + if (mm_is_matrix(matcode)) + types[0] = MM_MTX_STR; + else + return NULL; + + /* check for CRD or ARR matrix */ + if (mm_is_sparse(matcode)) + types[1] = MM_SPARSE_STR; + else + if (mm_is_dense(matcode)) + types[1] = MM_DENSE_STR; + else + return NULL; + + /* check for element data type */ + if (mm_is_real(matcode)) + types[2] = MM_REAL_STR; + else + if (mm_is_complex(matcode)) + types[2] = MM_COMPLEX_STR; + else + if (mm_is_pattern(matcode)) + types[2] = MM_PATTERN_STR; + else + if (mm_is_integer(matcode)) + types[2] = MM_INT_STR; + else + return NULL; + + + /* check for symmetry type */ + if (mm_is_general(matcode)) + types[3] = MM_GENERAL_STR; + else + if (mm_is_symmetric(matcode)) + types[3] = MM_SYMM_STR; + else + if (mm_is_hermitian(matcode)) + types[3] = MM_HERM_STR; + else + if (mm_is_skew(matcode)) + types[3] = MM_SKEW_STR; + else + return NULL; + + sprintf(buffer,"%s %s %s %s", types[0], types[1], types[2], types[3]); + return mm_strdup(buffer); + +} diff --git a/drivers/mmio.h b/drivers/mmio.h new file mode 100644 index 0000000000000000000000000000000000000000..28e40b02b7efadad66c38dc65de0b457d75e6d47 --- /dev/null +++ b/drivers/mmio.h @@ -0,0 +1,131 @@ +/* +* Matrix Market I/O library for ANSI C +* +* See http://math.nist.gov/MatrixMarket for details. +* +* +*/ + +#ifndef MM_IO_H +#define MM_IO_H + +#define MM_MAX_LINE_LENGTH 1025 +#define MatrixMarketBanner "%%MatrixMarket" +#define MM_MAX_TOKEN_LENGTH 64 + +typedef char MM_typecode[4]; + +char *mm_typecode_to_str(MM_typecode matcode); + +int mm_read_banner(FILE *f, MM_typecode *matcode); +int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz); +int mm_read_mtx_array_size(FILE *f, int *M, int *N); + +int mm_write_banner(FILE *f, MM_typecode matcode); +int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz); +int mm_write_mtx_array_size(FILE *f, int M, int N); + + +/********************* MM_typecode query fucntions ***************************/ + +#define mm_is_matrix(typecode) ((typecode)[0]=='M') + +#define mm_is_sparse(typecode) ((typecode)[1]=='C') +#define mm_is_coordinate(typecode) ((typecode)[1]=='C') +#define mm_is_dense(typecode) ((typecode)[1]=='A') +#define mm_is_array(typecode) ((typecode)[1]=='A') + +#define mm_is_complex(typecode) ((typecode)[2]=='C') +#define mm_is_real(typecode) ((typecode)[2]=='R') +#define mm_is_pattern(typecode) ((typecode)[2]=='P') +#define mm_is_integer(typecode) ((typecode)[2]=='I') + +#define mm_is_symmetric(typecode) ((typecode)[3]=='S') +#define mm_is_general(typecode) ((typecode)[3]=='G') +#define mm_is_skew(typecode) ((typecode)[3]=='K') +#define mm_is_hermitian(typecode) ((typecode)[3]=='H') + +int mm_is_valid(MM_typecode matcode); /* too complex for a macro */ + + +/********************* MM_typecode modify fucntions ***************************/ + +#define mm_set_matrix(typecode) ((*typecode)[0]='M') +#define mm_set_coordinate(typecode) ((*typecode)[1]='C') +#define mm_set_array(typecode) ((*typecode)[1]='A') +#define mm_set_dense(typecode) mm_set_array(typecode) +#define mm_set_sparse(typecode) mm_set_coordinate(typecode) + +#define mm_set_complex(typecode) ((*typecode)[2]='C') +#define mm_set_real(typecode) ((*typecode)[2]='R') +#define mm_set_pattern(typecode) ((*typecode)[2]='P') +#define mm_set_integer(typecode) ((*typecode)[2]='I') + + +#define mm_set_symmetric(typecode) ((*typecode)[3]='S') +#define mm_set_general(typecode) ((*typecode)[3]='G') +#define mm_set_skew(typecode) ((*typecode)[3]='K') +#define mm_set_hermitian(typecode) ((*typecode)[3]='H') + +#define mm_clear_typecode(typecode) ((*typecode)[0]=(*typecode)[1]= \ + (*typecode)[2]=' ',(*typecode)[3]='G') + +#define mm_initialize_typecode(typecode) mm_clear_typecode(typecode) + + +/********************* Matrix Market error codes ***************************/ + + +#define MM_COULD_NOT_READ_FILE 11 +#define MM_PREMATURE_EOF 12 +#define MM_NOT_MTX 13 +#define MM_NO_HEADER 14 +#define MM_UNSUPPORTED_TYPE 15 +#define MM_LINE_TOO_LONG 16 +#define MM_COULD_NOT_WRITE_FILE 17 + + +/******************** Matrix Market internal definitions ******************** + + MM_matrix_typecode: 4-character sequence + ojbect sparse/ data storage + dense type scheme + + string position: [0] [1] [2] [3] + + Matrix typecode: M(atrix) C(oord) R(eal) G(eneral) + A(array) C(omplex) H(ermitian) + P(attern) S(ymmetric) + I(nteger) K(kew) + + ***********************************************************************/ + +#define MM_MTX_STR "matrix" +#define MM_ARRAY_STR "array" +#define MM_DENSE_STR "array" +#define MM_COORDINATE_STR "coordinate" +#define MM_SPARSE_STR "coordinate" +#define MM_COMPLEX_STR "complex" +#define MM_REAL_STR "real" +#define MM_INT_STR "integer" +#define MM_GENERAL_STR "general" +#define MM_SYMM_STR "symmetric" +#define MM_HERM_STR "hermitian" +#define MM_SKEW_STR "skew-symmetric" +#define MM_PATTERN_STR "pattern" + + +/* high level routines */ +int mm_write_mtx_crd(char fname[], int M, int N, int nz, int Itab[], int Jtab[], + double val[], MM_typecode matcode); +int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int Itab[], int Jtab[], + double val[], MM_typecode matcode); +int mm_read_mtx_crd_entry(FILE *f, int *Itab, int *Jtab, double *real, double *img, + MM_typecode matcode); + +int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, + double **val_, int **I_, int **J_); + + + +#endif diff --git a/drivers/readhb.c b/drivers/readhb.c new file mode 100644 index 0000000000000000000000000000000000000000..d36c292e69dbd48d7887b5bc367d5adcd8121355 --- /dev/null +++ b/drivers/readhb.c @@ -0,0 +1,126 @@ +/** + * @file readhb.c + * + * $COPYRIGHTS$ + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Pierre Ramet + * @author Xavier Lacoste + * @date 2011-11-11 + * + **/ +#include <stdio.h> +#include "common.h" +#include "spm_drivers.h" +#include "drivers/iohb.h" + +/** + * ****************************************************************************** + * + * @ingroup pastix_csc_driver + * + * readHB - Interface to the Harwell-Boeing C driver (iohb.c) + * + ******************************************************************************* + * + * @param[in] filename + * The file containing the matrix. + * + * @param[in] csc + * At exit, contains the matrix in csc format. + * + ******************************************************************************* + * + * @return + * \retval PASTIX_SUCCESS if the matrix has been read successfully + * \retval PASTIX_ERR_IO if a problem occured in the Harwell Boeing driver + * \retval PASTIX_ERR_BADPARAMETER if the matrix is no in a supported format + * + *******************************************************************************/ +int +readHB( const char *filename, + pastix_csc_t *csc ) +{ + int M, N, nz, nrhs; + + /* Harwell Boeing is a variant of RSA */ + csc->fmttype = PastixCSC; + csc->dof = 1; + csc->loc2glob= NULL; + + /* Read header informations */ + { + char *Type; + Type = malloc(4*sizeof(char)); + Type[0] ='a'; + + readHB_info(filename, &M, &N, &nz, &Type, &nrhs); + + if ( M != N ) { + fprintf(stderr, "readHB: PaStiX does not support non square matrices (m=%d, N=%d\n", M, N); + return PASTIX_ERR_BADPARAMETER; + } + + csc->gN = M; + csc->n = M; + csc->gnnz = nz; + csc->nnz = nz; + + /* Check float type */ + switch( Type[0] ) { + case 'C': + case 'c': + csc->flttype = PastixComplex64; + break; + case 'R': + case 'r': + csc->flttype = PastixDouble; + break; + case 'P': + case 'p': + csc->flttype = PastixPattern; + break; + default: + fprintf(stderr, "readhb: Floating type unknown (%c)\n", Type[0]); + return PASTIX_ERR_BADPARAMETER; + } + + /* Check Symmetry */ + switch( Type[1] ) { + case 'S': + case 's': + csc->mtxtype = PastixSymmetric; + break; + case 'H': + case 'h': + csc->mtxtype = PastixHermitian; + assert( csc->flttype == PastixDouble ); + break; + case 'U': + case 'u': + default: + csc->mtxtype = PastixGeneral; + } + free(Type); + } + + /* Read the matrix and its values */ + { + int *colptr, *rowind; + int rc; + + rc = readHB_newmat_double( filename, &M, &N, &nz, + &colptr, &rowind, (double**)(&(csc->values)) ); + + if (rc == 0) { + fprintf(stderr, "readhb: Error in reading the HB matrix values\n"); + return PASTIX_ERR_IO; + } + + /* Move the colptr/rowind from int to pastix_int_t if different sizes */ + csc->colptr = spmIntConvert(csc->n+1, colptr); + csc->rowptr = spmIntConvert(csc->nnz, rowind); + } + return PASTIX_SUCCESS; +} diff --git a/drivers/readijv.c b/drivers/readijv.c new file mode 100644 index 0000000000000000000000000000000000000000..1286846c5712f863d4fdc4026728eeee6a01fc0b --- /dev/null +++ b/drivers/readijv.c @@ -0,0 +1,192 @@ +/** + * @file readijv.c + * + * $COPYRIGHTS$ + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Pierre Ramet + * @author Xavier Lacoste + * @date 2011-11-11 + * + **/ +#include <stdio.h> +#include <stdlib.h> +#include "common.h" +#include "spm_drivers.h" + +/** + ******************************************************************************* + * + * @ingroup pastix_csc_driver + * + * threeFilesReadHeader - Read header from three file IJV format. + * + ******************************************************************************* + * + * @param[in] infile + * The opened header file + * + * @param[out] Nrow + * At exit, contains the number of rows of the matrix. + * + * @param[out] Ncol + * At exit, contains the number of columns of the matrix. + * + * @param[out] Nnzero + * At exit, contains the number of non zero entries of the matrix. + * + ******************************************************************************* + * + * @return + * \retval PASTIX_SUCCESS if the information has been read successfully + * \retval PASTIX_ERR_BADPARAMETER if the header has a wrong format + * + *******************************************************************************/ +int +threeFilesReadHeader(FILE *infile, + pastix_int_t *Nrow, + pastix_int_t *Ncol, + pastix_int_t *Nnzero) +{ + long temp1,temp2,temp3; + + /* ncol nrow nnzero */ + if (fscanf(infile, "%ld %ld %ld\n", &temp1, &temp2, &temp3) != 3) { + Nrow = Ncol = Nnzero = 0; + fprintf(stderr, "readijv: Wrong format in header file\n"); + return PASTIX_ERR_BADPARAMETER; + } + *Nrow = (pastix_int_t)temp1; + *Ncol = (pastix_int_t)temp2; + *Nnzero = (pastix_int_t)temp3; + + return PASTIX_SUCCESS; +} + +/** + * ****************************************************************************** + * + * @ingroup pastix_csc_driver + * + * readIJV - Read matrix from three files IJV + * + * header file is "filename"/header + * columns file is "filename"/ia_threeFiles + * rows file is "filename"/ja_threeFiles + * values file is "filename"/ra_threeFiles + * + ******************************************************************************* + * + * @param[in] dirname + * Directory that contains the files. + * + * @param[out] csc + * At exit, contains the matrix in ijv format. + * + ******************************************************************************* + * + * @return + * \retval PASTIX_SUCCESS if the matrix has been read successfully + * \retval PASTIX_ERR_IO if a problem occurs while reading the files + * \retval PASTIX_ERR_BADPARAMETER if a problem occurs while opening the + * files + * + *******************************************************************************/ +int +readIJV( const char *dirname, + pastix_csc_t *csc ) +{ + + FILE *iafile, *jafile, *rafile; + FILE *hdrfile; + char *filename; + pastix_int_t *tempcol; + pastix_int_t *temprow; + double *tempval; + pastix_int_t i, Nrow, Ncol, Nnzero; + + filename = malloc(strlen(dirname)+10); + + csc->flttype = PastixDouble; + csc->mtxtype = PastixGeneral; + csc->fmttype = PastixIJV; + csc->dof = 1; + csc->loc2glob= NULL; + + /* Read the header information */ + { + sprintf(filename,"%s/header",dirname); + hdrfile = fopen (filename,"r"); + if (hdrfile == NULL) + { + fprintf(stderr,"readijv: Cannot open the header file (%s)\n", filename); + return PASTIX_ERR_BADPARAMETER; + } + threeFilesReadHeader(hdrfile, &Nrow, &Ncol, &Nnzero); + fclose(hdrfile); + } + + csc->gN = Ncol; + csc->n = Ncol; + csc->gnnz = Nnzero; + csc->nnz = Nnzero; + csc->colptr = (pastix_int_t *) malloc(Nnzero*sizeof(pastix_int_t)); + csc->rowptr = (pastix_int_t *) malloc(Nnzero*sizeof(pastix_int_t)); + csc->values = (double *) malloc(Nnzero*sizeof(double)); + + /* Open the 3 files */ + sprintf(filename,"%s/ia_threeFiles",dirname); + iafile = fopen(filename,"r"); + if (iafile == NULL) + { + fprintf(stderr,"readijv: Cannot open the ia file (%s)\n", filename); + return PASTIX_ERR_BADPARAMETER; + } + + sprintf(filename,"%s/ja_threeFiles",dirname); + jafile = fopen(filename,"r"); + if (jafile == NULL) + { + fprintf(stderr,"readijv: Cannot open the ja file (%s)\n", filename); + fclose(iafile); + return PASTIX_ERR_BADPARAMETER; + } + + sprintf(filename,"%s/ra_threeFiles",dirname); + rafile = fopen(filename,"r"); + if (rafile == NULL) + { + fprintf(stderr,"readijv: Cannot open the ra file (%s)\n", filename); + fclose(iafile); + fclose(jafile); + return PASTIX_ERR_BADPARAMETER; + } + + /* Read the files */ + tempcol = csc->colptr; + temprow = csc->rowptr; + tempval = csc->values; + + for (i=0; i<Nnzero; i++, tempcol++, temprow++, tempval++) + { + long temp1, temp2; + double temp3; + + if (( 1 != fscanf(iafile,"%ld\n", &temp1)) || + ( 1 != fscanf(jafile,"%ld\n", &temp2)) || + ( 1 != fscanf(rafile,"%le\n", &temp3)) ) + { + fprintf(stderr, "ERROR: reading matrix\n"); + return PASTIX_ERR_IO; + } + *temprow = (pastix_int_t)temp1; + *tempcol = (pastix_int_t)temp2; + *tempval = temp3; + } + fclose(iafile); + fclose(jafile); + fclose(rafile); + + return PASTIX_SUCCESS; +} diff --git a/drivers/readmm.c b/drivers/readmm.c new file mode 100644 index 0000000000000000000000000000000000000000..e6904e9b0a76d18b8f1883007eb5623f01a24613 --- /dev/null +++ b/drivers/readmm.c @@ -0,0 +1,298 @@ +/** + * @file readmm.c + * + * $COPYRIGHTS$ + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Pierre Ramet + * @author Xavier Lacoste + * @author Theophile Terraz + * @date 2011-11-11 + * + **/ +#include "common.h" +#include "spm_drivers.h" +#include "drivers/mmio.h" + +/** + ******************************************************************************* + * + * @ingroup pastix_csc_driver + * + * z_readMM - Read the data part of a complex matrix in Matrix Market file. + * For more information about matrix market format see mmio.c/mmio.h + * + ******************************************************************************* + * + * @param[in] file + * The file opened in readMM which contains the matrix stored in Matrix + * Market format. + * + * @param[in,out] csc + * At exit, the data of the matrix are stored in the csc structure. + * + ******************************************************************************* + * + * @return + * \retval PASTIX_SUCCESS if the matrix has been read successfully + * \retval PASTIX_ERR_IO if a problem occured in the RSA driver + * + *******************************************************************************/ +int +z_readMM( FILE *file, + pastix_csc_t *csc ) +{ + pastix_complex64_t *valptr; + pastix_int_t *colptr; + pastix_int_t *rowptr; + pastix_int_t i; + long row, col; + double re, im; + + csc->values = malloc( csc->nnz * sizeof(pastix_complex64_t) ); + + colptr = csc->colptr; + rowptr = csc->rowptr; + valptr = (pastix_complex64_t*)(csc->values); + + for (i=0; i<csc->nnz; i++, colptr++, rowptr++, valptr++) + { + if (4 != fscanf(file,"%ld %ld %lg %lg\n", &row, &col, &re, &im)) + { + fprintf(stderr, "readmm: erro while reading matrix file (line %ld)\n", (long)i); + return PASTIX_ERR_IO; + } + + *rowptr = (pastix_int_t)row; + *colptr = (pastix_int_t)col; + *valptr = (pastix_complex64_t)(re + im * I); + } + + return PASTIX_SUCCESS; +} + +/** + ******************************************************************************* + * + * @ingroup pastix_csc_driver + * + * d_readMM - Read the data part of a real matrix in Matrix Market file. + * For more information about matrix market format see mmio.c/mmio.h + * + ******************************************************************************* + * + * @param[in] file + * The file opened in readMM which contains the matrix stored in Matrix + * Market format. + * + * @param[in,out] csc + * At exit, the data of the matrix are stored in the csc structure. + * + ******************************************************************************* + * + * @return + * \retval PASTIX_SUCCESS if the matrix has been read successfully + * \retval PASTIX_ERR_IO if a problem occured in the RSA driver + * + *******************************************************************************/ +int +d_readMM( FILE *file, + pastix_csc_t *csc ) +{ + double *valptr; + pastix_int_t *colptr; + pastix_int_t *rowptr; + pastix_int_t i; + long row, col; + double re; + + csc->values = malloc( csc->nnz * sizeof(double) ); + + colptr = csc->colptr; + rowptr = csc->rowptr; + valptr = (double*)(csc->values); + + for (i=0; i<csc->nnz; i++, colptr++, rowptr++, valptr++) + { + if (3 != fscanf(file,"%ld %ld %lg\n", &row, &col, &re)) + { + fprintf(stderr, "readmm: erro while reading matrix file (line %ld)\n", (long)i); + return PASTIX_ERR_IO; + } + + *rowptr = (pastix_int_t)row; + *colptr = (pastix_int_t)col; + *valptr = re; + } + + return PASTIX_SUCCESS; +} + +/** + ******************************************************************************* + * + * @ingroup pastix_csc_driver + * + * p_readMM - Read the data part of a pattern matrix in Matrix Market file. + * For more information about matrix market format see mmio.c/mmio.h + * + ******************************************************************************* + * + * @param[in] file + * The file opened in readMM which contains the matrix stored in Matrix + * Market format. + * + * @param[in,out] csc + * At exit, the data of the matrix are stored in the csc structure. + * + ******************************************************************************* + * + * @return + * \retval PASTIX_SUCCESS if the matrix has been read successfully + * \retval PASTIX_ERR_IO if a problem occured in the RSA driver + * + *******************************************************************************/ +int +p_readMM( FILE *file, + pastix_csc_t *csc ) +{ + pastix_int_t *colptr; + pastix_int_t *rowptr; + pastix_int_t i; + long row, col; + + csc->values = NULL; + + colptr = csc->colptr; + rowptr = csc->rowptr; + + for (i=0; i<csc->nnz; i++, colptr++, rowptr++) + { + if (2 != fscanf(file,"%ld %ld\n", &row, &col)) + { + fprintf(stderr, "readmm: erro while reading matrix file (line %ld)\n", (long)i); + return PASTIX_ERR_IO; + } + + *rowptr = (pastix_int_t)row; + *colptr = (pastix_int_t)col; + } + + return PASTIX_SUCCESS; +} + +/** + ******************************************************************************* + * + * @ingroup pastix_csc_driver + * + * readMM - Read a matrix in Matrix Market fill. This corresponds to + * IJV format with (%d %d[ %lf[ %lf]]) format per line. + * For more information about matrix market format see mmio.c/mmio.h + * + ******************************************************************************* + * + * @param[in] filename + * The filename that contains the matrix stored in Matrix Market format. + * + * @param[in] csc + * At exit, contains the matrix in csc format. + * + ******************************************************************************* + * + * @return + * \retval PASTIX_SUCCESS if the matrix has been read successfully + * \retval PASTIX_ERR_IO if a problem occured in the RSA driver + * \retval PASTIX_ERR_BADPARAMETER if the matrix is no in a supported format + * + *******************************************************************************/ +int +readMM( const char *filename, + pastix_csc_t *csc ) +{ + MM_typecode matcode; + FILE *file; + int rc; + + file = fopen(filename,"r"); + if (file == NULL) + { + fprintf(stderr,"readmm: Cannot open the file (%s)\n", filename); + return PASTIX_ERR_BADPARAMETER; + } + + if (mm_read_banner(file, &matcode) != 0) + { + fprintf(stderr,"readmm: Could not process Matrix Market banner.\n"); + return PASTIX_ERR_IO; + } + + /* Float values type */ + if (mm_is_complex(matcode)) { + csc->flttype = PastixComplex64; + } + else if (mm_is_real(matcode)) { + csc->flttype = PastixDouble; + } + else if (mm_is_pattern(matcode)) { + csc->flttype = PastixPattern; + } + else { + fprintf(stderr,"readmm: Unsupported type of matrix.\n"); + return PASTIX_ERR_BADPARAMETER; + } + + /* Matrix structure */ + if (mm_is_general(matcode)) { + csc->mtxtype = PastixGeneral; + } + else if (mm_is_symmetric(matcode)) { + csc->mtxtype = PastixSymmetric; + } + else if (mm_is_hermitian(matcode)) { + csc->mtxtype = PastixHermitian; + } + else { + fprintf(stderr,"readmm: Unsupported type of matrix.\n"); + return PASTIX_ERR_BADPARAMETER; + } + + csc->fmttype = PastixIJV; + csc->dof = 1; + csc->loc2glob= NULL; + + /* Read the size */ + { + int m, n, nnz; + if (mm_read_mtx_crd_size(file, &m, &n, &nnz) != 0) { + fprintf(stderr, "readmm: error while reading matrix sizes\n"); + return PASTIX_ERR_IO; + } + + csc->gN = n; + csc->n = n; + csc->gnnz = nnz; + csc->nnz = nnz; + } + + csc->colptr = (pastix_int_t*)malloc(csc->nnz * sizeof(pastix_int_t)); + csc->rowptr = (pastix_int_t*)malloc(csc->nnz * sizeof(pastix_int_t)); + + switch( csc->flttype ) { + case PastixComplex64: + rc = z_readMM(file,csc); + break; + + case PastixDouble: + rc = d_readMM(file,csc); + break; + + case PastixPattern: + default: + rc = p_readMM(file,csc); + } + + fclose(file); + return rc; +} diff --git a/drivers/readrsa.c b/drivers/readrsa.c new file mode 100644 index 0000000000000000000000000000000000000000..fd81afae2ee581f0535d604958fac40fbd662b20 --- /dev/null +++ b/drivers/readrsa.c @@ -0,0 +1,237 @@ +/** + * @file readrsa.c + * + * $COPYRIGHTS$ + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Pierre Ramet + * @author Xavier Lacoste + * @date 2011-11-11 + * + **/ +#include "common.h" +#include "spm_drivers.h" + +/** + * Function: FORTRAN_CALL(wreadmtc) + * + * Declaration of the wreadmtc fortran function + * defined in skitf.f. + * + * Parameters: + * tmp1 - Maximum number of column (INPUT) + * tmp2 - Maximum number of non zeros (INPUT) + * tmp3 - job to be done (see skitf file) (INPUT) + * filename - Path to the file to read from (INPUT) + * len - length of *filname* (INPUT) + * val - Values of the elements of the matrix. (OUTPUT) + * row - Rows of the elements of the matrix. (OUTPUT) + * col - Index of first element of each column in *row* and *val* (OUTPUT) + * crhs - Right hand side(s). (OUTPUT) + * nrhs - Number of right hand side(s). (OUTPUT) + * RhsType - Right hand side type (OUTPUT) + * tmpNrow - Number of rows. (OUTPUT) + * tmpNcol - Number of columns. (OUTPUT) + * tmpNnzero - Number of non zeros. (OUTPUT) + * title - name of the matrix. (OUTPUT) + * key - key of the matrix (see skitf.f) (OUTPUT) + * Type - Type of the matrix (OUTPUT) + * ierr - Error return value (OUTPUT) + * + */ +void +FC_GLOBAL(wreadmtc,WREADMTC)(int *tmp1, + int *tmp2, + int *tmp3, + const char *filename, + int *len, + double *val, + int *row, + int *col, + double *crhs, + int *nrhs, + char *RhsType, + int *tmpNrow, + int *tmpNcol, + int *tmpNnzero, + char *title, + char *key, + char *Type, + int *ierr); + +/** + ******************************************************************************* + * + * @ingroup pastix_csc_driver + * + * readRSAHeader - Read the header structure of a RSA file + * + ******************************************************************************* + * + * @param[in] filename + * The filename that contains the matrix stored in RSA format. This is + * an interface to the wreadmtx fortran fucntion provided within + * SparseKit. + * + * @param[out] N + * At exit, contains the number of rows/columns of the matrix. + * + * @param[out] Nnzero + * At exit, contains the number of non zero entries of the matrix. + * + * @param[out] Type + * At exit, contains the type of the matrix. + * + * @param[out] RhsType + * At exit, contains the type of the right hand side. + * + *******************************************************************************/ +void +readRSAHeader( const char *filename, + int *N, + int *Nnz, + char *Type, + char *RhsType ) +{ + int tmp; + int *col = NULL; + int *row = NULL; + char title[72+1]; + char key[8+1]; + int nrhs; + int len; + int ierr; + double *val = NULL; + double *crhs = NULL; + int M; + + len = strlen(filename); + tmp = 0; + + FC_GLOBAL(wreadmtc,WREADMTC) + (&tmp, &tmp, &tmp, filename, &len, val, row, col, crhs, &nrhs, + RhsType, &M, N, Nnz, title, key, Type, &ierr ); + + if(ierr != 0) { + fprintf(stderr, "cannot read matrix (job=0)\n"); + } + + if ( M != (*N) ) + { + fprintf(stderr,"ERROR : (M != N)\n"); + exit(EXIT_FAILURE); + } + + Type[3] = '\0'; +} + +/** + ******************************************************************************* + * + * @ingroup pastix_csc_driver + * + * readRSA - Read a RSA matrix file. This driver reads only real matrices, and + * does not support complex matrices. + * The matrix is returned in double, convert it to real if needed through TODO + * + ******************************************************************************* + * + * @param[in] filename + * The filename that contains the matrix stored in RSA format. This is + * an interface to the wreadmtx fortran function provided within + * SparseKit. + * + * @param[in] csc + * At exit, contains the matrix in csc format. + * + ******************************************************************************* + * + * @return + * \retval PASTIX_SUCCESS if the matrix has been read successfully + * \retval PASTIX_ERR_IO if a problem occured in the RSA driver + * \retval PASTIX_ERR_BADPARAMETER if the matrix is no in a supported format + * + *******************************************************************************/ +int +readRSA( const char *filename, + pastix_csc_t *csc ) +{ + char Type[4]; + char RhsType[4]; + int tmp; + char title[72+1]; + char key[8+1]; + int nrhs; + int len; + int ierr; + double *crhs=NULL; + int M, N, Nnz; + int *tmpcolptr; + int *tmprows; + + readRSAHeader(filename, &N, &Nnz, Type, RhsType ); + + switch( Type[1] ){ + case 'S': + case 's': + csc->mtxtype = PastixSymmetric; + break; + case 'H': + case 'h': + csc->mtxtype = PastixHermitian; + /** + * We should not arrive here, since the fortran driver is not able to + * read complex matrices + */ + assert(0); + break; + case 'U': + case 'u': + csc->mtxtype = PastixGeneral; + break; + default: + fprintf(stderr,"readmm: Unsupported type of matrix.\n"); + return PASTIX_ERR_BADPARAMETER; + } + + csc->flttype = PastixDouble; + csc->fmttype = PastixCSC; + csc->gN = N; + csc->n = N; + csc->gnnz = Nnz; + csc->nnz = Nnz; + csc->dof = 1; + csc->loc2glob= NULL; + + tmpcolptr = (int*) malloc( (N+1) * sizeof(int) ); + assert( tmpcolptr ); + + tmprows = (int*) malloc( Nnz * sizeof(int) ); + assert( tmprows ); + + /* RSA reads only double */ + csc->values = (double*) malloc( Nnz * sizeof(double) ); + assert( csc->values ); + + len = strlen(filename); + tmp = 2; + nrhs = 0; + + FC_GLOBAL(wreadmtc,WREADMTC) + (&N, &Nnz, &tmp, filename, &len, csc->values, tmprows, tmpcolptr, crhs, + &nrhs, RhsType, &M, &N, &Nnz, title, key, Type, &ierr ); + + assert( (tmpcolptr[N]-tmpcolptr[0]) == Nnz ); + + csc->colptr = spmIntConvert( N+1, tmpcolptr ); + csc->rowptr = spmIntConvert( Nnz, tmprows ); + + RhsType[0] = '\0'; + if(ierr != 0) { + fprintf(stderr, "cannot read matrix (job=2)\n"); + return PASTIX_ERR_IO; + } + + return PASTIX_SUCCESS; +} diff --git a/drivers/skitf.f b/drivers/skitf.f new file mode 100644 index 0000000000000000000000000000000000000000..5c5973459219657a2d8fcec4fc0306eaf972155c --- /dev/null +++ b/drivers/skitf.f @@ -0,0 +1,3376 @@ +c----------------------------------------------------------------------- +c----------------------------------------------------------------------- +c routines from SPARSKIT and extensions +c----------------------------------------------------------------------- +c----------------------------------------------------------------------- + subroutine dperm (nrow,a,ja,ia,ao,jao,iao,perm,qperm,job) + integer nrow,ja(*),ia(nrow+1),jao(*),iao(nrow+1),perm(nrow), + + qperm(*),job + real*8 a(*),ao(*) +c----------------------------------------------------------------------- +c This routine permutes the rows and columns of a matrix stored in CSR +c format. i.e., it computes P A Q, where P, Q are permutation matrices. +c P maps row i into row perm(i) and Q maps column j into column qperm(j): +c a(i,j) becomes a(perm(i),qperm(j)) in new matrix +c In the particular case where Q is the transpose of P (symmetric +c permutation of A) then qperm is not needed. +c note that qperm should be of length ncol (number of columns) but this +c is not checked. +c----------------------------------------------------------------------- +c Y. Saad, Sep. 21 1989 / recoded Jan. 28 1991. +c----------------------------------------------------------------------- +c on entry: +c---------- +c n = dimension of the matrix +c a, ja, +c ia = input matrix in a, ja, ia format +c perm = integer array of length n containing the permutation arrays +c for the rows: perm(i) is the destination of row i in the +c permuted matrix -- also the destination of column i in case +c permutation is symmetric (job .le. 2) +c +c qperm = same thing for the columns. This should be provided only +c if job=3 or job=4, i.e., only in the case of a nonsymmetric +c permutation of rows and columns. Otherwise qperm is a dummy +c +c job = integer indicating the work to be done: +c * job = 1,2 permutation is symmetric Ao :== P * A * transp(P) +c job = 1 permute a, ja, ia into ao, jao, iao +c job = 2 permute matrix ignoring real values. +c * job = 3,4 permutation is non-symmetric Ao :== P * A * Q +c job = 3 permute a, ja, ia into ao, jao, iao +c job = 4 permute matrix ignoring real values. +c +c on return: +c----------- +c ao, jao, iao = input matrix in a, ja, ia format +c +c in case job .eq. 2 or job .eq. 4, a and ao are never referred to +c and can be dummy arguments. +c Notes: +c------- +c 1) algorithm is in place +c 2) column indices may not be sorted on return even though they may be +c on entry. +c----------------------------------------------------------------------c +c local variables + integer locjob, mod +c +c locjob indicates whether or not real values must be copied. +c + locjob = mod(job,2) +c +c permute rows first +c + call rperm (nrow,a,ja,ia,ao,jao,iao,perm,locjob) +c +c then permute columns +c + locjob = 0 +c + if (job .le. 2) then + call cperm (nrow,ao,jao,iao,ao,jao,iao,perm,locjob) + else + call cperm (nrow,ao,jao,iao,ao,jao,iao,qperm,locjob) + endif +c + return +c-------end-of-dperm---------------------------------------------------- +c----------------------------------------------------------------------- + end + +c----------------------------------------------------------------------- + subroutine dperm1 (i1,i2,a,ja,ia,b,jb,ib,perm,ipos,job) + integer i1,i2,job,ja(*),ia(*),jb(*),ib(*),perm(*) + real*8 a(*),b(*) +c----------------------------------------------------------------------- +c general submatrix permutation/ extraction routine. +c----------------------------------------------------------------------- +c extracts rows perm(i1), perm(i1+1), ..., perm(i2) (in this order) +c from a matrix (doing nothing in the column indices.) The resulting +c submatrix is constructed in b, jb, ib. A pointer ipos to the +c beginning of arrays b,jb,is also allowed (i.e., nonzero elements +c are accumulated starting in position ipos of b, jb). +c----------------------------------------------------------------------- +c Y. Saad,Sep. 21 1989 / recoded Jan. 28 1991 / modified for SPARSLIB +c Sept. 1997.. +c----------------------------------------------------------------------- +c on entry: +c---------- +c n = dimension of the matrix +c a,ja, +c ia = input matrix in CSR format +c perm = integer array of length n containing the indices of the rows +c to be extracted. +c +c job = job indicator. if (job .ne.1) values are not copied (i.e., +c only pattern is copied). +c +c on return: +c----------- +c b,jb, +c ib = matrix in csr format. b(ipos:ipos+nnz-1),jb(ipos:ipos+nnz-1) +c contain the value and column indices respectively of the nnz +c nonzero elements of the permuted matrix. thus ib(1)=ipos. +c +c Notes: +c------- +c algorithm is NOT in place +c----------------------------------------------------------------------- +c local variables +c + integer ko,irow,k + logical values +c----------------------------------------------------------------------- + values = (job .eq. 1) + ko = ipos + ib(1) = ko + do 900 i=i1,i2 + irow = perm(i) + do 800 k=ia(irow),ia(irow+1)-1 + if (values) b(ko) = a(k) + jb(ko) = ja(k) + ko=ko+1 + 800 continue + ib(i-i1+2) = ko + 900 continue + return +c--------end-of-dperm1-------------------------------------------------- +c----------------------------------------------------------------------- + end +c----------------------------------------------------------------------- + subroutine dperm2 (i1,i2,a,ja,ia,b,jb,ib,perm,rperm,istart, + * ipos,job) + integer i1,i2,job,istart,ja(*),ia(*),jb(*),ib(*),perm(*),rperm(*) + real*8 a(*),b(*) +c----------------------------------------------------------------------- +c general submatrix permutation/ extraction routine. +c----------------------------------------------------------------------- +c extracts rows rperm(i1), rperm(i1+1), ..., rperm(i2) and does an +c associated column permutation (using array perm). The resulting +c submatrix is constructed in b, jb, ib. For added flexibility, +c the extracted are put in sequence starting from row 'istart' of B. +c In addition a pointer ipos to the beginning of arrays b,jb,is also +c allowed (i.e., nonzero elements are accumulated starting in +c position ipos of b, jb). +c extremely flexible. exple +c to permute msr to msr (excluding diagonals) +c call dperm2 (1,n,a,ja,ja,b,jb,jb,perm,rperm,1,n+2) +c----------------------------------------------------------------------- +c Y. Saad,Sep. 21 1989 / recoded Jan. 28 1991. +c----------------------------------------------------------------------- +c on entry: +c---------- +c n = dimension of the matrix +c a,ja, +c ia = input matrix in CSR format +c perm = integer array of length n containing the permutation arrays +c for the rows: perm(i) is the destination of row i in the +c permuted matrix -- also the destination of column i. +c +c rperm = reverse permutation. defined by rperm(iperm(i))=i for all i +c +c job = job indicator. if (job .ne.1) values are not copied (i.e., +c only pattern is copied). +c +c on return: +c----------- +c b,ja, +c ib = matrix in csr format. positions 1,2,...,istart-1 of ib +c are not touched. b(ipos:ipos+nnz-1),jb(ipos:ipos+nnz-1) +c contain the value and column indices respectively of the nnz +c nonzero elements of the permuted matrix. thus ib(istart)=ipos. +c +c Notes: +c------- +c 1) algorithm is NOT in place +c 2) column indices may not be sorted on return even though they +c may be on entry. +c----------------------------------------------------------------------- +c local variables +c + integer ko,irow,k + logical values +c----------------------------------------------------------------------- + values = (job .eq. 1) + ko = ipos + ib(istart) = ko + do 900 i=i1,i2 + irow = rperm(i) + do 800 k=ia(irow),ia(irow+1)-1 + if (values) b(ko) = a(k) + jb(ko) = perm(ja(k)) + ko=ko+1 + 800 continue + ib(istart+i-i1+1) = ko + 900 continue + return +c--------end-of-dperm2-------------------------------------------------- +c----------------------------------------------------------------------- + end +c----------------------------------------------------------------------- + subroutine multic (n,ja,ia,ncol,kolrs,il,iord,maxcol,ierr) + integer n, ja(*),ia(n+1),kolrs(n),iord(n),il(maxcol+1),ierr +c----------------------------------------------------------------------- +c multic: +c multicoloring ordering -- greedy algorithm -- +c determines the coloring permutation and sets up +c corresponding data structures for it. +c----------------------------------------------------------------------- +c on entry +c -------- +c n = row and column dimention of matrix +c ja = column indices of nonzero elements of matrix, stored rowwise. +c ia = pointer to beginning of each row in ja. +c maxcol= maximum number of colors allowed -- the size of il is +c maxcol+1 at least. Note: the number of colors does not +c exceed the maximum degree of each node +1. +c +c on return +c --------- +c ncol = number of colours found +c kolrs = integer array containing the color number assigned to each node +c il = integer array containing the pointers to the +c beginning of each color set. In the permuted matrix +c the rows /columns il(kol) to il(kol+1)-1 have the same color. +c iord = permutation array corresponding to the multicolor ordering. +c row number i will become row nbumber iord(i) in permuted +c matrix. (iord = destination permutation array). +c ierr = integer. Error message. normal return ierr = 0 +c----------------------------------------------------------------------- +c + integer kol, i, j, k, maxcol, mycol +c + ierr = 0 + do 1 j=1, n + kolrs(j) = 0 + iord(j) = j + 1 continue + do 11 j=1, maxcol + il(j) = 0 + 11 continue +c + ncol = 0 +c +c scan all nodes +c + do 4 ii=1, n + i = iord(ii) +c +c look at adjacent nodes to determine colors already assigned +c + mcol = 0 + do 2 k=ia(i), ia(i+1)-1 + j = ja(k) + icol = kolrs(j) + if (icol .ne. 0) then + mcol = max(mcol,icol) +c +c il used as temporary to record already assigned colors. +c + il(icol) = 1 + endif + 2 continue +c +c colors taken determined. scan il until a slot opens up. +c + mycol = 1 + 3 if (il(mycol) .eq. 1) then + mycol = mycol+1 +c +c #### put test elsewhere -- +c + if (mycol .gt. maxcol) goto 99 + if (mycol .le. mcol) goto 3 + endif +c +c reset il to zero for next nodes +c + do 35 j=1, mcol + il(j) = 0 + 35 continue +c +c assign color and update number of colors so far +c + kolrs(i) = mycol + ncol = max(ncol,mycol) + 4 continue +c +c every node has now been colored. Count nodes of each color +c + do 6 j=1, n + kol = kolrs(j)+1 + il(kol) = il(kol)+1 + 6 continue +c +c set pointers il +c + il(1) = 1 + do 7 j=1, ncol + il(j+1) = il(j)+il(j+1) + 7 continue +c +c set iord +c + do 8 j=1, n + kol = kolrs(j) + iord(j) = il(kol) + il(kol) = il(kol)+1 + 8 continue +c +c shift il back +c + do 9 j=ncol,1,-1 + il(j+1) = il(j) + 9 continue + il(1) = 1 +c + return + 99 ierr = 1 + return +c-----end-of-multic----------------------------------------------------- +c----------------------------------------------------------------------- + end +c----------------------------------------------------------------------- + subroutine xtrows (i1,i2,a,ja,ia,ao,jao,iao,iperm,job) + integer i1,i2,ja(*),ia(*),jao(*),iao(*),iperm(*),job + real*8 a(*),ao(*) +c----------------------------------------------------------------------- +c this subroutine extracts given rows from a matrix in CSR format. +c Specifically, rows number iperm(i1), iperm(i1+1), ...., iperm(i2) +c are extracted and put in the output matrix ao, jao, iao, in CSR +c format. NOT in place. +c Youcef Saad -- coded Feb 15, 1992. +c----------------------------------------------------------------------- +c on entry: +c---------- +c i1,i2 = two integers indicating the rows to be extracted. +c xtrows will extract rows iperm(i1), iperm(i1+1),..,iperm(i2), +c from original matrix and stack them in output matrix +c ao, jao, iao in csr format +c +c a, ja, ia = input matrix in csr format +c +c iperm = integer array of length nrow containing the reverse permutation +c array for the rows. row number iperm(j) in permuted matrix PA +c used to be row number j in unpermuted matrix. +c ---> a(i,j) in the permuted matrix was a(iperm(i),j) +c in the inout matrix. +c +c job = integer indicating the work to be done: +c job .ne. 1 : get structure only of output matrix,, +c i.e., ignore real values. (in which case arrays a +c and ao are not used nor accessed). +c job = 1 get complete data structure of output matrix. +c (i.e., including arrays ao and iao). +c------------ +c on return: +c------------ +c ao, jao, iao = input matrix in a, ja, ia format +c note : +c if (job.ne.1) then the arrays a and ao are not used. +c----------------------------------------------------------------------c +c Y. Saad, revised May 2, 1990 c +c----------------------------------------------------------------------c + logical values + values = (job .eq. 1) +c +c copying +c + ko = 1 + iao(1) = ko + do 100 j=i1,i2 +c +c ii=iperm(j) is the index of old row to be copied. +c + ii = iperm(j) + do 60 k=ia(ii), ia(ii+1)-1 + jao(ko) = ja(k) + if (values) ao(ko) = a(k) + ko = ko+1 + 60 continue + iao(j-i1+2) = ko + 100 continue +c + return +c---------end-of-xtrows------------------------------------------------- +c----------------------------------------------------------------------- + end +c----------------------------------------------------------------------- + +c----------------------------------------------------------------------- + subroutine amub (nrow,ncol,job,a,ja,ia,b,jb,ib, + * c,jc,ic,nzmax,iw,ierr) + real*8 a(*), b(*), c(*) + integer ja(*),jb(*),jc(*),ia(nrow+1),ib(ncol+1), + * ic(ncol+1),iw(ncol) +c----------------------------------------------------------------------- +c performs the matrix by matrix product C = A B +c----------------------------------------------------------------------- +c on entry: +c --------- +c nrow = integer. The row dimension of A +c ncol = integer. The column dimension of A +c job = integer. Job indicator. When job = 0, only the structure +c (i.e. the arrays jc, ic) is computed and the +c real values are ignored. +c +c a, +c ja, +c ia = Matrix A in compressed sparse row format. +c +c b, +c jb, +c ib = Matrix B in compressed sparse row format. +c +c nzmax = integer. The length of the arrays c and jc. +c amub will stop if the result matrix C has a number +c of elements that exceeds exceeds nzmax. See ierr. +c +c on return: +c---------- +c c, +c jc, +c ic = resulting matrix C in compressed sparse row sparse format. +c +c ierr = integer. serving as error message. +c ierr = 0 means normal return, +c ierr .gt. 0 means that amub stopped while computing the +c i-th row of C with i=ierr, because the number +c of elements in C exceeds nzmax. +c +c work arrays: +c------------ +c iw = integer work array of length equal to the number of +c columns in A. +c Notes: +c------- +c The column dimension of B is not needed. +c +c----------------------------------------------------------------- + real*8 scal + logical values + values = (job .ne. 0) + len = 0 + ic(1) = 1 + ierr = 0 +c initialize array iw. + do 1 j=1, ncol + iw(j) = 0 + 1 continue +c + do 500 ii=1, nrow +c row i + do 200 ka=ia(ii), ia(ii+1)-1 + if (values) scal = a(ka) + jj = ja(ka) + do 100 kb=ib(jj),ib(jj+1)-1 + jcol = jb(kb) + jpos = iw(jcol) + if (jpos .eq. 0) then + len = len+1 + if (len .gt. nzmax) then + ierr = ii + return + endif + jc(len) = jcol + iw(jcol)= len + if (values) c(len) = scal*b(kb) + else + if (values) c(jpos) = c(jpos) + scal*b(kb) + endif + 100 continue + 200 continue + do 201 k=ic(ii), len + iw(jc(k)) = 0 + 201 continue + ic(ii+1) = len+1 + 500 continue + return +c-------------end-of-amub----------------------------------------------- +c----------------------------------------------------------------------- + end +c----------------------------------------------------------------------- + subroutine dump (i1,i2,values,a,ja,ia,iout) + integer i1, i2, ia(*), ja(*), iout + real*8 a(*) + logical values +c outputs rows i1 through i2 of a sparse matrix stored in CSR format +c (or columns i1 through i2 of a matrix stored in CSC format) in a file, +c one (column) row at a time in a nice readable format. +c This is a simple routine which is useful for debugging. +c----------------------------------------------------------------------- +c on entry: +c--------- +c i1 = first row (column) to print out +c i2 = last row (column) to print out +c values= logical. indicates whether or not to print real values. +c if value = .false. only the pattern will be output. +c a, +c ja, +c ia = matrix in CSR format (or CSC format) +c iout = logical unit number for output. +c---------- +c the output file iout will have written in it the rows or columns +c of the matrix in one of two possible formats (depending on the max +c number of elements per row. The values are output with only +c two digits of accuracy (D9.2). ) +c----------------------------------------------------------------------- +c local variables + integer maxr, i, k1, k2 +c +c select mode horizontal or vertical +c + maxr = 0 + do 1 i=i1, i2 + maxr = max0(maxr,ia(i+1)-ia(i)) + 1 continue + + if (maxr .le. 8) then +c +c able to do one row acros line +c + do 2 i=i1, i2 + write(iout,100) i + k1=ia(i) + k2 = ia(i+1)-1 + write (iout,101) (ja(k),k=k1,k2) + if (values) write (iout,102) (a(k),k=k1,k2) + 2 continue + else +c +c unable to one row acros line. do three items at a time +c across a line + do 3 i=i1, i2 + if (values) then + write(iout,200) i + else + write(iout,203) i + endif + k1=ia(i) + k2 = ia(i+1)-1 + if (values) then + write (iout,201) (ja(k),a(k),k=k1,k2) + else + write (iout,202) (ja(k),k=k1,k2) + endif + 3 continue + endif +c +c formats : +c + 100 format (1h ,35(1h-),' row',i6,1x,35(1h-) ) + 101 format(' col:',8(i5,6h : )) + 102 format(' val:',8(D9.2,2h :) ) + 200 format (1h ,31(1h-),' row',i3,1x,31(1h-),/ + * 3(' columns : values * ') ) +c-------------xiiiiiihhhhhhddddddddd-*- + 201 format(3(1h ,i6,6h : ,D9.2,3h * ) ) + 202 format(6(1h ,i5,6h * ) ) + 203 format (1h ,31(1h-),' row',i3,1x,31(1h-),/ + * 3(' column : column *') ) + return +c----end-of-dump-------------------------------------------------------- +c----------------------------------------------------------------------- + end +c----------------------------------------------------------------------- + subroutine readmt (nmax,nzmax,job,iounit,a,ja,ia,rhs,nrhs, + * guesol,nrow,ncol,nnz,title,key,type,ierr) +c----------------------------------------------------------------------- +c this subroutine reads a boeing/harwell matrix. handles right hand +c sides in full format only (no sparse right hand sides). +c Also the matrix must be in assembled forms. +c Author: Youcef Saad - Date: Sept., 1989 +c updated Oct 31, 1989. +c----------------------------------------------------------------------- +c on entry: +c--------- +c nmax = max column dimension allowed for matrix. The array ia should +c be of length at least ncol+1 (see below) if job.gt.0 +c nzmax = max number of nonzeros elements allowed. the arrays a, +c and ja should be of length equal to nnz (see below) if these +c arrays are to be read (see job). +c +c job = integer to indicate what is to be read. (note: job is an +c input and output parameter, it can be modified on return) +c job = 0 read the values of ncol, nrow, nnz, title, key, +c type and return. matrix is not read and arrays +c a, ja, ia, rhs are not touched. +c job = 1 read srtucture only, i.e., the arrays ja and ia. +c job = 2 read matrix including values, i.e., a, ja, ia +c job = 3 read matrix and right hand sides: a,ja,ia,rhs. +c rhs may contain initial guesses and exact +c solutions appended to the actual right hand sides. +c this will be indicated by the output parameter +c guesol [see below]. +c +c nrhs = integer. nrhs is an input as well as ouput parameter. +c at input nrhs contains the total length of the array rhs. +c See also ierr and nrhs in output parameters. +c +c iounit = logical unit number where to read the matrix from. +c +c on return: +c---------- +c job = on return job may be modified to the highest job it could +c do: if job=2 on entry but no matrix values are available it +c is reset to job=1 on return. Similarly of job=3 but no rhs +c is provided then it is rest to job=2 or job=1 depending on +c whether or not matrix values are provided. +c Note that no error message is triggered (i.e. ierr = 0 +c on return in these cases. It is therefore important to +c compare the values of job on entry and return ). +c +c a = the a matrix in the a, ia, ja (column) storage format +c ja = row number of element a(i,j) in array a. +c ia = pointer array. ia(i) points to the beginning of column i. +c +c rhs = real array of size nrow + 1 if available (see job) +c +c nrhs = integer containing the number of right-hand sides found +c each right hand side may be accompanied with an intial guess +c and also the exact solution. +c +c guesol = a 2-character string indicating whether an initial guess +c (1-st character) and / or the exact solution (2-nd +c character) is provided with the right hand side. +c if the first character of guesol is 'G' it means that an +c an intial guess is provided for each right-hand side. +c These are appended to the right hand-sides in the array rhs. +c if the second character of guesol is 'X' it means that an +c exact solution is provided for each right-hand side. +c These are appended to the right hand-sides +c and the initial guesses (if any) in the array rhs. +c +c nrow = number of rows in matrix +c ncol = number of columns in matrix +c nnz = number of nonzero elements in A. This info is returned +c even if there is not enough space in a, ja, ia, in order +c to determine the minimum storage needed. +c +c title = character*100 = title of matrix test ( character a*72). +c key = character*8 = key of matrix +c type = charatcer*3 = type of matrix. +c for meaning of title, key and type refer to documentation +c Harwell/Boeing matrices. +c +c ierr = integer used for error messages +c * ierr = 0 means that the matrix has been read normally. +c * ierr = 1 means that the array matrix could not be read +c because ncol+1 .gt. nmax +c * ierr = 2 means that the array matrix could not be read +c because nnz .gt. nzmax +c * ierr = 3 means that the array matrix could not be read +c because both (ncol+1 .gt. nmax) and (nnz .gt. nzmax ) +c * ierr = 4 means that the right hand side (s) initial +c guesse (s) and exact solution (s) could not be +c read because they are stored in sparse format (not handled +c by this routine ...) +c * ierr = 5 means that the right-hand-sides, initial guesses +c and exact solutions could not be read because the length of +c rhs as specified by the input value of nrhs is not +c sufficient to store them. The rest of the matrix may have +c been read normally. +c +c Notes: +c------- +c 1) The file inout must be open (and possibly rewound if necessary) +c prior to calling readmt. +c 2) Refer to the documentation on the Harwell-Boeing formats +c for details on the format assumed by readmt. +c We summarize the format here for convenience. +c +c a) all lines in inout are assumed to be 80 character long. +c b) the file consists of a header followed by the block of the +c column start pointers followed by the block of the +c row indices, followed by the block of the real values and +c finally the numerical values of the right-hand-side if a +c right hand side is supplied. +c c) the file starts by a header which contains four lines if no +c right hand side is supplied and five lines otherwise. +c * first line contains the title (72 characters long) followed by +c the 8-character identifier (name of the matrix, called key) +c [ A72,A8 ] +c * second line contains the number of lines for each +c of the following data blocks (4 of them) and the total number +c of lines excluding the header. +c [5i4] +c * the third line contains a three character string identifying +c the type of matrices as they are referenced in the Harwell +c Boeing documentation [e.g., rua, rsa,..] and the number of +c rows, columns, nonzero entries. +c [A3,11X,4I14] +c * The fourth line contains the variable fortran format +c for the following data blocks. +c [2A16,2A20] +c * The fifth line is only present if right-hand-sides are +c supplied. It consists of three one character-strings containing +c the storage format for the right-hand-sides +c ('F'= full,'M'=sparse=same as matrix), an initial guess +c indicator ('G' for yes), an exact solution indicator +c ('X' for yes), followed by the number of right-hand-sides +c and then the number of row indices. +c [A3,11X,2I14] +c d) The three following blocks follow the header as described +c above. +c e) In case the right hand-side are in sparse formats then +c the fourth block uses the same storage format as for the matrix +c to describe the NRHS right hand sides provided, with a column +c being replaced by a right hand side. +c----------------------------------------------------------------------- + character title*72, key*8, type*3, ptrfmt*16, indfmt*16, + 1 valfmt*20, rhsfmt*20, rhstyp*3, guesol*2 + integer totcrd, ptrcrd, indcrd, valcrd, rhscrd, nrow, ncol, + 1 nnz, neltvl, nrhs, nmax, nzmax, nrwindx + integer ia (nmax+1), ja (nzmax) + real*8 a(nzmax), rhs(*) +c----------------------------------------------------------------------- + ierr = 0 + lenrhs = nrhs +c + read (iounit,10) title, key, totcrd, ptrcrd, indcrd, valcrd, + 1 rhscrd, type, nrow, ncol, nnz, neltvl, ptrfmt, indfmt, + 2 valfmt, rhsfmt + 10 format (a72, a8 / 5i14 / a3, 11x, 4i14 / 2a16, 2a20) +c + if (rhscrd .gt. 0) read (iounit,11) rhstyp, nrhs, nrwindx + 11 format (a3,11x,i14,i14) +c +c anything else to read ? +c + if (job .le. 0) return +c ---- check whether matrix is readable ------ + n = ncol + if (ncol .gt. nmax) ierr = 1 + if (nnz .gt. nzmax) ierr = ierr + 2 + if (ierr .ne. 0) return +c ---- read pointer and row numbers ---------- + read (iounit,ptrfmt) (ia (i), i = 1, n+1) + read (iounit,indfmt) (ja (i), i = 1, nnz) +c --- reading values of matrix if required.... + if (job .le. 1) return +c --- and if available ----------------------- + if (valcrd .le. 0) then + job = 1 + return + endif + read (iounit,valfmt) (a(i), i = 1, nnz) +c --- reading rhs if required ---------------- + if (job .le. 2) return +c --- and if available ----------------------- + if ( rhscrd .le. 0) then + job = 2 + return + endif +c +c --- read right-hand-side.-------------------- +c + if (rhstyp(1:1) .eq. 'M') then + ierr = 4 + return + endif +c + guesol = rhstyp(2:3) +c + nvec = 1 + if (guesol(1:1) .eq. 'G' .or. guesol(1:1) .eq. 'g') nvec=nvec+1 + if (guesol(2:2) .eq. 'X' .or. guesol(2:2) .eq. 'x') nvec=nvec+1 +c + len = nrhs*nrow +c + if (len*nvec .gt. lenrhs) then + ierr = 5 + return + endif +c +c read right-hand-sides +c + next = 1 + iend = len + read(iounit,rhsfmt) (rhs(i), i = next, iend) +c +c read initial guesses if available +c + if (guesol(1:1) .eq. 'G' .or. guesol(1:1) .eq. 'g') then + next = next+len + iend = iend+ len + read(iounit,valfmt) (rhs(i), i = next, iend) + endif +c +c read exact solutions if available +c + if (guesol(2:2) .eq. 'X' .or. guesol(2:2) .eq. 'x') then + next = next+len + iend = iend+ len + read(iounit,valfmt) (rhs(i), i = next, iend) + endif +c + return +c---------end-of-readmt------------------------------------------------- +c----------------------------------------------------------------------- + end +c----------------------------------------------------------------------- + subroutine roscal(nrow,job,nrm,a,ja,ia,diag,b,jb,ib,ierr) + real*8 a(*), b(*), diag(nrow) + integer nrow,job,nrm,ja(*),jb(*),ia(nrow+1),ib(nrow+1),ierr +c----------------------------------------------------------------------- +c scales the rows of A such that their norms are one on return +c 3 choices of norms: 1-norm, 2-norm, max-norm. +c----------------------------------------------------------------------- +c on entry: +c --------- +c nrow = integer. The row dimension of A +c +c job = integer. job indicator. Job=0 means get array b only +c job = 1 means get b, and the integer arrays ib, jb. +c +c nrm = integer. norm indicator. nrm = 1, means 1-norm, nrm =2 +c means the 2-nrm, nrm = 0 means max norm +c +c a, +c ja, +c ia = Matrix A in compressed sparse row format. +c +c on return: +c---------- +c +c diag = diagonal matrix stored as a vector containing the matrix +c by which the rows have been scaled, i.e., on return +c we have B = Diag*A. +c +c b, +c jb, +c ib = resulting matrix B in compressed sparse row sparse format. +c +c ierr = error message. ierr=0 : Normal return +c ierr=i > 0 : Row number i is a zero row. +c Notes: +c------- +c 1) The column dimension of A is not needed. +c 2) algorithm in place (B can take the place of A). +c----------------------------------------------------------------- + call rnrms (nrow,nrm,a,ja,ia,diag) + + ierr = 0 + do 1 j=1, nrow + if (diag(j) .eq. 0.0d0) then + ierr = j + return + else + diag(j) = 1.0d0/diag(j) + endif + 1 continue + call diamua(nrow,job,a,ja,ia,diag,b,jb,ib) + return +c-------end-of-roscal--------------------------------------------------- +c----------------------------------------------------------------------- + end +c----------------------------------------------------------------------- + subroutine coscal(nrow,job,nrm,a,ja,ia,diag,b,jb,ib,ierr) +c----------------------------------------------------------------------- + real*8 a(*),b(*),diag(nrow) + integer nrow,job,ja(*),jb(*),ia(nrow+1),ib(nrow+1),ierr +c----------------------------------------------------------------------- +c scales the columns of A such that their norms are one on return +c result matrix written on b, or overwritten on A. +c 3 choices of norms: 1-norm, 2-norm, max-norm. in place. +c----------------------------------------------------------------------- +c on entry: +c --------- +c nrow = integer. The row dimension of A +c +c job = integer. job indicator. Job=0 means get array b only +c job = 1 means get b, and the integer arrays ib, jb. +c +c nrm = integer. norm indicator. nrm = 1, means 1-norm, nrm =2 +c means the 2-nrm, nrm = 0 means max norm +c +c a, +c ja, +c ia = Matrix A in compressed sparse row format. +c +c on return: +c---------- +c +c diag = diagonal matrix stored as a vector containing the matrix +c by which the columns have been scaled, i.e., on return +c we have B = A * Diag +c +c b, +c jb, +c ib = resulting matrix B in compressed sparse row sparse format. +c +c ierr = error message. ierr=0 : Normal return +c ierr=i > 0 : Column number i is a zero row. +c Notes: +c------- +c 1) The column dimension of A is not needed. +c 2) algorithm in place (B can take the place of A). +c----------------------------------------------------------------- + call cnrms (nrow,nrm,a,ja,ia,diag) + ierr = 0 + do 1 j=1, nrow + if (diag(j) .eq. 0.0) then + ierr = j + return + else + diag(j) = 1.0d0/diag(j) + endif + 1 continue + call amudia (nrow,job,a,ja,ia,diag,b,jb,ib) + return +c--------end-of-coscal-------------------------------------------------- +c----------------------------------------------------------------------- + end + subroutine rnrms (nrow, nrm, a, ja, ia, diag) + real*8 a(*), diag(nrow), scal + integer ja(*), ia(nrow+1) +c----------------------------------------------------------------------- +c gets the norms of each row of A. (choice of three norms) +c----------------------------------------------------------------------- +c on entry: +c --------- +c nrow = integer. The row dimension of A +c +c nrm = integer. norm indicator. nrm = 1, means 1-norm, nrm =2 +c means the 2-nrm, nrm = 0 means max norm +c +c a, +c ja, +c ia = Matrix A in compressed sparse row format. +c +c on return: +c---------- +c +c diag = real vector of length nrow containing the norms +c +c----------------------------------------------------------------- + do 1 ii=1,nrow +c +c compute the norm if each element. +c + scal = 0.0d0 + k1 = ia(ii) + k2 = ia(ii+1)-1 + if (nrm .eq. 0) then + do 2 k=k1, k2 + scal = max(scal,abs(a(k) ) ) + 2 continue + elseif (nrm .eq. 1) then + do 3 k=k1, k2 + scal = scal + abs(a(k) ) + 3 continue + else + do 4 k=k1, k2 + scal = scal+a(k)**2 + 4 continue + endif + if (nrm .eq. 2) scal = sqrt(scal) + diag(ii) = scal + 1 continue + return +c----------------------------------------------------------------------- +c-------------end-of-rnrms---------------------------------------------- + end +c----------------------------------------------------------------------- + subroutine cnrms (nrow, nrm, a, ja, ia, diag) + real*8 a(*), diag(nrow) + integer ja(*), ia(nrow+1) +c----------------------------------------------------------------------- +c gets the norms of each column of A. (choice of three norms) +c----------------------------------------------------------------------- +c on entry: +c --------- +c nrow = integer. The row dimension of A +c +c nrm = integer. norm indicator. nrm = 1, means 1-norm, nrm =2 +c means the 2-nrm, nrm = 0 means max norm +c +c a, +c ja, +c ia = Matrix A in compressed sparse row format. +c +c on return: +c---------- +c +c diag = real vector of length nrow containing the norms +c +c----------------------------------------------------------------- + do 10 k=1, nrow + diag(k) = 0.0d0 + 10 continue + do 1 ii=1,nrow + k1 = ia(ii) + k2 = ia(ii+1)-1 + do 2 k=k1, k2 + j = ja(k) +c update the norm of each column + if (nrm .eq. 0) then + diag(j) = max(diag(j),abs(a(k) ) ) + elseif (nrm .eq. 1) then + diag(j) = diag(j) + abs(a(k) ) + else + diag(j) = diag(j)+a(k)**2 + endif + 2 continue + 1 continue + if (nrm .ne. 2) return + do 3 k=1, nrow + diag(k) = sqrt(diag(k)) + 3 continue + return +c----------------------------------------------------------------------- +c------------end-of-cnrms----------------------------------------------- + end + subroutine diamua (nrow,job, a, ja, ia, diag, b, jb, ib) + real*8 a(*), b(*), diag(nrow), scal + integer ja(*),jb(*), ia(nrow+1),ib(nrow+1) +c----------------------------------------------------------------------- +c performs the matrix by matrix product B = Diag * A (in place) +c----------------------------------------------------------------------- +c on entry: +c --------- +c nrow = integer. The row dimension of A +c +c job = integer. job indicator. Job=0 means get array b only +c job = 1 means get b, and the integer arrays ib, jb. +c +c a, +c ja, +c ia = Matrix A in compressed sparse row format. +c +c diag = diagonal matrix stored as a vector dig(1:n) +c +c on return: +c---------- +c +c b, +c jb, +c ib = resulting matrix B in compressed sparse row sparse format. +c +c Notes: +c------- +c 1) The column dimension of A is not needed. +c 2) algorithm in place (B can take the place of A). +c in this case use job=0. +c----------------------------------------------------------------- + do 1 ii=1,nrow +c +c normalize each row +c + k1 = ia(ii) + k2 = ia(ii+1)-1 + scal = diag(ii) + do 2 k=k1, k2 + b(k) = a(k)*scal + 2 continue + 1 continue +c + if (job .eq. 0) return +c + do 3 ii=1, nrow+1 + ib(ii) = ia(ii) + 3 continue + do 31 k=ia(1), ia(nrow+1) -1 + jb(k) = ja(k) + 31 continue + return +c----------end-of-diamua------------------------------------------------ +c----------------------------------------------------------------------- + end +c----------------------------------------------------------------------- + subroutine amudia (nrow,job, a, ja, ia, diag, b, jb, ib) + real*8 a(*), b(*), diag(nrow) + integer ja(*),jb(*), ia(nrow+1),ib(nrow+1) +c----------------------------------------------------------------------- +c performs the matrix by matrix product B = A * Diag (in place) +c----------------------------------------------------------------------- +c on entry: +c --------- +c nrow = integer. The row dimension of A +c +c job = integer. job indicator. Job=0 means get array b only +c job = 1 means get b, and the integer arrays ib, jb. +c +c a, +c ja, +c ia = Matrix A in compressed sparse row format. +c +c diag = diagonal matrix stored as a vector dig(1:n) +c +c on return: +c---------- +c +c b, +c jb, +c ib = resulting matrix B in compressed sparse row sparse format. +c +c Notes: +c------- +c 1) The column dimension of A is not needed. +c 2) algorithm in place (B can take the place of A). +c----------------------------------------------------------------- + do 1 ii=1,nrow +c +c scale each element +c + k1 = ia(ii) + k2 = ia(ii+1)-1 + do 2 k=k1, k2 + b(k) = a(k)*diag(ja(k)) + 2 continue + 1 continue +c + if (job .eq. 0) return +c + do 3 ii=1, nrow+1 + ib(ii) = ia(ii) + 3 continue + do 31 k=ia(1), ia(nrow+1) -1 + jb(k) = ja(k) + 31 continue + return +c----------------------------------------------------------------------- +c-----------end-of-amudiag---------------------------------------------- + end +c aplb : computes C = A+B c + subroutine aplb (nrow,ncol,job,a,ja,ia,b,jb,ib, + * c,jc,ic,nzmax,iw,ierr) + real*8 a(*), b(*), c(*) + integer ja(*),jb(*),jc(*),ia(nrow+1),ib(nrow+1),ic(nrow+1), + * iw(ncol) +c----------------------------------------------------------------------- +c performs the matrix sum C = A+B. +c----------------------------------------------------------------------- +c on entry: +c --------- +c nrow = integer. The row dimension of A and B +c ncol = integer. The column dimension of A and B. +c job = integer. Job indicator. When job = 0, only the structure +c (i.e. the arrays jc, ic) is computed and the +c real values are ignored. +c +c a, +c ja, +c ia = Matrix A in compressed sparse row format. +c +c b, +c jb, +c ib = Matrix B in compressed sparse row format. +c +c nzmax = integer. The length of the arrays c and jc. +c amub will stop if the result matrix C has a number +c of elements that exceeds exceeds nzmax. See ierr. +c +c on return: +c---------- +c c, +c jc, +c ic = resulting matrix C in compressed sparse row sparse format. +c +c ierr = integer. serving as error message. +c ierr = 0 means normal return, +c ierr .gt. 0 means that amub stopped while computing the +c i-th row of C with i=ierr, because the number +c of elements in C exceeds nzmax. +c +c work arrays: +c------------ +c iw = integer work array of length equal to the number of +c columns in A. +c +c----------------------------------------------------------------------- + logical values + + values = (job .ne. 0) + ierr = 0 + len = 0 + ic(1) = 1 + do 1 j=1, ncol + iw(j) = 0 + 1 continue +c + do 500 ii=1, nrow +c row i + do 200 ka=ia(ii), ia(ii+1)-1 + len = len+1 + jcol = ja(ka) + if (len .gt. nzmax) goto 999 + jc(len) = jcol + if (values) c(len) = a(ka) + iw(jcol)= len + 200 continue +c + do 300 kb=ib(ii),ib(ii+1)-1 + jcol = jb(kb) + jpos = iw(jcol) + if (jpos .eq. 0) then + len = len+1 + if (len .gt. nzmax) goto 999 + jc(len) = jcol + if (values) c(len) = b(kb) + iw(jcol)= len + else + if (values) c(jpos) = c(jpos) + b(kb) + endif + 300 continue + do 301 k=ic(ii), len + iw(jc(k)) = 0 + 301 continue + ic(ii+1) = len+1 + 500 continue + return + 999 ierr = ii + return +c------------end of aplb ----------------------------------------------- + end + +c csrcsc : converts compressed sparse row format to compressed sparse c + subroutine csrcsc (n,job,ipos,a,ja,ia,ao,jao,iao) + integer ia(n+1),iao(n+1),ja(*),jao(*) + real*8 a(*),ao(*) +c----------------------------------------------------------------------- +c Compressed Sparse Row to Compressed Sparse Column +c +c (transposition operation) Not in place. +c----------------------------------------------------------------------- +c -- not in place -- +c this subroutine transposes a matrix stored in a, ja, ia format. +c --------------- +c on entry: +c---------- +c n = dimension of A. +c job = integer to indicate whether to fill the values (job.eq.1) of the +c matrix ao or only the pattern., i.e.,ia, and ja (job .ne.1) +c +c ipos = starting position in ao, jao of the transposed matrix. +c the iao array takes this into account (thus iao(1) is set to ipos.) +c Note: this may be useful if one needs to append the data structure +c of the transpose to that of A. In this case use for example +c call csrcsc (n,1,n+2,a,ja,ia,a,ja,ia(n+2)) +c for any other normal usage, enter ipos=1. +c a = real array of length nnz (nnz=number of nonzero elements in input +c matrix) containing the nonzero elements. +c ja = integer array of length nnz containing the column positions +c of the corresponding elements in a. +c ia = integer of size n+1. ia(k) contains the position in a, ja of +c the beginning of the k-th row. +c +c on return: +c ---------- +c output arguments: +c ao = real array of size nzz containing the "a" part of the transpose +c jao = integer array of size nnz containing the column indices. +c iao = integer array of size n+1 containing the "ia" index array of +c the transpose. +c +c----------------------------------------------------------------------- + call csrcsc2 (n,n,job,ipos,a,ja,ia,ao,jao,iao) + end +c----------------------------------------------------------------------- + subroutine csrcsc2 (n,n2,job,ipos,a,ja,ia,ao,jao,iao) + integer ia(n+1),iao(n2+1),ja(*),jao(*) + real*8 a(*),ao(*) +c----------------------------------------------------------------------- +c Compressed Sparse Row to Compressed Sparse Column +c +c (transposition operation) Not in place. +c----------------------------------------------------------------------- +c Rectangular version. n is number of rows of CSR matrix, +c n2 (input) is number of columns of CSC matrix. +c----------------------------------------------------------------------- +c -- not in place -- +c this subroutine transposes a matrix stored in a, ja, ia format. +c --------------- +c on entry: +c---------- +c n = number of rows of CSR matrix. +c n2 = number of columns of CSC matrix. +c job = integer to indicate whether to fill the values (job.eq.1) of the +c matrix ao or only the pattern., i.e.,ia, and ja (job .ne.1) +c +c ipos = starting position in ao, jao of the transposed matrix. +c the iao array takes this into account (thus iao(1) is set to ipos.) +c Note: this may be useful if one needs to append the data structure +c of the transpose to that of A. In this case use for example +c call csrcsc2 (n,n,1,n+2,a,ja,ia,a,ja,ia(n+2)) +c for any other normal usage, enter ipos=1. +c a = real array of length nnz (nnz=number of nonzero elements in input +c matrix) containing the nonzero elements. +c ja = integer array of length nnz containing the column positions +c of the corresponding elements in a. +c ia = integer of size n+1. ia(k) contains the position in a, ja of +c the beginning of the k-th row. +c +c on return: +c ---------- +c output arguments: +c ao = real array of size nzz containing the "a" part of the transpose +c jao = integer array of size nnz containing the column indices. +c iao = integer array of size n+1 containing the "ia" index array of +c the transpose. +c +c----------------------------------------------------------------------- +c----------------- compute lengths of rows of transp(A) ---------------- + do 1 i=1,n2+1 + iao(i) = 0 + 1 continue + do 3 i=1, n + do 2 k=ia(i), ia(i+1)-1 + j = ja(k)+1 + iao(j) = iao(j)+1 + 2 continue + 3 continue +c---------- compute pointers from lengths ------------------------------ + iao(1) = ipos + do 4 i=1,n2 + iao(i+1) = iao(i) + iao(i+1) + 4 continue +c--------------- now do the actual copying ----------------------------- + do 6 i=1,n + do 62 k=ia(i),ia(i+1)-1 + j = ja(k) + next = iao(j) + if (job .eq. 1) ao(next) = a(k) + jao(next) = i + iao(j) = next+1 + 62 continue + 6 continue +c-------------------------- reshift iao and leave ---------------------- + do 7 i=n2,1,-1 + iao(i+1) = iao(i) + 7 continue + iao(1) = ipos +c--------------- end of csrcsc2 ---------------------------------------- +c----------------------------------------------------------------------- + end + +c----------------------------------------------------------------------- + subroutine atmux (n, x, y, a, ja, ia) + real*8 x(*), y(*), a(*) + integer n, ia(*), ja(*) +c----------------------------------------------------------------------- +c transp( A ) times a vector +c----------------------------------------------------------------------- +c multiplies the transpose of a matrix by a vector when the original +c matrix is stored in compressed sparse row storage. Can also be +c viewed as the product of a matrix by a vector when the original +c matrix is stored in the compressed sparse column format. +c----------------------------------------------------------------------- +c +c on entry: +c---------- +c n = row dimension of A +c x = real array of length equal to the column dimension of +c the A matrix. +c a, ja, +c ia = input matrix in compressed sparse row format. +c +c on return: +c----------- +c y = real array of length n, containing the product y=transp(A)*x +c +c----------------------------------------------------------------------- +c local variables +c + integer i, k +c----------------------------------------------------------------------- +c +c zero out output vector +c + do 1 i=1,n + y(i) = 0.0 + 1 continue +c +c loop over the rows +c + do 100 i = 1,n + do 99 k=ia(i), ia(i+1)-1 + y(ja(k)) = y(ja(k)) + x(i)*a(k) + 99 continue + 100 continue +c + return +c-------------end-of-atmux---------------------------------------------- +c----------------------------------------------------------------------- + end +c----------------------------------------------------------------------- + subroutine atmuxr (m, n, x, y, a, ja, ia) + real*8 x(*), y(*), a(*) + integer m, n, ia(*), ja(*) +c----------------------------------------------------------------------- +c transp( A ) times a vector, A can be rectangular +c----------------------------------------------------------------------- +c See also atmux. The essential difference is how the solution vector +c is initially zeroed. If using this to multiply rectangular CSC +c matrices by a vector, m number of rows, n is number of columns. +c----------------------------------------------------------------------- +c +c on entry: +c---------- +c m = column dimension of A +c n = row dimension of A +c x = real array of length equal to the column dimension of +c the A matrix. +c a, ja, +c ia = input matrix in compressed sparse row format. +c +c on return: +c----------- +c y = real array of length n, containing the product y=transp(A)*x +c +c----------------------------------------------------------------------- +c local variables +c + integer i, k +c----------------------------------------------------------------------- +c +c zero out output vector +c + do 1 i=1,m + y(i) = 0.0 + 1 continue +c +c loop over the rows +c + do 100 i = 1,n + do 99 k=ia(i), ia(i+1)-1 + y(ja(k)) = y(ja(k)) + x(i)*a(k) + 99 continue + 100 continue +c + return +c-------------end-of-atmuxr--------------------------------------------- +c----------------------------------------------------------------------- + end +c----------------------------------------------------------------------- + subroutine readmt_c (nmax,nzmax,job,fname,a,ja,ia,rhs,nrhs, + * guesol,nrow,ncol,nnz,title,key,type,ierr) +c----------------------------------------------------------------------- +c this subroutine reads a boeing/harwell matrix, given the +c corresponding file. handles right hand sides in full format +c only (no sparse right hand sides). Also the matrix must be +c in assembled forms. +c It differs from readmt, in that the name of the file needs +c to be passed, and then the file is opened and closed within +c this routine. +c Author: Youcef Saad - Date: Oct 31, 1989 +c updated Jul 20, 1998 by Irene Moulitsas +c----------------------------------------------------------------------- +c on entry: +c--------- +c nmax = max column dimension allowed for matrix. The array ia should +c be of length at least ncol+1 (see below) if job.gt.0 +c nzmax = max number of nonzeros elements allowed. the arrays a, +c and ja should be of length equal to nnz (see below) if these +c arrays are to be read (see job). +c +c job = integer to indicate what is to be read. (note: job is an +c input and output parameter, it can be modified on return) +c job = 0 read the values of ncol, nrow, nnz, title, key, +c type and return. matrix is not read and arrays +c a, ja, ia, rhs are not touched. +c job = 1 read srtucture only, i.e., the arrays ja and ia. +c job = 2 read matrix including values, i.e., a, ja, ia +c job = 3 read matrix and right hand sides: a,ja,ia,rhs. +c rhs may contain initial guesses and exact +c solutions appended to the actual right hand sides. +c this will be indicated by the output parameter +c guesol [see below]. +c +c fname = name of the file where to read the matrix from. +c +c nrhs = integer. nrhs is an input as well as ouput parameter. +c at input nrhs contains the total length of the array rhs. +c See also ierr and nrhs in output parameters. +c +c on return: +c---------- +c job = on return job may be modified to the highest job it could +c do: if job=2 on entry but no matrix values are available it +c is reset to job=1 on return. Similarly of job=3 but no rhs +c is provided then it is rest to job=2 or job=1 depending on +c whether or not matrix values are provided. +c Note that no error message is triggered (i.e. ierr = 0 +c on return in these cases. It is therefore important to +c compare the values of job on entry and return ). +c +c a = the a matrix in the a, ia, ja (column) storage format +c ja = column number of element a(i,j) in array a. +c ia = pointer array. ia(i) points to the beginning of column i. +c +c rhs = real array of size nrow + 1 if available (see job) +c +c nrhs = integer containing the number of right-hand sides found +c each right hand side may be accompanied with an intial guess +c and also the exact solution. +c +c guesol = a 2-character string indicating whether an initial guess +c (1-st character) and / or the exact solution (2-nd +c character) is provided with the right hand side. +c if the first character of guesol is 'G' it means that an +c an intial guess is provided for each right-hand side. +c These are appended to the right hand-sides in the array rhs. +c if the second character of guesol is 'X' it means that an +c exact solution is provided for each right-hand side. +c These are appended to the right hand-sides +c and the initial guesses (if any) in the array rhs. +c +c nrow = number of rows in matrix +c ncol = number of columns in matrix +c nnz = number of nonzero elements in A. This info is returned +c even if there is not enough space in a, ja, ia, in order +c to determine the minimum storage needed. +c +c title = character*72 = title of matrix test ( character a*72). +c key = character*8 = key of matrix +c type = charatcer*3 = type of matrix. +c for meaning of title, key and type refer to documentation +c Harwell/Boeing matrices. +c +c ierr = integer used for error messages +c * ierr = 0 means that the matrix has been read normally. +c * ierr = 1 means that the array matrix could not be read +c because ncol+1 .gt. nmax +c * ierr = 2 means that the array matrix could not be read +c because nnz .gt. nzmax +c * ierr = 3 means that the array matrix could not be read +c because both (ncol+1 .gt. nmax) and (nnz .gt. nzmax ) +c * ierr = 4 means that the right hand side (s) initial +c guesse (s) and exact solution (s) could not be +c read because they are stored in sparse format (not handled +c by this routine ...) +c * ierr = 5 means that the right-hand-sides, initial guesses +c and exact solutions could not be read because the length of +c rhs as specified by the input value of nrhs is not +c sufficient to store them. The rest of the matrix may have +c been read normally. +c +c Notes: +c------- +c 1) This routine can be interfaced with the C language, since only +c the name of the file needs to be passed and no iounti number. +c +c 2) Refer to the documentation on the Harwell-Boeing formats for +c details on the format assumed by readmt. +c We summarize the format here for convenience. +c +c a) all lines in inout are assumed to be 80 character long. +c b) the file consists of a header followed by the block of the +c column start pointers followed by the block of the row +c indices, followed by the block of the real values and +c finally the numerical values of the right-hand-side if a +c right hand side is supplied. +c c) the file starts by a header which contains four lines if no +c right hand side is supplied and five lines otherwise. +c * first line contains the title (72 characters long) +c followed by the 8-character identifier (name of the +c matrix, called key) [ A72,A8 ] +c * second line contains the number of lines for each of the +c following data blocks (4 of them) and the total number of +c lines excluding the header. [5i4] +c * the third line contains a three character string +c identifying the type of matrices as they are referenced +c in the Harwell Boeing documentation [e.g., rua, rsa,..] +c and the number of rows, columns, nonzero entries. +c [A3,11X,4I14] +c * The fourth line contains the variable fortran format for +c the following data blocks. [2A16,2A20] +c * The fifth line is only present if right-hand-sides are +c supplied. It consists of three one character-strings +c containing the storage format for the right-hand-sides +c ('F'= full,'M'=sparse=same as matrix), an initial guess +c indicator ('G' for yes), an exact solution indicator +c ('X' for yes), followed by the number of right-hand-sides +c and then the number of row indices. [A3,11X,2I14] +c d) The three following blocks follow the header as described +c above. +c e) In case the right hand-side are in sparse formats then the +c fourth block uses the same storage format as for the +c matrix to describe the NRHS right hand sides provided, +c with a column being replaced by a right hand side. +c----------------------------------------------------------------------- + character title*72, key*8, type*3, ptrfmt*16, indfmt*16, + & valfmt*20, rhsfmt*20, rhstyp*3, guesol*2 + integer totcrd, ptrcrd, indcrd, valcrd, rhscrd, nrow, ncol, + & nnz, neltvl, nrhs, nmax, nzmax, nrwindx + integer ia (nmax+1), ja (nzmax) + real*8 a(nzmax), rhs(*) + character fname*100 +c----------------------------------------------------------------------- + ierr = 0 + lenrhs = nrhs +c + iounit=15 + open(iounit,file = fname) + read (iounit,10) title, key, totcrd, ptrcrd, indcrd, valcrd, + & rhscrd, type, nrow, ncol, nnz, neltvl, ptrfmt, indfmt, + & valfmt, rhsfmt + 10 format (a72, a8 / 5i14 / a3, 11x, 4i14 / 2a16, 2a20) +c + if (rhscrd .gt. 0) read (iounit,11) rhstyp, nrhs, nrwindx + 11 format (a3,11x,i14,i14) +c +c anything else to read ? +c + if (job .le. 0) goto 12 + +c ---- check whether matrix is readable ------ + n = ncol + if (ncol .gt. nmax) ierr = 1 + if (nnz .gt. nzmax) ierr = ierr + 2 + if (ierr .ne. 0) goto 12 + +c ---- read pointer and row numbers ---------- + read (iounit,ptrfmt) (ia (i), i = 1, n+1) + read (iounit,indfmt) (ja (i), i = 1, nnz) + +c --- reading values of matrix if required.... + if (job .le. 1) goto 12 +c --- and if available ----------------------- + if (valcrd .le. 0) then + job = 1 + goto 12 + endif + read (iounit,valfmt) (a(i), i = 1, nnz) + +c --- reading rhs if required ---------------- + if (job .le. 2) goto 12 +c --- and if available ----------------------- + if ( rhscrd .le. 0) then + job = 2 + goto 12 + endif +c +c --- read right-hand-side.-------------------- +c + if (rhstyp(1:1) .eq. 'M') then + ierr = 4 + goto 12 + endif +c + guesol = rhstyp(2:3) +c + nvec = 1 + if (guesol(1:1) .eq. 'G' .or. guesol(1:1) .eq. 'g') nvec=nvec+1 + if (guesol(2:2) .eq. 'X' .or. guesol(2:2) .eq. 'x') nvec=nvec+1 +c + len = nrhs*nrow +c + if (len*nvec .gt. lenrhs) then + ierr = 5 + goto 12 + endif +c +c read right-hand-sides +c + next = 1 + iend = len + read(iounit,rhsfmt) (rhs(i), i = next, iend) +c +c read initial guesses if available +c + if (guesol(1:1) .eq. 'G' .or. guesol(1:1) .eq. 'g') then + next = next+len + iend = iend+ len + read(iounit,valfmt) (rhs(i), i = next, iend) + endif +c +c read exact solutions if available +c + if (guesol(2:2) .eq. 'X' .or. guesol(2:2) .eq. 'x') then + next = next+len + iend = iend+ len + read(iounit,valfmt) (rhs(i), i = next, iend) + endif +c + 12 close(iounit) + return +c---------end-of-readmt_c------------------------------------------------- +c------------------------------------------------------------------------- + end + +c------------------------------------------------------------------------- + subroutine csort (n,a,ja,ia,iwork,values) + logical values + integer n, ja(*), ia(n+1), iwork(*) + real*8 a(*) +c----------------------------------------------------------------------- +c This routine sorts the elements of a matrix (stored in Compressed +c Sparse Row Format) in increasing order of their column indices within +c each row. It uses a form of bucket sort with a cost of O(nnz) where +c nnz = number of nonzero elements. +c requires an integer work array of length 2*nnz. +c----------------------------------------------------------------------- +c on entry: +c--------- +c n = the row dimension of the matrix +c a = the matrix A in compressed sparse row format. +c ja = the array of column indices of the elements in array a. +c ia = the array of pointers to the rows. +c iwork = integer work array of length max ( n+1, 2*nnz ) +c where nnz = (ia(n+1)-ia(1)) ) . +c values= logical indicating whether or not the real values a(*) must +c also be permuted. if (.not. values) then the array a is not +c touched by csort and can be a dummy array. +c +c on return: +c---------- +c the matrix stored in the structure a, ja, ia is permuted in such a +c way that the column indices are in increasing order within each row. +c iwork(1:nnz) contains the permutation used to rearrange the elements. +c----------------------------------------------------------------------- +c Y. Saad - Feb. 1, 1991. +c revised by Zhongze Li - June 13th, 2001 +c----------------------------------------------------------------------- +c local variables + integer i, k, j, ifirst, offset, nnz, next +c +c count the number of elements in each column +c + offset = 1 - ia(1) + do 1 i=1,n+1 + iwork(i) = 0 + 1 continue + do 3 i=1, n + do 2 k=ia(i), ia(i+1)-1 + j = ja(k+offset)+1+offset + iwork(j) = iwork(j)+1 + 2 continue + 3 continue +c +c compute pointers from lengths. +c + iwork(1) = 1 + do 4 i=1,n + iwork(i+1) = iwork(i) + iwork(i+1) + 4 continue +c +c get the positions of the nonzero elements in order of columns. +c + ifirst = ia(1); + nnz = ia(n+1)-ifirst + do 5 i=1,n + do 51 k=ia(i),ia(i+1)-1 + j = ja(k+offset)+offset + next = iwork(j) + iwork(nnz+next) = k+offset + iwork(j) = next+1 + 51 continue + 5 continue +c +c convert to coordinate format +c + do 6 i=1, n + do 61 k=ia(i), ia(i+1)-1 + iwork(k+offset) = i + 61 continue + 6 continue +c +c loop to find permutation: for each element find the correct +c position in (sorted) arrays a, ja. Record this in iwork. +c + do 7 k=1, nnz + ko = iwork(nnz+k) + irow = iwork(ko) + next = ia(irow) +c +c the current element should go in next position in row. iwork +c records this position. +c + iwork(ko) = next+offset + ia(irow) = next+1 + 7 continue +c +c perform an in-place permutation of the arrays. +c + call ivperm (nnz, ja, iwork) + if (values) call dvperm (nnz, a, iwork) +c +c reshift the pointers of the original matrix back. +c + do 8 i=n,1,-1 + ia(i+1) = ia(i) + 8 continue + ia(1) = ifirst +c + return +c---------------end-of-csort-------------------------------------------- +c----------------------------------------------------------------------- + end + +c----------------------------------------------------------------------- + subroutine dvperm (n, x, perm) + integer n, perm(n) + real*8 x(n) +c----------------------------------------------------------------------- +c this subroutine performs an in-place permutation of a real vector x +c according to the permutation array perm(*), i.e., on return, +c the vector x satisfies, +c +c x(perm(j)) :== x(j), j=1,2,.., n +c +c----------------------------------------------------------------------- +c on entry: +c--------- +c n = length of vector x. +c perm = integer array of length n containing the permutation array. +c x = input vector +c +c on return: +c---------- +c x = vector x permuted according to x(perm(*)) := x(*) +c +c----------------------------------------------------------------------c +c Y. Saad, Sep. 21 1989 c +c----------------------------------------------------------------------c +c local variables + real*8 tmp, tmp1 +c + init = 1 + tmp = x(init) + ii = perm(init) + perm(init)= -perm(init) + k = 0 +c +c loop +c + 6 k = k+1 +c +c save the chased element -- +c + tmp1 = x(ii) + x(ii) = tmp + next = perm(ii) + if (next .lt. 0 ) goto 65 +c +c test for end +c + if (k .gt. n) goto 101 + tmp = tmp1 + perm(ii) = - perm(ii) + ii = next +c +c end loop +c + goto 6 +c +c reinitilaize cycle -- +c + 65 init = init+1 + if (init .gt. n) goto 101 + if (perm(init) .lt. 0) goto 65 + tmp = x(init) + ii = perm(init) + perm(init)=-perm(init) + goto 6 +c + 101 continue + do 200 j=1, n + perm(j) = -perm(j) + 200 continue +c + return +c-------------------end-of-dvperm--------------------------------------- +c----------------------------------------------------------------------- + end + +c----------------------------------------------------------------------- + subroutine ivperm (n, ix, perm) + integer n, perm(n), ix(n) +c----------------------------------------------------------------------- +c this subroutine performs an in-place permutation of an integer vector +c ix according to the permutation array perm(*), i.e., on return, +c the vector x satisfies, +c +c ix(perm(j)) :== ix(j), j=1,2,.., n +c +c----------------------------------------------------------------------- +c on entry: +c--------- +c n = length of vector x. +c perm = integer array of length n containing the permutation array. +c ix = input vector +c +c on return: +c---------- +c ix = vector x permuted according to ix(perm(*)) := ix(*) +c +c----------------------------------------------------------------------c +c Y. Saad, Sep. 21 1989 c +c----------------------------------------------------------------------c +c local variables + integer tmp, tmp1 +c + init = 1 + tmp = ix(init) + ii = perm(init) + perm(init)= -perm(init) + k = 0 +c +c loop +c + 6 k = k+1 +c +c save the chased element -- +c + tmp1 = ix(ii) + ix(ii) = tmp + next = perm(ii) + if (next .lt. 0 ) goto 65 +c +c test for end +c + if (k .gt. n) goto 101 + tmp = tmp1 + perm(ii) = - perm(ii) + ii = next +c +c end loop +c + goto 6 +c +c reinitilaize cycle -- +c + 65 init = init+1 + if (init .gt. n) goto 101 + if (perm(init) .lt. 0) goto 65 + tmp = ix(init) + ii = perm(init) + perm(init)=-perm(init) + goto 6 +c + 101 continue + do 200 j=1, n + perm(j) = -perm(j) + 200 continue +c + return +c-------------------end-of-ivperm--------------------------------------- +c----------------------------------------------------------------------- + end + + +c----------------------------------------------------------------------- + subroutine rperm (nrow,a,ja,ia,ao,jao,iao,perm,job) + integer nrow,ja(*),ia(nrow+1),jao(*),iao(nrow+1),perm(nrow),job + real*8 a(*),ao(*) +c----------------------------------------------------------------------- +c this subroutine permutes the rows of a matrix in CSR format. +c rperm computes B = P A where P is a permutation matrix. +c the permutation P is defined through the array perm: for each j, +c perm(j) represents the destination row number of row number j. +c Youcef Saad -- recoded Jan 28, 1991. +c----------------------------------------------------------------------- +c on entry: +c---------- +c n = dimension of the matrix +c a, ja, ia = input matrix in csr format +c perm = integer array of length nrow containing the permutation arrays +c for the rows: perm(i) is the destination of row i in the +c permuted matrix. +c ---> a(i,j) in the original matrix becomes a(perm(i),j) +c in the output matrix. +c +c job = integer indicating the work to be done: +c job = 1 permute a, ja, ia into ao, jao, iao +c (including the copying of real values ao and +c the array iao). +c job .ne. 1 : ignore real values. +c (in which case arrays a and ao are not needed nor +c used). +c +c------------ +c on return: +c------------ +c ao, jao, iao = input matrix in a, ja, ia format +c note : +c if (job.ne.1) then the arrays a and ao are not used. +c----------------------------------------------------------------------c +c Y. Saad, May 2, 1990 c +c----------------------------------------------------------------------c + logical values + values = (job .eq. 1) +c +c determine pointers for output matix. +c + do 50 j=1,nrow + i = perm(j) + iao(i+1) = ia(j+1) - ia(j) + 50 continue +c +c get pointers from lengths +c + iao(1) = 1 + do 51 j=1,nrow + iao(j+1)=iao(j+1)+iao(j) + 51 continue +c +c copying +c + do 100 ii=1,nrow +c +c old row = ii -- new row = iperm(ii) -- ko = new pointer +c + ko = iao(perm(ii)) + do 60 k=ia(ii), ia(ii+1)-1 + jao(ko) = ja(k) + if (values) ao(ko) = a(k) + ko = ko+1 + 60 continue + 100 continue +c + return +c---------end-of-rperm ------------------------------------------------- +c----------------------------------------------------------------------- + end + + subroutine cperm (nrow,a,ja,ia,ao,jao,iao,perm,job) + integer nrow,ja(*),ia(nrow+1),jao(*),iao(nrow+1),perm(*), job + real*8 a(*), ao(*) +c----------------------------------------------------------------------- +c this subroutine permutes the columns of a matrix a, ja, ia. +c the result is written in the output matrix ao, jao, iao. +c cperm computes B = A P, where P is a permutation matrix +c that maps column j into column perm(j), i.e., on return +c a(i,j) becomes a(i,perm(j)) in new matrix +c Y. Saad, May 2, 1990 / modified Jan. 28, 1991. +c----------------------------------------------------------------------- +c on entry: +c---------- +c nrow = row dimension of the matrix +c +c a, ja, ia = input matrix in csr format. +c +c perm = integer array of length ncol (number of columns of A +c containing the permutation array the columns: +c a(i,j) in the original matrix becomes a(i,perm(j)) +c in the output matrix. +c +c job = integer indicating the work to be done: +c job = 1 permute a, ja, ia into ao, jao, iao +c (including the copying of real values ao and +c the array iao). +c job .ne. 1 : ignore real values ao and ignore iao. +c +c------------ +c on return: +c------------ +c ao, jao, iao = input matrix in a, ja, ia format (array ao not needed) +c +c Notes: +c------- +c 1. if job=1 then ao, iao are not used. +c 2. This routine is in place: ja, jao can be the same. +c 3. If the matrix is initially sorted (by increasing column number) +c then ao,jao,iao may not be on return. +c +c----------------------------------------------------------------------c +c local parameters: + integer k, i, nnz +c + nnz = ia(nrow+1)-1 + do 100 k=1,nnz + jao(k) = perm(ja(k)) + 100 continue +c +c done with ja array. return if no need to touch values. +c + if (job .ne. 1) return +c +c else get new pointers -- and copy values too. +c + do 1 i=1, nrow+1 + iao(i) = ia(i) + 1 continue +c + do 2 k=1, nnz + ao(k) = a(k) + 2 continue +c + return +c---------end-of-cperm-------------------------------------------------- +c----------------------------------------------------------------------- + end +c + subroutine wreadmtc (nmax,nzmax,job,fname,length,a,ja,ia,rhs,nrhs, + * guesol,nrow,ncol,nnz,title,key,type,ierr) +c----------------------------------------------------------------------- +c this subroutine reads a boeing/harwell matrix, given the +c corresponding file. handles right hand sides in full format +c only (no sparse right hand sides). Also the matrix must be +c in assembled forms. +c It differs from readmt, in that the name of the file needs +c to be passed, and then the file is opened and closed within +c this routine. +c Author: Youcef Saad - Date: Oct 31, 1989 +c updated Feb 6th, 2001 by Zhongze Li +c----------------------------------------------------------------------- + character fname*100, title*72, key*8, type*3, guesol*2, fname1*100 + integer nrow, ncol, nnz, nrhs, nmax, nzmax, job, length, ierr + integer ia (nmax+1), ja (nzmax) + real*8 a(nzmax), rhs(*) + + fname1 = " " + fname1(1:length) = fname(1:length) +c print *,nmax,nzmax,job,fname,length,nrhs, +c * nrow,ncol,nnz,title,key,type,ierr + call readmtc(nmax,nzmax,job,fname1,a,ja,ia,rhs,nrhs, + * guesol,nrow,ncol,nnz,title,key,type,ierr) +c print *,nmax,nzmax,job,fname,length,nrhs, +c * nrow,ncol,nnz,title,key,type,ierr + return + end + + subroutine readmtc (nmax,nzmax,job,fname,a,ja,ia,rhs,nrhs, + * guesol,nrow,ncol,nnz,title,key,type,ierr) +c----------------------------------------------------------------------- +c this subroutine reads a boeing/harwell matrix, given the +c corresponding file. handles right hand sides in full format +c only (no sparse right hand sides). Also the matrix must be +c in assembled forms. +c It differs from readmt, in that the name of the file needs +c to be passed, and then the file is opened and closed within +c this routine. +c Author: Youcef Saad - Date: Oct 31, 1989 +c updated Jul 20, 1998 by Irene Moulitsas +c----------------------------------------------------------------------- +c on entry: +c--------- +c nmax = max column dimension allowed for matrix. The array ia should +c be of length at least ncol+1 (see below) if job.gt.0 +c nzmax = max number of nonzeros elements allowed. the arrays a, +c and ja should be of length equal to nnz (see below) if these +c arrays are to be read (see job). +c +c job = integer to indicate what is to be read. (note: job is an +c input and output parameter, it can be modified on return) +c job = 0 read the values of ncol, nrow, nnz, title, key, +c type and return. matrix is not read and arrays +c a, ja, ia, rhs are not touched. +c job = 1 read srtucture only, i.e., the arrays ja and ia. +c job = 2 read matrix including values, i.e., a, ja, ia +c job = 3 read matrix and right hand sides: a,ja,ia,rhs. +c rhs may contain initial guesses and exact +c solutions appended to the actual right hand sides. +c this will be indicated by the output parameter +c guesol [see below]. +c +c fname = name of the file where to read the matrix from. +c +c nrhs = integer. nrhs is an input as well as ouput parameter. +c at input nrhs contains the total length of the array rhs. +c See also ierr and nrhs in output parameters. +c +c on return: +c---------- +c job = on return job may be modified to the highest job it could +c do: if job=2 on entry but no matrix values are available it +c is reset to job=1 on return. Similarly of job=3 but no rhs +c is provided then it is rest to job=2 or job=1 depending on +c whether or not matrix values are provided. +c Note that no error message is triggered (i.e. ierr = 0 +c on return in these cases. It is therefore important to +c compare the values of job on entry and return ). +c +c a = the a matrix in the a, ia, ja (column) storage format +c ja = column number of element a(i,j) in array a. +c ia = pointer array. ia(i) points to the beginning of column i. +c +c rhs = real array of size nrow + 1 if available (see job) +c +c nrhs = integer containing the number of right-hand sides found +c each right hand side may be accompanied with an intial guess +c and also the exact solution. +c +c guesol = a 2-character string indicating whether an initial guess +c (1-st character) and / or the exact solution (2-nd +c character) is provided with the right hand side. +c if the first character of guesol is 'G' it means that an +c an intial guess is provided for each right-hand side. +c These are appended to the right hand-sides in the array rhs. +c if the second character of guesol is 'X' it means that an +c exact solution is provided for each right-hand side. +c These are appended to the right hand-sides +c and the initial guesses (if any) in the array rhs. +c +c nrow = number of rows in matrix +c ncol = number of columns in matrix +c nnz = number of nonzero elements in A. This info is returned +c even if there is not enough space in a, ja, ia, in order +c to determine the minimum storage needed. +c +c title = character*72 = title of matrix test ( character a*72). +c key = character*8 = key of matrix +c type = charatcer*3 = type of matrix. +c for meaning of title, key and type refer to documentation +c Harwell/Boeing matrices. +c +c ierr = integer used for error messages +c * ierr = 0 means that the matrix has been read normally. +c * ierr = 1 means that the array matrix could not be read +c because ncol+1 .gt. nmax +c * ierr = 2 means that the array matrix could not be read +c because nnz .gt. nzmax +c * ierr = 3 means that the array matrix could not be read +c because both (ncol+1 .gt. nmax) and (nnz .gt. nzmax ) +c * ierr = 4 means that the right hand side (s) initial +c guesse (s) and exact solution (s) could not be +c read because they are stored in sparse format (not handled +c by this routine ...) +c * ierr = 5 means that the right-hand-sides, initial guesses +c and exact solutions could not be read because the length of +c rhs as specified by the input value of nrhs is not +c sufficient to store them. The rest of the matrix may have +c been read normally. +c +c Notes: +c------- +c 1) This routine can be interfaced with the C language, since only +c the name of the file needs to be passed and no iounti number. +c +c 2) Refer to the documentation on the Harwell-Boeing formats for +c details on the format assumed by readmt. +c We summarize the format here for convenience. +c +c a) all lines in inout are assumed to be 80 character long. +c b) the file consists of a header followed by the block of the +c column start pointers followed by the block of the row +c indices, followed by the block of the real values and +c finally the numerical values of the right-hand-side if a +c right hand side is supplied. +c c) the file starts by a header which contains four lines if no +c right hand side is supplied and five lines otherwise. +c * first line contains the title (72 characters long) +c followed by the 8-character identifier (name of the +c matrix, called key) [ A72,A8 ] +c * second line contains the number of lines for each of the +c following data blocks (4 of them) and the total number of +c lines excluding the header. [5i4] +c * the third line contains a three character string +c identifying the type of matrices as they are referenced +c in the Harwell Boeing documentation [e.g., rua, rsa,..] +c and the number of rows, columns, nonzero entries. +c [A3,11X,4I14] +c * The fourth line contains the variable fortran format for +c the following data blocks. [2A16,2A20] +c * The fifth line is only present if right-hand-sides are +c supplied. It consists of three one character-strings +c containing the storage format for the right-hand-sides +c ('F'= full,'M'=sparse=same as matrix), an initial guess +c indicator ('G' for yes), an exact solution indicator +c ('X' for yes), followed by the number of right-hand-sides +c and then the number of row indices. [A3,11X,2I14] +c d) The three following blocks follow the header as described +c above. +c e) In case the right hand-side are in sparse formats then the +c fourth block uses the same storage format as for the +c matrix to describe the NRHS right hand sides provided, +c with a column being replaced by a right hand side. +c----------------------------------------------------------------------- + character title*72, key*8, type*3, ptrfmt*16, indfmt*16, + & valfmt*20, rhsfmt*20, rhstyp*3, guesol*2 + integer totcrd, ptrcrd, indcrd, valcrd, rhscrd, nrow, ncol, + & nnz, neltvl, nrhs, nmax, nzmax, nrwindx + integer ia (nmax+1), ja (nzmax) + real*8 a(nzmax), rhs(*) + character fname*100 +c----------------------------------------------------------------------- + ierr = 0 + lenrhs = nrhs +c + iounit=15 + open(iounit,file = fname) + read (iounit,10) title, key, totcrd, ptrcrd, indcrd, valcrd, + & rhscrd, type, nrow, ncol, nnz, neltvl, ptrfmt, indfmt, + & valfmt, rhsfmt + 10 format (a72, a8 / 5i14 / a3, 11x, 4i14 / 2a16, 2a20) +c + if (rhscrd .gt. 0) read (iounit,11) rhstyp, nrhs, nrwindx + 11 format (a3,11x,i14,i14) +c +c anything else to read ? +c + if (job .le. 0) goto 12 +c ---- check whether matrix is readable ------ + n = ncol + if (ncol .gt. nmax) ierr = 1 + if (nnz .gt. nzmax) ierr = ierr + 2 + if (ierr .ne. 0) goto 12 +c ---- read pointer and row numbers ---------- + read (iounit,ptrfmt) (ia (i), i = 1, n+1) + read (iounit,indfmt) (ja (i), i = 1, nnz) +c --- reading values of matrix if required.... + if (job .le. 1) goto 12 +c --- and if available ----------------------- + if (valcrd .le. 0) then + job = 1 + goto 12 + endif + read (iounit,valfmt) (a(i), i = 1, nnz) +c --- reading rhs if required ---------------- + if (job .le. 2) goto 12 +c --- and if available ----------------------- + if ( rhscrd .le. 0) then + job = 2 + goto 12 + endif +c +c --- read right-hand-side.-------------------- +c + if (rhstyp(1:1) .eq. 'M') then + ierr = 4 + goto 12 + endif +c + guesol = rhstyp(2:3) +c + nvec = 1 + if (guesol(1:1) .eq. 'G' .or. guesol(1:1) .eq. 'g') nvec=nvec+1 + if (guesol(2:2) .eq. 'X' .or. guesol(2:2) .eq. 'x') nvec=nvec+1 +c + len = nrhs*nrow +c + if (len*nvec .gt. lenrhs) then + ierr = 5 + goto 12 + endif +c +c read right-hand-sides +c + next = 1 + iend = len + read(iounit,rhsfmt) (rhs(i), i = next, iend) +c +c read initial guesses if available +c + if (guesol(1:1) .eq. 'G' .or. guesol(1:1) .eq. 'g') then + next = next+len + iend = iend+ len + read(iounit,valfmt) (rhs(i), i = next, iend) + endif +c +c read exact solutions if available +c + if (guesol(2:2) .eq. 'X' .or. guesol(2:2) .eq. 'x') then + next = next+len + iend = iend+ len + read(iounit,valfmt) (rhs(i), i = next, iend) + endif +c + 12 close(iounit) + return +c---------end-of-readmt_c------------------------------------------------- +c----------------------------------------------------------------------- + end +c----------------------------------------------------------------------- +c some routines extracted from SPARSKIT2 and BLAS. +c----------------------------------------------------------------------- +c----------------------------------------------------------------------- +c----------------------------------------------------------------------- + function distdot(n,x,ix,y,iy) + integer n, ix, iy + real*8 distdot, x(*), y(*), ddot + external ddot + distdot = ddot(n,x,ix,y,iy) + return + end +c-----end-of-distdot +c----------------------------------------------------------------------- + +c----------------------------------------------------------------------c +c S P A R S K I T c +c----------------------------------------------------------------------c +c REORDERING ROUTINES -- LEVEL SET BASED ROUTINES c +c----------------------------------------------------------------------c +c dblstr : doubled stripe partitioner +c rdis : recursive dissection partitioner +c dse2way : distributed site expansion usuing sites from dblstr +c dse : distributed site expansion usuing sites from rdis +c------------- utility routines ----------------------------------------- +c BFS : Breadth-First search traversal algorithm +c add_lvst : routine to add a level -- used by BFS +c stripes : finds the level set structure +c stripes0 : finds a trivial one-way partitioning from level-sets +c perphn : finds a pseudo-peripheral node and performs a BFS from it. +c mapper4 : routine used by dse and dse2way to do center expansion +c get_domns: routine to find subdomaine from linked lists found by +c mapper4. +c add_lk : routine to add entry to linked list -- used by mapper4. +c find_ctr : routine to locate an approximate center of a subgraph. +c rversp : routine to reverse a given permutation (e.g., for RCMK) +c maskdeg : integer function to compute the `masked' of a node +c----------------------------------------------------------------------- + subroutine dblstr(n,ja,ia,ip1,ip2,nfirst,riord,ndom,map,mapptr, + * mask,levels,iwk) + implicit none + integer ndom,ja(*),ia(*),ip1,ip2,nfirst,riord(*),map(*),mapptr(*), + * mask(*),levels(*),iwk(*),nextdom +c----------------------------------------------------------------------- +c this routine does a two-way partitioning of a graph using +c level sets recursively. First a coarse set is found by a +c simple cuthill-mc Kee type algorithm. Them each of the large +c domains is further partitioned into subsets using the same +c technique. The ip1 and ip2 parameters indicate the desired number +c number of partitions 'in each direction'. So the total number of +c partitions on return ought to be equal (or close) to ip1*ip2 +c----------------------parameters---------------------------------------- +c on entry: +c--------- +c n = row dimension of matrix == number of vertices in graph +c ja, ia = pattern of matrix in CSR format (the ja,ia arrays of csr data +c structure) +c ip1 = integer indicating the number of large partitions ('number of +c paritions in first direction') +c ip2 = integer indicating the number of smaller partitions, per +c large partition, ('number of partitions in second direction') +c nfirst = number of nodes in the first level that is input in riord +c riord = (also an ouput argument). on entry riord contains the labels +c of the nfirst nodes that constitute the first level. +c on return: +c----------- +c ndom = total number of partitions found +c map = list of nodes listed partition by partition from partition 1 +c to paritition ndom. +c mapptr = pointer array for map. All nodes from position +c k1=mapptr(idom),to position k2=mapptr(idom+1)-1 in map belong +c to partition idom. +c work arrays: +c------------- +c mask = array of length n, used to hold the partition number of each +c node for the first (large) partitioning. +c mask is also used as a marker of visited nodes. +c levels = integer array of length .le. n used to hold the pointer +c arrays for the various level structures obtained from BFS. +c +c----------------------------------------------------------------------- + integer n, j,idom,kdom,jdom,maskval,k,nlev,init,ndp1,numnod + maskval = 1 + do j=1, n + mask(j) = maskval + enddo + iwk(1) = 0 + call BFS(n,ja,ia,nfirst,iwk,mask,maskval,riord,levels,nlev) +c +c init = riord(1) +c call perphn (ja,ia,mask,maskval,init,nlev,riord,levels) + call stripes (nlev,riord,levels,ip1,map,mapptr,ndom) +c----------------------------------------------------------------------- + if (ip2 .eq. 1) return + ndp1 = ndom+1 +c +c pack info into array iwk +c + do j = 1, ndom+1 + iwk(j) = ndp1+mapptr(j) + enddo + do j=1, mapptr(ndom+1)-1 + iwk(ndp1+j) = map(j) + enddo + do idom=1, ndom + j = iwk(idom) + numnod = iwk(idom+1) - iwk(idom) + init = iwk(j) + do k=j, iwk(idom+1)-1 + enddo + enddo + + do idom=1, ndom + do k=mapptr(idom),mapptr(idom+1)-1 + mask(map(k)) = idom + enddo + enddo + nextdom = 1 +c +c jdom = counter for total number of (small) subdomains +c + jdom = 1 + mapptr(jdom) = 1 +c----------------------------------------------------------------------- + do idom =1, ndom + maskval = idom + nfirst = 1 + numnod = iwk(idom+1) - iwk(idom) + j = iwk(idom) + init = iwk(j) + nextdom = mapptr(jdom) +c note: old version uses iperm array + call perphn(numnod,ja,ia,init,mask,maskval, + * nlev,riord,levels) +c + call stripes (nlev,riord,levels,ip2,map(nextdom), + * mapptr(jdom),kdom) +c + mapptr(jdom) = nextdom + do j = jdom,jdom+kdom-1 + mapptr(j+1) = nextdom + mapptr(j+1)-1 + enddo + jdom = jdom + kdom + enddo +c + ndom = jdom - 1 + return + end +c----------------------------------------------------------------------- + subroutine rdis(n,ja,ia,ndom,map,mapptr,mask,levels,size,iptr) + implicit none +c----- + integer n,ja(*),ia(*),ndom,map(*),mapptr(*),mask(*),levels(*), + * size(ndom+1),iptr(ndom+1) +c----------------------------------------------------------------------- +c recursive dissection algorithm for partitioning. +c initial graph is cut in two - then each time, the largest set +c is cut in two until we reach desired number of domains. +c----------------------------------------------------------------------- +c input +c n, ja, ia = graph +c ndom = desired number of subgraphs +c output +c ------ +c map, mapptr = pointer array data structure for domains. +c if k1 = mapptr(i), k2=mapptr(i+1)-1 then +c map(k1:k2) = points in domain number i +c work arrays: +c ------------- +c mask(1:n) integer +c levels(1:n) integer +c size(1:ndom) integer +c iptr(1:ndom) integer +c----------------------------------------------------------------------- + integer idom,maskval,k,nlev,init,nextsiz,wantsiz,lev,ko, + * maxsiz,j,nextdom +c----------------------------------------------------------------------- + idom = 1 +c----------------------------------------------------------------------- +c size(i) = size of domnain i +c iptr(i) = index of first element of domain i +c----------------------------------------------------------------------- + size(idom) = n + iptr(idom) = 1 + do j=1, n + mask(j) = 1 + enddo +c +c domain loop +c + 1 continue +c +c select domain with largest size +c + maxsiz = 0 + do j=1, idom + if (size(j) .gt. maxsiz) then + maxsiz = size(j) + nextdom = j + endif + enddo +c +c do a Prphn/ BFS on nextdom +c + maskval = nextdom + init = iptr(nextdom) + call perphn(n,ja,ia,init,mask,maskval,nlev,map,levels) +c +c determine next subdomain +c + nextsiz = 0 + wantsiz = maxsiz/2 + idom = idom+1 + lev = nlev + do while (nextsiz .lt. wantsiz) + do k = levels(lev), levels(lev+1)-1 + mask(map(k)) = idom + enddo + nextsiz = nextsiz + levels(lev+1) - levels(lev) + lev = lev-1 + enddo +c + size(nextdom) = size(nextdom) - nextsiz + size(idom) = nextsiz +c +c new initial point = last point of previous domain +c + iptr(idom) = map(levels(nlev+1)-1) +c iptr(idom) = map(levels(lev)+1) +c iptr(idom) = 1 +c +c alternative +c lev = 1 +c do while (nextsiz .lt. wantsiz) +c do k = levels(lev), levels(lev+1)-1 +c mask(map(k)) = idom +c enddo +c nextsiz = nextsiz + levels(lev+1) - levels(lev) +c lev = lev+1 +c enddo +c +c set size of new domain and adjust previous one +c +c size(idom) = nextsiz +c size(nextdom) = size(nextdom) - nextsiz +c iptr(idom) = iptr(nextdom) +c iptr(nextdom) = map(levels(lev)) + + if (idom .lt. ndom) goto 1 +c +c domains found -- build data structure +c + mapptr(1) = 1 + do idom=1, ndom + mapptr(idom+1) = mapptr(idom) + size(idom) + enddo + do k=1, n + idom = mask(k) + ko = mapptr(idom) + map(ko) = k + mapptr(idom) = ko+1 + enddo +c +c reset pointers +c + do j = ndom,1,-1 + mapptr(j+1) = mapptr(j) + enddo + mapptr(1) = 1 +c + return + end +c----------------------------------------------------------------------- + subroutine dse2way(n,ja,ia,ip1,ip2,nfirst,riord,ndom,dom,idom, + * mask,jwk,link) +c----------------------------------------------------------------------- +c uses centers obtained from dblstr partition to get new partition +c----------------------------------------------------------------------- +c input: n, ja, ia = matrix +c nfirst = number of first points +c riord = riord(1:nfirst) initial points +c output +c ndom = number of domains +c dom, idom = pointer array structure for domains. +c mask , jwk, link = work arrays, +c----------------------------------------------------------------------- + implicit none + integer n, ja(*), ia(*), ip1, ip2, nfirst, riord(*), dom(*), + * idom(*), mask(*), jwk(n+1),ndom,link(*) +c +c----------------------------------------------------------------------- +c local variables + integer i, mid,nsiz, maskval,init, outer, nouter, k + call dblstr(n,ja,ia,ip1,ip2,nfirst,riord,ndom,dom,idom,mask, + * link,jwk) +c + nouter = 3 +c----------------------------------------------------------------------- + + do outer =1, nouter +c +c set masks +c + do i=1, ndom + do k=idom(i),idom(i+1)-1 + mask(dom(k)) = i + enddo + enddo +c +c get centers +c + do i =1, ndom + nsiz = idom(i+1) - idom(i) + init = dom(idom(i)) + maskval = i +c +c use link for local riord -- jwk for other arrays -- +c + call find_ctr(n,nsiz,ja,ia,init,mask,maskval,link, + * jwk,mid,jwk(nsiz+1)) + riord(i) = mid + enddo +c +c do level-set expansion from centers -- save previous diameter +c + call mapper4(n,ja,ia,ndom,riord,jwk,mask,link) + call get_domns2(ndom,riord,link,jwk,dom,idom) +c----------------------------------------------------------------------- + enddo + return + end +c----------------------------------------------------------------------- + subroutine dse(n,ja,ia,ndom,riord,dom,idom,mask,jwk,link) + implicit none + integer n, ja(*), ia(n+1), ndom, riord(n), dom(*), + * idom(ndom+1),mask(n),jwk(2*ndom),link(n) +c----------------------------------------------------------------------- +c uses centers produced from rdis to get a new partitioning -- +c see calling sequence in rdis.. +c on entry: +c n, ja, ia = graph +c ndom = number of desired subdomains +c on return +c dom, idom = +c pointer array structure for the ndom domains. +c +c----------------------------------------------------------------------- +c size of array jwk = 2*ndom +c size array link +c dom = array of size at least n +c mask = array of size n. +c link = same size as riord +c riord = n list of nodes listed in sequence for all subdomains +c----------------------------------------------------------------------- +c local variables + integer i, mid, nsiz, maskval,init, outer, nouter, k +c----------------------------------------------------------------------- + nouter = 3 +c + call rdis(n,ja,ia,ndom,dom,idom,mask,link,jwk,jwk(ndom+1)) +c +c initial points = +c + do outer =1, nouter +c +c set masks +c + do i=1, ndom + do k=idom(i),idom(i+1)-1 + mask(dom(k)) = i + enddo + enddo +c +c get centers +c + do i =1, ndom + nsiz = idom(i+1) - idom(i) + init = dom(idom(i)) + maskval = i +c +c use link for local riord -- jwk for other arrays -- +c + call find_ctr(n,nsiz,ja,ia,init,mask,maskval,link, + * jwk,mid,jwk(nsiz+1)) + riord(i) = mid + enddo +c +c do level-set expansion from centers -- save previous diameter +c + call mapper4(n,ja,ia,ndom,riord,jwk,mask,link) + call get_domns2(ndom,riord,link,jwk,dom,idom) +c----------------------------------------------------------------------- + enddo + return + end +c----------------------------------------------------------------------- + subroutine BFS(n,ja,ia,nfirst,iperm,mask,maskval,riord,levels, + * nlev) + implicit none + integer n,ja(*),ia(*),nfirst,iperm(n),mask(n+1),riord(*), + * levels(*), + * nlev,maskval +c----------------------------------------------------------------------- +c finds the level-structure (breadth-first-search or CMK) ordering for a +c given sparse matrix. Uses add_lvst. Allows an set of nodes to be +c the initial level (instead of just one node). +c-------------------------parameters------------------------------------ +c on entry: +c--------- +c n = number of nodes in the graph +c ja, ia = pattern of matrix in CSR format (the ja,ia arrays of csr data +c structure) +c nfirst = number of nodes in the first level that is input in riord +c iperm = integer array indicating in which order to traverse the graph +c in order to generate all connected components. +c if iperm(1) .eq. 0 on entry then BFS will traverse the nodes +c in the order 1,2,...,n. +c +c riord = (also an ouput argument). On entry riord contains the labels +c of the nfirst nodes that constitute the first level. +c +c mask = array used to indicate whether or not a node should be +c condidered in the graph. see maskval. +c mask is also used as a marker of visited nodes. +c +c maskval= consider node i only when: mask(i) .eq. maskval +c maskval must be .gt. 0. +c thus, to consider all nodes, take mask(1:n) = 1. +c maskval=1 (for example) +c +c on return +c --------- +c mask = on return mask is restored to its initial state. +c riord = `reverse permutation array'. Contains the labels of the nodes +c constituting all the levels found, from the first level to +c the last. +c levels = pointer array for the level structure. If lev is a level +c number, and k1=levels(lev),k2=levels(lev+1)-1, then +c all the nodes of level number lev are: +c riord(k1),riord(k1+1),...,riord(k2) +c nlev = number of levels found +c----------------------------------------------------------------------- +c + integer j, ii, nod, istart, iend + logical permut + permut = (iperm(1) .ne. 0) +c +c start pointer structure to levels +c + nlev = 0 +c +c previous end +c + istart = 0 + ii = 0 +c +c current end +c + iend = nfirst +c +c intialize masks to zero -- except nodes of first level -- +c + do 12 j=1, nfirst +c print *,'mask',riord(j),j,nfirst + mask(riord(j)) = 0 + 12 continue +c----------------------------------------------------------------------- + 13 continue +c + 1 nlev = nlev+1 + levels(nlev) = istart + 1 + call add_lvst (istart,iend,nlev,riord,ja,ia,mask,maskval) + if (istart .lt. iend) goto 1 + 2 ii = ii+1 + if (ii .le. n) then + nod = ii + if (permut) nod = iperm(nod) + if (mask(nod) .eq. maskval) then +c +c start a new level +c + istart = iend + iend = iend+1 + riord(iend) = nod + mask(nod) = 0 + goto 1 + else + goto 2 + endif + endif +c----------------------------------------------------------------------- + 3 levels(nlev+1) = iend+1 + do j=1, iend + mask(riord(j)) = maskval + enddo +c----------------------------------------------------------------------- + return + end +c----------------------------------------------------------------------- + subroutine add_lvst(istart,iend,nlev,riord,ja,ia,mask,maskval) + integer nlev, nod, riord(*), ja(*), ia(*), mask(*) +c------------------------------------------------------------- +c adds one level set to the previous sets.. +c span all nodes of previous mask +c------------------------------------------------------------- + nod = iend + do 25 ir = istart+1,iend + i = riord(ir) + do 24 k=ia(i),ia(i+1)-1 + j = ja(k) + if (mask(j) .eq. maskval) then + nod = nod+1 + mask(j) = 0 + riord(nod) = j + endif + 24 continue + 25 continue + istart = iend + iend = nod + return + end +c----------------------------------------------------------------------- + subroutine stripes (nlev,riord,levels,ip,map,mapptr,ndom) + implicit none + integer nlev,riord(*),levels(nlev+1),ip,map(*), + * mapptr(*), ndom +c----------------------------------------------------------------------- +c this is a post processor to BFS. stripes uses the output of BFS to +c find a decomposition of the adjacency graph by stripes. It fills +c the stripes level by level until a number of nodes .gt. ip is +c is reached. +c---------------------------parameters----------------------------------- +c on entry: +c -------- +c nlev = number of levels as found by BFS +c riord = reverse permutation array produced by BFS -- +c levels = pointer array for the level structure as computed by BFS. If +c lev is a level number, and k1=levels(lev),k2=levels(lev+1)-1, +c then all the nodes of level number lev are: +c riord(k1),riord(k1+1),...,riord(k2) +c ip = number of desired partitions (subdomains) of about equal size. +c +c on return +c --------- +c ndom = number of subgraphs (subdomains) found +c map = node per processor list. The nodes are listed contiguously +c from proc 1 to nproc = mpx*mpy. +c mapptr = pointer array for array map. list for proc. i starts at +c mapptr(i) and ends at mapptr(i+1)-1 in array map. +c----------------------------------------------------------------------- +c local variables. +c + integer ib,ktr,ilev,k,nsiz,psiz + ndom = 1 + ib = 1 +c to add: if (ip .le. 1) then ... + nsiz = levels(nlev+1) - levels(1) + psiz = (nsiz-ib)/max(1,(ip - ndom + 1)) + 1 + mapptr(ndom) = ib + ktr = 0 + do 10 ilev = 1, nlev +c +c add all nodes of this level to domain +c + do 3 k=levels(ilev), levels(ilev+1)-1 + map(ib) = riord(k) + ib = ib+1 + ktr = ktr + 1 + if (ktr .ge. psiz .or. k .ge. nsiz) then + ndom = ndom + 1 + mapptr(ndom) = ib + psiz = (nsiz-ib)/max(1,(ip - ndom + 1)) + 1 + ktr = 0 + endif +c + 3 continue + 10 continue + ndom = ndom-1 + return + end +c----------------------------------------------------------------------- + subroutine stripes0 (ip,nlev,il,ndom,iptr) + integer ip, nlev, il(*), ndom, iptr(*) +c----------------------------------------------------------------------- +c This routine is a simple level-set partitioner. It scans +c the level-sets as produced by BFS from one to nlev. +c each time the number of nodes in the accumulated set of +c levels traversed exceeds the parameter ip, this set defines +c a new subgraph. +c-------------------------parameter-list--------------------------------- +c on entry: +c -------- +c ip = desired number of nodes per subgraph. +c nlev = number of levels found as output by BFS +c il = integer array containing the pointer array for +c the level data structure as output by BFS. +c thus il(lev+1) - il(lev) = the number of +c nodes that constitute the level numbe lev. +c on return +c --------- +c ndom = number of sungraphs found +c iptr = pointer array for the sugraph data structure. +c thus, iptr(idom) points to the first level that +c consistutes the subgraph number idom, in the +c level data structure. +c----------------------------------------------------------------------- + ktr = 0 + iband = 1 + iptr(iband) = 1 +c----------------------------------------------------------------------- + + do 10 ilev = 1, nlev + ktr = ktr + il(ilev+1) - il(ilev) + if (ktr .gt. ip) then + iband = iband+1 + iptr(iband) = ilev+1 + ktr = 0 + endif +c + 10 continue +c-----------returning -------------------- + iptr(iband) = nlev + 1 + ndom = iband-1 + return +c----------------------------------------------------------------------- +c-----end-of-stripes0--------------------------------------------------- + end +c----------------------------------------------------------------------- + integer function maskdeg (ja,ia,nod,mask,maskval) + implicit none + integer ja(*),ia(*),nod,mask(*),maskval +c----------------------------------------------------------------------- + integer deg, k + deg = 0 + do k =ia(nod),ia(nod+1)-1 + if (mask(ja(k)) .eq. maskval) deg = deg+1 + enddo + maskdeg = deg + return + end +c----------------------------------------------------------------------- + subroutine perphn(n,ja,ia,init,mask,maskval,nlev,riord,levels) + implicit none + integer n,ja(*),ia(*),init,mask(*),maskval, + * nlev,riord(*),levels(*) +c----------------------------------------------------------------------- +c finds a peripheral node and does a BFS search from it. +c----------------------------------------------------------------------- +c see routine dblstr for description of parameters +c input: +c------- +c ja, ia = list pointer array for the adjacency graph +c mask = array used for masking nodes -- see maskval +c maskval = value to be checked against for determing whether or +c not a node is masked. If mask(k) .ne. maskval then +c node k is not considered. +c init = init node in the pseudo-peripheral node algorithm. +c +c output: +c------- +c init = actual pseudo-peripherial node found. +c nlev = number of levels in the final BFS traversal. +c riord = +c levels = +c----------------------------------------------------------------------- + integer j,nlevp,deg,nfirst,mindeg,nod,maskdeg + integer iperm(1) + nlevp = 0 + 1 continue + riord(1) = init + nfirst = 1 + iperm(1) = 0 +c + call BFS(n,ja,ia,nfirst,iperm,mask,maskval,riord,levels,nlev) + if (nlev .gt. nlevp) then + mindeg = n+1 + do j=levels(nlev),levels(nlev+1)-1 + nod = riord(j) + deg = maskdeg(ja,ia,nod,mask,maskval) + if (deg .lt. mindeg) then + init = nod + mindeg = deg + endif + enddo + nlevp = nlev + goto 1 + endif + return + end +c----------------------------------------------------------------------- + subroutine mapper4 (n,ja,ia,ndom,nodes,levst,marker,link) + implicit none + integer n,ndom,ja(*),ia(*),marker(n),levst(2*ndom), + * nodes(*),link(*) +c----------------------------------------------------------------------- +c finds domains given ndom centers -- by doing a level set expansion +c----------------------------------------------------------------------- +c on entry: +c --------- +c n = dimension of matrix +c ja, ia = adajacency list of matrix (CSR format without values) -- +c ndom = number of subdomains (nr output by coarsen) +c nodes = array of size at least n. On input the first ndom entries +c of nodes should contain the labels of the centers of the +c ndom domains from which to do the expansions. +c +c on return +c --------- +c link = linked list array for the ndom domains. +c nodes = contains the list of nodes of the domain corresponding to +c link. (nodes(i) and link(i) are related to the same node). +c +c levst = levst(j) points to beginning of subdomain j in link. +c +c work arrays: +c ----------- +c levst : work array of length 2*ndom -- contains the beginning and +c end of current level in link. +c beginning of last level in link for each processor. +c also ends in levst(ndom+i) +c marker : work array of length n. +c +c Notes on implementation: +c ----------------------- +c for j .le. ndom link(j) is <0 and indicates the end of the +c linked list. The most recent element added to the linked +c list is added at the end of the list (traversal=backward) +c For j .le. ndom, the value of -link(j) is the size of +c subdomain j. +c +c----------------------------------------------------------------------- +c local variables + integer mindom,j,lkend,nod,nodprev,idom,next,i,kk,ii,ilast,nstuck, + * isiz, nsize +c +c initilaize nodes and link arrays +c + do 10 j=1, n + marker(j) = 0 + 10 continue +c + do 11 j=1, ndom + link(j) = -1 + marker(nodes(j)) = j + levst(j) = j + levst(ndom+j) = j + 11 continue +c +c ii = next untouched node for restarting new connected component. +c + ii = 0 +c + lkend = ndom + nod = ndom + nstuck = 0 +c----------------------------------------------------------------------- + 100 continue + idom = mindom(n,ndom,levst,link) +c----------------------------------------------------------------------- +c begin level-set loop +c----------------------------------------------------------------------- + 3 nodprev = nod + ilast = levst(ndom+idom) + levst(ndom+idom) = lkend + next = levst(idom) +c +c linked list traversal loop +c + isiz = 0 + nsize = link(idom) + 1 i = nodes(next) + isiz = isiz + 1 +c +c adjacency list traversal loop +c + do 2 kk=ia(i), ia(i+1)-1 + j = ja(kk) + if (marker(j) .eq. 0) then + call add_lk(j,nod,idom,ndom,lkend,levst,link,nodes,marker) + endif + 2 continue +c +c if last element of the previous level not reached continue +c + if (next .gt. ilast) then + next = link(next) + if (next .gt. 0) goto 1 + endif +c----------------------------------------------------------------------- +c end level-set traversal -- +c----------------------------------------------------------------------- + if (nodprev .eq. nod) then +c +c link(idom) >0 indicates that set is stuck -- +c + link(idom) = -link(idom) + nstuck = nstuck+1 + endif +c + if (nstuck .lt. ndom) goto 100 +c +c reset sizes -- +c + do j=1, ndom + if (link(j) .gt. 0) link(j) = -link(j) + enddo +c + if (nod .eq. n) return +c +c stuck. add first non assigned point to smallest domain +c + 20 ii = ii+1 + if (ii .le. n) then + if (marker(ii) .eq. 0) then + idom = 0 + isiz = n+1 + do 30 kk=ia(ii), ia(ii+1)-1 + i = marker(ja(kk)) + if (i .ne. 0) then + nsize = abs(link(i)) + if (nsize .lt. isiz) then + isiz = nsize + idom = i + endif + endif + 30 continue +c +c if no neighboring domain select smallest one +c + if (idom .eq. 0) idom = mindom(n,ndom,levst,link) +c +c add ii to sudomain idom at end of linked list +c + call add_lk(ii,nod,idom,ndom,lkend,levst,link,nodes,marker) + goto 3 + else + goto 20 + endif + endif + return + end +c----------------------------------------------------------------------- + subroutine get_domns2(ndom,nodes,link,levst,riord,iptr) + implicit none + integer ndom,nodes(*),link(*),levst(*),riord(*),iptr(*) +c----------------------------------------------------------------------- +c constructs the subdomains from its linked list data structure +c----------------------------------------------------------------------- +c input: +c ndom = number of subdomains +c nodes = sequence of nodes are produced by mapper4. +c link = link list array as produced by mapper4. +c on return: +c---------- +c riord = contains the nodes in each subdomain in succession. +c iptr = pointer in riord for beginnning of each subdomain. +c Thus subdomain number i consists of nodes +c riord(k1),riord(k1)+1,...,riord(k2) +c where k1 = iptr(i), k2= iptr(i+1)-1 +c +c----------------------------------------------------------------------- +c local variables + integer nod, j, next, ii + nod = 1 + iptr(1) = nod + do 21 j=1, ndom + next = levst(j) + 22 ii = nodes(next) + riord(nod) = ii + nod = nod+1 + next = link(next) + if (next .gt. 0) goto 22 + iptr(j+1) = nod + 21 continue +c + return +c----------------------------------------------------------------------- + end +c----------------------------------------------------------------------- + function mindom(n, ndom, levst, link) + implicit none + integer mindom, n, ndom, levst(2*ndom),link(n) +c----------------------------------------------------------------------- +c returns the domain with smallest size +c----------------------------------------------------------------------- +c locals +c + integer i, nsize, isiz +c + isiz = n+1 + do 10 i=1, ndom + nsize = - link(i) + if (nsize .lt. 0) goto 10 + if (nsize .lt. isiz) then + isiz = nsize + mindom = i + endif + 10 continue + return + end +c----------------------------------------------------------------------- + subroutine add_lk(new,nod,idom,ndom,lkend,levst,link,nodes,marker) + implicit none + integer new,nod,idom,ndom,lkend,levst(*),link(*),nodes(*), + * marker(*) +c----------------------------------------------------------------------- +c adds from head -- +c +c adds one entry (new) to linked list and ipdates everything. +c new = node to be added +c nod = current number of marked nodes +c idom = domain to which new is to be added +c ndom = total number of domains +c lkend= location of end of structure (link and nodes) +c levst= pointer array for link, nodes +c link = link array +c nodes= nodes array -- +c marker = marker array == if marker(k) =0 then node k is not +c assigned yet. +c----------------------------------------------------------------------- +c locals +c + integer ktop + lkend = lkend + 1 + nodes(lkend) = new + nod = nod+1 + marker(new) = idom + ktop = levst(idom) + link(lkend) = ktop + link(idom) = link(idom)-1 + levst(idom) = lkend + return +c----------------------------------------------------------------------- +c-------end-of-add_lk--------------------------------------------------- + end +c----------------------------------------------------------------------- + subroutine find_ctr(n,nsiz,ja,ia,init,mask,maskval,riord, + * levels,center,iwk) + implicit none + integer n,nsiz,ja(*),ia(*),init,mask(*),maskval,riord(*), + * levels(*),center,iwk(*) +c----------------------------------------------------------------------- +c finds a center point of a subgraph -- +c----------------------------------------------------------------------- +c n, ja, ia = graph +c nsiz = size of current domain. +c init = initial node in search +c mask +c maskval +c----------------------------------------------------------------------- +c local variables + integer midlev, nlev,newmask, k, kr, kl, init0, nlev0 + call perphn(n,ja,ia,init,mask,maskval,nlev,riord,levels) +c----------------------------------------------------------------------- +c midlevel = level which cuts domain into 2 roughly equal-size +c regions +c + midlev = 1 + k = 0 + 1 continue + k = k + levels(midlev+1)-levels(midlev) + if (k*2 .lt. nsiz) then + midlev = midlev+1 + goto 1 + endif +c----------------------------------------------------------------------- + newmask = n+maskval +c +c assign temporary masks to mid-level elements +c + do k=levels(midlev),levels(midlev+1)-1 + mask(riord(k)) = newmask + enddo +c +c find pseudo-periph node for mid-level `line' +c + kr = 1 + kl = kr + nsiz + init0 = riord(levels(midlev)) + call perphn(n,ja,ia,init0,mask,newmask,nlev0,iwk(kr),iwk(kl)) +c----------------------------------------------------------------------- +c restore mask to initial state +c----------------------------------------------------------------------- + do k=levels(midlev),levels(midlev+1)-1 + mask(riord(k)) = maskval + enddo +c----------------------------------------------------------------------- +c define center +c----------------------------------------------------------------------- + midlev = 1 + (nlev0-1)/2 + k = iwk(kl+midlev-1) + center = iwk(k) +c----------------------------------------------------------------------- + return + end +c----------------------------------------------------------------------- + subroutine rversp (n, riord) + integer n, riord(n) +c----------------------------------------------------------------------- +c this routine does an in-place reversing of the permutation array +c riord -- +c----------------------------------------------------------------------- + integer j, k + do 26 j=1,n/2 + k = riord(j) + riord(j) = riord(n-j+1) + riord(n-j+1) = k + 26 continue + return + end +c----------------------------------------------------------------------- diff --git a/integer_sort.c b/integer_sort.c new file mode 100644 index 0000000000000000000000000000000000000000..edf89a3db75a30940f9ff00fd35122b822518c97 --- /dev/null +++ b/integer_sort.c @@ -0,0 +1,259 @@ +/* This file is part of the Scotch distribution. It does +** not have the stardard Scotch header with the INRIA +** copyright notice because it is a very slight adaptation +** of the qsort routine of glibc 2.4, taylored to match +** Scotch needs. As Scotch is distributed according to the +** CeCILL-C license, which is LGPL-compatible, no further +** notices are required. +*/ + +/* Copyright (C) 1991,1992,1996,1997,1999,2004 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Written by Douglas C. Schmidt (schmidt@ics.uci.edu). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* If you consider tuning this algorithm, you should consult first: + Engineering a sort function; Jon Bentley and M. Douglas McIlroy; + Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */ + + +/** + * @file integer_sort.c + * + * File to include to create integer sort function using qsort based + * algorithm. DO NOT compile directly. + * + * @author François Pellegrini + * +*/ + +#ifndef MAX_THRESH +#define MAX_THRESH 6 + +#define max_thresh (MAX_THRESH * INTSORTSIZE) /* Variable turned into constant */ + +/* Stack node declarations used to store unfulfilled partition obligations. */ +typedef struct + { + char *lo; + char *hi; + } stack_node; + +/* The next 4 #defines implement a very fast in-line stack abstraction. */ +/* The stack needs log (total_elements) entries (we could even subtract + log(MAX_THRESH)). Since total_elements has type size_t, we get as + upper bound for log (total_elements): + bits per byte (CHAR_BIT) * sizeof(size_t). */ +#define STACK_SIZE (CHAR_BIT * sizeof (pastix_int_t)) +#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top)) +#define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi))) +#define STACK_NOT_EMPTY (stack < top) + +#endif /* MAX_THRESH */ + +/* Order size using quicksort. This implementation incorporates + four optimizations discussed in Sedgewick: + + 1. Non-recursive, using an explicit stack of pointer that store the + next array partition to sort. To save time, this maximum amount + of space required to store an array of SIZE_MAX is allocated on the + stack. Assuming a 32-bit (64 bit) integer for size_t, this needs + only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes). + Pretty cheap, actually. + + 2. Chose the pivot element using a median-of-three decision tree. + This reduces the probability of selecting a bad pivot value and + eliminates certain extraneous comparisons. + + 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving + insertion sort to order the MAX_THRESH items within each partition. + This is a big win, since insertion sort is faster for small, mostly + sorted array segments. + + 4. The larger of the two sub-partitions is always pushed onto the + stack first, with the algorithm then concentrating on the + smaller partition. This *guarantees* no more than log (total_elems) + stack size is needed (actually O(1) in this case)! */ + +/* To be defined : +** INTSORTNAME : Name of function +** INTSORTSIZE : Size of elements to sort +** INTSORTSWAP : Swapping macro +** INTSORTCMP : Comparison function +*/ + +void +INTSORTNAME ( +void * const pbase, /*+ Array to sort +*/ +const pastix_int_t total_elems) /*+ Number of entries to sort +*/ +{ + register char *base_ptr = (char *) pbase; + + if (total_elems == 0) + /* Avoid lossage with unsigned arithmetic below. */ + return; + + if (total_elems > MAX_THRESH) + { + char *lo = base_ptr; + char *hi = &lo[INTSORTSIZE * (total_elems - 1)]; + stack_node stack[STACK_SIZE]; + stack_node *top = stack; + + PUSH (NULL, NULL); + + while (STACK_NOT_EMPTY) + { + char *left_ptr; + char *right_ptr; + + /* Select median value from among LO, MID, and HI. Rearrange + LO and HI so the three values are sorted. This lowers the + probability of picking a pathological pivot value and + skips a comparison for both the LEFT_PTR and RIGHT_PTR in + the while loops. */ + + char *mid = lo + INTSORTSIZE * ((hi - lo) / INTSORTSIZE >> 1); + + if (INTSORTCMP ((void *) mid, (void *) lo)) + INTSORTSWAP (mid, lo); + if (INTSORTCMP ((void *) hi, (void *) mid)) + INTSORTSWAP (mid, hi); + else + goto jump_over; + if (INTSORTCMP ((void *) mid, (void *) lo)) + INTSORTSWAP (mid, lo); + jump_over:; + + left_ptr = lo + INTSORTSIZE; + right_ptr = hi - INTSORTSIZE; + + /* Here's the famous ``collapse the walls'' section of quicksort. + Gotta like those tight inner loops! They are the main reason + that this algorithm runs much faster than others. */ + do + { + while (INTSORTCMP ((void *) left_ptr, (void *) mid)) + left_ptr += INTSORTSIZE; + + while (INTSORTCMP ((void *) mid, (void *) right_ptr)) + right_ptr -= INTSORTSIZE; + + if (left_ptr < right_ptr) + { + INTSORTSWAP (left_ptr, right_ptr); + if (mid == left_ptr) + mid = right_ptr; + else if (mid == right_ptr) + mid = left_ptr; + left_ptr += INTSORTSIZE; + right_ptr -= INTSORTSIZE; + } + else if (left_ptr == right_ptr) + { + left_ptr += INTSORTSIZE; + right_ptr -= INTSORTSIZE; + break; + } + } + while (left_ptr <= right_ptr); + + /* Set up pointers for next iteration. First determine whether + left and right partitions are below the threshold size. If so, + ignore one or both. Otherwise, push the larger partition's + bounds on the stack and continue sorting the smaller one. */ + + if ((size_t) (right_ptr - lo) <= max_thresh) + { + if ((size_t) (hi - left_ptr) <= max_thresh) + /* Ignore both small partitions. */ + POP (lo, hi); + else + /* Ignore small left partition. */ + lo = left_ptr; + } + else if ((size_t) (hi - left_ptr) <= max_thresh) + /* Ignore small right partition. */ + hi = right_ptr; + else if ((right_ptr - lo) > (hi - left_ptr)) + { + /* Push larger left partition indices. */ + PUSH (lo, right_ptr); + lo = left_ptr; + } + else + { + /* Push larger right partition indices. */ + PUSH (left_ptr, hi); + hi = right_ptr; + } + } + } + + /* Once the BASE_PTR array is partially sorted by quicksort the rest + is completely sorted using insertion sort, since this is efficient + for partitions below MAX_THRESH size. BASE_PTR points to the beginning + of the array to sort, and END_PTR points at the very last element in + the array (*not* one beyond it!). */ + +#define min(x, y) ((x) < (y) ? (x) : (y)) + + { + char *const end_ptr = &base_ptr[INTSORTSIZE * (total_elems - 1)]; + char *tmp_ptr = base_ptr; + char *thresh = min(end_ptr, base_ptr + max_thresh); + register char *run_ptr; + + /* Find smallest element in first threshold and place it at the + array's beginning. This is the smallest array element, + and the operation speeds up insertion sort's inner loop. */ + + for (run_ptr = tmp_ptr + INTSORTSIZE; run_ptr <= thresh; run_ptr += INTSORTSIZE) + if (INTSORTCMP ((void *) run_ptr, (void *) tmp_ptr)) + tmp_ptr = run_ptr; + + if (tmp_ptr != base_ptr) + INTSORTSWAP (tmp_ptr, base_ptr); + + /* Insertion sort, running from left-hand-side up to right-hand-side. */ + + run_ptr = base_ptr + INTSORTSIZE; + while ((run_ptr += INTSORTSIZE) <= end_ptr) + { + tmp_ptr = run_ptr - INTSORTSIZE; + while (INTSORTCMP ((void *) run_ptr, (void *) tmp_ptr)) + tmp_ptr -= INTSORTSIZE; + + tmp_ptr += INTSORTSIZE; + if (tmp_ptr != run_ptr) + { + char *trav; + + trav = run_ptr + INTSORTSIZE; + while (--trav >= run_ptr) + { + char c = *trav; + char *hi, *lo; + + for (hi = lo = trav; (lo -= INTSORTSIZE) >= tmp_ptr; hi = lo) + *hi = *lo; + *hi = c; + } + } + } + } +} diff --git a/integer_sort_mtypes.c b/integer_sort_mtypes.c new file mode 100644 index 0000000000000000000000000000000000000000..ccafb33d5cb8b91fd3cff0811fdd1b6c609ff60d --- /dev/null +++ b/integer_sort_mtypes.c @@ -0,0 +1,283 @@ +/* This file is part of the Scotch distribution. It does +** not have the stardard Scotch header with the INRIA +** copyright notice because it is a very slight adaptation +** of the qsort routine of glibc 2.4, taylored to match +** Scotch needs. As Scotch is distributed according to the +** CeCILL-C license, which is LGPL-compatible, no further +** notices are required. +*/ + +/* Copyright (C) 1991,1992,1996,1997,1999,2004 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Written by Douglas C. Schmidt (schmidt@ics.uci.edu). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* If you consider tuning this algorithm, you should consult first: + Engineering a sort function; Jon Bentley and M. Douglas McIlroy; + Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */ + + +/** + * @file integer_sort_mtypes.c + * + * File to include to create a multi-types sort function using qsort based + * algorithm. DO NOT compile directly. + * + * @author François Pellegrini + * + */ +#ifndef MAX_THRESH_2 + +#define MAX_THRESH_2 6 + +#define max_thresh_2 (MAX_THRESH_2 * INTSORTSIZE(0)) /* Variable turned into constant */ + +/* Stack node declarations used to store unfulfilled partition obligations. */ +typedef struct + { + char *lo; + char *hi; + } stack_node_2; + +/* The next 4 #defines implement a very fast in-line stack abstraction. */ +/* The stack needs log (total_elements) entries (we could even subtract + log(MAX_THRESH_2)). Since total_elements has type size_t, we get as + upper bound for log (total_elements): + bits per unsigned char (CHAR_BIT) * sizeof(size_t). */ +#define STACK_SIZE_2 (CHAR_BIT * sizeof (pastix_int_t)) +#define PUSH_2(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top)) +#define POP_2(low, high) ((void) (--top, (low = top->lo), (high = top->hi))) +#define STACK_NOT_EMPTY_2 (stack < top) + +#endif /* MAX_THRESH_2 */ + +/* Order size using quicksort. This implementation incorporates + four optimizations discussed in Sedgewick: + + 1. Non-recursive, using an explicit stack of pointer that store the + next array partition to sort. To save time, this maximum amount + of space required to store an array of SIZE_MAX is allocated on the + stack. Assuming a 32-bit (64 bit) integer for size_t, this needs + only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes). + Pretty cheap, actually. + + 2. Chose the pivot element using a median-of-three decision tree. + This reduces the probability of selecting a bad pivot value and + eliminates certain extraneous comparisons. + + 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH_2 partitions, leaving + insertion sort to order the MAX_THRESH_2 items within each partition. + This is a big win, since insertion sort is faster for small, mostly + sorted array segments. + + 4. The larger of the two sub-partitions is always pushed onto the + stack first, with the algorithm then concentrating on the + smaller partition. This *guarantees* no more than log (total_elems) + stack size is needed (actually O(1) in this case)! */ + +/* To be defined : +** INTSORTNAME : Name of function +** INTSORTSIZE : Size of elements to sort +** INTSORTSWAP : Swapping macro +** INTSORTCMP : Comparison function +*/ + +void +INTSORTNAME ( +void ** const pbase, /*+ Array of arrays to sort +*/ +const pastix_int_t total_elems) /*+ Number of entries to sort +*/ +{ + register char *base_ptr = (char *) (*pbase); + + if (total_elems == 0) + /* Avoid lossage with unsigned arithmetic below. */ + return; + + if (total_elems > MAX_THRESH_2) + { + char *lo = base_ptr; + char *hi = &lo[INTSORTSIZE(0) * (total_elems - 1)]; + stack_node_2 stack[STACK_SIZE_2]; + stack_node_2 *top = stack; + + PUSH_2 (NULL, NULL); + + while (STACK_NOT_EMPTY_2) + { + char *left_ptr; + char *right_ptr; + + /* Select median value from among LO, MID, and HI. Rearrange + LO and HI so the three values are sorted. This lowers the + probability of picking a pathological pivot value and + skips a comparison for both the LEFT_PTR and RIGHT_PTR in + the while loops. */ + + char *mid = lo + INTSORTSIZE(0) * ((hi - lo) / INTSORTSIZE(0) >> 1); + + if (INTSORTCMP (mid, lo)) + INTSORTSWAP (mid, lo); + if (INTSORTCMP (hi, mid)) + INTSORTSWAP (mid, hi); + else + goto jump_over; + if (INTSORTCMP ((void *) mid, (void *) lo)) + INTSORTSWAP (mid, lo); + jump_over:; + + left_ptr = lo + INTSORTSIZE(0); + right_ptr = hi - INTSORTSIZE(0); + + /* Here's the famous ``collapse the walls'' section of quicksort. + Gotta like those tight inner loops! They are the main reason + that this algorithm runs much faster than others. */ + do + { + while (INTSORTCMP ((void *) left_ptr, (void *) mid)) + left_ptr += INTSORTSIZE(0); + + while (INTSORTCMP ((void *) mid, (void *) right_ptr)) + right_ptr -= INTSORTSIZE(0); + + if (left_ptr < right_ptr) + { + INTSORTSWAP (left_ptr, right_ptr); + if (mid == left_ptr) + mid = right_ptr; + else if (mid == right_ptr) + mid = left_ptr; + left_ptr += INTSORTSIZE(0); + right_ptr -= INTSORTSIZE(0); + } + else if (left_ptr == right_ptr) + { + left_ptr += INTSORTSIZE(0); + right_ptr -= INTSORTSIZE(0); + break; + } + } + while (left_ptr <= right_ptr); + + /* Set up pointers for next iteration. First determine whether + left and right partitions are below the threshold size. If so, + ignore one or both. Otherwise, push the larger partition's + bounds on the stack and continue sorting the smaller one. */ + + if ((size_t) (right_ptr - lo) <= max_thresh_2) + { + if ((size_t) (hi - left_ptr) <= max_thresh_2) + /* Ignore both small partitions. */ + POP_2 (lo, hi); + else + /* Ignore small left partition. */ + lo = left_ptr; + } + else if ((size_t) (hi - left_ptr) <= max_thresh_2) + /* Ignore small right partition. */ + hi = right_ptr; + else if ((right_ptr - lo) > (hi - left_ptr)) + { + /* Push larger left partition indices. */ + PUSH_2 (lo, right_ptr); + lo = left_ptr; + } + else + { + /* Push larger right partition indices. */ + PUSH_2 (left_ptr, hi); + hi = right_ptr; + } + } + } + + /* Once the BASE_PTR array is partially sorted by quicksort the rest + is completely sorted using insertion sort, since this is efficient + for partitions below MAX_THRESH_2 size. BASE_PTR points to the beginning + of the array to sort, and END_PTR points at the very last element in + the array (*not* one beyond it!). */ + +#define min(x, y) ((x) < (y) ? (x) : (y)) + + { + char *const end_ptr = &base_ptr[INTSORTSIZE(0) * (total_elems - 1)]; + char *tmp_ptr = base_ptr; + char *thresh = min(end_ptr, base_ptr + max_thresh_2); + register char *run_ptr; + + /* Find smallest element in first threshold and place it at the + array's beginning. This is the smallest array element, + and the operation speeds up insertion sort's inner loop. */ + + for (run_ptr = tmp_ptr + INTSORTSIZE(0); run_ptr <= thresh; run_ptr += INTSORTSIZE(0)) + if (INTSORTCMP ((void *) run_ptr, (void *) tmp_ptr)) + tmp_ptr = run_ptr; + + if (tmp_ptr != base_ptr) + INTSORTSWAP (tmp_ptr, base_ptr); + + /* Insertion sort, running from left-hand-side up to right-hand-side. */ + + run_ptr = base_ptr + INTSORTSIZE(0); + while ((run_ptr += INTSORTSIZE(0)) <= end_ptr) + { + tmp_ptr = run_ptr - INTSORTSIZE(0); + while (INTSORTCMP ((void *) run_ptr, (void *) tmp_ptr)) + tmp_ptr -= INTSORTSIZE(0); + + tmp_ptr += INTSORTSIZE(0); + if (tmp_ptr != run_ptr) + { + char *trav; + int itertab; + trav = run_ptr + INTSORTSIZE(0); + while (--trav >= run_ptr) + { + char c = *trav; + char *hi, *lo; + + for (hi = lo = trav; (lo -= INTSORTSIZE(0)) >= tmp_ptr; hi = lo) + *hi = *lo; + *hi = c; + } + + for (itertab = 1; itertab < INTSORTNTAB; itertab++) + { + + trav = ((run_ptr - base_ptr)/INTSORTSIZE(0)) + *INTSORTSIZE(itertab) + + (char *) pbase[itertab]+ INTSORTSIZE(itertab) ; + + while (--trav >= ((run_ptr - base_ptr)/INTSORTSIZE(0)) + *INTSORTSIZE(itertab) + + (char *) pbase[itertab]) + { + char c = *trav; + char *hi, *lo; + + for (hi = lo = trav; + (lo -= INTSORTSIZE(itertab)) >= + ((tmp_ptr - base_ptr)/INTSORTSIZE(0)) + *INTSORTSIZE(itertab) + + (char *) pbase[itertab]; hi = lo) + *hi = *lo; + *hi = c; + } + } + } + } + } +} diff --git a/spm.c b/spm.c index bb9ab891c3ebe3cca96667e305e22c7dc01bf6ca..54e3afc724064617dbf015eb0a93177240acf899 100644 --- a/spm.c +++ b/spm.c @@ -162,19 +162,19 @@ spmBase( pastix_spm_t *spm, /* Parameter checks */ if ( spm == NULL ) { - errorPrint("spmBase: spm pointer is NULL"); + pastix_error_print("spmBase: spm pointer is NULL"); return; } if ( (spm->colptr == NULL) || (spm->rowptr == NULL) ) { - errorPrint("spmBase: spm pointer is not correctly initialized"); + pastix_error_print("spmBase: spm pointer is not correctly initialized"); return; } if ( (baseval != 0) && (baseval != 1) ) { - errorPrint("spmBase: baseval is incorrect, must be 0 or 1"); + pastix_error_print("spmBase: baseval is incorrect, must be 0 or 1"); return; } diff --git a/spm.h b/spm.h index 971fc622c5fe4836e774f998e2719e4c06373099..c2742b4281098a6865fa39d1d090691c2febd31a 100644 --- a/spm.h +++ b/spm.h @@ -15,6 +15,35 @@ #ifndef _SPM_H_ #define _SPM_H_ +/** + * @ingroup pastix_spm + * + * @enum pastix_driver_e + * + * @brief The list of matrix driver reader and generators + * + */ +typedef enum pastix_driver_e { + PastixDriverRSA, /* ok */ + PastixDriverCCC,// + PastixDriverRCC,// + PastixDriverOlaf,// + PastixDriverPeer,// + PastixDriverHB, /* ok */ + PastixDriverIJV, /* ok */ + PastixDriverMM, /* ok */ + PastixDriverDMM, /* ok */ + PastixDriverPetscS, /* ok */ + PastixDriverPetscU, /* ok */ + PastixDriverPetscH, /* ok */ + PastixDriverCSCD,// + PastixDriverLaplacian, /* ok */ + PastixDriverXLaplacian, /* ok */ + PastixDriverBRGM,// + PastixDriverBRGMD,// + PastixDriverGraph +} pastix_driver_t; + /** * @ingroup pastix_spm * @@ -77,6 +106,14 @@ csc_save( pastix_int_t n, void *values, int dof, FILE *outfile ); +/** + * Integer arrays subroutines + */ +pastix_int_t *spmIntConvert( pastix_int_t n, int *input ); +void spmIntSort1Asc1(void * const pbase, const pastix_int_t n); +void spmIntSort2Asc1(void * const pbase, const pastix_int_t n); +void spmIntSort2Asc2(void * const pbase, const pastix_int_t n); + int spmLoad( pastix_spm_t *spm, FILE *infile ); int spmSave( pastix_spm_t *spm, FILE *outfile ); @@ -102,4 +139,9 @@ pastix_spm_t *spmCheckAndCorrect( pastix_spm_t *spm ); void spmExpand(pastix_spm_t* spm); void dofVar(pastix_spm_t* spm);//tmp +int spmReadDriver( pastix_driver_t driver, + char *filename, + pastix_spm_t *spm, + MPI_Comm pastix_comm ); + #endif /* _SPM_H_ */ diff --git a/spm_drivers.h b/spm_drivers.h new file mode 100644 index 0000000000000000000000000000000000000000..20981aefc15cd12445e74d07d5841dcf8e277bcc --- /dev/null +++ b/spm_drivers.h @@ -0,0 +1,33 @@ +/** + * @file drivers.h + * + * $COPYRIGHTS$ + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Pierre Ramet + * @author Xavier Lacoste + * @date 2011-11-11 + * + **/ +#ifndef _SPM_DRIVER_H_ +#define _SPM_DRIVER_H_ + +#include "spm.h" + +void convertArrayToComplex64( pastix_int_t n, const double *A, void **B ); +void convertArrayToComplex32( pastix_int_t n, const double *A, void **B ); +void convertArrayToDouble( pastix_int_t n, const double *A, void **B ); +void convertArrayToFloat( pastix_int_t n, const double *A, void **B ); + +int readHB ( const char *filename, pastix_spm_t *spm ); +int readRSA ( const char *filename, pastix_spm_t *spm ); +int readIJV ( const char *filename, pastix_spm_t *spm ); +int readMM ( const char *filename, pastix_spm_t *spm ); +int readDMM ( const char *filename, pastix_spm_t *spm ); +int readPETSC( const char *filename, pastix_spm_t *spm ); +int readCSCD ( const char *filename, pastix_spm_t *spm, void **rhs, MPI_Comm pastix_comm ); +int genLaplacian( const char *filename, pastix_spm_t *spm ); +int genExtendedLaplacian( const char *filename, pastix_spm_t *spm ); + +#endif /* _SPM_DRIVER_H_ */ diff --git a/spm_integers.c b/spm_integers.c new file mode 100644 index 0000000000000000000000000000000000000000..b45833a29b6d3d44d3370d36af47e17991a71b21 --- /dev/null +++ b/spm_integers.c @@ -0,0 +1,256 @@ +/** + * + * @file spm_integers.c + * + * PaStiX spm routines + * PaStiX is a software package provided by Inria Bordeaux - Sud-Ouest, + * LaBRI, University of Bordeaux 1 and IPB. + * + * @version 5.1.0 + * @author Francois Pellegrini + * @author Xavier Lacoste + * @author Pierre Ramet + * @author Mathieu Faverge + * @date 2013-06-24 + * + **/ +#include <ctype.h> +#include <limits.h> +#include <time.h> +#include <stdlib.h> +#include "common.h" + +/** + ******************************************************************************* + * + * @ingroup pastix_spm + * + * @brief Convert integer array to the pastix_int_t format if it is not already + * the case. + * + ******************************************************************************* + * + * @param[in] n + * The number of elements in the array. + * + * @param[in,out] input + * The input array. If the type size is not the same, the array is + * freed on exit. + * + ******************************************************************************* + * + * @return The pointer to the new allocated array if size has changed, or to + * input if sizes are identical. + * + *******************************************************************************/ +pastix_int_t * +spmIntConvert( pastix_int_t n, int *input ) +{ + if (sizeof(pastix_int_t) != sizeof(int)) { + pastix_int_t *output, *tmpo; + int *tmpi, i; + + output = malloc( n * sizeof(pastix_int_t) ); + tmpi = input; + tmpo = output; + for(i=0; i<n; i++, tmpi++, tmpo++) { + *tmpo = (pastix_int_t)(*tmpi); + } + free(input); + return output; + } + else { + return (pastix_int_t*)input; + } +} + +/** + ******************************************************************************* + * + * @ingroup pastix_spm + * + * @brief Sorts in ascending order array of element composed of one single + * pastix_int_t with a single key value. + * + ******************************************************************************* + */ +#define INTSORTNAME spmIntSort1Asc1 +#define INTSORTSIZE (sizeof (pastix_int_t)) +#define INTSORTSWAP(p,q) do { \ + pastix_int_t t; \ + t = *((pastix_int_t *) (p)); \ + *((pastix_int_t *) (p)) = *((pastix_int_t *) (q)); \ + *((pastix_int_t *) (q)) = t; \ + } while (0) +#define INTSORTCMP(p,q) (*((pastix_int_t *) (p)) < *((pastix_int_t *) (q))) +#include "integer_sort.c" +#undef INTSORTNAME +#undef INTSORTSIZE +#undef INTSORTSWAP +#undef INTSORTCMP + +/** + ******************************************************************************* + * + * @ingroup pastix_spm + * + * @brief Sorts in ascending order array of element composed of two + * pastix_int_t by ascending order. The first value is used as key. + * + ******************************************************************************* + */ +#define INTSORTNAME spmIntSort2Asc1 +#define INTSORTSIZE (2 * sizeof (pastix_int_t)) +#define INTSORTSWAP(p,q) do { \ + pastix_int_t t, u; \ + t = *((pastix_int_t *) (p)); \ + u = *((pastix_int_t *) (p) + 1); \ + *((pastix_int_t *) (p)) = *((pastix_int_t *) (q)); \ + *((pastix_int_t *) (p) + 1) = *((pastix_int_t *) (q) + 1); \ + *((pastix_int_t *) (q)) = t; \ + *((pastix_int_t *) (q) + 1) = u; \ + } while (0) +#define INTSORTCMP(p,q) (*((pastix_int_t *) (p)) < *((pastix_int_t *) (q))) +#include "integer_sort.c" +#undef INTSORTNAME +#undef INTSORTSIZE +#undef INTSORTSWAP +#undef INTSORTCMP + +/** + ******************************************************************************* + * + * @ingroup pastix_spm + * + * @brief Sorts in ascending order array of element composed of three + * pastix_int_t by ascending order. The first value is used as key. + * + ******************************************************************************* + */ +#define INTSORTNAME spmInt_intSort3Asc1 +#define INTSORTSIZE (3 * sizeof (pastix_int_t)) +#define INTSORTSWAP(p,q) do { \ + pastix_int_t t, u, v; \ + t = *((pastix_int_t *) (p)); \ + u = *((pastix_int_t *) (p) + 1); \ + v = *((pastix_int_t *) (p) + 2); \ + *((pastix_int_t *) (p)) = *((pastix_int_t *) (q)); \ + *((pastix_int_t *) (p) + 1) = *((pastix_int_t *) (q) + 1); \ + *((pastix_int_t *) (p) + 2) = *((pastix_int_t *) (q) + 2); \ + *((pastix_int_t *) (q)) = t; \ + *((pastix_int_t *) (q) + 1) = u; \ + *((pastix_int_t *) (q) + 2) = v; \ + } while (0) +#define INTSORTCMP(p,q) (*((pastix_int_t *) (p)) < *((pastix_int_t *) (q))) +#include "integer_sort.c" +#undef INTSORTNAME +#undef INTSORTSIZE +#undef INTSORTSWAP +#undef INTSORTCMP + +/** + ******************************************************************************* + * + * @ingroup pastix_spm + * + * @brief Sorts in ascending order array of element composed of two + * pastix_int_t by ascending order. Both values are used as key. + * + ******************************************************************************* + */ +#define INTSORTNAME spmIntSort2Asc2 +#define INTSORTSIZE (2 * sizeof (pastix_int_t)) +#define INTSORTSWAP(p,q) do { \ + pastix_int_t t, u; \ + t = *((pastix_int_t *) (p)); \ + u = *((pastix_int_t *) (p) + 1); \ + *((pastix_int_t *) (p)) = *((pastix_int_t *) (q)); \ + *((pastix_int_t *) (p) + 1) = *((pastix_int_t *) (q) + 1); \ + *((pastix_int_t *) (q)) = t; \ + *((pastix_int_t *) (q) + 1) = u; \ + } while (0) +#define INTSORTCMP(p,q) ((*((pastix_int_t *) (p)) < *((pastix_int_t *) (q))) || ((*((pastix_int_t *) (p)) == *((pastix_int_t *) (q))) && (*((pastix_int_t *) (p) + 1) < *((pastix_int_t *) (q) + 1)))) +#include "integer_sort.c" +#undef INTSORTNAME +#undef INTSORTSIZE +#undef INTSORTSWAP +#undef INTSORTCMP + +/** + ******************************************************************************* + * + * @ingroup pastix_spm + * + * @brief Sort 2 arrays simultaneously, the first array is an array of + * pastix_int_t and used as primary key for sorting. The second array is an + * other array of pastix_int_t used as secondary key. + * + ******************************************************************************* + */ +#define INTSORTNAME spmIntMSortIntAsc +#define INTSORTSIZE(x) (sizeof (pastix_int_t)) +#define INTSORTNTAB 2 +#define INTSORTSWAP(p,q) do { \ + pastix_int_t t; \ + long disp_p = (((pastix_int_t*)p)-((pastix_int_t*)base_ptr)); \ + long disp_q = (((pastix_int_t*)q)-((pastix_int_t*)base_ptr)); \ + pastix_int_t * int2ptr = *(pbase+1); \ + /* swap integers */ \ + t = *((pastix_int_t *) (p)); \ + *((pastix_int_t *) (p)) = *((pastix_int_t *) (q)); \ + *((pastix_int_t *) (q)) = t; \ + /* swap on secont integer array */ \ + t = int2ptr[disp_p]; \ + int2ptr[disp_p] = int2ptr[disp_q]; \ + int2ptr[disp_q] = t; \ + } while (0) +#define INTSORTCMP(p,q) ((*((pastix_int_t *) (p)) < *((pastix_int_t *) (q))) || \ + ((*((pastix_int_t *) (p)) == *((pastix_int_t *) (q))) && \ + ((( pastix_int_t *)(*(pbase+1)))[(((pastix_int_t*)p)-((pastix_int_t*)base_ptr))] < \ + (( pastix_int_t *)(*(pbase+1)))[(((pastix_int_t*)q)-((pastix_int_t*)base_ptr))]))) +#include "integer_sort_mtypes.c" +#undef INTSORTNAME +#undef INTSORTSIZE +#undef INTSORTSWAP +#undef INTSORTCMP +#undef INTSORTNTAB + +/** + ******************************************************************************* + * + * @ingroup pastix_spm + * + * @brief Sort 2 arrays simultaneously, the first array is an array of + * pastix_int_t and used as primary key for sorting. The second array is an + * other array of pastix_int_t used as secondary key. + * + ******************************************************************************* + */ +#define INTSORTNAME spmIntMSortSmallIntAsc +#define INTSORTSIZE(x) (sizeof (int)) +#define INTSORTNTAB 2 +#define INTSORTSWAP(p,q) do { \ + int t; \ + long disp_p = (((int*)p)-((int*)base_ptr)); \ + long disp_q = (((int*)q)-((int*)base_ptr)); \ + int * int2ptr = *(pbase+1); \ + /* swap integers */ \ + t = *((int *) (p)); \ + *((int *) (p)) = *((int *) (q)); \ + *((int *) (q)) = t; \ + /* swap on secont integer array */ \ + t = int2ptr[disp_p]; \ + int2ptr[disp_p] = int2ptr[disp_q]; \ + int2ptr[disp_q] = t; \ + } while (0) +#define INTSORTCMP(p,q) ((*((int *) (p)) < *((int *) (q))) || \ + ((*((int *) (p)) == *((int *) (q))) && \ + ((( int *)(*(pbase+1)))[(((int*)p)-((int*)base_ptr))] < \ + (( int *)(*(pbase+1)))[(((int*)q)-((int*)base_ptr))]))) +#include "integer_sort_mtypes.c" +#undef INTSORTNAME +#undef INTSORTSIZE +#undef INTSORTSWAP +#undef INTSORTCMP +#undef INTSORTNTAB + diff --git a/spm_read_driver.c b/spm_read_driver.c new file mode 100644 index 0000000000000000000000000000000000000000..20ee25267b16856640a4b4ffb4a6af433ddc6eba --- /dev/null +++ b/spm_read_driver.c @@ -0,0 +1,331 @@ +/** + * @file drivers.c + * + * $COPYRIGHTS$ + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Pierre Ramet + * @author Xavier Lacoste + * @author Theophile Terraz + * @date 2011-11-11 + * + **/ +#include "common.h" +#include "spm.h" +#include "spm_drivers.h" +#if defined(HAVE_SCOTCH) +#include <scotch.h> +#endif + +/** + ******************************************************************************* + * + * @ingroup pastix_spm + * + * @brief Import a matrix file into a spm structure + * + * This function read or generate a sparse matrix from a file to store it into a + * spm structure. The different formats accepted by this driver are described by + * the driver field. + * + ******************************************************************************* + * + * @param[in] driver + * This defines the driver to use to create the spm structure. + * = PastixDriverRSA + * = PastixDriverHB + * = PastixDriverIJV + * = PastixDriverMM + * = PastixDriverLaplacian + * = PastixDriverXLaplacian + * + * @param[in] filename + * The name of the file that stores the matrix (see driver) + * + * @param[in,out] spm + * On entry, an allocated sparse matrix structure. + * On exit, the filled sparse matrix structure with the matrix from the + * file. + * + * @param[in] pastix_comm + * The MPI communicator on which to distribute the sparse matrix. This + * is also used in case of distributed formats. + * + *******************************************************************************/ +int +spmReadDriver( pastix_driver_t driver, + char *filename, + pastix_spm_t *spm, + MPI_Comm comm ) +{ + int mpirank = 0; + int mpiinit; + + spm->mtxtype = PastixGeneral; + spm->flttype = PastixDouble; + spm->fmttype = PastixCSC; + spm->gN = 0; + spm->n = 0; + spm->gnnz = 0; + spm->nnz = 0; + spm->dof = 1; + spm->colptr = NULL; + spm->rowptr = NULL; + spm->values = NULL; + spm->loc2glob = NULL; + + MPI_Initialized( &mpiinit ); + if (mpiinit) { + MPI_Comm_rank( comm, &mpirank ); + } + + if ( (mpirank == 0) || + (driver == PastixDriverLaplacian) || + (driver == PastixDriverCSCD) || + (driver == PastixDriverBRGMD) || + (driver == PastixDriverDMM) ) + { + switch(driver) + { + case PastixDriverCCC: + case PastixDriverRCC: + case PastixDriverOlaf: + case PastixDriverPeer: + fprintf(stderr, "driver: Driver not implemented\n"); + break; + + case PastixDriverHB: + /* TODO: Possible to read the RHS, the solution or a guess of the solution */ + printf("driver: HB file: %s\n", filename); + readHB( filename, spm ); + break; + + case PastixDriverIJV: + printf("driver: 3files file: %s\n", filename); + readIJV( filename, spm ); + break; + + case PastixDriverMM: + printf("driver: MatrixMarket file: %s\n", filename); + readMM( filename, spm ); + break; + + case PastixDriverDMM: + printf("driver: DistributedMatrixMarket file: %s\n", filename); + //readMMD( filename, spm ); + break; + + case PastixDriverPetscS: + case PastixDriverPetscU: + case PastixDriverPetscH: + printf("driver: PETSc file: %s\n", filename); + //readPETSC( filename, spm ); + if (driver == PastixDriverPetscS) spm->mtxtype = PastixSymmetric; + if (driver == PastixDriverPetscH) spm->mtxtype = PastixHermitian; + break; + + case PastixDriverCSCD: + printf("driver CSCd file: %s\n", filename); + //readCSCD( filename, spm, rhs, comm ); + break; + + case PastixDriverLaplacian: + if (mpirank == 0) + printf("driver Laplacian: %s\n", filename); + genLaplacian( filename, spm ); + break; + + case PastixDriverXLaplacian: + if (mpirank == 0) + printf("driver Extended Laplacian: %s\n", filename); + genExtendedLaplacian( filename, spm ); + break; + + case PastixDriverGraph: +#if defined(HAVE_SCOTCH) + { + SCOTCH_Graph sgraph; + FILE *file = fopen( filename, "r" ); + + /* Check integer compatibility */ + if (sizeof(pastix_int_t) != sizeof(SCOTCH_Num)) { + errorPrint("Inconsistent integer type\n"); + fclose(file); + return PASTIX_ERR_INTEGER_TYPE; + } + + SCOTCH_graphLoad( &sgraph, file, 1, 0 ); + SCOTCH_graphData( &sgraph, NULL, &(spm->n), &(spm->colptr), NULL, NULL, NULL, NULL, &(spm->rowptr), NULL ); + fclose(file); + + spm->flttype = PastixPattern; + spm->gN = spm->n; + spm->gnnz = spm->colptr[ spm->n ]; + spm->nnz = spm->gnnz; + } +#else + { + fprintf(stderr, "Scotch driver to read graph file unavailable.\n" + "Compile with Scotch support to provide it\n"); + return PASTIX_ERR_BADPARAMETER; + } +#endif + break; + + case PastixDriverRSA: + default: + readRSA( filename, spm ); + } + + spmConvert( PastixCSC, spm ); + } + + /* #ifndef TYPE_COMPLEX */ + /* if (*type) */ + /* if ((*type)[1] == 'H') */ + /* (*type)[1] = 'S'; */ + /* #endif */ + + /* /\* read RHS file *\/ */ + /* if (mpirank == 0) */ + /* { */ + /* FILE *file; */ + /* char filename2[256]; */ + /* pastix_int_t i; */ + /* double re; */ + + /* sprintf(filename2,"%s.rhs",filename); */ + /* fprintf(stderr,"open RHS file : %s\n",filename2); */ + /* file = fopen(filename2,"r"); */ + /* if (file==NULL) */ + /* { */ + /* fprintf(stderr,"cannot load %s\n", filename2); */ + /* } */ + /* else */ + /* { */ + /* *rhs = (pastix_complex64_t *) malloc((*ncol)*sizeof(pastix_complex64_t)); */ + /* for (i = 0; i < *ncol; i++) */ + /* { */ + /* (*rhs)[i] = 0.0; */ + /* if (1 != fscanf(file,"%lg\n", &re)) */ + /* { */ + /* fprintf(stderr, "ERROR: reading rhs(%ld)\n", (long int)i); */ + /* exit(1); */ + /* } */ + /* (*rhs)[i] = (pastix_complex64_t)re; */ + /* } */ + /* fclose(file); */ + /* } */ + /* } */ + + /* if (*rhs == NULL && ( mpirank == 0 || */ + /* driver_type == LAPLACIAN) && */ + /* driver_type != CSCD && driver_type != FDUP_DIST && driver_type != MMD) */ + /* { */ + /* *rhs = (pastix_complex64_t *) malloc((*ncol)*sizeof(pastix_complex64_t)); */ + /* pastix_int_t i,j; */ + /* for (i = 0; i < *ncol; i++) */ + /* (*rhs)[i] = 0.0; */ + + /* #ifdef TYPE_COMPLEX */ + /* fprintf(stdout, "Setting right-hand-side member such as X[i] = i + i*I\n"); */ + /* #else */ + /* fprintf(stdout, "Setting right-hand-side member such as X[i] = i\n"); */ + /* #endif */ + /* for (i = 0; i < *ncol; i++) */ + /* { */ + /* for (j = (*colptr)[i] -1; j < (*colptr)[i+1]-1; j++) */ + /* { */ + /* #ifdef TYPE_COMPLEX */ + /* (*rhs)[(*rows)[j]-1] += (i+1 + I*(i+1))*(*values)[j]; */ + /* #else */ + /* (*rhs)[(*rows)[j]-1] += (i+1)*(*values)[j]; */ + /* #endif */ + /* if (MTX_ISSYM((*type)) && i != (*rows)[j]-1) */ + /* { */ + /* #ifdef TYPE_COMPLEX */ + /* (*rhs)[i] += ((*rows)[j] + (*rows)[j] * I)*(*values)[j]; */ + /* #else */ + /* (*rhs)[i] += ((*rows)[j])*(*values)[j]; */ + /* #endif */ + /* } */ + /* } */ + /* } */ + /* } */ + /* if (driver_type == CSCD || driver_type == FDUP_DIST || driver_type == MMD) */ + /* { */ + /* pastix_int_t i,j; */ + /* pastix_int_t N; */ + /* pastix_int_t send[2], recv[2]; */ + /* int comm_size; */ + /* MPI_Comm_size(comm,&comm_size); */ + /* send[0] = *ncol; */ + /* send[1] = 0; */ + /* if (*ncol != 0 && *rhs == NULL) */ + /* send[1] = 1; */ + /* MPI_Allreduce(send, recv, 2, PASTIX_MPI_INT, MPI_SUM, comm); */ + /* N = recv[0]; */ + /* if (recv[1] > 0) */ + /* { */ + /* pastix_complex64_t *RHS; */ + /* pastix_complex64_t *RHS_recv; */ + /* RHS = (pastix_complex64_t *) malloc((N)*sizeof(pastix_complex64_t)); */ + + /* for (i = 0; i < N; i++) */ + /* (RHS)[i] = 0.0; */ + /* if (mpirank == 0) */ + /* fprintf(stdout, "Setting RHS such as X[i] = i\n"); */ + /* for (i = 0; i < *ncol; i++) */ + /* { */ + /* for (j = (*colptr)[i] -1; j < (*colptr)[i+1]-1; j++) */ + /* { */ + /* (RHS)[(*rows)[j]-1] += ((*loc2glob)[i])*(*values)[j]; */ + /* if (MTX_ISSYM((*type)) && i != (*rows)[j]-1) */ + /* (RHS)[(*loc2glob)[i]-1] += ((*rows)[j])*(*values)[j]; */ + /* } */ + /* } */ + /* RHS_recv = (pastix_complex64_t *) malloc((N)*sizeof(pastix_complex64_t)); */ + /* MPI_Allreduce(RHS, RHS_recv, N, PASTIX_MPI_FLOAT, MPI_SUM, comm); */ + /* free(RHS); */ + /* *rhs = (pastix_complex64_t *) malloc((*ncol)*sizeof(pastix_complex64_t)); */ + + /* for (i = 0; i < *ncol; i++) */ + /* (*rhs)[i] = RHS_recv[(*loc2glob)[i]-1]; */ + /* } */ + /* } */ + + if ( mpiinit ) + { + pastix_int_t nnz; + + if (mpirank == 0) { + nnz = spm->colptr[spm->gN] - spm->colptr[0]; + } + + MPI_Bcast( spm, 2*sizeof(int)+3*sizeof(pastix_int_t), MPI_CHAR, 0, comm ); + MPI_Bcast( &nnz, 1, PASTIX_MPI_INT, 0, comm ); + + fprintf(stderr, "%d: mtxtype=%d, flttype=%d, nnz=%ld, gN=%ld\n", + mpirank, spm->mtxtype, spm->flttype, (long)nnz, (long)spm->gN ); + + if ( mpirank != 0 ) + { + spm->colptr = (pastix_int_t *) malloc((spm->gN+1) * sizeof(pastix_int_t)); + spm->rowptr = (pastix_int_t *) malloc(nnz * sizeof(pastix_int_t)); + spm->values = (void *) malloc(nnz * pastix_size_of( spm->flttype )); + spm->loc2glob = NULL; + spm->loc2glob = NULL; + /* spm->rhs = (void *) malloc((*ncol)*sizeof(pastix_complex64_t)); */ + /* spm->type = (char *) malloc(4*sizeof(char)); */ + } + + MPI_Bcast( spm->colptr, spm->gN+1, PASTIX_MPI_INT, 0, comm ); + MPI_Bcast( spm->rowptr, nnz, PASTIX_MPI_INT, 0, comm ); + MPI_Bcast( spm->values, nnz * pastix_size_of( spm->flttype ), MPI_CHAR, 0, comm ); + /* MPI_Bcast(*rhs, *ncol, PASTIX_MPI_FLOAT, 0, comm); */ + /* MPI_Bcast(*type, 4, MPI_CHAR, 0, comm); */ + } + + return PASTIX_SUCCESS; +} diff --git a/z_spm.c b/z_spm.c index 4b740a84508924cf2362c226fe93cd0af13457a0..ed003155d59afced005f847862546d9db9ae2c3c 100644 --- a/z_spm.c +++ b/z_spm.c @@ -15,6 +15,7 @@ **/ #include "common.h" #include "spm.h" +#include "z_spm.h" /** ******************************************************************************* @@ -58,11 +59,11 @@ z_spmSort( pastix_spm_t *spm ) size = colptr[1] - colptr[0]; #if defined(PRECISION_p) - intSort1asc1( rowptr, size ); + spmIntSort1Asc1( rowptr, size ); #else sortptr[0] = rowptr; sortptr[1] = values; - z_qsortIntFloatAsc( sortptr, size ); + z_spmIntSortAsc( sortptr, size ); #endif rowptr += size; values += size; @@ -74,11 +75,11 @@ z_spmSort( pastix_spm_t *spm ) size = rowptr[1] - rowptr[0]; #if defined(PRECISION_p) - intSort1asc1( colptr, size ); + spmIntSort1Asc1( rowptr, size ); #else sortptr[0] = colptr; sortptr[1] = values; - z_qsortIntFloatAsc( sortptr, size ); + z_spmIntSortAsc( sortptr, size ); #endif colptr += size; values += size; @@ -291,7 +292,7 @@ z_spmSymmetrize( pastix_spm_t *spm ) pastix_int_t newsize, oldsize; /* Sort the array per column */ - intSort2asc1(toaddtab, toaddcnt); + spmIntSort2Asc1(toaddtab, toaddcnt); spm->nnz = spm->nnz + toaddcnt; spm->gnnz = spm->nnz; diff --git a/z_spm.h b/z_spm.h index 0c4ae86c7a2524d08556c68873b646f1cbd84349..db7474081ff004a99c239f12847a6fa6dd08a156 100644 --- a/z_spm.h +++ b/z_spm.h @@ -18,6 +18,11 @@ #ifndef _z_spm_H_ #define _z_spm_H_ +/** + * Integer routines + */ +int z_spmIntSortAsc(void ** const pbase, const pastix_int_t n); + /** * Conversion routines */ @@ -58,7 +63,7 @@ void z_spmDensePrint( pastix_int_t m, pastix_int_t n, pastix_complex64_t *A, pas void z_spmPrint( const pastix_spm_t *spm ); -void z_spmExpand(pastix_spm_t *spm); +int z_spmExpand(pastix_spm_t *spm); void z_spmDofs2Flat(pastix_spm_t *spm); #endif /* _z_spm_H_ */ diff --git a/z_spm_genrhs.c b/z_spm_genrhs.c index 47cf44daafd745c73b87e2263592a7e2c77968db..0f8ad1195ca813bd94e2f48c0f59d81602c739ad 100644 --- a/z_spm_genrhs.c +++ b/z_spm_genrhs.c @@ -13,6 +13,7 @@ * * @precisions normal z -> c s d **/ +#include "cblas.h" #include "lapacke.h" #include "common.h" #include "solver.h" @@ -373,10 +374,19 @@ z_spmCheckAxb( int nrhs, * Compute r = x0 - x */ if ( x0 != NULL ) { + const pastix_complex64_t *zx = (const pastix_complex64_t *)x; + pastix_complex64_t *zx0 = (pastix_complex64_t *)x0;; + pastix_int_t i; + normX0 = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', spm->n, nrhs, x0, ldx0 ); - core_zgeadd( PastixNoTrans, spm->n, nrhs, - -1., x, ldx, - 1., x0, ldx0 ); + + for( i=0; i<nrhs; i++) { + cblas_zaxpy( + spm->n, CBLAS_SADDR(mzone), + zx + ldx * i, 1, + zx0 + ldx0 * i, 1); + } + normR = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', spm->n, nrhs, x0, ldx0 ); forward = normR / normX0; diff --git a/z_spm_integer.c b/z_spm_integer.c new file mode 100644 index 0000000000000000000000000000000000000000..d9d3a115dac9483ca41ca3d065b44280d194cccf --- /dev/null +++ b/z_spm_integer.c @@ -0,0 +1,61 @@ +/** + * + * @file z_spm_integers.c + * + * PaStiX spm routines + * PaStiX is a software package provided by Inria Bordeaux - Sud-Ouest, + * LaBRI, University of Bordeaux 1 and IPB. + * + * @version 1.0.0 + * @author Francois Pellegrini + * @author Mathieu Faverge + * @author Pierre Ramet + * @author Xavier Lacoste + * @date 2011-11-11 + * + * @precisions normal z -> c d s + * + **/ +#include <ctype.h> +#include <limits.h> +#include <time.h> +#include "common.h" + +/** + ******************************************************************************* + * + * @ingroup pastix_spm + * + * @brief Sort 2 arrays simultaneously, the first array is an array of + * pastix_int_t and used as key for sorting. The second array is an array of + * pastix_complex64_t. + * + ******************************************************************************* + */ +static size_t intsortsize[2] = { sizeof(pastix_int_t), sizeof(pastix_complex64_t) }; +#define INTSORTNAME z_spmIntSortAsc +#define INTSORTSIZE(x) (intsortsize[x]) +#define INTSORTNTAB 2 +#define INTSORTSWAP(p,q) do { \ + pastix_int_t t; \ + long disp_p = (((pastix_int_t*)p)-((pastix_int_t*)base_ptr)); \ + long disp_q = (((pastix_int_t*)q)-((pastix_int_t*)base_ptr)); \ + pastix_complex64_t * floatptr = *(pbase+1); \ + pastix_complex64_t f; \ + /* swap integers */ \ + t = *((pastix_int_t *) (p)); \ + *((pastix_int_t *) (p)) = *((pastix_int_t *) (q)); \ + *((pastix_int_t *) (q)) = t; \ + /* swap corresponding values */ \ + f = floatptr[disp_p]; \ + floatptr[disp_p] = floatptr[disp_q]; \ + floatptr[disp_q] = f; \ + } while (0) +#define INTSORTCMP(p,q) (*((pastix_int_t *) (p)) < *((pastix_int_t *) (q))) +#include "integer_sort_mtypes.c" +#undef INTSORTNAME +#undef INTSORTSIZE +#undef INTSORTSWAP +#undef INTSORTCMP +#undef INTSORTNTAB + diff --git a/z_spm_laplacian.c b/z_spm_laplacian.c new file mode 100644 index 0000000000000000000000000000000000000000..cc9cb89ccf0a96e1548590a549504020c93b3087 --- /dev/null +++ b/z_spm_laplacian.c @@ -0,0 +1,699 @@ +/** + * @file z_spm_laplacian.c + * + * $COPYRIGHTS$ + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Xavier Lacoste + * @author Theophile Terraz + * @date 2015-01-01 + * @precisions normal z -> c d s p + * + **/ + +#include "common.h" +#include "spm.h" +#include "laplacian.h" + +/** + ******************************************************************************* + * + * @ingroup pastix_spm_driver + * + * z_spmLaplacian1D - Generate a 1D laplacian matrix. + * + * Example: + * > 1 -1 0 0 + * > -1 2 -1 0 + * > 0 -1 2 -1 + * > 0 0 -1 1 + * + ******************************************************************************* + * + * @param[in,out] spm + * At start, an allocated spm structure. + * Contains the size of the laplacian in spm->n. + * At exit, contains the matrix in csc format. + * + * @param[in] dim1 + * contains the dimension of the 1D laplacian. + * + *******************************************************************************/ +void +z_spmLaplacian1D( pastix_spm_t *spm, + pastix_int_t dim1 ) +{ + pastix_complex64_t *valptr; + pastix_int_t *colptr, *rowptr; + pastix_int_t i, j; + pastix_int_t nnz = 2*(spm->gN) - 1; + + spm->mtxtype = PastixSymmetric; + spm->flttype = PastixComplex64; + spm->fmttype = PastixCSC; + spm->gnnz = nnz; + spm->nnz = nnz; + spm->dof = 1; + + assert( spm->gN == dim1 ); + + /* Allocating */ + spm->colptr = malloc((spm->n+1)*sizeof(pastix_int_t)); + spm->rowptr = malloc(nnz *sizeof(pastix_int_t)); + assert( spm->colptr ); + assert( spm->rowptr ); + +#if !defined(PRECISION_p) + spm->values = malloc(nnz *sizeof(pastix_complex64_t)); + assert( spm->values ); +#endif + + /* Building ia, ja and values*/ + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (pastix_complex64_t*)(spm->values); + + j = 0; + *colptr = 1; colptr++; /* baseval */ + for (i=0; i<dim1; i++, colptr++) + { + *rowptr = i+1; +#if !defined(PRECISION_p) + if ( (i == 0) || (i == dim1-1) ) { + *valptr = (pastix_complex64_t)1.; + } + else { + *valptr = (pastix_complex64_t)2.; + } +#endif + + j++; valptr++; rowptr++; + + if (i < spm->gN-1) { + *rowptr = i+2; + +#if defined(PRECISION_z) || defined(PRECISION_c) + *valptr = -1. + 2. * _Complex_I; +#elif defined(PRECISION_d) || defined(PRECISION_s) + *valptr = -1.; +#endif + j++; rowptr++; valptr++; + } + + *colptr = j+1; + } + + assert( (spm->colptr[ dim1 ] - spm->colptr[0]) == nnz ); +} + +/** + ******************************************************************************* + * + * @ingroup pastix_spm_driver + * + * z_spmLaplacian2D - Generate a 2D laplacian matrix. + * + * Example: + * > 2 -1 0 -1 0 0 + * > -1 3 -1 0 -1 0 + * > 0 -1 2 0 0 -1 + * > -1 0 0 2 -1 0 + * > 0 -1 0 -1 3 -1 + * > 0 0 -1 0 -1 2 + * + ******************************************************************************* + * + * @param[in,out] spm + * At start, an allocated spm structure. + * Contains the size of the laplacian in spm->n. + * At exit, contains the matrix in csc format. + * + * @param[in] dim1 + * contains the first dimension of the 2D grid of the laplacian. + * + * @param[in] dim2 + * contains the second dimension of the 2D grid of the laplacian. + * + *******************************************************************************/ +void +z_spmLaplacian2D( pastix_spm_t *spm, + pastix_int_t dim1, + pastix_int_t dim2 ) +{ + pastix_complex64_t *valptr; + pastix_int_t *colptr, *rowptr; + pastix_int_t i, j, k; + pastix_int_t nnz = (2*(dim1)-1)*dim2 + (dim2-1)*dim1; + + spm->mtxtype = PastixSymmetric; + spm->flttype = PastixComplex64; + spm->fmttype = PastixCSC; + spm->gnnz = nnz; + spm->nnz = nnz; + spm->dof = 1; + + assert( spm->gN == dim1*dim2 ); + + /* Allocating */ + spm->colptr = malloc((spm->n+1)*sizeof(pastix_int_t)); + spm->rowptr = malloc(nnz *sizeof(pastix_int_t)); + assert( spm->colptr ); + assert( spm->rowptr ); + +#if !defined(PRECISION_p) + spm->values = malloc(nnz *sizeof(pastix_complex64_t)); + assert( spm->values ); +#endif + + /* Building ia, ja and values*/ + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (pastix_complex64_t*)(spm->values); + + /* Building ia, ja and values*/ + *colptr = 1; + k = 1; /* Column index in the matrix ((i-1) * dim1 + j-1) */ + for(i=1; i<=dim2; i++) + { + for(j=1; j<=dim1; j++) + { + colptr[1] = colptr[0]; + + /* Diagonal value */ + *rowptr = k; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t) 4.; + if (j == dim1 || j == 1) + *valptr -= (pastix_complex64_t) 1.; + if (i == dim2 || i == 1) + *valptr -= (pastix_complex64_t) 1.; +#endif + valptr++; rowptr++; colptr[1]++; + + /* Connexion along dimension 1 */ + if (j < dim1) { + *rowptr = k+1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-1.; +#endif + valptr++; rowptr++; colptr[1]++; + } + + /* Connexion along dimension 2 */ + if (i < dim2) { + *rowptr = k+dim1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-1.; +#endif + valptr++; rowptr++; colptr[1]++; + } + + colptr++; k++; + } + } + + assert( (spm->colptr[ spm->gN ] - spm->colptr[0]) == nnz ); +} + +/** + ******************************************************************************* + * + * @ingroup pastix_spm_driver + * + * z_spmLaplacian3D - Generate a 3D laplacian matrix. + * + * Example: + * > 3 -1 -1 0 -1 0 0 0 + * > -1 3 0 -1 0 -1 0 0 + * > -1 0 3 -1 0 0 -1 0 + * > 0 -1 -1 3 0 0 0 -1 + * > -1 0 0 0 3 -1 -1 0 + * > 0 -1 0 0 -1 3 0 -1 + * > 0 0 -1 0 -1 0 3 -1 + * > 0 0 0 -1 0 -1 -1 3 + * + ******************************************************************************* + * + * @param[in,out] spm + * At start, an allocated spm structure. + * Contains the size of the laplacian in spm->n. + * At exit, contains the matrix in csc format. + * + * @param[in] dim1 + * contains the first dimension of the 3D grid of the laplacian. + * + * @param[in] dim2 + * contains the second dimension of the 3D grid of the laplacian. + * + * @param[in] dim3 + * contains the third dimension of the 3D grid of the laplacian. + * + *******************************************************************************/ +void +z_spmLaplacian3D( pastix_spm_t *spm, + pastix_int_t dim1, + pastix_int_t dim2, + pastix_int_t dim3 ) +{ + + pastix_complex64_t *valptr; + pastix_int_t *colptr, *rowptr; + pastix_int_t i, j, k, l; + pastix_int_t nnz = (2*(dim1)-1)*dim2*dim3 + (dim2-1)*dim1*dim3 + dim2*dim1*(dim3-1); + + spm->mtxtype = PastixSymmetric; + spm->flttype = PastixComplex64; + spm->fmttype = PastixCSC; + spm->gnnz = nnz; + spm->nnz = nnz; + spm->dof = 1; + + assert( spm->gN == dim1*dim2*dim3 ); + + /* Allocating */ + spm->colptr = malloc((spm->n+1)*sizeof(pastix_int_t)); + spm->rowptr = malloc(nnz *sizeof(pastix_int_t)); + assert( spm->colptr ); + assert( spm->rowptr ); + +#if !defined(PRECISION_p) + spm->values = malloc(nnz *sizeof(pastix_complex64_t)); + assert( spm->values ); +#endif + + /* Building ia, ja and values*/ + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (pastix_complex64_t*)(spm->values); + + /* Building ia, ja and values*/ + *colptr = 1; + l = 1; /* Column index in the matrix ((i-1) * dim1 * dim2 + (j-1) * dim1 + k-1) */ + for(i=1; i<=dim3; i++) + { + for(j=1; j<=dim2; j++) + { + for(k=1; k<=dim1; k++) + { + + colptr[1] = colptr[0]; + + /* Diagonal value */ + *rowptr = l; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t) 6.; + if (k == dim1 || k == 1) + *valptr -= (pastix_complex64_t) 1.; + if (j == dim2 || j == 1) + *valptr -= (pastix_complex64_t) 1.; + if (i == dim3 || i == 1) + *valptr -= (pastix_complex64_t) 1.; +#endif + valptr++; rowptr++; colptr[1]++; + + /* Connexion along dimension 1 */ + if (k < dim1) { + *rowptr = l+1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-1.; +#endif + valptr++; rowptr++; colptr[1]++; + } + + /* Connexion along dimension 2 */ + if (j < dim2) { + *rowptr = l+dim1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-1.; +#endif + valptr++; rowptr++; colptr[1]++; + } + + /* Connexion along dimension 3 */ + if (i < dim3) { + *rowptr = l+dim1*dim2; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-1.; +#endif + valptr++; rowptr++; colptr[1]++; + } + + colptr++; l++; + } + } + } + + assert( (spm->colptr[ spm->gN ] - spm->colptr[0]) == nnz ); +} + +/** + ******************************************************************************* + * + * @ingroup pastix_spm_driver + * + * z_spmExtendedLaplacian2D - Generate a 2D extended laplacian matrix. + * + ******************************************************************************* + * + * @param[in,out] spm + * At start, an allocated spm structure. + * Contains the size of the laplacian in spm->n. + * At exit, contains the matrix in csc format. + * + * @param[in] dim1 + * contains the first dimension of the 2D grid of the laplacian. + * + * @param[in] dim2 + * contains the second dimension of the 2D grid of the laplacian. + * + *******************************************************************************/ +void +z_spmExtendedLaplacian2D( pastix_spm_t *spm, + pastix_int_t dim1, + pastix_int_t dim2 ) +{ + pastix_complex64_t *valptr; + pastix_int_t *colptr, *rowptr; + pastix_int_t i, j, k; + pastix_int_t nnz = (2*(dim1)-1)*dim2 + (dim2-1)*(3*dim1-2); + + spm->mtxtype = PastixSymmetric; + spm->flttype = PastixComplex64; + spm->fmttype = PastixCSC; + spm->gnnz = nnz; + spm->nnz = nnz; + spm->dof = 1; + + assert( spm->gN == dim1*dim2 ); + + /* Allocating */ + spm->colptr = malloc((spm->n+1)*sizeof(pastix_int_t)); + spm->rowptr = malloc(nnz *sizeof(pastix_int_t)); + assert( spm->colptr ); + assert( spm->rowptr ); + +#if !defined(PRECISION_p) + spm->values = malloc(nnz *sizeof(pastix_complex64_t)); + assert( spm->values ); +#endif + + /* Building ia, ja and values*/ + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (pastix_complex64_t*)(spm->values); + + /* Building ia, ja and values*/ + *colptr = 1; + k = 1; /* Column index in the matrix ((i-1) * dim1 + j-1) */ + for(i=1; i<=dim2; i++) + { + for(j=1; j<=dim1; j++) + { + colptr[1] = colptr[0]; + + /* Diagonal value */ + *rowptr = k; +#if !defined(PRECISION_p) + if ( (j == dim1 || j == 1) && (i == dim2 || i == 1) ) + *valptr = (pastix_complex64_t) 2.5; + else if (j == dim1 || j == 1 || i == dim2 || i == 1) + *valptr = (pastix_complex64_t) 4.; + else + *valptr = (pastix_complex64_t) 6.; +#endif + valptr++; rowptr++; colptr[1]++; + + /* Connexion along dimension 1 */ + if (j < dim1) { + *rowptr = k+1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-1.; +#endif + valptr++; rowptr++; colptr[1]++; + } + + /* Connexion along dimension 2 */ + if (i < dim2) + { + if (j > 1) + { + *rowptr = k+dim1-1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-0.5; +#endif + valptr++; rowptr++; colptr[1]++; + + } + + *rowptr = k+dim1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-1.; +#endif + valptr++; rowptr++; colptr[1]++; + + if (j < dim1) + { + *rowptr = k+dim1+1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-0.5; +#endif + valptr++; rowptr++; colptr[1]++; + + } + } + + colptr++; k++; + } + } + + assert( (spm->colptr[ spm->gN ] - spm->colptr[0]) == nnz ); +} + +/** + ******************************************************************************* + * + * @ingroup pastix_spm_driver + * + * z_spmExtendedLaplacian3D - Generate a 3D extended laplacian matrix. + * + ******************************************************************************* + * + * @param[in,out] spm + * At start, an allocated spm structure. + * Contains the size of the laplacian in spm->n. + * At exit, contains the matrix in csc format. + * + * @param[in] dim1 + * contains the first dimension of the 3D grid of the laplacian. + * + * @param[in] dim2 + * contains the second dimension of the 3D grid of the laplacian. + * + * @param[in] dim3 + * contains the second dimension of the 3D grid of the laplacian. + * + *******************************************************************************/ +void +z_spmExtendedLaplacian3D( pastix_spm_t *spm, + pastix_int_t dim1, + pastix_int_t dim2, + pastix_int_t dim3 ) +{ + + pastix_complex64_t *valptr; + pastix_int_t *colptr, *rowptr; + pastix_int_t i, j, k, l; + pastix_int_t nnz = (2*dim1-1)*dim2*dim3 + (3*dim1-2)*(dim2-1)*dim3 + ((3*dim1-2)*dim2+2*(3*dim1-2)*(dim2-1))*(dim3-1); + + spm->mtxtype = PastixSymmetric; + spm->flttype = PastixComplex64; + spm->fmttype = PastixCSC; + spm->gnnz = nnz; + spm->nnz = nnz; + spm->dof = 1; + + assert( spm->gN == dim1*dim2*dim3 ); + + /* Allocating */ + spm->colptr = malloc((spm->n+1)*sizeof(pastix_int_t)); + spm->rowptr = malloc(nnz *sizeof(pastix_int_t)); + assert( spm->colptr ); + assert( spm->rowptr ); + +#if !defined(PRECISION_p) + spm->values = malloc(nnz *sizeof(pastix_complex64_t)); + assert( spm->values ); +#endif + + /* Building ia, ja and values*/ + colptr = spm->colptr; + rowptr = spm->rowptr; + valptr = (pastix_complex64_t*)(spm->values); + + /* Building ia, ja and values*/ + *colptr = 1; + l = 1; /* Column index in the matrix ((i-1) * dim1 * dim2 + (j-1) * dim1 + k-1) */ + for(i=1; i<=dim3; i++) + { + for(j=1; j<=dim2; j++) + { + for(k=1; k<=dim1; k++) + { + + colptr[1] = colptr[0]; + + /* Diagonal value */ + *rowptr = l; +#if !defined(PRECISION_p) + if ( (j == dim2 || j == 1) && (i == dim3 || i == 1) && (k == dim1 || i == 1) ) + *valptr = (pastix_complex64_t) 4.75; + else if ( (j != dim2 || j != 1) && (i == dim3 || i == 1) && (k == dim1 || i == 1) ) + *valptr = (pastix_complex64_t) 10.; + else if ( (j == dim2 || j == 1) && (i != dim3 || i != 1) && (k == dim1 || i == 1) ) + *valptr = (pastix_complex64_t) 10.; + else if ( (j == dim2 || j == 1) && (i == dim3 || i == 1) && (k != dim1 || i != 1) ) + *valptr = (pastix_complex64_t) 10.; + else if ( (j != dim2 || j != 1) && (i != dim3 || i != 1) && (k == dim1 || i == 1) ) + *valptr = (pastix_complex64_t) 7.; + else if ( (j == dim2 || j == 1) && (i != dim3 || i != 1) && (k != dim1 || i != 1) ) + *valptr = (pastix_complex64_t) 7.; + else if ( (j != dim2 || j != 1) && (i == dim3 || i == 1) && (k != dim1 || i != 1) ) + *valptr = (pastix_complex64_t) 7.; + else + *valptr = (pastix_complex64_t) 14.; +#endif + valptr++; rowptr++; colptr[1]++; + + /* Connexion along dimension 1 */ + if (k < dim1) { + *rowptr = l+1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-1.; +#endif + valptr++; rowptr++; colptr[1]++; + } + + /* Connexion along dimension 2 */ + if (j < dim2) + { + if (k > 1) + { + *rowptr = l+dim1-1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-0.5; +#endif + valptr++; rowptr++; colptr[1]++; + + } + + *rowptr = l+dim1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-1.; +#endif + valptr++; rowptr++; colptr[1]++; + + if (k < dim1) + { + *rowptr = l+dim1+1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-0.5; +#endif + valptr++; rowptr++; colptr[1]++; + + } + } + + /* Connexion along dimension 3 */ + if (i < dim3) { + if( j > 1 ) + { + if (k > 1) + { + *rowptr = l+dim1*dim2-dim1-1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-0.25; +#endif + valptr++; rowptr++; colptr[1]++; + + } + + *rowptr = l+dim1*dim2-dim1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-0.5; +#endif + valptr++; rowptr++; colptr[1]++; + + if (k < dim1) + { + *rowptr = l+dim1*dim2-dim1+1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-0.25; +#endif + valptr++; rowptr++; colptr[1]++; + + } + } + if (k > 1) + { + *rowptr = l+dim1*dim2-1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-0.5; +#endif + valptr++; rowptr++; colptr[1]++; + + } + + *rowptr = l+dim1*dim2; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-1.; +#endif + valptr++; rowptr++; colptr[1]++; + + if (k < dim1) + { + *rowptr = l+dim1*dim2+1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-0.5; +#endif + valptr++; rowptr++; colptr[1]++; + + } + + if( j < dim2 ) + { + if (k > 1) + { + *rowptr = l+dim1*dim2+dim1-1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-0.25; +#endif + valptr++; rowptr++; colptr[1]++; + + } + + *rowptr = l+dim1*dim2+dim1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-0.5; +#endif + valptr++; rowptr++; colptr[1]++; + + if (k < dim1) + { + *rowptr = l+dim1*dim2+dim1+1; +#if !defined(PRECISION_p) + *valptr = (pastix_complex64_t)-0.25; +#endif + valptr++; rowptr++; colptr[1]++; + + } + } + } + + colptr++; l++; + } + } + } + + assert( (spm->colptr[ spm->gN ] - spm->colptr[0]) == nnz ); +}