diff --git a/coreblas/compute/core_ztpmlqt.c b/coreblas/compute/core_ztpmlqt.c
index 162375f966f1076860acd44854cacaff9ea1381f..bd5e29973702c9a5d739de15e82756f3469d9562 100644
--- a/coreblas/compute/core_ztpmlqt.c
+++ b/coreblas/compute/core_ztpmlqt.c
@@ -112,8 +112,8 @@
  * V = [V1] [V2].
  *
  * The size of the trapezoidal block V2 is determined by the parameter L,
- * where 0 <= L <= K; V2 is upper trapezoidal, consisting of the first L
- * rows of a K-by-K upper triangular matrix. If L=K, V2 is upper triangular;
+ * where 0 <= L <= K; V2 is lower trapezoidal, consisting of the first L
+ * rows of a K-by-K upper triangular matrix. If L=K, V2 is lower triangular;
  * if L=0, there is no trapezoidal block, hence V = V1 is rectangular.
  *
  * If side = MorseLeft: C = [A] where A is K-by-N, B is M-by-N and V is K-by-M.
diff --git a/runtime/starpu/codelets/codelet_ztpmlqt.c b/runtime/starpu/codelets/codelet_ztpmlqt.c
index 2363d67c0cbd74f7ab954db90bfa8e3f6817b906..090ce92f2af8af120e3f8e08e4de28f022026551 100644
--- a/runtime/starpu/codelets/codelet_ztpmlqt.c
+++ b/runtime/starpu/codelets/codelet_ztpmlqt.c
@@ -54,7 +54,7 @@ static void cl_ztpmlqt_cpu_func(void *descr[], void *cl_arg)
                   V, ldv, T, ldt, A, lda, B, ldb, WORK );
 }
 
-#if defined(CHAMELEON_USE_CUDA)
+#if defined(CHAMELEON_USE_CUDA) && 0
 static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg)
 {
     MORSE_enum side;
@@ -100,7 +100,8 @@ static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(ztpmlqt, 5, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS_CPU(ztpmlqt, 5, cl_ztpmlqt_cpu_func)
+//CODELETS(ztpmlqt, 5, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYNC)
 
 void MORSE_TASK_ztpmlqt(
     const MORSE_option_t *options,