diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt
index 40baddc598df9da3ce04b58a54b7fe645435b564..d458f074e25ef600048366ec650673df488694ef 100644
--- a/coreblas/compute/CMakeLists.txt
+++ b/coreblas/compute/CMakeLists.txt
@@ -53,6 +53,7 @@ set(ZSRC
     core_zgetrf.c
     core_zgetrf_incpiv.c
     core_zgetrf_nopiv.c
+    core_zgetrf_panel.c
     core_zgram.c
     core_zhe2ge.c
     core_zherfb.c
diff --git a/coreblas/compute/core_zgetrf_panel.c b/coreblas/compute/core_zgetrf_panel.c
new file mode 100644
index 0000000000000000000000000000000000000000..2ec4b23a8f3e2f7cb37ff08f0721d96f1ed6dd9e
--- /dev/null
+++ b/coreblas/compute/core_zgetrf_panel.c
@@ -0,0 +1,299 @@
+/**
+ *
+ * @file core_zgetrf_panel.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon core_zgetrf with partial pivoting CPU kernel
+ *
+ * @version 1.2.0
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2022-02-22
+ * @precisions normal z -> c d s
+ *
+ */
+#include "coreblas/lapacke.h"
+#include "coreblas.h"
+
+static const CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0;
+
+/**
+ ******************************************************************************
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ * CORE_zgetrf_panel_diag computes the LU factorization of a single column on
+ * the diagonal block. Works in combination with CORE_zgetrf_panel_offdiag() to
+ * perform the full LU factorization with partial pivoting of a single column in
+ * a panel.
+ *
+ *******************************************************************************
+ *
+ * @param[in] m
+ *         The number of rows of the matrix A. m >= 0.
+ *
+ * @param[in] n
+ *          The number of columns of the matrix A. n >= 0.
+ *
+ * @param[in] h
+ *         The index of the column to factorize in the matrix A.
+ *
+ * @param[in,out] A
+ *          On entry, the matrix A where column h-1 needs to be factorized, and
+ *          pivot for column h needs to be selected.
+ *          The lower pentagonal part m-by-(h-1) are the L factors of the LU
+ *          decomposition of A for the first (h-1) columns/rows.
+ *          The upper pentagonal part (h-1)-by-n are the U factors of the LU
+ *          decomposition of A for the first (h-1) columns/rows.
+ *          On exit, A is factorized up to the (h-1)^th column/row and the
+ *          remaining part is updated. The pivot of column h is selected.
+ *
+ * @param[in] lda
+ *          The leading dimension of the array A. lda >= max(1,m).
+ *
+ * @param[in,out] IPIV
+ *          On entry, the pivot array of size min(m,n) with the first h-2 columns initialized.
+ *          On exit, IPIV[h-1] is updated with the selected pivot for the previous column.
+ *
+ * @param[in,out] nextpiv
+ *          On entry, the allocated and initialized CHAM_piv_t structure to
+ *          store the information related to pivot at stage h.
+ *          On exit, the diagrow is initialized with the diagonal h row from A,
+ *          and the pivot row is updated if a better pivot is found for step h.
+ *
+ * @param[in] prevpiv
+ *          Holds the CHAM_piv_t structure of the previous (h-1) step with the
+ *          (h-1) diagonal row, and the pivot selected row for (h-1) step.
+ *          Not referenced if h is equal to 0.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ *
+ */
+int
+CORE_zgetrf_panel_diag( int m, int n, int h, int m0,
+                        CHAMELEON_Complex64_t *A, int lda,
+                        int *IPIV, CHAM_pivot_t *nextpiv, const CHAM_pivot_t *prevpiv )
+{
+    CHAMELEON_Complex64_t *subA = A + lda * h + h;
+    CHAMELEON_Complex64_t  alpha;
+
+    if ( h > 0 ) {
+        CHAMELEON_Complex64_t *L, *pivrow;
+
+        int pivot_blkm0  = prevpiv->blkm0;
+        int pivot_blkidx = prevpiv->blkidx;
+        int pivot        = pivot_blkm0 + pivot_blkidx;
+
+        pivrow = (CHAMELEON_Complex64_t *)(prevpiv->pivrow);
+
+        /*
+         * Copy the previously selected row to the right place if it's not the original
+         * diagonal row or if does not belong to the diagonal block
+         */
+        if(   (pivot_blkm0 != m0) ||
+            ( (pivot_blkm0 == m0) && (pivot_blkidx != (h-1)) ) )
+        {
+            cblas_zcopy( n, pivrow, 1,
+                         A + h-1, lda );
+        }
+
+        /* Store the pivot */
+        IPIV[h-1] = pivot + 1;
+
+        /*
+         * Copy the diagonal row in place of the selected one (previous pivot) to the right place if:
+         *    - we own the previous pivot
+         *    - pivot index was not last step index
+         */
+        if( (pivot_blkm0  == m0 ) &&
+            (pivot_blkidx != h-1) )
+        {
+            cblas_zcopy( n, prevpiv->diagrow, 1,
+                            A + pivot_blkidx, lda );
+        }
+
+        /* Take the pointer to the element below the current diagonal element */
+        L = A + lda * (h-1) + h;
+
+        if ( h < m ) {
+            /* Scaling off-diagonal terms */
+            alpha = (CHAMELEON_Complex64_t)1. / pivrow[h-1];
+            cblas_zscal( m-h, CBLAS_SADDR( alpha ), L, 1 );
+        }
+
+        /*
+         * h is compared only to n, because if we are on the last column of a
+         * tile, m might be much smaller than n, and still we need to apply
+         * the geru call. If this is the diagonal tile, we will just look for
+         * the next maximum for nothing.
+         */
+        if ( h < n ) {
+            /* Applying the update */
+            cblas_zgeru(CblasColMajor, m-h, n-h,
+                        CBLAS_SADDR(mzone),
+                        L,          1,
+                        pivrow + h, 1,
+                        subA,       lda );
+        }
+        else {
+          return 0;
+        }
+    }
+
+    /* Looking for the local max index */
+    {
+        CHAMELEON_Complex64_t *next_pivrow = (CHAMELEON_Complex64_t *)(nextpiv->pivrow);
+        int                    index;
+
+        index = cblas_izamax( m-h, subA, 1 );
+        alpha = subA[index];
+
+        if ( cabs(alpha) > cabs(next_pivrow[h]) ) {
+            /* We save this index as a potential pivot for the next column of the panel  */
+            nextpiv->blkm0  = m0;                   /* block index */
+            nextpiv->blkidx = index + h;            /* index in local numbering */
+
+            /* Save the row holding the selected pivot */
+            cblas_zcopy( n, A + index + h, lda,
+                            next_pivrow,   1 );
+        }
+    }
+
+    /* Store current diagonal row (in full) into pivot structure */
+    cblas_zcopy( n, A + h,            lda,
+                    nextpiv->diagrow, 1 );
+    return 0;
+}
+
+/**
+ ******************************************************************************
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ * CORE_zgetrf_panel_offdiag computes the LU factorization of a single column on
+ * an off-diagonal block. Works in combination with CORE_zgetrf_panel_diag() to
+ * perform the full LU factorization with partial pivoting of a single column in
+ * a panel.
+ *
+ *******************************************************************************
+ *
+ * @param[in] m
+ *         The number of rows of the matrix A. m >= 0.
+ *
+ * @param[in] n
+ *          The number of columns of the matrix A. n >= 0.
+ *
+ * @param[in] h
+ *         The index of the column to factorize in the matrix A.
+ *
+ * @param[in,out] A
+ *          On entry, the matrix A where column h-1 needs to be factorized, and
+ *          pivot for column h needs to be selected.
+ *          The lower pentagonal part m-by-(h-1) are the L factors of the LU
+ *          decomposition of A for the first (h-1) columns/rows.
+ *          The upper pentagonal part (h-1)-by-n are the U factors of the LU
+ *          decomposition of A for the first (h-1) columns/rows.
+ *          On exit, A is factorized up to the (h-1)^th column/row and the
+ *          remaining part is updated. The pivot of column h is selected.
+ *
+ * @param[in] lda
+ *          The leading dimension of the array A. lda >= max(1,m).
+ *
+ * @param[in,out] nextpiv
+ *          On entry, the allocated and initialized CHAM_piv_t structure to
+ *          store the information related to pivot at stage h.
+ *          On exit, the diagrow is initialized with the diagonal h row from A,
+ *          and the pivot row is updated if a better pivot is found for step h.
+ *
+ * @param[in] prevpiv
+ *          Holds the CHAM_piv_t structure of the previous (h-1) step with the
+ *          (h-1) diagonal row, and the pivot selected row for (h-1) step.
+ *          Not referenced if h is equal to 0.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ *
+ */
+int
+CORE_zgetrf_panel_offdiag( int m, int n, int h, int m0,
+                           CHAMELEON_Complex64_t *A, int lda,
+                           CHAM_pivot_t *nextpiv, const CHAM_pivot_t *prevpiv )
+{
+    CHAMELEON_Complex64_t *subA = A + lda * h;
+    CHAMELEON_Complex64_t  alpha;
+
+    if ( h != 0 ) {
+        CHAMELEON_Complex64_t *L, *pivrow;
+
+        int pivot_blkm0  = prevpiv->blkm0;
+        int pivot_blkidx = prevpiv->blkidx;
+
+        pivrow = (CHAMELEON_Complex64_t *)(prevpiv->pivrow);
+
+        /*
+         * Copy the diagonal row in place of the selected one (previous pivot) to the right place if:
+         *    - the previous pivot block was this one
+         *    - it was not the previous row ??
+         */
+        if( pivot_blkm0 == m0 )
+        {
+            cblas_zcopy( n, prevpiv->diagrow, 1,
+                            A + pivot_blkidx, lda );
+        }
+
+        /* Take the pointer to the element below the current diagonal element */
+        L = A + lda * (h-1);
+
+        /* Scaling off-diagonal terms */
+        alpha = (CHAMELEON_Complex64_t)1. / pivrow[h-1];
+        cblas_zscal( m, CBLAS_SADDR( alpha ), L, 1 );
+
+        /*
+         * h is compared only to n, because if we are on the last column of a
+         * tile, m might be much smaller than n, and still we need to apply
+         * the geru call. If this is the diagonal tile, we will just look for
+         * the next maximum for nothing.
+         */
+        if ( h < n ) {
+            /* Applying the update */
+            cblas_zgeru(CblasColMajor, m, n-h,
+                        CBLAS_SADDR(mzone),
+                        L,          1,
+                        pivrow + h, 1,
+                        subA,       lda );
+        }
+        else {
+            return 0;
+        }
+    }
+
+    /* Looking for the local max index */
+    {
+        CHAMELEON_Complex64_t *next_pivrow = (CHAMELEON_Complex64_t *) nextpiv->pivrow;
+        int                    index;
+
+        index = cblas_izamax( m, subA, 1 );
+        alpha = subA[index];
+
+        if ( cabs(alpha) > cabs(next_pivrow[h]) )
+        {
+            /* We save this index as a potential pivot for the next column of the panel */
+            nextpiv->blkm0  = m0;           /* block index */
+            nextpiv->blkidx = index;        /* index in local numbering */
+
+            /* Save the row holding the selected pivot */
+            cblas_zcopy( n, A + index,   lda,
+                            next_pivrow, 1 );
+        }
+    }
+    return 0;
+}
diff --git a/coreblas/include/coreblas/coreblas_z.h b/coreblas/include/coreblas/coreblas_z.h
index 050a7968769a5c37a138faee18ef0a577ee6be19..2b5f36eb8bf436ba3f4cf71bafb3ab47d88c78ad 100644
--- a/coreblas/include/coreblas/coreblas_z.h
+++ b/coreblas/include/coreblas/coreblas_z.h
@@ -86,6 +86,13 @@ int  CORE_zgetrf_incpiv(int M, int N, int IB,
 int CORE_zgetrf_nopiv(int M, int N, int IB,
                       CHAMELEON_Complex64_t *A, int LDA,
                       int *INFO);
+int CORE_zgetrf_panel_diag( int m, int n, int h, int m0,
+                            CHAMELEON_Complex64_t *A, int lda,
+                            int *IPIV,
+                            CHAM_pivot_t *nextpiv, const CHAM_pivot_t *prevpiv);
+int CORE_zgetrf_panel_offdiag( int m, int n, int h, int m0,
+                               CHAMELEON_Complex64_t *A, int lda,
+                               CHAM_pivot_t *nextpiv, const CHAM_pivot_t *prevpiv );
 int  CORE_zgetrf_reclap(int M, int N,
                         CHAMELEON_Complex64_t *A, int LDA,
                         int *IPIV, int *info);
diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h
index a6175854e0a3dcdf4841e2d5a482d2856ef2bfb2..2b324ef54d9ef40113498052627b4a06714c7a2b 100644
--- a/include/chameleon/struct.h
+++ b/include/chameleon/struct.h
@@ -35,6 +35,16 @@ BEGIN_C_DECLS
 #define CHAMELEON_TILE_DESC     (1 << 1)
 #define CHAMELEON_TILE_HMAT     (1 << 2)
 
+/**
+ * @brief CHAMELEON structure to hold pivot informations for the LU factorization with partial pivoting
+ */
+typedef struct chameleon_pivot_s {
+    int   blkm0;   /**> The row index of the first row in the tile where the pivot has been selected */
+    int   blkidx;  /**> The relative row index in the tile where the pivot has been selected         */
+    void *pivrow;  /**> The copy of the row with the selected pivot                                  */
+    void *diagrow; /**> The copy of the diagonal row to permute                                      */
+} CHAM_pivot_t;
+
 typedef struct chameleon_tile_s {
 #if defined(CHAMELEON_KERNELS_TRACE)
     char  *name;