Commit 65e5f4df authored by Berenger Bramas

Update examples -- use the unaligned store (vec_xst) instead of vec_st with a temporary buffer for non-aligned memory in the AltiVec examples

parent 5345cde8
......@@ -306,12 +306,7 @@ inline void InaVecALTIVEC_exp(const float inVal[], float outVal[]) {
__vector int castedInteger = vec_cts(COEFF_A * x + COEFF_B, 0);
vec = reinterpret_cast<__vector float>(castedInteger);
alignas(16) float tmpptr[4];
vec_st(vec, 0, tmpptr);
outVal[0] = tmpptr[0];
outVal[1] = tmpptr[1];
outVal[2] = tmpptr[2];
outVal[3] = tmpptr[3];
vec_xst(vec, 0, outVal);
}
inline void InaVecALTIVEC_exp(const double inVal[], double outVal[]) {
......@@ -347,7 +342,7 @@ inline void InaVecALTIVEC_exp(const double inVal[], double outVal[]) {
ltmpptr[0] = long(tmpptr[0]);
ltmpptr[1] = long(tmpptr[1]);
vec = reinterpret_cast<__vector double>(vec_xl(0, ltmpptr));
vec_st(reinterpret_cast<__vector unsigned int>(vec), 0, reinterpret_cast<unsigned int*>(outVal));
vec_xst(reinterpret_cast<__vector unsigned int>(vec), 0, reinterpret_cast<unsigned int*>(outVal));
}
#endif
......
......@@ -631,13 +631,7 @@ void InaVecALTIVEC_ScalarGemmInaV2(const float* __restrict__ A, const float* __r
float* __restrict__ ptrC = &C[(jp+jb)*size + ip + ib];
for(size_t idxCol = 0 ; idxCol < BlockSize ; ++idxCol){
__vector float res = sum[idxCol] + vec_xl(0, &ptrC[idxCol*size]);
alignas(16) float tmpptr[4];
vec_st(res, 0, tmpptr);
ptrC[idxCol*size+0] = tmpptr[0];
ptrC[idxCol*size+1] = tmpptr[1];
ptrC[idxCol*size+2] = tmpptr[2];
ptrC[idxCol*size+3] = tmpptr[3];
vec_xst(res, 0, &ptrC[idxCol*size]);
}
}
}
......@@ -700,10 +694,7 @@ void InaVecALTIVEC_ScalarGemmInaV2(const double* __restrict__ A, const double* _
double* __restrict__ ptrC = &C[(jp+jb)*size + ip + ib];
for(size_t idxCol = 0 ; idxCol < BlockSize ; ++idxCol){
__vector double res = sum[idxCol] + vec_xl(0, &ptrC[idxCol*size]);
alignas(16) double tmpptr[2];
vec_st( reinterpret_cast<__vector unsigned int>(res), 0, reinterpret_cast<unsigned int*>(tmpptr));
ptrC[idxCol*size+0] = tmpptr[0];
ptrC[idxCol*size+1] = tmpptr[1];
vec_xst( reinterpret_cast<__vector unsigned int>(res), 0, reinterpret_cast<unsigned int*>(&ptrC[idxCol*size]));
}
}
}
......
......@@ -152,10 +152,7 @@ void HandVectorizedFunctionALTIVEC(const size_t nbParticles, const double* __res
const __vector double currentSource = vec_xl(0, &potentials[idxSource]);
alignas(16) double tmpptr[2];
vec_st(reinterpret_cast<__vector unsigned int>(resSource+currentSource), 0, reinterpret_cast<unsigned int*>(tmpptr));
potentials[idxSource+0] = tmpptr[0];
potentials[idxSource+1] = tmpptr[1];
vec_xst(reinterpret_cast<__vector unsigned int>(resSource+currentSource), 0, reinterpret_cast<unsigned int*>(&potentials[idxSource]));
}
potentials[idxTarget] += InaVecALTIVEC<double>(targetPotential).horizontalSum();
......@@ -224,12 +221,7 @@ void HandVectorizedFunctionALTIVEC(const size_t nbParticles, const float* __rest
vec_and(vec_nand(reinterpret_cast<__vector unsigned int>(testRes),reinterpret_cast<__vector unsigned int>(testRes)), reinterpret_cast<__vector unsigned int>(targetPhysicalValue-VecConstantIfCut))));
const __vector float currentSource = vec_xl(0, &potentials[idxSource]);
alignas(16) float tmpptr[4];
vec_st(resSource+currentSource, 0, tmpptr);
potentials[idxSource+0] = tmpptr[0];
potentials[idxSource+1] = tmpptr[1];
potentials[idxSource+2] = tmpptr[2];
potentials[idxSource+3] = tmpptr[3];
vec_xst(resSource+currentSource, 0, &potentials[idxSource]);
}
potentials[idxTarget] += InaVecALTIVEC<float>(targetPotential).horizontalSum();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment