diff options
Diffstat (limited to 'lib/raid6/recov_neon_inner.c')
| -rw-r--r-- | lib/raid6/recov_neon_inner.c | 19 | 
1 files changed, 8 insertions, 11 deletions
diff --git a/lib/raid6/recov_neon_inner.c b/lib/raid6/recov_neon_inner.c index 8cd20c9f834a..f13c07f82297 100644 --- a/lib/raid6/recov_neon_inner.c +++ b/lib/raid6/recov_neon_inner.c @@ -10,11 +10,6 @@  #include <arm_neon.h> -static const uint8x16_t x0f = { -	0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, -	0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, -}; -  #ifdef CONFIG_ARM  /*   * AArch32 does not provide this intrinsic natively because it does not @@ -41,6 +36,7 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,  	uint8x16_t pm1 = vld1q_u8(pbmul + 16);  	uint8x16_t qm0 = vld1q_u8(qmul);  	uint8x16_t qm1 = vld1q_u8(qmul + 16); +	uint8x16_t x0f = vdupq_n_u8(0x0f);  	/*  	 * while ( bytes-- ) { @@ -60,14 +56,14 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,  		px = veorq_u8(vld1q_u8(p), vld1q_u8(dp));  		vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq)); -		vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4); +		vy = vshrq_n_u8(vx, 4);  		vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f)); -		vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f)); +		vy = vqtbl1q_u8(qm1, vy);  		qx = veorq_u8(vx, vy); -		vy = (uint8x16_t)vshrq_n_s16((int16x8_t)px, 4); +		vy = vshrq_n_u8(px, 4);  		vx = vqtbl1q_u8(pm0, vandq_u8(px, x0f)); -		vy = vqtbl1q_u8(pm1, vandq_u8(vy, x0f)); +		vy = vqtbl1q_u8(pm1, vy);  		vx = veorq_u8(vx, vy);  		db = veorq_u8(vx, qx); @@ -87,6 +83,7 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,  {  	uint8x16_t qm0 = vld1q_u8(qmul);  	uint8x16_t qm1 = vld1q_u8(qmul + 16); +	uint8x16_t x0f = vdupq_n_u8(0x0f);  	/*  	 * while (bytes--) { @@ -100,9 +97,9 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,  		vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq)); -		vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4); +		vy = vshrq_n_u8(vx, 4);  		vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f)); -		vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f)); +		vy = vqtbl1q_u8(qm1, vy);  		vx = veorq_u8(vx, vy);  		vy = veorq_u8(vx, vld1q_u8(p));  |