3atv精品不卡视频,97人人超碰国产精品最新,中文字幕av一区二区三区人妻少妇,久久久精品波多野结衣,日韩一区二区三区精品

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 运维知识 > Android > 内容正文

Android

clsq客户端android,Android NDK开发之 arm_neon.h文件ABI说明

發布時間:2025/3/15 Android 32 豆豆
生活随笔 收集整理的這篇文章主要介紹了 clsq客户端android,Android NDK开发之 arm_neon.h文件ABI说明 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

這裡是官網的指令說明:

http://infocenter.arm.com/help/basic/help.jsp?topic=/com.arm.doc.dui0204ic/

下面是對應arm_neon頭文件中,對應的ABI說明。

//To be maintained and updated over time

//2016_1_5 14:30

/* Stop the build with an explanatory message when __ARM_NEON__ is not defined. */
#ifndef __ARM_NEON__

#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h

#endif

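/*(1) Normal instructions: produce a result vector of the same size, and usually the same
      type, as the operand vectors;

(2) Long instructions: operate on doubleword vector operands and produce a quadword vector
    result. The result elements are usually twice the width of the operand elements and of
    the same type;

(3) Wide instructions: operate on one doubleword vector operand and one quadword vector
    operand and produce a quadword vector result. The result elements and the elements of
    the first operand are twice the width of the elements of the second operand;

(4) Narrow instructions: operate on quadword vector operands and produce a doubleword vector
    result. The result elements are usually half the width of the operand elements;

(5) Saturating instructions: a result outside the range of the data type is automatically
    clamped to that range.*/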

/******************************************************Addition*************************/

/*--1. Vector add (normal instruction): vadd -> ri = ai + bi; r, a, b have equal lane sizes.
    The trailing // comments cite the x86 SSE intrinsic the author pairs with each
    declaration.--*/

int8x8_t vadd_s8 (int8x8_t __a, int8x8_t __b);//_mm_add_epi8

int16x4_t vadd_s16 (int16x4_t __a, int16x4_t __b);//_mm_add_epi16

int32x2_t vadd_s32 (int32x2_t __a, int32x2_t __b);//_mm_add_epi32

int64x1_t vadd_s64 (int64x1_t __a, int64x1_t __b);//_mm_add_epi64

//_mm_add_ps, SSE, use only low 64 bits

float32x2_t vadd_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vadd_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_add_epi8

uint16x4_t vadd_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_add_epi16

uint32x2_t vadd_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_add_epi32

uint64x1_t vadd_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_add_epi64

int8x16_t vaddq_s8 (int8x16_t __a, int8x16_t __b);//_mm_add_epi8

int16x8_t vaddq_s16 (int16x8_t __a, int16x8_t __b);//_mm_add_epi16

int32x4_t vaddq_s32 (int32x4_t __a, int32x4_t __b);//_mm_add_epi32

int64x2_t vaddq_s64 (int64x2_t __a, int64x2_t __b);//_mm_add_epi64

float32x4_t vaddq_f32 (float32x4_t __a, float32x4_t __b);//_mm_add_ps

uint8x16_t vaddq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_add_epi8

uint16x8_t vaddq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_add_epi16

uint32x4_t vaddq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_add_epi32

uint64x2_t vaddq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_add_epi64

/*--2. Vector long add (long instruction): vaddl -> ri = ai + bi; a, b have equal lane sizes,
    result is a 128 bit vector of lanes that are twice the width--*/

int16x8_t vaddl_s8 (int8x8_t __a, int8x8_t __b);

int32x4_t vaddl_s16 (int16x4_t __a, int16x4_t __b);

int64x2_t vaddl_s32 (int32x2_t __a, int32x2_t __b);

uint16x8_t vaddl_u8 (uint8x8_t __a, uint8x8_t __b);

uint32x4_t vaddl_u16 (uint16x4_t __a, uint16x4_t __b);

uint64x2_t vaddl_u32 (uint32x2_t __a, uint32x2_t __b);

/*--3. Vector wide add (wide instruction): vaddw -> ri = ai + bi;
    __a and the result are 128-bit vectors, __b is a 64-bit vector whose lanes are half
    as wide--*/

int16x8_t vaddw_s8 (int16x8_t __a, int8x8_t __b);

int32x4_t vaddw_s16 (int32x4_t __a, int16x4_t __b);

int64x2_t vaddw_s32 (int64x2_t __a, int32x2_t __b);

uint16x8_t vaddw_u8 (uint16x8_t __a, uint8x8_t __b);

uint32x4_t vaddw_u16 (uint32x4_t __a, uint16x4_t __b);

uint64x2_t vaddw_u32 (uint64x2_t __a, uint32x2_t __b);

/*--4、Vector halving add: vhadd -> ri = (ai + bi) >> 1;

shifts each result right one bit, Results are truncated--*/

int8x8_t vhadd_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vhadd_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vhadd_s32 (int32x2_t __a, int32x2_t __b);

uint8x8_t vhadd_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vhadd_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vhadd_u32 (uint32x2_t __a, uint32x2_t __b);

int8x16_t vhaddq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vhaddq_s16 (int16x8_t __a, int16x8_t __b)

int32x4_t vhaddq_s32 (int32x4_t __a, int32x4_t __b)

uint8x16_t vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)

uint16x8_t vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)

uint32x4_t vhaddq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--5. Vector rounding halving add: vrhadd -> ri = (ai + bi + 1) >> 1;
    each sum is shifted right by one bit; results are rounded (half rounds up).
    The trailing // comments cite the x86 SSE intrinsic the author pairs with a
    declaration.--*/

int8x8_t vrhadd_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vrhadd_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vrhadd_s32 (int32x2_t __a, int32x2_t __b);

uint8x8_t vrhadd_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_avg_epu8

uint16x4_t vrhadd_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_avg_epu16

uint32x2_t vrhadd_u32 (uint32x2_t __a, uint32x2_t __b);

int8x16_t vrhaddq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vrhaddq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vrhaddq_s32 (int32x4_t __a, int32x4_t __b);

uint8x16_t vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_avg_epu8

uint16x8_t vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_avg_epu16

uint32x4_t vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--6. Vector saturating add (saturating instruction): vqadd -> ri = sat(ai + bi);
    the results are saturated if they overflow.
    The trailing // comments cite the x86 SSE intrinsic the author pairs with a
    declaration.--*/

int8x8_t vqadd_s8 (int8x8_t __a, int8x8_t __b);//_mm_adds_epi8

int16x4_t vqadd_s16 (int16x4_t __a, int16x4_t __b);//_mm_adds_epi16

int32x2_t vqadd_s32 (int32x2_t __a, int32x2_t __b);

int64x1_t vqadd_s64 (int64x1_t __a, int64x1_t __b);

uint8x8_t vqadd_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_adds_epu8

uint16x4_t vqadd_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_adds_epu16

uint32x2_t vqadd_u32 (uint32x2_t __a, uint32x2_t __b);

uint64x1_t vqadd_u64 (uint64x1_t __a, uint64x1_t __b);

int8x16_t vqaddq_s8 (int8x16_t __a, int8x16_t __b);//_mm_adds_epi8

int16x8_t vqaddq_s16 (int16x8_t __a, int16x8_t __b);//_mm_adds_epi16

int32x4_t vqaddq_s32 (int32x4_t __a, int32x4_t __b);

int64x2_t vqaddq_s64 (int64x2_t __a, int64x2_t __b);

uint8x16_t vqaddq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_adds_epu8

uint16x8_t vqaddq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_adds_epu16

uint32x4_t vqaddq_u32 (uint32x4_t __a, uint32x4_t __b);

uint64x2_t vqaddq_u64 (uint64x2_t __a, uint64x2_t __b);

/*--7. Vector add high half (narrow instruction): vaddhn -> ri = ai + bi;
    selecting the high half of each sum. The results are truncated.
    The result is a 64-bit vector whose lanes are half as wide as the operand lanes.--*/

int8x8_t vaddhn_s16 (int16x8_t __a, int16x8_t __b);

int16x4_t vaddhn_s32 (int32x4_t __a, int32x4_t __b);

int32x2_t vaddhn_s64 (int64x2_t __a, int64x2_t __b);

uint8x8_t vaddhn_u16 (uint16x8_t __a, uint16x8_t __b);

uint16x4_t vaddhn_u32 (uint32x4_t __a, uint32x4_t __b);

uint32x2_t vaddhn_u64 (uint64x2_t __a, uint64x2_t __b);

/*--8、Vector rounding add high half(窄指令): vraddhn -> ri = ai + bi;

selecting High half, The results are rounded--*/

int8x8_t vraddhn_s16 (int16x8_t __a, int16x8_t __b);

int16x4_t vraddhn_s32 (int32x4_t __a, int32x4_t __b)

int32x2_t vraddhn_s64 (int64x2_t __a, int64x2_t __b)

uint8x8_t vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)

uint16x4_t vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)

uint32x2_t vraddhn_u64 (uint64x2_t __a, uint64x2_t __b);

/*******************************************Multiplication******************************/

/*--1. Vector multiply (normal instruction): vmul -> ri = ai * bi;
    operands and result share the same vector type.
    The trailing // comments cite the x86 SSE intrinsic the author pairs with a
    declaration.--*/

int8x8_t vmul_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vmul_s16 (int16x4_t __a, int16x4_t __b);//_mm_mullo_epi16

int32x2_t vmul_s32 (int32x2_t __a, int32x2_t __b);

float32x2_t vmul_f32 (float32x2_t __a, float32x2_t __b);//_mm_mul_ps

uint8x8_t vmul_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vmul_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_mullo_epi16

uint32x2_t vmul_u32 (uint32x2_t __a, uint32x2_t __b);

poly8x8_t vmul_p8 (poly8x8_t __a, poly8x8_t __b);

int8x16_t vmulq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vmulq_s16 (int16x8_t __a, int16x8_t __b);//_mm_mullo_epi16

int32x4_t vmulq_s32 (int32x4_t __a, int32x4_t __b);

float32x4_t vmulq_f32 (float32x4_t __a, float32x4_t __b);//_mm_mul_ps

uint8x16_t vmulq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vmulq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_mullo_epi16

uint32x4_t vmulq_u32 (uint32x4_t __a, uint32x4_t __b);

poly8x16_t vmulq_p8 (poly8x16_t __a, poly8x16_t __b);

/*--2. Vector multiply accumulate: vmla -> ri = ai + bi * ci;
    __a is the accumulator, __b and __c are the factors; all three share the result type.--*/

int8x8_t vmla_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);

int16x4_t vmla_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c);

int32x2_t vmla_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c);

float32x2_t vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c);

uint8x8_t vmla_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);

uint16x4_t vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);

uint32x2_t vmla_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);

int8x16_t vmlaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c);

int16x8_t vmlaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c);

int32x4_t vmlaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c);

float32x4_t vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);

uint8x16_t vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);

uint16x8_t vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);

uint32x4_t vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);

/*--3. Vector multiply accumulate long: vmlal -> ri = ai + bi * ci;
    the accumulator __a and the result are 128-bit vectors; the factors __b and __c are
    64-bit vectors whose lanes are half as wide.--*/

int16x8_t vmlal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c);

int32x4_t vmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);

int64x2_t vmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);

uint16x8_t vmlal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c);

uint32x4_t vmlal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c);

uint64x2_t vmlal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c);

/*--4. Vector multiply subtract: vmls -> ri = ai - bi * ci;
    __a is the minuend, __b and __c are the factors; all three share the result type.--*/

int8x8_t vmls_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);

int16x4_t vmls_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c);

int32x2_t vmls_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c);

float32x2_t vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c);

uint8x8_t vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);

uint16x4_t vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);

uint32x2_t vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);

int8x16_t vmlsq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c);

int16x8_t vmlsq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c);

int32x4_t vmlsq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c);

float32x4_t vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);

uint8x16_t vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);

uint16x8_t vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);

uint32x4_t vmlsq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);

/*--5. Vector multiply subtract long: vmlsl -> ri = ai - bi * ci;
    __a and the result are 128-bit vectors; the factors __b and __c are 64-bit vectors
    whose lanes are half as wide.--*/

int16x8_t vmlsl_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c);

int32x4_t vmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);

int64x2_t vmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);

uint16x8_t vmlsl_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c);

uint32x4_t vmlsl_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c);

uint64x2_t vmlsl_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c);

/*--6. Vector saturating doubling multiply high: vqdmulh -> ri = high half of sat(2 * ai * bi);
    doubles the results and returns only the high half of the truncated results--*/

int16x4_t vqdmulh_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vqdmulh_s32 (int32x2_t __a, int32x2_t __b);

int16x8_t vqdmulhq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vqdmulhq_s32 (int32x4_t __a, int32x4_t __b);

/*--7. Vector saturating rounding doubling multiply high:
    vqrdmulh -> ri = high half of sat(2 * ai * bi), rounded;
    doubles the results and returns only the high half of the rounded results.
    The results are saturated if they overflow--*/

int16x4_t vqrdmulh_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vqrdmulh_s32 (int32x2_t __a, int32x2_t __b);

int16x8_t vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b);

/*--8. Vector saturating doubling multiply accumulate long:
    vqdmlal -> ri = sat(ai + 2 * bi * ci);
    multiplies the elements in the second and third vectors, doubles the results and adds the
    results to the values in the first vector. The results are saturated if they overflow--*/

int32x4_t vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);

int64x2_t vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);

/*--9. Vector saturating doubling multiply subtract long:
    vqdmlsl -> ri = sat(ai - 2 * bi * ci);
    multiplies the elements in the second and third vectors, doubles the results and subtracts
    the results from the elements in the first vector.
    The results are saturated if they overflow--*/

int32x4_t vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);

int64x2_t vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);

/*--10. Vector long multiply (long instruction): vmull -> ri = ai * bi;
    the operands are 64-bit vectors and the result is a 128-bit vector whose lanes are
    twice as wide.--*/

int16x8_t vmull_s8 (int8x8_t __a, int8x8_t __b);

int32x4_t vmull_s16 (int16x4_t __a, int16x4_t __b);

int64x2_t vmull_s32 (int32x2_t __a, int32x2_t __b);

uint16x8_t vmull_u8 (uint8x8_t __a, uint8x8_t __b);

uint32x4_t vmull_u16 (uint16x4_t __a, uint16x4_t __b);

uint64x2_t vmull_u32 (uint32x2_t __a, uint32x2_t __b);

poly16x8_t vmull_p8 (poly8x8_t __a, poly8x8_t __b);

/*--11. Vector saturating doubling long multiply: vqdmull -> ri = sat(2 * ai * bi);
    If any of the results overflow, they are saturated.
    The result lanes are twice as wide as the operand lanes.--*/

int32x4_t vqdmull_s16 (int16x4_t __a, int16x4_t __b);

int64x2_t vqdmull_s32 (int32x2_t __a, int32x2_t __b);

/*--12、Fused multiply accumulate: vfma -> ri = ai + bi * ci;

The result of the multiply is not rounded before the accumulation--*/

float32x2_t vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)

float32x4_t vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);

/*--13. Fused multiply subtract: vfms -> ri = ai - bi * ci;
    The result of the multiply is not rounded before the subtraction.
    __a is the minuend, __b and __c are the factors.--*/

float32x2_t vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c);

float32x4_t vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);

/******************************************************Round to integral****************/

/* Round each float lane to an integral value; each group below uses a different rounding
   direction. Operand and result share the same vector type.
   NOTE(review): the 128-bit variants here are spelled vrndq<mode>_f32, whereas every other
   128-bit form in this listing puts the q directly before the type suffix (e.g. vaddq_f32).
   Confirm these names against the toolchain's arm_neon.h before relying on them. */

/*--1. to nearest, ties to even--*/

float32x2_t vrndn_f32 (float32x2_t __a);

float32x4_t vrndqn_f32 (float32x4_t __a);

/*--2. to nearest, ties away from zero--*/

float32x2_t vrnda_f32 (float32x2_t __a);

float32x4_t vrndqa_f32 (float32x4_t __a);

/*--3. towards +Inf--*/

float32x2_t vrndp_f32 (float32x2_t __a);

float32x4_t vrndqp_f32 (float32x4_t __a);

/*--4. towards -Inf--*/

float32x2_t vrndm_f32 (float32x2_t __a);

float32x4_t vrndqm_f32 (float32x4_t __a);

/*--5. towards 0--*/

float32x2_t vrnd_f32 (float32x2_t __a);

float32x4_t vrndq_f32 (float32x4_t __a);

/**********************************************Subtraction******************************/

/*--1. Vector subtract (normal instruction): vsub -> ri = ai - bi;
    operands and result share the same vector type.
    The trailing // comments cite the x86 SSE intrinsic the author pairs with each
    declaration.--*/

int8x8_t vsub_s8 (int8x8_t __a, int8x8_t __b);//_mm_sub_epi8

int16x4_t vsub_s16 (int16x4_t __a, int16x4_t __b);//_mm_sub_epi16

int32x2_t vsub_s32 (int32x2_t __a, int32x2_t __b);//_mm_sub_epi32

int64x1_t vsub_s64 (int64x1_t __a, int64x1_t __b);//_mm_sub_epi64

float32x2_t vsub_f32 (float32x2_t __a, float32x2_t __b);//_mm_sub_ps

uint8x8_t vsub_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_sub_epi8

uint16x4_t vsub_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_sub_epi16

uint32x2_t vsub_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_sub_epi32

uint64x1_t vsub_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_sub_epi64

int8x16_t vsubq_s8 (int8x16_t __a, int8x16_t __b);//_mm_sub_epi8

int16x8_t vsubq_s16 (int16x8_t __a, int16x8_t __b);//_mm_sub_epi16

int32x4_t vsubq_s32 (int32x4_t __a, int32x4_t __b);//_mm_sub_epi32

int64x2_t vsubq_s64 (int64x2_t __a, int64x2_t __b);//_mm_sub_epi64

float32x4_t vsubq_f32 (float32x4_t __a, float32x4_t __b);//_mm_sub_ps

uint8x16_t vsubq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_sub_epi8

uint16x8_t vsubq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_sub_epi16

uint32x4_t vsubq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_sub_epi32

uint64x2_t vsubq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_sub_epi64

/*--2. Vector long subtract (long instruction): vsubl -> ri = ai - bi;
    the operands are 64-bit vectors and the result is a 128-bit vector whose lanes are
    twice as wide.--*/

int16x8_t vsubl_s8 (int8x8_t __a, int8x8_t __b);

int32x4_t vsubl_s16 (int16x4_t __a, int16x4_t __b);

int64x2_t vsubl_s32 (int32x2_t __a, int32x2_t __b);

uint16x8_t vsubl_u8 (uint8x8_t __a, uint8x8_t __b);

uint32x4_t vsubl_u16 (uint16x4_t __a, uint16x4_t __b);

uint64x2_t vsubl_u32 (uint32x2_t __a, uint32x2_t __b);

/*--3. Vector wide subtract (wide instruction): vsubw -> ri = ai - bi;
    __a and the result are 128-bit vectors, __b is a 64-bit vector whose lanes are half
    as wide.--*/

int16x8_t vsubw_s8 (int16x8_t __a, int8x8_t __b);

int32x4_t vsubw_s16 (int32x4_t __a, int16x4_t __b);

int64x2_t vsubw_s32 (int64x2_t __a, int32x2_t __b);

uint16x8_t vsubw_u8 (uint16x8_t __a, uint8x8_t __b);

uint32x4_t vsubw_u16 (uint32x4_t __a, uint16x4_t __b);

uint64x2_t vsubw_u32 (uint64x2_t __a, uint32x2_t __b);

/*--4. Vector saturating subtract (saturating instruction): vqsub -> ri = sat(ai - bi);
    If any of the results overflow, they are saturated.
    The trailing // comments cite the x86 SSE intrinsic the author pairs with a declaration.
    NOTE(review): the _mm_subs_epi32 / _mm_subs_epu32 names cited on the 32-bit lanes should
    be checked against the Intel intrinsics reference; the vqadd list in this file cites
    no SSE counterpart for its 32-bit lanes.--*/

int8x8_t vqsub_s8 (int8x8_t __a, int8x8_t __b);//_mm_subs_epi8

int16x4_t vqsub_s16 (int16x4_t __a, int16x4_t __b);//_mm_subs_epi16

int32x2_t vqsub_s32 (int32x2_t __a, int32x2_t __b);//_mm_subs_epi32

int64x1_t vqsub_s64 (int64x1_t __a, int64x1_t __b);

uint8x8_t vqsub_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_subs_epu8

uint16x4_t vqsub_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_subs_epu16

uint32x2_t vqsub_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_subs_epu32

uint64x1_t vqsub_u64 (uint64x1_t __a, uint64x1_t __b);

int8x16_t vqsubq_s8 (int8x16_t __a, int8x16_t __b);//_mm_subs_epi8

int16x8_t vqsubq_s16 (int16x8_t __a, int16x8_t __b);//_mm_subs_epi16

int32x4_t vqsubq_s32 (int32x4_t __a, int32x4_t __b);//_mm_subs_epi32

int64x2_t vqsubq_s64 (int64x2_t __a, int64x2_t __b);

uint8x16_t vqsubq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_subs_epu8

uint16x8_t vqsubq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_subs_epu16

uint32x4_t vqsubq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_subs_epu32

uint64x2_t vqsubq_u64 (uint64x2_t __a, uint64x2_t __b);

/*--5. Vector halving subtract: vhsub -> ri = (ai - bi) >> 1;
    each difference is shifted right by one bit. The results are truncated.--*/

int8x8_t vhsub_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vhsub_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vhsub_s32 (int32x2_t __a, int32x2_t __b);

uint8x8_t vhsub_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vhsub_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vhsub_u32 (uint32x2_t __a, uint32x2_t __b);

int8x16_t vhsubq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vhsubq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vhsubq_s32 (int32x4_t __a, int32x4_t __b);

uint8x16_t vhsubq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vhsubq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vhsubq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--6. Vector subtract high half (narrow instruction): vsubhn -> ri = ai - bi;
    It returns the most significant halves of the results. The results are truncated.
    The result is a 64-bit vector whose lanes are half as wide as the operand lanes.--*/

int8x8_t vsubhn_s16 (int16x8_t __a, int16x8_t __b);

int16x4_t vsubhn_s32 (int32x4_t __a, int32x4_t __b);

int32x2_t vsubhn_s64 (int64x2_t __a, int64x2_t __b);

uint8x8_t vsubhn_u16 (uint16x8_t __a, uint16x8_t __b);

uint16x4_t vsubhn_u32 (uint32x4_t __a, uint32x4_t __b);

uint32x2_t vsubhn_u64 (uint64x2_t __a, uint64x2_t __b);

/*--7、Vector rounding subtract high half(窄指令): vrsubhn -> ai - bi;

It returns the most significant halves of the results. The results are rounded--*/

int8x8_t vrsubhn_s16 (int16x8_t __a, int16x8_t __b);

int16x4_t vrsubhn_s32 (int32x4_t __a, int32x4_t __b);

int32x2_t vrsubhn_s64 (int64x2_t __a, int64x2_t __b)

uint8x8_t vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b);

uint16x4_t vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b);

uint32x2_t vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b);

/******************************************************Comparison***********************/

/*--1. Vector compare equal (normal instruction): vceq -> ri = ai == bi ? 1...1 : 0...0;
    If they are equal, the corresponding element in the destination vector is set to all ones.
    Otherwise, it is set to all zeros.
    The result is an unsigned vector with the same lane width and count as the operands.
    The trailing // comments cite the x86 SSE intrinsic the author pairs with a
    declaration.--*/

uint8x8_t vceq_s8 (int8x8_t __a, int8x8_t __b);//_mm_cmpeq_epi8

uint16x4_t vceq_s16 (int16x4_t __a, int16x4_t __b);//_mm_cmpeq_epi16

uint32x2_t vceq_s32 (int32x2_t __a, int32x2_t __b);//_mm_cmpeq_epi32

uint32x2_t vceq_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vceq_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_cmpeq_epi8

uint16x4_t vceq_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_cmpeq_epi16

uint32x2_t vceq_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_cmpeq_epi32

uint8x8_t vceq_p8 (poly8x8_t __a, poly8x8_t __b);//_mm_cmpeq_epi8

uint8x16_t vceqq_s8 (int8x16_t __a, int8x16_t __b);//_mm_cmpeq_epi8

uint16x8_t vceqq_s16 (int16x8_t __a, int16x8_t __b);//_mm_cmpeq_epi16

uint32x4_t vceqq_s32 (int32x4_t __a, int32x4_t __b);//_mm_cmpeq_epi32

uint32x4_t vceqq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16_t vceqq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_cmpeq_epi8

uint16x8_t vceqq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_cmpeq_epi16

uint32x4_t vceqq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_cmpeq_epi32

uint8x16_t vceqq_p8 (poly8x16_t __a, poly8x16_t __b);//_mm_cmpeq_epi8

/*--2. Vector compare greater-than or equal (normal instruction):
    vcge -> ri = ai >= bi ? 1...1 : 0...0;
    If ai is greater than or equal to bi, the corresponding element in the destination
    vector is set to all ones. Otherwise, it is set to all zeros.--*/

uint8x8_t vcge_s8 (int8x8_t __a, int8x8_t __b);

uint16x4_t vcge_s16 (int16x4_t __a, int16x4_t __b);

uint32x2_t vcge_s32 (int32x2_t __a, int32x2_t __b);

uint32x2_t vcge_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vcge_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vcge_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vcge_u32 (uint32x2_t __a, uint32x2_t __b);

uint8x16_t vcgeq_s8 (int8x16_t __a, int8x16_t __b);

uint16x8_t vcgeq_s16 (int16x8_t __a, int16x8_t __b);

uint32x4_t vcgeq_s32 (int32x4_t __a, int32x4_t __b);

uint32x4_t vcgeq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16_t vcgeq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vcgeq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vcgeq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--3. Vector compare less-than or equal (normal instruction):
    vcle -> ri = ai <= bi ? 1...1 : 0...0;
    If ai is less than or equal to bi, the corresponding element in the destination vector
    is set to all ones. Otherwise, it is set to all zeros.--*/

uint8x8_t vcle_s8 (int8x8_t __a, int8x8_t __b);

uint16x4_t vcle_s16 (int16x4_t __a, int16x4_t __b);

uint32x2_t vcle_s32 (int32x2_t __a, int32x2_t __b);

uint32x2_t vcle_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vcle_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vcle_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vcle_u32 (uint32x2_t __a, uint32x2_t __b);

uint8x16_t vcleq_s8 (int8x16_t __a, int8x16_t __b);

uint16x8_t vcleq_s16 (int16x8_t __a, int16x8_t __b);

uint32x4_t vcleq_s32 (int32x4_t __a, int32x4_t __b);

uint32x4_t vcleq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16_t vcleq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vcleq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vcleq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--4. Vector compare greater-than (normal instruction):
    vcgt -> ri = ai > bi ? 1...1 : 0...0;
    If ai is greater than bi, the corresponding element in the destination vector is
    set to all ones. Otherwise, it is set to all zeros--*/

uint8x8_t vcgt_s8 (int8x8_t __a, int8x8_t __b);

uint16x4_t vcgt_s16 (int16x4_t __a, int16x4_t __b);

uint32x2_t vcgt_s32 (int32x2_t __a, int32x2_t __b);

uint32x2_t vcgt_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vcgt_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vcgt_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vcgt_u32 (uint32x2_t __a, uint32x2_t __b);

uint8x16_t vcgtq_s8 (int8x16_t __a, int8x16_t __b);

uint16x8_t vcgtq_s16 (int16x8_t __a, int16x8_t __b);

uint32x4_t vcgtq_s32 (int32x4_t __a, int32x4_t __b);

uint32x4_t vcgtq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16_t vcgtq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vcgtq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vcgtq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--5. Vector compare less-than (normal instruction):
    vclt -> ri = ai < bi ? 1...1 : 0...0;
    If ai is less than bi, the corresponding element in the destination vector is set
    to all ones. Otherwise, it is set to all zeros--*/

uint8x8_t vclt_s8 (int8x8_t __a, int8x8_t __b);

uint16x4_t vclt_s16 (int16x4_t __a, int16x4_t __b);

uint32x2_t vclt_s32 (int32x2_t __a, int32x2_t __b);

uint32x2_t vclt_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vclt_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vclt_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vclt_u32 (uint32x2_t __a, uint32x2_t __b);

uint8x16_t vcltq_s8 (int8x16_t __a, int8x16_t __b);

uint16x8_t vcltq_s16 (int16x8_t __a, int16x8_t __b);

uint32x4_t vcltq_s32 (int32x4_t __a, int32x4_t __b);

uint32x4_t vcltq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16_t vcltq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vcltq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vcltq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--6. Vector compare absolute greater-than or equal (normal instruction):
    vcage -> ri = |ai| >= |bi| ? 1...1 : 0...0;
    compares the absolute value of each element in a vector with the absolute value of the
    corresponding element of a second vector. If it is greater than or equal to it,
    the corresponding element in the destination vector is set to all ones.
    Otherwise, it is set to all zeros.--*/

uint32x2_t vcage_f32 (float32x2_t __a, float32x2_t __b);

uint32x4_t vcageq_f32 (float32x4_t __a, float32x4_t __b);

/*--7. Vector compare absolute less-than or equal (normal instruction):
    vcale -> ri = |ai| <= |bi| ? 1...1 : 0...0;
    compares the absolute value of each element in a vector with the absolute value of the
    corresponding element of a second vector. If it is less than or equal to it,
    the corresponding element in the destination vector is set to all ones.
    Otherwise, it is set to all zeros--*/

uint32x2_t vcale_f32 (float32x2_t __a, float32x2_t __b);

uint32x4_t vcaleq_f32 (float32x4_t __a, float32x4_t __b);

/*--8. Vector compare absolute greater-than (normal instruction):
    vcagt -> ri = |ai| > |bi| ? 1...1 : 0...0;
    compares the absolute value of each element in a vector with the absolute value of the
    corresponding element of a second vector. If it is greater than it,
    the corresponding element in the destination vector is set to all ones.
    Otherwise, it is set to all zeros.--*/

uint32x2_t vcagt_f32 (float32x2_t __a, float32x2_t __b);

uint32x4_t vcagtq_f32 (float32x4_t __a, float32x4_t __b);

/*--9. Vector compare absolute less-than (normal instruction):
    vcalt -> ri = |ai| < |bi| ? 1...1 : 0...0;
    compares the absolute value of each element in a vector with the absolute value of the
    corresponding element of a second vector. If it is less than it, the corresponding
    element in the destination vector is set to all ones. Otherwise, it is set to all zeros--*/

uint32x2_t vcalt_f32 (float32x2_t __a, float32x2_t __b);

uint32x4_t vcaltq_f32 (float32x4_t __a, float32x4_t __b);

/**********************************************Vector test bits*************************/

/*--Normal instruction, vtst -> ri = ((ai & bi) != 0) ? 1...1 : 0...0;
    bitwise logical ANDs each element in a vector with the corresponding element of a second
    vector. If the result is not zero, the corresponding element in the destination vector
    is set to all ones. Otherwise, it is set to all zeros--*/

uint8x8_t vtst_s8 (int8x8_t __a, int8x8_t __b);

uint16x4_t vtst_s16 (int16x4_t __a, int16x4_t __b);

uint32x2_t vtst_s32 (int32x2_t __a, int32x2_t __b);

uint8x8_t vtst_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vtst_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vtst_u32 (uint32x2_t __a, uint32x2_t __b);

uint8x8_t vtst_p8 (poly8x8_t __a, poly8x8_t __b);

uint8x16_t vtstq_s8 (int8x16_t __a, int8x16_t __b);

uint16x8_t vtstq_s16 (int16x8_t __a, int16x8_t __b);

uint32x4_t vtstq_s32 (int32x4_t __a, int32x4_t __b);

uint8x16_t vtstq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vtstq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vtstq_u32 (uint32x4_t __a, uint32x4_t __b);

uint8x16_t vtstq_p8 (poly8x16_t __a, poly8x16_t __b);

/**********************************************Absolute difference**********************/

/*--1. Absolute difference between the arguments (normal instruction):
    vabd -> ri = |ai - bi|;
    returns the absolute values of the results--*/

int8x8_t vabd_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vabd_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vabd_s32 (int32x2_t __a, int32x2_t __b);

float32x2_t vabd_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vabd_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vabd_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vabd_u32 (uint32x2_t __a, uint32x2_t __b);

int8x16_t vabdq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vabdq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vabdq_s32 (int32x4_t __a, int32x4_t __b);

float32x4_t vabdq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16_t vabdq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vabdq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vabdq_u32 (uint32x4_t __a, uint32x4_t __b);

/*--2. Absolute difference - long (long instruction): vabdl -> ri = |ai - bi|;
    The elements in the result vector are wider: the operands are 64-bit vectors and the
    result is a 128-bit vector with lanes twice as wide.--*/

int16x8_t vabdl_s8 (int8x8_t __a, int8x8_t __b);

int32x4_t vabdl_s16 (int16x4_t __a, int16x4_t __b);

int64x2_t vabdl_s32 (int32x2_t __a, int32x2_t __b);

uint16x8_t vabdl_u8 (uint8x8_t __a, uint8x8_t __b);

uint32x4_t vabdl_u16 (uint16x4_t __a, uint16x4_t __b);

uint64x2_t vabdl_u32 (uint32x2_t __a, uint32x2_t __b);

/*--3. Absolute difference and accumulate: vaba -> ri = ai + |bi - ci|;
    __a is the accumulator; all three operands share the result type.--*/

int8x8_t vaba_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);

int16x4_t vaba_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c);

int32x2_t vaba_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c);

uint8x8_t vaba_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);

uint16x4_t vaba_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);

uint32x2_t vaba_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);

int8x16_t vabaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c);

int16x8_t vabaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c);

int32x4_t vabaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c);

uint8x16_t vabaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);

uint16x8_t vabaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);

uint32x4_t vabaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);

/*--4. Absolute difference and accumulate - long: vabal -> ri = ai + |bi - ci|;
    The elements in the result are wider: the accumulator __a and the result are 128-bit
    vectors, while __b and __c are 64-bit vectors with lanes half as wide.--*/

int16x8_t vabal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c);

int32x4_t vabal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);

int64x2_t vabal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);

uint16x8_t vabal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c);

uint32x4_t vabal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c);

uint64x2_t vabal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c);

/***********************************************Max*************************************/

/*--Normal instruction, vmax -> ri = ai >= bi ? ai : bi; returns the larger of each pair.
    The trailing // comments cite the x86 SSE intrinsic the author pairs with each
    declaration.--*/

int8x8_t vmax_s8 (int8x8_t __a, int8x8_t __b);//_mm_max_epi8

int16x4_t vmax_s16 (int16x4_t __a, int16x4_t __b);//_mm_max_epi16

int32x2_t vmax_s32 (int32x2_t __a, int32x2_t __b);//_mm_max_epi32

float32x2_t vmax_f32 (float32x2_t __a, float32x2_t __b);//_mm_max_ps

uint8x8_t vmax_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_max_epu8

uint16x4_t vmax_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_max_epu16

uint32x2_t vmax_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_max_epu32

int8x16_t vmaxq_s8 (int8x16_t __a, int8x16_t __b);//_mm_max_epi8

int16x8_t vmaxq_s16 (int16x8_t __a, int16x8_t __b);//_mm_max_epi16

int32x4_t vmaxq_s32 (int32x4_t __a, int32x4_t __b);//_mm_max_epi32

float32x4_t vmaxq_f32 (float32x4_t __a, float32x4_t __b);//_mm_max_ps

uint8x16_t vmaxq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_max_epu8

uint16x8_t vmaxq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_max_epu16

uint32x4_t vmaxq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_max_epu32

/****************************************************Min********************************/

/*--Normal instruction, vmin -> ri = ai >= bi ? bi : ai; returns the smaller of each pair.
    The trailing // comments cite the x86 SSE intrinsic the author pairs with each
    declaration.--*/

int8x8_t vmin_s8 (int8x8_t __a, int8x8_t __b);//_mm_min_epi8

int16x4_t vmin_s16 (int16x4_t __a, int16x4_t __b);//_mm_min_epi16

int32x2_t vmin_s32 (int32x2_t __a, int32x2_t __b);//_mm_min_epi32

float32x2_t vmin_f32 (float32x2_t __a, float32x2_t __b);//_mm_min_ps

uint8x8_t vmin_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_min_epu8

uint16x4_t vmin_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_min_epu16

uint32x2_t vmin_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_min_epu32

int8x16_t vminq_s8 (int8x16_t __a, int8x16_t __b);//_mm_min_epi8

int16x8_t vminq_s16 (int16x8_t __a, int16x8_t __b);//_mm_min_epi16

int32x4_t vminq_s32 (int32x4_t __a, int32x4_t __b);//_mm_min_epi32

float32x4_t vminq_f32 (float32x4_t __a, float32x4_t __b);//_mm_min_ps

uint8x16_t vminq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_min_epu8

uint16x8_t vminq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_min_epu16

uint32x4_t vminq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_min_epu32

/*******************************************Pairwise addition***************************/

/*--1. Pairwise add (normal instruction):
    vpadd -> r0 = a0 + a1, ..., r3 = a6 + a7, r4 = b0 + b1, ..., r7 = b6 + b7
    adds adjacent pairs of elements of two vectors,
    and places the results in the destination vector.
    The lane indices above are written for the 8-lane variants.--*/

//r0 = a0 + a1, ...,r3 = a6 + a7, r4 = b0 + b1, ...,r7 = b6 + b7

int8x8_t vpadd_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vpadd_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vpadd_s32 (int32x2_t __a, int32x2_t __b);

float32x2_t vpadd_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vpadd_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vpadd_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vpadd_u32 (uint32x2_t __a, uint32x2_t __b);

/*--2、Long pairwise add: vpaddl -> r0 = a0 + a1, ..., r3 = a6 + a7;

adds adjacent pairs of elements of a vector, sign extends or zero extends the results to

twice their original width, and places the final results in the destination vector--*/

int16x4_t vpaddl_s8 (int8x8_t __a);

int32x2_t vpaddl_s16 (int16x4_t __a);

int64x1_t vpaddl_s32 (int32x2_t __a);

uint16x4_t vpaddl_u8 (uint8x8_t __a);

uint32x2_t vpaddl_u16 (uint16x4_t __a);

uint64x1_t vpaddl_u32 (uint32x2_t __a);

int16x8_t vpaddlq_s8 (int8x16_t __a);

int32x4_t vpaddlq_s16 (int16x8_t __a);

int64x2_t vpaddlq_s32 (int32x4_t __a);

uint16x8_t vpaddlq_u8 (uint8x16_t __a);

uint32x4_t vpaddlq_u16 (uint16x8_t __a);

uint64x2_t vpaddlq_u32 (uint32x4_t __a);

/*--3、Long pairwise add and accumulate:

vpadal -> r0 = a0 + (b0 + b1), ..., r3 = a3 + (b6 + b7);

adds adjacent pairs of elements in the second vector, sign extends or zero extends the

results to twice the original width. It then accumulates this with the corresponding

element in the first vector and places the final results in the destination vector--*/

int16x4_t vpadal_s8 (int16x4_t __a, int8x8_t __b);

int32x2_t vpadal_s16 (int32x2_t __a, int16x4_t __b);

int64x1_t vpadal_s32 (int64x1_t __a, int32x2_t __b);

uint16x4_t vpadal_u8 (uint16x4_t __a, uint8x8_t __b);

uint32x2_t vpadal_u16 (uint32x2_t __a, uint16x4_t __b);

uint64x1_t vpadal_u32 (uint64x1_t __a, uint32x2_t __b);

int16x8_t vpadalq_s8 (int16x8_t __a, int8x16_t __b);

int32x4_t vpadalq_s16 (int32x4_t __a, int16x8_t __b);

int64x2_t vpadalq_s32 (int64x2_t __a, int32x4_t __b);

uint16x8_t vpadalq_u8 (uint16x8_t __a, uint8x16_t __b);

uint32x4_t vpadalq_u16 (uint32x4_t __a, uint16x8_t __b);

uint64x2_t vpadalq_u32 (uint64x2_t __a, uint32x4_t __b);

/**********************************************Folding maximum**************************/

/*--正常指令, vpmax -> r0 = a0 >= a1 ? a0 : a1, ..., r4 = b0 >= b1 ? b0 : b1, ...;

compares adjacent pairs of elements, and copies the larger of each pair into the

destination vector.The maximums from each pair of the first input vector are stored in

the lower half of the destination vector. The maximums from each pair of the second input

vector are stored in the higher half of the destination vector--*/

int8x8_t vpmax_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vpmax_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vpmax_s32 (int32x2_t __a, int32x2_t __b);

float32x2_t vpmax_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vpmax_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vpmax_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vpmax_u32 (uint32x2_t __a, uint32x2_t __b);

/***************************************************Folding minimum*********************/

/*--正常指令, vpmin -> r0 = a0 >= a1 ? a1 : a0, ..., r4 = b0 >= b1 ? b1 : b0, ...;

compares adjacent pairs of elements, and copies the smaller of each pair into the

destination vector.The minimums from each pair of the first input vector are stored in

the lower half of the destination vector. The minimums from each pair of the second

input vector are stored in the higher half of the destination vector.--*/

int8x8_t vpmin_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vpmin_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vpmin_s32 (int32x2_t __a, int32x2_t __b);

float32x2_t vpmin_f32 (float32x2_t __a, float32x2_t __b);

uint8x8_t vpmin_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vpmin_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vpmin_u32 (uint32x2_t __a, uint32x2_t __b);

/***************************************************Reciprocal**************************/

/*--1、正常指令, Newton-Raphson iteration(牛頓 - 拉夫遜迭代): vrecps -> ri = 2 - ai * bi;

performs a Newton-Raphson step for finding the reciprocal. It multiplies the elements of

one vector by the corresponding elements of another vector, subtracts each of the results

from 2, and places the final results into the elements of the destination vector--*/

float32x2_t vrecps_f32 (float32x2_t __a, float32x2_t __b);

float32x4_t vrecpsq_f32 (float32x4_t __a, float32x4_t __b);

/*--2、正常指令, Newton-Raphson iteration: vrsqrts -> ri = (3 - ai * bi) / 2;

performs a Newton-Raphson step for finding the reciprocal square root.

It multiplies the elements of one vector by the corresponding elements of another vector,

subtracts each of the results from 3, divides these results by two, and places

the final results into the elements of the destination vector--*/

float32x2_t vrsqrts_f32 (float32x2_t __a, float32x2_t __b);

float32x4_t vrsqrtsq_f32 (float32x4_t __a, float32x4_t __b);

/************************************************Shifts by signed variable**************/

/*--1、Vector shift left(正常指令): vshl -> ri = ai << bi; (negative values shift right)

left shifts each element in a vector by an amount specified in the corresponding element

in the second input vector. The shift amount is the signed integer value of the least

significant byte of the element in the second input vector. The bits shifted out of each

element are lost.If the signed integer value is negative, it results in a right shift--*/

int8x8_t vshl_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vshl_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vshl_s32 (int32x2_t __a, int32x2_t __b);

int64x1_t vshl_s64 (int64x1_t __a, int64x1_t __b);

uint8x8_t vshl_u8 (uint8x8_t __a, int8x8_t __b);

uint16x4_t vshl_u16 (uint16x4_t __a, int16x4_t __b);

uint32x2_t vshl_u32 (uint32x2_t __a, int32x2_t __b);

uint64x1_t vshl_u64 (uint64x1_t __a, int64x1_t __b);

int8x16_t vshlq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vshlq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vshlq_s32 (int32x4_t __a, int32x4_t __b);

int64x2_t vshlq_s64 (int64x2_t __a, int64x2_t __b);

uint8x16_t vshlq_u8 (uint8x16_t __a, int8x16_t __b);

uint16x8_t vshlq_u16 (uint16x8_t __a, int16x8_t __b);

uint32x4_t vshlq_u32 (uint32x4_t __a, int32x4_t __b);

uint64x2_t vshlq_u64 (uint64x2_t __a, int64x2_t __b);

/*--2、Vector saturating shift left(飽和指令):

vqshl -> ri = sat(ai << bi);(negative values shift right)

If the shift value is positive, the operation is a left shift. Otherwise, it is a

truncating right shift. left shifts each element in a vector of integers and places

the results in the destination vector. It is similar to VSHL. The difference is that

results are saturated, and the sticky QC flag is set if saturation occurs--*/

int8x8_t vqshl_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vqshl_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vqshl_s32 (int32x2_t __a, int32x2_t __b);

int64x1_t vqshl_s64 (int64x1_t __a, int64x1_t __b);

uint8x8_t vqshl_u8 (uint8x8_t __a, int8x8_t __b);

uint16x4_t vqshl_u16 (uint16x4_t __a, int16x4_t __b);

uint32x2_t vqshl_u32 (uint32x2_t __a, int32x2_t __b);

uint64x1_t vqshl_u64 (uint64x1_t __a, int64x1_t __b);

int8x16_t vqshlq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vqshlq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vqshlq_s32 (int32x4_t __a, int32x4_t __b);

int64x2_t vqshlq_s64 (int64x2_t __a, int64x2_t __b);

uint8x16_t vqshlq_u8 (uint8x16_t __a, int8x16_t __b);

uint16x8_t vqshlq_u16 (uint16x8_t __a, int16x8_t __b);

uint32x4_t vqshlq_u32 (uint32x4_t __a, int32x4_t __b);

uint64x2_t vqshlq_u64 (uint64x2_t __a, int64x2_t __b);

/*--3、Vector rounding shift left(正常指令):

vrshl -> ri = ai << bi;(negative values shift right)

If the shift value is positive, the operation is a left shift. Otherwise, it is a

rounding right shift. left shifts each element in a vector of integers and places

the results in the destination vector. It is similar to VSHL.

The difference is that the shifted value is then rounded.--*/

int8x8_t vrshl_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vrshl_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vrshl_s32 (int32x2_t __a, int32x2_t __b);

int64x1_t vrshl_s64 (int64x1_t __a, int64x1_t __b);

uint8x8_t vrshl_u8 (uint8x8_t __a, int8x8_t __b);

uint16x4_t vrshl_u16 (uint16x4_t __a, int16x4_t __b);

uint32x2_t vrshl_u32 (uint32x2_t __a, int32x2_t __b);

uint64x1_t vrshl_u64 (uint64x1_t __a, int64x1_t __b);

int8x16_t vrshlq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vrshlq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vrshlq_s32 (int32x4_t __a, int32x4_t __b);

int64x2_t vrshlq_s64 (int64x2_t __a, int64x2_t __b);

uint8x16_t vrshlq_u8 (uint8x16_t __a, int8x16_t __b);

uint16x8_t vrshlq_u16 (uint16x8_t __a, int16x8_t __b);

uint32x4_t vrshlq_u32 (uint32x4_t __a, int32x4_t __b);

uint64x2_t vrshlq_u64 (uint64x2_t __a, int64x2_t __b);

/*--4、Vector saturating rounding shift left(飽和指令):

vqrshl -> ri = sat(ai << bi);(negative values shift right)

left shifts each element in a vector of integers and places the results in the

destination vector.It is similar to VSHL. The difference is that the shifted value

is rounded, and the sticky QC flag is set if saturation occurs.--*/

int8x8_t vqrshl_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vqrshl_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vqrshl_s32 (int32x2_t __a, int32x2_t __b);

int64x1_t vqrshl_s64 (int64x1_t __a, int64x1_t __b);

uint8x8_t vqrshl_u8 (uint8x8_t __a, int8x8_t __b);

uint16x4_t vqrshl_u16 (uint16x4_t __a, int16x4_t __b);

uint32x2_t vqrshl_u32 (uint32x2_t __a, int32x2_t __b);

uint64x1_t vqrshl_u64 (uint64x1_t __a, int64x1_t __b);

int8x16_t vqrshlq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vqrshlq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vqrshlq_s32 (int32x4_t __a, int32x4_t __b);

int64x2_t vqrshlq_s64 (int64x2_t __a, int64x2_t __b);

uint8x16_t vqrshlq_u8 (uint8x16_t __a, int8x16_t __b);

uint16x8_t vqrshlq_u16 (uint16x8_t __a, int16x8_t __b);

uint32x4_t vqrshlq_u32 (uint32x4_t __a, int32x4_t __b);

uint64x2_t vqrshlq_u64 (uint64x2_t __a, int64x2_t __b);

/****************************************Shifts by a constant***************************/

/*--1、Vector shift right by constant: vshr -> ri = ai >> b;The results are truncated.

right shifts each element in a vector by an immediate value,

and places the results in the destination vector.--*/

int8x8_t vshr_n_s8 (int8x8_t __a, const int __b);

int16x4_t vshr_n_s16 (int16x4_t __a, const int __b);

int32x2_t vshr_n_s32 (int32x2_t __a, const int __b);

int64x1_t vshr_n_s64 (int64x1_t __a, const int __b);

uint8x8_t vshr_n_u8 (uint8x8_t __a, const int __b);

uint16x4_t vshr_n_u16 (uint16x4_t __a, const int __b);

uint32x2_t vshr_n_u32 (uint32x2_t __a, const int __b);

uint64x1_t vshr_n_u64 (uint64x1_t __a, const int __b);

int8x16_t vshrq_n_s8 (int8x16_t __a, const int __b);

int16x8_t vshrq_n_s16 (int16x8_t __a, const int __b);

int32x4_t vshrq_n_s32 (int32x4_t __a, const int __b);

int64x2_t vshrq_n_s64 (int64x2_t __a, const int __b);

uint8x16_t vshrq_n_u8 (uint8x16_t __a, const int __b);

uint16x8_t vshrq_n_u16 (uint16x8_t __a, const int __b);

uint32x4_t vshrq_n_u32 (uint32x4_t __a, const int __b);

uint64x2_t vshrq_n_u64 (uint64x2_t __a, const int __b);

/*--2、Vector shift left by constant: vshl -> ri = ai << b;

left shifts each element in a vector by an immediate value, and places the results in the

destination vector. The bits shifted out of the left of each element are lost--*/

int8x8_t vshl_n_s8 (int8x8_t __a, const int __b);

int16x4_t vshl_n_s16 (int16x4_t __a, const int __b);

int32x2_t vshl_n_s32 (int32x2_t __a, const int __b);

int64x1_t vshl_n_s64 (int64x1_t __a, const int __b);

uint8x8_t vshl_n_u8 (uint8x8_t __a, const int __b);

uint16x4_t vshl_n_u16 (uint16x4_t __a, const int __b);

uint32x2_t vshl_n_u32 (uint32x2_t __a, const int __b);

uint64x1_t vshl_n_u64 (uint64x1_t __a, const int __b);

int8x16_t vshlq_n_s8 (int8x16_t __a, const int __b);

int16x8_t vshlq_n_s16 (int16x8_t __a, const int __b);

int32x4_t vshlq_n_s32 (int32x4_t __a, const int __b);

int64x2_t vshlq_n_s64 (int64x2_t __a, const int __b);

uint8x16_t vshlq_n_u8 (uint8x16_t __a, const int __b);

uint16x8_t vshlq_n_u16 (uint16x8_t __a, const int __b);

uint32x4_t vshlq_n_u32 (uint32x4_t __a, const int __b);

uint64x2_t vshlq_n_u64 (uint64x2_t __a, const int __b);

/*--3、Vector rounding shift right by constant: vrshr -> ri = ai >> b;

right shifts each element in a vector by an immediate value, and places the results

in the destination vector. The shifted values are rounded.--*/

int8x8_t vrshr_n_s8 (int8x8_t __a, const int __b);

int16x4_t vrshr_n_s16 (int16x4_t __a, const int __b);

int32x2_t vrshr_n_s32 (int32x2_t __a, const int __b);

int64x1_t vrshr_n_s64 (int64x1_t __a, const int __b);

uint8x8_t vrshr_n_u8 (uint8x8_t __a, const int __b);

uint16x4_t vrshr_n_u16 (uint16x4_t __a, const int __b);

uint32x2_t vrshr_n_u32 (uint32x2_t __a, const int __b);

uint64x1_t vrshr_n_u64 (uint64x1_t __a, const int __b);

int8x16_t vrshrq_n_s8 (int8x16_t __a, const int __b);

int16x8_t vrshrq_n_s16 (int16x8_t __a, const int __b);

int32x4_t vrshrq_n_s32 (int32x4_t __a, const int __b);

int64x2_t vrshrq_n_s64 (int64x2_t __a, const int __b);

uint8x16_t vrshrq_n_u8 (uint8x16_t __a, const int __b);

uint16x8_t vrshrq_n_u16 (uint16x8_t __a, const int __b);

uint32x4_t vrshrq_n_u32 (uint32x4_t __a, const int __b);

uint64x2_t vrshrq_n_u64 (uint64x2_t __a, const int __b);

/*--4、Vector shift right by constant and accumulate: vsra -> ri = ai + (bi >> c);

The results are truncated. right shifts each element in a vector by an immediate value,

and accumulates the results into the destination vector.--*/

int8x8_t vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);

int16x4_t vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);

int64x1_t vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);

uint8x8_t vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);

uint16x4_t vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);

uint32x2_t vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);

uint64x1_t vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);

int8x16_t vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);

int16x8_t vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);

int32x4_t vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);

int64x2_t vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);

uint8x16_t vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);

uint16x8_t vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);

uint32x4_t vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);

uint64x2_t vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);

/*--5、Vector rounding shift right by constant and accumulate:

vrsra -> ri = ai + round(bi >> c);

The results are rounded.right shifts each element in a vector by an immediate value,

and accumulates the rounded results into the destination vector.--*/

int8x8_t vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);

int16x4_t vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);

int64x1_t vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);

uint8x8_t vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);

uint16x4_t vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);

uint32x2_t vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);

uint64x1_t vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);

int8x16_t vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);

int16x8_t vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);

int32x4_t vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);

int64x2_t vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);

uint8x16_t vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);

uint16x8_t vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);

uint32x4_t vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);

uint64x2_t vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);

/*--6、Vector saturating shift left by constant: vqshl -> ri = sat(ai << b);

left shifts each element in a vector of integers by an immediate value, and places the

results in the destination vector,and the sticky QC flag is set if saturation occurs.--*/

int8x8_t vqshl_n_s8 (int8x8_t __a, const int __b);

int16x4_t vqshl_n_s16 (int16x4_t __a, const int __b);

int32x2_t vqshl_n_s32 (int32x2_t __a, const int __b);

int64x1_t vqshl_n_s64 (int64x1_t __a, const int __b);

uint8x8_t vqshl_n_u8 (uint8x8_t __a, const int __b);

uint16x4_t vqshl_n_u16 (uint16x4_t __a, const int __b);

uint32x2_t vqshl_n_u32 (uint32x2_t __a, const int __b);

uint64x1_t vqshl_n_u64 (uint64x1_t __a, const int __b);

int8x16_t vqshlq_n_s8 (int8x16_t __a, const int __b);

int16x8_t vqshlq_n_s16 (int16x8_t __a, const int __b);

int32x4_t vqshlq_n_s32 (int32x4_t __a, const int __b);

int64x2_t vqshlq_n_s64 (int64x2_t __a, const int __b);

uint8x16_t vqshlq_n_u8 (uint8x16_t __a, const int __b);

uint16x8_t vqshlq_n_u16 (uint16x8_t __a, const int __b);

uint32x4_t vqshlq_n_u32 (uint32x4_t __a, const int __b);

uint64x2_t vqshlq_n_u64 (uint64x2_t __a, const int __b);

/*--7、Vector signed->unsigned saturating shift left by constant: vqshlu -> ri = ai << b;

left shifts each element in a vector of integers by an immediate value, places the

results in the destination vector, the sticky QC flag is set if saturation occurs,

and indicates that the results are unsigned even though the operands are signed.--*/

uint8x8_t vqshlu_n_s8 (int8x8_t __a, const int __b);

uint16x4_t vqshlu_n_s16 (int16x4_t __a, const int __b);

uint32x2_t vqshlu_n_s32 (int32x2_t __a, const int __b);

uint64x1_t vqshlu_n_s64 (int64x1_t __a, const int __b);

uint8x16_t vqshluq_n_s8 (int8x16_t __a, const int __b);

uint16x8_t vqshluq_n_s16 (int16x8_t __a, const int __b);

uint32x4_t vqshluq_n_s32 (int32x4_t __a, const int __b);

uint64x2_t vqshluq_n_s64 (int64x2_t __a, const int __b);

/*--8、Vector narrowing shift right by constant: vshrn -> ri = ai >> b;

The results are truncated.right shifts each element in the input vector by an

immediate value. It then narrows the result by storing only the least significant

half of each element into the destination vector.--*/

int8x8_t vshrn_n_s16 (int16x8_t __a, const int __b);

int16x4_t vshrn_n_s32 (int32x4_t __a, const int __b);

int32x2_t vshrn_n_s64 (int64x2_t __a, const int __b);

uint8x8_t vshrn_n_u16 (uint16x8_t __a, const int __b);

uint16x4_t vshrn_n_u32 (uint32x4_t __a, const int __b);

uint32x2_t vshrn_n_u64 (uint64x2_t __a, const int __b);

/*--9、Vector signed->unsigned narrowing saturating shift right by constant:

vqshrun -> ri = ai >> b;

Results are truncated. right shifts each element in a quadword vector of integers by an

immediate value, and places the results in a doubleword vector. The results are unsigned,

although the operands are signed. The sticky QC flag is set if saturation occurs.--*/

uint8x8_t vqshrun_n_s16 (int16x8_t __a, const int __b);

uint16x4_t vqshrun_n_s32 (int32x4_t __a, const int __b);

uint32x2_t vqshrun_n_s64 (int64x2_t __a, const int __b);

/*--10、Vector signed->unsigned rounding narrowing saturating shift right by constant:

vqrshrun -> ri = ai >> b; Results are rounded. right shifts each element in a quadword

vector of integers by an immediate value, and places the rounded results in a doubleword

vector. The results are unsigned, although the operands are signed.--*/

uint8x8_t vqrshrun_n_s16 (int16x8_t __a, const int __b);

uint16x4_t vqrshrun_n_s32 (int32x4_t __a, const int __b);

uint32x2_t vqrshrun_n_s64 (int64x2_t __a, const int __b);

/*--11、Vector narrowing saturating shift right by constant: vqshrn -> ri = ai >> b;

Results are truncated. right shifts each element in a quadword vector of integers by an

immediate value, and places the results in a doubleword vector,

and the sticky QC flag is set if saturation occurs.--*/

int8x8_t vqshrn_n_s16 (int16x8_t __a, const int __b);

int16x4_t vqshrn_n_s32 (int32x4_t __a, const int __b);

int32x2_t vqshrn_n_s64 (int64x2_t __a, const int __b);

uint8x8_t vqshrn_n_u16 (uint16x8_t __a, const int __b);

uint16x4_t vqshrn_n_u32 (uint32x4_t __a, const int __b);

uint32x2_t vqshrn_n_u64 (uint64x2_t __a, const int __b);

/*--12、Vector rounding narrowing shift right by constant: vrshrn -> ri = ai >> b;

The results are rounded. right shifts each element in a vector by an immediate value,

and places the rounded,narrowed results in the destination vector.--*/

int8x8_t vrshrn_n_s16 (int16x8_t __a, const int __b);

int16x4_t vrshrn_n_s32 (int32x4_t __a, const int __b);

int32x2_t vrshrn_n_s64 (int64x2_t __a, const int __b);

uint8x8_t vrshrn_n_u16 (uint16x8_t __a, const int __b);

uint16x4_t vrshrn_n_u32 (uint32x4_t __a, const int __b);

uint32x2_t vrshrn_n_u64 (uint64x2_t __a, const int __b);

/*--13、Vector rounding narrowing saturating shift right by constant:

vqrshrn -> ri = ai >> b;

Results are rounded. right shifts each element in a quadword vector of integers by an

immediate value,and places the rounded,narrowed results in a doubleword vector.

The sticky QC flag is set if saturation occurs.--*/

int8x8_t vqrshrn_n_s16 (int16x8_t __a, const int __b);

int16x4_t vqrshrn_n_s32 (int32x4_t __a, const int __b);

int32x2_t vqrshrn_n_s64 (int64x2_t __a, const int __b);

uint8x8_t vqrshrn_n_u16 (uint16x8_t __a, const int __b);

uint16x4_t vqrshrn_n_u32 (uint32x4_t __a, const int __b);

uint32x2_t vqrshrn_n_u64 (uint64x2_t __a, const int __b);

/*--14、Vector widening shift left by constant: vshll -> ri = ai << b;

left shifts each element in a vector of integers by an immediate value,

and place the results in the destination vector. Each element is sign extended or zero

extended to twice its width before the shift, so no bits are shifted out.--*/

int16x8_t vshll_n_s8 (int8x8_t __a, const int __b);

int32x4_t vshll_n_s16 (int16x4_t __a, const int __b);

int64x2_t vshll_n_s32 (int32x2_t __a, const int __b);

uint16x8_t vshll_n_u8 (uint8x8_t __a, const int __b);

uint32x4_t vshll_n_u16 (uint16x4_t __a, const int __b);

uint64x2_t vshll_n_u32 (uint32x2_t __a, const int __b);

/********************************************Shifts with insert*************************/

/*--1、Vector shift right and insert: vsri -> ri = (ai & top_c_bits) | (bi >> c, zero-filled);

The c most significant bits of each destination element are unchanged (c is the

immediate shift amount). right shifts each element in the second input vector

by an immediate value, and inserts the results in the destination vector. It does not

affect the highest c significant bits of the elements in the destination register.

Bits shifted out of the right of each element are lost.The first input vector holds

the elements of the destination vector before the operation is performed.--*/

int8x8_t vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);

int16x4_t vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);

int64x1_t vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);

uint8x8_t vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);

uint16x4_t vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);

uint32x2_t vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);

uint64x1_t vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);

poly8x8_t vsri_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c);

poly16x4_t vsri_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c);

int8x16_t vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);

int16x8_t vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);

int32x4_t vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);

int64x2_t vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);

uint8x16_t vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);

uint16x8_t vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);

uint32x4_t vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);

uint64x2_t vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);

poly8x16_t vsriq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c);

poly16x8_t vsriq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c);

/*--2、Vector shift left and insert: vsli -> ri = (ai & ((1 << c) - 1)) | (bi << c);

The c least significant bits of each destination element are unchanged (c is the

immediate shift amount). left shifts each element in the second input

vector by an immediate value, and inserts the results in the destination vector.

It does not affect the lowest c significant bits of the elements in the destination

register. Bits shifted out of the left of each element are lost. The first input vector

holds the elements of the destination vector before the operation is performed.--*/

int8x8_t vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);

int16x4_t vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);

int64x1_t vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);

uint8x8_t vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);

uint16x4_t vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);

uint32x2_t vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);

uint64x1_t vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);

poly8x8_t vsli_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c);

poly16x4_t vsli_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c);

int8x16_t vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);

int16x8_t vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);

int32x4_t vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);

int64x2_t vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);

uint8x16_t vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);

uint16x8_t vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);

uint32x4_t vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);

uint64x2_t vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);

poly8x16_t vsliq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c);

poly16x8_t vsliq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c);

/*****************************************Absolute value********************************/

/*--1、Absolute(正常指令): vabs -> ri = |ai|;

returns the absolute value of each element in a vector.--*/

int8x8_t vabs_s8 (int8x8_t __a);//_mm_abs_epi8

int16x4_t vabs_s16 (int16x4_t __a);//_mm_abs_epi16

int32x2_t vabs_s32 (int32x2_t __a);//_mm_abs_epi32

float32x2_t vabs_f32 (float32x2_t __a);

int8x16_t vabsq_s8 (int8x16_t __a);//_mm_abs_epi8

int16x8_t vabsq_s16 (int16x8_t __a);//_mm_abs_epi16

int32x4_t vabsq_s32 (int32x4_t __a);//_mm_abs_epi32

float32x4_t vabsq_f32 (float32x4_t __a);

/*--2、Saturating absolute(飽和指令): vqabs -> ri = sat(|ai|);

returns the absolute value of each element in a vector. If any of the results overflow,

they are saturated and the sticky QC flag is set.--*/

int8x8_t vqabs_s8 (int8x8_t __a);

int16x4_t vqabs_s16 (int16x4_t __a);

int32x2_t vqabs_s32 (int32x2_t __a);

int8x16_t vqabsq_s8 (int8x16_t __a);

int16x8_t vqabsq_s16 (int16x8_t __a);

int32x4_t vqabsq_s32 (int32x4_t __a);

/***************************************************Negation****************************/

/*--1、Negate(正常指令): vneg -> ri = -ai; negates each element in a vector.--*/

int8x8_t vneg_s8 (int8x8_t __a);

int16x4_t vneg_s16 (int16x4_t __a);

int32x2_t vneg_s32 (int32x2_t __a);

float32x2_t vneg_f32 (float32x2_t __a);

int8x16_t vnegq_s8 (int8x16_t __a);

int16x8_t vnegq_s16 (int16x8_t __a);

int32x4_t vnegq_s32 (int32x4_t __a);

float32x4_t vnegq_f32 (float32x4_t __a);

/*--2、Saturating Negate: vqneg -> ri = sat(-ai);

negates each element in a vector. If any of the results overflow,

they are saturated and the sticky QC flag is set.--*/

int8x8_t vqneg_s8 (int8x8_t __a);

int16x4_t vqneg_s16 (int16x4_t __a);

int32x2_t vqneg_s32 (int32x2_t __a);

int8x16_t vqnegq_s8 (int8x16_t __a);

int16x8_t vqnegq_s16 (int16x8_t __a);

int32x4_t vqnegq_s32 (int32x4_t __a);

/********************************************Logical operations*************************/

/*--1、Bitwise not(正常指令): vmvn -> ri = ~ai;

performs a bitwise inversion of each element from the input vector.--*/

int8x8_t vmvn_s8 (int8x8_t __a);

int16x4_t vmvn_s16 (int16x4_t __a);

int32x2_t vmvn_s32 (int32x2_t __a);

uint8x8_t vmvn_u8 (uint8x8_t __a);

uint16x4_t vmvn_u16 (uint16x4_t __a);

uint32x2_t vmvn_u32 (uint32x2_t __a);

poly8x8_t vmvn_p8 (poly8x8_t __a);

int8x16_t vmvnq_s8 (int8x16_t __a);

int16x8_t vmvnq_s16 (int16x8_t __a);

int32x4_t vmvnq_s32 (int32x4_t __a);

uint8x16_t vmvnq_u8 (uint8x16_t __a);

uint16x8_t vmvnq_u16 (uint16x8_t __a);

uint32x4_t vmvnq_u32 (uint32x4_t __a);

poly8x16_t vmvnq_p8 (poly8x16_t __a);

/*--2、Bitwise and(正常指令): vand -> ri = ai & bi; performs a bitwise AND between

corresponding elements of the input vectors.--*/

int8x8_t vand_s8 (int8x8_t __a, int8x8_t __b);//_mm_and_si128

int16x4_t vand_s16 (int16x4_t __a, int16x4_t __b);//_mm_and_si128

int32x2_t vand_s32 (int32x2_t __a, int32x2_t __b);//_mm_and_si128

uint8x8_t vand_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_and_si128

uint16x4_t vand_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_and_si128

uint32x2_t vand_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_and_si128

int64x1_t vand_s64 (int64x1_t __a, int64x1_t __b);//_mm_and_si128

uint64x1_t vand_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_and_si128

int8x16_t vandq_s8 (int8x16_t __a, int8x16_t __b);//_mm_and_si128

int16x8_t vandq_s16 (int16x8_t __a, int16x8_t __b);//_mm_and_si128

int32x4_t vandq_s32 (int32x4_t __a, int32x4_t __b);//_mm_and_si128

int64x2_t vandq_s64 (int64x2_t __a, int64x2_t __b);//_mm_and_si128

uint8x16_t vandq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_and_si128

uint16x8_t vandq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_and_si128

uint32x4_t vandq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_and_si128

uint64x2_t vandq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_and_si128

/*--3、Bitwise or(正常指令): vorr -> ri = ai | bi; performs a bitwise OR between

corresponding elements of the input vectors.--*/

int8x8_t vorr_s8 (int8x8_t __a, int8x8_t __b);//_mm_or_si128

int16x4_t vorr_s16 (int16x4_t __a, int16x4_t __b);//_mm_or_si128

int32x2_t vorr_s32 (int32x2_t __a, int32x2_t __b);//_mm_or_si128

uint8x8_t vorr_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_or_si128

uint16x4_t vorr_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_or_si128

uint32x2_t vorr_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_or_si128

int64x1_t vorr_s64 (int64x1_t __a, int64x1_t __b);//_mm_or_si128

uint64x1_t vorr_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_or_si128

int8x16_t vorrq_s8 (int8x16_t __a, int8x16_t __b);//_mm_or_si128

int16x8_t vorrq_s16 (int16x8_t __a, int16x8_t __b);//_mm_or_si128

int32x4_t vorrq_s32 (int32x4_t __a, int32x4_t __b);//_mm_or_si128

int64x2_t vorrq_s64 (int64x2_t __a, int64x2_t __b);//_mm_or_si128

uint8x16_t vorrq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_or_si128

uint16x8_t vorrq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_or_si128

uint32x4_t vorrq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_or_si128

uint64x2_t vorrq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_or_si128

/*--4、Bitwise exclusive or (EOR or XOR)(正常指令): veor -> ri = ai ^ bi;

performs a bitwise exclusive-OR between corresponding elements of the input vectors.--*/

int8x8_t veor_s8 (int8x8_t __a, int8x8_t __b);//_mm_xor_si128

int16x4_t veor_s16 (int16x4_t __a, int16x4_t __b);//_mm_xor_si128

int32x2_t veor_s32 (int32x2_t __a, int32x2_t __b);//_mm_xor_si128

uint8x8_t veor_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_xor_si128

uint16x4_t veor_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_xor_si128

uint32x2_t veor_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_xor_si128

int64x1_t veor_s64 (int64x1_t __a, int64x1_t __b);//_mm_xor_si128

uint64x1_t veor_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_xor_si128

int8x16_t veorq_s8 (int8x16_t __a, int8x16_t __b);//_mm_xor_si128

int16x8_t veorq_s16 (int16x8_t __a, int16x8_t __b);//_mm_xor_si128

int32x4_t veorq_s32 (int32x4_t __a, int32x4_t __b);//_mm_xor_si128

int64x2_t veorq_s64 (int64x2_t __a, int64x2_t __b);//_mm_xor_si128

uint8x16_t veorq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_xor_si128

uint16x8_t veorq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_xor_si128

uint32x4_t veorq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_xor_si128

uint64x2_t veorq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_xor_si128

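/*--5、Bit Clear (normal instruction): vbic -> ri = ai & ~bi;

VBIC (Vector Bitwise Clear) performs a bitwise logical AND complement operation between

values in two registers, and places the results in the destination register.

Note: _mm_andnot_si128(x, y) computes ~x & y, so the SSE equivalent of vbic(a, b) is

_mm_andnot_si128(b, a), i.e. with the operands swapped.--*/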

int8x8_t vbic_s8 (int8x8_t __a, int8x8_t __b);//_mm_andnot_si128

int16x4_t vbic_s16 (int16x4_t __a, int16x4_t __b);//_mm_andnot_si128

int32x2_t vbic_s32 (int32x2_t __a, int32x2_t __b);//_mm_andnot_si128

uint8x8_t vbic_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_andnot_si128

uint16x4_t vbic_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_andnot_si128

uint32x2_t vbic_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_andnot_si128

int64x1_t vbic_s64 (int64x1_t __a, int64x1_t __b);//_mm_andnot_si128

uint64x1_t vbic_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_andnot_si128

int8x16_t vbicq_s8 (int8x16_t __a, int8x16_t __b);//_mm_andnot_si128

int16x8_t vbicq_s16 (int16x8_t __a, int16x8_t __b);//_mm_andnot_si128

int32x4_t vbicq_s32 (int32x4_t __a, int32x4_t __b);//_mm_andnot_si128

int64x2_t vbicq_s64 (int64x2_t __a, int64x2_t __b);//_mm_andnot_si128

uint8x16_t vbicq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_andnot_si128

uint16x8_t vbicq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_andnot_si128

uint32x4_t vbicq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_andnot_si128

uint64x2_t vbicq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_andnot_si128

/*--6、Bitwise OR complement(正常指令): vorn -> ri = ai | (~bi);

performs a bitwise logical OR NOT operation

between values in two registers, and places the results in the destination register.--*/

int8x8_t vorn_s8 (int8x8_t __a, int8x8_t __b);

int16x4_t vorn_s16 (int16x4_t __a, int16x4_t __b);

int32x2_t vorn_s32 (int32x2_t __a, int32x2_t __b);

uint8x8_t vorn_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4_t vorn_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2_t vorn_u32 (uint32x2_t __a, uint32x2_t __b);

int64x1_t vorn_s64 (int64x1_t __a, int64x1_t __b);

uint64x1_t vorn_u64 (uint64x1_t __a, uint64x1_t __b);

int8x16_t vornq_s8 (int8x16_t __a, int8x16_t __b);

int16x8_t vornq_s16 (int16x8_t __a, int16x8_t __b);

int32x4_t vornq_s32 (int32x4_t __a, int32x4_t __b);

int64x2_t vornq_s64 (int64x2_t __a, int64x2_t __b);

uint8x16_t vornq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8_t vornq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4_t vornq_u32 (uint32x4_t __a, uint32x4_t __b);

uint64x2_t vornq_u64 (uint64x2_t __a, uint64x2_t __b);

/****************************************Count leading sign bits************************/

/*--Normal instruction, vcls -> ; counts the number of consecutive bits, following the most

significant bit, that are the same as the most significant bit, in each element in a

vector, and places the count in the result vector.--*/

int8x8_t vcls_s8 (int8x8_t __a);

int16x4_t vcls_s16 (int16x4_t __a);

int32x2_t vcls_s32 (int32x2_t __a);

int8x16_t vclsq_s8 (int8x16_t __a);

int16x8_t vclsq_s16 (int16x8_t __a);

int32x4_t vclsq_s32 (int32x4_t __a);

/*******************************************Count leading zeros*************************/

/*--正常指令, vclz -> ; counts the number of consecutive zeros, starting from the most

significant bit, in each element in a vector, and places the count in result vector.--*/

int8x8_t vclz_s8 (int8x8_t __a);

int16x4_t vclz_s16 (int16x4_t __a);

int32x2_t vclz_s32 (int32x2_t __a);

uint8x8_t vclz_u8 (uint8x8_t __a);

uint16x4_t vclz_u16 (uint16x4_t __a);

uint32x2_t vclz_u32 (uint32x2_t __a);

int8x16_t vclzq_s8 (int8x16_t __a);

int16x8_t vclzq_s16 (int16x8_t __a);

int32x4_t vclzq_s32 (int32x4_t __a);

uint8x16_t vclzq_u8 (uint8x16_t __a);

uint16x8_t vclzq_u16 (uint16x8_t __a);

uint32x4_t vclzq_u32 (uint32x4_t __a);

/*******************************************Count number of set bits********************/

/*--正常指令, vcnt -> counts the number of bits that are one in each element in a vector,

and places the count in the result vector.--*/

int8x8_t vcnt_s8 (int8x8_t __a);

uint8x8_t vcnt_u8 (uint8x8_t __a);

poly8x8_t vcnt_p8 (poly8x8_t __a);

int8x16_t vcntq_s8 (int8x16_t __a);

uint8x16_t vcntq_u8 (uint8x16_t __a);

poly8x16_t vcntq_p8 (poly8x16_t __a);

/*****************************************Reciprocal estimate***************************/

/*--正常指令, vrecpe -> ; finds an approximate reciprocal of each element in a vector,

and places it in the result vector.--*/

float32x2_t vrecpe_f32 (float32x2_t __a);

uint32x2_t vrecpe_u32 (uint32x2_t __a);

float32x4_t vrecpeq_f32 (float32x4_t __a);

uint32x4_t vrecpeq_u32 (uint32x4_t __a);

/****************************************Reciprocal square-root estimate****************/

/*--正常指令, vrsqrte -> ; finds an approximate reciprocal square root of each element

in a vector, and places it in the return vector.--*/

float32x2_t vrsqrte_f32 (float32x2_t __a);

uint32x2_t vrsqrte_u32 (uint32x2_t __a);

float32x4_t vrsqrteq_f32 (float32x4_t __a);

uint32x4_t vrsqrteq_u32 (uint32x4_t __a);

/*******************************************Get lanes from a vector*********************/

/*--vmov -> r = a[b]; returns the value from the specified lane of a vector.

Extract lanes from a vector and put into a register.

These intrinsics extract a single lane (element) from a vector.--*/

int8_t vget_lane_s8 (int8x8_t __a, const int __b);//_mm_extract_epi8

int16_t vget_lane_s16 (int16x4_t __a, const int __b);//_mm_extract_epi16

int32_t vget_lane_s32 (int32x2_t __a, const int __b);//_mm_extract_epi32

float32_t vget_lane_f32 (float32x2_t __a, const int __b);

uint8_t vget_lane_u8 (uint8x8_t __a, const int __b);//_mm_extract_epi8

uint16_t vget_lane_u16 (uint16x4_t __a, const int __b);//_mm_extract_epi16

uint32_t vget_lane_u32 (uint32x2_t __a, const int __b);//_mm_extract_epi32

poly8_t vget_lane_p8 (poly8x8_t __a, const int __b);//_mm_extract_epi8

poly16_t vget_lane_p16 (poly16x4_t __a, const int __b);//_mm_extract_epi16

int64_t vget_lane_s64 (int64x1_t __a, const int __b);//_mm_extract_epi64

uint64_t vget_lane_u64 (uint64x1_t __a, const int __b);//_mm_extract_epi64

int8_t vgetq_lane_s8 (int8x16_t __a, const int __b);//_mm_extract_epi8

int16_t vgetq_lane_s16 (int16x8_t __a, const int __b);//_mm_extract_epi16

int32_t vgetq_lane_s32 (int32x4_t __a, const int __b);//_mm_extract_epi32

float32_t vgetq_lane_f32 (float32x4_t __a, const int __b);

uint8_t vgetq_lane_u8 (uint8x16_t __a, const int __b);//_mm_extract_epi8

uint16_t vgetq_lane_u16 (uint16x8_t __a, const int __b);//_mm_extract_epi16

uint32_t vgetq_lane_u32 (uint32x4_t __a, const int __b);//_mm_extract_epi32

poly8_t vgetq_lane_p8 (poly8x16_t __a, const int __b);//_mm_extract_epi8

poly16_t vgetq_lane_p16 (poly16x8_t __a, const int __b);//_mm_extract_epi16

int64_t vgetq_lane_s64 (int64x2_t __a, const int __b);//_mm_extract_epi64

uint64_t vgetq_lane_u64 (uint64x2_t __a, const int __b);//_mm_extract_epi64

/*********************************************Set lanes in a vector********************/

/*--vmov -> ; sets the value of the specified lane of a vector. It returns the vector

with the new value. Load a single lane of a vector from a literal. These intrinsics set

a single lane (element) within a vector.--*/

int8x8_t vset_lane_s8 (int8_t __a, int8x8_t __b, const int __c);

int16x4_t vset_lane_s16 (int16_t __a, int16x4_t __b, const int __c);

int32x2_t vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c);

float32x2_t vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c);

uint8x8_t vset_lane_u8 (uint8_t __a, uint8x8_t __b, const int __c);

uint16x4_t vset_lane_u16 (uint16_t __a, uint16x4_t __b, const int __c);

uint32x2_t vset_lane_u32 (uint32_t __a, uint32x2_t __b, const int __c);

poly8x8_t vset_lane_p8 (poly8_t __a, poly8x8_t __b, const int __c);

poly16x4_t vset_lane_p16 (poly16_t __a, poly16x4_t __b, const int __c);

int64x1_t vset_lane_s64 (int64_t __a, int64x1_t __b, const int __c);

uint64x1_t vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c);

int8x16_t vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c);

int16x8_t vsetq_lane_s16 (int16_t __a, int16x8_t __b, const int __c);

int32x4_t vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c);

float32x4_t vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c);

uint8x16_t vsetq_lane_u8 (uint8_t __a, uint8x16_t __b, const int __c);

uint16x8_t vsetq_lane_u16 (uint16_t __a, uint16x8_t __b, const int __c);

uint32x4_t vsetq_lane_u32 (uint32_t __a, uint32x4_t __b, const int __c);

poly8x16_t vsetq_lane_p8 (poly8_t __a, poly8x16_t __b, const int __c);

poly16x8_t vsetq_lane_p16 (poly16_t __a, poly16x8_t __b, const int __c);

int64x2_t vsetq_lane_s64 (int64_t __a, int64x2_t __b, const int __c);

uint64x2_t vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c);

/****************************************Create vector from literal bit pattern*********/

/*--vmov -> ; creates a vector from a 64-bit pattern.

Initialize a vector from a literal bit pattern.--*/

int8x8_t vcreate_s8 (uint64_t __a);//_mm_loadl_epi64

int16x4_t vcreate_s16 (uint64_t __a);//_mm_loadl_epi64

int32x2_t vcreate_s32 (uint64_t __a);//_mm_loadl_epi64

int64x1_t vcreate_s64 (uint64_t __a);//_mm_loadl_epi64

float32x2_t vcreate_f32 (uint64_t __a);

uint8x8_t vcreate_u8 (uint64_t __a);//_mm_loadl_epi64

uint16x4_t vcreate_u16 (uint64_t __a);//_mm_loadl_epi64

uint32x2_t vcreate_u32 (uint64_t __a);//_mm_loadl_epi64

uint64x1_t vcreate_u64 (uint64_t __a);//_mm_loadl_epi64

poly8x8_t vcreate_p8 (uint64_t __a);//_mm_loadl_epi64

poly16x4_t vcreate_p16 (uint64_t __a);//_mm_loadl_epi64

/*****************************************Set all lanes to the same value***************/

/*--1、Load all lanes of vector to the same literal value: vdup/vmov -> ri = a;

duplicates a scalar into every element of the destination vector.

Load all lanes of vector to the same literal value--*/

int8x8_t vdup_n_s8 (int8_t __a);//_mm_set1_epi8

int16x4_t vdup_n_s16 (int16_t __a);//_mm_set1_epi16

int32x2_t vdup_n_s32 (int32_t __a);//_mm_set1_epi32

float32x2_t vdup_n_f32 (float32_t __a);//_mm_set1_ps

uint8x8_t vdup_n_u8 (uint8_t __a);//_mm_set1_epi8

uint16x4_t vdup_n_u16 (uint16_t __a);//_mm_set1_epi16

uint32x2_t vdup_n_u32 (uint32_t __a);//_mm_set1_epi32

poly8x8_t vdup_n_p8 (poly8_t __a);//_mm_set1_epi8

poly16x4_t vdup_n_p16 (poly16_t __a);//_mm_set1_epi16

int64x1_t vdup_n_s64 (int64_t __a);

uint64x1_t vdup_n_u64 (uint64_t __a);

int8x16_t vdupq_n_s8 (int8_t __a);//_mm_set1_epi8

int16x8_t vdupq_n_s16 (int16_t __a);//_mm_set1_epi16

int32x4_t vdupq_n_s32 (int32_t __a);//_mm_set1_epi32

float32x4_t vdupq_n_f32 (float32_t __a);//_mm_set1_ps

uint8x16_t vdupq_n_u8 (uint8_t __a);//_mm_set1_epi8

uint16x8_t vdupq_n_u16 (uint16_t __a);//_mm_set1_epi16

uint32x4_t vdupq_n_u32 (uint32_t __a);//_mm_set1_epi32

poly8x16_t vdupq_n_p8 (poly8_t __a);//_mm_set1_epi8

poly16x8_t vdupq_n_p16 (poly16_t __a);//_mm_set1_epi16

int64x2_t vdupq_n_s64 (int64_t __a);

uint64x2_t vdupq_n_u64 (uint64_t __a);

int8x8_t vmov_n_s8 (int8_t __a);//_mm_set1_epi8

int16x4_t vmov_n_s16 (int16_t __a);//_mm_set1_epi16

int32x2_t vmov_n_s32 (int32_t __a);//_mm_set1_epi32

float32x2_t vmov_n_f32 (float32_t __a);//_mm_set1_ps

uint8x8_t vmov_n_u8 (uint8_t __a);//_mm_set1_epi8

uint16x4_t vmov_n_u16 (uint16_t __a);//_mm_set1_epi16

uint32x2_t vmov_n_u32 (uint32_t __a);//_mm_set1_epi32

poly8x8_t vmov_n_p8 (poly8_t __a);//_mm_set1_epi8

poly16x4_t vmov_n_p16 (poly16_t __a);//_mm_set1_epi16

int64x1_t vmov_n_s64 (int64_t __a);

uint64x1_t vmov_n_u64 (uint64_t __a);

int8x16_t vmovq_n_s8 (int8_t __a);//_mm_set1_epi8

int16x8_t vmovq_n_s16 (int16_t __a);//_mm_set1_epi16

int32x4_t vmovq_n_s32 (int32_t __a);//_mm_set1_epi32

float32x4_t vmovq_n_f32 (float32_t __a);//_mm_set1_ps

uint8x16_t vmovq_n_u8 (uint8_t __a);//_mm_set1_epi8

uint16x8_t vmovq_n_u16 (uint16_t __a);//_mm_set1_epi16

uint32x4_t vmovq_n_u32 (uint32_t __a);//_mm_set1_epi32

poly8x16_t vmovq_n_p8 (poly8_t __a);//_mm_set1_epi8

poly16x8_t vmovq_n_p16 (poly16_t __a);//_mm_set1_epi16

int64x2_t vmovq_n_s64 (int64_t __a);

uint64x2_t vmovq_n_u64 (uint64_t __a);

/*--2、Load all lanes of the vector to the value of a lane of a vector:

vdup/vmov -> ri = a[b];

duplicates a scalar into every element of the destination vector.--*/

int8x8_t vdup_lane_s8 (int8x8_t __a, const int __b);

int16x4_t vdup_lane_s16 (int16x4_t __a, const int __b);

int32x2_t vdup_lane_s32 (int32x2_t __a, const int __b);

float32x2_t vdup_lane_f32 (float32x2_t __a, const int __b);

uint8x8_t vdup_lane_u8 (uint8x8_t __a, const int __b);

uint16x4_t vdup_lane_u16 (uint16x4_t __a, const int __b);

uint32x2_t vdup_lane_u32 (uint32x2_t __a, const int __b);

poly8x8_t vdup_lane_p8 (poly8x8_t __a, const int __b);

poly16x4_t vdup_lane_p16 (poly16x4_t __a, const int __b);

int64x1_t vdup_lane_s64 (int64x1_t __a, const int __b);

uint64x1_t vdup_lane_u64 (uint64x1_t __a, const int __b);

int8x16_t vdupq_lane_s8 (int8x8_t __a, const int __b);

int16x8_t vdupq_lane_s16 (int16x4_t __a, const int __b);

int32x4_t vdupq_lane_s32 (int32x2_t __a, const int __b);

float32x4_t vdupq_lane_f32 (float32x2_t __a, const int __b);

uint8x16_t vdupq_lane_u8 (uint8x8_t __a, const int __b);

uint16x8_t vdupq_lane_u16 (uint16x4_t __a, const int __b);

uint32x4_t vdupq_lane_u32 (uint32x2_t __a, const int __b);

poly8x16_t vdupq_lane_p8 (poly8x8_t __a, const int __b);

poly16x8_t vdupq_lane_p16 (poly16x4_t __a, const int __b);

int64x2_t vdupq_lane_s64 (int64x1_t __a, const int __b);//_mm_unpacklo_epi64

uint64x2_t vdupq_lane_u64 (uint64x1_t __a, const int __b);//_mm_unpacklo_epi64

/********************************************Combining vectors**************************/

/*--長指令, -> r0 = a0, ..., r7 = a7, r8 = b0, ..., r15 = b7;

joins two 64-bit vectors into a single 128-bit vector.

The output vector contains twice the number of elements as each input vector.

The lower half of the output vector contains the elements of the first input vector.--*/

int8x16_t vcombine_s8 (int8x8_t __a, int8x8_t __b);//_mm_unpacklo_epi64

int16x8_t vcombine_s16 (int16x4_t __a, int16x4_t __b);//_mm_unpacklo_epi64

int32x4_t vcombine_s32 (int32x2_t __a, int32x2_t __b);//_mm_unpacklo_epi64

int64x2_t vcombine_s64 (int64x1_t __a, int64x1_t __b);//_mm_unpacklo_epi64

float32x4_t vcombine_f32 (float32x2_t __a, float32x2_t __b);

uint8x16_t vcombine_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_unpacklo_epi64

uint16x8_t vcombine_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_unpacklo_epi64

uint32x4_t vcombine_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_unpacklo_epi64

uint64x2_t vcombine_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_unpacklo_epi64

poly8x16_t vcombine_p8 (poly8x8_t __a, poly8x8_t __b);//_mm_unpacklo_epi64

poly16x8_t vcombine_p16 (poly16x4_t __a, poly16x4_t __b);//_mm_unpacklo_epi64

/***************************************Splitting vectors*******************************/

/*--1、窄指令, -> ri = a(i+4); returns the higher half of the 128-bit input vector. The

output is a 64-bit vector that has half the number of elements as the input vector.--*/

int8x8_t vget_high_s8 (int8x16_t __a);//_mm_unpackhi_epi64

int16x4_t vget_high_s16 (int16x8_t __a);//_mm_unpackhi_epi64

int32x2_t vget_high_s32 (int32x4_t __a);//_mm_unpackhi_epi64

int64x1_t vget_high_s64 (int64x2_t __a);//_mm_unpackhi_epi64

float32x2_t vget_high_f32 (float32x4_t __a);

uint8x8_t vget_high_u8 (uint8x16_t __a);//_mm_unpackhi_epi64

uint16x4_t vget_high_u16 (uint16x8_t __a);//_mm_unpackhi_epi64

uint32x2_t vget_high_u32 (uint32x4_t __a);//_mm_unpackhi_epi64

uint64x1_t vget_high_u64 (uint64x2_t __a);//_mm_unpackhi_epi64

poly8x8_t vget_high_p8 (poly8x16_t __a);//_mm_unpackhi_epi64

poly16x4_t vget_high_p16 (poly16x8_t __a);//_mm_unpackhi_epi64

/*--2、窄指令, -> ri = ai; returns the lower half of the 128-bit input vector. The

output is a 64-bit vector that has half the number of elements as the input vector.--*/

int8x8_t vget_low_s8 (int8x16_t __a);

int16x4_t vget_low_s16 (int16x8_t __a);

int32x2_t vget_low_s32 (int32x4_t __a);

float32x2_t vget_low_f32 (float32x4_t __a);

uint8x8_t vget_low_u8 (uint8x16_t __a);

uint16x4_t vget_low_u16 (uint16x8_t __a);

uint32x2_t vget_low_u32 (uint32x4_t __a);

poly8x8_t vget_low_p8 (poly8x16_t __a);

poly16x4_t vget_low_p16 (poly16x8_t __a);

int64x1_t vget_low_s64 (int64x2_t __a);

uint64x1_t vget_low_u64 (uint64x2_t __a);

/****************************************************Conversions************************/

/*--1、Convert from float: vcvt ->, convert from floating-point to integer.--*/

int32x2_t vcvt_s32_f32 (float32x2_t __a);

uint32x2_t vcvt_u32_f32 (float32x2_t __a);

int32x4_t vcvtq_s32_f32 (float32x4_t __a);

uint32x4_t vcvtq_u32_f32 (float32x4_t __a);

int32x2_t vcvt_n_s32_f32 (float32x2_t __a, const int __b);

uint32x2_t vcvt_n_u32_f32 (float32x2_t __a, const int __b);

int32x4_t vcvtq_n_s32_f32 (float32x4_t __a, const int __b);

uint32x4_t vcvtq_n_u32_f32 (float32x4_t __a, const int __b);

/*--2、Convert to float: vcvt ->, convert from integer to floating-point.--*/

float32x2_t vcvt_f32_s32 (int32x2_t __a);

float32x2_t vcvt_f32_u32 (uint32x2_t __a);

float32x4_t vcvtq_f32_s32 (int32x4_t __a);

float32x4_t vcvtq_f32_u32 (uint32x4_t __a);

float32x2_t vcvt_n_f32_s32 (int32x2_t __a, const int __b);

float32x2_t vcvt_n_f32_u32 (uint32x2_t __a, const int __b);

float32x4_t vcvtq_n_f32_s32 (int32x4_t __a, const int __b);

float32x4_t vcvtq_n_f32_u32 (uint32x4_t __a, const int __b);

/*--3、Convert between half-precision and single-precision numbers: vcvt ->

converts each float32 lane to float16, or each float16 lane to float32.--*/

float16x4_t vcvt_f16_f32 (float32x4_t __a);

float32x4_t vcvt_f32_f16 (float16x4_t __a);

/*************************************************Move**********************************/

/*--1、Vector narrow integer(窄指令): vmovn -> ri = ai[0...8]; copies the least

significant half of each element of a quadword vector into

the corresponding elements of a doubleword vector.--*/

int8x8_t vmovn_s16 (int16x8_t __a);

int16x4_t vmovn_s32 (int32x4_t __a);

int32x2_t vmovn_s64 (int64x2_t __a);

uint8x8_t vmovn_u16 (uint16x8_t __a);

uint16x4_t vmovn_u32 (uint32x4_t __a);

uint32x2_t vmovn_u64 (uint64x2_t __a);

/*--2、Vector long move(長指令): vmovl -> sign extends or zero extends each element

in a doubleword vector to twice its original length,

and places the results in a quadword vector.--*/

int16x8_t vmovl_s8 (int8x8_t __a);//_mm_cvtepi8_epi16

int32x4_t vmovl_s16 (int16x4_t __a);//_mm_cvtepi16_epi32

int64x2_t vmovl_s32 (int32x2_t __a);//_mm_cvtepi32_epi64

uint16x8_t vmovl_u8 (uint8x8_t __a);//_mm_cvtepu8_epi16

uint32x4_t vmovl_u16 (uint16x4_t __a);//_mm_cvtepu16_epi32

uint64x2_t vmovl_u32 (uint32x2_t __a);//_mm_cvtepu32_epi64

/*--3、Vector saturating narrow integer(窄指令): vqmovn -> copies each element of the

operand vector to the corresponding element of the destination vector.

The result element is half the width of

the operand element, and values are saturated to the result width.

The results are the same type as the operands.--*/

int8x8_t vqmovn_s16 (int16x8_t __a);//_mm_packs_epi16

int16x4_t vqmovn_s32 (int32x4_t __a);//_mm_packs_epi32

int32x2_t vqmovn_s64 (int64x2_t __a);

uint8x8_t vqmovn_u16 (uint16x8_t __a);

uint16x4_t vqmovn_u32 (uint32x4_t __a);

uint32x2_t vqmovn_u64 (uint64x2_t __a);

/*--4、Vector saturating narrow integer signed->unsigned(窄指令): copies each element of

the operand vector to the corresponding element of the destination vector.

The result element is half the width of the operand element,

and values are saturated to the result width.

The elements in the operand are signed and the elements in the result are unsigned.--*/

uint8x8_t vqmovun_s16 (int16x8_t __a);//_mm_packus_epi16

uint16x4_t vqmovun_s32 (int32x4_t __a);//_mm_packus_epi32

uint32x2_t vqmovun_s64 (int64x2_t __a);

/******************************************************Table lookup*********************/

/*--1、Table lookup: vtbl -> uses byte indexes in a control vector to look up byte

values in a table and generate a new vector. Indexes out of range return 0.

The table is in Vector1 and uses one(or two or three or four)D registers.--*/

int8x8_t vtbl1_s8 (int8x8_t __a, int8x8_t __b);

uint8x8_t vtbl1_u8 (uint8x8_t __a, uint8x8_t __b);

poly8x8_t vtbl1_p8 (poly8x8_t __a, uint8x8_t __b);

int8x8_t vtbl2_s8 (int8x8x2_t __a, int8x8_t __b);

uint8x8_t vtbl2_u8 (uint8x8x2_t __a, uint8x8_t __b);

poly8x8_t vtbl2_p8 (poly8x8x2_t __a, uint8x8_t __b);

int8x8_t vtbl3_s8 (int8x8x3_t __a, int8x8_t __b);

uint8x8_t vtbl3_u8 (uint8x8x3_t __a, uint8x8_t __b);

poly8x8_t vtbl3_p8 (poly8x8x3_t __a, uint8x8_t __b);

int8x8_t vtbl4_s8 (int8x8x4_t __a, int8x8_t __b);

uint8x8_t vtbl4_u8 (uint8x8x4_t __a, uint8x8_t __b);

poly8x8_t vtbl4_p8 (poly8x8x4_t __a, uint8x8_t __b);

/*--2、Extended table lookup: vtbx -> uses byte indexes in a control vector to look up

byte values in a table and generate a new vector. Indexes out of range leave the

destination element unchanged. The table is in Vector2 and uses one(or two or three or

four) D register. Vector1 contains the elements of the destination vector.--*/

int8x8_t vtbx1_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);

uint8x8_t vtbx1_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);

poly8x8_t vtbx1_p8 (poly8x8_t __a, poly8x8_t __b, uint8x8_t __c);

int8x8_t vtbx2_s8 (int8x8_t __a, int8x8x2_t __b, int8x8_t __c);

uint8x8_t vtbx2_u8 (uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c);

poly8x8_t vtbx2_p8 (poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c);

int8x8_t vtbx3_s8 (int8x8_t __a, int8x8x3_t __b, int8x8_t __c);

uint8x8_t vtbx3_u8 (uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c);

poly8x8_t vtbx3_p8 (poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c);

int8x8_t vtbx4_s8 (int8x8_t __a, int8x8x4_t __b, int8x8_t __c);

uint8x8_t vtbx4_u8 (uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c);

poly8x8_t vtbx4_p8 (poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c);

/***************************************Multiply, scalar, lane**************************/

/*--1、Vector multiply by scalar: vmul -> ri = ai * b;

multiplies each element in a vector by a scalar,

and places the results in the destination vector.--*/

int16x4_t vmul_n_s16 (int16x4_t __a, int16_t __b);

int32x2_t vmul_n_s32 (int32x2_t __a, int32_t __b);

float32x2_t vmul_n_f32 (float32x2_t __a, float32_t __b);

uint16x4_t vmul_n_u16 (uint16x4_t __a, uint16_t __b);

uint32x2_t vmul_n_u32 (uint32x2_t __a, uint32_t __b);

int16x8_t vmulq_n_s16 (int16x8_t __a, int16_t __b);

int32x4_t vmulq_n_s32 (int32x4_t __a, int32_t __b);

float32x4_t vmulq_n_f32 (float32x4_t __a, float32_t __b);

uint16x8_t vmulq_n_u16 (uint16x8_t __a, uint16_t __b);

uint32x4_t vmulq_n_u32 (uint32x4_t __a, uint32_t __b);

/*--2、Vector multiply by scalar: -> ri = ai * b[c];

multiplies the first vector by a scalar.

The scalar is the element in the second vector with index c.--*/

int16x4_t vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);

float32x2_t vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __c);

uint16x4_t vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);

uint32x2_t vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);

int16x8_t vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c);

int32x4_t vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c);

float32x4_t vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __c);

uint16x8_t vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __c);

uint32x4_t vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __c);

/*--3、Vector long multiply with scalar: vmull -> ri = ai * b;

multiplies a vector by a scalar.

Elements in the result are wider than elements in input vector.--*/

int32x4_t vmull_n_s16 (int16x4_t __a, int16_t __b);

int64x2_t vmull_n_s32 (int32x2_t __a, int32_t __b);

uint32x4_t vmull_n_u16 (uint16x4_t __a, uint16_t __b);

uint64x2_t vmull_n_u32 (uint32x2_t __a, uint32_t __b);

/*--4、Vector long multiply by scalar: vmull -> ri = ai * b[c];

multiplies the first vector by a scalar.

The scalar is the element in the second vector with index c.

The elements in the result are wider than the elements in input vector.--*/

int32x4_t vmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int64x2_t vmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);

uint32x4_t vmull_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);

uint64x2_t vmull_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);

/*--5、Vector saturating doubling long multiply with scalar: vqdmull -> ri = sat(ai * b);

multiplies the elements in the vector by a scalar, and doubles the results.

If any of the results overflow, they are saturated and the sticky QC flag is set.--*/

int32x4_t vqdmull_n_s16 (int16x4_t __a, int16_t __b);

int64x2_t vqdmull_n_s32 (int32x2_t __a, int32_t __b);

/*--6、Vector saturating doubling long multiply by scalar: vqdmull -> ri = sat(ai * b[c]);

multiplies the elements in the first vector by a scalar, and doubles the results.

The scalar has index c in the second vector. If any of the results overflow,

they are saturated and the sticky QC flag is set.--*/

int32x4_t vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int64x2_t vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);

/*--7、Vector saturating doubling multiply high with scalar: vqdmulh -> ri = sat(ai * b)

multiplies the elements of the vector by a scalar, and doubles the results.

It then returns only the high half of the results.

If any of the results overflow, they are saturated and the sticky QC flag is set.--*/

int16x4_t vqdmulh_n_s16 (int16x4_t __a, int16_t __b);

int32x2_t vqdmulh_n_s32 (int32x2_t __a, int32_t __b);

int16x8_t vqdmulhq_n_s16 (int16x8_t __a, int16_t __b);

int32x4_t vqdmulhq_n_s32 (int32x4_t __a, int32_t __b);

/*--8、Vector saturating doubling multiply high by scalar:

vqdmulh -> ri = sat(ai * b[c]);

multiplies the elements of the first vector by a scalar, and doubles the results. It then

returns only the high half of the results. The scalar has index c in the second vector.

If any of the results overflow, they are saturated and the sticky QC flag is set.--*/

int16x4_t vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);

int16x8_t vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c);

int32x4_t vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c);

/*--9、Vector saturating rounding doubling multiply high with scalar:

vqrdmulh -> ri = sat(ai * b);

multiplies the elements of the vector by a scalar and doubles the results.

It then returns only the high half of the rounded results.

If any of the results overflow, they are saturated and the sticky QC flag is set.--*/

int16x4_t vqrdmulh_n_s16 (int16x4_t __a, int16_t __b);

int32x2_t vqrdmulh_n_s32 (int32x2_t __a, int32_t __b);

int16x8_t vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b);

int32x4_t vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b);

/*--10、Vector rounding saturating doubling multiply high by scalar:

vqrdmulh -> ri = sat(ai * b[c]);

multiplies the elements of the first vector by a scalar and doubles the results.

It then returns only the high half of the rounded results.

The scalar has index c in the second vector. If any of the results overflow,

they are saturated and the sticky QC flag is set.--*/

int16x4_t vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);

int16x8_t vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c);

int32x4_t vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c);

/*--11、Vector multiply accumulate with scalar: vmla -> ri = ai + bi * c;

multiplies each element in the second vector by a scalar,

and adds the results to the corresponding elements of the first vector.--*/

int16x4_t vmla_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c);

int32x2_t vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c);

float32x2_t vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c);

uint16x4_t vmla_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c);

uint32x2_t vmla_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c);

int16x8_t vmlaq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c);

int32x4_t vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c);

float32x4_t vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c);

uint16x8_t vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c);

uint32x4_t vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c);

/*--12、Vector multiply accumulate by scalar: vmla -> ri = ai + bi * c[d];

multiplies each element in the second vector by a scalar,

and adds the results to the corresponding elements of the first vector.

The scalar has index d in the third vector.--*/

int16x4_t vmla_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);

int32x2_t vmla_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);

float32x2_t vmla_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c,

const int __d);

uint16x4_t vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d);

uint32x2_t vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);

int16x8_t vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d);

int32x4_t vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d);

float32x4_t vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c,

const int __d);

uint16x8_t vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d);

uint32x4_t vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d);

/*--13、Vector widening multiply accumulate with scalar: vmlal -> ri = ai + bi * c;

multiplies each element in the second vector by a scalar, and adds the results into the

corresponding elements of the first vector.

The scalar c is passed directly as the third argument. The elements in the result are wider.--*/

int32x4_t vmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);

int64x2_t vmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);

uint32x4_t vmlal_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c);

uint64x2_t vmlal_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c);

/*--14、Vector widening multiply accumulate by scalar: vmlal -> ri = ai + bi * c[d];

multiplies each element in the second vector by a scalar, and adds the results to the

corresponding elements of the first vector. The scalar has index d in the third vector.

The elements in the result are wider.--*/

int32x4_t vmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);

int64x2_t vmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);

uint32x4_t vmlal_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d);

uint64x2_t vmlal_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);

/*--15、Vector widening saturating doubling multiply accumulate with scalar:

vqdmlal -> ri = sat(ai + 2 * bi * c);

multiplies the elements in the second vector by a scalar, and doubles the results.

It then adds the results to the elements in the first vector.

If any of the results overflow, they are saturated and the sticky QC flag is set.--*/

int32x4_t vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);

int64x2_t vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);

/*--16、Vector widening saturating doubling multiply accumulate by scalar:

vqdmlal -> ri = sat(ai + 2 * bi * c[d]);

multiplies each element in the second vector by a scalar, doubles the results and adds

them to the corresponding elements of the first vector. The scalar has index d in the

third vector. If any of the results overflow,

they are saturated and the sticky QC flag is set.--*/

int32x4_t vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);

int64x2_t vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);

/*--17、Vector multiply subtract with scalar: vmls -> ri = ai - bi * c;

multiplies each element in a vector by a scalar, subtracts the results from the

corresponding elements of the destination vector,

and places the final results in the destination vector.--*/

int16x4_t vmls_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c);

int32x2_t vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c);

float32x2_t vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c);

uint16x4_t vmls_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c);

uint32x2_t vmls_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c);

int16x8_t vmlsq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c);

int32x4_t vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c);

float32x4_t vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c);

uint16x8_t vmlsq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c);

uint32x4_t vmlsq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c);

/*--18、Vector multiply subtract by scalar: vmls -> ri = ai - bi * c[d];

multiplies each element in the second vector by a scalar, and subtracts them from the

corresponding elements of the first vector.

The scalar has index d in the third vector.--*/

int16x4_t vmls_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);

int32x2_t vmls_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);

float32x2_t vmls_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c,

const int __d);

uint16x4_t vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d);

uint32x2_t vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);

int16x8_t vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d);

int32x4_t vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d);

float32x4_t vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c,

const int __d);

uint16x8_t vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d);

uint32x4_t vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d);

/*--19、Vector widening multiply subtract with scalar: vmlsl -> ri = ai - bi * c;

multiplies the elements in the second vector by a scalar, then subtracts the results from

the elements in the first vector. The elements of the result are wider.--*/

int32x4_t vmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);

int64x2_t vmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);

uint32x4_t vmlsl_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c);

uint64x2_t vmlsl_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c);

/*--20、Vector widening multiply subtract by scalar: vmlsl -> ri = ai - bi * c[d];

multiplies each element in the second vector by a scalar,

and subtracts them from the corresponding elements of the first vector.

The scalar has index d in the third vector. The elements in the result are wider.--*/

int32x4_t vmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);

int64x2_t vmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);

uint32x4_t vmlsl_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d); /* unsigned 16-bit lanes widened to 32-bit */

uint64x2_t vmlsl_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);

/*--21、Vector widening saturating doubling multiply subtract with scalar:

vqdmlsl -> ri = sat(ai - 2 * bi * c);

multiplies the elements of the second vector with a scalar and doubles the results.

It then subtracts the results from the elements in the first vector.

If any of the results overflow, they are saturated and the sticky QC flag is set.--*/

int32x4_t vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);

int64x2_t vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);

/*--22、Vector widening saturating doubling multiply subtract by scalar:

vqdmlsl -> ri = sat(ai - 2 * bi * c[d]);

multiplies each element in the second vector by a scalar, doubles the results and subtracts

them from the corresponding elements of the first vector. The scalar has index d in the

third vector. If any of the results overflow,

they are saturated and the sticky QC flag is set.--*/

int32x4_t vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);

int64x2_t vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);

/*****************************************************Vector extract********************/

/*--Vector extract: vext -> extracts n elements from the lower end of the second operand

vector and the remaining elements from the higher end of the first, and combines them to

form the result vector. The elements from the second operand are placed in the most

significant part of the result vector. The elements from the first operand are placed in

the least significant part of the result vector. This intrinsic cycles the elements

through the lanes if the two input vectors are the same.--*/

int8x8_t vext_s8 (int8x8_t __a, int8x8_t __b, const int __c);

int16x4_t vext_s16 (int16x4_t __a, int16x4_t __b, const int __c);

int32x2_t vext_s32 (int32x2_t __a, int32x2_t __b, const int __c);

int64x1_t vext_s64 (int64x1_t __a, int64x1_t __b, const int __c);

float32x2_t vext_f32 (float32x2_t __a, float32x2_t __b, const int __c);

uint8x8_t vext_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);

uint16x4_t vext_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);

uint32x2_t vext_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);

uint64x1_t vext_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);

poly8x8_t vext_p8 (poly8x8_t __a, poly8x8_t __b, const int __c);

poly16x4_t vext_p16 (poly16x4_t __a, poly16x4_t __b, const int __c);

int8x16_t vextq_s8 (int8x16_t __a, int8x16_t __b, const int __c);//_mm_alignr_epi8

int16x8_t vextq_s16 (int16x8_t __a, int16x8_t __b, const int __c);//_mm_alignr_epi8

int32x4_t vextq_s32 (int32x4_t __a, int32x4_t __b, const int __c);//_mm_alignr_epi8

int64x2_t vextq_s64 (int64x2_t __a, int64x2_t __b, const int __c);//_mm_alignr_epi8

float32x4_t vextq_f32 (float32x4_t __a, float32x4_t __b, const int __c);//_mm_alignr_epi8

uint8x16_t vextq_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);//_mm_alignr_epi8

uint16x8_t vextq_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);//_mm_alignr_epi8

uint32x4_t vextq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);//_mm_alignr_epi8

uint64x2_t vextq_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);//_mm_alignr_epi8

poly8x16_t vextq_p8 (poly8x16_t __a, poly8x16_t __b, const int __c);//_mm_alignr_epi8

poly16x8_t vextq_p16 (poly16x8_t __a, poly16x8_t __b, const int __c);//_mm_alignr_epi8

/****************************************************Reverse elements*******************/

/*--1、Reverse vector elements (swap endianness): vrev64 -> reverses the order of 8-bit,

16-bit, or 32-bit elements within each doubleword of the vector,

and places the result in the corresponding destination vector.--*/

int8x8_t vrev64_s8 (int8x8_t __a);

int16x4_t vrev64_s16 (int16x4_t __a);

int32x2_t vrev64_s32 (int32x2_t __a);

float32x2_t vrev64_f32 (float32x2_t __a);//_mm_shuffle_ps

uint8x8_t vrev64_u8 (uint8x8_t __a);

uint16x4_t vrev64_u16 (uint16x4_t __a);

uint32x2_t vrev64_u32 (uint32x2_t __a);

poly8x8_t vrev64_p8 (poly8x8_t __a);

poly16x4_t vrev64_p16 (poly16x4_t __a);

int8x16_t vrev64q_s8 (int8x16_t __a);

int16x8_t vrev64q_s16 (int16x8_t __a);

int32x4_t vrev64q_s32 (int32x4_t __a);

float32x4_t vrev64q_f32 (float32x4_t __a);//_mm_shuffle_ps

uint8x16_t vrev64q_u8 (uint8x16_t __a);

uint16x8_t vrev64q_u16 (uint16x8_t __a);

uint32x4_t vrev64q_u32 (uint32x4_t __a);

poly8x16_t vrev64q_p8 (poly8x16_t __a);

poly16x8_t vrev64q_p16 (poly16x8_t __a);

/*--2、Reverse vector elements (swap endianness): vrev32 -> reverses the order of 8-bit

or 16-bit elements within each word of the vector,

and places the result in the corresponding destination vector.--*/

int8x8_t vrev32_s8 (int8x8_t __a);

int16x4_t vrev32_s16 (int16x4_t __a);

uint8x8_t vrev32_u8 (uint8x8_t __a);

uint16x4_t vrev32_u16 (uint16x4_t __a);

poly8x8_t vrev32_p8 (poly8x8_t __a);

poly16x4_t vrev32_p16 (poly16x4_t __a);

int8x16_t vrev32q_s8 (int8x16_t __a);

int16x8_t vrev32q_s16 (int16x8_t __a);

uint8x16_t vrev32q_u8 (uint8x16_t __a);

uint16x8_t vrev32q_u16 (uint16x8_t __a);

poly8x16_t vrev32q_p8 (poly8x16_t __a);

poly16x8_t vrev32q_p16 (poly16x8_t __a);

/*--3、Reverse vector elements (swap endianness): vrev16 -> reverses the order

of 8-bit elements within each halfword of the vector,

and places the result in the corresponding destination vector.--*/

int8x8_t vrev16_s8 (int8x8_t __a);

uint8x8_t vrev16_u8 (uint8x8_t __a);

poly8x8_t vrev16_p8 (poly8x8_t __a);

int8x16_t vrev16q_s8 (int8x16_t __a);

uint8x16_t vrev16q_u8 (uint8x16_t __a);

poly8x16_t vrev16q_p8 (poly8x16_t __a);

/**********************************************************Bitwise Select***************/

/*--Bitwise Select: vbsl -> selects each bit of the result from the second argument (b)

if the corresponding bit of the first argument (the mask a) is 1,

or from the third argument (c) if the corresponding mask bit is 0.--*/

int8x8_t vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c);

int16x4_t vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c);

int32x2_t vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c);

int64x1_t vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c);

float32x2_t vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c);

uint8x8_t vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);

uint16x4_t vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);

uint32x2_t vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);

uint64x1_t vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c);

poly8x8_t vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c);

poly16x4_t vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c);

int8x16_t vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c);

int16x8_t vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c);

int32x4_t vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c);

int64x2_t vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c);

float32x4_t vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c);

uint8x16_t vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);

uint16x8_t vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);

uint32x4_t vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);

uint64x2_t vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c);

poly8x16_t vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c);

poly16x8_t vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c);

/************************************Transposition operations***************************/

/*--1、Transpose elements: vtrn -> treats the elements of its input vectors as elements

of 2 x 2 matrices, and transposes the matrices. Essentially, it exchanges the elements

with odd indices from Vector1 with the elements with even indices from Vector2.--*/

int8x8x2_t vtrn_s8 (int8x8_t __a, int8x8_t __b);

int16x4x2_t vtrn_s16 (int16x4_t __a, int16x4_t __b);

uint8x8x2_t vtrn_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4x2_t vtrn_u16 (uint16x4_t __a, uint16x4_t __b);

poly8x8x2_t vtrn_p8 (poly8x8_t __a, poly8x8_t __b);

poly16x4x2_t vtrn_p16 (poly16x4_t __a, poly16x4_t __b);

int32x2x2_t vtrn_s32 (int32x2_t __a, int32x2_t __b)

float32x2x2_t vtrn_f32 (float32x2_t __a, float32x2_t __b)

uint32x2x2_t vtrn_u32 (uint32x2_t __a, uint32x2_t __b)

int8x16x2_t vtrnq_s8 (int8x16_t __a, int8x16_t __b)

int16x8x2_t vtrnq_s16 (int16x8_t __a, int16x8_t __b)

int32x4x2_t vtrnq_s32 (int32x4_t __a, int32x4_t __b)

float32x4x2_t vtrnq_f32 (float32x4_t __a, float32x4_t __b)

uint8x16x2_t vtrnq_u8 (uint8x16_t __a, uint8x16_t __b)

uint16x8x2_t vtrnq_u16 (uint16x8_t __a, uint16x8_t __b)

uint32x4x2_t vtrnq_u32 (uint32x4_t __a, uint32x4_t __b);

poly8x16x2_t vtrnq_p8 (poly8x16_t __a, poly8x16_t __b);

poly16x8x2_t vtrnq_p16 (poly16x8_t __a, poly16x8_t __b);

/*--2、Interleave elements(Zip elements):

vzip -> (Vector Zip) interleaves the elements of two vectors.--*/

int8x8x2_t vzip_s8 (int8x8_t __a, int8x8_t __b);

int16x4x2_t vzip_s16 (int16x4_t __a, int16x4_t __b);

uint8x8x2_t vzip_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4x2_t vzip_u16 (uint16x4_t __a, uint16x4_t __b);

poly8x8x2_t vzip_p8 (poly8x8_t __a, poly8x8_t __b);

poly16x4x2_t vzip_p16 (poly16x4_t __a, poly16x4_t __b);

int32x2x2_t vzip_s32 (int32x2_t __a, int32x2_t __b);

float32x2x2_t vzip_f32 (float32x2_t __a, float32x2_t __b);

uint32x2x2_t vzip_u32 (uint32x2_t __a, uint32x2_t __b);

int8x16x2_t vzipq_s8 (int8x16_t __a, int8x16_t __b);

int16x8x2_t vzipq_s16 (int16x8_t __a, int16x8_t __b);

int32x4x2_t vzipq_s32 (int32x4_t __a, int32x4_t __b);

float32x4x2_t vzipq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16x2_t vzipq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8x2_t vzipq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4x2_t vzipq_u32 (uint32x4_t __a, uint32x4_t __b);

poly8x16x2_t vzipq_p8 (poly8x16_t __a, poly8x16_t __b);

poly16x8x2_t vzipq_p16 (poly16x8_t __a, poly16x8_t __b);

/*--3、De-Interleave elements(Unzip elements):

vuzp -> (Vector Unzip) de-interleaves the elements of two vectors.

De-interleaving is the inverse process of interleaving.--*/

int8x8x2_t vuzp_s8 (int8x8_t __a, int8x8_t __b);

int16x4x2_t vuzp_s16 (int16x4_t __a, int16x4_t __b);

int32x2x2_t vuzp_s32 (int32x2_t __a, int32x2_t __b);

float32x2x2_t vuzp_f32 (float32x2_t __a, float32x2_t __b);

uint8x8x2_t vuzp_u8 (uint8x8_t __a, uint8x8_t __b);

uint16x4x2_t vuzp_u16 (uint16x4_t __a, uint16x4_t __b);

uint32x2x2_t vuzp_u32 (uint32x2_t __a, uint32x2_t __b);

poly8x8x2_t vuzp_p8 (poly8x8_t __a, poly8x8_t __b);

poly16x4x2_t vuzp_p16 (poly16x4_t __a, poly16x4_t __b);

int8x16x2_t vuzpq_s8 (int8x16_t __a, int8x16_t __b);

int16x8x2_t vuzpq_s16 (int16x8_t __a, int16x8_t __b);

int32x4x2_t vuzpq_s32 (int32x4_t __a, int32x4_t __b);

float32x4x2_t vuzpq_f32 (float32x4_t __a, float32x4_t __b);

uint8x16x2_t vuzpq_u8 (uint8x16_t __a, uint8x16_t __b);

uint16x8x2_t vuzpq_u16 (uint16x8_t __a, uint16x8_t __b);

uint32x4x2_t vuzpq_u32 (uint32x4_t __a, uint32x4_t __b);

poly8x16x2_t vuzpq_p8 (poly8x16_t __a, poly8x16_t __b);

poly16x8x2_t vuzpq_p16 (poly16x8_t __a, poly16x8_t __b);

/*********************************************************Load**************************/

/*--1、Load a single vector from memory: vld1 -> loads a vector from memory.--*/

int8x8_t vld1_s8 (const int8_t * __a);

int16x4_t vld1_s16 (const int16_t * __a);

int32x2_t vld1_s32 (const int32_t * __a);

int64x1_t vld1_s64 (const int64_t * __a);

float32x2_t vld1_f32 (const float32_t * __a);

uint8x8_t vld1_u8 (const uint8_t * __a);//_mm_loadl_epi64

uint16x4_t vld1_u16 (const uint16_t * __a);//_mm_loadl_epi64

uint32x2_t vld1_u32 (const uint32_t * __a);//_mm_loadl_epi64

uint64x1_t vld1_u64 (const uint64_t * __a);//_mm_loadl_epi64

poly8x8_t vld1_p8 (const poly8_t * __a);

poly16x4_t vld1_p16 (const poly16_t * __a);

int8x16_t vld1q_s8 (const int8_t * __a);

int16x8_t vld1q_s16 (const int16_t * __a);

int32x4_t vld1q_s32 (const int32_t * __a);

int64x2_t vld1q_s64 (const int64_t * __a);

float32x4_t vld1q_f32 (const float32_t * __a);

uint8x16_t vld1q_u8 (const uint8_t * __a);

uint16x8_t vld1q_u16 (const uint16_t * __a);

uint32x4_t vld1q_u32 (const uint32_t * __a);

uint64x2_t vld1q_u64 (const uint64_t * __a);

poly8x16_t vld1q_p8 (const poly8_t * __a);

poly16x8_t vld1q_p16 (const poly16_t * __a);

/*--2、Load a single lane from memory: vld1 -> loads one element of the input vector

from memory and returns this in the result vector. Elements of the vector that are not

loaded are returned in the result vector unaltered.

c is the index of the element to load.--*/

int8x8_t vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c);//_mm_insert_epi8

int16x4_t vld1_lane_s16 (const int16_t * __a, int16x4_t __b,

const int __c);//_mm_insert_epi16

int32x2_t vld1_lane_s32 (const int32_t * __a, int32x2_t __b,

const int __c);//_mm_insert_epi32

float32x2_t vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c);

uint8x8_t vld1_lane_u8 (const uint8_t * __a, uint8x8_t __b,

const int __c);//_mm_insert_epi8

uint16x4_t vld1_lane_u16 (const uint16_t * __a, uint16x4_t __b,

const int __c);//_mm_insert_epi16

uint32x2_t vld1_lane_u32 (const uint32_t * __a, uint32x2_t __b,

const int __c);//_mm_insert_epi32

poly8x8_t vld1_lane_p8 (const poly8_t * __a, poly8x8_t __b,

const int __c);//_mm_insert_epi8

poly16x4_t vld1_lane_p16 (const poly16_t * __a, poly16x4_t __b,

const int __c);//_mm_insert_epi16

int64x1_t vld1_lane_s64 (const int64_t * __a, int64x1_t __b, const int __c);

uint64x1_t vld1_lane_u64 (const uint64_t * __a, uint64x1_t __b, const int __c);

int8x16_t vld1q_lane_s8 (const int8_t * __a, int8x16_t __b,

const int __c);//_mm_insert_epi8

int16x8_t vld1q_lane_s16 (const int16_t * __a, int16x8_t __b,

const int __c);//_mm_insert_epi16

int32x4_t vld1q_lane_s32 (const int32_t * __a, int32x4_t __b,

const int __c);//_mm_insert_epi32

float32x4_t vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c);

uint8x16_t vld1q_lane_u8 (const uint8_t * __a, uint8x16_t __b,

const int __c);//_mm_insert_epi8

uint16x8_t vld1q_lane_u16 (const uint16_t * __a, uint16x8_t __b,

const int __c);//_mm_insert_epi16

uint32x4_t vld1q_lane_u32 (const uint32_t * __a, uint32x4_t __b,

const int __c);//_mm_insert_epi32

poly8x16_t vld1q_lane_p8 (const poly8_t * __a, poly8x16_t __b,

const int __c);//_mm_insert_epi8

poly16x8_t vld1q_lane_p16 (const poly16_t * __a, poly16x8_t __b,

const int __c);//_mm_insert_epi16

int64x2_t vld1q_lane_s64 (const int64_t * __a, int64x2_t __b,

const int __c);//_mm_insert_epi64

uint64x2_t vld1q_lane_u64 (const uint64_t * __a, uint64x2_t __b,

const int __c);//_mm_insert_epi64

/*--3、Load all lanes of vector with same value from memory: vld1 ->

loads one element in a vector from memory.

The loaded element is copied to all other lanes of the vector.--*/

int8x8_t vld1_dup_s8 (const int8_t * __a);//_mm_set1_epi8

int16x4_t vld1_dup_s16 (const int16_t * __a);//_mm_set1_epi16

int32x2_t vld1_dup_s32 (const int32_t * __a);//_mm_set1_epi32

float32x2_t vld1_dup_f32 (const float32_t * __a);//_mm_set1_ps

uint8x8_t vld1_dup_u8 (const uint8_t * __a);//_mm_set1_epi8

uint16x4_t vld1_dup_u16 (const uint16_t * __a);//_mm_set1_epi16

uint32x2_t vld1_dup_u32 (const uint32_t * __a);//_mm_set1_epi32

poly8x8_t vld1_dup_p8 (const poly8_t * __a);//_mm_set1_epi8

poly16x4_t vld1_dup_p16 (const poly16_t * __a);//_mm_set1_epi16

int64x1_t vld1_dup_s64 (const int64_t * __a);

uint64x1_t vld1_dup_u64 (const uint64_t * __a);

int8x16_t vld1q_dup_s8 (const int8_t * __a);//_mm_set1_epi8

int16x8_t vld1q_dup_s16 (const int16_t * __a);//_mm_set1_epi16

int32x4_t vld1q_dup_s32 (const int32_t * __a);//_mm_set1_epi32

float32x4_t vld1q_dup_f32 (const float32_t * __a);//_mm_set1_ps

uint8x16_t vld1q_dup_u8 (const uint8_t * __a);//_mm_set1_epi8

uint16x8_t vld1q_dup_u16 (const uint16_t * __a);//_mm_set1_epi16

uint32x4_t vld1q_dup_u32 (const uint32_t * __a);//_mm_set1_epi32

poly8x16_t vld1q_dup_p8 (const poly8_t * __a);//_mm_set1_epi8

poly16x8_t vld1q_dup_p16 (const poly16_t * __a);//_mm_set1_epi16

int64x2_t vld1q_dup_s64 (const int64_t * __a);

uint64x2_t vld1q_dup_u64 (const uint64_t * __a);

/*--4、Load 2-element structure from memory: vld2 -> loads 2 vectors from memory.

It performs a 2-way de-interleave from memory to the vectors.--*/

int8x8x2_t vld2_s8 (const int8_t * __a);

int16x4x2_t vld2_s16 (const int16_t * __a);

int32x2x2_t vld2_s32 (const int32_t * __a);

float32x2x2_t vld2_f32 (const float32_t * __a);

uint8x8x2_t vld2_u8 (const uint8_t * __a);

uint16x4x2_t vld2_u16 (const uint16_t * __a);

uint32x2x2_t vld2_u32 (const uint32_t * __a);

poly8x8x2_t vld2_p8 (const poly8_t * __a);

poly16x4x2_t vld2_p16 (const poly16_t * __a);

int64x1x2_t vld2_s64 (const int64_t * __a);

uint64x1x2_t vld2_u64 (const uint64_t * __a);

int8x16x2_t vld2q_s8 (const int8_t * __a);

int16x8x2_t vld2q_s16 (const int16_t * __a);

int32x4x2_t vld2q_s32 (const int32_t * __a);

float32x4x2_t vld2q_f32 (const float32_t * __a);

uint8x16x2_t vld2q_u8 (const uint8_t * __a);

uint16x8x2_t vld2q_u16 (const uint16_t * __a);

uint32x4x2_t vld2q_u32 (const uint32_t * __a);

poly8x16x2_t vld2q_p8 (const poly8_t * __a);

poly16x8x2_t vld2q_p16 (const poly16_t * __a);

/*--5、Load a single lane of 2-element structure from memory: vld2 ->

loads two elements in a double-vector structure from memory and returns this in

the result. The loaded values are from consecutive memory addresses.

Elements in the structure that are not loaded are returned in the result unaltered.

c is the index of the elements to load.--*/

int8x8x2_t vld2_lane_s8 (const int8_t * __a, int8x8x2_t __b, const int __c);

int16x4x2_t vld2_lane_s16 (const int16_t * __a, int16x4x2_t __b, const int __c);

int32x2x2_t vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b, const int __c);

float32x2x2_t vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c);

uint8x8x2_t vld2_lane_u8 (const uint8_t * __a, uint8x8x2_t __b, const int __c);

uint16x4x2_t vld2_lane_u16 (const uint16_t * __a, uint16x4x2_t __b, const int __c);

uint32x2x2_t vld2_lane_u32 (const uint32_t * __a, uint32x2x2_t __b, const int __c);

poly8x8x2_t vld2_lane_p8 (const poly8_t * __a, poly8x8x2_t __b, const int __c);

poly16x4x2_t vld2_lane_p16 (const poly16_t * __a, poly16x4x2_t __b, const int __c);

int16x8x2_t vld2q_lane_s16 (const int16_t * __a, int16x8x2_t __b, const int __c);

int32x4x2_t vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b, const int __c);

float32x4x2_t vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c);

uint16x8x2_t vld2q_lane_u16 (const uint16_t * __a, uint16x8x2_t __b, const int __c);

uint32x4x2_t vld2q_lane_u32 (const uint32_t * __a, uint32x4x2_t __b, const int __c);

poly16x8x2_t vld2q_lane_p16 (const poly16_t * __a, poly16x8x2_t __b, const int __c);

/*--6、Load all lanes of 2-element structure with same value from memory: vld2 ->

loads 2 elements from memory and returns a double-vector structure.

The first element is copied to all lanes of the first vector.

The second element is copied to all lanes of the second vector.--*/

int8x8x2_t vld2_dup_s8 (const int8_t * __a);

int16x4x2_t vld2_dup_s16 (const int16_t * __a);

int32x2x2_t vld2_dup_s32 (const int32_t * __a);

float32x2x2_t vld2_dup_f32 (const float32_t * __a);

uint8x8x2_t vld2_dup_u8 (const uint8_t * __a);

uint16x4x2_t vld2_dup_u16 (const uint16_t * __a);

uint32x2x2_t vld2_dup_u32 (const uint32_t * __a);

poly8x8x2_t vld2_dup_p8 (const poly8_t * __a);

poly16x4x2_t vld2_dup_p16 (const poly16_t * __a);

int64x1x2_t vld2_dup_s64 (const int64_t * __a);

uint64x1x2_t vld2_dup_u64 (const uint64_t * __a);

/*--7、Load 3-element structure from memory: vld3 ->

loads 3 vectors from memory.

It performs a 3-way de-interleave from memory to the vectors.--*/

int8x8x3_t vld3_s8 (const int8_t * __a);

int16x4x3_t vld3_s16 (const int16_t * __a);

int32x2x3_t vld3_s32 (const int32_t * __a);

float32x2x3_t vld3_f32 (const float32_t * __a);

uint8x8x3_t vld3_u8 (const uint8_t * __a);

uint16x4x3_t vld3_u16 (const uint16_t * __a);

uint32x2x3_t vld3_u32 (const uint32_t * __a);

poly8x8x3_t vld3_p8 (const poly8_t * __a);

poly16x4x3_t vld3_p16 (const poly16_t * __a);

int64x1x3_t vld3_s64 (const int64_t * __a);

uint64x1x3_t vld3_u64 (const uint64_t * __a);

int8x16x3_t vld3q_s8 (const int8_t * __a);

int16x8x3_t vld3q_s16 (const int16_t * __a);

int32x4x3_t vld3q_s32 (const int32_t * __a);

float32x4x3_t vld3q_f32 (const float32_t * __a);

uint8x16x3_t vld3q_u8 (const uint8_t * __a);

uint16x8x3_t vld3q_u16 (const uint16_t * __a);

uint32x4x3_t vld3q_u32 (const uint32_t * __a);

poly8x16x3_t vld3q_p8 (const poly8_t * __a);

poly16x8x3_t vld3q_p16 (const poly16_t * __a);

/*--8、Load a single lane of 3-element structure from memory: vld3 ->

loads three elements in a triple-vector structure from memory and returns this in the

result. The loaded values are from consecutive memory addresses.

Elements in the structure that are not loaded are returned in the result unaltered.

c is the index of the element to load.--*/

int8x8x3_t vld3_lane_s8 (const int8_t * __a, int8x8x3_t __b, const int __c);

int16x4x3_t vld3_lane_s16 (const int16_t * __a, int16x4x3_t __b, const int __c);

int32x2x3_t vld3_lane_s32 (const int32_t * __a, int32x2x3_t __b, const int __c);

float32x2x3_t vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c);

uint8x8x3_t vld3_lane_u8 (const uint8_t * __a, uint8x8x3_t __b, const int __c);

uint16x4x3_t vld3_lane_u16 (const uint16_t * __a, uint16x4x3_t __b, const int __c);

uint32x2x3_t vld3_lane_u32 (const uint32_t * __a, uint32x2x3_t __b, const int __c);

poly8x8x3_t vld3_lane_p8 (const poly8_t * __a, poly8x8x3_t __b, const int __c);

poly16x4x3_t vld3_lane_p16 (const poly16_t * __a, poly16x4x3_t __b, const int __c);

int16x8x3_t vld3q_lane_s16 (const int16_t * __a, int16x8x3_t __b, const int __c);

int32x4x3_t vld3q_lane_s32 (const int32_t * __a, int32x4x3_t __b, const int __c);

float32x4x3_t vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c);

uint16x8x3_t vld3q_lane_u16 (const uint16_t * __a, uint16x8x3_t __b, const int __c);

uint32x4x3_t vld3q_lane_u32 (const uint32_t * __a, uint32x4x3_t __b, const int __c);

poly16x8x3_t vld3q_lane_p16 (const poly16_t * __a, poly16x8x3_t __b, const int __c);

/*--9、Load all lanes of 3-element structure with same value from memory: vld3 ->

loads 3 elements from memory and returns a triple-vector structure. The first element

is copied to all lanes of the first vector. And similarly the second and third elements

are copied to the second and third vectors respectively.--*/

int8x8x3_t vld3_dup_s8 (const int8_t * __a);

int16x4x3_t vld3_dup_s16 (const int16_t * __a);

int32x2x3_t vld3_dup_s32 (const int32_t * __a);

float32x2x3_t vld3_dup_f32 (const float32_t * __a);

uint8x8x3_t vld3_dup_u8 (const uint8_t * __a);

uint16x4x3_t vld3_dup_u16 (const uint16_t * __a);

uint32x2x3_t vld3_dup_u32 (const uint32_t * __a);

poly8x8x3_t vld3_dup_p8 (const poly8_t * __a);

poly16x4x3_t vld3_dup_p16 (const poly16_t * __a);

int64x1x3_t vld3_dup_s64 (const int64_t * __a);

uint64x1x3_t vld3_dup_u64 (const uint64_t * __a);

/*--10、Load 4-element structure from memory: vld4 ->

loads 4 vectors from memory.

It performs a 4-way de-interleave from memory to the vectors.--*/

int8x8x4_t vld4_s8 (const int8_t * __a);

int16x4x4_t vld4_s16 (const int16_t * __a);

int32x2x4_t vld4_s32 (const int32_t * __a);

float32x2x4_t vld4_f32 (const float32_t * __a);

uint8x8x4_t vld4_u8 (const uint8_t * __a);

uint16x4x4_t vld4_u16 (const uint16_t * __a);

uint32x2x4_t vld4_u32 (const uint32_t * __a);

poly8x8x4_t vld4_p8 (const poly8_t * __a);

poly16x4x4_t vld4_p16 (const poly16_t * __a);

int64x1x4_t vld4_s64 (const int64_t * __a);

uint64x1x4_t vld4_u64 (const uint64_t * __a);

int8x16x4_t vld4q_s8 (const int8_t * __a);

int16x8x4_t vld4q_s16 (const int16_t * __a);

int32x4x4_t vld4q_s32 (const int32_t * __a);

float32x4x4_t vld4q_f32 (const float32_t * __a);

uint8x16x4_t vld4q_u8 (const uint8_t * __a);

uint16x8x4_t vld4q_u16 (const uint16_t * __a);

uint32x4x4_t vld4q_u32 (const uint32_t * __a);

poly8x16x4_t vld4q_p8 (const poly8_t * __a);

poly16x8x4_t vld4q_p16 (const poly16_t * __a);

/*--11、Load a single lane of 4-element structure from memory: vld4 ->

loads four elements in a quad-vector structure from memory and returns this in the result.

The loaded values are from consecutive memory addresses.

Elements in the structure that are not loaded are returned in the result unaltered.

c is the index of the element to load.--*/

int8x8x4_t vld4_lane_s8 (const int8_t * __a, int8x8x4_t __b, const int __c);

int16x4x4_t vld4_lane_s16 (const int16_t * __a, int16x4x4_t __b, const int __c);

int32x2x4_t vld4_lane_s32 (const int32_t * __a, int32x2x4_t __b, const int __c);

float32x2x4_t vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c);

uint8x8x4_t vld4_lane_u8 (const uint8_t * __a, uint8x8x4_t __b, const int __c);

uint16x4x4_t vld4_lane_u16 (const uint16_t * __a, uint16x4x4_t __b, const int __c);

uint32x2x4_t vld4_lane_u32 (const uint32_t * __a, uint32x2x4_t __b, const int __c);

poly8x8x4_t vld4_lane_p8 (const poly8_t * __a, poly8x8x4_t __b, const int __c);

poly16x4x4_t vld4_lane_p16 (const poly16_t * __a, poly16x4x4_t __b, const int __c);

int16x8x4_t vld4q_lane_s16 (const int16_t * __a, int16x8x4_t __b, const int __c);

int32x4x4_t vld4q_lane_s32 (const int32_t * __a, int32x4x4_t __b, const int __c);

float32x4x4_t vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c);

uint16x8x4_t vld4q_lane_u16 (const uint16_t * __a, uint16x8x4_t __b, const int __c);

uint32x4x4_t vld4q_lane_u32 (const uint32_t * __a, uint32x4x4_t __b, const int __c);

poly16x8x4_t vld4q_lane_p16 (const poly16_t * __a, poly16x8x4_t __b, const int __c);

/*--12、Load all lanes of 4-element structure with same value from memory: vld4 ->

loads 4 elements from memory and returns a quad-vector structure. The first element is

copied to all lanes of the first vector. And similarly the second, third, and fourth

elements are copied to the second, third, and fourth vectors respectively.--*/

int8x8x4_t vld4_dup_s8 (const int8_t * __a);

int16x4x4_t vld4_dup_s16 (const int16_t * __a);

int32x2x4_t vld4_dup_s32 (const int32_t * __a);

float32x2x4_t vld4_dup_f32 (const float32_t * __a);

uint8x8x4_t vld4_dup_u8 (const uint8_t * __a);

uint16x4x4_t vld4_dup_u16 (const uint16_t * __a);

uint32x2x4_t vld4_dup_u32 (const uint32_t * __a);

poly8x8x4_t vld4_dup_p8 (const poly8_t * __a);

poly16x4x4_t vld4_dup_p16 (const poly16_t * __a);

int64x1x4_t vld4_dup_s64 (const int64_t * __a);

uint64x1x4_t vld4_dup_u64 (const uint64_t * __a);

/*****************************************************Store*****************************/

/*--1、Store a single vector into memory: vst1 -> stores a vector into memory.--*/

void vst1_s8 (int8_t * __a, int8x8_t __b);

void vst1_s16 (int16_t * __a, int16x4_t __b);

void vst1_s32 (int32_t * __a, int32x2_t __b);

void vst1_s64 (int64_t * __a, int64x1_t __b);

void vst1_f32 (float32_t * __a, float32x2_t __b);

void vst1_u8 (uint8_t * __a, uint8x8_t __b);

void vst1_u16 (uint16_t * __a, uint16x4_t __b);

void vst1_u32 (uint32_t * __a, uint32x2_t __b);

void vst1_u64 (uint64_t * __a, uint64x1_t __b);

void vst1_p8 (poly8_t * __a, poly8x8_t __b);

void vst1_p16 (poly16_t * __a, poly16x4_t __b);

void vst1q_s8 (int8_t * __a, int8x16_t __b);

void vst1q_s16 (int16_t * __a, int16x8_t __b);

void vst1q_s32 (int32_t * __a, int32x4_t __b);

void vst1q_s64 (int64_t * __a, int64x2_t __b);

void vst1q_f32 (float32_t * __a, float32x4_t __b);

void vst1q_u8 (uint8_t * __a, uint8x16_t __b);

void vst1q_u16 (uint16_t * __a, uint16x8_t __b);

void vst1q_u32 (uint32_t * __a, uint32x4_t __b);

void vst1q_u64 (uint64_t * __a, uint64x2_t __b);

void vst1q_p8 (poly8_t * __a, poly8x16_t __b);

void vst1q_p16 (poly16_t * __a, poly16x8_t __b);

/*--2、Store a single lane into memory: vst1_lane ->

stores one element of the vector __b into memory at __a.

__c is the index of the lane in the vector to be stored.--*/

void vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c);

void vst1_lane_s16 (int16_t * __a, int16x4_t __b, const int __c);

void vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c);

void vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c);

void vst1_lane_u8 (uint8_t * __a, uint8x8_t __b, const int __c);

void vst1_lane_u16 (uint16_t * __a, uint16x4_t __b, const int __c);

void vst1_lane_u32 (uint32_t * __a, uint32x2_t __b, const int __c);

void vst1_lane_p8 (poly8_t * __a, poly8x8_t __b, const int __c);

void vst1_lane_p16 (poly16_t * __a, poly16x4_t __b, const int __c);

void vst1_lane_s64 (int64_t * __a, int64x1_t __b, const int __c);

void vst1_lane_u64 (uint64_t * __a, uint64x1_t __b, const int __c);

void vst1q_lane_s8 (int8_t * __a, int8x16_t __b, const int __c);

void vst1q_lane_s16 (int16_t * __a, int16x8_t __b, const int __c);

void vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c);

void vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c);

void vst1q_lane_u8 (uint8_t * __a, uint8x16_t __b, const int __c);

void vst1q_lane_u16 (uint16_t * __a, uint16x8_t __b, const int __c);

void vst1q_lane_u32 (uint32_t * __a, uint32x4_t __b, const int __c);

void vst1q_lane_p8 (poly8_t * __a, poly8x16_t __b, const int __c);

void vst1q_lane_p16 (poly16_t * __a, poly16x8_t __b, const int __c);

void vst1q_lane_s64 (int64_t * __a, int64x2_t __b, const int __c);

void vst1q_lane_u64 (uint64_t * __a, uint64x2_t __b, const int __c);

/*--3、Store 2 vectors into memory: vst2 ->

stores 2 vectors into memory. It interleaves the 2 vectors into memory.--*/

void vst2_s8 (int8_t * __a, int8x8x2_t __b);

void vst2_s16 (int16_t * __a, int16x4x2_t __b);

void vst2_s32 (int32_t * __a, int32x2x2_t __b);

void vst2_f32 (float32_t * __a, float32x2x2_t __b);

void vst2_u8 (uint8_t * __a, uint8x8x2_t __b);

void vst2_u16 (uint16_t * __a, uint16x4x2_t __b);

void vst2_u32 (uint32_t * __a, uint32x2x2_t __b);

void vst2_p8 (poly8_t * __a, poly8x8x2_t __b);

void vst2_p16 (poly16_t * __a, poly16x4x2_t __b);

void vst2_s64 (int64_t * __a, int64x1x2_t __b);

void vst2_u64 (uint64_t * __a, uint64x1x2_t __b);

void vst2q_s8 (int8_t * __a, int8x16x2_t __b);

void vst2q_s16 (int16_t * __a, int16x8x2_t __b);

void vst2q_s32 (int32_t * __a, int32x4x2_t __b);

void vst2q_f32 (float32_t * __a, float32x4x2_t __b);

void vst2q_u8 (uint8_t * __a, uint8x16x2_t __b);

void vst2q_u16 (uint16_t * __a, uint16x8x2_t __b);

void vst2q_u32 (uint32_t * __a, uint32x4x2_t __b);

void vst2q_p8 (poly8_t * __a, poly8x16x2_t __b);

void vst2q_p16 (poly16_t * __a, poly16x8x2_t __b);

/*--4、Store a lane of two elements into memory: vst2_lane ->

stores a lane of two elements from a double-vector structure into memory.

The elements to be stored are taken from the same lane of each vector; that lane index is __c.--*/

void vst2_lane_s8 (int8_t * __a, int8x8x2_t __b, const int __c);

void vst2_lane_s16 (int16_t * __a, int16x4x2_t __b, const int __c);

void vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, const int __c);

void vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c);

void vst2_lane_u8 (uint8_t * __a, uint8x8x2_t __b, const int __c);

void vst2_lane_u16 (uint16_t * __a, uint16x4x2_t __b, const int __c);

void vst2_lane_u32 (uint32_t * __a, uint32x2x2_t __b, const int __c);

void vst2_lane_p8 (poly8_t * __a, poly8x8x2_t __b, const int __c);

void vst2_lane_p16 (poly16_t * __a, poly16x4x2_t __b, const int __c);

void vst2q_lane_s16 (int16_t * __a, int16x8x2_t __b, const int __c);

void vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, const int __c);

void vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c);

void vst2q_lane_u16 (uint16_t * __a, uint16x8x2_t __b, const int __c);

void vst2q_lane_u32 (uint32_t * __a, uint32x4x2_t __b, const int __c);

void vst2q_lane_p16 (poly16_t * __a, poly16x8x2_t __b, const int __c);

/*--5、Store 3 vectors into memory: vst3 ->

stores 3 vectors into memory. It interleaves the 3 vectors into memory.--*/

void vst3_s8 (int8_t * __a, int8x8x3_t __b);

void vst3_s16 (int16_t * __a, int16x4x3_t __b);

void vst3_s32 (int32_t * __a, int32x2x3_t __b);

void vst3_f32 (float32_t * __a, float32x2x3_t __b);

void vst3_u8 (uint8_t * __a, uint8x8x3_t __b);

void vst3_u16 (uint16_t * __a, uint16x4x3_t __b);

void vst3_u32 (uint32_t * __a, uint32x2x3_t __b);

void vst3_p8 (poly8_t * __a, poly8x8x3_t __b);

void vst3_p16 (poly16_t * __a, poly16x4x3_t __b);

void vst3_s64 (int64_t * __a, int64x1x3_t __b);

void vst3_u64 (uint64_t * __a, uint64x1x3_t __b);

void vst3q_s8 (int8_t * __a, int8x16x3_t __b);

void vst3q_s16 (int16_t * __a, int16x8x3_t __b);

void vst3q_s32 (int32_t * __a, int32x4x3_t __b);

void vst3q_f32 (float32_t * __a, float32x4x3_t __b);

void vst3q_u8 (uint8_t * __a, uint8x16x3_t __b);

void vst3q_u16 (uint16_t * __a, uint16x8x3_t __b);

void vst3q_u32 (uint32_t * __a, uint32x4x3_t __b);

void vst3q_p8 (poly8_t * __a, poly8x16x3_t __b);

void vst3q_p16 (poly16_t * __a, poly16x8x3_t __b);

/*--6、Store a lane of three elements into memory: vst3_lane ->

stores a lane of three elements from a triple-vector structure into memory.

The elements to be stored are taken from the same lane of each vector; that lane index is __c.--*/

void vst3_lane_s8 (int8_t * __a, int8x8x3_t __b, const int __c);

void vst3_lane_s16 (int16_t * __a, int16x4x3_t __b, const int __c);

void vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, const int __c);

void vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c);

void vst3_lane_u8 (uint8_t * __a, uint8x8x3_t __b, const int __c);

void vst3_lane_u16 (uint16_t * __a, uint16x4x3_t __b, const int __c);

void vst3_lane_u32 (uint32_t * __a, uint32x2x3_t __b, const int __c);

void vst3_lane_p8 (poly8_t * __a, poly8x8x3_t __b, const int __c);

void vst3_lane_p16 (poly16_t * __a, poly16x4x3_t __b, const int __c);

void vst3q_lane_s16 (int16_t * __a, int16x8x3_t __b, const int __c);

void vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, const int __c);

void vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c);

void vst3q_lane_u16 (uint16_t * __a, uint16x8x3_t __b, const int __c);

void vst3q_lane_u32 (uint32_t * __a, uint32x4x3_t __b, const int __c);

void vst3q_lane_p16 (poly16_t * __a, poly16x8x3_t __b, const int __c);

/*--7、Store 4 vectors into memory: vst4 ->

stores 4 vectors into memory. It interleaves the 4 vectors into memory.--*/

void vst4_s8 (int8_t * __a, int8x8x4_t __b);

void vst4_s16 (int16_t * __a, int16x4x4_t __b);

void vst4_s32 (int32_t * __a, int32x2x4_t __b);

void vst4_f32 (float32_t * __a, float32x2x4_t __b);

void vst4_u8 (uint8_t * __a, uint8x8x4_t __b);

void vst4_u16 (uint16_t * __a, uint16x4x4_t __b);

void vst4_u32 (uint32_t * __a, uint32x2x4_t __b);

void vst4_p8 (poly8_t * __a, poly8x8x4_t __b);

void vst4_p16 (poly16_t * __a, poly16x4x4_t __b);

void vst4_s64 (int64_t * __a, int64x1x4_t __b);

void vst4_u64 (uint64_t * __a, uint64x1x4_t __b);

void vst4q_s8 (int8_t * __a, int8x16x4_t __b);

void vst4q_s16 (int16_t * __a, int16x8x4_t __b);

void vst4q_s32 (int32_t * __a, int32x4x4_t __b);

void vst4q_f32 (float32_t * __a, float32x4x4_t __b);

void vst4q_u8 (uint8_t * __a, uint8x16x4_t __b);

void vst4q_u16 (uint16_t * __a, uint16x8x4_t __b);

void vst4q_u32 (uint32_t * __a, uint32x4x4_t __b);

void vst4q_p8 (poly8_t * __a, poly8x16x4_t __b);

void vst4q_p16 (poly16_t * __a, poly16x8x4_t __b);

/*--8、Store a lane of four elements into memory: vst4_lane ->

stores a lane of four elements from a quad-vector structure into memory.

The elements to be stored are taken from the same lane of each vector; that lane index is __c.--*/

void vst4_lane_s8 (int8_t * __a, int8x8x4_t __b, const int __c);

void vst4_lane_s16 (int16_t * __a, int16x4x4_t __b, const int __c);

void vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, const int __c);

void vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c);

void vst4_lane_u8 (uint8_t * __a, uint8x8x4_t __b, const int __c);

void vst4_lane_u16 (uint16_t * __a, uint16x4x4_t __b, const int __c);

void vst4_lane_u32 (uint32_t * __a, uint32x2x4_t __b, const int __c);

void vst4_lane_p8 (poly8_t * __a, poly8x8x4_t __b, const int __c);

void vst4_lane_p16 (poly16_t * __a, poly16x4x4_t __b, const int __c);

void vst4q_lane_s16 (int16_t * __a, int16x8x4_t __b, const int __c);

void vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, const int __c);

void vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c);

void vst4q_lane_u16 (uint16_t * __a, uint16x8x4_t __b, const int __c);

void vst4q_lane_u32 (uint32_t * __a, uint32x4x4_t __b, const int __c);

void vst4q_lane_p16 (poly16_t * __a, poly16x8x4_t __b, const int __c);

/*********************************Reinterpret casts(type conversion)********************/

/*--Convert between types: vreinterpret -> treats a vector as having a different

datatype, without changing its bits (no value conversion is performed).--*/

poly8x8_t vreinterpret_p8_s8 (int8x8_t __a);

poly8x8_t vreinterpret_p8_s16 (int16x4_t __a);

poly8x8_t vreinterpret_p8_s32 (int32x2_t __a);

poly8x8_t vreinterpret_p8_s64 (int64x1_t __a);

poly8x8_t vreinterpret_p8_f32 (float32x2_t __a);

poly8x8_t vreinterpret_p8_u8 (uint8x8_t __a);

poly8x8_t vreinterpret_p8_u16 (uint16x4_t __a);

poly8x8_t vreinterpret_p8_u32 (uint32x2_t __a);

poly8x8_t vreinterpret_p8_u64 (uint64x1_t __a);

poly8x8_t vreinterpret_p8_p16 (poly16x4_t __a);

poly8x16_t vreinterpretq_p8_s8 (int8x16_t __a);

poly8x16_t vreinterpretq_p8_s16 (int16x8_t __a);

poly8x16_t vreinterpretq_p8_s32 (int32x4_t __a);

poly8x16_t vreinterpretq_p8_s64 (int64x2_t __a);

poly8x16_t vreinterpretq_p8_f32 (float32x4_t __a);

poly8x16_t vreinterpretq_p8_u8 (uint8x16_t __a);

poly8x16_t vreinterpretq_p8_u16 (uint16x8_t __a);

poly8x16_t vreinterpretq_p8_u32 (uint32x4_t __a);

poly8x16_t vreinterpretq_p8_u64 (uint64x2_t __a);

poly8x16_t vreinterpretq_p8_p16 (poly16x8_t __a);

poly16x4_t vreinterpret_p16_s8 (int8x8_t __a);

poly16x4_t vreinterpret_p16_s16 (int16x4_t __a);

poly16x4_t vreinterpret_p16_s32 (int32x2_t __a);

poly16x4_t vreinterpret_p16_s64 (int64x1_t __a);

poly16x4_t vreinterpret_p16_f32 (float32x2_t __a);

poly16x4_t vreinterpret_p16_u8 (uint8x8_t __a);

poly16x4_t vreinterpret_p16_u16 (uint16x4_t __a);

poly16x4_t vreinterpret_p16_u32 (uint32x2_t __a);

poly16x4_t vreinterpret_p16_u64 (uint64x1_t __a);

poly16x4_t vreinterpret_p16_p8 (poly8x8_t __a);

poly16x8_t vreinterpretq_p16_s8 (int8x16_t __a);

poly16x8_t vreinterpretq_p16_s16 (int16x8_t __a);

poly16x8_t vreinterpretq_p16_s32 (int32x4_t __a);

poly16x8_t vreinterpretq_p16_s64 (int64x2_t __a);

poly16x8_t vreinterpretq_p16_f32 (float32x4_t __a);

poly16x8_t vreinterpretq_p16_u8 (uint8x16_t __a);

poly16x8_t vreinterpretq_p16_u16 (uint16x8_t __a);

poly16x8_t vreinterpretq_p16_u32 (uint32x4_t __a);

poly16x8_t vreinterpretq_p16_u64 (uint64x2_t __a);

poly16x8_t vreinterpretq_p16_p8 (poly8x16_t __a);

float32x2_t vreinterpret_f32_s8 (int8x8_t __a);

float32x2_t vreinterpret_f32_s16 (int16x4_t __a);

float32x2_t vreinterpret_f32_s32 (int32x2_t __a);

float32x2_t vreinterpret_f32_s64 (int64x1_t __a);

float32x2_t vreinterpret_f32_u8 (uint8x8_t __a);

float32x2_t vreinterpret_f32_u16 (uint16x4_t __a);

float32x2_t vreinterpret_f32_u32 (uint32x2_t __a);

float32x2_t vreinterpret_f32_u64 (uint64x1_t __a);

float32x2_t vreinterpret_f32_p8 (poly8x8_t __a);

float32x2_t vreinterpret_f32_p16 (poly16x4_t __a);

float32x4_t vreinterpretq_f32_s8 (int8x16_t __a);

float32x4_t vreinterpretq_f32_s16 (int16x8_t __a);

float32x4_t vreinterpretq_f32_s32 (int32x4_t __a);

float32x4_t vreinterpretq_f32_s64 (int64x2_t __a);

float32x4_t vreinterpretq_f32_u8 (uint8x16_t __a);

float32x4_t vreinterpretq_f32_u16 (uint16x8_t __a);

float32x4_t vreinterpretq_f32_u32 (uint32x4_t __a);

float32x4_t vreinterpretq_f32_u64 (uint64x2_t __a);

float32x4_t vreinterpretq_f32_p8 (poly8x16_t __a);

float32x4_t vreinterpretq_f32_p16 (poly16x8_t __a);

int64x1_t vreinterpret_s64_s8 (int8x8_t __a);

int64x1_t vreinterpret_s64_s16 (int16x4_t __a);

int64x1_t vreinterpret_s64_s32 (int32x2_t __a);

int64x1_t vreinterpret_s64_f32 (float32x2_t __a);

int64x1_t vreinterpret_s64_u8 (uint8x8_t __a);

int64x1_t vreinterpret_s64_u16 (uint16x4_t __a);

int64x1_t vreinterpret_s64_u32 (uint32x2_t __a);

int64x1_t vreinterpret_s64_u64 (uint64x1_t __a);

int64x1_t vreinterpret_s64_p8 (poly8x8_t __a);

int64x1_t vreinterpret_s64_p16 (poly16x4_t __a);

int64x2_t vreinterpretq_s64_s8 (int8x16_t __a);

int64x2_t vreinterpretq_s64_s16 (int16x8_t __a);

int64x2_t vreinterpretq_s64_s32 (int32x4_t __a);

int64x2_t vreinterpretq_s64_f32 (float32x4_t __a);

int64x2_t vreinterpretq_s64_u8 (uint8x16_t __a);

int64x2_t vreinterpretq_s64_u16 (uint16x8_t __a);

int64x2_t vreinterpretq_s64_u32 (uint32x4_t __a);

int64x2_t vreinterpretq_s64_u64 (uint64x2_t __a);

int64x2_t vreinterpretq_s64_p8 (poly8x16_t __a);

int64x2_t vreinterpretq_s64_p16 (poly16x8_t __a);

uint64x1_t vreinterpret_u64_s8 (int8x8_t __a);

uint64x1_t vreinterpret_u64_s16 (int16x4_t __a);

uint64x1_t vreinterpret_u64_s32 (int32x2_t __a);

uint64x1_t vreinterpret_u64_s64 (int64x1_t __a);

uint64x1_t vreinterpret_u64_f32 (float32x2_t __a);

uint64x1_t vreinterpret_u64_u8 (uint8x8_t __a);

uint64x1_t vreinterpret_u64_u16 (uint16x4_t __a);

uint64x1_t vreinterpret_u64_u32 (uint32x2_t __a);

uint64x1_t vreinterpret_u64_p8 (poly8x8_t __a);

uint64x1_t vreinterpret_u64_p16 (poly16x4_t __a);

uint64x2_t vreinterpretq_u64_s8 (int8x16_t __a);

uint64x2_t vreinterpretq_u64_s16 (int16x8_t __a);

uint64x2_t vreinterpretq_u64_s32 (int32x4_t __a);

uint64x2_t vreinterpretq_u64_s64 (int64x2_t __a);

uint64x2_t vreinterpretq_u64_f32 (float32x4_t __a);

uint64x2_t vreinterpretq_u64_u8 (uint8x16_t __a);

uint64x2_t vreinterpretq_u64_u16 (uint16x8_t __a);

uint64x2_t vreinterpretq_u64_u32 (uint32x4_t __a);

uint64x2_t vreinterpretq_u64_p8 (poly8x16_t __a);

uint64x2_t vreinterpretq_u64_p16 (poly16x8_t __a);

int8x8_t vreinterpret_s8_s16 (int16x4_t __a);

int8x8_t vreinterpret_s8_s32 (int32x2_t __a);

int8x8_t vreinterpret_s8_s64 (int64x1_t __a);

int8x8_t vreinterpret_s8_f32 (float32x2_t __a);

int8x8_t vreinterpret_s8_u8 (uint8x8_t __a);

int8x8_t vreinterpret_s8_u16 (uint16x4_t __a);

int8x8_t vreinterpret_s8_u32 (uint32x2_t __a);

int8x8_t vreinterpret_s8_u64 (uint64x1_t __a);

int8x8_t vreinterpret_s8_p8 (poly8x8_t __a);

int8x8_t vreinterpret_s8_p16 (poly16x4_t __a);

int8x16_t vreinterpretq_s8_s16 (int16x8_t __a);

int8x16_t vreinterpretq_s8_s32 (int32x4_t __a);

int8x16_t vreinterpretq_s8_s64 (int64x2_t __a);

int8x16_t vreinterpretq_s8_f32 (float32x4_t __a);

int8x16_t vreinterpretq_s8_u8 (uint8x16_t __a);

int8x16_t vreinterpretq_s8_u16 (uint16x8_t __a);

int8x16_t vreinterpretq_s8_u32 (uint32x4_t __a);

int8x16_t vreinterpretq_s8_u64 (uint64x2_t __a);

int8x16_t vreinterpretq_s8_p8 (poly8x16_t __a);

int8x16_t vreinterpretq_s8_p16 (poly16x8_t __a);

int16x4_t vreinterpret_s16_s8 (int8x8_t __a);

int16x4_t vreinterpret_s16_s32 (int32x2_t __a);

int16x4_t vreinterpret_s16_s64 (int64x1_t __a);

int16x4_t vreinterpret_s16_f32 (float32x2_t __a);

int16x4_t vreinterpret_s16_u8 (uint8x8_t __a);

int16x4_t vreinterpret_s16_u16 (uint16x4_t __a);

int16x4_t vreinterpret_s16_u32 (uint32x2_t __a);

int16x4_t vreinterpret_s16_u64 (uint64x1_t __a);

int16x4_t vreinterpret_s16_p8 (poly8x8_t __a);

int16x4_t vreinterpret_s16_p16 (poly16x4_t __a);

int16x8_t vreinterpretq_s16_s8 (int8x16_t __a);

int16x8_t vreinterpretq_s16_s32 (int32x4_t __a);

int16x8_t vreinterpretq_s16_s64 (int64x2_t __a);

int16x8_t vreinterpretq_s16_f32 (float32x4_t __a);

int16x8_t vreinterpretq_s16_u8 (uint8x16_t __a);

int16x8_t vreinterpretq_s16_u16 (uint16x8_t __a);

int16x8_t vreinterpretq_s16_u32 (uint32x4_t __a);

int16x8_t vreinterpretq_s16_u64 (uint64x2_t __a);

int16x8_t vreinterpretq_s16_p8 (poly8x16_t __a);

int16x8_t vreinterpretq_s16_p16 (poly16x8_t __a);

int32x2_t vreinterpret_s32_s8 (int8x8_t __a);

int32x2_t vreinterpret_s32_s16 (int16x4_t __a);

int32x2_t vreinterpret_s32_s64 (int64x1_t __a);

int32x2_t vreinterpret_s32_f32 (float32x2_t __a);

int32x2_t vreinterpret_s32_u8 (uint8x8_t __a);

int32x2_t vreinterpret_s32_u16 (uint16x4_t __a);

int32x2_t vreinterpret_s32_u32 (uint32x2_t __a);

int32x2_t vreinterpret_s32_u64 (uint64x1_t __a);

int32x2_t vreinterpret_s32_p8 (poly8x8_t __a);

int32x2_t vreinterpret_s32_p16 (poly16x4_t __a);

int32x4_t vreinterpretq_s32_s8 (int8x16_t __a);

int32x4_t vreinterpretq_s32_s16 (int16x8_t __a);

int32x4_t vreinterpretq_s32_s64 (int64x2_t __a);

int32x4_t vreinterpretq_s32_f32 (float32x4_t __a);

int32x4_t vreinterpretq_s32_u8 (uint8x16_t __a);

int32x4_t vreinterpretq_s32_u16 (uint16x8_t __a);

int32x4_t vreinterpretq_s32_u32 (uint32x4_t __a);

int32x4_t vreinterpretq_s32_u64 (uint64x2_t __a);

int32x4_t vreinterpretq_s32_p8 (poly8x16_t __a);

int32x4_t vreinterpretq_s32_p16 (poly16x8_t __a);

uint8x8_t vreinterpret_u8_s8 (int8x8_t __a);

uint8x8_t vreinterpret_u8_s16 (int16x4_t __a);

uint8x8_t vreinterpret_u8_s32 (int32x2_t __a);

uint8x8_t vreinterpret_u8_s64 (int64x1_t __a);

uint8x8_t vreinterpret_u8_f32 (float32x2_t __a);

uint8x8_t vreinterpret_u8_u16 (uint16x4_t __a);

uint8x8_t vreinterpret_u8_u32 (uint32x2_t __a);

uint8x8_t vreinterpret_u8_u64 (uint64x1_t __a);

uint8x8_t vreinterpret_u8_p8 (poly8x8_t __a);

uint8x8_t vreinterpret_u8_p16 (poly16x4_t __a);

uint8x16_t vreinterpretq_u8_s8 (int8x16_t __a);

uint8x16_t vreinterpretq_u8_s16 (int16x8_t __a);

uint8x16_t vreinterpretq_u8_s32 (int32x4_t __a);

uint8x16_t vreinterpretq_u8_s64 (int64x2_t __a);

uint8x16_t vreinterpretq_u8_f32 (float32x4_t __a);

uint8x16_t vreinterpretq_u8_u16 (uint16x8_t __a);

uint8x16_t vreinterpretq_u8_u32 (uint32x4_t __a);

uint8x16_t vreinterpretq_u8_u64 (uint64x2_t __a);

uint8x16_t vreinterpretq_u8_p8 (poly8x16_t __a);

uint8x16_t vreinterpretq_u8_p16 (poly16x8_t __a);

uint16x4_t vreinterpret_u16_s8 (int8x8_t __a);

uint16x4_t vreinterpret_u16_s16 (int16x4_t __a);

uint16x4_t vreinterpret_u16_s32 (int32x2_t __a);

uint16x4_t vreinterpret_u16_s64 (int64x1_t __a);

uint16x4_t vreinterpret_u16_f32 (float32x2_t __a);

uint16x4_t vreinterpret_u16_u8 (uint8x8_t __a);

uint16x4_t vreinterpret_u16_u32 (uint32x2_t __a);

uint16x4_t vreinterpret_u16_u64 (uint64x1_t __a);

uint16x4_t vreinterpret_u16_p8 (poly8x8_t __a);

uint16x4_t vreinterpret_u16_p16 (poly16x4_t __a);

uint16x8_t vreinterpretq_u16_s8 (int8x16_t __a);

uint16x8_t vreinterpretq_u16_s16 (int16x8_t __a);

uint16x8_t vreinterpretq_u16_s32 (int32x4_t __a);

uint16x8_t vreinterpretq_u16_s64 (int64x2_t __a);

uint16x8_t vreinterpretq_u16_f32 (float32x4_t __a);

uint16x8_t vreinterpretq_u16_u8 (uint8x16_t __a);

uint16x8_t vreinterpretq_u16_u32 (uint32x4_t __a);

uint16x8_t vreinterpretq_u16_u64 (uint64x2_t __a);

uint16x8_t vreinterpretq_u16_p8 (poly8x16_t __a);

uint16x8_t vreinterpretq_u16_p16 (poly16x8_t __a);

uint32x2_t vreinterpret_u32_s8 (int8x8_t __a);

uint32x2_t vreinterpret_u32_s16 (int16x4_t __a);

uint32x2_t vreinterpret_u32_s32 (int32x2_t __a);

uint32x2_t vreinterpret_u32_s64 (int64x1_t __a);

uint32x2_t vreinterpret_u32_f32 (float32x2_t __a);

uint32x2_t vreinterpret_u32_u8 (uint8x8_t __a);

uint32x2_t vreinterpret_u32_u16 (uint16x4_t __a);

uint32x2_t vreinterpret_u32_u64 (uint64x1_t __a);

uint32x2_t vreinterpret_u32_p8 (poly8x8_t __a);

uint32x2_t vreinterpret_u32_p16 (poly16x4_t __a);

uint32x4_t vreinterpretq_u32_s8 (int8x16_t __a);

uint32x4_t vreinterpretq_u32_s16 (int16x8_t __a);

uint32x4_t vreinterpretq_u32_s32 (int32x4_t __a);

uint32x4_t vreinterpretq_u32_s64 (int64x2_t __a);

uint32x4_t vreinterpretq_u32_f32 (float32x4_t __a);

uint32x4_t vreinterpretq_u32_u8 (uint8x16_t __a);

uint32x4_t vreinterpretq_u32_u16 (uint16x8_t __a);

uint32x4_t vreinterpretq_u32_u64 (uint64x2_t __a);

uint32x4_t vreinterpretq_u32_p8 (poly8x16_t __a);

uint32x4_t vreinterpretq_u32_p16 (poly16x8_t __a);

總結

以上是生活随笔為你收集整理的clsq客户端android,Android NDK开发之 arm_neon.h文件ABI说明的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。

日韩欧美群交p片內射中文 | 国产激情艳情在线看视频 | 在线欧美精品一区二区三区 | 亚洲熟妇色xxxxx欧美老妇 | 久久精品国产一区二区三区肥胖 | 天天av天天av天天透 | 亚洲 另类 在线 欧美 制服 | 午夜福利试看120秒体验区 | 国语精品一区二区三区 | 婷婷五月综合缴情在线视频 | 亚洲七七久久桃花影院 | 亚洲人成影院在线观看 | 亚洲精品欧美二区三区中文字幕 | 超碰97人人射妻 | 牲欲强的熟妇农村老妇女 | 国产色精品久久人妻 | 国产国语老龄妇女a片 | 久久亚洲精品中文字幕无男同 | 午夜精品久久久久久久 | 欧美 丝袜 自拍 制服 另类 | 精品国产av色一区二区深夜久久 | 国内精品九九久久久精品 | 国产亚洲美女精品久久久2020 | 国产精品鲁鲁鲁 | www国产亚洲精品久久久日本 | 日韩人妻少妇一区二区三区 | 国产卡一卡二卡三 | 日本精品少妇一区二区三区 | 中文字幕av日韩精品一区二区 | 激情国产av做激情国产爱 | 国产免费无码一区二区视频 | 亚洲人成网站免费播放 | 99er热精品视频 | 国产97人人超碰caoprom | 又大又黄又粗又爽的免费视频 | 国产av剧情md精品麻豆 | 国内丰满熟女出轨videos | 精品国产aⅴ无码一区二区 | 久久99精品久久久久婷婷 | 国产精品二区一区二区aⅴ污介绍 | 国产97在线 | 亚洲 | 国产超级va在线观看视频 | 亚洲熟熟妇xxxx | 久久久久久久女国产乱让韩 | 国产成人精品一区二区在线小狼 | 日本在线高清不卡免费播放 | 小鲜肉自慰网站xnxx | 婷婷六月久久综合丁香 | 久久久精品人妻久久影视 | 丰满人妻翻云覆雨呻吟视频 | 欧美猛少妇色xxxxx | 99麻豆久久久国产精品免费 | 7777奇米四色成人眼影 | 99久久婷婷国产综合精品青草免费 | √天堂中文官网8在线 | 亚洲精品一区二区三区四区五区 | 亚洲区欧美区综合区自拍区 | 扒开双腿吃奶呻吟做受视频 | 国产av久久久久精东av | 国精产品一区二区三区 | 国内综合精品午夜久久资源 | 国产猛烈高潮尖叫视频免费 | 国产精品久久久久无码av色戒 | 亚洲 欧美 激情 小说 另类 | 大色综合色综合网站 | 国产偷国产偷精品高清尤物 | 日本熟妇人妻xxxxx人hd | 亚洲阿v天堂在线 | 成年美女黄网站色大免费全看 | 日本熟妇人妻xxxxx人hd | 欧美日韩精品 | 欧美黑人性暴力猛交喷水 | 欧美日韩视频无码一区二区三 | 亚洲а∨天堂久久精品2021 | 国产精品美女久久久 | 久久久亚洲欧洲日产国码αv | 国产三级精品三级男人的天堂 | 麻豆国产丝袜白领秘书在线观看 | 中文字幕av无码一区二区三区电影 | 久久99精品久久久久久 | 欧美怡红院免费全部视频 | 丰满妇女强制高潮18xxxx | www国产精品内射老师 | 亚洲色欲久久久综合网东京热 | 又大又黄又粗又爽的免费视频 | 日韩成人一区二区三区在线观看 | 精品国产一区二区三区四区 | 内射后入在线观看一区 | 国产精品无套呻吟在线 | 亚洲熟妇色xxxxx欧美老妇 | 精品一二三区久久aaa片 | √8天堂资源地址中文在线 | 精品欧美一区二区三区久久久 | 狠狠色色综合网站 | 国产人妻人伦精品1国产丝袜 | 欧洲精品码一区二区三区免费看 | 熟妇女人妻丰满少妇中文字幕 | 成在人线av无码免观看麻豆 | 国产亚洲精品久久久闺蜜 | 国产精品久久久久久久影院 | 久久久久久久女国产乱让韩 | 欧美日韩在线亚洲综合国产人 | 国产av无码专区亚洲awww | 亚洲爆乳精品无码一区二区三区 | 亚洲自偷自偷在线制服 | 日韩av无码中文无码电影 | 日本欧美一区二区三区乱码 | 久久97精品久久久久久久不卡 | 国产成人亚洲综合无码 | 亚洲а∨天堂久久精品2021 | 免费无码的av片在线观看 | 国内综合精品午夜久久资源 | 一本久道久久综合婷婷五月 | 国产精品亚洲а∨无码播放麻豆 | 久久精品国产日本波多野结衣 | 在线播放无码字幕亚洲 | 成人性做爰aaa片免费看不忠 | 婷婷六月久久综合丁香 | 国产亚洲精品久久久久久国模美 | 亚洲精品久久久久中文第一幕 | 亚洲乱码国产乱码精品精 | 久久综合香蕉国产蜜臀av | 国产福利视频一区二区 | 国产av一区二区精品久久凹凸 | 欧美激情内射喷水高潮 | 一本久道久久综合婷婷五月 | 国产无遮挡又黄又爽免费视频 | 巨爆乳无码视频在线观看 | 亚洲爆乳大丰满无码专区 | 欧美日韩久久久精品a片 | 精品一区二区三区无码免费视频 | 久久久久久久久蜜桃 | 色综合久久88色综合天天 | 
色综合久久久无码网中文 | 久久综合狠狠综合久久综合88 | 成人亚洲精品久久久久软件 | 成人欧美一区二区三区 | 无套内谢的新婚少妇国语播放 | 国产尤物精品视频 | 国产精品人人爽人人做我的可爱 | 中文精品久久久久人妻不卡 | 天天摸天天透天天添 | 天天摸天天碰天天添 | 熟女少妇人妻中文字幕 | 中文毛片无遮挡高清免费 | 亚洲另类伦春色综合小说 | 精品aⅴ一区二区三区 | 中文无码精品a∨在线观看不卡 | 内射老妇bbwx0c0ck | 超碰97人人射妻 | 精品亚洲韩国一区二区三区 | 丝袜人妻一区二区三区 | 国内少妇偷人精品视频免费 | 大乳丰满人妻中文字幕日本 | 国产两女互慰高潮视频在线观看 | 久久国产精品二国产精品 | 少妇愉情理伦片bd | 老熟妇仑乱视频一区二区 | 色一情一乱一伦一视频免费看 | 国产片av国语在线观看 | 红桃av一区二区三区在线无码av | 国产av久久久久精东av | 人人爽人人爽人人片av亚洲 | 久久久久成人片免费观看蜜芽 | 综合激情五月综合激情五月激情1 | 欧美丰满熟妇xxxx | 日日摸日日碰夜夜爽av | 东京热男人av天堂 | 夜夜影院未满十八勿进 | 乱码午夜-极国产极内射 | 亚洲毛片av日韩av无码 | 国产熟妇高潮叫床视频播放 | 国产成人无码av片在线观看不卡 | 午夜男女很黄的视频 | 成人性做爰aaa片免费看 | 老司机亚洲精品影院无码 | 初尝人妻少妇中文字幕 | 在线天堂新版最新版在线8 | 成人性做爰aaa片免费看不忠 | 亚洲熟妇色xxxxx欧美老妇y | 少妇被粗大的猛进出69影院 | yw尤物av无码国产在线观看 | 在教室伦流澡到高潮hnp视频 | 国产av无码专区亚洲a∨毛片 | 国产婷婷色一区二区三区在线 | 亚洲理论电影在线观看 | 欧美人妻一区二区三区 | 亚洲一区二区三区国产精华液 | 成人综合网亚洲伊人 | 日本欧美一区二区三区乱码 | 欧美性生交xxxxx久久久 | 免费无码一区二区三区蜜桃大 | 亚洲日韩精品欧美一区二区 | 99久久精品无码一区二区毛片 | 亚洲色无码一区二区三区 | 久精品国产欧美亚洲色aⅴ大片 | 色窝窝无码一区二区三区色欲 | 丰满人妻一区二区三区免费视频 | 国产精品人人爽人人做我的可爱 | 2020久久超碰国产精品最新 | 久久精品国产大片免费观看 | 狠狠色噜噜狠狠狠狠7777米奇 | 国产97人人超碰caoprom | 欧美色就是色 | 夜夜高潮次次欢爽av女 | 国精产品一品二品国精品69xx | 国产乱码精品一品二品 | 久久久久久av无码免费看大片 | 日韩av无码一区二区三区 | √8天堂资源地址中文在线 | 日本精品人妻无码77777 天堂一区人妻无码 | 九月婷婷人人澡人人添人人爽 | 欧美 日韩 亚洲 在线 | 日韩成人一区二区三区在线观看 | 精品成人av一区二区三区 | 精品亚洲韩国一区二区三区 | 精品国产一区av天美传媒 | yw尤物av无码国产在线观看 | 欧美日韩在线亚洲综合国产人 | 国产农村乱对白刺激视频 | 午夜成人1000部免费视频 | 蜜臀av无码人妻精品 | 日日干夜夜干 | 国产精品久久久久久久9999 | 又黄又爽又色的视频 | 麻豆人妻少妇精品无码专区 | 精品一区二区三区波多野结衣 | 婷婷五月综合激情中文字幕 | 伊人久久大香线蕉av一区二区 | 人人澡人人妻人人爽人人蜜桃 | 久久精品人人做人人综合 | 激情亚洲一区国产精品 | 牲交欧美兽交欧美 | 欧美喷潮久久久xxxxx | 精品乱码久久久久久久 | 国产精品久久久久影院嫩草 | 无码av免费一区二区三区试看 | 亚洲精品久久久久avwww潮水 | 免费国产成人高清在线观看网站 | a国产一区二区免费入口 | 日本丰满熟妇videos | 麻豆成人精品国产免费 | 国产尤物精品视频 | 爽爽影院免费观看 | 天堂亚洲2017在线观看 | 精品乱码久久久久久久 | 麻豆精产国品 | 欧美人与牲动交xxxx | 色婷婷综合中文久久一本 | 亚洲人成无码网www | 人妻人人添人妻人人爱 | 亚洲男人av天堂午夜在 | 色婷婷综合中文久久一本 | 欧美喷潮久久久xxxxx | 日韩欧美群交p片內射中文 | 4hu四虎永久在线观看 | 欧美自拍另类欧美综合图片区 | 欧美黑人乱大交 | 色狠狠av一区二区三区 | 日本一区二区更新不卡 | 麻花豆传媒剧国产免费mv在线 | 人人爽人人澡人人人妻 | 成人精品天堂一区二区三区 | 亚洲va欧美va天堂v国产综合 | 无码精品国产va在线观看dvd | 青青草原综合久久大伊人精品 | a片免费视频在线观看 | 
久久精品女人天堂av免费观看 | 中文字幕日产无线码一区 | 日本精品久久久久中文字幕 | 一本大道久久东京热无码av | 少妇久久久久久人妻无码 | 久久久久免费看成人影片 | 国产精品国产三级国产专播 | aa片在线观看视频在线播放 | 国产乱子伦视频在线播放 | 免费观看又污又黄的网站 | 国产精品人妻一区二区三区四 | 国产精品久久久 | av无码不卡在线观看免费 | 少妇性荡欲午夜性开放视频剧场 | 欧美zoozzooz性欧美 | 特大黑人娇小亚洲女 | 红桃av一区二区三区在线无码av | av在线亚洲欧洲日产一区二区 | 天天av天天av天天透 | 色爱情人网站 | 国产亚洲欧美在线专区 | 特级做a爰片毛片免费69 | 亚洲精品中文字幕乱码 | 国产日产欧产精品精品app | 国产午夜无码精品免费看 | 又湿又紧又大又爽a视频国产 | 国产成人午夜福利在线播放 | 亚洲中文字幕在线无码一区二区 | 亚洲成色www久久网站 | 国产舌乚八伦偷品w中 | 国产三级精品三级男人的天堂 | 国产成人精品视频ⅴa片软件竹菊 | 欧美日韩视频无码一区二区三 | 亚洲精品久久久久久久久久久 | 强辱丰满人妻hd中文字幕 | 国产亚洲人成a在线v网站 | 性做久久久久久久免费看 | 亚洲午夜无码久久 | 国产亚洲精品久久久ai换 | 精品国偷自产在线 | 999久久久国产精品消防器材 | 国产97人人超碰caoprom | av无码不卡在线观看免费 | 久久久久亚洲精品男人的天堂 | 亚洲人成网站在线播放942 | 老太婆性杂交欧美肥老太 | 国产一区二区三区四区五区加勒比 | 中文精品无码中文字幕无码专区 | 国产手机在线αⅴ片无码观看 | 国产特级毛片aaaaaaa高清 | 老头边吃奶边弄进去呻吟 | 久久久精品456亚洲影院 | 日本大香伊一区二区三区 | 野外少妇愉情中文字幕 | √天堂资源地址中文在线 | 东京热男人av天堂 | 亚洲色成人中文字幕网站 | 欧美日韩在线亚洲综合国产人 | 给我免费的视频在线观看 | 久久久久99精品国产片 | 国精品人妻无码一区二区三区蜜柚 | 无码播放一区二区三区 | 老子影院午夜伦不卡 | 中文字幕乱码亚洲无线三区 | 亚洲熟妇色xxxxx欧美老妇 | 国产精品久久久久无码av色戒 | 欧美成人高清在线播放 | 欧美日韩在线亚洲综合国产人 | 久久99精品久久久久久 | 白嫩日本少妇做爰 | 亚洲色偷偷偷综合网 | 又色又爽又黄的美女裸体网站 | 蜜桃臀无码内射一区二区三区 | 久久久久99精品国产片 | 久久人人爽人人爽人人片av高清 | 国产性生大片免费观看性 | www国产亚洲精品久久网站 | 白嫩日本少妇做爰 | 国产激情艳情在线看视频 | 在线亚洲高清揄拍自拍一品区 | 欧美黑人性暴力猛交喷水 | 日日干夜夜干 | 中文字幕乱码亚洲无线三区 | 久久午夜夜伦鲁鲁片无码免费 | 亚洲乱码国产乱码精品精 | 在线成人www免费观看视频 | 中文字幕无码免费久久99 | 女人高潮内射99精品 | 亚洲色欲色欲天天天www | 亚洲成在人网站无码天堂 | 日本熟妇人妻xxxxx人hd | 2020最新国产自产精品 | 午夜精品久久久久久久久 | 麻豆国产人妻欲求不满 | 日本精品少妇一区二区三区 | 精品国产一区二区三区四区在线看 | 免费无码av一区二区 | 国精品人妻无码一区二区三区蜜柚 | 亚洲国产精品成人久久蜜臀 | 国产亚洲精品久久久久久久久动漫 | yw尤物av无码国产在线观看 | 久久综合给合久久狠狠狠97色 | 国产精品美女久久久 | 99麻豆久久久国产精品免费 | 日日噜噜噜噜夜夜爽亚洲精品 | 日韩精品成人一区二区三区 | 久久国内精品自在自线 | 国产亚洲精品久久久久久久 | 精品无码成人片一区二区98 | 国内精品一区二区三区不卡 | 亚洲人成影院在线无码按摩店 | 67194成是人免费无码 | 兔费看少妇性l交大片免费 | 亚洲国产精品无码久久久久高潮 | 精品无人区无码乱码毛片国产 | 国产做国产爱免费视频 | 免费无码av一区二区 | 西西人体www44rt大胆高清 | 国产乱人伦偷精品视频 | 日本在线高清不卡免费播放 | 国产精品丝袜黑色高跟鞋 | 男人的天堂av网站 | 国产福利视频一区二区 | 国产又爽又猛又粗的视频a片 | 日本一卡2卡3卡4卡无卡免费网站 国产一区二区三区影院 | 漂亮人妻洗澡被公强 日日躁 | 丰满护士巨好爽好大乳 | 动漫av一区二区在线观看 | 国产精品手机免费 | 一本色道久久综合狠狠躁 | 国产在线精品一区二区高清不卡 | 国产av人人夜夜澡人人爽麻豆 | 欧美日韩久久久精品a片 | 
国产精品久久久久无码av色戒 | 美女黄网站人色视频免费国产 | 精品久久久久久亚洲精品 | 亚洲一区二区三区 | 丝袜美腿亚洲一区二区 | 欧美人与物videos另类 | 一个人看的www免费视频在线观看 | 亚洲精品一区国产 | 亚洲区欧美区综合区自拍区 | 久久这里只有精品视频9 | 人人妻人人澡人人爽欧美一区 | 精品国产青草久久久久福利 | 性生交大片免费看女人按摩摩 | 国产欧美亚洲精品a | 久久天天躁狠狠躁夜夜免费观看 | 曰韩无码二三区中文字幕 | 国产成人无码午夜视频在线观看 | 久久久久久av无码免费看大片 | 国色天香社区在线视频 | 51国偷自产一区二区三区 | 国产成人精品久久亚洲高清不卡 | 日韩 欧美 动漫 国产 制服 | 国产性生交xxxxx无码 | 亚洲乱亚洲乱妇50p | av小次郎收藏 | 乱码av麻豆丝袜熟女系列 | 俺去俺来也www色官网 | 美女扒开屁股让男人桶 | 国产麻豆精品精东影业av网站 | 久久99精品久久久久久 | 国产 浪潮av性色四虎 | 无码午夜成人1000部免费视频 | 欧美日韩视频无码一区二区三 | 亚洲 激情 小说 另类 欧美 | 少女韩国电视剧在线观看完整 | 男人的天堂av网站 | 亚洲日韩一区二区 | 中文字幕无码日韩专区 | 中国大陆精品视频xxxx | 人妻与老人中文字幕 | 伊人久久大香线蕉亚洲 | 性欧美videos高清精品 | 人妻有码中文字幕在线 | 麻豆国产97在线 | 欧洲 | 国产精品亚洲一区二区三区喷水 | 老司机亚洲精品影院 | 国产免费无码一区二区视频 | yw尤物av无码国产在线观看 | 国产suv精品一区二区五 | 色欲人妻aaaaaaa无码 | 激情国产av做激情国产爱 | 欧美兽交xxxx×视频 | 九九热爱视频精品 | 欧美人与动性行为视频 | 人妻无码αv中文字幕久久琪琪布 | 日日鲁鲁鲁夜夜爽爽狠狠 | 丰满人妻一区二区三区免费视频 | 免费人成在线观看网站 | 精品国产麻豆免费人成网站 | 国产精品.xx视频.xxtv | 女人被爽到呻吟gif动态图视看 | 久久精品国产精品国产精品污 | 亚洲一区二区观看播放 | 成人性做爰aaa片免费看不忠 | 中文字幕人成乱码熟女app | 亚洲一区二区三区无码久久 | 亚洲国产欧美在线成人 | 美女扒开屁股让男人桶 | 久久亚洲日韩精品一区二区三区 | 狠狠综合久久久久综合网 | 一本色道久久综合亚洲精品不卡 | 国产午夜视频在线观看 | 性欧美大战久久久久久久 | 国产成人精品必看 | 蜜臀aⅴ国产精品久久久国产老师 | 久久国产精品精品国产色婷婷 | 亚洲色大成网站www | 国产成人无码av在线影院 | 午夜无码区在线观看 | 国产女主播喷水视频在线观看 | 亚洲成a人片在线观看日本 | 亚洲成色在线综合网站 | 无套内谢的新婚少妇国语播放 | 国产特级毛片aaaaaaa高清 | 精品人妻人人做人人爽 | 牲欲强的熟妇农村老妇女 | 国产成人精品久久亚洲高清不卡 | 亚洲色欲色欲欲www在线 | 国产情侣作爱视频免费观看 | 精品国产乱码久久久久乱码 | 国产精品香蕉在线观看 | 久久伊人色av天堂九九小黄鸭 | 亚洲色www成人永久网址 | 女人被男人爽到呻吟的视频 | 无码毛片视频一区二区本码 | 在线观看国产一区二区三区 | 国产真实夫妇视频 | 熟女体下毛毛黑森林 | 成人av无码一区二区三区 | 国内精品九九久久久精品 | 西西人体www44rt大胆高清 | 激情爆乳一区二区三区 | 青青草原综合久久大伊人精品 | 超碰97人人做人人爱少妇 | 国产精品高潮呻吟av久久 | 国产精品久久久av久久久 | 麻豆md0077饥渴少妇 | 国产极品视觉盛宴 | 在线精品国产一区二区三区 | 四虎影视成人永久免费观看视频 | 婷婷六月久久综合丁香 | 久久久久成人精品免费播放动漫 | 国产激情无码一区二区 | 蜜桃av蜜臀av色欲av麻 999久久久国产精品消防器材 | 精品日本一区二区三区在线观看 | 精品国产成人一区二区三区 | 激情综合激情五月俺也去 | 丰满少妇弄高潮了www | 免费观看黄网站 | 久久久久成人片免费观看蜜芽 | 中文字幕乱码中文乱码51精品 | 国产网红无码精品视频 | 亚洲欧美色中文字幕在线 | 最近的中文字幕在线看视频 | av无码电影一区二区三区 | 亚洲自偷自拍另类第1页 | 国产一区二区三区四区五区加勒比 | 亚洲乱码中文字幕在线 | 影音先锋中文字幕无码 | 久久99精品国产.久久久久 | 亚洲精品国产a久久久久久 | 国产成人无码av片在线观看不卡 | 国产猛烈高潮尖叫视频免费 | 天堂一区人妻无码 | 
久久www免费人成人片 | 国产人妻久久精品二区三区老狼 | 久久成人a毛片免费观看网站 | 欧美老妇与禽交 | 四虎永久在线精品免费网址 | 对白脏话肉麻粗话av | 亚洲无人区午夜福利码高清完整版 | 免费播放一区二区三区 | 欧美午夜特黄aaaaaa片 | 伦伦影院午夜理论片 | 永久免费精品精品永久-夜色 | 午夜丰满少妇性开放视频 | 亚洲大尺度无码无码专区 | 久久亚洲精品成人无码 | 亚洲日韩精品欧美一区二区 | 久久午夜无码鲁丝片 | 55夜色66夜色国产精品视频 | 少妇高潮一区二区三区99 | 亚洲色欲色欲天天天www | 狠狠色欧美亚洲狠狠色www | 亚洲国产成人a精品不卡在线 | 欧美阿v高清资源不卡在线播放 | 精品久久久无码中文字幕 | 亚洲一区二区三区四区 | 国产熟妇另类久久久久 | 夜夜夜高潮夜夜爽夜夜爰爰 | 亚洲成a人片在线观看无码3d | yw尤物av无码国产在线观看 | 精品国产av色一区二区深夜久久 | 日本成熟视频免费视频 | 欧美阿v高清资源不卡在线播放 | 国产精品第一区揄拍无码 | 中文字幕av日韩精品一区二区 | 亚洲一区二区三区香蕉 | 一本色道久久综合狠狠躁 | 人人妻人人藻人人爽欧美一区 | 两性色午夜免费视频 | 日本大乳高潮视频在线观看 | a片在线免费观看 | 人妻人人添人妻人人爱 | 色诱久久久久综合网ywww | 97久久超碰中文字幕 | 无码国产激情在线观看 | av无码不卡在线观看免费 | 黑人巨大精品欧美一区二区 | 国产精品久久久久久久9999 | 成人无码精品一区二区三区 | 精品无码国产一区二区三区av | 亚洲精品国产第一综合99久久 | 久久人人爽人人爽人人片av高清 | 人妻插b视频一区二区三区 | 99久久精品日本一区二区免费 | 欧美日韩亚洲国产精品 | 欧美肥老太牲交大战 | 国产精品永久免费视频 | 欧美第一黄网免费网站 | 亚洲日韩一区二区 | 久久精品无码一区二区三区 | 欧美猛少妇色xxxxx | 久久综合给合久久狠狠狠97色 | 伊人色综合久久天天小片 | 成人免费视频视频在线观看 免费 | 国产精品久久精品三级 | aa片在线观看视频在线播放 | 亚洲色大成网站www国产 | 国产成人亚洲综合无码 | 国产精品亚洲lv粉色 | 国产一区二区不卡老阿姨 | 波多野结衣高清一区二区三区 | 久久综合给久久狠狠97色 | 国产乡下妇女做爰 | 日韩精品久久久肉伦网站 | 老司机亚洲精品影院 | 久久久久av无码免费网 | 久久精品国产大片免费观看 | 国产高清不卡无码视频 | 国产网红无码精品视频 | 天堂无码人妻精品一区二区三区 | 国产精品久久久一区二区三区 | 日本一卡2卡3卡四卡精品网站 | 国产舌乚八伦偷品w中 | 99久久人妻精品免费一区 | 欧美日韩人成综合在线播放 | 欧美老妇交乱视频在线观看 | 好爽又高潮了毛片免费下载 | 日本一卡2卡3卡4卡无卡免费网站 国产一区二区三区影院 | 久久精品国产99精品亚洲 | 永久免费观看美女裸体的网站 | 亚洲国产欧美国产综合一区 | 亚洲熟妇自偷自拍另类 | 97无码免费人妻超级碰碰夜夜 | 国产另类ts人妖一区二区 | 精品无码成人片一区二区98 | 水蜜桃av无码 | 亚洲熟妇色xxxxx欧美老妇y | 国产人妖乱国产精品人妖 | 国产性猛交╳xxx乱大交 国产精品久久久久久无码 欧洲欧美人成视频在线 | 131美女爱做视频 | 2020久久香蕉国产线看观看 | 人人妻人人澡人人爽欧美精品 | 免费观看激色视频网站 | 精品国产青草久久久久福利 | 精品久久久无码人妻字幂 | 欧美亚洲日韩国产人成在线播放 | 99久久人妻精品免费二区 | 人人超人人超碰超国产 | 亚洲欧洲中文日韩av乱码 | 亚洲综合在线一区二区三区 | 亚洲精品国产品国语在线观看 | 久久久久亚洲精品中文字幕 | 久久精品中文字幕大胸 | 六月丁香婷婷色狠狠久久 | 成熟女人特级毛片www免费 | 成人免费无码大片a毛片 | 天下第一社区视频www日本 | 伦伦影院午夜理论片 | 蜜臀aⅴ国产精品久久久国产老师 | 欧美老熟妇乱xxxxx | 精品国偷自产在线 | 亚洲欧美中文字幕5发布 | 亚洲成a人一区二区三区 | 日本护士毛茸茸高潮 | 一本加勒比波多野结衣 | 大肉大捧一进一出视频出来呀 | 欧美激情一区二区三区成人 | 狠狠色噜噜狠狠狠狠7777米奇 | 成人精品视频一区二区三区尤物 | 少妇高潮喷潮久久久影院 | 日韩精品久久久肉伦网站 | 国产肉丝袜在线观看 | 精品无码国产一区二区三区av | 精品国产麻豆免费人成网站 | 久久久久人妻一区精品色欧美 | 噜噜噜亚洲色成人网站 
| 最近免费中文字幕中文高清百度 | 亚洲中文字幕久久无码 | 无遮无挡爽爽免费视频 | 精品少妇爆乳无码av无码专区 | 久久人人爽人人爽人人片av高清 | 欧洲vodafone精品性 | 97夜夜澡人人双人人人喊 | 无码人妻精品一区二区三区不卡 | 女人色极品影院 | 国产电影无码午夜在线播放 | 亚洲欧美日韩成人高清在线一区 | 国产婷婷色一区二区三区在线 | 亚洲va欧美va天堂v国产综合 | 十八禁视频网站在线观看 | 国产成人无码a区在线观看视频app | 久久亚洲中文字幕精品一区 | 久久久久免费精品国产 | 人妻夜夜爽天天爽三区 | 亚洲一区av无码专区在线观看 | 亚洲精品国偷拍自产在线观看蜜桃 | 无码吃奶揉捏奶头高潮视频 | 女人被爽到呻吟gif动态图视看 | 国产精品亚洲综合色区韩国 | 鲁大师影院在线观看 | 欧美日韩综合一区二区三区 | 人人妻人人藻人人爽欧美一区 | 亚洲精品一区国产 | 国产美女极度色诱视频www | 欧美日韩视频无码一区二区三 | 55夜色66夜色国产精品视频 | 国产黄在线观看免费观看不卡 | 漂亮人妻洗澡被公强 日日躁 | 国产绳艺sm调教室论坛 | 日本一区二区三区免费播放 | 无套内射视频囯产 | 亚洲熟女一区二区三区 | 亚洲熟妇自偷自拍另类 | 在线 国产 欧美 亚洲 天堂 | 亚洲天堂2017无码中文 | 双乳奶水饱满少妇呻吟 | 成人免费无码大片a毛片 | 国产香蕉97碰碰久久人人 | 国产午夜亚洲精品不卡下载 | 中文字幕无码热在线视频 | 2020最新国产自产精品 | 精品无人国产偷自产在线 | 国产网红无码精品视频 | 青青久在线视频免费观看 | 国产精品久久久午夜夜伦鲁鲁 | 国产乱人偷精品人妻a片 | 国产极品美女高潮无套在线观看 | 内射欧美老妇wbb | 精品国产国产综合精品 | 无码av中文字幕免费放 | 日本熟妇乱子伦xxxx | 久久人人爽人人爽人人片av高清 | 欧美 日韩 亚洲 在线 | 亚洲一区二区观看播放 | 伊人久久大香线蕉亚洲 | 免费播放一区二区三区 | 天天躁日日躁狠狠躁免费麻豆 | 搡女人真爽免费视频大全 | 人妻人人添人妻人人爱 | 亚洲天堂2017无码 | 夜夜躁日日躁狠狠久久av | 国产精品久久久久影院嫩草 | 日本熟妇乱子伦xxxx | 亚洲精品综合一区二区三区在线 | 婷婷综合久久中文字幕蜜桃三电影 | 高清不卡一区二区三区 | 亚洲成熟女人毛毛耸耸多 | 成人试看120秒体验区 | 精品一二三区久久aaa片 | 免费无码一区二区三区蜜桃大 | 丰满妇女强制高潮18xxxx | 装睡被陌生人摸出水好爽 | 国产乱人无码伦av在线a | 国产麻豆精品精东影业av网站 | 一本久道久久综合婷婷五月 | 欧美xxxx黑人又粗又长 | 欧美丰满少妇xxxx性 | 波多野结衣一区二区三区av免费 | 亚洲va中文字幕无码久久不卡 | 日韩精品一区二区av在线 | 国产精品人人爽人人做我的可爱 | 国产精品视频免费播放 | 中文字幕av无码一区二区三区电影 | 国产三级精品三级男人的天堂 | 99久久久无码国产aaa精品 | 亚洲中文字幕乱码av波多ji | 女人和拘做爰正片视频 | 亚洲精品一区二区三区在线观看 | 大肉大捧一进一出视频出来呀 | 亚洲男人av香蕉爽爽爽爽 | 久久久无码中文字幕久... 
| 综合网日日天干夜夜久久 | 波多野结衣乳巨码无在线观看 | 国产av一区二区三区最新精品 | 激情内射日本一区二区三区 | 国产色在线 | 国产 | 成熟女人特级毛片www免费 | 亚洲乱亚洲乱妇50p | 无码人妻丰满熟妇区毛片18 | 精品人妻人人做人人爽 | 国产人妻精品午夜福利免费 | 国产精品久久久久久久9999 | 久久国产精品二国产精品 | 在线欧美精品一区二区三区 | 最新国产乱人伦偷精品免费网站 | 欧美日韩一区二区免费视频 | 国语自产偷拍精品视频偷 | 水蜜桃av无码 | 色一情一乱一伦 | 成熟人妻av无码专区 | 内射白嫩少妇超碰 | 亚洲欧美精品aaaaaa片 | 无码免费一区二区三区 | 久久国产精品二国产精品 | 欧美日韩综合一区二区三区 | 熟妇人妻中文av无码 | 国产精品沙发午睡系列 | 欧美肥老太牲交大战 | 国产精品无码一区二区桃花视频 | 性生交大片免费看女人按摩摩 | 亚洲精品一区二区三区在线 | 初尝人妻少妇中文字幕 | 极品嫩模高潮叫床 | 久久精品国产99久久6动漫 | 久久精品中文闷骚内射 | 性史性农村dvd毛片 | 日日碰狠狠躁久久躁蜜桃 | 亚洲国产精品无码一区二区三区 | 97精品人妻一区二区三区香蕉 | 欧美精品一区二区精品久久 | 国产精品毛片一区二区 | 久久精品中文字幕大胸 | 国产精品内射视频免费 | 无码一区二区三区在线观看 | 丝袜美腿亚洲一区二区 | 老头边吃奶边弄进去呻吟 | 国产69精品久久久久app下载 | 国产人妻大战黑人第1集 | 狠狠色噜噜狠狠狠狠7777米奇 | 亚洲国精产品一二二线 | 大胆欧美熟妇xx | 东京热一精品无码av | 内射爽无广熟女亚洲 | 老熟妇乱子伦牲交视频 | 四虎4hu永久免费 | 18精品久久久无码午夜福利 | 少妇性俱乐部纵欲狂欢电影 | 一本色道婷婷久久欧美 | 无码人妻精品一区二区三区不卡 | 初尝人妻少妇中文字幕 | 狂野欧美性猛交免费视频 | 国产乱人偷精品人妻a片 | 欧美国产亚洲日韩在线二区 | 国产xxx69麻豆国语对白 | av在线亚洲欧洲日产一区二区 | 国产精品久久久 | 欧美猛少妇色xxxxx | 精品 日韩 国产 欧美 视频 | 欧美精品国产综合久久 | 亚洲区小说区激情区图片区 | 久久综合九色综合97网 | 无遮挡国产高潮视频免费观看 | 久久午夜夜伦鲁鲁片无码免费 | 亚洲成a人片在线观看日本 | 国语自产偷拍精品视频偷 | 久久久久se色偷偷亚洲精品av | 水蜜桃亚洲一二三四在线 | 天堂久久天堂av色综合 | 欧美猛少妇色xxxxx | 成人影院yy111111在线观看 | 欧美放荡的少妇 | 欧美日本免费一区二区三区 | 免费视频欧美无人区码 | 极品尤物被啪到呻吟喷水 | 波多野结衣aⅴ在线 | 欧美 丝袜 自拍 制服 另类 | 中文字幕无码人妻少妇免费 | 国产疯狂伦交大片 | 熟妇人妻中文av无码 | 欧美三级不卡在线观看 | 国产农村妇女aaaaa视频 撕开奶罩揉吮奶头视频 | 欧美国产日韩亚洲中文 | 精品乱子伦一区二区三区 | 亚洲爆乳精品无码一区二区三区 | 亚洲欧美中文字幕5发布 | 夜夜夜高潮夜夜爽夜夜爰爰 | 一本大道伊人av久久综合 | 国产精品高潮呻吟av久久 | 一区二区传媒有限公司 | 国产熟妇另类久久久久 | 少妇人妻偷人精品无码视频 | 精品国产aⅴ无码一区二区 | 激情人妻另类人妻伦 | 中文字幕无码视频专区 | 国产精品美女久久久久av爽李琼 | 婷婷六月久久综合丁香 | 久久久久av无码免费网 | 理论片87福利理论电影 | 精品厕所偷拍各类美女tp嘘嘘 | 国产精品成人av在线观看 | 国产福利视频一区二区 | 无码人妻出轨黑人中文字幕 | 高潮毛片无遮挡高清免费 | 蜜桃臀无码内射一区二区三区 | 任你躁在线精品免费 | 日韩少妇白浆无码系列 | 少妇邻居内射在线 | 人妻人人添人妻人人爱 | 国産精品久久久久久久 | 久久久久久a亚洲欧洲av冫 | 又色又爽又黄的美女裸体网站 | 激情国产av做激情国产爱 | 好男人社区资源 | 日本精品人妻无码77777 天堂一区人妻无码 | 国产亚洲精品精品国产亚洲综合 | 亚洲码国产精品高潮在线 | 天天综合网天天综合色 | 日韩人妻无码中文字幕视频 | 亚洲伊人久久精品影院 | 高清国产亚洲精品自在久久 | 丝袜 中出 制服 人妻 美腿 | 亚洲色偷偷男人的天堂 | 亚洲成a人一区二区三区 | 免费无码肉片在线观看 | 在线视频网站www色 | 妺妺窝人体色www在线小说 | √天堂资源地址中文在线 | 无套内谢的新婚少妇国语播放 | 好屌草这里只有精品 | 漂亮人妻洗澡被公强 
日日躁 | 亚洲国产精品成人久久蜜臀 | 荫蒂添的好舒服视频囗交 | 国产莉萝无码av在线播放 | 亚洲欧美国产精品久久 | 中国女人内谢69xxxx | 久久熟妇人妻午夜寂寞影院 | 国产亚洲精品久久久闺蜜 | 少妇性l交大片 | 国产国产精品人在线视 | 亚洲小说春色综合另类 | 欧美日本精品一区二区三区 | 久久综合网欧美色妞网 | 欧美丰满老熟妇xxxxx性 | 亚洲国产精华液网站w | 青青久在线视频免费观看 | 色婷婷av一区二区三区之红樱桃 | 欧美日韩在线亚洲综合国产人 | 免费人成网站视频在线观看 | 日日天干夜夜狠狠爱 | 久久亚洲中文字幕精品一区 | 国产成人无码区免费内射一片色欲 | 亚洲国产欧美国产综合一区 | 欧美三级a做爰在线观看 | 亚洲成在人网站无码天堂 | 国产办公室秘书无码精品99 | а√天堂www在线天堂小说 | 四虎国产精品一区二区 | 永久黄网站色视频免费直播 | 三上悠亚人妻中文字幕在线 | 中文字幕 人妻熟女 | 无码人妻精品一区二区三区下载 | 国产乱码精品一品二品 | 国产成人av免费观看 | 国产成人精品视频ⅴa片软件竹菊 | 2020最新国产自产精品 | 18无码粉嫩小泬无套在线观看 | 扒开双腿疯狂进出爽爽爽视频 | 久久综合给久久狠狠97色 | 午夜福利电影 | 人妻尝试又大又粗久久 | 中文字幕日产无线码一区 | 蜜臀av无码人妻精品 | 国产精品福利视频导航 | 亚洲 激情 小说 另类 欧美 | 日本熟妇大屁股人妻 | 黑人巨大精品欧美一区二区 | 久久 国产 尿 小便 嘘嘘 | 久久人人爽人人爽人人片av高清 | 日韩欧美群交p片內射中文 | 国产乱人偷精品人妻a片 | 国产精品第一国产精品 | 成熟妇人a片免费看网站 | 国产免费无码一区二区视频 | 天堂一区人妻无码 | 国产麻豆精品精东影业av网站 | 图片区 小说区 区 亚洲五月 | 亚洲色欲色欲欲www在线 | 久久久亚洲欧洲日产国码αv | 欧美国产亚洲日韩在线二区 | 99riav国产精品视频 | 精品国偷自产在线视频 | 国产色在线 | 国产 | 大地资源中文第3页 | 国产精品嫩草久久久久 | 国精产品一品二品国精品69xx | 日本大乳高潮视频在线观看 | 99re在线播放 | 欧美黑人巨大xxxxx | 久久五月精品中文字幕 | 成人综合网亚洲伊人 | 精品偷自拍另类在线观看 | 欧美 丝袜 自拍 制服 另类 | 人妻少妇精品久久 | 亚洲の无码国产の无码影院 | 免费国产成人高清在线观看网站 | 狠狠综合久久久久综合网 | 在线欧美精品一区二区三区 | 四虎国产精品免费久久 | 精品久久8x国产免费观看 | 澳门永久av免费网站 | 色欲久久久天天天综合网精品 | 亚洲成av人综合在线观看 | 日日夜夜撸啊撸 | 国产色在线 | 国产 | 日韩人妻无码中文字幕视频 | 亚洲日韩一区二区 | 青青草原综合久久大伊人精品 | 妺妺窝人体色www在线小说 | 精品国产青草久久久久福利 | 亚洲爆乳大丰满无码专区 | 牲交欧美兽交欧美 | 伊人色综合久久天天小片 | 久久国产劲爆∧v内射 | 精品国精品国产自在久国产87 | 麻豆av传媒蜜桃天美传媒 | 一本一道久久综合久久 | 国产精品爱久久久久久久 | a国产一区二区免费入口 | 一本大道伊人av久久综合 | 无码国产激情在线观看 | 女人高潮内射99精品 | 丰满少妇人妻久久久久久 | 国产9 9在线 | 中文 | 国产69精品久久久久app下载 | 久久99精品久久久久久动态图 | 国产成人精品无码播放 | 亚洲日韩乱码中文无码蜜桃臀网站 | 国产色在线 | 国产 | 中文字幕无码日韩专区 | 无码人妻出轨黑人中文字幕 | 天天躁日日躁狠狠躁免费麻豆 | 在线观看免费人成视频 | 色综合视频一区二区三区 | 久久亚洲a片com人成 | 人人妻人人澡人人爽人人精品浪潮 | 久久久中文久久久无码 | 久久这里只有精品视频9 | 国产真实乱对白精彩久久 | 中文字幕久久久久人妻 | 4hu四虎永久在线观看 | 欧美黑人巨大xxxxx | 日日夜夜撸啊撸 | 人妻中文无码久热丝袜 | 福利一区二区三区视频在线观看 | 亚洲精品午夜无码电影网 | 无码人妻av免费一区二区三区 | 精品成在人线av无码免费看 | 欧美一区二区三区视频在线观看 | 国产精品自产拍在线观看 | 亚洲区小说区激情区图片区 | 久久国产精品萌白酱免费 | 又粗又大又硬又长又爽 | 亚洲综合精品香蕉久久网 | 久久久精品456亚洲影院 | 国产亚洲精品久久久久久 | 亚洲精品一区二区三区婷婷月 | 日本一卡2卡3卡4卡无卡免费网站 国产一区二区三区影院 | 
搡女人真爽免费视频大全 | 中文字幕中文有码在线 | 国产精品第一国产精品 | 无人区乱码一区二区三区 | 秋霞成人午夜鲁丝一区二区三区 | 四虎影视成人永久免费观看视频 | 一本久久伊人热热精品中文字幕 | 国产深夜福利视频在线 | www国产精品内射老师 | 免费看男女做好爽好硬视频 | 少妇性l交大片 | 偷窥日本少妇撒尿chinese | 亚洲综合无码一区二区三区 | 亚洲国产精品一区二区美利坚 | 狠狠色欧美亚洲狠狠色www | 无码福利日韩神码福利片 | 夜精品a片一区二区三区无码白浆 | 5858s亚洲色大成网站www | www国产亚洲精品久久久日本 | 搡女人真爽免费视频大全 | 97久久国产亚洲精品超碰热 | 一本色道久久综合亚洲精品不卡 | 99精品无人区乱码1区2区3区 | 国产香蕉尹人综合在线观看 | 国产精品久久久一区二区三区 | 亚洲色无码一区二区三区 | 学生妹亚洲一区二区 | 国产精品美女久久久 | 亚洲综合色区中文字幕 | 天天综合网天天综合色 | 日韩人妻无码中文字幕视频 | 久久人妻内射无码一区三区 | 永久免费观看美女裸体的网站 | 国产精品欧美成人 | 亚洲一区av无码专区在线观看 | 99久久精品无码一区二区毛片 | 亚洲 a v无 码免 费 成 人 a v | 国产精品亚洲lv粉色 | 精品国产一区av天美传媒 | 国产av无码专区亚洲awww | 无码av最新清无码专区吞精 | 午夜无码区在线观看 | 国产激情无码一区二区 | 亚洲国产精品一区二区美利坚 | 扒开双腿疯狂进出爽爽爽视频 | 欧美日韩亚洲国产精品 | 中文字幕亚洲情99在线 | 国产成人无码专区 | 国产日产欧产精品精品app | 两性色午夜视频免费播放 | 国产精品内射视频免费 | 亚洲日韩精品欧美一区二区 | 2020最新国产自产精品 | 在线观看免费人成视频 | 欧美放荡的少妇 | 国产精品人人爽人人做我的可爱 | 中文字幕无线码免费人妻 | 日本爽爽爽爽爽爽在线观看免 | 一个人看的视频www在线 | 国产成人精品三级麻豆 | 国产乱码精品一品二品 | 亚洲综合无码一区二区三区 | 牲交欧美兽交欧美 | 国产性猛交╳xxx乱大交 国产精品久久久久久无码 欧洲欧美人成视频在线 | 人人妻人人澡人人爽欧美一区 | 久久国产自偷自偷免费一区调 | 亚洲区欧美区综合区自拍区 | 成人片黄网站色大片免费观看 | 无套内谢的新婚少妇国语播放 | 欧洲极品少妇 | 国产乱人伦偷精品视频 | 亚洲天堂2017无码中文 | 亚无码乱人伦一区二区 | 老熟女重囗味hdxx69 | 亚洲精品一区二区三区四区五区 | 国产乱人偷精品人妻a片 | 久9re热视频这里只有精品 | 亚洲色欲色欲欲www在线 | 露脸叫床粗话东北少妇 | 在线观看欧美一区二区三区 | 鲁一鲁av2019在线 | 亚洲中文字幕成人无码 | 久久综合香蕉国产蜜臀av | 性色av无码免费一区二区三区 | 两性色午夜免费视频 | 欧美三级不卡在线观看 | 99精品视频在线观看免费 | 国内精品久久毛片一区二区 | 国产猛烈高潮尖叫视频免费 | 欧美性生交活xxxxxdddd | 少妇被黑人到高潮喷出白浆 | 成人性做爰aaa片免费看 | 久久精品国产一区二区三区肥胖 | 人妻夜夜爽天天爽三区 | 色婷婷av一区二区三区之红樱桃 | av在线亚洲欧洲日产一区二区 | 国内揄拍国内精品少妇国语 | 国产亚av手机在线观看 | 日本肉体xxxx裸交 | 亚洲中文字幕va福利 | 天海翼激烈高潮到腰振不止 | 白嫩日本少妇做爰 | 中文精品久久久久人妻不卡 | 欧美熟妇另类久久久久久多毛 | 久久国产精品精品国产色婷婷 | 国产高清不卡无码视频 | 十八禁视频网站在线观看 | 极品尤物被啪到呻吟喷水 | 在线播放亚洲第一字幕 | 无码乱肉视频免费大全合集 | 999久久久国产精品消防器材 | 国产精品永久免费视频 | 亚洲中文字幕av在天堂 | 欧美人与善在线com | 成熟女人特级毛片www免费 | 亚洲综合在线一区二区三区 | 久热国产vs视频在线观看 | 久久午夜夜伦鲁鲁片无码免费 | 国产精品鲁鲁鲁 | 四虎永久在线精品免费网址 | 无码播放一区二区三区 | 伊人久久婷婷五月综合97色 | 国产在线精品一区二区高清不卡 | 国产午夜手机精彩视频 | aa片在线观看视频在线播放 | 免费视频欧美无人区码 | 亚洲色无码一区二区三区 | 亚洲人成网站在线播放942 | 人妻尝试又大又粗久久 | 亚洲中文字幕无码中文字在线 | 激情内射亚州一区二区三区爱妻 | 精品偷自拍另类在线观看 | 国精品人妻无码一区二区三区蜜柚 | 国产欧美亚洲精品a | 
久久久久成人精品免费播放动漫 | 一个人看的视频www在线 | 成人av无码一区二区三区 | 国产免费观看黄av片 | 亚洲一区二区三区四区 | 高潮毛片无遮挡高清免费视频 | 国产人妻精品一区二区三区 | 内射老妇bbwx0c0ck | 无码精品国产va在线观看dvd | 日本一区二区三区免费播放 | 国产两女互慰高潮视频在线观看 | 久久熟妇人妻午夜寂寞影院 | 麻豆蜜桃av蜜臀av色欲av | 大胆欧美熟妇xx | 亚洲精品国产a久久久久久 | 女人高潮内射99精品 | 国产精品久久精品三级 | 97精品国产97久久久久久免费 | 高清国产亚洲精品自在久久 | 国产成人综合美国十次 | 国产精品自产拍在线观看 | 国产高清不卡无码视频 | 日本丰满护士爆乳xxxx | 国产成人精品优优av | 精品国产乱码久久久久乱码 | 久久国产精品_国产精品 | 99视频精品全部免费免费观看 | 日韩精品久久久肉伦网站 | 少妇久久久久久人妻无码 | 欧美性生交活xxxxxdddd | 婷婷五月综合缴情在线视频 | 精品乱子伦一区二区三区 | 亚洲一区二区三区含羞草 | 久久97精品久久久久久久不卡 | 亚洲一区二区三区国产精华液 | √天堂中文官网8在线 | 中文精品久久久久人妻不卡 | 日韩欧美群交p片內射中文 | 婷婷丁香六月激情综合啪 | 99久久精品日本一区二区免费 | 人人妻人人澡人人爽欧美一区 | 男女爱爱好爽视频免费看 | 丰腴饱满的极品熟妇 | 无码国内精品人妻少妇 | 国产suv精品一区二区五 | 国产特级毛片aaaaaa高潮流水 | 国产 浪潮av性色四虎 | 欧美老人巨大xxxx做受 | 国产性生交xxxxx无码 | 乌克兰少妇性做爰 | 精品偷自拍另类在线观看 | 黑人玩弄人妻中文在线 | 免费中文字幕日韩欧美 | 人人妻在人人 | 久久亚洲精品中文字幕无男同 | 日韩精品无码一区二区中文字幕 | 内射欧美老妇wbb | 色欲av亚洲一区无码少妇 | 亚洲综合无码久久精品综合 | 国产亚洲精品久久久久久久 | 女高中生第一次破苞av | 丰满少妇高潮惨叫视频 | 中文字幕人成乱码熟女app | 无码人妻久久一区二区三区不卡 | 人人妻人人澡人人爽欧美一区 | 国内精品一区二区三区不卡 | 精品国产乱码久久久久乱码 | 强伦人妻一区二区三区视频18 | 欧美真人作爱免费视频 | 丰满少妇女裸体bbw | 国产精品鲁鲁鲁 | 国产高潮视频在线观看 | 中文字幕人妻无码一区二区三区 | 无码人妻丰满熟妇区毛片18 | 香蕉久久久久久av成人 | 亚洲欧美色中文字幕在线 | 久久久婷婷五月亚洲97号色 | 成人无码精品1区2区3区免费看 | 国产婷婷色一区二区三区在线 | 精品国产精品久久一区免费式 | 亚洲日韩乱码中文无码蜜桃臀网站 | 亚洲成a人片在线观看日本 | 水蜜桃亚洲一二三四在线 | 国产精品视频免费播放 | 综合人妻久久一区二区精品 | 亚洲日韩av一区二区三区中文 | 亚欧洲精品在线视频免费观看 | 国模大胆一区二区三区 | 亚洲精品www久久久 | 国模大胆一区二区三区 | 日本又色又爽又黄的a片18禁 | 欧美日韩在线亚洲综合国产人 | 亚洲精品一区二区三区婷婷月 | 天天拍夜夜添久久精品 | 中文字幕无码免费久久99 | 欧美日韩亚洲国产精品 | 1000部啪啪未满十八勿入下载 | 国产人妻精品午夜福利免费 | 婷婷五月综合激情中文字幕 | 国产乡下妇女做爰 | 久久精品人妻少妇一区二区三区 | 国产电影无码午夜在线播放 | 国产精品无码一区二区三区不卡 | 亚洲综合无码一区二区三区 | 日本精品人妻无码77777 天堂一区人妻无码 | 国产疯狂伦交大片 | 国产精品国产自线拍免费软件 | 无码任你躁久久久久久久 | 人人爽人人澡人人人妻 | 国产精品亚洲lv粉色 | 国产电影无码午夜在线播放 | 成人片黄网站色大片免费观看 | 18精品久久久无码午夜福利 | 欧美zoozzooz性欧美 | 国产成人午夜福利在线播放 | 人妻aⅴ无码一区二区三区 | 性史性农村dvd毛片 | 中文字幕av无码一区二区三区电影 | 妺妺窝人体色www在线小说 | 国产成人av免费观看 | 中文字幕无码日韩专区 | 一个人看的www免费视频在线观看 | 99久久久国产精品无码免费 | 日韩精品无码免费一区二区三区 | 97色伦图片97综合影院 | 中文字幕av无码一区二区三区电影 | 日韩精品一区二区av在线 | 亚洲一区二区三区播放 | 男女下面进入的视频免费午夜 | 色综合久久网 | 亚洲日本va中文字幕 | 久久久精品成人免费观看 | 久久综合狠狠综合久久综合88 | 亚洲中文字幕久久无码 | 
亚洲精品无码国产 | 亚洲人成人无码网www国产 | 女高中生第一次破苞av | 日韩av激情在线观看 | 在线观看国产午夜福利片 | 国产亚洲精品精品国产亚洲综合 | 国产激情无码一区二区 | 亚洲欧美日韩成人高清在线一区 | 国产一区二区三区影院 | 77777熟女视频在线观看 а天堂中文在线官网 | 啦啦啦www在线观看免费视频 | 亚洲色www成人永久网址 | 亚洲欧美精品伊人久久 | 亚洲精品国产精品乱码不卡 | 成人精品天堂一区二区三区 | 免费看男女做好爽好硬视频 | 久久综合网欧美色妞网 | 97无码免费人妻超级碰碰夜夜 | 乌克兰少妇xxxx做受 | 亚洲成色在线综合网站 | 亚洲 a v无 码免 费 成 人 a v | √天堂中文官网8在线 | 狠狠色欧美亚洲狠狠色www | 两性色午夜视频免费播放 | 麻花豆传媒剧国产免费mv在线 | 亚洲日韩av片在线观看 | 思思久久99热只有频精品66 | 欧美大屁股xxxxhd黑色 | 无人区乱码一区二区三区 | 免费播放一区二区三区 | 又大又硬又黄的免费视频 | 国产成人精品视频ⅴa片软件竹菊 | 国产真实伦对白全集 | 日韩av激情在线观看 | 人人澡人人妻人人爽人人蜜桃 | 两性色午夜视频免费播放 | 青青久在线视频免费观看 | 内射后入在线观看一区 | 国产欧美精品一区二区三区 | 乱中年女人伦av三区 | 76少妇精品导航 | 高清国产亚洲精品自在久久 | 亚洲日韩av片在线观看 | 亚洲自偷精品视频自拍 | 图片区 小说区 区 亚洲五月 | 国产乡下妇女做爰 | 日本护士毛茸茸高潮 | av无码久久久久不卡免费网站 | √8天堂资源地址中文在线 | 一本大道伊人av久久综合 | 国产农村乱对白刺激视频 | 精品水蜜桃久久久久久久 | a片在线免费观看 | 久久久精品人妻久久影视 | 西西人体www44rt大胆高清 | 亚洲午夜久久久影院 | 夜精品a片一区二区三区无码白浆 | 成人精品视频一区二区 | 狠狠cao日日穞夜夜穞av | 日本一区二区更新不卡 | 亚洲综合无码一区二区三区 | 国产国语老龄妇女a片 | 日本一卡二卡不卡视频查询 | a片免费视频在线观看 | 欧美激情综合亚洲一二区 | 亚洲国产精品久久人人爱 | 天干天干啦夜天干天2017 | 97夜夜澡人人双人人人喊 | 国产99久久精品一区二区 | 中文字幕av伊人av无码av | 色欲综合久久中文字幕网 | 无码播放一区二区三区 | 国产一区二区不卡老阿姨 | 7777奇米四色成人眼影 | 色一情一乱一伦一视频免费看 | 一二三四社区在线中文视频 | 国产真人无遮挡作爱免费视频 | 午夜福利试看120秒体验区 | 无码av岛国片在线播放 | a在线亚洲男人的天堂 | 国产亚洲tv在线观看 | 国产网红无码精品视频 | 国产又粗又硬又大爽黄老大爷视 | 99riav国产精品视频 | 国产欧美精品一区二区三区 | 亚洲精品午夜国产va久久成人 | 国精品人妻无码一区二区三区蜜柚 | 久久人人爽人人爽人人片av高清 | 亚洲gv猛男gv无码男同 | 国产农村妇女aaaaa视频 撕开奶罩揉吮奶头视频 | 啦啦啦www在线观看免费视频 | 精品水蜜桃久久久久久久 | 欧美老妇交乱视频在线观看 | 国产人妻人伦精品1国产丝袜 | 成熟女人特级毛片www免费 | 亚欧洲精品在线视频免费观看 | 免费无码肉片在线观看 | 国产乱人伦app精品久久 国产在线无码精品电影网 国产国产精品人在线视 | 毛片内射-百度 | 伊人色综合久久天天小片 | 色一情一乱一伦一区二区三欧美 | av香港经典三级级 在线 | 任你躁国产自任一区二区三区 | 在线视频网站www色 | 无码人妻久久一区二区三区不卡 | 亚洲人成影院在线观看 | 欧美成人家庭影院 | 无码国模国产在线观看 | 大地资源中文第3页 | 国产婷婷色一区二区三区在线 | 久青草影院在线观看国产 | 亚洲色偷偷男人的天堂 | 少妇性俱乐部纵欲狂欢电影 | 国产午夜无码视频在线观看 | 男女爱爱好爽视频免费看 | 日本熟妇浓毛 | 亚洲小说春色综合另类 | 成人欧美一区二区三区 | 国产精品久久久久久久9999 | 中文字幕日韩精品一区二区三区 | 亚洲成a人片在线观看日本 | 欧美国产日韩久久mv | 久久无码中文字幕免费影院蜜桃 | 理论片87福利理论电影 | 精品aⅴ一区二区三区 | √天堂中文官网8在线 | 成人aaa片一区国产精品 | 精品亚洲韩国一区二区三区 | 亚洲成a人一区二区三区 | 牛和人交xxxx欧美 | 国产午夜无码精品免费看 | 亚洲七七久久桃花影院 | 久久zyz资源站无码中文动漫 | 精品一区二区三区无码免费视频 | 
国产精品久久久av久久久 | 久久精品国产精品国产精品污 | 久久aⅴ免费观看 | 高中生自慰www网站 | 撕开奶罩揉吮奶头视频 | 无码午夜成人1000部免费视频 | 日本精品久久久久中文字幕 | 欧美激情一区二区三区成人 | 日本乱人伦片中文三区 | 国产高清不卡无码视频 | 无码国产乱人伦偷精品视频 | 色一情一乱一伦一区二区三欧美 | 亚洲国产精品一区二区第一页 | 亚洲s色大片在线观看 | 亚洲另类伦春色综合小说 | 国产精品久久久久久亚洲毛片 | 久久久久av无码免费网 | 精品夜夜澡人妻无码av蜜桃 | 东京无码熟妇人妻av在线网址 | 国产精品人妻一区二区三区四 | 国产精品无码永久免费888 | 亚洲中文字幕av在天堂 | 国产日产欧产精品精品app | 中文字幕无码视频专区 | 国产区女主播在线观看 | 亚洲精品久久久久avwww潮水 | 亚洲成色在线综合网站 | 日韩成人一区二区三区在线观看 | 精品人妻人人做人人爽夜夜爽 | 成人综合网亚洲伊人 | 香港三级日本三级妇三级 | 四虎国产精品免费久久 | 97久久国产亚洲精品超碰热 | 99久久久国产精品无码免费 | 成年美女黄网站色大免费视频 | 国产精品无码永久免费888 | 久青草影院在线观看国产 | 人人澡人人妻人人爽人人蜜桃 | 亚洲人亚洲人成电影网站色 | 免费播放一区二区三区 | 内射巨臀欧美在线视频 | 四虎永久在线精品免费网址 | 亚洲欧美精品aaaaaa片 | 国产又粗又硬又大爽黄老大爷视 | 午夜熟女插插xx免费视频 | 水蜜桃色314在线观看 | 国产av一区二区三区最新精品 | 蜜臀av在线播放 久久综合激激的五月天 | 国产人妖乱国产精品人妖 | 国产成人精品视频ⅴa片软件竹菊 | 国产成人无码一二三区视频 | 国产午夜亚洲精品不卡 | 中文字幕亚洲情99在线 | 在线看片无码永久免费视频 | 内射欧美老妇wbb | 人妻与老人中文字幕 | 76少妇精品导航 | 天堂无码人妻精品一区二区三区 | 国产亚洲美女精品久久久2020 | 天下第一社区视频www日本 | 精品无码一区二区三区的天堂 | 精品午夜福利在线观看 | 欧美日韩视频无码一区二区三 | 偷窥村妇洗澡毛毛多 | 小sao货水好多真紧h无码视频 | 男人扒开女人内裤强吻桶进去 | 久久99精品久久久久久动态图 | 国产熟妇高潮叫床视频播放 | 国产情侣作爱视频免费观看 | www一区二区www免费 | 乱码午夜-极国产极内射 | 日韩欧美中文字幕在线三区 | 无码av最新清无码专区吞精 | www一区二区www免费 | 国产在线精品一区二区高清不卡 | 国产精品毛片一区二区 | 日日橹狠狠爱欧美视频 | 精品人人妻人人澡人人爽人人 | 亚洲综合精品香蕉久久网 | 夜夜影院未满十八勿进 | 亚洲欧美精品伊人久久 | 成人性做爰aaa片免费看不忠 | 少妇高潮喷潮久久久影院 | 久久无码人妻影院 | 国产av无码专区亚洲awww | 久久国产精品二国产精品 | а√天堂www在线天堂小说 | 国产人妻久久精品二区三区老狼 | 人妻无码αv中文字幕久久琪琪布 | 日日碰狠狠躁久久躁蜜桃 | 亚洲 日韩 欧美 成人 在线观看 | 女人被男人躁得好爽免费视频 | 国产无遮挡又黄又爽又色 | 免费国产黄网站在线观看 | 丰满少妇熟乱xxxxx视频 | 久久精品国产亚洲精品 | 免费无码av一区二区 | 久久午夜无码鲁丝片午夜精品 | 99精品国产综合久久久久五月天 | 亚洲gv猛男gv无码男同 | 亚洲色大成网站www | 欧美黑人性暴力猛交喷水 | 人人超人人超碰超国产 | 毛片内射-百度 | 丰满岳乱妇在线观看中字无码 | 日韩在线不卡免费视频一区 | 国产成人精品优优av | 98国产精品综合一区二区三区 | 亚洲码国产精品高潮在线 | 中文字幕av伊人av无码av | 亚洲中文字幕久久无码 | 国产精品怡红院永久免费 | 宝宝好涨水快流出来免费视频 | 九九久久精品国产免费看小说 | 成人欧美一区二区三区黑人免费 | 在线观看欧美一区二区三区 | 久久久久国色av免费观看性色 | 精品欧美一区二区三区久久久 | 大肉大捧一进一出好爽视频 | 中文字幕人妻丝袜二区 | 国产一区二区三区四区五区加勒比 | 丰满少妇人妻久久久久久 | 欧美大屁股xxxxhd黑色 | 欧美亚洲国产一区二区三区 | 爽爽影院免费观看 | 欧美日韩色另类综合 | 国产小呦泬泬99精品 | 澳门永久av免费网站 | 亚洲自偷自拍另类第1页 | 在线 国产 欧美 亚洲 天堂 | 人妻少妇被猛烈进入中文字幕 | 少妇性俱乐部纵欲狂欢电影 | 国产成人精品无码播放 | 亚洲国产精品美女久久久久 | 
午夜时刻免费入口 | 亚洲精品一区二区三区大桥未久 | 丝袜人妻一区二区三区 | 国内综合精品午夜久久资源 | 国内精品久久毛片一区二区 | 鲁鲁鲁爽爽爽在线视频观看 | 欧美人与禽zoz0性伦交 | 国产精品亚洲专区无码不卡 | 亚洲精品久久久久久久久久久 | 98国产精品综合一区二区三区 | 国产国产精品人在线视 | 99久久无码一区人妻 | 亚洲国产精品毛片av不卡在线 | 激情五月综合色婷婷一区二区 | 国产精品第一区揄拍无码 | 国产熟女一区二区三区四区五区 | 中文字幕乱妇无码av在线 | 国产精品igao视频网 | 午夜男女很黄的视频 | 熟妇女人妻丰满少妇中文字幕 | 国产色视频一区二区三区 | 免费视频欧美无人区码 | 久激情内射婷内射蜜桃人妖 | 国产精品.xx视频.xxtv | yw尤物av无码国产在线观看 | 色情久久久av熟女人妻网站 | 色婷婷综合激情综在线播放 | 久久久久免费看成人影片 | 少妇性荡欲午夜性开放视频剧场 | 乱码av麻豆丝袜熟女系列 | 久久久中文字幕日本无吗 | 亚洲国产欧美日韩精品一区二区三区 | 国产农村妇女aaaaa视频 撕开奶罩揉吮奶头视频 | 在线欧美精品一区二区三区 | 中文字幕人妻无码一夲道 | 未满成年国产在线观看 | 国产精品高潮呻吟av久久4虎 | 欧美 亚洲 国产 另类 | 亚洲国精产品一二二线 | 人人妻人人澡人人爽欧美一区 | 国产在线精品一区二区三区直播 | 欧美国产日韩久久mv | 国内综合精品午夜久久资源 | 国产乱人伦app精品久久 国产在线无码精品电影网 国产国产精品人在线视 | 成人毛片一区二区 | 亚洲va欧美va天堂v国产综合 | 国产精品毛片一区二区 | 国产在线一区二区三区四区五区 | 最近的中文字幕在线看视频 | 亚洲精品久久久久久久久久久 | 日韩精品久久久肉伦网站 | 久久99精品久久久久婷婷 | 国产精品久久久久无码av色戒 | 少妇性l交大片 | 精品日本一区二区三区在线观看 | 精品乱子伦一区二区三区 | 亚洲精品中文字幕 | 女人和拘做爰正片视频 | 亚洲欧美精品伊人久久 | 红桃av一区二区三区在线无码av | 成人免费视频在线观看 | 国内精品九九久久久精品 | 精品人人妻人人澡人人爽人人 | 亚洲自偷自拍另类第1页 | 久久久久人妻一区精品色欧美 | 色妞www精品免费视频 | 麻豆蜜桃av蜜臀av色欲av | 成人性做爰aaa片免费看不忠 | 国产精品无码成人午夜电影 | 少妇无套内谢久久久久 | 玩弄中年熟妇正在播放 | 曰本女人与公拘交酡免费视频 | 中文字幕精品av一区二区五区 | 婷婷五月综合激情中文字幕 | a在线观看免费网站大全 | 亚洲成av人影院在线观看 | 国产亚洲精品久久久久久 | 久久精品丝袜高跟鞋 | 四十如虎的丰满熟妇啪啪 | 亚洲国产综合无码一区 | 国产艳妇av在线观看果冻传媒 | 粗大的内捧猛烈进出视频 | 日本精品少妇一区二区三区 | 国产午夜手机精彩视频 | 99re在线播放 | 婷婷五月综合激情中文字幕 | 亚洲日韩一区二区 | 成人三级无码视频在线观看 | 偷窥日本少妇撒尿chinese | 国产黄在线观看免费观看不卡 | 亚洲精品一区国产 | 女人被男人躁得好爽免费视频 | 成熟女人特级毛片www免费 | 久久国产精品_国产精品 | 精品国产精品久久一区免费式 | 水蜜桃亚洲一二三四在线 | 色欲人妻aaaaaaa无码 | 亚洲色欲久久久综合网东京热 | 国产精品久免费的黄网站 | 国产综合在线观看 | 亚洲欧美日韩国产精品一区二区 |