3atv精品不卡视频,97人人超碰国产精品最新,中文字幕av一区二区三区人妻少妇,久久久精品波多野结衣,日韩一区二区三区精品

歡迎訪問 生活随笔!

生活随笔

当前位置: 首页 > 人文社科 > 生活经验 > 内容正文

生活经验

Neon Intrinsics各函数介绍

发布时间:2023/11/27 生活经验 33 豆豆
生活随笔 收集整理的这篇文章主要介绍了 Neon Intrinsics各函数介绍,小编觉得挺不错的,现在分享给大家,帮大家做个参考。
#ifndef __ARM_NEON__
#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h
#endif/* Instruction classes referred to by the section comments in this listing:
(1) Normal instructions: produce a result vector of the same size, and usually
    the same type, as the operand vectors.
(2) Long instructions: operate on doubleword vector operands and produce a
    quadword vector result. The result elements are generally twice the width
    of the operand elements and of the same type.
(3) Wide instructions: operate on one doubleword vector operand and one quadword
    vector operand and produce a quadword vector result. The result elements and
    the elements of the first operand are twice the width of the elements of the
    second operand.
(4) Narrow instructions: operate on quadword vector operands and produce a
    doubleword vector result. The result elements are generally half the width
    of the operand elements.
(5) Saturating instructions: a result that exceeds the range of the data type is
    automatically clamped to that range. */
/******************************************************Addition*************************/
/*--1. Vector add (normal instruction): vadd -> r[i] = a[i] + b[i];
r, a and b have equal lane sizes.
The trailing // comments appear to name the matching x86 SSE intrinsic.--*/
int8x8_t vadd_s8 (int8x8_t __a, int8x8_t __b);//_mm_add_epi8
int16x4_t vadd_s16 (int16x4_t __a, int16x4_t __b);//_mm_add_epi16
int32x2_t vadd_s32 (int32x2_t __a, int32x2_t __b);//_mm_add_epi32
int64x1_t vadd_s64 (int64x1_t __a, int64x1_t __b);//_mm_add_epi64
//_mm_add_ps, SSE, use only low 64 bits
float32x2_t vadd_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vadd_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_add_epi8
uint16x4_t vadd_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_add_epi16
uint32x2_t vadd_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_add_epi32
uint64x1_t vadd_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_add_epi64
int8x16_t vaddq_s8 (int8x16_t __a, int8x16_t __b);//_mm_add_epi8
int16x8_t vaddq_s16 (int16x8_t __a, int16x8_t __b);//_mm_add_epi16
int32x4_t vaddq_s32 (int32x4_t __a, int32x4_t __b);//_mm_add_epi32
int64x2_t vaddq_s64 (int64x2_t __a, int64x2_t __b);//_mm_add_epi64
float32x4_t vaddq_f32 (float32x4_t __a, float32x4_t __b);//_mm_add_ps
uint8x16_t vaddq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_add_epi8
uint16x8_t vaddq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_add_epi16
uint32x4_t vaddq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_add_epi32
uint64x2_t vaddq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_add_epi64
/*--2. Vector long add (long instruction): vaddl -> r[i] = a[i] + b[i];
a and b have equal lane sizes; the result is a 128-bit vector whose lanes
are twice as wide as the operand lanes.--*/
int16x8_t vaddl_s8 (int8x8_t __a, int8x8_t __b);
int32x4_t vaddl_s16 (int16x4_t __a, int16x4_t __b);
int64x2_t vaddl_s32 (int32x2_t __a, int32x2_t __b);
uint16x8_t vaddl_u8 (uint8x8_t __a, uint8x8_t __b);
uint32x4_t vaddl_u16 (uint16x4_t __a, uint16x4_t __b);
uint64x2_t vaddl_u32 (uint32x2_t __a, uint32x2_t __b);
/*--3. Vector wide add (wide instruction): vaddw -> r[i] = a[i] + b[i];
a is a quadword vector, b is a doubleword vector with lanes half as wide,
and the result has the type of a.--*/
int16x8_t vaddw_s8 (int16x8_t __a, int8x8_t __b);
int32x4_t vaddw_s16 (int32x4_t __a, int16x4_t __b);
int64x2_t vaddw_s32 (int64x2_t __a, int32x2_t __b);
uint16x8_t vaddw_u8 (uint16x8_t __a, uint8x8_t __b);
uint32x4_t vaddw_u16 (uint32x4_t __a, uint16x4_t __b);
uint64x2_t vaddw_u32 (uint64x2_t __a, uint32x2_t __b);
/*--4、Vector halving add: vhadd -> ri = (ai + bi) >> 1; 
shifts each result right one bit, Results are truncated--*/
int8x8_t vhadd_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vhadd_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vhadd_s32 (int32x2_t __a, int32x2_t __b);
uint8x8_t vhadd_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vhadd_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vhadd_u32 (uint32x2_t __a, uint32x2_t __b);
int8x16_t vhaddq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vhaddq_s16 (int16x8_t __a, int16x8_t __b)
int32x4_t vhaddq_s32 (int32x4_t __a, int32x4_t __b)
uint8x16_t vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
uint16x8_t vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
uint32x4_t vhaddq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--5. Vector rounding halving add: vrhadd -> r[i] = (a[i] + b[i] + 1) >> 1;
adds corresponding lanes and shifts each sum right by one bit.
The results are rounded (round half up) rather than truncated.--*/
int8x8_t vrhadd_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vrhadd_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vrhadd_s32 (int32x2_t __a, int32x2_t __b);
uint8x8_t vrhadd_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_avg_epu8
uint16x4_t vrhadd_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_avg_epu16
uint32x2_t vrhadd_u32 (uint32x2_t __a, uint32x2_t __b);
int8x16_t vrhaddq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vrhaddq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vrhaddq_s32 (int32x4_t __a, int32x4_t __b);
uint8x16_t vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_avg_epu8
uint16x8_t vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_avg_epu16
uint32x4_t vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--6. Vector saturating add (saturating instruction):
vqadd -> r[i] = sat(a[i] + b[i]);
a result that overflows the lane type is clamped to that type's range.--*/
int8x8_t vqadd_s8 (int8x8_t __a, int8x8_t __b);//_mm_adds_epi8
int16x4_t vqadd_s16 (int16x4_t __a, int16x4_t __b);//_mm_adds_epi16
int32x2_t vqadd_s32 (int32x2_t __a, int32x2_t __b);
int64x1_t vqadd_s64 (int64x1_t __a, int64x1_t __b);
uint8x8_t vqadd_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_adds_epu8
uint16x4_t vqadd_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_adds_epu16
uint32x2_t vqadd_u32 (uint32x2_t __a, uint32x2_t __b);
uint64x1_t vqadd_u64 (uint64x1_t __a, uint64x1_t __b);
int8x16_t vqaddq_s8 (int8x16_t __a, int8x16_t __b);//_mm_adds_epi8
int16x8_t vqaddq_s16 (int16x8_t __a, int16x8_t __b);//_mm_adds_epi16
int32x4_t vqaddq_s32 (int32x4_t __a, int32x4_t __b);
int64x2_t vqaddq_s64 (int64x2_t __a, int64x2_t __b);
uint8x16_t vqaddq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_adds_epu8
uint16x8_t vqaddq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_adds_epu16
uint32x4_t vqaddq_u32 (uint32x4_t __a, uint32x4_t __b);
uint64x2_t vqaddq_u64 (uint64x2_t __a, uint64x2_t __b);
/*--7. Vector add high half (narrow instruction):
vaddhn -> r[i] = high half of (a[i] + b[i]);
adds corresponding lanes and keeps only the most significant half of each
sum, so the result lanes are half as wide. The results are truncated
(this operation does not saturate).--*/
int8x8_t vaddhn_s16 (int16x8_t __a, int16x8_t __b);
int16x4_t vaddhn_s32 (int32x4_t __a, int32x4_t __b);
int32x2_t vaddhn_s64 (int64x2_t __a, int64x2_t __b);
uint8x8_t vaddhn_u16 (uint16x8_t __a, uint16x8_t __b);
uint16x4_t vaddhn_u32 (uint32x4_t __a, uint32x4_t __b);
uint32x2_t vaddhn_u64 (uint64x2_t __a, uint64x2_t __b);
/*--8、Vector rounding add high half(窄指令): vraddhn -> ri = ai + bi; 
selecting High half, The results are rounded--*/
int8x8_t vraddhn_s16 (int16x8_t __a, int16x8_t __b);
int16x4_t vraddhn_s32 (int32x4_t __a, int32x4_t __b)
int32x2_t vraddhn_s64 (int64x2_t __a, int64x2_t __b)
uint8x8_t vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
uint16x4_t vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
uint32x2_t vraddhn_u64 (uint64x2_t __a, uint64x2_t __b);
/*******************************************Multiplication******************************/
/*--1. Vector multiply (normal instruction): vmul -> r[i] = a[i] * b[i];
the result has the same vector type as the operands.--*/
int8x8_t vmul_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vmul_s16 (int16x4_t __a, int16x4_t __b);//_mm_mullo_epi16
int32x2_t vmul_s32 (int32x2_t __a, int32x2_t __b);
float32x2_t vmul_f32 (float32x2_t __a, float32x2_t __b);//_mm_mul_ps
uint8x8_t vmul_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vmul_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_mullo_epi16
uint32x2_t vmul_u32 (uint32x2_t __a, uint32x2_t __b);
poly8x8_t vmul_p8 (poly8x8_t __a, poly8x8_t __b);
int8x16_t vmulq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vmulq_s16 (int16x8_t __a, int16x8_t __b);//_mm_mullo_epi16
int32x4_t vmulq_s32 (int32x4_t __a, int32x4_t __b);
float32x4_t vmulq_f32 (float32x4_t __a, float32x4_t __b);//_mm_mul_ps
uint8x16_t vmulq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vmulq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_mullo_epi16
uint32x4_t vmulq_u32 (uint32x4_t __a, uint32x4_t __b);
poly8x16_t vmulq_p8 (poly8x16_t __a, poly8x16_t __b);
/*--2. Vector multiply accumulate: vmla -> r[i] = a[i] + b[i] * c[i];
the first argument is the accumulator, the second and third are multiplied.--*/
int8x8_t vmla_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);
int16x4_t vmla_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c);
int32x2_t vmla_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c);
float32x2_t vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c);
uint8x8_t vmla_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);
uint16x4_t vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);
uint32x2_t vmla_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);
int8x16_t vmlaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c);
int16x8_t vmlaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c);
int32x4_t vmlaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c);
float32x4_t vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);
uint8x16_t vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);
uint16x8_t vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);
uint32x4_t vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);
/*--3. Vector multiply accumulate long: vmlal -> r[i] = a[i] + b[i] * c[i];
b and c are doubleword vectors; the accumulator a and the result are
quadword vectors with lanes twice as wide.--*/
int16x8_t vmlal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c);
int32x4_t vmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);
int64x2_t vmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);
uint16x8_t vmlal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c);
uint32x4_t vmlal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c);
uint64x2_t vmlal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c);
/*--4. Vector multiply subtract: vmls -> r[i] = a[i] - b[i] * c[i];
the product of the second and third arguments is subtracted from the first.--*/
int8x8_t vmls_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);
int16x4_t vmls_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c);
int32x2_t vmls_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c);
float32x2_t vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c);
uint8x8_t vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);
uint16x4_t vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);
uint32x2_t vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);
int8x16_t vmlsq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c);
int16x8_t vmlsq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c);
int32x4_t vmlsq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c);
float32x4_t vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);
uint8x16_t vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);
uint16x8_t vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);
uint32x4_t vmlsq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);
/*--5. Vector multiply subtract long: vmlsl -> r[i] = a[i] - b[i] * c[i];
b and c are doubleword vectors; a and the result are quadword vectors
with lanes twice as wide.--*/
int16x8_t vmlsl_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c);
int32x4_t vmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);
int64x2_t vmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);
uint16x8_t vmlsl_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c);
uint32x4_t vmlsl_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c);
uint64x2_t vmlsl_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c);
/*--6. Vector saturating doubling multiply high:
vqdmulh -> r[i] = sat(high half of (2 * a[i] * b[i]));
multiplies corresponding lanes, doubles the products and returns only the
high half of the truncated results.--*/
int16x4_t vqdmulh_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vqdmulh_s32 (int32x2_t __a, int32x2_t __b);
int16x8_t vqdmulhq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vqdmulhq_s32 (int32x4_t __a, int32x4_t __b);
/*--7. Vector saturating rounding doubling multiply high:
vqrdmulh -> r[i] = sat(high half of (2 * a[i] * b[i]));
multiplies corresponding lanes, doubles the products and returns only the
high half of the rounded results.
The results are saturated if they overflow.--*/
int16x4_t vqrdmulh_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vqrdmulh_s32 (int32x2_t __a, int32x2_t __b);
int16x8_t vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b);
/*--8. Vector saturating doubling multiply accumulate long:
vqdmlal -> r[i] = sat(a[i] + 2 * b[i] * c[i]);
multiplies the lanes of the second and third vectors, doubles the products
and adds them to the lanes of the first vector.
The results are saturated if they overflow.--*/
int32x4_t vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);
int64x2_t  vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);
/*--9. Vector saturating doubling multiply subtract long:
vqdmlsl -> r[i] = sat(a[i] - 2 * b[i] * c[i]);
multiplies the lanes of the second and third vectors, doubles the products
and subtracts them from the lanes of the first vector.
The results are saturated if they overflow.--*/
int32x4_t vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);
int64x2_t vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);
/*--10. Vector long multiply (long instruction): vmull -> r[i] = a[i] * b[i];
the operands are doubleword vectors and the result is a quadword vector
with lanes twice as wide.--*/
int16x8_t vmull_s8 (int8x8_t __a, int8x8_t __b);
int32x4_t vmull_s16 (int16x4_t __a, int16x4_t __b);
int64x2_t vmull_s32 (int32x2_t __a, int32x2_t __b);
uint16x8_t vmull_u8 (uint8x8_t __a, uint8x8_t __b);
uint32x4_t vmull_u16 (uint16x4_t __a, uint16x4_t __b);
uint64x2_t vmull_u32 (uint32x2_t __a, uint32x2_t __b);
poly16x8_t vmull_p8 (poly8x8_t __a, poly8x8_t __b);
/*--11. Vector saturating doubling long multiply:
vqdmull -> r[i] = sat(2 * a[i] * b[i]);
the result lanes are twice as wide as the operand lanes.
If any of the results overflow, they are saturated.--*/
int32x4_t vqdmull_s16 (int16x4_t __a, int16x4_t __b);
int64x2_t vqdmull_s32 (int32x2_t __a, int32x2_t __b);
/*--12、Fused multiply accumulate: vfma -> ri = ai + bi * ci; 
The result of the multiply is not rounded before the accumulation--*/
float32x2_t vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
float32x4_t vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);
/*--13. Fused multiply subtract: vfms -> r[i] = a[i] - b[i] * c[i];
the result of the multiply is not rounded before the subtraction.--*/
float32x2_t vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c);
float32x4_t vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c);
/******************************************************Round to integral****************/
/* Each intrinsic rounds every float lane to an integral value, still held as
a float; the sub-headings give the rounding direction.
NOTE(review): ACLE spells the quadword forms vrndnq_f32, vrndaq_f32,
vrndpq_f32 and vrndmq_f32; the vrndqn/vrndqa/vrndqp/vrndqm spellings below
look like they come from an older compiler header -- confirm against the
target toolchain before relying on these names. */
/*--1. To nearest, ties to even--*/
float32x2_t vrndn_f32 (float32x2_t __a);
float32x4_t vrndqn_f32 (float32x4_t __a);
/*--2. To nearest, ties away from zero--*/
float32x2_t vrnda_f32 (float32x2_t __a);
float32x4_t vrndqa_f32 (float32x4_t __a);
/*--3. Towards +Inf--*/
float32x2_t vrndp_f32 (float32x2_t __a);
float32x4_t vrndqp_f32 (float32x4_t __a);
/*--4. Towards -Inf--*/
float32x2_t vrndm_f32 (float32x2_t __a);
float32x4_t vrndqm_f32 (float32x4_t __a);
/*--5. Towards 0--*/
float32x2_t vrnd_f32 (float32x2_t __a);
float32x4_t vrndq_f32 (float32x4_t __a);
/**********************************************Subtraction******************************/
/*--1. Vector subtract (normal instruction): vsub -> r[i] = a[i] - b[i];
the result has the same vector type as the operands.--*/
int8x8_t vsub_s8 (int8x8_t __a, int8x8_t __b);//_mm_sub_epi8
int16x4_t vsub_s16 (int16x4_t __a, int16x4_t __b);//_mm_sub_epi16
int32x2_t vsub_s32 (int32x2_t __a, int32x2_t __b);//_mm_sub_epi32
int64x1_t vsub_s64 (int64x1_t __a, int64x1_t __b);//_mm_sub_epi64
float32x2_t vsub_f32 (float32x2_t __a, float32x2_t __b);//_mm_sub_ps
uint8x8_t vsub_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_sub_epi8
uint16x4_t vsub_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_sub_epi16
uint32x2_t vsub_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_sub_epi32
uint64x1_t vsub_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_sub_epi64
int8x16_t vsubq_s8 (int8x16_t __a, int8x16_t __b);//_mm_sub_epi8
int16x8_t vsubq_s16 (int16x8_t __a, int16x8_t __b);//_mm_sub_epi16
int32x4_t vsubq_s32 (int32x4_t __a, int32x4_t __b);//_mm_sub_epi32
int64x2_t vsubq_s64 (int64x2_t __a, int64x2_t __b);//_mm_sub_epi64
float32x4_t vsubq_f32 (float32x4_t __a, float32x4_t __b);//_mm_sub_ps
uint8x16_t vsubq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_sub_epi8
uint16x8_t vsubq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_sub_epi16
uint32x4_t vsubq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_sub_epi32
uint64x2_t vsubq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_sub_epi64
/*--2. Vector long subtract (long instruction): vsubl -> r[i] = a[i] - b[i];
the operands are doubleword vectors and the result is a quadword vector
with lanes twice as wide.--*/
int16x8_t vsubl_s8 (int8x8_t __a, int8x8_t __b);
int32x4_t vsubl_s16 (int16x4_t __a, int16x4_t __b);
int64x2_t vsubl_s32 (int32x2_t __a, int32x2_t __b);
uint16x8_t vsubl_u8 (uint8x8_t __a, uint8x8_t __b);
uint32x4_t vsubl_u16 (uint16x4_t __a, uint16x4_t __b);
uint64x2_t vsubl_u32 (uint32x2_t __a, uint32x2_t __b);
/*--3. Vector wide subtract (wide instruction): vsubw -> r[i] = a[i] - b[i];
a is a quadword vector, b is a doubleword vector with lanes half as wide,
and the result has the type of a.--*/
int16x8_t vsubw_s8 (int16x8_t __a, int8x8_t __b);
int32x4_t vsubw_s16 (int32x4_t __a, int16x4_t __b);
int64x2_t vsubw_s32 (int64x2_t __a, int32x2_t __b);
uint16x8_t vsubw_u8 (uint16x8_t __a, uint8x8_t __b);
uint32x4_t vsubw_u16 (uint32x4_t __a, uint16x4_t __b);
uint64x2_t vsubw_u32 (uint64x2_t __a, uint32x2_t __b);
/*--4. Vector saturating subtract (saturating instruction):
vqsub -> r[i] = sat(a[i] - b[i]);
if any of the results overflow, they are saturated.
NOTE(review): the _mm_subs_epi32 / _mm_subs_epu32 names in the trailing
comments do not look like real SSE intrinsics (saturating subtract appears to
exist only for 8- and 16-bit lanes) -- confirm before using them as a
porting guide.--*/
int8x8_t vqsub_s8 (int8x8_t __a, int8x8_t __b);//_mm_subs_epi8
int16x4_t vqsub_s16 (int16x4_t __a, int16x4_t __b);//_mm_subs_epi16
int32x2_t vqsub_s32 (int32x2_t __a, int32x2_t __b);//_mm_subs_epi32
int64x1_t vqsub_s64 (int64x1_t __a, int64x1_t __b);
uint8x8_t vqsub_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_subs_epu8
uint16x4_t vqsub_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_subs_epu16
uint32x2_t vqsub_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_subs_epu32
uint64x1_t vqsub_u64 (uint64x1_t __a, uint64x1_t __b);
int8x16_t vqsubq_s8 (int8x16_t __a, int8x16_t __b);//_mm_subs_epi8
int16x8_t vqsubq_s16 (int16x8_t __a, int16x8_t __b);//_mm_subs_epi16
int32x4_t vqsubq_s32 (int32x4_t __a, int32x4_t __b);//_mm_subs_epi32
int64x2_t vqsubq_s64 (int64x2_t __a, int64x2_t __b);
uint8x16_t vqsubq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_subs_epu8
uint16x8_t vqsubq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_subs_epu16
uint32x4_t vqsubq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_subs_epu32
uint64x2_t vqsubq_u64 (uint64x2_t __a, uint64x2_t __b);
/*--5. Vector halving subtract: vhsub -> r[i] = (a[i] - b[i]) >> 1;
subtracts corresponding lanes and shifts each difference right by one bit.
The results are truncated.--*/
int8x8_t vhsub_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vhsub_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vhsub_s32 (int32x2_t __a, int32x2_t __b);
uint8x8_t vhsub_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vhsub_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vhsub_u32 (uint32x2_t __a, uint32x2_t __b);
int8x16_t vhsubq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vhsubq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vhsubq_s32 (int32x4_t __a, int32x4_t __b);
uint8x16_t vhsubq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vhsubq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vhsubq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--6. Vector subtract high half (narrow instruction):
vsubhn -> r[i] = high half of (a[i] - b[i]);
returns the most significant halves of the differences, so the result lanes
are half as wide. The results are truncated.--*/
int8x8_t vsubhn_s16 (int16x8_t __a, int16x8_t __b);
int16x4_t vsubhn_s32 (int32x4_t __a, int32x4_t __b);
int32x2_t vsubhn_s64 (int64x2_t __a, int64x2_t __b);
uint8x8_t vsubhn_u16 (uint16x8_t __a, uint16x8_t __b);
uint16x4_t vsubhn_u32 (uint32x4_t __a, uint32x4_t __b);
uint32x2_t vsubhn_u64 (uint64x2_t __a, uint64x2_t __b);
/*--7、Vector rounding subtract high half(窄指令): vrsubhn -> ai - bi; 
It returns the most significant halves of the results. The results are rounded--*/
int8x8_t vrsubhn_s16 (int16x8_t __a, int16x8_t __b);
int16x4_t vrsubhn_s32 (int32x4_t __a, int32x4_t __b);
int32x2_t vrsubhn_s64 (int64x2_t __a, int64x2_t __b)
uint8x8_t vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b);
uint16x4_t vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b);
uint32x2_t vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b);
/******************************************************Comparison***********************/
/*--1. Vector compare equal (normal instruction):
vceq -> r[i] = (a[i] == b[i]) ? 1...1 : 0...0;
if the lanes are equal, the corresponding lane of the destination vector is
set to all ones; otherwise it is set to all zeros. The result is an unsigned
vector with the same lane width as the operands.--*/
uint8x8_t vceq_s8 (int8x8_t __a, int8x8_t __b);//_mm_cmpeq_epi8
uint16x4_t vceq_s16 (int16x4_t __a, int16x4_t __b);//_mm_cmpeq_epi16
uint32x2_t vceq_s32 (int32x2_t __a, int32x2_t __b);//_mm_cmpeq_epi32
uint32x2_t vceq_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vceq_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_cmpeq_epi8
uint16x4_t vceq_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_cmpeq_epi16
uint32x2_t vceq_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_cmpeq_epi32
uint8x8_t vceq_p8 (poly8x8_t __a, poly8x8_t __b);//_mm_cmpeq_epi8
uint8x16_t vceqq_s8 (int8x16_t __a, int8x16_t __b);//_mm_cmpeq_epi8
uint16x8_t vceqq_s16 (int16x8_t __a, int16x8_t __b);//_mm_cmpeq_epi16
uint32x4_t vceqq_s32 (int32x4_t __a, int32x4_t __b);//_mm_cmpeq_epi32
uint32x4_t vceqq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16_t vceqq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_cmpeq_epi8
uint16x8_t vceqq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_cmpeq_epi16
uint32x4_t vceqq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_cmpeq_epi32
uint8x16_t vceqq_p8 (poly8x16_t __a, poly8x16_t __b);//_mm_cmpeq_epi8
/*--2. Vector compare greater-than or equal (normal instruction):
vcge -> r[i] = (a[i] >= b[i]) ? 1...1 : 0...0;
if a[i] is greater than or equal to b[i], the corresponding lane of the
destination vector is set to all ones; otherwise it is set to all zeros.--*/
uint8x8_t vcge_s8 (int8x8_t __a, int8x8_t __b);
uint16x4_t vcge_s16 (int16x4_t __a, int16x4_t __b);
uint32x2_t vcge_s32 (int32x2_t __a, int32x2_t __b);
uint32x2_t vcge_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vcge_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vcge_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vcge_u32 (uint32x2_t __a, uint32x2_t __b);
uint8x16_t vcgeq_s8 (int8x16_t __a, int8x16_t __b);
uint16x8_t vcgeq_s16 (int16x8_t __a, int16x8_t __b);
uint32x4_t vcgeq_s32 (int32x4_t __a, int32x4_t __b);
uint32x4_t vcgeq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16_t vcgeq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vcgeq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vcgeq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--3. Vector compare less-than or equal (normal instruction):
vcle -> r[i] = (a[i] <= b[i]) ? 1...1 : 0...0;
if a[i] is less than or equal to b[i], the corresponding lane of the
destination vector is set to all ones; otherwise it is set to all zeros.--*/
uint8x8_t vcle_s8 (int8x8_t __a, int8x8_t __b);
uint16x4_t vcle_s16 (int16x4_t __a, int16x4_t __b);
uint32x2_t vcle_s32 (int32x2_t __a, int32x2_t __b);
uint32x2_t vcle_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vcle_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vcle_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vcle_u32 (uint32x2_t __a, uint32x2_t __b);
uint8x16_t vcleq_s8 (int8x16_t __a, int8x16_t __b);
uint16x8_t vcleq_s16 (int16x8_t __a, int16x8_t __b);
uint32x4_t vcleq_s32 (int32x4_t __a, int32x4_t __b);
uint32x4_t vcleq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16_t vcleq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vcleq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vcleq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--4. Vector compare greater-than (normal instruction):
vcgt -> r[i] = (a[i] > b[i]) ? 1...1 : 0...0;
if a[i] is greater than b[i], the corresponding lane of the destination
vector is set to all ones; otherwise it is set to all zeros.--*/
uint8x8_t vcgt_s8 (int8x8_t __a, int8x8_t __b);
uint16x4_t vcgt_s16 (int16x4_t __a, int16x4_t __b);
uint32x2_t vcgt_s32 (int32x2_t __a, int32x2_t __b);
uint32x2_t vcgt_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vcgt_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vcgt_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vcgt_u32 (uint32x2_t __a, uint32x2_t __b);
uint8x16_t vcgtq_s8 (int8x16_t __a, int8x16_t __b);
uint16x8_t vcgtq_s16 (int16x8_t __a, int16x8_t __b);
uint32x4_t vcgtq_s32 (int32x4_t __a, int32x4_t __b);
uint32x4_t vcgtq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16_t vcgtq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vcgtq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vcgtq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--5. Vector compare less-than (normal instruction):
vclt -> r[i] = (a[i] < b[i]) ? 1...1 : 0...0;
if a[i] is less than b[i], the corresponding lane of the destination vector
is set to all ones; otherwise it is set to all zeros.--*/
uint8x8_t vclt_s8 (int8x8_t __a, int8x8_t __b);
uint16x4_t vclt_s16 (int16x4_t __a, int16x4_t __b);
uint32x2_t vclt_s32 (int32x2_t __a, int32x2_t __b);
uint32x2_t vclt_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vclt_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vclt_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vclt_u32 (uint32x2_t __a, uint32x2_t __b);
uint8x16_t vcltq_s8 (int8x16_t __a, int8x16_t __b);
uint16x8_t vcltq_s16 (int16x8_t __a, int16x8_t __b);
uint32x4_t vcltq_s32 (int32x4_t __a, int32x4_t __b);
uint32x4_t vcltq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16_t vcltq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vcltq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vcltq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--6. Vector compare absolute greater-than or equal (normal instruction):
vcage -> r[i] = (|a[i]| >= |b[i]|) ? 1...1 : 0...0;
compares the absolute value of each lane of a with the absolute value of the
corresponding lane of b. If it is greater than or equal to it, the
corresponding lane of the destination vector is set to all ones;
otherwise it is set to all zeros.--*/
uint32x2_t vcage_f32 (float32x2_t __a, float32x2_t __b);
uint32x4_t vcageq_f32 (float32x4_t __a, float32x4_t __b);
/*--7. Vector compare absolute less-than or equal (normal instruction):
vcale -> r[i] = (|a[i]| <= |b[i]|) ? 1...1 : 0...0;
compares the absolute value of each lane of a with the absolute value of the
corresponding lane of b. If it is less than or equal to it, the
corresponding lane of the destination vector is set to all ones;
otherwise it is set to all zeros.--*/
uint32x2_t vcale_f32 (float32x2_t __a, float32x2_t __b);
uint32x4_t vcaleq_f32 (float32x4_t __a, float32x4_t __b);
/*--8. Vector compare absolute greater-than (normal instruction):
vcagt -> r[i] = (|a[i]| > |b[i]|) ? 1...1 : 0...0;
compares the absolute value of each lane of a with the absolute value of the
corresponding lane of b. If it is greater than it, the corresponding lane of
the destination vector is set to all ones; otherwise it is set to all zeros.--*/
uint32x2_t vcagt_f32 (float32x2_t __a, float32x2_t __b);
uint32x4_t vcagtq_f32 (float32x4_t __a, float32x4_t __b);
/*--9. Vector compare absolute less-than (normal instruction):
vcalt -> r[i] = (|a[i]| < |b[i]|) ? 1...1 : 0...0;
compares the absolute value of each lane of a with the absolute value of the
corresponding lane of b. If it is less than it, the corresponding lane of the
destination vector is set to all ones; otherwise it is set to all zeros.--*/
uint32x2_t vcalt_f32 (float32x2_t __a, float32x2_t __b);
uint32x4_t vcaltq_f32 (float32x4_t __a, float32x4_t __b);
/**********************************************Vector test bits*************************/
/*--Normal instruction: vtst -> r[i] = ((a[i] & b[i]) != 0) ? 1...1 : 0...0;
bitwise ANDs each lane of a with the corresponding lane of b. If the result
is not zero, the corresponding lane of the destination vector is set to all
ones; otherwise it is set to all zeros.--*/
uint8x8_t vtst_s8 (int8x8_t __a, int8x8_t __b);
uint16x4_t vtst_s16 (int16x4_t __a, int16x4_t __b);
uint32x2_t vtst_s32 (int32x2_t __a, int32x2_t __b);
uint8x8_t vtst_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vtst_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vtst_u32 (uint32x2_t __a, uint32x2_t __b);
uint8x8_t vtst_p8 (poly8x8_t __a, poly8x8_t __b);
uint8x16_t vtstq_s8 (int8x16_t __a, int8x16_t __b);
uint16x8_t vtstq_s16 (int16x8_t __a, int16x8_t __b);
uint32x4_t vtstq_s32 (int32x4_t __a, int32x4_t __b);
uint8x16_t vtstq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vtstq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vtstq_u32 (uint32x4_t __a, uint32x4_t __b);
uint8x16_t vtstq_p8 (poly8x16_t __a, poly8x16_t __b);
/**********************************************Absolute difference**********************/
/*--1. Absolute difference between the arguments (normal instruction):
vabd -> r[i] = |a[i] - b[i]|;
subtracts corresponding lanes and returns the absolute values of the results.--*/
int8x8_t vabd_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vabd_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vabd_s32 (int32x2_t __a, int32x2_t __b);
float32x2_t vabd_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vabd_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vabd_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vabd_u32 (uint32x2_t __a, uint32x2_t __b);
int8x16_t vabdq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vabdq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vabdq_s32 (int32x4_t __a, int32x4_t __b);
float32x4_t vabdq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16_t vabdq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vabdq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vabdq_u32 (uint32x4_t __a, uint32x4_t __b);
/*--2. Absolute difference - long (long instruction):
vabdl -> r[i] = |a[i] - b[i]|;
the lanes of the result vector are twice as wide as the operand lanes.--*/
int16x8_t vabdl_s8 (int8x8_t __a, int8x8_t __b);
int32x4_t vabdl_s16 (int16x4_t __a, int16x4_t __b);
int64x2_t vabdl_s32 (int32x2_t __a, int32x2_t __b);
uint16x8_t vabdl_u8 (uint8x8_t __a, uint8x8_t __b);
uint32x4_t vabdl_u16 (uint16x4_t __a, uint16x4_t __b);
uint64x2_t vabdl_u32 (uint32x2_t __a, uint32x2_t __b);
/*--3. Absolute difference and accumulate:
vaba -> r[i] = a[i] + |b[i] - c[i]|;
the first argument is the accumulator.--*/
int8x8_t vaba_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);
int16x4_t vaba_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c);
int32x2_t vaba_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c);
uint8x8_t vaba_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);
uint16x4_t vaba_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);
uint32x2_t vaba_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);
int8x16_t vabaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c);
int16x8_t vabaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c);
int32x4_t vabaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c);
uint8x16_t vabaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);
uint16x8_t vabaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);
uint32x4_t vabaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);
/*--4. Absolute difference and accumulate - long:
vabal -> r[i] = a[i] + |b[i] - c[i]|;
b and c are doubleword vectors; the accumulator a and the result have lanes
twice as wide.--*/
int16x8_t vabal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c);
int32x4_t vabal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c);
int64x2_t vabal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c);
uint16x8_t vabal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c);
uint32x4_t vabal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c);
uint64x2_t vabal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c);
/***********************************************Max*************************************/
/*--Normal instruction: vmax -> r[i] = (a[i] >= b[i]) ? a[i] : b[i];
returns the larger of each pair of corresponding lanes.--*/
int8x8_t vmax_s8 (int8x8_t __a, int8x8_t __b);//_mm_max_epi8
int16x4_t vmax_s16 (int16x4_t __a, int16x4_t __b);//_mm_max_epi16
int32x2_t vmax_s32 (int32x2_t __a, int32x2_t __b);//_mm_max_epi32
float32x2_t vmax_f32 (float32x2_t __a, float32x2_t __b);//_mm_max_ps
uint8x8_t vmax_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_max_epu8
uint16x4_t vmax_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_max_epu16
uint32x2_t vmax_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_max_epu32
int8x16_t vmaxq_s8 (int8x16_t __a, int8x16_t __b);//_mm_max_epi8
int16x8_t vmaxq_s16 (int16x8_t __a, int16x8_t __b);//_mm_max_epi16
int32x4_t vmaxq_s32 (int32x4_t __a, int32x4_t __b);//_mm_max_epi32
float32x4_t vmaxq_f32 (float32x4_t __a, float32x4_t __b);//_mm_max_ps
uint8x16_t vmaxq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_max_epu8
uint16x8_t vmaxq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_max_epu16
uint32x4_t vmaxq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_max_epu32
/****************************************************Min********************************/
/*--Normal instruction: vmin -> r[i] = (a[i] >= b[i]) ? b[i] : a[i];
returns the smaller of each pair of corresponding lanes.--*/
int8x8_t vmin_s8 (int8x8_t __a, int8x8_t __b);//_mm_min_epi8
int16x4_t vmin_s16 (int16x4_t __a, int16x4_t __b);//_mm_min_epi16
int32x2_t vmin_s32 (int32x2_t __a, int32x2_t __b);//_mm_min_epi32
float32x2_t vmin_f32 (float32x2_t __a, float32x2_t __b);//_mm_min_ps
uint8x8_t vmin_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_min_epu8
uint16x4_t vmin_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_min_epu16
uint32x2_t vmin_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_min_epu32
int8x16_t vminq_s8 (int8x16_t __a, int8x16_t __b);//_mm_min_epi8
int16x8_t vminq_s16 (int16x8_t __a, int16x8_t __b);//_mm_min_epi16
int32x4_t vminq_s32 (int32x4_t __a, int32x4_t __b);//_mm_min_epi32
float32x4_t vminq_f32 (float32x4_t __a, float32x4_t __b);//_mm_min_ps
uint8x16_t vminq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_min_epu8
uint16x8_t vminq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_min_epu16
uint32x4_t vminq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_min_epu32
/*******************************************Pairwise addition***************************/
/*--1. Pairwise add (normal instruction):
vpadd -> r0 = a0 + a1, ..., r3 = a6 + a7, r4 = b0 + b1, ..., r7 = b6 + b7
(shown for 8-lane vectors);
adds adjacent pairs of lanes of the two vectors and places the results in
the destination vector: sums from a fill the low half, sums from b the
high half.--*/
int8x8_t vpadd_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vpadd_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vpadd_s32 (int32x2_t __a, int32x2_t __b);
float32x2_t vpadd_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vpadd_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vpadd_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vpadd_u32 (uint32x2_t __a, uint32x2_t __b);
/*--2. Long pairwise add: vpaddl -> r0 = a0 + a1, ..., r3 = a6 + a7 (8-lane input shown);
adds adjacent pairs of elements of a vector, sign extends or zero extends the results to
twice their original width, and places the final results in the destination vector.
The result has half as many lanes as the input, each lane twice as wide.--*/
int16x4_t vpaddl_s8 (int8x8_t __a);
int32x2_t vpaddl_s16 (int16x4_t __a);
int64x1_t vpaddl_s32 (int32x2_t __a);
uint16x4_t vpaddl_u8 (uint8x8_t __a);
uint32x2_t vpaddl_u16 (uint16x4_t __a);
uint64x1_t vpaddl_u32 (uint32x2_t __a);
int16x8_t vpaddlq_s8 (int8x16_t __a);
int32x4_t vpaddlq_s16 (int16x8_t __a);
int64x2_t vpaddlq_s32 (int32x4_t __a);
uint16x8_t vpaddlq_u8 (uint8x16_t __a);
uint32x4_t vpaddlq_u16 (uint16x8_t __a);
uint64x2_t vpaddlq_u32 (uint32x4_t __a);
/*--3、Long pairwise add and accumulate: 
vpadal -> r0 = a0 + (b0 + b1), ..., r3 = a3 + (b6 + b7);
adds adjacent pairs of elements in the second vector, sign extends or zero extends the
results to twice the original width.  It then accumulates this with the corresponding 
element in the first vector and places the final results in the destination vector--*/
int16x4_t vpadal_s8 (int16x4_t __a, int8x8_t __b);
int32x2_t vpadal_s16 (int32x2_t __a, int16x4_t __b);
int64x1_t vpadal_s32 (int64x1_t __a, int32x2_t __b);
uint16x4_t vpadal_u8 (uint16x4_t __a, uint8x8_t __b);
uint32x2_t vpadal_u16 (uint32x2_t __a, uint16x4_t __b);
uint64x1_t vpadal_u32 (uint64x1_t __a, uint32x2_t __b);
int16x8_t vpadalq_s8 (int16x8_t __a, int8x16_t __b);
int32x4_t vpadalq_s16 (int32x4_t __a, int16x8_t __b);
int64x2_t vpadalq_s32 (int64x2_t __a, int32x4_t __b);
uint16x8_t vpadalq_u8 (uint16x8_t __a, uint8x16_t __b);
uint32x4_t vpadalq_u16 (uint32x4_t __a, uint16x8_t __b);
uint64x2_t vpadalq_u32 (uint64x2_t __a, uint32x4_t __b);
/**********************************************Folding maximum**************************/
/*--Pairwise (folding) maximum, a normal instruction (it only selects one of its inputs,
so nothing can saturate):
vpmax -> r0 = a0 >= a1 ? a0 : a1, ..., r4 = b0 >= b1 ? b0 : b1, ... (8-lane case shown);
compares adjacent pairs of elements, and copies the larger of each pair into the
destination vector. The maximums from each pair of the first input vector are stored in
the lower half of the destination vector. The maximums from each pair of the second input
vector are stored in the higher half of the destination vector.--*/
int8x8_t vpmax_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vpmax_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vpmax_s32 (int32x2_t __a, int32x2_t __b);
float32x2_t vpmax_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vpmax_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vpmax_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vpmax_u32 (uint32x2_t __a, uint32x2_t __b);
/***************************************************Folding minimum*********************/
/*--Pairwise (folding) minimum, a normal instruction (it only selects one of its inputs,
so nothing can saturate):
vpmin -> r0 = a0 >= a1 ? a1 : a0, ..., r4 = b0 >= b1 ? b1 : b0, ... (8-lane case shown);
compares adjacent pairs of elements, and copies the smaller of each pair into the
destination vector. The minimums from each pair of the first input vector are stored in
the lower half of the destination vector. The minimums from each pair of the second
input vector are stored in the higher half of the destination vector.--*/
int8x8_t vpmin_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vpmin_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vpmin_s32 (int32x2_t __a, int32x2_t __b);
float32x2_t vpmin_f32 (float32x2_t __a, float32x2_t __b);
uint8x8_t vpmin_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vpmin_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vpmin_u32 (uint32x2_t __a, uint32x2_t __b);
/***************************************************Reciprocal**************************/
/*--1. Reciprocal step (floating-point, not a saturating instruction):
vrecps -> ri = 2 - ai * bi;
performs a Newton-Raphson step for finding the reciprocal. It multiplies the elements of
one vector by the corresponding elements of another vector, subtracts each of the results
from 2, and places the final results into the elements of the destination vector.--*/
float32x2_t vrecps_f32 (float32x2_t __a, float32x2_t __b);
float32x4_t vrecpsq_f32 (float32x4_t __a, float32x4_t __b);
/*--2. Reciprocal square root step (floating-point, not a saturating instruction):
vrsqrts -> ri = (3 - ai * bi) / 2;
performs a Newton-Raphson step for finding the reciprocal square root.
It multiplies the elements of one vector by the corresponding elements of another vector,
subtracts each of the results from 3, divides these results by two, and places
the final results into the elements of the destination vector.--*/
float32x2_t vrsqrts_f32 (float32x2_t __a, float32x2_t __b);
float32x4_t vrsqrtsq_f32 (float32x4_t __a, float32x4_t __b);
/************************************************Shifts by signed variable**************/
/*--1. Vector shift left (normal instruction, no saturation): vshl -> ri = ai << bi;
(negative values shift right)
left shifts each element in a vector by an amount specified in the corresponding element
in the second input vector. The shift amount is the signed integer value of the least
significant byte of the element in the second input vector. The bits shifted out of each
element are lost. If the signed integer value is negative, it results in a right shift.
Note that the shift-count vector __b is a signed type even for the unsigned variants.--*/
int8x8_t vshl_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vshl_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vshl_s32 (int32x2_t __a, int32x2_t __b);
int64x1_t vshl_s64 (int64x1_t __a, int64x1_t __b);
uint8x8_t vshl_u8 (uint8x8_t __a, int8x8_t __b);
uint16x4_t vshl_u16 (uint16x4_t __a, int16x4_t __b);
uint32x2_t vshl_u32 (uint32x2_t __a, int32x2_t __b);
uint64x1_t vshl_u64 (uint64x1_t __a, int64x1_t __b);
int8x16_t vshlq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vshlq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vshlq_s32 (int32x4_t __a, int32x4_t __b);
int64x2_t vshlq_s64 (int64x2_t __a, int64x2_t __b);
uint8x16_t vshlq_u8 (uint8x16_t __a, int8x16_t __b);
uint16x8_t vshlq_u16 (uint16x8_t __a, int16x8_t __b);
uint32x4_t vshlq_u32 (uint32x4_t __a, int32x4_t __b);
uint64x2_t vshlq_u64 (uint64x2_t __a, int64x2_t __b);
/*--2. Vector saturating shift left (saturating instruction):
vqshl -> ri = sat(ai << bi); (negative values shift right)
If the shift value is positive, the operation is a left shift. Otherwise, it is a 
truncating right shift. left shifts each element in a vector of integers and places
the results in the destination vector. It is similar to VSHL. 
The difference is that the sticky QC flag is set if saturation occurs--*/
int8x8_t vqshl_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vqshl_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vqshl_s32 (int32x2_t __a, int32x2_t __b);
int64x1_t vqshl_s64 (int64x1_t __a, int64x1_t __b);
uint8x8_t vqshl_u8 (uint8x8_t __a, int8x8_t __b);
uint16x4_t vqshl_u16 (uint16x4_t __a, int16x4_t __b);
uint32x2_t vqshl_u32 (uint32x2_t __a, int32x2_t __b);
uint64x1_t vqshl_u64 (uint64x1_t __a, int64x1_t __b);
int8x16_t vqshlq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vqshlq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vqshlq_s32 (int32x4_t __a, int32x4_t __b);
int64x2_t vqshlq_s64 (int64x2_t __a, int64x2_t __b);
uint8x16_t vqshlq_u8 (uint8x16_t __a, int8x16_t __b);
uint16x8_t vqshlq_u16 (uint16x8_t __a, int16x8_t __b);
uint32x4_t vqshlq_u32 (uint32x4_t __a, int32x4_t __b);
uint64x2_t vqshlq_u64 (uint64x2_t __a, int64x2_t __b);
/*--3. Vector rounding shift left (normal instruction, no saturation):
vrshl -> ri = ai << bi; (negative values shift right, with rounding)
If the shift value is positive, the operation is a left shift. Otherwise, it is a
rounding right shift. left shifts each element in a vector of integers and places
the results in the destination vector. It is similar to VSHL.
The difference is that the shifted value is then rounded.
Note that the shift-count vector __b is a signed type even for the unsigned variants.--*/
int8x8_t vrshl_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vrshl_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vrshl_s32 (int32x2_t __a, int32x2_t __b);
int64x1_t vrshl_s64 (int64x1_t __a, int64x1_t __b);
uint8x8_t vrshl_u8 (uint8x8_t __a, int8x8_t __b);
uint16x4_t vrshl_u16 (uint16x4_t __a, int16x4_t __b);
uint32x2_t vrshl_u32 (uint32x2_t __a, int32x2_t __b);
uint64x1_t vrshl_u64 (uint64x1_t __a, int64x1_t __b);
int8x16_t vrshlq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vrshlq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vrshlq_s32 (int32x4_t __a, int32x4_t __b);
int64x2_t vrshlq_s64 (int64x2_t __a, int64x2_t __b);
uint8x16_t vrshlq_u8 (uint8x16_t __a, int8x16_t __b);
uint16x8_t vrshlq_u16 (uint16x8_t __a, int16x8_t __b);
uint32x4_t vrshlq_u32 (uint32x4_t __a, int32x4_t __b);
uint64x2_t vrshlq_u64 (uint64x2_t __a, int64x2_t __b);
/*--4. Vector saturating rounding shift left (saturating instruction):
vqrshl -> ri = sat(ai << bi); (negative values shift right, with rounding)
left shifts each element in a vector of integers and places the results in the 
destination vector.It is similar to VSHL. The difference is that the shifted value
is rounded, and the sticky QC flag is set if saturation occurs.--*/
int8x8_t vqrshl_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vqrshl_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vqrshl_s32 (int32x2_t __a, int32x2_t __b);
int64x1_t vqrshl_s64 (int64x1_t __a, int64x1_t __b);
uint8x8_t vqrshl_u8 (uint8x8_t __a, int8x8_t __b);
uint16x4_t vqrshl_u16 (uint16x4_t __a, int16x4_t __b);
uint32x2_t vqrshl_u32 (uint32x2_t __a, int32x2_t __b);
uint64x1_t vqrshl_u64 (uint64x1_t __a, int64x1_t __b);
int8x16_t vqrshlq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vqrshlq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vqrshlq_s32 (int32x4_t __a, int32x4_t __b);
int64x2_t vqrshlq_s64 (int64x2_t __a, int64x2_t __b);
uint8x16_t vqrshlq_u8 (uint8x16_t __a, int8x16_t __b);
uint16x8_t vqrshlq_u16 (uint16x8_t __a, int16x8_t __b);
uint32x4_t vqrshlq_u32 (uint32x4_t __a, int32x4_t __b);
uint64x2_t vqrshlq_u64 (uint64x2_t __a, int64x2_t __b);
/****************************************Shifts by a constant***************************/
/*--1、Vector shift right by constant: vshr -> ri = ai >> b;The results are truncated.
right shifts each element in a vector by an immediate value, 
and places the results in the destination vector.--*/
int8x8_t vshr_n_s8 (int8x8_t __a, const int __b);
int16x4_t vshr_n_s16 (int16x4_t __a, const int __b);
int32x2_t vshr_n_s32 (int32x2_t __a, const int __b);
int64x1_t vshr_n_s64 (int64x1_t __a, const int __b);
uint8x8_t vshr_n_u8 (uint8x8_t __a, const int __b);
uint16x4_t vshr_n_u16 (uint16x4_t __a, const int __b);
uint32x2_t vshr_n_u32 (uint32x2_t __a, const int __b);
uint64x1_t vshr_n_u64 (uint64x1_t __a, const int __b);
int8x16_t vshrq_n_s8 (int8x16_t __a, const int __b);
int16x8_t vshrq_n_s16 (int16x8_t __a, const int __b);
int32x4_t vshrq_n_s32 (int32x4_t __a, const int __b);
int64x2_t vshrq_n_s64 (int64x2_t __a, const int __b);
uint8x16_t vshrq_n_u8 (uint8x16_t __a, const int __b);
uint16x8_t vshrq_n_u16 (uint16x8_t __a, const int __b);
uint32x4_t vshrq_n_u32 (uint32x4_t __a, const int __b);
uint64x2_t vshrq_n_u64 (uint64x2_t __a, const int __b);
/*--2、Vector shift left by constant: vshl -> ri = ai << b;
left shifts each element in a vector by an immediate value, and places the results in the 
destination vector. The bits shifted out of the left of each element are lost--*/
int8x8_t vshl_n_s8 (int8x8_t __a, const int __b);
int16x4_t vshl_n_s16 (int16x4_t __a, const int __b);
int32x2_t vshl_n_s32 (int32x2_t __a, const int __b);
int64x1_t vshl_n_s64 (int64x1_t __a, const int __b);
uint8x8_t vshl_n_u8 (uint8x8_t __a, const int __b);
uint16x4_t vshl_n_u16 (uint16x4_t __a, const int __b);
uint32x2_t vshl_n_u32 (uint32x2_t __a, const int __b);
uint64x1_t vshl_n_u64 (uint64x1_t __a, const int __b);
int8x16_t vshlq_n_s8 (int8x16_t __a, const int __b);
int16x8_t vshlq_n_s16 (int16x8_t __a, const int __b);
int32x4_t vshlq_n_s32 (int32x4_t __a, const int __b);
int64x2_t vshlq_n_s64 (int64x2_t __a, const int __b);
uint8x16_t vshlq_n_u8 (uint8x16_t __a, const int __b);
uint16x8_t vshlq_n_u16 (uint16x8_t __a, const int __b);
uint32x4_t vshlq_n_u32 (uint32x4_t __a, const int __b);
uint64x2_t vshlq_n_u64 (uint64x2_t __a, const int __b);
/*--3、Vector rounding shift right by constant: vrshr -> ri = ai >> b;
right shifts each element in a vector by an immediate value, and places the results
in the destination vector. The shifted values are rounded.--*/
int8x8_t vrshr_n_s8 (int8x8_t __a, const int __b);
int16x4_t vrshr_n_s16 (int16x4_t __a, const int __b);
int32x2_t vrshr_n_s32 (int32x2_t __a, const int __b);
int64x1_t vrshr_n_s64 (int64x1_t __a, const int __b);
uint8x8_t vrshr_n_u8 (uint8x8_t __a, const int __b);
uint16x4_t vrshr_n_u16 (uint16x4_t __a, const int __b);
uint32x2_t vrshr_n_u32 (uint32x2_t __a, const int __b);
uint64x1_t vrshr_n_u64 (uint64x1_t __a, const int __b);
int8x16_t vrshrq_n_s8 (int8x16_t __a, const int __b);
int16x8_t vrshrq_n_s16 (int16x8_t __a, const int __b);
int32x4_t vrshrq_n_s32 (int32x4_t __a, const int __b);
int64x2_t vrshrq_n_s64 (int64x2_t __a, const int __b);
uint8x16_t vrshrq_n_u8 (uint8x16_t __a, const int __b);
uint16x8_t vrshrq_n_u16 (uint16x8_t __a, const int __b);
uint32x4_t vrshrq_n_u32 (uint32x4_t __a, const int __b);
uint64x2_t vrshrq_n_u64 (uint64x2_t __a, const int __b);
/*--4. Vector shift right by constant and accumulate: vsra -> ri = ai + (bi >> c);
The results are truncated. right shifts each element of the second vector (__b) by an
immediate value (__c), and accumulates the results into the destination vector.
The first vector (__a) is the accumulator and is added unshifted.--*/
int8x8_t vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);
int16x4_t vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);
int64x1_t vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);
uint8x8_t vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);
uint16x4_t vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);
uint32x2_t vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);
uint64x1_t vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);
int8x16_t vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);
int16x8_t vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);
int32x4_t vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);
int64x2_t vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);
uint8x16_t vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);
uint16x8_t vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);
uint32x4_t vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);
uint64x2_t vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);
/*--5. Vector rounding shift right by constant and accumulate:
vrsra -> ri = ai + rounded(bi >> c);
The results are rounded. right shifts each element of the second vector (__b) by an
immediate value (__c), and accumulates the rounded results into the destination vector.
The first vector (__a) is the accumulator and is added unshifted.--*/
int8x8_t vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);
int16x4_t vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);
int64x1_t vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);
uint8x8_t vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);
uint16x4_t vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);
uint32x2_t vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);
uint64x1_t vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);
int8x16_t vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);
int16x8_t vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);
int32x4_t vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);
int64x2_t vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);
uint8x16_t vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);
uint16x8_t vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);
uint32x4_t vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);
uint64x2_t vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);
/*--6、Vector saturating shift left by constant: vqshl -> ri = sat(ai << b); 
left shifts each element in a vector of integers by an immediate value, and places the 
results in the destination vector,and the sticky QC flag is set if saturation occurs.--*/
int8x8_t vqshl_n_s8 (int8x8_t __a, const int __b);
int16x4_t vqshl_n_s16 (int16x4_t __a, const int __b);
int32x2_t vqshl_n_s32 (int32x2_t __a, const int __b);
int64x1_t vqshl_n_s64 (int64x1_t __a, const int __b);
uint8x8_t vqshl_n_u8 (uint8x8_t __a, const int __b);
uint16x4_t vqshl_n_u16 (uint16x4_t __a, const int __b);
uint32x2_t vqshl_n_u32 (uint32x2_t __a, const int __b);
uint64x1_t vqshl_n_u64 (uint64x1_t __a, const int __b);
int8x16_t vqshlq_n_s8 (int8x16_t __a, const int __b);
int16x8_t vqshlq_n_s16 (int16x8_t __a, const int __b);
int32x4_t vqshlq_n_s32 (int32x4_t __a, const int __b);
int64x2_t vqshlq_n_s64 (int64x2_t __a, const int __b);
uint8x16_t vqshlq_n_u8 (uint8x16_t __a, const int __b);
uint16x8_t vqshlq_n_u16 (uint16x8_t __a, const int __b);
uint32x4_t vqshlq_n_u32 (uint32x4_t __a, const int __b);
uint64x2_t vqshlq_n_u64 (uint64x2_t __a, const int __b);
/*--7. Vector signed->unsigned saturating shift left by constant: vqshlu -> ri = sat(ai << b);
left shifts each element in a vector of integers by an immediate value, places the 
results in the destination vector, the sticky QC flag is set if saturation occurs, 
and indicates that the results are unsigned even though the operands are signed.--*/
uint8x8_t vqshlu_n_s8 (int8x8_t __a, const int __b);
uint16x4_t vqshlu_n_s16 (int16x4_t __a, const int __b);
uint32x2_t vqshlu_n_s32 (int32x2_t __a, const int __b);
uint64x1_t vqshlu_n_s64 (int64x1_t __a, const int __b);
uint8x16_t vqshluq_n_s8 (int8x16_t __a, const int __b);
uint16x8_t vqshluq_n_s16 (int16x8_t __a, const int __b);
uint32x4_t vqshluq_n_s32 (int32x4_t __a, const int __b);
uint64x2_t vqshluq_n_s64 (int64x2_t __a, const int __b);
/*--8、Vector narrowing shift right by constant: vshrn -> ri = ai >> b;
The results are truncated.right shifts each element in the input vector by an 
immediate value. It then narrows the result by storing only the least significant
half of each element into the destination vector.--*/
int8x8_t vshrn_n_s16 (int16x8_t __a, const int __b);
int16x4_t vshrn_n_s32 (int32x4_t __a, const int __b);
int32x2_t vshrn_n_s64 (int64x2_t __a, const int __b);
uint8x8_t vshrn_n_u16 (uint16x8_t __a, const int __b);
uint16x4_t vshrn_n_u32 (uint32x4_t __a, const int __b);
uint32x2_t vshrn_n_u64 (uint64x2_t __a, const int __b);
/*--9、Vector signed->unsigned narrowing saturating shift right by constant: 
vqshrun -> ri = ai >> b; 
Results are truncated. right shifts each element in a quadword vector of integers by an
immediate value, and places the results in a doubleword vector. The results are unsigned, 
although the operands are signed. The sticky QC flag is set if saturation occurs.--*/
uint8x8_t vqshrun_n_s16 (int16x8_t __a, const int __b);
uint16x4_t vqshrun_n_s32 (int32x4_t __a, const int __b);
uint32x2_t vqshrun_n_s64 (int64x2_t __a, const int __b);
/*--10、Vector signed->unsigned rounding narrowing saturating shift right by constant: 
vqrshrun -> ri = ai >> b; Results are rounded. right shifts each element in a quadword 
vector of integers by an immediate value, and places the rounded results in a doubleword 
vector. The results are unsigned, although the operands are signed.--*/
uint8x8_t vqrshrun_n_s16 (int16x8_t __a, const int __b);
uint16x4_t vqrshrun_n_s32 (int32x4_t __a, const int __b);
uint32x2_t vqrshrun_n_s64 (int64x2_t __a, const int __b);
/*--11、Vector narrowing saturating shift right by constant: vqshrn -> ri = ai >> b; 
Results are truncated. right shifts each element in a quadword vector of integers by an 
immediate value, and places the results in a doubleword vector, 
and the sticky QC flag is set if saturation occurs.--*/
int8x8_t vqshrn_n_s16 (int16x8_t __a, const int __b);
int16x4_t vqshrn_n_s32 (int32x4_t __a, const int __b);
int32x2_t vqshrn_n_s64 (int64x2_t __a, const int __b);
uint8x8_t vqshrn_n_u16 (uint16x8_t __a, const int __b);
uint16x4_t vqshrn_n_u32 (uint32x4_t __a, const int __b);
uint32x2_t vqshrn_n_u64 (uint64x2_t __a, const int __b);
/*--12、Vector rounding narrowing shift right by constant: vrshrn -> ri = ai >> b; 
The results are rounded. right shifts each element in a vector by an immediate value,
and places the rounded,narrowed results in the destination vector.--*/
int8x8_t vrshrn_n_s16 (int16x8_t __a, const int __b);
int16x4_t vrshrn_n_s32 (int32x4_t __a, const int __b);
int32x2_t vrshrn_n_s64 (int64x2_t __a, const int __b);
uint8x8_t vrshrn_n_u16 (uint16x8_t __a, const int __b);
uint16x4_t vrshrn_n_u32 (uint32x4_t __a, const int __b);
uint32x2_t vrshrn_n_u64 (uint64x2_t __a, const int __b);
/*--13、Vector rounding narrowing saturating shift right by constant:
vqrshrn -> ri = ai >> b;
Results are rounded. right shifts each element in a quadword vector of integers by an 
immediate value,and places the rounded,narrowed results in a doubleword vector. 
The sticky QC flag is set if saturation occurs.--*/
int8x8_t vqrshrn_n_s16 (int16x8_t __a, const int __b);
int16x4_t vqrshrn_n_s32 (int32x4_t __a, const int __b);
int32x2_t vqrshrn_n_s64 (int64x2_t __a, const int __b);
uint8x8_t vqrshrn_n_u16 (uint16x8_t __a, const int __b);
uint16x4_t vqrshrn_n_u32 (uint32x4_t __a, const int __b);
uint32x2_t vqrshrn_n_u64 (uint64x2_t __a, const int __b);
/*--14、Vector widening shift left by constant: vshll -> ri = ai << b; 
left shifts each element in a vector of integers by an immediate value, 
and place the results in the destination vector. Bits shifted out of the left of each
element are lost and values are sign extended or zero extended.--*/
int16x8_t vshll_n_s8 (int8x8_t __a, const int __b);
int32x4_t vshll_n_s16 (int16x4_t __a, const int __b);
int64x2_t vshll_n_s32 (int32x2_t __a, const int __b);
uint16x8_t vshll_n_u8 (uint8x8_t __a, const int __b);
uint32x4_t vshll_n_u16 (uint16x4_t __a, const int __b);
uint64x2_t vshll_n_u32 (uint32x2_t __a, const int __b);
/********************************************Shifts with insert*************************/
/*--1. Vector shift right and insert: vsri -> ri = (top c bits of ai) | (bi >> c);
The c most significant bits of each element in the destination vector are unchanged
(c is the immediate shift amount). right shifts each element in the second input vector
by an immediate value, and inserts the results in the destination vector. It does not
affect the highest c significant bits of the elements in the destination register.
Bits shifted out of the right of each element are lost. The first input vector holds
the elements of the destination vector before the operation is performed.--*/
int8x8_t vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);
int16x4_t vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);
int64x1_t vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);
uint8x8_t vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);
uint16x4_t vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);
uint32x2_t vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);
uint64x1_t vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);
poly8x8_t vsri_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c);
poly16x4_t vsri_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c);
int8x16_t vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);
int16x8_t vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);
int32x4_t vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);
int64x2_t vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);
uint8x16_t vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);
uint16x8_t vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);
uint32x4_t vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);
uint64x2_t vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);
poly8x16_t vsriq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c);
poly16x8_t vsriq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c);
/*--2. Vector shift left and insert: vsli -> ri = (bi << c) | (low c bits of ai);
The c least significant bits of each element in the destination vector are unchanged
(c is the immediate shift amount). left shifts each element in the second input
vector by an immediate value, and inserts the results in the destination vector.
It does not affect the lowest c significant bits of the elements in the destination
register. Bits shifted out of the left of each element are lost. The first input vector
holds the elements of the destination vector before the operation is performed.--*/
int8x8_t vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c);
int16x4_t vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c);
int64x1_t vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c);
uint8x8_t vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);
uint16x4_t vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);
uint32x2_t vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);
uint64x1_t vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);
poly8x8_t vsli_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c);
poly16x4_t vsli_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c);
int8x16_t vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c);
int16x8_t vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c);
int32x4_t vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c);
int64x2_t vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c);
uint8x16_t vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);
uint16x8_t vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);
uint32x4_t vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);
uint64x2_t vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);
poly8x16_t vsliq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c);
poly16x8_t vsliq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c);
/*****************************************Absolute value********************************/
/*--1. Absolute (normal instruction): vabs -> ri = |ai|;
returns the absolute value of each element in a vector.--*/
int8x8_t vabs_s8 (int8x8_t __a);//_mm_abs_epi8
int16x4_t vabs_s16 (int16x4_t __a);//_mm_abs_epi16
int32x2_t vabs_s32 (int32x2_t __a);//_mm_abs_epi32
float32x2_t vabs_f32 (float32x2_t __a);
int8x16_t vabsq_s8 (int8x16_t __a);//_mm_abs_epi8
int16x8_t vabsq_s16 (int16x8_t __a);//_mm_abs_epi16
int32x4_t vabsq_s32 (int32x4_t __a);//_mm_abs_epi32
float32x4_t vabsq_f32 (float32x4_t __a);
/*--2. Saturating absolute (saturating instruction): vqabs -> ri = sat(|ai|);
returns the absolute value of each element in a vector. If any of the results overflow,
they are saturated and the sticky QC flag is set.--*/
int8x8_t vqabs_s8 (int8x8_t __a);
int16x4_t vqabs_s16 (int16x4_t __a);
int32x2_t vqabs_s32 (int32x2_t __a);
int8x16_t vqabsq_s8 (int8x16_t __a);
int16x8_t vqabsq_s16 (int16x8_t __a);
int32x4_t vqabsq_s32 (int32x4_t __a);
/***************************************************Negation****************************/
/*--1. Negate (normal instruction): vneg -> ri = -ai; negates each element in a vector.--*/
int8x8_t vneg_s8 (int8x8_t __a);
int16x4_t vneg_s16 (int16x4_t __a);
int32x2_t vneg_s32 (int32x2_t __a);
float32x2_t vneg_f32 (float32x2_t __a);
int8x16_t vnegq_s8 (int8x16_t __a);
int16x8_t vnegq_s16 (int16x8_t __a);
int32x4_t vnegq_s32 (int32x4_t __a);
float32x4_t vnegq_f32 (float32x4_t __a);
/*--2、Saturating Negate: vqneg -> ri = sat(-ai);
negates each element in a vector. If any of the results overflow, 
they are saturated and the sticky QC flag is set.--*/
int8x8_t vqneg_s8 (int8x8_t __a);
int16x4_t vqneg_s16 (int16x4_t __a);
int32x2_t vqneg_s32 (int32x2_t __a);
int8x16_t vqnegq_s8 (int8x16_t __a);
int16x8_t vqnegq_s16 (int16x8_t __a);
int32x4_t vqnegq_s32 (int32x4_t __a);
/********************************************Logical operations*************************/
/*--1. Bitwise not (normal instruction): vmvn -> ri = ~ai;
performs a bitwise inversion of each element from the input vector.--*/
int8x8_t vmvn_s8 (int8x8_t __a);
int16x4_t vmvn_s16 (int16x4_t __a);
int32x2_t vmvn_s32 (int32x2_t __a);
uint8x8_t vmvn_u8 (uint8x8_t __a);
uint16x4_t vmvn_u16 (uint16x4_t __a);
uint32x2_t vmvn_u32 (uint32x2_t __a);
poly8x8_t vmvn_p8 (poly8x8_t __a);
int8x16_t vmvnq_s8 (int8x16_t __a);
int16x8_t vmvnq_s16 (int16x8_t __a);
int32x4_t vmvnq_s32 (int32x4_t __a);
uint8x16_t vmvnq_u8 (uint8x16_t __a);
uint16x8_t vmvnq_u16 (uint16x8_t __a);
uint32x4_t vmvnq_u32 (uint32x4_t __a);
poly8x16_t vmvnq_p8 (poly8x16_t __a);
/*--2. Bitwise and (normal instruction): vand -> ri = ai & bi; performs a bitwise AND between
corresponding elements of the input vectors.--*/
int8x8_t vand_s8 (int8x8_t __a, int8x8_t __b);//_mm_and_si128
int16x4_t vand_s16 (int16x4_t __a, int16x4_t __b);//_mm_and_si128
int32x2_t vand_s32 (int32x2_t __a, int32x2_t __b);//_mm_and_si128
uint8x8_t vand_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_and_si128
uint16x4_t vand_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_and_si128
uint32x2_t vand_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_and_si128
int64x1_t vand_s64 (int64x1_t __a, int64x1_t __b);//_mm_and_si128
uint64x1_t vand_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_and_si128
int8x16_t vandq_s8 (int8x16_t __a, int8x16_t __b);//_mm_and_si128
int16x8_t vandq_s16 (int16x8_t __a, int16x8_t __b);//_mm_and_si128
int32x4_t vandq_s32 (int32x4_t __a, int32x4_t __b);//_mm_and_si128
int64x2_t vandq_s64 (int64x2_t __a, int64x2_t __b);//_mm_and_si128
uint8x16_t vandq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_and_si128
uint16x8_t vandq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_and_si128
uint32x4_t vandq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_and_si128
uint64x2_t vandq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_and_si128
/*--3. Bitwise or (normal instruction): vorr -> ri = ai | bi; performs a bitwise OR between
corresponding elements of the input vectors.--*/
int8x8_t vorr_s8 (int8x8_t __a, int8x8_t __b);//_mm_or_si128
int16x4_t vorr_s16 (int16x4_t __a, int16x4_t __b);//_mm_or_si128
int32x2_t vorr_s32 (int32x2_t __a, int32x2_t __b);//_mm_or_si128
uint8x8_t vorr_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_or_si128
uint16x4_t vorr_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_or_si128
uint32x2_t vorr_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_or_si128
int64x1_t vorr_s64 (int64x1_t __a, int64x1_t __b);//_mm_or_si128
uint64x1_t vorr_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_or_si128
int8x16_t vorrq_s8 (int8x16_t __a, int8x16_t __b);//_mm_or_si128
int16x8_t vorrq_s16 (int16x8_t __a, int16x8_t __b);//_mm_or_si128
int32x4_t vorrq_s32 (int32x4_t __a, int32x4_t __b);//_mm_or_si128
int64x2_t vorrq_s64 (int64x2_t __a, int64x2_t __b);//_mm_or_si128
uint8x16_t vorrq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_or_si128
uint16x8_t vorrq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_or_si128
uint32x4_t vorrq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_or_si128
uint64x2_t vorrq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_or_si128
/*--4. Bitwise exclusive or (EOR or XOR) (normal instruction): veor -> ri = ai ^ bi;
performs a bitwise exclusive-OR between corresponding elements of the input vectors.--*/
int8x8_t veor_s8 (int8x8_t __a, int8x8_t __b);//_mm_xor_si128
int16x4_t veor_s16 (int16x4_t __a, int16x4_t __b);//_mm_xor_si128
int32x2_t veor_s32 (int32x2_t __a, int32x2_t __b);//_mm_xor_si128
uint8x8_t veor_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_xor_si128
uint16x4_t veor_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_xor_si128
uint32x2_t veor_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_xor_si128
int64x1_t veor_s64 (int64x1_t __a, int64x1_t __b);//_mm_xor_si128
uint64x1_t veor_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_xor_si128
int8x16_t veorq_s8 (int8x16_t __a, int8x16_t __b);//_mm_xor_si128
int16x8_t veorq_s16 (int16x8_t __a, int16x8_t __b);//_mm_xor_si128
int32x4_t veorq_s32 (int32x4_t __a, int32x4_t __b);//_mm_xor_si128
int64x2_t veorq_s64 (int64x2_t __a, int64x2_t __b);//_mm_xor_si128
uint8x16_t veorq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_xor_si128
uint16x8_t veorq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_xor_si128
uint32x4_t veorq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_xor_si128
uint64x2_t veorq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_xor_si128
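/*--5、Bit Clear (normal instruction): vbic -> ri = ai & (~bi);
VBIC (Vector Bitwise Clear) ANDs each element of the first vector with the bitwise
complement of the corresponding element of the second vector, and places the results in
the destination register. Note: _mm_andnot_si128(x, y) computes (~x) & y, so the SSE
equivalent takes the two operands in swapped order.--*/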
int8x8_t vbic_s8 (int8x8_t __a, int8x8_t __b);//_mm_andnot_si128
int16x4_t vbic_s16 (int16x4_t __a, int16x4_t __b);//_mm_andnot_si128
int32x2_t vbic_s32 (int32x2_t __a, int32x2_t __b);//_mm_andnot_si128
uint8x8_t vbic_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_andnot_si128
uint16x4_t vbic_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_andnot_si128
uint32x2_t vbic_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_andnot_si128
int64x1_t vbic_s64 (int64x1_t __a, int64x1_t __b);//_mm_andnot_si128
uint64x1_t vbic_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_andnot_si128
int8x16_t vbicq_s8 (int8x16_t __a, int8x16_t __b);//_mm_andnot_si128
int16x8_t vbicq_s16 (int16x8_t __a, int16x8_t __b);//_mm_andnot_si128
int32x4_t vbicq_s32 (int32x4_t __a, int32x4_t __b);//_mm_andnot_si128
int64x2_t vbicq_s64 (int64x2_t __a, int64x2_t __b);//_mm_andnot_si128
uint8x16_t vbicq_u8 (uint8x16_t __a, uint8x16_t __b);//_mm_andnot_si128
uint16x8_t vbicq_u16 (uint16x8_t __a, uint16x8_t __b);//_mm_andnot_si128
uint32x4_t vbicq_u32 (uint32x4_t __a, uint32x4_t __b);//_mm_andnot_si128
uint64x2_t vbicq_u64 (uint64x2_t __a, uint64x2_t __b);//_mm_andnot_si128
/*--6、Bitwise OR complement(正常指令): vorn -> ri = ai | (~bi); 
performs a bitwise logical OR NOT operation 
between values in two registers, and places the results in the destination register.--*/
int8x8_t vorn_s8 (int8x8_t __a, int8x8_t __b);
int16x4_t vorn_s16 (int16x4_t __a, int16x4_t __b);
int32x2_t vorn_s32 (int32x2_t __a, int32x2_t __b);
uint8x8_t vorn_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4_t vorn_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2_t vorn_u32 (uint32x2_t __a, uint32x2_t __b);
int64x1_t vorn_s64 (int64x1_t __a, int64x1_t __b);
uint64x1_t vorn_u64 (uint64x1_t __a, uint64x1_t __b);
int8x16_t vornq_s8 (int8x16_t __a, int8x16_t __b);
int16x8_t vornq_s16 (int16x8_t __a, int16x8_t __b);
int32x4_t vornq_s32 (int32x4_t __a, int32x4_t __b);
int64x2_t vornq_s64 (int64x2_t __a, int64x2_t __b);
uint8x16_t vornq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8_t vornq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4_t vornq_u32 (uint32x4_t __a, uint32x4_t __b);
uint64x2_t vornq_u64 (uint64x2_t __a, uint64x2_t __b);
/****************************************Count leading sign bits************************/
/*--Normal instruction, vcls -> ri = number of leading sign bits of ai; counts the number
of consecutive bits following the most significant bit that are the same as the most
significant bit, in each element in a vector, and places the count in the result vector.--*/
int8x8_t vcls_s8 (int8x8_t __a);
int16x4_t vcls_s16 (int16x4_t __a);
int32x2_t vcls_s32 (int32x2_t __a);
int8x16_t vclsq_s8 (int8x16_t __a);
int16x8_t vclsq_s16 (int16x8_t __a);
int32x4_t vclsq_s32 (int32x4_t __a);
/*******************************************Count leading zeros*************************/
/*--正常指令, vclz -> ; counts the number of consecutive zeros, starting from the most
significant bit, in each element in a vector, and places the count in result vector.--*/
int8x8_t vclz_s8 (int8x8_t __a);
int16x4_t vclz_s16 (int16x4_t __a);
int32x2_t vclz_s32 (int32x2_t __a);
uint8x8_t vclz_u8 (uint8x8_t __a);
uint16x4_t vclz_u16 (uint16x4_t __a);
uint32x2_t vclz_u32 (uint32x2_t __a);
int8x16_t vclzq_s8 (int8x16_t __a);
int16x8_t vclzq_s16 (int16x8_t __a);
int32x4_t vclzq_s32 (int32x4_t __a);
uint8x16_t vclzq_u8 (uint8x16_t __a);
uint16x8_t vclzq_u16 (uint16x8_t __a);
uint32x4_t vclzq_u32 (uint32x4_t __a);
/*******************************************Count number of set bits********************/
/*--正常指令, vcnt -> counts the number of bits that are one in each element in a vector, 
and places the count in the result vector.--*/
int8x8_t vcnt_s8 (int8x8_t __a);
uint8x8_t vcnt_u8 (uint8x8_t __a);
poly8x8_t vcnt_p8 (poly8x8_t __a);
int8x16_t vcntq_s8 (int8x16_t __a);
uint8x16_t vcntq_u8 (uint8x16_t __a);
poly8x16_t vcntq_p8 (poly8x16_t __a);
/*****************************************Reciprocal estimate***************************/
/*--正常指令, vrecpe -> ; finds an approximate reciprocal of each element in a vector, 
and places it in the result vector.--*/
float32x2_t vrecpe_f32 (float32x2_t __a);
uint32x2_t vrecpe_u32 (uint32x2_t __a);
float32x4_t vrecpeq_f32 (float32x4_t __a);
uint32x4_t vrecpeq_u32 (uint32x4_t __a);
/****************************************Reciprocal square-root estimate****************/
/*--正常指令, vrsqrte -> ; finds an approximate reciprocal square root of each element
in a vector, and places it in the return vector.--*/
float32x2_t vrsqrte_f32 (float32x2_t __a);
uint32x2_t vrsqrte_u32 (uint32x2_t __a);
float32x4_t vrsqrteq_f32 (float32x4_t __a);
uint32x4_t vrsqrteq_u32 (uint32x4_t __a);
/*******************************************Get lanes from a vector*********************/
/*--vmov -> r = a[b]; returns the value from the specified lane of a vector.
Extract lanes from a vector and put into a register. 
These intrinsics extract a single lane (element) from a vector.--*/
int8_t vget_lane_s8 (int8x8_t __a, const int __b);//_mm_extract_epi8
int16_t vget_lane_s16 (int16x4_t __a, const int __b);//_mm_extract_epi16
int32_t vget_lane_s32 (int32x2_t __a, const int __b);//_mm_extract_epi32
float32_t vget_lane_f32 (float32x2_t __a, const int __b);
uint8_t vget_lane_u8 (uint8x8_t __a, const int __b);//_mm_extract_epi8
uint16_t vget_lane_u16 (uint16x4_t __a, const int __b);//_mm_extract_epi16
uint32_t vget_lane_u32 (uint32x2_t __a, const int __b);//_mm_extract_epi32
poly8_t vget_lane_p8 (poly8x8_t __a, const int __b);//_mm_extract_epi8
poly16_t vget_lane_p16 (poly16x4_t __a, const int __b);//_mm_extract_epi16
int64_t vget_lane_s64 (int64x1_t __a, const int __b);//_mm_extract_epi64
uint64_t vget_lane_u64 (uint64x1_t __a, const int __b);//_mm_extract_epi64
int8_t vgetq_lane_s8 (int8x16_t __a, const int __b);//_mm_extract_epi8
int16_t vgetq_lane_s16 (int16x8_t __a, const int __b);//_mm_extract_epi16
int32_t vgetq_lane_s32 (int32x4_t __a, const int __b);//_mm_extract_epi32
float32_t vgetq_lane_f32 (float32x4_t __a, const int __b);
uint8_t vgetq_lane_u8 (uint8x16_t __a, const int __b);//_mm_extract_epi8
uint16_t vgetq_lane_u16 (uint16x8_t __a, const int __b);//_mm_extract_epi16
uint32_t vgetq_lane_u32 (uint32x4_t __a, const int __b);//_mm_extract_epi32
poly8_t vgetq_lane_p8 (poly8x16_t __a, const int __b);//_mm_extract_epi8
poly16_t vgetq_lane_p16 (poly16x8_t __a, const int __b);//_mm_extract_epi16
int64_t vgetq_lane_s64 (int64x2_t __a, const int __b);//_mm_extract_epi64
uint64_t vgetq_lane_u64 (uint64x2_t __a, const int __b);//_mm_extract_epi64
/*********************************************Set lanes in a vector********************/
/*--vmov -> r = b, with r[c] = a; sets the value of the specified lane of a vector. It
returns the vector with the new value. Load a single lane of a vector from a literal.
These intrinsics set a single lane (element) within a vector.--*/
int8x8_t vset_lane_s8 (int8_t __a, int8x8_t __b, const int __c);
int16x4_t vset_lane_s16 (int16_t __a, int16x4_t __b, const int __c);
int32x2_t vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c);
float32x2_t vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c);
uint8x8_t vset_lane_u8 (uint8_t __a, uint8x8_t __b, const int __c);
uint16x4_t vset_lane_u16 (uint16_t __a, uint16x4_t __b, const int __c);
uint32x2_t vset_lane_u32 (uint32_t __a, uint32x2_t __b, const int __c);
poly8x8_t vset_lane_p8 (poly8_t __a, poly8x8_t __b, const int __c);
poly16x4_t vset_lane_p16 (poly16_t __a, poly16x4_t __b, const int __c);
int64x1_t vset_lane_s64 (int64_t __a, int64x1_t __b, const int __c);
uint64x1_t vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c);
int8x16_t vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c);
int16x8_t vsetq_lane_s16 (int16_t __a, int16x8_t __b, const int __c);
int32x4_t vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c);
float32x4_t vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c);
uint8x16_t vsetq_lane_u8 (uint8_t __a, uint8x16_t __b, const int __c);
uint16x8_t vsetq_lane_u16 (uint16_t __a, uint16x8_t __b, const int __c);
uint32x4_t vsetq_lane_u32 (uint32_t __a, uint32x4_t __b, const int __c);
poly8x16_t vsetq_lane_p8 (poly8_t __a, poly8x16_t __b, const int __c);
poly16x8_t vsetq_lane_p16 (poly16_t __a, poly16x8_t __b, const int __c);
int64x2_t vsetq_lane_s64 (int64_t __a, int64x2_t __b, const int __c);
uint64x2_t vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c);
/****************************************Create vector from literal bit pattern*********/
/*--vmov -> ; creates a vector from a 64-bit pattern. 
Initialize a vector from a literal bit pattern.--*/
int8x8_t vcreate_s8 (uint64_t __a);//_mm_loadl_epi64
int16x4_t vcreate_s16 (uint64_t __a);//_mm_loadl_epi64
int32x2_t vcreate_s32 (uint64_t __a);//_mm_loadl_epi64
int64x1_t vcreate_s64 (uint64_t __a);//_mm_loadl_epi64
float32x2_t vcreate_f32 (uint64_t __a);
uint8x8_t vcreate_u8 (uint64_t __a);//_mm_loadl_epi64
uint16x4_t vcreate_u16 (uint64_t __a);//_mm_loadl_epi64
uint32x2_t vcreate_u32 (uint64_t __a);//_mm_loadl_epi64
uint64x1_t vcreate_u64 (uint64_t __a);//_mm_loadl_epi64
poly8x8_t vcreate_p8 (uint64_t __a);//_mm_loadl_epi64
poly16x4_t vcreate_p16 (uint64_t __a);//_mm_loadl_epi64
/*****************************************Set all lanes to the same value***************/
/*--1、Load all lanes of vector to the same literal value: vdup/vmov -> ri = a; 
duplicates a scalar into every element of the destination vector. 
Load all lanes of vector to the same literal value--*/
int8x8_t vdup_n_s8 (int8_t __a);//_mm_set1_epi8
int16x4_t vdup_n_s16 (int16_t __a);//_mm_set1_epi16
int32x2_t vdup_n_s32 (int32_t __a);//_mm_set1_epi32
float32x2_t vdup_n_f32 (float32_t __a);//_mm_set1_ps
uint8x8_t vdup_n_u8 (uint8_t __a);//_mm_set1_epi8
uint16x4_t vdup_n_u16 (uint16_t __a);//_mm_set1_epi16
uint32x2_t vdup_n_u32 (uint32_t __a);//_mm_set1_epi32
poly8x8_t vdup_n_p8 (poly8_t __a);//_mm_set1_epi8
poly16x4_t vdup_n_p16 (poly16_t __a);//_mm_set1_epi16
int64x1_t vdup_n_s64 (int64_t __a);
uint64x1_t vdup_n_u64 (uint64_t __a);
int8x16_t vdupq_n_s8 (int8_t __a);//_mm_set1_epi8
int16x8_t vdupq_n_s16 (int16_t __a);//_mm_set1_epi16
int32x4_t vdupq_n_s32 (int32_t __a);//_mm_set1_epi32
float32x4_t vdupq_n_f32 (float32_t __a);//_mm_set1_ps
uint8x16_t vdupq_n_u8 (uint8_t __a);//_mm_set1_epi8
uint16x8_t vdupq_n_u16 (uint16_t __a);//_mm_set1_epi16
uint32x4_t vdupq_n_u32 (uint32_t __a);//_mm_set1_epi32
poly8x16_t vdupq_n_p8 (poly8_t __a);//_mm_set1_epi8
poly16x8_t vdupq_n_p16 (poly16_t __a);//_mm_set1_epi16
int64x2_t vdupq_n_s64 (int64_t __a);
uint64x2_t vdupq_n_u64 (uint64_t __a);
int8x8_t vmov_n_s8 (int8_t __a);//_mm_set1_epi8
int16x4_t vmov_n_s16 (int16_t __a);//_mm_set1_epi16
int32x2_t vmov_n_s32 (int32_t __a);//_mm_set1_epi32
float32x2_t vmov_n_f32 (float32_t __a);//_mm_set1_ps
uint8x8_t vmov_n_u8 (uint8_t __a);//_mm_set1_epi8
uint16x4_t vmov_n_u16 (uint16_t __a);//_mm_set1_epi16
uint32x2_t vmov_n_u32 (uint32_t __a);//_mm_set1_epi32
poly8x8_t vmov_n_p8 (poly8_t __a);//_mm_set1_epi8
poly16x4_t vmov_n_p16 (poly16_t __a);//_mm_set1_epi16
int64x1_t vmov_n_s64 (int64_t __a);
uint64x1_t vmov_n_u64 (uint64_t __a);
int8x16_t vmovq_n_s8 (int8_t __a);//_mm_set1_epi8
int16x8_t vmovq_n_s16 (int16_t __a);//_mm_set1_epi16
int32x4_t vmovq_n_s32 (int32_t __a);//_mm_set1_epi32
float32x4_t vmovq_n_f32 (float32_t __a);//_mm_set1_ps
uint8x16_t vmovq_n_u8 (uint8_t __a);//_mm_set1_epi8
uint16x8_t vmovq_n_u16 (uint16_t __a);//_mm_set1_epi16
uint32x4_t vmovq_n_u32 (uint32_t __a);//_mm_set1_epi32
poly8x16_t vmovq_n_p8 (poly8_t __a);//_mm_set1_epi8
poly16x8_t vmovq_n_p16 (poly16_t __a);//_mm_set1_epi16
int64x2_t vmovq_n_s64 (int64_t __a);
uint64x2_t vmovq_n_u64 (uint64_t __a);
/*--2、Load all lanes of the vector to the value of a lane of a vector: 
vdup/vmov -> ri = a[b];
duplicates a scalar into every element of the destination vector.--*/
int8x8_t vdup_lane_s8 (int8x8_t __a, const int __b);
int16x4_t vdup_lane_s16 (int16x4_t __a, const int __b);
int32x2_t vdup_lane_s32 (int32x2_t __a, const int __b);
float32x2_t vdup_lane_f32 (float32x2_t __a, const int __b);
uint8x8_t vdup_lane_u8 (uint8x8_t __a, const int __b);
uint16x4_t vdup_lane_u16 (uint16x4_t __a, const int __b);
uint32x2_t vdup_lane_u32 (uint32x2_t __a, const int __b);
poly8x8_t vdup_lane_p8 (poly8x8_t __a, const int __b);
poly16x4_t vdup_lane_p16 (poly16x4_t __a, const int __b);
int64x1_t vdup_lane_s64 (int64x1_t __a, const int __b);
uint64x1_t vdup_lane_u64 (uint64x1_t __a, const int __b);
int8x16_t vdupq_lane_s8 (int8x8_t __a, const int __b);
int16x8_t vdupq_lane_s16 (int16x4_t __a, const int __b);
int32x4_t vdupq_lane_s32 (int32x2_t __a, const int __b);
float32x4_t vdupq_lane_f32 (float32x2_t __a, const int __b);
uint8x16_t vdupq_lane_u8 (uint8x8_t __a, const int __b);
uint16x8_t vdupq_lane_u16 (uint16x4_t __a, const int __b);
uint32x4_t vdupq_lane_u32 (uint32x2_t __a, const int __b);
poly8x16_t vdupq_lane_p8 (poly8x8_t __a, const int __b);
poly16x8_t vdupq_lane_p16 (poly16x4_t __a, const int __b);
int64x2_t vdupq_lane_s64 (int64x1_t __a, const int __b);//_mm_unpacklo_epi64
uint64x2_t vdupq_lane_u64 (uint64x1_t __a, const int __b);//_mm_unpacklo_epi64
/********************************************Combining vectors**************************/
/*--長指令, -> r0 = a0, ..., r7 = a7, r8 = b0, ..., r15 = b7;
joins two 64-bit vectors into a single 128-bit vector. 
The output vector contains twice the number of elements as each input vector. 
The lower half of the output vector contains the elements of the first input vector.--*/
int8x16_t vcombine_s8 (int8x8_t __a, int8x8_t __b);//_mm_unpacklo_epi64
int16x8_t vcombine_s16 (int16x4_t __a, int16x4_t __b);//_mm_unpacklo_epi64
int32x4_t vcombine_s32 (int32x2_t __a, int32x2_t __b);//_mm_unpacklo_epi64
int64x2_t vcombine_s64 (int64x1_t __a, int64x1_t __b);//_mm_unpacklo_epi64
float32x4_t vcombine_f32 (float32x2_t __a, float32x2_t __b);
uint8x16_t vcombine_u8 (uint8x8_t __a, uint8x8_t __b);//_mm_unpacklo_epi64
uint16x8_t vcombine_u16 (uint16x4_t __a, uint16x4_t __b);//_mm_unpacklo_epi64
uint32x4_t vcombine_u32 (uint32x2_t __a, uint32x2_t __b);//_mm_unpacklo_epi64
uint64x2_t vcombine_u64 (uint64x1_t __a, uint64x1_t __b);//_mm_unpacklo_epi64
poly8x16_t vcombine_p8 (poly8x8_t __a, poly8x8_t __b);//_mm_unpacklo_epi64
poly16x8_t vcombine_p16 (poly16x4_t __a, poly16x4_t __b);//_mm_unpacklo_epi64
/***************************************Splitting vectors*******************************/
/*--1、窄指令, -> ri = a(i+4); returns the higher half of the 128-bit input vector. The
output is a 64-bit vector that has half the number of elements as the input vector.--*/
int8x8_t vget_high_s8 (int8x16_t __a);//_mm_unpackhi_epi64
int16x4_t vget_high_s16 (int16x8_t __a);//_mm_unpackhi_epi64
int32x2_t vget_high_s32 (int32x4_t __a);//_mm_unpackhi_epi64
int64x1_t vget_high_s64 (int64x2_t __a);//_mm_unpackhi_epi64
float32x2_t vget_high_f32 (float32x4_t __a);
uint8x8_t vget_high_u8 (uint8x16_t __a);//_mm_unpackhi_epi64
uint16x4_t vget_high_u16 (uint16x8_t __a);//_mm_unpackhi_epi64
uint32x2_t vget_high_u32 (uint32x4_t __a);//_mm_unpackhi_epi64
uint64x1_t vget_high_u64 (uint64x2_t __a);//_mm_unpackhi_epi64
poly8x8_t vget_high_p8 (poly8x16_t __a);//_mm_unpackhi_epi64
poly16x4_t vget_high_p16 (poly16x8_t __a);//_mm_unpackhi_epi64
/*--2、窄指令, -> ri = ai; returns the lower half of the 128-bit input vector. The
output is a 64-bit vector that has half the number of elements as the input vector.--*/
int8x8_t vget_low_s8 (int8x16_t __a);
int16x4_t vget_low_s16 (int16x8_t __a);
int32x2_t vget_low_s32 (int32x4_t __a);
float32x2_t vget_low_f32 (float32x4_t __a);
uint8x8_t vget_low_u8 (uint8x16_t __a);
uint16x4_t vget_low_u16 (uint16x8_t __a);
uint32x2_t vget_low_u32 (uint32x4_t __a);
poly8x8_t vget_low_p8 (poly8x16_t __a);
poly16x4_t vget_low_p16 (poly16x8_t __a);
int64x1_t vget_low_s64 (int64x2_t __a);
uint64x1_t vget_low_u64 (uint64x2_t __a);
/****************************************************Conversions************************/
/*--1、Convert from float: vcvt ->, convert from floating-point to integer.--*/
int32x2_t vcvt_s32_f32 (float32x2_t __a);
uint32x2_t vcvt_u32_f32 (float32x2_t __a);
int32x4_t vcvtq_s32_f32 (float32x4_t __a);
uint32x4_t vcvtq_u32_f32 (float32x4_t __a);
int32x2_t vcvt_n_s32_f32 (float32x2_t __a, const int __b);
uint32x2_t vcvt_n_u32_f32 (float32x2_t __a, const int __b);
int32x4_t vcvtq_n_s32_f32 (float32x4_t __a, const int __b);
uint32x4_t vcvtq_n_u32_f32 (float32x4_t __a, const int __b);
/*--2、Convert to float: vcvt ->, convert from integer to floating-point.--*/
float32x2_t vcvt_f32_s32 (int32x2_t __a);
float32x2_t vcvt_f32_u32 (uint32x2_t __a);
float32x4_t vcvtq_f32_s32 (int32x4_t __a);
float32x4_t vcvtq_f32_u32 (uint32x4_t __a);
float32x2_t vcvt_n_f32_s32 (int32x2_t __a, const int __b);
float32x2_t vcvt_n_f32_u32 (uint32x2_t __a, const int __b);
float32x4_t vcvtq_n_f32_s32 (int32x4_t __a, const int __b);
float32x4_t vcvtq_n_f32_u32 (uint32x4_t __a, const int __b);
/*--3、Convert between half-precision and single-precision numbers: vcvt ->--*/
float16x4_t vcvt_f16_f32(float32x4_t a);
float32x4_t vcvt_f32_f16(float16x4_t a);
/*************************************************Move**********************************/
/*--1、Vector narrow integer (narrow instruction): vmovn -> ri = low half of the bits of ai;
copies the least significant half of each element of a quadword vector into
the corresponding elements of a doubleword vector.--*/
int8x8_t vmovn_s16 (int16x8_t __a);
int16x4_t vmovn_s32 (int32x4_t __a);
int32x2_t vmovn_s64 (int64x2_t __a);
uint8x8_t vmovn_u16 (uint16x8_t __a);
uint16x4_t vmovn_u32 (uint32x4_t __a);
uint32x2_t vmovn_u64 (uint64x2_t __a);
/*--2、Vector long move (long instruction): vmovl -> sign extends or zero extends each
element in a doubleword vector to twice its original length,
and places the results in a quadword vector.
The trailing comment on each prototype names the corresponding SSE intrinsic.--*/
int16x8_t vmovl_s8 (int8x8_t __a);//_mm_cvtepi8_epi16
int32x4_t vmovl_s16 (int16x4_t __a);//_mm_cvtepi16_epi32
int64x2_t vmovl_s32 (int32x2_t __a);//_mm_cvtepi32_epi64
uint16x8_t vmovl_u8 (uint8x8_t __a);//_mm_cvtepu8_epi16
uint32x4_t vmovl_u16 (uint16x4_t __a);//_mm_cvtepu16_epi32
uint64x2_t vmovl_u32 (uint32x2_t __a);//_mm_cvtepu32_epi64
/*--3、Vector saturating narrow integer(窄指令): vqmovn -> copies each element of the
operand vector to the corresponding element of the destination vector. 
The result element is half the width of 
the operand element, and values are saturated to the result width.
The results are the same type as the operands.--*/
int8x8_t vqmovn_s16 (int16x8_t __a);//_mm_packs_epi16
int16x4_t vqmovn_s32 (int32x4_t __a);//_mm_packs_epi32
int32x2_t vqmovn_s64 (int64x2_t __a);
uint8x8_t vqmovn_u16 (uint16x8_t __a);
uint16x4_t vqmovn_u32 (uint32x4_t __a);
uint32x2_t vqmovn_u64 (uint64x2_t __a);
/*--4、Vector saturating narrow integer signed->unsigned(窄指令): copies each element of
the operand vector to the corresponding element of the destination vector.
The result element is half the width of the operand element,
and values are saturated to the result width.
The elements in the operand are signed and the elements in the result are unsigned.--*/
uint8x8_t vqmovun_s16 (int16x8_t __a);//_mm_packus_epi16
uint16x4_t vqmovun_s32 (int32x4_t __a);//_mm_packus_epi32
uint32x2_t vqmovun_s64 (int64x2_t __a);
/******************************************************Table lookup*********************/
/*--1、Table lookup: vtbl -> uses byte indexes in a control vector to look up byte 
values in a table and generate a new vector. Indexes out of range return 0. 
The table is in Vector1 and uses one(or two or three or four)D registers.--*/
int8x8_t vtbl1_s8 (int8x8_t __a, int8x8_t __b);
uint8x8_t vtbl1_u8 (uint8x8_t __a, uint8x8_t __b);
poly8x8_t vtbl1_p8 (poly8x8_t __a, uint8x8_t __b);
int8x8_t vtbl2_s8 (int8x8x2_t __a, int8x8_t __b);
uint8x8_t vtbl2_u8 (uint8x8x2_t __a, uint8x8_t __b);
poly8x8_t vtbl2_p8 (poly8x8x2_t __a, uint8x8_t __b);
int8x8_t vtbl3_s8 (int8x8x3_t __a, int8x8_t __b);
uint8x8_t vtbl3_u8 (uint8x8x3_t __a, uint8x8_t __b);
poly8x8_t vtbl3_p8 (poly8x8x3_t __a, uint8x8_t __b);
int8x8_t vtbl4_s8 (int8x8x4_t __a, int8x8_t __b);
uint8x8_t vtbl4_u8 (uint8x8x4_t __a, uint8x8_t __b);
poly8x8_t vtbl4_p8 (poly8x8x4_t __a, uint8x8_t __b);
/*--2、Extended table lookup: vtbx -> uses byte indexes in a control vector to look up
byte values in a table and generate a new vector. Indexes out of range leave the
destination element unchanged. The table is in Vector2 and uses one (or two or three or
four) D registers. Vector1 contains the elements of the destination vector.--*/
int8x8_t vtbx1_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c);
uint8x8_t vtbx1_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);
poly8x8_t vtbx1_p8 (poly8x8_t __a, poly8x8_t __b, uint8x8_t __c);
int8x8_t vtbx2_s8 (int8x8_t __a, int8x8x2_t __b, int8x8_t __c);
uint8x8_t vtbx2_u8 (uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c);
poly8x8_t vtbx2_p8 (poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c);
int8x8_t vtbx3_s8 (int8x8_t __a, int8x8x3_t __b, int8x8_t __c);
uint8x8_t vtbx3_u8 (uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c);
poly8x8_t vtbx3_p8 (poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c);
int8x8_t vtbx4_s8 (int8x8_t __a, int8x8x4_t __b, int8x8_t __c);
uint8x8_t vtbx4_u8 (uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c);
poly8x8_t vtbx4_p8 (poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c);
/***************************************Multiply, scalar, lane**************************/
/*--1、Vector multiply by scalar: vmul -> ri = ai * b; 
multiplies each element in a vector by a scalar, 
and places the results in the destination vector.--*/
int16x4_t vmul_n_s16 (int16x4_t __a, int16_t __b);
int32x2_t vmul_n_s32 (int32x2_t __a, int32_t __b);
float32x2_t vmul_n_f32 (float32x2_t __a, float32_t __b);
uint16x4_t vmul_n_u16 (uint16x4_t __a, uint16_t __b);
uint32x2_t vmul_n_u32 (uint32x2_t __a, uint32_t __b);
int16x8_t vmulq_n_s16 (int16x8_t __a, int16_t __b);
int32x4_t vmulq_n_s32 (int32x4_t __a, int32_t __b);
float32x4_t vmulq_n_f32 (float32x4_t __a, float32_t __b);
uint16x8_t vmulq_n_u16 (uint16x8_t __a, uint16_t __b);
uint32x4_t vmulq_n_u32 (uint32x4_t __a, uint32_t __b);
/*--2、Vector multiply by scalar: -> ri = ai * b[c]; 
multiplies the first vector by a scalar. 
The scalar is the element in the second vector with index c.--*/
int16x4_t vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);
float32x2_t vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __c);
uint16x4_t vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);
uint32x2_t vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);
int16x8_t vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c);
int32x4_t vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c);
float32x4_t vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __c);
uint16x8_t vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __c);
uint32x4_t vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __c);
/*--3、Vector long multiply with scalar: vmull ->  ri = ai * b;
multiplies a vector by a scalar. 
Elements in the result are wider than elements in input vector.--*/
int32x4_t vmull_n_s16 (int16x4_t __a, int16_t __b);
int64x2_t vmull_n_s32 (int32x2_t __a, int32_t __b);
uint32x4_t vmull_n_u16 (uint16x4_t __a, uint16_t __b);
uint64x2_t vmull_n_u32 (uint32x2_t __a, uint32_t __b);
/*--4、Vector long multiply by scalar: vmull -> ri = ai * b[c];
multiplies the first vector by a scalar. 
The scalar is the element in the second vector with index c. 
The elements in the result are wider than the elements in input vector.--*/
int32x4_t vmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int64x2_t vmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);
uint32x4_t vmull_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);
uint64x2_t vmull_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);
/*--5、Vector saturating doubling long multiply with scalar: vqdmull -> ri = sat(2 * ai * b);
multiplies the elements in the vector by a scalar, and doubles the results.
If any of the results overflow, they are saturated and the sticky QC flag is set.--*/
int32x4_t vqdmull_n_s16 (int16x4_t __a, int16_t __b);
int64x2_t vqdmull_n_s32 (int32x2_t __a, int32_t __b);
/*--6、Vector saturating doubling long multiply by scalar: vqdmull -> ri = sat(2 * ai * b[c]);
multiplies the elements in the first vector by a scalar, and doubles the results.
The scalar has index c in the second vector. If any of the results overflow,
they are saturated and the sticky QC flag is set.--*/
int32x4_t vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int64x2_t vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);
/*--7、Vector saturating doubling multiply high with scalar:
vqdmulh -> ri = high half of sat(2 * ai * b);
multiplies the elements of the vector by a scalar, and doubles the results.
It then returns only the high half of the results.
If any of the results overflow, they are saturated and the sticky QC flag is set.--*/
int16x4_t vqdmulh_n_s16 (int16x4_t __a, int16_t __b);
int32x2_t vqdmulh_n_s32 (int32x2_t __a, int32_t __b);
int16x8_t vqdmulhq_n_s16 (int16x8_t __a, int16_t __b);
int32x4_t vqdmulhq_n_s32 (int32x4_t __a, int32_t __b);
/*--8、Vector saturating doubling multiply high by scalar:
vqdmulh -> ri = high half of sat(2 * ai * b[c]);
multiplies the elements of the first vector by a scalar, and doubles the results. It then
returns only the high half of the results. The scalar has index c in the second vector.
If any of the results overflow, they are saturated and the sticky QC flag is set.--*/
int16x4_t vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);
int16x8_t vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c);
int32x4_t vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c);
/*--9、Vector saturating rounding doubling multiply high with scalar:
vqrdmulh -> ri = rounded high half of sat(2 * ai * b);
multiplies the elements of the vector by a scalar and doubles the results.
It then returns only the high half of the rounded results.
If any of the results overflow, they are saturated and the sticky QC flag is set.--*/
int16x4_t vqrdmulh_n_s16 (int16x4_t __a, int16_t __b);
int32x2_t vqrdmulh_n_s32 (int32x2_t __a, int32_t __b);
int16x8_t vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b);
int32x4_t vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b);
/*--10、Vector rounding saturating doubling multiply high by scalar:
vqrdmulh -> ri = rounded high half of sat(2 * ai * b[c]);
multiplies the elements of the first vector by a scalar and doubles the results.
It then returns only the high half of the rounded results.
The scalar has index c in the second vector. If any of the results overflow,
they are saturated and the sticky QC flag is set.--*/
int16x4_t vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c);
int16x8_t vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c);
int32x4_t vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c);
/*--11、Vector multiply accumulate with scalar: vmla -> ri = ai + bi * c;
multiplies each element in the second vector by a scalar, 
and adds the results to the corresponding elements of the first vector.--*/
int16x4_t vmla_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c);
int32x2_t vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c);
float32x2_t vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c);
uint16x4_t vmla_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c);
uint32x2_t vmla_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c);
int16x8_t vmlaq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c);
int32x4_t vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c);
float32x4_t vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c);
uint16x8_t vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c);
uint32x4_t vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c);
/*--12、Vector multiply accumulate by scalar: vmla -> ri = ai + bi * c[d];
multiplies each element in the second vector by a scalar, 
and adds the results to the corresponding elements of the first vector. 
The scalar has index d in the third vector.--*/
int16x4_t vmla_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);
int32x2_t vmla_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);
float32x2_t vmla_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c,const int __d);
uint16x4_t vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d);
uint32x2_t vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);
int16x8_t vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d);
int32x4_t vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d);
float32x4_t vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c,const int __d);
uint16x8_t vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d);
uint32x4_t vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d);
/*--13、Vector widening multiply accumulate with scalar: vmlal -> ri = ai + bi * c;
multiplies each element in the second vector by a scalar, and adds the results into the
corresponding elements of the first vector.
The scalar is passed directly as the third argument. The elements in the result are
wider than the elements of the second vector.--*/
int32x4_t vmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);
int64x2_t vmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);
uint32x4_t vmlal_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c);
uint64x2_t vmlal_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c);
/*--14、Vector widening multiply accumulate by scalar: vmlal -> ri = ai + bi * c[d];
multiplies each element in the second vector by a scalar, and adds the results to the 
corresponding elements of the first vector. The scalar has index d in the third vector.
The elements in the result are wider.--*/
int32x4_t vmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);
int64x2_t vmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);
uint32x4_t vmlal_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d);
uint64x2_t vmlal_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);
/*--15、Vector widening saturating doubling multiply accumulate with scalar:
vqdmlal -> ri = sat(ai + 2 * bi * c);
multiplies the elements in the second vector by a scalar, and doubles the results.
It then adds the results to the elements in the first vector.
If any of the results overflow, they are saturated and the sticky QC flag is set.--*/
int32x4_t vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);
int64x2_t vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);
/*--16、Vector widening saturating doubling multiply accumulate by scalar:
vqdmlal -> ri = sat(ai + 2 * bi * c[d]);
multiplies each element in the second vector by a scalar, doubles the results and adds
them to the corresponding elements of the first vector. The scalar has index d in the
third vector. If any of the results overflow,
they are saturated and the sticky QC flag is set.--*/
int32x4_t vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);
int64x2_t vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);
/*--17、Vector multiply subtract with scalar: vmls -> ri = ai - bi * c;
multiplies each element in a vector by a scalar, subtracts the results from the 
corresponding elements of the destination vector, 
and places the final results in the destination vector.--*/
int16x4_t vmls_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c);
int32x2_t vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c);
float32x2_t vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c);
uint16x4_t vmls_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c);
uint32x2_t vmls_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c);
int16x8_t vmlsq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c);
int32x4_t vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c);
float32x4_t vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c);
uint16x8_t vmlsq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c);
uint32x4_t vmlsq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c);
/*--18、Vector multiply subtract by scalar: vmls -> ri = ai - bi * c[d];
multiplies each element in the second vector by a scalar, and subtracts them from the
corresponding elements of the first vector.
The scalar has index d in the third vector.--*/
int16x4_t vmls_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);
int32x2_t vmls_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);
float32x2_t vmls_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c,const int __d);
uint16x4_t vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d);
uint32x2_t vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);
int16x8_t vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d);
int32x4_t vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d);
float32x4_t vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c,const int __d);
uint16x8_t vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d);
uint32x4_t vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d);
/*--19、Vector widening multiply subtract with scalar: vmlsl -> ri = ai - bi * c;
multiplies the elements in the second vector by a scalar, then subtracts the results from
the elements in the first vector. The elements of the result are wider.--*/
int32x4_t vmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);
int64x2_t vmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);
uint32x4_t vmlsl_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c);
uint64x2_t vmlsl_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c);
/*--20、Vector widening multiply subtract by scalar: vmlsl -> ri = ai - bi * c[d];
multiplies each element in the second vector by a scalar, 
and subtracts them from the corresponding elements of the first vector. 
The scalar has index d in the third vector. The elements in the result are wider.--*/
int32x4_t vmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);
int64x2_t vmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);
uint32x4_t vmlsl_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d)
uint64x2_t vmlsl_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d);
/*--21、Vector widening saturating doubling multiply subtract with scalar: 
vqdmlsl -> ri = sat(ai - sat(2 * bi * c));
multiplies the elements of the second vector with a scalar and doubles the results. 
It then subtracts the results from the elements in the first vector.
If any of the results overflow, they are saturated and the sticky QC flag is set.--*/
int32x4_t vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c);
int64x2_t vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c);
/*--22、Vector widening saturating doubling multiply subtract by scalar:
vqdmlsl -> ri = sat(ai - sat(2 * bi * c[d]));
multiplies each element in the second vector by a scalar, doubles the results and subtracts
them from the corresponding elements of the first vector. The scalar has index d in the 
third vector. If any of the results overflow, 
they are saturated and the sticky QC flag is set.--*/
int32x4_t vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d);
int64x2_t vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d);
/*****************************************************Vector extract********************/
/*--Vector extract: vext -> extracts n elements (n = __c) from the lower end of the second
operand vector and the remaining elements from the higher end of the first, and combines
them to form the result vector. The elements from the second operand are placed in the most 
significant part of the result vector. The elements from the first operand are placed in
the least significant part of the result vector. This intrinsic cycles the elements
through the lanes if the two input vectors are the same.--*/
int8x8_t vext_s8 (int8x8_t __a, int8x8_t __b, const int __c);
int16x4_t vext_s16 (int16x4_t __a, int16x4_t __b, const int __c);
int32x2_t vext_s32 (int32x2_t __a, int32x2_t __b, const int __c);
int64x1_t vext_s64 (int64x1_t __a, int64x1_t __b, const int __c);
float32x2_t vext_f32 (float32x2_t __a, float32x2_t __b, const int __c);
uint8x8_t vext_u8 (uint8x8_t __a, uint8x8_t __b, const int __c);
uint16x4_t vext_u16 (uint16x4_t __a, uint16x4_t __b, const int __c);
uint32x2_t vext_u32 (uint32x2_t __a, uint32x2_t __b, const int __c);
uint64x1_t vext_u64 (uint64x1_t __a, uint64x1_t __b, const int __c);
poly8x8_t vext_p8 (poly8x8_t __a, poly8x8_t __b, const int __c);
poly16x4_t vext_p16 (poly16x4_t __a, poly16x4_t __b, const int __c);
int8x16_t vextq_s8 (int8x16_t __a, int8x16_t __b, const int __c);//_mm_alignr_epi8 
int16x8_t vextq_s16 (int16x8_t __a, int16x8_t __b, const int __c);//_mm_alignr_epi8 
int32x4_t vextq_s32 (int32x4_t __a, int32x4_t __b, const int __c);//_mm_alignr_epi8
int64x2_t vextq_s64 (int64x2_t __a, int64x2_t __b, const int __c);//_mm_alignr_epi8
float32x4_t vextq_f32 (float32x4_t __a, float32x4_t __b, const int __c);//_mm_alignr_epi8
uint8x16_t vextq_u8 (uint8x16_t __a, uint8x16_t __b, const int __c);//_mm_alignr_epi8
uint16x8_t vextq_u16 (uint16x8_t __a, uint16x8_t __b, const int __c);//_mm_alignr_epi8
uint32x4_t vextq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c);//_mm_alignr_epi8
uint64x2_t vextq_u64 (uint64x2_t __a, uint64x2_t __b, const int __c);//_mm_alignr_epi8
poly8x16_t vextq_p8 (poly8x16_t __a, poly8x16_t __b, const int __c);//_mm_alignr_epi8
poly16x8_t vextq_p16 (poly16x8_t __a, poly16x8_t __b, const int __c);//_mm_alignr_epi8
/****************************************************Reverse elements*******************/
/*--1、Reverse vector elements (swap endianness): vrev64 -> reverses the order of 8-bit, 
16-bit, or 32-bit elements within each doubleword of the vector, 
and places the result in the corresponding destination vector.--*/
int8x8_t vrev64_s8 (int8x8_t __a);
int16x4_t vrev64_s16 (int16x4_t __a);
int32x2_t vrev64_s32 (int32x2_t __a);
float32x2_t vrev64_f32 (float32x2_t __a);//_mm_shuffle_ps
uint8x8_t vrev64_u8 (uint8x8_t __a);
uint16x4_t vrev64_u16 (uint16x4_t __a);
uint32x2_t vrev64_u32 (uint32x2_t __a);
poly8x8_t vrev64_p8 (poly8x8_t __a);
poly16x4_t vrev64_p16 (poly16x4_t __a);
int8x16_t vrev64q_s8 (int8x16_t __a);
int16x8_t vrev64q_s16 (int16x8_t __a);
int32x4_t vrev64q_s32 (int32x4_t __a);
float32x4_t vrev64q_f32 (float32x4_t __a);//_mm_shuffle_ps
uint8x16_t vrev64q_u8 (uint8x16_t __a);
uint16x8_t vrev64q_u16 (uint16x8_t __a);
uint32x4_t vrev64q_u32 (uint32x4_t __a);
poly8x16_t vrev64q_p8 (poly8x16_t __a);
poly16x8_t vrev64q_p16 (poly16x8_t __a);
/*--2、Reverse vector elements (swap endianness): vrev32 -> reverses the order of 8-bit 
or 16-bit elements within each word of the vector, 
and places the result in the corresponding destination vector.--*/
int8x8_t vrev32_s8 (int8x8_t __a);
int16x4_t vrev32_s16 (int16x4_t __a);
uint8x8_t vrev32_u8 (uint8x8_t __a);
uint16x4_t vrev32_u16 (uint16x4_t __a);
poly8x8_t vrev32_p8 (poly8x8_t __a);
poly16x4_t vrev32_p16 (poly16x4_t __a);
int8x16_t vrev32q_s8 (int8x16_t __a);
int16x8_t vrev32q_s16 (int16x8_t __a);
uint8x16_t vrev32q_u8 (uint8x16_t __a);
uint16x8_t vrev32q_u16 (uint16x8_t __a);
poly8x16_t vrev32q_p8 (poly8x16_t __a);
poly16x8_t vrev32q_p16 (poly16x8_t __a);
/*--3、Reverse vector elements (swap endianness): vrev16 -> reverses the order 
of 8-bit elements within each halfword of the vector, 
and places the result in the corresponding destination vector.--*/
int8x8_t vrev16_s8 (int8x8_t __a);
uint8x8_t vrev16_u8 (uint8x8_t __a);
poly8x8_t vrev16_p8 (poly8x8_t __a);
int8x16_t vrev16q_s8 (int8x16_t __a);
uint8x16_t vrev16q_u8 (uint8x16_t __a);
poly8x16_t vrev16q_p8 (poly8x16_t __a);
/**********************************************************Bitwise Select***************/
/*--Bitwise Select: vbsl -> selects each bit of the result from the second argument (__b)
if the corresponding bit of the mask in the first argument (__a) is 1, 
or from the third argument (__c) if that mask bit is 0: ri = (ai & bi) | (~ai & ci).--*/
int8x8_t vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c);
int16x4_t vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c);
int32x2_t vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c);
int64x1_t vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c);
float32x2_t vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c);
uint8x8_t vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c);
uint16x4_t vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c);
uint32x2_t vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c);
uint64x1_t vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c);
poly8x8_t vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c);
poly16x4_t vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c);
int8x16_t vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c);
int16x8_t vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c);
int32x4_t vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c);
int64x2_t vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c);
float32x4_t vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c);
uint8x16_t vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c);
uint16x8_t vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c);
uint32x4_t vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c);
uint64x2_t vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c);
poly8x16_t vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c);
poly16x8_t vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c);
/************************************Transposition operations***************************/
/*--1、Transpose elements: vtrn -> treats the elements of its input vectors as elements
of 2 x 2 matrices, and transposes the matrices. Essentially, it exchanges the elements 
with odd indices from Vector1 with the elements with even indices from Vector2.--*/
int8x8x2_t vtrn_s8 (int8x8_t __a, int8x8_t __b);
int16x4x2_t vtrn_s16 (int16x4_t __a, int16x4_t __b);
uint8x8x2_t vtrn_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4x2_t vtrn_u16 (uint16x4_t __a, uint16x4_t __b);
poly8x8x2_t vtrn_p8 (poly8x8_t __a, poly8x8_t __b);
poly16x4x2_t vtrn_p16 (poly16x4_t __a, poly16x4_t __b);
int32x2x2_t vtrn_s32 (int32x2_t __a, int32x2_t __b)
float32x2x2_t vtrn_f32 (float32x2_t __a, float32x2_t __b)
uint32x2x2_t vtrn_u32 (uint32x2_t __a, uint32x2_t __b)
int8x16x2_t vtrnq_s8 (int8x16_t __a, int8x16_t __b)
int16x8x2_t vtrnq_s16 (int16x8_t __a, int16x8_t __b)
int32x4x2_t vtrnq_s32 (int32x4_t __a, int32x4_t __b)
float32x4x2_t vtrnq_f32 (float32x4_t __a, float32x4_t __b)
uint8x16x2_t vtrnq_u8 (uint8x16_t __a, uint8x16_t __b)
uint16x8x2_t vtrnq_u16 (uint16x8_t __a, uint16x8_t __b)
uint32x4x2_t vtrnq_u32 (uint32x4_t __a, uint32x4_t __b);
poly8x16x2_t vtrnq_p8 (poly8x16_t __a, poly8x16_t __b);
poly16x8x2_t vtrnq_p16 (poly16x8_t __a, poly16x8_t __b);
/*--2、Interleave elements(Zip elements): 
vzip ->  (Vector Zip) interleaves the elements of two vectors.--*/
int8x8x2_t vzip_s8 (int8x8_t __a, int8x8_t __b);
int16x4x2_t vzip_s16 (int16x4_t __a, int16x4_t __b);
uint8x8x2_t vzip_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4x2_t vzip_u16 (uint16x4_t __a, uint16x4_t __b);
poly8x8x2_t vzip_p8 (poly8x8_t __a, poly8x8_t __b);
poly16x4x2_t vzip_p16 (poly16x4_t __a, poly16x4_t __b);
int32x2x2_t vzip_s32 (int32x2_t __a, int32x2_t __b);
float32x2x2_t vzip_f32 (float32x2_t __a, float32x2_t __b);
uint32x2x2_t vzip_u32 (uint32x2_t __a, uint32x2_t __b);
int8x16x2_t vzipq_s8 (int8x16_t __a, int8x16_t __b);
int16x8x2_t vzipq_s16 (int16x8_t __a, int16x8_t __b);
int32x4x2_t vzipq_s32 (int32x4_t __a, int32x4_t __b);
float32x4x2_t vzipq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16x2_t vzipq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8x2_t vzipq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4x2_t vzipq_u32 (uint32x4_t __a, uint32x4_t __b);
poly8x16x2_t vzipq_p8 (poly8x16_t __a, poly8x16_t __b);
poly16x8x2_t vzipq_p16 (poly16x8_t __a, poly16x8_t __b);
/*--3、De-Interleave elements(Unzip elements): 
vuzp -> (Vector Unzip) de-interleaves the elements of two vectors.
De-interleaving is the inverse process of interleaving.--*/
int8x8x2_t vuzp_s8 (int8x8_t __a, int8x8_t __b);
int16x4x2_t vuzp_s16 (int16x4_t __a, int16x4_t __b);
int32x2x2_t vuzp_s32 (int32x2_t __a, int32x2_t __b);
float32x2x2_t vuzp_f32 (float32x2_t __a, float32x2_t __b);
uint8x8x2_t vuzp_u8 (uint8x8_t __a, uint8x8_t __b);
uint16x4x2_t vuzp_u16 (uint16x4_t __a, uint16x4_t __b);
uint32x2x2_t vuzp_u32 (uint32x2_t __a, uint32x2_t __b);
poly8x8x2_t vuzp_p8 (poly8x8_t __a, poly8x8_t __b);
poly16x4x2_t vuzp_p16 (poly16x4_t __a, poly16x4_t __b);
int8x16x2_t vuzpq_s8 (int8x16_t __a, int8x16_t __b);
int16x8x2_t vuzpq_s16 (int16x8_t __a, int16x8_t __b);
int32x4x2_t vuzpq_s32 (int32x4_t __a, int32x4_t __b);
float32x4x2_t vuzpq_f32 (float32x4_t __a, float32x4_t __b);
uint8x16x2_t vuzpq_u8 (uint8x16_t __a, uint8x16_t __b);
uint16x8x2_t vuzpq_u16 (uint16x8_t __a, uint16x8_t __b);
uint32x4x2_t vuzpq_u32 (uint32x4_t __a, uint32x4_t __b);
poly8x16x2_t vuzpq_p8 (poly8x16_t __a, poly8x16_t __b);
poly16x8x2_t vuzpq_p16 (poly16x8_t __a, poly16x8_t __b);
/*********************************************************Load**************************/
/*--1、Load a single vector from memory: vld1 -> loads a vector from memory.--*/
int8x8_t vld1_s8 (const int8_t * __a);
int16x4_t vld1_s16 (const int16_t * __a);
int32x2_t vld1_s32 (const int32_t * __a);
int64x1_t vld1_s64 (const int64_t * __a);
float32x2_t vld1_f32 (const float32_t * __a);
uint8x8_t vld1_u8 (const uint8_t * __a);//_mm_loadl_epi64
uint16x4_t vld1_u16 (const uint16_t * __a);//_mm_loadl_epi64
uint32x2_t vld1_u32 (const uint32_t * __a);//_mm_loadl_epi64
uint64x1_t vld1_u64 (const uint64_t * __a);//_mm_loadl_epi64
poly8x8_t vld1_p8 (const poly8_t * __a);
poly16x4_t vld1_p16 (const poly16_t * __a);
int8x16_t vld1q_s8 (const int8_t * __a);
int16x8_t vld1q_s16 (const int16_t * __a);
int32x4_t vld1q_s32 (const int32_t * __a);
int64x2_t vld1q_s64 (const int64_t * __a);
float32x4_t vld1q_f32 (const float32_t * __a);
uint8x16_t vld1q_u8 (const uint8_t * __a);
uint16x8_t vld1q_u16 (const uint16_t * __a);
uint32x4_t vld1q_u32 (const uint32_t * __a);
uint64x2_t vld1q_u64 (const uint64_t * __a);
poly8x16_t vld1q_p8 (const poly8_t * __a);
poly16x8_t vld1q_p16 (const poly16_t * __a);
/*--2、Load a single lane from memory: vld1 -> loads one element of the input vector 
from memory and returns this in the result vector. Elements of the vector that are not
loaded are returned in the result vector unaltered. 
c is the index of the element to load.--*/
int8x8_t vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c);//_mm_insert_epi8
int16x4_t vld1_lane_s16 (const int16_t * __a, int16x4_t __b,const int __c);//_mm_insert_epi16
int32x2_t vld1_lane_s32 (const int32_t * __a, int32x2_t __b, const int __c);//_mm_insert_epi32
float32x2_t vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c);
uint8x8_t vld1_lane_u8 (const uint8_t * __a, uint8x8_t __b, const int __c);//_mm_insert_epi8
uint16x4_t vld1_lane_u16 (const uint16_t * __a, uint16x4_t __b, const int __c);//_mm_insert_epi16
uint32x2_t vld1_lane_u32 (const uint32_t * __a, uint32x2_t __b, const int __c);//_mm_insert_epi32
poly8x8_t vld1_lane_p8 (const poly8_t * __a, poly8x8_t __b, const int __c);//_mm_insert_epi8
poly16x4_t vld1_lane_p16 (const poly16_t * __a, poly16x4_t __b, const int __c);//_mm_insert_epi16
int64x1_t vld1_lane_s64 (const int64_t * __a, int64x1_t __b, const int __c);
uint64x1_t vld1_lane_u64 (const uint64_t * __a, uint64x1_t __b, const int __c);
int8x16_t vld1q_lane_s8 (const int8_t * __a, int8x16_t __b, const int __c);//_mm_insert_epi8
int16x8_t vld1q_lane_s16 (const int16_t * __a, int16x8_t __b, const int __c);//_mm_insert_epi16
int32x4_t vld1q_lane_s32 (const int32_t * __a, int32x4_t __b, const int __c);//_mm_insert_epi32
float32x4_t vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c);
uint8x16_t vld1q_lane_u8 (const uint8_t * __a, uint8x16_t __b, const int __c);//_mm_insert_epi8
uint16x8_t vld1q_lane_u16 (const uint16_t * __a, uint16x8_t __b, const int __c);//_mm_insert_epi16
uint32x4_t vld1q_lane_u32 (const uint32_t * __a, uint32x4_t __b, const int __c);//_mm_insert_epi32
poly8x16_t vld1q_lane_p8 (const poly8_t * __a, poly8x16_t __b, const int __c);//_mm_insert_epi8
poly16x8_t vld1q_lane_p16 (const poly16_t * __a, poly16x8_t __b, const int __c);//_mm_insert_epi16
int64x2_t vld1q_lane_s64 (const int64_t * __a, int64x2_t __b, const int __c);//_mm_insert_epi64
uint64x2_t vld1q_lane_u64 (const uint64_t * __a, uint64x2_t __b, const int __c);//_mm_insert_epi64
/*--3、Load all lanes of vector with same value from memory: vld1 -> 
loads one element in a vector from memory. 
The loaded element is copied to all other lanes of the vector.--*/
int8x8_t vld1_dup_s8 (const int8_t * __a);//_mm_set1_epi8
int16x4_t vld1_dup_s16 (const int16_t * __a);//_mm_set1_epi16
int32x2_t vld1_dup_s32 (const int32_t * __a);//_mm_set1_epi32
float32x2_t vld1_dup_f32 (const float32_t * __a);//_mm_set1_ps
uint8x8_t vld1_dup_u8 (const uint8_t * __a);//_mm_set1_epi8
uint16x4_t vld1_dup_u16 (const uint16_t * __a);//_mm_set1_epi16
uint32x2_t vld1_dup_u32 (const uint32_t * __a);//_mm_set1_epi32
poly8x8_t vld1_dup_p8 (const poly8_t * __a);//_mm_set1_epi8
poly16x4_t vld1_dup_p16 (const poly16_t * __a);//_mm_set1_epi16
int64x1_t vld1_dup_s64 (const int64_t * __a);
uint64x1_t vld1_dup_u64 (const uint64_t * __a);
int8x16_t vld1q_dup_s8 (const int8_t * __a);//_mm_set1_epi8
int16x8_t vld1q_dup_s16 (const int16_t * __a);//_mm_set1_epi16
int32x4_t vld1q_dup_s32 (const int32_t * __a);//_mm_set1_epi32
float32x4_t vld1q_dup_f32 (const float32_t * __a);//_mm_set1_ps
uint8x16_t vld1q_dup_u8 (const uint8_t * __a);//_mm_set1_epi8
uint16x8_t vld1q_dup_u16 (const uint16_t * __a);//_mm_set1_epi16
uint32x4_t vld1q_dup_u32 (const uint32_t * __a);//_mm_set1_epi32
poly8x16_t vld1q_dup_p8 (const poly8_t * __a);//_mm_set1_epi8
poly16x8_t vld1q_dup_p16 (const poly16_t * __a);//_mm_set1_epi16
int64x2_t vld1q_dup_s64 (const int64_t * __a);
uint64x2_t vld1q_dup_u64 (const uint64_t * __a);
/*--4、Load 2-element structure from memory: vld2 -> loads 2 vectors from memory. 
It performs a 2-way de-interleave from memory to the vectors.--*/
int8x8x2_t vld2_s8 (const int8_t * __a);
int16x4x2_t vld2_s16 (const int16_t * __a);
int32x2x2_t vld2_s32 (const int32_t * __a);
float32x2x2_t vld2_f32 (const float32_t * __a);
uint8x8x2_t vld2_u8 (const uint8_t * __a);
uint16x4x2_t vld2_u16 (const uint16_t * __a);
uint32x2x2_t vld2_u32 (const uint32_t * __a);
poly8x8x2_t vld2_p8 (const poly8_t * __a);
poly16x4x2_t vld2_p16 (const poly16_t * __a);
int64x1x2_t vld2_s64 (const int64_t * __a);
uint64x1x2_t vld2_u64 (const uint64_t * __a);
int8x16x2_t vld2q_s8 (const int8_t * __a);
int16x8x2_t vld2q_s16 (const int16_t * __a);
int32x4x2_t vld2q_s32 (const int32_t * __a);
float32x4x2_t vld2q_f32 (const float32_t * __a);
uint8x16x2_t vld2q_u8 (const uint8_t * __a);
uint16x8x2_t vld2q_u16 (const uint16_t * __a);
uint32x4x2_t vld2q_u32 (const uint32_t * __a);
poly8x16x2_t vld2q_p8 (const poly8_t * __a);
poly16x8x2_t vld2q_p16 (const poly16_t * __a);
/*--5、Load a single lane of 2-element structure from memory: vld2 -> 
loads two elements in a double-vector structure from memory and returns this in 
the result. The loaded values are from consecutive memory addresses. 
Elements in the structure that are not loaded are returned in the result unaltered. 
c is the index of the elements to load.--*/
int8x8x2_t vld2_lane_s8 (const int8_t * __a, int8x8x2_t __b, const int __c);
int16x4x2_t vld2_lane_s16 (const int16_t * __a, int16x4x2_t __b, const int __c);
int32x2x2_t vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b, const int __c);
float32x2x2_t vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c);
uint8x8x2_t vld2_lane_u8 (const uint8_t * __a, uint8x8x2_t __b, const int __c);
uint16x4x2_t vld2_lane_u16 (const uint16_t * __a, uint16x4x2_t __b, const int __c);
uint32x2x2_t vld2_lane_u32 (const uint32_t * __a, uint32x2x2_t __b, const int __c);
poly8x8x2_t vld2_lane_p8 (const poly8_t * __a, poly8x8x2_t __b, const int __c);
poly16x4x2_t vld2_lane_p16 (const poly16_t * __a, poly16x4x2_t __b, const int __c);
int16x8x2_t vld2q_lane_s16 (const int16_t * __a, int16x8x2_t __b, const int __c);
int32x4x2_t vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b, const int __c);
float32x4x2_t vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c);
uint16x8x2_t vld2q_lane_u16 (const uint16_t * __a, uint16x8x2_t __b, const int __c);
uint32x4x2_t vld2q_lane_u32 (const uint32_t * __a, uint32x4x2_t __b, const int __c);
poly16x8x2_t vld2q_lane_p16 (const poly16_t * __a, poly16x8x2_t __b, const int __c);
/*--6、Load all lanes of 2-element structure with same value from memory: vld2 -> 
loads 2 elements from memory and returns a double-vector structure. 
The first element is copied to all lanes of the first vector. 
The second element is copied to all lanes of the second vector.--*/
int8x8x2_t vld2_dup_s8 (const int8_t * __a);
int16x4x2_t vld2_dup_s16 (const int16_t * __a);
int32x2x2_t vld2_dup_s32 (const int32_t * __a);
float32x2x2_t vld2_dup_f32 (const float32_t * __a);
uint8x8x2_t vld2_dup_u8 (const uint8_t * __a);
uint16x4x2_t vld2_dup_u16 (const uint16_t * __a);
uint32x2x2_t vld2_dup_u32 (const uint32_t * __a);
poly8x8x2_t vld2_dup_p8 (const poly8_t * __a);
poly16x4x2_t vld2_dup_p16 (const poly16_t * __a);
int64x1x2_t vld2_dup_s64 (const int64_t * __a);
uint64x1x2_t vld2_dup_u64 (const uint64_t * __a);
/*--7、Load 3-element structure from memory: vld3 -> 
loads 3 vectors from memory. 
It performs a 3-way de-interleave from memory to the vectors.--*/
int8x8x3_t vld3_s8 (const int8_t * __a);
int16x4x3_t vld3_s16 (const int16_t * __a);
int32x2x3_t vld3_s32 (const int32_t * __a);
float32x2x3_t vld3_f32 (const float32_t * __a);
uint8x8x3_t vld3_u8 (const uint8_t * __a);
uint16x4x3_t vld3_u16 (const uint16_t * __a);
uint32x2x3_t vld3_u32 (const uint32_t * __a);
poly8x8x3_t vld3_p8 (const poly8_t * __a);
poly16x4x3_t vld3_p16 (const poly16_t * __a);
int64x1x3_t vld3_s64 (const int64_t * __a);
uint64x1x3_t vld3_u64 (const uint64_t * __a);
int8x16x3_t vld3q_s8 (const int8_t * __a);
int16x8x3_t vld3q_s16 (const int16_t * __a);
int32x4x3_t vld3q_s32 (const int32_t * __a);
float32x4x3_t vld3q_f32 (const float32_t * __a);
uint8x16x3_t vld3q_u8 (const uint8_t * __a);
uint16x8x3_t vld3q_u16 (const uint16_t * __a);
uint32x4x3_t vld3q_u32 (const uint32_t * __a);
poly8x16x3_t vld3q_p8 (const poly8_t * __a);
poly16x8x3_t vld3q_p16 (const poly16_t * __a);
/*--8、Load a single lane of 3-element structure from memory: vld3 -> 
loads three elements in a triple-vector structure from memory and returns this in the
result. The loaded values are from consecutive memory addresses. 
Elements in the structure that are not loaded are returned in the result unaltered.
c is the index of the element to load.--*/
int8x8x3_t vld3_lane_s8 (const int8_t * __a, int8x8x3_t __b, const int __c);
int16x4x3_t vld3_lane_s16 (const int16_t * __a, int16x4x3_t __b, const int __c);
int32x2x3_t vld3_lane_s32 (const int32_t * __a, int32x2x3_t __b, const int __c);
float32x2x3_t vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c);
uint8x8x3_t vld3_lane_u8 (const uint8_t * __a, uint8x8x3_t __b, const int __c);
uint16x4x3_t vld3_lane_u16 (const uint16_t * __a, uint16x4x3_t __b, const int __c);
uint32x2x3_t vld3_lane_u32 (const uint32_t * __a, uint32x2x3_t __b, const int __c);
poly8x8x3_t vld3_lane_p8 (const poly8_t * __a, poly8x8x3_t __b, const int __c);
poly16x4x3_t vld3_lane_p16 (const poly16_t * __a, poly16x4x3_t __b, const int __c);
int16x8x3_t vld3q_lane_s16 (const int16_t * __a, int16x8x3_t __b, const int __c);
int32x4x3_t vld3q_lane_s32 (const int32_t * __a, int32x4x3_t __b, const int __c);
float32x4x3_t vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c);
uint16x8x3_t vld3q_lane_u16 (const uint16_t * __a, uint16x8x3_t __b, const int __c);
uint32x4x3_t vld3q_lane_u32 (const uint32_t * __a, uint32x4x3_t __b, const int __c);
poly16x8x3_t vld3q_lane_p16 (const poly16_t * __a, poly16x8x3_t __b, const int __c);
/*--9、Load all lanes of 3-element structure with same value from memory: vld3 ->
loads 3 elements from memory and returns a triple-vector structure. The first element
is copied to all lanes of the first vector. And similarly the second and third elements 
are copied to the second and third vectors respectively.--*/
int8x8x3_t vld3_dup_s8 (const int8_t * __a);
int16x4x3_t vld3_dup_s16 (const int16_t * __a);
int32x2x3_t vld3_dup_s32 (const int32_t * __a);
float32x2x3_t vld3_dup_f32 (const float32_t * __a);
uint8x8x3_t vld3_dup_u8 (const uint8_t * __a);
uint16x4x3_t vld3_dup_u16 (const uint16_t * __a);
uint32x2x3_t vld3_dup_u32 (const uint32_t * __a);
poly8x8x3_t vld3_dup_p8 (const poly8_t * __a);
poly16x4x3_t vld3_dup_p16 (const poly16_t * __a);
int64x1x3_t vld3_dup_s64 (const int64_t * __a);
uint64x1x3_t vld3_dup_u64 (const uint64_t * __a);
/*--10、Load 4-element structure from memory: vld4 -> 
loads 4 vectors from memory. 
It performs a 4-way de-interleave from memory to the vectors.--*/
int8x8x4_t vld4_s8 (const int8_t * __a);
int16x4x4_t vld4_s16 (const int16_t * __a);
int32x2x4_t vld4_s32 (const int32_t * __a);
float32x2x4_t vld4_f32 (const float32_t * __a);
uint8x8x4_t  vld4_u8 (const uint8_t * __a);
uint16x4x4_t vld4_u16 (const uint16_t * __a);
uint32x2x4_t vld4_u32 (const uint32_t * __a);
poly8x8x4_t vld4_p8 (const poly8_t * __a);
poly16x4x4_t vld4_p16 (const poly16_t * __a);
int64x1x4_t vld4_s64 (const int64_t * __a);
uint64x1x4_t vld4_u64 (const uint64_t * __a);
int8x16x4_t vld4q_s8 (const int8_t * __a);
int16x8x4_t vld4q_s16 (const int16_t * __a);
int32x4x4_t vld4q_s32 (const int32_t * __a);
float32x4x4_t vld4q_f32 (const float32_t * __a);
uint8x16x4_t vld4q_u8 (const uint8_t * __a);
uint16x8x4_t vld4q_u16 (const uint16_t * __a);
uint32x4x4_t vld4q_u32 (const uint32_t * __a);
poly8x16x4_t vld4q_p8 (const poly8_t * __a);
poly16x8x4_t vld4q_p16 (const poly16_t * __a);
/*--11、Load a single lane of 4-element structure from memory: vld4 -> 
loads four elements in a quad-vector structure from memory and returns this in the result. 
The loaded values are from consecutive memory addresses.
Elements in the structure that are not loaded are returned in the result unaltered. 
c is the index of the element to load.--*/
int8x8x4_t vld4_lane_s8 (const int8_t * __a, int8x8x4_t __b, const int __c);
int16x4x4_t vld4_lane_s16 (const int16_t * __a, int16x4x4_t __b, const int __c);
int32x2x4_t vld4_lane_s32 (const int32_t * __a, int32x2x4_t __b, const int __c);
float32x2x4_t vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c);
uint8x8x4_t vld4_lane_u8 (const uint8_t * __a, uint8x8x4_t __b, const int __c);
uint16x4x4_t vld4_lane_u16 (const uint16_t * __a, uint16x4x4_t __b, const int __c);
uint32x2x4_t vld4_lane_u32 (const uint32_t * __a, uint32x2x4_t __b, const int __c);
poly8x8x4_t vld4_lane_p8 (const poly8_t * __a, poly8x8x4_t __b, const int __c);
poly16x4x4_t vld4_lane_p16 (const poly16_t * __a, poly16x4x4_t __b, const int __c);
int16x8x4_t vld4q_lane_s16 (const int16_t * __a, int16x8x4_t __b, const int __c);
int32x4x4_t vld4q_lane_s32 (const int32_t * __a, int32x4x4_t __b, const int __c);
float32x4x4_t vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c);
uint16x8x4_t vld4q_lane_u16 (const uint16_t * __a, uint16x8x4_t __b, const int __c);
uint32x4x4_t vld4q_lane_u32 (const uint32_t * __a, uint32x4x4_t __b, const int __c);
poly16x8x4_t vld4q_lane_p16 (const poly16_t * __a, poly16x8x4_t __b, const int __c);
/*--12、Load all lanes of 4-element structure with same value from memory: vld4 ->
loads 4 elements from memory and returns a quad-vector structure. The first element is 
copied to all lanes of the first vector. And similarly the second, third, and fourth 
elements are copied to the second, third, and fourth vectors respectively.--*/
int8x8x4_t vld4_dup_s8 (const int8_t * __a);
int16x4x4_t vld4_dup_s16 (const int16_t * __a);
int32x2x4_t vld4_dup_s32 (const int32_t * __a);
float32x2x4_t vld4_dup_f32 (const float32_t * __a);
uint8x8x4_t vld4_dup_u8 (const uint8_t * __a);
uint16x4x4_t vld4_dup_u16 (const uint16_t * __a);
uint32x2x4_t vld4_dup_u32 (const uint32_t * __a);
poly8x8x4_t vld4_dup_p8 (const poly8_t * __a);
poly16x4x4_t vld4_dup_p16 (const poly16_t * __a);
int64x1x4_t vld4_dup_s64 (const int64_t * __a);
uint64x1x4_t vld4_dup_u64 (const uint64_t * __a);
/*****************************************************Store*****************************/
/*--1、Store a single vector into memory: vst1 -> stores a vector into memory.--*/
void vst1_s8 (int8_t * __a, int8x8_t __b);
void vst1_s16 (int16_t * __a, int16x4_t __b);
void vst1_s32 (int32_t * __a, int32x2_t __b);
void vst1_s64 (int64_t * __a, int64x1_t __b);
void vst1_f32 (float32_t * __a, float32x2_t __b);
void vst1_u8 (uint8_t * __a, uint8x8_t __b);
void vst1_u16 (uint16_t * __a, uint16x4_t __b);
void vst1_u32 (uint32_t * __a, uint32x2_t __b);
void vst1_u64 (uint64_t * __a, uint64x1_t __b);
void vst1_p8 (poly8_t * __a, poly8x8_t __b);
void vst1_p16 (poly16_t * __a, poly16x4_t __b);
void vst1q_s8 (int8_t * __a, int8x16_t __b);
void vst1q_s16 (int16_t * __a, int16x8_t __b);
void vst1q_s32 (int32_t * __a, int32x4_t __b);
void vst1q_s64 (int64_t * __a, int64x2_t __b);
void vst1q_f32 (float32_t * __a, float32x4_t __b);
void vst1q_u8 (uint8_t * __a, uint8x16_t __b);
void vst1q_u16 (uint16_t * __a, uint16x8_t __b);
void vst1q_u32 (uint32_t * __a, uint32x4_t __b);
void vst1q_u64 (uint64_t * __a, uint64x2_t __b);
void vst1q_p8 (poly8_t * __a, poly8x16_t __b);
void vst1q_p16 (poly16_t * __a, poly16x8_t __b);
/*--2. Store a single lane into memory: vst1_lane / vst1q_lane ->
stores one element of the vector __b into memory at __a.
__c is the index of the element (lane) in __b to be stored.
The vst1_lane_* prototypes take the narrower vector types (e.g. int8x8_t);
the vst1q_lane_* prototypes take the wider ones (e.g. int8x16_t).
NOTE(review): __c is declared const int; presumably it must be a
compile-time constant within the lane count - confirm against the ACLE.--*/
void vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c);
void vst1_lane_s16 (int16_t * __a, int16x4_t __b, const int __c);
void vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c);
void vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c);
void vst1_lane_u8 (uint8_t * __a, uint8x8_t __b, const int __c);
void vst1_lane_u16 (uint16_t * __a, uint16x4_t __b, const int __c);
void vst1_lane_u32 (uint32_t * __a, uint32x2_t __b, const int __c);
void vst1_lane_p8 (poly8_t * __a, poly8x8_t __b, const int __c);
void vst1_lane_p16 (poly16_t * __a, poly16x4_t __b, const int __c);
void vst1_lane_s64 (int64_t * __a, int64x1_t __b, const int __c);
void vst1_lane_u64 (uint64_t * __a, uint64x1_t __b, const int __c);
void vst1q_lane_s8 (int8_t * __a, int8x16_t __b, const int __c);
void vst1q_lane_s16 (int16_t * __a, int16x8_t __b, const int __c);
void vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c);
void vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c);
void vst1q_lane_u8 (uint8_t * __a, uint8x16_t __b, const int __c);
void vst1q_lane_u16 (uint16_t * __a, uint16x8_t __b, const int __c);
void vst1q_lane_u32 (uint32_t * __a, uint32x4_t __b, const int __c);
void vst1q_lane_p8 (poly8_t * __a, poly8x16_t __b, const int __c);
void vst1q_lane_p16 (poly16_t * __a, poly16x8_t __b, const int __c);
void vst1q_lane_s64 (int64_t * __a, int64x2_t __b, const int __c);
void vst1q_lane_u64 (uint64_t * __a, uint64x2_t __b, const int __c);
/*--3. Store 2 vectors into memory: vst2 / vst2q ->
stores the 2 vectors carried in the structure __b into memory at __a,
interleaving the 2 vectors as they are written.
This list has no 64-bit element variants for the vst2q_* form.--*/
void vst2_s8 (int8_t * __a, int8x8x2_t __b);
void vst2_s16 (int16_t * __a, int16x4x2_t __b);
void vst2_s32 (int32_t * __a, int32x2x2_t __b);
void vst2_f32 (float32_t * __a, float32x2x2_t __b);
void vst2_u8 (uint8_t * __a, uint8x8x2_t __b);
void vst2_u16 (uint16_t * __a, uint16x4x2_t __b);
void vst2_u32 (uint32_t * __a, uint32x2x2_t __b);
void vst2_p8 (poly8_t * __a, poly8x8x2_t __b);
void vst2_p16 (poly16_t * __a, poly16x4x2_t __b);
void vst2_s64 (int64_t * __a, int64x1x2_t __b);
void vst2_u64 (uint64_t * __a, uint64x1x2_t __b);
void vst2q_s8 (int8_t * __a, int8x16x2_t __b);
void vst2q_s16 (int16_t * __a, int16x8x2_t __b);
void vst2q_s32 (int32_t * __a, int32x4x2_t __b);
void vst2q_f32 (float32_t * __a, float32x4x2_t __b);
void vst2q_u8 (uint8_t * __a, uint8x16x2_t __b);
void vst2q_u16 (uint16_t * __a, uint16x8x2_t __b);
void vst2q_u32 (uint32_t * __a, uint32x4x2_t __b);
void vst2q_p8 (poly8_t * __a, poly8x16x2_t __b);
void vst2q_p16 (poly16_t * __a, poly16x8x2_t __b);
/*--4. Store a lane of two elements into memory: vst2_lane / vst2q_lane ->
stores a lane of two elements from the double-vector structure __b into
memory at __a. The two elements come from the same lane of each vector,
and that lane index is __c.
This list has no 64-bit element variants, and no 8-bit element variants
for the vst2q_lane_* form.--*/
void vst2_lane_s8 (int8_t * __a, int8x8x2_t __b, const int __c);
void vst2_lane_s16 (int16_t * __a, int16x4x2_t __b, const int __c);
void vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, const int __c);
void vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c);
void vst2_lane_u8 (uint8_t * __a, uint8x8x2_t __b, const int __c);
void vst2_lane_u16 (uint16_t * __a, uint16x4x2_t __b, const int __c);
void vst2_lane_u32 (uint32_t * __a, uint32x2x2_t __b, const int __c);
void vst2_lane_p8 (poly8_t * __a, poly8x8x2_t __b, const int __c);
void vst2_lane_p16 (poly16_t * __a, poly16x4x2_t __b, const int __c);
void vst2q_lane_s16 (int16_t * __a, int16x8x2_t __b, const int __c);
void vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, const int __c);
void vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c);
void vst2q_lane_u16 (uint16_t * __a, uint16x8x2_t __b, const int __c);
void vst2q_lane_u32 (uint32_t * __a, uint32x4x2_t __b, const int __c);
void vst2q_lane_p16 (poly16_t * __a, poly16x8x2_t __b, const int __c);
/*--5. Store 3 vectors into memory: vst3 / vst3q ->
stores the 3 vectors carried in the structure __b into memory at __a,
interleaving the 3 vectors as they are written.
This list has no 64-bit element variants for the vst3q_* form.--*/
void vst3_s8 (int8_t * __a, int8x8x3_t __b);
void vst3_s16 (int16_t * __a, int16x4x3_t __b);
void vst3_s32 (int32_t * __a, int32x2x3_t __b);
void vst3_f32 (float32_t * __a, float32x2x3_t __b);
void  vst3_u8 (uint8_t * __a, uint8x8x3_t __b);
void vst3_u16 (uint16_t * __a, uint16x4x3_t __b);
void vst3_u32 (uint32_t * __a, uint32x2x3_t __b);
void vst3_p8 (poly8_t * __a, poly8x8x3_t __b);
void vst3_p16 (poly16_t * __a, poly16x4x3_t __b);
void vst3_s64 (int64_t * __a, int64x1x3_t __b);
void vst3_u64 (uint64_t * __a, uint64x1x3_t __b);
void vst3q_s8 (int8_t * __a, int8x16x3_t __b);
void vst3q_s16 (int16_t * __a, int16x8x3_t __b);
void vst3q_s32 (int32_t * __a, int32x4x3_t __b);
void vst3q_f32 (float32_t * __a, float32x4x3_t __b);
void vst3q_u8 (uint8_t * __a, uint8x16x3_t __b);
void vst3q_u16 (uint16_t * __a, uint16x8x3_t __b);
void vst3q_u32 (uint32_t * __a, uint32x4x3_t __b);
void vst3q_p8 (poly8_t * __a, poly8x16x3_t __b);
void vst3q_p16 (poly16_t * __a, poly16x8x3_t __b);
/*--6. Store a lane of three elements into memory: vst3_lane / vst3q_lane ->
stores a lane of three elements from the triple-vector structure __b into
memory at __a. The three elements come from the same lane of each vector,
and that lane index is __c.
This list has no 64-bit element variants, and no 8-bit element variants
for the vst3q_lane_* form.--*/
void vst3_lane_s8 (int8_t * __a, int8x8x3_t __b, const int __c);
void vst3_lane_s16 (int16_t * __a, int16x4x3_t __b, const int __c);
void vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, const int __c);
void vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c);
void vst3_lane_u8 (uint8_t * __a, uint8x8x3_t __b, const int __c);
void vst3_lane_u16 (uint16_t * __a, uint16x4x3_t __b, const int __c);
void vst3_lane_u32 (uint32_t * __a, uint32x2x3_t __b, const int __c);
void vst3_lane_p8 (poly8_t * __a, poly8x8x3_t __b, const int __c);
void vst3_lane_p16 (poly16_t * __a, poly16x4x3_t __b, const int __c);
void vst3q_lane_s16 (int16_t * __a, int16x8x3_t __b, const int __c);
void vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, const int __c);
void vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c);
void vst3q_lane_u16 (uint16_t * __a, uint16x8x3_t __b, const int __c);
void vst3q_lane_u32 (uint32_t * __a, uint32x4x3_t __b, const int __c);
void vst3q_lane_p16 (poly16_t * __a, poly16x8x3_t __b, const int __c);
/*--7. Store 4 vectors into memory: vst4 / vst4q ->
stores the 4 vectors carried in the structure __b into memory at __a,
interleaving the 4 vectors as they are written.
This list has no 64-bit element variants for the vst4q_* form.--*/
void vst4_s8 (int8_t * __a, int8x8x4_t __b);
void vst4_s16 (int16_t * __a, int16x4x4_t __b);
void vst4_s32 (int32_t * __a, int32x2x4_t __b);
void vst4_f32 (float32_t * __a, float32x2x4_t __b);
void vst4_u8 (uint8_t * __a, uint8x8x4_t __b);
void vst4_u16 (uint16_t * __a, uint16x4x4_t __b);
void vst4_u32 (uint32_t * __a, uint32x2x4_t __b);
void vst4_p8 (poly8_t * __a, poly8x8x4_t __b);
void vst4_p16 (poly16_t * __a, poly16x4x4_t __b);
void vst4_s64 (int64_t * __a, int64x1x4_t __b);
void vst4_u64 (uint64_t * __a, uint64x1x4_t __b);
void vst4q_s8 (int8_t * __a, int8x16x4_t __b);
void vst4q_s16 (int16_t * __a, int16x8x4_t __b);
void vst4q_s32 (int32_t * __a, int32x4x4_t __b);
void  vst4q_f32 (float32_t * __a, float32x4x4_t __b);
void vst4q_u8 (uint8_t * __a, uint8x16x4_t __b);
void vst4q_u16 (uint16_t * __a, uint16x8x4_t __b);
void vst4q_u32 (uint32_t * __a, uint32x4x4_t __b);
void vst4q_p8 (poly8_t * __a, poly8x16x4_t __b);
void vst4q_p16 (poly16_t * __a, poly16x8x4_t __b);
/*--8. Store a lane of four elements into memory: vst4_lane / vst4q_lane ->
stores a lane of four elements from the quad-vector structure __b into
memory at __a. The four elements come from the same lane of each vector,
and that lane index is __c.
This list has no 64-bit element variants, and no 8-bit element variants
for the vst4q_lane_* form.--*/
void vst4_lane_s8 (int8_t * __a, int8x8x4_t __b, const int __c);
void vst4_lane_s16 (int16_t * __a, int16x4x4_t __b, const int __c);
void vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, const int __c);
void vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c);
void vst4_lane_u8 (uint8_t * __a, uint8x8x4_t __b, const int __c);
void vst4_lane_u16 (uint16_t * __a, uint16x4x4_t __b, const int __c);
void vst4_lane_u32 (uint32_t * __a, uint32x2x4_t __b, const int __c);
void vst4_lane_p8 (poly8_t * __a, poly8x8x4_t __b, const int __c);
void vst4_lane_p16 (poly16_t * __a, poly16x4x4_t __b, const int __c);
void vst4q_lane_s16 (int16_t * __a, int16x8x4_t __b, const int __c);
void vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, const int __c);
void vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c);
void vst4q_lane_u16 (uint16_t * __a, uint16x8x4_t __b, const int __c);
void vst4q_lane_u32 (uint32_t * __a, uint32x4x4_t __b, const int __c);
void vst4q_lane_p16 (poly16_t * __a, poly16x8x4_t __b, const int __c);
/*********************************Reinterpret casts(type conversion)********************/
/*--Convert between types: vreinterpret -> treats a vector as having a different
datatype, without changing its value.
Naming: vreinterpret_<dst>_<src> returns the <dst> vector type and takes the
<src> vector type; the vreinterpretq_ prototypes use the wider vector types
(e.g. poly8x16_t) for both. Same-type pairs (e.g. p8 -> p8) are not listed.--*/
/* Destination poly8: poly8x8_t results, then poly8x16_t results. */
poly8x8_t vreinterpret_p8_s8 (int8x8_t __a);
poly8x8_t vreinterpret_p8_s16 (int16x4_t __a);
poly8x8_t vreinterpret_p8_s32 (int32x2_t __a);
poly8x8_t vreinterpret_p8_s64 (int64x1_t __a);
poly8x8_t vreinterpret_p8_f32 (float32x2_t __a);
poly8x8_t vreinterpret_p8_u8 (uint8x8_t __a);
poly8x8_t vreinterpret_p8_u16 (uint16x4_t __a);
poly8x8_t vreinterpret_p8_u32 (uint32x2_t __a);
poly8x8_t vreinterpret_p8_u64 (uint64x1_t __a);
poly8x8_t vreinterpret_p8_p16 (poly16x4_t __a);
poly8x16_t vreinterpretq_p8_s8 (int8x16_t __a);
poly8x16_t vreinterpretq_p8_s16 (int16x8_t __a);
poly8x16_t vreinterpretq_p8_s32 (int32x4_t __a);
poly8x16_t vreinterpretq_p8_s64 (int64x2_t __a);
poly8x16_t vreinterpretq_p8_f32 (float32x4_t __a);
poly8x16_t vreinterpretq_p8_u8 (uint8x16_t __a);
poly8x16_t vreinterpretq_p8_u16 (uint16x8_t __a);
poly8x16_t vreinterpretq_p8_u32 (uint32x4_t __a);
poly8x16_t vreinterpretq_p8_u64 (uint64x2_t __a);
poly8x16_t vreinterpretq_p8_p16 (poly16x8_t __a);
/* Destination poly16: poly16x4_t results, then poly16x8_t results. */
poly16x4_t vreinterpret_p16_s8 (int8x8_t __a);
poly16x4_t vreinterpret_p16_s16 (int16x4_t __a);
poly16x4_t vreinterpret_p16_s32 (int32x2_t __a);
poly16x4_t vreinterpret_p16_s64 (int64x1_t __a);
poly16x4_t vreinterpret_p16_f32 (float32x2_t __a);
poly16x4_t vreinterpret_p16_u8 (uint8x8_t __a);
poly16x4_t vreinterpret_p16_u16 (uint16x4_t __a);
poly16x4_t vreinterpret_p16_u32 (uint32x2_t __a);
poly16x4_t vreinterpret_p16_u64 (uint64x1_t __a);
poly16x4_t vreinterpret_p16_p8 (poly8x8_t __a);
poly16x8_t vreinterpretq_p16_s8 (int8x16_t __a);
poly16x8_t vreinterpretq_p16_s16 (int16x8_t __a);
poly16x8_t vreinterpretq_p16_s32 (int32x4_t __a);
poly16x8_t vreinterpretq_p16_s64 (int64x2_t __a);
poly16x8_t vreinterpretq_p16_f32 (float32x4_t __a);
poly16x8_t vreinterpretq_p16_u8 (uint8x16_t __a);
poly16x8_t vreinterpretq_p16_u16 (uint16x8_t __a);
poly16x8_t vreinterpretq_p16_u32 (uint32x4_t __a);
poly16x8_t vreinterpretq_p16_u64 (uint64x2_t __a);
poly16x8_t vreinterpretq_p16_p8 (poly8x16_t __a);
/* Destination float32: float32x2_t results, then float32x4_t results. */
float32x2_t vreinterpret_f32_s8 (int8x8_t __a);
float32x2_t vreinterpret_f32_s16 (int16x4_t __a);
float32x2_t vreinterpret_f32_s32 (int32x2_t __a);
float32x2_t vreinterpret_f32_s64 (int64x1_t __a);
float32x2_t vreinterpret_f32_u8 (uint8x8_t __a);
float32x2_t vreinterpret_f32_u16 (uint16x4_t __a);
float32x2_t vreinterpret_f32_u32 (uint32x2_t __a);
float32x2_t vreinterpret_f32_u64 (uint64x1_t __a);
float32x2_t vreinterpret_f32_p8 (poly8x8_t __a);
float32x2_t vreinterpret_f32_p16 (poly16x4_t __a);
float32x4_t vreinterpretq_f32_s8 (int8x16_t __a);
float32x4_t vreinterpretq_f32_s16 (int16x8_t __a);
float32x4_t vreinterpretq_f32_s32 (int32x4_t __a);
float32x4_t vreinterpretq_f32_s64 (int64x2_t __a);
float32x4_t vreinterpretq_f32_u8 (uint8x16_t __a);
float32x4_t vreinterpretq_f32_u16 (uint16x8_t __a);
float32x4_t vreinterpretq_f32_u32 (uint32x4_t __a);
float32x4_t vreinterpretq_f32_u64 (uint64x2_t __a);
float32x4_t vreinterpretq_f32_p8 (poly8x16_t __a);
float32x4_t vreinterpretq_f32_p16 (poly16x8_t __a);
/* Destination int64: int64x1_t results, then int64x2_t results. */
int64x1_t vreinterpret_s64_s8 (int8x8_t __a);
int64x1_t vreinterpret_s64_s16 (int16x4_t __a);
int64x1_t vreinterpret_s64_s32 (int32x2_t __a);
int64x1_t vreinterpret_s64_f32 (float32x2_t __a);
int64x1_t vreinterpret_s64_u8 (uint8x8_t __a);
int64x1_t vreinterpret_s64_u16 (uint16x4_t __a);
int64x1_t vreinterpret_s64_u32 (uint32x2_t __a);
int64x1_t vreinterpret_s64_u64 (uint64x1_t __a);
int64x1_t vreinterpret_s64_p8 (poly8x8_t __a);
int64x1_t vreinterpret_s64_p16 (poly16x4_t __a);
int64x2_t vreinterpretq_s64_s8 (int8x16_t __a);
int64x2_t vreinterpretq_s64_s16 (int16x8_t __a);
int64x2_t vreinterpretq_s64_s32 (int32x4_t __a);
int64x2_t vreinterpretq_s64_f32 (float32x4_t __a);
int64x2_t vreinterpretq_s64_u8 (uint8x16_t __a);
int64x2_t vreinterpretq_s64_u16 (uint16x8_t __a);
int64x2_t vreinterpretq_s64_u32 (uint32x4_t __a);
int64x2_t vreinterpretq_s64_u64 (uint64x2_t __a);
int64x2_t vreinterpretq_s64_p8 (poly8x16_t __a);
int64x2_t vreinterpretq_s64_p16 (poly16x8_t __a);
/* Destination uint64: uint64x1_t results, then uint64x2_t results. */
uint64x1_t vreinterpret_u64_s8 (int8x8_t __a);
uint64x1_t vreinterpret_u64_s16 (int16x4_t __a);
uint64x1_t vreinterpret_u64_s32 (int32x2_t __a);
uint64x1_t vreinterpret_u64_s64 (int64x1_t __a);
uint64x1_t vreinterpret_u64_f32 (float32x2_t __a);
uint64x1_t vreinterpret_u64_u8 (uint8x8_t __a);
uint64x1_t vreinterpret_u64_u16 (uint16x4_t __a);
uint64x1_t vreinterpret_u64_u32 (uint32x2_t __a);
uint64x1_t vreinterpret_u64_p8 (poly8x8_t __a);
uint64x1_t vreinterpret_u64_p16 (poly16x4_t __a);
uint64x2_t vreinterpretq_u64_s8 (int8x16_t __a);
uint64x2_t vreinterpretq_u64_s16 (int16x8_t __a);
uint64x2_t vreinterpretq_u64_s32 (int32x4_t __a);
uint64x2_t vreinterpretq_u64_s64 (int64x2_t __a);
uint64x2_t vreinterpretq_u64_f32 (float32x4_t __a);
uint64x2_t vreinterpretq_u64_u8 (uint8x16_t __a);
uint64x2_t vreinterpretq_u64_u16 (uint16x8_t __a);
uint64x2_t vreinterpretq_u64_u32 (uint32x4_t __a);
uint64x2_t vreinterpretq_u64_p8 (poly8x16_t __a);
uint64x2_t vreinterpretq_u64_p16 (poly16x8_t __a);
/* Destination int8: int8x8_t results, then int8x16_t results. */
int8x8_t vreinterpret_s8_s16 (int16x4_t __a);
int8x8_t vreinterpret_s8_s32 (int32x2_t __a);
int8x8_t vreinterpret_s8_s64 (int64x1_t __a);
int8x8_t vreinterpret_s8_f32 (float32x2_t __a);
int8x8_t vreinterpret_s8_u8 (uint8x8_t __a);
int8x8_t vreinterpret_s8_u16 (uint16x4_t __a);
int8x8_t vreinterpret_s8_u32 (uint32x2_t __a);
int8x8_t vreinterpret_s8_u64 (uint64x1_t __a);
int8x8_t vreinterpret_s8_p8 (poly8x8_t __a);
int8x8_t vreinterpret_s8_p16 (poly16x4_t __a);
int8x16_t vreinterpretq_s8_s16 (int16x8_t __a);
int8x16_t vreinterpretq_s8_s32 (int32x4_t __a);
int8x16_t vreinterpretq_s8_s64 (int64x2_t __a);
int8x16_t vreinterpretq_s8_f32 (float32x4_t __a);
int8x16_t vreinterpretq_s8_u8 (uint8x16_t __a);
int8x16_t vreinterpretq_s8_u16 (uint16x8_t __a);
int8x16_t vreinterpretq_s8_u32 (uint32x4_t __a);
int8x16_t vreinterpretq_s8_u64 (uint64x2_t __a);
int8x16_t vreinterpretq_s8_p8 (poly8x16_t __a);
int8x16_t vreinterpretq_s8_p16 (poly16x8_t __a);
/* Destination int16: int16x4_t results, then int16x8_t results. */
int16x4_t vreinterpret_s16_s8 (int8x8_t __a);
int16x4_t vreinterpret_s16_s32 (int32x2_t __a);
int16x4_t vreinterpret_s16_s64 (int64x1_t __a);
int16x4_t vreinterpret_s16_f32 (float32x2_t __a);
int16x4_t vreinterpret_s16_u8 (uint8x8_t __a);
int16x4_t vreinterpret_s16_u16 (uint16x4_t __a);
int16x4_t vreinterpret_s16_u32 (uint32x2_t __a);
int16x4_t vreinterpret_s16_u64 (uint64x1_t __a);
int16x4_t vreinterpret_s16_p8 (poly8x8_t __a);
int16x4_t vreinterpret_s16_p16 (poly16x4_t __a);
int16x8_t vreinterpretq_s16_s8 (int8x16_t __a);
int16x8_t vreinterpretq_s16_s32 (int32x4_t __a);
int16x8_t vreinterpretq_s16_s64 (int64x2_t __a);
int16x8_t vreinterpretq_s16_f32 (float32x4_t __a);
int16x8_t vreinterpretq_s16_u8 (uint8x16_t __a);
int16x8_t vreinterpretq_s16_u16 (uint16x8_t __a);
int16x8_t vreinterpretq_s16_u32 (uint32x4_t __a);
int16x8_t vreinterpretq_s16_u64 (uint64x2_t __a);
int16x8_t vreinterpretq_s16_p8 (poly8x16_t __a);
int16x8_t vreinterpretq_s16_p16 (poly16x8_t __a);
/* Destination int32: int32x2_t results, then int32x4_t results. */
int32x2_t vreinterpret_s32_s8 (int8x8_t __a);
int32x2_t vreinterpret_s32_s16 (int16x4_t __a);
int32x2_t vreinterpret_s32_s64 (int64x1_t __a);
int32x2_t vreinterpret_s32_f32 (float32x2_t __a);
int32x2_t vreinterpret_s32_u8 (uint8x8_t __a);
int32x2_t vreinterpret_s32_u16 (uint16x4_t __a);
int32x2_t vreinterpret_s32_u32 (uint32x2_t __a);
int32x2_t vreinterpret_s32_u64 (uint64x1_t __a);
int32x2_t vreinterpret_s32_p8 (poly8x8_t __a);
int32x2_t vreinterpret_s32_p16 (poly16x4_t __a);
int32x4_t vreinterpretq_s32_s8 (int8x16_t __a);
int32x4_t vreinterpretq_s32_s16 (int16x8_t __a);
int32x4_t vreinterpretq_s32_s64 (int64x2_t __a);
int32x4_t vreinterpretq_s32_f32 (float32x4_t __a);
int32x4_t vreinterpretq_s32_u8 (uint8x16_t __a);
int32x4_t vreinterpretq_s32_u16 (uint16x8_t __a);
int32x4_t vreinterpretq_s32_u32 (uint32x4_t __a);
int32x4_t vreinterpretq_s32_u64 (uint64x2_t __a);
int32x4_t vreinterpretq_s32_p8 (poly8x16_t __a);
int32x4_t vreinterpretq_s32_p16 (poly16x8_t __a);
/* Destination uint8: uint8x8_t results, then uint8x16_t results. */
uint8x8_t vreinterpret_u8_s8 (int8x8_t __a);
uint8x8_t vreinterpret_u8_s16 (int16x4_t __a);
uint8x8_t vreinterpret_u8_s32 (int32x2_t __a);
uint8x8_t vreinterpret_u8_s64 (int64x1_t __a);
uint8x8_t vreinterpret_u8_f32 (float32x2_t __a);
uint8x8_t vreinterpret_u8_u16 (uint16x4_t __a);
uint8x8_t vreinterpret_u8_u32 (uint32x2_t __a);
uint8x8_t vreinterpret_u8_u64 (uint64x1_t __a);
uint8x8_t vreinterpret_u8_p8 (poly8x8_t __a);
uint8x8_t vreinterpret_u8_p16 (poly16x4_t __a);
uint8x16_t vreinterpretq_u8_s8 (int8x16_t __a);
uint8x16_t vreinterpretq_u8_s16 (int16x8_t __a);
uint8x16_t vreinterpretq_u8_s32 (int32x4_t __a);
uint8x16_t vreinterpretq_u8_s64 (int64x2_t __a);
uint8x16_t vreinterpretq_u8_f32 (float32x4_t __a);
uint8x16_t vreinterpretq_u8_u16 (uint16x8_t __a);
uint8x16_t vreinterpretq_u8_u32 (uint32x4_t __a);
uint8x16_t vreinterpretq_u8_u64 (uint64x2_t __a);
uint8x16_t vreinterpretq_u8_p8 (poly8x16_t __a);
uint8x16_t vreinterpretq_u8_p16 (poly16x8_t __a);
/* Destination uint16: uint16x4_t results, then uint16x8_t results. */
uint16x4_t vreinterpret_u16_s8 (int8x8_t __a);
uint16x4_t vreinterpret_u16_s16 (int16x4_t __a);
uint16x4_t vreinterpret_u16_s32 (int32x2_t __a);
uint16x4_t vreinterpret_u16_s64 (int64x1_t __a);
uint16x4_t vreinterpret_u16_f32 (float32x2_t __a);
uint16x4_t vreinterpret_u16_u8 (uint8x8_t __a);
uint16x4_t vreinterpret_u16_u32 (uint32x2_t __a);
uint16x4_t vreinterpret_u16_u64 (uint64x1_t __a);
uint16x4_t vreinterpret_u16_p8 (poly8x8_t __a);
uint16x4_t vreinterpret_u16_p16 (poly16x4_t __a);
uint16x8_t vreinterpretq_u16_s8 (int8x16_t __a);
uint16x8_t vreinterpretq_u16_s16 (int16x8_t __a);
uint16x8_t vreinterpretq_u16_s32 (int32x4_t __a);
uint16x8_t vreinterpretq_u16_s64 (int64x2_t __a);
uint16x8_t vreinterpretq_u16_f32 (float32x4_t __a);
uint16x8_t vreinterpretq_u16_u8 (uint8x16_t __a);
uint16x8_t vreinterpretq_u16_u32 (uint32x4_t __a);
uint16x8_t vreinterpretq_u16_u64 (uint64x2_t __a);
uint16x8_t vreinterpretq_u16_p8 (poly8x16_t __a);
uint16x8_t vreinterpretq_u16_p16 (poly16x8_t __a);
/* Destination uint32: uint32x2_t results, then uint32x4_t results. */
uint32x2_t vreinterpret_u32_s8 (int8x8_t __a);
uint32x2_t vreinterpret_u32_s16 (int16x4_t __a);
uint32x2_t vreinterpret_u32_s32 (int32x2_t __a);
uint32x2_t vreinterpret_u32_s64 (int64x1_t __a);
uint32x2_t vreinterpret_u32_f32 (float32x2_t __a);
uint32x2_t vreinterpret_u32_u8 (uint8x8_t __a);
uint32x2_t vreinterpret_u32_u16 (uint16x4_t __a);
uint32x2_t vreinterpret_u32_u64 (uint64x1_t __a);
uint32x2_t vreinterpret_u32_p8 (poly8x8_t __a);
uint32x2_t vreinterpret_u32_p16 (poly16x4_t __a);
uint32x4_t vreinterpretq_u32_s8 (int8x16_t __a);
uint32x4_t vreinterpretq_u32_s16 (int16x8_t __a);
uint32x4_t vreinterpretq_u32_s32 (int32x4_t __a);
uint32x4_t vreinterpretq_u32_s64 (int64x2_t __a);
uint32x4_t vreinterpretq_u32_f32 (float32x4_t __a);
uint32x4_t vreinterpretq_u32_u8 (uint8x16_t __a);
uint32x4_t vreinterpretq_u32_u16 (uint16x8_t __a);
uint32x4_t vreinterpretq_u32_u64 (uint64x2_t __a);
uint32x4_t vreinterpretq_u32_p8 (poly8x16_t __a);
uint32x4_t vreinterpretq_u32_p16 (poly16x8_t __a);

總結

以上是生活随笔為你收集整理的Neon Intrinsics各函数介绍的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。

歡迎分享!

轉載請說明來源于"生活随笔",并保留原作者的名字。

本文地址:Neon Intrinsics各函数介绍

日本乱偷人妻中文字幕 | 精品国产麻豆免费人成网站 | 国产午夜手机精彩视频 | 啦啦啦www在线观看免费视频 | 俺去俺来也在线www色官网 | 久久国产精品偷任你爽任你 | 最近的中文字幕在线看视频 | 狂野欧美激情性xxxx | 国产精品福利视频导航 | 国产极品视觉盛宴 | 无码人妻av免费一区二区三区 | 99riav国产精品视频 | 国产精品国产自线拍免费软件 | 久久综合香蕉国产蜜臀av | 国产精品资源一区二区 | 乌克兰少妇xxxx做受 | 亚洲区小说区激情区图片区 | 成人无码视频免费播放 | 久久国产精品二国产精品 | 亚洲综合精品香蕉久久网 | 天天拍夜夜添久久精品大 | 国产精品美女久久久久av爽李琼 | 蜜桃臀无码内射一区二区三区 | 秋霞成人午夜鲁丝一区二区三区 | 对白脏话肉麻粗话av | 中文无码精品a∨在线观看不卡 | 亚洲精品www久久久 | 人人妻人人藻人人爽欧美一区 | 国产内射爽爽大片视频社区在线 | 欧美怡红院免费全部视频 | 日韩精品无码一本二本三本色 | 色 综合 欧美 亚洲 国产 | 欧美国产亚洲日韩在线二区 | 玩弄人妻少妇500系列视频 | 亚洲欧美日韩国产精品一区二区 | 性欧美熟妇videofreesex | 久久www免费人成人片 | 无码精品人妻一区二区三区av | 人人妻人人澡人人爽人人精品 | 少妇性l交大片 | 日本熟妇大屁股人妻 | 人妻尝试又大又粗久久 | 亚洲色欲久久久综合网东京热 | 少妇无码av无码专区在线观看 | 亚洲一区二区观看播放 | 中国女人内谢69xxxx | 最近免费中文字幕中文高清百度 | 精品人妻人人做人人爽 | 人妻夜夜爽天天爽三区 | 熟女俱乐部五十路六十路av | 国产精品18久久久久久麻辣 | 欧美人与物videos另类 | 人妻无码αv中文字幕久久琪琪布 | 国产 浪潮av性色四虎 | 成人精品一区二区三区中文字幕 | 欧美性猛交内射兽交老熟妇 | 久久精品女人的天堂av | 亚洲国产精品一区二区美利坚 | 亚洲一区av无码专区在线观看 | 精品国产成人一区二区三区 | 国产两女互慰高潮视频在线观看 | 中文字幕无码乱人伦 | 国产内射老熟女aaaa | 久久97精品久久久久久久不卡 | 亚洲国产精品久久人人爱 | 久久久国产一区二区三区 | 午夜理论片yy44880影院 | 久久综合给久久狠狠97色 | 天天综合网天天综合色 | 97人妻精品一区二区三区 | 成人欧美一区二区三区黑人免费 | 欧美熟妇另类久久久久久多毛 | 欧美人与禽猛交狂配 | 日韩精品无码一本二本三本色 | 亚洲国产精品美女久久久久 | 99精品视频在线观看免费 | 国产一区二区三区四区五区加勒比 | 无码午夜成人1000部免费视频 | 亚洲 另类 在线 欧美 制服 | 99久久精品日本一区二区免费 | 国产真人无遮挡作爱免费视频 | 在线天堂新版最新版在线8 | 久久久久久久女国产乱让韩 | 精品久久久久久人妻无码中文字幕 | 在线观看国产午夜福利片 | 欧美黑人巨大xxxxx | 国产成人精品久久亚洲高清不卡 | 老头边吃奶边弄进去呻吟 | 久久99久久99精品中文字幕 | 蜜桃无码一区二区三区 | 欧美猛少妇色xxxxx | 亚洲精品综合一区二区三区在线 | 国产69精品久久久久app下载 | 色一情一乱一伦一视频免费看 | 亚洲国产综合无码一区 | 欧美35页视频在线观看 | 色综合天天综合狠狠爱 | 无码av免费一区二区三区试看 | 精品国产av色一区二区深夜久久 | 国产亚洲日韩欧美另类第八页 | 亚洲男人av香蕉爽爽爽爽 | 老司机亚洲精品影院 | 欧美黑人性暴力猛交喷水 | 真人与拘做受免费视频一 | 亚洲日本一区二区三区在线 | 亚洲一区二区三区播放 | 日韩人妻无码中文字幕视频 | 青草青草久热国产精品 | 日产精品99久久久久久 | 天堂无码人妻精品一区二区三区 | 国产在线精品一区二区三区直播 | 亚洲一区二区三区含羞草 | 无码人妻出轨黑人中文字幕 | 蜜臀av在线播放 久久综合激激的五月天 | 久久久精品人妻久久影视 | 人人妻人人藻人人爽欧美一区 | 国产成人精品视频ⅴa片软件竹菊 | 性做久久久久久久免费看 | 精品无人区无码乱码毛片国产 | 国产精品嫩草久久久久 | 日韩av无码中文无码电影 | 国产乱人偷精品人妻a片 | www国产亚洲精品久久久日本 | 国内少妇偷人精品视频免费 | 亚拍精品一区二区三区探花 | 东京无码熟妇人妻av在线网址 | yw尤物av无码国产在线观看 | 18黄暴禁片在线观看 | 老头边吃奶边弄进去呻吟 | 亚洲熟熟妇xxxx | 久久精品女人的天堂av | 亚洲精品无码人妻无码 | 
76少妇精品导航 | 欧美日韩一区二区三区自拍 | 国产亚洲精品久久久久久久久动漫 | 老子影院午夜伦不卡 | 欧美 丝袜 自拍 制服 另类 | 中文字幕av日韩精品一区二区 | 无套内射视频囯产 | 国产乱人无码伦av在线a | 人妻体内射精一区二区三四 | 日韩欧美群交p片內射中文 | 亚洲自偷精品视频自拍 | 强开小婷嫩苞又嫩又紧视频 | 成年美女黄网站色大免费全看 | 5858s亚洲色大成网站www | 午夜精品一区二区三区在线观看 | 国产日产欧产精品精品app | 国产人妻精品一区二区三区 | 亚洲日韩乱码中文无码蜜桃臀网站 | 好男人www社区 | 老司机亚洲精品影院无码 | 18无码粉嫩小泬无套在线观看 | 老头边吃奶边弄进去呻吟 | 亚洲精品国产精品乱码视色 | 18禁黄网站男男禁片免费观看 | 亚洲呦女专区 | 日本熟妇人妻xxxxx人hd | 欧美精品免费观看二区 | 毛片内射-百度 | 日韩人妻少妇一区二区三区 | 国产欧美亚洲精品a | 国产农村妇女aaaaa视频 撕开奶罩揉吮奶头视频 | 色综合久久中文娱乐网 | 欧美性色19p | 好爽又高潮了毛片免费下载 | aa片在线观看视频在线播放 | 久精品国产欧美亚洲色aⅴ大片 | 一个人看的www免费视频在线观看 | 青青青手机频在线观看 | 人妻有码中文字幕在线 | 国产精品久久久久9999小说 | 国产成人无码一二三区视频 | а√天堂www在线天堂小说 | 久久综合九色综合欧美狠狠 | 无码免费一区二区三区 | 亚洲成av人片在线观看无码不卡 | 国产精品久久久久久久影院 | 中文无码成人免费视频在线观看 | 嫩b人妻精品一区二区三区 | 久久久久久久人妻无码中文字幕爆 | 一本大道久久东京热无码av | 亚洲 高清 成人 动漫 | 国产成人精品久久亚洲高清不卡 | 国产亚洲精品久久久久久久久动漫 | 成人免费视频一区二区 | 精品一二三区久久aaa片 | 无码乱肉视频免费大全合集 | 国产精品久久国产精品99 | 少妇性l交大片欧洲热妇乱xxx | 一区二区三区高清视频一 | 国产国语老龄妇女a片 | 日本大香伊一区二区三区 | 亚洲 激情 小说 另类 欧美 | 色一情一乱一伦一区二区三欧美 | 亚洲精品国产品国语在线观看 | 四虎影视成人永久免费观看视频 | 少妇人妻大乳在线视频 | а天堂中文在线官网 | 性做久久久久久久久 | 亚洲 高清 成人 动漫 | 国产精品久久久久影院嫩草 | 精品 日韩 国产 欧美 视频 | 久久精品国产99久久6动漫 | 国产亚洲精品久久久久久久 | 无码av中文字幕免费放 | 老头边吃奶边弄进去呻吟 | 久久视频在线观看精品 | 国产熟女一区二区三区四区五区 | 国产精品无码永久免费888 | 日韩少妇白浆无码系列 | 99久久久无码国产精品免费 | 欧美日韩人成综合在线播放 | 少妇久久久久久人妻无码 | 亚洲国产日韩a在线播放 | 日韩精品一区二区av在线 | 日韩av无码一区二区三区不卡 | 日韩av无码一区二区三区不卡 | 国产激情综合五月久久 | 亚洲精品成人av在线 | 国产乡下妇女做爰 | 亚洲人成影院在线无码按摩店 | 扒开双腿吃奶呻吟做受视频 | 性色欲网站人妻丰满中文久久不卡 | 久久国语露脸国产精品电影 | 影音先锋中文字幕无码 | 久久99精品久久久久久 | 国产精品99久久精品爆乳 | 国产精品国产自线拍免费软件 | 无码人妻av免费一区二区三区 | 中文字幕无线码免费人妻 | 强伦人妻一区二区三区视频18 | 欧美老熟妇乱xxxxx | 国产熟妇另类久久久久 | 丁香花在线影院观看在线播放 | 国产人成高清在线视频99最全资源 | 亚洲经典千人经典日产 | 色欲综合久久中文字幕网 | 亚洲精品中文字幕 | 久久精品国产亚洲精品 | 高清国产亚洲精品自在久久 | 国产人妻人伦精品1国产丝袜 | 少妇人妻av毛片在线看 | 中国大陆精品视频xxxx | 捆绑白丝粉色jk震动捧喷白浆 | 亚洲成av人片天堂网无码】 | 搡女人真爽免费视频大全 | 亚洲乱码日产精品bd | 天天躁日日躁狠狠躁免费麻豆 | 久久 国产 尿 小便 嘘嘘 | 天天摸天天透天天添 | 色老头在线一区二区三区 | 亚洲国精产品一二二线 | 日本精品人妻无码77777 天堂一区人妻无码 | 色婷婷综合中文久久一本 | 精品国产福利一区二区 | 东京热一精品无码av | 鲁一鲁av2019在线 | 精品成人av一区二区三区 | 国产成人精品优优av | 国产手机在线αⅴ片无码观看 | 精品国产成人一区二区三区 | 国产成人无码区免费内射一片色欲 | 久久国产劲爆∧v内射 | 久久综合香蕉国产蜜臀av | 
夜精品a片一区二区三区无码白浆 | 国产成人av免费观看 | 久久久成人毛片无码 | 亚洲国产精华液网站w | 国产精品视频免费播放 | 人妻尝试又大又粗久久 | 天下第一社区视频www日本 | 日韩精品成人一区二区三区 | 黄网在线观看免费网站 | 99久久无码一区人妻 | 澳门永久av免费网站 | 亚洲小说春色综合另类 | 熟妇激情内射com | 久久精品国产大片免费观看 | 国产精华av午夜在线观看 | 久久婷婷五月综合色国产香蕉 | 国内少妇偷人精品视频 | 成人无码视频免费播放 | 中文字幕久久久久人妻 | 亚洲 激情 小说 另类 欧美 | 香蕉久久久久久av成人 | 久久久久av无码免费网 | 国产sm调教视频在线观看 | 亚洲欧美国产精品专区久久 | ass日本丰满熟妇pics | 成人免费视频视频在线观看 免费 | 国产成人av免费观看 | 国产av剧情md精品麻豆 | 99久久精品日本一区二区免费 | 欧美精品无码一区二区三区 | 久久久久se色偷偷亚洲精品av | 久久伊人色av天堂九九小黄鸭 | 免费看男女做好爽好硬视频 | 无码人妻丰满熟妇区毛片18 | 少妇人妻偷人精品无码视频 | 天天拍夜夜添久久精品大 | 狠狠色噜噜狠狠狠7777奇米 | 男女猛烈xx00免费视频试看 | 久9re热视频这里只有精品 | 国产精品第一国产精品 | 国产精品久久久久7777 | 精品无码国产一区二区三区av | 奇米综合四色77777久久 东京无码熟妇人妻av在线网址 | 久久精品99久久香蕉国产色戒 | 国产在线无码精品电影网 | 中国大陆精品视频xxxx | 久久五月精品中文字幕 | 国产精品嫩草久久久久 | 亚洲小说春色综合另类 | 自拍偷自拍亚洲精品10p | 最近的中文字幕在线看视频 | 亚洲一区二区三区在线观看网站 | 亚洲精品一区二区三区大桥未久 | 国产av剧情md精品麻豆 | 四虎国产精品一区二区 | 妺妺窝人体色www在线小说 | 午夜精品一区二区三区在线观看 | 性色av无码免费一区二区三区 | 久久www免费人成人片 | 国产亚洲人成a在线v网站 | 国产综合色产在线精品 | 国产偷国产偷精品高清尤物 | 国产精品对白交换视频 | 又大又硬又爽免费视频 | 欧美真人作爱免费视频 | 人妻aⅴ无码一区二区三区 | 国产亲子乱弄免费视频 | 蜜桃视频插满18在线观看 | 国产午夜无码视频在线观看 | www国产精品内射老师 | 玩弄人妻少妇500系列视频 | 中文字幕乱码中文乱码51精品 | 亚洲欧美日韩国产精品一区二区 | 欧美人与牲动交xxxx | 国产精品怡红院永久免费 | 水蜜桃亚洲一二三四在线 | 亚洲va欧美va天堂v国产综合 | 99久久人妻精品免费二区 | 日本肉体xxxx裸交 | 麻豆国产97在线 | 欧洲 | 熟妇女人妻丰满少妇中文字幕 | 小泽玛莉亚一区二区视频在线 | 亚洲一区二区三区偷拍女厕 | 国产成人精品优优av | 精品 日韩 国产 欧美 视频 | 欧洲极品少妇 | 高潮毛片无遮挡高清免费 | 国产午夜视频在线观看 | 亚洲精品一区二区三区四区五区 | 日韩人妻无码一区二区三区久久99 | 永久黄网站色视频免费直播 | 欧美性生交活xxxxxdddd | 国产乱人伦偷精品视频 | 性欧美熟妇videofreesex | 天天av天天av天天透 | 高潮毛片无遮挡高清免费 | 国産精品久久久久久久 | 在线观看国产午夜福利片 | 黄网在线观看免费网站 | 国产精品爱久久久久久久 | 国产精品18久久久久久麻辣 | 国产两女互慰高潮视频在线观看 | 在线а√天堂中文官网 | 初尝人妻少妇中文字幕 | 大屁股大乳丰满人妻 | www国产亚洲精品久久网站 | 鲁大师影院在线观看 | 亚洲天堂2017无码 | 牲欲强的熟妇农村老妇女视频 | 久久精品国产精品国产精品污 | 国产亚洲精品久久久久久大师 | 天堂久久天堂av色综合 | 麻豆人妻少妇精品无码专区 | 一本久久a久久精品vr综合 | 成人免费视频视频在线观看 免费 | av无码久久久久不卡免费网站 | 成年美女黄网站色大免费全看 | 初尝人妻少妇中文字幕 | 亚洲中文字幕无码中字 | 亚洲成色www久久网站 | 好爽又高潮了毛片免费下载 | 久久婷婷五月综合色国产香蕉 | 荫蒂添的好舒服视频囗交 | 高中生自慰www网站 | 高清国产亚洲精品自在久久 | 中文字幕中文有码在线 | 又湿又紧又大又爽a视频国产 | 欧美丰满老熟妇xxxxx性 | 国产精品99久久精品爆乳 | 人妻插b视频一区二区三区 | 狂野欧美性猛xxxx乱大交 | 欧美人与禽猛交狂配 | 久久人人97超碰a片精品 | 国产国语老龄妇女a片 | 
成人精品视频一区二区 | 成人影院yy111111在线观看 | 在线观看国产午夜福利片 | 初尝人妻少妇中文字幕 | 亚洲大尺度无码无码专区 | 色一情一乱一伦 | 久青草影院在线观看国产 | 国产日产欧产精品精品app | 麻豆国产97在线 | 欧洲 | 少妇无码av无码专区在线观看 | 东京无码熟妇人妻av在线网址 | 无人区乱码一区二区三区 | 日本在线高清不卡免费播放 | 国产精品怡红院永久免费 | 欧美精品国产综合久久 | 国产特级毛片aaaaaa高潮流水 | 欧美自拍另类欧美综合图片区 | 高清无码午夜福利视频 | 国产在线一区二区三区四区五区 | 暴力强奷在线播放无码 | 久久综合香蕉国产蜜臀av | 999久久久国产精品消防器材 | 人妻人人添人妻人人爱 | 欧美成人家庭影院 | 撕开奶罩揉吮奶头视频 | 高中生自慰www网站 | 亚洲精品一区二区三区四区五区 | 鲁大师影院在线观看 | 欧美人妻一区二区三区 | 清纯唯美经典一区二区 | 国产成人精品一区二区在线小狼 | 波多野结衣乳巨码无在线观看 | 伊人久久大香线焦av综合影院 | 久久午夜无码鲁丝片秋霞 | 国产三级精品三级男人的天堂 | 精品国精品国产自在久国产87 | 中文字幕无线码 | 欧美日韩在线亚洲综合国产人 | 亚洲欧洲日本无在线码 | 国产成人无码av在线影院 | 性色欲情网站iwww九文堂 | 欧美人与善在线com | 国产卡一卡二卡三 | 成人无码精品一区二区三区 | 俺去俺来也在线www色官网 | 丰满人妻精品国产99aⅴ | 色一情一乱一伦一视频免费看 | 精品无码成人片一区二区98 | 丰腴饱满的极品熟妇 | 国产绳艺sm调教室论坛 | 两性色午夜视频免费播放 | 久9re热视频这里只有精品 | 久精品国产欧美亚洲色aⅴ大片 | 无码精品国产va在线观看dvd | 嫩b人妻精品一区二区三区 | 中文字幕人妻丝袜二区 | 婷婷五月综合缴情在线视频 | 欧美高清在线精品一区 | 性欧美熟妇videofreesex | 国产精品第一国产精品 | 亚洲精品一区国产 | 日本在线高清不卡免费播放 | 亚洲人交乣女bbw | 日韩欧美成人免费观看 | 1000部夫妻午夜免费 | 国产内射老熟女aaaa | 国产黑色丝袜在线播放 | 久久人人爽人人爽人人片ⅴ | 久久亚洲日韩精品一区二区三区 | 成在人线av无码免费 | 四虎4hu永久免费 | 国产片av国语在线观看 | 中文字幕人成乱码熟女app | 欧美 日韩 人妻 高清 中文 | 国产美女精品一区二区三区 | 一本色道久久综合狠狠躁 | 亚洲精品成人福利网站 | 亚洲最大成人网站 | 午夜性刺激在线视频免费 | 牲欲强的熟妇农村老妇女视频 | 亚洲热妇无码av在线播放 | 日本护士毛茸茸高潮 | 久久国语露脸国产精品电影 | 精品人妻中文字幕有码在线 | 极品嫩模高潮叫床 | 国产精品无码一区二区三区不卡 | 欧美野外疯狂做受xxxx高潮 | 国产亚洲美女精品久久久2020 | 夜夜躁日日躁狠狠久久av | 中国大陆精品视频xxxx | 欧美丰满熟妇xxxx性ppx人交 | 任你躁国产自任一区二区三区 | 娇妻被黑人粗大高潮白浆 | 国产亚洲人成a在线v网站 | 国内精品久久久久久中文字幕 | 亚洲国产欧美国产综合一区 | 77777熟女视频在线观看 а天堂中文在线官网 | 日本乱偷人妻中文字幕 | 一本色道婷婷久久欧美 | 久久精品一区二区三区四区 | 国产午夜亚洲精品不卡 | 国产精品亚洲五月天高清 | 免费无码一区二区三区蜜桃大 | 国产另类ts人妖一区二区 | 人妻无码久久精品人妻 | 在线观看免费人成视频 | 亚洲天堂2017无码 | 人妻无码久久精品人妻 | 日韩人妻无码一区二区三区久久99 | 一二三四社区在线中文视频 | 亚洲国产精品毛片av不卡在线 | 国产莉萝无码av在线播放 | 永久免费观看美女裸体的网站 | 成 人 免费观看网站 | 麻豆国产丝袜白领秘书在线观看 | 九九综合va免费看 | 国产精品久久久av久久久 | 久久久中文久久久无码 | 国产在线一区二区三区四区五区 | 男女下面进入的视频免费午夜 | 国产精品第一国产精品 | 国产亚av手机在线观看 | 国产农村妇女aaaaa视频 撕开奶罩揉吮奶头视频 | 国产精品成人av在线观看 | 久久综合网欧美色妞网 | 久久综合网欧美色妞网 | 欧美精品在线观看 | 欧美性猛交xxxx富婆 | 日本爽爽爽爽爽爽在线观看免 | 亚洲欧美精品伊人久久 | 大屁股大乳丰满人妻 | 国产精品福利视频导航 | 日韩亚洲欧美中文高清在线 | 精品无人区无码乱码毛片国产 | 麻豆国产丝袜白领秘书在线观看 | 
97无码免费人妻超级碰碰夜夜 | 水蜜桃色314在线观看 | 亚洲国产日韩a在线播放 | 国产人妻久久精品二区三区老狼 | 久久天天躁夜夜躁狠狠 | 欧美午夜特黄aaaaaa片 | 国产手机在线αⅴ片无码观看 | 大肉大捧一进一出视频出来呀 | 中文字幕人成乱码熟女app | 国内综合精品午夜久久资源 | 亚洲日韩乱码中文无码蜜桃臀网站 | 久久人人爽人人爽人人片av高清 | 久久综合给合久久狠狠狠97色 | 亚洲娇小与黑人巨大交 | 精品人妻人人做人人爽 | 日日躁夜夜躁狠狠躁 | 中文字幕av无码一区二区三区电影 | 伊人久久大香线蕉av一区二区 | 久久伊人色av天堂九九小黄鸭 | 婷婷色婷婷开心五月四房播播 | 丰腴饱满的极品熟妇 | 在线成人www免费观看视频 | 人人妻人人澡人人爽欧美一区九九 | 亚洲经典千人经典日产 | 欧美人与禽猛交狂配 | 久久久久成人片免费观看蜜芽 | 水蜜桃色314在线观看 | 国产亚洲美女精品久久久2020 | 亚洲a无码综合a国产av中文 | 精品国产精品久久一区免费式 | 国产热a欧美热a在线视频 | 色五月五月丁香亚洲综合网 | 欧美性色19p | 亚洲精品一区二区三区婷婷月 | 疯狂三人交性欧美 | 黑人大群体交免费视频 | 日日躁夜夜躁狠狠躁 | 97人妻精品一区二区三区 | 精品国产福利一区二区 | 亚洲综合久久一区二区 | 国产麻豆精品一区二区三区v视界 | 窝窝午夜理论片影院 | 久久久久免费精品国产 | 娇妻被黑人粗大高潮白浆 | 国产午夜亚洲精品不卡 | 亚洲欧美精品aaaaaa片 | 波多野结衣高清一区二区三区 | 国产精品a成v人在线播放 | 国产精品国产自线拍免费软件 | 色综合天天综合狠狠爱 | 中文字幕无线码免费人妻 | 熟妇人妻无乱码中文字幕 | 久久综合给久久狠狠97色 | 无码吃奶揉捏奶头高潮视频 | 国产成人无码av片在线观看不卡 | 自拍偷自拍亚洲精品被多人伦好爽 | 老子影院午夜精品无码 | 午夜福利不卡在线视频 | 国产 精品 自在自线 | 荫蒂被男人添的好舒服爽免费视频 | 乱人伦中文视频在线观看 | 粗大的内捧猛烈进出视频 | 亚洲精品久久久久中文第一幕 | 波多野结衣av一区二区全免费观看 | 欧美肥老太牲交大战 | 国产无遮挡又黄又爽免费视频 | 秋霞成人午夜鲁丝一区二区三区 | 97人妻精品一区二区三区 | 国产肉丝袜在线观看 | 亚洲精品国偷拍自产在线观看蜜桃 | 亲嘴扒胸摸屁股激烈网站 | 亚欧洲精品在线视频免费观看 | 精品国产乱码久久久久乱码 | 国产97色在线 | 免 | 亚洲中文字幕无码一久久区 | 亚洲の无码国产の无码步美 | 亚洲爆乳大丰满无码专区 | 无遮无挡爽爽免费视频 | 无码人妻少妇伦在线电影 | 极品尤物被啪到呻吟喷水 | 国产精品毛多多水多 | 国产亲子乱弄免费视频 | 久久视频在线观看精品 | 久久精品无码一区二区三区 | 人人爽人人澡人人人妻 | 无码成人精品区在线观看 | 色婷婷香蕉在线一区二区 | 久久99国产综合精品 | 麻豆国产97在线 | 欧洲 | 国产另类ts人妖一区二区 | 国产精品欧美成人 | 亚洲日韩一区二区三区 | 久久综合激激的五月天 | 国产人妻久久精品二区三区老狼 | 一个人看的www免费视频在线观看 | 牲交欧美兽交欧美 | 国产精品怡红院永久免费 | 亚洲中文字幕成人无码 | 日本丰满熟妇videos | 国产成人一区二区三区在线观看 | 国产片av国语在线观看 | 日本精品人妻无码77777 天堂一区人妻无码 | 中文字幕日产无线码一区 | 2019午夜福利不卡片在线 | 成人片黄网站色大片免费观看 | 妺妺窝人体色www在线小说 | 国产成人无码区免费内射一片色欲 | 强伦人妻一区二区三区视频18 | 丝袜足控一区二区三区 | 国精品人妻无码一区二区三区蜜柚 | 亚洲精品中文字幕乱码 | 成人精品视频一区二区 | 亚洲成a人片在线观看日本 | 无码成人精品区在线观看 | 无码国内精品人妻少妇 | 99久久久国产精品无码免费 | 日韩成人一区二区三区在线观看 | 少妇性l交大片欧洲热妇乱xxx | 国产成人一区二区三区在线观看 | 在线观看免费人成视频 | 欧美性生交xxxxx久久久 | 国产精品久久久久久无码 | 日本高清一区免费中文视频 | 免费中文字幕日韩欧美 | 色妞www精品免费视频 | 精品国产av色一区二区深夜久久 | 爱做久久久久久 | 国产精品亚洲综合色区韩国 | 亚洲精品午夜无码电影网 | 波多野结衣一区二区三区av免费 | 欧美激情内射喷水高潮 | 国产性猛交╳xxx乱大交 国产精品久久久久久无码 欧洲欧美人成视频在线 | 
国产精品亚洲专区无码不卡 | 国产午夜精品一区二区三区嫩草 | 亚洲一区二区三区国产精华液 | 夜夜影院未满十八勿进 | 亚洲综合久久一区二区 | 俺去俺来也在线www色官网 | 欧美猛少妇色xxxxx | 激情爆乳一区二区三区 | 精品国产乱码久久久久乱码 | 国产亚洲美女精品久久久2020 | 性欧美videos高清精品 | 免费国产黄网站在线观看 | 国产极品美女高潮无套在线观看 | 水蜜桃色314在线观看 | 内射欧美老妇wbb | 日日碰狠狠丁香久燥 | 蜜桃臀无码内射一区二区三区 | 精品人人妻人人澡人人爽人人 | 欧洲熟妇精品视频 | 台湾无码一区二区 | 日韩av无码一区二区三区 | 特级做a爰片毛片免费69 | 亚洲国产欧美日韩精品一区二区三区 | 亚洲色无码一区二区三区 | 九九综合va免费看 | 青青久在线视频免费观看 | 久久久婷婷五月亚洲97号色 | 红桃av一区二区三区在线无码av | 国产精品嫩草久久久久 | 亚洲精品国偷拍自产在线麻豆 | 日韩人妻少妇一区二区三区 | 狠狠色欧美亚洲狠狠色www | 国产成人综合在线女婷五月99播放 | аⅴ资源天堂资源库在线 | 精品乱码久久久久久久 | 国产成人精品视频ⅴa片软件竹菊 | 西西人体www44rt大胆高清 | 国产精品久久久久久亚洲影视内衣 | 国产电影无码午夜在线播放 | 麻豆国产97在线 | 欧洲 | 精品人妻人人做人人爽夜夜爽 | 国产精品久久久久影院嫩草 | 一区二区三区高清视频一 | 麻豆国产丝袜白领秘书在线观看 | 麻豆国产丝袜白领秘书在线观看 | 中文精品无码中文字幕无码专区 | 成 人影片 免费观看 | 国产无套粉嫩白浆在线 | 伊人久久大香线蕉午夜 | 日韩精品a片一区二区三区妖精 | 77777熟女视频在线观看 а天堂中文在线官网 | 丝袜足控一区二区三区 | 国产尤物精品视频 | 水蜜桃av无码 | 免费人成网站视频在线观看 | 亚洲色在线无码国产精品不卡 | 国产乱人伦偷精品视频 | 国产精品a成v人在线播放 | 国产熟女一区二区三区四区五区 | 成熟妇人a片免费看网站 | 日日麻批免费40分钟无码 | 日日摸夜夜摸狠狠摸婷婷 | 曰韩少妇内射免费播放 | 欧美日韩亚洲国产精品 | 中文字幕乱码人妻无码久久 | 国产av久久久久精东av | 成在人线av无码免观看麻豆 | 久久精品国产大片免费观看 | 午夜精品一区二区三区在线观看 | 无码人妻精品一区二区三区下载 | 久久 国产 尿 小便 嘘嘘 | а√资源新版在线天堂 | 性啪啪chinese东北女人 | 国产乱人伦偷精品视频 | 国产精品无码mv在线观看 | 精品夜夜澡人妻无码av蜜桃 | 精品人妻中文字幕有码在线 | 精品久久久久久亚洲精品 | 窝窝午夜理论片影院 | 无套内谢老熟女 | 国产亚洲欧美在线专区 | 国产av一区二区精品久久凹凸 | 久久久久久亚洲精品a片成人 | 人妻少妇被猛烈进入中文字幕 | 日韩视频 中文字幕 视频一区 | 性生交大片免费看女人按摩摩 | 久久99精品久久久久久 | 午夜精品久久久久久久 | 无码精品国产va在线观看dvd | 国产精品多人p群无码 | 国产精品办公室沙发 | 性欧美大战久久久久久久 | 国产高清不卡无码视频 | 国产熟妇另类久久久久 | 一区二区三区乱码在线 | 欧洲 | 欧美亚洲日韩国产人成在线播放 | 97精品国产97久久久久久免费 | 欧美乱妇无乱码大黄a片 | 久久99精品久久久久久动态图 | 中文字幕乱码人妻无码久久 | 国产乡下妇女做爰 | 任你躁在线精品免费 | 久久综合狠狠综合久久综合88 | 欧洲欧美人成视频在线 | 欧美人与动性行为视频 | 亚洲男人av天堂午夜在 | 亚洲七七久久桃花影院 | 久久久久国色av免费观看性色 | 国产熟妇另类久久久久 | 亚洲热妇无码av在线播放 | 久久综合狠狠综合久久综合88 | 性色av无码免费一区二区三区 | 国产欧美精品一区二区三区 | 午夜精品久久久内射近拍高清 | 正在播放老肥熟妇露脸 | 国产97人人超碰caoprom | 久久国产精品二国产精品 | 久久人人爽人人人人片 | 国产亚洲美女精品久久久2020 | 99国产欧美久久久精品 | a在线观看免费网站大全 | 成人精品视频一区二区 | 国产精品99爱免费视频 | 亚洲成av人影院在线观看 | 国产内射爽爽大片视频社区在线 | 国产片av国语在线观看 | 啦啦啦www在线观看免费视频 | 久久久国产精品无码免费专区 | 亚洲熟妇自偷自拍另类 | 天堂在线观看www | 国产熟妇另类久久久久 | 免费视频欧美无人区码 | 综合人妻久久一区二区精品 | 日韩人妻系列无码专区 | 
国产综合色产在线精品 | 领导边摸边吃奶边做爽在线观看 | 蜜桃av抽搐高潮一区二区 | 乱码av麻豆丝袜熟女系列 | 国产精品视频免费播放 | 国产内射爽爽大片视频社区在线 | 特级做a爰片毛片免费69 | 国产亚洲tv在线观看 | 国产性生大片免费观看性 | 激情综合激情五月俺也去 | 中文字幕无码免费久久99 | 性色欲网站人妻丰满中文久久不卡 | 亚洲国产综合无码一区 | 久久久久国色av免费观看性色 | 精品aⅴ一区二区三区 | 国产亚洲tv在线观看 | 亚洲a无码综合a国产av中文 | 精品熟女少妇av免费观看 | 日本一区二区三区免费播放 | 亚洲啪av永久无码精品放毛片 | 国产激情艳情在线看视频 | 亚洲人成影院在线无码按摩店 | 亚洲中文字幕久久无码 | 中文字幕人妻丝袜二区 | 久久成人a毛片免费观看网站 | 全球成人中文在线 | 成人精品一区二区三区中文字幕 | 日韩欧美中文字幕在线三区 | 日韩人妻无码中文字幕视频 | 国产偷自视频区视频 | 国产人妻人伦精品1国产丝袜 | 东京一本一道一二三区 | 无码精品人妻一区二区三区av | 亚洲精品国产a久久久久久 | 国产又粗又硬又大爽黄老大爷视 | 亚洲国产精华液网站w | 亚洲色偷偷偷综合网 | 欧美国产日韩久久mv | 亚洲自偷精品视频自拍 | √8天堂资源地址中文在线 | √天堂中文官网8在线 | 性欧美大战久久久久久久 | 国产人妻人伦精品1国产丝袜 | 国产乱人伦av在线无码 | 亚洲欧美日韩综合久久久 | 亚洲中文字幕久久无码 | 国产精品久久久午夜夜伦鲁鲁 | 国内精品一区二区三区不卡 | 日本爽爽爽爽爽爽在线观看免 | 纯爱无遮挡h肉动漫在线播放 | 在线观看国产午夜福利片 | 国精产品一品二品国精品69xx | 少妇性l交大片欧洲热妇乱xxx | 国产av无码专区亚洲a∨毛片 | 无码av最新清无码专区吞精 | 东京热一精品无码av | 人人妻人人澡人人爽人人精品浪潮 | 一本久久a久久精品亚洲 | 久久亚洲中文字幕无码 | 中文字幕+乱码+中文字幕一区 | 国产精品怡红院永久免费 | 亚洲成av人片在线观看无码不卡 | 国产特级毛片aaaaaa高潮流水 | 成人亚洲精品久久久久软件 | 久久国产精品_国产精品 | 日产国产精品亚洲系列 | 亚洲精品国偷拍自产在线麻豆 | 2019nv天堂香蕉在线观看 | 性生交片免费无码看人 | 免费男性肉肉影院 | 色综合视频一区二区三区 | 精品无码一区二区三区的天堂 | 国产精品丝袜黑色高跟鞋 | av在线亚洲欧洲日产一区二区 | 午夜福利一区二区三区在线观看 | 精品人妻人人做人人爽 | 国产sm调教视频在线观看 | 欧美人与动性行为视频 | 成熟妇人a片免费看网站 | 国产香蕉尹人综合在线观看 | 1000部啪啪未满十八勿入下载 | 亚洲成熟女人毛毛耸耸多 | 内射后入在线观看一区 | 国产成人无码av在线影院 | 亚洲中文字幕成人无码 | aⅴ在线视频男人的天堂 | 午夜熟女插插xx免费视频 | 人妻尝试又大又粗久久 | 波多野结衣乳巨码无在线观看 | 一本久道久久综合婷婷五月 | 好屌草这里只有精品 | 久久亚洲日韩精品一区二区三区 | 国产成人无码av一区二区 | 牲交欧美兽交欧美 | av人摸人人人澡人人超碰下载 | 久久无码专区国产精品s | 影音先锋中文字幕无码 | 亚洲精品无码人妻无码 | 国产成人综合在线女婷五月99播放 | 色窝窝无码一区二区三区色欲 | 少妇性俱乐部纵欲狂欢电影 | 国产97人人超碰caoprom | a国产一区二区免费入口 | 男人扒开女人内裤强吻桶进去 | 午夜成人1000部免费视频 | 久久午夜夜伦鲁鲁片无码免费 | 中文字幕无线码免费人妻 | 无码人妻精品一区二区三区不卡 | 少妇人妻偷人精品无码视频 | 久久久久久a亚洲欧洲av冫 | 国产女主播喷水视频在线观看 | 国产手机在线αⅴ片无码观看 | 国产农村妇女aaaaa视频 撕开奶罩揉吮奶头视频 | 人妻尝试又大又粗久久 | 国产凸凹视频一区二区 | 国产午夜手机精彩视频 | 亚无码乱人伦一区二区 | 久久精品国产亚洲精品 | 最新版天堂资源中文官网 | www国产精品内射老师 | 亚无码乱人伦一区二区 | 一本久久伊人热热精品中文字幕 | 高清国产亚洲精品自在久久 | 在教室伦流澡到高潮hnp视频 | 亚洲国产av精品一区二区蜜芽 | 性生交大片免费看女人按摩摩 | 中文亚洲成a人片在线观看 | 国产午夜视频在线观看 | 一本久久a久久精品亚洲 | 免费看男女做好爽好硬视频 | 欧美精品一区二区精品久久 | 97se亚洲精品一区 | 久久综合久久自在自线精品自 | 粗大的内捧猛烈进出视频 | 
丰满人妻一区二区三区免费视频 | 十八禁真人啪啪免费网站 | 一本久道久久综合婷婷五月 | 成人毛片一区二区 | 国产无av码在线观看 | 亚洲欧美国产精品专区久久 | 久久综合久久自在自线精品自 | 熟妇女人妻丰满少妇中文字幕 | 久久精品人人做人人综合 | 人妻有码中文字幕在线 | 亚洲精品国产a久久久久久 | 精品国产av色一区二区深夜久久 | 亚洲va欧美va天堂v国产综合 | 亚洲狠狠婷婷综合久久 | 俺去俺来也www色官网 | 国产精品自产拍在线观看 | 亚洲国产欧美日韩精品一区二区三区 | 国产熟妇高潮叫床视频播放 | 日本成熟视频免费视频 | 国产精品香蕉在线观看 | 国产精品人人爽人人做我的可爱 | 精品国产一区av天美传媒 | 国产精品永久免费视频 | 国产艳妇av在线观看果冻传媒 | 啦啦啦www在线观看免费视频 | 色偷偷人人澡人人爽人人模 | 黑人巨大精品欧美黑寡妇 | 久久精品中文字幕一区 | 亚洲va中文字幕无码久久不卡 | 欧美性色19p | 亚洲国产精品一区二区第一页 | 日本xxxx色视频在线观看免费 | 国语精品一区二区三区 | 精品国产av色一区二区深夜久久 | 亚洲日韩av一区二区三区中文 | 久久国产精品精品国产色婷婷 | 亚洲午夜福利在线观看 | 国产精品va在线播放 | 免费国产成人高清在线观看网站 | 亚洲の无码国产の无码影院 | 性色av无码免费一区二区三区 | 亚洲精品久久久久avwww潮水 | 久久亚洲精品中文字幕无男同 | 欧美熟妇另类久久久久久多毛 | 日日干夜夜干 | 爆乳一区二区三区无码 | 日本精品高清一区二区 | 成人精品视频一区二区三区尤物 | 正在播放老肥熟妇露脸 | 丰满人妻被黑人猛烈进入 | 亚洲天堂2017无码中文 | 在线看片无码永久免费视频 | 少妇高潮喷潮久久久影院 | 国产精品美女久久久 | 香蕉久久久久久av成人 | 国产香蕉尹人综合在线观看 | 欧美大屁股xxxxhd黑色 | 国产情侣作爱视频免费观看 | 99久久99久久免费精品蜜桃 | 久久午夜无码鲁丝片秋霞 | 天天综合网天天综合色 | 国产精品免费大片 | av无码电影一区二区三区 | 天天av天天av天天透 | 久久精品人人做人人综合试看 | 国产一精品一av一免费 | 欧美精品无码一区二区三区 | 国产成人无码av一区二区 | 亚洲大尺度无码无码专区 | 国产黑色丝袜在线播放 | 在教室伦流澡到高潮hnp视频 | 国产精品欧美成人 | 狠狠躁日日躁夜夜躁2020 | 欧洲vodafone精品性 | 成人动漫在线观看 | 亚洲综合在线一区二区三区 | 国产超碰人人爽人人做人人添 | 国产熟女一区二区三区四区五区 | 天下第一社区视频www日本 | 亚洲国产一区二区三区在线观看 | 国产成人无码午夜视频在线观看 | 无码播放一区二区三区 | 无码国产乱人伦偷精品视频 | 未满成年国产在线观看 | av香港经典三级级 在线 | 欧美性生交xxxxx久久久 | 久久久久久av无码免费看大片 | 无码免费一区二区三区 | 最新版天堂资源中文官网 | 妺妺窝人体色www婷婷 | 日日摸天天摸爽爽狠狠97 | 图片区 小说区 区 亚洲五月 | 夜精品a片一区二区三区无码白浆 | 熟女体下毛毛黑森林 | 丁香花在线影院观看在线播放 | 国产亚洲人成a在线v网站 | 色婷婷香蕉在线一区二区 | 欧美老人巨大xxxx做受 | 日本精品人妻无码免费大全 | 亚洲 a v无 码免 费 成 人 a v | 88国产精品欧美一区二区三区 | 麻豆国产97在线 | 欧洲 | 激情内射日本一区二区三区 | 国产无遮挡又黄又爽免费视频 | 18禁止看的免费污网站 | 日本xxxx色视频在线观看免费 | 精品亚洲韩国一区二区三区 | 亚洲七七久久桃花影院 | 55夜色66夜色国产精品视频 | 亚洲欧美国产精品专区久久 | 性欧美牲交xxxxx视频 | 日日碰狠狠丁香久燥 | 亚洲成色在线综合网站 | 亚洲综合另类小说色区 | 国产高潮视频在线观看 | 亚洲日韩av一区二区三区四区 | 又色又爽又黄的美女裸体网站 | 无码乱肉视频免费大全合集 | 精品熟女少妇av免费观看 | 欧美日韩一区二区综合 | 中文字幕无码视频专区 | 亚洲第一无码av无码专区 | 76少妇精品导航 | 日韩精品成人一区二区三区 | 色欲av亚洲一区无码少妇 | 亚洲第一无码av无码专区 | 成熟妇人a片免费看网站 | 伊人色综合久久天天小片 | 国产人妻大战黑人第1集 | 国产精品va在线播放 | 亚洲呦女专区 | 中文字幕亚洲情99在线 | 国产亚洲精品久久久ai换 | 中文字幕无线码免费人妻 | 激情亚洲一区国产精品 | 
√天堂中文官网8在线 | 亚洲综合无码久久精品综合 | 国产成人综合色在线观看网站 | 国产成人精品无码播放 | 亚洲国产精品久久久久久 | 精品国产乱码久久久久乱码 | 午夜福利不卡在线视频 | 综合人妻久久一区二区精品 | 精品久久久无码中文字幕 | 成人性做爰aaa片免费看不忠 | 久9re热视频这里只有精品 | a国产一区二区免费入口 | 国产内射爽爽大片视频社区在线 | 丰满护士巨好爽好大乳 | 奇米影视888欧美在线观看 | 无套内谢的新婚少妇国语播放 | 麻豆国产97在线 | 欧洲 | 国产午夜无码视频在线观看 | 巨爆乳无码视频在线观看 | 人妻体内射精一区二区三四 | 伊人久久大香线焦av综合影院 | 丝袜足控一区二区三区 | 99久久久无码国产aaa精品 | 国产凸凹视频一区二区 | 无码福利日韩神码福利片 | 日欧一片内射va在线影院 | 中文无码伦av中文字幕 | 色一情一乱一伦一视频免费看 | 亚洲欧美色中文字幕在线 | 思思久久99热只有频精品66 | 国产精品美女久久久 | 欧美日韩一区二区综合 | 女人被爽到呻吟gif动态图视看 | 白嫩日本少妇做爰 | 麻豆人妻少妇精品无码专区 | 亚洲熟妇自偷自拍另类 | 2019午夜福利不卡片在线 | 无码av免费一区二区三区试看 | 色偷偷人人澡人人爽人人模 | 大乳丰满人妻中文字幕日本 | 捆绑白丝粉色jk震动捧喷白浆 | 久久天天躁狠狠躁夜夜免费观看 | 一本久久a久久精品vr综合 | 18禁止看的免费污网站 | 欧美三级不卡在线观看 | 中文精品久久久久人妻不卡 | 2020久久超碰国产精品最新 | 日韩人妻无码中文字幕视频 | 久久午夜无码鲁丝片秋霞 | 久久精品国产日本波多野结衣 | 精品无码国产自产拍在线观看蜜 | 成人精品视频一区二区 | 亚洲日韩中文字幕在线播放 | 成人免费视频一区二区 | 夜精品a片一区二区三区无码白浆 | 爆乳一区二区三区无码 | 亚洲成av人综合在线观看 | 人妻天天爽夜夜爽一区二区 | 激情人妻另类人妻伦 | 国产卡一卡二卡三 | 国产97人人超碰caoprom | 少妇的肉体aa片免费 | 丰满岳乱妇在线观看中字无码 | 久久久av男人的天堂 | av香港经典三级级 在线 | 性色欲网站人妻丰满中文久久不卡 | 乱人伦中文视频在线观看 | 亚洲一区二区三区四区 | 在线精品亚洲一区二区 | 综合激情五月综合激情五月激情1 | 亚洲 a v无 码免 费 成 人 a v | 亚洲中文字幕无码中字 | 青青青手机频在线观看 | 熟女体下毛毛黑森林 | 偷窥日本少妇撒尿chinese | 西西人体www44rt大胆高清 | 波多野结衣av在线观看 | 日本爽爽爽爽爽爽在线观看免 | 亚洲国产精品无码久久久久高潮 | 日韩亚洲欧美精品综合 | 国产xxx69麻豆国语对白 | 国产精品美女久久久网av | 久久国产精品偷任你爽任你 | 一区二区三区高清视频一 | 日日碰狠狠丁香久燥 | 日韩在线不卡免费视频一区 | 久久久久se色偷偷亚洲精品av | 亚洲国产精品美女久久久久 | 无套内射视频囯产 | 亚洲国产欧美在线成人 | 麻豆国产丝袜白领秘书在线观看 | 久久久国产精品无码免费专区 | 波多野结衣aⅴ在线 | 又粗又大又硬毛片免费看 | 红桃av一区二区三区在线无码av | 亚洲日韩av一区二区三区中文 | 性生交大片免费看l | 久久97精品久久久久久久不卡 | 亚洲日韩一区二区三区 | yw尤物av无码国产在线观看 | 成人无码精品1区2区3区免费看 | 欧美怡红院免费全部视频 | 乱码午夜-极国产极内射 | 伦伦影院午夜理论片 | 国产午夜福利100集发布 | 日韩 欧美 动漫 国产 制服 | 俺去俺来也在线www色官网 | 国产成人午夜福利在线播放 | 曰本女人与公拘交酡免费视频 | 国产香蕉尹人综合在线观看 | 亚洲国产精品久久久久久 | 欧洲vodafone精品性 | 亚洲一区二区三区含羞草 | 全黄性性激高免费视频 | 在线亚洲高清揄拍自拍一品区 | 少妇性荡欲午夜性开放视频剧场 | 午夜福利一区二区三区在线观看 | 午夜成人1000部免费视频 | 亚洲爆乳无码专区 | 国产小呦泬泬99精品 | 麻豆精品国产精华精华液好用吗 | 国内少妇偷人精品视频免费 | 亚洲中文无码av永久不收费 | 色偷偷人人澡人人爽人人模 | 欧美阿v高清资源不卡在线播放 | 一本大道伊人av久久综合 | 久久99精品国产麻豆 | 小鲜肉自慰网站xnxx | 国产肉丝袜在线观看 | 久久人人爽人人爽人人片ⅴ | 国产人成高清在线视频99最全资源 | 国产精品久久久久9999小说 | a片在线免费观看 | 全球成人中文在线 | 
77777熟女视频在线观看 а天堂中文在线官网 | 免费中文字幕日韩欧美 | 久久国产精品精品国产色婷婷 | 亚洲色成人中文字幕网站 | av无码不卡在线观看免费 | 熟女少妇人妻中文字幕 | 国产精品人人爽人人做我的可爱 | 天天摸天天透天天添 | 久久综合九色综合欧美狠狠 | 香蕉久久久久久av成人 | 色偷偷av老熟女 久久精品人妻少妇一区二区三区 | 欧美日韩一区二区三区自拍 | 内射巨臀欧美在线视频 | 精品无码一区二区三区的天堂 | 亚洲无人区午夜福利码高清完整版 | 国产激情一区二区三区 | 国产特级毛片aaaaaaa高清 | 最新国产麻豆aⅴ精品无码 | 国产午夜亚洲精品不卡下载 | 国产在线aaa片一区二区99 | 少女韩国电视剧在线观看完整 | 天堂一区人妻无码 | 在线欧美精品一区二区三区 | 亚洲精品久久久久中文第一幕 | 无码人妻出轨黑人中文字幕 | 久久精品女人天堂av免费观看 | 99久久人妻精品免费一区 | 亚洲一区二区三区在线观看网站 | 曰本女人与公拘交酡免费视频 | 国产在线精品一区二区高清不卡 | 狠狠躁日日躁夜夜躁2020 | 无码国产色欲xxxxx视频 | 欧美精品无码一区二区三区 | 77777熟女视频在线观看 а天堂中文在线官网 | 久久久久久久女国产乱让韩 | 亚洲精品欧美二区三区中文字幕 | 影音先锋中文字幕无码 | 欧美阿v高清资源不卡在线播放 | 无套内谢老熟女 | 欧美真人作爱免费视频 | 久久久精品人妻久久影视 | 中文字幕无码免费久久99 | 亚洲综合精品香蕉久久网 | 啦啦啦www在线观看免费视频 | 国产精品va在线播放 | 人人妻人人澡人人爽精品欧美 | 免费观看又污又黄的网站 | av无码电影一区二区三区 | 人妻aⅴ无码一区二区三区 | 亚洲 a v无 码免 费 成 人 a v | 日日橹狠狠爱欧美视频 | 国产区女主播在线观看 | 人妻无码αv中文字幕久久琪琪布 | 无套内谢老熟女 | 人人妻人人藻人人爽欧美一区 | 国产精品理论片在线观看 | 色综合天天综合狠狠爱 | 亚洲色无码一区二区三区 | 18无码粉嫩小泬无套在线观看 | 国产精品久久久久影院嫩草 | 大肉大捧一进一出好爽视频 | 色婷婷欧美在线播放内射 | 全球成人中文在线 | 无套内谢的新婚少妇国语播放 | 丝袜美腿亚洲一区二区 | 久久久久久国产精品无码下载 | 国产办公室秘书无码精品99 | 亚洲成a人片在线观看日本 | 日韩 欧美 动漫 国产 制服 | 人人妻人人澡人人爽欧美一区九九 | 亚洲精品国产a久久久久久 | 久久精品人人做人人综合 | 未满成年国产在线观看 | 永久黄网站色视频免费直播 | 免费观看激色视频网站 | 欧美 丝袜 自拍 制服 另类 | 四十如虎的丰满熟妇啪啪 | 99精品国产综合久久久久五月天 | 大地资源中文第3页 | 三上悠亚人妻中文字幕在线 | 性色欲情网站iwww九文堂 | 无码人妻丰满熟妇区毛片18 | 国产精品毛多多水多 | 欧美成人高清在线播放 | 亚洲精品中文字幕久久久久 | 麻豆蜜桃av蜜臀av色欲av | 无码人妻黑人中文字幕 | 国产又爽又猛又粗的视频a片 | 精品无人国产偷自产在线 | 亚洲s色大片在线观看 | 野外少妇愉情中文字幕 | 中文字幕色婷婷在线视频 | 97精品人妻一区二区三区香蕉 | 国产精品a成v人在线播放 | 黑人巨大精品欧美黑寡妇 | 亚洲综合伊人久久大杳蕉 | av在线亚洲欧洲日产一区二区 | 亚洲国产精品毛片av不卡在线 | 国产激情精品一区二区三区 | 国产亚洲欧美日韩亚洲中文色 | 国产国产精品人在线视 | 日韩精品久久久肉伦网站 | 狂野欧美激情性xxxx | 中文字幕av无码一区二区三区电影 | 国产精品亚洲а∨无码播放麻豆 | 欧洲熟妇精品视频 | 国产精品永久免费视频 | 色婷婷久久一区二区三区麻豆 | 日韩精品无码一本二本三本色 | 国产精品沙发午睡系列 | 成 人 网 站国产免费观看 | 一本色道婷婷久久欧美 | 亚洲精品久久久久久一区二区 | 日韩精品一区二区av在线 | 亚洲 另类 在线 欧美 制服 | 久久亚洲精品成人无码 | 日本一区二区三区免费播放 | 亚洲欧美精品aaaaaa片 | 3d动漫精品啪啪一区二区中 | 久久午夜夜伦鲁鲁片无码免费 | 2020久久超碰国产精品最新 | 亚洲一区二区三区四区 | 亚洲色www成人永久网址 | 国产亚av手机在线观看 | 在线亚洲高清揄拍自拍一品区 | 久久99国产综合精品 | 成年美女黄网站色大免费全看 | 日韩av无码一区二区三区不卡 | 97久久精品无码一区二区 | 国产精品va在线观看无码 | 爆乳一区二区三区无码 | 
国产精品.xx视频.xxtv | 国产亚洲精品久久久久久大师 | 正在播放老肥熟妇露脸 | 亚洲熟妇色xxxxx亚洲 | 久久无码中文字幕免费影院蜜桃 | 国产精品无码一区二区桃花视频 | 国产午夜亚洲精品不卡 | 美女毛片一区二区三区四区 | 久久人人爽人人人人片 | 荫蒂添的好舒服视频囗交 | 丰满少妇女裸体bbw | 久久午夜无码鲁丝片午夜精品 | 无码人妻少妇伦在线电影 | 任你躁在线精品免费 | 国产精品沙发午睡系列 | 奇米影视888欧美在线观看 | 久久久久成人精品免费播放动漫 | 夫妻免费无码v看片 | 成人av无码一区二区三区 | 亚洲午夜久久久影院 | 在线成人www免费观看视频 | 色一情一乱一伦一区二区三欧美 | 奇米综合四色77777久久 东京无码熟妇人妻av在线网址 | 成人精品视频一区二区三区尤物 | 无遮挡国产高潮视频免费观看 | 日韩精品无码一本二本三本色 | 在线天堂新版最新版在线8 | 国产精品久免费的黄网站 | 亚洲成av人综合在线观看 | 老熟女乱子伦 | 精品午夜福利在线观看 | 西西人体www44rt大胆高清 | 老子影院午夜精品无码 | 狂野欧美性猛xxxx乱大交 | 欧美丰满熟妇xxxx性ppx人交 | 精品亚洲韩国一区二区三区 | 精品国产av色一区二区深夜久久 | 亚洲精品欧美二区三区中文字幕 | √天堂资源地址中文在线 | 学生妹亚洲一区二区 | 欧美精品无码一区二区三区 | 亚洲乱码日产精品bd | 国产 浪潮av性色四虎 | 日本精品人妻无码免费大全 | 国产sm调教视频在线观看 | 亚洲爆乳大丰满无码专区 | 精品无码国产自产拍在线观看蜜 | 亚洲色无码一区二区三区 | 午夜肉伦伦影院 | 亚洲精品一区二区三区四区五区 | 国产精品久久久久9999小说 | 精品国产一区二区三区av 性色 | 欧美性色19p | 亚洲欧美日韩成人高清在线一区 | 亚洲精品综合一区二区三区在线 | 亚洲国产午夜精品理论片 | 女人色极品影院 | 十八禁视频网站在线观看 | 日韩成人一区二区三区在线观看 | 国产午夜无码视频在线观看 | 久久国产自偷自偷免费一区调 | 欧美性生交活xxxxxdddd | 男女超爽视频免费播放 | 精品亚洲韩国一区二区三区 | 一本大道伊人av久久综合 | 亚洲自偷精品视频自拍 | 国产激情精品一区二区三区 | 精品人妻人人做人人爽夜夜爽 | 小泽玛莉亚一区二区视频在线 | 国产av剧情md精品麻豆 | 日日躁夜夜躁狠狠躁 | 人人妻人人澡人人爽精品欧美 | 奇米影视888欧美在线观看 | 日本在线高清不卡免费播放 | 国产成人精品三级麻豆 | 一二三四社区在线中文视频 | 久久综合色之久久综合 | 98国产精品综合一区二区三区 | 香港三级日本三级妇三级 | 色婷婷综合激情综在线播放 | 精品人妻人人做人人爽 | 综合人妻久久一区二区精品 | 国产精品99爱免费视频 | 久久久中文久久久无码 | 精品国产成人一区二区三区 | 日韩av无码一区二区三区 | 亚洲一区二区三区国产精华液 | 久久久久久a亚洲欧洲av冫 | 暴力强奷在线播放无码 | 色五月五月丁香亚洲综合网 | 九九久久精品国产免费看小说 | 国产精品毛多多水多 | 国产精品美女久久久久av爽李琼 | 日本熟妇人妻xxxxx人hd | 亚洲精品国产精品乱码不卡 | 亚洲国产精品久久人人爱 | 99riav国产精品视频 | 亚洲成av人影院在线观看 | 亚洲中文字幕乱码av波多ji | 99久久精品无码一区二区毛片 | 久久精品国产99久久6动漫 | 无套内谢老熟女 | 精品国产aⅴ无码一区二区 | 成 人影片 免费观看 | 久激情内射婷内射蜜桃人妖 | 97精品国产97久久久久久免费 | 国产精品.xx视频.xxtv | 免费人成在线观看网站 | 中文字幕日韩精品一区二区三区 | 77777熟女视频在线观看 а天堂中文在线官网 | 小sao货水好多真紧h无码视频 | 国产三级精品三级男人的天堂 | 一本久道高清无码视频 | 国产乱人偷精品人妻a片 | 国产精品久久久久影院嫩草 | 久久这里只有精品视频9 | 55夜色66夜色国产精品视频 | 久久精品女人的天堂av | 97夜夜澡人人爽人人喊中国片 | 天堂一区人妻无码 | 欧洲精品码一区二区三区免费看 | 少妇的肉体aa片免费 | 男人扒开女人内裤强吻桶进去 | 扒开双腿吃奶呻吟做受视频 | 国产麻豆精品一区二区三区v视界 | 少妇被黑人到高潮喷出白浆 | 日韩人妻少妇一区二区三区 | 国内揄拍国内精品人妻 | 天天综合网天天综合色 | 鲁一鲁av2019在线 | 久久精品国产精品国产精品污 | 精品久久久久久亚洲精品 | 
欧美日韩在线亚洲综合国产人 | 成人aaa片一区国产精品 | 东京无码熟妇人妻av在线网址 | 国产精品毛片一区二区 | 扒开双腿疯狂进出爽爽爽视频 | 99视频精品全部免费免费观看 | 日韩人妻无码中文字幕视频 | 亚洲欧美日韩国产精品一区二区 | 中文字幕无码av波多野吉衣 | 欧美阿v高清资源不卡在线播放 | 亚洲va中文字幕无码久久不卡 | 精品久久久久香蕉网 | 国产亚洲人成在线播放 | 国产精品久久久久9999小说 | 国产舌乚八伦偷品w中 | 久久99国产综合精品 | 少妇愉情理伦片bd | 无码人妻精品一区二区三区不卡 | 色欲av亚洲一区无码少妇 | 在线а√天堂中文官网 | 欧美日本日韩 | 日本精品人妻无码77777 天堂一区人妻无码 | 亚洲成在人网站无码天堂 | 国产精品美女久久久网av | 女人被爽到呻吟gif动态图视看 | 欧美激情一区二区三区成人 | 色一情一乱一伦一视频免费看 | 丁香花在线影院观看在线播放 | 欧美日韩在线亚洲综合国产人 | 帮老师解开蕾丝奶罩吸乳网站 | 97久久国产亚洲精品超碰热 | 四十如虎的丰满熟妇啪啪 | 欧美日韩久久久精品a片 | 国产午夜无码精品免费看 | 亚洲一区av无码专区在线观看 | 欧洲熟妇精品视频 | 久久99精品久久久久久 | 图片小说视频一区二区 | 俄罗斯老熟妇色xxxx | 亚洲 欧美 激情 小说 另类 | 国产精品久久久久7777 | 真人与拘做受免费视频一 | 国内揄拍国内精品人妻 | 国产精品对白交换视频 | 亚洲日韩一区二区 | 日日噜噜噜噜夜夜爽亚洲精品 | 澳门永久av免费网站 | 亚洲成熟女人毛毛耸耸多 | 亚洲 激情 小说 另类 欧美 | 亚洲欧洲无卡二区视頻 | 乱人伦人妻中文字幕无码 | 东京无码熟妇人妻av在线网址 | a在线亚洲男人的天堂 | 99er热精品视频 | 国内丰满熟女出轨videos | 国产亚洲tv在线观看 | 桃花色综合影院 | 亚洲中文字幕乱码av波多ji | 奇米影视7777久久精品 | 无码人妻少妇伦在线电影 | 国产成人综合色在线观看网站 | 亚洲国产欧美在线成人 | 亚洲aⅴ无码成人网站国产app | 久久精品人妻少妇一区二区三区 | 99精品视频在线观看免费 | 日本精品久久久久中文字幕 | 亚洲成av人影院在线观看 | 久久久精品欧美一区二区免费 | 日本一本二本三区免费 | 亚洲国产精品无码久久久久高潮 | 国产偷自视频区视频 | 亚洲国产精品久久人人爱 | 久久伊人色av天堂九九小黄鸭 | 男人和女人高潮免费网站 | 巨爆乳无码视频在线观看 | 亚洲成色www久久网站 | 天天燥日日燥 | 国产xxx69麻豆国语对白 | 亚洲gv猛男gv无码男同 | 精品无码国产自产拍在线观看蜜 | 影音先锋中文字幕无码 | 国产精品人人爽人人做我的可爱 | 俺去俺来也在线www色官网 | 日本欧美一区二区三区乱码 | 天干天干啦夜天干天2017 | 夜夜影院未满十八勿进 | 亚洲gv猛男gv无码男同 | 在线观看国产午夜福利片 | 久久亚洲国产成人精品性色 | 中文字幕av伊人av无码av | 丰满少妇人妻久久久久久 | 国产精品对白交换视频 | 久9re热视频这里只有精品 | 亚洲国产精品成人久久蜜臀 | 国产va免费精品观看 | 最近免费中文字幕中文高清百度 | 2019午夜福利不卡片在线 | 国产精品怡红院永久免费 | 自拍偷自拍亚洲精品10p | 国产精品久久久久久无码 | 人妻无码αv中文字幕久久琪琪布 | 人人妻人人澡人人爽人人精品 | 亚洲阿v天堂在线 | 精品欧洲av无码一区二区三区 | 在线欧美精品一区二区三区 | 97夜夜澡人人双人人人喊 | 高潮喷水的毛片 | 成人欧美一区二区三区 | 色偷偷人人澡人人爽人人模 | 欧美亚洲国产一区二区三区 | 大肉大捧一进一出视频出来呀 | 久久人人爽人人爽人人片av高清 | 成人欧美一区二区三区黑人 | 日韩av无码中文无码电影 | 精品无码一区二区三区的天堂 | 国产做国产爱免费视频 | 97色伦图片97综合影院 | 亚洲性无码av中文字幕 | 澳门永久av免费网站 | v一区无码内射国产 | 欧美精品免费观看二区 | 丰满人妻精品国产99aⅴ | 国产精品二区一区二区aⅴ污介绍 | 国产真实伦对白全集 | 中文字幕亚洲情99在线 | 在线成人www免费观看视频 | 人妻中文无码久热丝袜 | 午夜成人1000部免费视频 | 亚洲中文字幕av在天堂 | 久久久www成人免费毛片 | 亚洲日韩中文字幕在线播放 | 国产色视频一区二区三区 | 亚洲综合精品香蕉久久网 | 国产99久久精品一区二区 | 人人澡人人透人人爽 | 
久久久久国色av免费观看性色 | 青青草原综合久久大伊人精品 | 大乳丰满人妻中文字幕日本 | 国产内射爽爽大片视频社区在线 | 网友自拍区视频精品 | 福利一区二区三区视频在线观看 | 欧美高清在线精品一区 | 真人与拘做受免费视频 | 精品无人国产偷自产在线 | 久久人人爽人人爽人人片av高清 | 综合人妻久久一区二区精品 | 久久精品国产99精品亚洲 | 国产精品人人妻人人爽 | 亚洲国产日韩a在线播放 | 国产电影无码午夜在线播放 | 欧美国产日产一区二区 | 无码成人精品区在线观看 | 免费人成在线视频无码 | 国产性生交xxxxx无码 | 亚洲中文字幕无码一久久区 | 乱人伦人妻中文字幕无码久久网 | 日本成熟视频免费视频 | 亚洲欧美综合区丁香五月小说 | 性做久久久久久久免费看 | 香港三级日本三级妇三级 | 成人性做爰aaa片免费看不忠 | 欧美老人巨大xxxx做受 | 性色欲网站人妻丰满中文久久不卡 | 亚洲国产av精品一区二区蜜芽 | 欧美 日韩 人妻 高清 中文 | 亚洲无人区午夜福利码高清完整版 | 久久无码人妻影院 | 黑人巨大精品欧美一区二区 | 亚洲人交乣女bbw | 宝宝好涨水快流出来免费视频 | 伊在人天堂亚洲香蕉精品区 | 国产精品爱久久久久久久 | 亚洲爆乳大丰满无码专区 | 亚洲小说图区综合在线 | 天堂а√在线中文在线 | 国产绳艺sm调教室论坛 | 理论片87福利理论电影 | 图片区 小说区 区 亚洲五月 | 九一九色国产 | 久久99国产综合精品 | 国产热a欧美热a在线视频 | 麻豆精品国产精华精华液好用吗 | 久久99精品久久久久婷婷 | 玩弄人妻少妇500系列视频 | 成人无码视频在线观看网站 | 牛和人交xxxx欧美 | 日韩欧美群交p片內射中文 | 亚洲中文字幕无码一久久区 | 亚洲国产精品一区二区第一页 | 欧美日韩一区二区免费视频 | 亚洲色大成网站www国产 | 亚洲中文无码av永久不收费 | 国产精品亚洲一区二区三区喷水 | 亚洲国产欧美在线成人 | 免费人成在线观看网站 | 日韩 欧美 动漫 国产 制服 | 扒开双腿吃奶呻吟做受视频 | 国产精品嫩草久久久久 | 国产色在线 | 国产 | 国产 精品 自在自线 | 国产艳妇av在线观看果冻传媒 | 三级4级全黄60分钟 | 综合人妻久久一区二区精品 | 亚洲精品久久久久avwww潮水 | 亚洲中文字幕成人无码 | 亚洲乱码中文字幕在线 | 丰满护士巨好爽好大乳 | 亚洲一区av无码专区在线观看 | 一本久久伊人热热精品中文字幕 | 色偷偷人人澡人人爽人人模 |