Vector Data Type | |
| |
typedef arch_vector_t | rc_vec_t |
The vector type definition. | |
#define | RC_VEC_SIZE |
The number of bytes in a vector. | |
Performance Hints | |
| |
#define | RC_VEC_HINT_CMPGT |
Hint to use CMPGT instead of CMPGE when possible. | |
#define | RC_VEC_HINT_CMPGE |
Hint to use CMPGE instead of CMPGT when possible. | |
#define | RC_VEC_HINT_AVGT |
Hint to use AVGT/SUBHT instead of AVGR/SUBHR when possible. | |
#define | RC_VEC_HINT_AVGR |
Hint to use AVGR/SUBHR instead of AVGT/SUBHT when possible. | |
#define | RC_VEC_HINT_GETMASKW |
Hint to use GETMASKW instead of GETMASKV when possible. | |
#define | RC_VEC_HINT_GETMASKV |
Hint to use GETMASKV instead of GETMASKW when possible. | |
Vector State | |
| |
#define | RC_VEC_DECLARE() |
Global declarations needed for the vector operations. | |
#define | RC_VEC_CLEANUP() |
Global clean up after we are done with the vector computations. | |
Memory Access | |
| |
#define | RC_VEC_LOAD(vec, ptr) |
Aligned vector load from memory. | |
#define | RC_VEC_STORE(ptr, vec) |
Aligned vector store to memory. | |
Misaligned Memory Access | |
| |
#define | RC_VEC_LDINIT(vec1, vec2, vec3, uptr, ptr) |
Initialize a sequence of misaligned loads. | |
#define | RC_VEC_LOADU(dstv, vec1, vec2, vec3, uptr) |
Misaligned vector load from memory. | |
Field Relocation | |
| |
#define | RC_VEC_SHINIT(shv, bytes) |
Initialize a field shift. | |
#define | RC_VEC_SHL(dstv, srcv, shv) |
Shift all fields to the logical left. | |
#define | RC_VEC_SHR(dstv, srcv, shv) |
Shift fields to the logical right. | |
#define | RC_VEC_SHLC(dstv, srcv, bytes) |
Shift all fields to the logical left by a constant value. | |
#define | RC_VEC_SHRC(dstv, srcv, bytes) |
Shift fields to the logical right by a constant value. | |
#define | RC_VEC_ALIGNC(dstv, srcv1, srcv2, bytes) |
Align srcv1 and srcv2 to dstv, starting at field bytes into concatenation of srcv1 and srcv2. | |
#define | RC_VEC_PACK(dstv, srcv1, srcv2) |
Pack the even fields of srcv1 and srcv2 into one vector, with fields 0, ..., RC_VEC_SIZE/2 - 1 from srcv1 and fields RC_VEC_SIZE/2, ..., RC_VEC_SIZE - 1 from srcv2. | |
Bitwise Logical Operations | |
| |
#define | RC_VEC_ZERO(vec) |
Set all bits to zero. | |
#define | RC_VEC_NOT(dstv, srcv) |
Bitwise NOT. | |
#define | RC_VEC_AND(dstv, srcv1, srcv2) |
Bitwise AND. | |
#define | RC_VEC_OR(dstv, srcv1, srcv2) |
Bitwise OR. | |
#define | RC_VEC_XOR(dstv, srcv1, srcv2) |
Bitwise XOR. | |
#define | RC_VEC_ANDNOT(dstv, srcv1, srcv2) |
Bitwise AND NOT. | |
#define | RC_VEC_ORNOT(dstv, srcv1, srcv2) |
Bitwise OR NOT. | |
#define | RC_VEC_XORNOT(dstv, srcv1, srcv2) |
Bitwise XOR NOT. | |
Arithmetic Operations | |
| |
#define | RC_VEC_SPLAT(vec, scal) |
Splat a scalar variable. | |
#define | RC_VEC_ADDS(dstv, srcv1, srcv2) |
Saturating addition. | |
#define | RC_VEC_AVGT(dstv, srcv1, srcv2) |
Average value, truncated. | |
#define | RC_VEC_AVGR(dstv, srcv1, srcv2) |
Average value, rounded. | |
#define | RC_VEC_AVGZ(dstv, srcv1, srcv2) |
Average value, rounded towards srcv1. | |
#define | RC_VEC_SUBS(dstv, srcv1, srcv2) |
Saturating subtraction. | |
#define | RC_VEC_SUBA(dstv, srcv1, srcv2) |
Absolute-value subtraction. | |
#define | RC_VEC_SUBHT(dstv, srcv1, srcv2) |
Half subtraction with bias, truncated. | |
#define | RC_VEC_SUBHR(dstv, srcv1, srcv2) |
Half subtraction with bias, rounded. | |
#define | RC_VEC_ABS(dstv, srcv) |
Absolute value. | |
#define | RC_VEC_CMPGT(dstv, srcv1, srcv2) |
Comparison greater-than. | |
#define | RC_VEC_CMPGE(dstv, srcv1, srcv2) |
Comparison greater-than-or-equal-to. | |
#define | RC_VEC_MIN(dstv, srcv1, srcv2) |
Minimum value. | |
#define | RC_VEC_MAX(dstv, srcv1, srcv2) |
Maximum value. | |
#define | RC_VEC_BLEND(blendv, blend8) |
Generate the blend vector needed by RC_VEC_LERP(). | |
#define | RC_VEC_LERP(dstv, srcv1, srcv2, blend8, blendv) |
Linear interpolation. | |
#define | RC_VEC_BLENDZ(blendv, blend8) |
Generate the blend vector needed by RC_VEC_LERPZ(). | |
#define | RC_VEC_LERPZ(dstv, srcv1, srcv2, blend8, blendv) |
Linear interpolation rounded towards srcv1. | |
#define | RC_VEC_BLENDN(blendv, blend8) |
Generate the blend vector needed by RC_VEC_LERPN(). | |
#define | RC_VEC_LERPN(dstv, srcv1, srcv2, blend8, blendv) |
Linear interpolation rounded towards srcv2. | |
Binary Mask Operations | |
| |
#define | RC_VEC_GETMASKW(maskw, vec) |
Get a binary mask word of the most significant bits. | |
#define | RC_VEC_GETMASKV(maskv, vec) |
Get a binary mask vector of the most significant bits. | |
#define | RC_VEC_SETMASKV(vec, maskv) |
Set 8-bit vector fields from a binary mask vector. | |
Reductions | |
| |
#define | RC_VEC_CNTN |
Count bits in all vector fields, iteration count. | |
#define | RC_VEC_CNTV(accv, srcv) |
Count bits in all vector fields, accumulation step. | |
#define | RC_VEC_CNTR(cnt, accv) |
Count bits in all vector fields, reduction step. | |
#define | RC_VEC_SUMN |
Sum all 8-bit vector fields, iteration count. | |
#define | RC_VEC_SUMV(accv, srcv) |
Sum all 8-bit vector fields, accumulation step. | |
#define | RC_VEC_SUMR(sum, accv) |
Sum all 8-bit vector fields, reduction step. | |
#define | RC_VEC_MACN |
Multiply and accumulate all 8-bit fields, iteration count. | |
#define | RC_VEC_MACV(accv, srcv1, srcv2) |
Multiply and accumulate all 8-bit fields, accumulation step. | |
#define | RC_VEC_MACR(mac, accv) |
Multiply and accumulate all 8-bit fields, reduction step. |
The vector abstraction layer provides a platform-independent SIMD API. It can be implemented using different vector instruction sets. It is also possible to implement it using SIMD-Within-A-Register, or SWAR, techniques on native machine words.
The vector implementation of the compute-layer functions uses only the vector operations defined in the vector interface. This means that only the vector interface needs to be re-implemented when porting the library to a new SIMD architecture.
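A compliant implementation does not need to implement all operations. The generic vector driver code will only use the operations that the particular vector implementation defines. However, there is some mandatory functionality that must be present: rc_vec_t, RC_VEC_SIZE, RC_VEC_DECLARE(), RC_VEC_CLEANUP(), RC_VEC_LOAD() and RC_VEC_STORE(). In addition, RC_VEC_LDINIT() is mandatory if RC_VEC_LOADU() is implemented.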
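There is a mechanism for passing performance hints to the generic vector drivers. By defining a hint, the vector implementation tells the drivers to use a particular operation over a similar one, when applicable. The hints are: RC_VEC_HINT_CMPGT, RC_VEC_HINT_CMPGE, RC_VEC_HINT_AVGT, RC_VEC_HINT_AVGR, RC_VEC_HINT_GETMASKW and RC_VEC_HINT_GETMASKV.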
Hints are only needed if more than one alternative is implemented. Note that presence of alternatives does not imply redundancy. Finally, it is an error to hint an unimplemented operation, and also to hint both choices.
Some operations are redundant. This means that there are two vector operations that can be used interchangeably by the driver code. The redundant operations are:
The RC_VEC_GETMASKV() operation is only useful if also RC_VEC_ALIGNC(), RC_VEC_SHINIT() and RC_VEC_SHL() are implemented. The redundant operations use the hint mechanism.
Value arguments to operations are of the type rc_vec_t, and pointers point at uint8_t data, except where otherwise specified.
The available implementations are SWAR, MMX/SSE*, AltiVec, Loongson-2E/-2F, ARM NEON, VADMX and VIS.
The SWAR implementation can be compiled with any ISO/ANSI C compiler. It supports word sizes of 16, 32 and 64 bits. The implementation is endian-independent. The MMX/SSE* implementations rely on the presence of the *mmintrin.h intrinsics headers. They are available on systems using GCC, ICC (Intel) and CL (Microsoft). The AltiVec implementation uses the altivec.h intrinsics header. The Loongson-2E and -2F implementation uses the loongson.h intrinsics header. The ARM NEON implementation uses the arm_neon.h intrinsics header. VADMX- and VIS-support are only available through GCC builtins.
There are extensive tests available for the vector interface. The field-wise arithmetic tests are exhaustive, which means that the tests of operations such as linear interpolation may take some time on embedded platforms.
#define RC_VEC_ABS | ( | dstv, | |||
srcv | ) |
Absolute value.
Computes dstv = 2*abs(srcv - 0x80) for each 8-bit field. The result is saturated to [0,0xff].
dstv | The output vector. | |
srcv | The input vector. |
#define RC_VEC_ADDS | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Saturating addition.
Computes dstv = min(srcv1 + srcv2, 0xff) for each 8-bit field.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_ALIGNC | ( | dstv, | |||
srcv1, | |||||
srcv2, | |||||
bytes | ) |
Align srcv1 and srcv2 to dstv, starting at field bytes into the concatenation of srcv1 and srcv2.
The alignment value bytes must be a constant.
dstv | Aligned output vector. | |
srcv1 | First source vector. | |
srcv2 | Second source vector. | |
bytes | The constant alignment shift in the range [1, RC_VEC_SIZE - 1]. |
#define RC_VEC_AND | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Bitwise AND.
Computes dstv = srcv1 & srcv2 for all bits.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_ANDNOT | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Bitwise AND NOT.
Computes dstv = srcv1 & ~srcv2 for all bits.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_AVGR | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Average value, rounded.
Computes dstv = (srcv1 + srcv2 + 1) >> 1 for each 8-bit field.
#define RC_VEC_AVGT | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Average value, truncated.
Computes dstv = (srcv1 + srcv2) >> 1 for each 8-bit field.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_AVGZ | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Average value, rounded towards srcv1.
Computes dstv = (srcv1 + srcv2 + (srcv1 > srcv2)) >> 1 for each 8-bit field.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_BLEND | ( | blendv, | |||
blend8 | ) |
Generate the blend vector needed by RC_VEC_LERP().
blendv | The output blend vector. | |
blend8 | The Q.8 fixed-point scalar blend factor as an unsigned int. |
#define RC_VEC_BLENDN | ( | blendv, | |||
blend8 | ) |
Generate the blend vector needed by RC_VEC_LERPN().
blendv | The output blend vector. | |
blend8 | The Q.8 fixed-point scalar blend factor as an unsigned int. |
#define RC_VEC_BLENDZ | ( | blendv, | |||
blend8 | ) |
Generate the blend vector needed by RC_VEC_LERPZ().
blendv | The output blend vector. | |
blend8 | The Q.8 fixed-point scalar blend factor as an unsigned int. |
#define RC_VEC_CLEANUP | ( | ) |
Global clean up after we are done with the vector computations.
Mandatory.
#define RC_VEC_CMPGE | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Comparison greater-than-or-equal-to.
Computes MSB(dstv) = srcv1 >= srcv2 ? 1 : 0 for each 8-bit field. Only the most significant bit in each field is set; the remaining bits are undefined.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_CMPGT | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Comparison greater-than.
Computes MSB(dstv) = srcv1 > srcv2 ? 1 : 0 for each 8-bit field. Only the most significant bit in each field is set; the remaining bits are undefined.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_CNTN |
Count bits in all vector fields, iteration count.
The format of the accumulator vector is implementation-specific, but RC_VEC_CNTV() and RC_VEC_CNTR() together compute the bit count. The accumulation step can be iterated at most RC_VEC_CNTN times before the reduction step. The value of RC_VEC_CNTN must be either one, or be divisible by four.
#define RC_VEC_CNTR | ( | cnt, | |||
accv | ) |
Count bits in all vector fields, reduction step.
cnt | The uint32_t output bitcount value. | |
accv | The input accumulator to reduce. |
#define RC_VEC_CNTV | ( | accv, | |||
srcv | ) |
Count bits in all vector fields, accumulation step.
accv | The input/output accumulator vector. | |
srcv | The input source vector. |
#define RC_VEC_DECLARE | ( | ) |
Global declarations needed for the vector operations.
This can be used to declare vector constants. Mandatory.
#define RC_VEC_GETMASKV | ( | maskv, | |||
vec | ) |
Get a binary mask vector of the most significant bits.
Packs the most significant bits in each 8-bit field to the logically left-most bits in a binary mask vector. The unused fields in the mask vector are undefined.
maskv | The output mask vector. | |
vec | The input vector. |
#define RC_VEC_GETMASKW | ( | maskw, | |||
vec | ) |
Get a binary mask word of the most significant bits.
Packs the most significant bits in each 8-bit field to the physically left-most bits in a binary mask word. The unused mask bits are set to zero.
maskw | The output mask word as an unsigned int. | |
vec | The input vector. |
#define RC_VEC_HINT_AVGR |
Hint to use AVGR/SUBHR instead of AVGT/SUBHT when possible.
#define RC_VEC_HINT_AVGT |
Hint to use AVGT/SUBHT instead of AVGR/SUBHR when possible.
#define RC_VEC_HINT_CMPGE |
Hint to use CMPGE instead of CMPGT when possible.
#define RC_VEC_HINT_CMPGT |
Hint to use CMPGT instead of CMPGE when possible.
#define RC_VEC_HINT_GETMASKV |
Hint to use GETMASKV instead of GETMASKW when possible.
#define RC_VEC_HINT_GETMASKW |
Hint to use GETMASKW instead of GETMASKV when possible.
#define RC_VEC_LDINIT | ( | vec1, | |||
vec2, | |||||
vec3, | |||||
uptr, | |||||
ptr | ) |
Initialize a sequence of misaligned loads.
The output is three intermediate vectors and a data pointer to be passed to RC_VEC_LOADU(). The input data pointer must not be aligned on an RC_VEC_SIZE boundary. The result of the operation is implementation-specific. Mandatory if RC_VEC_LOADU() is implemented.
vec1 | Internal state vector 1. | |
vec2 | Internal state vector 2. | |
vec3 | Internal state vector 3. | |
uptr | Read data pointer. | |
ptr | Misaligned data pointer. |
#define RC_VEC_LERP | ( | dstv, | |||
srcv1, | |||||
srcv2, | |||||
blend8, | |||||
blendv | ) |
Linear interpolation.
Computes dstv = srcv1 + ((blend8*(srcv2 - srcv1) + 0x80) >> 8) for each 8-bit field. The Q.8 blend factor blend8 must be in the range [0x1,0x7f].
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. | |
blendv | The blend vector obtained from RC_VEC_BLEND(). | |
blend8 | The Q.8 fixed-point scalar blend factor as an unsigned int. |
#define RC_VEC_LERPN | ( | dstv, | |||
srcv1, | |||||
srcv2, | |||||
blend8, | |||||
blendv | ) |
Linear interpolation rounded towards srcv2.
Computes dstv = srcv1 + (blend8*(srcv2 - srcv1)/256) for each 8-bit field, with the update term rounded away from zero. The Q.8 blend factor blend8 must be in the range [0x1,0x7f].
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. | |
blendv | The blend vector obtained from RC_VEC_BLENDN(). | |
blend8 | The Q.8 fixed-point scalar blend factor as an unsigned int. |
#define RC_VEC_LERPZ | ( | dstv, | |||
srcv1, | |||||
srcv2, | |||||
blend8, | |||||
blendv | ) |
Linear interpolation rounded towards srcv1.
Computes dstv = srcv1 + (blend8*(srcv2 - srcv1)/256) for each 8-bit field, with the update term rounded towards zero. The Q.8 blend factor blend8 must be in the range [0x1,0x7f].
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. | |
blendv | The blend vector obtained from RC_VEC_BLENDZ(). | |
blend8 | The Q.8 fixed-point scalar blend factor as an unsigned int. |
#define RC_VEC_LOAD | ( | vec, | |||
ptr | ) |
Aligned vector load from memory.
The data pointer must be aligned on an RC_VEC_SIZE boundary. Mandatory.
vec | The loaded vector. | |
ptr | An aligned data pointer to load the vector from. |
#define RC_VEC_LOADU | ( | dstv, | |||
vec1, | |||||
vec2, | |||||
vec3, | |||||
uptr | ) |
Misaligned vector load from memory.
The intermediate vectors and data pointer from RC_VEC_LDINIT() are used to load a misaligned vector. The vector can only be loaded once. To load the next misaligned vector, the data pointer uptr must be advanced RC_VEC_SIZE bytes.
dstv | The loaded misaligned vector. | |
vec1 | Internal state vector 1. | |
vec2 | Internal state vector 2. | |
vec3 | Internal state vector 3. | |
uptr | Read data pointer. |
#define RC_VEC_MACN |
Multiply and accumulate all 8-bit fields, iteration count.
The format of the accumulator vector is implementation-specific, but RC_VEC_MACV() and RC_VEC_MACR() together compute the sum. The accumulation step can be iterated at most RC_VEC_MACN times before the reduction step. The value of RC_VEC_MACN must be either one, or be divisible by four.
#define RC_VEC_MACR | ( | mac, | |||
accv | ) |
Multiply and accumulate all 8-bit fields, reduction step.
mac | The uint32_t output multiply-accumulate value. | |
accv | The input accumulator to reduce. |
#define RC_VEC_MACV | ( | accv, | |||
srcv1, | |||||
srcv2 | ) |
Multiply and accumulate all 8-bit fields, accumulation step.
accv | The input/output accumulator vector. | |
srcv1 | The first source vector. | |
srcv2 | The second source vector. |
#define RC_VEC_MAX | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Maximum value.
Computes dstv = max(srcv1, srcv2) for each 8-bit field.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_MIN | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Minimum value.
Computes dstv = min(srcv1, srcv2) for each 8-bit field.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_NOT | ( | dstv, | |||
srcv | ) |
Bitwise NOT.
Computes dstv = ~srcv.
dstv | The output vector. | |
srcv | The input vector. |
#define RC_VEC_OR | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Bitwise OR.
Computes dstv = srcv1 | srcv2 for all bits.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_ORNOT | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Bitwise OR NOT.
Computes dstv = srcv1 | ~srcv2 for all bits.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_PACK | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Pack the even fields of srcv1 and srcv2 into one vector, with fields 0, ..., RC_VEC_SIZE/2 - 1 from srcv1 and fields RC_VEC_SIZE/2, ..., RC_VEC_SIZE - 1 from srcv2.
dstv | Packed output vector. | |
srcv1 | First source vector. | |
srcv2 | Second source vector. |
#define RC_VEC_SETMASKV | ( | vec, | |||
maskv | ) |
Set 8-bit vector fields from a binary mask vector.
Unpack each of the left-most bits from the binary mask vector into the corresponding 8-bit field in the output vector, as the value zero or 0xff, for as many bits as fit into the output vector. Except for producing the specific value 0xff for bits with the value one, this is the inverse of RC_VEC_GETMASKV().
vec | The output vector. | |
maskv | The input mask vector. |
#define RC_VEC_SHINIT | ( | shv, | |||
bytes | ) |
Initialize a field shift.
Initializes a shift vector to be used for RC_VEC_SHL() / RC_VEC_SHR() operations.
shv | Output shift vector. | |
bytes | The field shift number in the range [0, RC_VEC_SIZE - 1]. |
#define RC_VEC_SHL | ( | dstv, | |||
srcv, | |||||
shv | ) |
Shift all fields to the logical left.
dstv | Output vector. | |
srcv | Input vector. | |
shv | Shift vector obtained from RC_VEC_SHINIT(). |
#define RC_VEC_SHLC | ( | dstv, | |||
srcv, | |||||
bytes | ) |
Shift all fields to the logical left by a constant value.
dstv | Output vector. | |
srcv | Input vector. | |
bytes | The field shift constant in the range [0, RC_VEC_SIZE - 1]. |
#define RC_VEC_SHR | ( | dstv, | |||
srcv, | |||||
shv | ) |
Shift fields to the logical right.
dstv | Output vector. | |
srcv | Input vector. | |
shv | Shift vector obtained from RC_VEC_SHINIT(). |
#define RC_VEC_SHRC | ( | dstv, | |||
srcv, | |||||
bytes | ) |
Shift fields to the logical right by a constant value.
dstv | Output vector. | |
srcv | Input vector. | |
bytes | The field shift constant in the range [0, RC_VEC_SIZE - 1]. |
#define RC_VEC_SIZE |
The number of bytes in a vector.
Must be a literal constant, not something like sizeof(rc_vec_t). Mandatory.
#define RC_VEC_SPLAT | ( | vec, | |||
scal | ) |
Splat a scalar variable.
Sets the scalar value 'scal' in each 8-bit field.
vec | The output vector. | |
scal | The 8-bit scalar variable to set, any scalar data type. |
#define RC_VEC_STORE | ( | ptr, | |||
vec | ) |
Aligned vector store to memory.
The data pointer must be aligned on an RC_VEC_SIZE boundary. Mandatory.
ptr | An aligned data pointer where to store the vector. | |
vec | The vector to store. |
#define RC_VEC_SUBA | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Absolute-value subtraction.
Computes dstv = abs(srcv1 - srcv2) for each 8-bit field.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_SUBHR | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Half subtraction with bias, rounded.
Computes dstv = (srcv1 - srcv2 + 0x100) >> 1 for each 8-bit field.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_SUBHT | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Half subtraction with bias, truncated.
Computes dstv = (srcv1 - srcv2 + 0xff) >> 1 for each 8-bit field.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_SUBS | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Saturating subtraction.
Computes dstv = max(srcv1 - srcv2, 0) for each 8-bit field.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_SUMN |
Sum all 8-bit vector fields, iteration count.
The format of the accumulator vector is implementation-specific, but RC_VEC_SUMV() and RC_VEC_SUMR() together compute the sum. The accumulation step can be iterated at most RC_VEC_SUMN times before the reduction step. The value of RC_VEC_SUMN must be either one, or be divisible by four.
#define RC_VEC_SUMR | ( | sum, | |||
accv | ) |
Sum all 8-bit vector fields, reduction step.
sum | The uint32_t output sum value. | |
accv | The input accumulator to reduce. |
#define RC_VEC_SUMV | ( | accv, | |||
srcv | ) |
Sum all 8-bit vector fields, accumulation step.
accv | The input/output accumulator vector. | |
srcv | The input source vector. |
#define RC_VEC_XOR | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Bitwise XOR.
Computes dstv = srcv1 ^ srcv2 for all bits.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_XORNOT | ( | dstv, | |||
srcv1, | |||||
srcv2 | ) |
Bitwise XOR NOT.
Computes dstv = srcv1 ^ ~srcv2 for all bits.
dstv | The output vector. | |
srcv1 | The first input vector. | |
srcv2 | The second input vector. |
#define RC_VEC_ZERO | ( | vec | ) |
Set all bits to zero.
Sets vec = 0.
vec | The vector to set to zero. |
typedef arch_vector_t rc_vec_t |
The vector type definition.
Mandatory.