Updated 32-bit division so it's now actually accurate to within 0.00065% even for non-reciprocals, the algorithm is slightly slower and slightly less accurate than the 64-bit version on my machine though.
Added 32-bit tan port using joe's atan2_complex algorithm, this is accurate to 0.0055% when used with the new 32-bit divide.
This commit is contained in:
parent
577a41ec50
commit
9c8a4f9aa9
|
@ -83,26 +83,60 @@ fix16_t fix16_div(fix16_t inArg0, fix16_t inArg1) {
|
|||
tempResult /= inArg1;
|
||||
return tempResult;
|
||||
#else
|
||||
int32_t rcp = (0xFFFFFFFF / inArg1);
|
||||
#ifndef FIXMATH_FAST_DIV
|
||||
if(((0xFFFFFFFF % inArg1) + 1) >= inArg1)
|
||||
rcp++;
|
||||
#endif
|
||||
int32_t rcp_hi = rcp >> 16;
|
||||
int neg = ((inArg0 < 0) != (inArg1 < 0));
|
||||
inArg0 = (inArg0 < 0 ? -inArg0 : inArg0);
|
||||
inArg1 = (inArg1 < 0 ? -inArg1 : inArg1);
|
||||
|
||||
uint32_t rcp_lo = rcp & 0xFFFF;
|
||||
int32_t arg_hi = (inArg0 >> 16);
|
||||
uint32_t arg_lo = (inArg0 & 0xFFFF);
|
||||
while(((inArg0 | inArg1) & 1) == 0) {
|
||||
inArg0 >>= 1;
|
||||
inArg1 >>= 1;
|
||||
}
|
||||
|
||||
int32_t res_hi = rcp_hi * arg_hi;
|
||||
int32_t res_md = (rcp_hi * arg_lo) + (rcp_lo * arg_hi);
|
||||
uint32_t res_lo = rcp_lo * arg_lo;
|
||||
uint32_t r_hi = (inArg0 / inArg1);
|
||||
|
||||
int32_t res = (res_hi << 16) + res_md + (res_lo >> 16);
|
||||
uint32_t n_lo = (inArg0 % inArg1);
|
||||
uint32_t n_hi = (n_lo >> 16);
|
||||
n_lo <<= 16;
|
||||
|
||||
uint32_t i, arg;
|
||||
for(i = 1, arg = inArg1; ((n_lo | arg) & 1) == 0; i <<= 1) {
|
||||
n_lo = ((n_lo >> 1) | (n_hi << 31));
|
||||
n_hi = (n_hi >> 1);
|
||||
arg >>= 1;
|
||||
}
|
||||
|
||||
uint32_t res = 0;
|
||||
if(n_hi) {
|
||||
uint32_t arg_lo, arg_hi;
|
||||
for(arg_lo = inArg1; (arg_lo >> 31) == 0; arg_lo <<= 1, i <<= 1);
|
||||
for(arg_hi = (arg_lo >> 31), arg_lo <<= 1, i <<= 1; arg_hi < n_hi; arg_hi = (arg_hi << 1) | (arg_lo >> 31), arg_lo <<= 1, i <<= 1);
|
||||
|
||||
do {
|
||||
arg_lo = (arg_lo >> 1) | (arg_hi << 31);
|
||||
arg_hi = (arg_hi >> 1);
|
||||
i >>= 1;
|
||||
if(arg_hi < n_hi) {
|
||||
n_hi -= arg_hi;
|
||||
if(arg_lo > n_lo)
|
||||
n_hi--;
|
||||
n_lo -= arg_lo;
|
||||
res += i;
|
||||
} else if((arg_hi == n_hi) && (arg_lo <= n_lo)) {
|
||||
n_hi -= arg_hi;
|
||||
n_lo -= arg_lo;
|
||||
res += i;
|
||||
}
|
||||
} while(n_hi);
|
||||
}
|
||||
|
||||
res += (n_lo / inArg1);
|
||||
#ifndef FIXMATH_NO_ROUNDING
|
||||
res += ((res_lo >> 15) & 1);
|
||||
if((n_lo % inArg1) >= (inArg1 >> 1))
|
||||
res++;
|
||||
#endif
|
||||
return res;
|
||||
res += (r_hi << 16);
|
||||
|
||||
return (neg ? -res : res);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -120,42 +154,73 @@ fix16_t fix16_sdiv(fix16_t inArg0, fix16_t inArg1) {
|
|||
#endif
|
||||
tempResult /= inArg1;
|
||||
if(tempResult < fix16_min)
|
||||
return fix16_MIN;
|
||||
return fix16_min;
|
||||
if(tempResult > fix16_max)
|
||||
return fix16_max;
|
||||
return tempResult;
|
||||
#else
|
||||
int32_t rcp = (0xFFFFFFFF / inArg1);
|
||||
#ifndef FIXMATH_FAST_DIV
|
||||
if(((0xFFFFFFFF % inArg1) + 1) >= inArg1)
|
||||
rcp++;
|
||||
#endif
|
||||
int32_t rcp_hi = rcp >> 16;
|
||||
if(rcp_hi >= 32768)
|
||||
return fix16_max;
|
||||
if(rcp_hi < -32768)
|
||||
return fix16_min;
|
||||
int neg = ((inArg0 < 0) != (inArg1 < 0));
|
||||
inArg0 = (inArg0 < 0 ? -inArg0 : inArg0);
|
||||
inArg1 = (inArg1 < 0 ? -inArg1 : inArg1);
|
||||
|
||||
uint32_t rcp_lo = rcp & 0xFFFF;
|
||||
int32_t arg_hi = (inArg0 >> 16);
|
||||
uint32_t arg_lo = (inArg0 & 0xFFFF);
|
||||
while(((inArg0 | inArg1) & 1) == 0) {
|
||||
inArg0 >>= 1;
|
||||
inArg1 >>= 1;
|
||||
}
|
||||
|
||||
int32_t res_hi = rcp_hi * arg_hi;
|
||||
int32_t res_md = (rcp_hi * arg_lo) + (rcp_lo * arg_hi);
|
||||
uint32_t res_lo = rcp_lo * arg_lo;
|
||||
uint32_t r_hi = (inArg0 / inArg1);
|
||||
if(r_hi > (neg ? 32768 : 32767))
|
||||
return (neg ? fix16_min : fix16_max);
|
||||
|
||||
// TODO - Check properly for overflows at this stage.
|
||||
uint32_t n_lo = (inArg0 % inArg1);
|
||||
uint32_t n_hi = (n_lo >> 16);
|
||||
n_lo <<= 16;
|
||||
|
||||
int32_t res = (res_hi << 16) + res_md + (res_lo >> 16);
|
||||
uint32_t i, arg;
|
||||
for(i = 1, arg = inArg1; ((n_lo | arg) & 1) == 0; i <<= 1) {
|
||||
n_lo = ((n_lo >> 1) | (n_hi << 31));
|
||||
n_hi = (n_hi >> 1);
|
||||
arg >>= 1;
|
||||
}
|
||||
|
||||
uint32_t res = 0;
|
||||
if(n_hi) {
|
||||
uint32_t arg_lo, arg_hi;
|
||||
for(arg_lo = inArg1; (arg_lo >> 31) == 0; arg_lo <<= 1, i <<= 1);
|
||||
for(arg_hi = (arg_lo >> 31), arg_lo <<= 1, i <<= 1; arg_hi < n_hi; arg_hi = (arg_hi << 1) | (arg_lo >> 31), arg_lo <<= 1, i <<= 1);
|
||||
|
||||
do {
|
||||
arg_lo = (arg_lo >> 1) | (arg_hi << 31);
|
||||
arg_hi = (arg_hi >> 1);
|
||||
i >>= 1;
|
||||
if(arg_hi < n_hi) {
|
||||
n_hi -= arg_hi;
|
||||
if(arg_lo > n_lo)
|
||||
n_hi--;
|
||||
n_lo -= arg_lo;
|
||||
res += i;
|
||||
} else if((arg_hi == n_hi) && (arg_lo <= n_lo)) {
|
||||
n_hi -= arg_hi;
|
||||
n_lo -= arg_lo;
|
||||
res += i;
|
||||
}
|
||||
} while(n_hi);
|
||||
}
|
||||
|
||||
res += (n_lo / inArg1);
|
||||
#ifndef FIXMATH_NO_ROUNDING
|
||||
res += ((res_lo >> 15) & 1);
|
||||
if((n_lo % inArg1) >= (inArg1 >> 1))
|
||||
res++;
|
||||
#endif
|
||||
return res;
|
||||
res += (r_hi << 16);
|
||||
|
||||
return (neg ? -res : res);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifndef FIXMATH_NO_64BIT
|
||||
fix16_t fix16_lerp8(fix16_t inArg0, fix16_t inArg1, uint8_t inFract) {
|
||||
int64_t tempOut;
|
||||
tempOut = ((int64_t)inArg0 * (256 - inFract));
|
||||
|
@ -179,3 +244,4 @@ fix16_t fix16_lerp32(fix16_t inArg0, fix16_t inArg1, uint32_t inFract) {
|
|||
tempOut >>= 32;
|
||||
return (fix16_t)tempOut;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -66,11 +66,13 @@ extern fix16_t fix16_sdiv(fix16_t inArg0, fix16_t inArg1);
|
|||
|
||||
|
||||
|
||||
#ifndef FIXMATH_NO_64BIT
|
||||
/*! Returns the linear interpolation: (inArg0 * (1 - inFract)) + (inArg1 * inFract)
|
||||
*/
|
||||
extern fix16_t fix16_lerp8(fix16_t inArg0, fix16_t inArg1, uint8_t inFract);
|
||||
extern fix16_t fix16_lerp16(fix16_t inArg0, fix16_t inArg1, uint16_t inFract);
|
||||
extern fix16_t fix16_lerp32(fix16_t inArg0, fix16_t inArg1, uint32_t inFract);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ fix16_t fix16_sqrt(fix16_t inValue) {
|
|||
tempOne >>= 2;
|
||||
|
||||
while(tempOne != 0) {
|
||||
if (tempOp >= tempOut + tempOne) {
|
||||
if(tempOp >= tempOut + tempOne) {
|
||||
tempOp -= tempOut + tempOne;
|
||||
tempOut += tempOne << 1;
|
||||
}
|
||||
|
|
|
@ -66,41 +66,6 @@ fix16_t fix16_sin(fix16_t inAngle) {
|
|||
tempOut = fix16_mul(tempOut, tempAngle);
|
||||
#endif
|
||||
|
||||
/*// Broken implementation, meant to be slightly faster and much more accurate than the above 'accurate' implementation.
|
||||
int64_t tempAngleSq = tempAngle * tempAngle;
|
||||
#ifndef FIXMATH_NO_ROUNDING
|
||||
tempAngleSq += (fix16_one >> 1);
|
||||
#endif
|
||||
tempAngleSq >>= 16;
|
||||
|
||||
int64_t tempOut;
|
||||
tempOut = (-108 * tempAngleSq) + 11836;
|
||||
#ifndef FIXMATH_NO_ROUNDING
|
||||
tempOut += (fix16_one >> 1);
|
||||
#endif
|
||||
tempOut >>= 16;
|
||||
|
||||
tempOut = (tempOut * tempAngleSq) - 852176;
|
||||
#ifndef FIXMATH_NO_ROUNDING
|
||||
tempOut += (fix16_one >> 1);
|
||||
#endif
|
||||
tempOut >>= 16;
|
||||
|
||||
tempOut = (tempOut * tempAngleSq) + 35791394;
|
||||
#ifndef FIXMATH_NO_ROUNDING
|
||||
tempOut += (fix16_one >> 1);
|
||||
#endif
|
||||
tempOut >>= 16;
|
||||
|
||||
tempOut = (tempOut * tempAngleSq) - 715827883;
|
||||
#ifndef FIXMATH_NO_ROUNDING
|
||||
tempOut += (fix16_one >> 1);
|
||||
#endif
|
||||
tempOut >>= 16;
|
||||
|
||||
tempOut = fix16_mul(tempOut, tempAngle) + tempAngle;
|
||||
*/
|
||||
|
||||
#ifndef FIXMATH_NO_CACHE
|
||||
_fix16_sin_cache_index[tempIndex] = inAngle;
|
||||
_fix16_sin_cache_value[tempIndex] = tempOut;
|
||||
|
@ -142,6 +107,8 @@ fix16_t fix16_atan2(fix16_t inY , fix16_t inX) {
|
|||
#endif
|
||||
|
||||
fix16_t absy = (inY < 0 ? -inY : inY);
|
||||
|
||||
#ifndef FIXMATH_NO_64BIT
|
||||
int64_t i = inX + (inX >= 0 ? -absy : absy);
|
||||
int64_t j = (inX >= 0 ? inX : -inX) + absy;
|
||||
if(j == 0)
|
||||
|
@ -182,6 +149,24 @@ fix16_t fix16_atan2(fix16_t inY , fix16_t inX) {
|
|||
angle += (fix16_one >> 1);
|
||||
#endif
|
||||
angle >>= 16;
|
||||
#else
|
||||
fix16_t angle;
|
||||
if(inX >= 0) {
|
||||
fix16_t r = fix16_sdiv(fix16_sadd(inX, -absy),
|
||||
fix16_sadd(inX, absy));
|
||||
angle = fix16_sadd(fix16_sadd(
|
||||
fix16_mul(12864, fix16_mul(r, fix16_mul(r, r))),
|
||||
fix16_mul(-64336, r)),
|
||||
51471); // pi/4
|
||||
} else {
|
||||
fix16_t r = fix16_sdiv(fix16_sadd(inX, absy),
|
||||
fix16_sadd(absy, -inX));
|
||||
angle = fix16_sadd(fix16_sadd(
|
||||
fix16_mul(12864, fix16_mul(r, fix16_mul(r, r))),
|
||||
fix16_mul(-64336, r)),
|
||||
154415); // 3pi/4
|
||||
}
|
||||
#endif
|
||||
angle = (inY < 0 ? -angle : angle);
|
||||
|
||||
#ifndef FIXMATH_NO_CACHE
|
||||
|
|
Loading…
Reference in New Issue