From 9c8a4f9aa904e1bc8076093566d0bd8a029c4d44 Mon Sep 17 00:00:00 2001 From: flatmush Date: Tue, 1 Mar 2011 15:26:37 +0000 Subject: Updated 32-bit division so it's now actually accurate to within 0.00065% even for non-reciprocals, the algorithm is slightly slower and slightly less accurate than the 64-bit version on my machine though. Added 32-bit tan port using joe's atan2_complex algorithm, this is accurate to 0.0055% when used with the new 32-bit divide. --- libfixmath/fix16.c | 138 +++++++++++++++++++++++++++++++++++------------- libfixmath/fix16.h | 2 + libfixmath/fix16_sqrt.c | 2 +- libfixmath/fix16_trig.c | 55 +++++++------------ 4 files changed, 125 insertions(+), 72 deletions(-) diff --git a/libfixmath/fix16.c b/libfixmath/fix16.c index 49473ca..129541f 100644 --- a/libfixmath/fix16.c +++ b/libfixmath/fix16.c @@ -83,26 +83,60 @@ fix16_t fix16_div(fix16_t inArg0, fix16_t inArg1) { tempResult /= inArg1; return tempResult; #else - int32_t rcp = (0xFFFFFFFF / inArg1); - #ifndef FIXMATH_FAST_DIV - if(((0xFFFFFFFF % inArg1) + 1) >= inArg1) - rcp++; - #endif - int32_t rcp_hi = rcp >> 16; + int neg = ((inArg0 < 0) != (inArg1 < 0)); + inArg0 = (inArg0 < 0 ? -inArg0 : inArg0); + inArg1 = (inArg1 < 0 ? -inArg1 : inArg1); + + while(((inArg0 | inArg1) & 1) == 0) { + inArg0 >>= 1; + inArg1 >>= 1; + } - uint32_t rcp_lo = rcp & 0xFFFF; - int32_t arg_hi = (inArg0 >> 16); - uint32_t arg_lo = (inArg0 & 0xFFFF); + uint32_t r_hi = (inArg0 / inArg1); - int32_t res_hi = rcp_hi * arg_hi; - int32_t res_md = (rcp_hi * arg_lo) + (rcp_lo * arg_hi); - uint32_t res_lo = rcp_lo * arg_lo; + uint32_t n_lo = (inArg0 % inArg1); + uint32_t n_hi = (n_lo >> 16); + n_lo <<= 16; + + uint32_t i, arg; + for(i = 1, arg = inArg1; ((n_lo | arg) & 1) == 0; i <<= 1) { + n_lo = ((n_lo >> 1) | (n_hi << 31)); + n_hi = (n_hi >> 1); + arg >>= 1; + } - int32_t res = (res_hi << 16) + res_md + (res_lo >> 16); + uint32_t res = 0; + if(n_hi) { + uint32_t arg_lo, arg_hi; + for(arg_lo = inArg1; (arg_lo >> 31) == 0; arg_lo <<= 1, i <<= 1); + for(arg_hi = (arg_lo >> 31), arg_lo <<= 1, i <<= 1; arg_hi < n_hi; arg_hi = (arg_hi << 1) | (arg_lo >> 31), arg_lo <<= 1, i <<= 1); + + do { + arg_lo = (arg_lo >> 1) | (arg_hi << 31); + arg_hi = (arg_hi >> 1); + i >>= 1; + if(arg_hi < n_hi) { + n_hi -= arg_hi; + if(arg_lo > n_lo) + n_hi--; + n_lo -= arg_lo; + res += i; + } else if((arg_hi == n_hi) && (arg_lo <= n_lo)) { + n_hi -= arg_hi; + n_lo -= arg_lo; + res += i; + } + } while(n_hi); + } + + res += (n_lo / inArg1); #ifndef FIXMATH_NO_ROUNDING - res += ((res_lo >> 15) & 1); + if((n_lo % inArg1) >= (inArg1 >> 1)) + res++; #endif - return res; + res += (r_hi << 16); + + return (neg ? -res : res); #endif } @@ -120,42 +154,73 @@ fix16_t fix16_sdiv(fix16_t inArg0, fix16_t inArg1) { #endif tempResult /= inArg1; if(tempResult < fix16_min) - return fix16_MIN; + return fix16_min; if(tempResult > fix16_max) return fix16_max; return tempResult; #else - int32_t rcp = (0xFFFFFFFF / inArg1); - #ifndef FIXMATH_FAST_DIV - if(((0xFFFFFFFF % inArg1) + 1) >= inArg1) - rcp++; - #endif - int32_t rcp_hi = rcp >> 16; - if(rcp_hi >= 32768) - return fix16_max; - if(rcp_hi < -32768) - return fix16_min; + int neg = ((inArg0 < 0) != (inArg1 < 0)); + inArg0 = (inArg0 < 0 ? -inArg0 : inArg0); + inArg1 = (inArg1 < 0 ? -inArg1 : inArg1); - uint32_t rcp_lo = rcp & 0xFFFF; - int32_t arg_hi = (inArg0 >> 16); - uint32_t arg_lo = (inArg0 & 0xFFFF); + while(((inArg0 | inArg1) & 1) == 0) { + inArg0 >>= 1; + inArg1 >>= 1; + } - int32_t res_hi = rcp_hi * arg_hi; - int32_t res_md = (rcp_hi * arg_lo) + (rcp_lo * arg_hi); - uint32_t res_lo = rcp_lo * arg_lo; + uint32_t r_hi = (inArg0 / inArg1); + if(r_hi > (neg ? 32768 : 32767)) + return (neg ? fix16_min : fix16_max); - // TODO - Check properly for overflows at this stage. + uint32_t n_lo = (inArg0 % inArg1); + uint32_t n_hi = (n_lo >> 16); + n_lo <<= 16; - int32_t res = (res_hi << 16) + res_md + (res_lo >> 16); + uint32_t i, arg; + for(i = 1, arg = inArg1; ((n_lo | arg) & 1) == 0; i <<= 1) { + n_lo = ((n_lo >> 1) | (n_hi << 31)); + n_hi = (n_hi >> 1); + arg >>= 1; + } + + uint32_t res = 0; + if(n_hi) { + uint32_t arg_lo, arg_hi; + for(arg_lo = inArg1; (arg_lo >> 31) == 0; arg_lo <<= 1, i <<= 1); + for(arg_hi = (arg_lo >> 31), arg_lo <<= 1, i <<= 1; arg_hi < n_hi; arg_hi = (arg_hi << 1) | (arg_lo >> 31), arg_lo <<= 1, i <<= 1); + + do { + arg_lo = (arg_lo >> 1) | (arg_hi << 31); + arg_hi = (arg_hi >> 1); + i >>= 1; + if(arg_hi < n_hi) { + n_hi -= arg_hi; + if(arg_lo > n_lo) + n_hi--; + n_lo -= arg_lo; + res += i; + } else if((arg_hi == n_hi) && (arg_lo <= n_lo)) { + n_hi -= arg_hi; + n_lo -= arg_lo; + res += i; + } + } while(n_hi); + } + + res += (n_lo / inArg1); #ifndef FIXMATH_NO_ROUNDING - res += ((res_lo >> 15) & 1); + if((n_lo % inArg1) >= (inArg1 >> 1)) + res++; #endif - return res; + res += (r_hi << 16); + + return (neg ? -res : res); #endif } +#ifndef FIXMATH_NO_64BIT fix16_t fix16_lerp8(fix16_t inArg0, fix16_t inArg1, uint8_t inFract) { int64_t tempOut; tempOut = ((int64_t)inArg0 * (256 - inFract)); @@ -179,3 +244,4 @@ fix16_t fix16_lerp32(fix16_t inArg0, fix16_t inArg1, uint32_t inFract) { tempOut >>= 32; return (fix16_t)tempOut; } +#endif diff --git a/libfixmath/fix16.h b/libfixmath/fix16.h index d1a1230..b3ab70c 100644 --- a/libfixmath/fix16.h +++ b/libfixmath/fix16.h @@ -66,11 +66,13 @@ extern fix16_t fix16_sdiv(fix16_t inArg0, fix16_t inArg1); +#ifndef FIXMATH_NO_64BIT /*! Returns the linear interpolation: (inArg0 * (1 - inFract)) + (inArg1 * inFract) */ extern fix16_t fix16_lerp8(fix16_t inArg0, fix16_t inArg1, uint8_t inFract); extern fix16_t fix16_lerp16(fix16_t inArg0, fix16_t inArg1, uint16_t inFract); extern fix16_t fix16_lerp32(fix16_t inArg0, fix16_t inArg1, uint32_t inFract); +#endif diff --git a/libfixmath/fix16_sqrt.c b/libfixmath/fix16_sqrt.c index 3d8e164..f780ef6 100644 --- a/libfixmath/fix16_sqrt.c +++ b/libfixmath/fix16_sqrt.c @@ -24,7 +24,7 @@ fix16_t fix16_sqrt(fix16_t inValue) { tempOne >>= 2; while(tempOne != 0) { - if (tempOp >= tempOut + tempOne) { + if(tempOp >= tempOut + tempOne) { tempOp -= tempOut + tempOne; tempOut += tempOne << 1; } diff --git a/libfixmath/fix16_trig.c b/libfixmath/fix16_trig.c index 5d98872..a198b3d 100644 --- a/libfixmath/fix16_trig.c +++ b/libfixmath/fix16_trig.c @@ -66,41 +66,6 @@ fix16_t fix16_sin(fix16_t inAngle) { tempOut = fix16_mul(tempOut, tempAngle); #endif - /*// Broken implementation, meant to be slightly faster and much more accurate than the above 'accurate' implementation. - int64_t tempAngleSq = tempAngle * tempAngle; - #ifndef FIXMATH_NO_ROUNDING - tempAngleSq += (fix16_one >> 1); - #endif - tempAngleSq >>= 16; - - int64_t tempOut; - tempOut = (-108 * tempAngleSq) + 11836; - #ifndef FIXMATH_NO_ROUNDING - tempOut += (fix16_one >> 1); - #endif - tempOut >>= 16; - - tempOut = (tempOut * tempAngleSq) - 852176; - #ifndef FIXMATH_NO_ROUNDING - tempOut += (fix16_one >> 1); - #endif - tempOut >>= 16; - - tempOut = (tempOut * tempAngleSq) + 35791394; - #ifndef FIXMATH_NO_ROUNDING - tempOut += (fix16_one >> 1); - #endif - tempOut >>= 16; - - tempOut = (tempOut * tempAngleSq) - 715827883; - #ifndef FIXMATH_NO_ROUNDING - tempOut += (fix16_one >> 1); - #endif - tempOut >>= 16; - - tempOut = fix16_mul(tempOut, tempAngle) + tempAngle; - */ - #ifndef FIXMATH_NO_CACHE _fix16_sin_cache_index[tempIndex] = inAngle; _fix16_sin_cache_value[tempIndex] = tempOut; @@ -142,6 +107,8 @@ fix16_t fix16_atan2(fix16_t inY , fix16_t inX) { #endif fix16_t absy = (inY < 0 ? -inY : inY); + + #ifndef FIXMATH_NO_64BIT int64_t i = inX + (inX >= 0 ? -absy : absy); int64_t j = (inX >= 0 ? inX : -inX) + absy; if(j == 0) @@ -182,6 +149,24 @@ fix16_t fix16_atan2(fix16_t inY , fix16_t inX) { angle += (fix16_one >> 1); #endif angle >>= 16; + #else + fix16_t angle; + if(inX >= 0) { + fix16_t r = fix16_sdiv(fix16_sadd(inX, -absy), + fix16_sadd(inX, absy)); + angle = fix16_sadd(fix16_sadd( + fix16_mul(12864, fix16_mul(r, fix16_mul(r, r))), + fix16_mul(-64336, r)), + 51471); // pi/4 + } else { + fix16_t r = fix16_sdiv(fix16_sadd(inX, absy), + fix16_sadd(absy, -inX)); + angle = fix16_sadd(fix16_sadd( + fix16_mul(12864, fix16_mul(r, fix16_mul(r, r))), + fix16_mul(-64336, r)), + 154415); // 3pi/4 + } + #endif angle = (inY < 0 ? -angle : angle); #ifndef FIXMATH_NO_CACHE -- cgit v1.2.3