diff --git a/libfixmath/fix16.c b/libfixmath/fix16.c index 1ea4ff5..72e1b4a 100644 --- a/libfixmath/fix16.c +++ b/libfixmath/fix16.c @@ -2,243 +2,462 @@ #include "int64.h" +/* Subtraction and addition with overflow detection. + * The versions without overflow detection are inlined in the header. + */ +#ifndef FIXMATH_NO_OVERFLOW +fix16_t fix16_add(fix16_t a, fix16_t b) +{ + // Use unsigned integers because overflow with signed integers is + // an undefined operation (http://www.airs.com/blog/archives/120). + uint32_t _a = a, _b = b; + uint32_t sum = _a + _b; -fix16_t fix16_sadd(fix16_t inArg0, fix16_t inArg1) { - fix16_t tempResult = (inArg0 + inArg1); - if((tempResult > 0) && (inArg0 < 0) && (inArg1 < 0)) - return fix16_min; - if((tempResult < 0) && (inArg0 > 0) && (inArg1 > 0)) - return fix16_max; - return tempResult; + // Overflow can only happen if sign of a == sign of b, and then + // it causes sign of sum != sign of a. + if (!((_a ^ _b) & 0x80000000) && ((_a ^ sum) & 0x80000000)) + return fix16_overflow; + + return sum; } +fix16_t fix16_sub(fix16_t a, fix16_t b) +{ + uint32_t _a = a, _b = b; + uint32_t diff = _a - _b; - -#if defined(__arm__) || defined(_ARM) || defined(__thumb2__) -fix16_t fix16_mul(int32_t inArg0, int32_t inArg1) { - register fix16_t res; - register fix16_t tmp; - asm( - "smull %0, %3, %1, %2\n\t" - #ifndef FIXMATH_NO_ROUNDING - "add %0, %0, #0x8000\n\t" - #endif - "mov %0, %0, lsr #16\n\t" - "orr %0, %0, %3, lsl #16" - : "=r"(res) - : "r"(inArg0), "r"(inArg1), "r"(tmp) - : "r0"); - return res; + // Overflow can only happen if sign of a != sign of b, and then + // it causes sign of diff != sign of a. + if (((_a ^ _b) & 0x80000000) && ((_a ^ diff) & 0x80000000)) + return fix16_overflow; + + return diff; } -#else -fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1) { - #ifndef FIXMATH_NO_64BIT - int64_t tempResult = ((int64_t)inArg0 * (int64_t)inArg1); - #ifndef FIXMATH_NO_ROUNDING - tempResult += (fix16_one >> 1); - #endif - tempResult >>= 16; - return tempResult; - #else - int16_t hi[2] = { (inArg0 >> 16), (inArg1 >> 16) }; - uint16_t lo[2] = { (inArg0 & 0xFFFF), (inArg1 & 0xFFFF) }; - int32_t r_hi = hi[0] * hi[1]; - int32_t r_md = (hi[0] * lo[1]) + (hi[1] * lo[0]); - uint32_t r_lo = lo[0] * lo[1]; - #ifndef FIXMATH_NO_ROUNDING - r_lo += 0xFFFF; - #endif +/* Saturating arithmetic */ +fix16_t fix16_sadd(fix16_t a, fix16_t b) +{ + fix16_t result = fix16_add(a, b); - r_md += (r_hi & 0xFFFF) << 16; - r_md += (r_lo >> 16); + if (result == fix16_overflow) + return (a > 0) ? fix16_max : fix16_min; - return r_md; - #endif + return result; +} + +fix16_t fix16_ssub(fix16_t a, fix16_t b) +{ + fix16_t result = fix16_sub(a, b); + + if (result == fix16_overflow) + return (a > 0) ? fix16_max : fix16_min; + + return result; } #endif + + +/* 64-bit implementation for fix16_mul. Fastest version for e.g. ARM Cortex M3. + * Performs a 32*32 -> 64bit multiplication. The middle 32 bits are the result, + * bottom 16 bits are used for rounding, and upper 16 bits are used for overflow + * detection. + */ + +#if !defined(FIXMATH_NO_64BIT) && !defined(FIXMATH_OPTIMIZE_8BIT) +fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1) +{ + int64_t product = (int64_t)inArg0 * inArg1; + + #ifndef FIXMATH_NO_OVERFLOW + // The upper 17 bits should all be the same (the sign). + uint32_t upper = (product >> 47); + #endif + + if (product < 0) + { + #ifndef FIXMATH_NO_OVERFLOW + if (~upper) + return fix16_overflow; + #endif + + #ifndef FIXMATH_NO_ROUNDING + // This adjustment is required in order to round -1/2 correctly + product--; + #endif + } + else + { + #ifndef FIXMATH_NO_OVERFLOW + if (upper) + return fix16_overflow; + #endif + } + + #ifdef FIXMATH_NO_ROUNDING + return product >> 16; + #else + fix16_t result = product >> 16; + result += (product & 0x8000) >> 15; + + return result; + #endif +} +#endif + +/* 32-bit implementation of fix16_mul. Potentially fast on 16-bit processors, + * and this is a relatively good compromise for compilers that do not support + * uint64_t. Uses 16*16->32bit multiplications. + */ +#if defined(FIXMATH_NO_64BIT) && !defined(FIXMATH_OPTIMIZE_8BIT) +fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1) +{ + // Each argument is divided to 16-bit parts. + // AB + // * CD + // ----------- + // BD 16 * 16 -> 32 bit products + // CB + // AD + // AC + // |----| 64 bit product + int32_t A = (inArg0 >> 16), C = (inArg1 >> 16); + uint32_t B = (inArg0 & 0xFFFF), D = (inArg1 & 0xFFFF); + + int32_t AC = A*C; + int32_t AD_CB = A*D + C*B; + uint32_t BD = B*D; + + int32_t product_hi = AC + (AD_CB >> 16); + + // Handle carry from lower 32 bits to upper part of result. + uint32_t ad_cb_temp = AD_CB << 16; + uint32_t product_lo = BD + ad_cb_temp; + if (product_lo < BD) + product_hi++; + +#ifndef FIXMATH_NO_OVERFLOW + // The upper 17 bits should all be the same (the sign). + if (product_hi >> 31 != product_hi >> 15) + return fix16_overflow; +#endif + +#ifdef FIXMATH_NO_ROUNDING + return (product_hi << 16) | (product_lo >> 16); +#else + // Subtracting 0x8000 (= 0.5) and then using signed right shift + // achieves proper rounding to result-1, except in the corner + // case of negative numbers and lowest word = 0x8000. + // To handle that, we also have to subtract 1 for negative numbers. + uint32_t product_lo_tmp = product_lo; + product_lo -= 0x8000; + product_lo -= (uint32_t)product_hi >> 31; + if (product_lo > product_lo_tmp) + product_hi--; + + // Discard the lowest 16 bits. Note that this is not exactly the same + // as dividing by 0x10000. For example if product = -1, result will + // also be -1 and not 0. This is compensated by adding +1 to the result + // and compensating this in turn in the rounding above. + fix16_t result = (product_hi << 16) | (product_lo >> 16); + result += 1; + return result; +#endif +} +#endif + +/* 8-bit implementation of fix16_mul. Fastest on e.g. Atmel AVR. + * Uses 8*8->16bit multiplications, and also skips any bytes that + * are zero. + */ +#if defined(FIXMATH_OPTIMIZE_8BIT) +fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1) +{ + uint32_t _a = (inArg0 >= 0) ? inArg0 : (-inArg0); + uint32_t _b = (inArg1 >= 0) ? inArg1 : (-inArg1); + + uint8_t va[4] = {_a, (_a >> 8), (_a >> 16), (_a >> 24)}; + uint8_t vb[4] = {_b, (_b >> 8), (_b >> 16), (_b >> 24)}; + + uint32_t low = 0; + uint32_t mid = 0; + + // Result column i depends on va[0..i] and vb[i..0] + + #ifndef FIXMATH_NO_OVERFLOW + // i = 6 + if (va[3] && vb[3]) return fix16_overflow; + #endif + + // i = 5 + if (va[2] && vb[3]) mid += (uint16_t)va[2] * vb[3]; + if (va[3] && vb[2]) mid += (uint16_t)va[3] * vb[2]; + mid <<= 8; + + // i = 4 + if (va[1] && vb[3]) mid += (uint16_t)va[1] * vb[3]; + if (va[2] && vb[2]) mid += (uint16_t)va[2] * vb[2]; + if (va[3] && vb[1]) mid += (uint16_t)va[3] * vb[1]; + + #ifndef FIXMATH_NO_OVERFLOW + if (mid & 0xFF000000) return fix16_overflow; + #endif + mid <<= 8; + + // i = 3 + if (va[0] && vb[3]) mid += (uint16_t)va[0] * vb[3]; + if (va[1] && vb[2]) mid += (uint16_t)va[1] * vb[2]; + if (va[2] && vb[1]) mid += (uint16_t)va[2] * vb[1]; + if (va[3] && vb[0]) mid += (uint16_t)va[3] * vb[0]; + + #ifndef FIXMATH_NO_OVERFLOW + if (mid & 0xFF000000) return fix16_overflow; + #endif + mid <<= 8; + + // i = 2 + if (va[0] && vb[2]) mid += (uint16_t)va[0] * vb[2]; + if (va[1] && vb[1]) mid += (uint16_t)va[1] * vb[1]; + if (va[2] && vb[0]) mid += (uint16_t)va[2] * vb[0]; + + // i = 1 + if (va[0] && vb[1]) low += (uint16_t)va[0] * vb[1]; + if (va[1] && vb[0]) low += (uint16_t)va[1] * vb[0]; + low <<= 8; + + // i = 0 + if (va[0] && vb[0]) low += (uint16_t)va[0] * vb[0]; + + #ifndef FIXMATH_NO_ROUNDING + low += 0x8000; + #endif + mid += (low >> 16); + + #ifndef FIXMATH_NO_OVERFLOW + if (mid & 0x80000000) + return fix16_overflow; + #endif + + fix16_t result = mid; + + /* Figure out the sign of result */ + if ((inArg0 >= 0) != (inArg1 >= 0)) + { + result = -result; + } + + return result; +} +#endif + +#ifndef FIXMATH_NO_OVERFLOW +/* Wrapper around fix16_mul to add saturating arithmetic. */ fix16_t fix16_smul(fix16_t inArg0, fix16_t inArg1) { - #ifndef FIXMATH_NO_64BIT - int64_t tempResult = ((int64_t)inArg0 * (int64_t)inArg1); - #ifndef FIXMATH_NO_ROUNDING - tempResult += (fix16_one >> 1); - #endif - tempResult >>= 16; - if(tempResult < fix16_min) - return fix16_min; - if(tempResult > fix16_max) - return fix16_max; - return tempResult; - #else - int16_t hi[2] = { (inArg0 >> 16), (inArg1 >> 16) }; - int32_t r_hi = hi[0] * hi[1]; - if(r_hi >> 16) - return (r_hi < 0 ? fix16_min : fix16_max); - - uint16_t lo[2] = { (inArg0 & 0xFFFF), (inArg1 & 0xFFFF) }; - int32_t r_md = (hi[0] * lo[1]) + (hi[1] * lo[0]); - uint32_t r_lo = lo[0] * lo[1]; - #ifndef FIXMATH_NO_ROUNDING - r_lo += 0xFFFF; - #endif - - r_md += (r_hi & 0xFFFF) << 16; - r_md += (r_lo >> 16); - - return r_md; - #endif + fix16_t result = fix16_mul(inArg0, inArg1); + + if (result == fix16_overflow) + { + if ((inArg0 >= 0) == (inArg1 >= 0)) + return fix16_max; + else + return fix16_min; + } + + return result; } +#endif - - -fix16_t fix16_div(fix16_t inArg0, fix16_t inArg1) { - #ifndef FIXMATH_NO_64BIT - int64_t tempResult = inArg0; - tempResult <<= 16; - #ifndef FIXMATH_NO_ROUNDING - tempResult += (inArg1 >> 1); - #endif - tempResult /= inArg1; - return tempResult; - #else - int neg = ((inArg0 < 0) != (inArg1 < 0)); - inArg0 = (inArg0 < 0 ? -inArg0 : inArg0); - inArg1 = (inArg1 < 0 ? -inArg1 : inArg1); - - while(((inArg0 | inArg1) & 1) == 0) { - inArg0 >>= 1; - inArg1 >>= 1; - } - - uint32_t r_hi = (inArg0 / inArg1); - - uint32_t n_lo = (inArg0 % inArg1); - uint32_t n_hi = (n_lo >> 16); - n_lo <<= 16; - uint32_t n_lo_orig = n_lo; - - uint32_t i, arg; - for(i = 1, arg = inArg1; ((n_lo | arg) & 1) == 0; i <<= 1) { - n_lo = ((n_lo >> 1) | (n_hi << 31)); - n_hi = (n_hi >> 1); - arg >>= 1; - } - n_lo = n_lo_orig; - - uint32_t res = 0; - if(n_hi) { - uint32_t arg_lo, arg_hi; - for(arg_lo = inArg1; (arg_lo >> 31) == 0; arg_lo <<= 1, i <<= 1); - for(arg_hi = (arg_lo >> 31), arg_lo <<= 1, i <<= 1; arg_hi < n_hi; arg_hi = (arg_hi << 1) | (arg_lo >> 31), arg_lo <<= 1, i <<= 1); - - do { - arg_lo = (arg_lo >> 1) | (arg_hi << 31); - arg_hi = (arg_hi >> 1); - i >>= 1; - if(arg_hi < n_hi) { - n_hi -= arg_hi; - if(arg_lo > n_lo) - n_hi--; - n_lo -= arg_lo; - res += i; - } else if((arg_hi == n_hi) && (arg_lo <= n_lo)) { - n_hi -= arg_hi; - n_lo -= arg_lo; - res += i; - } - } while(n_hi); - } - - res += (n_lo / inArg1); - #ifndef FIXMATH_NO_ROUNDING - if((n_lo % inArg1) >= (inArg1 >> 1)) - res++; - #endif - res += (r_hi << 16); - - return (neg ? -res : res); - #endif +/* 32-bit implementation of fix16_div. Fastest version for e.g. ARM Cortex M3. + * Performs 32-bit divisions repeatedly to reduce the remainder. For this to + * be efficient, the processor has to have 32-bit hardware division. + */ +#if !defined(FIXMATH_OPTIMIZE_8BIT) +#ifdef __GNUC__ +// Count leading zeros, using processor-specific instruction if available. +#define clz(x) __builtin_clzl(x) +#else +static uint8_t clz(uint32_t x) +{ + uint8_t result = 0; + if (x == 0) return 32; + while (!(x & 0xF0000000)) { result += 4; x <<= 4; } + while (!(x & 0x80000000)) { result += 1; x <<= 1; } + return result; } +#endif +fix16_t fix16_div(fix16_t a, fix16_t b) +{ + // This uses a hardware 32/32 bit division multiple times, until we have + // computed all the bits in (a<<17)/b. Usually this takes 1-3 iterations. + + if (b == 0) + return fix16_min; + + uint32_t remainder = (a >= 0) ? a : (-a); + uint32_t divider = (b >= 0) ? b : (-b); + uint32_t quotient = 0; + int bit_pos = 17; + + // Kick-start the division a bit. + // This improves speed in the worst-case scenarios where N and D are large + // It gets a lower estimate for the result by N/(D >> 17 + 1). + if (divider & 0xFFF00000) + { + uint32_t shifted_div = ((divider >> 17) + 1); + quotient = remainder / shifted_div; + remainder -= ((uint64_t)quotient * divider) >> 17; + } + + // If the divider is divisible by 2^n, take advantage of it. + while (!(divider & 0xF) && bit_pos >= 4) + { + divider >>= 4; + bit_pos -= 4; + } + + while (remainder && bit_pos >= 0) + { + // Shift remainder as much as we can without overflowing + int shift = clz(remainder); + if (shift > bit_pos) shift = bit_pos; + remainder <<= shift; + bit_pos -= shift; + + uint32_t div = remainder / divider; + remainder = remainder % divider; + quotient += div << bit_pos; + + #ifndef FIXMATH_NO_OVERFLOW + if (div & ~(0xFFFFFFFF >> bit_pos)) + return fix16_overflow; + #endif + + remainder <<= 1; + bit_pos--; + } + + #ifndef FIXMATH_NO_ROUNDING + // Quotient is always positive so rounding is easy + quotient++; + #endif + + fix16_t result = quotient >> 1; + + // Figure out the sign of the result + if ((a ^ b) & 0x80000000) + { + #ifndef FIXMATH_NO_OVERFLOW + if (result == fix16_min) + return fix16_overflow; + #endif + + result = -result; + } + + return result; +} +#endif + +/* Alternative 32-bit implementation of fix16_div. Fastest on e.g. Atmel AVR. + * This does the division manually, and is therefore good for processors that + * do not have hardware division. + */ +#if defined(FIXMATH_OPTIMIZE_8BIT) +fix16_t fix16_div(fix16_t a, fix16_t b) +{ + // This uses the basic binary restoring division algorithm. + // It appears to be faster to do the whole division manually than + // trying to compose a 64-bit divide out of 32-bit divisions on + // platforms without hardware divide. + + if (b == 0) + return fix16_min; + + uint32_t remainder = (a >= 0) ? a : (-a); + uint32_t divider = (b >= 0) ? b : (-b); + + uint32_t quotient = 0; + uint32_t bit = 0x10000; + + /* The algorithm requires D >= R */ + while (divider < remainder) + { + divider <<= 1; + bit <<= 1; + } + + #ifndef FIXMATH_NO_OVERFLOW + if (!bit) + return fix16_overflow; + #endif + + if (divider & 0x80000000) + { + // Perform one step manually to avoid overflows later. + // We know that divider's bottom bit is 0 here. + if (remainder >= divider) + { + quotient |= bit; + remainder -= divider; + } + divider >>= 1; + bit >>= 1; + } + + /* Main division loop */ + while (bit && remainder) + { + if (remainder >= divider) + { + quotient |= bit; + remainder -= divider; + } + + remainder <<= 1; + bit >>= 1; + } + + #ifndef FIXMATH_NO_ROUNDING + if (remainder >= divider) + { + quotient++; + } + #endif + + fix16_t result = quotient; + + /* Figure out the sign of result */ + if ((a ^ b) & 0x80000000) + { + #ifndef FIXMATH_NO_OVERFLOW + if (result == fix16_min) + return fix16_overflow; + #endif + + result = -result; + } + + return result; +} +#endif + +#ifndef FIXMATH_NO_OVERFLOW +/* Wrapper around fix16_div to add saturating arithmetic. */ fix16_t fix16_sdiv(fix16_t inArg0, fix16_t inArg1) { - if(inArg1 == 0) { - if(inArg0 < 0) - return fix16_min; - return fix16_max; - } - #ifndef FIXMATH_NO_64BIT - int64_t tempResult = inArg0; - tempResult <<= 16; - #ifndef FIXMATH_NO_ROUNDING - tempResult += (inArg1 >> 1); - #endif - tempResult /= inArg1; - if(tempResult < fix16_min) - return fix16_min; - if(tempResult > fix16_max) - return fix16_max; - return tempResult; - #else - int neg = ((inArg0 < 0) != (inArg1 < 0)); - inArg0 = (inArg0 < 0 ? -inArg0 : inArg0); - inArg1 = (inArg1 < 0 ? -inArg1 : inArg1); - - while(((inArg0 | inArg1) & 1) == 0) { - inArg0 >>= 1; - inArg1 >>= 1; - } - - uint32_t r_hi = (inArg0 / inArg1); - if(r_hi > (neg ? 32768 : 32767)) - return (neg ? fix16_min : fix16_max); - - uint32_t n_lo = (inArg0 % inArg1); - uint32_t n_hi = (n_lo >> 16); - n_lo <<= 16; - - uint32_t i, arg; - for(i = 1, arg = inArg1; ((n_lo | arg) & 1) == 0; i <<= 1) { - n_lo = ((n_lo >> 1) | (n_hi << 31)); - n_hi = (n_hi >> 1); - arg >>= 1; - } - - uint32_t res = 0; - if(n_hi) { - uint32_t arg_lo, arg_hi; - for(arg_lo = inArg1; (arg_lo >> 31) == 0; arg_lo <<= 1, i <<= 1); - for(arg_hi = (arg_lo >> 31), arg_lo <<= 1, i <<= 1; arg_hi < n_hi; arg_hi = (arg_hi << 1) | (arg_lo >> 31), arg_lo <<= 1, i <<= 1); - - do { - arg_lo = (arg_lo >> 1) | (arg_hi << 31); - arg_hi = (arg_hi >> 1); - i >>= 1; - if(arg_hi < n_hi) { - n_hi -= arg_hi; - if(arg_lo > n_lo) - n_hi--; - n_lo -= arg_lo; - res += i; - } else if((arg_hi == n_hi) && (arg_lo <= n_lo)) { - n_hi -= arg_hi; - n_lo -= arg_lo; - res += i; - } - } while(n_hi); - } - - #ifndef FIXMATH_NO_ROUNDING - n_lo += (inArg1 >> 1); - #endif - res += (n_lo / inArg1); - res += (r_hi << 16); - - return (neg ? -res : res); - #endif + fix16_t result = fix16_div(inArg0, inArg1); + + if (result == fix16_overflow) + { + if ((inArg0 >= 0) == (inArg1 >= 0)) + return fix16_max; + else + return fix16_min; + } + + return result; } - - +#endif fix16_t fix16_lerp8(fix16_t inArg0, fix16_t inArg1, uint8_t inFract) { int64_t tempOut = int64_mul_i32_i32(inArg0, ((1 << 8) - inFract)); diff --git a/libfixmath/fix16.h b/libfixmath/fix16.h index d073efa..c573786 100644 --- a/libfixmath/fix16.h +++ b/libfixmath/fix16.h @@ -18,65 +18,83 @@ static const fix16_t THREE_PI_DIV_4 = 0x00025B2F; /*!< Fix16 value of 3PI/ static const fix16_t fix16_max = 0x7FFFFFFF; /*!< the maximum value of fix16_t */ static const fix16_t fix16_min = 0x80000000; /*!< the minimum value of fix16_t */ +static const fix16_t fix16_overflow = 0x80000000; /*!< the value used to indicate overflows when FIXMATH_NO_OVERFLOW is not specified */ static const fix16_t fix16_pi = 205887; /*!< fix16_t value of pi */ static const fix16_t fix16_e = 178145; /*!< fix16_t value of e */ static const fix16_t fix16_one = 0x00010000; /*!< fix16_t value of 1 */ +/* Conversion functions between fix16_t and float/integer. + * These are inlined to allow compiler to optimize away constant numbers + */ +static inline fix16_t fix16_from_int(int a) { return a * fix16_one; } +static inline float fix16_to_float(fix16_t a) { return (float)a / fix16_one; } +static inline double fix16_to_dbl(fix16_t a) { return (double)a / fix16_one; } + +static inline int fix16_to_int(fix16_t a) +{ #ifdef FIXMATH_NO_ROUNDING -/*! Converts a double to a fix16_t and returns the result. */ -static inline fix16_t fix16_from_dbl(const double inVal) { return (fix16_t)(inVal * 65536.0); } -/*! Converts a float to a fix16_t and returns the result. */ -static inline fix16_t fix16_from_float(const float inVal) { return (fix16_t)(inVal * 65536.0f); } + return a >> 16; #else -/*! Converts a double to a fix16_t and returns the result. */ -static inline fix16_t fix16_from_dbl(const double inVal) { return (fix16_t)((inVal * 65536.0) + 0.5); } -/*! Converts a float to a fix16_t and returns the result. */ -static inline fix16_t fix16_from_float(const float inVal) { return (fix16_t)((inVal * 65536.0f) + 0.5f); } + if (a >= 0) + return (a + fix16_one / 2) / fix16_one; + else + return (a - fix16_one / 2) / fix16_one; #endif -/*! Converts a signed integer to a fix16_t and returns the result. */ -static inline fix16_t fix16_from_int(const int32_t inVal) { return (inVal << 16); } +} -/*! Coverts a fix16_t to a double and returns the result. */ -static inline double fix16_to_dbl(const fix16_t inVal) { return ((double)inVal / 65536.0); } -/*! Converts a fix16_t to a float and returns the result. */ -static inline float fix16_to_float(const fix16_t inVal) { return ((float)inVal / 65536.0f); } -/*! Converts a fix16_t to a signed integer and returns the result. */ -static inline int32_t fix16_to_int(const fix16_t inVal) { return ((inVal + 0x00008000) >> 16); } +static inline fix16_t fix16_from_float(float a) +{ + float temp = a * fix16_one; +#ifndef FIXMATH_NO_ROUNDING + temp += (temp >= 0) ? 0.5f : -0.5f; +#endif + return (fix16_t)temp; +} +static inline fix16_t fix16_from_dbl(double a) +{ + double temp = a * fix16_one; +#ifndef FIXMATH_NO_ROUNDING + temp += (temp >= 0) ? 0.5f : -0.5f; +#endif + return (fix16_t)temp; +} +/* Subtraction and addition with (optional) overflow detection. */ +#ifdef FIXMATH_NO_OVERFLOW static inline fix16_t fix16_add(fix16_t inArg0, fix16_t inArg1) { return (inArg0 + inArg1); } - -/*! Performs a saturated addition (overflow-protected) of the two given fix16_t's and returns the result. -*/ -extern fix16_t fix16_sadd(fix16_t inArg0, fix16_t inArg1); - static inline fix16_t fix16_sub(fix16_t inArg0, fix16_t inArg1) { return (inArg0 - inArg1); } -static inline fix16_t fix16_ssub(fix16_t inArg0, fix16_t inArg1) { return fix16_sadd(inArg0, -inArg1);} +#else +extern fix16_t fix16_add(fix16_t a, fix16_t b); +extern fix16_t fix16_sub(fix16_t a, fix16_t b); +/* Saturating arithmetic */ +extern fix16_t fix16_sadd(fix16_t a, fix16_t b); +extern fix16_t fix16_ssub(fix16_t a, fix16_t b); + +#endif /*! Multiplies the two given fix16_t's and returns the result. */ extern fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1); -/*! Performs a saturated multiplication (overflow-protected) of the two given fix16_t's and returns the result. -*/ -extern fix16_t fix16_smul(fix16_t inArg0, fix16_t inArg1); - - - /*! Divides the first given fix16_t by the second and returns the result. */ extern fix16_t fix16_div(fix16_t inArg0, fix16_t inArg1); +#ifndef FIXMATH_NO_OVERFLOW +/*! Performs a saturated multiplication (overflow-protected) of the two given fix16_t's and returns the result. +*/ +extern fix16_t fix16_smul(fix16_t inArg0, fix16_t inArg1); + /*! Performs a saturated division (overflow-protected) of the first fix16_t by the second and returns the result. */ extern fix16_t fix16_sdiv(fix16_t inArg0, fix16_t inArg1); - - +#endif /*! Returns the linear interpolation: (inArg0 * (1 - inFract)) + (inArg1 * inFract) */ diff --git a/libfixmath/fix16_sqrt.c b/libfixmath/fix16_sqrt.c index ca8474d..13d31a8 100644 --- a/libfixmath/fix16_sqrt.c +++ b/libfixmath/fix16_sqrt.c @@ -1,76 +1,83 @@ #include "fix16.h" -#include "int64.h" - - -#ifndef FIXMATH_NO_CACHE -fix16_t _fix16_sqrt_cache_index[4096] = { 0 }; -fix16_t _fix16_sqrt_cache_value[4096] = { 0 }; -#endif - -fix16_t fix16_sqrt32(fix16_t inValue) -{ - fix16_t retval = 0; - fix16_t x = inValue; - - /* Take only Leading 1 bit */ - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); - x = x & ~(x >> 1); - - /* Avoid useless loops */ - if (x & 0xFF000000) - { - x = 0x00800000; - } - /* Condition for numbers less than 1.0 */ - if ( !(inValue>>16) ) x = 0x00008000; - - while(x) - { - retval |= x; - if ( (uint32_t)fix16_mul(retval,retval) > (uint32_t)inValue) - { - retval &= ~x; - } - x >>= 1; - } - return retval; -} +/* The square root algorithm is quite directly from + * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Binary_numeral_system_.28base_2.29 + * An important difference is that it is split to two parts + * in order to use only 32-bit operations. + * + * Note that for negative numbers we return -sqrt(-inValue). + * Not sure if someone relies on this behaviour, but not going + * to break it for now. It doesn't slow the code much overall. + */ fix16_t fix16_sqrt(fix16_t inValue) { - int neg = (inValue < 0); - if(neg) - inValue = -inValue; + uint8_t neg = (inValue < 0); + uint32_t num = (neg ? -inValue : inValue); + uint32_t result = 0; + uint32_t bit; + uint8_t n; + + // Many numbers will be less than 15, so + // this gives a good balance between time spent + // in if vs. time spent in the while loop + // when searching for the starting value. + if (num & 0xFFF00000) + bit = (uint32_t)1 << 30; + else + bit = (uint32_t)1 << 18; + + while (bit > num) bit >>= 2; + + // The main part is executed twice, in order to avoid + // using 64 bit values in computations. + for (n = 0; n < 2; n++) + { + // First we get the top 24 bits of the answer. + while (bit) + { + if (num >= result + bit) + { + num -= result + bit; + result = (result >> 1) + bit; + } + else + { + result = (result >> 1); + } + bit >>= 2; + } + + if (n == 0) + { + // Then process it again to get the lowest 8 bits. + if (num > 65535) + { + // The remainder 'num' is too large to be shifted left + // by 16, so we have to add 1 to result manually and + // adjust 'num' accordingly. + // num = a - (result + 0.5)^2 + // = num + result^2 - (result + 0.5)^2 + // = num - result - 0.5 + num -= result; + num = (num << 16) - 0x8000; + result = (result << 16) + 0x8000; + } + else + { + num <<= 16; + result <<= 16; + } + + bit = 1 << 14; + } + } - #ifndef FIXMATH_NO_CACHE - fix16_t tempIndex = (((inValue >> 16) ^ (inValue >> 4)) & 0x00000FFF); - if(_fix16_sqrt_cache_index[tempIndex] == inValue) - return (neg ? -_fix16_sqrt_cache_value[tempIndex] : _fix16_sqrt_cache_value[tempIndex]); - #endif - - int64_t tempOp = int64_const((inValue >> 16), (inValue << 16)); - int64_t tempOut = int64_const(0, 0); - int64_t tempOne = int64_const(0x40000000UL, 0x00000000UL); - - while(int64_cmp_gt(tempOne, tempOp)) - tempOne = int64_shift(tempOne, -2); - - while(int64_cmp_ne(tempOne, int64_const(0, 0))) { - if(int64_cmp_ge(tempOp, int64_add(tempOut, tempOne))) { - tempOp = int64_sub(tempOp, int64_add(tempOut, tempOne)); - tempOut = int64_add(tempOut, int64_shift(tempOne, 1)); - } - tempOut = int64_shift(tempOut, -1); - tempOne = int64_shift(tempOne, -2); - } - - #ifndef FIXMATH_NO_CACHE - _fix16_sqrt_cache_index[tempIndex] = inValue; - _fix16_sqrt_cache_value[tempIndex] = int64_lo(tempOut); - #endif - - return (neg ? -int64_lo(tempOut) : int64_lo(tempOut)); +#ifndef FIXMATH_NO_ROUNDING + // Finally, if next bit would have been 1, round the result upwards. + if (num > result) + { + result++; + } +#endif + + return (neg ? -result : result); } diff --git a/unittests/Makefile b/unittests/Makefile new file mode 100644 index 0000000..80df66b --- /dev/null +++ b/unittests/Makefile @@ -0,0 +1,49 @@ +# Makefile for running the unittests of libfixmath. +CC = gcc + +# Basic CFLAGS for debugging +CFLAGS = -g -O0 -I../libfixmath -Wall -Wextra -Werror + +# The files required for tests +FIX16_SRC = ../libfixmath/fix16.c ../libfixmath/fix16_sqrt.c \ + ../libfixmath/fix16.h + +all: run_fix16_unittests + +clean: + rm -f fix16_unittests_???? + +# The library is tested automatically under different compilations +# options. +# +# Test naming: +# r = rounding, n = no rounding +# o = overflow detection, n = no overflow detection +# 64 = int64_t math, 32 = int32_t math + +run_fix16_unittests: \ + fix16_unittests_ro64 fix16_unittests_no64 \ + fix16_unittests_rn64 fix16_unittests_nn64 \ + fix16_unittests_ro32 fix16_unittests_no32 \ + fix16_unittests_rn32 fix16_unittests_nn32 \ + fix16_unittests_ro08 fix16_unittests_no08 \ + fix16_unittests_rn08 fix16_unittests_nn08 + $(foreach test, $^, \ + echo $(test) && \ + ./$(test) > /dev/null && \ + ) true + +fix16_unittests_no64: DEFINES=-DFIXMATH_NO_ROUNDING +fix16_unittests_rn64: DEFINES=-DFIXMATH_NO_OVERFLOW +fix16_unittests_nn64: DEFINES=-DFIXMATH_NO_ROUNDING -DFIXMATH_NO_OVERFLOW +fix16_unittests_ro32: DEFINES=-DFIXMATH_NO_64BIT +fix16_unittests_no32: DEFINES=-DFIXMATH_NO_ROUNDING -DFIXMATH_NO_64BIT +fix16_unittests_rn32: DEFINES=-DFIXMATH_NO_OVERFLOW -DFIXMATH_NO_64BIT +fix16_unittests_nn32: DEFINES=-DFIXMATH_NO_OVERFLOW -DFIXMATH_NO_ROUNDING -DFIXMATH_NO_64BIT +fix16_unittests_ro08: DEFINES=-DFIXMATH_OPTIMIZE_8BIT +fix16_unittests_no08: DEFINES=-DFIXMATH_NO_ROUNDING -DFIXMATH_OPTIMIZE_8BIT +fix16_unittests_rn08: DEFINES=-DFIXMATH_NO_OVERFLOW -DFIXMATH_OPTIMIZE_8BIT +fix16_unittests_nn08: DEFINES=-DFIXMATH_NO_OVERFLOW -DFIXMATH_NO_ROUNDING -DFIXMATH_OPTIMIZE_8BIT + +fix16_unittests_% : fix16_unittests.c $(FIX16_SRC) + $(CC) $(CFLAGS) $(DEFINES) -o $@ $^ -lm diff --git a/unittests/fix16_unittests.c b/unittests/fix16_unittests.c new file mode 100644 index 0000000..a47cfb6 --- /dev/null +++ b/unittests/fix16_unittests.c @@ -0,0 +1,337 @@ +#include +#include +#include +#include +#include "unittests.h" + +const fix16_t testcases[] = { + // Small numbers + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, + + // Integer numbers + 0x10000, -0x10000, 0x20000, -0x20000, 0x30000, -0x30000, + 0x40000, -0x40000, 0x50000, -0x50000, 0x60000, -0x60000, + + // Fractions (1/2, 1/4, 1/8) + 0x8000, -0x8000, 0x4000, -0x4000, 0x2000, -0x2000, + + // Problematic carry + 0xFFFF, -0xFFFF, 0x1FFFF, -0x1FFFF, 0x3FFFF, -0x3FFFF, + + // Smallest and largest values + 0x7FFFFFFF, 0x80000000, + + // Large random numbers + 831858892, 574794913, 2147272293, -469161054, -961611615, + 1841960234, 1992698389, 520485404, 560523116, -2094993050, + -876897543, -67813629, 2146227091, 509861939, -1073573657, + + // Small random numbers + -14985, 30520, -83587, 41129, 42137, 58537, -2259, 84142, + -28283, 90914, 19865, 33191, 81844, -66273, -63215, -44459, + -11326, 84295, 47515, -39324, + + // Tiny random numbers + -171, -359, 491, 844, 158, -413, -422, -737, -575, -330, + -376, 435, -311, 116, 715, -1024, -487, 59, 724, 993 +}; + +#define TESTCASES_COUNT (sizeof(testcases)/sizeof(testcases[0])) + +#define delta(a,b) (((a)>=(b)) ? (a)-(b) : (b)-(a)) + +#ifdef FIXMATH_NO_ROUNDING +const fix16_t max_delta = 1; +#else +const fix16_t max_delta = 0; +#endif + +int main() +{ + int status = 0; + + { + COMMENT("Testing basic multiplication"); + TEST(fix16_mul(fix16_from_int(5), fix16_from_int(5)) == fix16_from_int(25)); + TEST(fix16_mul(fix16_from_int(-5), fix16_from_int(5)) == fix16_from_int(-25)); + TEST(fix16_mul(fix16_from_int(-5), fix16_from_int(-5)) == fix16_from_int(25)); + TEST(fix16_mul(fix16_from_int(5), fix16_from_int(-5)) == fix16_from_int(-25)); + } + +#ifndef FIXMATH_NO_ROUNDING + { + COMMENT("Testing multiplication rounding corner cases"); + TEST(fix16_mul(0, 10) == 0); + TEST(fix16_mul(2, 0x8000) == 1); + TEST(fix16_mul(-2, 0x8000) == -1); + TEST(fix16_mul(3, 0x8000) == 2); + TEST(fix16_mul(-3, 0x8000) == -2); + TEST(fix16_mul(2, 0x7FFF) == 1); + TEST(fix16_mul(-2, 0x7FFF) == -1); + TEST(fix16_mul(2, 0x8001) == 1); + TEST(fix16_mul(-2, 0x8001) == -1); + } +#endif + + { + unsigned int i, j; + int failures = 0; + COMMENT("Running testcases for multiplication"); + + for (i = 0; i < TESTCASES_COUNT; i++) + { + for (j = 0; j < TESTCASES_COUNT; j++) + { + fix16_t a = testcases[i]; + fix16_t b = testcases[j]; + fix16_t result = fix16_mul(a, b); + + double fa = fix16_to_dbl(a); + double fb = fix16_to_dbl(b); + fix16_t fresult = fix16_from_dbl(fa * fb); + + double max = fix16_to_dbl(fix16_max); + double min = fix16_to_dbl(fix16_min); + + if (delta(fresult, result) > max_delta) + { + if (fa * fb > max || fa * fb < min) + { + #ifndef FIXMATH_NO_OVERFLOW + if (result != fix16_overflow) + { + printf("\n%d * %d overflow not detected!\n", a, b); + failures++; + } + #endif + // Legitimate overflow + continue; + } + + printf("\n%d * %d = %d\n", a, b, result); + printf("%f * %f = %d\n", fa, fb, fresult); + failures++; + } + } + } + + TEST(failures == 0); + } + + { + COMMENT("Testing basic division"); + TEST(fix16_div(fix16_from_int(15), fix16_from_int(5)) == fix16_from_int(3)); + TEST(fix16_div(fix16_from_int(-15), fix16_from_int(5)) == fix16_from_int(-3)); + TEST(fix16_div(fix16_from_int(-15), fix16_from_int(-5)) == fix16_from_int(3)); + TEST(fix16_div(fix16_from_int(15), fix16_from_int(-5)) == fix16_from_int(-3)); + } + +#ifndef FIXMATH_NO_ROUNDING + { + COMMENT("Testing division rounding corner cases"); + TEST(fix16_div(0, 10) == 0); + TEST(fix16_div(1, fix16_from_int(2)) == 1); + TEST(fix16_div(-1, fix16_from_int(2)) == -1); + TEST(fix16_div(1, fix16_from_int(-2)) == -1); + TEST(fix16_div(-1, fix16_from_int(-2)) == 1); + TEST(fix16_div(3, fix16_from_int(2)) == 2); + TEST(fix16_div(-3, fix16_from_int(2)) == -2); + TEST(fix16_div(3, fix16_from_int(-2)) == -2); + TEST(fix16_div(-3, fix16_from_int(-2)) == 2); + TEST(fix16_div(2, 0x7FFF) == 4); + TEST(fix16_div(-2, 0x7FFF) == -4); + TEST(fix16_div(2, 0x8001) == 4); + TEST(fix16_div(-2, 0x8001) == -4); + } +#endif + + { + unsigned int i, j; + int failures = 0; + COMMENT("Running testcases for division"); + + for (i = 0; i < TESTCASES_COUNT; i++) + { + for (j = 0; j < TESTCASES_COUNT; j++) + { + fix16_t a = testcases[i]; + fix16_t b = testcases[j]; + + // We don't require a solution for /0 :) + if (b == 0) continue; + + fix16_t result = fix16_div(a, b); + + double fa = fix16_to_dbl(a); + double fb = fix16_to_dbl(b); + fix16_t fresult = fix16_from_dbl(fa / fb); + + double max = fix16_to_dbl(fix16_max); + double min = fix16_to_dbl(fix16_min); + if (delta(fresult, result) > max_delta) + { + if (fa / fb > max || fa / fb < min) + { + #ifndef FIXMATH_NO_OVERFLOW + if (result != fix16_overflow) + { + printf("\n%d / %d overflow not detected!\n", a, b); + failures++; + } + #endif + // Legitimate overflow + continue; + } + + printf("\n%d / %d = %d\n", a, b, result); + printf("%f / %f = %d\n", fa, fb, fresult); + failures++; + } + } + } + + TEST(failures == 0); + } + + { + unsigned int i, j; + int failures = 0; + COMMENT("Running testcases for addition"); + + for (i = 0; i < TESTCASES_COUNT; i++) + { + for (j = 0; j < TESTCASES_COUNT; j++) + { + fix16_t a = testcases[i]; + fix16_t b = testcases[j]; + + fix16_t result = fix16_add(a, b); + + double fa = fix16_to_dbl(a); + double fb = fix16_to_dbl(b); + fix16_t fresult = fix16_from_dbl(fa + fb); + + double max = fix16_to_dbl(fix16_max); + double min = fix16_to_dbl(fix16_min); + + if (delta(fresult, result) > max_delta) + { + if (fa + fb > max || fa + fb < min) + { + #ifndef FIXMATH_NO_OVERFLOW + if (result != fix16_overflow) + { + printf("\n%d + %d overflow not detected!\n", a, b); + failures++; + } + #endif + // Legitimate overflow + continue; + } + + printf("\n%d + %d = %d\n", a, b, result); + printf("%f + %f = %d\n", fa, fb, fresult); + failures++; + } + } + } + + TEST(failures == 0); + } + + { + unsigned int i, j; + int failures = 0; + COMMENT("Running testcases for subtraction"); + + for (i = 0; i < TESTCASES_COUNT; i++) + { + for (j = 0; j < TESTCASES_COUNT; j++) + { + fix16_t a = testcases[i]; + fix16_t b = testcases[j]; + + fix16_t result = fix16_sub(a, b); + + double fa = fix16_to_dbl(a); + double fb = fix16_to_dbl(b); + fix16_t fresult = fix16_from_dbl(fa - fb); + + double max = fix16_to_dbl(fix16_max); + double min = fix16_to_dbl(fix16_min); + + if (delta(fresult, result) > max_delta) + { + if (fa - fb > max || fa - fb < min) + { + #ifndef FIXMATH_NO_OVERFLOW + if (result != fix16_overflow) + { + printf("\n%d - %d overflow not detected!\n", a, b); + failures++; + } + #endif + // Legitimate overflow + continue; + } + + printf("\n%d - %d = %d\n", a, b, result); + printf("%f - %f = %d\n", fa, fb, fresult); + failures++; + } + } + } + + TEST(failures == 0); + } + + { + COMMENT("Testing basic square roots"); + TEST(fix16_sqrt(fix16_from_int(16)) == fix16_from_int(4)); + TEST(fix16_sqrt(fix16_from_int(100)) == fix16_from_int(10)); + TEST(fix16_sqrt(fix16_from_int(1)) == fix16_from_int(1)); + } + +#ifndef FIXMATH_NO_ROUNDING + { + COMMENT("Testing square root rounding corner cases"); + TEST(fix16_sqrt(214748302) == 3751499); + TEST(fix16_sqrt(214748303) == 3751499); + TEST(fix16_sqrt(214748359) == 3751499); + TEST(fix16_sqrt(214748360) == 3751500); + } +#endif + + { + unsigned int i; + int failures = 0; + COMMENT("Running test cases for square root"); + + for (i = 0; i < TESTCASES_COUNT; i++) + { + fix16_t a = testcases[i]; + + if (a < 0) continue; + + fix16_t result = fix16_sqrt(a); + + double fa = fix16_to_dbl(a); + fix16_t fresult = fix16_from_dbl(sqrt(fa)); + + if (delta(fresult, result) > max_delta) + { + printf("\nfix16_sqrt(%d) = %d\n", a, result); + printf("sqrt(%f) = %d\n", fa, fresult); + failures++; + } + } + + TEST(failures == 0); + } + + if (status != 0) + fprintf(stdout, "\n\nSome tests FAILED!\n"); + + return status; +} diff --git a/unittests/unittests.h b/unittests/unittests.h new file mode 100644 index 0000000..bac57d2 --- /dev/null +++ b/unittests/unittests.h @@ -0,0 +1,18 @@ +#include + +#define COMMENT(x) printf("\n----" x "----\n"); +#define STR(x) #x +#define STR2(x) STR(x) +#define TEST(x) \ + if (!(x)) { \ + fflush(stdout); \ + fflush(stderr); \ + fprintf(stderr, "\033[31;1mFAILED:\033[22;39m " __FILE__ ":" STR2(__LINE__) " " #x "\n"); \ + status = 1; \ + } else { \ + fflush(stdout); \ + fflush(stderr); \ + printf("\033[32;1mOK:\033[22;39m " #x "\n"); \ + } + +