From cc1e97463ef6c5d5cef55191e203c60171f472b3 Mon Sep 17 00:00:00 2001
From: flatmush <flatmush@d3e1167c-abe1-51d5-8199-f9061ebe54e4>
Date: Thu, 3 Mar 2011 11:21:20 +0000
Subject: Added int64.h which performs 64-bit calculations using 32-bit
 operations, this means that the FIXMATH_NO_64BIT macro can now be used to
 enable these operations on compilers without 64-bit support, note that some
 of these operations are slower than the built-in gcc ones (though some are
 faster too). sqrt can now be compiled without 64-bit compiler support. All
 remaining functions which aren't supported in 32-bit are now disabled when
 the FIXMATH_NO_64BIT macro is enabled, which should stop any compiler errors on
 compilers where 64-bit isn't supported.

---
 libfixmath/fix16_exp.c    |  18 +++---
 libfixmath/fix16_sqrt.c   |  34 +++++-----
 libfixmath/fract32.c      |   2 +
 libfixmath/fract32.h      |   2 +
 libfixmath/int64.h        | 162 ++++++++++++++++++++++++++++++++++++++++++++++
 libfixmath/libfixmath.cbp |   5 +-
 6 files changed, 195 insertions(+), 28 deletions(-)
 create mode 100644 libfixmath/int64.h

diff --git a/libfixmath/fix16_exp.c b/libfixmath/fix16_exp.c
index d48698a..e7e92e4 100644
--- a/libfixmath/fix16_exp.c
+++ b/libfixmath/fix16_exp.c
@@ -1,4 +1,5 @@
 #include "fix16.h"
+#include "int64.h"
 
 
 
@@ -26,23 +27,22 @@ fix16_t fix16_exp(fix16_t inValue) {
 		return _fix16_exp_cache_value[tempIndex];
 	#endif
 
-	int64_t tempOut = fix16_one;
-	tempOut += inValue;
-	int64_t tempValue = inValue;
+	int64_t tempOut = int64_add(int64_from_int32(fix16_one), int64_from_int32(inValue));
+	int64_t tempValue = int64_from_int32(inValue);
 	uint32_t i, n;
 	for(i = 3, n = 2; i < 13; n *= i, i++) {
-		tempValue *= inValue;
+		tempValue = int64_mul_i64_i32(tempValue, inValue);
 		#ifndef FIXMATH_NO_ROUNDING
-		tempValue += (fix16_one >> 1);
+		tempValue = int64_add(tempValue, int64_from_int32(fix16_one >> 1));
 		#endif
-		tempValue >>= 16;
-		tempOut += (tempValue / n);
+		tempValue = int64_shift(tempValue, -16);
+		tempOut = int64_add(tempOut, int64_div_i64_i32(tempValue, n));
 	}
 
 	#ifndef FIXMATH_NO_CACHE
 	_fix16_exp_cache_index[tempIndex] = inValue;
-	_fix16_exp_cache_value[tempIndex] = tempOut;
+	_fix16_exp_cache_value[tempIndex] = int64_lo(tempOut);
 	#endif
 
-	return tempOut;
+	return int64_lo(tempOut);
 }
diff --git a/libfixmath/fix16_sqrt.c b/libfixmath/fix16_sqrt.c
index f780ef6..06df310 100644
--- a/libfixmath/fix16_sqrt.c
+++ b/libfixmath/fix16_sqrt.c
@@ -1,5 +1,5 @@
 #include "fix16.h"
-
+#include "int64.h"
 
 
 #ifndef FIXMATH_NO_CACHE
@@ -10,32 +10,36 @@ fix16_t _fix16_sqrt_cache_value[4096] = { 0 };
 
 
 fix16_t fix16_sqrt(fix16_t inValue) {
+	int neg = (inValue < 0); /* remember the sign: the result is negated again before returning */
+	if(neg)
+		inValue = -inValue; /* the root itself is taken of the magnitude */
+
 	#ifndef FIXMATH_NO_CACHE
 	fix16_t tempIndex = (((inValue >> 16) ^ (inValue >> 4)) & 0x00000FFF);
 	if(_fix16_sqrt_cache_index[tempIndex] == inValue)
-		return _fix16_sqrt_cache_value[tempIndex];
+		return (neg ? -_fix16_sqrt_cache_value[tempIndex] : _fix16_sqrt_cache_value[tempIndex]); /* cache hit, sign re-applied */
 	#endif
 
-	int64_t tempOp = inValue; tempOp <<= 16;
-	int64_t tempOut = 0;
-	int64_t tempOne = 0x4000000000000000ULL;
+	int64_t tempOp  = int64_const((inValue >> 16), (inValue << 16)); /* inValue shifted left by 16, split into hi/lo words */
+	int64_t tempOut = int64_const(0, 0);
+	int64_t tempOne = int64_const(0x40000000UL, 0x00000000UL); /* only bit 62 set */
 
-	while(tempOne > tempOp)
-		tempOne >>= 2;
+	while(int64_cmp_gt(tempOne, tempOp)) /* step down two bits at a time until tempOne no longer exceeds tempOp */
+		tempOne = int64_shift(tempOne, -2);
 
-	while(tempOne != 0) {
-		if(tempOp >= tempOut + tempOne) {
-			tempOp -= tempOut + tempOne;
-			tempOut += tempOne << 1;
+	while(int64_cmp_ne(tempOne, int64_const(0, 0))) { /* loop until tempOne has been shifted down to zero */
+		if(int64_cmp_ge(tempOp, int64_add(tempOut, tempOne))) {
+			tempOp  = int64_sub(tempOp, int64_add(tempOut, tempOne));
+			tempOut = int64_add(tempOut, int64_shift(tempOne, 1));
 		}
-		tempOut >>= 1;
-		tempOne >>= 2;
+		tempOut = int64_shift(tempOut, -1);
+		tempOne = int64_shift(tempOne, -2);
 	}
 
 	#ifndef FIXMATH_NO_CACHE
 	_fix16_sqrt_cache_index[tempIndex] = inValue;
-	_fix16_sqrt_cache_value[tempIndex] = tempOut;
+	_fix16_sqrt_cache_value[tempIndex] = int64_lo(tempOut); /* the cache stores the low word of the unsigned result */
 	#endif
 
-	return tempOut;
+	return (neg ? -int64_lo(tempOut) : int64_lo(tempOut)); /* low word of the result, negated for negative input */
 }
diff --git a/libfixmath/fract32.c b/libfixmath/fract32.c
index c351258..397e000 100644
--- a/libfixmath/fract32.c
+++ b/libfixmath/fract32.c
@@ -14,6 +14,7 @@ fract32_t fract32_invert(fract32_t inFract) {
 	return (0xFFFFFFFF - inFract);
 }
 
+#ifndef FIXMATH_NO_64BIT
 uint32_t fract32_usmul(uint32_t inVal, fract32_t inFract) {
 	return (uint32_t)(((uint64_t)inVal * (uint64_t)inFract) >> 32);
 }
@@ -23,3 +24,4 @@ int32_t fract32_smul(int32_t inVal, fract32_t inFract) {
 		return -fract32_usmul(-inVal, inFract);
 	return fract32_usmul(inVal, inFract);
 }
+#endif
diff --git a/libfixmath/fract32.h b/libfixmath/fract32.h
index 8b6df05..ee1f1c6 100644
--- a/libfixmath/fract32.h
+++ b/libfixmath/fract32.h
@@ -21,6 +21,7 @@ extern fract32_t fract32_create(uint32_t inNumerator, uint32_t inDenominator);
 */
 extern fract32_t fract32_invert(fract32_t inFract);
 
+#ifndef FIXMATH_NO_64BIT
 /*! Performs unsigned saturated (overflow-protected) multiplication with the two given fractions and returns the result as an unsigned integer.
 */
 extern uint32_t  fract32_usmul(uint32_t inVal, fract32_t inFract);
@@ -28,6 +29,7 @@ extern uint32_t  fract32_usmul(uint32_t inVal, fract32_t inFract);
 /*! Performs saturated (overflow-protected) multiplication with the two given fractions and returns the result as a signed integer.
 */
 extern int32_t   fract32_smul(int32_t inVal, fract32_t inFract);
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/libfixmath/int64.h b/libfixmath/int64.h
new file mode 100644
index 0000000..95b2269
--- /dev/null
+++ b/libfixmath/int64.h
@@ -0,0 +1,162 @@
+#ifndef __libfixmath_int64_h__
+#define __libfixmath_int64_h__
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifndef FIXMATH_NO_64BIT
+/* Native branch: thin wrappers over the compiler's own int64_t (hi = signed upper word, lo = unsigned lower word). */
+static inline  int64_t int64_const(int32_t hi, uint32_t lo) { return (int64_t)(((uint64_t)(uint32_t)hi << 32) | lo); }
+static inline  int64_t int64_from_int32(int32_t x) { return (int64_t)x; }
+static inline  int32_t int64_hi(int64_t x) { return (int32_t)(x >> 32); }
+static inline uint32_t int64_lo(int64_t x) { return (uint32_t)x; }
+/* Arithmetic. int64_shift: y > 0 shifts left by y, y < 0 shifts right by -y (left shift done unsigned to stay defined). */
+static inline int64_t int64_add(int64_t x, int64_t y)   { return (x + y);  }
+static inline int64_t int64_neg(int64_t x)              { return (-x);     }
+static inline int64_t int64_sub(int64_t x, int64_t y)   { return (x - y);  }
+static inline int64_t int64_shift(int64_t x, int8_t y)  { return (y < 0 ? (x >> -y) : (int64_t)((uint64_t)x << y)); }
+/* Multiplication and division. The 32x32 product widens an operand first so it is formed in 64 bits, not in int. */
+static inline int64_t int64_mul_i32_i32(int32_t x, int32_t y) { return ((int64_t)x * y);  }
+static inline int64_t int64_mul_i64_i32(int64_t x, int32_t y) { return (x * y);  }
+static inline int64_t int64_div_i64_i32(int64_t x, int32_t y) { return (x / y);  }
+/* Comparisons: each returns non-zero when the relation holds. */
+static inline int int64_cmp_eq(int64_t x, int64_t y) { return (x == y); }
+static inline int int64_cmp_ne(int64_t x, int64_t y) { return (x != y); }
+static inline int int64_cmp_gt(int64_t x, int64_t y) { return (x >  y); }
+static inline int int64_cmp_ge(int64_t x, int64_t y) { return (x >= y); }
+static inline int int64_cmp_lt(int64_t x, int64_t y) { return (x <  y); }
+static inline int int64_cmp_le(int64_t x, int64_t y) { return (x <= y); }
+#else
+
+typedef struct { /* software 64-bit integer used when FIXMATH_NO_64BIT is defined */
+	 int32_t hi; /* upper 32 bits; signed, so it carries the sign of the whole value */
+	uint32_t lo; /* lower 32 bits */
+} __int64_t; /* NOTE(review): double-underscore names are reserved for the implementation and may clash with libc typedefs — confirm */
+
+static inline __int64_t int64_const(int32_t hi, uint32_t lo) { __int64_t v; v.hi = hi; v.lo = lo; return v; } /* build from two words */
+static inline __int64_t int64_from_int32(int32_t x) { __int64_t v; v.hi = -(x < 0); v.lo = (uint32_t)x; return v; } /* sign-extend */
+static inline   int32_t int64_hi(__int64_t x) { int32_t upper = x.hi; return upper; }
+static inline  uint32_t int64_lo(__int64_t x) { uint32_t lower = x.lo; return lower; }
+
+static inline __int64_t int64_add(__int64_t x, __int64_t y) { /* returns x + y, wrapping modulo 2^64 */
+	__int64_t ret;
+	uint32_t hi = (uint32_t)x.hi + (uint32_t)y.hi; /* unsigned so that wrap-around is well defined */
+	ret.lo = x.lo + y.lo;
+	hi += (ret.lo < x.lo); /* a wrapped low word is the only source of a carry into the high word */
+	ret.hi = (int32_t)hi;
+	return ret;
+}
+
+static inline __int64_t int64_neg(__int64_t x) { /* two's-complement negation: -x == ~x + 1 */
+	__int64_t ret;
+	uint32_t hi = ~(uint32_t)x.hi; /* unsigned so negating the most negative value wraps instead of overflowing */
+	ret.lo = ~x.lo + 1;
+	hi += (ret.lo == 0); /* low word wrapped to zero: carry the +1 into the high word */
+	ret.hi = (int32_t)hi;
+	return ret;
+}
+
+static inline __int64_t int64_sub(__int64_t x, __int64_t y) { /* returns x - y, wrapping modulo 2^64; borrow taken when x.lo < y.lo */
+	return (__int64_t){ (int32_t)((uint32_t)x.hi - (uint32_t)y.hi - (x.lo < y.lo)), (x.lo - y.lo) };
+}
+
+static inline __int64_t int64_shift(__int64_t x, int8_t y) { /* y > 0: shift left by y; y < 0: arithmetic shift right by -y; y == 0: x unchanged */
+	uint32_t hi = (uint32_t)x.hi, lo = x.lo;
+	uint32_t fill = (x.hi < 0 ? 0xFFFFFFFFUL : 0); /* sign bits pulled in by a right shift */
+	int n = (y < 0 ? -(int)y : (int)y); /* widened to int first so y == -128 is handled */
+	if(n >= 64) /* every original bit is shifted out */
+		return (__int64_t){ (y < 0 ? (int32_t)fill : 0), (y < 0 ? fill : 0) };
+	if(n >= 32) { /* move a whole word first so the remaining count is below 32 */
+		if(y > 0) { hi = lo; lo = 0; } else { lo = hi; hi = fill; }
+		n -= 32;
+	}
+	if(n > 0) { /* n == 0 must skip this: shifting a 32-bit word by (32 - 0) is undefined */
+		if(y > 0) { hi = (hi << n) | (lo >> (32 - n)); lo <<= n; }
+		else      { lo = (lo >> n) | (hi << (32 - n)); hi = (hi >> n) | (fill << (32 - n)); }
+	}
+	return (__int64_t){ (int32_t)hi, lo };
+}
+
+static inline __int64_t int64_mul_i32_i32(int32_t x, int32_t y) { /* full signed 32x32 -> 64-bit product */
+	int neg = ((x ^ y) < 0); /* the product is negative when the operand signs differ */
+	uint32_t ux = (x < 0 ? 0 - (uint32_t)x : (uint32_t)x), uy = (y < 0 ? 0 - (uint32_t)y : (uint32_t)y); /* magnitudes */
+	uint32_t a = (ux >> 16), b = (ux & 0xFFFF), c = (uy >> 16), d = (uy & 0xFFFF); /* 16-bit limbs: ux = a:b, uy = c:d */
+	uint32_t bd = b * d, ad = a * d, bc = b * c; /* 16x16 partial products always fit in 32 bits */
+	uint32_t mid = (bd >> 16) + (ad & 0xFFFF) + (bc & 0xFFFF); /* column at bit 16; its upper half is the carry into hi */
+	uint32_t lo = (bd & 0xFFFF) | (mid << 16), hi = (a * c) + (ad >> 16) + (bc >> 16) + (mid >> 16);
+	if(neg) { /* negate the magnitude: ~v + 1 with the carry propagated into the high word */
+		lo = ~lo + 1;
+		hi = ~hi + (lo == 0);
+	}
+	return (__int64_t){ (int32_t)hi, lo };
+}
+
+static inline __int64_t int64_mul_i64_i32(__int64_t x, int32_t y) { /* returns x * y truncated to the low 64 bits */
+	int neg = ((x.hi ^ y) < 0); /* signs differ -> negative result */
+	uint32_t xh = (uint32_t)x.hi, xl = x.lo;
+	uint32_t uy = (y < 0 ? 0 - (uint32_t)y : (uint32_t)y); /* |y|, well defined even for the most negative y */
+	if(x.hi < 0) { /* |x| via two's complement: ~x + 1 */
+		xl = ~xl + 1;
+		xh = ~xh + (xl == 0);
+	}
+	/* Schoolbook multiply of the low word on 16-bit limbs: xl = a:b, uy = c:d */
+	uint32_t a = (xl >> 16), b = (xl & 0xFFFF), c = (uy >> 16), d = (uy & 0xFFFF);
+	uint32_t bd = b * d, ad = a * d, bc = b * c;
+	uint32_t mid = (bd >> 16) + (ad & 0xFFFF) + (bc & 0xFFFF); /* cannot overflow 32 bits */
+	uint32_t lo = (bd & 0xFFFF) | (mid << 16);
+	/* The high word of x only feeds the upper 32 bits; anything above bit 63 is discarded */
+	uint32_t hi = (a * c) + (ad >> 16) + (bc >> 16) + (mid >> 16) + (xh * uy);
+	if(neg) {
+		lo = ~lo + 1;
+		hi = ~hi + (lo == 0);
+	}
+	return (__int64_t){ (int32_t)hi, lo };
+}
+
+static inline __int64_t int64_div_i64_i32(__int64_t x, int32_t y) { /* returns x / y truncated toward zero; y must be non-zero */
+	int neg = ((x.hi ^ y) < 0); /* signs differ -> negative quotient */
+	uint32_t n_hi = (uint32_t)x.hi, n_lo = x.lo;
+	uint32_t d = (y < 0 ? 0 - (uint32_t)y : (uint32_t)y); /* |y|, at most 2^31 */
+	uint32_t q_hi = 0, q_lo = 0, rem = 0;
+	int bit;
+
+	if(x.hi < 0) { /* divide the magnitude: |x| == ~x + 1 */
+		n_lo = ~n_lo + 1;
+		n_hi = ~n_hi + (n_lo == 0);
+	}
+
+	/* Binary long division: feed one dividend bit per step, most significant first. */
+	for(bit = 0; bit < 64; bit++) {
+		rem  = (rem << 1) | (n_hi >> 31); /* rem < d <= 2^31 before this step, so it cannot overflow */
+		n_hi = (n_hi << 1) | (n_lo >> 31);
+		n_lo <<= 1;
+		q_hi = (q_hi << 1) | (q_lo >> 31);
+		q_lo <<= 1;
+		if(rem >= d) {
+			rem -= d;
+			q_lo |= 1;
+		}
+	}
+
+	if(neg) { q_lo = ~q_lo + 1; q_hi = ~q_hi + (q_lo == 0); } /* re-apply the sign */
+	return (__int64_t){ (int32_t)q_hi, q_lo };
+}
+
+static inline int int64_cmp_eq(__int64_t x, __int64_t y) { return !((x.hi != y.hi) || (x.lo != y.lo)); } /* comparisons: hi is compared signed, lo unsigned */
+static inline int int64_cmp_ne(__int64_t x, __int64_t y) { return !((x.hi == y.hi) && (x.lo == y.lo)); }
+static inline int int64_cmp_gt(__int64_t x, __int64_t y) { return ((x.hi != y.hi) ? (x.hi > y.hi) : (x.lo >  y.lo)); }
+static inline int int64_cmp_ge(__int64_t x, __int64_t y) { return ((x.hi != y.hi) ? (x.hi > y.hi) : (x.lo >= y.lo)); }
+static inline int int64_cmp_lt(__int64_t x, __int64_t y) { return ((x.hi != y.hi) ? (x.hi < y.hi) : (x.lo <  y.lo)); }
+static inline int int64_cmp_le(__int64_t x, __int64_t y) { return ((x.hi != y.hi) ? (x.hi < y.hi) : (x.lo <= y.lo)); }
+
+#define int64_t __int64_t
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/libfixmath/libfixmath.cbp b/libfixmath/libfixmath.cbp
index 1e02116..c15000f 100644
--- a/libfixmath/libfixmath.cbp
+++ b/libfixmath/libfixmath.cbp
@@ -36,15 +36,12 @@
 				</Linker>
 			</Target>
 		</Build>
-		<Compiler>
-			<Add option="-DFIXMATH_NO_CACHE" />
-			<Add option="-DFIXMATH_NO_64BIT" />
-		</Compiler>
 		<Unit filename="Makefile" />
 		<Unit filename="fix16.c">
 			<Option compilerVar="CC" />
 		</Unit>
 		<Unit filename="fix16.h" />
+		<Unit filename="fix16.hpp" />
 		<Unit filename="fix16_exp.c">
 			<Option compilerVar="CC" />
 		</Unit>
-- 
cgit v1.2.3