From d9944df6d2a26b05cdcaeeb1062a8c16963ec843 Mon Sep 17 00:00:00 2001
From: Flatmush <Flatmush@gmail.com>
Date: Wed, 25 Jul 2012 09:54:58 +0000
Subject: Added some deg/rad conversion functions. Fixed up some code style
 inconsistencies. Added a modulo function to allow optimization on 8-bit
 machines.

---
 libfixmath/fix16.c      | 734 +++++++++++++++++++++++++-----------------------
 libfixmath/fix16.h      |  47 ++--
 libfixmath/fix16_sqrt.c | 137 ++++-----
 libfixmath/fix16_trig.c |  25 +-
 4 files changed, 497 insertions(+), 446 deletions(-)

diff --git a/libfixmath/fix16.c b/libfixmath/fix16.c
index 72e1b4a..810efe6 100644
--- a/libfixmath/fix16.c
+++ b/libfixmath/fix16.c
@@ -8,51 +8,51 @@
 #ifndef FIXMATH_NO_OVERFLOW
 fix16_t fix16_add(fix16_t a, fix16_t b)
 {
-  // Use unsigned integers because overflow with signed integers is
-  // an undefined operation (http://www.airs.com/blog/archives/120).
-  uint32_t _a = a, _b = b;
-  uint32_t sum = _a + _b;
+	// Use unsigned integers because overflow with signed integers is
+	// an undefined operation (http://www.airs.com/blog/archives/120).
+	uint32_t _a = a, _b = b;
+	uint32_t sum = _a + _b;
 
-  // Overflow can only happen if sign of a == sign of b, and then
-  // it causes sign of sum != sign of a.
-  if (!((_a ^ _b) & 0x80000000) && ((_a ^ sum) & 0x80000000))
-    return fix16_overflow;
-  
-  return sum;
+	// Overflow can only happen if sign of a == sign of b, and then
+	// it causes sign of sum != sign of a.
+	if (!((_a ^ _b) & 0x80000000) && ((_a ^ sum) & 0x80000000))
+		return fix16_overflow;
+	
+	return sum;
 }
 
 fix16_t fix16_sub(fix16_t a, fix16_t b)
 {
-  uint32_t _a = a, _b = b;
-  uint32_t diff = _a - _b;
+	uint32_t _a = a, _b = b;
+	uint32_t diff = _a - _b;
 
-  // Overflow can only happen if sign of a != sign of b, and then
-  // it causes sign of diff != sign of a.
-  if (((_a ^ _b) & 0x80000000) && ((_a ^ diff) & 0x80000000))
-    return fix16_overflow;
-  
-  return diff;
+	// Overflow can only happen if sign of a != sign of b, and then
+	// it causes sign of diff != sign of a.
+	if (((_a ^ _b) & 0x80000000) && ((_a ^ diff) & 0x80000000))
+		return fix16_overflow;
+	
+	return diff;
 }
 
 /* Saturating arithmetic */
 fix16_t fix16_sadd(fix16_t a, fix16_t b)
 {
-  fix16_t result = fix16_add(a, b);
+	fix16_t result = fix16_add(a, b);
 
-  if (result == fix16_overflow)
-    return (a > 0) ? fix16_max : fix16_min;
+	if (result == fix16_overflow)
+		return (a > 0) ? fix16_max : fix16_min;
 
-  return result;
-}  
+	return result;
+}	
 
 fix16_t fix16_ssub(fix16_t a, fix16_t b)
 {
-  fix16_t result = fix16_sub(a, b);
+	fix16_t result = fix16_sub(a, b);
 
-  if (result == fix16_overflow)
-    return (a > 0) ? fix16_max : fix16_min;
+	if (result == fix16_overflow)
+		return (a > 0) ? fix16_max : fix16_min;
 
-  return result;
+	return result;
 }
 #endif
 
@@ -67,41 +67,41 @@ fix16_t fix16_ssub(fix16_t a, fix16_t b)
 #if !defined(FIXMATH_NO_64BIT) && !defined(FIXMATH_OPTIMIZE_8BIT)
 fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1)
 {
-  int64_t product = (int64_t)inArg0 * inArg1;
-  
-  #ifndef FIXMATH_NO_OVERFLOW
-  // The upper 17 bits should all be the same (the sign).
-  uint32_t upper = (product >> 47);
-  #endif
-  
-  if (product < 0)
-  {
-    #ifndef FIXMATH_NO_OVERFLOW
-    if (~upper)
-        return fix16_overflow;
-    #endif
-    
-    #ifndef FIXMATH_NO_ROUNDING
-    // This adjustment is required in order to round -1/2 correctly
-    product--;
-    #endif
-  }
-  else
-  {
-    #ifndef FIXMATH_NO_OVERFLOW
-    if (upper)
-        return fix16_overflow;
-    #endif
-  }
-  
-  #ifdef FIXMATH_NO_ROUNDING
-  return product >> 16;
-  #else
-  fix16_t result = product >> 16;
-  result += (product & 0x8000) >> 15;
-  
-  return result;
-  #endif
+	int64_t product = (int64_t)inArg0 * inArg1;
+	
+	#ifndef FIXMATH_NO_OVERFLOW
+	// The upper 17 bits should all be the same (the sign).
+	uint32_t upper = (product >> 47);
+	#endif
+	
+	if (product < 0)
+	{
+		#ifndef FIXMATH_NO_OVERFLOW
+		if (~upper)
+				return fix16_overflow;
+		#endif
+		
+		#ifndef FIXMATH_NO_ROUNDING
+		// This adjustment is required in order to round -1/2 correctly
+		product--;
+		#endif
+	}
+	else
+	{
+		#ifndef FIXMATH_NO_OVERFLOW
+		if (upper)
+				return fix16_overflow;
+		#endif
+	}
+	
+	#ifdef FIXMATH_NO_ROUNDING
+	return product >> 16;
+	#else
+	fix16_t result = product >> 16;
+	result += (product & 0x8000) >> 15;
+	
+	return result;
+	#endif
 }
 #endif
 
@@ -112,56 +112,56 @@ fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1)
 #if defined(FIXMATH_NO_64BIT) && !defined(FIXMATH_OPTIMIZE_8BIT)
 fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1)
 {
-  // Each argument is divided to 16-bit parts.
-  //          AB
-  //      *   CD
-  // -----------
-  //          BD  16 * 16 -> 32 bit products
-  //         CB
-  //         AD
-  //        AC
-  //       |----| 64 bit product
-  int32_t A = (inArg0 >> 16), C = (inArg1 >> 16);
-  uint32_t B = (inArg0 & 0xFFFF), D = (inArg1 & 0xFFFF);
-  
-  int32_t AC = A*C;
-  int32_t AD_CB = A*D + C*B;
-  uint32_t BD = B*D;
-  
-  int32_t product_hi = AC + (AD_CB >> 16);
-  
-  // Handle carry from lower 32 bits to upper part of result.
-  uint32_t ad_cb_temp = AD_CB << 16;
-  uint32_t product_lo = BD + ad_cb_temp;
-  if (product_lo < BD)
-    product_hi++;
-  
+	// Each argument is divided to 16-bit parts.
+	//					AB
+	//			*	 CD
+	// -----------
+	//					BD	16 * 16 -> 32 bit products
+	//				 CB
+	//				 AD
+	//				AC
+	//			 |----| 64 bit product
+	int32_t A = (inArg0 >> 16), C = (inArg1 >> 16);
+	uint32_t B = (inArg0 & 0xFFFF), D = (inArg1 & 0xFFFF);
+	
+	int32_t AC = A*C;
+	int32_t AD_CB = A*D + C*B;
+	uint32_t BD = B*D;
+	
+	int32_t product_hi = AC + (AD_CB >> 16);
+	
+	// Handle carry from lower 32 bits to upper part of result.
+	uint32_t ad_cb_temp = AD_CB << 16;
+	uint32_t product_lo = BD + ad_cb_temp;
+	if (product_lo < BD)
+		product_hi++;
+	
 #ifndef FIXMATH_NO_OVERFLOW
-  // The upper 17 bits should all be the same (the sign).
-  if (product_hi >> 31 != product_hi >> 15)
-    return fix16_overflow;
+	// The upper 17 bits should all be the same (the sign).
+	if (product_hi >> 31 != product_hi >> 15)
+		return fix16_overflow;
 #endif
-  
+	
 #ifdef FIXMATH_NO_ROUNDING
-  return (product_hi << 16) | (product_lo >> 16);
+	return (product_hi << 16) | (product_lo >> 16);
 #else
-  // Subtracting 0x8000 (= 0.5) and then using signed right shift
-  // achieves proper rounding to result-1, except in the corner
-  // case of negative numbers and lowest word = 0x8000.
-  // To handle that, we also have to subtract 1 for negative numbers.
-  uint32_t product_lo_tmp = product_lo;
-  product_lo -= 0x8000;
-  product_lo -= (uint32_t)product_hi >> 31;
-  if (product_lo > product_lo_tmp)
-    product_hi--;
-  
-  // Discard the lowest 16 bits. Note that this is not exactly the same
-  // as dividing by 0x10000. For example if product = -1, result will
-  // also be -1 and not 0. This is compensated by adding +1 to the result
-  // and compensating this in turn in the rounding above.
-  fix16_t result = (product_hi << 16) | (product_lo >> 16);
-  result += 1;
-  return result;
+	// Subtracting 0x8000 (= 0.5) and then using signed right shift
+	// achieves proper rounding to result-1, except in the corner
+	// case of negative numbers and lowest word = 0x8000.
+	// To handle that, we also have to subtract 1 for negative numbers.
+	uint32_t product_lo_tmp = product_lo;
+	product_lo -= 0x8000;
+	product_lo -= (uint32_t)product_hi >> 31;
+	if (product_lo > product_lo_tmp)
+		product_hi--;
+	
+	// Discard the lowest 16 bits. Note that this is not exactly the same
+	// as dividing by 0x10000. For example if product = -1, result will
+	// also be -1 and not 0. This is compensated by adding +1 to the result
+	// and compensating this in turn in the rounding above.
+	fix16_t result = (product_hi << 16) | (product_lo >> 16);
+	result += 1;
+	return result;
 #endif
 }
 #endif
@@ -173,97 +173,98 @@ fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1)
 #if defined(FIXMATH_OPTIMIZE_8BIT)
 fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1)
 {
-  uint32_t _a = (inArg0 >= 0) ? inArg0 : (-inArg0);
-  uint32_t _b = (inArg1 >= 0) ? inArg1 : (-inArg1);
-  
-  uint8_t va[4] = {_a, (_a >> 8), (_a >> 16), (_a >> 24)};
-  uint8_t vb[4] = {_b, (_b >> 8), (_b >> 16), (_b >> 24)};
-  
-  uint32_t low = 0;
-  uint32_t mid = 0;
-  
-  // Result column i depends on va[0..i] and vb[i..0]
+	uint32_t _a = (inArg0 >= 0) ? inArg0 : (-inArg0);
+	uint32_t _b = (inArg1 >= 0) ? inArg1 : (-inArg1);
+	
+	uint8_t va[4] = {_a, (_a >> 8), (_a >> 16), (_a >> 24)};
+	uint8_t vb[4] = {_b, (_b >> 8), (_b >> 16), (_b >> 24)};
+	
+	uint32_t low = 0;
+	uint32_t mid = 0;
+	
+	// Result column i depends on va[0..i] and vb[i..0]
 
-  #ifndef FIXMATH_NO_OVERFLOW
-  // i = 6
-  if (va[3] && vb[3]) return fix16_overflow;
-  #endif
-  
-  // i = 5
-  if (va[2] && vb[3]) mid += (uint16_t)va[2] * vb[3];
-  if (va[3] && vb[2]) mid += (uint16_t)va[3] * vb[2];
-  mid <<= 8;
-  
-  // i = 4
-  if (va[1] && vb[3]) mid += (uint16_t)va[1] * vb[3];
-  if (va[2] && vb[2]) mid += (uint16_t)va[2] * vb[2];
-  if (va[3] && vb[1]) mid += (uint16_t)va[3] * vb[1];
-  
-  #ifndef FIXMATH_NO_OVERFLOW
-  if (mid & 0xFF000000) return fix16_overflow;
-  #endif
-  mid <<= 8;
-  
-  // i = 3
-  if (va[0] && vb[3]) mid += (uint16_t)va[0] * vb[3];
-  if (va[1] && vb[2]) mid += (uint16_t)va[1] * vb[2];
-  if (va[2] && vb[1]) mid += (uint16_t)va[2] * vb[1];
-  if (va[3] && vb[0]) mid += (uint16_t)va[3] * vb[0];
-  
-  #ifndef FIXMATH_NO_OVERFLOW
-  if (mid & 0xFF000000) return fix16_overflow;
-  #endif
-  mid <<= 8;
-  
-  // i = 2
-  if (va[0] && vb[2]) mid += (uint16_t)va[0] * vb[2];
-  if (va[1] && vb[1]) mid += (uint16_t)va[1] * vb[1];
-  if (va[2] && vb[0]) mid += (uint16_t)va[2] * vb[0];    
-  
-  // i = 1
-  if (va[0] && vb[1]) low += (uint16_t)va[0] * vb[1];
-  if (va[1] && vb[0]) low += (uint16_t)va[1] * vb[0];
-  low <<= 8;
-  
-  // i = 0
-  if (va[0] && vb[0]) low += (uint16_t)va[0] * vb[0];
-  
-  #ifndef FIXMATH_NO_ROUNDING
-  low += 0x8000;
-  #endif
-  mid += (low >> 16);
-  
-  #ifndef FIXMATH_NO_OVERFLOW
-  if (mid & 0x80000000)
-    return fix16_overflow;
-  #endif
-  
-  fix16_t result = mid;
-  
-  /* Figure out the sign of result */
-  if ((inArg0 >= 0) != (inArg1 >= 0))
-  {
-    result = -result;
-  }
-  
-  return result;
+	#ifndef FIXMATH_NO_OVERFLOW
+	// i = 6
+	if (va[3] && vb[3]) return fix16_overflow;
+	#endif
+	
+	// i = 5
+	if (va[2] && vb[3]) mid += (uint16_t)va[2] * vb[3];
+	if (va[3] && vb[2]) mid += (uint16_t)va[3] * vb[2];
+	mid <<= 8;
+	
+	// i = 4
+	if (va[1] && vb[3]) mid += (uint16_t)va[1] * vb[3];
+	if (va[2] && vb[2]) mid += (uint16_t)va[2] * vb[2];
+	if (va[3] && vb[1]) mid += (uint16_t)va[3] * vb[1];
+	
+	#ifndef FIXMATH_NO_OVERFLOW
+	if (mid & 0xFF000000) return fix16_overflow;
+	#endif
+	mid <<= 8;
+	
+	// i = 3
+	if (va[0] && vb[3]) mid += (uint16_t)va[0] * vb[3];
+	if (va[1] && vb[2]) mid += (uint16_t)va[1] * vb[2];
+	if (va[2] && vb[1]) mid += (uint16_t)va[2] * vb[1];
+	if (va[3] && vb[0]) mid += (uint16_t)va[3] * vb[0];
+	
+	#ifndef FIXMATH_NO_OVERFLOW
+	if (mid & 0xFF000000) return fix16_overflow;
+	#endif
+	mid <<= 8;
+	
+	// i = 2
+	if (va[0] && vb[2]) mid += (uint16_t)va[0] * vb[2];
+	if (va[1] && vb[1]) mid += (uint16_t)va[1] * vb[1];
+	if (va[2] && vb[0]) mid += (uint16_t)va[2] * vb[0];		
+	
+	// i = 1
+	if (va[0] && vb[1]) low += (uint16_t)va[0] * vb[1];
+	if (va[1] && vb[0]) low += (uint16_t)va[1] * vb[0];
+	low <<= 8;
+	
+	// i = 0
+	if (va[0] && vb[0]) low += (uint16_t)va[0] * vb[0];
+	
+	#ifndef FIXMATH_NO_ROUNDING
+	low += 0x8000;
+	#endif
+	mid += (low >> 16);
+	
+	#ifndef FIXMATH_NO_OVERFLOW
+	if (mid & 0x80000000)
+		return fix16_overflow;
+	#endif
+	
+	fix16_t result = mid;
+	
+	/* Figure out the sign of result */
+	if ((inArg0 >= 0) != (inArg1 >= 0))
+	{
+		result = -result;
+	}
+	
+	return result;
 }
 #endif
 
 #ifndef FIXMATH_NO_OVERFLOW
 /* Wrapper around fix16_mul to add saturating arithmetic. */
-fix16_t fix16_smul(fix16_t inArg0, fix16_t inArg1) {
-  fix16_t result = fix16_mul(inArg0, inArg1);
-  
-  if (result == fix16_overflow)
-  {
-    if ((inArg0 >= 0) == (inArg1 >= 0))
-      return fix16_max;
-    else
-      return fix16_min;
-  }
-  
-  return result;
+fix16_t fix16_smul(fix16_t inArg0, fix16_t inArg1)
+{
+	fix16_t result = fix16_mul(inArg0, inArg1);
+	
+	if (result == fix16_overflow)
+	{
+		if ((inArg0 >= 0) == (inArg1 >= 0))
+			return fix16_max;
+		else
+			return fix16_min;
+	}
+	
+	return result;
 }
 #endif
 
@@ -278,84 +279,84 @@ fix16_t fix16_smul(fix16_t inArg0, fix16_t inArg1) {
 #else
 static uint8_t clz(uint32_t x)
 {
-  uint8_t result = 0;
-  if (x == 0) return 32;
-  while (!(x & 0xF0000000)) { result += 4; x <<= 4; }
-  while (!(x & 0x80000000)) { result += 1; x <<= 1; }
-  return result;
+	uint8_t result = 0;
+	if (x == 0) return 32;
+	while (!(x & 0xF0000000)) { result += 4; x <<= 4; }
+	while (!(x & 0x80000000)) { result += 1; x <<= 1; }
+	return result;
 }
 #endif
 
 fix16_t fix16_div(fix16_t a, fix16_t b)
 {
-  // This uses a hardware 32/32 bit division multiple times, until we have
-  // computed all the bits in (a<<17)/b. Usually this takes 1-3 iterations.
-  
-  if (b == 0)
-      return fix16_min;
-  
-  uint32_t remainder = (a >= 0) ? a : (-a);
-  uint32_t divider = (b >= 0) ? b : (-b);
-  uint32_t quotient = 0;
-  int bit_pos = 17;
-  
-  // Kick-start the division a bit.
-  // This improves speed in the worst-case scenarios where N and D are large
-  // It gets a lower estimate for the result by N/(D >> 17 + 1).
-  if (divider & 0xFFF00000)
-  {
-    uint32_t shifted_div = ((divider >> 17) + 1);
-    quotient = remainder / shifted_div;
-    remainder -= ((uint64_t)quotient * divider) >> 17;
-  }
-  
-  // If the divider is divisible by 2^n, take advantage of it.
-  while (!(divider & 0xF) && bit_pos >= 4)
-  {
-    divider >>= 4;
-    bit_pos -= 4;
-  }
-  
-  while (remainder && bit_pos >= 0)
-  {
-    // Shift remainder as much as we can without overflowing
-    int shift = clz(remainder);
-    if (shift > bit_pos) shift = bit_pos;
-    remainder <<= shift;
-    bit_pos -= shift;
-    
-    uint32_t div = remainder / divider;
-    remainder = remainder % divider;
-    quotient += div << bit_pos;
+	// This uses a hardware 32/32 bit division multiple times, until we have
+	// computed all the bits in (a<<17)/b. Usually this takes 1-3 iterations.
+	
+	if (b == 0)
+			return fix16_min;
+	
+	uint32_t remainder = (a >= 0) ? a : (-a);
+	uint32_t divider = (b >= 0) ? b : (-b);
+	uint32_t quotient = 0;
+	int bit_pos = 17;
+	
+	// Kick-start the division a bit.
+	// This improves speed in the worst-case scenarios where N and D are large
+	// It gets a lower estimate for the result by N/((D >> 17) + 1).
+	if (divider & 0xFFF00000)
+	{
+		uint32_t shifted_div = ((divider >> 17) + 1);
+		quotient = remainder / shifted_div;
+		remainder -= ((uint64_t)quotient * divider) >> 17;
+	}
+	
+	// If the divider is divisible by 2^n, take advantage of it.
+	while (!(divider & 0xF) && bit_pos >= 4)
+	{
+		divider >>= 4;
+		bit_pos -= 4;
+	}
+	
+	while (remainder && bit_pos >= 0)
+	{
+		// Shift remainder as much as we can without overflowing
+		int shift = clz(remainder);
+		if (shift > bit_pos) shift = bit_pos;
+		remainder <<= shift;
+		bit_pos -= shift;
+		
+		uint32_t div = remainder / divider;
+		remainder = remainder % divider;
+		quotient += div << bit_pos;
 
-    #ifndef FIXMATH_NO_OVERFLOW
-    if (div & ~(0xFFFFFFFF >> bit_pos))
-        return fix16_overflow;
-    #endif
-    
-    remainder <<= 1;
-    bit_pos--;
-  }
-  
-  #ifndef FIXMATH_NO_ROUNDING
-  // Quotient is always positive so rounding is easy
-  quotient++;
-  #endif
-  
-  fix16_t result = quotient >> 1;
-  
-  // Figure out the sign of the result
-  if ((a ^ b) & 0x80000000)
-  {
-    #ifndef FIXMATH_NO_OVERFLOW
-    if (result == fix16_min)
-        return fix16_overflow;
-    #endif
-    
-    result = -result;
-  }
-  
-  return result;
+		#ifndef FIXMATH_NO_OVERFLOW
+		if (div & ~(0xFFFFFFFF >> bit_pos))
+				return fix16_overflow;
+		#endif
+		
+		remainder <<= 1;
+		bit_pos--;
+	}
+	
+	#ifndef FIXMATH_NO_ROUNDING
+	// Quotient is always positive so rounding is easy
+	quotient++;
+	#endif
+	
+	fix16_t result = quotient >> 1;
+	
+	// Figure out the sign of the result
+	if ((a ^ b) & 0x80000000)
+	{
+		#ifndef FIXMATH_NO_OVERFLOW
+		if (result == fix16_min)
+				return fix16_overflow;
+		#endif
+		
+		result = -result;
+	}
+	
+	return result;
 }
 #endif
 
@@ -366,107 +367,131 @@ fix16_t fix16_div(fix16_t a, fix16_t b)
 #if defined(FIXMATH_OPTIMIZE_8BIT)
 fix16_t fix16_div(fix16_t a, fix16_t b)
 {
-  // This uses the basic binary restoring division algorithm.
-  // It appears to be faster to do the whole division manually than
-  // trying to compose a 64-bit divide out of 32-bit divisions on
-  // platforms without hardware divide.
-  
-  if (b == 0)
-    return fix16_min;
-  
-  uint32_t remainder = (a >= 0) ? a : (-a);
-  uint32_t divider = (b >= 0) ? b : (-b);
+	// This uses the basic binary restoring division algorithm.
+	// It appears to be faster to do the whole division manually than
+	// trying to compose a 64-bit divide out of 32-bit divisions on
+	// platforms without hardware divide.
+	
+	if (b == 0)
+		return fix16_min;
+	
+	uint32_t remainder = (a >= 0) ? a : (-a);
+	uint32_t divider = (b >= 0) ? b : (-b);
 
-  uint32_t quotient = 0;
-  uint32_t bit = 0x10000;
-  
-  /* The algorithm requires D >= R */
-  while (divider < remainder)
-  {
-    divider <<= 1;
-    bit <<= 1;
-  }
-  
-  #ifndef FIXMATH_NO_OVERFLOW
-  if (!bit)
-    return fix16_overflow;
-  #endif
-  
-  if (divider & 0x80000000)
-  {
-    // Perform one step manually to avoid overflows later.
-    // We know that divider's bottom bit is 0 here.
-    if (remainder >= divider)
-    {
-        quotient |= bit;
-        remainder -= divider;
-    }
-    divider >>= 1;
-    bit >>= 1;
-  }
-  
-  /* Main division loop */
-  while (bit && remainder)
-  {
-    if (remainder >= divider)
-    {
-        quotient |= bit;
-        remainder -= divider;
-    }
-    
-    remainder <<= 1;
-    bit >>= 1;
-  }   
-      
-  #ifndef FIXMATH_NO_ROUNDING
-  if (remainder >= divider)
-  {
-    quotient++;
-  }
-  #endif
-  
-  fix16_t result = quotient;
-  
-  /* Figure out the sign of result */
-  if ((a ^ b) & 0x80000000)
-  {
-    #ifndef FIXMATH_NO_OVERFLOW
-    if (result == fix16_min)
-        return fix16_overflow;
-    #endif
-    
-    result = -result;
-  }
-  
-  return result;
+	uint32_t quotient = 0;
+	uint32_t bit = 0x10000;
+	
+	/* The algorithm requires D >= R */
+	while (divider < remainder)
+	{
+		divider <<= 1;
+		bit <<= 1;
+	}
+	
+	#ifndef FIXMATH_NO_OVERFLOW
+	if (!bit)
+		return fix16_overflow;
+	#endif
+	
+	if (divider & 0x80000000)
+	{
+		// Perform one step manually to avoid overflows later.
+		// We know that divider's bottom bit is 0 here.
+		if (remainder >= divider)
+		{
+				quotient |= bit;
+				remainder -= divider;
+		}
+		divider >>= 1;
+		bit >>= 1;
+	}
+	
+	/* Main division loop */
+	while (bit && remainder)
+	{
+		if (remainder >= divider)
+		{
+				quotient |= bit;
+				remainder -= divider;
+		}
+		
+		remainder <<= 1;
+		bit >>= 1;
+	}	 
+			
+	#ifndef FIXMATH_NO_ROUNDING
+	if (remainder >= divider)
+	{
+		quotient++;
+	}
+	#endif
+	
+	fix16_t result = quotient;
+	
+	/* Figure out the sign of result */
+	if ((a ^ b) & 0x80000000)
+	{
+		#ifndef FIXMATH_NO_OVERFLOW
+		if (result == fix16_min)
+				return fix16_overflow;
+		#endif
+		
+		result = -result;
+	}
+	
+	return result;
 }
 #endif
 
 #ifndef FIXMATH_NO_OVERFLOW
 /* Wrapper around fix16_div to add saturating arithmetic. */
-fix16_t fix16_sdiv(fix16_t inArg0, fix16_t inArg1) {
-  fix16_t result = fix16_div(inArg0, inArg1);
-  
-  if (result == fix16_overflow)
-  {
-    if ((inArg0 >= 0) == (inArg1 >= 0))
-      return fix16_max;
-    else
-      return fix16_min;
-  }
-  
-  return result;
+fix16_t fix16_sdiv(fix16_t inArg0, fix16_t inArg1)
+{
+	fix16_t result = fix16_div(inArg0, inArg1);
+	
+	if (result == fix16_overflow)
+	{
+		if ((inArg0 >= 0) == (inArg1 >= 0))
+			return fix16_max;
+		else
+			return fix16_min;
+	}
+	
+	return result;
 }
 #endif
 
-fix16_t fix16_lerp8(fix16_t inArg0, fix16_t inArg1, uint8_t inFract) {
+fix16_t fix16_mod(fix16_t x, fix16_t y)
+{
+	#ifdef FIXMATH_OPTIMIZE_8BIT
+		/* The reason we do this, rather than use a modulo operator,
+		 * is that if you don't have a hardware divider, this will result
+		 * in faster operations when the angles are close to the bounds.
+		 */
+		while(x >=  y) x -= y;
+		while(x <= -y) x += y;
+	#else
+		/* Note that in C90, the sign of the result of the modulo operation is
+		 * implementation-defined. In C99, it's the same as the dividend (aka numerator).
+		 */
+		x %= y;
+	#endif
+
+	return x;
+}
+
+
+
+fix16_t fix16_lerp8(fix16_t inArg0, fix16_t inArg1, uint8_t inFract)
+{
 	int64_t tempOut = int64_mul_i32_i32(inArg0, ((1 << 8) - inFract));
 	tempOut = int64_add(tempOut, int64_mul_i32_i32(inArg1, inFract));
 	tempOut = int64_shift(tempOut, -8);
 	return (fix16_t)int64_lo(tempOut);
 }
 
-fix16_t fix16_lerp16(fix16_t inArg0, fix16_t inArg1, uint16_t inFract) {
+fix16_t fix16_lerp16(fix16_t inArg0, fix16_t inArg1, uint16_t inFract)
+{
 	int64_t tempOut = int64_mul_i32_i32(inArg0, ((1 << 16) - inFract));
 	tempOut = int64_add(tempOut, int64_mul_i32_i32(inArg1, inFract));
 	tempOut = int64_shift(tempOut, -16);
@@ -474,10 +499,11 @@ fix16_t fix16_lerp16(fix16_t inArg0, fix16_t inArg1, uint16_t inFract) {
 }
 
 #ifndef FIXMATH_NO_64BIT
-fix16_t fix16_lerp32(fix16_t inArg0, fix16_t inArg1, uint32_t inFract) {
+fix16_t fix16_lerp32(fix16_t inArg0, fix16_t inArg1, uint32_t inFract)
+{
 	int64_t tempOut;
-	tempOut   = ((int64_t)inArg0 * (0 - inFract));
-	tempOut  += ((int64_t)inArg1 * inFract);
+	tempOut  = ((int64_t)inArg0 * (0 - inFract));
+	tempOut	+= ((int64_t)inArg1 * inFract);
 	tempOut >>= 32;
 	return (fix16_t)tempOut;
 }
diff --git a/libfixmath/fix16.h b/libfixmath/fix16.h
index 6e5c428..b396eaf 100644
--- a/libfixmath/fix16.h
+++ b/libfixmath/fix16.h
@@ -31,8 +31,8 @@ static const fix16_t X4_CORRECTION_COMPONENT = 0x399A; 	/*!< Fix16 value of 0.22
 static const fix16_t PI_DIV_4 = 0x0000C90F;             /*!< Fix16 value of PI/4 */
 static const fix16_t THREE_PI_DIV_4 = 0x00025B2F;       /*!< Fix16 value of 3PI/4 */
 
-static const fix16_t fix16_max = 0x7FFFFFFF; /*!< the maximum value of fix16_t */
-static const fix16_t fix16_min = 0x80000000; /*!< the minimum value of fix16_t */
+static const fix16_t fix16_max      = 0x7FFFFFFF; /*!< the maximum value of fix16_t */
+static const fix16_t fix16_min      = 0x80000000; /*!< the minimum value of fix16_t */
 static const fix16_t fix16_overflow = 0x80000000; /*!< the value used to indicate overflows when FIXMATH_NO_OVERFLOW is not specified */
 
 static const fix16_t fix16_pi  = 205887;     /*!< fix16_t value of pi */
@@ -42,38 +42,37 @@ static const fix16_t fix16_one = 0x00010000; /*!< fix16_t value of 1 */
 /* Conversion functions between fix16_t and float/integer.
  * These are inlined to allow compiler to optimize away constant numbers
  */
-static inline fix16_t fix16_from_int(int a) { return a * fix16_one; }
-static inline float fix16_to_float(fix16_t a) { return (float)a / fix16_one; }
-static inline double fix16_to_dbl(fix16_t a) { return (double)a / fix16_one; }
+static inline fix16_t fix16_from_int(int a)     { return a * fix16_one; }
+static inline float   fix16_to_float(fix16_t a) { return (float)a / fix16_one; }
+static inline double  fix16_to_dbl(fix16_t a)   { return (double)a / fix16_one; }
 
 static inline int fix16_to_int(fix16_t a)
 {
 #ifdef FIXMATH_NO_ROUNDING
-    return a >> 16;
+    return (a >> 16);
 #else
-    if (a >= 0)
-        return (a + fix16_one / 2) / fix16_one;
-    else
-        return (a - fix16_one / 2) / fix16_one;
+	if (a >= 0)
+		return (a + (fix16_one >> 1)) / fix16_one;
+	return (a - (fix16_one >> 1)) / fix16_one;
 #endif
 }
 
 static inline fix16_t fix16_from_float(float a)
 {
-    float temp = a * fix16_one;
+	float temp = a * fix16_one;
 #ifndef FIXMATH_NO_ROUNDING
-    temp += (temp >= 0) ? 0.5f : -0.5f;
+	temp += (temp >= 0) ? 0.5f : -0.5f;
 #endif
-    return (fix16_t)temp;
+	return (fix16_t)temp;
 }
 
 static inline fix16_t fix16_from_dbl(double a)
 {
-    double temp = a * fix16_one;
+	double temp = a * fix16_one;
 #ifndef FIXMATH_NO_ROUNDING
-    temp += (temp >= 0) ? 0.5f : -0.5f;
+	temp += (temp >= 0) ? 0.5f : -0.5f;
 #endif
-    return (fix16_t)temp;
+	return (fix16_t)temp;
 }
 
 /* Subtraction and addition with (optional) overflow detection. */
@@ -111,6 +110,12 @@ extern fix16_t fix16_smul(fix16_t inArg0, fix16_t inArg1) FIXMATH_FUNC_ATTRS;
 extern fix16_t fix16_sdiv(fix16_t inArg0, fix16_t inArg1) FIXMATH_FUNC_ATTRS;
 #endif
 
+/*! Returns the remainder of dividing the first given fix16_t by the second.
+*/
+extern fix16_t fix16_mod(fix16_t x, fix16_t y) FIXMATH_FUNC_ATTRS;
+
+
+
 /*! Returns the linear interpolation: (inArg0 * (1 - inFract)) + (inArg1 * inFract)
 */
 extern fix16_t fix16_lerp8(fix16_t inArg0, fix16_t inArg1, uint8_t inFract) FIXMATH_FUNC_ATTRS;
@@ -119,6 +124,8 @@ extern fix16_t fix16_lerp16(fix16_t inArg0, fix16_t inArg1, uint16_t inFract) FI
 extern fix16_t fix16_lerp32(fix16_t inArg0, fix16_t inArg1, uint32_t inFract) FIXMATH_FUNC_ATTRS;
 #endif
 
+
+
 /*! Returns the sine of the given fix16_t.
 */
 extern fix16_t fix16_sin_parabola(fix16_t inAngle) FIXMATH_FUNC_ATTRS;
@@ -151,6 +158,14 @@ extern fix16_t fix16_atan(fix16_t inValue) FIXMATH_FUNC_ATTRS;
 */
 extern fix16_t fix16_atan2(fix16_t inY, fix16_t inX) FIXMATH_FUNC_ATTRS;
 
+static const fix16_t fix16_rad_to_deg_mult = 3754936;
+static inline fix16_t fix16_rad_to_deg(fix16_t radians)
+	{ return fix16_mul(radians, fix16_rad_to_deg_mult); }
+
+static const fix16_t fix16_deg_to_rad_mult = 1144;
+static inline fix16_t fix16_deg_to_rad(fix16_t degrees)
+	{ return fix16_mul(degrees, fix16_deg_to_rad_mult); }
+
 
 
 /*! Returns the square root of the given fix16_t.
diff --git a/libfixmath/fix16_sqrt.c b/libfixmath/fix16_sqrt.c
index 13d31a8..abf57f7 100644
--- a/libfixmath/fix16_sqrt.c
+++ b/libfixmath/fix16_sqrt.c
@@ -9,75 +9,76 @@
  * Not sure if someone relies on this behaviour, but not going
  * to break it for now. It doesn't slow the code much overall.
  */
-fix16_t fix16_sqrt(fix16_t inValue) {
-  uint8_t neg = (inValue < 0);
-  uint32_t num = (neg ? -inValue : inValue);
-  uint32_t result = 0;
-  uint32_t bit;
-  uint8_t n;
-  
-  // Many numbers will be less than 15, so
-  // this gives a good balance between time spent
-  // in if vs. time spent in the while loop
-  // when searching for the starting value.
-  if (num & 0xFFF00000)
-    bit = (uint32_t)1 << 30;
-  else
-    bit = (uint32_t)1 << 18;
-  
-  while (bit > num) bit >>= 2;
-  
-  // The main part is executed twice, in order to avoid
-  // using 64 bit values in computations.
-  for (n = 0; n < 2; n++)
-  {
-    // First we get the top 24 bits of the answer.
-    while (bit)
-    {
-      if (num >= result + bit)
-      {
-        num -= result + bit;
-        result = (result >> 1) + bit;
-      }
-      else
-      {
-        result = (result >> 1);
-      }
-      bit >>= 2;
-    }
-    
-    if (n == 0)
-    {
-      // Then process it again to get the lowest 8 bits.
-      if (num > 65535)
-      {
-        // The remainder 'num' is too large to be shifted left
-        // by 16, so we have to add 1 to result manually and
-        // adjust 'num' accordingly.
-        // num = a - (result + 0.5)^2
-        //   = num + result^2 - (result + 0.5)^2
-        //   = num - result - 0.5
-        num -= result;
-        num = (num << 16) - 0x8000;
-        result = (result << 16) + 0x8000;
-      }
-      else
-      {
-        num <<= 16;
-        result <<= 16;
-      }
-      
-      bit = 1 << 14;
-    }
-  }
+fix16_t fix16_sqrt(fix16_t inValue)
+{
+	uint8_t  neg = (inValue < 0);
+	uint32_t num = (neg ? -inValue : inValue);
+	uint32_t result = 0;
+	uint32_t bit;
+	uint8_t  n;
+	
+	// Many numbers will be less than 15, so
+	// this gives a good balance between time spent
+	// in if vs. time spent in the while loop
+	// when searching for the starting value.
+	if (num & 0xFFF00000)
+		bit = (uint32_t)1 << 30;
+	else
+		bit = (uint32_t)1 << 18;
+	
+	while (bit > num) bit >>= 2;
+	
+	// The main part is executed twice, in order to avoid
+	// using 64 bit values in computations.
+	for (n = 0; n < 2; n++)
+	{
+		// First we get the top 24 bits of the answer.
+		while (bit)
+		{
+			if (num >= result + bit)
+			{
+				num -= result + bit;
+				result = (result >> 1) + bit;
+			}
+			else
+			{
+				result = (result >> 1);
+			}
+			bit >>= 2;
+		}
+		
+		if (n == 0)
+		{
+			// Then process it again to get the lowest 8 bits.
+			if (num > 65535)
+			{
+				// The remainder 'num' is too large to be shifted left
+				// by 16, so we have to add 1 to result manually and
+				// adjust 'num' accordingly.
+				// num = a - (result + 0.5)^2
+				//	 = num + result^2 - (result + 0.5)^2
+				//	 = num - result - 0.5
+				num -= result;
+				num = (num << 16) - 0x8000;
+				result = (result << 16) + 0x8000;
+			}
+			else
+			{
+				num <<= 16;
+				result <<= 16;
+			}
+			
+			bit = 1 << 14;
+		}
+	}
 
 #ifndef FIXMATH_NO_ROUNDING
-  // Finally, if next bit would have been 1, round the result upwards.
-  if (num > result)
-  {
-    result++;
-  }
+	// Finally, if next bit would have been 1, round the result upwards.
+	if (num > result)
+	{
+		result++;
+	}
 #endif
-  
-  return (neg ? -result : result);
+	
+	return (neg ? -result : result);
 }
diff --git a/libfixmath/fix16_trig.c b/libfixmath/fix16_trig.c
index 6b53682..864dc22 100644
--- a/libfixmath/fix16_trig.c
+++ b/libfixmath/fix16_trig.c
@@ -44,7 +44,8 @@ fix16_t fix16_sin_parabola(fix16_t inAngle)
 	return retval;
 }
 
-fix16_t fix16_sin(fix16_t inAngle) {
+fix16_t fix16_sin(fix16_t inAngle)
+{
 	fix16_t tempAngle = inAngle % (fix16_pi << 1);
 
 	#ifdef FIXMATH_SIN_LUT
@@ -105,17 +106,22 @@ fix16_t fix16_sin(fix16_t inAngle) {
 	return tempOut;
 }
 
-fix16_t fix16_cos(fix16_t inAngle) {
+fix16_t fix16_cos(fix16_t inAngle)
+{
 	return fix16_sin(inAngle + (fix16_pi >> 1));
 }
 
-fix16_t fix16_tan(fix16_t inAngle) {
+fix16_t fix16_tan(fix16_t inAngle)
+{
 	return fix16_sdiv(fix16_sin(inAngle), fix16_cos(inAngle));
 }
 
-fix16_t fix16_asin(fix16_t inValue) {
-	if((inValue > fix16_one) || (inValue < -fix16_one))
+fix16_t fix16_asin(fix16_t inValue)
+{
+	if((inValue > fix16_one)
+		|| (inValue < -fix16_one))
 		return 0;
+
 	fix16_t tempOut;
 	tempOut = (fix16_one - fix16_mul(inValue, inValue));
 	tempOut = fix16_div(inValue, fix16_sqrt(tempOut));
@@ -123,11 +129,13 @@ fix16_t fix16_asin(fix16_t inValue) {
 	return tempOut;
 }
 
-fix16_t fix16_acos(fix16_t inValue) {
+fix16_t fix16_acos(fix16_t inValue)
+{
 	return ((fix16_pi >> 1) - fix16_asin(inValue));
 }
 
-fix16_t fix16_atan2(fix16_t inY , fix16_t inX) {
+fix16_t fix16_atan2(fix16_t inY , fix16_t inX)
+{
 	fix16_t abs_inY, mask, angle, r, r_3;
 
 	#ifndef FIXMATH_NO_CACHE
@@ -166,6 +174,7 @@ fix16_t fix16_atan2(fix16_t inY , fix16_t inX) {
 	return angle;
 }
 
-fix16_t fix16_atan(fix16_t inValue) {
+fix16_t fix16_atan(fix16_t inValue)
+{
 	return fix16_atan2(inValue, fix16_one);
 }
-- 
cgit v1.2.3