Benchmark suite using simulators
This commit is contained in:
parent
e929442f71
commit
90973e833d
33
benchmarks/Makefile
Normal file
33
benchmarks/Makefile
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
# These are testcases & benchmarks for the library on the target processors
# (currently ARM Cortex M3 and AVR). They are a bit tricky to run, as they
# depend on specific simulator versions.

# Sources shared by every target build.
FILES = benchmark.c ../libfixmath/fix16.c ../libfixmath/fix16_sqrt.c ../libfixmath/fix16_exp.c

# Library configuration and include path shared by every target build.
CFLAGS = -DFIXMATH_NO_OVERFLOW -DFIXMATH_NO_ROUNDING -ffast-math -I../libfixmath

# The run-* targets only launch a simulator; they never create a file with
# that name, so make must not treat an existing file as "up to date".
.PHONY: run-benchmark-arm run-benchmark-avr

# Regenerate the precomputed test vectors when the generator script changes.
testcases.c: generate_testcases.py
	python $<

# Note: this needs hacked QEmu that "makes no sense":
# https://bugs.launchpad.net/qemu/+bug/696094
benchmark-arm.elf: $(FILES) interface-arm.c testcases.c
	arm-none-eabi-gcc -mcpu=cortex-m3 -mthumb -T generic-m-hosted.ld \
		-Wall -O2 $(CFLAGS) \
		-o $@ -I .. $(FILES) interface-arm.c -lm

run-benchmark-arm: benchmark-arm.elf
	qemu-system-arm -cpu cortex-m3 -icount 0 -device armv7m_nvic \
		-nographic -monitor null -serial null \
		-semihosting -kernel $<

benchmark-avr.elf: $(FILES) interface-avr.c testcases.c
	avr-gcc -Wall -mmcu=atmega128 $(CFLAGS) \
		-O2 -DFIXMATH_OPTIMIZE_8BIT \
		-o $@ -I .. $(FILES) interface-avr.c

# Note: this needs simulavrxx 1.0rc0 or newer
run-benchmark-avr: benchmark-avr.elf
	simulavr -d atmega128 -f $< -W 0x20,- -T exit
|
||||||
|
|
||||||
|
|
214
benchmarks/benchmark.c
Normal file
214
benchmarks/benchmark.c
Normal file
|
@ -0,0 +1,214 @@
|
||||||
|
#ifndef NO_FLOAT
|
||||||
|
#include <math.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <fix16.h>
|
||||||
|
#include "interface.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
/* Autogenerated testcases */
|
||||||
|
#include "testcases.c"
|
||||||
|
|
||||||
|
/* Tools for profiling */
|
||||||
|
|
||||||
|
/* Running statistics over a series of cycle-count measurements. */
typedef struct {
    uint32_t min;   /* smallest sample recorded so far */
    uint32_t max;   /* largest sample recorded so far */
    uint32_t sum;   /* total of all recorded samples */
    uint32_t count; /* number of recorded samples */
} cyclecount_t;

/* Initializer for an empty cyclecount_t.
 * min starts at 2^32-1 and max at 0, so the very first call to
 * cyclecount_update() replaces both of them. */
#define CYCLECOUNT_INIT { .min = 0xFFFFFFFF, .max = 0, .sum = 0, .count = 0 }

/* Fold one measurement of `cycles` clock cycles into the statistics in `data`. */
static void cyclecount_update(cyclecount_t *data, uint32_t cycles)
{
    data->count += 1;
    data->sum += cycles;

    if (data->max < cycles) {
        data->max = cycles;
    }
    if (data->min > cycles) {
        data->min = cycles;
    }
}
|
||||||
|
|
||||||
|
// Time one execution of `statement` and record the elapsed cycles in the
// cyclecount_t object `variable`.
//
// Only `statement` runs between start_timing() and end_timing(); the
// bookkeeping in cyclecount_update() happens after the timer has been read.
// The do/while(0) wrapper makes the expansion a single statement, so
// "MEASURE(...);" is safe inside an unbraced if/else.
#define MEASURE(variable, statement) do { \
    start_timing(); \
    statement; \
    cyclecount_update(&(variable), end_timing()); \
} while (0)
|
||||||
|
|
||||||
|
// Print the minimum, maximum and average of the cyclecount_t object
// `variable`. `label` must be a string literal, because the suffixes are
// appended by compile-time string concatenation.
//
// The average is reported as 0 when no samples were recorded, instead of
// dividing by zero. The do/while(0) wrapper makes the expansion a single
// statement.
#define PRINT(variable, label) do { \
    print_value(label " min", (variable).min); \
    print_value(label " max", (variable).max); \
    print_value(label " avg", \
        (variable).count ? (variable).sum / (variable).count : 0); \
} while (0)
|
||||||
|
|
||||||
|
/* Cycle statistics, one accumulator per fixed-point operation. */
static cyclecount_t exp_cycles = CYCLECOUNT_INIT;
static cyclecount_t sqrt_cycles = CYCLECOUNT_INIT;
static cyclecount_t add_cycles = CYCLECOUNT_INIT;
static cyclecount_t sub_cycles = CYCLECOUNT_INIT;
static cyclecount_t div_cycles = CYCLECOUNT_INIT;
static cyclecount_t mul_cycles = CYCLECOUNT_INIT;

#ifndef NO_FLOAT
/* Accumulators for the floating point operations used as a comparison;
 * compiled out together with the float benchmark when NO_FLOAT is defined. */
static cyclecount_t float_sqrtf_cycles = CYCLECOUNT_INIT;
static cyclecount_t float_add_cycles = CYCLECOUNT_INIT;
static cyclecount_t float_sub_cycles = CYCLECOUNT_INIT;
static cyclecount_t float_div_cycles = CYCLECOUNT_INIT;
static cyclecount_t float_mul_cycles = CYCLECOUNT_INIT;
#endif
|
||||||
|
|
||||||
|
// Return the absolute difference between `result` and `expected`.
//
// When FIXMATH_NO_OVERFLOW is defined, an expected value equal to fix16_min
// is never counted as an error and 0 is returned.
//
// The subtraction is done in unsigned arithmetic: with operands of opposite
// sign the signed difference can exceed the 32-bit range, which is undefined
// behaviour in C. A difference too large for a non-negative fix16_t is
// saturated to INT32_MAX, so any tolerance check still fails.
// NOTE(review): assumes fix16_t is a 32-bit signed integer, as in libfixmath.
static fix16_t delta(fix16_t result, fix16_t expected)
{
#ifdef FIXMATH_NO_OVERFLOW
    // Ignore overflow errors when the detection is turned off
    if (expected == fix16_min)
        return 0;
#endif

    uint32_t difference;

    if (result >= expected)
    {
        difference = (uint32_t)result - (uint32_t)expected;
    }
    else
    {
        difference = (uint32_t)expected - (uint32_t)result;
    }

    if (difference > (uint32_t)INT32_MAX)
    {
        return INT32_MAX;
    }

    return (fix16_t)difference;
}
|
||||||
|
|
||||||
|
#ifdef FIXMATH_NO_ROUNDING
|
||||||
|
const fix16_t max_delta = 1;
|
||||||
|
#else
|
||||||
|
const fix16_t max_delta = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
interface_init();
|
||||||
|
|
||||||
|
start_timing();
|
||||||
|
print_value("Timestamp bias", end_timing());
|
||||||
|
|
||||||
|
for (i = 0; i < TESTCASES1_COUNT; i++)
|
||||||
|
{
|
||||||
|
fix16_t input = testcases1[i].a;
|
||||||
|
fix16_t result;
|
||||||
|
fix16_t expected = testcases1[i].sqrt;
|
||||||
|
MEASURE(sqrt_cycles, result = fix16_sqrt(input));
|
||||||
|
|
||||||
|
if (input > 0 && delta(result, expected) > max_delta)
|
||||||
|
{
|
||||||
|
print_value("Failed SQRT, i", i);
|
||||||
|
print_value("Failed SQRT, input", input);
|
||||||
|
print_value("Failed SQRT, output", result);
|
||||||
|
print_value("Failed SQRT, expected", expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
expected = testcases1[i].exp;
|
||||||
|
MEASURE(exp_cycles, result = fix16_exp(input));
|
||||||
|
|
||||||
|
if (delta(result, expected) > 400)
|
||||||
|
{
|
||||||
|
print_value("Failed EXP, i", i);
|
||||||
|
print_value("Failed EXP, input", input);
|
||||||
|
print_value("Failed EXP, output", result);
|
||||||
|
print_value("Failed EXP, expected", expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PRINT(sqrt_cycles, "fix16_sqrt");
|
||||||
|
PRINT(exp_cycles, "fix16_exp");
|
||||||
|
|
||||||
|
for (i = 0; i < TESTCASES2_COUNT; i++)
|
||||||
|
{
|
||||||
|
fix16_t a = testcases2[i].a;
|
||||||
|
fix16_t b = testcases2[i].b;
|
||||||
|
volatile fix16_t result;
|
||||||
|
|
||||||
|
fix16_t expected = testcases2[i].add;
|
||||||
|
MEASURE(add_cycles, result = fix16_add(a, b));
|
||||||
|
if (delta(result, expected) > max_delta)
|
||||||
|
{
|
||||||
|
print_value("Failed ADD, i", i);
|
||||||
|
print_value("Failed ADD, a", a);
|
||||||
|
print_value("Failed ADD, b", b);
|
||||||
|
print_value("Failed ADD, output", result);
|
||||||
|
print_value("Failed ADD, expected", expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
expected = testcases2[i].sub;
|
||||||
|
MEASURE(sub_cycles, result = fix16_sub(a, b));
|
||||||
|
if (delta(result, expected) > max_delta)
|
||||||
|
{
|
||||||
|
print_value("Failed SUB, i", i);
|
||||||
|
print_value("Failed SUB, a", a);
|
||||||
|
print_value("Failed SUB, b", b);
|
||||||
|
print_value("Failed SUB, output", result);
|
||||||
|
print_value("Failed SUB, expected", expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
expected = testcases2[i].mul;
|
||||||
|
MEASURE(mul_cycles, result = fix16_mul(a, b));
|
||||||
|
if (delta(result, expected) > max_delta)
|
||||||
|
{
|
||||||
|
print_value("Failed MUL, i", i);
|
||||||
|
print_value("Failed MUL, a", a);
|
||||||
|
print_value("Failed MUL, b", b);
|
||||||
|
print_value("Failed MUL, output", result);
|
||||||
|
print_value("Failed MUL, expected", expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (b != 0)
|
||||||
|
{
|
||||||
|
expected = testcases2[i].div;
|
||||||
|
MEASURE(div_cycles, result = fix16_div(a, b));
|
||||||
|
if (delta(result, expected) > max_delta)
|
||||||
|
{
|
||||||
|
print_value("Failed DIV, i", i);
|
||||||
|
print_value("Failed DIV, a", a);
|
||||||
|
print_value("Failed DIV, b", b);
|
||||||
|
print_value("Failed DIV, output", result);
|
||||||
|
print_value("Failed DIV, expected", expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PRINT(add_cycles, "fix16_add");
|
||||||
|
PRINT(sub_cycles, "fix16_sub");
|
||||||
|
PRINT(mul_cycles, "fix16_mul");
|
||||||
|
PRINT(div_cycles, "fix16_div");
|
||||||
|
|
||||||
|
/* Compare with floating point performance */
|
||||||
|
#ifndef NO_FLOAT
|
||||||
|
for (i = 0; i < TESTCASES1_COUNT; i++)
|
||||||
|
{
|
||||||
|
float input = fix16_to_float(testcases1[i].a);
|
||||||
|
volatile float result;
|
||||||
|
MEASURE(float_sqrtf_cycles, result = sqrtf(input));
|
||||||
|
}
|
||||||
|
PRINT(float_sqrtf_cycles, "float sqrtf");
|
||||||
|
|
||||||
|
for (i = 0; i < TESTCASES2_COUNT; i++)
|
||||||
|
{
|
||||||
|
float a = fix16_to_float(testcases2[i].a);
|
||||||
|
float b = fix16_to_float(testcases2[i].b);
|
||||||
|
volatile float result;
|
||||||
|
MEASURE(float_add_cycles, result = a + b);
|
||||||
|
MEASURE(float_sub_cycles, result = a - b);
|
||||||
|
MEASURE(float_mul_cycles, result = a * b);
|
||||||
|
|
||||||
|
if (b != 0)
|
||||||
|
{
|
||||||
|
MEASURE(float_div_cycles, result = a / b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PRINT(float_add_cycles, "float add");
|
||||||
|
PRINT(float_sub_cycles, "float sub");
|
||||||
|
PRINT(float_mul_cycles, "float mul");
|
||||||
|
PRINT(float_div_cycles, "float div");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
140
benchmarks/generate_testcases.py
Normal file
140
benchmarks/generate_testcases.py
Normal file
|
@ -0,0 +1,140 @@
|
||||||
|
'''This script precalculates the correct solutions for a set of test numbers,
|
||||||
|
and writes them to testcases.c. This is aimed for running the tests on-target,
|
||||||
|
therefore it doesn't test all the cases or use floating point math, but
|
||||||
|
instead generates a ~10k binary.
|
||||||
|
|
||||||
|
The test inputs are chosen randomly, so there is a good chance of eventually
catching most errors. Because the list is not regenerated automatically, the
benchmark application still behaves deterministically and is easy
to debug.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import math
|
||||||
|
import random
|
||||||
|
import struct
|
||||||
|
|
||||||
|
# Fix16 scaling factor: a raw fix16 integer is the real value times 2**16.
# Kept as a float so that dividing by it never truncates.
scale = 65536.

# Fix16 overflow indicator
overflow = -2**31

def f16_to_float(val):
    '''Return the floating point value represented by the raw fix16 integer val.'''
    return val / scale

def float_to_f16(val):
    '''Return the raw fix16 integer nearest to the float val.

    Ties are rounded away from zero. This is spelled out explicitly because
    the built-in round() rounds ties to even on Python 3 but away from zero
    on Python 2, so the generated expectations would otherwise depend on the
    interpreter version.

    A result outside the signed 32-bit range is replaced by `overflow`.
    Raises OverflowError for infinite input, as int(round(inf)) does.
    '''
    scaled = val * scale
    magnitude = abs(scaled)
    whole = math.floor(magnitude)

    # magnitude - whole is computed exactly, so the comparison with 0.5 is
    # reliable (unlike the floor(x + 0.5) shortcut).
    if magnitude - whole >= 0.5:
        whole += 1

    result = int(whole)
    if scaled < 0:
        result = -result

    if result >= 2**31 or result < -2**31:
        result = overflow
    return result
|
||||||
|
|
||||||
|
def to_ui32(val):
    '''Reinterpret the signed 32-bit integer val as an unsigned 32-bit integer.

    Goes through the native 4-byte representation, so struct.error is raised
    when val does not fit in a signed 32-bit integer.
    '''
    packed = struct.pack('i', val)
    (unsigned,) = struct.unpack('I', packed)
    return unsigned
|
||||||
|
|
||||||
|
# Pool of raw fix16 input values that the generated test vectors draw from.
# Each hand-picked magnitude is followed directly by its negation.

# Small numbers
testcases = list(range(0, 11))
testcases.extend(-small for small in range(1, 11))

# Integer numbers
for whole in (0x10000, 0x20000, 0x30000, 0x40000, 0x50000, 0x60000):
    testcases.extend([whole, -whole])

# Fractions (1/2, 1/4, 1/8)
for fraction in (0x8000, 0x4000, 0x2000):
    testcases.extend([fraction, -fraction])

# Problematic carry
for carry in (0xFFFF, 0x1FFFF, 0x3FFFF):
    testcases.extend([carry, -carry])

# Smallest and largest values
testcases.extend([0x7FFFFFFF, -0x80000000])

# Ten rounds of random values; each round adds one large, one small and one
# tiny number, in that order.
for _ in range(10):
    testcases.extend([
        random.randint(-0x80000000, 0x7FFFFFFF),
        random.randint(-100000, 100000),
        random.randint(-200, 200),
    ])
|
||||||
|
|
||||||
|
# Write testcases.c: the C declarations first, then one table of expected
# results per operand count. The 'with' block closes (and flushes) the file
# even when computing one of the test vectors raises.
with open("testcases.c", "w") as out:
    out.write('''
/* Automatically generated testcases for fix16 operations
* See generate_testcases.py for the generator.
*/

#include <fix16.h>

typedef struct {
// Input
fix16_t a;

// Correct output
fix16_t sqrt;
fix16_t exp;
} fix16_1op_testcase;

typedef struct {
// Inputs
fix16_t a;
fix16_t b;

// Correct output
fix16_t add;
fix16_t sub;
fix16_t mul;
fix16_t div;
} fix16_2op_testcase;

#define TESTCASES1_COUNT (sizeof(testcases1)/sizeof(testcases1[0]))
#define TESTCASES2_COUNT (sizeof(testcases2)/sizeof(testcases2[0]))

''')

    # Write testcases for 1-operand functions

    out.write('static const fix16_1op_testcase testcases1[] = {\n')

    for i in range(10):
        a = random.choice(testcases)

        # The expected square root of a negative input is recorded as 0.
        if a >= 0:
            sqrt = float_to_f16(math.sqrt(f16_to_float(a)))
        else:
            sqrt = 0

        try:
            exp = float_to_f16(math.exp(f16_to_float(a)))
        except OverflowError:
            # NOTE(review): this records the largest fix16 value, whereas
            # float_to_f16 marks out-of-range results with `overflow`;
            # confirm which of the two fix16_exp is expected to return.
            exp = 0x7FFFFFFF

        out.write(' {0x%08x, 0x%08x, 0x%08x}, // %d\n'
                  % (to_ui32(a), to_ui32(sqrt), to_ui32(exp), i))

    out.write('};\n\n')

    # Write testcases for 2-operand functions

    out.write('static const fix16_2op_testcase testcases2[] = {\n')

    for i in range(50):
        a = random.choice(testcases)
        b = random.choice(testcases)

        add = float_to_f16(f16_to_float(a) + f16_to_float(b))
        sub = float_to_f16(f16_to_float(a) - f16_to_float(b))
        mul = float_to_f16(f16_to_float(a) * f16_to_float(b))

        # The expected quotient for a zero divisor is recorded as 0.
        if b != 0:
            div = float_to_f16(f16_to_float(a) / f16_to_float(b))
        else:
            div = 0

        out.write(' {0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x}, // %d\n'
                  % (to_ui32(a), to_ui32(b), to_ui32(add), to_ui32(sub), to_ui32(mul), to_ui32(div), i))

    out.write('};\n\n')
|
||||||
|
|
32
benchmarks/interface-arm.c
Normal file
32
benchmarks/interface-arm.c
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
#include "interface.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
// This targets an ARM Cortex M3 core using QEmu LM3S6965 emulation.
//
// Memory-mapped timer registers, addressed relative to STBASE. The offsets
// match the Cortex-M SysTick block: control/status, reload value and
// current value.
#define STBASE 0xE000E000
#define STCTRL    (*(volatile uint32_t *)(STBASE + 0x010))
#define STRELOAD  (*(volatile uint32_t *)(STBASE + 0x014))
#define STCURRENT (*(volatile uint32_t *)(STBASE + 0x018))
|
||||||
|
|
||||||
|
|
||||||
|
void interface_init()
|
||||||
|
{
|
||||||
|
STRELOAD = 0x00FFFFFF;
|
||||||
|
STCTRL = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
void start_timing()
|
||||||
|
{
|
||||||
|
STCURRENT = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint16_t end_timing()
|
||||||
|
{
|
||||||
|
return 0x00FFFFFF - STCURRENT - 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print `label`, left-aligned and padded to 20 characters, followed by
// `value` in decimal and a newline.
//
// The value is converted to long explicitly: "%ld" requires a long argument,
// and int32_t is plain int on many platforms, where passing it unconverted
// would be a format/argument mismatch.
void print_value(const char *label, int32_t value)
{
    printf("%-20s %ld\n", label, (long)value);
}
|
||||||
|
|
39
benchmarks/interface-avr.c
Normal file
39
benchmarks/interface-avr.c
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
#include <avr/io.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "interface.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#define special_output_port (*((volatile char *)0x20))
|
||||||
|
static int output_char(char c, FILE *stream)
|
||||||
|
{
|
||||||
|
special_output_port = c;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static FILE mystdout = FDEV_SETUP_STREAM(output_char, NULL, _FDEV_SETUP_WRITE);
|
||||||
|
|
||||||
|
void interface_init()
|
||||||
|
{
|
||||||
|
// Set timer 1 to count cycles
|
||||||
|
TCCR1B = 1;
|
||||||
|
|
||||||
|
// Set output to simulator
|
||||||
|
stdout = &mystdout;
|
||||||
|
stderr = &mystdout;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void start_timing()
|
||||||
|
{
|
||||||
|
TCNT1 = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint16_t end_timing()
|
||||||
|
{
|
||||||
|
return TCNT1 - 9;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print `label`, left-aligned and padded to 20 characters, followed by
// `value` in decimal and a newline.
//
// The value is converted to long explicitly: "%ld" requires a long argument,
// and int32_t is plain int on many platforms, where passing it unconverted
// would be a format/argument mismatch.
void print_value(const char *label, int32_t value)
{
    printf("%-20s %ld\n", label, (long)value);
}
|
16
benchmarks/interface.h
Normal file
16
benchmarks/interface.h
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
// This file defines the hardware or simulator interface that will be used to
// measure timings and report results.

#ifndef BENCHMARK_INTERFACE_H
#define BENCHMARK_INTERFACE_H

#include <stdint.h>

// Initialize the timer and the console output.
void interface_init(void);

// Reset the timer/counter; marks the start of a measured interval.
void start_timing(void);

// Return the number of clock cycles passed since start_timing().
// The result is 16 bits wide, so longer intervals cannot be represented.
uint16_t end_timing(void);

// Print a value to console, along with a descriptive label
void print_value(const char *label, int32_t value);

#endif // BENCHMARK_INTERFACE_H
|
Loading…
Reference in New Issue
Block a user