416 lines
12 KiB
C++
416 lines
12 KiB
C++
/*
|
|
* simdtests.c -- test accuracy and performance of simd optimizations
|
|
*
|
|
* Copyright (C) 2017 Andreas Mueller.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
*/
|
|
|
|
/* We must include all headers memops.c includes to avoid trouble with
|
|
* out namespace game below.
|
|
*/
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <math.h>
|
|
#include <memory.h>
|
|
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
#include <limits.h>
|
|
#ifdef __linux__
|
|
#include <endian.h>
|
|
#endif
|
|
#include "memops.h"
|
|
|
|
#if defined (__SSE2__) && !defined (__sun__)
|
|
#include <emmintrin.h>
|
|
#ifdef __SSE4_1__
|
|
#include <smmintrin.h>
|
|
#endif
|
|
#endif
|
|
|
|
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
|
|
#include <arm_neon.h>
|
|
#endif
|
|
|
|
// our additional headers
|
|
#include <time.h>
|
|
|
|
/* Dirty: include mempos.c twice the second time with SIMD disabled
|
|
* so we can compare aceelerated non accelerated
|
|
*/
|
|
namespace accelerated {
|
|
#include "../common/memops.c"
|
|
}
|
|
|
|
namespace origerated {
|
|
#ifdef __SSE2__
|
|
#undef __SSE2__
|
|
#endif
|
|
|
|
#ifdef __ARM_NEON__
|
|
#undef __ARM_NEON__
|
|
#endif
|
|
|
|
#ifdef __ARM_NEON
|
|
#undef __ARM_NEON
|
|
#endif
|
|
|
|
#include "../common/memops.c"
|
|
}
|
|
|
|
// define conversion function types
|
|
typedef void (*t_jack_to_integer)(
|
|
char *dst,
|
|
jack_default_audio_sample_t *src,
|
|
unsigned long nsamples,
|
|
unsigned long dst_skip,
|
|
dither_state_t *state);
|
|
|
|
typedef void (*t_integer_to_jack)(
|
|
jack_default_audio_sample_t *dst,
|
|
char *src,
|
|
unsigned long nsamples,
|
|
unsigned long src_skip);
|
|
|
|
// define/setup test case data
|
|
typedef struct test_case_data {
|
|
uint32_t frame_size;
|
|
uint32_t sample_size;
|
|
bool reverse;
|
|
t_jack_to_integer jack_to_integer_accel;
|
|
t_jack_to_integer jack_to_integer_orig;
|
|
t_integer_to_jack integer_to_jack_accel;
|
|
t_integer_to_jack integer_to_jack_orig;
|
|
dither_state_t *ditherstate;
|
|
const char *name;
|
|
} test_case_data_t;
|
|
|
|
test_case_data_t test_cases[] = {
|
|
{
|
|
4,
|
|
3,
|
|
true,
|
|
accelerated::sample_move_d32u24_sSs,
|
|
origerated::sample_move_d32u24_sSs,
|
|
accelerated::sample_move_dS_s32u24s,
|
|
origerated::sample_move_dS_s32u24s,
|
|
NULL,
|
|
"32u24s" },
|
|
{
|
|
4,
|
|
3,
|
|
false,
|
|
accelerated::sample_move_d32u24_sS,
|
|
origerated::sample_move_d32u24_sS,
|
|
accelerated::sample_move_dS_s32u24,
|
|
origerated::sample_move_dS_s32u24,
|
|
NULL,
|
|
"32u24" },
|
|
{
|
|
4,
|
|
3,
|
|
true,
|
|
accelerated::sample_move_d32l24_sSs,
|
|
origerated::sample_move_d32l24_sSs,
|
|
accelerated::sample_move_dS_s32l24s,
|
|
origerated::sample_move_dS_s32l24s,
|
|
NULL,
|
|
"32l24s" },
|
|
{
|
|
4,
|
|
3,
|
|
false,
|
|
accelerated::sample_move_d32l24_sS,
|
|
origerated::sample_move_d32l24_sS,
|
|
accelerated::sample_move_dS_s32l24,
|
|
origerated::sample_move_dS_s32l24,
|
|
NULL,
|
|
"32l24" },
|
|
{
|
|
3,
|
|
3,
|
|
true,
|
|
accelerated::sample_move_d24_sSs,
|
|
origerated::sample_move_d24_sSs,
|
|
accelerated::sample_move_dS_s24s,
|
|
origerated::sample_move_dS_s24s,
|
|
NULL,
|
|
"24s" },
|
|
{
|
|
3,
|
|
3,
|
|
false,
|
|
accelerated::sample_move_d24_sS,
|
|
origerated::sample_move_d24_sS,
|
|
accelerated::sample_move_dS_s24,
|
|
origerated::sample_move_dS_s24,
|
|
NULL,
|
|
"24" },
|
|
{
|
|
2,
|
|
2,
|
|
true,
|
|
accelerated::sample_move_d16_sSs,
|
|
origerated::sample_move_d16_sSs,
|
|
accelerated::sample_move_dS_s16s,
|
|
origerated::sample_move_dS_s16s,
|
|
NULL,
|
|
"16s" },
|
|
{
|
|
2,
|
|
2,
|
|
false,
|
|
accelerated::sample_move_d16_sS,
|
|
origerated::sample_move_d16_sS,
|
|
accelerated::sample_move_dS_s16,
|
|
origerated::sample_move_dS_s16,
|
|
NULL,
|
|
"16" },
|
|
};
|
|
|
|
// we need to repeat for better accuracy at time measurement
|
|
const uint32_t retry_per_case = 1000;
|
|
|
|
// setup test buffers
|
|
#define TESTBUFF_SIZE 1024
|
|
jack_default_audio_sample_t jackbuffer_source[TESTBUFF_SIZE];
|
|
// integer buffers: max 4 bytes per value / * 2 for stereo
|
|
char integerbuffer_accel[TESTBUFF_SIZE*4*2];
|
|
char integerbuffer_orig[TESTBUFF_SIZE*4*2];
|
|
// float buffers
|
|
jack_default_audio_sample_t jackfloatbuffer_accel[TESTBUFF_SIZE];
|
|
jack_default_audio_sample_t jackfloatbuffer_orig[TESTBUFF_SIZE];
|
|
|
|
// comparing unsigned makes life easier
|
|
uint32_t extract_integer(
|
|
char* buff,
|
|
uint32_t offset,
|
|
uint32_t frame_size,
|
|
uint32_t sample_size,
|
|
bool big_endian)
|
|
{
|
|
uint32_t retval = 0;
|
|
unsigned char* curr;
|
|
uint32_t mult = 1;
|
|
if(big_endian) {
|
|
curr = (unsigned char*)buff + offset + sample_size-1;
|
|
for(uint32_t i=0; i<sample_size; i++) {
|
|
retval += *(curr--) * mult;
|
|
mult*=256;
|
|
}
|
|
}
|
|
else {
|
|
curr = (unsigned char*)buff + offset + frame_size-sample_size;
|
|
for(uint32_t i=0; i<sample_size; i++) {
|
|
retval += *(curr++) * mult;
|
|
mult*=256;
|
|
}
|
|
}
|
|
return retval;
|
|
}
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
// parse_arguments(argc, argv);
|
|
uint32_t maxerr_displayed = 10;
|
|
|
|
// fill jackbuffer
|
|
for(int i=0; i<TESTBUFF_SIZE; i++) {
|
|
// ramp
|
|
jack_default_audio_sample_t value =
|
|
((jack_default_audio_sample_t)((i % TESTBUFF_SIZE) - TESTBUFF_SIZE/2)) / (TESTBUFF_SIZE/2);
|
|
// force clipping
|
|
value *= 1.02;
|
|
jackbuffer_source[i] = value;
|
|
}
|
|
|
|
for(uint32_t testcase=0; testcase<sizeof(test_cases)/sizeof(test_case_data_t); testcase++) {
|
|
// test mono/stereo
|
|
for(uint32_t channels=1; channels<=2; channels++) {
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
// jackfloat -> integer
|
|
|
|
// clean target buffers
|
|
memset(integerbuffer_accel, 0, sizeof(integerbuffer_accel));
|
|
memset(integerbuffer_orig, 0, sizeof(integerbuffer_orig));
|
|
// accel
|
|
clock_t time_to_integer_accel = clock();
|
|
for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
|
|
{
|
|
test_cases[testcase].jack_to_integer_accel(
|
|
integerbuffer_accel,
|
|
jackbuffer_source,
|
|
TESTBUFF_SIZE,
|
|
test_cases[testcase].frame_size*channels,
|
|
test_cases[testcase].ditherstate);
|
|
}
|
|
float timediff_to_integer_accel = ((float)(clock() - time_to_integer_accel)) / CLOCKS_PER_SEC;
|
|
// orig
|
|
clock_t time_to_integer_orig = clock();
|
|
for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
|
|
{
|
|
test_cases[testcase].jack_to_integer_orig(
|
|
integerbuffer_orig,
|
|
jackbuffer_source,
|
|
TESTBUFF_SIZE,
|
|
test_cases[testcase].frame_size*channels,
|
|
test_cases[testcase].ditherstate);
|
|
}
|
|
float timediff_to_integer_orig = ((float)(clock() - time_to_integer_orig)) / CLOCKS_PER_SEC;
|
|
// output performance results
|
|
printf(
|
|
"JackFloat->Integer @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
|
|
test_cases[testcase].name,
|
|
channels,
|
|
timediff_to_integer_orig,
|
|
timediff_to_integer_accel,
|
|
(timediff_to_integer_orig/timediff_to_integer_accel-1)*100.0);
|
|
uint32_t int_deviation_max = 0;
|
|
uint32_t int_error_count = 0;
|
|
// output error (avoid spam -> limit error lines per test case)
|
|
for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
|
|
uint32_t sample_offset = sample*test_cases[testcase].frame_size*channels;
|
|
// compare both results
|
|
uint32_t intval_accel=extract_integer(
|
|
integerbuffer_accel,
|
|
sample_offset,
|
|
test_cases[testcase].frame_size,
|
|
test_cases[testcase].sample_size,
|
|
#if __BYTE_ORDER == __BIG_ENDIAN
|
|
!test_cases[testcase].reverse);
|
|
#else
|
|
test_cases[testcase].reverse);
|
|
#endif
|
|
uint32_t intval_orig=extract_integer(
|
|
integerbuffer_orig,
|
|
sample_offset,
|
|
test_cases[testcase].frame_size,
|
|
test_cases[testcase].sample_size,
|
|
#if __BYTE_ORDER == __BIG_ENDIAN
|
|
!test_cases[testcase].reverse);
|
|
#else
|
|
test_cases[testcase].reverse);
|
|
#endif
|
|
// allow a deviation of 1
|
|
if(intval_accel>intval_orig+1 || intval_orig>intval_accel+1) {
|
|
if(int_error_count<maxerr_displayed) {
|
|
printf("Value error sample %u:", sample);
|
|
printf(" Orig 0x");
|
|
char formatstr[10];
|
|
sprintf(formatstr, "%%0%uX", test_cases[testcase].sample_size*2);
|
|
printf(formatstr, intval_orig);
|
|
printf(" Accel 0x");
|
|
printf(formatstr, intval_accel);
|
|
printf("\n");
|
|
}
|
|
int_error_count++;
|
|
uint32_t int_deviation;
|
|
if(intval_accel > intval_orig)
|
|
int_deviation = intval_accel-intval_orig;
|
|
else
|
|
int_deviation = intval_orig-intval_accel;
|
|
if(int_deviation > int_deviation_max)
|
|
int_deviation_max = int_deviation;
|
|
}
|
|
}
|
|
printf(
|
|
"JackFloat->Integer @%7.7s/%u: Errors: %u Max deviation %u\n",
|
|
test_cases[testcase].name,
|
|
channels,
|
|
int_error_count,
|
|
int_deviation_max);
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
// integer -> jackfloat
|
|
|
|
// clean target buffers
|
|
memset(jackfloatbuffer_accel, 0, sizeof(jackfloatbuffer_accel));
|
|
memset(jackfloatbuffer_orig, 0, sizeof(jackfloatbuffer_orig));
|
|
// accel
|
|
clock_t time_to_float_accel = clock();
|
|
for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
|
|
{
|
|
test_cases[testcase].integer_to_jack_accel(
|
|
jackfloatbuffer_accel,
|
|
integerbuffer_orig,
|
|
TESTBUFF_SIZE,
|
|
test_cases[testcase].frame_size*channels);
|
|
}
|
|
float timediff_to_float_accel = ((float)(clock() - time_to_float_accel)) / CLOCKS_PER_SEC;
|
|
// orig
|
|
clock_t time_to_float_orig = clock();
|
|
for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
|
|
{
|
|
test_cases[testcase].integer_to_jack_orig(
|
|
jackfloatbuffer_orig,
|
|
integerbuffer_orig,
|
|
TESTBUFF_SIZE,
|
|
test_cases[testcase].frame_size*channels);
|
|
}
|
|
float timediff_to_float_orig = ((float)(clock() - time_to_float_orig)) / CLOCKS_PER_SEC;
|
|
// output performance results
|
|
printf(
|
|
"Integer->JackFloat @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
|
|
test_cases[testcase].name,
|
|
channels,
|
|
timediff_to_float_orig,
|
|
timediff_to_float_accel,
|
|
(timediff_to_float_orig/timediff_to_float_accel-1)*100.0);
|
|
jack_default_audio_sample_t float_deviation_max = 0.0;
|
|
uint32_t float_error_count = 0;
|
|
// output error (avoid spam -> limit error lines per test case)
|
|
for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
|
|
// For easier estimation/readability we scale floats back to integer
|
|
jack_default_audio_sample_t sample_scaling;
|
|
switch(test_cases[testcase].sample_size) {
|
|
case 2:
|
|
sample_scaling = SAMPLE_16BIT_SCALING;
|
|
break;
|
|
default:
|
|
sample_scaling = SAMPLE_24BIT_SCALING;
|
|
break;
|
|
}
|
|
jack_default_audio_sample_t floatval_accel = jackfloatbuffer_accel[sample] * sample_scaling;
|
|
jack_default_audio_sample_t floatval_orig = jackfloatbuffer_orig[sample] * sample_scaling;
|
|
// compare both results
|
|
jack_default_audio_sample_t float_deviation;
|
|
if(floatval_accel > floatval_orig)
|
|
float_deviation = floatval_accel-floatval_orig;
|
|
else
|
|
float_deviation = floatval_orig-floatval_accel;
|
|
if(float_deviation > float_deviation_max)
|
|
float_deviation_max = float_deviation;
|
|
// deviation > half bit => error
|
|
if(float_deviation > 0.5) {
|
|
if(float_error_count<maxerr_displayed) {
|
|
printf("Value error sample %u:", sample);
|
|
printf(" Orig %8.1f Accel %8.1f\n", floatval_orig, floatval_accel);
|
|
}
|
|
float_error_count++;
|
|
}
|
|
}
|
|
printf(
|
|
"Integer->JackFloat @%7.7s/%u: Errors: %u Max deviation %f\n",
|
|
test_cases[testcase].name,
|
|
channels,
|
|
float_error_count,
|
|
float_deviation_max);
|
|
|
|
printf("\n");
|
|
}
|
|
}
|
|
return 0;
|
|
}
|