数学.h sin() 浮点结果与优化定点结果的比较方法



我已经为我的客户端硬件优化了sin函数。但是我想将标准浮点正弦函数与优化的定点正弦函数进行比较。

我分4个阶段工作:

1) Input generation (floating point to fixed point conversation):Input.y
2) Reference *.cpp code execution in fixed point(output is fixed point):Ref.y
3) Optimization code execution on hardware (output is fixed point):Optimized.y
4) Comparison of ref output with optimized (stage3) output in fixed point:(Ref.y == Optimized.y) ?

在这些阶段结束时,我在定点中获得真正的位精确性。 但我还需要使用标准 mathlib (math.h( 标头 sin 函数生成的浮点结果来验证我的定点输出。

我能够从math.h文件中从sin函数生成浮点结果。但是,任何人都可以建议我哪种是比较浮点结果和定点的最佳方法:

1) convert once again floating point result of standard sin function result to fixed point and compare with fixed point Ref.y 
2) convert fixed point Ref.y to floating point point and compare the result of float with float .

还请建议我如何将浮点数与浮点数进行比较。我的意思是,由于精度问题,这不是我可以比较的正确方法(3.4 == 3.4(。

假设没有适用于您的用例的特殊要求,"标准"方法是将定点结果转换为double(对于大多数常见的定点格式,这是一种无差错的转换(,并计算该结果与标准数学库sin(double)结果的绝对误差:

absolute_error = fabs (result - reference)

如果对于给定的定点格式可行,建议进行详尽的测试。在现代机器上,通常在几分钟内轻松计算出 232个函数评估。您的测试需要记录观察到的最大绝对误差,并计算所有结果的均方根 (RMS( 误差。

对于像sin这样的周期函数,你可能会观察到相位误差随着参数的大小而增加,因为参数约简中使用的近似π不够精确。您的用例将确定在这方面可接受的内容。

下面是一个工作示例,使用log2的特定定点实现作为所测试的函数:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#define FRAC_BITS_OUT (16)
#define INT_BITS_OUT  (15)
#define FRAC_BITS_IN  (31)
#define INT_BITS_IN   ( 0)
#define USE_TABLE     (1)
/* count leading zeros: intrinsic or machine instruction on many architectures */
int32_t clz (uint32_t x)
{
uint32_t n, y;
n = 31 + (!x);
if ((y = (x & 0xffff0000U))) { n -= 16;  x = y; }
if ((y = (x & 0xff00ff00U))) { n -=  8;  x = y; }
if ((y = (x & 0xf0f0f0f0U))) { n -=  4;  x = y; }
if ((y = (x & 0xccccccccU))) { n -=  2;  x = y; }
if ((    (x & 0xaaaaaaaaU))) { n -=  1;         }
return n;
}
#if USE_TABLE
#define LOG2_TBL_SIZE (6)
#define TBL_SIZE      ((1 << LOG2_TBL_SIZE) + 2)
/* for i = [0,65]: log2(1 + i/64) * (1 << 31) */
const uint32_t log2Tab [TBL_SIZE] =
{
0x00000000, 0x02dcf2d1, 0x05aeb4dd, 0x08759c50, 
0x0b31fb7d, 0x0de42120, 0x108c588d, 0x132ae9e2, 
0x15c01a3a, 0x184c2bd0, 0x1acf5e2e, 0x1d49ee4c, 
0x1fbc16b9, 0x22260fb6, 0x24880f56, 0x26e2499d, 
0x2934f098, 0x2b803474, 0x2dc4439b, 0x30014ac6, 
0x32377512, 0x3466ec15, 0x368fd7ee, 0x38b25f5a, 
0x3acea7c0, 0x3ce4d544, 0x3ef50ad2, 0x40ff6a2e, 
0x43041403, 0x450327eb, 0x46fcc47a, 0x48f10751, 
0x4ae00d1d, 0x4cc9f1ab, 0x4eaecfeb, 0x508ec1fa, 
0x5269e12f, 0x5440461c, 0x5612089a, 0x57df3fd0, 
0x59a80239, 0x5b6c65aa, 0x5d2c7f59, 0x5ee863e5, 
0x60a02757, 0x6253dd2c, 0x64039858, 0x65af6b4b, 
0x675767f5, 0x68fb9fce, 0x6a9c23d6, 0x6c39049b, 
0x6dd2523d, 0x6f681c73, 0x70fa728c, 0x72896373, 
0x7414fdb5, 0x759d4f81, 0x772266ad, 0x78a450b8, 
0x7a231ace, 0x7b9ed1c7, 0x7d17822f, 0x7e8d3846, 
0x80000000, 0x816fe50b
};
#define RND_SHIFT     (31 - FRAC_BITS_OUT)
#define RND_CONST     ((1 << RND_SHIFT) / 2)
#define RND_ADJUST    (0x10d) /* established heuristically */
/* 
compute log2(x) in s15.16 format, where x is in s0.31 format
maximum absolute error 8.18251e-6 @ 0x20352845 (0.251622232)
*/   
int32_t fixed_log2 (int32_t x)
{
int32_t f1, f2, dx, a, b, approx, lz, i, idx;
uint32_t t;
/* x = 2**i * (1 + f), 0 <= f < 1. Find i */
lz = clz (x);
i = INT_BITS_IN - lz;
/* normalize f */
t = (uint32_t)x << (lz + 1);
/* index table of log2 values using LOG2_TBL_SIZE msbs of fraction */
idx = t >> (32 - LOG2_TBL_SIZE);
/* difference between argument and smallest sampling point */
dx = t - (idx << (32 - LOG2_TBL_SIZE));
/* fit parabola through closest three sampling points; find coeffs a, b */
f1 = (log2Tab[idx+1] - log2Tab[idx]);
f2 = (log2Tab[idx+2] - log2Tab[idx]);
a = f2 - (f1 << 1);
b = (f1 << 1) - a;
/* find function value for argument by computing ((a*dx+b)*dx) */
approx = (int32_t)((((int64_t)a)*dx) >> (32 - LOG2_TBL_SIZE)) + b;
approx = (int32_t)((((int64_t)approx)*dx) >> (32 - LOG2_TBL_SIZE + 1));
approx = log2Tab[idx] + approx;
/* round fractional part of result */
approx = (((uint32_t)approx) + RND_CONST + RND_ADJUST) >> RND_SHIFT;
/* combine integer and fractional parts of result */
return (i << FRAC_BITS_OUT) + approx;
}
#else // USE_TABLE
/* on 32-bit architectures, there is often an instruction/intrinsic for this */
int32_t mulhi (int32_t a, int32_t b)
{
return (int32_t)(((int64_t)a * (int64_t)b) >> 32);
}
#define RND_SHIFT  (25 - FRAC_BITS_OUT)
#define RND_CONST  ((1 << RND_SHIFT) / 2)
#define RND_ADJUST (-2) /* established heuristically */
/* 
compute log2(x) in s15.16 format, where x is in s0.31 format
maximum absolute error 1.11288e-5 @ 0x5a82689f (0.707104757)
*/   
int32_t fixed_log2 (int32_t x)
{
int32_t lz, i, f, p, approx;
uint32_t t;
/* x = 2**i * (1 + f), 0 <= f < 1. Find i */
lz = clz (x);
i = INT_BITS_IN - lz;
/* force (1+f) into range [sqrt(0.5), sqrt(2)] */
t = (uint32_t)x << lz;    
if (t > (uint32_t)(1.414213562 * (1U << 31))) {
i++;
t = t >> 1;
}
/* compute log2(1+f) for f in [-0.2929, 0.4142] */
f = t - (1U << 31);
p =              + (int32_t)(-0.206191055 * (1U << 31) -  1);
p = mulhi (p, f) + (int32_t)( 0.318199910 * (1U << 30) - 18);
p = mulhi (p, f) + (int32_t)(-0.366491705 * (1U << 29) + 22);
p = mulhi (p, f) + (int32_t)( 0.479811855 * (1U << 28) -  2);
p = mulhi (p, f) + (int32_t)(-0.721206390 * (1U << 27) + 37);
p = mulhi (p, f) + (int32_t)( 0.442701618 * (1U << 26) + 35);
p = mulhi (p, f) + (f >> (31 - 25));
/* round fractional part of the result */
approx = (p + RND_CONST + RND_ADJUST) >> RND_SHIFT;
/* combine integer and fractional parts of result */
return (i << FRAC_BITS_OUT) + approx;
}
#endif // USE_TABLE
/* convert from s15.16 fixed point to double-precision floating point */
double fixed_to_float_s15_16 (int32_t a)
{
return a / 65536.0;
}
/* convert from s0.31 fixed point to double-precision floating point */
double fixed_to_float_s0_31 (int32_t a)
{
return a / (65536.0 * 32768.0);
}
int main (void)
{
double a, res, ref, err, rmserr, sumerrsq = 0.0, maxerr = 0.0;
int32_t x, start, end, nbrtests = 0;
start = 0x00000001;
end =   0x7fffffff;
printf ("testing fixed_log2 with inputs in [%17.10e, %17.10e)n",  
fixed_to_float_s0_31 (start), fixed_to_float_s0_31 (end));
printf ("using %s to compute log2n", USE_TABLE ? "table" : "polynomial");
for (x = start; x < end; x++) {
a = fixed_to_float_s0_31 (x);
ref = log2 (a);
res = fixed_to_float_s15_16 (fixed_log2 (x));
err = fabs (res - ref);
sumerrsq += err * err;
nbrtests++;
if (err > maxerr) {
maxerr = err;
}
}
rmserr = sqrt (sumerrsq / nbrtests);
printf ("max. error = %g  RMS error = %gn", maxerr, rmserr);
return EXIT_SUCCESS;
}

上面的程序在我的旧电脑上几分钟内执行,根据算法选择,将产生如下输出:

testing fixed_log2 with inputs in [4.6566128731e-010, 9.9999999953e-001)
using polynomial to compute log2
max. error = 1.11288e-005  RMS error = 4.65419e-006
testing fixed_log2 with inputs in [4.6566128731e-010, 9.9999999953e-001)
using table to compute log2
max. error = 8.18251e-006  RMS error = 4.40727e-006

最新更新