SecondLife: llmath/llmath.h Source File

00001 
00032 #ifndef LLMATH_H
00033 #define LLMATH_H
00034 
00035 #include <cmath>
00036 //#include <math.h>
00037 //#include <stdlib.h>
00038 #include "lldefs.h"
00039 
00040 // work around for Windows & older gcc non-standard function names.
00041 #if LL_WINDOWS
00042 #include <float.h>
00043 #define llisnan(val)    _isnan(val)
00044 #define llfinite(val)   _finite(val)
00045 #elif (LL_LINUX && __GNUC__ <= 2)
00046 #define llisnan(val)    isnan(val)
00047 #define llfinite(val)   isfinite(val)
00048 #elif LL_SOLARIS
00049 #define llisnan(val)    isnan(val)
00050 #define llfinite(val)   (val <= std::numeric_limits<double>::max())
00051 #else
00052 #define llisnan(val)    std::isnan(val)
00053 #define llfinite(val)   std::isfinite(val)
00054 #endif
00055 
00056 // Single Precision Floating Point Routines
00057 #ifndef fsqrtf
00058 #define fsqrtf(x)               ((F32)sqrt((F64)(x)))
00059 #endif
00060 #ifndef sqrtf
00061 #define sqrtf(x)                ((F32)sqrt((F64)(x)))
00062 #endif
00063 
00064 #ifndef cosf
00065 #define cosf(x)         ((F32)cos((F64)(x)))
00066 #endif
00067 #ifndef sinf
00068 #define sinf(x)         ((F32)sin((F64)(x)))
00069 #endif
00070 #ifndef tanf
00071 #define tanf(x)         ((F32)tan((F64)(x)))
00072 #endif
00073 #ifndef acosf
00074 #define acosf(x)                ((F32)acos((F64)(x)))
00075 #endif
00076 
00077 #ifndef powf
00078 #define powf(x,y) ((F32)pow((F64)(x),(F64)(y)))
00079 #endif
00080 
00081 const F32       GRAVITY                 = -9.8f;
00082 
00083 // mathematical constants
00084 const F32       F_PI            = 3.1415926535897932384626433832795f;
00085 const F32       F_TWO_PI        = 6.283185307179586476925286766559f;
00086 const F32       F_PI_BY_TWO     = 1.5707963267948966192313216916398f;
00087 const F32       F_SQRT2         = 1.4142135623730950488016887242097f;
00088 const F32       F_SQRT3         = 1.73205080756888288657986402541f;
00089 const F32       OO_SQRT2        = 0.7071067811865475244008443621049f;
00090 const F32       DEG_TO_RAD      = 0.017453292519943295769236907684886f;
00091 const F32       RAD_TO_DEG      = 57.295779513082320876798154814105f;
00092 const F32       F_APPROXIMATELY_ZERO = 0.00001f;
00093 const F32       F_LN2           = 0.69314718056f;
00094 const F32       OO_LN2          = 1.4426950408889634073599246810019f;
00095 
00096 // BUG: Eliminate in favor of F_APPROXIMATELY_ZERO above?
00097 const F32 FP_MAG_THRESHOLD = 0.0000001f;
00098 
00099 // TODO: Replace with logic like is_approx_equal
00100 inline BOOL is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f < F_APPROXIMATELY_ZERO); }
00101 
00102 inline BOOL is_approx_equal(F32 x, F32 y)
00103 {
00104         const S32 COMPARE_MANTISSA_UP_TO_BIT = 0x02;
00105         return (abs((S32) ((U32&)x - (U32&)y) ) < COMPARE_MANTISSA_UP_TO_BIT);
00106 }
00107 
00108 inline BOOL is_approx_equal(F64 x, F64 y)
00109 {
00110         const S64 COMPARE_MANTISSA_UP_TO_BIT = 0x02;
00111         return (abs((S32) ((U64&)x - (U64&)y) ) < COMPARE_MANTISSA_UP_TO_BIT);
00112 }
00113 
00114 inline BOOL is_approx_equal_fraction(F32 x, F32 y, U32 frac_bits)
00115 {
00116         BOOL ret = TRUE;
00117         F32 diff = (F32) fabs(x - y);
00118 
00119         S32 diffInt = (S32) diff;
00120         S32 diffFracTolerance = (S32) ((diff - (F32) diffInt) * (1 << frac_bits));
00121         
00122         // if integer portion is not equal, not enough bits were used for packing
00123         // so error out since either the use case is not correct OR there is
00124         // an issue with pack/unpack. should fail in either case.
00125         // for decimal portion, make sure that the delta is no more than 1
00126         // based on the number of bits used for packing decimal portion.
00127         if (diffInt != 0 || diffFracTolerance > 1)
00128         {
00129                 ret = FALSE;
00130         }
00131 
00132         return ret;
00133 }
00134 
00135 inline BOOL is_approx_equal_fraction(F64 x, F64 y, U32 frac_bits)
00136 {
00137         BOOL ret = TRUE;
00138         F64 diff = (F64) fabs(x - y);
00139 
00140         S32 diffInt = (S32) diff;
00141         S32 diffFracTolerance = (S32) ((diff - (F64) diffInt) * (1 << frac_bits));
00142         
00143         // if integer portion is not equal, not enough bits were used for packing
00144         // so error out since either the use case is not correct OR there is
00145         // an issue with pack/unpack. should fail in either case.
00146         // for decimal portion, make sure that the delta is no more than 1
00147         // based on the number of bits used for packing decimal portion.
00148         if (diffInt != 0 || diffFracTolerance > 1)
00149         {
00150                 ret = FALSE;
00151         }
00152 
00153         return ret;
00154 }
00155 
00156 inline S32 llabs(const S32 a)
00157 {
00158         return S32(labs(a));
00159 }
00160 
00161 inline F32 llabs(const F32 a)
00162 {
00163         return F32(fabs(a));
00164 }
00165 
00166 inline F64 llabs(const F64 a)
00167 {
00168         return F64(fabs(a));
00169 }
00170 
00171 inline S32 lltrunc( F32 f )
00172 {
00173 #if LL_WINDOWS && !defined( __INTEL_COMPILER )
00174                 // Avoids changing the floating point control word.
00175                 // Add or subtract 0.5 - epsilon and then round
00176                 const static U32 zpfp[] = { 0xBEFFFFFF, 0x3EFFFFFF };
00177                 S32 result;
00178                 __asm {
00179                         fld             f
00180                         mov             eax,    f
00181                         shr             eax,    29
00182                         and             eax,    4
00183                         fadd    dword ptr [zpfp + eax]
00184                         fistp   result
00185                 }
00186                 return result;
00187 #else
00188                 return (S32)f;
00189 #endif
00190 }
00191 
00192 inline S32 lltrunc( F64 f )
00193 {
00194         return (S32)f;
00195 }
00196 
00197 inline S32 llfloor( F32 f )
00198 {
00199 #if LL_WINDOWS && !defined( __INTEL_COMPILER )
00200                 // Avoids changing the floating point control word.
00201                 // Accurate (unlike Stereopsis version) for all values between S32_MIN and S32_MAX and slightly faster than Stereopsis version.
00202                 // Add -(0.5 - epsilon) and then round
00203                 const U32 zpfp = 0xBEFFFFFF;
00204                 S32 result;
00205                 __asm { 
00206                         fld             f
00207                         fadd    dword ptr [zpfp]
00208                         fistp   result
00209                 }
00210                 return result;
00211 #else
00212                 return (S32)floor(f);
00213 #endif
00214 }
00215 
00216 
00217 inline S32 llceil( F32 f )
00218 {
00219         // This could probably be optimized, but this works.
00220         return (S32)ceil(f);
00221 }
00222 
00223 
00224 #ifndef BOGUS_ROUND
00225 // Use this round.  Does an arithmetic round (0.5 always rounds up)
00226 inline S32 llround(const F32 val)
00227 {
00228         return llfloor(val + 0.5f);
00229 }
00230 
00231 #else // BOGUS_ROUND
00232 // Old llround implementation - does banker's round (toward nearest even in the case of a 0.5.
00233 // Not using this because we don't have a consistent implementation on both platforms, use
00234 // llfloor(val + 0.5f), which is consistent on all platforms.
00235 inline S32 llround(const F32 val)
00236 {
00237         #if LL_WINDOWS
00238                 // Note: assumes that the floating point control word is set to rounding mode (the default)
00239                 S32 ret_val;
00240                 _asm fld        val
00241                 _asm fistp      ret_val;
00242                 return ret_val;
00243         #elif LL_LINUX
00244                 // Note: assumes that the floating point control word is set
00245                 // to rounding mode (the default)
00246                 S32 ret_val;
00247                 __asm__ __volatile__( "flds %1    \n\t"
00248                                                           "fistpl %0  \n\t"
00249                                                           : "=m" (ret_val)
00250                                                           : "m" (val) );
00251                 return ret_val;
00252         #else
00253                 return llfloor(val + 0.5f);
00254         #endif
00255 }
00256 
00257 // A fast arithmentic round on intel, from Laurent de Soras http://ldesoras.free.fr
00258 inline int round_int(double x)
00259 {
00260         const float round_to_nearest = 0.5f;
00261         int i;
00262         __asm
00263         {
00264                 fld x
00265                 fadd st, st (0)
00266                 fadd round_to_nearest
00267                 fistp i
00268                 sar i, 1
00269         }
00270         return (i);
00271 }
00272 #endif // BOGUS_ROUND
00273 
00274 inline F32 llround( F32 val, F32 nearest )
00275 {
00276         return F32(floor(val * (1.0f / nearest) + 0.5f)) * nearest;
00277 }
00278 
00279 inline F64 llround( F64 val, F64 nearest )
00280 {
00281         return F64(floor(val * (1.0 / nearest) + 0.5)) * nearest;
00282 }
00283 
00284 // these provide minimum peak error
00285 //
00286 // avg  error = -0.013049 
00287 // peak error = -31.4 dB
00288 // RMS  error = -28.1 dB
00289 
00290 const F32 FAST_MAG_ALPHA = 0.960433870103f;
00291 const F32 FAST_MAG_BETA = 0.397824734759f;
00292 
00293 // these provide minimum RMS error
00294 //
00295 // avg  error = 0.000003 
00296 // peak error = -32.6 dB
00297 // RMS  error = -25.7 dB
00298 //
00299 //const F32 FAST_MAG_ALPHA = 0.948059448969f;
00300 //const F32 FAST_MAG_BETA = 0.392699081699f;
00301 
00302 inline F32 fastMagnitude(F32 a, F32 b)
00303 { 
00304         a = (a > 0) ? a : -a;
00305         b = (b > 0) ? b : -b;
00306         return(FAST_MAG_ALPHA * llmax(a,b) + FAST_MAG_BETA * llmin(a,b));
00307 }
00308 
00309 
00310 
00312 //
00313 // Fast F32/S32 conversions
00314 //
00315 // Culled from www.stereopsis.com/FPU.html
00316 
00317 const F64 LL_DOUBLE_TO_FIX_MAGIC        = 68719476736.0*1.5;     //2^36 * 1.5,  (52-_shiftamt=36) uses limited precisicion to floor
00318 const S32 LL_SHIFT_AMOUNT                       = 16;                    //16.16 fixed point representation,
00319 
00320 // Endian dependent code
00321 #ifdef LL_LITTLE_ENDIAN
00322         #define LL_EXP_INDEX                            1
00323         #define LL_MAN_INDEX                            0
00324 #else
00325         #define LL_EXP_INDEX                            0
00326         #define LL_MAN_INDEX                            1
00327 #endif
00328 
00329 /* Deprecated: use llround(), lltrunc(), or llfloor() instead
00330 // ================================================================================================
00331 // Real2Int
00332 // ================================================================================================
00333 inline S32 F64toS32(F64 val)
00334 {
00335         val             = val + LL_DOUBLE_TO_FIX_MAGIC;
00336         return ((S32*)&val)[LL_MAN_INDEX] >> LL_SHIFT_AMOUNT; 
00337 }
00338 
00339 // ================================================================================================
00340 // Real2Int
00341 // ================================================================================================
00342 inline S32 F32toS32(F32 val)
00343 {
00344         return F64toS32 ((F64)val);
00345 }
00346 */
00347 
00349 //
00350 // Fast exp and log
00351 //
00352 
00353 // Implementation of fast exp() approximation (from a paper by Nicol N. Schraudolph
00354 // http://www.inf.ethz.ch/~schraudo/pubs/exp.pdf
00355 static union
00356 {
00357         double d;
00358         struct
00359         {
00360 #ifdef LL_LITTLE_ENDIAN
00361                 S32 j, i;
00362 #else
00363                 S32 i, j;
00364 #endif
00365         } n;
00366 } LLECO; // not sure what the name means
00367 
00368 #define LL_EXP_A (1048576 * OO_LN2) // use 1512775 for integer
00369 #define LL_EXP_C (60801)                        // this value of C good for -4 < y < 4
00370 
00371 #define LL_FAST_EXP(y) (LLECO.n.i = llround(F32(LL_EXP_A*(y))) + (1072693248 - LL_EXP_C), LLECO.d)
00372 
00373 
00374 
00375 inline F32 llfastpow(const F32 x, const F32 y)
00376 {
00377         return (F32)(LL_FAST_EXP(y * log(x)));
00378 }
00379 
00380 
00381 inline F32 snap_to_sig_figs(F32 foo, S32 sig_figs)
00382 {
00383         // compute the power of ten
00384         F32 bar = 1.f;
00385         for (S32 i = 0; i < sig_figs; i++)
00386         {
00387                 bar *= 10.f;
00388         }
00389 
00390         foo = (F32)llround(foo * bar);
00391 
00392         // shift back
00393         foo /= bar;
00394         return foo;
00395 }
00396 
00397 inline F32 lerp(F32 a, F32 b, F32 u) 
00398 {
00399         return a + ((b - a) * u);
00400 }
00401 
00402 inline F32 lerp2d(F32 x00, F32 x01, F32 x10, F32 x11, F32 u, F32 v)
00403 {
00404         F32 a = x00 + (x01-x00)*u;
00405         F32 b = x10 + (x11-x10)*u;
00406         F32 r = a + (b-a)*v;
00407         return r;
00408 }
00409 
00410 inline F32 ramp(F32 x, F32 a, F32 b)
00411 {
00412         return (a == b) ? 0.0f : ((a - x) / (a - b));
00413 }
00414 
00415 inline F32 rescale(F32 x, F32 x1, F32 x2, F32 y1, F32 y2)
00416 {
00417         return lerp(y1, y2, ramp(x, x1, x2));
00418 }
00419 
00420 inline F32 clamp_rescale(F32 x, F32 x1, F32 x2, F32 y1, F32 y2)
00421 {
00422         if (y1 < y2)
00423         {
00424                 return llclamp(rescale(x,x1,x2,y1,y2),y1,y2);
00425         }
00426         else
00427         {
00428                 return llclamp(rescale(x,x1,x2,y1,y2),y2,y1);
00429         }
00430 }
00431 
00432 
00433 inline F32 cubic_step( F32 x, F32 x0, F32 x1, F32 s0, F32 s1 )
00434 {
00435         if (x <= x0)
00436                 return s0;
00437 
00438         if (x >= x1)
00439                 return s1;
00440 
00441         F32 f = (x - x0) / (x1 - x0);
00442 
00443         return  s0 + (s1 - s0) * (f * f) * (3.0f - 2.0f * f);
00444 }
00445 
00446 inline F32 cubic_step( F32 x )
00447 {
00448         x = llclampf(x);
00449 
00450         return  (x * x) * (3.0f - 2.0f * x);
00451 }
00452 
00453 inline F32 quadratic_step( F32 x, F32 x0, F32 x1, F32 s0, F32 s1 )
00454 {
00455         if (x <= x0)
00456                 return s0;
00457 
00458         if (x >= x1)
00459                 return s1;
00460 
00461         F32 f = (x - x0) / (x1 - x0);
00462         F32 f_squared = f * f;
00463 
00464         return  (s0 * (1.f - f_squared)) + ((s1 - s0) * f_squared);
00465 }
00466 
00467 inline F32 llsimple_angle(F32 angle)
00468 {
00469         while(angle <= -F_PI)
00470                 angle += F_TWO_PI;
00471         while(angle >  F_PI)
00472                 angle -= F_TWO_PI;
00473         return angle;
00474 }
00475 
00476 //calculate the nearesr power of two number for val, bounded by max_power_two
00477 inline U32 get_nearest_power_two(U32 val, U32 max_power_two)
00478 {
00479         if(!max_power_two)
00480         {
00481                 max_power_two = 1 << 31 ;
00482         }
00483         if(max_power_two & (max_power_two - 1))
00484         {
00485                 return 0 ;
00486         }
00487 
00488         for(; val < max_power_two ; max_power_two >>= 1) ;
00489         
00490         return max_power_two ;
00491 }
00492 #endif