Second Life Viewer: llmath.h Source File

00001 
00032 #ifndef LLMATH_H
00033 #define LLMATH_H
00034 
00035 // work around for Windows & older gcc non-standard function names.
00036 #if LL_WINDOWS
00037 #define llisnan(val)    _isnan(val)
00038 #define llfinite(val)   _finite(val)
00039 #elif (LL_LINUX && __GNUC__ <= 2)
00040 #define llisnan(val)    isnan(val)
00041 #define llfinite(val)   isfinite(val)
00042 #elif LL_SOLARIS
00043 #define llisnan(val)    isnan(val)
00044 #define llfinite(val)   (val <= std::numeric_limits<double>::max())
00045 #else
00046 #define llisnan(val)    std::isnan(val)
00047 #define llfinite(val)   std::isfinite(val)
00048 #endif
00049 
00050 // Single Precision Floating Point Routines
00051 #ifndef fsqrtf
00052 #define fsqrtf(x)               ((F32)sqrt((F64)(x)))
00053 #endif
00054 #ifndef sqrtf
00055 #define sqrtf(x)                ((F32)sqrt((F64)(x)))
00056 #endif
00057 
00058 #ifndef cosf
00059 #define cosf(x)         ((F32)cos((F64)(x)))
00060 #endif
00061 #ifndef sinf
00062 #define sinf(x)         ((F32)sin((F64)(x)))
00063 #endif
00064 #ifndef tanf
00065 #define tanf(x)         ((F32)tan((F64)(x)))
00066 #endif
00067 #ifndef acosf
00068 #define acosf(x)                ((F32)acos((F64)(x)))
00069 #endif
00070 
00071 #ifndef powf
00072 #define powf(x,y) ((F32)pow((F64)(x),(F64)(y)))
00073 #endif
00074 
00075 const F32       GRAVITY                 = -9.8f;
00076 
00077 // mathematical constants
00078 const F32       F_PI            = 3.1415926535897932384626433832795f;
00079 const F32       F_TWO_PI        = 6.283185307179586476925286766559f;
00080 const F32       F_PI_BY_TWO     = 1.5707963267948966192313216916398f;
00081 const F32       F_SQRT2         = 1.4142135623730950488016887242097f;
00082 const F32       F_SQRT3         = 1.73205080756888288657986402541f;
00083 const F32       OO_SQRT2        = 0.7071067811865475244008443621049f;
00084 const F32       DEG_TO_RAD      = 0.017453292519943295769236907684886f;
00085 const F32       RAD_TO_DEG      = 57.295779513082320876798154814105f;
00086 const F32       F_APPROXIMATELY_ZERO = 0.00001f;
00087 const F32       F_LN2           = 0.69314718056f;
00088 const F32       OO_LN2          = 1.4426950408889634073599246810019f;
00089 
00090 // BUG: Eliminate in favor of F_APPROXIMATELY_ZERO above?
00091 const F32 FP_MAG_THRESHOLD = 0.0000001f;
00092 
00093 // TODO: Replace with logic like is_approx_equal
00094 inline BOOL is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f < F_APPROXIMATELY_ZERO); }
00095 
00096 inline BOOL is_approx_equal(F32 x, F32 y)
00097 {
00098         const S32 COMPARE_MANTISSA_UP_TO_BIT = 0x02;
00099         return (abs((S32) ((U32&)x - (U32&)y) ) < COMPARE_MANTISSA_UP_TO_BIT);
00100 }
00101 
00102 inline BOOL is_approx_equal_fraction(F32 x, F32 y, U32 frac_bits)
00103 {
00104         BOOL ret = TRUE;
00105         F32 diff = (F32) fabs(x - y);
00106 
00107         S32 diffInt = (S32) diff;
00108         S32 diffFracTolerance = (S32) ((diff - (F32) diffInt) * (1 << frac_bits));
00109         
00110         // if integer portion is not equal, not enough bits were used for packing
00111         // so error out since either the use case is not correct OR there is
00112         // an issue with pack/unpack. should fail in either case.
00113         // for decimal portion, make sure that the delta is no more than 1
00114         // based on the number of bits used for packing decimal portion.
00115         if (diffInt != 0 || diffFracTolerance > 1)
00116         {
00117                 ret = FALSE;
00118         }
00119 
00120         return ret;
00121 }
00122 
00123 inline S32 llabs(const S32 a)
00124 {
00125         return S32(labs(a));
00126 }
00127 
00128 inline F32 llabs(const F32 a)
00129 {
00130         return F32(fabs(a));
00131 }
00132 
00133 inline F64 llabs(const F64 a)
00134 {
00135         return F64(fabs(a));
00136 }
00137 
00138 inline S32 lltrunc( F32 f )
00139 {
00140 #if LL_WINDOWS && !defined( __INTEL_COMPILER )
00141                 // Avoids changing the floating point control word.
00142                 // Add or subtract 0.5 - epsilon and then round
00143                 const static U32 zpfp[] = { 0xBEFFFFFF, 0x3EFFFFFF };
00144                 S32 result;
00145                 __asm {
00146                         fld             f
00147                         mov             eax,    f
00148                         shr             eax,    29
00149                         and             eax,    4
00150                         fadd    dword ptr [zpfp + eax]
00151                         fistp   result
00152                 }
00153                 return result;
00154 #else
00155                 return (S32)f;
00156 #endif
00157 }
00158 
00159 inline S32 lltrunc( F64 f )
00160 {
00161         return (S32)f;
00162 }
00163 
00164 inline S32 llfloor( F32 f )
00165 {
00166 #if LL_WINDOWS && !defined( __INTEL_COMPILER )
00167                 // Avoids changing the floating point control word.
00168                 // Accurate (unlike Stereopsis version) for all values between S32_MIN and S32_MAX and slightly faster than Stereopsis version.
00169                 // Add -(0.5 - epsilon) and then round
00170                 const U32 zpfp = 0xBEFFFFFF;
00171                 S32 result;
00172                 __asm { 
00173                         fld             f
00174                         fadd    dword ptr [zpfp]
00175                         fistp   result
00176                 }
00177                 return result;
00178 #else
00179                 return (S32)floor(f);
00180 #endif
00181 }
00182 
00183 
00184 inline S32 llceil( F32 f )
00185 {
00186         // This could probably be optimized, but this works.
00187         return (S32)ceil(f);
00188 }
00189 
00190 
00191 #ifndef BOGUS_ROUND
00192 // Use this round.  Does an arithmetic round (0.5 always rounds up)
00193 inline S32 llround(const F32 val)
00194 {
00195         return llfloor(val + 0.5f);
00196 }
00197 
00198 #else // BOGUS_ROUND
00199 // Old llround implementation - does banker's round (toward nearest even in the case of a 0.5.
00200 // Not using this because we don't have a consistent implementation on both platforms, use
00201 // llfloor(val + 0.5f), which is consistent on all platforms.
00202 inline S32 llround(const F32 val)
00203 {
00204         #if LL_WINDOWS
00205                 // Note: assumes that the floating point control word is set to rounding mode (the default)
00206                 S32 ret_val;
00207                 _asm fld        val
00208                 _asm fistp      ret_val;
00209                 return ret_val;
00210         #elif LL_LINUX
00211                 // Note: assumes that the floating point control word is set
00212                 // to rounding mode (the default)
00213                 S32 ret_val;
00214                 __asm__ __volatile__( "flds %1    \n\t"
00215                                                           "fistpl %0  \n\t"
00216                                                           : "=m" (ret_val)
00217                                                           : "m" (val) );
00218                 return ret_val;
00219         #else
00220                 return llfloor(val + 0.5f);
00221         #endif
00222 }
00223 
00224 // A fast arithmentic round on intel, from Laurent de Soras http://ldesoras.free.fr
00225 inline int round_int(double x)
00226 {
00227         const float round_to_nearest = 0.5f;
00228         int i;
00229         __asm
00230         {
00231                 fld x
00232                 fadd st, st (0)
00233                 fadd round_to_nearest
00234                 fistp i
00235                 sar i, 1
00236         }
00237         return (i);
00238 }
00239 #endif // BOGUS_ROUND
00240 
00241 inline F32 llround( F32 val, F32 nearest )
00242 {
00243         return F32(floor(val * (1.0f / nearest) + 0.5f)) * nearest;
00244 }
00245 
00246 inline F64 llround( F64 val, F64 nearest )
00247 {
00248         return F64(floor(val * (1.0 / nearest) + 0.5)) * nearest;
00249 }
00250 
00251 // these provide minimum peak error
00252 //
00253 // avg  error = -0.013049 
00254 // peak error = -31.4 dB
00255 // RMS  error = -28.1 dB
00256 
00257 const F32 FAST_MAG_ALPHA = 0.960433870103f;
00258 const F32 FAST_MAG_BETA = 0.397824734759f;
00259 
00260 // these provide minimum RMS error
00261 //
00262 // avg  error = 0.000003 
00263 // peak error = -32.6 dB
00264 // RMS  error = -25.7 dB
00265 //
00266 //const F32 FAST_MAG_ALPHA = 0.948059448969f;
00267 //const F32 FAST_MAG_BETA = 0.392699081699f;
00268 
00269 inline F32 fastMagnitude(F32 a, F32 b)
00270 { 
00271         a = (a > 0) ? a : -a;
00272         b = (b > 0) ? b : -b;
00273         return(FAST_MAG_ALPHA * llmax(a,b) + FAST_MAG_BETA * llmin(a,b));
00274 }
00275 
00276 
00277 
00279 //
00280 // Fast F32/S32 conversions
00281 //
00282 // Culled from www.stereopsis.com/FPU.html
00283 
00284 const F64 LL_DOUBLE_TO_FIX_MAGIC        = 68719476736.0*1.5;     //2^36 * 1.5,  (52-_shiftamt=36) uses limited precisicion to floor
00285 const S32 LL_SHIFT_AMOUNT                       = 16;                    //16.16 fixed point representation,
00286 
00287 // Endian dependent code
00288 #ifdef LL_LITTLE_ENDIAN
00289         #define LL_EXP_INDEX                            1
00290         #define LL_MAN_INDEX                            0
00291 #else
00292         #define LL_EXP_INDEX                            0
00293         #define LL_MAN_INDEX                            1
00294 #endif
00295 
00296 /* Deprecated: use llround(), lltrunc(), or llfloor() instead
00297 // ================================================================================================
00298 // Real2Int
00299 // ================================================================================================
00300 inline S32 F64toS32(F64 val)
00301 {
00302         val             = val + LL_DOUBLE_TO_FIX_MAGIC;
00303         return ((S32*)&val)[LL_MAN_INDEX] >> LL_SHIFT_AMOUNT; 
00304 }
00305 
00306 // ================================================================================================
00307 // Real2Int
00308 // ================================================================================================
00309 inline S32 F32toS32(F32 val)
00310 {
00311         return F64toS32 ((F64)val);
00312 }
00313 */
00314 
00316 //
00317 // Fast exp and log
00318 //
00319 
00320 // Implementation of fast exp() approximation (from a paper by Nicol N. Schraudolph
00321 // http://www.inf.ethz.ch/~schraudo/pubs/exp.pdf
00322 static union
00323 {
00324         double d;
00325         struct
00326         {
00327 #ifdef LL_LITTLE_ENDIAN
00328                 S32 j, i;
00329 #else
00330                 S32 i, j;
00331 #endif
00332         } n;
00333 } LLECO; // not sure what the name means
00334 
00335 #define LL_EXP_A (1048576 * OO_LN2) // use 1512775 for integer
00336 #define LL_EXP_C (60801)                        // this value of C good for -4 < y < 4
00337 
00338 #define LL_FAST_EXP(y) (LLECO.n.i = llround(F32(LL_EXP_A*(y))) + (1072693248 - LL_EXP_C), LLECO.d)
00339 
00340 
00341 
00342 inline F32 llfastpow(const F32 x, const F32 y)
00343 {
00344         return (F32)(LL_FAST_EXP(y * log(x)));
00345 }
00346 
00347 
00348 inline F32 snap_to_sig_figs(F32 foo, S32 sig_figs)
00349 {
00350         // compute the power of ten
00351         F32 bar = 1.f;
00352         for (S32 i = 0; i < sig_figs; i++)
00353         {
00354                 bar *= 10.f;
00355         }
00356 
00357         foo = (F32)llround(foo * bar);
00358 
00359         // shift back
00360         foo /= bar;
00361         return foo;
00362 }
00363 
00364 inline F32 lerp(F32 a, F32 b, F32 u) 
00365 {
00366         return a + ((b - a) * u);
00367 }
00368 
00369 inline F32 lerp2d(F32 x00, F32 x01, F32 x10, F32 x11, F32 u, F32 v)
00370 {
00371         F32 a = x00 + (x01-x00)*u;
00372         F32 b = x10 + (x11-x10)*u;
00373         F32 r = a + (b-a)*v;
00374         return r;
00375 }
00376 
00377 inline F32 ramp(F32 x, F32 a, F32 b)
00378 {
00379         return (a == b) ? 0.0f : ((a - x) / (a - b));
00380 }
00381 
00382 inline F32 rescale(F32 x, F32 x1, F32 x2, F32 y1, F32 y2)
00383 {
00384         return lerp(y1, y2, ramp(x, x1, x2));
00385 }
00386 
00387 inline F32 clamp_rescale(F32 x, F32 x1, F32 x2, F32 y1, F32 y2)
00388 {
00389         if (y1 < y2)
00390         {
00391                 return llclamp(rescale(x,x1,x2,y1,y2),y1,y2);
00392         }
00393         else
00394         {
00395                 return llclamp(rescale(x,x1,x2,y1,y2),y2,y1);
00396         }
00397 }
00398 
00399 
00400 inline F32 cubic_step( F32 x, F32 x0, F32 x1, F32 s0, F32 s1 )
00401 {
00402         if (x <= x0)
00403                 return s0;
00404 
00405         if (x >= x1)
00406                 return s1;
00407 
00408         F32 f = (x - x0) / (x1 - x0);
00409 
00410         return  s0 + (s1 - s0) * (f * f) * (3.0f - 2.0f * f);
00411 }
00412 
00413 inline F32 cubic_step( F32 x )
00414 {
00415         x = llclampf(x);
00416 
00417         return  (x * x) * (3.0f - 2.0f * x);
00418 }
00419 
00420 inline F32 quadratic_step( F32 x, F32 x0, F32 x1, F32 s0, F32 s1 )
00421 {
00422         if (x <= x0)
00423                 return s0;
00424 
00425         if (x >= x1)
00426                 return s1;
00427 
00428         F32 f = (x - x0) / (x1 - x0);
00429         F32 f_squared = f * f;
00430 
00431         return  (s0 * (1.f - f_squared)) + ((s1 - s0) * f_squared);
00432 }
00433 
00434 inline F32 llsimple_angle(F32 angle)
00435 {
00436         while(angle <= -F_PI)
00437                 angle += F_TWO_PI;
00438         while(angle >  F_PI)
00439                 angle -= F_TWO_PI;
00440         return angle;
00441 }
00442 
00443 #endif