00001
00032 #ifndef LL_LLV4MATRIX4_H
00033 #define LL_LLV4MATRIX4_H
00034
00035 #include "llv4math.h"
00036 #include "llv4matrix3.h"
00037 #include "llv4vector3.h"
00038
00039
00040
00041
00042
00043
00044
00045 LL_LLV4MATH_ALIGN_PREFIX
00046
00047 class LLV4Matrix4
00048 {
00049 public:
00050 union {
00051 F32 mMatrix[LLV4_NUM_AXIS][LLV4_NUM_AXIS];
00052 V4F32 mV[LLV4_NUM_AXIS];
00053 };
00054
00055 void lerp(const LLV4Matrix4 &a, const LLV4Matrix4 &b, const F32 &w);
00056 void multiply(const LLVector3 &a, LLVector3& o) const;
00057 void multiply(const LLVector3 &a, LLV4Vector3& o) const;
00058
00059 const LLV4Matrix4& transpose();
00060 const LLV4Matrix4& translate(const LLVector3 &vec);
00061 const LLV4Matrix4& translate(const LLV4Vector3 &vec);
00062 const LLV4Matrix4& operator=(const LLMatrix4& a);
00063
00064 operator LLMatrix4() const { return *(reinterpret_cast<const LLMatrix4*>(const_cast<const F32*>(&mMatrix[0][0]))); }
00065 operator LLV4Matrix3() const { return *(reinterpret_cast<const LLV4Matrix3*>(const_cast<const F32*>(&mMatrix[0][0]))); }
00066
00067 friend LLVector3 operator*(const LLVector3 &a, const LLV4Matrix4 &b);
00068 }
00069
00070 LL_LLV4MATH_ALIGN_POSTFIX;
00071
00072
00073
00074
00075
00076
00077
00078 #if LL_VECTORIZE
00079
00080 inline void LLV4Matrix4::lerp(const LLV4Matrix4 &a, const LLV4Matrix4 &b, const F32 &w)
00081 {
00082 __m128 vw = _mm_set1_ps(w);
00083 mV[VX] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VX], a.mV[VX]), vw), a.mV[VX]);
00084 mV[VY] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VY], a.mV[VY]), vw), a.mV[VY]);
00085 mV[VZ] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VZ], a.mV[VZ]), vw), a.mV[VZ]);
00086 mV[VW] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VW], a.mV[VW]), vw), a.mV[VW]);
00087 }
00088
00089 inline void LLV4Matrix4::multiply(const LLVector3 &a, LLVector3& o) const
00090 {
00091 LLV4Vector3 j;
00092 j.v = _mm_add_ps(mV[VW], _mm_mul_ps(_mm_set1_ps(a.mV[VX]), mV[VX]));
00093 j.v = _mm_add_ps(j.v , _mm_mul_ps(_mm_set1_ps(a.mV[VY]), mV[VY]));
00094 j.v = _mm_add_ps(j.v , _mm_mul_ps(_mm_set1_ps(a.mV[VZ]), mV[VZ]));
00095 o.setVec(j.mV);
00096 }
00097
00098 inline void LLV4Matrix4::multiply(const LLVector3 &a, LLV4Vector3& o) const
00099 {
00100 o.v = _mm_add_ps(mV[VW], _mm_mul_ps(_mm_set1_ps(a.mV[VX]), mV[VX]));
00101 o.v = _mm_add_ps(o.v , _mm_mul_ps(_mm_set1_ps(a.mV[VY]), mV[VY]));
00102 o.v = _mm_add_ps(o.v , _mm_mul_ps(_mm_set1_ps(a.mV[VZ]), mV[VZ]));
00103 }
00104
00105 inline const LLV4Matrix4& LLV4Matrix4::translate(const LLV4Vector3 &vec)
00106 {
00107 mV[VW] = _mm_add_ps(mV[VW], vec.v);
00108 return (*this);
00109 }
00110
00111
00112
00113
00114
00115
00116
00117 #else
00118
00119 inline void LLV4Matrix4::lerp(const LLV4Matrix4 &a, const LLV4Matrix4 &b, const F32 &w)
00120 {
00121 mMatrix[VX][VX] = llv4lerp(a.mMatrix[VX][VX], b.mMatrix[VX][VX], w);
00122 mMatrix[VX][VY] = llv4lerp(a.mMatrix[VX][VY], b.mMatrix[VX][VY], w);
00123 mMatrix[VX][VZ] = llv4lerp(a.mMatrix[VX][VZ], b.mMatrix[VX][VZ], w);
00124
00125 mMatrix[VY][VX] = llv4lerp(a.mMatrix[VY][VX], b.mMatrix[VY][VX], w);
00126 mMatrix[VY][VY] = llv4lerp(a.mMatrix[VY][VY], b.mMatrix[VY][VY], w);
00127 mMatrix[VY][VZ] = llv4lerp(a.mMatrix[VY][VZ], b.mMatrix[VY][VZ], w);
00128
00129 mMatrix[VZ][VX] = llv4lerp(a.mMatrix[VZ][VX], b.mMatrix[VZ][VX], w);
00130 mMatrix[VZ][VY] = llv4lerp(a.mMatrix[VZ][VY], b.mMatrix[VZ][VY], w);
00131 mMatrix[VZ][VZ] = llv4lerp(a.mMatrix[VZ][VZ], b.mMatrix[VZ][VZ], w);
00132
00133 mMatrix[VW][VX] = llv4lerp(a.mMatrix[VW][VX], b.mMatrix[VW][VX], w);
00134 mMatrix[VW][VY] = llv4lerp(a.mMatrix[VW][VY], b.mMatrix[VW][VY], w);
00135 mMatrix[VW][VZ] = llv4lerp(a.mMatrix[VW][VZ], b.mMatrix[VW][VZ], w);
00136 }
00137
00138 inline void LLV4Matrix4::multiply(const LLVector3 &a, LLVector3& o) const
00139 {
00140 o.setVec( a.mV[VX] * mMatrix[VX][VX] +
00141 a.mV[VY] * mMatrix[VY][VX] +
00142 a.mV[VZ] * mMatrix[VZ][VX] +
00143 mMatrix[VW][VX],
00144
00145 a.mV[VX] * mMatrix[VX][VY] +
00146 a.mV[VY] * mMatrix[VY][VY] +
00147 a.mV[VZ] * mMatrix[VZ][VY] +
00148 mMatrix[VW][VY],
00149
00150 a.mV[VX] * mMatrix[VX][VZ] +
00151 a.mV[VY] * mMatrix[VY][VZ] +
00152 a.mV[VZ] * mMatrix[VZ][VZ] +
00153 mMatrix[VW][VZ]);
00154 }
00155
00156 inline void LLV4Matrix4::multiply(const LLVector3 &a, LLV4Vector3& o) const
00157 {
00158 o.setVec( a.mV[VX] * mMatrix[VX][VX] +
00159 a.mV[VY] * mMatrix[VY][VX] +
00160 a.mV[VZ] * mMatrix[VZ][VX] +
00161 mMatrix[VW][VX],
00162
00163 a.mV[VX] * mMatrix[VX][VY] +
00164 a.mV[VY] * mMatrix[VY][VY] +
00165 a.mV[VZ] * mMatrix[VZ][VY] +
00166 mMatrix[VW][VY],
00167
00168 a.mV[VX] * mMatrix[VX][VZ] +
00169 a.mV[VY] * mMatrix[VY][VZ] +
00170 a.mV[VZ] * mMatrix[VZ][VZ] +
00171 mMatrix[VW][VZ]);
00172 }
00173
00174 inline const LLV4Matrix4& LLV4Matrix4::translate(const LLV4Vector3 &vec)
00175 {
00176 mMatrix[3][0] += vec.mV[0];
00177 mMatrix[3][1] += vec.mV[1];
00178 mMatrix[3][2] += vec.mV[2];
00179 return (*this);
00180 }
00181
00182
00183
00184
00185
00186
00187
00188 #endif
00189
00190 inline const LLV4Matrix4& LLV4Matrix4::operator=(const LLMatrix4& a)
00191 {
00192 memcpy(mMatrix, a.mMatrix, sizeof(F32) * 16 );
00193 return *this;
00194 }
00195
00196 inline const LLV4Matrix4& LLV4Matrix4::transpose()
00197 {
00198 #if LL_VECTORIZE && defined(_MM_TRANSPOSE4_PS)
00199 _MM_TRANSPOSE4_PS(mV[VX], mV[VY], mV[VZ], mV[VW]);
00200 #else
00201 LLV4Matrix4 mat;
00202 mat.mMatrix[0][0] = mMatrix[0][0];
00203 mat.mMatrix[1][0] = mMatrix[0][1];
00204 mat.mMatrix[2][0] = mMatrix[0][2];
00205 mat.mMatrix[3][0] = mMatrix[0][3];
00206
00207 mat.mMatrix[0][1] = mMatrix[1][0];
00208 mat.mMatrix[1][1] = mMatrix[1][1];
00209 mat.mMatrix[2][1] = mMatrix[1][2];
00210 mat.mMatrix[3][1] = mMatrix[1][3];
00211
00212 mat.mMatrix[0][2] = mMatrix[2][0];
00213 mat.mMatrix[1][2] = mMatrix[2][1];
00214 mat.mMatrix[2][2] = mMatrix[2][2];
00215 mat.mMatrix[3][2] = mMatrix[2][3];
00216
00217 mat.mMatrix[0][3] = mMatrix[3][0];
00218 mat.mMatrix[1][3] = mMatrix[3][1];
00219 mat.mMatrix[2][3] = mMatrix[3][2];
00220 mat.mMatrix[3][3] = mMatrix[3][3];
00221
00222 *this = mat;
00223 #endif
00224 return *this;
00225 }
00226
00227 inline const LLV4Matrix4& LLV4Matrix4::translate(const LLVector3 &vec)
00228 {
00229 mMatrix[3][0] += vec.mV[0];
00230 mMatrix[3][1] += vec.mV[1];
00231 mMatrix[3][2] += vec.mV[2];
00232 return (*this);
00233 }
00234
00235 inline LLVector3 operator*(const LLVector3 &a, const LLV4Matrix4 &b)
00236 {
00237 return LLVector3(a.mV[VX] * b.mMatrix[VX][VX] +
00238 a.mV[VY] * b.mMatrix[VY][VX] +
00239 a.mV[VZ] * b.mMatrix[VZ][VX] +
00240 b.mMatrix[VW][VX],
00241
00242 a.mV[VX] * b.mMatrix[VX][VY] +
00243 a.mV[VY] * b.mMatrix[VY][VY] +
00244 a.mV[VZ] * b.mMatrix[VZ][VY] +
00245 b.mMatrix[VW][VY],
00246
00247 a.mV[VX] * b.mMatrix[VX][VZ] +
00248 a.mV[VY] * b.mMatrix[VY][VZ] +
00249 a.mV[VZ] * b.mMatrix[VZ][VZ] +
00250 b.mMatrix[VW][VZ]);
00251 }
00252
00253
00254 #endif