7 #if FE_VDIM<3 || FE_VDIM>4 8 #error Full SIMD Vector specialization only implemented in 3 or 4 dimensions 12 #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \ 13 (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0)) 46 operator const F32*(void)
const 55 v4sf_assertAlignment(m_simd);
57 #if FE_VEC_CHECK_VALID 58 set4(0.0f,0.0f,0.0f,0.0f);
61 template<
int N,
typename T>
68 template<
typename T,
typename U>
70 { set4(x,y,0.0f,0.0f); }
71 template<
typename T,
typename U,
typename V>
74 template<
typename T,
typename U,
typename V,
typename W>
85 template<
int N,
typename T>
92 F32 operator[](U32 index)
const 106 return m_data[index];
109 template<
typename T,
typename U,
typename V,
typename W>
113 void get4(T array[4]);
// Returns this object by value as a const Raw: the declared return type is
// Raw and the returned expression is *this. (Raw is declared outside this view.)
115 const Raw temp(
void)
const {
return *
this; }
// Read-only access to the underlying v4sf member m_simd.
118 const v4sf&
simd(
void)
const {
return m_simd; }
// Mutable access to the underlying v4sf member m_simd; the free functions in
// this listing assign results through this reference (e.g. lhs.simd()=...).
121 v4sf&
simd(
void) {
return m_simd; }
126 #if FE_VEC_CHECK_VALID 127 if(FE_INVALID_SCALAR(m_data[0]))
128 feX(e_corrupt,
"Vector<N,F32>::checkValid",
129 "element 0 invalid");
130 if(FE_INVALID_SCALAR(m_data[1]))
131 feX(e_corrupt,
"Vector<N,F32>::checkValid",
132 "element 1 invalid");
133 if(FE_INVALID_SCALAR(m_data[2]))
134 feX(e_corrupt,
"Vector<N,F32>::checkValid",
135 "element 2 invalid");
137 if(FE_INVALID_SCALAR(m_data[3]))
138 feX(e_corrupt,
"Vector<4,F32>::checkValid",
139 "element 3 invalid");
142 feX(e_corrupt,
"Vector<3,F32>::checkValid",
143 "element 3 nonzero");
168 v4sf_assertAlignment(rhs.
simd());
173 s.
sPrintf(
"[%g %g %g %g]",rhs[0],rhs[1],rhs[2],rhs[3]);
175 s.
sPrintf(
"[%g %g %g]",rhs[0],rhs[1],rhs[2]);
184 v4sf_assertAlignment(m_simd);
185 return set4(other[0],other[1],other[2],0.0f);
192 v4sf_assertAlignment(m_simd);
193 return set4(other[0],other[1],0.0f,0.0f);
200 v4sf_assertAlignment(m_simd);
201 return set4(other[0],0.0f,0.0f,0.0f);
204 template<
int N,
typename T>
208 v4sf_assertAlignment(m_simd);
209 return set4(other[0],other[1],other[2],other[3]);
215 v4sf_assertAlignment(m_simd);
216 v4sf_assertAlignment(other.m_simd);
226 v4sf_assertAlignment(m_simd);
229 m_simd= __builtin_ia32_loadups(array);
242 for(U32 i=0;i<FE_VDIM;i++)
244 if(m_data[i]!=other[i])
255 return !operator==(other);
258 template<
typename T,
typename U,
typename V,
typename W>
261 v4sf_assertAlignment(m_simd);
278 v4sf_assertAlignment(m_simd);
292 v4sf_assertAlignment(r.simd());
294 r.set4(0.0f,0.0f,0.0f,0.0f);
305 v4sf_assertAlignment(r.simd());
307 r.set4(x,0.0f,0.0f,0.0f);
315 template<
typename T,
typename U>
318 v4sf_assertAlignment(r.simd());
320 r.set4(x,y,0.0f,0.0f);
328 template<
typename T,
typename U,
typename V>
331 v4sf_assertAlignment(r.simd());
341 template<
typename T,
typename U,
typename V,
typename W>
344 v4sf_assertAlignment(r.simd());
357 v4sf_assertAlignment(lhs.
simd());
359 lhs.set4(value,value,value,value);
370 v4sf_assertAlignment(lhs.
simd());
391 v4sf_assertAlignment(lhs.
simd());
392 v4sf_assertAlignment(rhs.
simd());
407 v4sf_assertAlignment(lhs.
simd());
408 v4sf_assertAlignment(rhs.
simd());
422 v4sf_assertAlignment(rhs.
simd());
427 v4sf zero=__builtin_ia32_setzerops();
430 v4sf_setSame(zero,0.0f);
432 v.
simd()=__builtin_ia32_subps(zero,rhs.
simd());
443 v4sf_assertAlignment(lhs.
simd());
444 v4sf_assertAlignment(rhs.
simd());
459 v4sf_assertAlignment(lhs.
simd());
463 v4sf_setSame(scale,rhs);
464 lhs.
simd()=__builtin_ia32_mulps(scale,lhs.
simd());
474 v4sf_assertAlignment(lhs.
simd());
475 v4sf_assertAlignment(rhs.
simd());
481 v4sf combine=__builtin_ia32_mulps(lhs.
simd(),rhs.
simd());
485 v4sf shift=__builtin_ia32_shufps(combine,combine,_MM_SHUFFLE(1,0,3,2));
487 combine=__builtin_ia32_addps(combine,shift);
490 shift=__builtin_ia32_shufps(combine,combine,_MM_SHUFFLE(0,3,2,1));
492 combine=__builtin_ia32_addps(combine,shift);
495 combine=__builtin_ia32_haddps(combine,combine);
496 combine=__builtin_ia32_haddps(combine,combine);
498 return reinterpret_cast<F32*
>(&combine)[0];
506 v4sf_assertAlignment(rhs.
simd());
517 v4sf_assertAlignment(rhs.
simd());
529 v4sf_assertAlignment(lhs.
simd());
530 v4sf_assertAlignment(rhs.
simd());
546 v4sf_assertAlignment(lhs.
simd());
547 v4sf_assertAlignment(rhs.
simd());
563 v4sf_assertAlignment(lhs.
simd());
564 v4sf_assertAlignment(rhs.
simd());
577 template <
typename T>
582 v4sf_assertAlignment(rhs.
simd());
587 v4sf_setSame(scale,lhs);
588 v.
simd()=__builtin_ia32_mulps(scale,rhs.
simd());
601 v4sf_assertAlignment(lhs.
simd());
606 v4sf_setSame(scale,F32(rhs));
607 v.
simd()=__builtin_ia32_mulps(scale,lhs.
simd());
617 v4sf_assertAlignment(rhs.
simd());
623 feX(e_unsolvable,
"unit(Vector<N,F32>)",
624 "attempt to normalize zero magnitude vector");
626 return rhs*F32(1.0f/mag);
634 v4sf_assertAlignment(rhs.
simd());
640 return rhs*F32(1.0f/mag);
650 v4sf_assertAlignment(rhs.
simd());
656 feX(e_unsolvable,
"normalize",
657 "attempt to normalize zero magnitude vector");
659 return rhs*=F32(1.0f/mag);
667 v4sf_assertAlignment(rhs.
simd());
684 v4sf_assertAlignment(r.
simd());
685 v4sf_assertAlignment(lhs.
simd());
686 v4sf_assertAlignment(rhs.
simd());
691 set(r, lhs[1] * rhs[2] - lhs[2] * rhs[1],
692 lhs[2] * rhs[0] - lhs[0] * rhs[2],
693 lhs[0] * rhs[1] - lhs[1] * rhs[0]);
695 v4sf op1=__builtin_ia32_shufps(lhs.
simd(),lhs.
simd(),_MM_SHUFFLE(3,0,2,1));
696 v4sf op2=__builtin_ia32_shufps(rhs.simd(),rhs.simd(),_MM_SHUFFLE(3,1,0,2));
697 v4sf pr1=__builtin_ia32_mulps(op1,op2);
699 op1=__builtin_ia32_shufps(lhs.
simd(),lhs.
simd(),_MM_SHUFFLE(3,1,0,2));
700 op2=__builtin_ia32_shufps(rhs.simd(),rhs.simd(),_MM_SHUFFLE(3,0,2,1));
701 v4sf pr2=__builtin_ia32_mulps(op1,op2);
703 r.
simd()=__builtin_ia32_subps(pr1,pr2);
716 v4sf_assertAlignment(lhs.
simd());
717 v4sf_assertAlignment(rhs.
simd());
722 return cross3(v, lhs, rhs);
755 v4sf_assertAlignment(lhs.
simd());
756 v4sf_assertAlignment(rhs.
simd());
761 v4sf_setSame(scale,scalar);
762 v4sf scaled=__builtin_ia32_mulps(scale,rhs.
simd());
763 lhs.
simd()=__builtin_ia32_addps(lhs.
simd(),scaled);
778 v4sf_assertAlignment(lhs.
simd());
779 v4sf_assertAlignment(rhs.
simd());
784 v4sf_setSame(scale,scalar);
785 v4sf scaled=__builtin_ia32_mulps(scale,lhs.
simd());
786 lhs.
simd()=__builtin_ia32_addps(rhs.
simd(),scaled);
U32 size(const Vector< FE_VDIM, F32 > &lhs)
Return the number of elements.
Definition: VectorNf_gnu.h:380
Vector< FE_VDIM, F32 > & normalizeSafe(Vector< FE_VDIM, F32 > &rhs)
Scale Vector to unit length.
Definition: VectorNf_gnu.h:665
Vector< FE_VDIM, F32 > operator*(const Vector< FE_VDIM, F32 > &lhs, const Vector< FE_VDIM, F32 > &rhs)
Return a Vector of products of each component.
Definition: VectorNf_gnu.h:560
Vector< FE_VDIM, F32 > & setAll(Vector< FE_VDIM, F32 > &lhs, T value)
Set all components to the same value.
Definition: VectorNf_gnu.h:355
Vector< N, T > & scaleAndAdd(Vector< N, T > &lhs, U scalar, const Vector< N, T > &rhs)
Scale then add.
Definition: Vector.h:1126
void checkValid(void) const
Definition: VectorNf_gnu.h:124
Vector< FE_VDIM, F32 > & normalize(Vector< FE_VDIM, F32 > &rhs)
Scale the Vector to unit length in place and return a reference to it.
Definition: VectorNf_gnu.h:648
kernel
Definition: namespace.dox:3
boost::enable_if< boost::is_arithmetic< T >, Vector< FE_VDIM, F32 > >::type operator*(const T lhs, const Vector< FE_VDIM, F32 > &rhs)
Return a uniformly scaled Vector (pre-multiplication: scalar on the left).
Definition: VectorNf_gnu.h:580
BWORD checkValid(const T &a_value)
Definition: Vector.h:79
Vector< FE_VDIM, F32 > & operator-=(Vector< FE_VDIM, F32 > &lhs, const Vector< FE_VDIM, F32 > &rhs)
Subtract from Vector in place.
Definition: VectorNf_gnu.h:404
Vector< FE_VDIM, F32 > & operator*=(Vector< FE_VDIM, F32 > &lhs, const Vector< FE_VDIM, F32 > &rhs)
Independently scale components in place.
Definition: VectorNf_gnu.h:440
F32 dot(const Vector< FE_VDIM, F32 > &lhs, const Vector< FE_VDIM, F32 > &rhs)
Return dot product.
Definition: VectorNf_gnu.h:472
Vector< FE_VDIM, F32 > operator-(const Vector< FE_VDIM, F32 > &lhs, const Vector< FE_VDIM, F32 > &rhs)
Return difference of Vectors.
Definition: VectorNf_gnu.h:543
Dense vector - size fixed by template.
Definition: Vector.h:19
const v4sf & simd(void) const
Definition: VectorNf_gnu.h:118
T dot(const Vector< N, T > &lhs, const Vector< N, T > &rhs)
Dot (inner) product.
Definition: Vector.h:768
Vector< FE_VDIM, F32 > & operator+=(Vector< FE_VDIM, F32 > &lhs, const Vector< FE_VDIM, F32 > &rhs)
Add to Vector in place.
Definition: VectorNf_gnu.h:388
Vector< N, T > & addScaled(Vector< N, T > &lhs, U scalar, const Vector< N, T > &rhs)
Add with scaling.
Definition: Vector.h:1106
String & sPrintf(const char *fmt,...)
Populate the string in the manner of sprintf().
Definition: String.cc:529
Automatically reference-counted string container.
Definition: String.h:128
Vector< FE_VDIM, F32 > unit(const Vector< FE_VDIM, F32 > &rhs)
Return the Vector direction scaled to unit length.
Definition: VectorNf_gnu.h:615
Vector< 4, T > & cross3(Vector< 4, T > &r, const Vector< 4, T > &lhs, const Vector< 4, T > &rhs)
Return a cross product of Vectors.
Definition: Vector4.h:432
Vector< FE_VDIM, F32 > operator+(const Vector< FE_VDIM, F32 > &lhs, const Vector< FE_VDIM, F32 > &rhs)
Return sum of Vectors.
Definition: VectorNf_gnu.h:526
Vector< FE_VDIM, F32 > cross3(const Vector< FE_VDIM, F32 > &lhs, const Vector< FE_VDIM, F32 > &rhs)
Return the cross product of two Vectors by value.
Definition: VectorNf_gnu.h:713
T magnitude(const Vector< N, T > &rhs)
Frobenius norm operation.
Definition: Vector.h:785
Vector< FE_VDIM, F32 > & setAt(Vector< FE_VDIM, F32 > &lhs, U32 index, T value)
Set indexed component.
Definition: VectorNf_gnu.h:368
String print(const Vector< FE_VDIM, F32 > &rhs)
Return text describing the Vector's state.
Definition: VectorNf_gnu.h:166
boost::enable_if< boost::is_arithmetic< T >, Vector< FE_VDIM, F32 > >::type operator*(const Vector< FE_VDIM, F32 > &lhs, const T rhs)
Return a uniformly scaled Vector (post-multiplication: scalar on the right).
Definition: VectorNf_gnu.h:599
T magnitudeSquared(const Vector< N, T > &rhs)
Square of the length.
Definition: Vector.h:794
Fully specialized 3 or 4 component F32 vector using GNU SIMD.
Definition: VectorNf_gnu.h:27
v4sf & simd(void)
Definition: VectorNf_gnu.h:121
F32 magnitude(const Vector< FE_VDIM, F32 > &rhs)
Return the Vector length.
Definition: VectorNf_gnu.h:515
Vector< FE_VDIM, F32 > & cross3(Vector< FE_VDIM, F32 > &r, const Vector< FE_VDIM, F32 > &lhs, const Vector< FE_VDIM, F32 > &rhs)
Set the Vector r to the cross product of lhs and rhs, and return a reference to r.
Definition: VectorNf_gnu.h:681
F32 magnitudeSquared(const Vector< FE_VDIM, F32 > &rhs)
Return square of the Vector length.
Definition: VectorNf_gnu.h:504
Vector< FE_VDIM, F32 > unitSafe(const Vector< FE_VDIM, F32 > &rhs)
Return the Vector direction scaled to unit length with zero check.
Definition: VectorNf_gnu.h:632
Vector< FE_VDIM, F32 > operator-(const Vector< FE_VDIM, F32 > &rhs)
Negate the Vector.
Definition: VectorNf_gnu.h:420
Vector< FE_VDIM, F32 > & operator*=(Vector< FE_VDIM, F32 > &lhs, T rhs)
Uniformly scale components in place.
Definition: VectorNf_gnu.h:457
F32 & operator[](U32 index)
Access internal component.
Definition: VectorNf_gnu.h:103