/****************************************************************************
 *                                                                          *
 *  Module  :   bbtpsse.h                                                   *
 *                                                                          *
 *  Purpose :   Tools for Bernstein Bezier Triangular Patch                 *
 *                                                                          *
 ****************************************************************************/

#ifndef BBTPSSE_H
#define BBTPSSE_H

#include <rwcore.h>
#include <rtintel.h>

/*****************************************************************************
 *
 * SSE code
 *
 */

typedef struct BBTPSSEOrdinates BBTPSSEOrdinates;
struct_BBTPSSEOrdinates
{
    RpSSEOverlayM128  b210;
    RpSSEOverlayM128  b201;
    RpSSEOverlayM128  b120;
    RpSSEOverlayM128  b111;
    RpSSEOverlayM128  b102;
    RpSSEOverlayM128  b021;
    RpSSEOverlayM128  b012;
};

/*
 * BBTPSSEControlPoints
 *
 * Ten control points named b<ijk> (index digits sum to 3). Each one is an
 * array of three RpSSEOverlayM128 values, which is the operand shape the
 * SSEV3d* macros in this header index with [0], [1] and [2].
 *
 * NOTE(review): the three elements are presumably the x, y and z
 * components, one value per SSE lane - confirm against the callers.
 * Member order is significant to the layout and must not be changed.
 */
struct BBTPSSEControlPoints
{
    /* Non-corner points (same names as the BBTPSSEOrdinates members). */
    RpSSEOverlayM128  b210[3];
    RpSSEOverlayM128  b201[3];
    RpSSEOverlayM128  b120[3];
    RpSSEOverlayM128  b111[3];
    RpSSEOverlayM128  b102[3];
    RpSSEOverlayM128  b021[3];
    RpSSEOverlayM128  b012[3];

    /* Corner points. */
    RpSSEOverlayM128  b300[3];
    RpSSEOverlayM128  b030[3];
    RpSSEOverlayM128  b003[3];
};
typedef struct BBTPSSEControlPoints BBTPSSEControlPoints;

/*
 * Some SSE V3d maths. These may be moved to rtintel eventually.
 */
/*
 * SSEV3dAdd(_o, _v0, _v1)
 *
 * Packed single-precision add over three-element arrays of objects that
 * expose an .m128 member (e.g. RpSSEOverlayM128[3]):
 *
 *     _o[i].m128 = _v0[i].m128 + _v1[i].m128      for i = 0, 1, 2
 *
 * Every argument is evaluated once per component, so arguments should be
 * free of side effects.
 */
#define SSEV3dAdd(_o, _v0, _v1)                                         \
MACRO_START                                                             \
{                                                                       \
    int sseV3dAxis_;                                                    \
                                                                        \
    for (sseV3dAxis_ = 0; sseV3dAxis_ < 3; ++sseV3dAxis_)               \
    {                                                                   \
        (_o)[sseV3dAxis_].m128 = _mm_add_ps((_v0)[sseV3dAxis_].m128,    \
                                            (_v1)[sseV3dAxis_].m128);   \
    }                                                                   \
}                                                                       \
MACRO_STOP


/*
 * SSEV3dSub(_o, _v0, _v1)
 *
 * Packed single-precision subtract over three-element arrays of objects
 * that expose an .m128 member (e.g. RpSSEOverlayM128[3]):
 *
 *     _o[i].m128 = _v0[i].m128 - _v1[i].m128      for i = 0, 1, 2
 *
 * Every argument is evaluated once per component, so arguments should be
 * free of side effects.
 */
#define SSEV3dSub(_o, _v0, _v1)                                         \
MACRO_START                                                             \
{                                                                       \
    int sseV3dAxis_;                                                    \
                                                                        \
    for (sseV3dAxis_ = 0; sseV3dAxis_ < 3; ++sseV3dAxis_)               \
    {                                                                   \
        (_o)[sseV3dAxis_].m128 = _mm_sub_ps((_v0)[sseV3dAxis_].m128,    \
                                            (_v1)[sseV3dAxis_].m128);   \
    }                                                                   \
}                                                                       \
MACRO_STOP


/*
 * SSEV3dMult(_o, _v0, _v1)
 *
 * Packed single-precision component-wise multiply over three-element
 * arrays of objects that expose an .m128 member (e.g. RpSSEOverlayM128[3]):
 *
 *     _o[i].m128 = _v0[i].m128 * _v1[i].m128      for i = 0, 1, 2
 *
 * This is an element-by-element product, not a dot or cross product.
 * Every argument is evaluated once per component, so arguments should be
 * free of side effects.
 */
#define SSEV3dMult(_o, _v0, _v1)                                        \
MACRO_START                                                             \
{                                                                       \
    int sseV3dAxis_;                                                    \
                                                                        \
    for (sseV3dAxis_ = 0; sseV3dAxis_ < 3; ++sseV3dAxis_)               \
    {                                                                   \
        (_o)[sseV3dAxis_].m128 = _mm_mul_ps((_v0)[sseV3dAxis_].m128,    \
                                            (_v1)[sseV3dAxis_].m128);   \
    }                                                                   \
}                                                                       \
MACRO_STOP


/*
 * SSEV3dDiv(_o, _v0, _v1)
 *
 * Packed single-precision component-wise divide over three-element arrays
 * of objects that expose an .m128 member (e.g. RpSSEOverlayM128[3]):
 *
 *     _o[i].m128 = _v0[i].m128 / _v1[i].m128      for i = 0, 1, 2
 *
 * No check is made for zero divisors. Every argument is evaluated once per
 * component, so arguments should be free of side effects.
 */
#define SSEV3dDiv(_o, _v0, _v1)                                         \
MACRO_START                                                             \
{                                                                       \
    int sseV3dAxis_;                                                    \
                                                                        \
    for (sseV3dAxis_ = 0; sseV3dAxis_ < 3; ++sseV3dAxis_)               \
    {                                                                   \
        (_o)[sseV3dAxis_].m128 = _mm_div_ps((_v0)[sseV3dAxis_].m128,    \
                                            (_v1)[sseV3dAxis_].m128);   \
    }                                                                   \
}                                                                       \
MACRO_STOP


/*
 * SSEV3dLength(_o, _v)
 *
 * Per-lane Euclidean length of a three-component vector:
 *
 *     _o.m128 = sqrt(_v[0]^2 + _v[1]^2 + _v[2]^2)
 *
 * _v is a three-element array of objects with an .m128 member; _o is a
 * single such object (not an array). The squares are summed in the order
 * ([0] + [1]) + [2] before the packed square root is taken.
 */
#define SSEV3dLength(_o, _v)                                            \
MACRO_START                                                             \
{                                                                       \
    __m128 sseLenSq_;                                                   \
                                                                        \
    sseLenSq_ = _mm_mul_ps((_v)[0].m128, (_v)[0].m128);                 \
    sseLenSq_ = _mm_add_ps(sseLenSq_,                                   \
                           _mm_mul_ps((_v)[1].m128, (_v)[1].m128));     \
    sseLenSq_ = _mm_add_ps(sseLenSq_,                                   \
                           _mm_mul_ps((_v)[2].m128, (_v)[2].m128));     \
    (_o).m128 = _mm_sqrt_ps(sseLenSq_);                                 \
}                                                                       \
MACRO_STOP


/*
 * SSEV3dDotProduct(_dp, _v0, _v1)
 *
 * Per-lane dot product of two three-component vectors:
 *
 *     _dp.m128 = _v0[0]*_v1[0] + _v0[1]*_v1[1] + _v0[2]*_v1[2]
 *
 * _v0 and _v1 are three-element arrays of objects with an .m128 member;
 * _dp is a single such object (not an array). The products are summed in
 * the order ([0] + [1]) + [2].
 */
#define SSEV3dDotProduct(_dp, _v0, _v1)                                 \
MACRO_START                                                             \
{                                                                       \
    __m128 sseDotAcc_;                                                  \
                                                                        \
    sseDotAcc_ = _mm_mul_ps((_v0)[0].m128, (_v1)[0].m128);              \
    sseDotAcc_ = _mm_add_ps(sseDotAcc_,                                 \
                            _mm_mul_ps((_v0)[1].m128, (_v1)[1].m128));  \
    sseDotAcc_ = _mm_add_ps(sseDotAcc_,                                 \
                            _mm_mul_ps((_v0)[2].m128, (_v1)[2].m128));  \
    (_dp).m128 = sseDotAcc_;                                            \
}                                                                       \
MACRO_STOP


/*
 * SSEV3dCrossProduct(_cp, _v0, _v1)
 *
 * Per-lane cross product _cp = _v0 x _v1 of two three-component vectors:
 *
 *     _cp[0] = _v0[1]*_v1[2] - _v0[2]*_v1[1]
 *     _cp[1] = _v0[2]*_v1[0] - _v0[0]*_v1[2]
 *     _cp[2] = _v0[0]*_v1[1] - _v0[1]*_v1[0]
 *
 * All three arguments are three-element arrays of objects with an .m128
 * member (e.g. RpSSEOverlayM128[3]).
 *
 * All three components are computed into temporaries before anything is
 * stored, so _cp may be the same array as _v0 or _v1.
 */
#define SSEV3dCrossProduct(_cp, _v0, _v1)                                 \
MACRO_START                                                               \
{                                                                         \
    const __m128 sseCp0_ =                                                \
        _mm_sub_ps(_mm_mul_ps((_v0)[1].m128, (_v1)[2].m128),              \
                   _mm_mul_ps((_v0)[2].m128, (_v1)[1].m128));             \
    const __m128 sseCp1_ =                                                \
        _mm_sub_ps(_mm_mul_ps((_v0)[2].m128, (_v1)[0].m128),              \
                   _mm_mul_ps((_v0)[0].m128, (_v1)[2].m128));             \
    const __m128 sseCp2_ =                                                \
        _mm_sub_ps(_mm_mul_ps((_v0)[0].m128, (_v1)[1].m128),              \
                   _mm_mul_ps((_v0)[1].m128, (_v1)[0].m128));             \
                                                                          \
    (_cp)[0].m128 = sseCp0_;                                              \
    (_cp)[1].m128 = sseCp1_;                                              \
    (_cp)[2].m128 = sseCp2_;                                              \
}                                                                         \
MACRO_STOP


/*
 * SSEV3dSubConstant(_o, _v, _c)
 *
 * Subtracts one packed value from every component of a three-element
 * array:
 *
 *     _o[i].m128 = _v[i].m128 - _c.m128           for i = 0, 1, 2
 *
 * _o and _v are three-element arrays of objects with an .m128 member; _c
 * is a single such object. _c is re-read for each component.
 */
#define SSEV3dSubConstant(_o, _v, _c)                                   \
MACRO_START                                                             \
{                                                                       \
    int sseV3dAxis_;                                                    \
                                                                        \
    for (sseV3dAxis_ = 0; sseV3dAxis_ < 3; ++sseV3dAxis_)               \
    {                                                                   \
        (_o)[sseV3dAxis_].m128 = _mm_sub_ps((_v)[sseV3dAxis_].m128,     \
                                            (_c).m128);                 \
    }                                                                   \
}                                                                       \
MACRO_STOP


/*
 * SSEV3dAddConstant(_o, _v, _c)
 *
 * Adds one packed value to every component of a three-element array:
 *
 *     _o[i].m128 = _v[i].m128 + _c.m128           for i = 0, 1, 2
 *
 * _o and _v are three-element arrays of objects with an .m128 member; _c
 * is a single such object. _c is re-read for each component.
 */
#define SSEV3dAddConstant(_o, _v, _c)                                   \
MACRO_START                                                             \
{                                                                       \
    int sseV3dAxis_;                                                    \
                                                                        \
    for (sseV3dAxis_ = 0; sseV3dAxis_ < 3; ++sseV3dAxis_)               \
    {                                                                   \
        (_o)[sseV3dAxis_].m128 = _mm_add_ps((_v)[sseV3dAxis_].m128,     \
                                            (_c).m128);                 \
    }                                                                   \
}                                                                       \
MACRO_STOP


/*
 * SSEV3dMultConstant(_o, _v, _c)
 *
 * Scales every component of a three-element array by one packed value:
 *
 *     _o[i].m128 = _v[i].m128 * _c.m128           for i = 0, 1, 2
 *
 * _o and _v are three-element arrays of objects with an .m128 member; _c
 * is a single such object. _c is re-read for each component.
 */
#define SSEV3dMultConstant(_o, _v, _c)                                  \
MACRO_START                                                             \
{                                                                       \
    int sseV3dAxis_;                                                    \
                                                                        \
    for (sseV3dAxis_ = 0; sseV3dAxis_ < 3; ++sseV3dAxis_)               \
    {                                                                   \
        (_o)[sseV3dAxis_].m128 = _mm_mul_ps((_v)[sseV3dAxis_].m128,     \
                                            (_c).m128);                 \
    }                                                                   \
}                                                                       \
MACRO_STOP


#ifdef    __cplusplus
extern              "C"
{
#endif                          /* __cplusplus */

/*
 * _rtbbtpSSEGenerateOrdinates
 *
 * Declaration only; the definition is not in this header.
 *
 *   ord                 pointer to a BBTPSSEOrdinates.
 *   objPos0..objPos2    position pointers for the three numbered inputs.
 *   objNrm0..objNrm2    normal pointers paired with each position.
 *
 * NOTE(review): presumably fills *ord from the three position/normal
 * pairs, each pointer addressing a three-element overlay array as used by
 * the SSEV3d* macros - confirm against the definition.
 */
extern void
_rtbbtpSSEGenerateOrdinates(BBTPSSEOrdinates *ord,
    RpSSEOverlayM128 * objPos0, RpSSEOverlayM128 * objNrm0,
    RpSSEOverlayM128 * objPos1, RpSSEOverlayM128 * objNrm1,
    RpSSEOverlayM128 * objPos2, RpSSEOverlayM128 * objNrm2);

/*
 * _rtbbtpSSEGenerateControlPoints
 *
 * Declaration only; the definition is not in this header.
 *
 *   cps                 pointer to a BBTPSSEControlPoints.
 *   objPos0..objPos2    position pointers for the three numbered inputs.
 *   objNrm0..objNrm2    normal pointers paired with each position.
 *
 * NOTE(review): presumably fills *cps from the three position/normal
 * pairs, each pointer addressing a three-element overlay array as used by
 * the SSEV3d* macros - confirm against the definition.
 */
extern void
_rtbbtpSSEGenerateControlPoints(BBTPSSEControlPoints *cps,
    RpSSEOverlayM128 * objPos0, RpSSEOverlayM128 * objNrm0,
    RpSSEOverlayM128 * objPos1, RpSSEOverlayM128 * objNrm1,
    RpSSEOverlayM128 * objPos2, RpSSEOverlayM128 * objNrm2);

/*
 * _rtbbtpSSEPatchEvaluate
 *
 * Declaration only; the definition is not in this header.
 *
 *   res        pointer to the RpSSEOverlayM128 result storage.
 *   ord        pointer to a BBTPSSEOrdinates.
 *   a, b, c    three RpSSEOverlayM128 pointers.
 *
 * NOTE(review): presumably evaluates the patch described by *ord at the
 * parameters a, b, c (likely barycentric coordinates) and writes the value
 * to *res - confirm against the definition.
 */
extern void
_rtbbtpSSEPatchEvaluate(RpSSEOverlayM128 * res,
    BBTPSSEOrdinates * ord,
    RpSSEOverlayM128 * a,
    RpSSEOverlayM128 * b,
    RpSSEOverlayM128 * c);

/*
 * _rtbbtpSSEComputeOrdinates
 *
 * Declaration only; the definition is not in this header.
 *
 *   alpha               RpSSEOverlayM128 pointer, listed first.
 *   objPos0, objPos1    two position pointers.
 *   objNrm              one normal pointer.
 *   nrmDir              tagged __RWUNUSED__ in this prototype.
 *
 * Takes the same parameter list as _rt_rtbbtpSSEComputeOrdinatesSlow,
 * except that nrmDir is tagged __RWUNUSED__ here.
 *
 * NOTE(review): alpha is presumably the output; __RWUNUSED__ is defined
 * outside this header and presumably marks the parameter as deliberately
 * unused - confirm both against the definition.
 */
extern void
_rtbbtpSSEComputeOrdinates(RpSSEOverlayM128 * alpha,
    RpSSEOverlayM128 * objPos0,
    RpSSEOverlayM128 * objPos1,
    RpSSEOverlayM128 * objNrm,
    RpSSEOverlayM128 * nrmDir __RWUNUSED__);

/*
 * _rt_rtbbtpSSEComputeOrdinatesSlow
 *
 * Declaration only; the definition is not in this header.
 *
 *   alpha               RpSSEOverlayM128 pointer, listed first.
 *   objPos0, objPos1    two position pointers.
 *   objNrm              one normal pointer.
 *   nrmDir              RpSSEOverlayM128 pointer (not tagged unused here).
 *
 * Takes the same parameter list as _rtbbtpSSEComputeOrdinates, without the
 * __RWUNUSED__ tag on nrmDir.
 *
 * NOTE(review): going by the name this is presumably a slower reference
 * variant of _rtbbtpSSEComputeOrdinates that does use nrmDir, with alpha
 * as the output - confirm against the definition. The "_rt_rtbbtp" prefix
 * differs from the "_rtbbtp" prefix of most other declarations here.
 */
extern void
_rt_rtbbtpSSEComputeOrdinatesSlow(RpSSEOverlayM128 * alpha,
    RpSSEOverlayM128 * objPos0,
    RpSSEOverlayM128 * objPos1,
    RpSSEOverlayM128 * objNrm,
    RpSSEOverlayM128 * nrmDir );

/*
 * _rtbbtpSSESurfaceEvaluate
 *
 * Declaration only; the definition is not in this header.
 *
 *   res        pointer to the RpSSEOverlayM128 result storage.
 *   cps        pointer to a BBTPSSEControlPoints.
 *   a, b, c    three RpSSEOverlayM128 pointers.
 *
 * NOTE(review): presumably evaluates the surface described by *cps at the
 * parameters a, b, c (likely barycentric coordinates) and writes a
 * three-component result through res - confirm against the definition.
 */
extern void
_rtbbtpSSESurfaceEvaluate(RpSSEOverlayM128 * res,
    BBTPSSEControlPoints * cps,
    RpSSEOverlayM128 * a,
    RpSSEOverlayM128 * b,
    RpSSEOverlayM128 * c);

/*
 * _rt_rtbbtpSSEComputeControlPointsOld
 *
 * Declaration only; the definition is not in this header.
 *
 *   res                 RpSSEOverlayM128 pointer, listed first.
 *   objPos0, objNrm0    position and normal pointers for input 0.
 *   objPos1, objNrm1    position and normal pointers for input 1.
 *
 * Takes the same parameter list as _rtbbtpSSEComputeControlPoints, without
 * the __RWUNUSED__ tag on objNrm1.
 *
 * NOTE(review): going by the name this is presumably an older variant of
 * _rtbbtpSSEComputeControlPoints that does use objNrm1, with res as the
 * output - confirm against the definition. The "_rt_rtbbtp" prefix differs
 * from the "_rtbbtp" prefix of most other declarations here.
 */
extern void
_rt_rtbbtpSSEComputeControlPointsOld(RpSSEOverlayM128 * res,
    RpSSEOverlayM128 * objPos0,
    RpSSEOverlayM128 * objNrm0,
    RpSSEOverlayM128 * objPos1,
    RpSSEOverlayM128 * objNrm1 );

/*
 * _rtbbtpSSEComputeControlPoints
 *
 * Declaration only; the definition is not in this header.
 *
 *   res                 RpSSEOverlayM128 pointer, listed first.
 *   objPos0, objNrm0    position and normal pointers for input 0.
 *   objPos1             position pointer for input 1.
 *   objNrm1             tagged __RWUNUSED__ in this prototype.
 *
 * Takes the same parameter list as _rt_rtbbtpSSEComputeControlPointsOld,
 * except that objNrm1 is tagged __RWUNUSED__ here.
 *
 * NOTE(review): res is presumably the output; __RWUNUSED__ is defined
 * outside this header and presumably marks the parameter as deliberately
 * unused - confirm both against the definition.
 */
extern void
_rtbbtpSSEComputeControlPoints(RpSSEOverlayM128 * res,
    RpSSEOverlayM128 * objPos0,
    RpSSEOverlayM128 * objNrm0,
    RpSSEOverlayM128 * objPos1,
    RpSSEOverlayM128 * objNrm1 __RWUNUSED__ );

#ifdef    __cplusplus
}
#endif                          /* __cplusplus */


#endif /* BBTPSSE_H */

