/*
 * topic Bezier patch library
 *
 * This library provides efficient evaluation of
 * Cubic Bezier patches
 */

#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>

#include <rwcore.h>
#include <rpdbgerr.h>

#include "rtbezpat.h"
#include "bezquad.h"

/* Revision identification string carrying the RCS "$Id$" keyword for this
 * file. Nothing in the code visible here reads it; __RWUNUSED__ presumably
 * suppresses the resulting unused-variable warning -- TODO confirm against
 * the RenderWare core headers. */
static const char   rcsid[] __RWUNUSED__ =
    "@@(#)$Id: bezquad.c,v 1.18 2001/10/05 09:18:38 rburg Exp $";

#define RtBezierQuadMatrixControlFitMacro(_B, _P)                       \
MACRO_START                                                             \
{                                                                       \
    const RtBezierV4d        * const P0 = &(_P)[0][0];                        \
    const RtBezierV4d        * const P1 = &(_P)[1][0];                        \
    const RtBezierV4d        * const P2 = &(_P)[2][0];                        \
    const RtBezierV4d        * const P3 = &(_P)[3][0];                        \
    RtBezierV4d              * const B0 = &(_B)[0][0];                        \
    RtBezierV4d              * const B1 = &(_B)[1][0];                        \
    RtBezierV4d              * const B2 = &(_B)[2][0];                        \
    RtBezierV4d              * const B3 = &(_B)[3][0];                        \
                                                                        \
    B0[0].x = P0[0].x;                                                  \
    B0[0].y = P0[0].y;                                                  \
    B0[0].z = P0[0].z;                                                  \
                                                                        \
    B0[1].x =                                                           \
        3 * P0[1].x - 5 * P0[0].x / 6 - 3 * P0[2].x / 2 + P0[3].x / 3;  \
    B0[1].y =                                                           \
        3 * P0[1].y - 5 * P0[0].y / 6 - 3 * P0[2].y / 2 + P0[3].y / 3;  \
    B0[1].z =                                                           \
        3 * P0[1].z - 5 * P0[0].z / 6 - 3 * P0[2].z / 2 + P0[3].z / 3;  \
                                                                        \
    B0[2].x =                                                           \
        P0[0].x / 3 - 3 * P0[1].x / 2 + 3 * P0[2].x - 5 * P0[3].x / 6;  \
    B0[2].y =                                                           \
        P0[0].y / 3 - 3 * P0[1].y / 2 + 3 * P0[2].y - 5 * P0[3].y / 6;  \
    B0[2].z =                                                           \
        P0[0].z / 3 - 3 * P0[1].z / 2 + 3 * P0[2].z - 5 * P0[3].z / 6;  \
                                                                        \
    B0[3].x = P0[3].x;                                                  \
    B0[3].y = P0[3].y;                                                  \
    B0[3].z = P0[3].z;                                                  \
                                                                        \
    B1[0].x =                                                           \
        3 * P1[0].x - 5 * P0[0].x / 6 - 3 * P2[0].x / 2 + P3[0].x / 3;  \
    B1[0].y =                                                           \
        3 * P1[0].y - 5 * P0[0].y / 6 - 3 * P2[0].y / 2 + P3[0].y / 3;  \
    B1[0].z =                                                           \
        3 * P1[0].z - 5 * P0[0].z / 6 - 3 * P2[0].z / 2 + P3[0].z / 3;  \
                                                                        \
    B1[1].x =                                                           \
        25 * P0[0].x / 36 - 5 * P0[1].x / 2 + 5 * P0[2].x / 4 -         \
        5 * P0[3].x / 18 - 5 * P1[0].x / 2 + 9 * P1[1].x -              \
        9 * P1[2].x / 2 + P1[3].x + 5 * P2[0].x / 4 - 9 * P2[1].x / 2 + \
        9 * P2[2].x / 4 - P2[3].x / 2 - 5 * P3[0].x / 18 + P3[1].x -    \
        P3[2].x / 2 + P3[3].x / 9;                                      \
    B1[1].y =                                                           \
        25 * P0[0].y / 36 - 5 * P0[1].y / 2 + 5 * P0[2].y / 4 -         \
        5 * P0[3].y / 18 - 5 * P1[0].y / 2 + 9 * P1[1].y -              \
        9 * P1[2].y / 2 + P1[3].y + 5 * P2[0].y / 4 - 9 * P2[1].y / 2 + \
        9 * P2[2].y / 4 - P2[3].y / 2 - 5 * P3[0].y / 18 + P3[1].y -    \
        P3[2].y / 2 + P3[3].y / 9;                                      \
    B1[1].z =                                                           \
        25 * P0[0].z / 36 - 5 * P0[1].z / 2 + 5 * P0[2].z / 4 -         \
        5 * P0[3].z / 18 - 5 * P1[0].z / 2 + 9 * P1[1].z -              \
        9 * P1[2].z / 2 + P1[3].z + 5 * P2[0].z / 4 - 9 * P2[1].z / 2 + \
        9 * P2[2].z / 4 - P2[3].z / 2 - 5 * P3[0].z / 18 + P3[1].z -    \
        P3[2].z / 2 + P3[3].z / 9;                                      \
                                                                        \
    B1[2].x =                                                           \
        5 * P0[1].x / 4 - 5 * P0[0].x / 18 - 5 * P0[2].x / 2 +          \
        25 * P0[3].x / 36 + P1[0].x - 9 * P1[1].x / 2 + 9 * P1[2].x -   \
        5 * P1[3].x / 2 - P2[0].x / 2 + 9 * P2[1].x / 4 -               \
        9 * P2[2].x / 2 + 5 * P2[3].x / 4 + P3[0].x / 9 - P3[1].x / 2 + \
        P3[2].x - 5 * P3[3].x / 18;                                     \
    B1[2].y =                                                           \
        5 * P0[1].y / 4 - 5 * P0[0].y / 18 - 5 * P0[2].y / 2 +          \
        25 * P0[3].y / 36 + P1[0].y - 9 * P1[1].y / 2 + 9 * P1[2].y -   \
        5 * P1[3].y / 2 - P2[0].y / 2 + 9 * P2[1].y / 4 -               \
        9 * P2[2].y / 2 + 5 * P2[3].y / 4 + P3[0].y / 9 - P3[1].y / 2 + \
        P3[2].y - 5 * P3[3].y / 18;                                     \
    B1[2].z =                                                           \
        5 * P0[1].z / 4 - 5 * P0[0].z / 18 - 5 * P0[2].z / 2 +          \
        25 * P0[3].z / 36 + P1[0].z - 9 * P1[1].z / 2 + 9 * P1[2].z -   \
        5 * P1[3].z / 2 - P2[0].z / 2 + 9 * P2[1].z / 4 -               \
        9 * P2[2].z / 2 + 5 * P2[3].z / 4 + P3[0].z / 9 - P3[1].z / 2 + \
        P3[2].z - 5 * P3[3].z / 18;                                     \
                                                                        \
    B1[3].x =                                                           \
        3 * P1[3].x - 5 * P0[3].x / 6 - 3 * P2[3].x / 2 + P3[3].x / 3;  \
    B1[3].y =                                                           \
        3 * P1[3].y - 5 * P0[3].y / 6 - 3 * P2[3].y / 2 + P3[3].y / 3;  \
    B1[3].z =                                                           \
        3 * P1[3].z - 5 * P0[3].z / 6 - 3 * P2[3].z / 2 + P3[3].z / 3;  \
                                                                        \
    B2[0].x =                                                           \
        P0[0].x / 3 - 3 * P1[0].x / 2 + 3 * P2[0].x - 5 * P3[0].x / 6;  \
    B2[0].y =                                                           \
        P0[0].y / 3 - 3 * P1[0].y / 2 + 3 * P2[0].y - 5 * P3[0].y / 6;  \
    B2[0].z =                                                           \
        P0[0].z / 3 - 3 * P1[0].z / 2 + 3 * P2[0].z - 5 * P3[0].z / 6;  \
                                                                        \
    B2[1].x =                                                           \
        P0[1].x - 5 * P0[0].x / 18 - P0[2].x / 2 + P0[3].x / 9 +        \
        5 * P1[0].x / 4 - 9 * P1[1].x / 2 + 9 * P1[2].x / 4 -           \
        P1[3].x / 2 - 5 * P2[0].x / 2 + 9 * P2[1].x - 9 * P2[2].x / 2 + \
        P2[3].x + 25 * P3[0].x / 36 - 5 * P3[1].x / 2 +                 \
        5 * P3[2].x / 4 - 5 * P3[3].x / 18;                             \
    B2[1].y =                                                           \
        P0[1].y - 5 * P0[0].y / 18 - P0[2].y / 2 + P0[3].y / 9 +        \
        5 * P1[0].y / 4 - 9 * P1[1].y / 2 + 9 * P1[2].y / 4 -           \
        P1[3].y / 2 - 5 * P2[0].y / 2 + 9 * P2[1].y - 9 * P2[2].y / 2 + \
        P2[3].y + 25 * P3[0].y / 36 - 5 * P3[1].y / 2 +                 \
        5 * P3[2].y / 4 - 5 * P3[3].y / 18;                             \
    B2[1].z =                                                           \
        P0[1].z - 5 * P0[0].z / 18 - P0[2].z / 2 + P0[3].z / 9 +        \
        5 * P1[0].z / 4 - 9 * P1[1].z / 2 + 9 * P1[2].z / 4 -           \
        P1[3].z / 2 - 5 * P2[0].z / 2 + 9 * P2[1].z - 9 * P2[2].z / 2 + \
        P2[3].z + 25 * P3[0].z / 36 - 5 * P3[1].z / 2 +                 \
        5 * P3[2].z / 4 - 5 * P3[3].z / 18;                             \
                                                                        \
    B2[2].x =                                                           \
        P0[0].x / 9 - P0[1].x / 2 + P0[2].x - 5 * P0[3].x / 18 -        \
        P1[0].x / 2 + 9 * P1[1].x / 4 - 9 * P1[2].x / 2 +               \
        5 * P1[3].x / 4 + P2[0].x - 9 * P2[1].x / 2 + 9 * P2[2].x -     \
        5 * P2[3].x / 2 - 5 * P3[0].x / 18 + 5 * P3[1].x / 4 -          \
        5 * P3[2].x / 2 + 25 * P3[3].x / 36;                            \
    B2[2].y =                                                           \
        P0[0].y / 9 - P0[1].y / 2 + P0[2].y - 5 * P0[3].y / 18 -        \
        P1[0].y / 2 + 9 * P1[1].y / 4 - 9 * P1[2].y / 2 +               \
        5 * P1[3].y / 4 + P2[0].y - 9 * P2[1].y / 2 + 9 * P2[2].y -     \
        5 * P2[3].y / 2 - 5 * P3[0].y / 18 + 5 * P3[1].y / 4 -          \
        5 * P3[2].y / 2 + 25 * P3[3].y / 36;                            \
    B2[2].z =                                                           \
        P0[0].z / 9 - P0[1].z / 2 + P0[2].z - 5 * P0[3].z / 18 -        \
        P1[0].z / 2 + 9 * P1[1].z / 4 - 9 * P1[2].z / 2 +               \
        5 * P1[3].z / 4 + P2[0].z - 9 * P2[1].z / 2 + 9 * P2[2].z -     \
        5 * P2[3].z / 2 - 5 * P3[0].z / 18 + 5 * P3[1].z / 4 -          \
        5 * P3[2].z / 2 + 25 * P3[3].z / 36;                            \
                                                                        \
    B2[3].x =                                                           \
        P0[3].x / 3 - 3 * P1[3].x / 2 + 3 * P2[3].x - 5 * P3[3].x / 6;  \
    B2[3].y =                                                           \
        P0[3].y / 3 - 3 * P1[3].y / 2 + 3 * P2[3].y - 5 * P3[3].y / 6;  \
    B2[3].z =                                                           \
        P0[3].z / 3 - 3 * P1[3].z / 2 + 3 * P2[3].z - 5 * P3[3].z / 6;  \
                                                                        \
    B3[0].x = P3[0].x;                                                  \
    B3[0].y = P3[0].y;                                                  \
    B3[0].z = P3[0].z;                                                  \
                                                                        \
    B3[1].x =                                                           \
        3 * P3[1].x - 5 * P3[0].x / 6 - 3 * P3[2].x / 2 + P3[3].x / 3;  \
    B3[1].y =                                                           \
        3 * P3[1].y - 5 * P3[0].y / 6 - 3 * P3[2].y / 2 + P3[3].y / 3;  \
    B3[1].z =                                                           \
        3 * P3[1].z - 5 * P3[0].z / 6 - 3 * P3[2].z / 2 + P3[3].z / 3;  \
                                                                        \
    B3[2].x =                                                           \
        P3[0].x / 3 - 3 * P3[1].x / 2 + 3 * P3[2].x - 5 * P3[3].x / 6;  \
    B3[2].y =                                                           \
        P3[0].y / 3 - 3 * P3[1].y / 2 + 3 * P3[2].y - 5 * P3[3].y / 6;  \
    B3[2].z =                                                           \
        P3[0].z / 3 - 3 * P3[1].z / 2 + 3 * P3[2].z - 5 * P3[3].z / 6;  \
                                                                        \
    B3[3].x = P3[3].x;                                                  \
    B3[3].y = P3[3].y;                                                  \
    B3[3].z = P3[3].z;                                                  \
}                                                                       \
MACRO_STOP

#define RtBezierQuadMatrixWeightSetupMacro(_W, _P)                      \
MACRO_START                                                             \
{                                                                       \
    const RtBezierV4d       * const P0 = &(_P)[0][0];                         \
    const RtBezierV4d       * const P1 = &(_P)[1][0];                         \
    const RtBezierV4d       * const P2 = &(_P)[2][0];                         \
    const RtBezierV4d       * const P3 = &(_P)[3][0];                         \
    RtBezierMatrix      Q;                                              \
    RtBezierV4d             * const Q0 = &Q[0][0];                            \
    RtBezierV4d             * const Q1 = &Q[1][0];                            \
    RtBezierV4d             * const Q2 = &Q[2][0];                            \
    RtBezierV4d             * const Q3 = &Q[3][0];                            \
    RtBezierV4d             * const W0 = &(_W)[0][0];                         \
    RtBezierV4d             * const W1 = &(_W)[1][0];                         \
    RtBezierV4d             * const W2 = &(_W)[2][0];                         \
    RtBezierV4d             * const W3 = &(_W)[3][0];                         \
                                                                        \
    Q0[0].x = (P0[0].x);                                                \
    Q0[0].y = (P0[0].y);                                                \
    Q0[0].z = (P0[0].z);                                                \
    Q0[1].x = (P0[1].x);                                                \
    Q0[1].y = (P0[1].y);                                                \
    Q0[1].z = (P0[1].z);                                                \
    Q0[2].x = (P0[2].x);                                                \
    Q0[2].y = (P0[2].y);                                                \
    Q0[2].z = (P0[2].z);                                                \
    Q0[3].x = (P0[3].x);                                                \
    Q0[3].y = (P0[3].y);                                                \
    Q0[3].z = (P0[3].z);                                                \
    Q1[0].x = (3*P1[0].x - 3*P0[0].x);                                  \
    Q1[0].y = (3*P1[0].y - 3*P0[0].y);                                  \
    Q1[0].z = (3*P1[0].z - 3*P0[0].z);                                  \
    Q1[1].x = (3*P1[1].x - 3*P0[1].x);                                  \
    Q1[1].y = (3*P1[1].y - 3*P0[1].y);                                  \
    Q1[1].z = (3*P1[1].z - 3*P0[1].z);                                  \
    Q1[2].x = (3*P1[2].x - 3*P0[2].x);                                  \
    Q1[2].y = (3*P1[2].y - 3*P0[2].y);                                  \
    Q1[2].z = (3*P1[2].z - 3*P0[2].z);                                  \
    Q1[3].x = (3*P1[3].x - 3*P0[3].x);                                  \
    Q1[3].y = (3*P1[3].y - 3*P0[3].y);                                  \
    Q1[3].z = (3*P1[3].z - 3*P0[3].z);                                  \
    Q2[0].x = (6*P0[0].x - 12*P1[0].x + 6*P2[0].x);                     \
    Q2[0].y = (6*P0[0].y - 12*P1[0].y + 6*P2[0].y);                     \
    Q2[0].z = (6*P0[0].z - 12*P1[0].z + 6*P2[0].z);                     \
    Q2[1].x = (6*P0[1].x - 12*P1[1].x + 6*P2[1].x);                     \
    Q2[1].y = (6*P0[1].y - 12*P1[1].y + 6*P2[1].y);                     \
    Q2[1].z = (6*P0[1].z - 12*P1[1].z + 6*P2[1].z);                     \
    Q2[2].x = (6*P0[2].x - 12*P1[2].x + 6*P2[2].x);                     \
    Q2[2].y = (6*P0[2].y - 12*P1[2].y + 6*P2[2].y);                     \
    Q2[2].z = (6*P0[2].z - 12*P1[2].z + 6*P2[2].z);                     \
    Q2[3].x = (6*P0[3].x - 12*P1[3].x + 6*P2[3].x);                     \
    Q2[3].y = (6*P0[3].y - 12*P1[3].y + 6*P2[3].y);                     \
    Q2[3].z = (6*P0[3].z - 12*P1[3].z + 6*P2[3].z);                     \
    Q3[0].x = (18*P1[0].x - 6*P0[0].x - 18*P2[0].x + 6*P3[0].x);        \
    Q3[0].y = (18*P1[0].y - 6*P0[0].y - 18*P2[0].y + 6*P3[0].y);        \
    Q3[0].z = (18*P1[0].z - 6*P0[0].z - 18*P2[0].z + 6*P3[0].z);        \
    Q3[1].x = (18*P1[1].x - 6*P0[1].x - 18*P2[1].x + 6*P3[1].x);        \
    Q3[1].y = (18*P1[1].y - 6*P0[1].y - 18*P2[1].y + 6*P3[1].y);        \
    Q3[1].z = (18*P1[1].z - 6*P0[1].z - 18*P2[1].z + 6*P3[1].z);        \
    Q3[2].x = (18*P1[2].x - 6*P0[2].x - 18*P2[2].x + 6*P3[2].x);        \
    Q3[2].y = (18*P1[2].y - 6*P0[2].y - 18*P2[2].y + 6*P3[2].y);        \
    Q3[2].z = (18*P1[2].z - 6*P0[2].z - 18*P2[2].z + 6*P3[2].z);        \
    Q3[3].x = (18*P1[3].x - 6*P0[3].x - 18*P2[3].x + 6*P3[3].x);        \
    Q3[3].y = (18*P1[3].y - 6*P0[3].y - 18*P2[3].y + 6*P3[3].y);        \
    Q3[3].z = (18*P1[3].z - 6*P0[3].z - 18*P2[3].z + 6*P3[3].z);        \
    W0[0].x = (Q0[0].x);                                                \
    W0[0].y = (Q0[0].y);                                                \
    W0[0].z = (Q0[0].z);                                                \
    W1[0].x = (Q1[0].x);                                                \
    W1[0].y = (Q1[0].y);                                                \
    W1[0].z = (Q1[0].z);                                                \
    W2[0].x = (Q2[0].x);                                                \
    W2[0].y = (Q2[0].y);                                                \
    W2[0].z = (Q2[0].z);                                                \
    W3[0].x = (Q3[0].x);                                                \
    W3[0].y = (Q3[0].y);                                                \
    W3[0].z = (Q3[0].z);                                                \
    W0[1].x = (3*Q0[1].x - 3*Q0[0].x);                                  \
    W0[1].y = (3*Q0[1].y - 3*Q0[0].y);                                  \
    W0[1].z = (3*Q0[1].z - 3*Q0[0].z);                                  \
    W1[1].x = (3*Q1[1].x - 3*Q1[0].x);                                  \
    W1[1].y = (3*Q1[1].y - 3*Q1[0].y);                                  \
    W1[1].z = (3*Q1[1].z - 3*Q1[0].z);                                  \
    W2[1].x = (3*Q2[1].x - 3*Q2[0].x);                                  \
    W2[1].y = (3*Q2[1].y - 3*Q2[0].y);                                  \
    W2[1].z = (3*Q2[1].z - 3*Q2[0].z);                                  \
    W3[1].x = (3*Q3[1].x - 3*Q3[0].x);                                  \
    W3[1].y = (3*Q3[1].y - 3*Q3[0].y);                                  \
    W3[1].z = (3*Q3[1].z - 3*Q3[0].z);                                  \
    W0[2].x = (6*Q0[0].x - 12*Q0[1].x + 6*Q0[2].x);                     \
    W0[2].y = (6*Q0[0].y - 12*Q0[1].y + 6*Q0[2].y);                     \
    W0[2].z = (6*Q0[0].z - 12*Q0[1].z + 6*Q0[2].z);                     \
    W1[2].x = (6*Q1[0].x - 12*Q1[1].x + 6*Q1[2].x);                     \
    W1[2].y = (6*Q1[0].y - 12*Q1[1].y + 6*Q1[2].y);                     \
    W1[2].z = (6*Q1[0].z - 12*Q1[1].z + 6*Q1[2].z);                     \
    W2[2].x = (6*Q2[0].x - 12*Q2[1].x + 6*Q2[2].x);                     \
    W2[2].y = (6*Q2[0].y - 12*Q2[1].y + 6*Q2[2].y);                     \
    W2[2].z = (6*Q2[0].z - 12*Q2[1].z + 6*Q2[2].z);                     \
    W3[2].x = (6*Q3[0].x - 12*Q3[1].x + 6*Q3[2].x);                     \
    W3[2].y = (6*Q3[0].y - 12*Q3[1].y + 6*Q3[2].y);                     \
    W3[2].z = (6*Q3[0].z - 12*Q3[1].z + 6*Q3[2].z);                     \
    W0[3].x = (18*Q0[1].x - 6*Q0[0].x - 18*Q0[2].x + 6*Q0[3].x);        \
    W0[3].y = (18*Q0[1].y - 6*Q0[0].y - 18*Q0[2].y + 6*Q0[3].y);        \
    W0[3].z = (18*Q0[1].z - 6*Q0[0].z - 18*Q0[2].z + 6*Q0[3].z);        \
    W1[3].x = (18*Q1[1].x - 6*Q1[0].x - 18*Q1[2].x + 6*Q1[3].x);        \
    W1[3].y = (18*Q1[1].y - 6*Q1[0].y - 18*Q1[2].y + 6*Q1[3].y);        \
    W1[3].z = (18*Q1[1].z - 6*Q1[0].z - 18*Q1[2].z + 6*Q1[3].z);        \
    W2[3].x = (18*Q2[1].x - 6*Q2[0].x - 18*Q2[2].x + 6*Q2[3].x);        \
    W2[3].y = (18*Q2[1].y - 6*Q2[0].y - 18*Q2[2].y + 6*Q2[3].y);        \
    W2[3].z = (18*Q2[1].z - 6*Q2[0].z - 18*Q2[2].z + 6*Q2[3].z);        \
    W3[3].x = (18*Q3[1].x - 6*Q3[0].x - 18*Q3[2].x + 6*Q3[3].x);        \
    W3[3].y = (18*Q3[1].y - 6*Q3[0].y - 18*Q3[2].y + 6*Q3[3].y);        \
    W3[3].z = (18*Q3[1].z - 6*Q3[0].z - 18*Q3[2].z + 6*Q3[3].z);        \
                                                                        \
    /* N.B W is independent of U and V,                                 \
     * so need only be found once per set of control points P. */       \
                                                                        \
}                                                                       \
MACRO_STOP

#define RtBezierQuadMatrixDifferenceSetupMacro(_D, _W, _U, _V)          \
MACRO_START                                                             \
{                                                                       \
    const RtBezierV4d        * const W0 = &(_W)[0][0];                  \
    const RtBezierV4d        * const W1 = &(_W)[1][0];                  \
    const RtBezierV4d        * const W2 = &(_W)[2][0];                  \
    const RtBezierV4d        * const W3 = &(_W)[3][0];                  \
    RtBezierMatrix      X;                                              \
    RtBezierV4d              * const X0 = &X[0][0];                     \
    RtBezierV4d              * const X1 = &X[1][0];                     \
    RtBezierV4d              * const X2 = &X[2][0];                     \
    RtBezierV4d              * const X3 = &X[3][0];                     \
    const RwReal        U1 = ((_U));                                    \
    const RwReal        U2 = (U1*U1);                                   \
    const RwReal        U3 = (U2*U1);                                   \
    const RwReal        V1 = ((_V));                                    \
    const RwReal        V2 = (V1*V1);                                   \
    const RwReal        V3 = (V2*V1);                                   \
    RtBezierV4d              * const D0 = &(_D)[0][0];                  \
    RtBezierV4d              * const D1 = &(_D)[1][0];                  \
    RtBezierV4d              * const D2 = &(_D)[2][0];                  \
    RtBezierV4d              * const D3 = &(_D)[3][0];                  \
                                                                        \
    X3[0].x = (U3*W3[0].x);                                             \
    X3[0].y = (U3*W3[0].y);                                             \
    X3[0].z = (U3*W3[0].z);                                             \
    X3[1].x = (U3*W3[1].x);                                             \
    X3[1].y = (U3*W3[1].y);                                             \
    X3[1].z = (U3*W3[1].z);                                             \
    X3[2].x = (U3*W3[2].x);                                             \
    X3[2].y = (U3*W3[2].y);                                             \
    X3[2].z = (U3*W3[2].z);                                             \
    X3[3].x = (U3*W3[3].x);                                             \
    X3[3].y = (U3*W3[3].y);                                             \
    X3[3].z = (U3*W3[3].z);                                             \
    X2[0].x = (U2*W2[0].x + X3[0].x);                                   \
    X2[0].y = (U2*W2[0].y + X3[0].y);                                   \
    X2[0].z = (U2*W2[0].z + X3[0].z);                                   \
    X2[1].x = (U2*W2[1].x + X3[1].x);                                   \
    X2[1].y = (U2*W2[1].y + X3[1].y);                                   \
    X2[1].z = (U2*W2[1].z + X3[1].z);                                   \
    X2[2].x = (U2*W2[2].x + X3[2].x);                                   \
    X2[2].y = (U2*W2[2].y + X3[2].y);                                   \
    X2[2].z = (U2*W2[2].z + X3[2].z);                                   \
    X2[3].x = (U2*W2[3].x + X3[3].x);                                   \
    X2[3].y = (U2*W2[3].y + X3[3].y);                                   \
    X2[3].z = (U2*W2[3].z + X3[3].z);                                   \
    X1[0].x = (U1*W1[0].x + X2[0].x/2 - X3[0].x/3);                     \
    X1[0].y = (U1*W1[0].y + X2[0].y/2 - X3[0].y/3);                     \
    X1[0].z = (U1*W1[0].z + X2[0].z/2 - X3[0].z/3);                     \
    X1[1].x = (U1*W1[1].x + X2[1].x/2 - X3[1].x/3);                     \
    X1[1].y = (U1*W1[1].y + X2[1].y/2 - X3[1].y/3);                     \
    X1[1].z = (U1*W1[1].z + X2[1].z/2 - X3[1].z/3);                     \
    X1[2].x = (U1*W1[2].x + X2[2].x/2 - X3[2].x/3);                     \
    X1[2].y = (U1*W1[2].y + X2[2].y/2 - X3[2].y/3);                     \
    X1[2].z = (U1*W1[2].z + X2[2].z/2 - X3[2].z/3);                     \
    X1[3].x = (U1*W1[3].x + X2[3].x/2 - X3[3].x/3);                     \
    X1[3].y = (U1*W1[3].y + X2[3].y/2 - X3[3].y/3);                     \
    X1[3].z = (U1*W1[3].z + X2[3].z/2 - X3[3].z/3);                     \
    X0[0].x = (W0[0].x);                                                \
    X0[0].y = (W0[0].y);                                                \
    X0[0].z = (W0[0].z);                                                \
    X0[1].x = (W0[1].x);                                                \
    X0[1].y = (W0[1].y);                                                \
    X0[1].z = (W0[1].z);                                                \
    X0[2].x = (W0[2].x);                                                \
    X0[2].y = (W0[2].y);                                                \
    X0[2].z = (W0[2].z);                                                \
    X0[3].x = (W0[3].x);                                                \
    X0[3].y = (W0[3].y);                                                \
    X0[3].z = (W0[3].z);                                                \
    D3[0].x = (V3*X0[3].x);                                             \
    D3[0].y = (V3*X0[3].y);                                             \
    D3[0].z = (V3*X0[3].z);                                             \
    D3[1].x = (V3*X1[3].x);                                             \
    D3[1].y = (V3*X1[3].y);                                             \
    D3[1].z = (V3*X1[3].z);                                             \
    D3[2].x = (V3*X2[3].x);                                             \
    D3[2].y = (V3*X2[3].y);                                             \
    D3[2].z = (V3*X2[3].z);                                             \
    D3[3].x = (V3*X3[3].x);                                             \
    D3[3].y = (V3*X3[3].y);                                             \
    D3[3].z = (V3*X3[3].z);                                             \
    D2[0].x = (V2*X0[2].x + D3[0].x);                                   \
    D2[0].y = (V2*X0[2].y + D3[0].y);                                   \
    D2[0].z = (V2*X0[2].z + D3[0].z);                                   \
    D2[1].x = (V2*X1[2].x + D3[1].x);                                   \
    D2[1].y = (V2*X1[2].y + D3[1].y);                                   \
    D2[1].z = (V2*X1[2].z + D3[1].z);                                   \
    D2[2].x = (V2*X2[2].x + D3[2].x);                                   \
    D2[2].y = (V2*X2[2].y + D3[2].y);                                   \
    D2[2].z = (V2*X2[2].z + D3[2].z);                                   \
    D2[3].x = (V2*X3[2].x + D3[3].x);                                   \
    D2[3].y = (V2*X3[2].y + D3[3].y);                                   \
    D2[3].z = (V2*X3[2].z + D3[3].z);                                   \
    D1[0].x = (V1*X0[1].x + D2[0].x/2 - D3[0].x/3);                     \
    D1[0].y = (V1*X0[1].y + D2[0].y/2 - D3[0].y/3);                     \
    D1[0].z = (V1*X0[1].z + D2[0].z/2 - D3[0].z/3);                     \
    D1[1].x = (V1*X1[1].x + D2[1].x/2 - D3[1].x/3);                     \
    D1[1].y = (V1*X1[1].y + D2[1].y/2 - D3[1].y/3);                     \
    D1[1].z = (V1*X1[1].z + D2[1].z/2 - D3[1].z/3);                     \
    D1[2].x = (V1*X2[1].x + D2[2].x/2 - D3[2].x/3);                     \
    D1[2].y = (V1*X2[1].y + D2[2].y/2 - D3[2].y/3);                     \
    D1[2].z = (V1*X2[1].z + D2[2].z/2 - D3[2].z/3);                     \
    D1[3].x = (V1*X3[1].x + D2[3].x/2 - D3[3].x/3);                     \
    D1[3].y = (V1*X3[1].y + D2[3].y/2 - D3[3].y/3);                     \
    D1[3].z = (V1*X3[1].z + D2[3].z/2 - D3[3].z/3);                     \
    D0[0].x = (X0[0].x);                                                \
    D0[0].y = (X0[0].y);                                                \
    D0[0].z = (X0[0].z);                                                \
    D0[1].x = (X1[0].x);                                                \
    D0[1].y = (X1[0].y);                                                \
    D0[1].z = (X1[0].z);                                                \
    D0[2].x = (X2[0].x);                                                \
    D0[2].y = (X2[0].y);                                                \
    D0[2].z = (X2[0].z);                                                \
    D0[3].x = (X3[0].x);                                                \
    D0[3].y = (X3[0].y);                                                \
    D0[3].z = (X3[0].z);                                                \
                                                                        \
}                                                                       \
MACRO_STOP

/**
 * \ingroup rtbezpatch
 * \ref RtBezierQuadMatrixControlFit returns the control points
 * for a Bezier quadrilateral fitted to pass through the supplied
 * sample points.
 *
 * The function body is a single expansion of
 * RtBezierQuadMatrixControlFitMacro. Only the x, y and z members of
 * each output element are written.
 *
 * B and P should be distinct matrices: output elements are stored
 * while the corresponding input elements are still needed by later
 * parts of the computation.
 *
 * \param B    Output Bezier control point matrix
 * \param P    Input sample point matrix
 */
void
RtBezierQuadMatrixControlFit(RtBezierMatrix B, RtBezierMatrix P)
{
    RWAPIFUNCTION(RWSTRING("RtBezierQuadMatrixControlFit"));
    RtBezierQuadMatrixControlFitMacro(B, P);
    RWRETURNVOID();
}

/**
 * \ingroup rtbezpatch
 * \ref RtBezierQuadMatrixWeightSetup returns a weight
 * matrix for an input control point matrix.
 *
 * The weight matrix does not depend on any step size, so it need only
 * be computed once per set of control points. Only the x, y and z
 * members of each element are read and written.
 *
 * Note that this function is used for debug purposes only and, for
 * efficiency, is available as a macro for final release versions of an
 * application.
 *
 * \param W     Output weight matrix
 * \param P     Input Bezier control point matrix
 *
 * \see RtBezierQuadDifferenceStepU
 * \see RtBezierQuadDifferenceStepV
 * \see RtBezierQuadMatrixSample
 */
void
RtBezierQuadMatrixWeightSetup(RtBezierMatrix W, RtBezierMatrix P)
{
    RWAPIFUNCTION(RWSTRING("RtBezierQuadMatrixWeightSetup"));
    RtBezierQuadMatrixWeightSetupMacro(W, P);
    RWRETURNVOID();
}

/**
 * \ingroup rtbezpatch
 * \ref RtBezierQuadMatrixDifferenceSetup returns a difference
 * matrix for an input weight matrix.
 *
 * U is applied across the first index of W and V across the second;
 * the result is stored with the two indices swapped. Only the x, y and
 * z members of each element are read and written.
 *
 * Note that this function is used for debug purposes only and, for
 * efficiency, is available as a macro for final release versions of an
 * application.
 *
 * \param D     Output difference matrix
 * \param W     Input weight matrix
 * \param U     patch coordinate difference per step, applied along
 *              the first index of W
 * \param V     patch coordinate difference per step, applied along
 *              the second index of W
 *
 * \see RtBezierQuadDifferenceStepU
 * \see RtBezierQuadDifferenceStepV
 * \see RtBezierQuadMatrixSample
 */
void
RtBezierQuadMatrixDifferenceSetup(RtBezierMatrix D,
                                  RtBezierMatrix W, RwReal U, RwReal V)
{
    RWAPIFUNCTION(RWSTRING("RtBezierQuadMatrixDifferenceSetup"));
    RtBezierQuadMatrixDifferenceSetupMacro(D, W, U, V);
    RWRETURNVOID();
}

/*
 * RtBezierQuadTangentMacro(_Dt, _Dp, _theta, _P)
 *
 * Fills the two 4x4 matrices _Dt and _Dp from the Bezier control point
 * matrix _P.  With c = RwCos(_theta) and s = RwSin(_theta), every
 * element is a fixed linear combination of entries of _P:
 *
 *     _Dt[i][j] =  c * R[i][j] + s * C[i][j]
 *     _Dp[i][j] = -s * R[i][j] + c * C[i][j]
 *
 * R is formed from differences along the first index of _P (P0..P3)
 * and C from differences along the second index ([0]..[3]).  Along the
 * differenced index the four entries are, in both cases:
 *
 *     index 0:   3*P[1] - 3*P[0]
 *     index 1:  -P[0] - P[1] + 2*P[2]
 *     index 2:   P[2] - 2*P[1] + P[3]
 *     index 3:   3*P[3] - 3*P[2]
 *
 * NOTE(review): these coefficients look like the control points of the
 * derivative of a cubic Bezier, degree-elevated back to four control
 * points - confirm against the toolkit documentation.
 *
 * Only the x, y and z members are written; w is not touched.
 *
 * _theta appears twice in the expansion and _P, _Dt and _Dp four times
 * each, so arguments should be free of side effects.  Outputs are
 * stored element by element while _P is still being read, so passing
 * the same matrix as both input and output would read values that
 * have already been overwritten.
 */
#define RtBezierQuadTangentMacro(_Dt,_Dp, _theta, _P)                   \
MACRO_START                                                             \
{                                                                       \
    const RtBezierV4d      * const P0 = &(_P)[0][0];                         \
    const RtBezierV4d      * const P1 = &(_P)[1][0];                         \
    const RtBezierV4d      * const P2 = &(_P)[2][0];                         \
    const RtBezierV4d      * const P3 = &(_P)[3][0];                         \
    const RwReal       c = (RwReal) RwCos(_theta);                      \
    const RwReal       s = (RwReal) RwSin(_theta);                      \
    RtBezierV4d      * const Dt0 = &(_Dt)[0][0];                             \
    RtBezierV4d      * const Dt1 = &(_Dt)[1][0];                             \
    RtBezierV4d      * const Dt2 = &(_Dt)[2][0];                             \
    RtBezierV4d      * const Dt3 = &(_Dt)[3][0];                             \
    RtBezierV4d      * const Dp0 = &(_Dp)[0][0];                             \
    RtBezierV4d      * const Dp1 = &(_Dp)[1][0];                             \
    RtBezierV4d      * const Dp2 = &(_Dp)[2][0];                             \
    RtBezierV4d      * const Dp3 = &(_Dp)[3][0];                             \
                                                                        \
    Dt0[0].x =                                                          \
        (3*P1[0].x*c - 3*P0[0].x*c +                                    \
         3*P0[1].x*s - 3*P0[0].x*s);                                    \
    Dt0[0].y =                                                          \
        (3*P1[0].y*c - 3*P0[0].y*c +                                    \
         3*P0[1].y*s - 3*P0[0].y*s);                                    \
    Dt0[0].z =                                                          \
        (3*P1[0].z*c - 3*P0[0].z*c +                                    \
         3*P0[1].z*s - 3*P0[0].z*s);                                    \
                                                                        \
    Dt0[1].x =                                                          \
        (3*P1[1].x*c - 3*P0[1].x*c -                                    \
         P0[0].x*s - P0[1].x*s + 2*P0[2].x*s);                          \
    Dt0[1].y =                                                          \
        (3*P1[1].y*c - 3*P0[1].y*c -                                    \
         P0[0].y*s - P0[1].y*s + 2*P0[2].y*s);                          \
    Dt0[1].z =                                                          \
        (3*P1[1].z*c - 3*P0[1].z*c -                                    \
         P0[0].z*s - P0[1].z*s + 2*P0[2].z*s);                          \
                                                                        \
    Dt0[2].x =                                                          \
        (3*P1[2].x*c - 3*P0[2].x*c +                                    \
         P0[2].x*s - 2*P0[1].x*s + P0[3].x*s);                          \
    Dt0[2].y =                                                          \
        (3*P1[2].y*c - 3*P0[2].y*c +                                    \
         P0[2].y*s - 2*P0[1].y*s + P0[3].y*s);                          \
    Dt0[2].z =                                                          \
        (3*P1[2].z*c - 3*P0[2].z*c +                                    \
         P0[2].z*s - 2*P0[1].z*s + P0[3].z*s);                          \
                                                                        \
    Dt0[3].x =                                                          \
        (3*P1[3].x*c - 3*P0[3].x*c +                                    \
         3*P0[3].x*s - 3*P0[2].x*s);                                    \
    Dt0[3].y =                                                          \
        (3*P1[3].y*c - 3*P0[3].y*c +                                    \
         3*P0[3].y*s - 3*P0[2].y*s);                                    \
    Dt0[3].z =                                                          \
        (3*P1[3].z*c - 3*P0[3].z*c +                                    \
         3*P0[3].z*s - 3*P0[2].z*s);                                    \
                                                                        \
    Dt1[0].x =                                                          \
        (-P0[0].x*c - P1[0].x*c + 2*P2[0].x*c +                         \
         3*P1[1].x*s - 3*P1[0].x*s);                                    \
    Dt1[0].y =                                                          \
        (-P0[0].y*c - P1[0].y*c + 2*P2[0].y*c +                         \
         3*P1[1].y*s - 3*P1[0].y*s);                                    \
    Dt1[0].z =                                                          \
        (-P0[0].z*c - P1[0].z*c + 2*P2[0].z*c +                         \
         3*P1[1].z*s - 3*P1[0].z*s);                                    \
                                                                        \
    Dt1[1].x =                                                          \
        (-P0[1].x*c - P1[1].x*c + 2*P2[1].x*c -                         \
         P1[0].x*s - P1[1].x*s + 2*P1[2].x*s);                          \
    Dt1[1].y =                                                          \
        (-P0[1].y*c - P1[1].y*c + 2*P2[1].y*c -                         \
         P1[0].y*s - P1[1].y*s + 2*P1[2].y*s);                          \
    Dt1[1].z =                                                          \
        (-P0[1].z*c - P1[1].z*c + 2*P2[1].z*c -                         \
         P1[0].z*s - P1[1].z*s + 2*P1[2].z*s);                          \
                                                                        \
    Dt1[2].x =                                                          \
        (-P0[2].x*c - P1[2].x*c + 2*P2[2].x*c +                         \
         P1[2].x*s - 2*P1[1].x*s + P1[3].x*s);                          \
    Dt1[2].y =                                                          \
        (-P0[2].y*c - P1[2].y*c + 2*P2[2].y*c +                         \
         P1[2].y*s - 2*P1[1].y*s + P1[3].y*s);                          \
    Dt1[2].z =                                                          \
        (-P0[2].z*c - P1[2].z*c + 2*P2[2].z*c +                         \
         P1[2].z*s - 2*P1[1].z*s + P1[3].z*s);                          \
                                                                        \
    Dt1[3].x =                                                          \
        (-P0[3].x*c - P1[3].x*c + 2*P2[3].x*c +                        \
         3*P1[3].x*s - 3*P1[2].x*s);                                   \
    Dt1[3].y =                                                         \
        (-P0[3].y*c - P1[3].y*c + 2*P2[3].y*c +                        \
         3*P1[3].y*s - 3*P1[2].y*s);                                   \
    Dt1[3].z =                                                         \
        (-P0[3].z*c - P1[3].z*c + 2*P2[3].z*c +                        \
         3*P1[3].z*s - 3*P1[2].z*s);                                   \
                                                                       \
    Dt2[0].x =                                                         \
        (P2[0].x*c - 2*P1[0].x*c + P3[0].x*c +                         \
         3*P2[1].x*s - 3*P2[0].x*s);                                   \
    Dt2[0].y =                                                         \
        (P2[0].y*c - 2*P1[0].y*c + P3[0].y*c +                         \
         3*P2[1].y*s - 3*P2[0].y*s);                                   \
    Dt2[0].z =                                                         \
        (P2[0].z*c - 2*P1[0].z*c + P3[0].z*c +                         \
         3*P2[1].z*s - 3*P2[0].z*s);                                   \
                                                                       \
    Dt2[1].x =                                                         \
        (P2[1].x*c - 2*P1[1].x*c + P3[1].x*c -                         \
         P2[0].x*s - P2[1].x*s + 2*P2[2].x*s);                         \
    Dt2[1].y =                                                         \
        (P2[1].y*c - 2*P1[1].y*c + P3[1].y*c -                         \
         P2[0].y*s - P2[1].y*s + 2*P2[2].y*s);                         \
    Dt2[1].z =                                                         \
        (P2[1].z*c - 2*P1[1].z*c + P3[1].z*c -                         \
         P2[0].z*s - P2[1].z*s + 2*P2[2].z*s);                         \
                                                                       \
    Dt2[2].x =                                                         \
        (P2[2].x*c - 2*P1[2].x*c + P3[2].x*c +                         \
         P2[2].x*s - 2*P2[1].x*s + P2[3].x*s);                         \
    Dt2[2].y =                                                         \
        (P2[2].y*c - 2*P1[2].y*c + P3[2].y*c +                         \
         P2[2].y*s - 2*P2[1].y*s + P2[3].y*s);                         \
    Dt2[2].z =                                                         \
        (P2[2].z*c - 2*P1[2].z*c + P3[2].z*c +                         \
         P2[2].z*s - 2*P2[1].z*s + P2[3].z*s);                         \
                                                                       \
    Dt2[3].x =                                                         \
        (P2[3].x*c - 2*P1[3].x*c + P3[3].x*c +                         \
         3*P2[3].x*s - 3*P2[2].x*s);                                   \
    Dt2[3].y =                                                         \
        (P2[3].y*c - 2*P1[3].y*c + P3[3].y*c +                         \
         3*P2[3].y*s - 3*P2[2].y*s);                                   \
    Dt2[3].z =                                                         \
        (P2[3].z*c - 2*P1[3].z*c + P3[3].z*c +                         \
         3*P2[3].z*s - 3*P2[2].z*s);                                   \
                                                                       \
    Dt3[0].x =                                                         \
        (3*P3[0].x*c - 3*P2[0].x*c +                                   \
         3*P3[1].x*s - 3*P3[0].x*s);                                   \
    Dt3[0].y =                                                         \
        (3*P3[0].y*c - 3*P2[0].y*c +                                   \
         3*P3[1].y*s - 3*P3[0].y*s);                                   \
    Dt3[0].z =                                                         \
        (3*P3[0].z*c - 3*P2[0].z*c +                                   \
         3*P3[1].z*s - 3*P3[0].z*s);                                   \
                                                                       \
    Dt3[1].x =                                                         \
        (3*P3[1].x*c - 3*P2[1].x*c -                                   \
         P3[0].x*s - P3[1].x*s + 2*P3[2].x*s);                         \
    Dt3[1].y =                                                         \
        (3*P3[1].y*c - 3*P2[1].y*c -                                   \
         P3[0].y*s - P3[1].y*s + 2*P3[2].y*s);                         \
    Dt3[1].z =                                                         \
        (3*P3[1].z*c - 3*P2[1].z*c -                                   \
         P3[0].z*s - P3[1].z*s + 2*P3[2].z*s);                         \
                                                                       \
    Dt3[2].x =                                                         \
        (3*P3[2].x*c - 3*P2[2].x*c +                                   \
         P3[2].x*s - 2*P3[1].x*s + P3[3].x*s);                         \
    Dt3[2].y =                                                         \
        (3*P3[2].y*c - 3*P2[2].y*c +                                   \
         P3[2].y*s - 2*P3[1].y*s + P3[3].y*s);                         \
    Dt3[2].z =                                                         \
        (3*P3[2].z*c - 3*P2[2].z*c +                                   \
         P3[2].z*s - 2*P3[1].z*s + P3[3].z*s);                         \
                                                                       \
    Dt3[3].x =                                                         \
        (3*P3[3].x*c - 3*P2[3].x*c +                                   \
         3*P3[3].x*s - 3*P3[2].x*s);                                   \
    Dt3[3].y =                                                         \
        (3*P3[3].y*c - 3*P2[3].y*c +                                   \
         3*P3[3].y*s - 3*P3[2].y*s);                                   \
    Dt3[3].z =                                                         \
        (3*P3[3].z*c - 3*P2[3].z*c +                                   \
         3*P3[3].z*s - 3*P3[2].z*s);                                   \
                                                                       \
    Dp0[0].x =                                                         \
        (3*P0[0].x*s - 3*P1[0].x*s +                                   \
         3*P0[1].x*c - 3*P0[0].x*c);                                   \
    Dp0[0].y =                                                         \
        (3*P0[0].y*s - 3*P1[0].y*s +                                   \
         3*P0[1].y*c - 3*P0[0].y*c);                                   \
    Dp0[0].z =                                                         \
        (3*P0[0].z*s - 3*P1[0].z*s +                                   \
         3*P0[1].z*c - 3*P0[0].z*c);                                   \
                                                                       \
    Dp0[1].x =                                                         \
        (3*P0[1].x*s - 3*P1[1].x*s -                                   \
         P0[0].x*c - P0[1].x*c + 2*P0[2].x*c);                         \
    Dp0[1].y =                                                         \
        (3*P0[1].y*s - 3*P1[1].y*s -                                   \
         P0[0].y*c - P0[1].y*c + 2*P0[2].y*c);                         \
    Dp0[1].z =                                                         \
        (3*P0[1].z*s - 3*P1[1].z*s -                                   \
         P0[0].z*c - P0[1].z*c + 2*P0[2].z*c);                         \
                                                                       \
    Dp0[2].x =                                                         \
        (3*P0[2].x*s - 3*P1[2].x*s +                                   \
         P0[2].x*c - 2*P0[1].x*c + P0[3].x*c);                         \
    Dp0[2].y =                                                         \
        (3*P0[2].y*s - 3*P1[2].y*s +                                   \
         P0[2].y*c - 2*P0[1].y*c + P0[3].y*c);                         \
    Dp0[2].z =                                                         \
        (3*P0[2].z*s - 3*P1[2].z*s +                                   \
         P0[2].z*c - 2*P0[1].z*c + P0[3].z*c);                         \
                                                                       \
    Dp0[3].x =                                                         \
        (3*P0[3].x*s - 3*P1[3].x*s +                                   \
         3*P0[3].x*c - 3*P0[2].x*c);                                   \
    Dp0[3].y =                                                         \
        (3*P0[3].y*s - 3*P1[3].y*s +                                   \
         3*P0[3].y*c - 3*P0[2].y*c);                                   \
    Dp0[3].z =                                                         \
        (3*P0[3].z*s - 3*P1[3].z*s +                                   \
         3*P0[3].z*c - 3*P0[2].z*c);                                   \
                                                                       \
    Dp1[0].x =                                                         \
        (P0[0].x*s + P1[0].x*s - 2*P2[0].x*s +                         \
         3*P1[1].x*c - 3*P1[0].x*c);                                   \
    Dp1[0].y =                                                         \
        (P0[0].y*s + P1[0].y*s - 2*P2[0].y*s +                         \
         3*P1[1].y*c - 3*P1[0].y*c);                                   \
    Dp1[0].z =                                                         \
        (P0[0].z*s + P1[0].z*s - 2*P2[0].z*s +                         \
         3*P1[1].z*c - 3*P1[0].z*c);                                   \
                                                                       \
    Dp1[1].x =                                                         \
        (P0[1].x*s + P1[1].x*s - 2*P2[1].x*s -                         \
         P1[0].x*c - P1[1].x*c + 2*P1[2].x*c);                         \
    Dp1[1].y =                                                         \
        (P0[1].y*s + P1[1].y*s - 2*P2[1].y*s -                         \
         P1[0].y*c - P1[1].y*c + 2*P1[2].y*c);                         \
    Dp1[1].z =                                                         \
        (P0[1].z*s + P1[1].z*s - 2*P2[1].z*s -                         \
         P1[0].z*c - P1[1].z*c + 2*P1[2].z*c);                         \
                                                                       \
    Dp1[2].x =                                                         \
        (P0[2].x*s + P1[2].x*s - 2*P2[2].x*s +                         \
         P1[2].x*c - 2*P1[1].x*c + P1[3].x*c);                         \
    Dp1[2].y =                                                         \
        (P0[2].y*s + P1[2].y*s - 2*P2[2].y*s +                         \
         P1[2].y*c - 2*P1[1].y*c + P1[3].y*c);                         \
    Dp1[2].z =                                                         \
        (P0[2].z*s + P1[2].z*s - 2*P2[2].z*s +                         \
         P1[2].z*c - 2*P1[1].z*c + P1[3].z*c);                         \
                                                                       \
    Dp1[3].x =                                                         \
        (P0[3].x*s + P1[3].x*s - 2*P2[3].x*s +                         \
         3*P1[3].x*c - 3*P1[2].x*c);                                   \
    Dp1[3].y =                                                         \
        (P0[3].y*s + P1[3].y*s - 2*P2[3].y*s +                         \
         3*P1[3].y*c - 3*P1[2].y*c);                                   \
    Dp1[3].z =                                                         \
        (P0[3].z*s + P1[3].z*s - 2*P2[3].z*s +                         \
         3*P1[3].z*c - 3*P1[2].z*c);                                   \
                                                                       \
    Dp2[0].x =                                                         \
        (2*P1[0].x*s - P2[0].x*s - P3[0].x*s +                         \
         3*P2[1].x*c - 3*P2[0].x*c);                                   \
    Dp2[0].y =                                                         \
        (2*P1[0].y*s - P2[0].y*s - P3[0].y*s +                         \
         3*P2[1].y*c - 3*P2[0].y*c);                                   \
    Dp2[0].z =                                                         \
        (2*P1[0].z*s - P2[0].z*s - P3[0].z*s +                         \
         3*P2[1].z*c - 3*P2[0].z*c);                                   \
                                                                       \
    Dp2[1].x =                                                         \
        (2*P1[1].x*s - P2[1].x*s - P3[1].x*s -                         \
         P2[0].x*c - P2[1].x*c + 2*P2[2].x*c);                         \
    Dp2[1].y =                                                         \
        (2*P1[1].y*s - P2[1].y*s - P3[1].y*s -                         \
         P2[0].y*c - P2[1].y*c + 2*P2[2].y*c);                         \
    Dp2[1].z =                                                         \
        (2*P1[1].z*s - P2[1].z*s - P3[1].z*s -                         \
         P2[0].z*c - P2[1].z*c + 2*P2[2].z*c);                         \
                                                                       \
    Dp2[2].x =                                                         \
        (2*P1[2].x*s - P2[2].x*s - P3[2].x*s +                         \
         P2[2].x*c - 2*P2[1].x*c + P2[3].x*c);                         \
    Dp2[2].y =                                                         \
        (2*P1[2].y*s - P2[2].y*s - P3[2].y*s +                         \
         P2[2].y*c - 2*P2[1].y*c + P2[3].y*c);                         \
    Dp2[2].z =                                                         \
        (2*P1[2].z*s - P2[2].z*s - P3[2].z*s +                         \
         P2[2].z*c - 2*P2[1].z*c + P2[3].z*c);                         \
                                                                       \
    Dp2[3].x =                                                         \
        (2*P1[3].x*s - P2[3].x*s - P3[3].x*s +                         \
         3*P2[3].x*c - 3*P2[2].x*c);                                   \
    Dp2[3].y =                                                         \
        (2*P1[3].y*s - P2[3].y*s - P3[3].y*s +                         \
         3*P2[3].y*c - 3*P2[2].y*c);                                   \
    Dp2[3].z =                                                         \
        (2*P1[3].z*s - P2[3].z*s - P3[3].z*s +                         \
         3*P2[3].z*c - 3*P2[2].z*c);                                   \
                                                                       \
    Dp3[0].x =                                                         \
        (3*P2[0].x*s - 3*P3[0].x*s +                                   \
         3*P3[1].x*c - 3*P3[0].x*c);                                   \
    Dp3[0].y =                                                         \
        (3*P2[0].y*s - 3*P3[0].y*s +                                   \
         3*P3[1].y*c - 3*P3[0].y*c);                                   \
    Dp3[0].z =                                                         \
        (3*P2[0].z*s - 3*P3[0].z*s +                                   \
         3*P3[1].z*c - 3*P3[0].z*c);                                   \
                                                                       \
    Dp3[1].x =                                                         \
        (3*P2[1].x*s - 3*P3[1].x*s -                                   \
         P3[0].x*c - P3[1].x*c + 2*P3[2].x*c);                         \
    Dp3[1].y =                                                         \
        (3*P2[1].y*s - 3*P3[1].y*s -                                   \
         P3[0].y*c - P3[1].y*c + 2*P3[2].y*c);                         \
    Dp3[1].z =                                                         \
        (3*P2[1].z*s - 3*P3[1].z*s -                                   \
         P3[0].z*c - P3[1].z*c + 2*P3[2].z*c);                         \
                                                                       \
    Dp3[2].x =                                                         \
        (3*P2[2].x*s - 3*P3[2].x*s +                                   \
         P3[2].x*c - 2*P3[1].x*c + P3[3].x*c);                         \
    Dp3[2].y =                                                         \
        (3*P2[2].y*s - 3*P3[2].y*s +                                   \
         P3[2].y*c - 2*P3[1].y*c + P3[3].y*c);                         \
    Dp3[2].z =                                                         \
        (3*P2[2].z*s - 3*P3[2].z*s +                                   \
         P3[2].z*c - 2*P3[1].z*c + P3[3].z*c);                         \
                                                                       \
    Dp3[3].x =                                                         \
        (3*P2[3].x*s - 3*P3[3].x*s +                                   \
         3*P3[3].x*c - 3*P3[2].x*c);                                   \
    Dp3[3].y =                                                         \
        (3*P2[3].y*s - 3*P3[3].y*s +                                   \
         3*P3[3].y*c - 3*P3[2].y*c);                                   \
    Dp3[3].z =                                                         \
        (3*P2[3].z*s - 3*P3[3].z*s +                                   \
         3*P3[3].z*c - 3*P3[2].z*c);                                   \
}                                                                      \
MACRO_STOP

/**
 * \ingroup rtbezpatch
 * \ref RtBezierQuadTangent returns two matrices, of 16 control points
 * each, describing the tangents over a Bezier quadrilateral: one for
 * the parameter direction theta and one for the parameter direction
 * at right angles to theta.
 *
 * Only the x, y and z members of the output control points are
 * written.
 *
 * \param Dt    Output matrix of 16 control points for tangents
 *              in the parameter direction theta
 * \param Dp    Output matrix of 16 control points for tangents
 *              in the parameter direction at right angles to theta
 * \param theta Parameter direction theta, as passed to RwCos and RwSin
 * \param P     Matrix of 16 control points for a Bezier quadrilateral
 *
 * \see RtBezierQuadMatrixGetNormals
 */
void
RtBezierQuadTangent(RtBezierMatrix Dt, RtBezierMatrix Dp,
                    RwReal theta, RtBezierMatrix P)
{
    RWAPIFUNCTION(RWSTRING("RtBezierQuadTangent"));

    /* All of the work is done by the macro defined above. */
    RtBezierQuadTangentMacro(Dt, Dp, theta, P);

    RWRETURNVOID();
}

/*
 * Number of tangent directions ("spokes") generated across one right
 * angle of theta.  Twice this many tangent matrices are held at once:
 * the second half are the directions at right angles to the first half.
 */
#define SpokesPerRightAngle 18

/**
 * \ingroup rtbezpatch
 * \ref RtBezierQuadMatrixGetNormals calculates the surface normals
 * corresponding to the control points of a Bezier quadrilateral.
 *
 * Tangent matrices are generated for SpokesPerRightAngle parameter
 * directions between 0 and a right angle, together with the direction
 * at right angles to each, and converted to difference matrices using
 * a step of 1/3 in both patch coordinates.  The 4 x 4 elements of N
 * are then visited by stepping those difference matrices.  At each
 * element every pair of spoke tangents is crossed, and the cross
 * product with the greatest squared length is normalized and stored.
 *
 * If no pair of tangents has a cross product of non-zero length, the
 * element is left as the zero vector.  Only the x, y and z members
 * of N are written.
 *
 * \param N     Output matrix of normals
 * \param B     Matrix of 16 control points for Bezier quadrilateral
 *
 * \see RtBezierQuadTangent
 */
void
RtBezierQuadMatrixGetNormals(RtBezierMatrix N, RtBezierMatrix B)
{
    const RwReal        stepU = ((RwReal) 1) / ((RwReal) 3);
    const RwReal        stepV = ((RwReal) 1) / ((RwReal) 3);
    RtBezierMatrix      T[(SpokesPerRightAngle << 1)];
    RtBezierMatrix      weight;
    RtBezierRow         Tan[(SpokesPerRightAngle << 1)];
    RwV3d               Normal;
    RwInt32             j;
    RwInt32             spoke;

    RWAPIFUNCTION(RWSTRING("RtBezierQuadMatrixGetNormals"));

    /*
     * Prime spoke tangent matrices: T[spoke] holds the tangents in
     * direction theta, T[spoke + SpokesPerRightAngle] those at right
     * angles to it.  Each is then replaced, in place, by its
     * difference matrix, going through the shared scratch "weight".
     */
    for (spoke = 0; spoke < SpokesPerRightAngle; spoke++)
    {
        const RwReal        radians =
            (rwPIOVER2 * spoke) / SpokesPerRightAngle;

        RtBezierQuadTangentMacro(T[spoke],
                                 T[spoke + SpokesPerRightAngle],
                                 radians, B);

        RtBezierQuadMatrixWeightSetupMacro(weight, T[spoke]);
        RtBezierQuadMatrixDifferenceSetupMacro(T[spoke], weight, stepU,
                                               stepV);

        RtBezierQuadMatrixWeightSetupMacro(weight,
                                           T[spoke +
                                             SpokesPerRightAngle]);
        RtBezierQuadMatrixDifferenceSetupMacro(T
                                               [spoke +
                                                SpokesPerRightAngle],
                                               weight, stepU, stepV);
    }

    /* Work row by row */
    for (j = 0; j <= 3; j++)
    {
        RwInt32             i;

        /* Prime differencing rows for this row: copy the first row of
         * every difference matrix so it can be stepped in U without
         * disturbing the matrix itself */
        for (spoke = 0; spoke < (SpokesPerRightAngle << 1); spoke++)
        {
            Tan[spoke][0] = T[spoke][0][0];
            Tan[spoke][1] = T[spoke][0][1];
            Tan[spoke][2] = T[spoke][0][2];
            Tan[spoke][3] = T[spoke][0][3];
        }

        /* Work element by element */
        for (i = 0; i <= 3; i++)
        {
            /* Greatest squared cross product length seen so far */
            RwReal              factor = 0;

            N[i][j].x = 0;
            N[i][j].y = 0;
            N[i][j].z = 0;

            /* Take normal from longest spoke cross product */

            for (spoke = 0; spoke < (SpokesPerRightAngle << 1); spoke++)
            {
                const RtBezierV4d  *const tan_s = &Tan[spoke][0];
                RwInt32             t;

                for (t = spoke + 1; t < (SpokesPerRightAngle << 1); t++)
                {
                    const RtBezierV4d  *const tan_t = &Tan[t][0];
                    RwReal              candidate;

                    RwV3dCrossProductMacro(&Normal, tan_s, tan_t);
                    candidate = RwV3dDotProductMacro(&Normal, &Normal);

                    /* factor starts at 0, so candidate is strictly
                     * positive here and the reciprocal is safe */
                    if (factor < candidate)
                    {
#if (0 && defined(MONITOR_SPOKES))
                        RWMESSAGE(("%s(%d): [%d,%d] fact %f cand %f\n",
                                   __FILE__, __LINE__,
                                   spoke, t, factor, candidate));
#endif /* (0 && defined(MONITOR_SPOKES)) */

                        factor = candidate;
                        candidate = ((RwReal) 1) / candidate;
                        rwSqrtMacro(candidate, candidate);

                        N[i][j].x = Normal.x * candidate;
                        N[i][j].y = Normal.y * candidate;
                        N[i][j].z = Normal.z * candidate;
                    }
                }

                /* Tan[spoke] is only paired with later spokes, so it
                 * can be advanced in U as soon as its pairs are done;
                 * the later spokes are still at the current element */
                RtBezierQuadDifferenceStepUMacro(Tan[spoke]);
            }

        }

        /* Advance every difference matrix in V for the next row */
        for (spoke = 0; spoke < (SpokesPerRightAngle << 1); spoke++)
        {
            RtBezierQuadDifferenceStepVMacro(T[spoke]);
        }

    }

    RWRETURNVOID();
}

#if ( defined(RWDEBUG) || defined(RWSUPPRESSINLINE) )

/**
 * \ingroup rtbezpatch
 * \ref RtBezierQuadDifferenceStepU updates a difference row for
 * a step in the U patch coordinate.
 *
 * Note that this function form is only compiled when RWDEBUG or
 * RWSUPPRESSINLINE is defined; for efficiency, it is available as a
 * macro for final release versions of an application.
 *
 * \param row  Difference row to update (in place) for a step in the
 *             U patch coordinate
 *
 * \see RtBezierQuadDifferenceStepV
 * \see RtBezierQuadMatrixWeightSetup
 * \see RtBezierQuadMatrixDifferenceSetup
 * \see RtBezierQuadMatrixSample
 *
 * The following code illustrates the use of \ref RtBezierQuadDifferenceStepU
 \verbatim
   {
      static RtBezierMatrix control = {
          {{00, 00, 00}, {30, 00, 00}, {60, 00, 00}, {90, 00, 00}},
          {{00, 30, 00}, {30, 30, 90}, {60, 30, 90}, {90, 30, 00}},
          {{00, 60, 00}, {30, 60, 90}, {60, 60, 90}, {90, 60, 00}},
          {{00, 90, 00}, {30, 90, 00}, {60, 90, 00}, {90, 90, 00}}
      };
      RtBezierMatrix      weight;
      RtBezierMatrix      difference;
      RtBezierRow         row;
      RwInt32             i;
      RwInt32             j;
      RwReal              u;
      RwReal              v;

      RtBezierQuadMatrixWeightSetup(weight, control);
      RtBezierQuadMatrixDifferenceSetup(difference, weight,
                                      ((RwReal) 1) / ((RwReal) 8),
                                      ((RwReal) 1) / ((RwReal) 8));

      for (j = 0; j <= 8; j++)
      {
          v = ((RwReal) j) / ((RwReal) 8);

          // Copy the first row of the difference matrix as the working row
          row[0] = difference[0][0];
          row[1] = difference[0][1];
          row[2] = difference[0][2];
          row[3] = difference[0][3];

          for (i = 0; i <= 8; i++)
          {
              u = ((RwReal) i) / ((RwReal) 8);

              // row[0] contains the point at (u,v) on the Bezier patch

              RtBezierQuadDifferenceStepU(row);

          }
          RtBezierQuadDifferenceStepV(difference);
      }
   }
  \endverbatim
 */
void
RtBezierQuadDifferenceStepU(RtBezierRow row)
{
    RWAPIFUNCTION(RWSTRING("RtBezierQuadDifferenceStepU"));
    /* Function form simply expands the macro implementation */
    RtBezierQuadDifferenceStepUMacro(row);
    RWRETURNVOID();
}

/**
 * \ingroup rtbezpatch
 * \ref RtBezierQuadDifferenceStepV updates a difference matrix for
 * a step in the V patch coordinate.
 *
 * Note that this function form is only compiled when RWDEBUG or
 * RWSUPPRESSINLINE is defined; for efficiency, it is available as a
 * macro for final release versions of an application.
 *
 * \param mat  Difference matrix to update (in place) for a step in the
 *             V patch coordinate
 *
 * \see RtBezierQuadDifferenceStepU
 * \see RtBezierQuadMatrixWeightSetup
 * \see RtBezierQuadMatrixDifferenceSetup
 * \see RtBezierQuadMatrixSample
 *
 * The following code illustrates the use of \ref RtBezierQuadDifferenceStepV
 \verbatim
  {
      static RtBezierMatrix control = {
          {{00, 00, 00}, {30, 00, 00}, {60, 00, 00}, {90, 00, 00}},
          {{00, 30, 00}, {30, 30, 90}, {60, 30, 90}, {90, 30, 00}},
          {{00, 60, 00}, {30, 60, 90}, {60, 60, 90}, {90, 60, 00}},
          {{00, 90, 00}, {30, 90, 00}, {60, 90, 00}, {90, 90, 00}}
      };
      RtBezierMatrix      weight;
      RtBezierMatrix      difference;
      RtBezierRow         row;
      RwInt32             i;
      RwInt32             j;
      RwReal              u;
      RwReal              v;

      RtBezierQuadMatrixWeightSetup(weight, control);
      RtBezierQuadMatrixDifferenceSetup(difference, weight,
                                      ((RwReal) 1) / ((RwReal) 8),
                                      ((RwReal) 1) / ((RwReal) 8));

      for (j = 0; j <= 8; j++)
      {
          v = ((RwReal) j) / ((RwReal) 8);

          // Copy the first row of the difference matrix as the working row
          row[0] = difference[0][0];
          row[1] = difference[0][1];
          row[2] = difference[0][2];
          row[3] = difference[0][3];

          for (i = 0; i <= 8; i++)
          {
              u = ((RwReal) i) / ((RwReal) 8);

              // row[0] contains the point at (u,v) on the Bezier patch

              RtBezierQuadDifferenceStepU(row);

          }
          RtBezierQuadDifferenceStepV(difference);
      }
  }
  \endverbatim
 */
void
RtBezierQuadDifferenceStepV(RtBezierMatrix mat)
{
    RWAPIFUNCTION(RWSTRING("RtBezierQuadDifferenceStepV"));
    /* Function form simply expands the macro implementation */
    RtBezierQuadDifferenceStepVMacro(mat);
    RWRETURNVOID();
}

/**
 * \ingroup rtbezpatch
 * \ref RtBezierQuadMatrixSample returns a point on a Bezier patch
 * at the specified coordinates.
 *
 * Note that this function form is only compiled when RWDEBUG or
 * RWSUPPRESSINLINE is defined; it expands the equivalent macro.
 *
 * \param out  Output point on Bezier patch
 * \param P  Matrix of Bezier control points
 * \param u  U patch coordinate in [0,1]
 * \param v  V patch coordinate in [0,1]
 *
 * \see RtBezierQuadDifferenceStepU
 * \see RtBezierQuadDifferenceStepV
 *
 * The following code illustrates the use of \ref RtBezierQuadMatrixSample
 * \verbatim
   {
       static RtBezierMatrix control = {
           {{00, 00, 00}, {30, 00, 00}, {60, 00, 00}, {90, 00, 00}},
           {{00, 30, 00}, {30, 30, 90}, {60, 30, 90}, {90, 30, 00}},
           {{00, 60, 00}, {30, 60, 90}, {60, 60, 90}, {90, 60, 00}},
           {{00, 90, 00}, {30, 90, 00}, {60, 90, 00}, {90, 90, 00}}
       };
       RwInt32             i;
       RwInt32             j;
       RwV3d               p;
       RwReal              u;
       RwReal              v;

       for (j = 0; j <= 8; j++)
       {
           v = ((RwReal) j) / ((RwReal) 8);

           for (i = 0; i <= 8; i++)
           {
               u = ((RwReal) i) / ((RwReal) 8);

               RtBezierQuadMatrixSample(&p, control, u, v);

               // p now contains the point at (u,v) on the Bezier patch
           }
       }
   }
   \endverbatim
 */
void
RtBezierQuadMatrixSample(RwV3d * out, RtBezierMatrix P,
                         RwReal u, RwReal v)
{
    RWAPIFUNCTION(RWSTRING("RtBezierQuadMatrixSample"));
    /* Function form simply expands the macro implementation */
    RtBezierQuadMatrixSampleMacro(out, P, u, v);
    RWRETURNVOID();
}

#endif /* ( defined(RWDEBUG) || defined(RWSUPPRESSINLINE) ) */
