/* *INDENT-OFF* */

/*
 * matInstance
 * default per-mesh callback functions for PS2-specific object pipelines. Instancing work
 * 
 * Copyright (c) Criterion Software Limited
 */
/****************************************************************************
 *                                                                          *
 * module : matInstance.c                                                   *
 *                                                                          *
 * purpose: per-mesh instancing support for PS2All object pipelines         *
 *                                                                          *
 ****************************************************************************/

/*
#### SYNCHRONISATION
####
#### UP TO DATE WITH VERSION 1.179 OF nodePS2MatInstance.c (1.167 ignored(?))
####
#### SYNCHRONISATION
*/

//TODO[9]: rename this file to be all lower-case

/****************************************************************************
 includes
 */

#include <rwcore.h>

#include "matinstance.h"
#include "ps2alldbg.h"


/****************************************************************************
 local defines
 */

/* VU related defines. These really come from elsewhere */
/* NOTE(review): TS/TL/PL presumably stand for tristrip/trilist/pointlist,
 * matching the vu1MaxTSInputSize/vu1MaxTLInputSize/vu1MaxPLInputSize
 * fields used with triStrip/triList/pointList further down - confirm. */
#define VU1_MAX_TS_INPUT_SIZE 256
#define VU1_MAX_TL_INPUT_SIZE 64

/* Seems a reasonable value given TS size is in vertices not triangles */
#define VU1_MAX_PL_INPUT_SIZE VU1_MAX_TS_INPUT_SIZE

/* Build switches. Several below carry a trailing 'x' (OVERRIDELIGHTx,
 * DMADUMPx, CLEARMEMx, ...): such a line defines only the x-suffixed
 * name, so a test on the plain name - e.g. defined(CLEARMEM) further
 * down - is not satisfied by it. Presumably dropping the 'x' is how a
 * switch gets turned on; the light/DMA switches are not tested in the
 * portion of the file reviewed here, so confirm against their users. */
#define OVERRIDELIGHTx
#define SUBSISTLIGHTx

/* Use this to get compatibility ITOP */
#define OLDITOP

/* Define to get silly amounts of info on stdout */
#define DMADUMPx
#define DMADUMPDATAx

#define CLEARMEMx

/* Only compiled when CLEARMEM (no trailing 'x') is defined */
#if (defined(CLEARMEM))
unsigned long clearMemVal=1;
#endif /* (defined(CLEARMEM)) */

/* Fallback for when no DEBUGMARK() has been supplied by this point:
 * the call expands to a long zero constant */
#if (!defined(DEBUGMARK))
#define DEBUGMARK() (0l)
#endif /* (!defined(DEBUGMARK)) */


//TODO[9]: COPY OF ps2all.c DEFINES, REMOVE WHEN INLINE EVERYTHING...
#define PRIVATEDATATYPE rxNodePS2AllMatPvtData

#define MESSAGE(_string) \
    RwDebugSendMessage(rwDEBUGMESSAGE, "PS2All.csl", (_string))

/* Get CL_CODE strings for error messages.
 *
 * GETCLCODESTRING(_cl_code, _string) copies a short name for the cluster
 * code _cl_code into _string using strcpy:
 *   CL_XYZ -> "XYZ", CL_UV -> "UV", CL_UV2 -> "UV2", CL_RGBA -> "RGBA",
 *   CL_NORMAL -> "Normal", CL_USER1..CL_USER4 -> "User1".."User4",
 *   anything else -> "!UNKNOWN!".
 *
 * _string must be a writable char buffer of at least 10 bytes (the
 * longest name, "!UNKNOWN!", is 9 characters plus the terminator); the
 * copy is unbounded, so a smaller buffer will be overrun.
 *
 * The body is wrapped in MACRO_START/MACRO_STOP and is meant to be used
 * as a statement.
 *
 * NOTE(review): strcpy needs a declaration in scope; <string.h> is not
 * included directly by this file - presumably it arrives via rwcore.h,
 * confirm. */
#define GETCLCODESTRING(_cl_code, _string)  \
MACRO_START                                 \
{                                           \
    switch(_cl_code)                        \
    {                                       \
    case CL_XYZ:                            \
        strcpy(_string, "XYZ");             \
        break;                              \
    case CL_UV:                             \
        strcpy(_string, "UV");              \
        break;                              \
    case CL_UV2:                            \
        strcpy(_string, "UV2");             \
        break;                              \
    case CL_RGBA:                           \
        strcpy(_string, "RGBA");            \
        break;                              \
    case CL_NORMAL:                         \
        strcpy(_string, "Normal");          \
        break;                              \
    case CL_USER1:                          \
        strcpy(_string, "User1");           \
        break;                              \
    case CL_USER2:                          \
        strcpy(_string, "User2");           \
        break;                              \
    case CL_USER3:                          \
        strcpy(_string, "User3");           \
        break;                              \
    case CL_USER4:                          \
        strcpy(_string, "User4");           \
        break;                              \
    default:                                \
        strcpy(_string, "!UNKNOWN!");       \
        break;                              \
    }                                       \
}                                           \
MACRO_STOP


/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

   Functions

   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */

/******************** Wrapper funcs for macros in debug **********************/

/**
 * \ingroup rpmeshps2all
 * \ref RpMeshPS2AllTestNumVerts is a macro to test for a
 * change in the number of vertices in a mesh since the mesh
 * was last instanced.
 *
 * This is a helper macro which is called from the default
 * \ref RxPipelineNodePS2AllMatMeshInstanceTestCallBack for
 * RpAtomics and RpWorldSectors,
 * \ref RpMeshPS2AllMeshInstanceTestCallBack. It calculates
 * the number of vertices necessary in instance data for an
 * RpMesh (on PS2, this is dependent on the primitive type
 * of the mesh since we 'dereference' vertex indices at
 * instance time so that our VU1 code can process geometry
 * without using indices) and causes a full reinstance (see
 * \ref RxInstanceFlags) if this value has changed since
 * the mesh was last instanced. This macro may be used in
 * constructing user callbacks.
 *
 * This macro will be a function in a debug build, but it
 * may be used explicitly in macro form through the name
 * RpMeshPS2AllTestNumVertsMacro and in function
 * form through RpMeshPS2AllTestNumVertsFunc,
 * depending on how you wish to balance code size and
 * function call overheads. Note that the macro contains
 * RWASSERT, so it must be used within functions using
 * RWFUNCTION and RWRETURN.
 *
 *
 * \param  ps2AllPipeData A pointer to a \ref RxPS2AllPipeData struct
 *                        containing information relevant to the
 *                        current pipeline execution
 *
 * \see RpMeshPS2AllMeshInstanceTestCallBack
 * \see RxPipelineNodePS2AllMatMeshInstanceTestCallBack
 */
void
/* The definition is only spelled RpMeshPS2AllTestNumVerts when DOXYGEN is
 * defined, so that the documentation above attaches to the public name;
 * otherwise the function is compiled as RpMeshPS2AllTestNumVertsFunc. */
#if (defined(DOXYGEN))
RpMeshPS2AllTestNumVerts(RxPS2AllPipeData *ps2AllPipeData)
#else /* (defined(DOXYGEN)) */
RpMeshPS2AllTestNumVertsFunc(RxPS2AllPipeData *ps2AllPipeData)
#endif /* (defined(DOXYGEN)) */
{
    /* The name string given here is the public name (no "Func" suffix) */
    RWFUNCTION(RWSTRING("RpMeshPS2AllTestNumVerts"));
    /* All the work is done by the macro form; this function only wraps it
     * in the RWFUNCTION/RWRETURN framing that the macro's RWASSERT needs */
    RpMeshPS2AllTestNumVertsMacro(ps2AllPipeData);
    RWRETURNVOID();
}

/**
 * \ingroup rpmeshps2all
 * \ref RpMeshPS2AllTestMeshID is a macro to test for
 * changes in an RpMesh's identifier.
 *
 * This is a helper macro which is called from the default
 * \ref RxPipelineNodePS2AllMatMeshInstanceTestCallBack for
 * RpAtomics and RpWorldSectors,
 * \ref RpMeshPS2AllMeshInstanceTestCallBack. It generates
 * an identifier for the mesh, compares it to the existing
 * mesh identifier in the mesh's instance data (it uses
 * RWPS2ALLRESENTRYHEADERFROMRESENTRY and
 * RWPS2ALLRESENTRYHEADERGETMESHIDENTIFIER to extract this
 * from the meshCache member of the \ref RxPS2AllPipeData
 * struct) and updates the meshInstance member if it finds
 * any differences.
 *
 * The macro RPMESHPS2ALLMAKEMESHID can be used to construct
 * the identifier for an RpMesh from its parent RpMeshHeader.
 * RPMESHPS2ALLMESHIDGETFLAGS can be used to extract the
 * RpMeshHeader flags from this identifier. These macros may
 * be used in constructing user callbacks.
 *
 * A full reinstance (see \ref RxInstanceFlags) will be caused
 * if the RpMeshHeader flags have changed.
 *
 * This macro will be a function in a debug build, but it
 * may be used explicitly in macro form through the name
 * RpMeshPS2AllTestMeshIDMacro and in function
 * form through RpMeshPS2AllTestMeshIDFunc,
 * depending on how you wish to balance code size and
 * function call overheads. Note that the macro contains
 * RWASSERT, so it must be used within functions using
 * RWFUNCTION and RWRETURN.
 *
 *
 * \param  ps2AllPipeData A pointer to a \ref RxPS2AllPipeData struct
 *                        containing information relevant to the
 *                        current pipeline execution
 *
 * \see RpMeshPS2AllMeshInstanceTestCallBack
 * \see RxPipelineNodePS2AllMatMeshInstanceTestCallBack
 */
void
/* The definition is only spelled RpMeshPS2AllTestMeshID when DOXYGEN is
 * defined, so that the documentation above attaches to the public name;
 * otherwise the function is compiled as RpMeshPS2AllTestMeshIDFunc. */
#if (defined(DOXYGEN))
RpMeshPS2AllTestMeshID(RxPS2AllPipeData *ps2AllPipeData)
#else /* (defined(DOXYGEN)) */
RpMeshPS2AllTestMeshIDFunc(RxPS2AllPipeData *ps2AllPipeData)
#endif /* (defined(DOXYGEN)) */
{
    /* The name string given here is the public name (no "Func" suffix) */
    RWFUNCTION(RWSTRING("RpMeshPS2AllTestMeshID"));
    /* All the work is done by the macro form; this function only wraps it
     * in the RWFUNCTION/RWRETURN framing that the macro's RWASSERT needs */
    RpMeshPS2AllTestMeshIDMacro(ps2AllPipeData);
    RWRETURNVOID();
}

/***************** End of wrapper funcs for macros in debug ******************/


/****************************************************************************
 reDestroyCallBack

 Destroy callback for a resEntry that is about to be freed. While the
 refCnt field of the entry's PS2All header is non-zero (i.e. DMA has not
 finished with the entry) we busy-wait, calling _sweFlush() on every pass,
 and only return once the count reads zero. Having to wait at all should
 not normally happen - if it does, you're most likely going to drop
 to < 60Hz.
 */
void
reDestroyCallBack(RwResEntry *resEntry)
{
//TODO[9]:        &(((rwPS2AllResEntryHeader*)(resEntry+1))->refCnt);
    /* Accessed through a volatile pointer so that the count is re-read
     * on every pass of the wait loop below */
    volatile RwUInt32 *pendingRefs =
        (volatile RwUInt32 *) &(RWPS2ALLRESENTRYHEADERFROMRESENTRY(resEntry)->refCnt);

    RWFUNCTION(RWSTRING("reDestroyCallBack"));

    /* Spin until nothing references the entry any more */
    while (0 != *pendingRefs)
    {
        _sweFlush();
    }

    RWRETURNVOID();
}

/****************************************************************************
 _rwPS2AllRabinsConstructionTimeCode
 */
RwBool /* success? */
_rwPS2AllRabinsConstructionTimeCode(rxNodePS2AllMatPvtData *pvtData)
{
    RwBool result = FALSE;
    RwBool totallyOpaque = TRUE;
    RwBool pointList;
//TODO[6]: INTS???? WHAT NEED HAVE WE FOR THE PLATFORM-SPECIFIC TYPES????
//        (ASIDE FROM U_LONG128 - DO WE HAVE RwUInt128?)
    int sizeOnVU;
    int opaqueStep;
    int offsetOnVU;
    int dataTmp;
    int prevSize;
    int prevElementSize;
    int i;

    RWFUNCTION(RWSTRING("_rwPS2AllRabinsConstructionTimeCode"));

    /* We have to do the calculations twice as don't know until */
    /* pipeline execute time if we will be instancing a list or strip */

    /* Further, we don't know how many verts there will be in the meshes */
    /* that pass down this pipe. All we can do is calculate block sizes */
    /* and strides */

    /* This is really inefficient, but I want it to work before I try */
    /* and make it neat */


    /* To support point lists, we reuse the triList structure which is
     * very similar. We don't bother setting up the triStrip data cos
     * a pointList pipeline can *only* handle pointLists, not triLists
     * or triStrips. */
    pointList = (pvtData->pipeType&rpMESHHEADERPOINTLIST)?TRUE:FALSE;


    /* Cluster attributes are set up by the pipelinenode init function */


//TODO[4]: I WANT TO TIDY THIS LOT UP - USE SMALL LUTS AND LESS PREDICATION/SWITCHING
//        - TIE IN WITH VERTEX FORMAT DESCRIPTORS

//TODO[3]: CAN WE MOVE ANY MORE INSTANCE-TIME STUFF TO CONSTRUCTION-TIME HERE?
//        - WOULD SEPARATING TRILISTS/TRISTRIPS INTO SEPARATE
//         VERTEX-FORMAT-DESCRIPTORS/REINSTANCE-DATABLOCKS HELP?
//        - WORTH THE EFFORT? GIVEN INSTANCING AIN'T FAST-PATH?

#if (defined(FASTMORPH))
    pvtData->clinfo[CL_MAXCL].attrib = CL_ATTRIB_REQUIRED |
                                       CL_ATTRIB_OPAQUE |
                                       CL_V3_32;

    pvtData->clinfo[CL_MAXCL+1].attrib = CL_ATTRIB_REQUIRED |
                                         CL_ATTRIB_OPAQUE |
                                         CL_V4_8;
#endif /* (defined(FASTMORPH)) */
    /* Are we totally opaque? How big on the VU */
    totallyOpaque = TRUE;
    sizeOnVU = 0;
    for (i=0; i<((int)(CL_MAXCL)); i++)
    {
        if ((pvtData->clinfo[i].attrib & CL_ATTRIB_REQUIRED) ||
            (pvtData->clinfo[i].attrib & CL_ATTRIB_PLACEHOLDER))
        {
            if ((pvtData->clinfo[i].attrib & CL_ATTRIB_REQUIRED) &&
                (!(pvtData->clinfo[i].attrib & CL_ATTRIB_OPAQUE)))
            {
                totallyOpaque = FALSE;
            }
            sizeOnVU += 1; /* pvtData->clinfo[i].size; */
        }
    }

    /* Save for future reference */
    pvtData->totallyOpaque = (RwUInt8)totallyOpaque;

    /* If the VU input "vertex" is zero qw we die */
    /* Eventually we should support this (or zero vertices) */
    RWASSERT(sizeOnVU);

    /* If the calculated size on the VU isn't the same as the user set size */
    RWASSERT(sizeOnVU == pvtData->sizeOnVU);
    if (sizeOnVU != pvtData->sizeOnVU)
    {
        RWRETURN(FALSE);
    }

    /* Figure out the batch size and batches per tag for tristrips */
    if (FALSE == pointList)
    {
        if (totallyOpaque)
        {
            /* More optimal upload when we don't need to cope with striped data
             * as well (the duplication of the 2 verts per batch is done by
             * copying them in-place in opaque arrays - you can't do that for
             * broken-out arrays and if there are any broken-out arrays, you
             * can't have multiple batches hanging off one tag whereas you
             * can if there are none. #*!N.B!*# the mechanism used in the
             * instancing code to deal with this is not obvious. It relies on
             * fieldRec[#].numVerts being set up differently when things are
             * opaque/not - for opaque clusters it's set to batchSize, for
             * non-opaque ones, it's set (later, in RabinsMatInstanceCode) to
             * the number of verts in the whole mesh).
             * When we are totally opaque, we use fewer DMA tags (no DMA ref
             * tags inserted between batches, pointing at broken-out data),
             * so skip size is smaller and we can use tags that say "upload
             * N blocks of this size" */
            pvtData->triStrip.batchSize = (pvtData->vu1MaxTSInputSize/sizeOnVU)&~3;

            /* This includes the rather interesting assumption that totally */
            /* opaque means all fields required */
            pvtData->triStrip.batchesPerTag = 65535/(5
                                                     + ((3*pvtData->triStrip.batchSize+3)>>2)
                                                     + ((4*pvtData->triStrip.batchSize+3)>>2)
                                                     + ((pvtData->triStrip.batchSize+3)>>2)
                                                     + ((pvtData->triStrip.batchSize*3+15)>>4));
#if (defined(FASTMORPH))
            pvtData->triStrip.morphBatchSize = (pvtData->vu1MaxTSInputSize
                                                /(sizeOnVU+2))&~3;
            pvtData->triStrip.morphBatchesPerTag = 65535
                           /(7
                             + ((3*pvtData->triStrip.morphBatchSize+3)>>2)
                             + ((4*pvtData->triStrip.morphBatchSize+3)>>2)
                             + ((pvtData->triStrip.morphBatchSize+3)>>2)
                             + ((pvtData->triStrip.morphBatchSize*3+15)>>4)
                             /* Extra set of xyz and norm */
                             + ((3*pvtData->triStrip.morphBatchSize+3)>>2)
                             + ((pvtData->triStrip.morphBatchSize*3+15)>>4));
#endif /* (defined(FASTMORPH)) */
        }
        else
        {
            /* We need 2 verts over a qw size so we can restart on a */
            /* qw boundary */
            pvtData->triStrip.batchSize = (((pvtData->vu1MaxTSInputSize/sizeOnVU)-2)&~3)+2;
            pvtData->triStrip.batchesPerTag = 1;
#if (defined(FASTMORPH))
            pvtData->triStrip.morphBatchSize = (((pvtData->vu1MaxTSInputSize
                                                  /(sizeOnVU+2))-2)&~3)+2;
            pvtData->triStrip.morphBatchesPerTag = 1;
#endif /* (defined(FASTMORPH)) */
        }
    }

    /* Figure out the batch size and batches per tag for trilists */
    if (FALSE == pointList)
    {
        pvtData->triList.batchSize = ((pvtData->vu1MaxTLInputSize/sizeOnVU)/12)*12;
#if (defined(FASTMORPH))
        pvtData->triList.morphBatchSize = ((pvtData->vu1MaxTLInputSize
                                            /(sizeOnVU+2))/12)*12;
#endif /* (defined(FASTMORPH)) */
    }
    else
    {
        pvtData->triList.batchSize = (pvtData->vu1MaxPLInputSize/sizeOnVU)&~3;
#if (defined(FASTMORPH))
        /* Not convinced this is necessary */
        pvtData->triList.morphBatchSize = (pvtData->vu1MaxPLInputSize
                                           /(sizeOnVU+2))&~3;
#endif /* (defined(FASTMORPH)) */
    }
    if (totallyOpaque)
    {
        /* This includes the rather interesting assumption that totally */
        /* opaque means all fields required */
        pvtData->triList.batchesPerTag = 65535/(5
                                                + ((3*pvtData->triList.batchSize+3)>>2)
                                                + ((4*pvtData->triList.batchSize+3)>>2)
                                                + ((pvtData->triList.batchSize+3)>>2)
                                                + ((pvtData->triList.batchSize*3+15)>>4));
#if (defined(FASTMORPH))
        pvtData->triList.morphBatchesPerTag = 65535
                          /(7
                            + ((3*pvtData->triList.morphBatchSize+3)>>2)
                            + ((4*pvtData->triList.morphBatchSize+3)>>2)
                            + ((pvtData->triList.morphBatchSize+3)>>2)
                            + ((pvtData->triList.morphBatchSize*3+15)>>4)
                            /* Allow for an extra set of xyz and normals */
                            + ((3*pvtData->triList.morphBatchSize+3)>>2)
                            + ((pvtData->triList.morphBatchSize*3+15)>>4));
#endif /* (defined(FASTMORPH)) */
    }
    else
    {
        pvtData->triList.batchesPerTag = 1;
#if (defined(FASTMORPH))
        pvtData->triList.morphBatchesPerTag = 1;
#endif /* (defined(FASTMORPH)) */
    }

    /* We now figure out what the opaque step is for tristrips */
    if (!pointList)
    {
        if (totallyOpaque)
        {
            opaqueStep = 1;
        }
        else
        {
            opaqueStep = 2;
        }
        for (i=0; i<((int)(CL_MAXCL)); i++)
        {
            if (pvtData->clinfo[i].attrib  & CL_ATTRIB_REQUIRED)
            {
                if (pvtData->clinfo[i].attrib & CL_ATTRIB_OPAQUE)
                {
                    RWASSERT((i == CL_XYZ) || (i == CL_UV) || (i == CL_UV2) ||
                             (i == CL_RGBA) || (i == CL_NORMAL));
                    /* Per batch size is complicated by the fact that */
                    /* each batch is not a whole multiple of qw */
                    switch (i)
                    {
                        case CL_XYZ:
                            opaqueStep += (3*pvtData->triStrip.batchSize+3)>>2;
                            break;
                        case CL_UV:
                            opaqueStep += (2*pvtData->triStrip.batchSize+3)>>2;
                            break;
                        case CL_UV2:
                            opaqueStep += (4*pvtData->triStrip.batchSize+3)>>2;
                            break;
                        case CL_RGBA:
                            opaqueStep += (pvtData->triStrip.batchSize+3)>>2;
                            break;
                        case CL_NORMAL:
                            opaqueStep += (3*pvtData->triStrip.batchSize+15)>>4;
                            break;
                    }
                    /* + vif tag */
                    opaqueStep +=1;
                }
                else
                {
                    /* Dma ref and Dma vnt 0 */
                    opaqueStep += 2;
                }
            }
        }

        /* Opaque data starts (2*numStripes + 1) qw from data */
        /* each following opaque cluster starts 1+prev size */
        dataTmp = 0 + 2*pvtData->numStripes + 1;
        prevSize = 0;
        prevElementSize = 0;
        offsetOnVU = 0;
        for (i=0; i<((int)(CL_MAXCL)); i++)
        {
            if (pvtData->clinfo[i].attrib  & CL_ATTRIB_REQUIRED)
            {
                pvtData->triStrip.fieldRec[i].vuoffset = offsetOnVU;
                if (pvtData->clinfo[i].attrib & CL_ATTRIB_OPAQUE)
                {
                    int batchSize
                        = pvtData->triStrip.fieldRec[i].numVerts
                        = pvtData->triStrip.batchSize;
                    dataTmp += prevSize + 1;
                    pvtData->triStrip.fieldRec[i].dataoffset = dataTmp;
                    pvtData->triStrip.fieldRec[i].skip = opaqueStep;
                    pvtData->triStrip.fieldRec[i].reverse = 0;
                    offsetOnVU += 1; /* size on vu */
                    switch (i)
                    {
                        case CL_XYZ:
                            prevSize = (3*batchSize+3)>>2;
                            prevElementSize = 12;
                            break;
                        case CL_UV:
                            prevSize = (2*batchSize+3)>>2;
                            prevElementSize = 8;
                            break;
                        case CL_UV2:
                            prevSize = (4*batchSize+3)>>2;
                            prevElementSize = 16;
                            break;
                        case CL_RGBA:
                            prevSize = (batchSize+3)>>2;
                            prevElementSize = 4;
                            break;
                        case CL_NORMAL:
                            prevSize = (3*batchSize+15)>>4;
                            prevElementSize = 3;
                            break;
                    }
                }
                else
                {
                    pvtData->triStrip.fieldRec[i].numVerts = 0;
                    pvtData->triStrip.fieldRec[i].dataoffset = 0;
                    pvtData->triStrip.fieldRec[i].skip = 0;
                    pvtData->triStrip.fieldRec[i].reverse = 0;
                    offsetOnVU += 1; /* size on vu */
                }
            }
            else if (pvtData->clinfo[i].attrib  & CL_ATTRIB_PLACEHOLDER)
            {
                pvtData->triStrip.fieldRec[i].vuoffset = offsetOnVU;
                pvtData->triStrip.fieldRec[i].numVerts = 0;
                pvtData->triStrip.fieldRec[i].dataoffset = 0;
                pvtData->triStrip.fieldRec[i].skip = 0;
                pvtData->triStrip.fieldRec[i].reverse = 0;
                offsetOnVU += 1; /* size on vu */
            }
        }
#if (defined(FASTMORPH))
        if (totallyOpaque)
        {
            opaqueStep = 1;
        }
        else
        {
            opaqueStep = 2;
        }
        for (i=0; i<((int)(CL_MAXCL + 2)); i++)
        {
            if (pvtData->clinfo[i].attrib  & CL_ATTRIB_REQUIRED)
            {
                if (pvtData->clinfo[i].attrib & CL_ATTRIB_OPAQUE)
                {
                    /* Per batch size is complicated by the fact that */
                    /* each batch is not a whole multiple of qw */
                    switch (i)
                    {
                        case CL_XYZ:
                            opaqueStep += (3*pvtData->triStrip.morphBatchSize+3)>>2;
                            break;
                        case CL_UV:
                            opaqueStep += (2*pvtData->triStrip.morphBatchSize+3)>>2;
                            break;
                        case CL_UV2:
                            opaqueStep += (4*pvtData->triStrip.morphBatchSize+3)>>2;
                            break;
                        case CL_RGBA:
                            opaqueStep += (pvtData->triStrip.morphBatchSize+3)>>2;
                            break;
                        case CL_NORMAL:
                            opaqueStep += (3*pvtData->triStrip.morphBatchSize+15)>>4;
                            break;
                        /* Second XYZ, that we morph to */
                        case CL_MAXCL:
                            opaqueStep += (3*pvtData->triStrip.morphBatchSize+3)>>2;
                            break;
                        /* Second NORMAL, that we morph to */
                        case CL_MAXCL+1:
                            opaqueStep += (3*pvtData->triStrip.morphBatchSize+15)>>4;
                            break;
                    }
                    /* + vif tag */
                    opaqueStep +=1;
                }
                else
                {
                    /* Dma ref and Dma vnt 0 */
                    opaqueStep += 2;
                }
            }
        }

        /* Opaque data starts (2*numStripes + 1) qw from data */
        /* each following opaque cluster starts 1+prev size */
        dataTmp = 0 + 2*pvtData->numStripes + 1;
        prevSize = 0;
        prevElementSize = 0;
        offsetOnVU = 0;
        for (i=0; i<((int)(CL_MAXCL + 2)); i++)
        {
            if (pvtData->clinfo[i].attrib  & CL_ATTRIB_REQUIRED)
            {
                pvtData->triStrip.fieldRec[i].vuoffset = offsetOnVU;
                if (pvtData->clinfo[i].attrib & CL_ATTRIB_OPAQUE)
                {
                    int batchSize
                        = pvtData->triStrip.fieldRec[i].morphNumVerts
                        = pvtData->triStrip.morphBatchSize;
                    dataTmp += prevSize + 1;
                    pvtData->triStrip.fieldRec[i].morphDataoffset = dataTmp;
                    pvtData->triStrip.fieldRec[i].morphSkip = opaqueStep;
                    pvtData->triStrip.fieldRec[i].reverse = 0;
                    offsetOnVU += 1; /* size on vu */
                    switch (i)
                    {
                        case CL_XYZ:
                            prevSize = (3*batchSize+3)>>2;
                            prevElementSize = 12;
                            break;
                        case CL_UV:
                            prevSize = (2*batchSize+3)>>2;
                            prevElementSize = 8;
                            break;
                        case CL_UV2:
                            prevSize = (4*batchSize+3)>>2;
                            prevElementSize = 16;
                            break;
                        case CL_RGBA:
                            prevSize = (batchSize+3)>>2;
                            prevElementSize = 4;
                            break;
                        case CL_NORMAL:
                            prevSize = (3*batchSize+15)>>4;
                            prevElementSize = 3;
                            break;
                        /* Fakes */
                        case CL_MAXCL:
                            prevSize = (3*batchSize+3)>>2;
                            prevElementSize = 12;
                            break;
                        case CL_MAXCL+1:
                            prevSize = (3*batchSize+15)>>4;
                            prevElementSize = 3;
                            break;
                    }
                }
                else
                {
                    pvtData->triStrip.fieldRec[i].morphNumVerts = 0;
                    pvtData->triStrip.fieldRec[i].morphDataoffset = 0;
                    pvtData->triStrip.fieldRec[i].morphSkip = 0;
                    pvtData->triStrip.fieldRec[i].reverse = 0;
                    offsetOnVU += 1; /* size on vu */
                }
            }
            else if (pvtData->clinfo[i].attrib  & CL_ATTRIB_PLACEHOLDER)
            {
                pvtData->triStrip.fieldRec[i].vuoffset = offsetOnVU;
                pvtData->triStrip.fieldRec[i].morphNumVerts = 0;
                pvtData->triStrip.fieldRec[i].morphDataoffset = 0;
                pvtData->triStrip.fieldRec[i].morphSkip = 0;
                pvtData->triStrip.fieldRec[i].reverse = 0;
                offsetOnVU += 1; /* size on vu */
            }
        }
#endif /* (defined(FASTMORPH)) */
    }

    /* We now figure out what the opaque step is for trilists */
    if (totallyOpaque)
    {
        opaqueStep = 1;
    }
    else
    {
        opaqueStep = 2;
    }
    for (i=0; i<((int)(CL_MAXCL)); i++)
    {
        if (pvtData->clinfo[i].attrib  & CL_ATTRIB_REQUIRED)
        {
            if (pvtData->clinfo[i].attrib & CL_ATTRIB_OPAQUE)
            {
                RWASSERT((i == CL_XYZ)  || (i == CL_UV) || (i == CL_UV2) ||
                         (i == CL_RGBA) || (i == CL_NORMAL));
                /* Per batch size is complicated by the fact that */
                /* each batch is not a whole multiple of qw */
                switch (i)
                {
                    case CL_XYZ:
                        opaqueStep += (3*pvtData->triList.batchSize+3)>>2;
                        break;
                    case CL_UV:
                        opaqueStep += (2*pvtData->triList.batchSize+3)>>2;
                        break;
                    case CL_UV2:
                        opaqueStep += (4*pvtData->triList.batchSize+3)>>2;
                        break;
                    case CL_RGBA:
                        opaqueStep += (pvtData->triList.batchSize+3)>>2;
                        break;
                    case CL_NORMAL:
                        opaqueStep += (3*pvtData->triList.batchSize+15)>>4;
                        break;
                }
                /* + vif tag */
                opaqueStep +=1;
            }
            else
            {
                /* Dma ref and Dma vnt 0 */
                opaqueStep += 2;
            }
        }
    }
    /* Opaque data starts (2*numStripes + 1) qw from data */
    /* each following opaque cluster starts 1+prev size */
    dataTmp = 0 + 2*pvtData->numStripes + 1;
    prevSize = 0;
    prevElementSize = 0;
    offsetOnVU = 0;
    for (i=0; i<((int)(CL_MAXCL)); i++)
    {
        if (pvtData->clinfo[i].attrib  & CL_ATTRIB_REQUIRED)
        {
            pvtData->triList.fieldRec[i].vuoffset = offsetOnVU;
            if (pvtData->clinfo[i].attrib & CL_ATTRIB_OPAQUE)
            {
                int batchSize
                    = pvtData->triList.fieldRec[i].numVerts
                    = pvtData->triList.batchSize;
                dataTmp += prevSize + 1;
                pvtData->triList.fieldRec[i].dataoffset = dataTmp;
                pvtData->triList.fieldRec[i].skip = opaqueStep;
                pvtData->triList.fieldRec[i].reverse = 0;
                offsetOnVU += 1; /* size on vu */
                switch (i)
                {
                    case CL_XYZ:
                        prevSize = (3*batchSize+3)>>2;
                        prevElementSize = 12;
                        break;
                    case CL_UV:
                        prevSize = (2*batchSize+3)>>2;
                        prevElementSize = 8;
                        break;
                    case CL_UV2:
                        prevSize = (4*batchSize+3)>>2;
                        prevElementSize = 16;
                        break;
                    case CL_RGBA:
                        prevSize = (batchSize+3)>>2;
                        prevElementSize = 4;
                        break;
                    case CL_NORMAL:
                        prevSize = (3*batchSize+15)>>4;
                        prevElementSize = 3;
                        break;
                }
            }
            else
            {
                pvtData->triList.fieldRec[i].numVerts = 0;
                pvtData->triList.fieldRec[i].dataoffset = 0;
                pvtData->triList.fieldRec[i].skip = 0;
                pvtData->triList.fieldRec[i].reverse = 0;
                offsetOnVU += 1; /* size on vu */
            }
        }
    }
#if (defined(FASTMORPH))
    if (totallyOpaque)
    {
        opaqueStep = 1;
    }
    else
    {
        opaqueStep = 2;
    }
    for (i = 0; i < ((int)(CL_MAXCL + 2)); i++)
    {
        if (pvtData->clinfo[i].attrib  & CL_ATTRIB_REQUIRED)
        {
            if (pvtData->clinfo[i].attrib & CL_ATTRIB_OPAQUE)
            {
                RWASSERT((i == CL_XYZ)  || (i == CL_UV) ||  (i == CL_UV2) ||
                         (i == CL_RGBA) || (i == CL_NORMAL) ||
                         (i == CL_MAXCL) || (i == CL_MAXCL+1));
                /* Per batch size is complicated by the fact the */
                /* each batch is not a whole multiple of qw */
                switch (i)
                {
                    case CL_XYZ:
                        opaqueStep += (3*pvtData->triList.morphBatchSize+3)>>2;
                        break;
                    case CL_UV:
                        opaqueStep += (2*pvtData->triList.morphBatchSize+3)>>2;
                        break;
                    case CL_UV2:
                        opaqueStep += (4*pvtData->triList.morphBatchSize+3)>>2;
                        break;
                    case CL_RGBA:
                        opaqueStep += (pvtData->triList.morphBatchSize+3)>>2;
                        break;
                    case CL_NORMAL:
                        opaqueStep += (3*pvtData->triList.morphBatchSize+15)>>4;
                        break;
                    case CL_MAXCL:
                        opaqueStep += (3*pvtData->triList.morphBatchSize+3)>>2;
                        break;
                    case CL_MAXCL+1:
                        opaqueStep += (3*pvtData->triList.morphBatchSize+15)>>4;
                        break;
                }
                /* + vif tag */
                opaqueStep +=1;
            }
            else
            {
                /* Dma ref and Dma vnt 0 */
                opaqueStep += 2;
            }
        }
    }
    /* Opaque data starts (2*numStripes + 1) qw from data */
    /* each following opaque cluster starts 1+prev size */
    dataTmp = 0 + 2*pvtData->numStripes + 1;
    prevSize = 0;
    prevElementSize = 0;
    offsetOnVU = 0;
    for (i=0; i<((int)(CL_MAXCL + 2)); i++)
    {
        if (pvtData->clinfo[i].attrib  & CL_ATTRIB_REQUIRED)
        {
            /* We don't need morphOffsetOnVU because this will only make a
             * difference to the FASTMORPH clusters (>= MAX_CL) */
            pvtData->triList.fieldRec[i].vuoffset = offsetOnVU;
            if (pvtData->clinfo[i].attrib & CL_ATTRIB_OPAQUE)
            {
                int batchSize
                    = pvtData->triList.fieldRec[i].morphNumVerts
                    = pvtData->triList.morphBatchSize;
                dataTmp += prevSize + 1;
                pvtData->triList.fieldRec[i].morphDataoffset = dataTmp;
                pvtData->triList.fieldRec[i].morphSkip = opaqueStep;
                /* This unnecessarily repeats reverse initialisation,
                 * I don't think it's harmful though. */
                pvtData->triList.fieldRec[i].reverse = 0;
                offsetOnVU += 1; /* size on vu */
                switch (i)
                {
                    case CL_XYZ:
                        prevSize = (3*batchSize+3)>>2;
                        prevElementSize = 12;
                        break;
                    case CL_UV:
                        prevSize = (2*batchSize+3)>>2;
                        prevElementSize = 8;
                        break;
                    case CL_UV2:
                        prevSize = (4*batchSize+3)>>2;
                        prevElementSize = 16;
                        break;
                    case CL_RGBA:
                        prevSize = (batchSize+3)>>2;
                        prevElementSize = 4;
                        break;
                    case CL_NORMAL:
                        prevSize = (3*batchSize+15)>>4;
                        prevElementSize = 3;
                        break;
                    case CL_MAXCL:
                        prevSize = (3*batchSize+3)>>2;
                        prevElementSize = 12;
                        break;
                    case CL_MAXCL+1:
                        prevSize = (3*batchSize+15)>>4;
                        prevElementSize = 3;
                        break;
                }
            }
            else
            {
                pvtData->triList.fieldRec[i].morphNumVerts = 0;
                pvtData->triList.fieldRec[i].morphDataoffset = 0;
                pvtData->triList.fieldRec[i].morphSkip = 0;
                /* This unnecessarily repeats reverse initialisation,
                 * I don't think it's harmful though. */
                pvtData->triList.fieldRec[i].reverse = 0;
                offsetOnVU += 1; /* size on vu */
            }
        }
    }
#endif /* (defined(FASTMORPH)) */

    REDEBUGConstructionTimeMacro();

    result = TRUE; /* success */

    RWRETURN(result);
}

/****************************************************************************
 DMADataSizeRecalc()

 Recalculate DMA chain size/layout - used during a full reinstance.

 Selects the tristrip or trilist batching parameters from the material
 private data (the morph variants are used instead when FASTMORPH is
 defined and ps2AllPipeData->fastMorphing is non-zero), copies the matching
 construction-time field records into fieldRec and then patches the
 per-instance values (reverse, and for striped clusters the data offset
 and vertex count) that depend on numVerts.

 On entry : ps2AllPipeData - supplies matPvtData, meshHeader->flags and
                             (FASTMORPH only) fastMorphing
          : numVerts       - vertex count of the mesh being instanced
          : fieldRec       - OUT: the first CL_MAXCL entries are overwritten
                             (CL_MAXCL + fastMorphing under FASTMORPH)
          : batchSizePtr, batchesPerTagPtr, numBatchesPtr
                           - OUT: the batching values chosen/computed here
 On exit  : size in bytes = 16*(quadword count)
                          + sizeof(rwPS2AllResEntryHeader) rounded up to 16
                          + 127

 No pointer argument is checked for NULL in this function.
 */
RwUInt32
DMADataSizeRecalc(RxPS2AllPipeData *ps2AllPipeData,
                  RwUInt32 numVerts,
                  rwPS2AllFieldRec *fieldRec,
                  RwUInt32 *batchSizePtr,
                  RwUInt32 *batchesPerTagPtr,
                  RwUInt32 *numBatchesPtr)
{
    PRIVATEDATATYPE *pvtData = (PRIVATEDATATYPE *)ps2AllPipeData->matPvtData;
    RwUInt32 batchSize, batchesPerTag, numBatches;
    RwUInt32 size, effectiveTotalVerts;
    RwInt32 i;

    RWFUNCTION(RWSTRING("DMADataSizeRecalc"));

    REDEBUGPrintf(("Figuring out DMA data sizes, etc\n"));
    /* We have to figure out how big the required area is and what the */
    /* offset of each of the data area is */

    /* First, what is the effective total vertex count? */
    if (ps2AllPipeData->meshHeader->flags & rpMESHHEADERTRISTRIP)
    {
        REDEBUGPrintf(("We have a tristrip\n"));
        batchSize = pvtData->triStrip.batchSize;
        batchesPerTag = pvtData->triStrip.batchesPerTag;
#if (defined(FASTMORPH))
        if (ps2AllPipeData->fastMorphing)
        {
            batchSize = pvtData->triStrip.morphBatchSize;
            batchesPerTag = pvtData->triStrip.morphBatchesPerTag;
        }
#endif /* (defined(FASTMORPH)) */
        /* numBatches = ceil((numVerts - 2)/(batchSize - 2)): the formula
         * treats every batch as contributing (batchSize - 2) new vertices.
         * NOTE(review): all operands are unsigned, so numVerts < 2 would
         * wrap and batchSize == 2 would divide by zero - presumably the
         * callers guarantee neither happens; confirm. */
        numBatches = ((numVerts - 2) + ((batchSize - 2) - 1)) /
                     (batchSize - 2);
        /* Two extra vertices are counted for every batch after the first */
        effectiveTotalVerts = numVerts + 2*(numBatches - 1);
        /* We set up the initial fieldRec elements */
#if (defined(FASTMORPH))
        for (i=0; i<((int)(CL_MAXCL + ps2AllPipeData->fastMorphing)); i++)
#else /* (defined(FASTMORPH)) */
        for (i=0; i<((int)(CL_MAXCL)); i++)
#endif /* (defined(FASTMORPH)) */
        {
            fieldRec[i] = pvtData->triStrip.fieldRec[i];
        }
    }
    else
    {
        REDEBUGPrintf(("We have a trilist\n"));
        batchSize = pvtData->triList.batchSize;
        batchesPerTag = pvtData->triList.batchesPerTag;
#if (defined(FASTMORPH))
        if (ps2AllPipeData->fastMorphing)
        {
            batchSize = pvtData->triList.morphBatchSize;
            batchesPerTag = pvtData->triList.morphBatchesPerTag;
        }
#endif /* (defined(FASTMORPH)) */
        /* numBatches = ceil(numVerts/batchSize); no vertices are repeated */
        numBatches = (numVerts + (batchSize - 1))/batchSize;
        effectiveTotalVerts = numVerts;
#if (defined(FASTMORPH))
        for (i=0; i<((int)(CL_MAXCL + ps2AllPipeData->fastMorphing)); i++)
#else /* (defined(FASTMORPH)) */
        for (i=0; i<((int)(CL_MAXCL)); i++)
#endif /* (defined(FASTMORPH)) */
        {
            fieldRec[i] = pvtData->triList.fieldRec[i];
        }
    /* Batch size is apparently number of vertices (of the current size)
     * so no change is needed to get the correct number for linelists
     * (batch size is guaranteed to be a multiple of 12 (of which 2 is
     * a factor), so don't worry about indices from the same line being
     * split into different batches!) */
    }

    REDEBUGDMADataSizeRecalcMacro();

    if (pvtData->totallyOpaque)
    {
        /* Only the reverse field needs be updated */
        /* viftags counts quadwords of VIF commands per batch: it starts at
         * one and is bumped once for every cluster found present below.
         * (Presumably the extra one is the trailing quadword written after
         * the last cluster of each batch - confirm against
         * DMADataFillTags.) */
        int viftags = 1;
        /* Running total (in quadwords) by which the last batch's data for
         * the clusters handled so far is shorter than a full batch; stored
         * into each cluster's 'reverse' before that cluster adds its own. */
        int lastReverse = 0;
        /* Vertex slots left unused in the final batch */
        int vertsOver = ((batchSize*numBatches) - effectiveTotalVerts);

        size = 0;
        REDEBUGPrintf(("Totally opaque\n"));
        /* Skip's value is being used as a flag here for whether
         * this cluster requires space CPU-side (same as testing
         * (pvtData->clinfo[i].attrib & CL_ATTRIB_REQUIRED, *given*
         * that we know this cluster is opaque, which all are here) */
//TODO[6]: FASTMORPHERRORPOSS - I'm *pretty* sure we can use either skip or morphSkip for this test...
//                    best standardise on this test, elsewhere you do test CL_ATTRIB_REQUIRED

        /* In each block below the comma expression (viftags++, X) bumps
         * viftags and evaluates to X, the cluster's data size in
         * quadwords; both only happen when the cluster's skip is
         * non-zero. */

        /* XYZ: ceil(3*verts/4) quadwords */
        size += fieldRec[CL_XYZ].skip?
            (viftags++,((3*effectiveTotalVerts + 3) >> 2)):0;
        fieldRec[CL_XYZ].reverse = lastReverse;
        lastReverse += fieldRec[CL_XYZ].skip?(vertsOver*3) >> 2:0;

        /* UV: ceil(2*verts/4) quadwords */
        size += fieldRec[CL_UV].skip?
            (viftags++,((2*effectiveTotalVerts + 3) >> 2)):0;
        fieldRec[CL_UV].reverse = lastReverse;
        lastReverse += fieldRec[CL_UV].skip?vertsOver >> 1:0;

        /* UV2: one quadword per vertex */
        size += fieldRec[CL_UV2].skip?
            (viftags++,((4*effectiveTotalVerts + 3) >> 2)):0;
        fieldRec[CL_UV2].reverse = lastReverse;
        lastReverse += fieldRec[CL_UV2].skip?vertsOver:0;

        /* RGBA: ceil(verts/4) quadwords */
        size += fieldRec[CL_RGBA].skip?
            (viftags++,((effectiveTotalVerts + 3) >> 2)):0;
        fieldRec[CL_RGBA].reverse = lastReverse;
        lastReverse += fieldRec[CL_RGBA].skip?vertsOver >> 2:0;

        /* NORMAL: ceil(3*verts/16) quadwords, rounded up separately for
         * each batch: (numBatches - 1) full batches plus the final batch
         * holding the remaining vertices. */
        size += fieldRec[CL_NORMAL].skip?
            (viftags++,(((batchSize*3 + 15) >> 4)*(numBatches - 1)
                   + (((effectiveTotalVerts - (batchSize*(numBatches - 1)))
                        *3 + 15) >> 4))):0;
        fieldRec[CL_NORMAL].reverse = lastReverse;

#if (defined(FASTMORPH))
        if (ps2AllPipeData->fastMorphing)
        {
            /* Account for the NORMAL cluster's final-batch shortfall before
             * laying out the two extra clusters at CL_MAXCL and CL_MAXCL+1.
             * These two are added unconditionally (no skip test) and are
             * sized with the same formulae as XYZ and NORMAL above. */
            lastReverse +=
                fieldRec[CL_NORMAL].skip ?
                (((batchSize*3 + 15) >> 4) - (((effectiveTotalVerts -
                        (batchSize*(numBatches - 1)))*3 + 15) >> 4))
                                         : 0;
            size += (viftags++,((3*effectiveTotalVerts + 3) >> 2));
            fieldRec[CL_MAXCL].reverse = lastReverse;
            lastReverse += (vertsOver*3) >> 2;

            size += (viftags++,(((batchSize*3 + 15) >> 4)*(numBatches - 1)
                   + (((effectiveTotalVerts - (batchSize*(numBatches - 1)))
                        *3 + 15) >> 4)));
            fieldRec[CL_MAXCL+1].reverse = lastReverse;
        }
#endif /* (defined(FASTMORPH)) */

        /* Vif commands */
        size += viftags*numBatches;
        /* Dma tags: one per group of batchesPerTag batches, rounded up */
        size += (numBatches + batchesPerTag - 1) / batchesPerTag;
        REDEBUGPrintf(("size = %d\n", size));
#if (defined(DMAALIGN))
        /* Note: This isn't used for more than the first dma tag yet! */

        /* We know that we can round up the qwc in the dma tag so that */
        /* the next batch will be aligned as the max qwc count +1 is   */
        /* 8 qw aligned */
        size += ((numBatches + batchesPerTag - 1) / batchesPerTag)*8;
        /* We will pad the beginning with a Dma tag and nops if required */
        REDEBUGPrintf(("but DMAALIGN was defined so size = %d\n", size));
#endif /* (defined(DMAALIGN)) */
    }
    else
    {
        /* Quadword offset at which the striped (non-opaque) data starts */
        int nonOpaqueOffset;
        /* Accumulated final-batch shortfall in quadwords (see below) */
        int lastReverse = 0;
        /* Number of vertices in the final batch */
        int vertsEndCnt = (effectiveTotalVerts - (batchSize*(numBatches - 1)));
        /* NOTE(review): skip is held in a short here; the width of the
         * fieldRec skip/morphSkip members is not visible in this function,
         * so whether this can truncate is unconfirmed. */
        short skip;
        int offset;

        REDEBUGPrintf(("Some striped data\n"));
        /* I feel that some more of this code could be moved up into the
         * construction time function. E.g qw batch size could be computed,
         * though we would then be switching on strip/non-strip here */

        /* Where it would be if there was no opaque data */
        nonOpaqueOffset = 2*(pvtData->numStripes + 1)*numBatches;
        /* From here to the end of the loop, size accumulates only the
         * striped data; the offset of that data is added afterwards. */
        size = 0;
        for (i = 0; i < ((int)(CL_MAXCL)); i++)
        {
            if (pvtData->clinfo[i].attrib & CL_ATTRIB_REQUIRED)
            {
                if (pvtData->clinfo[i].attrib & CL_ATTRIB_OPAQUE)
                {
                    int qwBatchSize;
                    /* Per batch size is complicated by the fact that */
                    /* each batch is not a whole multiple of qw */
                    REDEBUGPrintf(("field %d is required and opaque\n", i));

                    /* Warning: Non Opaque Offset code overestimates due */
                    /* to not taking into account that all inline data */
                    /* is smaller in the last batch, so the "skip" value */
                    /* is smaller. The lastReverse is used to adjust this */

                    /* Every case follows the same pattern:
                     *  - qwBatchSize: quadwords of this cluster in one full
                     *    batch;
                     *  - skip/offset: taken from the record (the morph
                     *    variants when fast morphing);
                     *  - nonOpaqueOffset is OVERWRITTEN (not accumulated)
                     *    with the offset just past this cluster's data in
                     *    the final batch, so the last opaque cluster
                     *    visited determines its value;
                     *  - reverse receives the shortfall accumulated so far
                     *    and lastReverse then grows by this cluster's own
                     *    final-batch shortfall. */
                    switch (i)
                    {
                    case CL_XYZ:
                        qwBatchSize = (3*batchSize + 3) >> 2;
                        skip = fieldRec[CL_XYZ].skip;
                        offset = fieldRec[CL_XYZ].dataoffset;
#if (defined(FASTMORPH))
                        if (ps2AllPipeData->fastMorphing)
                        {
                            skip = fieldRec[CL_XYZ].morphSkip;
                            offset = fieldRec[CL_XYZ].morphDataoffset;
                        }
#endif /* (defined(FASTMORPH)) */
                        nonOpaqueOffset = offset +
                                          skip*(numBatches - 1) +
                                          qwBatchSize + 1;
                        fieldRec[CL_XYZ].reverse = lastReverse;
                        lastReverse +=
                            qwBatchSize - ((3*vertsEndCnt + 3) >> 2);
                        break;
                    case CL_UV:
                        /* (n + 1) >> 1 gives the same value as the
                         * (2*n + 3) >> 2 form used for UV elsewhere */
                        qwBatchSize = (batchSize + 1) >> 1;
                        skip = fieldRec[CL_UV].skip;
                        offset = fieldRec[CL_UV].dataoffset;
#if (defined(FASTMORPH))
                        if (ps2AllPipeData->fastMorphing)
                        {
                            skip = fieldRec[CL_UV].morphSkip;
                            offset = fieldRec[CL_UV].morphDataoffset;
                        }
#endif /* (defined(FASTMORPH)) */
                        nonOpaqueOffset = offset +
                                          skip*(numBatches - 1) +
                                          qwBatchSize + 1;
                        fieldRec[CL_UV].reverse = lastReverse;
                        lastReverse +=
                            qwBatchSize - ((vertsEndCnt + 1) >> 1);
                        break;
                    case CL_UV2:
                        qwBatchSize = (4*batchSize + 3) >> 2;
                        skip = fieldRec[CL_UV2].skip;
                        offset = fieldRec[CL_UV2].dataoffset;
#if (defined(FASTMORPH))
                        if (ps2AllPipeData->fastMorphing)
                        {
                            skip = fieldRec[CL_UV2].morphSkip;
                            offset = fieldRec[CL_UV2].morphDataoffset;
                        }
#endif /* (defined(FASTMORPH)) */
                        nonOpaqueOffset = offset +
                                          skip*(numBatches - 1) +
                                          qwBatchSize + 1;
                        fieldRec[CL_UV2].reverse = lastReverse;
                        lastReverse +=
                            qwBatchSize - ((4*vertsEndCnt + 3) >> 2);
                        break;
                    case CL_RGBA:
                        qwBatchSize = (batchSize + 3)>>2;
                        skip = fieldRec[CL_RGBA].skip;
                        offset = fieldRec[CL_RGBA].dataoffset;
#if (defined(FASTMORPH))
                        if (ps2AllPipeData->fastMorphing)
                        {
                            skip = fieldRec[CL_RGBA].morphSkip;
                            offset = fieldRec[CL_RGBA].morphDataoffset;
                        }
#endif /* (defined(FASTMORPH)) */
                        nonOpaqueOffset = offset +
                                          skip*(numBatches - 1) +
                                          qwBatchSize + 1;
                        fieldRec[CL_RGBA].reverse = lastReverse;
                        lastReverse += qwBatchSize - ((vertsEndCnt + 3) >> 2);
                        break;
                    case CL_NORMAL:
                        qwBatchSize = (3*batchSize + 15) >> 4;
                        skip = fieldRec[CL_NORMAL].skip;
                        offset = fieldRec[CL_NORMAL].dataoffset;
#if (defined(FASTMORPH))
                        if (ps2AllPipeData->fastMorphing)
                        {
                            skip = fieldRec[CL_NORMAL].morphSkip;
                            offset = fieldRec[CL_NORMAL].morphDataoffset;
                        }
#endif /* (defined(FASTMORPH)) */
                        nonOpaqueOffset = offset +
                                          skip*(numBatches - 1) +
                                          qwBatchSize + 1;
                        fieldRec[CL_NORMAL].reverse = lastReverse;
                        lastReverse += qwBatchSize
                            - ((3*vertsEndCnt + 15) >> 4);
                        break;
                    default:
                        /* Reached when a required, opaque cluster is not
                         * one of the five handled above; nothing is laid
                         * out for it.
                         * NOTE(review): the intent of the (i < 4) condition
                         * is unclear from here - it depends on the CL_*
                         * enum values; confirm. */
                        RWASSERT(i < 4);
                        break;
                    }
                    REDEBUGPrintf(("nonOpaqueOffset currently at %d\n", nonOpaqueOffset));
                    REDEBUGPrintf(("lastReverse currently at %d\n", lastReverse));
                }
                else
                {
                    REDEBUGPrintf(("field %d is required and striped\n", i));
                    REDEBUGPrintf(("size was %d\n", size));

                    REDEBUGPrintf(("__size %d\n", pvtData->clinfo[i].stride));
                    /* for striped data we only have each vertex once */
                    /* i.e. ceil(numVerts*stride/4) quadwords, using
                     * numVerts rather than effectiveTotalVerts */
                    size += ((numVerts*
                              (pvtData->clinfo[i].stride) + 3) >> 2);
                    REDEBUGPrintf(("size now %d\n", size));
                    /* We update the numVerts field for non-opaque clusters
                     * in order to affect how instancing occurs (we can
                     * instance all the verts in one go because there is no
                     * in-place tristrip vertex duplication - it is done
                     * by frigging DMA ref tags). */
                    fieldRec[i].numVerts = numVerts;
#if (defined(FASTMORPH))
                    /* The duplicate XYZ/NORMAL clusters are always opaque
                     * currently but that might change so this is a bit of
                     * future-proofing */
                    fieldRec[i].morphNumVerts = numVerts;
#endif /* (defined(FASTMORPH)) */
                    fieldRec[i].reverse = 0;
                }
            }
        }
#if (defined(FASTMORPH))
        /* These two are always opaque for now */
        if (ps2AllPipeData->fastMorphing)
        {
            int qwBatchSize;

            /* First extra cluster (CL_MAXCL): same sizing as CL_XYZ */
            qwBatchSize = (3*batchSize + 3) >> 2;
            skip = fieldRec[CL_MAXCL].morphSkip;
            nonOpaqueOffset = fieldRec[CL_MAXCL].morphDataoffset +
                              skip*(numBatches - 1) +
                              qwBatchSize + 1;
            fieldRec[CL_MAXCL].reverse = lastReverse;
            lastReverse += qwBatchSize
                - ((3*vertsEndCnt + 3) >> 2);

            /* Second extra cluster (CL_MAXCL+1): same sizing as CL_NORMAL */
            qwBatchSize = (3*batchSize + 15) >> 4;
            skip = fieldRec[CL_MAXCL+1].morphSkip;
            nonOpaqueOffset = fieldRec[CL_MAXCL+1].morphDataoffset +
                              skip*(numBatches - 1) +
                              qwBatchSize + 1;
            fieldRec[CL_MAXCL+1].reverse = lastReverse;
            lastReverse += qwBatchSize
                - ((3*vertsEndCnt + 15) >> 4);
        }
#endif /* (defined(FASTMORPH)) */
        /* We now need to move the non-opaque data down by lastReverse */
        nonOpaqueOffset -= lastReverse;

        /* Add size of non opaque data to its start offset to get total size */
        size += nonOpaqueOffset;
#if (defined(DMAALIGN))
        /* We don't do this here as it complicates things to no benefit */
#endif /* (defined(DMAALIGN)) */
        /* We now need to set up the dataoffset for the non-opaque */
        /* Each striped cluster is placed directly after the previous one,
         * advancing by the same per-cluster size that was added to 'size'
         * in the first loop. */
#if (defined(FASTMORPH))
        for (i=0; i<((int)(CL_MAXCL + ps2AllPipeData->fastMorphing)); i++)
#else /* (defined(FASTMORPH)) */
        for (i=0; i<((int)(CL_MAXCL)); i++)
#endif /* (defined(FASTMORPH)) */
        {
            if (pvtData->clinfo[i].attrib & CL_ATTRIB_REQUIRED)
            {
                if (!(pvtData->clinfo[i].attrib & CL_ATTRIB_OPAQUE))
                {
#if (defined(FASTMORPH))
                    /* Only set one of dataoffset and morphDataoffset, so
                     * the other is zero and it's clear what we're doing */
                    if (ps2AllPipeData->fastMorphing)
                    {
                        fieldRec[i].morphDataoffset = nonOpaqueOffset;
                    }
                    else
#endif /* (defined(FASTMORPH)) */
                    {
                        fieldRec[i].dataoffset = nonOpaqueOffset;
                    }
                    REDEBUGPrintf(("field %d offset at %d\n", i, nonOpaqueOffset));

                    nonOpaqueOffset +=
                        ((numVerts*(pvtData->clinfo[i].stride) + 3) >> 2);
                }
            }
        }
    }

    REDEBUGPrintf(("Computed size in qw: %d\n", size));
    /* convert to bytes */
    size <<= 4;

    /* Add size of header */
    /* (rounded up to a multiple of 16 bytes) */
    size += (sizeof(rwPS2AllResEntryHeader)+15) & ~0xf;
    REDEBUGPrintf(("+ sizeof(rwPS2AllResEntryHeader) in bytes: %d\n", size));

    /* ensure that we fill the final cache line */
    size += 127;
    REDEBUGPrintf(("+ 127: %d\n", size));

    /* Return values used in the main function */
   *batchSizePtr = batchSize;
   *batchesPerTagPtr = batchesPerTag;
   *numBatchesPtr = numBatches;

    RWRETURN(size);
}

/****************************************************************************
 DMADataFillTags()

 Fills the tags in a DMA chain - used in congruent or full reinstancing.
 */
RwBool
DMADataFillTags(RxPS2AllPipeData *ps2AllPipeData)
{
    rwPS2AllResEntryHeader *ps2ResHeader;
    PRIVATEDATATYPE *pvtData;
    u_long128 *data;
    u_long128 *lastTag;
    /* NOTE: initialising 128-bit ints doesn't work on most compilers,
     * These just save us a compile warning! (hope it doesn't cost us...) */
    u_long128 longZero = 0;
    u_long128 ltmp = 0;
    RwUInt32 sizeOnVU, numBatches, batchesPerTag, numVerts, effectiveTotalVerts, stripTmp;
    RwBool wroteATag;
//TODO[6]: IS INT 32-BIT (OFFSET => RwUInt32) OR 64? DOES IT MATTER IN THIS CASE?
    int offset;
//TODO[6]: Grr, non-RW types!
    unsigned long tmp, tmp1;
    int i, j;

    RWFUNCTION(RWSTRING("DMADataFillTags"));


    RWASSERT(NULL != ps2AllPipeData);
    pvtData = ps2AllPipeData->matPvtData;
    RWASSERT(NULL != ps2AllPipeData->cacheEntryRef);
    RWASSERT(NULL != *(ps2AllPipeData->cacheEntryRef));
    ps2ResHeader = RWPS2ALLRESENTRYHEADERFROMRESENTRY(
                       *(ps2AllPipeData->cacheEntryRef));
    RWASSERT(NULL != ps2ResHeader);

    data = ps2ResHeader->data;
    numVerts = ps2ResHeader->numVerts;
    numBatches = ps2ResHeader->numBatches;
    batchesPerTag = ps2ResHeader->batchesPerTag;
//TODO[3]: WHAT IS THIS FOR???  --->  batchNumBatches = ps2ResHeader->numBatches;
    sizeOnVU = (RwUInt32)pvtData->sizeOnVU;

/*TODO[5]: THIS NOT NECESSARILY NECESSARY... '= 0' ABOVE SEEMS TO WORK UNDER CW
 - BE BETTER OFF WITH A FILE-SCOPE STATIC VARIABLE? (IF NEXT TO OTHERS THAT ARE
  RECENTLY CACHED) CERTAINLY DON'T USE BOTH
 - BEST IS PROBABLY A MACRO SIMILAR TO MAKE128 THAT SETS "*(u_long128 *)ptr" TO ZERO */
    MAKE128(longZero, 0L, 0L);

    if (ps2AllPipeData->meshHeader->flags & rpMESHHEADERTRISTRIP)
    {
        stripTmp = 2;
        effectiveTotalVerts = numVerts + 2*(numBatches - 1);
    }
    else
    {
        stripTmp = 0;
        effectiveTotalVerts = numVerts;
    }

    /* Fill in the DMA tags in the chain */
    lastTag = data;
    wroteATag = FALSE;
    for (i = 0; i < (int)ps2ResHeader->numBatches; i++)
    {
        int currentBatchSize;

        if (i < (int)ps2ResHeader->numBatches - 1)
        {
            currentBatchSize = ps2ResHeader->batchSize;
        }
        else
        {
            currentBatchSize = effectiveTotalVerts -
                (ps2ResHeader->numBatches - 1)*ps2ResHeader->batchSize;
        }
        /* Insert a dma tag if required, and fix up last one */
        if (i%batchesPerTag == 0)
        {
#if (0)
            /* Later instancing code may not know about this yet, so we leave it out */
#if (defined(DMAALIGN))
            if (pvtData->totallyOpaque && ((int)data & 0x70))
            {
                *data++ = longZero;
            }
#endif /* (defined(DMAALIGN)) */
#endif /* (0) */
            tmp = (1 << 28) | (data - lastTag - 1);
            if (i==0)
            {
                /* NOTE: the comment avoids masking path three, so we
                 * are free to do asynchronous texture uploads using it. */
                tmp1 = ((0x06L << 24 /*| 0x8000l*/) << 32) | (0x11L << 24);
            }
            else
            {
                tmp1 = 0;
            }
            MAKE128(ltmp, tmp1, tmp);
            
            if ((pvtData->totallyOpaque) && (i != 0))
            {
                /* Only overwrite the bottom 64 bits in the all opaque case!
//TODO[6]: GET THIS RIGHT... USE CW DMA TAG VIEWER, TAGS FOR THE FIRST BATCH ARE DIFFERENT...
                 * We have a VIF unpack command in the top 64 bits. */
               *(long *)lastTag = tmp;
                lastTag = data;
                wroteATag = TRUE;
            }
            else
            {
                /* In the non-totally-opaque case, we need zeroes to be
                 * written in the top 64 bits. This is because we have tags
                 * before every batch but only the very first tag needs to
//TODO[6]: GET THIS RIGHT... USE CW DMA TAG VIEWER, FIRST TAG IS DIFFERENT, REST ARE CNTs
                 * have stuff in the top 64 bits (what?) */
               *lastTag = ltmp;
                lastTag = data++;
            }
        }
        /* first do striped clusters */
#if (!defined(FASTMORPH))
        for (j = 0; j < ((int)(CL_MAXCL)); j++)
#else /* (!defined(FASTMORPH)) */
        for (j = 0; j < ((int)(CL_MAXCL + ps2AllPipeData->fastMorphing)); j++)
#endif /* (!defined(FASTMORPH)) */
        {
            rwPS2AllClusterInstanceInfo *clinfo = &(pvtData->clinfo[j]);
            if (clinfo->attrib & CL_ATTRIB_REQUIRED)
            {
                if (!(clinfo->attrib & CL_ATTRIB_OPAQUE))
                {
                    int __size, roundTerm;

                    switch (clinfo->attrib & CL_TYPE_MASK & ~CL_USN)
                    {
                        case CL_S32:
                        case CL_V4_8:
                        case CL_V2_16:
                            __size = 1;
                            break;
                        case CL_V2_32:
                        case CL_V4_16:
                            __size = 2;
                            break;
                        case CL_V3_32:
                            __size = 3;
                            break;
                        case CL_V4_32:
                        default:
                            __size = 4;
                            break;
                    }
                    offset = ps2ResHeader->fieldRec[j].dataoffset;
#if (defined(FASTMORPH))
                    if (ps2AllPipeData->fastMorphing)
                    {
                        offset = ps2ResHeader->fieldRec[j].morphDataoffset;
                    }
#endif /* (defined(FASTMORPH)) */
                    tmp = (3 << 28) |
                          ((currentBatchSize*__size + 3) >> 2) |
                    /* Note: The cast to int then long is to prevent a compiler
                     * warning on casting from a pointer to a non-32-bit int */
                          ((4*i*__size*(ps2ResHeader->batchSize - stripTmp) +
                            (long)(int)(ps2ResHeader->data + offset)) << 32);
                    /* We must upload >= the amount of ref'd data */
                    roundTerm = ((currentBatchSize*__size + 3) & ~0x3) -
                                currentBatchSize*__size;
                    tmp1 = (CL_TYPE_MASK & (long)pvtData->clinfo[j].attrib) |
                           0x8000 |
                           ps2ResHeader->fieldRec[j].vuoffset |
                           ((currentBatchSize +
                             ((roundTerm + (__size - 1)) / __size)) << 16);
                    tmp1 = (tmp1 << 32) | (1 << 24) | (1 << 8 ) | (sizeOnVU);

                    MAKE128(ltmp, tmp1, tmp);
                    *lastTag = ltmp;

                    /* Cnt 0 for return */
                    tmp = (1 << 28);
                    MAKE128(ltmp, 0L, tmp);
                    *data++  = ltmp;
                    lastTag = data++;
                }
            }
        }
        /* Now do opaque clusters */
#if (!defined(FASTMORPH))
        for (j = 0; j < ((int)(CL_MAXCL)); j++)
#else /* (!defined(FASTMORPH)) */
        for (j = 0; j < ((int)(CL_MAXCL + ps2AllPipeData->fastMorphing)); j++)
#endif /* (!defined(FASTMORPH)) */
        {
            rwPS2AllClusterInstanceInfo *clinfo = &(pvtData->clinfo[j]);
            if (clinfo->attrib & CL_ATTRIB_REQUIRED)
            {
                if (clinfo->attrib & CL_ATTRIB_OPAQUE)
                {
                    /* We back fill the last qw of each block */
                    /* with zero so we won't have to do so each time */
                    switch (j)
                    {
                        case CL_XYZ:
                            if (wroteATag)
                            {
                                tmp = *(long *)data;
                                wroteATag = FALSE;
                            }
                            else
                            {
                                tmp = ((5L << 24) << 32) | DEBUGMARK();
                            }
                            tmp1 = (1 << 24) | (1 << 8) | (sizeOnVU) |
                                (((0x68L << 24) | (currentBatchSize << 16)
                                  | 0x8000 | ps2ResHeader->fieldRec[j].vuoffset) << 32);
                            MAKE128(ltmp, tmp1, tmp);
                            *data = ltmp;
                            data += (3*currentBatchSize + 3) >> 2;
                            *data++ = longZero;
                            break;
                        case CL_UV:
                            if (wroteATag)
                            {
                                tmp = *(long*)data;
                                wroteATag = FALSE;
                            }
                            else
                            {
                                tmp = ((5L << 24) << 32) | DEBUGMARK();
                            }
                            tmp1 = (1 << 24) | (1 << 8) | (sizeOnVU)|
                                (((0x64L << 24) | (currentBatchSize << 16)
                                  | 0x8000 |  ps2ResHeader->fieldRec[j].vuoffset) << 32);
                            MAKE128(ltmp, tmp1, tmp);
                            *data = ltmp;
                            data += (2*currentBatchSize + 3) >> 2;
                            *data++ = longZero;
                            break;
                        case CL_UV2:
                            if (wroteATag)
                            {
                                tmp = *(long*)data;
                                wroteATag = FALSE;
                            }
                            else
                            {
                                tmp = ((5L << 24) << 32) | DEBUGMARK();
                            }
                            tmp1 = (1 << 24) | (1 << 8) | (sizeOnVU)|
                                (((0x6CL << 24) | (currentBatchSize << 16)
                                  | 0x8000 |  ps2ResHeader->fieldRec[j].vuoffset) << 32);
                            MAKE128(ltmp, tmp1, tmp);
                            *data = ltmp;
                            data += (4*currentBatchSize + 3) >> 2;
                            *data++ = longZero;
                            break;
                        case CL_RGBA:
                            if (wroteATag)
                            {
                                tmp = *(long*)data;
                                wroteATag = FALSE;
                            }
                            else
                            {
                                tmp = ((5L << 24) << 32) | DEBUGMARK();
                            }
                            tmp1 = (1 << 24) | (1 << 8) | (sizeOnVU)|
                                (((0x6EL << 24) | (currentBatchSize << 16)
                                  | 0xC000 |  ps2ResHeader->fieldRec[j].vuoffset) << 32);
                            MAKE128(ltmp, tmp1, tmp);
                            *data = ltmp;
                            data += (currentBatchSize + 3) >> 2;
                            *data++ = longZero;
                            break;
                        case CL_NORMAL:
                            if (wroteATag)
                            {
                                tmp = *(long*)data;
                                wroteATag = FALSE;
                            }
                            else
                            {
                                tmp = ((5L << 24) << 32) | DEBUGMARK();
                            }
                            tmp1 = (1 << 24) | (1 << 8) | (sizeOnVU)|
                                (((0x6AL << 24)
                                  | (currentBatchSize << 16)
                                  | 0x8000 | ps2ResHeader->fieldRec[j].vuoffset)
                                 << 32);
                            MAKE128(ltmp, tmp1, tmp);
                            *data = ltmp;
                            data += (3*currentBatchSize + 15) >> 4;
                            *data++ = longZero;
                            break;
#if (defined(FASTMORPH))
                        case CL_MAXCL:
                            if (wroteATag)
                            {
                                tmp = *(long *)data;
                                wroteATag = FALSE;
                            }
                            else
                            {
                                tmp = ((5L << 24) << 32) | DEBUGMARK();
                            }
                            /* For the extra POS/NORMAL clusters, we plonk them
                             * in contiguous arrays after the normal verts on
                             * VU1 so we set stride to be the width of cluster,
                             * not the vertex... */
                            tmp1 = (1 << 24) | (4 << 8) | (4) |
                                (((0x68L << 24) | (currentBatchSize << 16)
                            /* ...and we start it at the end of the 'interlaced'
                             * batch of normal vertices. */
                                  | 0x8000 | (currentBatchSize*sizeOnVU))
                                 << 32);
                            MAKE128(ltmp, tmp1, tmp);
                            *data = ltmp;
                            data += (3*currentBatchSize + 3) >> 2;
                            *data++ = longZero;
                            break;
                        case CL_MAXCL+1:
                            if (wroteATag)
                            {
                                tmp = *(long*)data;
                                wroteATag = FALSE;
                            }
                            else
                            {
                                tmp = ((5L << 24) << 32) | DEBUGMARK();
                            }
                            tmp1 = (1 << 24) | (4 << 8) | (4)|
                                (((0x6AL << 24)
                                  | (currentBatchSize << 16)
                                  | 0x8000 | (currentBatchSize*(sizeOnVU+1)))
                                 << 32);
                            MAKE128(ltmp, tmp1, tmp);
                            *data = ltmp;
                            data += (3*currentBatchSize + 15) >> 4;
                            *data++ = longZero;
                            break;
#endif /* (defined(FASTMORPH)) */
                    }
                }
            }
        }

        /* Now do ITOP and run/cont */
#if (defined(VUCONTINUE))
//TODO[6]: NEED COMMENTS
        tmp = ((0x17L << 24)) <<32;
#else /* (defined(VUCONTINUE)) */
        tmp = (((i == 0)?(0x15L << 24):(0x17L << 24)) <<32);
        /* For all supported primitive types (polylines get converted
         * to linelists CPU-side and trifans to trilists, otherwise
         * tristrips/trilists/linelists are supported and pointlists
         * are supported as long as appropriate VU code is supplied),
         * our counter is the number of verts uploaded per batch. */
#endif /* (defined(VUCONTINUE)) */
        tmp |= (4 << 24) | currentBatchSize;

        MAKE128(ltmp, 0L, tmp);
        *data++ = ltmp;
    }
#if (defined(VUCONTINUE))
//TODO[6]: NEED COMMENTS
    *(((unsigned long *)data) - 1) = (0x10L << 24) | ((0x06L << 24 | 0x0000L) << 32);
#else /* (defined(VUCONTINUE)) */
    /* We make the very last vif commands FLUSH/unmask3 */
    *(((unsigned long *)data) - 1) = (0x11L << 24) | ((0x06L << 24 | 0x0000L) << 32);
#endif /* (defined(VUCONTINUE)) */

    /* return */
    tmp = (6 << 28) | (data - lastTag - 1);
    *(long *)lastTag = tmp;
    if (!(pvtData->totallyOpaque))
    {
        *((long *)lastTag + 1) = 0l;
    }

    REDEBUGDMADumpMacro();

    RWRETURN(TRUE);
}

