/***************************************************************************
 *                                                                         *
 * Module  : skyinst.c                                                     *
 *                                                                         *
 * Purpose : Custom instance stage for Playstation II                      *
 *                                                                         *
 **************************************************************************/

/***************************************************************************
 Includes
 */

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>

/* Sony lib includes. I can't believe I'm using them */
#include <eekernel.h>
#include <eetypes.h>
#include <eeregs.h>
#include <libgraph.h>
#include <libdma.h>
#include <libdev.h>
/* end Sony includes */

#include "batypes.h"
#include "badevice.h"
#include "baimmedi.h"
#include "baim3d.h"
#include "bacamera.h"
#include "bapipe.h"
#include "balibtyp.h"
#include "baraster.h"
#include "bamemory.h"
#include "barwtyp.h"
#include "baimage.h"
#include "baraster.h"
#include "batextur.h"

/* indirectly included by pipmodel.h; required for rpworld.h */
#include "baresour.h"

/* String abstraction API for unicode support */
#include "rwstring.h"

/* Drag in the vertex format */
#include "drvmodel.h"

#include "badma.h"
#include "dmaalloc.h"
/* GS defines */
#include "gs.h"
/* baasm.s externs */
#include "baasm.h"
/* cache includes */

#include "basky.h"

#include "devprofile.h"

/* This file's header */
#include "skyinst.h"

#define RWCORE_H
#include <rpworld.h>

static const char rcsid[] __RWUNUSED__ =
    "@@(#)$Id: skyinst.c,v 1.49 2001/07/04 12:29:14 rabin Exp $";

/****************************************************************************
 Local Types
 */

/* One immediate-mode dispatch path: the routine that opens the VU1 packet,
 * plus the two code pointers handed to that routine for line and triangle
 * packets respectively. */
typedef struct _skyImDispatchModel _skyImDispatchModel;
struct _skyImDispatchModel
{
    openPktFn    openPacket;    /* opens the packet and writes its prolog */
    void         *vu1LineCode;  /* passed as 'code' when opening line packets */
    void         *vu1TriCode;   /* passed as 'code' when opening triangle packets */
};

/* Signature of the hooks stored in overloadWorldPipes and
 * unOverloadWorldPipes: takes no arguments and returns an RwBool. */
typedef RwBool (*WorldPipesFunc)(void);

/****************************************************************************
 Local (Static) Prototypes
 */

/****************************************************************************
 Local Defines
 */

/* Append one vertex to the currently open packet by adding its four
 * quadwords, u.qWords[0] through u.qWords[3], in order.
 * VERT is evaluated four times, so pass a pointer expression that has no
 * side effects. */
#define DISPATCHVERT(VERT)                      \
MACRO_START                                     \
{                                               \
    SWEADDCONTFAST((VERT)->u.qWords[0]);        \
    SWEADDCONTFAST((VERT)->u.qWords[1]);        \
    SWEADDCONTFAST((VERT)->u.qWords[2]);        \
    SWEADDCONTFAST((VERT)->u.qWords[3]);        \
}                                               \
MACRO_STOP

/* NOTE(review): not referenced in the visible part of this file - confirm
 * it is still needed. */
#define MAXSKYSTACKDEPTH 10

/****************************************************************************
 Globals (across program)
 */



/* Optional hooks, NULL by default. Nothing in the visible part of this file
 * assigns or calls them. */
WorldPipesFunc overloadWorldPipes = (WorldPipesFunc)NULL;
WorldPipesFunc unOverloadWorldPipes = (WorldPipesFunc)NULL;
/* Flag byte; the TRANSFOG bit selects the fogged 2D dispatch model */
RwUInt8 skyTransType = 0;
/* Code pointer most recently queued for upload by one of the open*Pkt
 * routines; compared against the requested code to skip a repeat upload */
const void *skyUploadedCode = 0;
/* Prebuilt quadwords. Those assigned in initVu1DispatchStuff() are:
 * contCmd128, dma10Header128, dma1End128, dma1Header128, dma1Ret128,
 * gifTag128, maskCmd128, nullLightBlock, runCmd128, unmaskCmd128, zero128. */
u_long128 contCmd128 = 0;
u_long128 dma10Header128 = 0;
u_long128 dma1End128 = 0;
u_long128 dma1Header128 = 0;
u_long128 dma1Ret128 = 0;
/* GIF tag template; its prim field is re-patched each time a packet is opened */
u_long128 gifTag128 = 0;
/* Copy of gifTag128 saved by openVU1SetupPkt() for later use */
u_long128 gifTagPrim128 = 0;
u_long128 maskCmd128 = 0;
/* [0] is a header quadword, [1] carries rpNALIGHTTYPE in its last word */
u_long128 nullLightBlock[2] = { 0, 0};
u_long128 runCmd128 = 0;
u_long128 surfLightCoeffs = 0;
u_long128 unmaskCmd128 = 0;
u_long128 zero128 = 0;
/* The remaining globals are not referenced in the visible part of this file;
 * presumably they are set and consumed elsewhere in the driver. */
u_long128 skyClipVect1   = 0;
u_long128 skyClipVect2   = 0;
u_long128 skyCClipVect1   = 0; /* Close Clip vec */
u_long128 skyCClipVect2   = 0;
RwUInt32  skyUserSwitch1 = 0;
RwUInt32  skyUserSwitch2 = 0;
RwBool	skyTSClipperMode = 0;
RwBool	skyTLClipperMode = 1;
/* Driver Linear Fog Parameters */
RwBool	useFarClip;
RwReal	farFogPlane;

/* Largest primitive counts accepted per packet; computed once in
 * initVu1DispatchStuff() and used as caps when packets are created */
static RwUInt32 maxCopyLinePacketSize = 0;
static RwUInt32 maxCopyTriPacketSize = 0;
/* Prebuilt quadwords assigned in initVu1DispatchStuff() */
static u_long128 baseOffsetCmd128 = 0;
static u_long128 colScaleNoTexCmd128 = 0;
static u_long128 colScaleTexCmd128 = 0;
static u_long128 prologHeader2D128 = 0;
static u_long128 prologHeader3D128 = 0;
/* Setup data copied into every packet prolog. NOTE(review): the vu1Data*
 * values are only read in the visible part of this file; whatever writes
 * them lies outside it. */
static u_long128 vu1DataFarClip = 0;
static u_long128 vu1DataNearClip = 0;
static u_long128 vu1DataOffset2D = 0;
static u_long128 vu1DataOffset3D = 0;
static u_long128 vu1DataXYZScale = 0;
static u_long128 vu1DataXYZShift = 0;
/* First two floats are set from skyVideoMode in initVu1DispatchStuff() */
static u_long128 xMaxYMax128 = 0;

/*
 * open2DVU1Pkt
 *
 * Opens a local VU1 packet sized for numPrims 2D immediate-mode primitives
 * and writes the fixed 2D prolog into it. The caller then adds the batches
 * and closes the packet.
 *
 * batchCost  quadwords of overhead per batch of up to MAXVU1BATCHCNT prims
 * primCost   quadwords per primitive
 * numPrims   number of primitives the packet must hold
 * primCode   OR'd with skyPrim_State and written to the 11-bit field of
 *            gifTag128 starting at bit 47
 * code       code pointer; a DMA tag referencing it is added only when it
 *            differs from skyUploadedCode
 * matrix     unused
 *
 * Returns TRUE with the packet open, or FALSE if sweLocalPacket is NULL
 * after the open attempt.
 *
 * Side effects: rewrites the prim field of the global gifTag128 and updates
 * skyUploadedCode.
 */
static RwBool
open2DVU1Pkt(RwUInt32 batchCost, RwUInt32 primCost, RwUInt32 numPrims,
             RwUInt32 primCode, const void *code,
             const RwMatrix * __RWUNUSED__ matrix)
{
    RwUInt32 packetSize;
    RwUInt64 tmp, tmp1;
    u_long128 ltmp;

    RWFUNCTION(RWSTRING("open2DVU1Pkt"));
    PFENTRY(PFopen2DVU1Pkt);
    RWASSERT(code);

    /* Size in quadwords: prolog + room for a null light block (which this
     * 2D path never writes) + per-batch overhead + vertices + epilog */
    packetSize = PROLOGSIZE2D + (sizeof(nullLightBlock)/sizeof(u_long128)) +
                 (batchCost*((numPrims+MAXVU1BATCHCNT-1)/MAXVU1BATCHCNT)) +
                 (primCost*numPrims) +
                 EPILOGSIZE;

    /* No code-upload tag will be written, so one quadword fewer is needed */
    if (skyUploadedCode == code)
    {
        packetSize--;
    }

    PFCALL(PFopen2DVU1Pkt);
    sweOpenLocalPkt(SWE_LPS_NOFIXUP|SWE_PKT_DMA_MODE_CHAIN_TTE | SWE_PKT_LOCAL
                    | SWE_PKT_VU1 | SWE_PKT_CIRCALLOC, packetSize);
    PFRET(PFopen2DVU1Pkt);

    if (!sweLocalPacket)
    {
        PFEXIT(PFopen2DVU1Pkt);
        RWRETURN(FALSE);
    }

    SWEADDCONTFAST(dma1Header128);

    /* Same (1l<<24)|(4<<8)|(4) word that other comments in this file label
     * STCYCLE, paired with a 0x11 command in the upper half; the path-3
     * mask that used to sit alongside it is commented out */
    tmp = ((1l<<24)|(4<<8)|(4));
    /* tmp1 = (0x13l<<24) | ((0x06l<<24 | 0x8000)<<32); */
    tmp1 = (0x11l<<24) /* | ((0x06l<<24 | 0x8000)<<32) */ ;
    MAKE128(ltmp, tmp1, tmp);
    SWEADDCONTFAST(ltmp);

    SWEADDCONTFAST(prologHeader2D128);

    /* floating offset */
    SWEADDCONTFAST(vu1DataOffset2D);

    /* Modify the gif tag for the prim */
    ((RwUInt32 *)&gifTag128)[1] &=
        ~(0x7ff<<(47-32));              /* Mask out old one */
    ((RwUInt32 *)&gifTag128)[1] |=
        (((skyPrim_State | primCode) & 0x7ff) << (47-32));

    /* GIF tag for 1 primitive using packed mode */
    SWEADDCONTFAST(gifTag128);

    /* Upload a standard color scale (no material color to worry about);
     * bit 0x10 of skyPrim_State picks the textured variant */
    if (skyPrim_State & 0x10)
    {
        SWEADDCONTFAST(colScaleTexCmd128);
    }
    else
    {
        SWEADDCONTFAST(colScaleNoTexCmd128);
    }

    /* dma cnt (1) + base and offset */
    SWEADDCONTFAST(dma1Header128);
    SWEADDCONTFAST(baseOffsetCmd128);

    /* skyUploadedCode is deliberately re-read here rather than reusing the
     * earlier comparison */
    if (skyUploadedCode != code)
    {
        /* dma ref to code upload */
        tmp = (5l<<28)| (RwUInt64)((RwUInt32)code)<<32;
        MAKE128(ltmp, 0l, tmp);
        SWEADDCONTFAST(ltmp);
        skyUploadedCode = code;
    }

    PFEXIT(PFopen2DVU1Pkt);
    RWRETURN(TRUE);
}

/*
 * open3DVU1Pkt
 *
 * Opens a local VU1 packet sized for numPrims 3D immediate-mode primitives
 * and writes the fixed 3D prolog: the matrix, clip/scale/offset setup data,
 * the GIF tag, a colour scale, a zero quadword in place of lighting
 * coefficients, the null light block and (when needed) a DMA tag
 * referencing the code.
 *
 * batchCost  quadwords of overhead per batch of up to MAXVU1BATCHCNT prims
 * primCost   quadwords per primitive
 * numPrims   number of primitives the packet must hold
 * primCode   OR'd with skyPrim_State and written to the 11-bit field of
 *            gifTag128 starting at bit 47
 * code       code pointer; a DMA tag referencing it is added only when it
 *            differs from skyUploadedCode
 * matrix     source of the four rows (right, up, at, pos) copied into the
 *            packet; asserted to satisfy RWMATRIXALIGNMENT
 *
 * Returns TRUE with the packet open, or FALSE if sweLocalPacket is NULL
 * after the open attempt.
 *
 * Side effects: rewrites the prim field of the global gifTag128 and updates
 * skyUploadedCode.
 */
static RwBool
open3DVU1Pkt(RwUInt32 batchCost, RwUInt32 primCost, RwUInt32 numPrims,
           RwUInt32 primCode, const void *code,
           const RwMatrix *matrix)
{
    RwUInt32 packetSize;
    RwUInt64 tmp, tmp1;
    u_long128 ltmp;
    const RwUInt32 *matVector;

    RWFUNCTION(RWSTRING("open3DVU1Pkt"));
    PFENTRY(PFopen3DVU1Pkt);
    RWASSERT(code);
    RWASSERT(RWMATRIXALIGNMENT(matrix));

    /* Size in quadwords: prolog + null light block + per-batch overhead +
     * vertices + epilog */
    packetSize = PROLOGSIZE3D + (sizeof(nullLightBlock)/sizeof(u_long128)) +
                 (batchCost*((numPrims+MAXVU1BATCHCNT-1)/MAXVU1BATCHCNT)) +
                 (primCost*numPrims) +
                 EPILOGSIZE;

    /* No code-upload tag will be written, so one quadword fewer is needed */
    if (skyUploadedCode == code)
    {
        packetSize--;
    }

    PFCALL(PFopen3DVU1Pkt);
    sweOpenLocalPkt(SWE_LPS_NOFIXUP|SWE_PKT_DMA_MODE_CHAIN_TTE | SWE_PKT_LOCAL
                    | SWE_PKT_VU1 | SWE_PKT_CIRCALLOC, packetSize);
    PFRET(PFopen3DVU1Pkt);

    if (!sweLocalPacket)
    {
        PFEXIT(PFopen3DVU1Pkt);
        RWRETURN(FALSE);
    }

    SWEADDCONTFAST(dma1Header128);

    /* Same (1l<<24)|(4<<8)|(4) word that other comments in this file label
     * STCYCLE, paired with a 0x11 command in the upper half; the path-3
     * mask that used to sit alongside it is commented out */
    tmp = ((1l<<24)|(4<<8)|(4));
    /* tmp1 = (0x13l<<24) | ((0x06l<<24 | 0x8000)<<32); */
    tmp1 = (0x11l<<24) /* | ((0x06l<<24 | 0x8000)<<32) */ ;
    MAKE128(ltmp, tmp1, tmp);
    SWEADDCONTFAST(ltmp);

    SWEADDCONTFAST(prologHeader3D128);

    /* matrix (4 lines) */
    /* NOTE(review): every row packs element [2] into both of its upper two
     * words, so element [3] of the row is never read. The same pattern is
     * used for all four rows here and in openVU1SetupPkt(), so it looks
     * deliberate - confirm that the VU1 code ignores the fourth component. */
    matVector = (const RwUInt32 *)(&matrix->right);
    tmp  = matVector[0] | ((RwUInt64)(matVector[1])<<32);
    tmp1 = matVector[2] | ((RwUInt64)(matVector[2])<<32);
    MAKE128(ltmp, tmp1, tmp);
    SWEADDCONTFAST(ltmp);

    matVector = (const RwUInt32 *)(&matrix->up);
    tmp  = matVector[0] | ((RwUInt64)(matVector[1])<<32);
    tmp1 = matVector[2] | ((RwUInt64)(matVector[2])<<32);
    MAKE128(ltmp, tmp1, tmp);
    SWEADDCONTFAST(ltmp);

    matVector = (const RwUInt32 *)(&matrix->at);
    tmp  = matVector[0] | ((RwUInt64)(matVector[1])<<32);
    tmp1 = matVector[2] | ((RwUInt64)(matVector[2])<<32);
    MAKE128(ltmp, tmp1, tmp);
    SWEADDCONTFAST(ltmp);

    matVector = (const RwUInt32 *)(&matrix->pos);
    tmp  = matVector[0] | ((RwUInt64)(matVector[1])<<32);
    tmp1 = matVector[2] | ((RwUInt64)(matVector[2])<<32);
    MAKE128(ltmp, tmp1, tmp);
    SWEADDCONTFAST(ltmp);

    /* nearClip in W */
    SWEADDCONTFAST(vu1DataNearClip);

    /* farClip in W */
    SWEADDCONTFAST(vu1DataFarClip);

    /* xMax yMax in W */
    SWEADDCONTFAST(xMaxYMax128);

    /* camW, camH, zScale */
    SWEADDCONTFAST(vu1DataXYZScale);

    /* OffX, OffY, zShift */
    SWEADDCONTFAST(vu1DataXYZShift);

    /* floating offset */
    SWEADDCONTFAST(vu1DataOffset3D);

    /* Modify the gif tag for the prim */
    ((RwUInt32 *)&gifTag128)[1]
        &= ~(0x7ff<<(47-32));           /* Mask out old one */
    ((RwUInt32 *)&gifTag128)[1]
        |= (((skyPrim_State | primCode) & 0x7ff) << (47-32));

    /* GIF tag for 1 primitive using packed mode */
    SWEADDCONTFAST(gifTag128);

    /* Upload a standard color scale (no material color to worry about);
     * bit 0x10 of skyPrim_State picks the textured variant */
    if (skyPrim_State & 0x10)
    {
        SWEADDCONTFAST(colScaleTexCmd128);
    }
    else
    {
        SWEADDCONTFAST(colScaleNoTexCmd128);
    }

    /* No lighting in immediate mode, so why bother with the coefficients */
    SWEADDCONTFAST(zero128);

    /* dma cnt (1) + base and offset */
    SWEADDCONTFAST(dma1Header128);
    SWEADDCONTFAST(baseOffsetCmd128);

    /* Upload a null light block */
    SWEADDCONTFAST(nullLightBlock[0]);
    SWEADDCONTFAST(nullLightBlock[1]);

    /* skyUploadedCode is deliberately re-read here rather than reusing the
     * earlier comparison */
    if (skyUploadedCode != code)
    {
        /* dma ref to code upload */
        tmp = (5l<<28)| (RwUInt64)((RwUInt32)code)<<32;
        MAKE128(ltmp, 0l, tmp);
        SWEADDCONTFAST(ltmp);
        skyUploadedCode = code;
    }

    PFEXIT(PFopen3DVU1Pkt);
    RWRETURN(TRUE);
}

/****************************************************************************
 Local (static) Globals
 */

/* Dispatch models for what to call, what VU1 code to upload */
/* Each model is {openPacket, vu1LineCode, vu1TriCode}. */
/* NOTE(review): dispatchModel3DIso uses vu1SegmentsPRL for BOTH the line and
 * the triangle slot, whereas every other model (including the disabled
 * pair below) has distinct line and triangle programs. This looks like a
 * copy-paste slip - confirm which triangle program was intended. */
static const _skyImDispatchModel dispatchModel2DNoFog = {open2DVU1Pkt, (void *)&vu1nfim2dll, (void *)&vu1nfim2dtl};
static const _skyImDispatchModel dispatchModel2DFog = {open2DVU1Pkt, (void *)&vu1im2dll, (void *)&vu1im2dtl};
static const _skyImDispatchModel dispatchModel3DPer = {open3DVU1Pkt, (void *)&vu1SegmentsPRS, (void *)&vu1GenericPRS};
static const _skyImDispatchModel dispatchModel3DIso = {open3DVU1Pkt, (void *)&vu1SegmentsPRL, (void *)&vu1SegmentsPRL};

/* Earlier 3D models, compiled out */
#if (0)
static const _skyImDispatchModel dispatchModel3DPer = {open3DVU1Pkt, (void *)&vu1transl, (void *)&vu1trans};
static const _skyImDispatchModel dispatchModel3DIso = {open3DVU1Pkt, (void *)&vu1isotransl, (void *)&vu1isotrans};
#endif /* (0) */


/* Packet flags used by openVU1SetupPkt(); the same combination that
 * open2DVU1Pkt() and open3DVU1Pkt() pass to sweOpenLocalPkt(). */
#define openVU1SetupPktType ( SWE_LPS_NOFIXUP|                  \
                              SWE_PKT_DMA_MODE_CHAIN_TTE|       \
                              SWE_PKT_LOCAL |                   \
                              SWE_PKT_VU1 |                     \
                              SWE_PKT_CIRCALLOC )

/*
 * openVU1SetupPkt
 *
 * Writes a complete VU1 setup packet: the matrix and static setup data, the
 * GIF tag, two zero placeholder quadwords, the caller's light packet and
 * (when needed) a DMA tag referencing the code. The packet is handed over
 * with a second sweFinaliseOpenLocalPkt call using SWE_LPS_CONT.
 *
 * primCode     OR'd with skyPrim_State and written to the 11-bit field of
 *              gifTag128 starting at bit 47
 * code         code pointer, may be NULL; a DMA tag referencing it is added
 *              only when it is non-NULL and differs from skyUploadedCode
 * matrix       source of the four rows (right, up, at, pos) copied into the
 *              packet; asserted to satisfy RWMATRIXALIGNMENT
 * lightPkt     lightPktLen quadwords copied verbatim into the packet
 * lightPktLen  number of quadwords at lightPkt
 *
 * Returns TRUE on success, or FALSE if sweLocalPacket is NULL after the
 * open attempt.
 *
 * Side effects: rewrites the prim field of gifTag128, stores the result in
 * gifTagPrim128 and may update skyUploadedCode.
 */
RwBool
openVU1SetupPkt(RwUInt32 primCode, const void *code, const RwMatrix *matrix,
                const u_long128 *lightPkt, RwUInt32 lightPktLen)
{
    RwUInt32 packetSize;
    RwUInt64 tmp, tmp1;
    u_long128 ltmp;
    const RwUInt32 *matVector;

    RWFUNCTION(RWSTRING("openVU1SetupPkt"));
    PFENTRY(PFopenVU1SetupPkt);
    RWASSERT(RWMATRIXALIGNMENT(matrix));

    packetSize = lightPktLen + PROLOGSIZE3D + 3;

    /* No code-upload tag will be written, so one quadword fewer is needed */
    if ((skyUploadedCode == code) || (!code))
    {
        packetSize--;
    }

#if (defined(RWUSESWEFINALISEOPENLOCALPKTMACRO))
    {
        RwBool status;
        int size = packetSize;
        sweFinaliseOpenLocalPktMacro(status, openVU1SetupPktType, size);
    }

#else /* (defined(RWUSESWEFINALISEOPENLOCALPKTMACRO)) */

    PFCALL(PFopenVU1SetupPkt);
    sweFinaliseOpenLocalPkt(openVU1SetupPktType, packetSize);
    PFRET(PFopenVU1SetupPkt);

#endif /* (defined(RWUSESWEFINALISEOPENLOCALPKTMACRO)) */

    /* Success is judged from sweLocalPacket in both variants; 'status' from
     * the macro variant is not consulted */
    if (!sweLocalPacket)
    {
        PFEXIT(PFopenVU1SetupPkt);
        RWRETURN(FALSE);
    }

    SWEADDCONTFAST(dma1Header128);

    /* We have to do the flush here as the static data upload could
       corrupt the final XGKICK packet */
    /* The same 64-bit value is deliberately placed in both halves */
    tmp = (0x11l<<24) | ((0x06l<<24 | 0x0000)<<32);
    MAKE128(ltmp, tmp, tmp);
    SWEADDCONTFAST(ltmp);

/* We must not do a mask pth 3 in this code
    SWEADDCONTFAST(prologHeader3D128);
 */
    /* Built inline instead: the same value as prologHeader3D128, with the
     * (1l<<24)|(4<<8)|(4) word added to the lower half of tmp1 */
    tmp = (1l<<28)| 13;
    tmp1 = (((0x6cl<<24)|(13l<<16)|(VU1DATABLOCKHIGH)) << 32)| ((1l<<24)|(4<<8)|(4));
    MAKE128(ltmp, tmp1, tmp);
    SWEADDCONTFAST(ltmp);

    /* matrix (4 lines) */
    /* NOTE(review): every row packs element [2] into both of its upper two
     * words, so element [3] of the row is never read - confirm that the
     * VU1 code ignores the fourth component. */
    matVector = (const RwUInt32 *)(&matrix->right);
    tmp  = matVector[0] | ((RwUInt64)(matVector[1])<<32);
    tmp1 = matVector[2] | ((RwUInt64)(matVector[2])<<32);
    MAKE128(ltmp, tmp1, tmp);
    SWEADDCONTFAST(ltmp);

    matVector = (const RwUInt32 *)(&matrix->up);
    tmp  = matVector[0] | ((RwUInt64)(matVector[1])<<32);
    tmp1 = matVector[2] | ((RwUInt64)(matVector[2])<<32);
    MAKE128(ltmp, tmp1, tmp);
    SWEADDCONTFAST(ltmp);

    matVector = (const RwUInt32 *)(&matrix->at);
    tmp  = matVector[0] | ((RwUInt64)(matVector[1])<<32);
    tmp1 = matVector[2] | ((RwUInt64)(matVector[2])<<32);
    MAKE128(ltmp, tmp1, tmp);
    SWEADDCONTFAST(ltmp);

    matVector = (const RwUInt32 *)(&matrix->pos);
    tmp  = matVector[0] | ((RwUInt64)(matVector[1])<<32);
    tmp1 = matVector[2] | ((RwUInt64)(matVector[2])<<32);
    MAKE128(ltmp, tmp1, tmp);
    SWEADDCONTFAST(ltmp);

    /* nearClip in W */
    SWEADDCONTFAST(vu1DataNearClip);

    /* farClip in W */
    SWEADDCONTFAST(vu1DataFarClip);

    /* xMax yMax in W */
    SWEADDCONTFAST(xMaxYMax128);

    /* camW, camH, zScale */
    SWEADDCONTFAST(vu1DataXYZScale);

    /* OffX, OffY, zShift */
    SWEADDCONTFAST(vu1DataXYZShift);

    /* floating offset */
    SWEADDCONTFAST(vu1DataOffset3D);

    /* Modify the gif tag for the prim */
    ((RwUInt32 *)&gifTag128)[1] &= ~(0x7ff<<(47-32));           /* Mask out old one */
    ((RwUInt32 *)&gifTag128)[1] |= (((skyPrim_State | primCode) & 0x7ff) << (47-32));

    /* GIF tag for 1 primitive using packed mode */
    /* store for later use */
    SWEADDCONTFAST(gifTagPrim128 = gifTag128);

    /* This gets uploaded per mesh anyway... */
    SWEADDCONTFAST(zero128);

    /* Surface lighting coefficients for VU1 lighting
       (overwritten per mesh, but we need a place for them) */
    SWEADDCONTFAST(zero128);

    /* dma cnt (1) + base and offset */
    SWEADDCONTFAST(dma1Header128);
    SWEADDCONTFAST(baseOffsetCmd128);

    /* Upload the light pkt */
    while (lightPktLen--)
    {
        SWEADDCONTFAST(*lightPkt);
        lightPkt++;
    }

    /* skyUploadedCode is deliberately re-read here rather than reusing the
     * earlier comparison */
    if ((code) && (skyUploadedCode != code))
    {
        /* dma ref to code upload */
        tmp = (5l<<28)| (RwUInt64)((RwUInt32)code)<<32;
        MAKE128(ltmp, 0l, tmp);
        SWEADDCONTFAST(ltmp);
        skyUploadedCode = code;
    }

    /* Closing DMA tag: same id (0xf) as dma1End128 but with a zero count */
    tmp = (0xfl<<28);
    MAKE128(ltmp, 0l, tmp);
    SWEADDCONTFAST(ltmp);

#if (defined(RWUSESWEFINALISEOPENLOCALPKTMACRO))
    {
        RwBool status;
        int size = 0;
        sweFinaliseOpenLocalPktMacro(status, SWE_LPS_CONT, size);
    }

#else /* (defined(RWUSESWEFINALISEOPENLOCALPKTMACRO)) */

    PFCALL(PFopenVU1SetupPkt);
    sweFinaliseOpenLocalPkt(SWE_LPS_CONT, 0);
    PFRET(PFopenVU1SetupPkt);

#endif /* (defined(RWUSESWEFINALISEOPENLOCALPKTMACRO)) */

    PFEXIT(PFopenVU1SetupPkt);
    RWRETURN(TRUE);
}

static void
closeAndDispatchVU1Packet(void)
{
    RWFUNCTION(RWSTRING("closeAndDispatchVU1Packet"));
    PFENTRY(PFcloseAndDispatchVU1Packet);

    /* dma cnt + flush the DMA (by filling queue with NOPs) */
    SWEADDCONTFAST(dma10Header128);

    SWEADDCONTFAST(zero128);
    SWEADDCONTFAST(zero128);
    SWEADDCONTFAST(zero128);
    SWEADDCONTFAST(zero128);
    SWEADDCONTFAST(zero128);
    SWEADDCONTFAST(zero128);
    SWEADDCONTFAST(zero128);
    SWEADDCONTFAST(zero128);
    SWEADDCONTFAST(zero128);
    {
        u_long128 ltmp;
        MAKE128(ltmp, 0l, 0x11l<<24);
        SWEADDCONTFAST(ltmp);
    }

    /* dma end */
    SWEADDCONTFAST(dma1End128);

    /* Unmask path 3 from VIF1 */
    SWEADDCONTFAST(unmaskCmd128);

    /* add pkt */
    PFCALL(PFcloseAndDispatchVU1Packet);
    sweCloseLocalPkt();
    PFRET(PFcloseAndDispatchVU1Packet);

    PFEXIT(PFcloseAndDispatchVU1Packet);
    RWRETURNVOID();
}

/*
 * createAndInitCopyLinePacket
 *
 * Opens a packet able to hold up to numLines lines, using the dispatch
 * model's openPacket routine and line code.
 *
 * dispatchModel  supplies openPacket and vu1LineCode
 * numLines       number of lines requested; capped to maxCopyLinePacketSize
 * matrix         forwarded to openPacket; asserted to satisfy
 *                RWMATRIXALIGNMENT
 *
 * Returns the number of lines the opened packet was sized for (which may be
 * fewer than requested), or 0 if nothing was opened. A packet is left open
 * only when the return value is non-zero.
 *
 * Quadword budget requested from openPacket:
 *   per batch of lines : COPYPKTSIZE
 *                        (dma cnt + ITOPS + STCYCLE, STMASK + unpack, execute)
 *   per line           : VERTEXSIZE*2 (two vertices)
 * The prolog and epilog are added by openPacket itself.
 */
static RwInt32
createAndInitCopyLinePacket(const _skyImDispatchModel *dispatchModel,
                            RwUInt32 numLines, const RwMatrix *matrix)
{
    RwBool opened;

    RWFUNCTION(RWSTRING("createAndInitCopyLinePacket"));
    PFENTRY(PFcreateAndInitCopyLinePacket);
    RWASSERT(RWMATRIXALIGNMENT(matrix));

    /* Cap to something that will fit in a DMA packet */
    if (numLines > maxCopyLinePacketSize)
    {
        numLines = maxCopyLinePacketSize;
    }

    /* Nothing requested, or the cap left nothing: do not open a packet.
     * Callers treat a 0 return as "no packet", so opening one here would
     * leave it unclosed. */
    if (!numLines)
    {
        PFEXIT(PFcreateAndInitCopyLinePacket);
        RWRETURN(0);
    }

    /* Keep PFCALL/PFRET paired whether or not the open succeeds */
    PFCALL(PFcreateAndInitCopyLinePacket);
    opened = dispatchModel->openPacket(COPYPKTSIZE, VERTEXSIZE*2, numLines, 1,
                                       dispatchModel->vu1LineCode, matrix);
    PFRET(PFcreateAndInitCopyLinePacket);

    if (opened)
    {
        PFEXIT(PFcreateAndInitCopyLinePacket);
        RWRETURN(numLines);
    }

    PFEXIT(PFcreateAndInitCopyLinePacket);
    RWRETURN(0);
}

/*
 * createAndInitCopyTriPacket
 *
 * Opens a packet able to hold up to numTris triangles, using the dispatch
 * model's openPacket routine and triangle code.
 *
 * dispatchModel  supplies openPacket and vu1TriCode
 * numTris        number of triangles requested; capped to
 *                maxCopyTriPacketSize
 * matrix         forwarded to openPacket; asserted to satisfy
 *                RWMATRIXALIGNMENT
 *
 * Returns the number of triangles the opened packet was sized for (which
 * may be fewer than requested), or 0 if nothing was opened. A packet is
 * left open only when the return value is non-zero.
 *
 * Quadword budget requested from openPacket:
 *   per batch of triangles : COPYPKTSIZE
 *                            (dma cnt + ITOPS + STCYCLE, STMASK + unpack,
 *                             execute)
 *   per triangle           : VERTEXSIZE*3 (three vertices)
 * The prolog and epilog are added by openPacket itself.
 */
static RwInt32
createAndInitCopyTriPacket(const _skyImDispatchModel *dispatchModel,
                           RwUInt32 numTris, const RwMatrix *matrix)
{
    RwBool opened;

    RWFUNCTION(RWSTRING("createAndInitCopyTriPacket"));
    PFENTRY(PFcreateAndInitCopyTriPacket);
    RWASSERT(RWMATRIXALIGNMENT(matrix));

    /* Cap to something that will fit in a DMA packet */
    if (numTris > maxCopyTriPacketSize)
    {
        numTris = maxCopyTriPacketSize;
    }

    /* Nothing requested, or the cap left nothing: do not open a packet.
     * Callers treat a 0 return as "no packet", so opening one here would
     * leave it unclosed. */
    if (!numTris)
    {
        PFEXIT(PFcreateAndInitCopyTriPacket);
        RWRETURN(0);
    }

    /* Say trifan (prim code 5), not triangle, then we can save a bit of
     * space. Keep PFCALL/PFRET paired whether or not the open succeeds. */
    PFCALL(PFcreateAndInitCopyTriPacket);
    opened = dispatchModel->openPacket(COPYPKTSIZE, VERTEXSIZE*3, numTris, 5,
                                       dispatchModel->vu1TriCode, matrix);
    PFRET(PFcreateAndInitCopyTriPacket);

    if (opened)
    {
        PFEXIT(PFcreateAndInitCopyTriPacket);
        RWRETURN(numTris);
    }

    PFEXIT(PFcreateAndInitCopyTriPacket);
    RWRETURN(0);
}

static void
initVu1DispatchStuff(void)
{
    RwUInt64 tmp, tmp1;
    RwUInt32 packetSize;

    RWFUNCTION(RWSTRING("initVu1DispatchStuff"));
    PFENTRY(PFinitVu1DispatchStuff);

    /* Warning: This masks path 3 from the VIF1 side */
    tmp = (1l<<28)| 13;
    tmp1 = (((0x6cl<<24)|(13l<<16)|
             (VU1DATABLOCKHIGH)) << 32) /* |(0x06l<<24|0x8000l) */;
    MAKE128(prologHeader3D128, tmp1, tmp);

    /* Warning: This masks path 3 from the VIF1 side */
    tmp = (1l<<28)| 3;
    tmp1 = (((0x6cl<<24)|(3l<<16)|
             (VU1DATABLOCKHIGH+9)) << 32) /* |(0x06l<<24|0x8000l) */;
    MAKE128(prologHeader2D128, tmp1, tmp);

    MAKE128(zero128, 0l, 0l);

    /* xMax yMax in W */
    /* We want these numbers to be a bit bigger than required */
#if 1
    /* This version allows for subrasters */
    //((RwReal *)&xMaxYMax128)[0] = (RwReal)(640.0f/(2047.9374f-320.0f));
    //((RwReal *)&xMaxYMax128)[1] = (RwReal)(228.0f/(2047.9374f-114.0f));

    ((RwReal *)&xMaxYMax128)[0] =
        (RwReal)((float)(skyVideoMode->width)/
                 (2047.9374f-(float)(skyVideoMode->width>>1)));
    ((RwReal *)&xMaxYMax128)[1] =
        (RwReal)((float)(skyVideoMode->height)/
                 (2047.9374f-(float)(skyVideoMode->height>>1)));
#else
    //((RwReal *)&xMaxYMax128)[0] = (RwReal)(640.0f/(2047.9374f));
    //((RwReal *)&xMaxYMax128)[1] = (RwReal)(228.0f/(2047.9374f));

    ((RwReal *)&xMaxYMax128)[0] =
        (RwReal)((float)(skyVideoMode->width)/(2047.9374f));
    ((RwReal *)&xMaxYMax128)[1] =
        (RwReal)((float)(skyVideoMode->height)/(2047.9374f));
#endif

    /* GIF tag for 1 vertex using packed mode
     * (prim gets filled at dispatch, nloop gets filled at exec time) */
    tmp = /* NLOOP */ 1l
        | /* EOP */ (0l<<15)
        | /* PRE */ (1l<<46)
        | /* FLG */ (0l<<58)
        | /* NREG */(3l<<60);
    tmp1 = /* stq */ (0x2l<<(64-64))
        | /* rgba */ (0x1l<<(68-64))
        | /* xyzf2 */ (0x4l<<(72-64));
    MAKE128(gifTag128, tmp1, tmp);

    tmp = (0xfl<<28)|1;
    MAKE128(dma1End128, 0l, tmp);

    tmp = (0x6l<<28)|1;
    MAKE128(dma1Ret128, 0l, tmp);

    tmp = (1l<<28) | (1l);
    MAKE128(dma1Header128, 0, tmp);

    tmp = (1l<<28) | (10l);
    MAKE128(dma10Header128, 0, tmp);

    tmp = (0x11l<<24)|((0x15l<<24)<<32);
    MAKE128(runCmd128, 0l, tmp);

    tmp = (0x10l<<24)|((0x17l<<24)<<32);
    MAKE128(contCmd128, 0l, tmp);

    /* This used to be 513. Now its (1024-16-13-32)/2 = 481
     * [This is also used as the default pipeline VIFOffset
     *  CHANGE BOTH NOT ONE - KEEP 'EM SYNCH'D!] */
    tmp = (3l<<24) | (0l) | (((2l<<24) | (481l))<<32);
    MAKE128(baseOffsetCmd128, 0l, tmp);

    ((RwReal *)&colScaleTexCmd128)[0] = (RwReal)(128.01f/255.0f);
    ((RwReal *)&colScaleTexCmd128)[1] = (RwReal)(128.01f/255.0f);
    ((RwReal *)&colScaleTexCmd128)[2] = (RwReal)(128.01f/255.0f);
    ((RwReal *)&colScaleTexCmd128)[3] = (RwReal)(128.01f/255.0f);

    ((RwReal *)&colScaleNoTexCmd128)[0] = (RwReal)(255.01f/255.0f);
    ((RwReal *)&colScaleNoTexCmd128)[1] = (RwReal)(255.01f/255.0f);
    ((RwReal *)&colScaleNoTexCmd128)[2] = (RwReal)(255.01f/255.0f);
    ((RwReal *)&colScaleNoTexCmd128)[3] = (RwReal)(128.01f/255.0f);

    /* Determine maximum allowable size for a triangle packet */
    maxCopyTriPacketSize = 0;
    do
    {
        RwUInt32  numBatches;

        maxCopyTriPacketSize++;
        numBatches = ( (maxCopyTriPacketSize+MAXVU1BATCHCNT-1) /
                       MAXVU1BATCHCNT );

        packetSize = PROLOGSIZE3D + EPILOGSIZE;
        packetSize += COPYPKTSIZE * numBatches;
        packetSize += VERTEXSIZE * 3 * maxCopyTriPacketSize;
    } while (packetSize < SWE_LPS_MAX_PACKET_SIZE);
    maxCopyTriPacketSize--;

    /* Determine maximum allowable size for a line packet */
    maxCopyLinePacketSize = 0;
    do
    {
        RwUInt32  numBatches;

        maxCopyLinePacketSize++;
        numBatches = ( (maxCopyLinePacketSize+MAXVU1BATCHCNT-1) /
                       MAXVU1BATCHCNT );

        packetSize = PROLOGSIZE3D + EPILOGSIZE;
        packetSize += COPYPKTSIZE * numBatches;
        packetSize += VERTEXSIZE * 3 * maxCopyLinePacketSize;
    } while (packetSize < SWE_LPS_MAX_PACKET_SIZE);
    maxCopyLinePacketSize--;

    /* Mask VIF command */
    /* tmp = (0x13l<<24) | ((0x13l<<24)<<32); */
    tmp = (0x11l<<24) | ((0x11l<<24)<<32);
    /* tmp1 = ((0x06l<<24 | 0x8000l)<<32) | (0x13l<<24); */
    tmp1 = /* ((0x06l<<24 | 0x8000l)<<32) | */ (0x11l<<24);
    MAKE128(maskCmd128, tmp1, tmp);

    /* Unmask VIF command */
    tmp = (0x11<<24);
    tmp1 = (0x06l<<24 | 0x0000l)<<32;
    MAKE128(unmaskCmd128, tmp1, tmp);

    /* Initialise the null light block (for uploading zero lights) */

    /* The end of list marker */
    ((RwUInt32 *)&nullLightBlock[1])[0] = 0;
    ((RwUInt32 *)&nullLightBlock[1])[1] = 0;
    ((RwUInt32 *)&nullLightBlock[1])[2] = 0;
    ((RwUInt32 *)&nullLightBlock[1])[3] = (RwUInt32)rpNALIGHTTYPE;

    tmp = (1l<<28) | (1);
    tmp1 = (((0x6cl<<24)|(1l<<16) | (VU1LIGHTOFFSET+4)) << 32) |
        ((1l<<24)|(4<<8)|(4));
    MAKE128(nullLightBlock[0], tmp1, tmp);

    PFCALL(PFinitVu1DispatchStuff);
    SyncDCache(nullLightBlock, SCESYNCDCACHEROUNDUP(nullLightBlock+2));
    PFRET(PFinitVu1DispatchStuff);

    PFEXIT(PFinitVu1DispatchStuff);
    RWRETURNVOID();
}

/*
 * You think addCopyVU1Line() is hacky?
 * Well, it is.
 * But this horror is equally bad
 * (I want the basic Im2d layer without Im3d &
 *   associated pipeline nonsense, see).
 */

/* From here on RwIm3DVertex is an alias for RwIm2DVertex, so the routines
 * below that are written in terms of RwIm3DVertex actually operate on 2D
 * immediate-mode vertices. */
#undef RwIm3DVertex
#define RwIm3DVertex RwIm2DVertex

/* Signature shared by the primitive-list dispatch routines: the dispatch
 * model, a matrix, the vertex array and its length. */
typedef void (*SkyAddCopyVU1PrimFunc)(const _skyImDispatchModel *dispatchModel,
                                      const RwMatrix *matrix,
                                      RwIm3DVertex *verts,
                                      RwUInt32 numVerts);

/*
 * addCopyVU1Line
 *
 * Builds and dispatches a packet containing exactly one line.
 *
 * dispatchModel  selects the open routine and line code
 * matrix         forwarded to the packet-open routine; asserted to satisfy
 *                RWMATRIXALIGNMENT
 * vert1, vert2   end points of the line; both must be non-NULL
 *
 * If the packet cannot be created the line is silently dropped.
 */
static void
addCopyVU1Line(const _skyImDispatchModel *dispatchModel,
               const RwMatrix *matrix, RwIm3DVertex *vert1,
               RwIm3DVertex *vert2)
{
    long      lo64, hi64;
    u_long128 quad;

    RWFUNCTION(RWSTRING("addCopyVU1Line"));
    PFENTRY(PFaddCopyVU1Line);
    RWASSERT(RWMATRIXALIGNMENT(matrix));
    RWASSERT(vert1);
    RWASSERT(vert2);

    /* The 2D and 3D immediate-mode vertex layouts are treated as
     * interchangeable so one dispatch routine can serve both. This assert
     * is the tripwire for the day the two layouts diverge. */
    RWASSERT(sizeof(RwIm3DVertex) == sizeof(RwIm2DVertex));

    /* A packet for a single line; give up quietly if it cannot be opened */
    PFCALL(PFaddCopyVU1Line);
    if (!createAndInitCopyLinePacket(dispatchModel, 1, matrix))
    {
        PFRET(PFaddCopyVU1Line);
        PFEXIT(PFaddCopyVU1Line);
        RWRETURNVOID();
    }
    PFRET(PFaddCopyVU1Line);

    /* Batch header, first quadword: DMA cnt, STCYCLE and ITOPS load */
    lo64 = (1l<<28) | ((RwUInt64)(1*VERTEXSIZE*2) + 2);
    hi64 = (((1l<<24) | (4<<8) | (4))<<32) |
        ((4l<<24) | (1));
    MAKE128(quad, hi64, lo64);
    SWEADDCONTFAST(quad);

    /* Batch header, second quadword: STMASK and UNPACK V4-32 */
    hi64 = (((0x7cl<<24) | ((1*VERTEXSIZE*2)<<16) | 0x8000l)<<32);
    lo64 = (0x20l<<24) | ((0xC000C000l) << 32);
    MAKE128(quad, hi64, lo64);
    SWEADDCONTFAST(quad);

    /* The two end points */
    DISPATCHVERT(vert1);
    DISPATCHVERT(vert2);

    /* Kick the batch off */
    SWEADDCONTFAST(runCmd128);

    /* Close the packet and send it */
    PFCALL(PFaddCopyVU1Line);
    closeAndDispatchVU1Packet();
    _sweFlush();
    PFRET(PFaddCopyVU1Line);

    PFEXIT(PFaddCopyVU1Line);
    RWRETURNVOID();
}

/*
 * SkyIm2DRenderLine
 *
 * Draws the 2D line joining verts[v1] and verts[v2], using the fogged
 * dispatch model when the TRANSFOG bit of skyTransType is set and the
 * unfogged one otherwise. numVerts is unused. Always returns TRUE.
 */
static RwBool
SkyIm2DRenderLine(RwIm2DVertex *verts, RwInt32 __RWUNUSED__ numVerts,
                  RwInt32 v1, RwInt32 v2)
{
    const _skyImDispatchModel *model;

    RWFUNCTION(RWSTRING("SkyIm2DRenderLine"));

    if (skyTransType & TRANSFOG)
    {
        model = &dispatchModel2DFog;
    }
    else
    {
        model = &dispatchModel2DNoFog;
    }

    /* The 2D path passes no matrix */
    addCopyVU1Line(model,
                   (const RwMatrix *)NULL,
                   (RwIm3DVertex *)(verts + v1),
                   (RwIm3DVertex *)(verts + v2));

#ifdef RWMETRICS
    /* Lines themselves are not counted, only their vertices */
    RWSRCGLOBAL(metrics)->numVertices += 2;
#endif /* RWMETRICS */

    RWRETURN(TRUE);
}

/*
 * addCopyVU1Triangle
 *
 * Builds and dispatches a packet containing exactly one triangle.
 *
 * dispatchModel        selects the open routine and triangle code
 * matrix               forwarded to the packet-open routine; asserted to
 *                      satisfy RWMATRIXALIGNMENT
 * vert1, vert2, vert3  corners of the triangle; all must be non-NULL
 *
 * Under RWMETRICS the triangle counters are bumped whether or not the
 * packet can be created. If it cannot, the triangle is silently dropped.
 */
static void
addCopyVU1Triangle(const _skyImDispatchModel *dispatchModel,
                   const RwMatrix *matrix, RwIm3DVertex *vert1,
                   RwIm3DVertex *vert2, RwIm3DVertex *vert3)
{
    long      lo64, hi64;
    u_long128 quad;

    RWFUNCTION(RWSTRING("addCopyVU1Triangle"));
    PFENTRY(PFaddCopyVU1Triangle);
    RWASSERT(RWMATRIXALIGNMENT(matrix));
    RWASSERT(vert1);
    RWASSERT(vert2);
    RWASSERT(vert3);

    /* The 2D and 3D immediate-mode vertex layouts are treated as
     * interchangeable so one dispatch routine can serve both. This assert
     * is the tripwire for the day the two layouts diverge. */
    RWASSERT(sizeof(RwIm3DVertex) == sizeof(RwIm2DVertex));

#ifdef RWMETRICS
    RWSRCGLOBAL(metrics)->numTriangles++;
    RWSRCGLOBAL(metrics)->numProcTriangles++;
#endif /* RWMETRICS */

    /* A packet for a single triangle; give up quietly if it cannot be
     * opened */
    PFCALL(PFaddCopyVU1Triangle);
    if (!createAndInitCopyTriPacket(dispatchModel, 1, matrix))
    {
        PFRET(PFaddCopyVU1Triangle);
        PFEXIT(PFaddCopyVU1Triangle);
        RWRETURNVOID();
    }
    PFRET(PFaddCopyVU1Triangle);

    /* Batch header, first quadword: DMA cnt, STCYCLE and ITOPS load */
    lo64 = (1l<<28) | ((RwUInt64)(1*VERTEXSIZE*3) + 2);
    hi64 = (((1l<<24) | (4<<8) | (4))<<32) |
        ((4l<<24) | (1));
    MAKE128(quad, hi64, lo64);
    SWEADDCONTFAST(quad);

    /* Batch header, second quadword: STMASK and UNPACK V4-32 */
    hi64 = (((0x7cl<<24) | ((1*VERTEXSIZE*3)<<16) | 0x8000l)<<32);
    lo64 = (0x20l<<24) | ((0xC000C000l) << 32);
    MAKE128(quad, hi64, lo64);
    SWEADDCONTFAST(quad);

    /* The three corners */
    DISPATCHVERT(vert1);
    DISPATCHVERT(vert2);
    DISPATCHVERT(vert3);

    /* Kick the batch off */
    SWEADDCONTFAST(runCmd128);

    /* Close the packet and send it */
    PFCALL(PFaddCopyVU1Triangle);
    closeAndDispatchVU1Packet();
    _sweFlush();
    PFRET(PFaddCopyVU1Triangle);

    PFEXIT(PFaddCopyVU1Triangle);
    RWRETURNVOID();
}

/*
 * SkyIm2DRenderTriangle
 *
 * Draws the 2D triangle verts[v1], verts[v2], verts[v3], using the fogged
 * dispatch model when the TRANSFOG bit of skyTransType is set and the
 * unfogged one otherwise. numVerts is unused. Always returns TRUE.
 */
static RwBool
SkyIm2DRenderTriangle(RwIm2DVertex *verts, RwInt32 __RWUNUSED__ numVerts,
                      RwInt32 v1, RwInt32 v2, RwInt32 v3)
{
    const _skyImDispatchModel *model;

    RWFUNCTION(RWSTRING("SkyIm2DRenderTriangle"));

    if (skyTransType & TRANSFOG)
    {
        model = &dispatchModel2DFog;
    }
    else
    {
        model = &dispatchModel2DNoFog;
    }

    /* The 2D path passes no matrix */
    addCopyVU1Triangle(model,
                       (const RwMatrix *)NULL,
                       (RwIm3DVertex *)(verts + v1),
                       (RwIm3DVertex *)(verts + v2),
                       (RwIm3DVertex *)(verts + v3));

#ifdef RWMETRICS
    /* The triangle counters are maintained by addCopyVU1Triangle */
    RWSRCGLOBAL(metrics)->numVertices += 3;
#endif /* RWMETRICS */

    RWRETURN(TRUE);
}

/*
 * addCopyVU1PrimLineList
 *
 * Copies an unindexed line list into VU1 packets and dispatches them.
 * Every consecutive pair of vertices forms one line, so numVerts / 2 lines
 * are emitted (a trailing odd vertex is ignored).
 *
 * createAndInitCopyLinePacket() is called repeatedly with the number of
 * lines still outstanding; its return value is used as the number of lines
 * to put in the current packet and a zero return ends the loop.  Within a
 * packet the lines are written in batches of at most MAXVU1BATCHCNT; the
 * first batch is followed by runCmd128 and every later one by contCmd128.
 *
 * dispatchModel and matrix are passed straight through to
 * createAndInitCopyLinePacket().
 *
 * NOTE(review): PFCALL/PFRET look like profiling markers bracketing the
 * calls made from here - confirm against devprofile.h.
 */
static void
addCopyVU1PrimLineList(const _skyImDispatchModel *dispatchModel,
                       const RwMatrix *matrix,
                       RwIm3DVertex *verts,
                       RwUInt32 numVerts)
{
    RwUInt32 numLines = numVerts / 2;
    RwUInt32 thisPacketLines;
    RWFUNCTION(RWSTRING("addCopyVU1PrimLineList"));

    PFCALL(PFaddCopyVU1Prim);
    while ((thisPacketLines =
            createAndInitCopyLinePacket(dispatchModel,
                                        numLines, matrix)))
    {
        u_long128  runCmd;

        PFRET(PFaddCopyVU1Prim);
        /* Lines left over for the following packet(s) */
        numLines -= thisPacketLines;

        /* The first batch of each packet uses the "run" command */
        runCmd = runCmd128;
        while (thisPacketLines)
        {
            RwUInt64     tmp, tmp1;
            u_long128    ltmp;
            RwUInt32     batchCount;

            /* Clamp this batch to what one VU1 batch may hold */
            batchCount = thisPacketLines;
            if (batchCount > MAXVU1BATCHCNT)
            {
                batchCount = MAXVU1BATCHCNT;
            }
            thisPacketLines -= batchCount;

            /* DMA cnt, STCYCL, and load ITOPS.  The count covers two
             * vertices per line plus 2 (presumably the UNPACK/STMASK
             * quadword and the trailing run/continue quadword) */
            tmp = (1l<<28) | ((RwUInt64)(batchCount*VERTEXSIZE*2) + 2);
            tmp1 = (((1l<<24) | (4<<8) | (4))<<32) |
                ((4l<<24) | (batchCount));
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            /* UNPACK V4-32 & STMASK */
            tmp1 = (((0x7cl<<24) |
                     ((batchCount*VERTEXSIZE*2)<<16) |
                     0x8000l)<<32);
            tmp = (0x20l<<24) | ((0xC000C000l) << 32);
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            while (batchCount--)
            {
                RwIm3DVertex *v1 = &verts[0];
                RwIm3DVertex *v2 = &verts[1];

                /* Dispatch the verts for each line */
                DISPATCHVERT(v1);
                DISPATCHVERT(v2);

                /* Ready for next line: lists do not share vertices */
                verts += 2;
            }

            /* Kick the batch off */
            SWEADDCONTFAST(runCmd);

            /* Next time do a continue */
            runCmd = contCmd128;
        }
        PFCALL(PFaddCopyVU1Prim);
        closeAndDispatchVU1Packet();

        /* Send it on its merry way */
        _sweFlush();
    }
    PFRET(PFaddCopyVU1Prim);

    RWRETURNVOID();
}

/*
 * addCopyVU1PrimPolyLine
 *
 * Copies an unindexed polyline into VU1 packets and dispatches them.
 * Each vertex is joined to its successor, giving numVerts - 1 lines.  A
 * vertex count of 0 or 1 yields no lines at all; the count is clamped
 * explicitly because numVerts is unsigned and "0 - 1" would otherwise wrap
 * to a huge line count and read far beyond the vertex array.
 *
 * createAndInitCopyLinePacket() is called repeatedly with the number of
 * lines still outstanding; its return value is used as the number of lines
 * to put in the current packet and a zero return ends the loop.  Within a
 * packet the lines are written in batches of at most MAXVU1BATCHCNT; the
 * first batch is followed by runCmd128 and every later one by contCmd128.
 *
 * dispatchModel and matrix are passed straight through to
 * createAndInitCopyLinePacket().
 */
static void
addCopyVU1PrimPolyLine(const _skyImDispatchModel *dispatchModel,
                       const RwMatrix *matrix,
                       RwIm3DVertex *verts,
                       RwUInt32 numVerts)
{
    /* Guard against unsigned wrap-around for an empty vertex list */
    RwUInt32 numLines = (numVerts > 1) ? (numVerts - 1) : 0;
    RwUInt32 thisPacketLines;
    RWFUNCTION(RWSTRING("addCopyVU1PrimPolyLine"));

    PFCALL(PFaddCopyVU1Prim);
    while ((thisPacketLines =
            createAndInitCopyLinePacket(dispatchModel,
                                        numLines, matrix)))
    {
        u_long128  runCmd;
        PFRET(PFaddCopyVU1Prim);
        /* Lines left over for the following packet(s) */
        numLines -= thisPacketLines;

        /* The first batch of each packet uses the "run" command */
        runCmd = runCmd128;
        while (thisPacketLines)
        {
            RwUInt64     tmp, tmp1;
            u_long128    ltmp;
            RwUInt32     batchCount;

            /* Clamp this batch to what one VU1 batch may hold */
            batchCount = thisPacketLines;
            if (batchCount > MAXVU1BATCHCNT)
            {
                batchCount = MAXVU1BATCHCNT;
            }
            thisPacketLines -= batchCount;

            /* DMA cnt, STCYCL, and load ITOPS */
            tmp = (1l<<28) |
                ((RwUInt64)(batchCount*VERTEXSIZE*2) + 2);
            tmp1 = (((1l<<24) | (4<<8) | (4))<<32) |
                ((4l<<24) | (batchCount));
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            /* UNPACK V4-32 & STMASK */
            tmp1 = (((0x7cl<<24) |
                     ((batchCount*VERTEXSIZE*2)<<16) |
                     0x8000l)<<32);
            tmp = (0x20l<<24) | ((0xC000C000l) << 32);
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            while (batchCount--)
            {
                RwIm3DVertex *v1 = &verts[0];
                RwIm3DVertex *v2 = &verts[1];

                /* Dispatch the verts for each line */
                DISPATCHVERT(v1);
                DISPATCHVERT(v2);

                /* Ready for next line: its start is this line's end */
                verts++;
            }

            /* Kick the batch off */
            SWEADDCONTFAST(runCmd);

            /* Next time do a continue */
            runCmd = contCmd128;
        }
        PFCALL(PFaddCopyVU1Prim);
        closeAndDispatchVU1Packet();

        /* Send it on its merry way */
        _sweFlush();
    }
    PFRET(PFaddCopyVU1Prim);

    RWRETURNVOID();
}

/*
 * addCopyVU1PrimTriList
 *
 * Copies an unindexed triangle list into VU1 packets and dispatches them.
 * Every consecutive group of three vertices forms one triangle, so
 * numVerts / 3 triangles are emitted (left-over vertices are ignored).
 * With RWMETRICS defined the triangle counters are increased by that
 * amount up front.
 *
 * createAndInitCopyTriPacket() is called repeatedly with the number of
 * triangles still outstanding; its return value is used as the number of
 * triangles to put in the current packet and a zero return ends the loop.
 * Within a packet the triangles are written in batches of at most
 * MAXVU1BATCHCNT; the first batch is followed by runCmd128 and every later
 * one by contCmd128.
 *
 * dispatchModel and matrix are passed straight through to
 * createAndInitCopyTriPacket().
 */
static void
addCopyVU1PrimTriList(const _skyImDispatchModel *dispatchModel,
                      const RwMatrix *matrix,
                      RwIm3DVertex *verts,
                      RwUInt32 numVerts)
{
    RwUInt32 numTris = numVerts / 3;
    RwUInt32 thisPacketTris;
    RWFUNCTION(RWSTRING("addCopyVU1PrimTriList"));

#ifdef RWMETRICS
    RWSRCGLOBAL(metrics)->numTriangles += numTris;
    RWSRCGLOBAL(metrics)->numProcTriangles += numTris;
#endif /* RWMETRICS */

    PFCALL(PFaddCopyVU1Prim);
    while ((thisPacketTris =
            createAndInitCopyTriPacket(dispatchModel,
                                       numTris, matrix)))
    {
        u_long128  runCmd;

        PFRET(PFaddCopyVU1Prim);
        /* Triangles left over for the following packet(s) */
        numTris -= thisPacketTris;

        /* The first batch of each packet uses the "run" command */
        runCmd = runCmd128;
        while (thisPacketTris)
        {
            RwUInt64     tmp, tmp1;
            u_long128    ltmp;
            RwUInt32     batchCount;

            /* Clamp this batch to what one VU1 batch may hold */
            batchCount = thisPacketTris;
            if (batchCount > MAXVU1BATCHCNT)
            {
                batchCount = MAXVU1BATCHCNT;
            }
            thisPacketTris -= batchCount;

            /* DMA cnt, STCYCL, and load ITOPS */
            tmp = (1l<<28) | ((RwUInt64)(batchCount*VERTEXSIZE*3) + 2);
            tmp1 = (((1l<<24) | (4<<8) | (4))<<32) |
                ((4l<<24) | (batchCount));
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            /* UNPACK V4-32 & STMASK */
            tmp1 = (((0x7cl<<24) |
                     ((batchCount*VERTEXSIZE*3)<<16) | 0x8000l)<<32);
            tmp = (0x20l<<24) | ((0xC000C000l) << 32);
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            while (batchCount--)
            {
                RwIm3DVertex *v1 = &verts[0];
                RwIm3DVertex *v2 = &verts[1];
                RwIm3DVertex *v3 = &verts[2];

                /* Dispatch the verts for each triangle */
                DISPATCHVERT(v1);
                DISPATCHVERT(v2);
                DISPATCHVERT(v3);

                /* Ready for next triangle: lists do not share vertices */
                verts += 3;
            }

            /* Kick the batch off */
            SWEADDCONTFAST(runCmd);

            /* Next time do a continue */
            runCmd = contCmd128;
        }
        PFCALL(PFaddCopyVU1Prim);
        closeAndDispatchVU1Packet();

        /* Send it on its merry way */
        _sweFlush();
    }
    PFRET(PFaddCopyVU1Prim);

    RWRETURNVOID();
}

/*
 * addCopyVU1PrimTriStrip
 *
 * Copies an unindexed triangle strip into VU1 packets and dispatches it as
 * individual triangles.  A strip of numVerts vertices gives numVerts - 2
 * triangles, or none when there are fewer than three vertices.  With
 * RWMETRICS defined the triangle counters are increased by that amount up
 * front.
 *
 * vertInd toggles between 1 and 2 on every triangle, so the second and
 * third vertex are swapped on every other triangle (presumably to keep the
 * winding order consistent along the strip).  The toggle state carries
 * over from batch to batch and packet to packet.
 *
 * createAndInitCopyTriPacket() is called repeatedly with the number of
 * triangles still outstanding; its return value is used as the number of
 * triangles to put in the current packet and a zero return ends the loop.
 * Within a packet the triangles are written in batches of at most
 * MAXVU1BATCHCNT; the first batch is followed by runCmd128 and every later
 * one by contCmd128.
 */
static void
addCopyVU1PrimTriStrip(const _skyImDispatchModel *dispatchModel,
                       const RwMatrix *matrix,
                       RwIm3DVertex *verts,
                       RwUInt32 numVerts)
{
    RwUInt32 numTris = (numVerts > 2) ? (numVerts - 2) : 0;
    RwUInt32 vertInd = 1;
    RwUInt32 thisPacketTris;

    RWFUNCTION(RWSTRING("addCopyVU1PrimTriStrip"));

#ifdef RWMETRICS
    RWSRCGLOBAL(metrics)->numTriangles += numTris;
    RWSRCGLOBAL(metrics)->numProcTriangles += numTris;
#endif /* RWMETRICS */

    PFCALL(PFaddCopyVU1Prim);
    while ((thisPacketTris =
            createAndInitCopyTriPacket(dispatchModel,
                                       numTris, matrix)))
    {
        u_long128  runCmd;

        PFRET(PFaddCopyVU1Prim);
        /* Triangles left over for the following packet(s) */
        numTris -= thisPacketTris;

        /* The first batch of each packet uses the "run" command */
        runCmd = runCmd128;
        while (thisPacketTris)
        {
            RwUInt64     tmp, tmp1;
            u_long128    ltmp;
            RwUInt32     batchCount;

            /* Clamp this batch to what one VU1 batch may hold */
            batchCount = thisPacketTris;
            if (batchCount > MAXVU1BATCHCNT)
            {
                batchCount = MAXVU1BATCHCNT;
            }
            thisPacketTris -= batchCount;

            /* DMA cnt, STCYCL, and load ITOPS */
            tmp = (1l<<28) | ((RwUInt64)(batchCount*VERTEXSIZE*3) + 2);
            tmp1 = (((1l<<24) | (4<<8) | (4))<<32) |
                ((4l<<24) | (batchCount));
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            /* UNPACK V4-32 & STMASK */
            tmp1 = (((0x7cl<<24) | ((batchCount*VERTEXSIZE*3)<<16) | 0x8000l)<<32);
            tmp = (0x20l<<24) | ((0xC000C000l) << 32);
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            while (batchCount--)
            {
                /* vertInd is 1 or 2, and vertInd^3 is the other of the two */
                RwIm3DVertex *v1 = &verts[0];
                RwIm3DVertex *v2 = &verts[vertInd];
                RwIm3DVertex *v3 = &verts[vertInd^3];

                /* Dispatch the verts for each triangle */
                DISPATCHVERT(v1);
                DISPATCHVERT(v2);
                DISPATCHVERT(v3);

                /* Ready for next triangle: slide on one vertex and flip
                 * the order of the other two */
                verts++;
                vertInd ^= 3;
            }

            /* Kick the batch off */
            SWEADDCONTFAST(runCmd);

            /* Next time do a continue */
            runCmd = contCmd128;
        }
        PFCALL(PFaddCopyVU1Prim);
        closeAndDispatchVU1Packet();

        /* Send it on its merry way */
        _sweFlush();
    }
    PFRET(PFaddCopyVU1Prim);

    RWRETURNVOID();
}

/*
 * addCopyVU1PrimTriFan
 *
 * Copies an unindexed triangle fan into VU1 packets and dispatches it as
 * individual triangles.  A fan of numVerts vertices gives numVerts - 2
 * triangles, or none when there are fewer than three vertices.  With
 * RWMETRICS defined the triangle counters are increased by that amount up
 * front.
 *
 * Every triangle uses the very first vertex of the array (captured in v1
 * before verts is advanced) together with the next two vertices of the
 * sliding window.
 *
 * createAndInitCopyTriPacket() is called repeatedly with the number of
 * triangles still outstanding; its return value is used as the number of
 * triangles to put in the current packet and a zero return ends the loop.
 * Within a packet the triangles are written in batches of at most
 * MAXVU1BATCHCNT; the first batch is followed by runCmd128 and every later
 * one by contCmd128.
 */
static void
addCopyVU1PrimTriFan(const _skyImDispatchModel *dispatchModel,
                     const RwMatrix *matrix,
                     RwIm3DVertex *verts,
                     RwUInt32 numVerts)
{
    RwUInt32 numTris = (numVerts > 2) ? (numVerts - 2) : 0;
    RwUInt32 thisPacketTris;
    /* Hub of the fan; stays fixed while verts moves on */
    RwIm3DVertex *v1 = &verts[0];
    RWFUNCTION(RWSTRING("addCopyVU1PrimTriFan"));

#ifdef RWMETRICS
    RWSRCGLOBAL(metrics)->numTriangles += numTris;
    RWSRCGLOBAL(metrics)->numProcTriangles += numTris;
#endif /* RWMETRICS */

    PFCALL(PFaddCopyVU1Prim);
    while ((thisPacketTris = createAndInitCopyTriPacket(dispatchModel, numTris, matrix)))
    {
        u_long128  runCmd;

        PFRET(PFaddCopyVU1Prim);
        /* Triangles left over for the following packet(s) */
        numTris -= thisPacketTris;

        /* The first batch of each packet uses the "run" command */
        runCmd = runCmd128;
        while (thisPacketTris)
        {
            RwUInt64     tmp, tmp1;
            u_long128    ltmp;
            RwUInt32     batchCount;

            /* Clamp this batch to what one VU1 batch may hold */
            batchCount = thisPacketTris;
            if (batchCount > MAXVU1BATCHCNT)
            {
                batchCount = MAXVU1BATCHCNT;
            }
            thisPacketTris -= batchCount;

            /* DMA cnt, STCYCL, and load ITOPS */
            tmp = (1l<<28) | ((RwUInt64)(batchCount*VERTEXSIZE*3) + 2);
            tmp1 = (((1l<<24) | (4<<8) | (4))<<32) |
                ((4l<<24) | (batchCount));
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            /* UNPACK V4-32 & STMASK */
            tmp1 = (((0x7cl<<24) | ((batchCount*VERTEXSIZE*3)<<16) | 0x8000l)<<32);
            tmp = (0x20l<<24) | ((0xC000C000l) << 32);
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            while (batchCount--)
            {
                RwIm3DVertex *v2 = &verts[1];
                RwIm3DVertex *v3 = &verts[2];

                /* Dispatch the verts for each triangle */
                DISPATCHVERT(v1);
                DISPATCHVERT(v2);
                DISPATCHVERT(v3);

                /* Ready for next triangle: slide the rim on one vertex */
                verts++;
            }

            /* Kick the batch off */
            SWEADDCONTFAST(runCmd);

            /* Next time do a continue */
            runCmd = contCmd128;
        }
        PFCALL(PFaddCopyVU1Prim);
        closeAndDispatchVU1Packet();

        /* Send it on its merry way */
        _sweFlush();
    }
    PFRET(PFaddCopyVU1Prim);

    RWRETURNVOID();
}

/*
 * Number of entries in the dispatch table declared inside
 * addCopyVU1PrimMacro.  Only meaningful where SkyAddCopyVU1PrimFuncArray is
 * in scope, i.e. inside that macro's expansion.
 */
#define SkyAddCopyVU1PrimFuncCount              \
    ( sizeof(SkyAddCopyVU1PrimFuncArray)/       \
      sizeof(SkyAddCopyVU1PrimFuncArray[0]) )

/*
 * addCopyVU1PrimMacro
 *
 * Dispatches an unindexed primitive to the addCopyVU1Prim* function that
 * matches _primType, using a table indexed by the primitive type value.
 * A _primType of 0 or one beyond the end of the table is silently ignored.
 * _dispatchModel, _matrix, _verts and _numVerts are forwarded unchanged to
 * the selected function.
 */
#define  addCopyVU1PrimMacro(_dispatchModel, _matrix,                         \
                             _primType, _verts, _numVerts)                    \
MACRO_START                                                                   \
{                                                                             \
    static SkyAddCopyVU1PrimFunc SkyAddCopyVU1PrimFuncArray[] =               \
    {                                                                         \
        (SkyAddCopyVU1PrimFunc)NULL,   /* rwPRIMTYPENAPRIMTYPE = 0 */         \
        addCopyVU1PrimLineList,        /* rwPRIMTYPELINELIST = 1*/            \
        addCopyVU1PrimPolyLine,        /* rwPRIMTYPEPOLYLINE = 2*/            \
        addCopyVU1PrimTriList,         /* rwPRIMTYPETRILIST = 3*/             \
        addCopyVU1PrimTriStrip,        /* rwPRIMTYPETRISTRIP = 4*/            \
        addCopyVU1PrimTriFan           /* rwPRIMTYPETRIFAN = 5 */             \
    };                                                                        \
                                                                              \
    PFENTRY(PFaddCopyVU1Prim);                                                \
    RWASSERT(RWMATRIXALIGNMENT((_matrix)));                                   \
    RWASSERT((_verts));                                                       \
                                                                              \
    /* Nasty hack here for code reuse.  We know that 2D and 3D immediate mode \
     * vertices are the same.  We can therefore use the same code to          \
     * index the arrays and dispatch them...!  Just have some asserts here to \
     * protect against the day they change (and we have to write a whole lot  \
     * more code).                                                            \
     */                                                                       \
    RWASSERT(sizeof(RwIm3DVertex) == sizeof(RwIm2DVertex));                   \
                                                                              \
    if ( ((_primType) > 0) &&                                                 \
         ((_primType) < SkyAddCopyVU1PrimFuncCount) )                         \
    {                                                                         \
        SkyAddCopyVU1PrimFunc func =                                          \
            SkyAddCopyVU1PrimFuncArray[(_primType)];                          \
        func((_dispatchModel),  (_matrix),                                    \
             (_verts), (_numVerts));                                          \
    }                                                                         \
                                                                              \
    PFEXIT(PFaddCopyVU1Prim);                                                 \
}                                                                             \
MACRO_STOP

/*
 * SkyIm2DRenderPrimitive
 *
 * Renders an unindexed 2D immediate mode primitive of the given type from
 * numVerts vertices.  The fogging or non-fogging 2D dispatch model is picked
 * from the TRANSFOG bit of skyTransType and no matrix is supplied.
 * Primitive types without a dispatcher are ignored by addCopyVU1PrimMacro.
 *
 * Always returns TRUE.
 */
static RwBool
SkyIm2DRenderPrimitive(RwPrimitiveType primType,
                       RwIm2DVertex *verts, RwInt32 numVerts)
{
    const _skyImDispatchModel *dispatch;

    RWFUNCTION(RWSTRING("SkyIm2DRenderPrimitive"));

    /* Choose the dispatch model according to the current fog state */
    if (skyTransType & TRANSFOG)
    {
        dispatch = &dispatchModel2DFog;
    }
    else
    {
        dispatch = &dispatchModel2DNoFog;
    }

    /* The 2D vertices are handed on through the 3D vertex type */
    addCopyVU1PrimMacro(dispatch, (const RwMatrix *)NULL, primType,
                        (RwIm3DVertex *)verts, numVerts);

#ifdef RWMETRICS
    /* The triangle counters are maintained by the addCopyVU1Prim* code */
    RWSRCGLOBAL(metrics)->numVertices += numVerts;
#endif /* RWMETRICS */

    RWRETURN(TRUE);
}

/*
 * Signature shared by the indexed copy-to-VU1 primitive dispatchers
 * (addCopyVU1IndPrim*), which are selected through a table by primitive
 * type.
 */
typedef void (*SkyAddCopyVU1IndPrimFunc)(
    const _skyImDispatchModel *dispatchModel,
    const RwMatrix *matrix,
    RwIm3DVertex *verts,
    RwImVertexIndex *indices,
    RwUInt32 numIndices);

/*
 * addCopyVU1IndPrimLineList
 *
 * Copies an indexed line list into VU1 packets and dispatches them.
 * Every consecutive pair of indices selects the two end points of one line
 * from verts, so numIndices / 2 lines are emitted (a trailing odd index is
 * ignored).  Index values are not range-checked here.
 *
 * createAndInitCopyLinePacket() is called repeatedly with the number of
 * lines still outstanding; its return value is used as the number of lines
 * to put in the current packet and a zero return ends the loop.  Within a
 * packet the lines are written in batches of at most MAXVU1BATCHCNT; the
 * first batch is followed by runCmd128 and every later one by contCmd128.
 *
 * dispatchModel and matrix are passed straight through to
 * createAndInitCopyLinePacket().
 */
static void
addCopyVU1IndPrimLineList(const _skyImDispatchModel *dispatchModel,
                       const RwMatrix *matrix,
                       RwIm3DVertex *verts,
                       RwImVertexIndex *indices, RwUInt32 numIndices)
{
    RwUInt32 numLines = numIndices / 2;
    RwUInt32 thisPacketLines;

    RWFUNCTION(RWSTRING("addCopyVU1IndPrimLineList"));

    PFCALL(PFaddCopyVU1IndPrim);
    while ((thisPacketLines =
            createAndInitCopyLinePacket(dispatchModel,
                                        numLines, matrix)))
    {
        u_long128  runCmd;

        PFRET(PFaddCopyVU1IndPrim);
        /* Lines left over for the following packet(s) */
        numLines -= thisPacketLines;

        /* The first batch of each packet uses the "run" command */
        runCmd = runCmd128;
        while (thisPacketLines)
        {
            RwUInt64     tmp, tmp1;
            u_long128    ltmp;
            RwUInt32     batchCount;

            /* Clamp this batch to what one VU1 batch may hold */
            batchCount = thisPacketLines;
            if (batchCount > MAXVU1BATCHCNT)
            {
                batchCount = MAXVU1BATCHCNT;
            }
            thisPacketLines -= batchCount;

            /* DMA cnt, STCYCL, and load ITOPS */
            tmp = (1l<<28) | ((RwUInt64)(batchCount*VERTEXSIZE*2) + 2);
            tmp1 = (((1l<<24) | (4<<8) | (4))<<32) |
                ((4l<<24) | (batchCount));
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            /* UNPACK V4-32 & STMASK */
            tmp1 = (((0x7cl<<24) |
                     ((batchCount*VERTEXSIZE*2)<<16) |
                     0x8000l)<<32);
            tmp = (0x20l<<24) | ((0xC000C000l) << 32);
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            while (batchCount--)
            {
                RwIm3DVertex *v1 = &verts[indices[0]];
                RwIm3DVertex *v2 = &verts[indices[1]];

                /* Dispatch the verts for each line */
                DISPATCHVERT(v1);
                DISPATCHVERT(v2);

                /* Ready for next line: lists do not share indices */
                indices += 2;
            }

            /* Kick the batch off */
            SWEADDCONTFAST(runCmd);

            /* Next time do a continue */
            runCmd = contCmd128;
        }
        PFCALL(PFaddCopyVU1IndPrim);
        closeAndDispatchVU1Packet();

        /* Send it on its merry way */
        _sweFlush();
    }
    PFRET(PFaddCopyVU1IndPrim);

    RWRETURNVOID();
}

/*
 * addCopyVU1IndPrimPolyLine
 *
 * Copies an indexed polyline into VU1 packets and dispatches them.
 * Each index is joined to its successor, giving numIndices - 1 lines whose
 * end points are looked up in verts.  An index count of 0 or 1 yields no
 * lines at all; the count is clamped explicitly because numIndices is
 * unsigned and "0 - 1" would otherwise wrap to a huge line count and read
 * far beyond the index array.  Index values are not range-checked here.
 *
 * createAndInitCopyLinePacket() is called repeatedly with the number of
 * lines still outstanding; its return value is used as the number of lines
 * to put in the current packet and a zero return ends the loop.  Within a
 * packet the lines are written in batches of at most MAXVU1BATCHCNT; the
 * first batch is followed by runCmd128 and every later one by contCmd128.
 *
 * dispatchModel and matrix are passed straight through to
 * createAndInitCopyLinePacket().
 */
static void
addCopyVU1IndPrimPolyLine(const _skyImDispatchModel *dispatchModel,
                       const RwMatrix *matrix,
                       RwIm3DVertex *verts,
                       RwImVertexIndex *indices, RwUInt32 numIndices)
{
    /* Guard against unsigned wrap-around for an empty index list */
    RwUInt32 numLines = (numIndices > 1) ? (numIndices - 1) : 0;
    RwUInt32 thisPacketLines;

    RWFUNCTION(RWSTRING("addCopyVU1IndPrimPolyLine"));

    PFCALL(PFaddCopyVU1IndPrim);
    while ((thisPacketLines =
            createAndInitCopyLinePacket(dispatchModel, numLines,
                                        matrix)))
    {
        u_long128  runCmd;
        PFRET(PFaddCopyVU1IndPrim);
        /* Lines left over for the following packet(s) */
        numLines -= thisPacketLines;

        /* The first batch of each packet uses the "run" command */
        runCmd = runCmd128;
        while (thisPacketLines)
        {
            RwUInt64     tmp, tmp1;
            u_long128    ltmp;
            RwUInt32     batchCount;

            /* Clamp this batch to what one VU1 batch may hold */
            batchCount = thisPacketLines;
            if (batchCount > MAXVU1BATCHCNT)
            {
                batchCount = MAXVU1BATCHCNT;
            }
            thisPacketLines -= batchCount;

            /* DMA cnt, STCYCL, and load ITOPS */
            tmp = (1l<<28) | ((RwUInt64)(batchCount*VERTEXSIZE*2) + 2);
            tmp1 = (((1l<<24) | (4<<8) | (4))<<32) |
                ((4l<<24) | (batchCount));
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            /* UNPACK V4-32 & STMASK */
            tmp1 = (((0x7cl<<24) |
                     ((batchCount*VERTEXSIZE*2)<<16) |
                     0x8000l)<<32);
            tmp = (0x20l<<24) | ((0xC000C000l) << 32);
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            while (batchCount--)
            {
                RwIm3DVertex *v1 = &verts[indices[0]];
                RwIm3DVertex *v2 = &verts[indices[1]];

                /* Dispatch the verts for each line */
                DISPATCHVERT(v1);
                DISPATCHVERT(v2);

                /* Ready for next line: its start is this line's end */
                indices++;
            }

            /* Kick the batch off */
            SWEADDCONTFAST(runCmd);

            /* Next time do a continue */
            runCmd = contCmd128;
        }
        PFCALL(PFaddCopyVU1IndPrim);
        closeAndDispatchVU1Packet();

        /* Send it on its merry way */
        _sweFlush();
    }
    PFRET(PFaddCopyVU1IndPrim);
    RWRETURNVOID();
}

/*
 * addCopyVU1IndPrimTriList
 *
 * Copies an indexed triangle list into VU1 packets and dispatches them.
 * Every consecutive group of three indices selects the corners of one
 * triangle from verts, so numIndices / 3 triangles are emitted (left-over
 * indices are ignored).  Index values are not range-checked here.  With
 * RWMETRICS defined the triangle counters are increased by that amount up
 * front.
 *
 * createAndInitCopyTriPacket() is called repeatedly with the number of
 * triangles still outstanding; its return value is used as the number of
 * triangles to put in the current packet and a zero return ends the loop.
 * Within a packet the triangles are written in batches of at most
 * MAXVU1BATCHCNT; the first batch is followed by runCmd128 and every later
 * one by contCmd128.
 */
static void
addCopyVU1IndPrimTriList(const _skyImDispatchModel *dispatchModel,
                      const RwMatrix *matrix,
                      RwIm3DVertex *verts,
                      RwImVertexIndex *indices, RwUInt32 numIndices)
{
    RwUInt32 numTris = numIndices / 3;
    RwUInt32 thisPacketTris;

    RWFUNCTION(RWSTRING("addCopyVU1IndPrimTriList"));

#ifdef RWMETRICS
    RWSRCGLOBAL(metrics)->numTriangles += numTris;
    RWSRCGLOBAL(metrics)->numProcTriangles += numTris;
#endif /* RWMETRICS */

    PFCALL(PFaddCopyVU1IndPrim);
    while ((thisPacketTris =
            createAndInitCopyTriPacket(dispatchModel,
                                       numTris, matrix)))
    {
        u_long128  runCmd;
        PFRET(PFaddCopyVU1IndPrim);
        /* Triangles left over for the following packet(s) */
        numTris -= thisPacketTris;

        /* The first batch of each packet uses the "run" command */
        runCmd = runCmd128;
        while (thisPacketTris)
        {
            RwUInt64     tmp, tmp1;
            u_long128    ltmp;
            RwUInt32     batchCount;

            /* Clamp this batch to what one VU1 batch may hold */
            batchCount = thisPacketTris;
            if (batchCount > MAXVU1BATCHCNT)
            {
                batchCount = MAXVU1BATCHCNT;
            }
            thisPacketTris -= batchCount;

            /* DMA cnt, STCYCL, and load ITOPS */
            tmp = (1l<<28) | ((RwUInt64)(batchCount*VERTEXSIZE*3) + 2);
            tmp1 = (((1l<<24) | (4<<8) | (4))<<32) |
                ((4l<<24) | (batchCount));
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            /* UNPACK V4-32 & STMASK */
            tmp1 = (((0x7cl<<24) |
                     ((batchCount*VERTEXSIZE*3)<<16) |
                     0x8000l)<<32);
            tmp = (0x20l<<24) | ((0xC000C000l) << 32);
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            while (batchCount--)
            {
                RwIm3DVertex *v1 = &verts[indices[0]];
                RwIm3DVertex *v2 = &verts[indices[1]];
                RwIm3DVertex *v3 = &verts[indices[2]];

                /* Dispatch the verts for each triangle */
                DISPATCHVERT(v1);
                DISPATCHVERT(v2);
                DISPATCHVERT(v3);

                /* Ready for next triangle: lists do not share indices */
                indices += 3;
            }

            /* Kick the batch off */
            SWEADDCONTFAST(runCmd);

            /* Next time do a continue */
            runCmd = contCmd128;
        }
        PFCALL(PFaddCopyVU1IndPrim);
        closeAndDispatchVU1Packet();

        /* Send it on its merry way */
        _sweFlush();
    }
    PFRET(PFaddCopyVU1IndPrim);

    RWRETURNVOID();
}

/*
 * addCopyVU1IndPrimTriStrip
 *
 * Copies an indexed triangle strip into VU1 packets and dispatches it as
 * individual triangles.  A strip of numIndices indices gives numIndices - 2
 * triangles, or none when there are fewer than three indices.  Index values
 * are not range-checked here.  With RWMETRICS defined the triangle counters
 * are increased by that amount up front.
 *
 * vertInd toggles between 1 and 2 on every triangle, so the second and
 * third corner are swapped on every other triangle (presumably to keep the
 * winding order consistent along the strip).  The toggle state carries
 * over from batch to batch and packet to packet.
 *
 * createAndInitCopyTriPacket() is called repeatedly with the number of
 * triangles still outstanding; its return value is used as the number of
 * triangles to put in the current packet and a zero return ends the loop.
 * Within a packet the triangles are written in batches of at most
 * MAXVU1BATCHCNT; the first batch is followed by runCmd128 and every later
 * one by contCmd128.
 */
static void
addCopyVU1IndPrimTriStrip(const _skyImDispatchModel *dispatchModel,
                       const RwMatrix *matrix,
                       RwIm3DVertex *verts,
                       RwImVertexIndex *indices, RwUInt32 numIndices)
{
    RwUInt32 numTris = (numIndices > 2) ? (numIndices - 2) : 0;
    RwUInt32 vertInd = 1;
    RwUInt32 thisPacketTris;

    RWFUNCTION(RWSTRING("addCopyVU1IndPrimTriStrip"));

#ifdef RWMETRICS
    RWSRCGLOBAL(metrics)->numTriangles += numTris;
    RWSRCGLOBAL(metrics)->numProcTriangles += numTris;
#endif /* RWMETRICS */

    PFCALL(PFaddCopyVU1IndPrim);
    while ((thisPacketTris =
            createAndInitCopyTriPacket(dispatchModel,
                                       numTris, matrix)))
    {
        u_long128  runCmd;
        PFRET(PFaddCopyVU1IndPrim);
        /* Triangles left over for the following packet(s) */
        numTris -= thisPacketTris;

        /* The first batch of each packet uses the "run" command */
        runCmd = runCmd128;
        while (thisPacketTris)
        {
            RwUInt64     tmp, tmp1;
            u_long128    ltmp;
            RwUInt32     batchCount;

            /* Clamp this batch to what one VU1 batch may hold */
            batchCount = thisPacketTris;
            if (batchCount > MAXVU1BATCHCNT)
            {
                batchCount = MAXVU1BATCHCNT;
            }
            thisPacketTris -= batchCount;

            /* DMA cnt, STCYCL, and load ITOPS */
            tmp = (1l<<28) | ((RwUInt64)(batchCount*VERTEXSIZE*3) + 2);
            tmp1 = (((1l<<24) | (4<<8) | (4))<<32) |
                ((4l<<24) | (batchCount));
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            /* UNPACK V4-32 & STMASK */
            tmp1 = (((0x7cl<<24) |
                     ((batchCount*VERTEXSIZE*3)<<16) |
                     0x8000l)<<32);
            tmp = (0x20l<<24) | ((0xC000C000l) << 32);
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            while (batchCount--)
            {
                /* vertInd is 1 or 2, and vertInd^3 is the other of the two */
                RwIm3DVertex *v1 = &verts[indices[0]];
                RwIm3DVertex *v2 = &verts[indices[vertInd]];
                RwIm3DVertex *v3 = &verts[indices[vertInd^3]];

                /* Dispatch the verts for each triangle */
                DISPATCHVERT(v1);
                DISPATCHVERT(v2);
                DISPATCHVERT(v3);

                /* Ready for next triangle: slide on one index and flip
                 * the order of the other two */
                indices++;
                vertInd ^= 3;
            }

            /* Kick the batch off */
            SWEADDCONTFAST(runCmd);

            /* Next time do a continue */
            runCmd = contCmd128;
        }
        PFCALL(PFaddCopyVU1IndPrim);
        closeAndDispatchVU1Packet();

        /* Send it on its merry way */
        _sweFlush();
    }
    PFRET(PFaddCopyVU1IndPrim);

    RWRETURNVOID();
}

/*
 * addCopyVU1IndPrimTriFan
 *
 * Copies an indexed triangle fan into VU1 packets and dispatches it as
 * individual triangles.  A fan of numIndices indices gives numIndices - 2
 * triangles, or none when there are fewer than three indices.  Index values
 * are not range-checked here.  With RWMETRICS defined the triangle counters
 * are increased by that amount up front.
 *
 * Every triangle uses the vertex selected by the very first index (captured
 * in v1 before indices is advanced) together with the vertices selected by
 * the next two indices of the sliding window.  The first index is only read
 * when there is at least one triangle to draw, so an empty index list is
 * never dereferenced.
 *
 * createAndInitCopyTriPacket() is called repeatedly with the number of
 * triangles still outstanding; its return value is used as the number of
 * triangles to put in the current packet and a zero return ends the loop.
 * Within a packet the triangles are written in batches of at most
 * MAXVU1BATCHCNT; the first batch is followed by runCmd128 and every later
 * one by contCmd128.
 */
static void
addCopyVU1IndPrimTriFan(const _skyImDispatchModel *dispatchModel,
                     const RwMatrix *matrix,
                     RwIm3DVertex *verts,
                     RwImVertexIndex *indices, RwUInt32 numIndices)
{
    RwUInt32 numTris = (numIndices > 2) ? (numIndices - 2) : 0;
    RwUInt32 thisPacketTris;
    /* Hub of the fan; do not touch indices[0] if nothing will be drawn */
    RwIm3DVertex *v1 = (numTris > 0) ?
        &verts[indices[0]] : (RwIm3DVertex *)NULL;

    RWFUNCTION(RWSTRING("addCopyVU1IndPrimTriFan"));

#ifdef RWMETRICS
    RWSRCGLOBAL(metrics)->numTriangles += numTris;
    RWSRCGLOBAL(metrics)->numProcTriangles += numTris;
#endif /* RWMETRICS */

    PFCALL(PFaddCopyVU1IndPrim);
    while ((thisPacketTris =
            createAndInitCopyTriPacket(dispatchModel,
                                       numTris, matrix)))
    {
        u_long128  runCmd;
        PFRET(PFaddCopyVU1IndPrim);
        /* Triangles left over for the following packet(s) */
        numTris -= thisPacketTris;

        /* The first batch of each packet uses the "run" command */
        runCmd = runCmd128;
        while (thisPacketTris)
        {
            RwUInt64     tmp, tmp1;
            u_long128    ltmp;
            RwUInt32     batchCount;

            /* Clamp this batch to what one VU1 batch may hold */
            batchCount = thisPacketTris;
            if (batchCount > MAXVU1BATCHCNT)
            {
                batchCount = MAXVU1BATCHCNT;
            }
            thisPacketTris -= batchCount;

            /* DMA cnt, STCYCL, and load ITOPS */
            tmp = (1l<<28) | ((RwUInt64)(batchCount*VERTEXSIZE*3) + 2);
            tmp1 = (((1l<<24) | (4<<8) | (4))<<32) |
                ((4l<<24) | (batchCount));
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            /* UNPACK V4-32 & STMASK */
            tmp1 = (((0x7cl<<24) |
                     ((batchCount*VERTEXSIZE*3)<<16) |
                     0x8000l)<<32);
            tmp = (0x20l<<24) | ((0xC000C000l) << 32);
            MAKE128(ltmp, tmp1, tmp);
            SWEADDCONTFAST(ltmp);

            while (batchCount--)
            {
                RwIm3DVertex *v2 = &verts[indices[1]];
                RwIm3DVertex *v3 = &verts[indices[2]];

                /* Dispatch the verts for each triangle */
                DISPATCHVERT(v1);
                DISPATCHVERT(v2);
                DISPATCHVERT(v3);

                /* Ready for next triangle: slide the rim on one index */
                indices++;
            }

            /* Kick the batch off */
            SWEADDCONTFAST(runCmd);

            /* Next time do a continue */
            runCmd = contCmd128;
        }
        PFCALL(PFaddCopyVU1IndPrim);
        closeAndDispatchVU1Packet();

        /* Send it on its merry way */
        _sweFlush();
    }
    PFRET(PFaddCopyVU1IndPrim);

    RWRETURNVOID();
}

/*
 * Number of entries in the dispatch table declared inside
 * addCopyVU1IndPrimMacro.  Only meaningful where
 * SkyAddCopyVU1IndPrimFuncArray is in scope, i.e. inside that macro's
 * expansion.
 */
#define SkyAddCopyVU1IndPrimFuncCount                   \
    ( sizeof(SkyAddCopyVU1IndPrimFuncArray)/            \
      sizeof(SkyAddCopyVU1IndPrimFuncArray[0]) )

/*
 * addCopyVU1IndPrimMacro
 *
 * Dispatches an indexed primitive to the addCopyVU1IndPrim* function that
 * matches _primType, using a table indexed by the primitive type value.
 * A _primType of 0 or one beyond the end of the table is silently ignored.
 * _dispatchModel, _matrix, _verts, _indices and _numIndices are forwarded
 * unchanged to the selected function.
 */
#define addCopyVU1IndPrimMacro(_dispatchModel, _matrix,                       \
                               _primType, _verts, _indices, _numIndices)      \
MACRO_START                                                                   \
{                                                                             \
    static SkyAddCopyVU1IndPrimFunc SkyAddCopyVU1IndPrimFuncArray[] =         \
    {                                                                         \
        (SkyAddCopyVU1IndPrimFunc)NULL,  /* rwPRIMTYPENAPRIMTYPE = 0 */       \
        addCopyVU1IndPrimLineList,       /* rwPRIMTYPELINELIST = 1*/          \
        addCopyVU1IndPrimPolyLine,       /* rwPRIMTYPEPOLYLINE = 2*/          \
        addCopyVU1IndPrimTriList,        /* rwPRIMTYPETRILIST = 3*/           \
        addCopyVU1IndPrimTriStrip,       /* rwPRIMTYPETRISTRIP = 4*/          \
        addCopyVU1IndPrimTriFan          /* rwPRIMTYPETRIFAN = 5 */           \
    };                                                                        \
                                                                              \
    PFENTRY(PFaddCopyVU1IndPrim);                                             \
    RWASSERT(RWMATRIXALIGNMENT((_matrix)));                                   \
    RWASSERT((_verts));                                                       \
    RWASSERT((_indices));                                                     \
                                                                              \
    /* Nasty hack here for code reuse.  We know that 2D and 3D immediate mode \
     * vertices are the same.  We can therefore use the same code to          \
     * index the arrays and dispatch them...!  Just have some asserts here to \
     * protect against the day they change (and we have to write a whole lot  \
     * more code).                                                            \
     */                                                                       \
    RWASSERT(sizeof(RwIm3DVertex) == sizeof(RwIm2DVertex));                   \
                                                                              \
    if ( ((_primType) >0) &&                                                  \
         ((_primType) < SkyAddCopyVU1IndPrimFuncCount) )                      \
    {                                                                         \
        SkyAddCopyVU1IndPrimFunc func =                                       \
            SkyAddCopyVU1IndPrimFuncArray[(_primType)];                       \
                                                                              \
        func((_dispatchModel), (_matrix),                                     \
             (_verts), (_indices), (_numIndices));                            \
    }                                                                         \
                                                                              \
    PFEXIT(PFaddCopyVU1IndPrim);                                              \
}                                                                             \
MACRO_STOP

/*
 * SkyIm2DRenderIndexedPrimitive
 *
 * 2D immediate mode indexed primitive handler.  Chooses the fogged or
 * unfogged 2D dispatch model from the TRANSFOG bit of skyTransType and
 * passes the vertices and indices to addCopyVU1IndPrimMacro with a NULL
 * matrix.
 *
 * primType   - primitive type; the visible part of the macro only
 *              dispatches values strictly between 0 and
 *              SkyAddCopyVU1IndPrimFuncCount and otherwise does nothing
 * verts      - 2D vertices, handed on as RwIm3DVertex (the macro asserts
 *              that the two vertex types have the same size)
 * numVerts   - only read when RWMETRICS is defined
 * indices    - indices into verts
 * numIndices - number of entries in indices
 *
 * Always returns TRUE.
 */
static RwBool
SkyIm2DRenderIndexedPrimitive(RwPrimitiveType primType,
                              RwIm2DVertex *verts,
                              RwInt32 __RWUNUSED__ numVerts,
                              RwImVertexIndex *indices,
                              RwInt32 numIndices)
{
    const _skyImDispatchModel *dispatch2D;

    RWFUNCTION(RWSTRING("SkyIm2DRenderIndexedPrimitive"));

    /* Fogging on or off decides which 2D dispatch model is used */
    if (skyTransType & TRANSFOG)
    {
        dispatch2D = &dispatchModel2DFog;
    }
    else
    {
        dispatch2D = &dispatchModel2DNoFog;
    }

    /* No matrix is supplied on the 2D path */
    addCopyVU1IndPrimMacro(dispatch2D,
                           (const RwMatrix *)NULL,
                           primType,
                           (RwIm3DVertex *)verts,
                           indices,
                           numIndices);

#ifdef RWMETRICS
    /* numTriangles and numProcTriangles are updated in addCopyVU1IndPrim */
    RWSRCGLOBAL(metrics)->numVertices += numVerts;
#endif /* RWMETRICS */

    RWRETURN(TRUE);
}

/*
 * _rwSkyOverloadPipelines
 *
 * Stores the SkyIm2D* handlers into the four 2D immediate mode function
 * pointers of the global dOpenDevice, then calls initVu1DispatchStuff().
 */
void
_rwSkyOverloadPipelines(void)
{
    RWFUNCTION(RWSTRING("_rwSkyOverloadPipelines"));
    PFENTRY(PF_rwSkyOverloadPipelines);

    /* Primitive list entries, indexed and non-indexed */
    RWSRCGLOBAL(dOpenDevice).fpIm2DRenderIndexedPrimitive =
        SkyIm2DRenderIndexedPrimitive;
    RWSRCGLOBAL(dOpenDevice).fpIm2DRenderPrimitive =
        SkyIm2DRenderPrimitive;

    /* Triangle and line entries */
    RWSRCGLOBAL(dOpenDevice).fpIm2DRenderTriangle =
        SkyIm2DRenderTriangle;
    RWSRCGLOBAL(dOpenDevice).fpIm2DRenderLine =
        SkyIm2DRenderLine;

    /* The call is bracketed by the PFCALL/PFRET profiling markers */
    PFCALL(PF_rwSkyOverloadPipelines);
    initVu1DispatchStuff();
    PFRET(PF_rwSkyOverloadPipelines);

    PFEXIT(PF_rwSkyOverloadPipelines);
    RWRETURNVOID();
}

/*
 * _rwSkyUnOverloadPipelines
 *
 * Currently has no work to do: the body is empty, so the 2D immediate
 * mode entries that _rwSkyOverloadPipelines() wrote into dOpenDevice are
 * left exactly as they are.
 */
void
_rwSkyUnOverloadPipelines(void)
{
    RWFUNCTION(RWSTRING("_rwSkyUnOverloadPipelines"));

    RWRETURNVOID();
}

/* Half the video mode width/height is subtracted from this value to form
 * element 0 of skyClipVect1/skyClipVect2 in _rwSkyPipeSetupForCamera().
 */
#define CLIP_DIAMETER ((RwReal)2047.9374)

/*
 * _rwSkyPipeSetupForCamera
 *
 * Recomputes the camera dependent constants held in the vu1Data*,
 * xMaxYMax128, skyClipVect* and skyCClipVect* globals from the given
 * camera, its frame buffer raster and the current video mode, and sets or
 * clears the TRANSISO bit of skyTransType to match the projection type.
 *
 * camera - camera to set up for; it and its frameBuffer are asserted to
 *          be non-NULL.
 */
void
_rwSkyPipeSetupForCamera(RwCamera * camera)
{
    /* Every destination is addressed as an array of four RwReals */
    RwReal             *const nearVec = ((RwReal *) & vu1DataNearClip);
    RwReal             *const farVec = ((RwReal *) & vu1DataFarClip);
    RwReal             *const scaleVec = ((RwReal *) & vu1DataXYZScale);
    RwReal             *const shiftVec = ((RwReal *) & vu1DataXYZShift);
    RwReal             *const offset2DVec = ((RwReal *) & vu1DataOffset2D);
    RwReal             *const offset3DVec = ((RwReal *) & vu1DataOffset3D);
    RwReal             *const fogVec = ((RwReal *) & xMaxYMax128);
    RwReal             *const clipVec1 = ((RwReal *) & skyClipVect1);
    RwReal             *const clipVec2 = ((RwReal *) & skyClipVect2);
    RwReal             *const cclipVec1 = ((RwReal *) & skyCClipVect1);
    RwReal             *const cclipVec2 = ((RwReal *) & skyCClipVect2);
    RwRaster           *raster;
    RwReal              nearZ;
    RwReal              farZ;
    RwReal              invRange;
    RwReal              extent;
    RwReal              zTerm1;
    RwReal              zTerm2;
    int                 lane;

    RWFUNCTION(RWSTRING("_rwSkyPipeSetupForCamera"));
    PFENTRY(PF_rwSkyPipeSetupForCamera);
    RWASSERT(camera);

    raster = camera->frameBuffer;

    RWASSERT(raster);

    nearZ = camera->nearPlane;
    farZ = camera->farPlane;

    /* nearClip and farClip (in W), replicated into all four elements */
    for (lane = 0; lane < 4; lane++)
    {
        nearVec[lane] = nearZ;
        farVec[lane] = farZ;
    }

    /* camW, camH, zScale */
    scaleVec[0] = (RwReal) (raster->width);
    scaleVec[1] = (RwReal) (raster->height);
    scaleVec[2] = camera->zScale;
    scaleVec[3] = ((RwReal) 0);

    /* OffX, OffY, zShift.  The raster offsets are now taken account of
     * elsewhere, so X and Y are left at zero here.
     */
    shiftVec[0] = (RwReal) 0;
    shiftVec[1] = (RwReal) 0;
    shiftVec[2] = camera->zShift;
    shiftVec[3] = ((RwReal) 0);

    /* floating offset (2d - where top left of screen is 0, 0) */
    offset2DVec[0] = ((RwReal) (raster->nOffsetX + 2048 -
                                (raster->parent->width / 2)));
    offset2DVec[1] = ((RwReal) (raster->nOffsetY + 2048 -
                                (raster->parent->height / 2)));
    offset2DVec[2] = ((RwReal) 0);
    offset2DVec[3] = ((RwReal) 0);

    /* floating offset (3d - where center of screen is 0, 0): the same as
     * the 2d offset plus half the size of the frame buffer raster
     */
    offset3DVec[0] = ((RwReal) (raster->nOffsetX + 2048 -
                                (raster->parent->width / 2) +
                                (raster->width / 2)));
    offset3DVec[1] = ((RwReal) (raster->nOffsetY + 2048 -
                                (raster->parent->height / 2) +
                                (raster->height / 2)));
    offset3DVec[2] = ((RwReal) 0);
    offset3DVec[3] = ((RwReal) 0);

    /* We hide the two fog values in the top of xMaxYMax128:
     *   [3] = far fog distance
     *   [2] = -255 / (far fog distance - camera fog plane)
     * The test has to be made here, since each camera could have a
     * different far clip plane.
     */
    if (!useFarClip)
    {
        /* Use the rpSKYRENDERSTATEFARFOGPLANE RenderState */
        fogVec[3] = farFogPlane;
        fogVec[2] =
            ((RwReal)
             (((RwReal) -255) / ((farFogPlane) - (camera->fogPlane))));
    }
    else
    {
        /* Fog ends at the camera's far clip plane */
        fogVec[3] = farZ;
        fogVec[2] = ((RwReal) (((RwReal) -255) /
                               (farZ - (camera->fogPlane))));
    }

    /* Reciprocal of the distance between the near and far planes */
    invRange = ((RwReal) 1) / (farZ - nearZ);

    /* skyClipVect1: CLIP_DIAMETER less half the video mode width, its
     * reciprocal, and the far plane in element 3
     */
    extent = ((RwReal) (CLIP_DIAMETER -
                        (RwReal) ((skyVideoMode->width >> 1))));
    clipVec1[0] = extent;
    clipVec1[1] = ((RwReal) (((RwReal) 1) / extent));
    clipVec1[3] = farZ;

    /* skyClipVect2: the same using the video mode height, with the near
     * plane in element 3
     */
    extent = ((RwReal) (CLIP_DIAMETER -
                        (RwReal) ((skyVideoMode->height >> 1))));
    clipVec2[0] = extent;
    clipVec2[1] = ((RwReal) (((RwReal) 1) / extent));
    clipVec2[3] = nearZ;

    /* skyCClipVect1: half the video mode width plus 5, its reciprocal,
     * and the far plane in element 3
     */
    extent = ((RwReal) ((skyVideoMode->width >> 1) + 5));
    cclipVec1[0] = extent;
    cclipVec1[1] = ((RwReal) (((RwReal) 1) / extent));
    cclipVec1[3] = farZ;

    /* skyCClipVect2: the same using the video mode height, with the near
     * plane in element 3
     */
    extent = ((RwReal) ((skyVideoMode->height >> 1) + 5));
    cclipVec2[0] = extent;
    cclipVec2[1] = ((RwReal) (((RwReal) 1) / extent));
    cclipVec2[3] = nearZ;

    /* Element 2 of all four clip vectors depends on the projection type,
     * as does the TRANSISO bit of skyTransType
     */
    if (rwPERSPECTIVE == camera->projectionType)
    {
        zTerm1 = ((-((RwReal) 2) * farZ * nearZ * invRange));
        zTerm2 = ((RwReal) ((farZ + nearZ) * invRange));
        skyTransType &= ~TRANSISO;
    }
    else
    {
        zTerm1 = ((RwReal) ((farZ + nearZ) * -invRange));
        zTerm2 = (((RwReal) 2) * invRange);
        skyTransType |= TRANSISO;
    }

    cclipVec1[2] = zTerm1;
    clipVec1[2] = zTerm1;
    cclipVec2[2] = zTerm2;
    clipVec2[2] = zTerm2;

    PFEXIT(PF_rwSkyPipeSetupForCamera);
    RWRETURNVOID();

}
