use POSIX;  #for ceil & floor fns

#each line of the configuration file is parsed to generate a shader.
#three-argument open keeps the mode apart from the file name, and $! says why a failure happened
open (SHADERCFGFILE, "<", "shadercfg.txt") or die "couldn't open shadercfg.txt: $!";

#generate shaderglue.c to include all the shaders headers into one place.
open (SHADERGLUEFILE, ">", "shaderglue.c") or die "couldn't create file shaderglue.c: $!";

#fixed preamble of the generated C file
print SHADERGLUEFILE <<"END";
/* shadergen.pl generated this file */
#include <xtl.h>
#include <d3d8.h>
#include "shaderdesc.h"
#include "skindefs.h"

END

#so we can address xyzw components of registers by number
@elements = ("x", "y", "z", "w");

#matfx "enum" - a shader has at most one of these, 0 means no matfx
$bump = 1;
$env = 2;
$bumpenv = 3;
$dual = 4;

#names of the shader descriptors generated so far; filled once per shader and
#written into shaderglue.c at the end. the name has to match the one used further
#down the script, otherwise this initialization applies to a variable nobody reads.
@shaderdescary = ();

#some fun stats
$numShadersGenerated = 0;
$totalSlots = 0;
$totalStalls = 0;
$totalCycles = 0;
$minmaxBones = 9999; #start big & replace as we find shader that can only handle a smaller number

foreach $line (<SHADERCFGFILE>)
{   
    chomp($line);

    #break the line up by whitespace into shader features.
    #split ' ' ignores leading blanks and treats any run of whitespace as one separator,
    #so doubled spaces, tabs or a stray carriage return don't turn into bogus empty features
    @features = split ' ', $line;

    #per-shader settings, reset for every configuration line
    $weights = 0;   #number of bone weights per vertex (WGT1..WGT4)
    $direct = 0;    #number of directional lights (DIRn)
    $point = 0;     #number of point lights (PNTn)
    $matfx = 0;     #one of the matfx "enum" values, 0 for none
    $normals = 0;   #set as soon as any feature needs the vertex normal
    $prelit = 0;    #set by VERTCOL

    foreach $feature (@features)
    {
        #the numeric features are matched against the whole token, so something like
        #WGT12 or DIR2x is reported as a syntax error instead of being half-parsed

        #how many weights?
        if ($feature =~ /^WGT([1-4])$/)
        {
            $weights = $1;
        }
        elsif ($feature =~ /^DIR(\d+)$/)
        {
            $direct = $1;
            $normals = 1; #now we need normals too
        }
        elsif ($feature =~ /^PNT(\d+)$/)
        {
            $point = $1;
            $normals = 1; #now we need normals too
        }
        elsif ($feature eq "VERTCOL")
        {
            $prelit = 1;
        }
        elsif ($feature eq "ENV")
        {
            $matfx == 0 or die "can't mix matfx";
            $matfx = $env;
            $envTexStage = 1; #texture stage that receives the env map coordinates
            $normals = 1; #now we need normals too
        }
        elsif ($feature eq "DUAL")
        {
            $matfx == 0 or die "can't mix matfx";
            $matfx = $dual;
        }
        elsif ($feature eq "BUMP")
        {
            $matfx == 0 or die "can't mix matfx";
            $matfx = $bump;
            $normals = 1; #now we need normals too
        }
        elsif ($feature eq "ENVBUMP")
        {
            $matfx == 0 or die "can't mix matfx";
            $envTexStage = 2; #texture stage that receives the env map coordinates
            $matfx = $bumpenv;
            $normals = 1; #now we need normals too
        }
        else
        {
            die "syntax error $feature of $line";
        }
    }

#################################################################################################################################
# GENERATE SHADER
#################################################################################################################################
    
    #the shader name is simply every feature setting strung together
    $shadername = join "_", ("matfx$matfx", "pre$prelit", "wgt$weights", "dir$direct", "pnt$point");

    #progress output; the statistics for this shader are appended to the same line later on
    print $shadername;

    #create shaders/<name>.vsh for writing
    open (SHADERFILE, ">shaders/$shadername.vsh")
        or die "couldn't create file $shadername.vsh";

    #fixed assembler header, followed by the file name as a comment
    print SHADERFILE
        "xvs.1.1\n",
        "#pragma screenspace\n",
        "#include \"skindefs.h\"\n",
        ";file $shadername.vsh\n";

#################################################################################################################################
# BONES
#################################################################################################################################
    
    #offset added to the bone index when addressing the bone matrix constants;
    #it grows by 2 for every light plus 1 for every group of up to four point lights
    $boneStart = ceil($point / 4) + 2 * ($direct + $point) - 82;
#print SHADERFILE << "END";
#    mul VSTMP_REG_ILU_TMP, VSIN_REG_INDICES, c[VSCONST_REG_SCREENSPACE_OFFSET].wwww
#END

    #one block of instructions per bone weight
    $wi = 0;
    while ($wi < $weights)
    {
        #component letter that selects this bone's index and weight
        $boneComp = $elements[$wi];

        #the first bone starts the accumulators (mul), every later bone adds to them (mad)
        $firstBone = ($wi == 0);

print SHADERFILE <<"BONE";
    ; Bone $wi ------------------------------------------------------------------------------------
    mov     a0.x, VSIN_REG_INDICES.$boneComp

    ; Transform position
    dp4\t\tVSTMP_REG_POS_TMP.x, VSIN_REG_POS, c[0 + a0.x + $boneStart]
    dp4\t\tVSTMP_REG_POS_TMP.y, VSIN_REG_POS, c[1 + a0.x + $boneStart]
    dp4\t\tVSTMP_REG_POS_TMP.z, VSIN_REG_POS, c[2 + a0.x + $boneStart]

    ; Scale transformed point by weight
BONE

        if ($firstBone)
        {
            print SHADERFILE
                "    mul     VSTMP_REG_POS_ACCUM.xyz, VSIN_REG_WEIGHTS.$boneComp, VSTMP_REG_POS_TMP.xyz\n",
                "    \n";
        }
        else
        {
            print SHADERFILE
                "    mad     VSTMP_REG_POS_ACCUM.xyz, VSIN_REG_WEIGHTS.$boneComp, VSTMP_REG_POS_TMP.xyz, VSTMP_REG_POS_ACCUM.xyz\n",
                "    \n";
        }

        #normals get the same treatment, but only when a feature asked for them
        if ($normals)
        {
print SHADERFILE <<"BONE";
    ; Transform normal
    dp3\t\tVSTMP_REG_NORMAL_TMP.x, VSIN_REG_NORMAL, c[0 + a0.x + $boneStart]
    dp3\t\tVSTMP_REG_NORMAL_TMP.y, VSIN_REG_NORMAL, c[1 + a0.x + $boneStart]
    dp3\t\tVSTMP_REG_NORMAL_TMP.z, VSIN_REG_NORMAL, c[2 + a0.x + $boneStart]

    ; Scale transformed normal by weight
BONE

            if ($firstBone)
            {
                print SHADERFILE
                    "    mul     VSTMP_REG_NORMAL_ACCUM.xyz, VSIN_REG_WEIGHTS.$boneComp, VSTMP_REG_NORMAL_TMP.xyz\n",
                    "    \n";
            }
            else
            {
                print SHADERFILE
                    "    mad     VSTMP_REG_NORMAL_ACCUM.xyz, VSIN_REG_WEIGHTS.$boneComp, VSTMP_REG_NORMAL_TMP.xyz, VSTMP_REG_NORMAL_ACCUM.xyz\n",
                    "    \n";
            }
        }

        $wi++;
    }
    
    #emitted once, after the per-bone loop, and only when normals are in use
    print SHADERFILE <<"NORMALIZE" if $normals;
    ; Normalize normal
    dp3     VSTMP_REG_NORMAL_ACCUM.w, VSTMP_REG_NORMAL_ACCUM, VSTMP_REG_NORMAL_ACCUM
    rsq     VSTMP_REG_NORMAL_ACCUM.w, VSTMP_REG_NORMAL_ACCUM.w
    mul     VSTMP_REG_NORMAL_ACCUM.xyz, VSTMP_REG_NORMAL_ACCUM.xyz, VSTMP_REG_NORMAL_ACCUM.w

NORMALIZE

    #every shader seeds the color accumulator from the ambient constant
    print SHADERFILE <<"AMBIENT";
    ;start with ambient color, (0,0,0,1) if theres no amb, but lit, or (1,1,1,1) if unlit.
    mov     VSTMP_REG_COLOR_TMP, VSCONST_REG_AMBIENT

AMBIENT

    #VERTCOL shaders add the per-vertex color on top
    print SHADERFILE <<"PRELIT" if $prelit;
    ;add in prelit vertex color
    add     VSTMP_REG_COLOR_TMP, VSIN_REG_COLOR, VSTMP_REG_COLOR_TMP

PRELIT
    
#################################################################################################################################
# DIRECTIONALS
#################################################################################################################################

print SHADERFILE "    ; Directional lights ---------------------------------------------------------------------------\n";

    #lights are handled in groups of up to 4 so a single max instruction clamps a whole group
    for ($dir = 0; $dir < $direct; $dir += 4)
    {
        #size of this group: 4, or whatever is left over for the last one
        $remaining = $direct - $dir;
        $maxblock = ($remaining < 4) ? $remaining : 4;

        #the max has to be masked to the components written below,
        #or we get a compile error about uninitialized reads
        $mask = join "", @elements[0 .. $maxblock - 1];

        #N dot L for each light of the group, one component per light
        foreach $lane (0 .. $maxblock - 1)
        {
            $swz = $elements[$lane];
            $lightConst = ($dir + $lane) * 2; #2 constants per directional light

            print SHADERFILE
                "    ;diffuse intensity of directional light max(N dot L, 0)\n",
                "    dp3     VSTMP_REG_CLAMP_TMP.$swz, c[$lightConst + VSCONST_REG_DIR_LIGHT_OFFSET], VSTMP_REG_NORMAL_ACCUM\n",
                "    \n";
        }

        #clamp the whole group in one go
        print SHADERFILE
            "    max     VSTMP_REG_CLAMP_TMP.$mask, VSCONST_REG_LIGHT_DIR.w, -VSTMP_REG_CLAMP_TMP.$mask\n",
            "    \n";

        #accumulate each light's second constant scaled by its clamped intensity
        foreach $lane (0 .. $maxblock - 1)
        {
            $swz = $elements[$lane];
            $lightConst = ($dir + $lane) * 2;

            print SHADERFILE
                "    mad     VSTMP_REG_COLOR_TMP.xyz, c[$lightConst + 1 + VSCONST_REG_DIR_LIGHT_OFFSET].xyz, VSTMP_REG_CLAMP_TMP.$swz$swz$swz, VSTMP_REG_COLOR_TMP.xyz\n",
                "    \n";
        }
    }
    
#################################################################################################################################
# POINTS
#################################################################################################################################
#emit the point light section of the shader. the header comment is written for every
#shader; the instructions below are only generated when $point is greater than zero.
print SHADERFILE <<"END";
    ; Point lights ---------------------------------------------------------------------------
    ; Renderware point light equation attenuates diffuse intensity against radius of light
    ; L = v - l
    ; max(n dot L, 0) * (max( 1/|L|, 1/r ) - 1/r) * color

END
   
   #constant layout used below, relative to VSCONST_REG_DIR_LIGHT_OFFSET:
   #each block of up to 4 point lights starts with one constant of radii (one component
   #per light), followed by 2 constants per light
   $pointOffset = $direct * 2; #point offsets start just after directional constants
   
   #do point lights in blocks of 4 to vectorize intensity calculations where possible
   for ($pnt = 0; $pnt < $point; $pnt+=4)
   {
        #if there are less than 4 to go, just pack in as many as we have left
        $maxblock = 4;
        if ($point - $pnt < $maxblock)
        {
            $maxblock = $point - $pnt;
        }
                    
        #write mask covering exactly the components used by this block
        $mask = "";

        #first pass: per light, compute 1/distance and N dot L into component $m
        for ($m=0; $m<$maxblock; $m++)
        {
            $lightnum = $pnt + $m;
            
            $constantDirOffset = $pointOffset   #start of block of 4 light constants
                                + 1             #skip radii
                                + $m * 2;       #2 constants per light thereafter
            
            $mask = $mask.$elements[$m];

print SHADERFILE <<"END";        
    ;vector from vertex to light $lightnum
    add     VSTMP_REG_DIST_TMP.xyz, - c[$constantDirOffset + VSCONST_REG_DIR_LIGHT_OFFSET].xyz, VSTMP_REG_POS_ACCUM.xyz

    ;1/distance to light $lightnum
    dp3     VSTMP_REG_ILU_TMP.$elements[$m], VSTMP_REG_DIST_TMP, VSTMP_REG_DIST_TMP
    rsq     VSTMP_REG_ILU_TMP.$elements[$m], VSTMP_REG_ILU_TMP.$elements[$m]

    ;diffuse intensity of light $lightnum max(N dot L, 0)
    dp3     VSTMP_REG_CLAMP_TMP.$elements[$m], VSTMP_REG_NORMAL_ACCUM, VSTMP_REG_DIST_TMP
    
END
        }
        
        #second pass: attenuation and clamping for the whole block at once, using $mask
print SHADERFILE <<"END";
    ;clamp 1/distance to 1/light radius
    max     VSTMP_REG_ATTEN_TMP.$mask, c[$pointOffset + VSCONST_REG_DIR_LIGHT_OFFSET].$mask, VSTMP_REG_ILU_TMP.$mask

    ;clamp diffuse intensity
    max     VSTMP_REG_CLAMP_TMP.$mask, c[$pointOffset + 1 + VSCONST_REG_DIR_LIGHT_OFFSET].wwww, -VSTMP_REG_CLAMP_TMP.$mask

    ;atten = 1/distance - 1/r
    add     VSTMP_REG_ATTEN_TMP.$mask, -c[$pointOffset + VSCONST_REG_DIR_LIGHT_OFFSET].$mask, VSTMP_REG_ATTEN_TMP.$mask
    
    ;intensity *= attenuation
    mul     VSTMP_REG_CLAMP_TMP.$mask, VSTMP_REG_CLAMP_TMP.$mask, VSTMP_REG_ATTEN_TMP.$mask

END
        #third pass: per light, accumulate the constant after the one used in the first pass
        #(the +1 in the mad below), scaled by the light's final intensity
        for ($m=0; $m<$maxblock; $m++)
        {
            $constantDirOffset = $pointOffset   #start of block of 4 light constants
                                + 1             #skip radii
                                + $m * 2;       #2 constants per light thereafter
            
            $lightnum = $pnt + $m;

print SHADERFILE <<"END";
    ;multiply color by final intensity of light $lightnum and accumulate
    mad     VSTMP_REG_COLOR_TMP.xyz, c[$constantDirOffset + 1 + VSCONST_REG_DIR_LIGHT_OFFSET].xyz, VSTMP_REG_CLAMP_TMP.$elements[$m]$elements[$m]$elements[$m], VSTMP_REG_COLOR_TMP.xyz

END
        }

        #advance to the constants of the next block
        $pointOffset+= 2 * $maxblock + 1; #2 more constants for each point light we did, one constant for 4 radii
   }

#################################################################################################################################
# MATFX
#################################################################################################################################

    #work out which optional sections the selected matfx needs
    $wantsEnvCoords = ($matfx == $env || $matfx == $bumpenv);
    $wantsBumpCoords = ($matfx == $bump || $matfx == $bumpenv);

    if ($wantsEnvCoords)
    {
        #the env map coordinates go to the texture stage picked while parsing the features
        print SHADERFILE map { $_ . "\n" } (
            "    ; env map ---------------------------------------------------------------------------",
            "    ;generate texture coordinates from normals",
            "    dph     oT${envTexStage}.x, VSTMP_REG_NORMAL_ACCUM, c[VSCONST_REG_ENV_OFFSET]",
            "    dph     oT${envTexStage}.y, VSTMP_REG_NORMAL_ACCUM, c[1 + VSCONST_REG_ENV_OFFSET]",
            "    ",
        );
    }

    #stage 0 gets the second coordinate set for DUAL and the first set for everything else
    if ($matfx != $dual)
    {
        print SHADERFILE
            "    ; Copy texture coordinates\n",
            "    mov     oT0, VSIN_REG_TEXCOORDS    \n",
            "\n";
    }
    else
    {
        print SHADERFILE
            "    ; Copy 2nd texture coordinates\n",
            "    mov     oT0, VSIN_REG_TEXCOORDS2\n",
            "\n";
    }

    if ($wantsBumpCoords)
    {
        #bump map coordinates are always written to stage 1
        print SHADERFILE map { $_ . "\n" } (
            "    ; bump map ---------------------------------------------------------------------------",
            "    ; transform normals by world matrix",
            "    dph VSTMP_REG_ILU_TMP.x, VSTMP_REG_NORMAL_ACCUM, c[VSCONST_REG_BUMPWORLD_OFFSET]",
            "    dph VSTMP_REG_ILU_TMP.y, VSTMP_REG_NORMAL_ACCUM, c[1 + VSCONST_REG_BUMPWORLD_OFFSET]",
            "    dph VSTMP_REG_ILU_TMP.z, VSTMP_REG_NORMAL_ACCUM, c[2 + VSCONST_REG_BUMPWORLD_OFFSET]",
            "    ",
            "    ; normalize transformed normal",
            "    dp3     VSTMP_REG_ILU_TMP.w, VSTMP_REG_ILU_TMP, VSTMP_REG_ILU_TMP",
            "    rsq     VSTMP_REG_ILU_TMP.w, VSTMP_REG_ILU_TMP.w",
            "    mul     VSTMP_REG_BUMPDIR_TMP.xyz, VSTMP_REG_ILU_TMP.xyz, VSTMP_REG_ILU_TMP.www",
            "    ",
            "    ; bump dir = normalized transformed normal * fudge + bump pos",
            "    mad     VSTMP_REG_BUMPDIR_TMP.xyz, c[VSCONST_REG_BUMPPOSFUDGE_OFFSET].www, VSTMP_REG_BUMPDIR_TMP.xyz, c[VSCONST_REG_BUMPPOSFUDGE_OFFSET].xyz",
            "    ",
            "    ; normalize bump dir",
            "    dp3     VSTMP_REG_ILU_TMP.w, VSTMP_REG_BUMPDIR_TMP, VSTMP_REG_BUMPDIR_TMP",
            "    rsq     VSTMP_REG_ILU_TMP.w, VSTMP_REG_ILU_TMP.w",
            "    mul     VSTMP_REG_BUMPDIR_TMP.xyz, VSTMP_REG_BUMPDIR_TMP.xyz, VSTMP_REG_ILU_TMP.www",
            "    ",
            "    ; bump uv = bumpDir * bumpShift + tex uv",
            "    mad     oT1.xy, c[VSCONST_REG_BUMPSHIFT_OFFSET].xy, VSTMP_REG_BUMPDIR_TMP.xy, VSIN_REG_TEXCOORDS.xy",
        );
    }

#################################################################################################################################
# OUTPUT
#################################################################################################################################
    
    #chuck as much stuff at the end as possible - the optimizer will bubble up these instructions
    #to avoid stalls in the lighting & matfx code.
    
    #final section, identical for every shader: transform the skinned position, write fog
    #and the output color, then apply the 1/w multiply and screen space offset.
    #nothing in this text is interpolated from the per-shader settings.
print SHADERFILE <<"END";
    ; Transform -----------------------------------------------------------------------------------

    ; Combined camera & projection & screenspace scale matrix
    ; we'd like VSTMP_REG_POS_ACCUM.w to be 1 but that's not possible with the skinning stuff.
    ; so use dph instead of dp4 get the desired result 
    dph		oPos.x, VSTMP_REG_POS_ACCUM, VSCONST_REG_TRANSFORM_X
    dph     oPos.w, VSTMP_REG_POS_ACCUM, VSCONST_REG_TRANSFORM_W
    dph		oPos.y, VSTMP_REG_POS_ACCUM, VSCONST_REG_TRANSFORM_Y
    dph		oPos.z, VSTMP_REG_POS_ACCUM, VSCONST_REG_TRANSFORM_Z

    ;nice trick from the xds.graphics newsgroup - copy the output pos.w to fog
    ;rather than longer calculations from the nVidia samples - seems to work with linear, exp, & exp2 as well!
    ;displaces pairing of mov into oT0 in unlit case, but oh well, probably cheaper than changing shaders anyway.
    mov     oFog.x, r12.w                                           ;dph is done by now

    ;compute 1/w
    + rcc VSTMP_REG_ILU_TMP.x, r12.w
        
    ;multiply accumulated color by material color & output
    ;so if unlit, put (1,1,1,1) in VSCONST_REG_AMBIENT
    mul     oD0, VSCONST_REG_MAT_COLOR, VSTMP_REG_COLOR_TMP
           
    ;multiply 1/w through position & add screen space transform offset 
    mad oPos.xyz, r12, VSTMP_REG_ILU_TMP.x, c[VSCONST_REG_SCREENSPACE_OFFSET]

END
    
#################################################################################################################################
# DONE SHADER
#################################################################################################################################

    close SHADERFILE;

    #call xsasm to assemble it; the steps below read shaders\<name>.h and
    #shaders\<name>.lst, which xsasm is expected to have produced
    $vshPath = "shaders\\$shadername.vsh";
    @systemargs = ("xsasm", "-nologo", "-h", "-hname $shadername", "-l", $vshPath);
    if (system(@systemargs) != 0)
    {
        die "system @systemargs failed: $?";
    }

    #copy the generated header into shaderglue.c, prefixed with "static"
    $headerPath = "shaders\\$shadername.h";
    open (SHADERHFILE, $headerPath) or die "couldn't open file $headerPath";
    print SHADERGLUEFILE "static \n", <SHADERHFILE>;
    close SHADERHFILE;

    #describe the shader: the feature settings it was built from, then the shader itself
    print SHADERGLUEFILE
        "static _rpSkinXboxShaderDesc ${shadername}_desc = ",
        "{$matfx, $prelit, $weights, $direct, $point, $shadername };\n",
        "\n";
    
    #print out optimization summary taken from the assembler listing
    $lstPath = "shaders\\$shadername.lst";
    open (SHADERLSTFILE, $lstPath) or die "couldn't open file $lstPath";
    foreach $lstLine (<SHADERLSTFILE>)
    {
        #only lines mentioning "microcode" carry the counts
        next unless $lstLine =~ /microcode/;

        #space-separated fields 5 and 7 are read as the slot and cycle counts
        ($slots, $reportedCycles) = (split / /, $lstLine)[5, 7];

        #As of the August XDK cycle counts are divided by 2 to reflect
        #how long (in GPU clock cycles) the shader is going to take.
        #But I just want to count stalls, so multiply it by 2 to make it right!
        $cycles = 2 * $reportedCycles;
        $stalls = $cycles - $slots;

        $totalSlots += $slots;
        $totalCycles += $cycles;
        $totalStalls += $stalls;

        print " slots $slots stalls $stalls";
    }
    close SHADERLSTFILE;

    #how many bones fit: 3 constants per bone from $boneStart up to and including 95
    #NOTE(review): 95 looks like the last constant index usable for bones - confirm
    $maxBones = floor((96 - $boneStart) / 3);
    print " maxbones $maxBones \n";

    #remember the tightest limit over all shaders for the final summary
    $minmaxBones = $maxBones if $maxBones < $minmaxBones;

    #collect the descriptor names in one big long array
    push @shaderdescary, "${shadername}_desc";
    $numShadersGenerated++;
}

#one-line summary of the whole run
print join(" ",
    $numShadersGenerated, "shaders generated",
    $totalSlots, "slots",
    $totalStalls, "stalls",
    $totalCycles, "cycles",
    $minmaxBones, "bones max"), "\n";

#alphabetize them here rather than at runtime - use it to look up best shader efficiently
@shaderdescary = sort { $a cmp $b } @shaderdescary;

#generate shaderglue.c trailer: the descriptor count, then the opening of the descriptor table
print SHADERGLUEFILE
    "int _rpSkinXboxGetNumShaderDesc()\n",
    "{\n",
    "    return $numShadersGenerated;\n",
    "}\n",
    "\n",
    "_rpSkinXboxShaderDesc ** _rpSkinXboxGetShaderDesc()\n",
    "{\n",
    "static _rpSkinXboxShaderDesc *shaderDescList[] = {\n";

#one table entry per generated shader, in sorted order
print SHADERGLUEFILE "\t&$_,\n" foreach @shaderdescary;

#close the table and the accessor function
print SHADERGLUEFILE
    "    };\n",
    "    \n",
    "    return shaderDescList;\n",
    "}\n";

#################################################################################################################################
# VERTEX FORMATS
#################################################################################################################################

#print out some shader format declarations for use with the shaders and vertex formats
#could just generate this at runtime and save a few bytes on the lib size I guess. :)

#names of all declarations, in the order they are written out
@vertexFormatNames = ();

#one declaration for every combination of weight count, normals, color and texture coordinate sets
foreach $wgt (1 .. 4)
{
    foreach $nor (0, 1)
    {
        foreach $col (0, 1)
        {
            foreach $tex (0 .. 2)
            {
                $vertexFormatName = "vertexFormatDeclWgt${wgt}Nor${nor}Col${col}Tex${tex}";
                push @vertexFormatNames, $vertexFormatName;

                #position, weights and indices are part of every format
                @declLines = (
                    "static DWORD ${vertexFormatName}[] =",
                    "{",
                    "    D3DVSD_STREAM( 0 ),",
                    "    D3DVSD_REG( VSD_REG_POS,       D3DVSDT_FLOAT3 ),        /* Position */",
                    "    D3DVSD_REG( VSD_REG_WEIGHTS,   D3DVSDT_PBYTE$wgt ),     /* Weights */",
                    "    D3DVSD_REG( VSD_REG_INDICES,   D3DVSDT_SHORT$wgt ),     /* Indices */",
                );

                #optional elements; the normals entry is followed by an empty line in the output
                push @declLines, "      D3DVSD_REG( VSD_REG_NORMAL,   D3DVSDT_NORMPACKED3 ),  /* Normals */", ""
                    if $nor;
                push @declLines, "    D3DVSD_REG( VSD_REG_COLOR,     D3DVSDT_D3DCOLOR ),   /* Diffuse color */"
                    if $col;
                push @declLines, "    D3DVSD_REG( VSD_REG_TEXCOORDS, D3DVSDT_FLOAT2 ),    /* Texture coordinates */"
                    if $tex > 0;
                push @declLines, "    D3DVSD_REG( VSD_REG_TEXCOORDS2, D3DVSDT_FLOAT2 ),    /* Texture coordinates */"
                    if $tex == 2;

                #terminator and closing brace
                push @declLines, "    D3DVSD_END()", "};";

                print SHADERGLUEFILE map { $_ . "\n" } @declLines;
            }
        }
    }
}

#comma separated list of every declaration name, one per line, for the list emitted afterwards
$vertexFormatNameList = join ",\n", @vertexFormatNames;

#table of all vertex format declarations plus the accessors for it
print SHADERGLUEFILE <<"END";
static DWORD *vertexFormatList[] = {
$vertexFormatNameList
};

int _rpSkinXboxGetNumVertexFormats()
{
    return sizeof(vertexFormatList) / sizeof(DWORD*);
}

DWORD ** _rpSkinXboxGetVertexFormatList()
{
    return vertexFormatList;
}
END

#the configuration file has been read completely by now
close SHADERCFGFILE;

#buffered output is flushed on close, so this is where a failed write (e.g. a full disk)
#shows up; stop loudly rather than leave a truncated shaderglue.c behind
close(SHADERGLUEFILE) or die "couldn't finish writing shaderglue.c: $!";
