/* Extracted from the gcc sources */
/* Changes include, but are not limited to:
   remove use of sbrk. We may be called after malloc
   different I/O model
   _mcount stub to get caller/caller parent and chain mcount
   replacement for profil(2)
   gprof magic and file format updates.
*/

#ifdef PGA

#include <stdio.h>
#include <malloc.h>

#include "eekernel.h"
#include "sifdev.h"

#include "drvmodel.h"
#include "skygmon.h"
#include "/usr/local/sce/ee/gcc/src/gprof/gmon_out.h"

#include "basky.h"

static const char rcsid[] __RWUNUSED__ = "@@(#)$Id: skygmon.c,v 1.8 2000/12/07 11:02:40 johns Exp $";


//extern mcount() asm ("mcount");


#if 1
/* Uncached */
#define UCACHEOFFSET (0x20000000)
#else
/* Uncached accelerated */
#define UCACHEOFFSET (0x30000000)
#endif

/* interface to fake profil */

/*
 *	File-scope profiling state shared by monstartup(), skymcount(),
 *	moncontrol() and _mcleanup().
 */

/* Non-zero makes skymcount() return without recording anything.
   Starts at 3 (stopped); moncontrol() sets it to 0 to start and to 3 to
   stop, and skymcount() raises it while it is running. */
static int		profiling = 3;
/* Array of unsigned shorts, one per HASHFRACTION*sizeof(*froms) bytes of
   profiled text; each holds the tos[] index of the first arc recorded for
   that call-site slot, or 0 when there is none. */
static unsigned short	*froms;
/* Arc records.  tos[0].link is used as the counter of records handed out
   so far; the other entries are chained through their link fields. */
static struct tostruct	*tos = 0;
/* Number of records allocated for tos[] (set in monstartup()). */
static int		tolimit = 0;
/* Rounded bounds of the profiled text range and their difference. */
static char		*s_lowpc = 0;
static char		*s_highpc = 0;
static unsigned int	s_textsize = 0;

/* Histogram buffer (a struct gmon_hist_hdr followed by the counters) and
   its total size in bytes; _mcleanup() writes it out in one piece. */
static int	ssiz;
static char	*sbuf;
/* Scale value handed to local_profil() by moncontrol(). */
static int	s_scale;
#define		SCALE_1_TO_1	0x10000L

/* Printed by monstartup() when one of its allocations fails. */
#define	MSG "No space for profiling buffer(s)\n"
 
/*
 * local_sbrk
 *
 * Stand-in for sbrk(): obtains size+128 bytes from malloc(), zero-fills
 * them, calls SyncDCache() over the part that will be used, and returns
 * the address rounded up to a 64 byte boundary with UCACHEOFFSET added
 * (presumably the uncached view of the same memory - confirm against the
 * EE memory map).
 *
 * Returns (void*)-1 when malloc() fails, mirroring sbrk().
 *
 * The pointer returned by malloc() is not kept, so the allocation can
 * never be released; this is deliberate (see the original note about
 * reverse sbrk()).
 */
static void*
local_sbrk(int size)
{
    char *base;
    int  qwords;
    int  n;

    base = malloc(size+128);
    if (base == NULL)
    {
        return((void*)-1);
    }

    /* Clear in 16 byte stores; the allocation is taken to be quadword
       aligned, and up to 15 trailing bytes are left untouched. */
    qwords = (size+128)>>4;
    for (n = 0; n < qwords; n++)
    {
        ((u_long128*)base)[n] = 0;
    }

    /* Sync the data cache over the range before it is used through the
       offset address */
    SyncDCache(base, SCESYNCDCACHEROUNDUP(base+size+64));

    /* Round up to 64 bytes, then move to the UCACHEOFFSET window */
    base = (char*)(((int)base+63)&~63);
    return((void*)(base + UCACHEOFFSET));
}

/*
 * local_profil
 *
 * Replacement for profil(2): stores the histogram buffer, its size, the
 * pc offset and the scale in the swePH* globals, which the comment in
 * the original describes as the interface back into badma.c.
 * Always returns 0.
 */
static int
local_profil(char *buf, size_t bufsize, size_t offset, unsigned int scale)
{
    /* Same store order as always: buffer pointer first */
    swePHbuf     = buf;
    swePHbufsize = bufsize;
    swePHoffset  = offset;
    swePHscale   = scale;

    return 0;
}

/*
 * monstartup
 *
 * Prepares profiling of the text range lowpc..highpc:
 *   - rounds the range to multiples of HISTFRACTION*sizeof(HISTCOUNTER),
 *   - allocates the histogram buffer, the froms[] table and the tos[]
 *     arc pool through local_sbrk(),
 *   - fills in the gmon histogram header at the start of the buffer,
 *   - works out the sampling scale and starts profiling via moncontrol(1).
 *
 * If an allocation fails MSG is printed and the function returns without
 * starting profiling (earlier allocations are not released).
 */
void
monstartup(char *lowpc, char *highpc)
{
    static const char     unitname[] = "seconds";
    struct gmon_hist_hdr *hdr;
    char                 *histbuf;
    int                   histbytes;
    int                   span;
    int                   i;

    /* No sampling buffer installed until moncontrol(1) below */
    swePHbuf = NULL;

    /*
     * Round both ends to the histogram density so that the scaling here
     * and in gprof stays in integers.
     */
    lowpc = (char *) ROUNDDOWN((unsigned) lowpc,
                               HISTFRACTION*sizeof(HISTCOUNTER));
    highpc = (char *) ROUNDUP((unsigned) highpc,
                              HISTFRACTION*sizeof(HISTCOUNTER));
    s_lowpc = lowpc;
    s_highpc = highpc;
    s_textsize = highpc - lowpc;

    /* Histogram: header plus counters */
    histbytes = (s_textsize / HISTFRACTION) + sizeof(struct gmon_hist_hdr);
    histbuf = local_sbrk( histbytes );
    if (histbuf == (char *) -1)
    {
        printf(MSG);
        return;
    }

    /* Call-site hash table */
    froms = (unsigned short *) local_sbrk( s_textsize / HASHFRACTION );
    if (froms == (unsigned short *) -1)
    {
        froms = 0;
        printf(MSG);
        return;
    }

    /* Arc pool, clamped to MINARCS..65534 records */
    tolimit = s_textsize * ARCDENSITY / 100;
    if (tolimit < MINARCS)
    {
        tolimit = MINARCS;
    }
    else if (tolimit > 65534)
    {
        tolimit = 65534;
    }
    tos = (struct tostruct *) local_sbrk( tolimit * sizeof( struct tostruct ) );
    if (tos == (struct tostruct *) -1)
    {
        tos = 0;
        froms = 0;
        printf(MSG);
        return;
    }
    tos[0].link = 0;

    /* Publish the histogram buffer (size still includes the header) */
    sbuf = histbuf;
    ssiz = histbytes;

    /* Build the gmon style histogram header in place */
    hdr = (struct gmon_hist_hdr *) histbuf;
    *(char**)&(hdr->low_pc) = lowpc;
    *(char**)&(hdr->high_pc) = highpc;
    *(int*)&(hdr->hist_size) = s_textsize/(HISTFRACTION*sizeof(HISTCOUNTER));
    *(int*)&(hdr->prof_rate) = 2250; /* 147.456MHz*/
    for (i = 0; i < (int) sizeof(unitname); i++)
    {
        /* copies "seconds" including its terminating NUL */
        hdr->dimen[i] = unitname[i];
    }
    hdr->dimen_abbrev = 's';

    /* From here on only the counter area matters */
    histbytes -= sizeof(struct gmon_hist_hdr);
    if (histbytes <= 0)
    {
        return;
    }

    span = highpc - lowpc;
    if (histbytes < span)
    {
        s_scale = ( (float) histbytes / span ) * SCALE_1_TO_1;
    }
    else
    {
        s_scale = SCALE_1_TO_1;
    }

    moncontrol(1);
}

/* Output file, shared by the open call and its failure message so the two
   can never disagree about which device was used. */
#ifdef SCE_11
#define MCLEANUP_GMON_PATH "sim:gmon.out"
#else /* SCE_11 */
#define MCLEANUP_GMON_PATH "host:gmon.out"
#endif /* SCE_11 */

/*
 * _mcleanup
 *
 * Stops profiling and writes the collected data in gmon.out format:
 *   1. the gmon header (magic cookie and version),
 *   2. a GMON_TAG_TIME_HIST tag followed by the histogram buffer
 *      (header plus counters, exactly as built by monstartup()),
 *   3. one GMON_TAG_CG_ARC tag plus arc record for every arc reachable
 *      from froms[].
 *
 * Does nothing beyond stopping profiling when monstartup() never
 * completed its allocations, or when the output file cannot be opened.
 * The results of sceWrite() are not checked.
 */
void
_mcleanup()
{
    int			fd;
    int			fromindex;
    int			endfrom;
    char		*frompc;
    int			toindex;

    moncontrol(0);

    /* monstartup() sets s_textsize before it allocates anything, so after
       a failed start-up the arc loop below would walk a null froms[]. */
    if (sbuf == 0 || froms == 0 || tos == 0)
    {
        printf("_mcleanup(): No profiling buffers, gmon.out not written\n");
        return;
    }

    fd = sceOpen(MCLEANUP_GMON_PATH , SCE_WRONLY|SCE_CREAT|SCE_TRUNC);
    if ( fd < 0 )
    {
        printf("_mcleanup(): Failed to open " MCLEANUP_GMON_PATH "\n" );
        return;
    }

    /* first we dump gprof magic */
    {
        struct gmon_hdr hdr;
        char buf[5] = GMON_MAGIC;

        printf("Above address range: %ld\n", sweProfORH);
        printf("Below address range: %ld\n", sweProfORL);

        hdr.cookie[0] = buf[0];
        hdr.cookie[1] = buf[1];
        hdr.cookie[2] = buf[2];
        hdr.cookie[3] = buf[3];
        hdr.version[0] = GMON_VERSION;
        hdr.version[1] = 0;
        hdr.version[2] = 0;
        hdr.version[3] = 0;

        sceWrite(fd, &hdr, sizeof(hdr));
    }

    /* dump gprof tag for histogram */
    {
        unsigned char tag = GMON_TAG_TIME_HIST;
        sceWrite(fd, &tag, 1);
    }
#ifdef LDEBUG
    printf("[mcleanup] sbuf 0x%x ssiz %d\n" , sbuf , ssiz );
#endif /* LDEBUG */
    sceWrite(fd, sbuf, ssiz);

    /* walk every call-site slot and dump the arcs chained from it */
    endfrom = s_textsize / (HASHFRACTION * sizeof(*froms));
    for (fromindex = 0; fromindex < endfrom; fromindex++)
    {
        unsigned char tag = GMON_TAG_CG_ARC;
        struct gmon_cg_arc_record grec;

        if (froms[fromindex] == 0)
        {
            continue;
        }

        /* address that this slot stands for */
        frompc = s_lowpc + (fromindex * HASHFRACTION * sizeof(*froms));
        for (toindex=froms[fromindex]; toindex!=0; toindex=tos[toindex].link)
        {
            /* dump gprof tag for arc */
            sceWrite(fd, &tag, 1);
#ifdef LDEBUG
            printf("[mcleanup] frompc 0x%x selfpc 0x%x count %d\n",
			frompc, tos[toindex].selfpc, tos[toindex].count);
#endif /* LDEBUG */
            *(RwUInt32*)&grec.from_pc = (unsigned int)frompc;
            *(RwUInt32*)&grec.self_pc = (unsigned int)tos[toindex].selfpc;
            *(RwUInt32*)&grec.count = tos[toindex].count;
            sceWrite(fd, &grec, sizeof(grec));
        }
    }
    sceClose(fd);
}

void skymcount(char *a, char *b);

/*
 * sky_mcountHolder
 *
 * C function whose body is a single asm block defining the global,
 * 64 byte aligned label _mcount.  What the stub at _mcount does:
 *
 *   - lowers $sp by 72 and saves $1 at 72($sp), $31 at 64($sp) and
 *     $4..$11 at 56($sp)..0($sp).  The $1 slot lies above the 72 bytes
 *     the stub itself reserves, and the exit path pops 80 bytes;
 *     presumably the compiler generated call sequence pushed those
 *     extra 8 bytes before calling _mcount - TODO confirm against the
 *     -pg output of the compiler in use.
 *   - tests whether $31 lies between skymcount and skymcountend (the
 *     label emitted at the top of skymcount()); if it does, the call to
 *     skymcount() is skipped.
 *   - otherwise calls skymcount($31, $1); both argument moves sit in
 *     branch delay slots.
 *   - on exit reloads $4..$11, loads $1 from the slot $31 was saved in
 *     and $31 from the slot $1 was saved in, jumps through $1 and pops
 *     the stack in the delay slot.
 *
 * NOTE(review): _mcountend is declared .globl but no such label is
 * defined in this block.
 */
void
sky_mcountHolder()
{
    /* This is just an assembler placeholder to hold _mcount */
asm volatile("
    # we have two 32bit stack slots
    # stack is 16 byte aligned for ABI, but 8 byte aligned here so
.balign 64
.globl _mcount
.globl _mcountend
_mcount:
.set noreorder
.set noat
    subu $sp,$sp,(5*16-8)
    sd  $01, 72($sp)
    sd  $31, 64($sp)
    sd  $04, 56($sp)
    lui $04, %hi(skymcount)
    sd  $05, 48($sp)
    lui $05, %hi(skymcountend)
    addiu $04, $04, %lo(skymcount)
    sd  $06, 40($sp)
    addiu $05, $05, %lo(skymcountend)
    sd  $07, 32($sp)
    subu $04, $04, $31
    sd  $08, 24($sp)
    subu $05, $31, $05
    sd  $09, 16($sp)
    and $05, $04, $05 # if (skymcount-ret<0) && (ret-skymcountend<0)
    sd  $10, 8($sp)
    sd  $11, 0($sp)
    bltz $05, 0f
    add $04, $31, $00 # bds

    jal skymcount
    add $05, $01, $00 # bds

0:
    ld  $01, 64($sp)
    ld  $31, 72($sp)
    ld  $04, 56($sp)
    ld  $05, 48($sp)
    ld  $06, 40($sp)
    ld  $07, 32($sp)
    ld  $08, 24($sp)
    ld  $09, 16($sp)
    ld  $10, 8($sp)
    ld  $11, 0($sp)
    j $01
    addu $sp, $sp, (5*16)
.set reorder
.set at ");

}

/*
 * skymcount
 *
 * Call-graph recorder, entered from the _mcount assembler stub.
 *
 *   a - return address of the _mcount call, i.e. an address inside the
 *       function that has just been entered (selfpc).
 *   b - return address of that function, i.e. an address inside its
 *       caller (frompc).
 *
 * Finds or creates the arc (frompc -> selfpc) and bumps its count.  Arcs
 * sharing a froms[] slot are chained through tos[].link; an arc that is
 * hit is moved to the head of its chain.
 *
 * Does nothing while profiling is non-zero (stopped, or already inside
 * this function).  When the tos[] pool runs out, a message is printed and
 * profiling is left non-zero so that nothing more is recorded.
 */
void
skymcount(char *a, char *b)
{
    register char		*selfpc;
    register unsigned short	*frompcindex;
    register struct tostruct	*top;
    register struct tostruct	*prevtop;
    register int		toindex; /* void* is 32 bit */

    /* Must stay the first statement: the _mcount stub skips this
       function for return addresses between skymcount and skymcountend. */
    asm volatile("nop; skymcountend: nop");

    /* selfpc identifies the function that was just entered. */
    selfpc = (void *)a; /* __builtin_return_address (0) */
    /* frompcindex starts out as the pc in the caller of that function. */
    frompcindex = (void *)b; /* __builtin_return_address (1) */

    /*
     *	check that we are profiling
     *	and that we aren't recursively invoked.
     */
    if (profiling)
    {
	goto out;
    }
    profiling++;

    /*
     *	check that frompcindex is a reasonable pc value.
     *	for example:	signal catchers get called from the stack,
     *			not from text space.  too bad.
     *
     *	The offset must be strictly below s_textsize: froms[] holds
     *	s_textsize / (HASHFRACTION * sizeof(*froms)) entries, so an
     *	offset equal to s_textsize would index one element past the end
     *	(and _mcleanup() never dumps that slot).
     */
    frompcindex = (unsigned short *) ((int) frompcindex - (int) s_lowpc);
    if ((unsigned int) frompcindex >= s_textsize)
    {
	goto done;
    }
    frompcindex = &froms[((int)frompcindex) / (HASHFRACTION * sizeof(*froms))];
    toindex = *frompcindex;
    if (toindex == 0)
    {
	/*
	 *	first time traversing this arc:
	 *	take the next free record (tos[0].link is the counter).
	 */
	toindex = ++tos[0].link;
	if (toindex >= tolimit)
        {
		goto overflow;
	}
	*frompcindex = toindex;
	top = &tos[toindex];
	top->selfpc = selfpc;
	top->count = 1;
	top->link = 0;
	goto done;
    }
    top = &tos[toindex];
    if (top->selfpc == selfpc)
    {
	/*
	 *	arc at front of chain; usual case.
	 */
	top->count++;
	goto done;
    }
    /*
     *	have to go looking down chain for it.
     *	top points to what we are looking at,
     *	prevtop points to previous top.
     *	we know it is not at the head of the chain.
     */
    for (; /* goto done */; )
    {
	if (top->link == 0)
        {
	    /*
	     *	top is end of the chain and none of the chain
	     *	had top->selfpc == selfpc.
	     *	so we allocate a new tostruct
	     *	and link it to the head of the chain.
	     */
	    toindex = ++tos[0].link;
	    if (toindex >= tolimit)
            {
		goto overflow;
	    }
	    top = &tos[toindex];
	    top->selfpc = selfpc;
	    top->count = 1;
	    top->link = *frompcindex;
	    *frompcindex = toindex;
	    goto done;
	}
	/*
	 *	otherwise, check the next arc on the chain.
	 */
	prevtop = top;
	top = &tos[top->link];
	if (top->selfpc == selfpc)
        {
	    /*
	     *	there it is.
	     *	increment its count
	     *	move it to the head of the chain.
	     */
	    top->count++;
	    toindex = prevtop->link;
	    prevtop->link = top->link;
	    top->link = *frompcindex;
	    *frompcindex = toindex;
	    goto done;
	}

    }
done:
    profiling--;
    /* and fall through */
out:
    return;		/* normal return restores saved registers */

overflow:
    /* profiling is already 1 here; raising it again and skipping the
       decrement at done leaves it non-zero for good. */
    profiling++; /* halt further profiling */
#define	TOLIMIT	"mcount: tos overflow, profiling stopped\n"
    /* Bit dangerous. My stack is in a bit of a state here */
    printf(TOLIMIT);
    goto out;
}


/* Control profiling;
   profiling is what mcount checks to see if
   all the data structures are ready.  */

void
moncontrol(int mode)
{
    if (mode)
    {
	/* start */
	local_profil(sbuf + sizeof(struct gmon_hist_hdr),
                     ssiz - sizeof(struct gmon_hist_hdr),
		     (int)s_lowpc, s_scale);
	profiling = 0;
    }
    else
    {
	/* stop */
	local_profil((char *) 0, 0, 0, 0);
	profiling = 3;
    }
}

#else /* PGA */

#error PGA not defined. Profiling support in main lib not enabled.
#endif /* PGA */
