[Bio] / FastaParse / FastaParse.xs Repository:
ViewVC logotype

View of /FastaParse/FastaParse.xs

Parent Directory | Revision Log


Revision 1.1.1.1 - (download) (annotate) (vendor branch)
Tue Nov 20 22:35:20 2007 UTC (11 years, 11 months ago) by olson
Branch: MAIN, xx
CVS Tags: xxy, HEAD
Changes since 1.1: +0 -0 lines
Import of C-based fasta parser

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#include "ppport.h"

#include <sys/types.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>

/* Staging buffer (10 MiB) that FastaParse::next fills with one sequence body. */
static char fbuf[10 * 1024 * 1024];
/* Staging buffer that FastaParse::next fills with one header line. */
static char idbuf[1024];

/* FastaParse state: descriptor from open(), read cursor, and end of mapping. */
static int fd;
static char *ptr;
static char *end;

/* SimParse state: descriptor from open(), read cursor, and end of mapping. */
static int sim_fd;
static char *sim_ptr;
static char *sim_end;
/* Stash looked up for package "Sim"; SimParse::next blesses its rows into it. */
static HV *sim_stash = 0;

MODULE = SimParse		PACKAGE = SimParse		

int open(file)
	const char *	file
CODE:
{
    /*
     * Map `file` read-only and point the SimParse cursor (sim_ptr/sim_end)
     * at its contents.  Returns 1 on success, 0 on failure (after reporting
     * the failing call with perror).
     *
     * The cursor is only updated once every step has succeeded, so a failed
     * call leaves any previously opened file readable through next().
     *
     * NOTE: a mapping installed by an earlier successful call is not
     * unmapped here; its base address is not retained once next() has
     * advanced sim_ptr.
     */
    struct stat st;
    void *map;

    RETVAL = 0;

    if (sim_stash == 0)
    {
	/* GV_ADD creates the package if it is not loaded yet, so the
	 * stash handed to sv_bless() in next() is never NULL. */
	sim_stash = gv_stashpv("Sim", GV_ADD);
    }

    sim_fd = open(file, O_RDONLY);
    if (sim_fd < 0)
    {
	perror("open");
	goto end;
    }

    if (fstat(sim_fd, &st) < 0)
    {
	perror("stat");
	goto close_fd;
    }

    /* mmap reports failure with MAP_FAILED, not a null pointer. */
    map = mmap(0, st.st_size, PROT_READ, MAP_SHARED, sim_fd, 0);
    if (map == MAP_FAILED)
    {
	perror("mmap");
	goto close_fd;
    }

    if (madvise(map, st.st_size, MADV_SEQUENTIAL) < 0)
    {
	perror("madvise");
	/* Do not leave a half-initialised mapping behind. */
	munmap(map, st.st_size);
	goto close_fd;
    }

    sim_ptr = map;
    sim_end = sim_ptr + st.st_size;
    RETVAL = 1;

    close_fd:
    /* The mapping stays valid after the descriptor is closed. */
    close(sim_fd);
    sim_fd = -1;

    end:
;
}
OUTPUT:
RETVAL

SV *
next()
CODE:
{
    /*
     * Parse the next tab-separated line from the mapped file and return it
     * as an array reference blessed into the stash held in sim_stash.
     * Returns undef once the cursor has reached the end of the mapping.
     *
     * Columns are converted according to numeric_cols: 1 = integer (atoi),
     * 2 = float (atof), 0 = string.  Columns beyond the table are kept as
     * strings.  A trailing empty field (tab immediately followed by newline)
     * is not emitted, and a NUL byte ends the field and the line scan.
     */
    static const int numeric_cols[] = { 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 0, 0, 0 };
    const int known_cols = (int) (sizeof(numeric_cols) / sizeof(numeric_cols[0]));
    AV *row;
    char *val_begin;
    int col = 0;

    if (sim_ptr >= sim_end)
    {
	RETVAL = &PL_sv_undef;
	goto end;
    }

    row = newAV();
    val_begin = sim_ptr;

    /* Bounds are tested before every dereference: sim_end is one past the
     * last mapped byte and must never be read. */
    while (sim_ptr < sim_end && *sim_ptr && *sim_ptr != '\n')
    {
	STRLEN len;
	int kind;
	SV *field;

	while (sim_ptr < sim_end && *sim_ptr && *sim_ptr != '\t' && *sim_ptr != '\n')
	{
	    sim_ptr++;
	}

	len = sim_ptr - val_begin;
	kind = (col < known_cols) ? numeric_cols[col] : 0;

	if (kind == 0)
	{
	    field = newSVpvn(val_begin, len);
	}
	else
	{
	    /* The mapped text is not NUL-terminated, so convert from a
	     * bounded, terminated copy.  This keeps atoi/atof from skipping
	     * the tab into the next field or reading past the mapping.
	     * Numeric fields longer than the buffer are truncated. */
	    char numbuf[64];

	    if (len >= sizeof(numbuf))
		len = sizeof(numbuf) - 1;
	    Copy(val_begin, numbuf, len, char);
	    numbuf[len] = 0;

	    if (kind == 1) /* Integer */
		field = newSViv(atoi(numbuf));
	    else /* Float */
		field = newSVnv(atof(numbuf));
	}

	/* av_push takes ownership of the new SV and grows the array as
	 * needed, so there is no fixed limit on the column count. */
	av_push(row, field);
	col++;

	if (sim_ptr >= sim_end || *sim_ptr != '\t')
	    break;

	sim_ptr++;
	val_begin = sim_ptr;
    }

    /* Step over the line terminator, if there is one. */
    if (sim_ptr < sim_end && *sim_ptr)
	sim_ptr++;

    /* newRV_noinc: the reference becomes the sole owner of the array, so
     * the row is freed when the caller drops the returned reference. */
    RETVAL = sv_bless(newRV_noinc((SV *) row), sim_stash);
 end:
    ;
}
OUTPUT:
RETVAL


MODULE = FastaParse		PACKAGE = FastaParse		

int open(file)
	const char *	file
CODE:
{
    /*
     * Map `file` read-only and point the FastaParse cursor (ptr/end) at its
     * contents.  Returns 1 on success, 0 on failure (after reporting the
     * failing call with perror).
     *
     * The cursor is only updated once every step has succeeded, so a failed
     * call leaves any previously opened file readable through next().
     *
     * NOTE: a mapping installed by an earlier successful call is not
     * unmapped here; its base address is not retained once next() has
     * advanced ptr.
     */
    struct stat st;
    void *map;

    RETVAL = 0;

    fd = open(file, O_RDONLY);
    if (fd < 0)
    {
	perror("open");
	goto end;
    }

    if (fstat(fd, &st) < 0)
    {
	perror("stat");
	goto close_fd;
    }

    /* mmap reports failure with MAP_FAILED, not a null pointer. */
    map = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
    if (map == MAP_FAILED)
    {
	perror("mmap");
	goto close_fd;
    }

    if (madvise(map, st.st_size, MADV_SEQUENTIAL) < 0)
    {
	perror("madvise");
	/* Do not leave a half-initialised mapping behind. */
	munmap(map, st.st_size);
	goto close_fd;
    }

    ptr = map;
    end = ptr + st.st_size;
    RETVAL = 1;

    close_fd:
    /* The mapping stays valid after the descriptor is closed. */
    close(fd);
    fd = -1;

    end:
;
}
OUTPUT:
RETVAL

int
next(id, id2, seq, dump_whitespace)
	SV *	id
	SV *	id2
	SV *	seq
	int	dump_whitespace
CODE:
{
    /*
     * Read the next FASTA record from the mapped file.
     *
     *   id   receives the header line (text after '>' up to the newline)
     *   id2  receives the header up to its first whitespace character
     *   seq  receives the bytes up to the next '>' (or end of file);
     *        whitespace is removed when dump_whitespace is true
     *
     * Returns 1 when a record was read, 0 at end of input or when the
     * cursor is not positioned on a '>'.
     *
     * Header text beyond sizeof(idbuf) - 1 bytes and sequence data beyond
     * sizeof(fbuf) bytes are dropped; the cursor still advances to the
     * start of the next record so later calls stay in sync.
     */
    char *idptr = idbuf;
    char *idlimit = idbuf + sizeof(idbuf) - 1;	/* leave room for the NUL */
    char *id2end = 0;
    char *dptr = fbuf;
    char *dlimit = fbuf + sizeof(fbuf);
    int overrun = 0;

    if (ptr >= end)
    {
	RETVAL = 0;
	goto end;
    }
    
    if (*ptr != '>')
    {
	printf("parse error: no > (%c)\n", *ptr);
	RETVAL = 0;
	goto end;
    }
    ptr++;

    while (ptr < end && *ptr != '\n')
    {
	if (idptr < idlimit)
	{
	    /* ctype functions require an unsigned char value. */
	    if (id2end == 0 && isspace((unsigned char) *ptr))
		id2end = idptr;

	    *idptr++ = *ptr;
	}
	ptr++;
    }
    if (id2end == 0)
	id2end = idptr;
    *idptr = 0;
    sv_setpvn(id, idbuf, idptr - idbuf);
    sv_setpvn(id2, idbuf, id2end - idbuf);

    /* Skip the newline; do not step past the end of the mapping when the
     * header is the last thing in the file. */
    if (ptr < end)
	ptr++;

    /* here starts data */

    while (ptr < end && *ptr != '>')
    {
	if (!dump_whitespace || !isspace((unsigned char) *ptr))
	{
	    if (dptr < dlimit)
	    {
		*dptr++ = *ptr;
	    }
	    else if (!overrun)
	    {
		/* Report once, then keep scanning to the next record. */
		printf("Overrun !\n");
		overrun = 1;
	    }
	}
	ptr++;
    }
    if (dptr - fbuf > 1048576)
    {
	fprintf(stderr, "Warning: very long (%ld) sequence %s\n", (long) (dptr - fbuf), idbuf);
    }
    sv_setpvn(seq, fbuf, dptr - fbuf);
    RETVAL = 1;
 end:
;
}
OUTPUT:
RETVAL


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3