[Bio] / FigKernelScripts / gjoseqio.c Repository:
ViewVC logotype

View of /FigKernelScripts/gjoseqio.c

Parent Directory | Revision Log


Revision 1.1 - (download) (as text) (annotate)
Mon Jun 11 20:08:03 2012 UTC (7 years, 5 months ago) by golsen
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Support for codon usage analyses.

/*
 *    gjoseqio.c
 */

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include "gjoseqlib.h"
#include "gjoseqio.h"

databuf_t idbuf =
    { IDBUFSIZE,
      0,
      0,
      (databufseg_t *) NULL
    };

databuf_t defbuf =
    { DEFBUFSIZE,
      0,
      0,
      (databufseg_t *) NULL
    };

databuf_t seqbuf =
    { SEQBUFSIZE,
      0,
      0,
      (databufseg_t *) NULL
    };


/*
 *  These are internal functions, not included in gjoseqio.h.
 *
 *  read_next_fasta() calls them in the order listed, each one picking
 *  up in the stream where the previous one stopped.  Each resets the
 *  buffer it is given, appends the characters it accepts, and returns
 *  1 on success or 0 on failure:
 *
 *    read_fasta_id   skips to the next '>' and stores the id token
 *    read_fasta_def  stores the remainder of the header line
 *    read_fasta_seq  stores sequence characters up to a line-initial
 *                    '>' or end of file
 */

int read_fasta_id(  FILE * fptr, databuf_t * buffer );
int read_fasta_def( FILE * fptr, databuf_t * buffer );
int read_fasta_seq( FILE * fptr, databuf_t * buffer );

/*
 *  write_fasta_entry
 *
 *  Write one sequence entry to fptr in FASTA format:
 *
 *      >id[ definition]
 *      sequence, broken into lines of 60 characters
 *
 *  At most id_len, def_len and seq_len characters are taken from the
 *  corresponding strings, and output of each field also stops at the
 *  first NUL character.  The definition (and the blank that precedes
 *  it) is omitted when def is NULL or def_len is 0.
 *
 *  Returns 1 on success.  Returns 0, without writing anything, if fptr
 *  or entry is NULL or if the id or the sequence is missing or has
 *  zero length.  Also returns 0 as soon as a character cannot be
 *  written; in that case part of the entry may already be in the
 *  stream.
 */

int
write_fasta_entry( FILE * fptr, seq_t * entry )
{
    size_t   len, i;
    char   * ptr;

    if ( ! fptr || ! entry ) return( 0 );
    if ( ! entry->id  || ! entry->id_len  ) return( 0 );
    if ( ! entry->seq || ! entry->seq_len ) return( 0 );

    /* Header line: '>' id [ ' ' definition ] '\n' */

    if ( fputc( '>', fptr ) == EOF ) return( 0 );

    ptr = entry->id;
    len = entry->id_len;
    for ( i = 0; i < len && *ptr; i++ )
    {
        if ( fputc( (int) *ptr++, fptr ) == EOF ) return( 0 );
    }

    ptr = entry->def;
    len = entry->def_len;
    if ( ptr && len > 0 )
    {
        if ( fputc( ' ', fptr ) == EOF ) return( 0 );
        for ( i = 0; i < len && *ptr; i++ )
        {
            if ( fputc( (int) *ptr++, fptr ) == EOF ) return( 0 );
        }
    }

    if ( fputc( '\n', fptr ) == EOF ) return( 0 );

    /* Sequence data: a newline goes before every 60th character except
     * the very first, and one more newline closes the last line.
     */

    ptr = entry->seq;
    len = entry->seq_len;
    for ( i = 0; i < len && *ptr; i++ )
    {
        if ( i%60 == 0 && i )
        {
            if ( fputc( '\n', fptr ) == EOF ) return( 0 );
        }
        if ( fputc( (int) *ptr++, fptr ) == EOF ) return( 0 );
    }
    if ( fputc( '\n', fptr ) == EOF ) return( 0 );

    return( 1 );
}


/*
 *  read_next_fasta
 *
 *  Read the next FASTA entry from fptr into a newly allocated seq_t.
 *
 *  The id, definition and sequence are first gathered in the file-scope
 *  buffers idbuf, defbuf and seqbuf, then each is copied into its own
 *  malloc'ed, NUL-terminated string by buffer_2_string().  The caller
 *  is responsible for freeing the three strings and the seq_t itself.
 *
 *  Returns NULL if any of the three readers fails (which includes
 *  reaching end of file before a complete header has been read) or if
 *  memory cannot be allocated.  Nothing allocated here is left behind
 *  on failure.
 */

seqptr_t
read_next_fasta( FILE * fptr )
{
    seqptr_t entry;

    /* Fill the scratch buffers; stop at the first reader that fails. */

    if ( ! read_fasta_id(  fptr, &idbuf  ) ) return( (seqptr_t) NULL );
    if ( ! read_fasta_def( fptr, &defbuf ) ) return( (seqptr_t) NULL );
    if ( ! read_fasta_seq( fptr, &seqbuf ) ) return( (seqptr_t) NULL );

    entry = (seqptr_t) malloc( sizeof( seq_t ) );
    if ( entry == NULL ) return( (seqptr_t) NULL );

    /* Each nesting level owns one more string; falling out of a level
     * releases exactly the strings that were successfully created.
     */

    if ( buffer_2_string( &idbuf, &entry->id, &entry->id_len ) )
    {
        if ( buffer_2_string( &defbuf, &entry->def, &entry->def_len ) )
        {
            if ( buffer_2_string( &seqbuf, &entry->seq, &entry->seq_len ) )
            {
                return( entry );
            }
            free( (void *) entry->def );
        }
        free( (void *) entry->id );
    }

    free( (void *) entry );
    return( (seqptr_t) NULL );
}


/*
 *  read_fasta_id
 *
 *  Advance fptr to the next '>' (wherever it occurs, not only at the
 *  start of a line), skip any blanks that follow it, and append the id
 *  -- the run of characters up to the next white space -- to buffer.
 *  NUL characters in the input are dropped.
 *
 *  On success the stream is left so that the next character read is
 *  either the newline that ends the header line or the first character
 *  after the white space that follows the id.
 *
 *  Returns 1 on success.  Returns 0 if the buffer cannot be initialized
 *  or extended, if end of file is reached before the function is done
 *  (including directly after the id), or if the header line ends before
 *  any id character is found.
 */

int
read_fasta_id( FILE * fptr, databuf_t * buffer )
{
    int c, state, ntogo;
    databufseg_t * seg;
    char * bufptr;

    if ( ! init_buffer( buffer ) ) return( 0 );

    /* state == 0 seeking '>'
     * state == 1 discarding white space
     * state == 2 copying id
     * state == 3 discarding white space
     * state == 4 done
     */

    state = 0;
    while ( state == 0 )
    {
        c = getc( fptr );
        if ( c == EOF ) return( 0 );
        if ( c == '>' ) state = 1;
    }

    while ( state == 1 )
    {
        c = getc( fptr );
        if ( c == EOF || c == '\n' ) return( 0 );
        /* A NUL is skipped here, as it is everywhere else in the
         * readers, so that it can never become the first character
         * of the id.
         */
        if ( c && ! isspace( c ) ) state = 2;
    }

    seg    = buffer->seg;
    bufptr = seg->datum;
    ntogo  = seg->segsize;

    /* c holds the first id character, found by the loop above */
    *bufptr++ = c;
    buffer->datasize++;
    ntogo--;

    while ( state == 2 )
    {
        c = getc( fptr );
        if ( c == EOF ) return( 0 );
        if ( c == '\n' )
        {
            /* leave the newline in the stream for read_fasta_def */
            ungetc( c, fptr );
            state = 4;
        }
        else if ( isspace( c ) )
        {
            state = 3;
        }
        else if ( c )  // not null
        {
            if ( ntogo < 1 )
            {
                seg = next_buffer_seg( seg );
                if ( ! seg ) return( 0 );
                bufptr = seg->datum;
                ntogo  = seg->segsize;
            }
            *bufptr++ = c;
            buffer->datasize++;
            ntogo--;
        }
    }

    while ( state == 3 )
    {
        c = getc( fptr );
        if ( c == EOF ) return( 0 );
        if ( c == '\n' || ! isspace( c ) )
        {
            /* push back the newline or the first character past the blanks */
            ungetc( c, fptr );
            state = 4;
        }
    }

    return ( 1 );
}


/*
 *  read_fasta_def
 *
 *  Append the definition -- the rest of the current header line -- to
 *  buffer.  Leading white space and NUL characters are skipped, and
 *  the newline that ends the line is consumed.  Trailing white space
 *  is trimmed, but only as far back as the start of the buffer segment
 *  that was being filled when the newline arrived.
 *
 *  Returns 1 when the newline is reached; the definition is empty if
 *  nothing but white space preceded it.  Returns 0 if the buffer cannot
 *  be initialized or extended, or if end of file is reached before a
 *  newline.
 */

int
read_fasta_def( FILE * fptr, databuf_t * buffer )
{
    int c, state, ntogo;
    databufseg_t * seg;
    char * bufptr;

    if ( ! init_buffer( buffer ) ) return( 0 );

    /* state == 0 discarding white space
     * state == 1 copying def
     */

    state = 0;
    while ( state == 0 )
    {
        c = getc( fptr );
        if ( c == EOF ) return( 0 );
        if ( c == '\n' ) return( 1 );
        if ( c && ! isspace( c ) ) state = 1;
    }

    seg    = buffer->seg;
    bufptr = seg->datum;
    ntogo  = seg->segsize;

    /* c holds the first definition character, found by the loop above */
    *bufptr++ = c;
    buffer->datasize++;
    ntogo--;

    while ( ( c = getc( fptr ) ) != EOF )
    {
        if ( c == '\n' )
        {
            // Remove trailing white space (at least the easy part).
            // The stored byte is a plain char, which may be negative;
            // the <ctype.h> functions require an unsigned char value.
            while ( ( bufptr > seg->datum )
                 && isspace( (unsigned char) *(bufptr-1) ) )
            {
                bufptr--;
                buffer->datasize--;
            }
            return( 1 );
        }
        if ( c )  // not null
        {
            if ( ntogo < 1 )
            {
                seg = next_buffer_seg( seg );
                if ( ! seg ) return( 0 );
                bufptr = seg->datum;
                ntogo  = seg->segsize;
            }
            *bufptr++ = c;
            buffer->datasize++;
            ntogo--;
        }
    }

    return ( 0 );  // EOF is failure
}


/*
 *  read_fasta_seq
 *
 *  Append sequence data from fptr to buffer.  White space and NUL
 *  characters are discarded; every other character is stored.  Reading
 *  stops at end of file, or at a '>' that is the first character of a
 *  line, which is pushed back so that the next header can be read.
 *
 *  Returns 1 on success (the sequence may be empty) and 0 if the buffer
 *  cannot be initialized or extended.
 */

int
read_fasta_seq( FILE * fptr, databuf_t * buffer )
{
    databufseg_t * cur;
    char         * out;
    int            ch, room;
    int            at_line_start;

    if ( ! init_buffer( buffer ) ) return( 0 );

    cur  = buffer->seg;
    out  = cur->datum;
    room = cur->segsize;

    /* The function is entered just past the header line's newline. */
    at_line_start = 1;

    for ( ;; )
    {
        ch = getc( fptr );
        if ( ch == EOF ) break;

        if ( ch == '\0' || isspace( ch ) )
        {
            /* Only a newline puts us back at the start of a line. */
            at_line_start = ( ch == '\n' );
            continue;
        }

        if ( at_line_start && ch == '>' )
        {
            ungetc( ch, fptr );
            return( 1 );
        }
        at_line_start = 0;

        if ( room < 1 )
        {
            cur = next_buffer_seg( cur );
            if ( ! cur ) return( 0 );
            out  = cur->datum;
            room = cur->segsize;
        }
        *out++ = ch;
        buffer->datasize++;
        room--;
    }

    return( 1 );
}


/*
 *  init_buffer
 *
 *  Prepare buf for a new run of data: allocate its first segment if it
 *  has none yet, and reset the stored data size to zero.  Segments that
 *  already exist are kept for reuse.
 *
 *  Returns 1 on success, 0 if the first segment cannot be allocated.
 */

int
init_buffer( databuf_t * buf )
{
    databufseg_t * first = buf->seg;

    if ( first == NULL )
    {
        first = extend_buffer( buf, (databufseg_t *) NULL, buf->segsize );
        if ( first == NULL ) return( 0 );
        buf->seg = first;
    }

    buf->datasize = 0;
    return( 1 );
}


/*
 *  next_buffer_seg
 *
 *  Return the segment that follows seg.  If seg is the last one in its
 *  chain, a new segment of the same size is allocated and linked after
 *  it.  Returns NULL if that allocation fails.
 */

databufseg_t *
next_buffer_seg( databufseg_t * seg )
{
    databufseg_t * following = seg->next;

    if ( following ) return( following );

    return( extend_buffer( seg->buffer, seg, seg->segsize ) );
}


/*
 *  buffer_2_string
 *
 *  Copy the buffer->datasize characters held in the segment chain of
 *  buffer into one newly malloc'ed, NUL-terminated string.
 *
 *    strptr   receives the new string; the caller must free it
 *    lenptr   if not NULL, receives the length (excluding the NUL)
 *
 *  Returns 1 on success.  Returns 0 if buffer or strptr is NULL, if
 *  memory cannot be allocated, or if the segment chain ends (or has a
 *  segment with no data area) before datasize characters have been
 *  copied.  On failure *strptr and *lenptr are not modified and nothing
 *  is left allocated.
 */

int
buffer_2_string( databuf_t * buffer, char ** strptr, size_t * lenptr )
{
    char            *src, *dst, *str;
    size_t           avail, remaining;
    databufsegptr_t  seg;

    if ( ! buffer || ! strptr ) return( 0 );

    str = (char *) malloc( ( buffer->datasize + 1 ) * sizeof( char ) );
    if ( ! str ) return( 0 );

    dst = str;
    seg = buffer->seg;

    /* One pass of the outer loop per segment.  The chain is examined
     * only while characters remain to be copied, so an empty buffer
     * with no segment at all yields an empty string.
     */
    for ( remaining = buffer->datasize; remaining > 0; seg = seg->next )
    {
        if ( ! seg || ! seg->datum )
        {
            /* datasize claims more data than the chain can hold */
            free( (void *) str );
            return( 0 );
        }

        src = seg->datum;
        for ( avail = seg->segsize; avail > 0 && remaining > 0; avail-- )
        {
            *dst++ = *src++;
            remaining--;
        }
    }

    *dst = '\0';

    *strptr = str;
    if ( lenptr ) *lenptr = buffer->datasize;

    return( 1 );
}


/*
 *  extend_buffer
 *
 *  Allocate a new segment with room for segsize characters on behalf
 *  of buffer, and link it after prev when prev is not NULL.
 *
 *  On success buffer->nsegalloc is incremented, the new segment's
 *  segnum is set to that new count, and the segment is returned.  It
 *  is not stored in buffer->seg; a caller creating the first segment
 *  does that itself.
 *
 *  Returns NULL if either allocation fails.  In that case nothing is
 *  left allocated, and buffer and prev are unchanged.
 */

databufseg_t *
extend_buffer( databuf_t * buffer, databufseg_t * prev, size_t segsize )
{
    databufseg_t * newseg;
    char         * data;

    newseg = (databufseg_t *) malloc( sizeof( databufseg_t ) );
    if ( ! newseg ) return( (databufseg_t *) NULL );

    data = (char *) malloc( segsize * sizeof( char ) );
    if ( ! data )
    {
        /* do not leak the segment header when its data area fails */
        free( (void *) newseg );
        return( (databufseg_t *) NULL );
    }

    /* Count the segment only once it is known to be complete. */
    buffer->nsegalloc++;
    newseg->buffer  = buffer;
    newseg->segnum  = buffer->nsegalloc;
    newseg->segsize = segsize;
    newseg->datum   = data;
    newseg->next    = (databufseg_t *) NULL;

    if ( prev ) prev->next = newseg;

    return( newseg );
}



MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3