[Bio] / FigKernelScripts / translate_fasta_6.c Repository:
ViewVC logotype

View of /FigKernelScripts/translate_fasta_6.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Wed May 20 03:01:15 2015 UTC (4 years, 6 months ago) by golsen
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +12 -3 lines
Fixed issues in correctly printing the descriptions.

/*
 *  translate_fasta_6.c
 *
 *  Usage: translate_fasta_6 [options]  < fasta > fasta
 *
 *  Options:
 *
 *      -f                   //  Flush the output after every sequence
 *      -g NCBI_code_number  //  Translate using given NCBI genetic code (D = 1)
 *      -m                   //  Start protein with Met (not very meaningful)
 *      -v                   //  Print version to standard output and exit
 *
 *  cc -O3 -o translate_fasta_6 translate_fasta_6.c
 *
 *  Versions:
 *
 *      1.00 - Unnumbered first release
 *
 *      1.01 - Add -f, -g, -m and -v flags
 */

#include <stdio.h>
#include <stdlib.h>    // atoi

#include "gjoseqlib.h"
#include "gjoseqio.h"
#include "gjoseqlib.c"
#include "gjoseqio.c"

#define  VERSION  "1.02"
/*  Version string printed by the -v flag.
 *
 *  Versions:
 *
 *     1.01 - first distributed version
 *     1.02 - fix issues in definition
 *     1.03 - fix issues in definition of reverse strand, 2015-05-19
 *
 *  NOTE(review): the history above lists 1.03, but VERSION is still "1.02";
 *  confirm whether the string should have been bumped with that change.
 */

void usage( char * command );


/*
 *  Length of a NUL-terminated string, not counting the terminator.
 *  A NULL pointer is treated as an empty string and yields 0.
 *  Used by main() to measure the definition line it has just built.
 */
size_t
slen( char *str )
{
    char *end;

    if ( ! str ) { return 0; }

    /* Walk to the terminator; the distance travelled is the length. */
    for ( end = str; *end; end++ ) { }

    return (size_t) ( end - str );
}


/*
 *  Read fasta nucleotide entries from stdin and write the translations of
 *  all six reading frames of each entry to stdout.
 *
 *  For every input entry, six output entries are written:
 *
 *      id.+1, id.+2, id.+3   frames read from entry->seq at offsets 0, 1, 2
 *      id.-1, id.-2, id.-3   frames read from the complement_DNA_seq()
 *                            result at offsets 0, 1, 2
 *
 *  Each output definition is the input definition (if any) followed by
 *  "n1-n2/dnalen", where n1 and n2 are the 1-based coordinates of the first
 *  and last nucleotide covered by whole codons in that frame (n1 > n2 on the
 *  minus frames), and dnalen is the input sequence length.
 *
 *  Flags: -f (flush stdout after each entry written), -g N (genetic code),
 *  -m (passed to the translator as the "met" argument), -v (print VERSION).
 *
 *  Returns 0 on success or after -v, 1 for a missing or invalid -g value,
 *  2 for an unrecognized flag.  Allocation or translation failures call
 *  exit( 1 ).
 */
int main( int argc, char ** argv )
{
    seq_t *entry, *xlation;
    char  *compl;
    char  *end;
    long   code;
    int    i, off, dnalen, n1, n2;
    int    flush     = 0;
    int    met       = 0;
    int    NCBI_code = 1;

    /* Read command flags */

    i = 1;
    while ( ( i < argc ) && ( argv[i][0] == '-' ) )
    {
        if ( argv[i][1] == 'f' )
        {
            flush = 1;
        }
        else if ( argv[i][1] == 'g' )
        {
            /* Accept both "-gN" (value attached) and "-g N" (next argument) */
            if ( argv[i][2] ) { argv[i] += 2; }
            else if ( ++i >= argc )
            {
                fprintf( stderr, "Missing genetic code number after -g\n" );
                usage( argv[0] );
                return 1;
            }

            // strtol (rather than atoi) so that non-numeric text, trailing
            // garbage and out-of-range values are all detected: on overflow
            // strtol returns LONG_MIN/LONG_MAX, which fail the range test.
            end  = NULL;
            code = strtol( argv[i], &end, 10 );

            // NCODES (defined in gjoseqlib.c) includes a code number zero
            // Invalid codes between 1 and 23 (e.g., 7 and 20) default to 1.
            if ( end == argv[i] || *end != '\0' || code <= 0 || code > NCODES-1 )
            {
                fprintf( stderr, "Bad genetic code number '%s'.\n", argv[i] );
                fprintf( stderr, "Allowed values are 1 to %d.\n", NCODES-1 );
                usage( argv[0] );
                return 1;
            }
            NCBI_code = (int) code;
        }
        else if ( argv[i][1] == 'm' )
        {
            met = 1;
        }
        else if ( argv[i][1] == 'v' )
        {
            printf( "%s\n", VERSION );
            return 0;
        }
        else
        {
            fprintf( stderr, "Bad flag '%s'.\n", argv[i] );
            usage( argv[0] );
            return 2;
        }

        i++;
    }

    while ( ( entry = read_next_fasta( stdin ) ) )
    {
        dnalen  = entry->seq_len;

        /* One output record is reused for all six frames of this entry.
         * The id needs 3 extra characters (".+1"); the definition needs room
         * for " n1-n2/dnalen".  Its sequence is attached per frame below.
         */
        xlation = new_sequence_by_sizes( entry->id_len+3, entry->def_len+64, 0 );
        if ( ! xlation )
        {
            fprintf( stderr, "Error generating translation structure for '%s'\n", entry->id );
            exit( 1 );
        }

        /* Forward strand, frames +1, +2 and +3 */

        for ( i = 0; i < 3; i++ )
        {
            sprintf( xlation->id, "%s.+%d", entry->id, i+1 );

            n1 = i + 1;
            n2 = dnalen - ( (dnalen-i) % 3 );   /* drop the trailing partial codon */
            if ( entry->def && entry->def_len > 0 )
            {
                sprintf( xlation->def, "%s %d-%d/%d", entry->def, n1, n2, dnalen );
            }
            else
            {
                sprintf( xlation->def, "%d-%d/%d", n1, n2, dnalen );
            }
            xlation->def_len = slen( xlation->def );

            /* Never start past the string terminator: for sequences shorter
             * than the frame offset, translate the empty tail instead of
             * reading beyond the end of the buffer.
             */
            off = ( i < dnalen ) ? i : dnalen;

            xlation->seq = translate_seq_with_code( entry->seq + off, met, NCBI_code );
            if ( ! xlation->seq )
            {
                fprintf( stderr, "Error translating sequence for '%s'\n", entry->id );
                exit( 1 );
            }
            xlation->seq_len = ( n2 - n1 + 1 ) / 3;

            write_fasta_entry( stdout, xlation );
            if ( flush ) fflush( stdout );

            /* The translation is owned here; detach it before the next frame */
            free( (void *) xlation->seq );
            xlation->seq = (char *) NULL;
        }

        /* Reverse strand, frames -1, -2 and -3 */

        compl = complement_DNA_seq( entry->seq );
        if ( ! compl )
        {
            fprintf( stderr, "Error generating complementary sequence for '%s'\n", entry->id );
            exit( 1 );
        }

        for ( i = 0; i < 3; i++ )
        {
            sprintf( xlation->id, "%s.-%d", entry->id, i+1 );

            /* Coordinates are reported on the input strand, so they descend */
            n1 = dnalen - i;
            n2 = 1 + ( (dnalen-i) % 3 );
            if ( entry->def && entry->def_len > 0 )
            {
                sprintf( xlation->def, "%s %d-%d/%d", entry->def, n1, n2, dnalen );
            }
            else
            {
                sprintf( xlation->def, "%d-%d/%d", n1, n2, dnalen );
            }
            xlation->def_len = slen( xlation->def );

            /* Same clamp as on the forward strand.
             * NOTE(review): assumes the complement string has the same
             * length as entry->seq -- confirm against complement_DNA_seq().
             */
            off = ( i < dnalen ) ? i : dnalen;

            xlation->seq = translate_seq_with_code( compl + off, met, NCBI_code );
            if ( ! xlation->seq )
            {
                fprintf( stderr, "Error translating sequence for '%s'\n", entry->id );
                exit( 1 );
            }
            xlation->seq_len = ( n1 - n2 + 1 ) / 3;

            write_fasta_entry( stdout, xlation );
            if ( flush ) fflush( stdout );

            free( (void *) xlation->seq );
            xlation->seq = (char *) NULL;
        }

        free( (void *) compl );
        free_sequence( xlation );
        free_sequence( entry );
    }

    return( 0 );
}


/*-----------------------------------------------------------------------------
 *  Print the command-line help text to standard error.
 *
 *      void usage( char * command )
 *
 *  command is the program name substituted into the "Usage:" line.
 *-----------------------------------------------------------------------------
 */

void usage( char * command )
{
    /* Fixed text that follows the "Usage:" line, written verbatim in order */
    static const char * const option_lines[] =
    {
        "Options:\n\n",
        "\n",
        "    -f                   //  Flush the output stream after every sequence\n",
        "    -g NCBI_code_number  //  Translate using given NCBI genetic code (D = 1)\n",
        "    -m                   //  Start all translations with Met\n",
        "    -v                   //  Print version to standard output and exit\n",
        "\n"
    };
    size_t k;

    fprintf( stderr, "\n" );
    fprintf( stderr, "Usage: %s [options] < fasta > fasta\n", command );
    fprintf( stderr, "\n" );

    for ( k = 0; k < sizeof option_lines / sizeof option_lines[0]; k++ )
    {
        fputs( option_lines[k], stderr );
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3