1 /******************************************************************
2 Copyright 2006 by Michael Farrar. All rights reserved.
3 This program may not be sold or incorporated into a commercial product,
4 in whole or in part, without written consent of Michael Farrar. For
5 further information regarding permission for use or reproduction, please
6 contact: Michael Farrar at farrar.michael@gmail.com.
7 *******************************************************************/
10 Written by Michael Farrar, 2006.
11 Please send bug reports and/or suggestions to farrar.michael@gmail.com.
/* Size in bytes of the file read buffer: used both as the allocation size
 * of lib->readBuffer and as the element count passed to fread(). */
21 #define READ_BUFFER_SIZE (128 * 1024)
/* Size in bytes of the buffer holding the current sequence name; nextSeq
 * copies name characters only while fewer than SEQ_NAME_SIZE - 1 have been
 * stored. */
22 #define SEQ_NAME_SIZE (128)
/*
 * openLib: open a FASTA file and build the reader state for it.
 *
 * Opens `file` for reading, allocates the FASTA_LIB header plus its three
 * working buffers (read buffer, sequence buffer, sequence name) and loads
 * the first block of the file into the read buffer.  Every failure is
 * reported on stderr.
 *
 * file - path of the FASTA file to open
 * pad  - NOTE(review): presumably selects the pad-to-16 behaviour mentioned
 *        in nextSeq; where it is stored is not visible here - confirm.
 *
 * NOTE(review): what happens after each error message (exit vs. return)
 * and the initialisation of the remaining FASTA_LIB fields are not visible
 * in this listing.
 */
24 FASTA_LIB *openLib (char *file, int pad)
30 if ((fp = fopen (file, "r")) == NULL) {
31 fprintf (stderr, "Unable to open file %s\n", file);
/* library header */
35 lib = (FASTA_LIB *) malloc (sizeof (FASTA_LIB));
37 fprintf (stderr, "Unable to allocate memory for library header\n");
/* raw file data, READ_BUFFER_SIZE bytes at a time */
41 lib->readBuffer = (char *) malloc (READ_BUFFER_SIZE);
42 if (!lib->readBuffer) {
43 fprintf (stderr, "Unable to allocate memory for read buffer\n");
/* storage for one sequence, MAX_SEQ_LENGTH bytes */
47 lib->seqBuffer = (unsigned char *) malloc (MAX_SEQ_LENGTH);
48 if (!lib->seqBuffer) {
49 fprintf (stderr, "Unable to allocate memory for sequence\n");
/* name of the current sequence */
53 lib->seqName = (char *) malloc (SEQ_NAME_SIZE);
55 fprintf (stderr, "Unable to allocate memory for sequence name\n");
/* prime the read buffer with the first block of the file */
59 lib->size = (int) fread (lib->readBuffer, sizeof (char), READ_BUFFER_SIZE, fp);
/* nothing read and not at end of file: treat as a read error */
60 if (lib->size == 0 && !feof (fp)) {
/* NOTE(review): ferror() returns a non-zero error indicator, not an errno
 * style code, so the number printed here carries little information. */
61 fprintf (stderr, "Error (%d) reading fasta file\n", ferror (fp));
/*
 * readNextBlock: refill lib->readBuffer with up to READ_BUFFER_SIZE bytes
 * from the library file and record the number of bytes read in lib->size.
 *
 * A read that returns no data without reaching end of file is reported on
 * stderr as an error.
 *
 * lib - library state whose read buffer is refilled
 */
78 readNextBlock (FASTA_LIB *lib)
83 size = fread (lib->readBuffer, sizeof (char), READ_BUFFER_SIZE, fp);
/* Test the count fread() just returned.  lib->size still holds the size of
 * the previous block at this point (it is only updated below), so checking
 * it here would not detect a failed read. */
84 if (size == 0 && !feof (fp)) {
85 fprintf (stderr, "Error (%d) reading fasta file\n", ferror (fp));
90 lib->size = (int) size;
/*
 * nextSeq: parse the next FASTA record out of the library read buffer.
 *
 * A record must start with '>'.  The name line is copied into lib->seqName
 * and every residue is translated through AMINO_ACID_VALUE and stored in
 * lib->seqBuffer.  Whenever the read index runs past the buffered data the
 * buffer is refilled with readNextBlock().  The sequence length is added
 * to lib->residues and lib->seqBuffer is returned.
 *
 * lib    - library state being read
 * length - NOTE(review): presumably receives the sequence length; the
 *          store is not visible in this listing - confirm.
 *
 * NOTE(review): the value returned at end of library is not visible here.
 */
96 nextSeq (FASTA_LIB *lib, int *length)
103 char *name = lib->seqName;
104 unsigned char *seq = lib->seqBuffer;
106 /* check if we are at the end of the library */
107 if (lib->size == 0) {
/* every byte of the current block has been consumed */
112 if (lib->pos == lib->size) {
118 /* check for the start of a sequence */
119 if (lib->readBuffer[inx] != '>') {
120 fprintf (stderr, "Error parsing fasta file expecting > found %c\n",
121 lib->readBuffer[inx]);
127 /* read in the sequence name */
/* ran off the end of the buffered data: fetch the next block */
131 if (inx >= lib->size) {
132 size = readNextBlock (lib);
/* a newline ends the name line */
138 } else if (lib->readBuffer[inx] == '\n') {
/* copy name characters only while fewer than SEQ_NAME_SIZE - 1 are stored */
141 } else if (len < SEQ_NAME_SIZE - 1) {
142 *name++ = lib->readBuffer[inx];
150 /* read in the sequence */
/* ran off the end of the buffered data: fetch the next block */
154 if (inx >= lib->size) {
155 size = readNextBlock (lib);
/* readBuffer holds plain char; passing a negative value to isspace() is
 * undefined behaviour, so convert to unsigned char first. */
161 } else if (isspace((unsigned char) lib->readBuffer[inx])) {
/* '>' marks the start of the next record */
163 } else if (lib->readBuffer[inx] == '>') {
/* seqBuffer holds MAX_SEQ_LENGTH residues at most */
166 } else if (len >= MAX_SEQ_LENGTH) {
167 fprintf (stderr, "Sequence %s exceeds maximum length\n",
/* NOTE(review): the index is a plain char and may be negative where char
 * is signed; confirm the size of AMINO_ACID_VALUE before adding an
 * unsigned char cast here. */
171 int value = AMINO_ACID_VALUE[lib->readBuffer[inx]];
173 fprintf (stderr, "Unknown amino acid %c in sequence %s\n",
174 lib->readBuffer[inx], lib->seqName);
177 *seq++ = (char) value;
187 lib->residues += len;
189 /* check if we need to pad the sequence to a multiple of 16 */
/* The count is in 1..16: a length that is already a multiple of 16 gets a
 * full 16, not 0.
 * NOTE(review): len can reach MAX_SEQ_LENGTH, which is also the allocated
 * size of seqBuffer; if the padding code writes through seq it may run
 * past the buffer - confirm. */
191 inx = 16 - (len % 16);
198 return lib->seqBuffer;
/*
 * closeLib: release the resources held by a library opened with openLib.
 *
 * lib - library state to tear down
 *
 * NOTE(review): openLib also opens the file and allocates lib->seqName and
 * the FASTA_LIB header itself; their release is not visible in this
 * listing - confirm they are closed and freed as well.
 */
201 void closeLib (FASTA_LIB *lib)
205 free (lib->readBuffer);
206 free (lib->seqBuffer);