git.donarmstrong.com Git - mothur.git/blob - readseqsphylip.cpp
modified filter.seqs to not store all seqs in memory but to read off disc
[mothur.git] / readseqsphylip.cpp
1 /*
2  *  readseqsphylip.cpp
3  *  Mothur
4  *
5  *  Created by Thomas Ryabin on 4/24/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "readseqsphylip.h"
11 #include <iostream>
12 #include <fstream>
13
14 /*******************************************************************************/
15 bool ReadPhylip::isSeq(string seq) {
16         string validChars[] = {"A","G","C","T","U","N","-"};
17         
18         for(int i = 0; i < seq.length(); i++) {
19                 bool valid = false;
20                 string c = seq.substr(i,1);
21                 for(int k = 0; k < 7; k++)
22                         if(c.compare(validChars[k]) == 0) {
23                                 valid = true;
24                                 k = 7;
25                         }
26                 if(!valid)
27                         return false;
28         }
29         
30         return true;
31 }
32
33 /*******************************************************************************/
34 ReadPhylip::ReadPhylip(string file) : ReadSeqs(file) {  }
35 /*******************************************************************************/
36 ReadPhylip::~ReadPhylip(){
37 //      for(int i = 0; i < sequencedb.getNumSeqs(); i++)
38 //              delete sequencedb.get(i);
39 }
40 /*******************************************************************************/
41 void ReadPhylip::read() {
42         string temp;
43         string name;
44         string sequence;
45         
46         int count = 0;
47         int letterCount = 0;
48         int numCols = 0;
49         filehandle >> temp;
50         int numSeqs = atoi(temp.c_str());
51         filehandle >> temp;
52         int numLetters = atoi(temp.c_str());
53         
54         bool firstDone = false; 
55         bool last = false;
56         filehandle >> name;
57         
58         while(!filehandle.eof()) {
59                 if(!firstDone) {
60                         sequence = "";
61                         if(count == 0) {
62                                 filehandle >> temp;
63                                 while(isSeq(temp)) {
64                                         sequence += temp;
65                                         numCols++;
66                                         filehandle >> temp;
67                                 }
68                                 letterCount += sequence.length();
69                         }
70                         else {
71                                 for(int i = 0; i < numCols; i++) {
72                                         filehandle >> temp;
73                                         sequence += temp;
74                                 }
75                                 if(count < numSeqs-1)
76                                         filehandle >> temp;
77                         }
78                         Sequence newSeq(name, sequence);
79                         sequencedb.add(newSeq);
80                         if(count < numSeqs-1)
81                                 name = temp;
82                 }       
83                 else {
84                         sequence = "";
85                         for(int i = 0; i < numCols; i++) {
86                                 filehandle >> temp;
87                                 sequence += temp;
88                                 if(count == 0)
89                                         letterCount += temp.length();
90                                 if(letterCount == numLetters && count == 0) {
91                                         numCols = i + 1;
92                                         i = numCols;
93                                 }
94                         }
95                         if(!(last && count == 0))
96                                 sequencedb.set(count, sequencedb.get(count).getUnaligned() + sequence);
97                         if(letterCount == numLetters && count == 0)
98                                 last = true;
99                 }
100                 
101                 count++;
102                 
103                 if(count == numSeqs) {
104                         firstDone = true;
105                         count = 0;
106                 }
107         }
108         filehandle.close();
109 }
110
111 /*********************************************************************************/
112 SequenceDB* ReadPhylip::getDB() {
113         return &sequencedb;
114 }