git.donarmstrong.com Git - mothur.git/blobdiff - fastamap.cpp
added smart distance feature and optimized all commands using line by line processing
[mothur.git] / fastamap.cpp
index 14065231e96f4a7303921424ab53ce633e0984ec..a1beda31ac5ee82c532676163179197a004f91eb 100644 (file)
@@ -15,31 +15,36 @@ void FastaMap::readFastaFile(ifstream& in) {
                string name, sequence, line;
                sequence = "";
        
-               getline(in, line);
+               in >> line;
                name = line.substr(1, line.length());  //rips off '>'
        
                //read through file
-               while (getline(in, line)) {
-                       if (isalnum(line.at(0))){  //if it's a sequence line
-                               sequence += line;
-                       }
-                       else{
+               while (in.eof() != true) {
+                       in >> line;
+                       if (line != "") {
+                               if (isalnum(line.at(0))) {  //if it's a sequence line
+                                       sequence += line;
+                               }
+                               else{
                                //input sequence info into map
-                               it = data.find(sequence);
-                               if (it == data.end()) {         //it's unique.
-                                       data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
-                                       data[sequence].groupnumber = 1;
-                                       data[sequence].names = name;
-                               }else { // its a duplicate.
-                                       data[sequence].names += "," + name;
-                                       data[sequence].groupnumber++;
+                                       seqmap[name] = sequence;  
+                                       it = data.find(sequence);
+                                       if (it == data.end()) {         //it's unique.
+                                               data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
+                                               data[sequence].groupnumber = 1;
+                                               data[sequence].names = name;
+                                       }else { // its a duplicate.
+                                               data[sequence].names += "," + name;
+                                               data[sequence].groupnumber++;
+                                       }
+                                       name = (line.substr(1, (line.npos))); //The line you just read is a new name so rip off '>'
+                                       sequence = "";
                                }
-                               name = (line.substr(1, (line.npos))); //The line you just read is a new name so rip off '>'
-                               sequence = "";
                        }
                }
        
                //store last sequence and name info.
+               seqmap[name] = sequence;
                it = data.find(sequence);
                if (it == data.end()) {         //it's unique.
                        data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
@@ -48,7 +53,8 @@ void FastaMap::readFastaFile(ifstream& in) {
                }else { // its a duplicate.
                        data[sequence].names += "," + name;
                        data[sequence].groupnumber++;
-               }       
+               }
+                       
        }
        catch(exception& e) {
                cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
@@ -72,25 +78,34 @@ int FastaMap::getGroupNumber(string seq) {  //pass a sequence get the number of i
        return data[seq].groupnumber;
 }
 /*******************************************************************************/
-void FastaMap::push_back(string seq, string Name) {//sequencename, name
-       data[seq].groupname = Name;
-       data[seq].names = Name;
-}
-/*******************************************************************************/
-void FastaMap::set(string seq, string groupName, string Names) {
-       data[seq].groupname = groupName;
-       data[seq].names = Names;
-}
+string FastaMap::getSequence(string name) {
+       it2 = seqmap.find(name);
+       if (it2 == seqmap.end()) {      //it's not found
+               return "not found";
+       }else { // found it
+               return it2->second;
+       }
+}      
 /*******************************************************************************/
-void FastaMap::clear() { //clears out data
-       data.clear();
+void FastaMap::push_back(string name, string seq) {
+       it = data.find(seq);
+       if (it == data.end()) {         //it's unique.
+               data[seq].groupname = name;  //group name will be the name of the first duplicate sequence found.
+               data[seq].groupnumber = 1;
+               data[seq].names = name;
+       }else { // its a duplicate.
+               data[seq].names += "," + name;
+               data[seq].groupnumber++;
+       }
+       
+       seqmap[name] = seq;
 }
 /*******************************************************************************/
-int FastaMap::size(){ //returns datas size which is the number of unique sequences
+int FastaMap::sizeUnique(){ //returns datas size which is the number of unique sequences
        return data.size();
 }
 /*******************************************************************************/
-void FastaMap::print(ostream& out){ //prints data
+void FastaMap::printNamesFile(ostream& out){ //prints data
        try {
                // two column file created with groupname and them list of identical sequence names
                for (it = data.begin(); it != data.end(); it++) {