git.donarmstrong.com Git - mothur.git/blobdiff - fastamap.cpp
added screen.seqs command - pds
[mothur.git] / fastamap.cpp
index 0e6c22e21f48aec62b4a365db1e2454cd65ce7c9..0848363be7cf4099cbb50cb58a3d11623eecc78f 100644 (file)
@@ -15,16 +15,20 @@ void FastaMap::readFastaFile(ifstream& in) {
                string name, sequence, line;
                sequence = "";
        
-               getline(in, line);
+               in >> line;
                name = line.substr(1, line.length());  //rips off '>'
        
                //read through file
-               while (getline(in, line)) {
-                       if (isalnum(line.at(0))){  //if it's a sequence line
+               while (!in.eof()) {
+                       in >> line;
+
+                       if (line[0] != '>') {  //if it's a sequence line
                                sequence += line;
                        }
                        else{
-                               //input sequence info into map
+                       //input sequence info into map
+                               seqmap[name] = sequence;  
+
                                it = data.find(sequence);
                                if (it == data.end()) {         //it's unique.
                                        data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
@@ -37,9 +41,9 @@ void FastaMap::readFastaFile(ifstream& in) {
                                name = (line.substr(1, (line.npos))); //The line you just read is a new name so rip off '>'
                                sequence = "";
                        }
+                       
+                       gobble(in);
                }
-       
-               //store last sequence and name info.
                it = data.find(sequence);
                if (it == data.end()) {         //it's unique.
                        data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
@@ -48,7 +52,9 @@ void FastaMap::readFastaFile(ifstream& in) {
                }else { // its a duplicate.
                        data[sequence].names += "," + name;
                        data[sequence].groupnumber++;
-               }       
+               }
+               
+                       
        }
        catch(exception& e) {
                cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
@@ -72,25 +78,34 @@ int FastaMap::getGroupNumber(string seq) {  //pass a sequence get the number of i
        return data[seq].groupnumber;
 }
 /*******************************************************************************/
-void FastaMap::push_back(string seq, string Name) {//sequencename, name
-       data[seq].groupname = Name;
-       data[seq].names = Name;
-}
-/*******************************************************************************/
-void FastaMap::set(string seq, string groupName, string Names) {
-       data[seq].groupname = groupName;
-       data[seq].names = Names;
-}
+string FastaMap::getSequence(string name) {  //look up the sequence stored in seqmap under the given name
+       it2 = seqmap.find(name);  //it2 is not declared in this function; the lookup result is kept in it
+       if (it2 == seqmap.end()) {      //it's not found
+               return "not found";  //sentinel string returned when name has no entry in seqmap
+       }else { // found it
+               return it2->second;  //the value seqmap holds for name
+       }
+}      
 /*******************************************************************************/
-void FastaMap::clear() { //clears out data
-       data.clear();
+void FastaMap::push_back(string name, string seq) {  //record one named sequence; arguments are name first, then sequence
+       it = data.find(seq);  //data is looked up by the sequence text
+       if (it == data.end()) {         //it's unique.
+               data[seq].groupname = name;  //group name will be the name of the first duplicate sequence found.
+               data[seq].groupnumber = 1;  //first name seen for this sequence
+               data[seq].names = name;  //names starts out as just this name
+       }else { // its a duplicate.
+               data[seq].names += "," + name;  //names grows as a comma-separated list
+               data[seq].groupnumber++;  //one more name sharing this sequence
+       }
+       
+       seqmap[name] = seq;  //name -> sequence; assignment replaces any sequence already stored under this name
 }
 /*******************************************************************************/
-int FastaMap::size(){ //returns datas size which is the number of unique sequences
+int FastaMap::sizeUnique(){ //returns the number of entries in data, i.e. the number of unique sequences (data.size() converted to int)
        return data.size();
 }
 /*******************************************************************************/
-void FastaMap::print(ostream& out){ //prints data
+void FastaMap::printNamesFile(ostream& out){ //prints data
        try {
                // two column file created with groupname and them list of identical sequence names
                for (it = data.begin(); it != data.end(); it++) {
@@ -107,3 +122,22 @@ void FastaMap::print(ostream& out){ //prints data
        }
 }
 /*******************************************************************************/
+void FastaMap::printCondensedFasta(ostream& out){ //prints data
+       try {
+               // two column file created with groupname and them list of identical sequence names
+               for (it = data.begin(); it != data.end(); it++) {
+                       out << ">" << it->second.groupname << endl;
+                       out << it->first << endl;
+               }
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+}
+/*******************************************************************************/
+