]> git.donarmstrong.com Git - mothur.git/blobdiff - fastamap.cpp
added logfile feature
[mothur.git] / fastamap.cpp
index 1cc9abf08da58bad5497079d46185413e0c23ff4..57754667da593265618aa0d9e5c046e18b2b7203 100644 (file)
  */
 
 #include "fastamap.h"
+#include "sequence.hpp"
 
 /*******************************************************************************/
-void FastaMap::readFastaFile(ifstream& in) {
+
+void FastaMap::readFastaFile(string inFileName) {  //reads sequences from inFileName and groups identical sequences under the first name seen
        try {
+               ifstream in;
+               openInputFile(inFileName, in);  //project helper; presumably opens inFileName on in - confirm how open failures are reported
                string name, sequence, line;  //NOTE(review): line is not referenced by any added or context line in this function
                sequence = "";
-               int c;
                string temp;  //NOTE(review): temp is not referenced by any added or context line in this function
-               
-               
-               //read through file
-               while ((c = in.get()) != EOF) {
-                       name = ""; sequence = ""; 
-                       //is this a name
-                       if (c == '>') { 
-                               name = readName(in); 
-                               sequence = readSequence(in); 
-                       }else {  cout << "Error fasta in your file. Please correct." << endl; }
-
-                       //store info in map
-                       //input sequence info into map
+
+               while(!in.eof()){  //one Sequence is built per pass, until the stream reports end-of-file
+                       Sequence currSeq(in);  //constructed from the stream; presumably consumes one fasta record - confirm against sequence.hpp
+                       name = currSeq.getName();
+                       
+                       if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }  //keep the aligned text when the record is flagged as aligned
+                       else                                            {       sequence = currSeq.getUnaligned();      }  //otherwise keep the unaligned text
+                       
                        seqmap[name] = sequence;  //name -> sequence lookup; a repeated name overwrites the earlier entry
-                       it = data.find(sequence);
+                       map<string,group>::iterator it = data.find(sequence);  //data is keyed by the sequence text itself
                        if (it == data.end()) {         //it's unique.
                                data[sequence].groupname = name;  //the group is named after the first sequence seen with this text.
-                               data[sequence].groupnumber = 1;
+//                             data[sequence].groupnumber = 1;
                                data[sequence].names = name;  //the names list starts as just this name
                        }else { // it's a duplicate.
                                data[sequence].names += "," + name;  //extend the comma-separated list of member names
-                               data[sequence].groupnumber++;
+//                             data[sequence].groupnumber++;
                        }       
                        
                        gobble(in);  //project helper; presumably skips trailing whitespace so eof() trips after the last record - confirm
                }
-                                       
+               in.close();             
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the FastaMap class function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-}
-/*******************************************************************************/
-string FastaMap::readName(ifstream& in) {
-       try{
-               string name = "";
-               int c;
-               string temp;
-               
-               while ((c = in.get()) != EOF) {
-                       //if c is not a line return
-                       if (c != 10) {
-                               name += c;
-                       }else { break;  }
-               }
-                       
-               return name;
-       }
-       catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readName. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the FastaMap class function readName. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               errorOut(e, "FastaMap", "readFastaFile");  //project helper handed the exception plus the class and function names
                exit(1);  //any std::exception caught here ends the process with status 1
        }
 }
 
 /*******************************************************************************/
-string FastaMap::readSequence(ifstream& in) {
-       try{
-               string sequence = "";
-               string line;
-               int pos, c;
+
+void FastaMap::readFastaFile(string inFastaFile, string oldNameFileName){ //reads inFastaFile and groups identical sequences, taking each member's names from oldNameFileName
+       
+       ifstream oldNameFile;
+       openInputFile(oldNameFileName, oldNameFile);  //project helper; presumably opens the file on the stream - confirm
+       
+       map<string,string> oldNameMap;  //first field -> second field of each entry in the old names file
+       string name, list;
+       while(!oldNameFile.eof()){
+               oldNameFile >> name >> list;  //two whitespace-delimited fields per entry
+               oldNameMap[name] = list;
+               gobble(oldNameFile);  //project helper; presumably skips trailing whitespace so eof() trips after the last entry - confirm
+       }
+       oldNameFile.close();
+       
+       ifstream inFASTA;
+       openInputFile(inFastaFile, inFASTA);
+       string sequence;
+       while(!inFASTA.eof()){  //one Sequence is built per pass, until the stream reports end-of-file
+               Sequence currSeq(inFASTA);  //constructed from the stream; presumably consumes one fasta record - confirm against sequence.hpp
+               name = currSeq.getName();
                
-               while (!in.eof()) {
-                       //save position in file in case next line is a new name.
-                       pos = in.tellg();
-                       line = "";
-                       in >> line;                     
-                       //if you are at a new name
-                       if (line[0] == '>') {
-                               //put file pointer back since you are now at a new name
-                               in.seekg(pos, ios::beg);
-                               c = in.get();  //because you put it back to a newline char
-                               break;
-                       }else {  sequence += line;      }
-               }
-                       
-               return sequence;
-       }
-       catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readSequence. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the FastaMap class function readSequence. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
+               if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }  //keep the aligned text when the record is flagged as aligned
+               else                                            {       sequence = currSeq.getUnaligned();      }  //otherwise keep the unaligned text
+               
+               seqmap[name] = sequence;  //name -> sequence lookup; a repeated name overwrites the earlier entry
+               map<string,group>::iterator it = data.find(sequence);  //data is keyed by the sequence text itself
+               if (it == data.end()) {         //it's unique.
+                       data[sequence].groupname = name;  //the group is named after the first sequence seen with this text.
+//                     data[sequence].groupnumber = 1;
+                       data[sequence].names = oldNameMap[name];  //start from the old names list for this name; operator[] yields (and inserts) an empty string when name is missing from oldNameMap
+               }else { // it's a duplicate.
+                       data[sequence].names += "," + oldNameMap[name];  //append this member's old names list, comma-separated
+//                     data[sequence].groupnumber++;
+               }       
+               
+               gobble(inFASTA);
        }
+       
+       
+       inFASTA.close();  //NOTE(review): no try/catch wraps this function's added lines, unlike the one-argument readFastaFile
 }
+
 /*******************************************************************************/
+
 string FastaMap::getGroupName(string seq) {  //pass a sequence (the key used in data), get the name of its group
        return data[seq].groupname;  //data looks like a map<string,group>; if so, operator[] inserts an empty group when seq is not already a key - confirm
 }
+
 /*******************************************************************************/
+
 string FastaMap::getNames(string seq) {        //pass a sequence, get the comma-separated names of the members of its group.
        return data[seq].names;  //data looks like a map<string,group>; if so, operator[] inserts an empty group when seq is not already a key - confirm
 }
+
 /*******************************************************************************/
-int FastaMap::getGroupNumber(string seq) {     //pass a sequence get the number of identical sequences.
-       return data[seq].groupnumber;
-}
-/*******************************************************************************/
+
 string FastaMap::getSequence(string name) {  //pass a sequence name, get the sequence stored for it in seqmap
-       it2 = seqmap.find(name);
-       if (it2 == seqmap.end()) {      //it's not found
-               return "not found";
-       }else { // found it
-               return it2->second;
-       }
+       
+       map<string,string>::iterator it = seqmap.find(name);  //find() does not insert, so a miss leaves seqmap unchanged
+       if (it == seqmap.end()) {       return "not found";             }  //unknown name: callers receive this literal text rather than an error
+       else                                    {       return it->second;              }
+       
 }      
+
 /*******************************************************************************/
+
 void FastaMap::push_back(string name, string seq) {  //adds one named sequence, grouping it with any identical sequence already in data
-       it = data.find(seq);
+       
+       map<string,group>::iterator it = data.find(seq);  //data is keyed by the sequence text itself
        if (it == data.end()) {         //it's unique.
                data[seq].groupname = name;  //the group is named after the first sequence seen with this text.
-               data[seq].groupnumber = 1;
                data[seq].names = name;  //the names list starts as just this name
        }else { // it's a duplicate.
                data[seq].names += "," + name;  //extend the comma-separated list of member names
-               data[seq].groupnumber++;
        }
-       
        seqmap[name] = seq;  //name -> sequence lookup; a repeated name overwrites the earlier entry
 }
+
 /*******************************************************************************/
+
 int FastaMap::sizeUnique(){ //returns data's size, which is the number of unique sequences
        return data.size();  //the container's size is converted to int by the return type
 }
+
 /*******************************************************************************/
-void FastaMap::printNamesFile(ostream& out){ //prints data
+
+void FastaMap::printNamesFile(string outFileName){ //writes one line per unique sequence to outFileName: group name, tab, member names
        try {
+               ofstream outFile;
+               openOutputFile(outFileName, outFile);  //project helper; presumably opens outFileName on outFile - confirm how open failures are reported
+               
                // two-column file: the group name, then the list of identical sequence names
-               for (it = data.begin(); it != data.end(); it++) {
-                       out << it->second.groupname << '\t' << it->second.names << endl;
+               for (map<string,group>::iterator it = data.begin(); it != data.end(); it++) {  //visits every entry of data, one per unique sequence
+                       outFile << it->second.groupname << '\t' << it->second.names << endl;
                }
+               outFile.close();
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               errorOut(e, "FastaMap", "printNamesFile");  //project helper handed the exception plus the class and function names
                exit(1);  //any std::exception caught here ends the process with status 1
        }
 }
+
 /*******************************************************************************/
-void FastaMap::printCondensedFasta(ostream& out){ //prints data
+
+void FastaMap::printCondensedFasta(string outFileName){ //writes one fasta record per unique sequence to outFileName
        try {
+               ofstream out;
+               openOutputFile(outFileName, out);  //project helper; presumably opens outFileName on out - confirm how open failures are reported
                //creates a fasta file: a header line with the group name, then the sequence itself
-               for (it = data.begin(); it != data.end(); it++) {
+               for (map<string,group>::iterator it = data.begin(); it != data.end(); it++) {  //visits every entry of data, one per unique sequence
                        out << ">" << it->second.groupname << endl;
                        out << it->first << endl;  //the key of data is the sequence text
                }
+               out.close();
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               errorOut(e, "FastaMap", "printCondensedFasta");  //project helper handed the exception plus the class and function names
                exit(1);  //any std::exception caught here ends the process with status 1
        }
 }
+
 /*******************************************************************************/