]> git.donarmstrong.com Git - mothur.git/blobdiff - fastamap.cpp
changes while testing
[mothur.git] / fastamap.cpp
index 4f4bf33206d77e437dd32bd2ff14b7f5f1290042..bf55493faece21cbd6c0043ac89149826c83c166 100644 (file)
  */
 
 #include "fastamap.h"
+#include "sequence.hpp"
 
 /*******************************************************************************/
- FastaMap::FastaMap(ifstream& in) {
-       //int numberOfSequences = 0;
+
+// Reads every record of the fasta file inFileName. Each named record is stored in
+// seqmap (name -> sequence) and its name is added to the group of identical
+// sequences kept in data (sequence -> group).
+void FastaMap::readFastaFile(string inFileName) {
+       try {
+               ifstream fastaIn;
+               m->openInputFile(inFileName, fastaIn);
+               
+               while (!fastaIn.eof()) {
+                       if (m->control_pressed) { break; }
+                       
+                       Sequence record(fastaIn);
+                       string name = record.getName();
+                       
+                       if (name != "") {
+                               //use the aligned form when the record reports being aligned
+                               string sequence;
+                               if (record.getIsAligned()) { sequence = record.getAligned(); }
+                               else { sequence = record.getUnaligned(); }
+                               
+                               //seqmap keeps only the first sequence seen under a given name
+                               if (seqmap.find(name) == seqmap.end()) {
+                                       seqmap[name] = sequence;
+                               }else {
+                                       m->mothurOut("You already have a sequence named " + name + ", sequence names must be unique, please correct.");
+                                       m->mothurOutEndLine();
+                               }
+                               
+                               map<string,group>::iterator groupIt = data.find(sequence);
+                               if (groupIt != data.end()) {
+                                       //sequence seen before: append this name to its group
+                                       groupIt->second.names += "," + name;
+                               }else {
+                                       //first occurrence: the group is named after this sequence name
+                                       group& fresh = data[sequence];
+                                       fresh.groupname = name;
+                                       fresh.names = name;
+                               }
+                       }
+                       m->gobble(fastaIn);
+               }
+               fastaIn.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "FastaMap", "readFastaFile");
+               exit(1);
+       }
+}
+
+/*******************************************************************************/
+
+void FastaMap::readFastaFile(string inFastaFile, string oldNameFileName){ //reads inFastaFile and groups identical sequences; each group's names string is built from the lists found in the names file oldNameFileName
        
-       string name, sequence, line;
-       sequence = "";
+       ifstream oldNameFile;
+       m->openInputFile(oldNameFileName, oldNameFile);
        
-       getline(in, line);
-       name = line.substr(1, line.length());  //rips off '>'
+       map<string,string> oldNameMap;
+       map<string, string>::iterator itName;
+       string name, list;
+       while(!oldNameFile.eof()){ //each pass reads one name followed by one list token
+               if (m->control_pressed) { break; }
+               
+               oldNameFile >> name; m->gobble(oldNameFile);
+               oldNameFile >> list;
+               oldNameMap[name] = list; //a later entry with the same name replaces the earlier list
+               m->gobble(oldNameFile);
+       }
+       oldNameFile.close();
        
-       //read through file
-       while (getline(in, line)) {
-               if (isalnum(line.at(0))){  //if it's a sequence line
-                       sequence += line;
-               }
-               else{
-                       //input sequence info into map
-                       it = data.find(sequence);
+       ifstream inFASTA;
+       m->openInputFile(inFastaFile, inFASTA);
+       string sequence;
+       while(!inFASTA.eof()){
+               if (m->control_pressed) { break; }
+               
+               Sequence currSeq(inFASTA);
+               name = currSeq.getName();
+               
+               if (name != "") {
+                       if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
+                       else                                            {       sequence = currSeq.getUnaligned();      }
+                       
+                       itName = seqmap.find(name);
+                       if (itName == seqmap.end()) { seqmap[name] = sequence;  }
+                       else { m->mothurOut("You already have a sequence named " + name + ", sequence names must be unique, please correct."); m->mothurOutEndLine(); }
+                       
+                       seqmap[name] = sequence;  //NOTE(review): unconditional, so a repeated name overwrites the stored sequence even after the warning above
+                       map<string,group>::iterator it = data.find(sequence);
                        if (it == data.end()) {         //it's unique.
                                data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
-                               data[sequence].groupnumber = 1;
-                               data[sequence].names = name;
+                               //                      data[sequence].groupnumber = 1;
+                               data[sequence].names = oldNameMap[name]; //operator[] inserts and returns an empty string when name is absent from the names file
                        }else { // its a duplicate.
-                               data[sequence].names += "," + name;
-                               data[sequence].groupnumber++;   
-                       }
-                       name = (line.substr(1, (line.npos))); //The line you just read is a new name so rip off '>'
+                               data[sequence].names += "," + oldNameMap[name]; //appends the whole list stored for name, not just name itself
+                               //                      data[sequence].groupnumber++;
+                       }       
                }
+               m->gobble(inFASTA);
        }
        
-       //store last sequence and name info.
-       it = data.find(sequence);
-       if (it == data.end()) {         //it's unique.
-               data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
-               data[sequence].groupnumber = 1;
-               data[sequence].names = name;
-       }else { // its a duplicate.
-               data[sequence].names += "," + name;
-               data[sequence].groupnumber++;   
-       }       
+       
+       inFASTA.close();
 }
+
 /*******************************************************************************/
+
 string FastaMap::getGroupName(string seq) {  //pass a sequence (the key used in data) and get the name of its group
        //NOTE: data[seq] inserts a default-constructed group when seq has not been stored yet
        return data[seq].groupname;
 }
+
 /*******************************************************************************/
-int FastaMap::getGroupNumber(string seq) {  //pass a sequence name get number of sequence in its group
-       return data[seq].groupnumber;
-}
-/*******************************************************************************/
+
 string FastaMap::getNames(string seq) {        //pass a sequence (the key used in data) and get the string of names in its group separated by ','s.
        //NOTE: data[seq] inserts a default-constructed group when seq has not been stored yet
        return data[seq].names;
 }
+
 /*******************************************************************************/
-void FastaMap::push_back(string seq, string Name) {//sequencename, name
-       data[seq].groupname = Name;
-       data[seq].groupnumber = 1;
-       data[seq].names = Name;
-}
+
+// Returns the sequence stored in seqmap under name, or the literal "not found"
+// when no sequence has been stored under that name.
+string FastaMap::getSequence(string name) {
+       map<string,string>::iterator found = seqmap.find(name);
+       if (found != seqmap.end()) { return found->second; }
+       return "not found";
+}
+
 /*******************************************************************************/
-void FastaMap::clear() { //clears out data
-       data.clear();
+
+// Adds the sequence seq under name: joins the existing group of identical sequences,
+// or starts a new group named after name, then stores name -> seq in seqmap.
+void FastaMap::push_back(string name, string seq) {
+       map<string,group>::iterator groupIt = data.find(seq);
+       if (groupIt != data.end()) {
+               //seq already present: extend the comma separated list of names
+               groupIt->second.names += "," + name;
+       }else {
+               //first occurrence of seq: name becomes the group name
+               group& fresh = data[seq];
+               fresh.groupname = name;
+               fresh.names = name;
+       }
+       seqmap[name] = seq;     //always overwrites any sequence previously stored under name
 }
+
 /*******************************************************************************/
-int FastaMap::size(){ //returns datas size which is the number of unique sequences
+
+int FastaMap::sizeUnique(){ //returns the number of entries in data, which is the number of unique sequences stored
        return data.size();
 }
+
 /*******************************************************************************/
-void FastaMap::print(ostream&){ //prints data
 
+// Writes a two column file to outFileName: for every entry of data, the group name,
+// a tab, and then the group's names string.
+void FastaMap::printNamesFile(string outFileName){
+       try {
+               ofstream namesOut;
+               m->openOutputFile(outFileName, namesOut);
+               
+               map<string,group>::iterator groupIt = data.begin();
+               while (groupIt != data.end()) {
+                       if (m->control_pressed) { break; }
+                       const group& entry = groupIt->second;
+                       namesOut << entry.groupname << '\t' << entry.names << endl;
+                       groupIt++;
+               }
+               namesOut.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "FastaMap", "printNamesFile");
+               exit(1);
+       }
 }
+
 /*******************************************************************************/
+
+// Writes a fasta file to outFileName holding one record per entry of data:
+// a ">" header carrying the group name followed by the sequence.
+void FastaMap::printCondensedFasta(string outFileName){
+       try {
+               ofstream fastaOut;
+               m->openOutputFile(outFileName, fastaOut);
+               
+               map<string,group>::iterator groupIt = data.begin();
+               while (groupIt != data.end()) {
+                       if (m->control_pressed) { break; }
+                       fastaOut << ">" << groupIt->second.groupname << endl;   //header line
+                       fastaOut << groupIt->first << endl;     //the map key is the sequence itself
+                       groupIt++;
+               }
+               fastaOut.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "FastaMap", "printCondensedFasta");
+               exit(1);
+       }
+}
+
+/*******************************************************************************/
+