]> git.donarmstrong.com Git - mothur.git/blobdiff - bellerophon.cpp
added parse.list command
[mothur.git] / bellerophon.cpp
index f2dd821acf896302bc4b265cdb3bd8a03e89428d..17859dc710309b7535e234aec56791bc7a8ee1b2 100644 (file)
 
 //***************************************************************************************************************
 
-Bellerophon::Bellerophon(string name) {
+Bellerophon::Bellerophon(string name, string o)  {
        try {
                fastafile = name;
+               outputDir = o;
        }
        catch(exception& e) {
                errorOut(e, "Bellerophon", "Bellerophon");
@@ -26,17 +27,18 @@ Bellerophon::Bellerophon(string name) {
 }
 
 //***************************************************************************************************************
-void Bellerophon::print(ostream& out) {
+void Bellerophon::print(ostream& out, ostream& outAcc) {
        try {
                int above1 = 0;
                out << "Name\tScore\tLeft\tRight\t" << endl;
                //output prefenence structure to .chimeras file
                for (int i = 0; i < pref.size(); i++) {
-                       out << pref[i].name << '\t' << pref[i].score[0] << '\t' << pref[i].leftParent[0] << '\t' << pref[i].rightParent[0] << endl;
+                       out << pref[i].name << '\t' << setprecision(3) << pref[i].score[0] << '\t' << pref[i].leftParent[0] << '\t' << pref[i].rightParent[0] << endl;
                        
                        //calc # of seqs with preference above 1.0
                        if (pref[i].score[0] > 1.0) { 
                                above1++; 
+                               outAcc << pref[i].name << endl;
                                mothurOut(pref[i].name + " is a suspected chimera at breakpoint " + toString(pref[i].midpoint)); mothurOutEndLine();
                                mothurOut("It's score is " + toString(pref[i].score[0]) + " with suspected left parent " + pref[i].leftParent[0] + " and right parent " + pref[i].rightParent[0]); mothurOutEndLine();
                        }
@@ -75,56 +77,62 @@ inline bool comparePref(Preference left, Preference right){
 }
 
 //***************************************************************************************************************
-void Bellerophon::getChimeras() {
+int Bellerophon::getChimeras() {
        try {
                
                //do soft filter
                if (filter)  {
-                       string optionString = "fasta=" + fastafile + ", soft=50, vertical=F";
+                       string optionString = "fasta=" + fastafile + ", soft=50";
+                       if (outputDir != "") { optionString += ", outputdir=" + outputDir; }
+                       
                        filterSeqs = new FilterSeqsCommand(optionString);
                        filterSeqs->execute();
                        delete filterSeqs;
                        
                        //reset fastafile to filtered file
-                       fastafile = getRootName(fastafile) + "filter.fasta";
+                       if (outputDir == "") { fastafile = getRootName(fastafile) + "filter.fasta"; }
+                       else                             { fastafile = outputDir + getRootName(getSimpleName(fastafile)) + "filter.fasta"; }
+                       
                }
                
                distCalculator = new eachGapDist();
                
                //read in sequences
-               readSeqs();
+               seqs = readSeqs(fastafile);
+               
+               if (unaligned) { mothurOut("Your sequences need to be aligned when you use the bellerophon method."); mothurOutEndLine(); return 1;  }
                
                int numSeqs = seqs.size();
                
                if (numSeqs == 0) { mothurOut("Error in reading you sequences."); mothurOutEndLine(); exit(1); }
                
                //set default window to 25% of sequence length
-               string seq0 = seqs[0].getAligned();
+               string seq0 = seqs[0]->getAligned();
                if (window == 0) { window = seq0.length() / 4;  }
                else if (window > (seq0.length() / 2)) {  
                        mothurOut("Your sequence length is = " + toString(seq0.length()) + ". You have selected a window size greater than the length of half your aligned sequence. I will run it with a window size of " + toString((seq0.length() / 2))); mothurOutEndLine();
                        window = (seq0.length() / 2);
                }
                
-               if (increment > (seqs[0].getAlignLength() - (2*window))) { 
+               if (increment > (seqs[0]->getAlignLength() - (2*window))) { 
                        if (increment != 10) {
                        
                                mothurOut("You have selected a increment that is too large. I will use the default."); mothurOutEndLine();
                                increment = 10;
-                               if (increment > (seqs[0].getAlignLength() - (2*window))) {  increment = 0;  }
+                               if (increment > (seqs[0]->getAlignLength() - (2*window))) {  increment = 0;  }
                                
                        }else{ increment = 0; }
                }
-cout << "increment = " << increment << endl;           
+               
                if (increment == 0) { iters = 1; }
-               else { iters = ((seqs[0].getAlignLength() - (2*window)) / increment); }
+               else { iters = ((seqs[0]->getAlignLength() - (2*window)) / increment); }
                
                //initialize pref
                pref.resize(numSeqs);  
                
                for (int i = 0; i < numSeqs; i++ ) { 
                        pref[i].leftParent.resize(2); pref[i].rightParent.resize(2); pref[i].score.resize(2);   pref[i].closestLeft.resize(2); pref[i].closestRight.resize(3);
-                       pref[i].name = seqs[i].getName();
+                       pref[i].name = seqs[i]->getName();
                        pref[i].score[0] = 0.0;  pref[i].score[1] = 0.0; 
                        pref[i].closestLeft[0] = 100000.0;  pref[i].closestLeft[1] = 100000.0;  
                        pref[i].closestRight[0] = 100000.0;  pref[i].closestRight[1] = 100000.0;  
@@ -138,21 +146,22 @@ cout << "increment = " << increment << endl;
                                vector<Sequence> left;  vector<Sequence> right;
                                
                                for (int i = 0; i < seqs.size(); i++) {
-//cout << "whole = " << seqs[i].getAligned() << endl;
+//cout << "midpoint = " << midpoint << "\twindow = " << window << endl;
+//cout << "whole = " << seqs[i]->getAligned().length() << endl;
                                        //save left side
-                                       string seqLeft = seqs[i].getAligned().substr(midpoint-window, window);
+                                       string seqLeft = seqs[i]->getAligned().substr(midpoint-window, window);
                                        Sequence tempLeft;
-                                       tempLeft.setName(seqs[i].getName());
+                                       tempLeft.setName(seqs[i]->getName());
                                        tempLeft.setAligned(seqLeft);
                                        left.push_back(tempLeft);
-//cout << "left = " << tempLeft.getAligned() << endl;                  
+//cout << "left = " << tempLeft.getAligned().length() << endl;                 
                                        //save right side
-                                       string seqRight = seqs[i].getAligned().substr(midpoint, window);
+                                       string seqRight = seqs[i]->getAligned().substr(midpoint, window);
                                        Sequence tempRight;
-                                       tempRight.setName(seqs[i].getName());
+                                       tempRight.setName(seqs[i]->getName());
                                        tempRight.setAligned(seqRight);
                                        right.push_back(tempRight);
-//cout << "right = " << seqRight << endl;      
+//cout << "right = " << seqRight.length() << endl;     
                                }
                                
                                //adjust midpoint by increment
@@ -173,6 +182,7 @@ cout << "increment = " << increment << endl;
                                vector<SeqMap> distMapLeft;
                                
                                // Create a data structure to quickly access the distance information.
+                               //this is from thallinger's reimplementation on get.oturep
                                // It consists of a vector of distance maps, where each map contains
                                // all distances of a certain sequence. Vector and maps are accessed
                                // via the index of a sequence in the distance matrix
@@ -192,7 +202,6 @@ cout << "increment = " << increment << endl;
                                delete SparseLeft;
                                delete SparseRight;
                                
-                               
                                //fill preference structure
                                generatePreferences(distMapLeft, distMapRight, midpoint);
                                
@@ -215,43 +224,17 @@ cout << "increment = " << increment << endl;
                        
                        //how much higher or lower is this than expected
                        pref[i].score[0] = pref[i].score[0] / expectedPercent;
-                       
-               }
                
+               }
                
                //sort Preferences highest to lowest
                sort(pref.begin(), pref.end(), comparePref);
-
-
-
-       }
-       catch(exception& e) {
-               errorOut(e, "Bellerophon", "getChimeras");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************
-void Bellerophon::readSeqs(){
-       try {
-               ifstream inFASTA;
-               openInputFile(fastafile, inFASTA);
                
-               //read in seqs and store in vector
-               while(!inFASTA.eof()){
-                       Sequence current(inFASTA);
-                       
-                       if (current.getAligned() == "") { current.setAligned(current.getUnaligned()); }
-                       
-                       seqs.push_back(current);
-                       
-                       gobble(inFASTA);
-               }
-               inFASTA.close();
-
+               return 0;
+               
        }
        catch(exception& e) {
-               errorOut(e, "Bellerophon", "readSeqs");
+               errorOut(e, "Bellerophon", "getChimeras");
                exit(1);
        }
 }
@@ -272,8 +255,7 @@ int Bellerophon::createSparseMatrix(int startSeq, int endSeq, SparseMatrix* spar
                                
                        }
                }
-                       
-       
+               
                return 1;
        }
        catch(exception& e) {
@@ -335,24 +317,24 @@ void Bellerophon::generatePreferences(vector<SeqMap> left, vector<SeqMap> right,
                                        if (itL->second < pref[i].closestLeft[1]) {  
 
                                                pref[i].closestLeft[1] = itL->second;
-                                               pref[i].leftParent[1] = seqs[j].getName();
+                                               pref[i].leftParent[1] = seqs[j]->getName();
 //cout << "updating closest left to " << pref[i].leftParent[1] << endl;
                                        }
 //cout << "pref[" << j << "].closestLeft[1] = "        <<      pref[j].closestLeft[1] << " parent = " << pref[j].leftParent[1] << endl;        
                                        if (itL->second < pref[j].closestLeft[1]) { 
                                                pref[j].closestLeft[1] = itL->second;
-                                               pref[j].leftParent[1] = seqs[i].getName();
+                                               pref[j].leftParent[1] = seqs[i]->getName();
 //cout << "updating closest left to " << pref[j].leftParent[1] << endl;
                                        }
                                        
                                        //are you the closest right sequence
                                        if (itR->second < pref[i].closestRight[1]) {   
                                                pref[i].closestRight[1] = itR->second;
-                                               pref[i].rightParent[1] = seqs[j].getName();
+                                               pref[i].rightParent[1] = seqs[j]->getName();
                                        }
                                        if (itR->second < pref[j].closestRight[1]) {   
                                                pref[j].closestRight[1] = itR->second;
-                                               pref[j].rightParent[1] = seqs[i].getName();
+                                               pref[j].rightParent[1] = seqs[i]->getName();
                                        }
                                        
                                }
@@ -363,7 +345,6 @@ void Bellerophon::generatePreferences(vector<SeqMap> left, vector<SeqMap> right,
                
                  
                //calculate the dme
-               
                int count0 = 0;
                for (int i = 0; i < pref.size(); i++) {  dme += pref[i].score[1];  if (pref[i].score[1] == 0.0) { count0++; }  }