git.donarmstrong.com Git - mothur.git/blobdiff - chimeraseqscommand.cpp
worked on chimeras
[mothur.git] / chimeraseqscommand.cpp
index 182c25e4b909ba2dd49fc1971beb7abee3b99188..aaed3bab7b56b14460d06de90c9551df70c4351d 100644 (file)
@@ -8,6 +8,8 @@
  */
 
 #include "chimeraseqscommand.h"
+#include "bellerophon.h"
+#include "pintail.h"
 
 //***************************************************************************************************************
 
@@ -20,7 +22,7 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta", "filter", "correction", "processors", "method" };
+                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile" };
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -38,17 +40,39 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                        if (fastafile == "not open") { abort = true; }
                        else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the chimera.seqs command."); mothurOutEndLine(); abort = true;  }     
                        
+                       templatefile = validParameter.validFile(parameters, "template", true);
+                       if (templatefile == "not open") { abort = true; }
+                       else if (templatefile == "not found") { templatefile = "";  }   
+                       
+                       consfile = validParameter.validFile(parameters, "conservation", true);
+                       if (consfile == "not open") { abort = true; }
+                       else if (consfile == "not found") { consfile = "";  }   
+                       
+                       quanfile = validParameter.validFile(parameters, "quantile", true);
+                       if (quanfile == "not open") { abort = true; }
+                       else if (quanfile == "not found") { quanfile = "";  }   
+                       
+
                        string temp;
-                       temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
+                       temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "T"; }
                        filter = isTrue(temp);
                        
                        temp = validParameter.validFile(parameters, "correction", false);               if (temp == "not found") { temp = "T"; }
                        correction = isTrue(temp);
                        
-                       temp = validParameter.validFile(parameters, "processors", true);                if (temp == "not found") { temp = "1"; }
+                       temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
                        convert(temp, processors);
                        
-                       method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "bellerophon"; }
+                       temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
+                       convert(temp, window);
+                                       
+                       temp = validParameter.validFile(parameters, "increment", false);                        if (temp == "not found") { temp = "25"; }
+                       convert(temp, increment);
+                               
+                       method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "pintail"; }
+                       
+                       if ((method == "pintail") && (templatefile == "")) { mothurOut("You must provide a template file with the pintail method."); mothurOutEndLine(); abort = true;  }
+                       
 
                }
        }
@@ -64,9 +88,9 @@ void ChimeraSeqsCommand::help(){
                mothurOut("The chimera.seqs command reads a fastafile and creates a sorted priority score list of potentially chimeric sequences (ideally, the sequences should already be aligned).\n");
                mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors and method.  fasta is required.\n");
                mothurOut("The filter parameter allows you to specify if you would like to apply a 50% soft filter.  The default is false. \n");
-               mothurOut("The correction parameter allows you to .....  The default is true. \n");
+               mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.   The default is true. \n");
                mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
-               mothurOut("The method parameter allows you to specify the method for finding chimeric sequences.  The default is bellerophon. \n");
+               mothurOut("The method parameter allows you to specify the method for finding chimeric sequences.  The default is pintail. \n");
                mothurOut("The chimera.seqs command should be in the following format: \n");
                mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
                mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, processors=2, method=yourMethod) \n");
@@ -89,215 +113,45 @@ int ChimeraSeqsCommand::execute(){
                
                if (abort == true) { return 0; }
                
-               //do soft filter
-               if (filter)  {
-                       string optionString = "fasta=" + fastafile + ", soft=50.0, vertical=F";
-                       filterSeqs = new FilterSeqsCommand(optionString);
-                       filterSeqs->execute();
-                       delete filterSeqs;
-                       
-                       //reset fastafile to filtered file
-                       fastafile = getRootName(fastafile) + "filter.fasta";
-               }
-               
-               //read in sequences
-               readSeqs();
-               
-               //int numSeqs = seqs.size();
-               
-               //find average midpoint of seqs
-               midpoint = findAverageMidPoint();
-               
-               //create 2 vectors of sequences, 1 for left side and one for right side
-               vector<Sequence> left;  vector<Sequence> right;
-               
-               for (int i = 0; i < seqs.size(); i++) {
-                       //save left side
-                       string seqLeft = seqs[i].getAligned();
-                       seqLeft = seqLeft.substr(0, midpoint);
-                       Sequence tempLeft(seqs[i].getName(), seqLeft);
-                       left.push_back(tempLeft);
-                       
-                       //save right side
-                       string seqRight = seqs[i].getAligned();
-                       seqRight = seqRight.substr(midpoint+1, (seqRight.length()-midpoint-1));
-                       Sequence tempRight(seqs[i].getName(), seqRight);
-                       right.push_back(tempRight);
-               }
+               if (method == "bellerophon")    {               chimera = new Bellerophon(fastafile);                   }
+               else if (method == "pintail")   {               chimera = new Pintail(fastafile, templatefile); 
+                       //saves time to avoid generating it
+                       if (consfile != "")                     {               chimera->setCons(consfile);                                             }
+                       else                                            {               chimera->setCons("");                                                   }
+                       
+                       //saves time to avoid generating it
+                       if (quanfile != "")                     {               chimera->setQuantiles(quanfile);                                }
+                       else                                            {               chimera->setQuantiles("");                                              }
+               }else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0;         }
+               
+               //set user options
+               chimera->setFilter(filter);
+               chimera->setCorrection(correction);
+               chimera->setProcessors(processors);
+               chimera->setWindow(window);
+               chimera->setIncrement(increment);
                                
-               //this should be parallelized
-               //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | )
-               //create a matrix containing the distance from left to left and right to right
-               //calculate distances
-               SparseMatrix* SparseLeft = new SparseMatrix();
-               SparseMatrix* SparseRight = new SparseMatrix();
+               //find chimeras
+               chimera->getChimeras();
                
-               createSparseMatrix(0, left.size(), SparseLeft, left);
-               createSparseMatrix(0, right.size(), SparseRight, right);
+               string outputFileName = getRootName(fastafile) + method + ".chimeras";
+               ofstream out;
+               openOutputFile(outputFileName, out);
                
+               //print results
+               chimera->print(out);
                
-               //vector<SeqMap> distMapRight;
-               //vector<SeqMap> distMapLeft;
+               out.close();
                
-               // Create a data structure to quickly access the distance information.
-               // It consists of a vector of distance maps, where each map contains
-               // all distances of a certain sequence. Vector and maps are accessed
-               // via the index of a sequence in the distance matrix
-               //distMapRight = vector<SeqMap>(globaldata->gListVector->size()); 
-               //distMapLeft = vector<SeqMap>(globaldata->gListVector->size()); 
-               for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) {
-                       //distMapLeft[currentCell->row][currentCell->column] = currentCell->dist;
-               }
-               for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) {
-                       //distMapRight[currentCell->row][currentCell->column] = currentCell->dist;
-               }
-
-               
-               //fill preference structure
-               //generatePreferences(distMapLeft, distMapRight);
+               delete chimera;
                
-                               
-               //output results to screen                                              
-               mothurOutEndLine();
-               mothurOut("\t\t"); mothurOutEndLine();
-               //mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0])); mothurOutEndLine();
-               //mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25])); mothurOutEndLine();
-               //mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25])); mothurOutEndLine();
-               //mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50])); mothurOutEndLine();
-               //mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75])); mothurOutEndLine();
-               //mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5])); mothurOutEndLine();
-               //mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100])); mothurOutEndLine();
-               //mothurOut("# of Seqs:\t" + toString(numSeqs)); mothurOutEndLine();
-               
-               //outSummary.close();
                return 0;
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraSeqsCommand", "execute");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************
-void ChimeraSeqsCommand::readSeqs(){
-       try {
-               ifstream inFASTA;
-               openInputFile(fastafile, inFASTA);
-               
-               //read in seqs and store in vector
-               while(!inFASTA.eof()){
-                       Sequence current(inFASTA);
-                       
-                       seqs.push_back(current);
-                       
-                       gobble(inFASTA);
-               }
-               inFASTA.close();
-
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraSeqsCommand", "readSeqs");
-               exit(1);
-       }
-}
-
-
-//***************************************************************************************************************
-int ChimeraSeqsCommand::findAverageMidPoint(){
-       try {
-               int totalMids = 0;
-               int averageMid = 0;
                
-               //loop through the seqs and find midpoint
-               for (int i = 0; i < seqs.size(); i++) {
-                       
-                       //get unaligned sequence
-                       seqs[i].setUnaligned(seqs[i].getUnaligned());  //if you read an aligned file the unaligned is really aligned, so we need to make sure its unaligned
-                       
-                       string unaligned = seqs[i].getUnaligned();
-                       string aligned = seqs[i].getAligned();
-                       
-                       //find midpoint of this seq
-                       int count = 0;
-                       int thismid = 0;
-                       for (int j = 0; j < aligned.length(); j++) {
-                               
-                               thismid++;
-                               
-                               //if you are part of the unaligned sequence increment
-                               if (isalpha(aligned[j])) {  count++;  }
-                               
-                               //if you have reached the halfway point stop
-                               if (count >= (unaligned.length() / 2)) { break; }
-                       }
-                       
-                       //add this mid to total
-                       totalMids += thismid;
-               
-               }
-               
-               averageMid = (totalMids / seqs.size());
-               
-               return averageMid; 
-       
-       
        }
        catch(exception& e) {
-               errorOut(e, "ChimeraSeqsCommand", "findAverageMidPoint");
-               exit(1);
-       }
-}
-
-/***************************************************************************************************************/
-int ChimeraSeqsCommand::createSparseMatrix(int startSeq, int endSeq, SparseMatrix* sparse, vector<Sequence> s){
-       try {
-
-               for(int i=startSeq; i<endSeq; i++){
-                       
-                       for(int j=0;j<i;j++){
-                       
-                               //distCalculator->calcDist(s.get(i), s.get(j));
-                               float dist = distCalculator->getDist();
-                               
-                               PCell temp(i, j, dist);
-                               sparse->addCell(temp);
-                               
-                       }
-               }
-                       
-       
-               return 1;
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraSeqsCommand", "createSparseMatrix");
-               exit(1);
-       }
-}
-/***************************************************************************************************************
-void ChimeraSeqsCommand::generatePreferences(vector<SeqMap> left, vector<SeqMap> right){
-       try {
-
-               for (int i = 0; i < left.size(); i++) {
-                       
-                       int iscore = 0;
-                       float closestLeft = 100000.0;
-                       float closestRight = 100000.0;
-                       
-                       for (int j = 0; j < left.size(); j++) {
-                               
-                               //iscore += abs(left
-                       
-                       }
-               
-               }
-
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraSeqsCommand", "generatePreferences");
+               errorOut(e, "ChimeraSeqsCommand", "execute");
                exit(1);
        }
 }
 /**************************************************************************************************/
 
-/**************************************************************************************************/
-