]> git.donarmstrong.com Git - mothur.git/blobdiff - shhhercommand.cpp
fixed clearcut version bug, added group count output to get.groups and remove.groups
[mothur.git] / shhhercommand.cpp
index b2b4de0559feff2074fb6269b6e18a8dafb9d6e4..024fbab7221c328a02958d34084e4704da55b150 100644 (file)
@@ -32,7 +32,7 @@
 vector<string> ShhherCommand::getValidParameters(){    
        try {
                string Array[] =  {     
-                       "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors", "maxiter", "mindelta"        
+                       "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors", "maxiter", "mindelta", "order"       
                };
                
                vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
@@ -111,7 +111,7 @@ ShhherCommand::ShhherCommand(string option) {
                        
                        //valid paramters for this command
                        string AlignArray[] =  {
-                               "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors", "maxiter", "mindelta"        
+                               "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors", "maxiter", "mindelta", "order"       
                        };
                        
                        vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
@@ -211,6 +211,12 @@ ShhherCommand::ShhherCommand(string option) {
                        temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
                        convert(temp, sigma); 
                        
+                       flowOrder = validParameter.validFile(parameters, "order", false);
+                       if (flowOrder == "not found"){ flowOrder = "TACG";              }
+                       else if(flowOrder.length() != 4){
+                               m->mothurOut("The value of the order option must be four bases long\n");
+                       }
+                       
                        globaldata = GlobalData::getInstance();
                }
                        
@@ -250,9 +256,6 @@ int ShhherCommand::execute(){
                int tag = 1976;
                MPI_Status status; 
 
-               double begClock = clock();
-               unsigned long int begTime = time(NULL);
-               
                if(pid == 0){
 
                        for(int i=1;i<ncpus;i++){
@@ -288,9 +291,10 @@ int ShhherCommand::execute(){
                        }
                        
                        for(int i=0;i<numFiles;i++){
-                               flowFileName = flowFileVector[i];
+                               double begClock = clock();
+                               unsigned long int begTime = time(NULL);
 
-                               
+                               flowFileName = flowFileVector[i];
                                
                                m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
                                m->mothurOut("Reading flowgrams...\n");
@@ -370,7 +374,7 @@ int ShhherCommand::execute(){
                                                MPI_Send(&nSeqsPerOTU[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
                                                MPI_Send(&cumNumSeqs[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
                                        }
-                               
+                                       
                                        calcCentroidsDriver(0, numOTUsOnCPU);
                                        
                                        for(int i=1;i<ncpus;i++){
@@ -489,7 +493,6 @@ int ShhherCommand::execute(){
                }
                else{
                        int abort = 1;
-                       bool live = 1;
 
                        MPI_Recv(&abort, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                        if(abort){      return 0;       }
@@ -499,6 +502,8 @@ int ShhherCommand::execute(){
 
                        for(int i=0;i<numFiles;i++){
                                //Now into the pyrodist part
+                               bool live = 1;
+
                                char fileName[1024];
                                MPI_Recv(&fileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
                                MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
@@ -554,12 +559,12 @@ int ShhherCommand::execute(){
                                int total;
 
                                while(live){
-
+                                       
                                        MPI_Recv(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        singleTau.assign(total, 0.0000);
                                        seqNumber.assign(total, 0);
                                        seqIndex.assign(total, 0);
-                                       
+
                                        MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
@@ -567,13 +572,12 @@ int ShhherCommand::execute(){
                                        MPI_Recv(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&nSeqsPerOTU[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&cumNumSeqs[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-                                       
+
                                        calcCentroidsDriver(startOTU, endOTU);
 
                                        MPI_Send(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD);
                                        MPI_Send(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD);
 
-                                       
                                        MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&weight[0], numOTUs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
@@ -592,8 +596,8 @@ int ShhherCommand::execute(){
                                }
                        }
                }               
-
                MPI_Barrier(MPI_COMM_WORLD);
+
                return 0;
 
        }
@@ -704,10 +708,6 @@ int ShhherCommand::execute(){
                        double begClock = clock();
                        unsigned long int begTime = time(NULL);
 
-                       
-                       cout << numOTUs << endl;
-                       
-                       
                        m->mothurOut("\nDenoising flowgrams...\n");
                        m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
                        
@@ -1282,12 +1282,6 @@ void ShhherCommand::getOTUData(string listFileName){
                
                listFile.close();       
                
-               for(int i=0;i<seqNumber.size();i++){
-                       cout << seqNumber[i] << ' ';
-               }
-               cout << endl;
-               
-               
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "getOTUData");
@@ -1308,8 +1302,6 @@ void ShhherCommand::initPyroCluster(){
                nSeqsBreaks.assign(processors+1, 0);
                nOTUsBreaks.assign(processors+1, 0);
 
-               cout << numSeqs << '\t' << numOTUs << '\t' << processors << endl;
-               
                nSeqsBreaks[0] = 0;
                for(int i=0;i<processors;i++){
                        nSeqsBreaks[i+1] = nSeqsBreaks[i] + (int)((double) numSeqs / (double) processors);
@@ -1408,10 +1400,7 @@ void ShhherCommand::calcCentroidsDriver(int start, int finish){
        
        try{
                
-//             for(int i=0;i<seqNumber.size();i++){
-//                     cout << seqNumber[i] << ' ';
-//             }cout << endl;
-               
+       
                for(int i=start;i<finish;i++){
                        
                        double count = 0;
@@ -2027,7 +2016,6 @@ void ShhherCommand::writeQualities(vector<int> otuCounts){
 
 void ShhherCommand::writeSequences(vector<int> otuCounts){
        try {
-               string bases = "TACG";
                
                string fastaFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.fasta";
                ofstream fastaFile;
@@ -2041,14 +2029,17 @@ void ShhherCommand::writeSequences(vector<int> otuCounts){
                        if(otuCounts[i] > 0){
                                fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
                                
-                               for(int j=8;j<numFlowCells;j++){
+                               string newSeq = "";
+                               
+                               for(int j=0;j<numFlowCells;j++){
                                        
-                                       char base = bases[j % 4];
+                                       char base = flowOrder[j % 4];
                                        for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
-                                               fastaFile << base;
+                                               newSeq += base;
                                        }
                                }
-                               fastaFile << endl;
+                               
+                               fastaFile << newSeq.substr(4) << endl;
                        }
                }
                fastaFile.close();
@@ -2118,7 +2109,7 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
                ofstream otuCountsFile;
                m->openOutputFile(otuCountsFileName, otuCountsFile);
                
-               string bases = "TACG";
+               string bases = flowOrder;
                
                for(int i=0;i<numOTUs;i++){
                        //output the translated version of the centroid sequence for the otu
@@ -2138,15 +2129,18 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
                                        int sequence = aaI[i][j];
                                        otuCountsFile << seqNameVector[sequence] << '\t';
                                        
-                                       for(int k=8;k<lengths[sequence];k++){
+                                       string newSeq = "";
+                                       
+                                       for(int k=0;k<lengths[sequence];k++){
                                                char base = bases[k % 4];
                                                int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
-                                               
+                                                       
                                                for(int s=0;s<freq;s++){
-                                                       otuCountsFile << base;
+                                                       newSeq += base;
+                                                       //otuCountsFile << base;
                                                }
                                        }
-                                       otuCountsFile << endl;
+                                       otuCountsFile << newSeq.substr(4) << endl;
                                }
                                otuCountsFile << endl;
                        }