git.donarmstrong.com Git - mothur.git/blobdiff - shhhercommand.cpp
took cout's from shhh.seqs
[mothur.git] / shhhercommand.cpp
index 14eee605880e3dfa61333cc8eb437f23f7c3d6c5..6833f5d4c1cae8f4ee0e3b6f3da3bd2474b31285 100644 (file)
@@ -32,7 +32,7 @@
 vector<string> ShhherCommand::getValidParameters(){    
        try {
                string Array[] =  {     
-                       "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors"       
+                       "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors", "maxiter", "mindelta"        
                };
                
                vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
@@ -111,7 +111,7 @@ ShhherCommand::ShhherCommand(string option) {
                        
                        //valid paramters for this command
                        string AlignArray[] =  {
-                               "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors"       
+                               "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors", "maxiter", "mindelta"        
                        };
                        
                        vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
@@ -252,11 +252,6 @@ int ShhherCommand::execute(){
 
                double begClock = clock();
                unsigned long int begTime = time(NULL);
-
-               cout.setf(ios::fixed, ios::floatfield);
-               cout.setf(ios::showpoint);
-               cout << setprecision(2);
-               
                
                if(pid == 0){
 
@@ -267,7 +262,7 @@ int ShhherCommand::execute(){
 
                        processors = ncpus;
                        
-                       cout << "\nGetting preliminary data..." << endl;
+                       m->mothurOut("\nGetting preliminary data...\n");
                        getSingleLookUp();
                        getJointLookUp();
                        
@@ -294,14 +289,17 @@ int ShhherCommand::execute(){
                        
                        for(int i=0;i<numFiles;i++){
                                flowFileName = flowFileVector[i];
-                       
-                               cout << "\n>>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl;
-                               cout << "Reading flowgrams..." << endl;
+
+                               
+                               
+                               m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
+                               m->mothurOut("Reading flowgrams...\n");
                                getFlowData();
-                               cout << "Identifying unique flowgrams..." << endl;
+
+                               m->mothurOut("Identifying unique flowgrams...\n");
                                getUniques();
 
-                               cout << "Calculating distances between flowgrams..." << endl;
+                               m->mothurOut("Calculating distances between flowgrams...\n");
                                char fileName[1024];
                                strcpy(fileName, flowFileName.c_str());
 
@@ -332,7 +330,7 @@ int ShhherCommand::execute(){
 
                                string namesFileName = createNamesFile();
                                
-                               cout << "\nClustering flowgrams..." << endl;
+                               m->mothurOut("\nClustering flowgrams...\n");
                                string listFileName = cluster(distFileName, namesFileName);
 
                                getOTUData(listFileName);
@@ -351,9 +349,8 @@ int ShhherCommand::execute(){
                                
                                int numOTUsOnCPU = numOTUs / ncpus;
                                int numSeqsOnCPU = numSeqs / ncpus;
-                               
-                               cout << "\nDenoising flowgrams..." << endl;
-                               cout << "iter\tmaxDelta\tnLL\t\tcycletime" << endl;
+                               m->mothurOut("\nDenoising flowgrams...\n");
+                               m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
                                
                                while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
 
@@ -454,7 +451,7 @@ int ShhherCommand::execute(){
                                        
                                        iter++;
                                        
-                                       cout << iter << '\t' << maxDelta << '\t' << setprecision(2) << nLL << '\t' << time(NULL) - cycTime << '\t' << setprecision(6) << (clock() - cycClock)/(double)CLOCKS_PER_SEC << endl;                   
+                                       m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');                  
 
                                        if((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
                                                int live = 1;
@@ -471,7 +468,7 @@ int ShhherCommand::execute(){
                                        
                                }       
                                
-                               cout << "\nFinalizing..." << endl;
+                               m->mothurOut("\nFinalizing...\n");
                                fill();
                                setOTUs();
                                vector<int> otuCounts(numOTUs, 0);
@@ -486,10 +483,9 @@ int ShhherCommand::execute(){
                                remove(distFileName.c_str());
                                remove(namesFileName.c_str());
                                remove(listFileName.c_str());
-                               
-                               cout << "Total time to process " << flowFileName << ":\t" << time(NULL) - begTime << '\t' << setprecision(6) << (clock() - begClock)/(double)CLOCKS_PER_SEC << endl;                    
+                                                                
+                               m->mothurOut("Total time to process " + toString(flowFileName) + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');                 
                        }
-
                }
                else{
                        int abort = 1;
@@ -632,10 +628,11 @@ string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){
                                }
                        }
                        if(i % 100 == 0){
-                               cout << i << "\t" << (time(NULL) - begTime) << "\t" << (clock()-begClock)/CLOCKS_PER_SEC << endl;
+                               m->mothurOut(toString(i) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
                        }
                }
-               cout << stopSeq << "\t" << (time(NULL) - begTime) << "\t" << (clock()-begClock)/CLOCKS_PER_SEC << endl;
+               
+               m->mothurOut(toString(stopSeq) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
                
                string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.dist";
                if(pid != 0){   fDistFileName += ".temp." + toString(pid);      }
@@ -659,9 +656,6 @@ int ShhherCommand::execute(){
        try {
                if (abort == true) { return 0; }
                
-               cout.setf(ios::fixed, ios::floatfield);
-               cout.setf(ios::showpoint);
-               
                getSingleLookUp();
                getJointLookUp();
                                
@@ -686,18 +680,19 @@ int ShhherCommand::execute(){
                for(int i=0;i<numFiles;i++){
                        flowFileName = flowFileVector[i];
 
-                       cout << "\n>>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl;
-                       cout << "Reading flowgrams..." << endl;
+                       m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
+                       m->mothurOut("Reading flowgrams...\n");
                        getFlowData();
-                       cout << "Identifying unique flowgrams..." << endl;
+                       
+                       m->mothurOut("Identifying unique flowgrams...\n");
                        getUniques();
                        
                        
-                       cout << "Calculating distances between flowgrams..." << endl;                   
+                       m->mothurOut("Calculating distances between flowgrams...\n");
                        string distFileName = createDistFile(processors);
                        string namesFileName = createNamesFile();
-                       
-                       cout << "\nClustering flowgrams..." << endl;
+                               
+                       m->mothurOut("\nClustering flowgrams...\n");
                        string listFileName = cluster(distFileName, namesFileName);
                        getOTUData(listFileName);
                        
@@ -709,8 +704,9 @@ int ShhherCommand::execute(){
                        double begClock = clock();
                        unsigned long int begTime = time(NULL);
 
-                       cout << "\nDenoising flowgrams..." << endl;
-                       cout << "iter\tmaxDelta\tnLL\t\tcycletime" << endl;
+                       
+                       m->mothurOut("\nDenoising flowgrams...\n");
+                       m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
                        
                        while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
                                
@@ -728,10 +724,11 @@ int ShhherCommand::execute(){
 
                                iter++;
                                
-                               cout << iter << '\t' << maxDelta << '\t' << setprecision(2) << nLL << '\t' << time(NULL) - cycTime << '\t' << setprecision(6) << (clock() - cycClock)/(double)CLOCKS_PER_SEC << endl;                   
+                               m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
+
                        }       
                        
-                       cout << "\nFinalizing..." << endl;
+                       m->mothurOut("\nFinalizing...\n");
                        fill();
                        setOTUs();
                        
@@ -749,7 +746,7 @@ int ShhherCommand::execute(){
                        remove(namesFileName.c_str());
                        remove(listFileName.c_str());
                        
-                       cout << "Total time to process " << flowFileName << ":\t" << time(NULL) - begTime << '\t' << setprecision(6) << (clock() - begClock)/(double)CLOCKS_PER_SEC << endl;                    
+                       m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
                }
                return 0;
        }
@@ -767,6 +764,11 @@ void ShhherCommand::getFlowData(){
                m->openInputFile(flowFileName, flowFile);
                
                string seqName;
+               seqNameVector.clear();
+               lengths.clear();
+               flowDataIntI.clear();
+               nameMap.clear();
+               
                
                int currentNumFlowCells;
                
@@ -903,13 +905,19 @@ void ShhherCommand::getUniques(){
                        int index = 0;
                        
                        vector<short> current(numFlowCells);
-                       for(int j=0;j<numFlowCells;j++){        current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));        }
+                       for(int j=0;j<numFlowCells;j++){
+                               current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
+                       }
                                                
                        for(int j=0;j<numUniques;j++){
                                int offset = j * numFlowCells;
                                bool toEnd = 1;
                                
-                               for(int k=0;k<numFlowCells;k++){
+                               int shorterLength;
+                               if(lengths[i] < uniqueLengths[j])       {       shorterLength = lengths[i];                     }
+                               else                                                            {       shorterLength = uniqueLengths[j];       }
+
+                               for(int k=0;k<shorterLength;k++){
                                        if(current[k] != uniqueFlowgrams[offset + k]){
                                                toEnd = 0;
                                                break;
@@ -920,6 +928,7 @@ void ShhherCommand::getUniques(){
                                        mapSeqToUnique[i] = j;
                                        uniqueCount[j]++;
                                        index = j;
+                                       if(lengths[i] > uniqueLengths[j])       {       uniqueLengths[j] = lengths[i];  }
                                        break;
                                }
                                index++;
@@ -942,7 +951,7 @@ void ShhherCommand::getUniques(){
                uniqueFlowDataIntI.resize(numFlowCells * numUniques);
                uniqueLengths.resize(numUniques);       
                
-               flowDataPrI.assign(numSeqs * numFlowCells, 0);
+               flowDataPrI.resize(numSeqs * numFlowCells, 0);
                for(int i=0;i<flowDataPrI.size();i++)   {       flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);             }
        }
        catch(exception& e) {
@@ -1096,7 +1105,8 @@ string ShhherCommand::createDistFile(int processors){
 
                m->mothurOutEndLine();
                
-               cout << "Total time: " << (time(NULL) - begTime) << "\t"  << (clock() - begClock)/CLOCKS_PER_SEC << endl;;
+               m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
+               
 
                return fDistFileName;
        }
@@ -1141,9 +1151,6 @@ string ShhherCommand::createNamesFile(){
 string ShhherCommand::cluster(string distFileName, string namesFileName){
        try {
                
-               SparseMatrix* matrix;
-               ListVector* list;
-               RAbundVector* rabund;
                
                globaldata->setNameFile(namesFileName);
                globaldata->setColumnFile(distFileName);
@@ -1156,13 +1163,13 @@ string ShhherCommand::cluster(string distFileName, string namesFileName){
                clusterNameMap->readMap();
                read->read(clusterNameMap);
                
-               list = read->getListVector();
-               matrix = read->getMatrix();
+               ListVector* list = read->getListVector();
+               SparseMatrix* matrix = read->getMatrix();
                
                delete read; 
                delete clusterNameMap; 
                                
-               rabund = new RAbundVector(list->getRAbundVector());
+               RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
                
                Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
                string tag = cluster->getTag();
@@ -1204,7 +1211,11 @@ void ShhherCommand::getOTUData(string listFileName){
                otuData.assign(numSeqs, 0);
                cumNumSeqs.assign(numOTUs, 0);
                nSeqsPerOTU.assign(numOTUs, 0);
-               aaP.resize(numOTUs);
+               aaP.clear();aaP.resize(numOTUs);
+               
+               seqNumber.clear();
+               aaI.clear();
+               seqIndex.clear();
                
                string singleOTU = "";
                
@@ -1256,6 +1267,8 @@ void ShhherCommand::getOTUData(string listFileName){
                        for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
                                aaP[i].push_back(0);
                        }
+                       
+                       
                }
                
                for(int i=1;i<numOTUs;i++){
@@ -1265,6 +1278,7 @@ void ShhherCommand::getOTUData(string listFileName){
                seqIndex = seqNumber;
                
                listFile.close();       
+               
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "getOTUData");
@@ -1383,6 +1397,7 @@ void ShhherCommand::calcCentroidsDriver(int start, int finish){
        
        try{
                
+       
                for(int i=start;i<finish;i++){
                        
                        double count = 0;
@@ -1394,7 +1409,7 @@ void ShhherCommand::calcCentroidsDriver(int start, int finish){
                        for(int j=0;j<nSeqsPerOTU[i];j++){
                                count += singleTau[seqNumber[cumNumSeqs[i] + j]];
                        }
-                       
+
                        if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
                                vector<double> adF(nSeqsPerOTU[i]);
                                vector<int> anL(nSeqsPerOTU[i]);
@@ -1674,9 +1689,9 @@ void ShhherCommand::calcNewDistancesChildMPI(int startSeq, int stopSeq, vector<i
        try{
                vector<double> newTau(numOTUs,0);
                vector<double> norms(numSeqs, 0);
-               otuIndex.resize(0);
-               seqIndex.resize(0);
-               singleTau.resize(0);
+               otuIndex.clear();
+               seqIndex.clear();
+               singleTau.clear();