]> git.donarmstrong.com Git - mothur.git/blobdiff - shhhercommand.cpp
added citation function to commands
[mothur.git] / shhhercommand.cpp
index 6a19618b503d13157a1fdea8ae5a234c019fe9af..0977cfb2f0db4d4df43ff731628c82c8d918047f 100644 (file)
 #define MIN_WEIGHT 0.1
 #define MIN_TAU 0.0001
 #define MIN_ITER 10
-
 //**********************************************************************************************************************
-
-vector<string> ShhherCommand::getValidParameters(){    
+vector<string> ShhherCommand::setParameters(){ 
        try {
-               string Array[] =  {     
-                       "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors"       
-               };
+               CommandParameter pflow("flow", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pflow);
+               CommandParameter pfile("file", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pfile);
+               CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plookup);
+               CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+               CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
+               CommandParameter psigma("sigma", "Number", "", "60", "", "", "",false,false); parameters.push_back(psigma);
+               CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "",false,false); parameters.push_back(pmindelta);
+               CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getValidParameters");
+               m->errorOut(e, "ShhherCommand", "setParameters");
                exit(1);
        }
 }
-
 //**********************************************************************************************************************
-
-ShhherCommand::ShhherCommand(){        
+string ShhherCommand::getHelpString(){ 
        try {
-               abort = true; calledHelp = true;
-               
-               //initialize outputTypes
-               vector<string> tempOutNames;
-               outputTypes["pn.dist"] = tempOutNames;
-
+               string helpString = "";
+               helpString += "The shhh.seqs command reads a file containing flowgrams and creates a file of corrected sequences.\n";
+               return helpString;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "ShhherCommand");
+               m->errorOut(e, "ShhherCommand", "getHelpString");
                exit(1);
        }
 }
-
 //**********************************************************************************************************************
 
-vector<string> ShhherCommand::getRequiredParameters(){ 
+ShhherCommand::ShhherCommand(){        
        try {
-               string Array[] =  {"flow"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-               return myArray;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getRequiredParameters");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
+               abort = true; calledHelp = true;
+               setParameters();
+               
+               //initialize outputTypes
+               vector<string> tempOutNames;
+               outputTypes["pn.dist"] = tempOutNames;
 
-vector<string> ShhherCommand::getRequiredFiles(){      
-       try {
-               vector<string> myArray;
-               return myArray;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getRequiredFiles");
+               m->errorOut(e, "ShhherCommand", "ShhherCommand");
                exit(1);
        }
 }
@@ -106,15 +98,10 @@ ShhherCommand::ShhherCommand(string option) {
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
-                       
-                       //valid paramters for this command
-                       string AlignArray[] =  {
-                               "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors"       
-                       };
-                       
-                       vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+                       vector<string> myArray = setParameters();
                        
                        OptionParser parser(option);
                        map<string,string> parameters = parser.getParameters();
@@ -196,8 +183,9 @@ ShhherCommand::ShhherCommand(string option) {
                        else if(temp == "not open")     {       abort = true;                   } 
                        else                                            {       lookupFileName = temp;  }
                        
-                       temp = validParameter.validFile(parameters, "processors", false);if (temp == "not found"){      temp = "1";                     }
-                       convert(temp, processors); 
+                       temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
+                       m->setProcessors(temp);
+                       convert(temp, processors);
 
                        temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.01";          }
                        convert(temp, cutoff); 
@@ -211,7 +199,12 @@ ShhherCommand::ShhherCommand(string option) {
                        temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
                        convert(temp, sigma); 
                        
-                       globaldata = GlobalData::getInstance();
+                       flowOrder = validParameter.validFile(parameters, "order", false);
+                       if (flowOrder == "not found"){ flowOrder = "TACG";              }
+                       else if(flowOrder.length() != 4){
+                               m->mothurOut("The value of the order option must be four bases long\n");
+                       }
+                       
                }
                        
 #ifdef USE_MPI
@@ -224,23 +217,6 @@ ShhherCommand::ShhherCommand(string option) {
                exit(1);
        }
 }
-
-//**********************************************************************************************************************
-
-ShhherCommand::~ShhherCommand(){}
-
-//**********************************************************************************************************************
-
-void ShhherCommand::help(){
-       try {
-               m->mothurOut("The shhher command reads a file containing flowgrams and creates a file of corrected sequences.\n");
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "help");
-               exit(1);
-       }
-}
-
 //**********************************************************************************************************************
 #ifdef USE_MPI
 int ShhherCommand::execute(){
@@ -250,9 +226,6 @@ int ShhherCommand::execute(){
                int tag = 1976;
                MPI_Status status; 
 
-               double begClock = clock();
-               unsigned long int begTime = time(NULL);
-               
                if(pid == 0){
 
                        for(int i=1;i<ncpus;i++){
@@ -288,12 +261,15 @@ int ShhherCommand::execute(){
                        }
                        
                        for(int i=0;i<numFiles;i++){
+                               double begClock = clock();
+                               unsigned long int begTime = time(NULL);
+
                                flowFileName = flowFileVector[i];
-                       
+                               
                                m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
                                m->mothurOut("Reading flowgrams...\n");
-                               
                                getFlowData();
+
                                m->mothurOut("Identifying unique flowgrams...\n");
                                getUniques();
 
@@ -368,7 +344,7 @@ int ShhherCommand::execute(){
                                                MPI_Send(&nSeqsPerOTU[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
                                                MPI_Send(&cumNumSeqs[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
                                        }
-                               
+                                       
                                        calcCentroidsDriver(0, numOTUsOnCPU);
                                        
                                        for(int i=1;i<ncpus;i++){
@@ -487,7 +463,6 @@ int ShhherCommand::execute(){
                }
                else{
                        int abort = 1;
-                       bool live = 1;
 
                        MPI_Recv(&abort, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                        if(abort){      return 0;       }
@@ -497,6 +472,8 @@ int ShhherCommand::execute(){
 
                        for(int i=0;i<numFiles;i++){
                                //Now into the pyrodist part
+                               bool live = 1;
+
                                char fileName[1024];
                                MPI_Recv(&fileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
                                MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
@@ -552,12 +529,12 @@ int ShhherCommand::execute(){
                                int total;
 
                                while(live){
-
+                                       
                                        MPI_Recv(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        singleTau.assign(total, 0.0000);
                                        seqNumber.assign(total, 0);
                                        seqIndex.assign(total, 0);
-                                       
+
                                        MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
@@ -565,13 +542,12 @@ int ShhherCommand::execute(){
                                        MPI_Recv(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&nSeqsPerOTU[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&cumNumSeqs[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-                                       
+
                                        calcCentroidsDriver(startOTU, endOTU);
 
                                        MPI_Send(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD);
                                        MPI_Send(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD);
 
-                                       
                                        MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&weight[0], numOTUs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
@@ -590,8 +566,8 @@ int ShhherCommand::execute(){
                                }
                        }
                }               
-
                MPI_Barrier(MPI_COMM_WORLD);
+
                return 0;
 
        }
@@ -761,6 +737,11 @@ void ShhherCommand::getFlowData(){
                m->openInputFile(flowFileName, flowFile);
                
                string seqName;
+               seqNameVector.clear();
+               lengths.clear();
+               flowDataIntI.clear();
+               nameMap.clear();
+               
                
                int currentNumFlowCells;
                
@@ -943,7 +924,7 @@ void ShhherCommand::getUniques(){
                uniqueFlowDataIntI.resize(numFlowCells * numUniques);
                uniqueLengths.resize(numUniques);       
                
-               flowDataPrI.assign(numSeqs * numFlowCells, 0);
+               flowDataPrI.resize(numSeqs * numFlowCells, 0);
                for(int i=0;i<flowDataPrI.size();i++)   {       flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);             }
        }
        catch(exception& e) {
@@ -1143,11 +1124,6 @@ string ShhherCommand::createNamesFile(){
 string ShhherCommand::cluster(string distFileName, string namesFileName){
        try {
                
-               
-               globaldata->setNameFile(namesFileName);
-               globaldata->setColumnFile(distFileName);
-               globaldata->setFormat("column");
-               
                ReadMatrix* read = new ReadColumnMatrix(distFileName);  
                read->setCutoff(cutoff);
                
@@ -1203,7 +1179,11 @@ void ShhherCommand::getOTUData(string listFileName){
                otuData.assign(numSeqs, 0);
                cumNumSeqs.assign(numOTUs, 0);
                nSeqsPerOTU.assign(numOTUs, 0);
-               aaP.resize(numOTUs);
+               aaP.clear();aaP.resize(numOTUs);
+               
+               seqNumber.clear();
+               aaI.clear();
+               seqIndex.clear();
                
                string singleOTU = "";
                
@@ -1255,6 +1235,8 @@ void ShhherCommand::getOTUData(string listFileName){
                        for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
                                aaP[i].push_back(0);
                        }
+                       
+                       
                }
                
                for(int i=1;i<numOTUs;i++){
@@ -1264,6 +1246,7 @@ void ShhherCommand::getOTUData(string listFileName){
                seqIndex = seqNumber;
                
                listFile.close();       
+               
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "getOTUData");
@@ -1382,6 +1365,7 @@ void ShhherCommand::calcCentroidsDriver(int start, int finish){
        
        try{
                
+       
                for(int i=start;i<finish;i++){
                        
                        double count = 0;
@@ -1393,7 +1377,7 @@ void ShhherCommand::calcCentroidsDriver(int start, int finish){
                        for(int j=0;j<nSeqsPerOTU[i];j++){
                                count += singleTau[seqNumber[cumNumSeqs[i] + j]];
                        }
-                       
+
                        if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
                                vector<double> adF(nSeqsPerOTU[i]);
                                vector<int> anL(nSeqsPerOTU[i]);
@@ -1673,9 +1657,9 @@ void ShhherCommand::calcNewDistancesChildMPI(int startSeq, int stopSeq, vector<i
        try{
                vector<double> newTau(numOTUs,0);
                vector<double> norms(numSeqs, 0);
-               otuIndex.resize(0);
-               seqIndex.resize(0);
-               singleTau.resize(0);
+               otuIndex.clear();
+               seqIndex.clear();
+               singleTau.clear();
                
                
                
@@ -1997,7 +1981,6 @@ void ShhherCommand::writeQualities(vector<int> otuCounts){
 
 void ShhherCommand::writeSequences(vector<int> otuCounts){
        try {
-               string bases = "TACG";
                
                string fastaFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.fasta";
                ofstream fastaFile;
@@ -2011,14 +1994,17 @@ void ShhherCommand::writeSequences(vector<int> otuCounts){
                        if(otuCounts[i] > 0){
                                fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
                                
-                               for(int j=8;j<numFlowCells;j++){
+                               string newSeq = "";
+                               
+                               for(int j=0;j<numFlowCells;j++){
                                        
-                                       char base = bases[j % 4];
+                                       char base = flowOrder[j % 4];
                                        for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
-                                               fastaFile << base;
+                                               newSeq += base;
                                        }
                                }
-                               fastaFile << endl;
+                               
+                               fastaFile << newSeq.substr(4) << endl;
                        }
                }
                fastaFile.close();
@@ -2088,7 +2074,7 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
                ofstream otuCountsFile;
                m->openOutputFile(otuCountsFileName, otuCountsFile);
                
-               string bases = "TACG";
+               string bases = flowOrder;
                
                for(int i=0;i<numOTUs;i++){
                        //output the translated version of the centroid sequence for the otu
@@ -2108,15 +2094,18 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
                                        int sequence = aaI[i][j];
                                        otuCountsFile << seqNameVector[sequence] << '\t';
                                        
-                                       for(int k=8;k<lengths[sequence];k++){
+                                       string newSeq = "";
+                                       
+                                       for(int k=0;k<lengths[sequence];k++){
                                                char base = bases[k % 4];
                                                int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
-                                               
+                                                       
                                                for(int s=0;s<freq;s++){
-                                                       otuCountsFile << base;
+                                                       newSeq += base;
+                                                       //otuCountsFile << base;
                                                }
                                        }
-                                       otuCountsFile << endl;
+                                       otuCountsFile << newSeq.substr(4) << endl;
                                }
                                otuCountsFile << endl;
                        }