]> git.donarmstrong.com Git - mothur.git/blobdiff - shhhercommand.cpp
added cutoff change to hcluster
[mothur.git] / shhhercommand.cpp
index 066a9c51f26b764f14fa3f9490e094748ad3e7b6..a6fee2a3201bde30698cb145d9d92729c2cbdd74 100644 (file)
 #define MIN_WEIGHT 0.1
 #define MIN_TAU 0.0001
 #define MIN_ITER 10
-
 //**********************************************************************************************************************
-
-vector<string> ShhherCommand::getValidParameters(){    
+vector<string> ShhherCommand::setParameters(){ 
        try {
-               string Array[] =  {     
-                       "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors", "maxiter", "mindelta"        
-               };
+               CommandParameter pflow("flow", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pflow);
+               CommandParameter pfile("file", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pfile);
+               CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plookup);
+               CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+               CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
+               CommandParameter psigma("sigma", "Number", "", "60", "", "", "",false,false); parameters.push_back(psigma);
+               CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "",false,false); parameters.push_back(pmindelta);
+               CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getValidParameters");
+               m->errorOut(e, "ShhherCommand", "setParameters");
                exit(1);
        }
 }
-
 //**********************************************************************************************************************
-
-ShhherCommand::ShhherCommand(){        
+string ShhherCommand::getHelpString(){ 
        try {
-               abort = true; calledHelp = true;
-               
-               //initialize outputTypes
-               vector<string> tempOutNames;
-               outputTypes["pn.dist"] = tempOutNames;
-
+               string helpString = "";
+               helpString += "The shhh.seqs command reads a file containing flowgrams and creates a file of corrected sequences.\n";
+               return helpString;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "ShhherCommand");
+               m->errorOut(e, "ShhherCommand", "getHelpString");
                exit(1);
        }
 }
-
 //**********************************************************************************************************************
 
-vector<string> ShhherCommand::getRequiredParameters(){ 
+ShhherCommand::ShhherCommand(){        
        try {
-               string Array[] =  {"flow"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-               return myArray;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getRequiredParameters");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
+               abort = true; calledHelp = true;
+               setParameters();
+               
+               //initialize outputTypes
+//             vector<string> tempOutNames;
+//             outputTypes["pn.dist"] = tempOutNames;
 
-vector<string> ShhherCommand::getRequiredFiles(){      
-       try {
-               vector<string> myArray;
-               return myArray;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getRequiredFiles");
+               m->errorOut(e, "ShhherCommand", "ShhherCommand");
                exit(1);
        }
 }
@@ -106,15 +98,10 @@ ShhherCommand::ShhherCommand(string option) {
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
-                       
-                       //valid paramters for this command
-                       string AlignArray[] =  {
-                               "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors", "maxiter", "mindelta"        
-                       };
-                       
-                       vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+                       vector<string> myArray = setParameters();
                        
                        OptionParser parser(option);
                        map<string,string> parameters = parser.getParameters();
@@ -129,7 +116,7 @@ ShhherCommand::ShhherCommand(string option) {
                        
                        //initialize outputTypes
                        vector<string> tempOutNames;
-                       outputTypes["pn.dist"] = tempOutNames;
+//                     outputTypes["pn.dist"] = tempOutNames;
                        //                      outputTypes["fasta"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
@@ -173,12 +160,21 @@ ShhherCommand::ShhherCommand(string option) {
                        }
                        else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; }
                        
-                       if(flowFileName != "not found"){        compositeFASTAFileName = "";    }
+                       if(flowFileName != "not found"){
+                               compositeFASTAFileName = "";    
+                               compositeNamesFileName = "";    
+                       }
                        else{
-                               compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "pn.fasta";
                                ofstream temp;
+
+                               //flow.files = 9 character offset
+                               compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.fasta";
                                m->openOutputFile(compositeFASTAFileName, temp);
                                temp.close();
+                               
+                               compositeNamesFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.names";
+                               m->openOutputFile(compositeNamesFileName, temp);
+                               temp.close();
                        }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
@@ -192,12 +188,78 @@ ShhherCommand::ShhherCommand(string option) {
                        // ...at some point should added some additional type checking...
                        string temp;
                        temp = validParameter.validFile(parameters, "lookup", true);
-                       if (temp == "not found")        {       lookupFileName = "LookUp_Titanium.pat"; }
-                       else if(temp == "not open")     {       abort = true;                   } 
-                       else                                            {       lookupFileName = temp;  }
+                       if (temp == "not found")        {       
+                               lookupFileName = "LookUp_Titanium.pat"; 
+                               
+                               int ableToOpen;
+                               ifstream in;
+                               ableToOpen = m->openInputFile(lookupFileName, in, "noerror");
+                               in.close();     
+                               
+                               //if you can't open it, try input location
+                               if (ableToOpen == 1) {
+                                       if (inputDir != "") { //default path is set
+                                               string tryPath = inputDir + lookupFileName;
+                                               m->mothurOut("Unable to open " + lookupFileName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
+                                               ifstream in2;
+                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                               in2.close();
+                                               lookupFileName = tryPath;
+                                       }
+                               }
+                               
+                               //if you can't open it, try default location
+                               if (ableToOpen == 1) {
+                                       if (m->getDefaultPath() != "") { //default path is set
+                                               string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName);
+                                               m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                               ifstream in2;
+                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                               in2.close();
+                                               lookupFileName = tryPath;
+                                       }
+                               }
+                               
+                               //if you can't open it its not in current working directory or inputDir, try mothur excutable location
+                               if (ableToOpen == 1) {
+                                       string exepath = m->argv;
+                                       string tempPath = exepath;
+                                       for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
+                                       exepath = exepath.substr(0, (tempPath.find_last_of('m')));
+                                       
+                                       string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName);
+                                       m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
+                                       ifstream in2;
+                                       ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                       in2.close();
+                                       lookupFileName = tryPath;
+                               }
+                               
+                               if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
+                       }
+                       else if(temp == "not open")     {       
+                               
+                               lookupFileName = validParameter.validFile(parameters, "lookup", false);
+                               
+                               //if you can't open it its not inputDir, try mothur excutable location
+                               string exepath = m->argv;
+                               string tempPath = exepath;
+                               for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
+                               exepath = exepath.substr(0, (tempPath.find_last_of('m')));
+                                       
+                               string tryPath = m->getFullPathName(exepath) + lookupFileName;
+                               m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
+                               ifstream in2;
+                               int ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                               in2.close();
+                               lookupFileName = tryPath;
+                               
+                               if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
+                       }else                                           {       lookupFileName = temp;  }
                        
-                       temp = validParameter.validFile(parameters, "processors", false);if (temp == "not found"){      temp = "1";                     }
-                       convert(temp, processors); 
+                       temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
+                       m->setProcessors(temp);
+                       convert(temp, processors);
 
                        temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.01";          }
                        convert(temp, cutoff); 
@@ -211,7 +273,12 @@ ShhherCommand::ShhherCommand(string option) {
                        temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
                        convert(temp, sigma); 
                        
-                       globaldata = GlobalData::getInstance();
+                       flowOrder = validParameter.validFile(parameters, "order", false);
+                       if (flowOrder == "not found"){ flowOrder = "TACG";              }
+                       else if(flowOrder.length() != 4){
+                               m->mothurOut("The value of the order option must be four bases long\n");
+                       }
+                       
                }
                        
 #ifdef USE_MPI
@@ -224,23 +291,6 @@ ShhherCommand::ShhherCommand(string option) {
                exit(1);
        }
 }
-
-//**********************************************************************************************************************
-
-ShhherCommand::~ShhherCommand(){}
-
-//**********************************************************************************************************************
-
-void ShhherCommand::help(){
-       try {
-               m->mothurOut("The shhher command reads a file containing flowgrams and creates a file of corrected sequences.\n");
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "help");
-               exit(1);
-       }
-}
-
 //**********************************************************************************************************************
 #ifdef USE_MPI
 int ShhherCommand::execute(){
@@ -332,6 +382,11 @@ int ShhherCommand::execute(){
                                string listFileName = cluster(distFileName, namesFileName);
 
                                getOTUData(listFileName);
+
+                               remove(distFileName.c_str());
+                               remove(namesFileName.c_str());
+                               remove(listFileName.c_str());
+                               
                                initPyroCluster();
 
                                for(int i=1;i<ncpus;i++){
@@ -469,18 +524,17 @@ int ShhherCommand::execute(){
                                m->mothurOut("\nFinalizing...\n");
                                fill();
                                setOTUs();
+                               
                                vector<int> otuCounts(numOTUs, 0);
                                for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
                                calcCentroidsDriver(0, numOTUs);
+                               
                                writeQualities(otuCounts);
                                writeSequences(otuCounts);
                                writeNames(otuCounts);
                                writeClusters(otuCounts);
                                writeGroups();
                                
-                               remove(distFileName.c_str());
-                               remove(namesFileName.c_str());
-                               remove(listFileName.c_str());
                                                                 
                                m->mothurOut("Total time to process " + toString(flowFileName) + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');                 
                        }
@@ -592,6 +646,17 @@ int ShhherCommand::execute(){
                }               
                MPI_Barrier(MPI_COMM_WORLD);
 
+               
+               if(compositeFASTAFileName != ""){
+                       outputNames.push_back(compositeFASTAFileName);
+                       outputNames.push_back(compositeNamesFileName);
+               }
+
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+               
                return 0;
 
        }
@@ -632,7 +697,7 @@ string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){
                
                m->mothurOut(toString(stopSeq) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
                
-               string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.dist";
+               string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
                if(pid != 0){   fDistFileName += ".temp." + toString(pid);      }
 
                ofstream distFile(fDistFileName.c_str());
@@ -694,6 +759,10 @@ int ShhherCommand::execute(){
                        string listFileName = cluster(distFileName, namesFileName);
                        getOTUData(listFileName);
                        
+                       remove(distFileName.c_str());
+                       remove(namesFileName.c_str());
+                       remove(listFileName.c_str());
+                       
                        initPyroCluster();
                        
                        double maxDelta = 0;
@@ -739,12 +808,19 @@ int ShhherCommand::execute(){
                        writeClusters(otuCounts);
                        writeGroups();
                        
-                       remove(distFileName.c_str());
-                       remove(namesFileName.c_str());
-                       remove(listFileName.c_str());
-                       
                        m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
                }
+               
+               if(compositeFASTAFileName != ""){
+                       outputNames.push_back(compositeFASTAFileName);
+                       outputNames.push_back(compositeNamesFileName);
+               }
+
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+               
                return 0;
        }
        catch(exception& e) {
@@ -1036,7 +1112,7 @@ void ShhherCommand::flowDistParentFork(string distFileName, int startSeq, int st
 
 string ShhherCommand::createDistFile(int processors){
        try{
-               string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.dist";
+               string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
                                
                unsigned long int begTime = time(NULL);
                double begClock = clock();
@@ -1124,7 +1200,7 @@ string ShhherCommand::createNamesFile(){
                        duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
                }
                
-               string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.names";
+               string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
                
                ofstream nameFile;
                m->openOutputFile(nameFileName, nameFile);
@@ -1148,11 +1224,6 @@ string ShhherCommand::createNamesFile(){
 string ShhherCommand::cluster(string distFileName, string namesFileName){
        try {
                
-               
-               globaldata->setNameFile(namesFileName);
-               globaldata->setColumnFile(distFileName);
-               globaldata->setFormat("column");
-               
                ReadMatrix* read = new ReadColumnMatrix(distFileName);  
                read->setCutoff(cutoff);
                
@@ -1178,7 +1249,7 @@ string ShhherCommand::cluster(string distFileName, string namesFileName){
                
                list->setLabel(toString(cutoff));
                
-               string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.list";
+               string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
                ofstream listFile;
                m->openOutputFile(listFileName, listFile);
                list->print(listFile);
@@ -1909,7 +1980,7 @@ void ShhherCommand::setOTUs(){
 void ShhherCommand::writeQualities(vector<int> otuCounts){
        
        try {
-               string qualityFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.qual";
+               string qualityFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.qual";
 
                ofstream qualityFile;
                m->openOutputFile(qualityFileName, qualityFile);
@@ -1998,7 +2069,8 @@ void ShhherCommand::writeQualities(vector<int> otuCounts){
                        }
                }
                qualityFile.close();
-               
+               outputNames.push_back(qualityFileName);
+
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "writeQualities");
@@ -2010,9 +2082,8 @@ void ShhherCommand::writeQualities(vector<int> otuCounts){
 
 void ShhherCommand::writeSequences(vector<int> otuCounts){
        try {
-               string bases = "TACG";
                
-               string fastaFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.fasta";
+               string fastaFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.fasta";
                ofstream fastaFile;
                m->openOutputFile(fastaFileName, fastaFile);
                
@@ -2024,18 +2095,23 @@ void ShhherCommand::writeSequences(vector<int> otuCounts){
                        if(otuCounts[i] > 0){
                                fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
                                
-                               for(int j=8;j<numFlowCells;j++){
+                               string newSeq = "";
+                               
+                               for(int j=0;j<numFlowCells;j++){
                                        
-                                       char base = bases[j % 4];
+                                       char base = flowOrder[j % 4];
                                        for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
-                                               fastaFile << base;
+                                               newSeq += base;
                                        }
                                }
-                               fastaFile << endl;
+                               
+                               fastaFile << newSeq.substr(4) << endl;
                        }
                }
                fastaFile.close();
-               
+
+               outputNames.push_back(fastaFileName);
+
                if(compositeFASTAFileName != ""){
                        m->appendFiles(fastaFileName, compositeFASTAFileName);
                }
@@ -2050,7 +2126,7 @@ void ShhherCommand::writeSequences(vector<int> otuCounts){
 
 void ShhherCommand::writeNames(vector<int> otuCounts){
        try {
-               string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.final.names";
+               string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
                ofstream nameFile;
                m->openOutputFile(nameFileName, nameFile);
                
@@ -2066,6 +2142,12 @@ void ShhherCommand::writeNames(vector<int> otuCounts){
                        }
                }
                nameFile.close();
+               outputNames.push_back(nameFileName);
+               
+               
+               if(compositeNamesFileName != ""){
+                       m->appendFiles(nameFileName, compositeNamesFileName);
+               }               
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "writeNames");
@@ -2078,7 +2160,7 @@ void ShhherCommand::writeNames(vector<int> otuCounts){
 void ShhherCommand::writeGroups(){
        try {
                string fileRoot = flowFileName.substr(0,flowFileName.find_last_of('.'));
-               string groupFileName = fileRoot + ".pn.groups";
+               string groupFileName = fileRoot + ".shhh.groups";
                ofstream groupFile;
                m->openOutputFile(groupFileName, groupFile);
                
@@ -2086,6 +2168,8 @@ void ShhherCommand::writeGroups(){
                        groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
                }
                groupFile.close();
+               outputNames.push_back(groupFileName);
+
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "writeGroups");
@@ -2097,11 +2181,11 @@ void ShhherCommand::writeGroups(){
 
 void ShhherCommand::writeClusters(vector<int> otuCounts){
        try {
-               string otuCountsFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.counts";
+               string otuCountsFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.counts";
                ofstream otuCountsFile;
                m->openOutputFile(otuCountsFileName, otuCountsFile);
                
-               string bases = "TACG";
+               string bases = flowOrder;
                
                for(int i=0;i<numOTUs;i++){
                        //output the translated version of the centroid sequence for the otu
@@ -2121,20 +2205,25 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
                                        int sequence = aaI[i][j];
                                        otuCountsFile << seqNameVector[sequence] << '\t';
                                        
-                                       for(int k=8;k<lengths[sequence];k++){
+                                       string newSeq = "";
+                                       
+                                       for(int k=0;k<lengths[sequence];k++){
                                                char base = bases[k % 4];
                                                int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
-                                               
+                                                       
                                                for(int s=0;s<freq;s++){
-                                                       otuCountsFile << base;
+                                                       newSeq += base;
+                                                       //otuCountsFile << base;
                                                }
                                        }
-                                       otuCountsFile << endl;
+                                       otuCountsFile << newSeq.substr(4) << endl;
                                }
                                otuCountsFile << endl;
                        }
                }
                otuCountsFile.close();
+               outputNames.push_back(otuCountsFileName);
+
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "writeClusters");