]> git.donarmstrong.com Git - mothur.git/blobdiff - trimseqscommand.cpp
changed normalize.shared and sub.sample commands to fix bug in normalize.shared and...
[mothur.git] / trimseqscommand.cpp
index dc87fff99489ebe9ebfc29bec6ff2328875d7030..a8c3fc697ae10c1c6a117ce68c550a928281ec44 100644 (file)
@@ -189,6 +189,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                                if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
                                else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
                        }else if (fastaFile == "not open") { abort = true; }    
+                       else { m->setFastaFile(fastaFile); }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
@@ -207,7 +208,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "oligos", true);
                        if (temp == "not found"){       oligoFile = "";         }
                        else if(temp == "not open"){    abort = true;   } 
-                       else                                    {       oligoFile = temp;               }
+                       else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
                        
                        
                        temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
@@ -236,12 +237,12 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "qfile", true);     
                        if (temp == "not found")        {       qFileName = "";         }
                        else if(temp == "not open")     {       abort = true;           }
-                       else                                            {       qFileName = temp;       }
+                       else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
                        
                        temp = validParameter.validFile(parameters, "name", true);      
                        if (temp == "not found")        {       nameFile = "";          }
                        else if(temp == "not open")     {       nameFile = "";  abort = true;           }
-                       else                                            {       nameFile = temp;        }
+                       else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
                        
                        temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
                        convert(temp, qThreshold);
@@ -307,6 +308,7 @@ int TrimSeqsCommand::execute(){
                
                numFPrimers = 0;  //this needs to be initialized
                numRPrimers = 0;
+               createGroup = false;
                vector<vector<string> > fastaFileNames;
                vector<vector<string> > qualFileNames;
                vector<vector<string> > nameFileNames;
@@ -342,9 +344,11 @@ int TrimSeqsCommand::execute(){
                
                string outputGroupFileName;
                if(oligoFile != ""){
-                       outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
-                       outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
-                       getOligos(fastaFileNames, qualFileNames, nameFileNames);
+                       createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
+                       if (createGroup) {
+                               outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
+                               outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
+                       }
                }
                
                vector<unsigned long int> fastaFilePos;
@@ -379,16 +383,16 @@ int TrimSeqsCommand::execute(){
                                        if (fastaFileNames[i][j] != "") {
                                                if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
                                                        if(m->isBlank(fastaFileNames[i][j])){
-                                                               remove(fastaFileNames[i][j].c_str());
+                                                               m->mothurRemove(fastaFileNames[i][j]);
                                                                namesToRemove.insert(fastaFileNames[i][j]);
                                                        
                                                                if(qFileName != ""){
-                                                                       remove(qualFileNames[i][j].c_str());
+                                                                       m->mothurRemove(qualFileNames[i][j]);
                                                                        namesToRemove.insert(qualFileNames[i][j]);
                                                                }
                                                                
                                                                if(nameFile != ""){
-                                                                       remove(nameFileNames[i][j].c_str());
+                                                                       m->mothurRemove(nameFileNames[i][j]);
                                                                        namesToRemove.insert(nameFileNames[i][j]);
                                                                }
                                                        }else{  
@@ -427,7 +431,7 @@ int TrimSeqsCommand::execute(){
                        }
                }
                
-               if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
+               if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
 
                //output group counts
                m->mothurOutEndLine();
@@ -438,7 +442,7 @@ int TrimSeqsCommand::execute(){
                }
                if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
                
-               if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
+               if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
 
                //set fasta file as new current fastafile
                string current = "";
@@ -504,7 +508,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                
                
                ofstream outGroupsFile;
-               if (oligoFile != ""){   m->openOutputFile(groupFileName, outGroupsFile);   }
+               if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
                if(allFiles){
                        for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
                                for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
@@ -540,12 +544,12 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                
                        if (m->control_pressed) { 
                                inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
-                               if (oligoFile != "") {   outGroupsFile.close();   }
+                               if (createGroup) {       outGroupsFile.close();   }
 
                                if(qFileName != ""){
                                        qFile.close();
                                }
-                               for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
+                               for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
 
                                return 0;
                        }
@@ -555,12 +559,12 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                        int currentSeqsDiffs = 0;
 
                        Sequence currSeq(inFASTA); m->gobble(inFASTA);
-                       
+                       //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
                        QualityScores currQual;
                        if(qFileName != ""){
                                currQual = QualityScores(qFile);  m->gobble(qFile);
                        }
-
+                       
                        string origSeq = currSeq.getUnaligned();
                        if (origSeq != "") {
                                
@@ -604,7 +608,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                        else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
                                        else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
                                        else                                            {       success = 1;                            }
-                               
+                                       
                                        //you don't want to trim, if it fails above then scrap it
                                        if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
                                        
@@ -645,30 +649,39 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
                                        }
                                        
-                                       if(barcodes.size() != 0){
-                                               string thisGroup = barcodeNameVector[barcodeIndex];
-                                               if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { thisGroup += "." + primerNameVector[primerIndex]; } }
-                                               
-                                               outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
-                                               
-                                               if (nameFile != "") {
-                                                       map<string, string>::iterator itName = nameMap.find(currSeq.getName());
-                                                       if (itName != nameMap.end()) { 
-                                                               vector<string> thisSeqsNames; 
-                                                               m->splitAtChar(itName->second, thisSeqsNames, ',');
-                                                               for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
-                                                                       outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
-                                                               }
-                                                       }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
-                                               }
-                                               
-                                               map<string, int>::iterator it = groupCounts.find(thisGroup);
-                                               if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
-                                               else { groupCounts[it->first]++; }
+                                       if (createGroup) {
+                                               if(barcodes.size() != 0){
+                                                       string thisGroup = barcodeNameVector[barcodeIndex];
+                                                       if (primers.size() != 0) { 
+                                                               if (primerNameVector[primerIndex] != "") { 
+                                                                       if(thisGroup != "") {
+                                                                               thisGroup += "." + primerNameVector[primerIndex]; 
+                                                                       }else {
+                                                                               thisGroup = primerNameVector[primerIndex]; 
+                                                                       }
+                                                               } 
+                                                       }
+                                                       
+                                                       outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
+                                                       
+                                                       if (nameFile != "") {
+                                                               map<string, string>::iterator itName = nameMap.find(currSeq.getName());
+                                                               if (itName != nameMap.end()) { 
+                                                                       vector<string> thisSeqsNames; 
+                                                                       m->splitAtChar(itName->second, thisSeqsNames, ',');
+                                                                       for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
+                                                                               outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
+                                                                       }
+                                                               }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
+                                                       }
                                                        
+                                                       map<string, int>::iterator it = groupCounts.find(thisGroup);
+                                                       if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
+                                                       else { groupCounts[it->first]++; }
+                                                               
+                                               }
                                        }
                                        
-                                       
                                        if(allFiles){
                                                ofstream output;
                                                m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
@@ -711,22 +724,23 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                unsigned long int pos = inFASTA.tellg();
                                if ((pos == -1) || (pos >= line->end)) { break; }
+                       
                        #else
                                if (inFASTA.eof()) { break; }
                        #endif
-                               
+                       
                        //report progress
                        if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
                        
                }
                //report progress
                if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
-
+               
                
                inFASTA.close();
                trimFASTAFile.close();
                scrapFASTAFile.close();
-               if (oligoFile != "") {   outGroupsFile.close();   }
+               if (createGroup) {       outGroupsFile.close();   }
                if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
                if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
                
@@ -798,14 +812,18 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                                                 qLines[process]);
                                
                                //pass groupCounts to parent
-                               ofstream out;
-                               string tempFile = filename + toString(getpid()) + ".num.temp";
-                               m->openOutputFile(tempFile, out);
-                               for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
-                                       out << it->first << '\t' << it->second << endl;
+                               if(createGroup){
+                                       ofstream out;
+                                       string tempFile = filename + toString(getpid()) + ".num.temp";
+                                       m->openOutputFile(tempFile, out);
+                                       
+                                       out << groupCounts.size() << endl;
+                                       
+                                       for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
+                                               out << it->first << '\t' << it->second << endl;
+                                       }
+                                       out.close();
                                }
-                               out.close();
-                               
                                exit(0);
                        }else { 
                                m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
@@ -841,26 +859,28 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                        m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
                        
                        m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
-                       remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
+                       m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
                        m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
-                       remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
+                       m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
                        
                        if(qFileName != ""){
                                m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
-                               remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
+                               m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
                                m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
-                               remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
+                               m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
                        }
                        
                        if(nameFile != ""){
                                m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
-                               remove((trimNameFileName + toString(processIDS[i]) + ".temp").c_str());
+                               m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
                                m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
-                               remove((scrapNameFileName + toString(processIDS[i]) + ".temp").c_str());
+                               m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
                        }
                        
-                       m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
-                       remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
+                       if(createGroup){
+                               m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
+                               m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
+                       }
                        
                        
                        if(allFiles){
@@ -868,35 +888,42 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                        for(int k=0;k<fastaFileNames[j].size();k++){
                                                if (fastaFileNames[j][k] != "") {
                                                        m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
-                                                       remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
+                                                       m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
                                                        
                                                        if(qFileName != ""){
                                                                m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
-                                                               remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
+                                                               m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
                                                        }
                                                        
                                                        if(nameFile != ""){
                                                                m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
-                                                               remove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
+                                                               m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
                                                        }
                                                }
                                        }
                                }
                        }
                        
-                       ifstream in;
-                       string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
-                       m->openInputFile(tempFile, in);
-                       int tempNum;
-                       string group;
-                       while (!in.eof()) { 
-                               in >> group >> tempNum; m->gobble(in);
+                       if(createGroup){
+                               ifstream in;
+                               string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
+                               m->openInputFile(tempFile, in);
+                               int tempNum;
+                               string group;
+                               
+                               in >> tempNum; m->gobble(in);
+                               
+                               if (tempNum != 0) {
+                                       while (!in.eof()) { 
+                                               in >> group >> tempNum; m->gobble(in);
                                
-                               map<string, int>::iterator it = groupCounts.find(group);
-                               if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
-                               else { groupCounts[it->first] += tempNum; }
+                                               map<string, int>::iterator it = groupCounts.find(group);
+                                               if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
+                                               else { groupCounts[it->first] += tempNum; }
+                                       }
+                               }
+                               in.close(); m->mothurRemove(tempFile);
                        }
-                       in.close(); remove(tempFile.c_str());
                        
                }
        
@@ -1029,7 +1056,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned
 
 //***************************************************************************************************************
 
-void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
+bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
        try {
                ifstream inOligos;
                m->openInputFile(oligoFile, inOligos);
@@ -1184,7 +1211,29 @@ void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                }
                numFPrimers = primers.size();
                numRPrimers = revPrimer.size();
-
+               
+               bool allBlank = true;
+               for (int i = 0; i < barcodeNameVector.size(); i++) {
+                       if (barcodeNameVector[i] != "") {
+                               allBlank = false;
+                               break;
+                       }
+               }
+               for (int i = 0; i < primerNameVector.size(); i++) {
+                       if (primerNameVector[i] != "") {
+                               allBlank = false;
+                               break;
+                       }
+               }
+               
+               if (allBlank) {
+                       m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
+                       allFiles = false;
+                       return false;
+               }
+               
+               return true;
+               
        }
        catch(exception& e) {
                m->errorOut(e, "TrimSeqsCommand", "getOligos");
@@ -1303,7 +1352,7 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group
                        if (alignment != NULL) {  delete alignment;  }
                        
                }
-               
+       
                return success;
                
        }