]> git.donarmstrong.com Git - mothur.git/blobdiff - trimseqscommand.cpp
parallelized the indicator command
[mothur.git] / trimseqscommand.cpp
index ed84cc0c60d404b0d7c12eb0c3751a2e2a601706..a2c8021a57f6f3f076c8d1c9be7353eb8cffff93 100644 (file)
@@ -189,6 +189,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                                if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
                                else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
                        }else if (fastaFile == "not open") { abort = true; }    
+                       else { m->setFastaFile(fastaFile); }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
@@ -207,7 +208,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "oligos", true);
                        if (temp == "not found"){       oligoFile = "";         }
                        else if(temp == "not open"){    abort = true;   } 
-                       else                                    {       oligoFile = temp;               }
+                       else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
                        
                        
                        temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
@@ -236,12 +237,12 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "qfile", true);     
                        if (temp == "not found")        {       qFileName = "";         }
                        else if(temp == "not open")     {       abort = true;           }
-                       else                                            {       qFileName = temp;       }
+                       else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
                        
                        temp = validParameter.validFile(parameters, "name", true);      
                        if (temp == "not found")        {       nameFile = "";          }
-                       else if(temp == "not open")     {       abort = true;           }
-                       else                                            {       nameFile = temp;        }
+                       else if(temp == "not open")     {       nameFile = "";  abort = true;           }
+                       else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
                        
                        temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
                        convert(temp, qThreshold);
@@ -432,8 +433,9 @@ int TrimSeqsCommand::execute(){
                //output group counts
                m->mothurOutEndLine();
                int total = 0;
+               if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
                for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
-                        total += it->second; m->mothurOut("Group " + it->first + " contains " + toString(it->second) + " sequences."); m->mothurOutEndLine(); 
+                        total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
                }
                if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
                
@@ -554,12 +556,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                        int currentSeqsDiffs = 0;
 
                        Sequence currSeq(inFASTA); m->gobble(inFASTA);
-                       
                        QualityScores currQual;
                        if(qFileName != ""){
                                currQual = QualityScores(qFile);  m->gobble(qFile);
                        }
-
+                       //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
                        string origSeq = currSeq.getUnaligned();
                        if (origSeq != "") {
                                
@@ -603,7 +604,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                        else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
                                        else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
                                        else                                            {       success = 1;                            }
-                               
+                                       
                                        //you don't want to trim, if it fails above then scrap it
                                        if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
                                        
@@ -650,6 +651,17 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                
                                                outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
                                                
+                                               if (nameFile != "") {
+                                                       map<string, string>::iterator itName = nameMap.find(currSeq.getName());
+                                                       if (itName != nameMap.end()) { 
+                                                               vector<string> thisSeqsNames; 
+                                                               m->splitAtChar(itName->second, thisSeqsNames, ',');
+                                                               for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
+                                                                       outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
+                                                               }
+                                                       }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
+                                               }
+                                               
                                                map<string, int>::iterator it = groupCounts.find(thisGroup);
                                                if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
                                                else { groupCounts[it->first]++; }
@@ -699,17 +711,18 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                unsigned long int pos = inFASTA.tellg();
                                if ((pos == -1) || (pos >= line->end)) { break; }
+                       
                        #else
                                if (inFASTA.eof()) { break; }
                        #endif
-                               
+                       
                        //report progress
                        if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
                        
                }
                //report progress
                if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
-
+               
                
                inFASTA.close();
                trimFASTAFile.close();
@@ -786,14 +799,18 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                                                 qLines[process]);
                                
                                //pass groupCounts to parent
-                               ofstream out;
-                               string tempFile = filename + toString(getpid()) + ".num.temp";
-                               m->openOutputFile(tempFile, out);
-                               for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
-                                       out << it->first << '\t' << it->second << endl;
+                               if(oligoFile != ""){
+                                       ofstream out;
+                                       string tempFile = filename + toString(getpid()) + ".num.temp";
+                                       m->openOutputFile(tempFile, out);
+                                       
+                                       out << groupCounts.size() << endl;
+                                       
+                                       for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
+                                               out << it->first << '\t' << it->second << endl;
+                                       }
+                                       out.close();
                                }
-                               out.close();
-                               
                                exit(0);
                        }else { 
                                m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
@@ -806,10 +823,14 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                ofstream temp;
                m->openOutputFile(trimFASTAFileName, temp);             temp.close();
                m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
-               m->openOutputFile(trimQualFileName, temp);              temp.close();
-               m->openOutputFile(scrapQualFileName, temp);             temp.close();
-               m->openOutputFile(trimNameFileName, temp);              temp.close();
-               m->openOutputFile(scrapNameFileName, temp);             temp.close();
+               if(qFileName != ""){
+                       m->openOutputFile(trimQualFileName, temp);              temp.close();
+                       m->openOutputFile(scrapQualFileName, temp);             temp.close();
+               }
+               if (nameFile != "") {
+                       m->openOutputFile(trimNameFileName, temp);              temp.close();
+                       m->openOutputFile(scrapNameFileName, temp);             temp.close();
+               }
 
                driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
                
@@ -843,8 +864,10 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                remove((scrapNameFileName + toString(processIDS[i]) + ".temp").c_str());
                        }
                        
-                       m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
-                       remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
+                       if(oligoFile != ""){
+                               m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
+                               remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
+                       }
                        
                        
                        if(allFiles){
@@ -868,19 +891,26 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                }
                        }
                        
-                       ifstream in;
-                       string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
-                       m->openInputFile(tempFile, in);
-                       int tempNum;
-                       string group;
-                       while (!in.eof()) { 
-                               in >> group >> tempNum; m->gobble(in);
+                       if(oligoFile != ""){
+                               ifstream in;
+                               string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
+                               m->openInputFile(tempFile, in);
+                               int tempNum;
+                               string group;
+                               
+                               in >> tempNum; m->gobble(in);
+                               
+                               if (tempNum != 0) {
+                                       while (!in.eof()) { 
+                                               in >> group >> tempNum; m->gobble(in);
                                
-                               map<string, int>::iterator it = groupCounts.find(group);
-                               if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
-                               else { groupCounts[it->first] += tempNum; }
+                                               map<string, int>::iterator it = groupCounts.find(group);
+                                               if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
+                                               else { groupCounts[it->first] += tempNum; }
+                                       }
+                               }
+                               in.close(); remove(tempFile.c_str());
                        }
-                       in.close(); remove(tempFile.c_str());
                        
                }
        
@@ -1287,7 +1317,7 @@ int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group
                        if (alignment != NULL) {  delete alignment;  }
                        
                }
-               
+       
                return success;
                
        }