git.donarmstrong.com Git - mothur.git/blobdiff - makecontigscommand.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / makecontigscommand.cpp
index b1e78ae091a13a020360137303b3143a5d9536f6..f888fbe4928a9526d34e9b8f25200fb32a329f6f 100644 (file)
@@ -73,6 +73,7 @@ string MakeContigsCommand::getHelpString(){
         helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n";
         helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
         helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
+
         helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n";
         helpString += "The make.contigs command should be in the following format: \n";
                helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
@@ -110,7 +111,7 @@ MakeContigsCommand::MakeContigsCommand(){
                outputTypes["fasta"] = tempOutNames;
         outputTypes["group"] = tempOutNames;
         outputTypes["report"] = tempOutNames;
-       }
+    }
        catch(exception& e) {
                m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
                exit(1);
@@ -320,6 +321,7 @@ MakeContigsCommand::MakeContigsCommand(string option)  {
             temp = validParameter.validFile(parameters, "allfiles", false);            if (temp == "not found") { temp = "F"; }
                        allFiles = m->isTrue(temp);
             
+            
             temp = validParameter.validFile(parameters, "trimoverlap", false);         if (temp == "not found") { temp = "F"; }
                        trimOverlap = m->isTrue(temp);
                        
@@ -386,10 +388,14 @@ int MakeContigsCommand::execute(){
             outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile);
         }
         
+        map<string, int> totalGroupCounts;
+        
         for (int l = 0; l < filesToProcess.size(); l++) {
             
             m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n");
             
+            groupCounts.clear();
+            groupMap.clear();
             vector<vector<string> > fastaFileNames;
             createOligosGroup = false;
             string outputGroupFileName;
@@ -415,7 +421,7 @@ int MakeContigsCommand::execute(){
                         
             m->mothurOut("Making contigs...\n"); 
             createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l);
-            m->mothurOut("Done.\n");
+             m->mothurOut("Here...\n"); 
             
             //remove temp fasta and qual files
             for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); }  }
@@ -455,17 +461,17 @@ int MakeContigsCommand::execute(){
                     
                     ofstream out;
                     string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
-                    thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); 
+                    thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
                     m->openOutputFile(thisGroupName, out);
                     
                     while (!in.eof()){
                         if (m->control_pressed) { break; }
                         
                         Sequence currSeq(in); m->gobble(in);
-                        out << currSeq.getName() << '\t' << it->second << endl;  
+                        out << currSeq.getName() << '\t' << it->second << endl;
                     }
-                    in.close();
                     out.close();
+                    in.close();
                 }
             }
             
@@ -479,8 +485,8 @@ int MakeContigsCommand::execute(){
             }
             
             if (filesToProcess.size() > 1) { //merge into large combo files
-                if (createFileGroup || createOligosGroup) { 
-                    if (l == 0) { 
+                if (createFileGroup || createOligosGroup) {
+                    if (l == 0) {
                         ofstream outCGroup;
                         m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close();
                         outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile);
@@ -488,6 +494,12 @@ int MakeContigsCommand::execute(){
                     m->appendFiles(outputGroupFileName, compositeGroupFile);
                     if (!allFiles) { m->mothurRemove(outputGroupFileName);  }
                     else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
+                    
+                    for (map<string, int>::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) {
+                        map<string, int>::iterator itTemp = totalGroupCounts.find(itGroups->first);
+                        if (itTemp == totalGroupCounts.end()) { totalGroupCounts[itGroups->first] = itGroups->second; } //new group create it in totalGroups
+                        else { itTemp->second += itGroups->second; } //existing group, update total
+                    }
                 }
                 if (l == 0) {  m->appendFiles(outMisMatchFile, compositeMisMatchFile);  }
                 else {  m->appendFilesWithoutHeaders(outMisMatchFile, compositeMisMatchFile);  }
@@ -503,12 +515,17 @@ int MakeContigsCommand::execute(){
                     outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
                 }
             }else {
+                totalGroupCounts = groupCounts;
                 outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
                 outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
                 outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
-                if (createFileGroup || createOligosGroup) { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
+                if (createFileGroup || createOligosGroup) {
+                     outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); 
+                }
             }
+            m->mothurOut("Done.\n");
         }
+        
         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
         
         if (m->control_pressed) {      for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
@@ -516,8 +533,8 @@ int MakeContigsCommand::execute(){
                //output group counts
                m->mothurOutEndLine();
                int total = 0;
-               if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
-               for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
+               if (totalGroupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
+               for (map<string, int>::iterator it = totalGroupCounts.begin(); it != totalGroupCounts.end(); it++) {
             total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
                }
                if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
@@ -643,7 +660,7 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                                                }
                                        }
                                }
-
+                
                                num = driver(files[process], 
                              outputFasta + toString(getpid()) + ".temp", 
                              outputScrapFasta + toString(getpid()) + ".temp", 
@@ -754,8 +771,7 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                     }
                 }
             }
-
-                                 
+                                
                        contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h);
                        pDataArray.push_back(tempcontig);
             
@@ -1525,6 +1541,7 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
             }
             m->gobble(in);
             
+            if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ".\n"); }
             
             //check to make sure both are able to be opened
             ifstream in2;
@@ -1660,17 +1677,21 @@ bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, stri
                     }
                     //roligo = reverseOligo(roligo);
                     
+                    if (m->debug) { m->mothurOut("[DEBUG]: reading - " + roligo + ".\n"); }
+                    
                     group = "";
                     
                                        // get rest of line in case there is a primer name
                                        while (!in.eof())       {       
                                                char c = in.get(); 
-                                               if (c == 10 || c == 13){        break;  }
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
                                                else if (c == 32 || c == 9){;} //space or tab
                                                else {  group += c;  }
                                        } 
                     
                     oligosPair newPrimer(foligo, roligo);
+                    
+                    if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
                                        
                                        //check for repeat barcodes
                     string tempPair = foligo+roligo;
@@ -1697,7 +1718,7 @@ bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, stri
                     group = "";
                     while (!in.eof())  {       
                                                char c = in.get(); 
-                                               if (c == 10 || c == 13){        break;  }
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
                                                else if (c == 32 || c == 9){;} //space or tab
                                                else {  group += c;  }
                                        }