Merge remote-tracking branch 'mothur/master'

author Pat Schloss <pschloss@umich.edu>
Thu, 7 Feb 2013 15:12:18 +0000 (10:12 -0500)
committer Pat Schloss <pschloss@umich.edu>
Thu, 7 Feb 2013 15:12:18 +0000 (10:12 -0500)
diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj

index ea4f7edffad3749bd6c87275a3f94d5d61ce8264..15a5cff06d869482c252cdc50e82c5ec0a75abdf 100644 (file)
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -17,6 +17,7 @@
                 A70056E6156A93D000924A2D /* getotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056E5156A93D000924A2D /* getotulabelscommand.cpp */; };
                 A70056EB156AB6E500924A2D /* removeotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056EA156AB6E500924A2D /* removeotulabelscommand.cpp */; };
                 A70332B712D3A13400761E33 /* makefile in Sources */ = {isa = PBXBuildFile; fileRef = A70332B512D3A13400761E33 /* makefile */; };
+               A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7128B1C16B7002600723BE4 /* getdistscommand.cpp */; };
                 A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; };
                 A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; };
                 A71CB160130B04A2001E7287 /* anosimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71CB15E130B04A2001E7287 /* anosimcommand.cpp */; };
@@ -39,6 +40,7 @@
                 A741FAD215D1688E0067BCC5 /* sequencecountparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */; };
                 A7496D2E167B531B00CC7D7C /* kruskalwalliscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */; };
                 A74A9A9F148E881E00AB5E3E /* spline.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74A9A9E148E881E00AB5E3E /* spline.cpp */; };
+               A74C06E916A9C0A9008390A3 /* primerdesigncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */; };
                 A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; };
                 A74D59A4159A1E2000043046 /* counttable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D59A3159A1E2000043046 /* counttable.cpp */; };
                 A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; };
@@ -63,6 +65,7 @@
                 A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */; };
                 A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */; };
                 A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; };
+               A7B0231516B8244C006BA09E /* removedistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0231416B8244B006BA09E /* removedistscommand.cpp */; };
                 A7BF221414587886000AD524 /* myPerseus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF221214587886000AD524 /* myPerseus.cpp */; };
                 A7BF2232145879B2000AD524 /* chimeraperseuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */; };
                 A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; };
@@ -401,6 +404,8 @@
                 A70056E9156AB6D400924A2D /* removeotulabelscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removeotulabelscommand.h; sourceTree = "<group>"; };
                 A70056EA156AB6E500924A2D /* removeotulabelscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removeotulabelscommand.cpp; sourceTree = "<group>"; };
                 A70332B512D3A13400761E33 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = "<group>"; };
+               A7128B1A16B7001200723BE4 /* getdistscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getdistscommand.h; sourceTree = "<group>"; };
+               A7128B1C16B7002600723BE4 /* getdistscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getdistscommand.cpp; sourceTree = "<group>"; };
                 A713EBAA12DC7613000092AC /* readphylipvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylipvector.h; sourceTree = "<group>"; };
                 A713EBAB12DC7613000092AC /* readphylipvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylipvector.cpp; sourceTree = "<group>"; };
                 A713EBEB12DC7C5E000092AC /* nmdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = nmdscommand.h; sourceTree = "<group>"; };
@@ -448,6 +453,8 @@
                 A7496D2D167B531B00CC7D7C /* kruskalwalliscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kruskalwalliscommand.h; sourceTree = "<group>"; };
                 A74A9A9D148E881E00AB5E3E /* spline.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = spline.h; sourceTree = "<group>"; };
                 A74A9A9E148E881E00AB5E3E /* spline.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = spline.cpp; sourceTree = "<group>"; };
+               A74C06E616A9C097008390A3 /* primerdesigncommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = primerdesigncommand.h; sourceTree = "<group>"; };
+               A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = primerdesigncommand.cpp; sourceTree = "<group>"; };
                 A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerauchimecommand.h; sourceTree = "<group>"; };
                 A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerauchimecommand.cpp; sourceTree = "<group>"; };
                 A74D59A3159A1E2000043046 /* counttable.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = counttable.cpp; sourceTree = "<group>"; };
@@ -496,6 +503,8 @@
                 A7A61F2B130062E000E05B6B /* amovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = amovacommand.h; sourceTree = "<group>"; };
                 A7A61F2C130062E000E05B6B /* amovacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = amovacommand.cpp; sourceTree = "<group>"; };
                 A7AACFBA132FE008003D6C4D /* currentfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = currentfile.h; sourceTree = "<group>"; };
+               A7B0231416B8244B006BA09E /* removedistscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removedistscommand.cpp; sourceTree = "<group>"; };
+               A7B0231716B8245D006BA09E /* removedistscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removedistscommand.h; sourceTree = "<group>"; };
                 A7BF221214587886000AD524 /* myPerseus.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = myPerseus.cpp; sourceTree = "<group>"; };
                 A7BF221314587886000AD524 /* myPerseus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myPerseus.h; sourceTree = "<group>"; };
                 A7BF2230145879B2000AD524 /* chimeraperseuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraperseuscommand.h; sourceTree = "<group>"; };
@@ -1355,6 +1364,8 @@
                                 219C1DE31559BCCD004209F9 /* getcoremicrobiomecommand.cpp */,
                                 A7FE7C3E1330EA1000F7B327 /* getcurrentcommand.h */,
                                 A7FE7C3F1330EA1000F7B327 /* getcurrentcommand.cpp */,
+                               A7128B1A16B7001200723BE4 /* getdistscommand.h */,
+                               A7128B1C16B7002600723BE4 /* getdistscommand.cpp */,
                                 A7E9B6F312D37EC400DA6239 /* getgroupcommand.h */,
                                 A7E9B6F212D37EC400DA6239 /* getgroupcommand.cpp */,
                                 A7E9B6F512D37EC400DA6239 /* getgroupscommand.h */,
@@ -1455,12 +1466,16 @@
                                 A7E9B79512D37EC400DA6239 /* pipelinepdscommand.cpp */,
                                 A7E9B79812D37EC400DA6239 /* preclustercommand.h */,
                                 A7E9B79712D37EC400DA6239 /* preclustercommand.cpp */,
+                               A74C06E616A9C097008390A3 /* primerdesigncommand.h */,
+                               A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */,
                                 A7E9B7A212D37EC400DA6239 /* quitcommand.h */,
                                 A7E9B7A112D37EC400DA6239 /* quitcommand.cpp */,
                                 A7E9B7AC12D37EC400DA6239 /* rarefactcommand.h */,
                                 A7E9B7AB12D37EC400DA6239 /* rarefactcommand.cpp */,
                                 A7E9B7AF12D37EC400DA6239 /* rarefactsharedcommand.h */,
                                 A7E9B7AE12D37EC400DA6239 /* rarefactsharedcommand.cpp */,
+                               A7B0231716B8245D006BA09E /* removedistscommand.h */,
+                               A7B0231416B8244B006BA09E /* removedistscommand.cpp */,
                                 A7E9B7C412D37EC400DA6239 /* removegroupscommand.h */,
                                 A7E9B7C312D37EC400DA6239 /* removegroupscommand.cpp */,
                                 A7E9B7C612D37EC400DA6239 /* removelineagecommand.h */,
@@ -2310,6 +2325,9 @@
                                 834D9D5C1656DEC800E7FAB9 /* regularizeddecisiontree.cpp in Sources */,
                                 A7496D2E167B531B00CC7D7C /* kruskalwalliscommand.cpp in Sources */,
                                 A79EEF8616971D4A0006DEC1 /* filtersharedcommand.cpp in Sources */,
+                               A74C06E916A9C0A9008390A3 /* primerdesigncommand.cpp in Sources */,
+                               A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */,
+                               A7B0231516B8244C006BA09E /* removedistscommand.cpp in Sources */,
                         );
                         runOnlyForDeploymentPostprocessing = 0;
                 };
diff --git a/aligncommand.cpp b/aligncommand.cpp

index a871244f4538881c8d285c2a1202597be319498d..fc98c10924138a3aa168dabbc609f64bfb89f3e4 100644 (file)
--- a/aligncommand.cpp
+++ b/aligncommand.cpp
@@ -965,6 +965,9 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
                 
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
+            if (pDataArray[i]->count != pDataArray[i]->end) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
                         num += pDataArray[i]->count;
                         CloseHandle(hThreadArray[i]);
                         delete pDataArray[i];
diff --git a/aligncommand.h b/aligncommand.h

index cfded3819156764fc86e49e9cb1977af9bb2ef48..e5e9601781fdff5e35125b49de6ba452e180f9a2 100644 (file)
--- a/aligncommand.h
+++ b/aligncommand.h
@@ -144,8 +144,6 @@ static DWORD WINAPI MyAlignThreadFunction(LPVOID lpParam){
                         inFASTA.seekg(pDataArray->start-1); pDataArray->m->gobble(inFASTA); 
                 }
                 
-               pDataArray->count = pDataArray->end;
-               
                 AlignmentDB* templateDB = new AlignmentDB(pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->threadID);
                 
                 //moved this into driver to avoid deep copies in windows paralellized version
@@ -161,7 +159,7 @@ static DWORD WINAPI MyAlignThreadFunction(LPVOID lpParam){
                         alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, longestBase);
                 }
                 
-               int count = 0;
+               pDataArray->count = 0;
                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
                         
                         if (pDataArray->m->control_pressed) {  break; }
@@ -242,16 +240,16 @@ static DWORD WINAPI MyAlignThreadFunction(LPVOID lpParam){
                                 delete nast;
                                 if (needToDeleteCopy) {   delete copy;   }
                                 
-                               count++;
+                               pDataArray->count++;
                         }
                         delete candidateSeq;
                         
                         //report progress
-                       if((count) % 100 == 0){ pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine();           }
+                       if((pDataArray->count) % 100 == 0){     pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
                         
                 }
                 //report progress
-               if((count) % 100 != 0){ pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine();           }
+               if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
                 
                 delete alignment;
                 delete templateDB;
diff --git a/bayesian.cpp b/bayesian.cpp

index cf70010e76a6b9cba77b8ade2fcd7d18713391d3..6eaab6f2f974f8e1036561c2f76d9587d6dd0399 100644 (file)
--- a/bayesian.cpp
+++ b/bayesian.cpp
@@ -308,7 +308,11 @@ string Bayesian::getTaxonomy(Sequence* seq) {
                 //bootstrap - to set confidenceScore
                 int numToSelect = queryKmers.size() / 8;
         
+        if (m->debug) {  m->mothurOut(seq->getName() + "\t"); }
+        
                 tax = bootstrapResults(queryKmers, index, numToSelect);
+        
+        if (m->debug) {  m->mothurOut("\n"); }
                 
                 return tax;     
         }
@@ -374,6 +378,7 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
                 int seqTaxIndex = tax;
                 TaxNode seqTax = phyloTree->get(tax);
                 
+        
                 while (seqTax.level != 0) { //while you are not at the root
                                         
                                 itBoot2 = confidenceScores.find(seqTaxIndex); //is this a classification we already have a count on
@@ -383,11 +388,13 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
                                         confidence = itBoot2->second;
                                 }
                                 
+                if (m->debug) { m->mothurOut(seqTax.name + "(" + toString(((confidence/(float)iters) * 100)) + ");"); }
+            
                                 if (((confidence/(float)iters) * 100) >= confidenceThreshold) {
                                         confidenceTax = seqTax.name + "(" + toString(((confidence/(float)iters) * 100)) + ");" + confidenceTax;
                                         simpleTax = seqTax.name + ";" + simpleTax;
                                 }
-                               
+            
                                 seqTaxIndex = seqTax.parent;
                                 seqTax = phyloTree->get(seqTax.parent);
                 }
diff --git a/catchallcommand.cpp b/catchallcommand.cpp

index 584f798d30d51155b439064bbf2fafb1b693ad64..a914f733caef6f3535bfe88a4ff532a2989f7971 100644 (file)
--- a/catchallcommand.cpp
+++ b/catchallcommand.cpp
@@ -80,6 +80,7 @@ CatchAllCommand::CatchAllCommand(){
          outputTypes["models"] = tempOutNames;
                 outputTypes["bubble"] = tempOutNames;
                 outputTypes["summary"] = tempOutNames;
+        outputTypes["sabund"] = tempOutNames;
         }
         catch(exception& e) {
                 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
@@ -118,6 +119,7 @@ CatchAllCommand::CatchAllCommand(string option)  {
              outputTypes["models"] = tempOutNames;
              outputTypes["bubble"] = tempOutNames;
              outputTypes["summary"] = tempOutNames;
+            outputTypes["sabund"] = tempOutNames;
  
                         
                         //if the user changes the input directory command factory will send this info to us in the output parameter 
@@ -237,7 +239,7 @@ int CatchAllCommand::execute() {
          catchAllTest = m->getFullPathName(catchAllTest);
          
  #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-        catchAllCommandExe += "mono " + catchAllTest + " ";
+        catchAllCommandExe += "mono \"" + catchAllTest + "\" ";
  #else
          catchAllCommandExe += "\"" + catchAllTest + "\" ";
  #endif
@@ -291,7 +293,7 @@ int CatchAllCommand::execute() {
                                                 //create system command
                                                 string catchAllCommand = "";
                                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-                                                       catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
+                                                       catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \""  + outputPath + + "\" 1";
                                                 #else
                              //removes extra '\\' catchall doesnt like that
                              vector<string> tempNames;
@@ -354,7 +356,7 @@ int CatchAllCommand::execute() {
                                                 //create system command
                                                 string catchAllCommand = "";
                                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-                                                       catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
+                            catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \""  + outputPath + + "\" 1";
                                                 #else
                              //removes extra '\\' catchall doesnt like that
                              vector<string> tempNames;
@@ -439,7 +441,7 @@ int CatchAllCommand::execute() {
                                 //create system command
                                 string catchAllCommand = "";
                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-                                       catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
+                    catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \""  + outputPath + + "\" 1";
                                 #else
                      //removes extra '\\' catchall doesnt like that
                      vector<string> tempNames;
diff --git a/chimeraperseuscommand.cpp b/chimeraperseuscommand.cpp

index b4e478caea6bab2c3cf3a7d13a756f95a29f2223..66889f145af3340d5a7fe38db4e8aa34997c2219 100644 (file)
--- a/chimeraperseuscommand.cpp
+++ b/chimeraperseuscommand.cpp
@@ -1076,6 +1076,9 @@ int ChimeraPerseusCommand::createProcessesGroups(string outputFName, string accn
                         
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
+            if (pDataArray[i]->count != pDataArray[i]->end) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
                         num += pDataArray[i]->count;
                         CloseHandle(hThreadArray[i]);
                         delete pDataArray[i];
diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp

index 576fee94250f780685bbc57cd20bf0478241f74e..a1be2359c1516aa26395a2ba00c6101fb3a996f3 100644 (file)
--- a/chimeraslayercommand.cpp
+++ b/chimeraslayercommand.cpp
@@ -1497,6 +1497,9 @@ int ChimeraSlayerCommand::createProcessesGroups(string outputFName, string accno
                 
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
+            if (pDataArray[i]->count != pDataArray[i]->end) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
                         num += pDataArray[i]->count;
                         CloseHandle(hThreadArray[i]);
                         delete pDataArray[i];
diff --git a/chimeraslayercommand.h b/chimeraslayercommand.h

index 0c6ebe2de76c957f2a3f9c072b9127efb002e5fb..ee43bcbf72a2ca00af3e2270768f34ac1357bb71 100644 (file)
--- a/chimeraslayercommand.h
+++ b/chimeraslayercommand.h
@@ -213,8 +213,6 @@ static DWORD WINAPI MySlayerThreadFunction(LPVOID lpParam){
                         inFASTA.seekg(pDataArray->start-1); pDataArray->m->gobble(inFASTA); 
                 }
                 
-               pDataArray->count = pDataArray->end;
-               
                 if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera;  return 0;    }
                 
                 if (chimera->getUnaligned()) { 
@@ -227,7 +225,7 @@ static DWORD WINAPI MySlayerThreadFunction(LPVOID lpParam){
                 
                 if (pDataArray->start == 0) { chimera->printHeader(out); }
                 
-               int count = 0;
+               pDataArray->count = 0;
                 for(int i = 0; i < pDataArray->end; i++){
                         
                         if (pDataArray->m->control_pressed) {   out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 1;   }
@@ -318,18 +316,18 @@ static DWORD WINAPI MySlayerThreadFunction(LPVOID lpParam){
                                         
                                         
                                 }
-                               count++;
+                               pDataArray->count++;
                         }
                         
                         delete candidateSeq;
                         //report progress
-                       if((count) % 100 == 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine();         }
+                       if((pDataArray->count) % 100 == 0){     pDataArray->m->mothurOut("Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();             }
                 }
                 //report progress
-               if((count) % 100 != 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine();         }
+               if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOut("Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();             }
                 
                 pDataArray->numNoParents = chimera->getNumNoParents();
-               if (pDataArray->numNoParents == count) {        pDataArray->m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors.\n"); }
+               if (pDataArray->numNoParents == pDataArray->count) {    pDataArray->m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors.\n"); }
  
                 out.close();
                 out2.close();
diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp

index ce7ce456d87c5c8c6e5a137cd661b02710e0977c..9a25582ddad078665bbc3f7e098dc4cbcf047097 100644 (file)
--- a/chimerauchimecommand.cpp
+++ b/chimerauchimecommand.cpp
@@ -23,6 +23,7 @@ vector<string> ChimeraUchimeCommand::setParameters(){
          CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+        CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
@@ -61,7 +62,7 @@ string ChimeraUchimeCommand::getHelpString(){
                 string helpString = "";
                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
-               helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
+               helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
          helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
@@ -543,6 +544,8 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
+            
+            strand = validParameter.validFile(parameters, "strand", false);    if (strand == "not found")      {  strand = ""; }
                         
                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
                         ucl = m->isTrue(temp);
@@ -1232,6 +1235,15 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
                         cPara.push_back(tempa);
                 }
+        
+        if (strand != "") {
+                       char* tempA = new char[9]; 
+                       *tempA = '\0'; strncat(tempA, "--strand", 8);
+                       cPara.push_back(tempA);
+                       char* tempa = new char[strand.length()+1];
+                       *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
+                       cPara.push_back(tempa);
+               }
                 
                 if (useAbskew) {
                         char* tempskew = new char[9];
@@ -1631,7 +1643,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                         
                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
-                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
+                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
                         
                         pDataArray.push_back(tempUchime);
                         processIDS.push_back(i);
@@ -1763,7 +1775,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                         
                         uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
-                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
+                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
                         
                         pDataArray.push_back(tempUchime);
                         processIDS.push_back(i);
diff --git a/chimerauchimecommand.h b/chimerauchimecommand.h

index 735c97d1269848b94ac5a01926f93e08d1ccd5ef..6d9d001a142aaa6357cc81614c5f50af23f2bf76 100644 (file)
--- a/chimerauchimecommand.h
+++ b/chimerauchimecommand.h
@@ -49,7 +49,7 @@ private:
         int createProcesses(string, string, string, string, int&);
                 
         bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount, hasName, dups;
-       string fastafile, groupfile, templatefile, outputDir, namefile, countfile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, uchimeLocation;
+       string fastafile, groupfile, templatefile, outputDir, namefile, countfile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, uchimeLocation, strand;
         int processors;
         
         SequenceParser* sparser;
@@ -87,7 +87,7 @@ struct uchimeData {
         int threadID, count, numChimeras;
         vector<string> groups;
         bool useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount;
-       string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract;
+       string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand;
         
         uchimeData(){}
         uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac,  string al, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
@@ -130,10 +130,11 @@ struct uchimeData {
          hasCount = hc;
         }
         
-       void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac) {
+       void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac, string stra) {
                 abskew = abske;
                 minh = min;
                 mindiv = mindi;
+        strand = stra;
                 xn = x;
                 dn = d;
                 xa = xa2;
@@ -243,6 +244,15 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
                                 cPara.push_back(tempa);
                         }
                         
+            if (pDataArray->strand != "") {
+                char* tempA = new char[9]; 
+                *tempA = '\0'; strncat(tempA, "--strand", 8);
+                cPara.push_back(tempA);
+                char* tempa = new char[pDataArray->strand.length()+1];
+                *tempa = '\0'; strncat(tempa, pDataArray->strand.c_str(), pDataArray->strand.length());
+                cPara.push_back(tempa);
+            }
+            
                         if (pDataArray->useAbskew) {
                                 char* tempskew = new char[9];
                                 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
@@ -536,12 +546,13 @@ static DWORD WINAPI MyUchimeSeqsThreadFunction(LPVOID lpParam){
          ofstream out23;
          pDataArray->m->openOutputFile(outputFileName, out23);
          
+        int fcount = 0;
          while (!in23.eof()) {
              if (pDataArray->m->control_pressed) { break;  }
              
              Sequence seq(in23); pDataArray->m->gobble(in23);
              
-            if (seq.getName() != "") { seq.printSequence(out23); }
+            if (seq.getName() != "") { seq.printSequence(out23); fcount++; }
          }
          in23.close();
          out23.close();
@@ -589,6 +600,15 @@ static DWORD WINAPI MyUchimeSeqsThreadFunction(LPVOID lpParam){
                         cPara.push_back(tempa);
                 }
                 
+        if (pDataArray->strand != "") {
+            char* tempA = new char[9]; 
+            *tempA = '\0'; strncat(tempA, "--strand", 8);
+            cPara.push_back(tempA);
+            char* tempa = new char[pDataArray->strand.length()+1];
+            *tempa = '\0'; strncat(tempa, pDataArray->strand.c_str(), pDataArray->strand.length());
+            cPara.push_back(tempa);
+        }
+        
                 if (pDataArray->useAbskew) {
                         char* tempskew = new char[9];
                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
@@ -802,12 +822,15 @@ static DWORD WINAPI MyUchimeSeqsThreadFunction(LPVOID lpParam){
                 in.close();
                 out.close();
                 
+        if (fcount != totalSeqs) { pDataArray->m->mothurOut("[ERROR]: process " + toString(pDataArray->threadID) + " only processed " + toString(pDataArray->count) + " of " + toString(pDataArray->end) + " sequences assigned to it, quitting. \n"); pDataArray->m->control_pressed = true; }
+        
                 if (pDataArray->m->control_pressed) { return 0; }
                 
                 pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences.");       pDataArray->m->mothurOutEndLine();                                      
         
                 pDataArray->count = totalSeqs;
                 pDataArray->numChimeras = numChimeras;
+        
                 return totalSeqs;
                 
         }
diff --git a/chopseqscommand.cpp b/chopseqscommand.cpp

index 0cce69f23efa4869ae5b4b8955c6192d25a69f52..80dd8561ef286fb862c01e99089eaea534f92884 100644 (file)
--- a/chopseqscommand.cpp
+++ b/chopseqscommand.cpp
@@ -331,6 +331,10 @@ bool ChopSeqsCommand::createProcesses(vector<linePair> lines, string filename, s
                 for(int i=0; i < pDataArray.size(); i++){
              if (pDataArray[i]->wroteAccnos) { wroteAccnos = pDataArray[i]->wroteAccnos; nonBlankAccnosFiles.push_back(outAccnos + toString(processIDS[i]) + ".temp");  }
                         else { m->mothurRemove((outAccnos + toString(processIDS[i]) + ".temp"));  }
+            //check to make sure the process finished
+            if (pDataArray[i]->count != pDataArray[i]->end) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
                         CloseHandle(hThreadArray[i]);
                         delete pDataArray[i];
                 }
diff --git a/chopseqscommand.h b/chopseqscommand.h

index cd2b2fe9640eb7e359cdc068d98552611f5b2d13..c9a4962c96fe9e548b50db1e4c0d6f63d1cdfff4 100644 (file)
--- a/chopseqscommand.h
+++ b/chopseqscommand.h
@@ -60,7 +60,7 @@ struct chopData {
         string outFasta, outAccnos, keep; 
         unsigned long long start;
         unsigned long long end;
-       int numbases;
+       int numbases, count;
      bool countGaps, Short, wroteAccnos;
         MothurOut* m;
         string namefile;
@@ -108,7 +108,7 @@ static DWORD WINAPI MyChopThreadFunction(LPVOID lpParam){
  
                 bool done = false;
          bool wroteAccnos = false;
-               int count = 0;
+               pDataArray->count = 0;
  
                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
                                                 
@@ -238,14 +238,14 @@ static DWORD WINAPI MyChopThreadFunction(LPVOID lpParam){
                                         outAcc << seq.getName() << endl;
                                         pDataArray->wroteAccnos = true;
                                 }
-                count++;
+                pDataArray->count++;
                         }
              //report progress
-                       if((count) % 1000 == 0){        pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine();           }
+                       if((pDataArray->count) % 1000 == 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
                         
                 }
                 //report progress
-               if((count) % 1000 != 0){        pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine();           }
+               if((pDataArray->count) % 1000 != 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
          
                 
                 in.close();
diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp

index 89668d9e7bd122577d9be167f590a96a6aedbf22..147b3830dded524050b2b8cf96cfbbabde09e3c0 100644 (file)
--- a/classifyseqscommand.cpp
+++ b/classifyseqscommand.cpp
@@ -610,7 +610,7 @@ int ClassifySeqsCommand::execute(){
         try {
                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
          
-        string outputMethodTag = method + ".";
+        string outputMethodTag = method;
                 if(method == "wang"){   classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);     }
                 else if(method == "knn"){       classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand());                               }
          else if(method == "zap"){      
@@ -782,7 +782,7 @@ int ClassifySeqsCommand::execute(){
                         }
  #endif
                         
-                       if (!m->isBlank(newaccnosFile)) { m->mothurOutEndLine(); m->mothurOut("[WARNING]: mothur suspects some of your sequences may be reversed, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine(); 
+                       if (!m->isBlank(newaccnosFile)) { m->mothurOutEndLine(); m->mothurOut("[WARNING]: mothur reversed some your sequences for a better classification.  If you would like to take a closer look, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine(); 
                  outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
              }else { m->mothurRemove(newaccnosFile); }
  
@@ -1044,6 +1044,9 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
                         num += pDataArray[i]->count;
+            if (pDataArray[i]->count != pDataArray[i]->end) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
                         CloseHandle(hThreadArray[i]);
                         delete pDataArray[i];
                 }
diff --git a/classifyseqscommand.h b/classifyseqscommand.h

index a3328d8dcea09eebe63c641e04e1b816fc92fee1..f8a820dcf6347aa39432d461c78f6f8613ae6acf 100644 (file)
--- a/classifyseqscommand.h
+++ b/classifyseqscommand.h
@@ -163,8 +163,6 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
                         inFASTA.seekg(pDataArray->start-1); pDataArray->m->gobble(inFASTA); 
                 }
                 
-               pDataArray->count = pDataArray->end;
-               
                 //make classify
                 Classify* myclassify;
          string outputMethodTag = pDataArray->method + ".";
@@ -183,7 +181,7 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
                 
                 if (pDataArray->m->control_pressed) { delete myclassify; return 0; }
                 
-               int count = 0;
+               pDataArray->count = 0;
                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
                         
                         if (pDataArray->m->control_pressed) { delete myclassify; return 0; }
@@ -209,15 +207,15 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
                                         
                                 if (myclassify->getFlipped()) { outAcc << candidateSeq->getName() << endl; }
                                 
-                               count++;
+                               pDataArray->count++;
                         }
                         delete candidateSeq;
                         //report progress
-                       if((count) % 100 == 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine();         }
+                       if((pDataArray->count) % 100 == 0){     pDataArray->m->mothurOut("Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();             }
                         
                 }
                 //report progress
-               if((count) % 100 != 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine();         }
+               if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOut("Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();             }
                 
                 delete myclassify;
                 inFASTA.close();
diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp

index 6d3c86ff1fa2efb62d46618f372706fd117c002a..87d26cea75033c911d4db315265123fdd3dd49de 100644 (file)
--- a/clustersplitcommand.cpp
+++ b/clustersplitcommand.cpp
@@ -439,6 +439,8 @@ int ClusterSplitCommand::execute(){
                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
                 delete split;
                 
+        if (m->debug) { m->mothurOut("[DEBUG]: distName.size() = " + toString(distName.size()) + ".\n"); }
+                
                 //output a merged distance file
                 if (splitmethod == "fasta")             { createMergedDistanceFile(distName); }
                         
@@ -447,11 +449,9 @@ int ClusterSplitCommand::execute(){
                 
                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
                 estart = time(NULL);
-                
+              
          if (!runCluster) { 
-#ifdef USE_MPI
-    }
-#endif 
+
                  m->mothurOutEndLine();
                  m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                  for (int i = 0; i < distName.size(); i++) {    m->mothurOut(distName[i].begin()->first); m->mothurOutEndLine(); m->mothurOut(distName[i].begin()->second); m->mothurOutEndLine();      }
@@ -459,7 +459,7 @@ int ClusterSplitCommand::execute(){
                  return 0;
                  
          }
-                
+   
                 //****************** break up files between processes and cluster each file set ******************************//
         #ifdef USE_MPI
                         ////you are process 0 from above////
diff --git a/commandfactory.cpp b/commandfactory.cpp

index a691509ab0ae00e5f19ae52a8bffe6801fc26a31..42e99d1331b3694792eed47e94ab9d21b33f9545 100644 (file)
--- a/commandfactory.cpp
+++ b/commandfactory.cpp
@@ -137,6 +137,9 @@
  #include "sffmultiplecommand.h"
  #include "classifysharedcommand.h"
  #include "filtersharedcommand.h"
+#include "primerdesigncommand.h"
+#include "getdistscommand.h"
+#include "removedistscommand.h"
  
  /*******************************************************/
  
@@ -297,6 +300,9 @@ CommandFactory::CommandFactory(){
         commands["quit"]                                = "MPIEnabled"; 
      commands["classify.shared"]                = "classify.shared"; 
      commands["filter.shared"]          = "filter.shared"; 
+    commands["primer.design"]          = "primer.design";
+    commands["get.dists"]           = "get.dists";
+    commands["remove.dists"]        = "remove.dists";
      
  
  }
@@ -513,6 +519,9 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
          else if(commandName == "sff.multiple")          {      command = new SffMultipleCommand(optionString);             }
          else if(commandName == "classify.shared")       {      command = new ClassifySharedCommand(optionString);          }
          else if(commandName == "filter.shared")         {      command = new FilterSharedCommand(optionString);            }
+        else if(commandName == "primer.design")         {      command = new PrimerDesignCommand(optionString);            }
+        else if(commandName == "get.dists")             {      command = new GetDistsCommand(optionString);                }
+        else if(commandName == "remove.dists")          {      command = new RemoveDistsCommand(optionString);             }
                 else                                                                                    {       command = new NoCommand(optionString);                                          }
  
                 return command;
@@ -670,6 +679,9 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
          else if(commandName == "sff.multiple")          {      pipecommand = new SffMultipleCommand(optionString);             }
          else if(commandName == "classify.shared")       {      pipecommand = new ClassifySharedCommand(optionString);          }
          else if(commandName == "filter.shared")         {      pipecommand = new FilterSharedCommand(optionString);            }
+        else if(commandName == "primer.design")         {      pipecommand = new PrimerDesignCommand(optionString);            }
+        else if(commandName == "get.dists")             {      pipecommand = new GetDistsCommand(optionString);                }
+        else if(commandName == "remove.dists")          {      pipecommand = new RemoveDistsCommand(optionString);             }
                 else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
  
                 return pipecommand;
@@ -813,6 +825,9 @@ Command* CommandFactory::getCommand(string commandName){
          else if(commandName == "sff.multiple")          {      shellcommand = new SffMultipleCommand();            }
          else if(commandName == "classify.shared")       {      shellcommand = new ClassifySharedCommand();         }
          else if(commandName == "filter.shared")         {      shellcommand = new FilterSharedCommand();           }
+        else if(commandName == "primer.design")         {      shellcommand = new PrimerDesignCommand();           }
+        else if(commandName == "get.dists")             {      shellcommand = new GetDistsCommand();               }
+        else if(commandName == "remove.dists")          {      shellcommand = new RemoveDistsCommand();            }
                 else                                                                                    {       shellcommand = new NoCommand();                                         }
  
                 return shellcommand;
@@ -892,6 +907,41 @@ void CommandFactory::printCommands(ostream& out) {
         }
  }
  /***********************************************************************/
+//Writes one line per command category to out, listing the non-hidden commands in that category.
+void CommandFactory::printCommandsCategories(ostream& out) {
+    try {
+        map<string, string> commands = getListCommands();
+        map<string, string>::iterator it;
+        map<string, string> categories; //category name -> comma separated command names
+        map<string, string>::iterator itCat;
+        //loop through each command, grouping its name under its category
+        for (it = commands.begin(); it != commands.end(); it++) {
+            Command* thisCommand = getCommand(it->first);
+            string category = thisCommand->getCommandCategory();
+
+            //don't add hidden commands
+            if (category != "Hidden") {
+                itCat = categories.find(category);
+                if (itCat == categories.end()) {
+                    categories[category] = thisCommand->getCommandName();
+                }else {
+                    itCat->second += ", " + thisCommand->getCommandName();
+                }
+            }
+        }
+        
+        //a map iterates in key order, so the categories print sorted by name
+        for (itCat = categories.begin(); itCat != categories.end(); itCat++) {
+            out << itCat->first << " commands include: " << itCat->second << endl;
+        }
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CommandFactory", "printCommandsCategories");
+               exit(1);
+       }
+}      
+
+/***********************************************************************/
  
  
  
diff --git a/commandfactory.hpp b/commandfactory.hpp

index e95db8016a4149c9c2defdbb9e7933ff6ee3905a..36e8f462b146fc7846ee6cf9ead1eb42e9757a36 100644 (file)
--- a/commandfactory.hpp
+++ b/commandfactory.hpp
@@ -26,6 +26,7 @@ public:
         bool isValidCommand(string);\r
         bool isValidCommand(string, string);\r
         void printCommands(ostream&);\r
+    void printCommandsCategories(ostream&);\r
         void setOutputDirectory(string o)               {       outputDir = o;  m->setOutputDir(o);     }\r
         void setInputDirectory(string i)                {       inputDir = i;           }\r
         void setLogfileName(string n, bool a)   {       logFileName = n;  append = a;           }\r
diff --git a/consensusseqscommand.cpp b/consensusseqscommand.cpp

index f0fc6bff9da6d6d8b0ed8b368ad2422f25cd732a..9bfdc9caba1e6bef2fe14d6eb469af2218bd29d8 100644 (file)
--- a/consensusseqscommand.cpp
+++ b/consensusseqscommand.cpp
@@ -219,6 +219,8 @@ int ConsensusSeqsCommand::execute(){
                 
                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                 
+        int start = time(NULL);
+        
                 readFasta();
                 
                 if (m->control_pressed) { return 0; }
@@ -391,6 +393,8 @@ int ConsensusSeqsCommand::execute(){
                         delete input;
                 }
                 
+        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to find the consensus sequences.");
+        
                 m->mothurOutEndLine();
                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
diff --git a/countgroupscommand.cpp b/countgroupscommand.cpp

index 215c2e5a24c925e449ecbb297f267c463f283d39..6da077485c5fcd7641d759ceb6820dc9652033a7 100644 (file)
--- a/countgroupscommand.cpp
+++ b/countgroupscommand.cpp
@@ -14,9 +14,9 @@
  //**********************************************************************************************************************
  vector<string> CountGroupsCommand::setParameters(){    
         try {
-               CommandParameter pshared("shared", "InputTypes", "", "", "sharedGroup", "sharedGroup", "none","",false,false,true); parameters.push_back(pshared);
-               CommandParameter pgroup("group", "InputTypes", "", "", "sharedGroup", "sharedGroup", "none","",false,false,true); parameters.push_back(pgroup);
-        CommandParameter pcount("count", "InputTypes", "", "", "sharedGroup", "sharedGroup", "none","",false,false,true); parameters.push_back(pcount);
+               CommandParameter pshared("shared", "InputTypes", "", "", "sharedGroup", "sharedGroup", "none","summary",false,false,true); parameters.push_back(pshared);
+               CommandParameter pgroup("group", "InputTypes", "", "", "sharedGroup", "sharedGroup", "none","summary",false,false,true); parameters.push_back(pgroup);
+        CommandParameter pcount("count", "InputTypes", "", "", "sharedGroup", "sharedGroup", "none","summary",false,false,true); parameters.push_back(pcount);
                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);
                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
@@ -32,6 +32,21 @@ vector<string> CountGroupsCommand::setParameters(){
         }
  }
  //**********************************************************************************************************************
+//Returns the output filename pattern for the requested output type, or "" if the type is unknown.
+string CountGroupsCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        //only "summary" has a pattern; any other type is reported and sets control_pressed
+        if (type == "summary") {  pattern = "[filename],count.summary"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountGroupsCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
  string CountGroupsCommand::getHelpString(){    
         try {
                 string helpString = "";
@@ -55,6 +70,8 @@ CountGroupsCommand::CountGroupsCommand(){
         try {
                 abort = true; calledHelp = true;
                 setParameters();
+        vector<string> tempOutNames;
+               outputTypes["summary"] = tempOutNames;
         }
         catch(exception& e) {
                 m->errorOut(e, "CountGroupsCommand", "CountGroupsCommand");
@@ -125,6 +142,8 @@ CountGroupsCommand::CountGroupsCommand(string option)  {
                                 }
                         }
                         
+            vector<string> tempOutNames;
+            outputTypes["summary"] = tempOutNames;
                         
                         //check for required parameters
                         accnosfile = validParameter.validFile(parameters, "accnos", true);
@@ -200,6 +219,15 @@ int CountGroupsCommand::execute(){
                 if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); }
                 
                 if (groupfile != "") {
+            map<string, string> variables; 
+            string thisOutputDir = outputDir;
+            if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
+            variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
+            string outputFileName = getOutputFileName("summary", variables);
+            outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
+            ofstream out;
+            m->openOutputFile(outputFileName, out);
+            
                         GroupMap groupMap(groupfile);
                         groupMap.readMap();
                         
@@ -214,14 +242,24 @@ int CountGroupsCommand::execute(){
                  int num = groupMap.getNumSeqs(Groups[i]);
                  total += num;
                                 m->mothurOut(Groups[i] + " contains " + toString(num) + "."); m->mothurOutEndLine();
+                out << Groups[i] << '\t' << num << endl;
                         }
-            
+            out.close();
              m->mothurOut("\nTotal seqs: " + toString(total) + "."); m->mothurOutEndLine();
                 }
          
          if (m->control_pressed) { return 0; }
          
          if (countfile != "") {
+            map<string, string> variables; 
+            string thisOutputDir = outputDir;
+            if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
+            variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
+            string outputFileName = getOutputFileName("summary", variables);
+            outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
+            ofstream out;
+            m->openOutputFile(outputFileName, out);
+            
                         CountTable ct;
                         ct.readTable(countfile);
              
@@ -236,7 +274,9 @@ int CountGroupsCommand::execute(){
                  int num = ct.getGroupCount(Groups[i]);
                  total += num;
                                 m->mothurOut(Groups[i] + " contains " + toString(num) + "."); m->mothurOutEndLine();
+                out << Groups[i] << '\t' << num << endl;
                         }
+            out.close();
              
              m->mothurOut("\nTotal seqs: " + toString(total) + "."); m->mothurOutEndLine();
                 }
@@ -247,17 +287,33 @@ int CountGroupsCommand::execute(){
                         InputData input(sharedfile, "sharedfile");
                         vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
                         
+            //name the summary file after the shared file, which is the input this branch reads
+            map<string, string> variables; 
+            string thisOutputDir = outputDir;
+            if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
+            variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile));
+            string outputFileName = getOutputFileName("summary", variables);
+            outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
+            ofstream out;
+            m->openOutputFile(outputFileName, out);
             int total = 0;
                         for (int i = 0; i < lookup.size(); i++) {
                 int num = lookup[i]->getNumSeqs();
                 total += num;
                                 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(num) + "."); m->mothurOutEndLine();
+                out << lookup[i]->getGroup() << '\t' << num << endl; //must run before the delete below, lookup[i] is freed there
                                 delete lookup[i];
                         }
+            out.close();
                         
              m->mothurOut("\nTotal seqs: " + toString(total) + "."); m->mothurOutEndLine();
                 }
-                               
+                       
+        m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
+               m->mothurOutEndLine();
+        
                 return 0;               
         }
         
diff --git a/countgroupscommand.h b/countgroupscommand.h

index 1ab83c86ea98cb5a9bdd5c43e31558cbe4971307..cd226a3da2c02107f4512acc126b067d195dfb8c 100644 (file)
--- a/countgroupscommand.h
+++ b/countgroupscommand.h
@@ -24,7 +24,7 @@ public:
         string getCommandName()                 { return "count.groups";                        }
         string getCommandCategory()             { return "Sequence Processing";         }
         string getHelpString(); 
-    string getOutputPattern(string){ return ""; }      
+    string getOutputPattern(string);   
         string getCitation() { return "http://www.mothur.org/wiki/Count.groups"; }
         string getDescription()         { return "counts the number of sequences in each group"; }
         
@@ -36,6 +36,7 @@ private:
         string sharedfile, groupfile, countfile, outputDir, groups, accnosfile;
         bool abort;
         vector<string> Groups;
+    vector<string> outputNames;
  };
  
  #endif
diff --git a/countseqscommand.cpp b/countseqscommand.cpp

index 3bafbcd71752b48e675e0d85fcf037cc896056dc..dfa012eeff0f0f49c778c00b2098348a489f1855 100644 (file)
--- a/countseqscommand.cpp
+++ b/countseqscommand.cpp
@@ -236,7 +236,11 @@ int CountSeqsCommand::processSmall(string outputFileName){
                         
                         string firstCol, secondCol;
                         in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
-                       
+            //cout << firstCol << '\t' << secondCol << endl;
+            m->checkName(firstCol);
+            m->checkName(secondCol);
+                       //cout << firstCol << '\t' << secondCol << endl;
+           
                         vector<string> names;
                         m->splitAtChar(secondCol, names, ',');
                         
@@ -435,6 +439,8 @@ map<int, string> CountSeqsCommand::processNameFile(string name) {
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    m->checkName(firstCol);
+                    m->checkName(secondCol);
                      //parse names into vector
                      vector<string> theseNames;
                      m->splitAtComma(secondCol, theseNames);
@@ -456,6 +462,8 @@ map<int, string> CountSeqsCommand::processNameFile(string name) {
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    m->checkName(firstCol);
+                    m->checkName(secondCol);
                      //parse names into vector
                      vector<string> theseNames;
                      m->splitAtComma(secondCol, theseNames);
@@ -507,6 +515,7 @@ map<int, string> CountSeqsCommand::getGroupNames(string filename, set<string>& n
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    m->checkName(firstCol);
                      it = groupIndex.find(secondCol);
                      if (it == groupIndex.end()) { //add group, assigning the group and number so we can use vectors above
                          groupIndex[secondCol] = count;
@@ -529,6 +538,7 @@ map<int, string> CountSeqsCommand::getGroupNames(string filename, set<string>& n
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    m->checkName(firstCol);
                      it = groupIndex.find(secondCol);
                      if (it == groupIndex.end()) { //add group, assigning the group and number so we can use vectors above
                          groupIndex[secondCol] = count;
diff --git a/counttable.cpp b/counttable.cpp

index 2ab0e345dbd110cc5c0402d0caa36becc1820bd2..ad0b2dadfcba7eb2f866b44d3d9b3011242b096a 100644 (file)
--- a/counttable.cpp
+++ b/counttable.cpp
@@ -131,6 +131,9 @@ int CountTable::createTable(string namefile, string groupfile, bool createGroup)
              string firstCol, secondCol;
              in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
              
+            m->checkName(firstCol);
+            m->checkName(secondCol);
+            
              vector<string> names;
              m->splitAtChar(secondCol, names, ',');
              
diff --git a/distancecommand.cpp b/distancecommand.cpp

index 16fcbf56247bb73bc979f50d4b06ee4ec20e1e7c..19094e9e76ee76981b9d5bd501df3876d401b387 100644 (file)
--- a/distancecommand.cpp
+++ b/distancecommand.cpp
@@ -594,6 +594,9 @@ void DistanceCommand::createProcesses(string filename) {
                 
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
+            if (pDataArray[i]->count != (pDataArray[i]->endLine-pDataArray[i]->startLine)) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->endLine-pDataArray[i]->startLine) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
                         CloseHandle(hThreadArray[i]);
                         delete pDataArray[i];
                 }
diff --git a/distancecommand.h b/distancecommand.h

index ddce0449d5498d48914f95d9ff64900b12837ccf..af55406b372dbf0f56a546be4de2bc87f5072614 100644 (file)
--- a/distancecommand.h
+++ b/distancecommand.h
@@ -33,7 +33,7 @@ struct distanceData {
         vector<string> Estimators;
         MothurOut* m;
         string output;
-       int numNewFasta;
+       int numNewFasta, count;
         string countends;
         
         distanceData(){}
@@ -86,7 +86,7 @@ static DWORD WINAPI MyDistThreadFunction(LPVOID lpParam){
                 ofstream outFile(pDataArray->dFileName.c_str(), ios::trunc);
                 outFile.setf(ios::fixed, ios::showpoint);
                 outFile << setprecision(4);
-               
+               pDataArray->count = 0;
                 
                 if (pDataArray->output != "square") { 
                         if((pDataArray->output == "lt") && (pDataArray->startLine == 0)){       outFile << pDataArray->alignDB.getNumSeqs() << endl;    }
@@ -121,9 +121,9 @@ static DWORD WINAPI MyDistThreadFunction(LPVOID lpParam){
                                 if(i % 100 == 0){
                                         pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
                                 }
-                               
+                               pDataArray->count++;
                         }
-                       pDataArray->m->mothurOut(toString(pDataArray->endLine-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+                       pDataArray->m->mothurOut(toString(pDataArray->count) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
                 }else{
                         if(pDataArray->startLine == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl;    }
                         
@@ -150,9 +150,9 @@ static DWORD WINAPI MyDistThreadFunction(LPVOID lpParam){
                                 if(i % 100 == 0){
                                         pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
                                 }
-                               
+                               pDataArray->count++;
                         }
-                       pDataArray->m->mothurOut(toString(pDataArray->endLine-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+                       pDataArray->m->mothurOut(toString(pDataArray->count) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
                 }
                 
                 outFile.close();
diff --git a/engine.cpp b/engine.cpp

index 48f782ab2a590d71a292e7b83125ba8ce0f3fffe..e4e107168c5723c82b90a7f103ccb8d9ee83cb60 100644 (file)
--- a/engine.cpp
+++ b/engine.cpp
@@ -65,7 +65,9 @@ bool InteractEngine::getInput(){
                                 if (pid == 0) {
                                 
                         #endif
-                       
+                    
+                       if (mout->changedSeqNames) { mout->mothurOut("[WARNING]: your sequence names contained ':'.  I changed them to '_' to avoid problems in your downstream analysis.\n"); }
+                    
                         mout->mothurOutEndLine();
                         
                         input = getCommand();   
@@ -115,6 +117,7 @@ bool InteractEngine::getInput(){
                                         //cout << pid << " is in execute " << commandName << endl;
                                         #endif
                                         //executes valid command
+                    mout->changedSeqNames = false;
                                         mout->runParse = true;
                                         mout->clearGroups();
                                         mout->clearAllGroups();
@@ -276,7 +279,7 @@ bool BatchEngine::getInput(){
                         
                         
                         if (input[0] != '#') {
-                               
+                               if (mout->changedSeqNames) { mout->mothurOut("[WARNING]: your sequence names contained ':'.  I changed them to '_' to avoid problems in your downstream analysis.\n"); }
                                 mout->mothurOutEndLine();
                                 mout->mothurOut("mothur > " + input);
                                 mout->mothurOutEndLine();
@@ -300,6 +303,7 @@ bool BatchEngine::getInput(){
                                                 if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
                                         #endif
                                         //executes valid command
+                    mout->changedSeqNames = false;
                                         mout->runParse = true;
                                         mout->clearGroups();
                                         mout->clearAllGroups();
@@ -413,6 +417,8 @@ bool ScriptEngine::getInput(){
                         input = getNextCommand(listOfCommands); 
                         
                         if (input == "") { input = "quit()"; }
+                    
+            if (mout->changedSeqNames) { mout->mothurOut("[WARNING]: your sequence names contained ':'.  I changed them to '_' to avoid problems in your downstream analysis.\n"); }
                         
                         if (mout->gui) {
                                 if ((input.find("quit") != string::npos) || (input.find("set.logfile") != string::npos)) {}
@@ -468,6 +474,7 @@ bool ScriptEngine::getInput(){
                                                         //cout << pid << " is in execute" << endl;      
                                         #endif
                                         //executes valid command
+                    mout->changedSeqNames = false;
                                         mout->runParse = true;
                                         mout->clearGroups();
                                         mout->clearAllGroups();
diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp

index f6ca47988809f623b940f2ae04703d276ad90fa8..b17d9ed6799d9c8287664cf63241b0fde01bf40e 100644 (file)
--- a/filterseqscommand.cpp
+++ b/filterseqscommand.cpp
@@ -729,6 +729,9 @@ int FilterSeqsCommand::createProcessesRunFilter(string F, string filename, strin
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
                         num += pDataArray[i]->count;
+            if (pDataArray[i]->count != pDataArray[i]->end) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
              CloseHandle(hThreadArray[i]);
                         delete pDataArray[i];
                 }
@@ -1176,6 +1179,9 @@ int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename)
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
                         num += pDataArray[i]->count;
+            if (pDataArray[i]->count != pDataArray[i]->end) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
              F.mergeFilter(pDataArray[i]->F.getFilter());
              
                         for (int k = 0; k < alignmentLength; k++) {      F.a[k] += pDataArray[i]->F.a[k];       }
diff --git a/filterseqscommand.h b/filterseqscommand.h

index fc6bcb2efab1b3885af316a160faa59446600f30..90c13f8ee82722570d129d74874b519cc4ee9a60 100644 (file)
--- a/filterseqscommand.h
+++ b/filterseqscommand.h
@@ -153,7 +153,7 @@ static DWORD WINAPI MyCreateFilterThreadFunction(LPVOID lpParam){
                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); \r
                 }\r
                 \r
-               pDataArray->count = pDataArray->end;\r
+               pDataArray->count = 0;\r
                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process\r
                         \r
                         if (pDataArray->m->control_pressed) { in.close(); pDataArray->count = 1; return 1; }\r
@@ -166,7 +166,7 @@ static DWORD WINAPI MyCreateFilterThreadFunction(LPVOID lpParam){
                  if(pDataArray->trump != '*')                   {       pDataArray->F.doTrump(current);         }\r
                  if(pDataArray->m->isTrue(pDataArray->vertical) || pDataArray->soft != 0)       {       pDataArray->F.getFreqs(current);        }\r
                         }\r
-            \r
+            pDataArray->count++;\r
              //report progress\r
                         if((i) % 100 == 0){     pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine();               }\r
                 }\r
@@ -203,7 +203,7 @@ static DWORD WINAPI MyRunFilterThreadFunction(LPVOID lpParam){
                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); \r
                 }\r
                 \r
-               pDataArray->count = pDataArray->end;\r
+               pDataArray->count = 0;\r
                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process\r
                         \r
                         if (pDataArray->m->control_pressed) { in.close(); out.close(); pDataArray->count = 1; return 1; }\r
@@ -221,7 +221,7 @@ static DWORD WINAPI MyRunFilterThreadFunction(LPVOID lpParam){
                  \r
                  out << '>' << seq.getName() << endl << filterSeq << endl;\r
              }\r
-            \r
+            pDataArray->count++;\r
              //report progress\r
                         if((i) % 100 == 0){     pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine();               }\r
                 }\r
diff --git a/filtersharedcommand.cpp b/filtersharedcommand.cpp

index 413a9a432532b12987a8fa8eb65948692cec9b22..4d1c301edf9566b8101e46320e71dc84f8479b67 100644 (file)
--- a/filtersharedcommand.cpp
+++ b/filtersharedcommand.cpp
@@ -405,22 +405,8 @@ int FilterSharedCommand::processShared(vector<SharedRAbundVector*>& thislookup)
                  for (int j = 0; j < rareCounts.size(); j++) { //add "rare" OTU to the filtered lookup
                      filteredLookup[j]->push_back(rareCounts[j], thislookup[j]->getGroup());
                  }
-                
-                //create new label
-                string oldLastLabel = saveBinLabels[saveBinLabels.size()-1];
-                string tag = "";
-                string otuNumber = "";
-                for (int i = 0;i < oldLastLabel.length(); i++){
-                    //add numbers
-                    if( oldLastLabel[i]>47 && oldLastLabel[i]<58){ otuNumber += oldLastLabel[i];  }
-                    else { tag += oldLastLabel[i]; }
-                }
-                
-                int oldLastBin;
-                m->mothurConvert(otuNumber, oldLastBin);
-                oldLastBin++;
-                string newLabel = tag + toString(oldLastBin);
-                filteredLabels.push_back(newLabel);
+                //create label for rare OTUs
+                filteredLabels.push_back("rareOTUs");
              }
          }
          
diff --git a/flowdata.cpp b/flowdata.cpp

index 1fe7d7faf1e31ee486890980c4731ddebfdca8fb..5dc7dc3e6c780171f1ee5cacdb1d766864f18a18 100644 (file)
--- a/flowdata.cpp
+++ b/flowdata.cpp
@@ -42,15 +42,14 @@ FlowData::FlowData(int numFlows, float signal, float noise, int maxHomoP, string
  bool FlowData::getNext(ifstream& flowFile){
         
         try {
-               flowFile >> seqName >> endFlow; 
-        if (seqName.length() != 0) {
-            //cout << "in Flowdata " + seqName << endl;
+        seqName = getSequenceName(flowFile);
+               flowFile >> endFlow;    
+        if (!m->control_pressed) {
              for(int i=0;i<numFlows;i++)        {       flowFile >> flowData[i];        }
-            //cout << "in Flowdata read " << seqName + " done" << endl;
              updateEndFlow(); 
              translateFlow();
              m->gobble(flowFile);
-               }else{ m->mothurOut("Error in reading your flowfile, at position " + toString(flowFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+               }
              
                 if(flowFile){   return 1;       }
                 else            {       return 0;       }
@@ -61,6 +60,26 @@ bool FlowData::getNext(ifstream& flowFile){
         }
         
  }
+//********************************************************************************************************************
+//Reads the next whitespace-delimited token from flowFile and returns it as the sequence name.
+//Each ':' in the name is rewritten to '_' and m->changedSeqNames is set when that happens.
+//An empty token is reported through m->mothurOut, sets m->control_pressed, and is returned as "".
+string FlowData::getSequenceName(ifstream& flowFile) {
+       try {
+               string seqLabel = "";
+               flowFile >> seqLabel;
+
+               if (seqLabel.length() == 0) {
+                       m->mothurOut("Error in reading your flowfile, at position " + toString(flowFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;
+                       return seqLabel;
+               }
+
+               //hop from one ':' to the next instead of testing every character
+               for (string::size_type pos = seqLabel.find(':'); pos != string::npos; pos = seqLabel.find(':', pos + 1)) {
+                       seqLabel[pos] = '_';
+                       m->changedSeqNames = true;
+               }
+
+               return seqLabel;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "FlowData", "getSequenceName");
+               exit(1);
+       }
+}
  
  //**********************************************************************************************************************
  
diff --git a/flowdata.h b/flowdata.h

index 100765370ca5429e75b8688a6abc51d79f17edc5..c7fd08a0a0100841e2805062ae5f44f7e5821d6d 100644 (file)
--- a/flowdata.h
+++ b/flowdata.h
@@ -38,6 +38,7 @@ private:
         string seqName, locationString, sequence, baseFlow;
         int numFlows, maxFlows, endFlow;
         vector<float> flowData;
+    string getSequenceName(ifstream&);
  };
  
  #endif
diff --git a/getdistscommand.cpp b/getdistscommand.cpp

new file mode 100644 (file)

index 0000000..77cfba4
--- /dev/null
+++ b/getdistscommand.cpp
@@ -0,0 +1,442 @@
+//
+//  getdistscommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 1/28/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "getdistscommand.h"
+
+//**********************************************************************************************************************
+//Appends this command's parameter definitions to `parameters` and returns the name of every entry
+//currently held in `parameters`.
+vector<string> GetDistsCommand::setParameters(){
+       try {
+               //file inputs: the two distance formats (both tagged "PhylipColumn") and the accnos list
+               parameters.push_back(CommandParameter("phylip", "InputTypes", "", "", "none", "PhylipColumn", "none","phylip",false,false,true));
+               parameters.push_back(CommandParameter("column", "InputTypes", "", "", "none", "PhylipColumn", "none","column",false,false,true));
+               parameters.push_back(CommandParameter("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true));
+
+               //directory overrides
+               parameters.push_back(CommandParameter("inputdir", "String", "", "", "", "", "","",false,false));
+               parameters.push_back(CommandParameter("outputdir", "String", "", "", "", "", "","",false,false));
+
+               //collect the names in registration order
+               vector<string> paramNames;
+               for (size_t idx = 0; idx < parameters.size(); idx++) { paramNames.push_back(parameters[idx].name); }
+               return paramNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetDistsCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//Returns the multi-line help text shown for the get.dists command.
+string GetDistsCommand::getHelpString(){
+       try {
+               //adjacent literals are joined by the compiler into the single help text
+               string helpString =
+                       "The get.dists command selects distances from a phylip or column file related to groups or sequences listed in an accnos file.\n"
+                       "The get.dists command parameters are accnos, phylip and column.\n"
+                       "The get.dists command should be in the following format: get.dists(accnos=yourAccnos, phylip=yourPhylip).\n"
+                       "Example get.dists(accnos=final.accnos, phylip=final.an.thetayc.0.03.lt.ave.dist).\n"
+                       "Note: No spaces between parameter labels (i.e. accnos), '=' and parameters (i.e.final.accnos).\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetDistsCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//Returns the output file name pattern for the given output type ("phylip" or "column").
+//Any other type is reported as an error, sets m->control_pressed, and yields "".
+string GetDistsCommand::getOutputPattern(string type) {
+    try {
+        //both distance formats share one naming pattern
+        if ((type == "phylip") || (type == "column")) { return "[filename],pick,[extension]"; }
+
+        m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GetDistsCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+//Default constructor: leaves the command flagged as aborted/help-only, registers its parameters
+//and creates empty output lists for the "phylip" and "column" types.
+GetDistsCommand::GetDistsCommand(){
+       try {
+               calledHelp = true;
+               abort = true;
+               setParameters();
+
+               const vector<string> noFiles;
+               outputTypes["phylip"] = noFiles;
+               outputTypes["column"] = noFiles;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetDistsCommand", "GetDistsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//Builds the command from its option string.
+//"help" and "citation" are answered immediately and leave the command aborted. Otherwise the
+//options are validated and accnosfile, phylipfile, columnfile and outputDir are filled in, with
+//abort set when a parameter is invalid, a file cannot be opened, or a required input is missing.
+GetDistsCommand::GetDistsCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;
+
+               //allow user to run help
+               if (option == "help") { help(); abort = true; calledHelp = true; return; }
+               if (option == "citation") { citation(); abort = true; calledHelp = true; return; }
+
+               vector<string> validNames = setParameters();
+
+               OptionParser parser(option);
+               map<string,string> parameters = parser.getParameters();
+
+               ValidParameters validParameter;
+               map<string,string>::iterator it;
+
+               //every supplied parameter must be one this command declares
+               for (it = parameters.begin(); it != parameters.end(); it++) {
+                       if (validParameter.isValidParameter(it->first, validNames, it->second) != true) { abort = true; }
+               }
+
+               //start both output types with an empty file list
+               const vector<string> noFiles;
+               outputTypes["column"] = noFiles;
+               outputTypes["phylip"] = noFiles;
+
+               //an outputdir that was not supplied is stored as ""
+               outputDir = validParameter.validFile(parameters, "outputdir", false);
+               if (outputDir == "not found") { outputDir = ""; }
+
+               //when an inputdir is supplied, prepend it to each input file that was given without a path
+               string inputDir = validParameter.validFile(parameters, "inputdir", false);
+               if (inputDir == "not found") { inputDir = ""; }
+               else {
+                       const char* fileParams[] = { "phylip", "column", "accnos" };
+                       for (int i = 0; i < 3; i++) {
+                               it = parameters.find(fileParams[i]);
+                               if (it == parameters.end()) { continue; } //this file was not given
+
+                               string path = m->hasPath(it->second);
+                               //a file that already carries a path is left alone
+                               if (path == "") { it->second = inputDir + it->second; }
+                       }
+               }
+
+               //accnos is required: use the one given, else the current accnos file, else abort
+               accnosfile = validParameter.validFile(parameters, "accnos", true);
+               if (accnosfile == "not open") { abort = true; }
+               else if (accnosfile == "not found") {
+                       accnosfile = m->getAccnosFile();
+                       if (accnosfile == "") {
+                               m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine();
+                               abort = true;
+                       }else { m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
+               }else { m->setAccnosFile(accnosfile); }
+
+               //phylip is optional here; a file that cannot be opened aborts the command
+               phylipfile = validParameter.validFile(parameters, "phylip", true);
+               if ((phylipfile == "not open") || (phylipfile == "not found")) {
+                       if (phylipfile == "not open") { abort = true; }
+                       phylipfile = "";
+               }else { m->setPhylipFile(phylipfile); }
+
+               //column is handled the same way as phylip
+               columnfile = validParameter.validFile(parameters, "column", true);
+               if ((columnfile == "not open") || (columnfile == "not found")) {
+                       if (columnfile == "not open") { abort = true; }
+                       columnfile = "";
+               }else { m->setColumnFile(columnfile); }
+
+               //neither distance file was given: try the current column file first, then the current phylip file
+               if ((phylipfile == "") && (columnfile == "")) {
+                       columnfile = m->getColumnFile();
+                       if (columnfile == "") {
+                               phylipfile = m->getPhylipFile();
+                               if (phylipfile == "") {
+                                       m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine();
+                                       abort = true;
+                               }else { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
+                       }else { m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetDistsCommand", "GetDistsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+//Runs the command: loads the accnos names, processes the phylip and/or column file, lists the
+//output files and makes the first output of each type the current phylip / column file.
+//Returns 2 when the command was aborted for a reason other than help, otherwise 0.
+int GetDistsCommand::execute(){
+       try {
+               if (abort == true) { return (calledHelp ? 0 : 2); }
+
+               //get names you want to keep
+               names = m->readAccnos(accnosfile);
+               if (m->control_pressed) { return 0; }
+
+               //read through whichever distance files were supplied
+               if (phylipfile != "") { readPhylip(); }
+               if (columnfile != "") { readColumn(); }
+
+               //control_pressed was raised while reading: remove whatever output was recorded
+               if (m->control_pressed) {
+                       for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
+                       return 0;
+               }
+
+               //nothing was produced, so there is nothing to report
+               if (outputNames.size() == 0) { return 0; }
+
+               m->mothurOutEndLine();
+               m->mothurOut("Output File names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+               m->mothurOutEndLine();
+
+               //the first phylip output becomes the current phylip file
+               itTypes = outputTypes.find("phylip");
+               if ((itTypes != outputTypes.end()) && ((itTypes->second).size() != 0)) {
+                       string firstPhylip = (itTypes->second)[0];
+                       m->setPhylipFile(firstPhylip);
+               }
+
+               //the first column output becomes the current column file
+               itTypes = outputTypes.find("column");
+               if ((itTypes != outputTypes.end()) && ((itTypes->second).size() != 0)) {
+                       string firstColumn = (itTypes->second)[0];
+                       m->setColumnFile(firstColumn);
+               }
+
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetDistsCommand", "execute");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+int GetDistsCommand::readPhylip(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(phylipfile);  }
+        map<string, string> variables; 
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(phylipfile));
+        variables["[extension]"] = m->getExtension(phylipfile);
+               string outputFileName = getOutputFileName("phylip", variables);
+               
+        ifstream in;
+        m->openInputFile(phylipfile, in);
+        
+        float distance;
+        int square, nseqs; 
+        string name;
+        unsigned int row;
+        set<unsigned int> rows; //converts names in names to a index
+        row = 0;
+        
+        string numTest;
+        in >> numTest >> name;
+        
+        if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+        else { convert(numTest, nseqs); }
+        
+        if (names.count(name) != 0) { rows.insert(row); }
+        row++;
+        
+        //is the matrix square?
+        char d;
+        while((d=in.get()) != EOF){
+            
+            if(isalnum(d)){
+                square = 1;
+                in.putback(d);
+                for(int i=0;i<nseqs;i++){
+                    in >> distance;
+                }
+                break;
+            }
+            if(d == '\n'){
+                square = 0;
+                break;
+            }
+        }
+        
+        //map name to row/column        
+        if(square == 0){
+            for(int i=1;i<nseqs;i++){
+                in >> name;  
+                if (names.count(name) != 0) { rows.insert(row); }
+                row++;
+                
+                for(int j=0;j<i;j++){
+                    if (m->control_pressed) {  in.close(); return 0;  }
+                    in >> distance;
+                }
+            }
+        }
+        else{
+             for(int i=1;i<nseqs;i++){
+                 in >> name;  
+                 if (names.count(name) != 0) { rows.insert(row); }
+                 row++;
+                 for(int j=0;j<nseqs;j++){
+                     if (m->control_pressed) {  in.close(); return 0;  }
+                     in >> distance;
+                 }
+             }
+        }
+        in.close();
+        
+        if (m->control_pressed) {  return 0; }
+        
+        //read through file only printing rows and columns of seqs in names
+        ifstream inPhylip;
+        m->openInputFile(phylipfile, inPhylip);
+        
+        inPhylip >> numTest;
+        
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+        outputTypes["phylip"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+        out << names.size() << endl;
+        
+        unsigned int count = 0;
+               if(square == 0){
+            for(int i=0;i<nseqs;i++){
+                inPhylip >> name;  
+                bool ignoreRow = false;
+                
+                if (names.count(name) == 0) { ignoreRow = true; }
+                else{ out << name << '\t'; count++; }
+                
+                for(int j=0;j<i;j++){
+                    if (m->control_pressed) {  inPhylip.close(); out.close();  return 0;  }
+                    inPhylip >> distance;
+                    if (!ignoreRow) {
+                        //is this a column we want
+                        if(rows.count(j) != 0) {  out << distance << '\t';  }
+                    }
+                }
+                if (!ignoreRow) { out << endl; }
+            }
+        }
+        else{
+            for(int i=0;i<nseqs;i++){
+                inPhylip >> name; 
+                
+                bool ignoreRow = false;
+                
+                if (names.count(name) == 0) { ignoreRow = true; }
+                else{ out << name << '\t'; count++; }
+                
+                for(int j=0;j<nseqs;j++){
+                    if (m->control_pressed) {  inPhylip.close(); out.close(); return 0;  }
+                    inPhylip >> distance;
+                    if (!ignoreRow) {
+                        //is this a column we want
+                        if(rows.count(j) != 0) {  out << distance << '\t';  }
+                    }
+                }
+                if (!ignoreRow) { out << endl; }
+            }
+        }
+        inPhylip.close();
+               out.close();
+               
+               if (count == 0) {  m->mothurOut("Your file does NOT contain distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine();  }
+        else if (count != names.size()) {
+            m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(count) + " of them in the phylip file."); m->mothurOutEndLine();
+            //rewrite with new number
+            m->renameFile(outputFileName, outputFileName+".temp");
+            ofstream out2;
+            m->openOutputFile(outputFileName, out2);
+            out2 << count << endl;
+            
+            ifstream in3;
+            m->openInputFile(outputFileName+".temp", in3);
+            in3 >> nseqs; m->gobble(in3);
+            char buffer[4096];        
+            while (!in3.eof()) {
+                in3.read(buffer, 4096);
+                out2.write(buffer, in3.gcount());
+            }
+            in3.close();
+            out2.close();
+            m->mothurRemove(outputFileName+".temp");
+        }
+               
+               m->mothurOut("Selected " + toString(count) + " groups or sequences from your phylip file."); m->mothurOutEndLine();
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetDistsCommand", "readPhylip");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int GetDistsCommand::readColumn(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(columnfile);  }
+        map<string, string> variables; 
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(columnfile));
+        variables["[extension]"] = m->getExtension(columnfile);
+               string outputFileName = getOutputFileName("column", variables);
+        outputTypes["column"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+        
+        ifstream in;
+        m->openInputFile(columnfile, in);
+        
+        set<string> foundNames;
+        string firstName, secondName;
+        float distance;
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { out.close(); in.close(); return 0; }
+            
+            in >> firstName >> secondName >> distance; m->gobble(in);
+            
+            //are both names in the accnos file
+            if ((names.count(firstName) != 0) && (names.count(secondName) != 0)) {
+                out << firstName << '\t' << secondName << '\t' << distance << endl;
+                foundNames.insert(firstName);
+                foundNames.insert(secondName);
+            }
+        }
+               in.close();
+               out.close();
+        
+        if (foundNames.size() == 0) {  m->mothurOut("Your file does NOT contain distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine();  }
+        else if (foundNames.size() != names.size()) {
+            m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(foundNames.size()) + " of them in the column file."); m->mothurOutEndLine();
+        }
+               
+               m->mothurOut("Selected " + toString(foundNames.size()) + " groups or sequences from your column file."); m->mothurOutEndLine();
+        
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetDistsCommand", "readColumn");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
diff --git a/getdistscommand.h b/getdistscommand.h

new file mode 100644 (file)

index 0000000..8767067
--- /dev/null
+++ b/getdistscommand.h
@@ -0,0 +1,48 @@
+//
+//  getdistscommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 1/28/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_getdistscommand_h
+#define Mothur_getdistscommand_h
+
+#include "command.hpp"
+
+//get.dists command: keeps only the distances from a phylip or column file that relate to
+//the groups or sequences listed in an accnos file
+class GetDistsCommand : public Command {
+       
+public:
+       
+       //NOTE(review): the string is presumably the user's option string, as in the other commands - confirm in the .cpp
+       GetDistsCommand(string);        
+       GetDistsCommand();
+       ~GetDistsCommand(){}
+       
+       vector<string> setParameters();
+       string getCommandName()                 { return "get.dists";                           }
+       string getCommandCategory()             { return "General";                 }
+       
+       string getHelpString(); 
+    string getOutputPattern(string);   
+       string getCitation() { return "http://www.mothur.org/wiki/Get.dists"; }
+       string getDescription()         { return "gets distances from a phylip or column file related to groups or sequences listed in an accnos file"; }
+    
+       
+       //reads the accnos names, filters whichever of phylipfile / columnfile is set, and lists the output files
+       int execute(); 
+       //prints the help string
+       void help() { m->mothurOut(getHelpString()); }  
+       
+       
+private:
+       //names read from accnosfile by execute(); only distances between these are kept
+       set<string> names;
+       string accnosfile, phylipfile, columnfile, outputDir;
+       //when true execute() returns without doing any work
+       bool abort;
+       //every output file created, reported at the end of execute() and removed if the run is cancelled
+       vector<string> outputNames;
+       
+       //writes the rows and columns of phylipfile whose names are in names
+       int readPhylip();
+       //writes the lines of columnfile where both names are in names
+       int readColumn();
+       
+};
+
+
+#endif
diff --git a/getotulabelscommand.cpp b/getotulabelscommand.cpp

index 0f042c0bcb9a4de9d8cebd4f1bd017d1c65c37fb..b6253b02a64ec25c6a394b59438a79554c3685eb 100644 (file)
--- a/getotulabelscommand.cpp
+++ b/getotulabelscommand.cpp
@@ -13,8 +13,11 @@ vector<string> GetOtuLabelsCommand::setParameters(){
         try {
          CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true, true); parameters.push_back(paccnos);
          CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "FNGLT", "none","constaxonomy",false,false, true); parameters.push_back(pconstaxonomy);
+        CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false, true); parameters.push_back(plist);
+        CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT", "none","shared",false,false, true); parameters.push_back(pshared);
                 CommandParameter potucorr("otucorr", "InputTypes", "", "", "none", "FNGLT", "none","otucorr",false,false, true); parameters.push_back(potucorr);
          CommandParameter pcorraxes("corraxes", "InputTypes", "", "", "none", "FNGLT", "none","corraxes",false,false, true); parameters.push_back(pcorraxes);
+        CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
          CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                 
@@ -31,11 +34,12 @@ vector<string> GetOtuLabelsCommand::setParameters(){
  string GetOtuLabelsCommand::getHelpString(){   
         try {
                 string helpString = "";
-               helpString += "The get.otulabels command can be used to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n";
-               helpString += "The get.otulabels parameters are: constaxonomy, otucorr, corraxes, and accnos.\n";
+               helpString += "The get.otulabels command can be used to select specific otus with the output from classify.otu, otu.association, or corr.axes commands.  It can also be used to select a set of otus from a shared or list file.\n";
+               helpString += "The get.otulabels parameters are: constaxonomy, otucorr, corraxes, shared, list, label and accnos.\n";
                 helpString += "The constaxonomy parameter is used to input the results of the classify.otu command.\n";
          helpString += "The otucorr parameter is used to input the results of the otu.association command.\n";
          helpString += "The corraxes parameter is used to input the results of the corr.axes command.\n";
+        helpString += "The label parameter is used to analyze specific labels in your input. \n";
                 helpString += "The get.otulabels commmand should be in the following format: \n";
                 helpString += "get.otulabels(accnos=yourListOfOTULabels, corraxes=yourCorrAxesFile)\n";
                 return helpString;
@@ -50,9 +54,11 @@ string GetOtuLabelsCommand::getOutputPattern(string type) {
      try {
          string pattern = "";
          
-        if (type == "constaxonomy")            {   pattern = "[filename],pick,[extension]";    }
-        else if (type == "otucorr")    {   pattern = "[filename],pick,[extension]";    }
+        if (type == "constaxonomy")         {   pattern = "[filename],pick,[extension]";    }
+        else if (type == "otucorr")         {   pattern = "[filename],pick,[extension]";    }
          else if (type == "corraxes")        {   pattern = "[filename],pick,[extension]";    }
+        else if (type == "list")            {   pattern = "[filename],[distance],pick,[extension]";    }
+        else if (type == "shared")          {   pattern = "[filename],[distance],pick,[extension]";    }
          else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
          
          return pattern;
@@ -72,6 +78,8 @@ GetOtuLabelsCommand::GetOtuLabelsCommand(){
                 outputTypes["constaxonomy"] = tempOutNames; 
          outputTypes["otucorr"] = tempOutNames;
          outputTypes["corraxes"] = tempOutNames;
+        outputTypes["shared"] = tempOutNames;
+        outputTypes["list"] = tempOutNames;
         }
         catch(exception& e) {
                 m->errorOut(e, "GetOtuLabelsCommand", "GetOtuLabelsCommand");
@@ -141,12 +149,30 @@ GetOtuLabelsCommand::GetOtuLabelsCommand(string option)  {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["otucorr"] = inputDir + it->second;          }
                                 }
+                
+                it = parameters.find("list");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["list"] = inputDir + it->second;             }
+                               }
+                
+                it = parameters.find("shared");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
              }
              
              vector<string> tempOutNames;
              outputTypes["constaxonomy"] = tempOutNames; 
              outputTypes["otucorr"] = tempOutNames;
              outputTypes["corraxes"] = tempOutNames;
+            outputTypes["shared"] = tempOutNames;
+            outputTypes["list"] = tempOutNames;
              
                         //check for parameters
              accnosfile = validParameter.validFile(parameters, "accnos", true);
@@ -171,12 +197,26 @@ GetOtuLabelsCommand::GetOtuLabelsCommand(string option)  {
              otucorrfile = validParameter.validFile(parameters, "otucorr", true);
                         if (otucorrfile == "not open") { otucorrfile = ""; abort = true; }
                         else if (otucorrfile == "not found") {  otucorrfile = "";  }
-
+            
+            listfile = validParameter.validFile(parameters, "list", true);
+                       if (listfile == "not open") { listfile = ""; abort = true; }
+                       else if (listfile == "not found") {  listfile = "";  }
+            else { m->setListFile(listfile); }
+            
+            sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+                       else if (sharedfile == "not found") {  sharedfile = "";  }
+            else { m->setSharedFile(sharedfile); }
              
              //if the user changes the output directory command factory will send this info to us in the output parameter 
                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){   outputDir = "";        }
              
-            if ((constaxonomyfile == "") && (corraxesfile == "") && (otucorrfile == ""))  { m->mothurOut("You must provide one of the following: constaxonomy, corraxes or otucorr."); m->mothurOutEndLine(); abort = true; }
+            if ((constaxonomyfile == "") && (corraxesfile == "") && (otucorrfile == "") && (sharedfile == "") && (listfile == ""))  { m->mothurOut("You must provide one of the following: constaxonomy, corraxes, otucorr, shared or list."); m->mothurOutEndLine(); abort = true; }
+            
+            if ((sharedfile != "") || (listfile != "")) {
+                label = validParameter.validFile(parameters, "label", false);                  
+                if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
+            }
                 }
                 
         }
@@ -201,6 +241,8 @@ int GetOtuLabelsCommand::execute(){
                 if (constaxonomyfile != "")     {               readClassifyOtu();      }
                 if (corraxesfile != "")         {               readCorrAxes();         }
                 if (otucorrfile != "")          {               readOtuAssociation();   }
+        if (listfile != "")         {          readList();             }
+        if (sharedfile != "")          {               readShared();           }
          
          if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
          
@@ -210,6 +252,17 @@ int GetOtuLabelsCommand::execute(){
                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
                 m->mothurOutEndLine();
          
+        string current = "";
+        itTypes = outputTypes.find("list");
+        if (itTypes != outputTypes.end()) {
+            if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
+        }
+        
+        itTypes = outputTypes.find("shared");
+        if (itTypes != outputTypes.end()) {
+            if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
+        }
+        
          return 0;
      }
         catch(exception& e) {
@@ -383,3 +436,286 @@ int GetOtuLabelsCommand::readCorrAxes(){
         }
  }
  //**********************************************************************************************************************
+int GetOtuLabelsCommand::readShared(){
+       try {
+        
+        getShared();
+        
+        if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } return 0; }
+          
+        vector<string> newLabels;
+        
+        //create new "filtered" lookup
+        vector<SharedRAbundVector*> newLookup;
+        for (int i = 0; i < lookup.size(); i++) {
+            SharedRAbundVector* temp = new SharedRAbundVector();
+                       temp->setLabel(lookup[i]->getLabel());
+                       temp->setGroup(lookup[i]->getGroup());
+                       newLookup.push_back(temp);
+        }
+        
+        bool wroteSomething = false;
+        int numSelected = 0;
+        for (int i = 0; i < lookup[0]->getNumBins(); i++) {
+            
+            if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
+            
+            //is this otu on the list
+            if (labels.count(m->currentBinLabels[i]) != 0) {
+                numSelected++; wroteSomething = true;
+                newLabels.push_back(m->currentBinLabels[i]);
+                for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
+                    newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
+                }
+            }
+        }
+        
+        string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
+        map<string, string> variables; 
+               variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[extension]"] = m->getExtension(sharedfile);
+        variables["[distance]"] = lookup[0]->getLabel();
+               string outputFileName = getOutputFileName("shared", variables); 
+        ofstream out;
+               m->openOutputFile(outputFileName, out);
+               outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+        
+               for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
+        
+        m->currentBinLabels = newLabels;
+        
+               newLookup[0]->printHeaders(out);
+               
+               for (int i = 0; i < newLookup.size(); i++) {
+                       out << newLookup[i]->getLabel() << '\t' << newLookup[i]->getGroup() << '\t';
+                       newLookup[i]->print(out);
+               }
+               out.close();
+        
+        for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; }
+        
+        if (wroteSomething == false) { m->mothurOut("Your file does not contain any OTUs from the .accnos file."); m->mothurOutEndLine();  }
+
+               m->mothurOut("Selected " + toString(numSelected) + " OTUs from your shared file."); m->mothurOutEndLine();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtuLabelsCommand", "readShared");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int GetOtuLabelsCommand::readList(){
+       try {
+        getListVector();
+        
+        if (m->control_pressed) { delete list; return 0;}
+        
+        ListVector newList;
+        newList.setLabel(list->getLabel());
+        int selectedCount = 0;
+        bool wroteSomething = false;
+        string snumBins = toString(list->getNumBins());
+        
+        for (int i = 0; i < list->getNumBins(); i++) {
+            
+            if (m->control_pressed) { delete list; return 0;}
+            
+            //create a label for this otu
+            string otuLabel = "Otu";
+            string sbinNumber = toString(i+1);
+            if (sbinNumber.length() < snumBins.length()) { 
+                int diff = snumBins.length() - sbinNumber.length();
+                for (int h = 0; h < diff; h++) { otuLabel += "0"; }
+            }
+            otuLabel += sbinNumber; 
+            
+            if (labels.count(otuLabel) != 0) {
+                               selectedCount++;
+                newList.push_back(list->get(i));
+            }
+        }
+        
+        string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
+        map<string, string> variables; 
+               variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
+        variables["[extension]"] = m->getExtension(listfile);
+        variables["[distance]"] = list->getLabel();
+               string outputFileName = getOutputFileName("list", variables);
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+        
+               delete list;
+        //print new listvector
+        if (newList.getNumBins() != 0) {
+            wroteSomething = true;
+            newList.print(out);
+        }
+               out.close();
+               
+               if (wroteSomething == false) { m->mothurOut("Your file does not contain any OTUs from the .accnos file."); m->mothurOutEndLine();  }
+               outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
+               
+               m->mothurOut("Selected " + toString(selectedCount) + " OTUs from your list file."); m->mothurOutEndLine();
+        
+        return 0;
+    }
+    catch(exception& e) {
+            m->errorOut(e, "GetOtuLabelsCommand", "readList");
+            exit(1);
+        }
+    }
+//**********************************************************************************************************************
+//loads into list the ListVector from listfile that readList() will filter
+//with no label requested the first vector in the file is used and label is set to its label
+//otherwise the file is scanned for the requested label, falling back to an earlier label when it is missing
+//returns 0; on cancel it returns early and leaves list for the caller to delete
+int GetOtuLabelsCommand::getListVector(){
+       try {
+               InputData input(listfile, "list");
+               list = input.getListVector();
+               string lastLabel = list->getLabel();
+               
+               //no label requested, keep the first vector in the file
+               if (label == "") { label = lastLabel;  return 0; }
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               //NOTE(review): this local labels shadows the class member labels for the rest of the function
+               set<string> labels; labels.insert(label);
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+               
+               //as long as you are not at the end of the file or done with the lines you want
+               while((list != NULL) && (userLabels.size() != 0)) {
+                       if (m->control_pressed) {  return 0;  }
+                       
+                       //exact match, keep this vector
+                       if(labels.count(list->getLabel()) == 1){
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               break;
+                       }
+                       
+                       //no exact match: go back and re-read the previous label's vector
+                       //NOTE(review): what anyLabelsToProcess tests is not visible here - confirm it means "requested label was passed"
+                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = list->getLabel();
+                               
+                               delete list;
+                               list = input.getListVector(lastLabel);
+                               
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               //NOTE(review): we break right after this, so the vector re-read for lastLabel is handed to readList()
+                               //carrying saveLabel as its label (used for the output file name) - confirm this relabelling is intended
+                               list->setLabel(saveLabel);
+                               break;
+                       }
+                       
+                       lastLabel = list->getLabel();                   
+                       
+                       //get next line to process
+                       //prevent memory leak
+                       delete list; 
+                       list = input.getListVector();
+               }
+               
+               
+               if (m->control_pressed) {  return 0;  }
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       delete list; 
+                       list = input.getListVector(lastLabel);
+               }       
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtuLabelsCommand", "getListVector"); 
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//loads into lookup the SharedRAbundVectors from sharedfile that readShared() will filter
+//with no label requested the first set of vectors in the file is used and label is set to its label
+//otherwise the file is scanned for the requested label, falling back to an earlier label when it is missing
+//returns 0; on cancel it returns early and leaves lookup for the caller to delete
+int GetOtuLabelsCommand::getShared(){
+       try {
+               InputData input(sharedfile, "sharedfile");
+               lookup = input.getSharedRAbundVectors();
+               string lastLabel = lookup[0]->getLabel();
+               
+               //no label requested, keep the first set of vectors in the file
+               if (label == "") { label = lastLabel;  return 0; }
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               //NOTE(review): this local labels shadows the class member labels for the rest of the function
+               set<string> labels; labels.insert(label);
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+               
+               //as long as you are not at the end of the file or done with the lines you want
+               //NOTE(review): assumes getSharedRAbundVectors() always returns at least one entry, NULL at end of file - confirm
+               while((lookup[0] != NULL) && (userLabels.size() != 0)) {
+                       if (m->control_pressed) {   return 0;  }
+                       
+                       //exact match, keep these vectors
+                       if(labels.count(lookup[0]->getLabel()) == 1){
+                               processedLabels.insert(lookup[0]->getLabel());
+                               userLabels.erase(lookup[0]->getLabel());
+                               break;
+                       }
+                       
+                       //no exact match: go back and re-read the previous label's vectors
+                       //NOTE(review): what anyLabelsToProcess tests is not visible here - confirm it means "requested label was passed"
+                       if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = lookup[0]->getLabel();
+                               
+                               for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
+                               lookup = input.getSharedRAbundVectors(lastLabel);
+                               
+                               processedLabels.insert(lookup[0]->getLabel());
+                               userLabels.erase(lookup[0]->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               //NOTE(review): we break right after this, so the vectors re-read for lastLabel are handed to readShared()
+                               //with lookup[0] carrying saveLabel (used for the output file name) - confirm this relabelling is intended
+                               lookup[0]->setLabel(saveLabel);
+                               break;
+                       }
+                       
+                       lastLabel = lookup[0]->getLabel();                      
+                       
+                       //get next line to process
+                       //prevent memory leak
+                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
+                       lookup = input.getSharedRAbundVectors();
+               }
+               
+               
+               if (m->control_pressed) {  return 0;  }
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       //entries may be NULL here if the loop above ran off the end of the file
+                       for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
+                       lookup = input.getSharedRAbundVectors(lastLabel);
+               }       
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtuLabelsCommand", "getShared");     
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
diff --git a/getotulabelscommand.h b/getotulabelscommand.h

index 8ce0300fee9a0df9ec9feb56753951cf4b8216b5..06ad27162535bb2aacf2ddbba48c1df2201cbf34 100644 (file)
--- a/getotulabelscommand.h
+++ b/getotulabelscommand.h
@@ -11,6 +11,9 @@
  
  
  #include "command.hpp"
+#include "inputdata.h"
+#include "listvector.hpp"
+#include "sharedrabundvector.h"
  
  /**************************************************************************************************/
  
@@ -34,13 +37,19 @@ public:
      
  private:
      bool abort;
-    string outputDir, accnosfile, constaxonomyfile, otucorrfile, corraxesfile;
+    string outputDir, accnosfile, constaxonomyfile, otucorrfile, corraxesfile, listfile, sharedfile, label;
      vector<string> outputNames;
      set<string> labels;
+    ListVector* list;
+    vector<SharedRAbundVector*> lookup;
      
      int readClassifyOtu();
      int readOtuAssociation();
      int readCorrAxes();
+    int readList();
+    int readShared();
+    int getListVector();
+    int getShared();
  };
  
  /**************************************************************************************************/
diff --git a/groupmap.cpp b/groupmap.cpp

index fb2495c13fe8c09095221fbff3fb79c29c9d129a..e5d8427200aed0a61aaf04a0e7084ed0e17a0a78 100644 (file)
--- a/groupmap.cpp
+++ b/groupmap.cpp
@@ -20,7 +20,6 @@
  
  /************************************************************/
   GroupMap::~GroupMap(){}
-
  /************************************************************/
  int GroupMap::readMap() {
      try {
@@ -45,6 +44,7 @@ int GroupMap::readMap() {
                      setNamesOfGroups(seqGroup);
                      
                      if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
+                    m->checkName(seqName);
                      it = groupmap.find(seqName);
                      
                      if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -69,7 +69,7 @@ int GroupMap::readMap() {
                      setNamesOfGroups(seqGroup);
                      
                      if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-                    
+                    m->checkName(seqName);
                      it = groupmap.find(seqName);
                      
                      if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -114,7 +114,7 @@ int GroupMap::readDesignMap() {
                      setNamesOfGroups(seqGroup);
                      
                      if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-                    
+                    m->checkName(seqName);
                      it = groupmap.find(seqName);
                      
                      if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -139,7 +139,7 @@ int GroupMap::readDesignMap() {
                      setNamesOfGroups(seqGroup);
                      
                      if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-                    
+                    m->checkName(seqName);
                      it = groupmap.find(seqName);
                      
                      if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -188,7 +188,7 @@ int GroupMap::readMap(string filename) {
                      setNamesOfGroups(seqGroup);
                      
                      if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-                    
+                    m->checkName(seqName);
                      it = groupmap.find(seqName);
                      
                      if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -213,7 +213,7 @@ int GroupMap::readMap(string filename) {
                      setNamesOfGroups(seqGroup);
                      
                      if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-                    
+                    m->checkName(seqName);
                      it = groupmap.find(seqName);
                      
                      if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -261,7 +261,7 @@ int GroupMap::readDesignMap(string filename) {
                      setNamesOfGroups(seqGroup);
                      
                      if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-                    
+                    m->checkName(seqName);
                      it = groupmap.find(seqName);
                      
                      if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -286,7 +286,7 @@ int GroupMap::readDesignMap(string filename) {
                      setNamesOfGroups(seqGroup);
                      
                      if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-                    
+                    m->checkName(seqName);
                      it = groupmap.find(seqName);
                      
                      if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -325,7 +325,7 @@ string GroupMap::getGroup(string sequenceName) {
  
  void GroupMap::setGroup(string sequenceName, string groupN) {
         setNamesOfGroups(groupN);
-       
+       m->checkName(sequenceName);
         it = groupmap.find(sequenceName);
         
         if (it != groupmap.end()) {  m->mothurOut("Your groupfile contains more than 1 sequence named " + sequenceName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
diff --git a/listotulabelscommand.cpp b/listotulabelscommand.cpp

index e50196bec8bdb8e22004561df9537648d529514d..037c8225cfbc96f4b3ea3986ef16bf961a9afec1 100644 (file)
--- a/listotulabelscommand.cpp
+++ b/listotulabelscommand.cpp
@@ -14,6 +14,7 @@ vector<string> ListOtuLabelsCommand::setParameters(){
         try {
          CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","otulabels",false,false,true); parameters.push_back(pshared);
                 CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","otulabels",false,false); parameters.push_back(prelabund);
+        CommandParameter plist("list", "InputTypes", "", "", "SharedRel", "SharedRel", "none","otulabels",false,false); parameters.push_back(plist);
          CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
          //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
@@ -33,7 +34,7 @@ vector<string> ListOtuLabelsCommand::setParameters(){
  string ListOtuLabelsCommand::getHelpString(){  
         try {
                 string helpString = "";
-               helpString += "The list.otulabels lists otu labels from shared or relabund file. The results can be used by the get.otulabels to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n";
+               helpString += "The list.otulabels lists otu labels from shared, relabund or list file. The results can be used by the get.otulabels to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n";
                 helpString += "The list.otulabels parameters are: shared, relabund, label and groups.\n";
                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
                 helpString += "The groups parameter allows you to specify which of the groups you would like analyzed.\n";
@@ -122,6 +123,14 @@ ListOtuLabelsCommand::ListOtuLabelsCommand(string option)  {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
                                 }
+                
+                it = parameters.find("list");
+                               //user has given a list file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["list"] = inputDir + it->second;             }
+                               }
              }
              
              vector<string> tempOutNames;
@@ -138,7 +147,13 @@ ListOtuLabelsCommand::ListOtuLabelsCommand(string option)  {
                         else if (relabundfile == "not found") { relabundfile = ""; }
                         else { inputFileName = relabundfile; format = "relabund"; m->setRelAbundFile(relabundfile); }
              
-            if ((relabundfile == "") && (sharedfile == "")) { 
+            listfile = validParameter.validFile(parameters, "list", true);
+                       if (listfile == "not open") { abort = true; }
+                       else if (listfile == "not found") { listfile = ""; }
+                       else { inputFileName = listfile; format = "list"; m->setListFile(listfile); }
+
+            
+            if ((relabundfile == "") && (sharedfile == "") && (listfile== "")) { 
                                 //is there are current file available for either of these?
                                 //give priority to shared, then relabund
                                 sharedfile = m->getSharedFile(); 
@@ -147,8 +162,12 @@ ListOtuLabelsCommand::ListOtuLabelsCommand(string option)  {
                                         relabundfile = m->getRelAbundFile(); 
                                         if (relabundfile != "") {  inputFileName = relabundfile; format="relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
                                         else { 
-                                               m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine(); 
-                                               abort = true;
+                        listfile = m->getListFile();
+                                               if (listfile != "") {  inputFileName = listfile; format="list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
+                        else { 
+                            m->mothurOut("No valid current files. You must provide a shared, list or relabund."); m->mothurOutEndLine(); 
+                            abort = true;
+                        }
                                         }
                                 }
                         }
@@ -261,7 +280,7 @@ int ListOtuLabelsCommand::execute(){
                  
                  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
              }
-        }else {
+        }else if (format == "sharedfile") {
              
              vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
              string lastLabel = lookup[0]->getLabel();
@@ -337,6 +356,81 @@ int ListOtuLabelsCommand::execute(){
                  
                  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
              }
+        }else {
+            ListVector* list = input.getListVector();
+            string lastLabel = list->getLabel();
+            
+            //if the user enters label "0.06" and there is no "0.06" in their file, use the next lowest label.
+            set<string> processedLabels;
+            set<string> userLabels = labels;
+            
+            //as long as you are not at the end of the file or done with the lines you want
+            while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+                
+                if (m->control_pressed) { delete list;  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+                
+                if(allLines == 1 || labels.count(list->getLabel()) == 1){                      
+                    
+                    m->mothurOut(list->getLabel()); m->mothurOutEndLine();
+                    
+                    createList(list);
+                    
+                    processedLabels.insert(list->getLabel());
+                    userLabels.erase(list->getLabel());
+                }
+                
+                if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                    string saveLabel = list->getLabel();
+                    
+                    delete list; 
+                    list = input.getListVector(lastLabel);
+                    m->mothurOut(list->getLabel()); m->mothurOutEndLine();
+                    
+                    createList(list);
+                    
+                    processedLabels.insert(list->getLabel());
+                    userLabels.erase(list->getLabel());
+                    
+                    //restore real lastlabel to save below
+                    list->setLabel(saveLabel);
+                }
+                
+                lastLabel = list->getLabel();
+                //prevent memory leak
+                delete list; list = NULL;
+                
+                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
+                
+                //get next line to process
+                list = input.getListVector();                          
+            }
+            
+            if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
+            
+            //output error messages about any remaining user labels
+            set<string>::iterator it;
+            bool needToRun = false;
+            for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                m->mothurOut("Your file does not include the label " + *it); 
+                if (processedLabels.count(lastLabel) != 1) {
+                    m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                    needToRun = true;
+                }else {
+                    m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                }
+            }
+            
+            //run last label if you need to
+            if (needToRun == true)  {
+                delete list;  
+                list = input.getListVector(lastLabel);
+                
+                m->mothurOut(list->getLabel()); m->mothurOutEndLine();
+                
+                createList(list);
+                
+                delete list;
+            }
          }
          
          if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
@@ -374,7 +468,7 @@ int ListOtuLabelsCommand::createList(vector<SharedRAbundVector*>& lookup){
          return 0;
      }
         catch(exception& e) {
-               m->errorOut(e, "ListOtuLabelsCommand", "createTable");
+               m->errorOut(e, "ListOtuLabelsCommand", "createList");
                 exit(1);
         }
  }
@@ -398,7 +492,42 @@ int ListOtuLabelsCommand::createList(vector<SharedRAbundFloatVector*>& lookup){
          return 0;
      }
         catch(exception& e) {
-               m->errorOut(e, "ListOtuLabelsCommand", "createTable");
+               m->errorOut(e, "ListOtuLabelsCommand", "createList");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int ListOtuLabelsCommand::createList(ListVector*& list){ //writes one "Otu<number>" label per bin of list, one per line, to a file named by getOutputFileName("otulabels", ...); returns 0
+       try {
+        map<string, string> variables; 
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFileName));
+        variables["[distance]"] = list->getLabel();
+        string outputFileName = getOutputFileName("otulabels",variables);
+        outputNames.push_back(outputFileName);  outputTypes["accnos"].push_back(outputFileName); //NOTE(review): recorded under "accnos" although the name pattern key is "otulabels" - confirm this is intended
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+        //bin numbers are left-padded with zeros up to the digit count of getNumBins(), so every label has the same length
+        string snumBins = toString(list->getNumBins());
+        for (int i = 0; i < list->getNumBins(); i++) {
+            if (m->control_pressed) { break; } //stop writing early; the file is still closed below and 0 is returned
+            
+            string otuLabel = "Otu";
+            string sbinNumber = toString(i+1); //labels are numbered from 1, not 0
+            if (sbinNumber.length() < snumBins.length()) { 
+                int diff = snumBins.length() - sbinNumber.length();
+                for (int h = 0; h < diff; h++) { otuLabel += "0"; }
+            }
+            otuLabel += sbinNumber; 
+            
+            out << otuLabel << endl;
+        }
+
+        out.close();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ListOtuLabelsCommand", "createList");
                 exit(1);
         }
 }
diff --git a/listotulabelscommand.h b/listotulabelscommand.h

index b1150a8f828ad5f8bdb26931732b9288d017672e..d38091464654a635c5c03dc964ed8fe4fe0b73df 100644 (file)
--- a/listotulabelscommand.h
+++ b/listotulabelscommand.h
@@ -12,6 +12,7 @@
  
  #include "command.hpp"
  #include "sharedrabundvector.h"
+#include "listvector.hpp"
  
  /**************************************************************************************************/
  
@@ -36,13 +37,14 @@ public:
      
  private:
      bool abort, allLines;
-    string outputDir, sharedfile, relabundfile, label, inputFileName, format;
+    string outputDir, sharedfile, relabundfile, label, inputFileName, format, listfile;
      vector<string> outputNames;
      vector<string> Groups;
      set<string> labels;
      
      int createList(vector<SharedRAbundFloatVector*>&);
      int createList(vector<SharedRAbundVector*>&);
+    int createList(ListVector*&);
  
  };
  
diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp

index 32e2d68a3d1ebe51e0acc8fa64b7a499324fd60f..3474c57abec72b362dc9af4c27336473899beb96 100644 (file)
--- a/makecontigscommand.cpp
+++ b/makecontigscommand.cpp
@@ -15,8 +15,8 @@ vector<string> MakeContigsCommand::setParameters(){
          CommandParameter prfastq("rfastq", "InputTypes", "", "", "none", "none", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(prfastq);
          CommandParameter pfasta("ffasta", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastaGroup","fasta",false,false,true); parameters.push_back(pfasta);
          CommandParameter prfasta("rfasta", "InputTypes", "", "", "none", "none", "none","fastaGroup",false,false,true); parameters.push_back(prfasta);
-        CommandParameter pfqual("fqfile", "InputTypes", "", "", "none", "none", "qfileGroup","qfile",false,false,true); parameters.push_back(pfqual);
-        CommandParameter prqual("rqfile", "InputTypes", "", "", "none", "none", "qfileGroup","qfile",false,false,true); parameters.push_back(prqual);
+        CommandParameter pfqual("fqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(pfqual);
+        CommandParameter prqual("rqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(prqual);
          CommandParameter pfile("file", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "none","fasta-qfile",false,false,true); parameters.push_back(pfile);
          CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
@@ -31,8 +31,10 @@ vector<string> MakeContigsCommand::setParameters(){
                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
-        CommandParameter pthreshold("threshold", "Number", "", "40", "", "", "","",false,false); parameters.push_back(pthreshold);
+        CommandParameter pthreshold("insert", "Number", "", "25", "", "", "","",false,false); parameters.push_back(pthreshold);
+        CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq);
                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+        CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                 
@@ -51,23 +53,25 @@ string MakeContigsCommand::getHelpString(){
                 string helpString = "";
                 helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta.  It will also provide new quality files if the fastq or file parameter is used.\n";
          helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
-               helpString += "The make.contigs command parameters are ffastq, rfastq, oligos, tdiffs, bdiffs, ldiffs, sdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, allfiles and processors.\n";
+               helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles and processors.\n";
                 helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n";
-        helpString += "The file parameter is 2 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column.  Mothur will process each pair and create a combined fasta and qual file with all the sequences.\n";
+        helpString += "The file parameter is 2 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column.  Mothur will process each pair and create a combined fasta and report file with all the sequences.\n";
          helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process.  If you provide one, you must provide the other.\n";
          helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process.  If you provide one, you must provide the other.\n";
          helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters.  If you provide one, you must provide the other.\n";
-               helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
+               helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n";
+        helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
          helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
-        helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
-               helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
+        //helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
+               //helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
+        helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base.  For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5).  If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n";
                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
                 helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
-        helpString += "The threshold parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=40.\n";
+        helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=25.\n";
          helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
          helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
          helpString += "The make.contigs command should be in the following format: \n";
@@ -86,9 +90,8 @@ string MakeContigsCommand::getOutputPattern(string type) {
          string pattern = "";
          
          if (type == "fasta") {  pattern = "[filename],[tag],contigs.fasta"; } 
-        else if (type == "qfile") {  pattern = "[filename],[tag],contigs.qual"; } 
          else if (type == "group") {  pattern = "[filename],[tag],contigs.groups"; }
-        else if (type == "mismatch") {  pattern = "[filename],[tag],contigs.mismatch"; }
+        else if (type == "report") {  pattern = "[filename],[tag],contigs.report"; }
          else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
          
          return pattern;
@@ -105,9 +108,8 @@ MakeContigsCommand::MakeContigsCommand(){
                 setParameters();
                 vector<string> tempOutNames;
                 outputTypes["fasta"] = tempOutNames;
-               outputTypes["qfile"] = tempOutNames;
          outputTypes["group"] = tempOutNames;
-        outputTypes["mismatch"] = tempOutNames;
+        outputTypes["report"] = tempOutNames;
         }
         catch(exception& e) {
                 m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
@@ -140,8 +142,7 @@ MakeContigsCommand::MakeContigsCommand(string option)  {
                         //initialize outputTypes
                         vector<string> tempOutNames;
                         outputTypes["fasta"] = tempOutNames;
-                       outputTypes["qfile"] = tempOutNames;
-            outputTypes["mismatch"] = tempOutNames;
+            outputTypes["report"] = tempOutNames;
              outputTypes["group"] = tempOutNames;
                         
              
@@ -285,10 +286,13 @@ MakeContigsCommand::MakeContigsCommand(string option)  {
                         m->mothurConvert(temp, gapExtend); 
              if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
                         
-            temp = validParameter.validFile(parameters, "threshold", false);   if (temp == "not found"){       temp = "40";                    }
-                       m->mothurConvert(temp, threshold); 
-            if ((threshold < 0) || (threshold > 40)) { m->mothurOut("[ERROR]: threshold must be between 0 and 40.\n"); abort=true; }
+            temp = validParameter.validFile(parameters, "insert", false);      if (temp == "not found"){       temp = "25";                    }
+                       m->mothurConvert(temp, insert); 
+            if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; }
  
+            temp = validParameter.validFile(parameters, "deltaq", false);      if (temp == "not found"){       temp = "6";                     }
+                       m->mothurConvert(temp, deltaq);
+            
                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                         m->setProcessors(temp);
                         m->mothurConvert(temp, processors);
@@ -317,6 +321,19 @@ MakeContigsCommand::MakeContigsCommand(string option)  {
                         
                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
                         if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
+            
+            format = validParameter.validFile(parameters, "format", false);            if (format == "not found"){     format = "illumina1.8+";        }
+            
+            if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa"))  { 
+                               m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." ); m->mothurOutEndLine();
+                               abort=true;
+                       }
+            
+            //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
+            for (int i = -64; i < 65; i++) { 
+                char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
+                convertTable.push_back(temp);
+            }
          }
                 
         }
@@ -350,25 +367,19 @@ int MakeContigsCommand::execute(){
          string compositeGroupFile = getOutputFileName("group",cvars);
          cvars["[tag]"] = "trim";
          string compositeFastaFile = getOutputFileName("fasta",cvars);
-        string compositeQualFile = getOutputFileName("qfile",cvars);
          cvars["[tag]"] = "scrap";
          string compositeScrapFastaFile = getOutputFileName("fasta",cvars);
-        string compositeScrapQualFile = getOutputFileName("qfile",cvars);
          cvars["[tag]"] = "";
-        string compositeMisMatchFile = getOutputFileName("mismatch",cvars);
+        string compositeMisMatchFile = getOutputFileName("report",cvars);
          
          if (filesToProcess.size() > 1) { //clear files for append below
              ofstream outCTFasta, outCTQual, outCSFasta, outCSQual, outCMisMatch;
              m->openOutputFile(compositeFastaFile, outCTFasta); outCTFasta.close();
              m->openOutputFile(compositeScrapFastaFile, outCSFasta); outCSFasta.close();
              m->openOutputFile(compositeMisMatchFile, outCMisMatch); outCMisMatch.close();
-            m->openOutputFile(compositeQualFile, outCTQual); outCTQual.close();
-            m->openOutputFile(compositeScrapQualFile, outCSQual); outCSQual.close();
              outputNames.push_back(compositeFastaFile); outputTypes["fasta"].push_back(compositeFastaFile);
-            outputNames.push_back(compositeQualFile); outputTypes["qfile"].push_back(compositeQualFile);
-            outputNames.push_back(compositeMisMatchFile); outputTypes["mismatch"].push_back(compositeMisMatchFile);
+            outputNames.push_back(compositeMisMatchFile); outputTypes["report"].push_back(compositeMisMatchFile);
              outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile);
-            outputNames.push_back(compositeScrapQualFile); outputTypes["qfile"].push_back(compositeScrapQualFile);
          }
          
          for (int l = 0; l < filesToProcess.size(); l++) {
@@ -376,7 +387,6 @@ int MakeContigsCommand::execute(){
              m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n");
              
              vector<vector<string> > fastaFileNames;
-            vector<vector<string> > qualFileNames;
              createGroup = false;
              string outputGroupFileName;
              map<string, string> variables; 
@@ -385,7 +395,7 @@ int MakeContigsCommand::execute(){
              variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(filesToProcess[l][0][0]));
              variables["[tag]"] = "";
              if(oligosfile != ""){
-                createGroup = getOligos(fastaFileNames, qualFileNames, variables["[filename]"]);
+                createGroup = getOligos(fastaFileNames, variables["[filename]"]);
                  if (createGroup) { 
                      outputGroupFileName = getOutputFileName("group",variables);
                      outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
@@ -394,22 +404,16 @@ int MakeContigsCommand::execute(){
              
              variables["[tag]"] = "trim";
              string outFastaFile = getOutputFileName("fasta",variables);
-            string outQualFile = getOutputFileName("qfile",variables);
              variables["[tag]"] = "scrap";
              string outScrapFastaFile = getOutputFileName("fasta",variables);
-            string outScrapQualFile = getOutputFileName("qfile",variables);
              variables["[tag]"] = "";
-            string outMisMatchFile = getOutputFileName("mismatch",variables);
+            string outMisMatchFile = getOutputFileName("report",variables);
              outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
              outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
-            if (filesToProcess[l][0][1] != "") {
-                outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile);
-                outputNames.push_back(outScrapQualFile); outputTypes["qfile"].push_back(outScrapQualFile);
-            }
-            outputNames.push_back(outMisMatchFile); outputTypes["mismatch"].push_back(outMisMatchFile);
+            outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
              
              m->mothurOut("Making contigs...\n"); 
-            createProcesses(filesToProcess[l], outFastaFile, outQualFile, outScrapFastaFile, outScrapQualFile, outMisMatchFile, fastaFileNames, qualFileNames);
+            createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames);
              m->mothurOut("Done.\n");
              
              //remove temp fasta and qual files
@@ -428,11 +432,6 @@ int MakeContigsCommand::execute(){
                                  if(m->isBlank(fastaFileNames[i][j])){
                                      m->mothurRemove(fastaFileNames[i][j]);
                                      namesToRemove.insert(fastaFileNames[i][j]);
-                                    
-                                    if (filesToProcess[l][0][1] != "") {
-                                        m->mothurRemove(qualFileNames[i][j]);
-                                        namesToRemove.insert(qualFileNames[i][j]);
-                                    }
                                  }else{ 
                                      it = uniqueFastaNames.find(fastaFileNames[i][j]);
                                      if (it == uniqueFastaNames.end()) {        
@@ -489,9 +488,7 @@ int MakeContigsCommand::execute(){
                  }
                  m->appendFiles(outMisMatchFile, compositeMisMatchFile);
                  m->appendFiles(outFastaFile, compositeFastaFile);
-                m->appendFiles(outQualFile, compositeQualFile);
                  m->appendFiles(outScrapFastaFile, compositeScrapFastaFile);
-                m->appendFiles(outScrapQualFile, compositeScrapQualFile);
              }
          }
          m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
@@ -515,12 +512,6 @@ int MakeContigsCommand::execute(){
                         if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
                 }
          
-        string currentQual = "";
-               itTypes = outputTypes.find("qfile");
-               if (itTypes != outputTypes.end()) {
-                       if ((itTypes->second).size() != 0) { currentQual = (itTypes->second)[0]; m->setQualFile(currentQual); }
-               }
-        
          string currentGroup = "";
                 itTypes = outputTypes.find("group");
                 if (itTypes != outputTypes.end()) {
@@ -601,7 +592,7 @@ vector< vector< vector<string> > > MakeContigsCommand::preProcessData(unsigned l
         }
  }
  //**********************************************************************************************************************
-int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputQual, string outputScrapFasta, string outputScrapQual, string outputMisMatches, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames) {
+int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames) {
         try {
                 int num = 0;
                 vector<int> processIDS;
@@ -617,7 +608,6 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                                 process++;
                         }else if (pid == 0){
                  vector<vector<string> > tempFASTAFileNames = fastaFileNames;
-                               vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
                  
                                 if(allFiles){
                                         ofstream temp;
@@ -627,11 +617,6 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                                                         if (tempFASTAFileNames[i][j] != "") {
                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
-                                
-                                if (files[processors-1][1] != "") {
-                                    tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
-                                    m->openOutputFile(tempPrimerQualFileNames[i][j], temp);            temp.close();
-                                }
                                                         }
                                                 }
                                         }
@@ -639,12 +624,9 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
  
                                 num = driver(files[process], 
                               outputFasta + toString(getpid()) + ".temp", 
-                             outputQual + toString(getpid()) + ".temp", 
                               outputScrapFasta + toString(getpid()) + ".temp", 
-                             outputScrapQual + toString(getpid()) + ".temp",
                               outputMisMatches + toString(getpid()) + ".temp",
-                             tempFASTAFileNames,
-                             tempPrimerQualFileNames);
+                             tempFASTAFileNames, process);
                                 
                                 //pass groupCounts to parent
                  ofstream out;
@@ -676,13 +658,9 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
          ofstream temp;
                 m->openOutputFile(outputFasta, temp);           temp.close();
          m->openOutputFile(outputScrapFasta, temp);             temp.close();
-        if (files[processors-1][1] != "") {
-            m->openOutputFile(outputScrapQual, temp);          temp.close();
-            m->openOutputFile(outputQual, temp);       temp.close();
-        }
-        
+                
                 //do my part
-               num = driver(files[processors-1], outputFasta, outputQual, outputScrapFasta, outputScrapQual, outputMisMatches, fastaFileNames, qualFileNames);
+               num = driver(files[processors-1], outputFasta, outputScrapFasta,  outputMisMatches, fastaFileNames, processors-1);
                 
                 //force parent to wait until all the processes are done
                 for (int i=0;i<processIDS.size();i++) { 
@@ -741,8 +719,7 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                         string extension = "";
                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
              vector<vector<string> > tempFASTAFileNames = fastaFileNames;
-            vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
-            
+                        
              if(allFiles){
                  ofstream temp;
                  
@@ -751,25 +728,19 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                          if (tempFASTAFileNames[i][j] != "") {
                              tempFASTAFileNames[i][j] += extension;
                              m->openOutputFile(tempFASTAFileNames[i][j], temp);                 temp.close();
-                            
-                            if (files[processors-1][1] != "") {
-                                tempPrimerQualFileNames[i][j] += extension;
-                                m->openOutputFile(tempPrimerQualFileNames[i][j], temp);                temp.close();
-                            }
                          }
                      }
                  }
              }
  
                                   
-                       contigsData* tempcontig = new contigsData(files[h], (outputFasta + extension), (outputQual + extension), (outputScrapFasta + extension), (outputScrapQual + extension),(outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, threshold, barcodes, primers, tempFASTAFileNames, tempPrimerQualFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createGroup, allFiles, h);
+                       contigsData* tempcontig = new contigsData(files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createGroup, allFiles, h);
                         pDataArray.push_back(tempcontig);
              
                         hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
                 }
          
          vector<vector<string> > tempFASTAFileNames = fastaFileNames;
-        vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
  
          if(allFiles){
              ofstream temp;
@@ -780,11 +751,6 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                      if (tempFASTAFileNames[i][j] != "") {
                          tempFASTAFileNames[i][j] += extension;
                          m->openOutputFile(tempFASTAFileNames[i][j], temp);                     temp.close();
-                        
-                        if (files[processors-1][1] != "") {
-                            tempPrimerQualFileNames[i][j] += extension;
-                            m->openOutputFile(tempPrimerQualFileNames[i][j], temp);            temp.close();
-                        }
                      }
                  }
              }
@@ -794,14 +760,10 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                 ofstream temp;
                 m->openOutputFile(outputFasta, temp);           temp.close();
          m->openOutputFile(outputScrapFasta, temp);             temp.close();
-        if (files[processors-1][1] != "") {
-            m->openOutputFile(outputScrapQual, temp);          temp.close();
-            m->openOutputFile(outputQual, temp);       temp.close();
-               }
          
          //do my part
          processIDS.push_back(processors-1);
-               num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"), (outputQual+ toString(processors-1) + ".temp"), (outputScrapFasta+ toString(processors-1) + ".temp"), (outputScrapQual+ toString(processors-1) + ".temp"), (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames);       
+               num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"),  (outputScrapFasta+ toString(processors-1) + ".temp"),  (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, processors-1);     
          
                 //Wait until all threads have terminated.
                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
@@ -809,6 +771,9 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
                         num += pDataArray[i]->count;
+            if (!pDataArray[i]->done) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
              for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
                  map<string, int>::iterator it2 = groupCounts.find(it->first);
                  if (it2 == groupCounts.end()) {        groupCounts[it->first] = it->second; }
@@ -831,14 +796,6 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                         
                         m->appendFiles((outputScrapFasta + toString(processIDS[i]) + ".temp"), outputScrapFasta);
                         m->mothurRemove((outputScrapFasta + toString(processIDS[i]) + ".temp"));
-                       
-            if (files[processors-1][1] != "") {
-                m->appendFiles((outputScrapQual + toString(processIDS[i]) + ".temp"), outputScrapQual);
-                m->mothurRemove((outputScrapQual + toString(processIDS[i]) + ".temp"));
-                
-                m->appendFiles((outputQual + toString(processIDS[i]) + ".temp"), outputQual);
-                m->mothurRemove((outputQual + toString(processIDS[i]) + ".temp"));
-            }
              
              m->appendFiles((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
                         m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
@@ -849,11 +806,6 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                                                 if (fastaFileNames[j][k] != "") {
                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
-                                                       
-                            if (files[processors-1][1] != "") {
-                                m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
-                                m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
-                            }
                                                 }
                                         }
                                 }
@@ -868,7 +820,7 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
         }
  }
  //**********************************************************************************************************************
-int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputQual, string outputScrapFasta, string outputScrapQual, string outputMisMatches, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames){
+int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int process){
      try {
          
          Alignment* alignment;
@@ -884,19 +836,17 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
          if (m->debug) {  m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
          
          ifstream inFFasta, inRFasta, inFQual, inRQual;
-        ofstream outFasta, outQual, outMisMatch, outScrapFasta, outScrapQual;
+        ofstream outFasta, outMisMatch, outScrapFasta;
          m->openInputFile(thisffastafile, inFFasta);
          m->openInputFile(thisrfastafile, inRFasta);
          if (thisfqualfile != "") {
              m->openInputFile(thisfqualfile, inFQual);
              m->openInputFile(thisrqualfile, inRQual);
-            m->openOutputFile(outputScrapQual, outScrapQual);
-            m->openOutputFile(outputQual, outQual);
          }
          m->openOutputFile(outputFasta, outFasta);
          m->openOutputFile(outputScrapFasta, outScrapFasta);
          m->openOutputFile(outputMisMatches, outMisMatch);
-        outMisMatch << "Name\tLength\tMisMatches\n";
+        if (process == 0) { outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";  }
          
          TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes);
          
@@ -956,7 +906,6 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
              
              //traverse alignments merging into one contiguous seq
              string contig = "";
-            vector<int> contigScores; 
              int numMismatches = 0;
              string seq1 = fSeq.getAligned();
              string seq2 = rSeq.getAligned();
@@ -973,15 +922,9 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
              //bigger of the 2 starting positions is the location of the overlapping start
              if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
                  overlapStart = seq2Start; 
-                for (int i = 0; i < overlapStart; i++) {
-                    contig += seq1[i];
-                    if (thisfqualfile != "") { contigScores.push_back(scores1[ABaseMap[i]]); }
-                }
+                for (int i = 0; i < overlapStart; i++) { contig += seq1[i];  }
              }else { //seq1 starts later so take from 0 to overlapStart from seq2
-                for (int i = 0; i < overlapStart; i++) {
-                    contig += seq2[i];
-                    if (thisfqualfile != "") { contigScores.push_back(scores2[BBaseMap[i]]); }
-                }
+                for (int i = 0; i < overlapStart; i++) {  contig += seq2[i]; }
              }
              
              int seq1End = fSeq.getEndPos();
@@ -989,53 +932,40 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
              int overlapEnd = seq1End;
              if (seq2End < overlapEnd) { overlapEnd = seq2End; }  //smallest end position is where overlapping ends
              
+            int oStart = contig.length();
              for (int i = overlapStart; i < overlapEnd; i++) {
                  if (seq1[i] == seq2[i]) { //match, add base and choose highest score
                      contig += seq1[i];
-                    if (thisfqualfile != "") { 
-                        contigScores.push_back(scores1[ABaseMap[i]]); 
-                        if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; }
-                    }
-                }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base
+                }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base
                      if (thisfqualfile != "") {
-                        if (scores2[BBaseMap[i]] < threshold) { } //
-                        else {
-                            contig += seq2[i];
-                            contigScores.push_back(scores2[BBaseMap[i]]);
-                        }
+                        if (scores2[BBaseMap[i]] < insert) { } //
+                        else { contig += seq2[i];  }
                      }else { contig += seq2[i]; } //with no quality info, then we keep it?
-                }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below threshold. In that case eliminate base
+                }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base
                      if (thisfqualfile != "") {
-                        if (scores1[ABaseMap[i]] < threshold) { } //
-                        else {
-                            contig += seq1[i];
-                            contigScores.push_back(scores1[ABaseMap[i]]);
-                        }
+                        if (scores1[ABaseMap[i]] < insert) { } //
+                        else { contig += seq1[i];  }
                      }else { contig += seq1[i]; } //with no quality info, then we keep it?
                  }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
                      if (thisfqualfile != "") {
-                        char c = seq1[i];
-                        contigScores.push_back(scores1[ABaseMap[i]]);
-                        if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; c = seq2[i]; }
-                        contig += c;
+                        if (abs(scores1[ABaseMap[i]] - scores2[BBaseMap[i]]) >= deltaq) { //is the difference in qual scores >= deltaq, if yes choose base with higher score
+                            char c = seq1[i];
+                            if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { c = seq2[i]; }
+                            contig += c;
+                        }else { //if no, base becomes n
+                            contig += 'N';
+                        }
                          numMismatches++;
                      }else { numMismatches++; } //cant decide, so eliminate and mark as mismatch
                  }else { //should never get here
                      m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
                  }
              }
-            
+            int oend = contig.length();
              if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
-                for (int i = overlapEnd; i < length; i++) {
-                    contig += seq2[i];
-                    if (thisfqualfile != "") { contigScores.push_back(scores2[BBaseMap[i]]); }
-                }
+                for (int i = overlapEnd; i < length; i++) { contig += seq2[i];  }
              }else { //seq2 ends before seq1 so take from overlap to length from seq1
-                for (int i = overlapEnd; i < length; i++) {
-                    contig += seq1[i];
-                    if (thisfqualfile != "") { contigScores.push_back(scores1[ABaseMap[i]]); }
-                }
-                
+                for (int i = overlapEnd; i < length; i++) {  contig += seq1[i]; }
              }
              
              if(trashCode.length() == 0){
@@ -1076,32 +1006,16 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
                      m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
                      output << ">" << fSeq.getName() << endl << contig << endl;
                      output.close();
-                    
-                    if (thisfqualfile != "") {
-                        m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
-                        output << ">" << fSeq.getName() << endl;
-                        for (int i = 0; i < contigScores.size(); i++) { output << contigScores[i] << ' '; }
-                        output << endl;
-                        output.close();        
-                    }
                  }
                  
                  //output
                  outFasta << ">" << fSeq.getName() << endl << contig << endl;
-                if (thisfqualfile != "") {
-                    outQual << ">" << fSeq.getName() << endl;
-                    for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
-                    outQual << endl;
-                }
-                outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
+                int numNs = 0;
+                for (int i = 0; i < contig.length(); i++) { if (contig[i] == 'N') { numNs++; }  }
+                outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << (oend-oStart) << '\t' << oStart << '\t' << oend << '\t' << numMismatches << '\t' << numNs << endl;
              }else {
                  //output
                  outScrapFasta << ">" << fSeq.getName() << " | " << trashCode << endl << contig << endl;
-                if (thisfqualfile != "") {
-                    outScrapQual << ">" << fSeq.getName() << " | " << trashCode << endl;
-                    for (int i = 0; i < contigScores.size(); i++) { outScrapQual << contigScores[i] << ' '; }
-                    outScrapQual << endl;
-                }
              }
              num++;
              
@@ -1120,12 +1034,10 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
          if (thisfqualfile != "") {
              inFQual.close();
              inRQual.close();
-            outQual.close();
-            outScrapQual.close();
          }
          delete alignment;
          
-        if (m->control_pressed) {  m->mothurRemove(outputFasta); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches); if (thisfqualfile != "") { m->mothurRemove(outputQual); m->mothurRemove(outputScrapQual); } }
+        if (m->control_pressed) {  m->mothurRemove(outputFasta); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches);  }
      
          return num;
      }
@@ -1206,7 +1118,7 @@ vector< vector<string> > MakeContigsCommand::readFastqFiles(unsigned long int& c
                  
                  if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); }
                 
-                if (checkReads(fread, rread, ffastq, rfastq)) {
+                //if (checkReads(fread, rread, ffastq, rfastq)) {
                      if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
                      
                      //if the reads are okay write to output files
@@ -1225,7 +1137,7 @@ vector< vector<string> > MakeContigsCommand::readFastqFiles(unsigned long int& c
                      
                      //report progress
                      if((count) % 10000 == 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }
-                }
+                //}
              }
                 }
                 //report progress
@@ -1349,7 +1261,7 @@ vector< vector<string> > MakeContigsCommand::readFastaFiles(unsigned long int& c
                  
                  if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); }
                  
-                if (checkReads(fread, rread, ffasta, rfasta)) {
+               // if (checkReads(fread, rread, ffasta, rfasta)) {
                      if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inReverseFasta.close(); inForwardFasta.close(); if (fqualfile != "") { inReverseQual.close(); inReverseQual.close(); } return files; }
                      
                      //if the reads are okay write to output files
@@ -1369,7 +1281,7 @@ vector< vector<string> > MakeContigsCommand::readFastaFiles(unsigned long int& c
                      
                      //report progress
                      if((count) % 10000 == 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }
-                }
+                //}
              }
                 }
                 //report progress
@@ -1508,15 +1420,8 @@ fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
          if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
          if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
          
-        vector<int> qualScores;
-               int controlChar = int('!');
-               for (int i = 0; i < quality.length(); i++) { 
-                       int temp = int(quality[i]);
-                       temp -= controlChar;
-                       
-                       qualScores.push_back(temp);
-               }
-    
+        vector<int> qualScores = convertQual(quality);
+        
          read.name = name;
          read.sequence = sequence;
          read.scores = qualScores;
@@ -1528,7 +1433,7 @@ fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
          exit(1);
      }
  }
-//**********************************************************************************************************************
+/**********************************************************************************************************************
  bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse, string ffile, string rfile){
      try {
          bool good = true;
@@ -1551,7 +1456,7 @@ bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse, stri
          m->errorOut(e, "MakeContigsCommand", "checkReads");
          exit(1);
      }
-}
+}*/
  //***************************************************************************************************************
  vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
         try {
@@ -1653,7 +1558,7 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
  //BARCODE   atgcatgc   atgcatgc    groupName 
  //PRIMER   atgcatgc   atgcatgc    groupName  
  //PRIMER   atgcatgc   atgcatgc  
-bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, string rootname){
+bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, string rootname){
         try {
                 ifstream in;
                 m->openInputFile(oligosfile, in);
@@ -1670,7 +1575,7 @@ bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, vect
                 while(!in.eof()){
              
                         in >> type; 
-            cout << type << endl;
+    
                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
              
                         if(type[0] == '#'){
@@ -1753,7 +1658,6 @@ bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, vect
                          
                      barcodes[indexBarcode]=newPair; indexBarcode++;
                                         barcodeNameVector.push_back(group);
-                    cout << group << endl;
                                 }else if(type == "LINKER"){
                                         linker.push_back(foligo);
                      m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n");
@@ -1786,7 +1690,6 @@ bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, vect
                 for(int i=0;i<fastaFileNames.size();i++){
                         fastaFileNames[i].assign(primerNameVector.size(), "");
                 }
-               qualFileNames = fastaFileNames; 
                 
                 if(allFiles){
                         set<string> uniqueNames; //used to cleanup outputFileNames
@@ -1827,17 +1730,6 @@ bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, vect
                          
                          fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
                          m->openOutputFile(fastaFileName, temp);                temp.close();
-                        
-                        if ((fqualfile != "") || (ffastqfile != "") || (file != "")) {
-                            qualFileName = rootname + ".qual";
-                            if (uniqueNames.count(qualFileName) == 0) {
-                                outputNames.push_back(qualFileName);
-                                outputTypes["qfile"].push_back(qualFileName);
-                            }
-                            
-                            qualFileNames[itBar->first][itPrimer->first] = qualFileName;
-                            m->openOutputFile(qualFileName, temp);             temp.close();
-                        }
                      }
                                 }
                         }
@@ -1912,6 +1804,41 @@ string MakeContigsCommand::reverseOligo(string oligo){
         }
  }
  //**********************************************************************************************************************
+vector<int> MakeContigsCommand::convertQual(string qual) { // decodes every character of qual into an integer score according to the member 'format'; returns one score per character
+       try {
+               vector<int> qualScores;
+        bool negativeScores = false; // set when any decoded score is below -5
+               
+               for (int i = 0; i < qual.length(); i++) { 
+            
+            int temp = 0;
+            temp = int(qual[i]); // start from the integer code of the quality character
+            if (format == "illumina") {
+                temp -= 64; //char '@'
+            }else if (format == "illumina1.8+") {
+                    temp -= int('!'); //char '!'
+            }else if (format == "solexa") {
+                temp = int(convertTable[temp]); //convert to sanger; NOTE(review): convertTable is filled outside this function and is indexed by the raw character code here - confirm it covers every code that can occur
+                temp -= int('!'); //char '!'
+            }else { // any other format value gets the same '!' offset as illumina1.8+
+                temp -= int('!'); //char '!'
+            }
+            
+            if (temp < -5) { negativeScores = true; } // only flagged here; the score is still stored below
+                       qualScores.push_back(temp);
+               }
+               
+        if (negativeScores) { m->mothurOut("[ERROR]: finding negative quality scores, do you have the right format selected? http://en.wikipedia.org/wiki/FASTQ_format#Encoding \n");  m->control_pressed = true;  } // reported once per call, after the loop; also sets m->control_pressed
+        
+               return qualScores; // returned even when the negative-score error was reported
+       }
+       catch(exception& e) { // pass the exception to errorOut, then terminate the process
+               m->errorOut(e, "MakeContigsCommand", "convertQual");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
  
  
  
diff --git a/makecontigscommand.h b/makecontigscommand.h

index 2732d68a959934425430853ac67b5c8414aeffd5..a23d397d202ab1fd71ecb250c7c7246bcb82db3c 100644 (file)
--- a/makecontigscommand.h
+++ b/makecontigscommand.h
@@ -60,9 +60,9 @@ public:
      
  private:
      bool abort, allFiles, createGroup;
-    string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, file;
+    string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, file, format;
         float match, misMatch, gapOpen, gapExtend;
-       int processors, longestBase, threshold, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
+       int processors, longestBase, insert, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, deltaq;
      vector<string> outputNames;
      
      map<int, oligosPair> barcodes;
@@ -70,20 +70,22 @@ private:
      vector<string>  linker;
      vector<string>  spacer;
         vector<string> primerNameVector;        
-       vector<string> barcodeNameVector;       
+       vector<string> barcodeNameVector;
+       vector<char> convertTable;
      
         map<string, int> groupCounts; 
      map<string, string> groupMap;
      
+    vector<int> convertQual(string);
      fastqRead readFastq(ifstream&, bool&);
      vector< vector< vector<string> > > preProcessData(unsigned long int&);
      vector< vector<string> > readFileNames(string);
      vector< vector<string> > readFastqFiles(unsigned long int&, string, string);
      vector< vector<string> > readFastaFiles(unsigned long int&, string, string);
-    bool checkReads(fastqRead&, fastqRead&, string, string);
-    int createProcesses(vector< vector<string> >, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >);
-    int driver(vector<string>, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >);
-    bool getOligos(vector<vector<string> >&, vector< vector<string> >&, string);
+    //bool checkReads(fastqRead&, fastqRead&, string, string);
+    int createProcesses(vector< vector<string> >, string, string, string, vector<vector<string> >);
+    int driver(vector<string>, string, string, string, vector<vector<string> >, int);
+    bool getOligos(vector<vector<string> >&, string);
      string reverseOligo(string);
      vector<pairFastqRead> getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques);
  };
@@ -96,18 +98,15 @@ private:
  // that can be passed using a single void pointer (LPVOID).
  struct contigsData {
         string outputFasta; 
-       string outputQual; 
      string outputScrapFasta; 
-       string outputScrapQual;
         string outputMisMatches;
         string align;
      vector<string> files;
      vector<vector<string> > fastaFileNames;
-    vector<vector<string> > qualFileNames;
         MothurOut* m;
         float match, misMatch, gapOpen, gapExtend;
-       int count, threshold, threadID, pdiffs, bdiffs, tdiffs;
-    bool allFiles, createGroup;
+       int count, insert, threadID, pdiffs, bdiffs, tdiffs, deltaq;
+    bool allFiles, createGroup, done;
      map<string, int> groupCounts; 
      map<string, string> groupMap;
      vector<string> primerNameVector;   
@@ -116,23 +115,20 @@ struct contigsData {
         map<int, oligosPair> primers;
         
         contigsData(){}
-       contigsData(vector<string> f, string of, string oq, string osf, string osq, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, map<int, oligosPair> br, map<int, oligosPair> pr, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<string>bnv, vector<string> pnv, int pdf, int bdf, int tdf, bool cg, bool all, int tid) {
+       contigsData(vector<string> f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, map<int, oligosPair> br, map<int, oligosPair> pr, vector<vector<string> > ffn, vector<string>bnv, vector<string> pnv, int pdf, int bdf, int tdf, bool cg, bool all, int tid) {
          files = f;
                 outputFasta = of;
-        outputQual = oq;
          outputMisMatches = om;
          m = mout;
                 match = ma; 
                 misMatch = misMa;
                 gapOpen = gapO; 
                 gapExtend = gapE; 
-        threshold = thr;
+        insert = thr;
                 align = al;
                 count = 0;
          outputScrapFasta = osf;
-        outputScrapQual = osq;
          fastaFileNames = ffn;
-        qualFileNames = qfn;
          barcodes = br;
          primers = pr;
          barcodeNameVector = bnv;
@@ -143,6 +139,8 @@ struct contigsData {
          allFiles = all;
          createGroup = cg;
                 threadID = tid;
+        deltaq = delt;
+        done=false;
         }
  };
  
@@ -159,7 +157,7 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
          if(pDataArray->align == "gotoh")                       {       alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, longestBase);                 }
                 else if(pDataArray->align == "needleman")       {       alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, longestBase);                            }
          
-        int num = 0;
+        pDataArray->count = 0;
          string thisffastafile = pDataArray->files[0];
          string thisfqualfile = pDataArray->files[1];
          string thisrfastafile = pDataArray->files[2];
@@ -173,27 +171,24 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
                                         if (pDataArray->fastaFileNames[i][j] != "") {
                                                 ofstream temp;
                                                 pDataArray->m->openOutputFile(pDataArray->fastaFileNames[i][j], temp);                  temp.close();
-                        if (thisfqualfile != "") { pDataArray->m->openOutputFile(pDataArray->qualFileNames[i][j], temp);                       temp.close(); }
                                         }
                                 }
                         }
                 }
          
          ifstream inFFasta, inRFasta, inFQual, inRQual;
-        ofstream outFasta, outQual, outMisMatch, outScrapFasta, outScrapQual;
+        ofstream outFasta, outMisMatch, outScrapFasta;
          pDataArray->m->openInputFile(thisffastafile, inFFasta);
          pDataArray->m->openInputFile(thisrfastafile, inRFasta);
          if (thisfqualfile != "") {
              pDataArray->m->openInputFile(thisfqualfile, inFQual);
              pDataArray->m->openInputFile(thisrqualfile, inRQual);
-            pDataArray->m->openOutputFile(pDataArray->outputQual, outQual);
-            pDataArray->m->openOutputFile(pDataArray->outputScrapQual, outScrapQual);
          }
          pDataArray->m->openOutputFile(pDataArray->outputFasta, outFasta);
          pDataArray->m->openOutputFile(pDataArray->outputMisMatches, outMisMatch);
          pDataArray->m->openOutputFile(pDataArray->outputScrapFasta, outScrapFasta);
          
-        outMisMatch << "Name\tLength\tMisMatches\n";
+        if (pDataArray->threadID == 0) {  outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";  }
          
          TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->primers, pDataArray->barcodes);
          
@@ -253,7 +248,6 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
              
              //traverse alignments merging into one contiguous seq
              string contig = "";
-            vector<int> contigScores; 
              int numMismatches = 0;
              string seq1 = fSeq.getAligned();
              string seq2 = rSeq.getAligned();
@@ -269,15 +263,9 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
              //bigger of the 2 starting positions is the location of the overlapping start
              if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
                  overlapStart = seq2Start; 
-                for (int i = 0; i < overlapStart; i++) {
-                    contig += seq1[i];
-                    if (thisfqualfile != "") { contigScores.push_back(scores1[ABaseMap[i]]); }
-                }
+                for (int i = 0; i < overlapStart; i++) { contig += seq1[i];  }
              }else { //seq1 starts later so take from 0 to overlapStart from seq2
-                for (int i = 0; i < overlapStart; i++) {
-                    contig += seq2[i];
-                    if (thisfqualfile != "") { contigScores.push_back(scores2[BBaseMap[i]]); }
-                }
+                for (int i = 0; i < overlapStart; i++) {  contig += seq2[i]; }
              }
              
              int seq1End = fSeq.getEndPos();
@@ -285,53 +273,41 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
              int overlapEnd = seq1End;
              if (seq2End < overlapEnd) { overlapEnd = seq2End; }  //smallest end position is where overlapping ends
              
+            int oStart = contig.length();
              for (int i = overlapStart; i < overlapEnd; i++) {
                  if (seq1[i] == seq2[i]) { //match, add base and choose highest score
                      contig += seq1[i];
-                    if (thisfqualfile != "") { 
-                        contigScores.push_back(scores1[ABaseMap[i]]);
-                        if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; }
-                    }
-                }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base
+                }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base
                      if (thisfqualfile != "") {
-                        if (scores2[BBaseMap[i]] < pDataArray->threshold) { } //
-                        else {
-                            contig += seq2[i];
-                            contigScores.push_back(scores2[BBaseMap[i]]);
-                        }
-                    }else { contig += seq2[i]; }
-                }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below threshold. In that case eliminate base
+                        if (scores2[BBaseMap[i]] < pDataArray->insert) { } //
+                        else { contig += seq2[i];  }
+                    }else { contig += seq2[i]; } //with no quality info, then we keep it?
+                }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base
                      if (thisfqualfile != "") {
-                        if (scores1[ABaseMap[i]] < pDataArray->threshold) { } //
-                        else {
-                            contig += seq1[i];
-                            contigScores.push_back(scores1[ABaseMap[i]]);
-                        }
-                    }else { contig += seq1[i]; }
+                        if (scores1[ABaseMap[i]] < pDataArray->insert) { } //
+                        else { contig += seq1[i];  }
+                    }else { contig += seq1[i]; } //with no quality info, then we keep it?
                  }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
                      if (thisfqualfile != "") {
-                        char c = seq1[i];
-                        contigScores.push_back(scores1[ABaseMap[i]]);
-                        if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; c = seq2[i]; }
-                        contig += c;
+                        if (abs(scores1[ABaseMap[i]] - scores2[BBaseMap[i]]) >= pDataArray->deltaq) { //is the difference in qual scores >= deltaq, if yes choose base with higher score
+                            char c = seq1[i];
+                            if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { c = seq2[i]; }
+                            contig += c;
+                        }else { //if no, base becomes n
+                            contig += 'N';
+                        }
                          numMismatches++;
-                    }else { numMismatches++; }
+                    }else { numMismatches++; } //cant decide, so eliminate and mark as mismatch
                  }else { //should never get here
                      pDataArray->m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
                  }
              }
+            int oend = contig.length();
              
              if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
-                for (int i = overlapEnd; i < length; i++) {
-                    contig += seq2[i];
-                    if (thisfqualfile != "") { contigScores.push_back(scores2[BBaseMap[i]]); }
-                }
+                for (int i = overlapEnd; i < length; i++) { contig += seq2[i];  }
              }else { //seq2 ends before seq1 so take from overlap to length from seq1
-                for (int i = overlapEnd; i < length; i++) {
-                    contig += seq1[i];
-                    if (thisfqualfile != "") { contigScores.push_back(scores1[ABaseMap[i]]); }
-                }
-                
+                for (int i = overlapEnd; i < length; i++) {  contig += seq1[i]; }
              }
  
              if(trashCode.length() == 0){
@@ -367,41 +343,25 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
                      pDataArray->m->openOutputFileAppend(pDataArray->fastaFileNames[barcodeIndex][primerIndex], output);
                      output << ">" << fSeq.getName() << endl << contig << endl;
                      output.close();
-                    
-                    if (thisfqualfile != "") {
-                        pDataArray->m->openOutputFileAppend(pDataArray->qualFileNames[barcodeIndex][primerIndex], output);
-                        output << ">" << fSeq.getName() << endl;
-                        for (int i = 0; i < contigScores.size(); i++) { output << contigScores[i] << ' '; }
-                        output << endl;
-                        output.close();        
-                    }
                  }
                  
                  //output
                  outFasta << ">" << fSeq.getName() << endl << contig << endl;
-                if (thisfqualfile != "") {
-                    outQual << ">" << fSeq.getName() << endl;
-                    for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
-                    outQual << endl;
-                }
-                outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
+                int numNs = 0;
+                for (int i = 0; i < contig.length(); i++) { if (contig[i] == 'N') { numNs++; }  }
+                outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << (oend-oStart) << '\t' << oStart << '\t' << oend << '\t' << numMismatches << '\t' << numNs << endl;
              }else {
                  //output
                  outScrapFasta << ">" << fSeq.getName() << " | " << trashCode << endl << contig << endl;
-                if (thisfqualfile != "") {
-                    outScrapQual << ">" << fSeq.getName() << " | " << trashCode << endl;
-                    for (int i = 0; i < contigScores.size(); i++) { outScrapQual << contigScores[i] << ' '; }
-                    outScrapQual << endl;
-                }
              }
-            num++;
+            pDataArray->count++;
              
                         //report progress
-                       if((num) % 1000 == 0){  pDataArray->m->mothurOut(toString(num)); pDataArray->m->mothurOutEndLine();             }
+                       if((pDataArray->count) % 1000 == 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
                 }
          
                 //report progress
-               if((num) % 1000 != 0){  pDataArray->m->mothurOut(toString(num)); pDataArray->m->mothurOutEndLine();             }
+               if((pDataArray->count) % 1000 != 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
          
          inFFasta.close();
          inRFasta.close();
@@ -411,12 +371,11 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
          if (thisfqualfile != "") {
              inFQual.close();
              inRQual.close();
-            outQual.close();
-            outScrapQual.close();
          }
          delete alignment;
          
-        if (pDataArray->m->control_pressed) {  pDataArray->m->mothurRemove(pDataArray->outputFasta);  pDataArray->m->mothurRemove(pDataArray->outputMisMatches);  pDataArray->m->mothurRemove(pDataArray->outputScrapFasta);  if (thisfqualfile != "") { pDataArray->m->mothurRemove(pDataArray->outputQual); pDataArray->m->mothurRemove(pDataArray->outputScrapQual); } }
+        pDataArray->done = true;
+        if (pDataArray->m->control_pressed) {  pDataArray->m->mothurRemove(pDataArray->outputFasta);  pDataArray->m->mothurRemove(pDataArray->outputMisMatches);  pDataArray->m->mothurRemove(pDataArray->outputScrapFasta); }
          
          return 0;
                 
diff --git a/makefastqcommand.cpp b/makefastqcommand.cpp

index 6712196bdfb3e5d76d76a4503289f943e9c72ce8..5a66d9a4b7b23ccd9051803d0e817fc7cf89f315 100644 (file)
--- a/makefastqcommand.cpp
+++ b/makefastqcommand.cpp
@@ -16,7 +16,7 @@ vector<string> MakeFastQCommand::setParameters(){
         try {
                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fastq",false,true,true); parameters.push_back(pfasta);
                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","fastq",false,true,true); parameters.push_back(pqfile);
-               CommandParameter pformat("format", "Multiple", "sanger-illumina", "sanger", "", "", "","",false,false); parameters.push_back(pformat);
+               CommandParameter pformat("format", "Multiple", "sanger-illumina-illumina1.8+", "sanger", "", "", "","",false,false); parameters.push_back(pformat);
          CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                 
@@ -35,7 +35,7 @@ string MakeFastQCommand::getHelpString(){
                 string helpString = "";
                 helpString += "The make.fastq command reads a fasta and quality file and creates a fastq file.\n";
                 helpString += "The make.fastq command parameters are fasta, qfile and format.  fasta and qfile are required.\n";
-               helpString += "The format parameter is used to indicate whether your sequences are sanger or illumina, default=sanger.\n";
+               helpString += "The format parameter is used to indicate whether your sequences are sanger, illumina1.8+ or illumina, default=sanger.\n";
                 helpString += "The make.fastq command should be in the following format: make.fastq(qfile=yourQualityFile, fasta=yourFasta).\n";
                 helpString += "Example make.fastq(fasta=amazon.fasta, qfile=amazon.qual).\n";
                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
@@ -147,8 +147,8 @@ MakeFastQCommand::MakeFastQCommand(string option)  {
              
              format = validParameter.validFile(parameters, "format", false);            if (format == "not found"){     format = "sanger";      }
              
-            if ((format != "sanger") && (format != "illumina") && (format != "solexa"))  { 
-                               m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa and illumina, aborting." ); m->mothurOutEndLine();
+            if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+"))  { 
+                               m->mothurOut(format + " is not a valid format. Your format choices are sanger, illumina1.8+ and illumina, aborting." ); m->mothurOutEndLine();
                                 abort=true;
                         }
  
diff --git a/makefile b/makefile

index 458450570c000e848f217d995a9d8d976e9d637d..d4035c8a82f24ea2fa3c231a887375160125ce9e 100644 (file)
--- a/makefile
+++ b/makefile
@@ -15,8 +15,8 @@ USEREADLINE ?= yes
  CYGWIN_BUILD ?= no
  USECOMPRESSION ?= no
  MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"11/2/2012\""
-VERSION = "\"1.28.0\""
+RELEASE_DATE = "\"1/23/2013\""
+VERSION = "\"1.29.1\""
  FORTAN_COMPILER = gfortran
  FORTRAN_FLAGS = 
  
diff --git a/matrixoutputcommand.cpp b/matrixoutputcommand.cpp

index d2c29bdee9b6fc84481f89775f497b3165b72b4b..95b881190d085135539601c17836ce7d123592da 100644 (file)
--- a/matrixoutputcommand.cpp
+++ b/matrixoutputcommand.cpp
@@ -629,6 +629,9 @@ int MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
                  
                  //Close all thread handles and free memory allocations.
                  for(int i=0; i < pDataArray.size(); i++){
+                    if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
+                        m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
+                    }
                      for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
                      
                      for (int k = 0; k < calcDists.size(); k++) {
@@ -691,70 +694,10 @@ int MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
                 
          if (iters != 0) {
              //we need to find the average distance and standard deviation for each groups distance
+            vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals, mode);
              
-            vector< vector<seqDist>  > calcAverages; calcAverages.resize(matrixCalculators.size()); 
-            for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
-                calcAverages[i].resize(calcDistsTotals[0][i].size());
-                
-                for (int j = 0; j < calcAverages[i].size(); j++) {
-                    calcAverages[i][j].seq1 = calcDistsTotals[0][i][j].seq1;
-                    calcAverages[i][j].seq2 = calcDistsTotals[0][i][j].seq2;
-                    calcAverages[i][j].dist = 0.0;
-                }
-            }
-            if (mode == "average") {
-                for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
-                    for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
-                        for (int j = 0; j < calcAverages[i].size(); j++) {
-                            calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
-                            if (m->debug) {  m->mothurOut("[DEBUG]: Totaling for average calc: iter = " + toString(thisIter) + ", " + thisLookup[calcDistsTotals[thisIter][i][j].seq1]->getGroup() + " - " + thisLookup[calcDistsTotals[thisIter][i][j].seq2]->getGroup() + " distance = " + toString(calcDistsTotals[thisIter][i][j].dist) + ". New total = " + toString(calcAverages[i][j].dist) + ".\n");  }
-                        }
-                    }
-                }
-                
-                for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
-                    for (int j = 0; j < calcAverages[i].size(); j++) {
-                        calcAverages[i][j].dist /= (float) iters;
-                    }
-                }
-            }else { //find median
-                for (int i = 0; i < calcAverages.size(); i++) { //for each calc
-                    for (int j = 0; j < calcAverages[i].size(); j++) {  //for each comparison
-                        vector<double> dists;
-                        for (int thisIter = 0; thisIter < iters; thisIter++) { //for each subsample
-                            dists.push_back(calcDistsTotals[thisIter][i][j].dist);
-                        }
-                        sort(dists.begin(), dists.end());
-                        calcAverages[i][j].dist = dists[(iters/2)];
-                    }
-                }
-            }
              //find standard deviation
-            vector< vector<seqDist>  > stdDev; stdDev.resize(matrixCalculators.size());
-            for (int i = 0; i < stdDev.size(); i++) {  //initialize sums to zero.
-                stdDev[i].resize(calcDistsTotals[0][i].size());
-                
-                for (int j = 0; j < stdDev[i].size(); j++) {
-                    stdDev[i][j].seq1 = calcDistsTotals[0][i][j].seq1;
-                    stdDev[i][j].seq2 = calcDistsTotals[0][i][j].seq2;
-                    stdDev[i][j].dist = 0.0;
-                }
-            }
-            
-            for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
-                for (int i = 0; i < stdDev.size(); i++) {  
-                    for (int j = 0; j < stdDev[i].size(); j++) {
-                        stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
-                    }
-                }
-            }
-
-            for (int i = 0; i < stdDev.size(); i++) {  //finds average.
-                for (int j = 0; j < stdDev[i].size(); j++) {
-                    stdDev[i][j].dist /= (float) iters;
-                    stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
-                }
-            }
+            vector< vector<seqDist>  > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages);
              
              //print results
              for (int i = 0; i < calcDists.size(); i++) {
diff --git a/matrixoutputcommand.h b/matrixoutputcommand.h

index 8a04c96a32ed6fa769a4e14474dfb624451751d1..90f120602e70dc04600b3048bab41bde21b21edc 100644 (file)
--- a/matrixoutputcommand.h
+++ b/matrixoutputcommand.h
@@ -121,6 +121,7 @@ struct distSharedData {
         unsigned long long start;
         unsigned long long end;
         MothurOut* m;
+    int count;
         
         distSharedData(){}
         distSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
@@ -129,6 +130,7 @@ struct distSharedData {
                 end = en;
          Estimators = est;
          thisLookup = lu;
+        count = 0;
         }
  };
  /**************************************************************************************************/
@@ -230,7 +232,7 @@ static DWORD WINAPI MyDistSharedThreadFunction(LPVOID lpParam){
                         
                 vector<SharedRAbundVector*> subset;
                 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
-                       
+                       pDataArray->count++;
                         for (int l = 0; l < k; l++) {
                                 
                                 if (k != l) { //we dont need to similiarity of a groups to itself
diff --git a/metastatscommand.cpp b/metastatscommand.cpp

index 3eaee9652fa230281e47d77bfcf0430b3be4afa5..9dff3543f49efa25037ce7fb3dd9eeeebbe8c787 100644 (file)
--- a/metastatscommand.cpp
+++ b/metastatscommand.cpp
@@ -65,7 +65,7 @@ string MetaStatsCommand::getOutputPattern(string type) {
      try {
          string pattern = "";
          
-        if (type == "metastats") {  pattern = "[filename],[distance],[groups],metastats"; } 
+        if (type == "metastats") {  pattern = "[filename],[distance],[group],metastats"; } 
          else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
          
          return pattern;
@@ -431,6 +431,9 @@ int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
                      
                      //Close all thread handles and free memory allocations.
                      for(int i=0; i < pDataArray.size(); i++){
+                        if (pDataArray[i]->count != (pDataArray[i]->num)) {
+                            m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->num) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
+                        }
                          for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) {  delete pDataArray[i]->thisLookUp[j];  } 
                          for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {  
                              outputNames.push_back(pDataArray[i]->outputNames[j]);
diff --git a/metastatscommand.h b/metastatscommand.h

index 7884130ad55dda3fff3fb7880a4c80e6fe15ed9b..12e0aebbe2d6a61409f5f749aba4a23db2841b31 100644 (file)
--- a/metastatscommand.h
+++ b/metastatscommand.h
@@ -68,7 +68,7 @@ struct metastatsData {
      vector<string> designMapGroups;
      vector<string> outputNames;
         int start;
-       int num, iters;
+       int num, iters, count;
         float threshold;
         MothurOut* m;
         string sharedfile;
@@ -86,6 +86,7 @@ struct metastatsData {
          designMapGroups = dg;
          iters = i;
          threshold = thr;
+        count=0;
         }
  };
  /**************************************************************************************************/
@@ -99,7 +100,7 @@ static DWORD WINAPI MyMetastatsThreadFunction(LPVOID lpParam){
                 
          //for each combo
                 for (int c = pDataArray->start; c < (pDataArray->start+pDataArray->num); c++) {
-                       
+                       pDataArray->count++;
                         //get set names
                         string setA = pDataArray->namesOfGroupCombos[c][0]; 
                         string setB = pDataArray->namesOfGroupCombos[c][1];
diff --git a/mothurout.cpp b/mothurout.cpp

index 468c063cb5c7e74a8d5901a1a38203f47409e2d5..dc77490e4b9bba1bc9bf6e4c16b0f402985c73d1 100644 (file)
--- a/mothurout.cpp
+++ b/mothurout.cpp
@@ -440,10 +440,22 @@ void MothurOut::errorOut(exception& e, string object, string function) {
         //double vm, rss;
         //mem_usage(vm, rss);
         
-       mothurOut("[ERROR]: ");
-       mothurOut(toString(e.what()));
-       mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
-       mothurOutEndLine();
+    string errorType = toString(e.what());
+    
+    int pos = errorType.find("bad_alloc");
+    mothurOut("[ERROR]: ");
+    mothurOut(errorType);
+    
+    if (pos == string::npos) { //not bad_alloc
+        mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
+        mothurOutEndLine();
+    }else { //bad alloc
+        if (object == "cluster"){
+            mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory.  There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt.  \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
+        }else {
+            mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory.  This is most commonly caused by trying to process a dataset too large, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G.  If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue.  Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
+        }
+    }
  }
  /*********************************************************************************************/
  //The following was originally from http://stackoverflow.com/questions/669438/how-to-get-memory-usage-at-run-time-in-c 
@@ -1418,6 +1430,83 @@ vector<unsigned long long> MothurOut::divideFile(string filename, int& proc) {
         }
  }
  /**************************************************************************************************/
+
+//Returns byte offsets that split filename into line-aligned pieces, one per process: the first is 0,
+//the last is the file size. proc is updated to the number of pieces actually produced.
+vector<unsigned long long> MothurOut::divideFilePerLine(string filename, int& proc) {
+       try{
+               vector<unsigned long long> filePos;
+               filePos.push_back(0);
+               
+               FILE * pFile;
+               unsigned long long size = 0; //stays 0 if the file cannot be opened
+               
+               filename = getFullPathName(filename);
+        
+               //get num bytes in file
+               pFile = fopen (filename.c_str(),"rb");
+               if (pFile==NULL) perror ("Error opening file");
+               else{
+                       fseek (pFile, 0, SEEK_END);
+                       size=ftell (pFile);
+                       fclose (pFile);
+               }
+               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+               //guard the division below against a non-positive process count
+               if (proc < 1) { proc = 1; }
+        
+               //estimate file breaks
+               unsigned long long chunkSize = size / proc;
+        
+               //file too small to divide by processors
+               if (chunkSize == 0)  {  proc = 1;       filePos.push_back(size); return filePos;        }
+        
+               //for each process seekg to closest file break and advance to the start of the next line. make that the filebreak
+               for (int i = 0; i < proc; i++) {
+                       unsigned long long spot = (i+1) * chunkSize;
+                       
+                       ifstream in;
+                       openInputFile(filename, in);
+                       in.seekg(spot);
+                       
+                       //look for next line break
+                       unsigned long long newSpot = spot;
+                       while (!in.eof()) {
+                               int c = in.get(); //keep the int so a 0xFF data byte is not mistaken for EOF
+                               if (c == EOF) { break; }
+                               if ((c == '\n') || (c == '\r') || (c == '\f'))  { gobble(in); newSpot = in.tellg(); break; }
+                       }
+            
+                       //there was not another line before the end of the file
+                       unsigned long long sanityPos = in.tellg();
+                       if (sanityPos == -1) {  break;  }
+                       else {  filePos.push_back(newSpot);  }
+                       
+                       in.close();
+               }
+               
+               //save end pos
+               filePos.push_back(size);
+               
+               //sanity check filePos
+               for (int i = 0; i < (filePos.size()-1); i++) {
+                       if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
+               }
+        
+               proc = (filePos.size() - 1);
+#else
+               mothurOut("[ERROR]: Windows version should not be calling the divideFilePerLine function."); mothurOutEndLine();
+               proc=1;
+               filePos.push_back(size);
+#endif
+               return filePos;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "divideFilePerLine");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
  int MothurOut::divideFile(string filename, int& proc, vector<string>& files) {
         try{
                 
@@ -1606,6 +1695,7 @@ int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    checkName(firstCol);
                      //are there confidence scores, if so remove them
                      if (secondCol.find_first_of('(') != -1) {  removeConfidences(secondCol);   }
                      map<string, string>::iterator itTax = taxMap.find(firstCol);
@@ -1633,6 +1723,7 @@ int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    checkName(firstCol);
                      //are there confidence scores, if so remove them
                      if (secondCol.find_first_of('(') != -1) {  removeConfidences(secondCol);   }
                      map<string, string>::iterator itTax = taxMap.find(firstCol);
@@ -1684,6 +1775,9 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool red
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    
                      //parse names into vector
                      vector<string> theseNames;
                      splitAtComma(secondCol, theseNames);
@@ -1702,10 +1796,13 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool red
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    
                      //parse names into vector
                      vector<string> theseNames;
                      splitAtComma(secondCol, theseNames);
-                    for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = firstCol;  }
+                    for (int i = 0; i < theseNames.size(); i++) {   nameMap[theseNames[i]] = firstCol;  }
                      pairDone = false; 
                  }
              }  
@@ -1743,6 +1840,8 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                      nameMap[secondCol] = firstCol;
                      pairDone = false; 
                  }
@@ -1758,6 +1857,8 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                      nameMap[secondCol] = firstCol;
                      pairDone = false; 
                  }
@@ -1797,6 +1898,8 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<stri
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                      //parse names into vector
                      vector<string> theseNames;
                      splitAtComma(secondCol, theseNames);
@@ -1816,6 +1919,8 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<stri
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                      //parse names into vector
                      vector<string> theseNames;
                      splitAtComma(secondCol, theseNames);
@@ -1857,7 +1962,10 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
                  if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
-                if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    nameMap[firstCol] = secondCol; pairDone = false; }
              }
                 }
                 in.close();
@@ -1869,7 +1977,10 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
                  if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
-                if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    nameMap[firstCol] = secondCol; pairDone = false; }
              }
          }
                 
@@ -1905,6 +2016,8 @@ int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap)
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                      vector<string> temp;
                      splitAtComma(secondCol, temp);
                      nameMap[firstCol] = temp;
@@ -1922,6 +2035,8 @@ int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap)
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                      vector<string> temp;
                      splitAtComma(secondCol, temp);
                      nameMap[firstCol] = temp;
@@ -1963,9 +2078,73 @@ map<string, int> MothurOut::readNames(string namefile) {
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    int num = getNumNames(secondCol);
+                    nameMap[firstCol] = num;
+                    pairDone = false;  
+                } 
+            }
+               }
+        in.close();
+        
+        if (rest != "") {
+            vector<string> pieces = splitWhiteSpace(rest);
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    int num = getNumNames(secondCol);
+                    nameMap[firstCol] = num;
+                    pairDone = false;  
+                } 
+            }
+        }
+               
+               return nameMap;
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "readNames");
+               exit(1);
+       }
+}
+/**********************************************************************************************************************/
+map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) { 
+       try {
+               map<string, int> nameMap;
+        numSeqs = 0;
+               
+               //open input file
+               ifstream in;
+               openInputFile(namefile, in);
+               
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        string firstCol, secondCol;
+        
+               while (!in.eof()) {
+                       if (control_pressed) { break; }
+                       
+            in.read(buffer, 4096);
+            vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                      int num = getNumNames(secondCol);
                      nameMap[firstCol] = num;
                      pairDone = false;  
+                    numSeqs += num;
                  } 
              }
                 }
@@ -1978,9 +2157,12 @@ map<string, int> MothurOut::readNames(string namefile) {
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                      int num = getNumNames(secondCol);
                      nameMap[firstCol] = num;
                      pairDone = false;  
+                    numSeqs += num;
                  } 
              }
          }
@@ -1993,6 +2175,19 @@ map<string, int> MothurOut::readNames(string namefile) {
                 exit(1);
         }
  }
+/************************************************************/
+//Replaces every ':' in name with '_' (in place) and sets changedSeqNames when anything was replaced. Always returns 0.
+int MothurOut::checkName(string& name) {
+    try {
+        string::size_type colon = name.find(':');
+        while (colon != string::npos) {
+            name[colon] = '_'; changedSeqNames = true;
+            colon = name.find(':', colon + 1);
+        }
+        return 0;
+    }
+    catch(exception& e) { errorOut(e, "MothurOut", "checkName"); exit(1); }
+}
  /**********************************************************************************************************************/
  int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) { 
         try {
@@ -2019,6 +2214,8 @@ int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, m
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                      int num = getNumNames(secondCol);
                      
                      map<string, string>::iterator it = fastamap.find(firstCol);
@@ -2044,6 +2241,8 @@ int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, m
                  else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                  
                  if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                      int num = getNumNames(secondCol);
                      
                      map<string, string>::iterator it = fastamap.find(firstCol);
@@ -2083,13 +2282,13 @@ set<string> MothurOut::readAccnos(string accnosfile){
              in.read(buffer, 4096);
              vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
              
-            for (int i = 0; i < pieces.size(); i++) {  names.insert(pieces[i]);  }
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.insert(pieces[i]);  }
          }
                 in.close();     
                 
          if (rest != "") {
              vector<string> pieces = splitWhiteSpace(rest);
-            for (int i = 0; i < pieces.size(); i++) {  names.insert(pieces[i]);  } 
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.insert(pieces[i]);  } 
          }
                 return names;
         }
@@ -2115,13 +2314,13 @@ int MothurOut::readAccnos(string accnosfile, vector<string>& names){
              in.read(buffer, 4096);
              vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
              
-            for (int i = 0; i < pieces.size(); i++) {  names.push_back(pieces[i]);  }
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.push_back(pieces[i]);  }
          }
                 in.close();     
          
          if (rest != "") {
              vector<string> pieces = splitWhiteSpace(rest);
-            for (int i = 0; i < pieces.size(); i++) {  names.push_back(pieces[i]);  }
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.push_back(pieces[i]);  }
          }
                 
                 return 0;
@@ -2521,7 +2720,7 @@ void MothurOut::splitAtDash(string& estim, vector<string>& container) {
                 string individual = "";
                 int estimLength = estim.size();
                 bool prevEscape = false;
-               for(int i=0;i<estimLength;i++){
+               /*for(int i=0;i<estimLength;i++){
                         if(prevEscape){
                                 individual += estim[i];
                                 prevEscape = false;
@@ -2540,7 +2739,28 @@ void MothurOut::splitAtDash(string& estim, vector<string>& container) {
                                         prevEscape = false;
                                 }
                         }
-               }
+               }*/
+        
+        
+        for(int i=0;i<estimLength;i++){
+            if(estim[i] == '-'){
+                if (prevEscape) {  individual += estim[i]; prevEscape = false;  } //add in dash because it was escaped.
+                else {
+                    container.push_back(individual);
+                    individual = "";
+                }
+            }else if(estim[i] == '\\'){
+                if (i < estimLength-1) { 
+                    if (estim[i+1] == '-') { prevEscape=true; }  //are you a backslash before a dash, if yes ignore
+                    else { individual += estim[i]; prevEscape = false;  } //if no, add in
+                }else { individual += estim[i]; }
+            }else {
+                individual += estim[i];
+            }
+        }
+        
+
+        
                 container.push_back(individual);
         }
         catch(exception& e) {
@@ -2621,6 +2841,7 @@ void MothurOut::splitAtDash(string& estim, set<int>& container) {
                 exit(1);
         }       
  }
+
  /***********************************************************************/
  string MothurOut::makeList(vector<string>& names) {
         try {
@@ -2688,11 +2909,11 @@ void MothurOut::splitAtChar(string& prefix, string& suffix, char c){
                         string space = " ";
                         while(suffix.at(0) == ' ')
                                 suffix = suffix.substr(1, suffix.length());
-               }
+               }else {  suffix = "";  }
          
-       }
+    }
         catch(exception& e) {
-               errorOut(e, "MothurOut", "splitAtComma");
+               errorOut(e, "MothurOut", "splitAtChar");
                 exit(1);
         }       
  }
@@ -2708,7 +2929,7 @@ void MothurOut::splitAtComma(string& prefix, string& suffix){
                         string space = " ";
                         while(suffix.at(0) == ' ')
                                 suffix = suffix.substr(1, suffix.length());
-               }
+               }else {  suffix = "";  }
  
         }
         catch(exception& e) {
@@ -2924,6 +3145,253 @@ bool MothurOut::checkReleaseVersion(ifstream& file, string version) {
                 exit(1);
         }
  }
+/**************************************************************************************************/
+vector<double> MothurOut::getAverages(vector< vector<double> >& dists) {
+       try{
+        //Column-wise mean: averages[i] is the mean of dists[iter][i] over every iter.
+        //An empty dists returns an empty vector rather than indexing dists[0].
+        vector<double> averages;
+        if (dists.empty()) { return averages; }
+        averages.resize(dists[0].size(), 0.0);
+        for (size_t thisIter = 0; thisIter < dists.size(); thisIter++) {
+            //entries beyond the width of dists[0] are skipped so averages is never written out of range.
+            for (size_t i = 0; i < dists[thisIter].size() && i < averages.size(); i++) {
+                averages[i] += dists[thisIter][i];
+            }
+        }
+        for (size_t i = 0; i < averages.size(); i++) {  averages[i] /= (double) dists.size(); } //finds average.
+        return averages;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getAverages");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists) {
+       try{
+        //Column-wise population standard deviation: the sum of squares is divided by dists.size(), not size-1.
+        //An empty dists returns an empty vector rather than indexing dists[0].
+        vector<double> stdDev;
+        if (dists.empty()) { return stdDev; }
+        
+        vector<double> averages = getAverages(dists);
+        stdDev.resize(dists[0].size(), 0.0);
+        for (size_t thisIter = 0; thisIter < dists.size(); thisIter++) { //sum the squared difference of each dist from its column mean
+            for (size_t j = 0; j < dists[thisIter].size() && j < stdDev.size(); j++) {
+                double diff = dists[thisIter][j] - averages[j];
+                stdDev[j] += diff * diff;
+            }
+        }
+        for (size_t i = 0; i < stdDev.size(); i++) {
+            stdDev[i] = sqrt(stdDev[i] / (double) dists.size());
+        }
+        
+        return stdDev;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getStandardDeviation");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists, vector<double>& averages) {
+       try{
+        //Column-wise population standard deviation (divides by dists.size()) around the caller-supplied means.
+        //averages needs one entry per column of dists[0]; an empty dists returns an empty vector.
+        vector<double> stdDev;
+        if (dists.empty()) { return stdDev; }
+        stdDev.resize(dists[0].size(), 0.0);
+        for (size_t thisIter = 0; thisIter < dists.size(); thisIter++) { //sum the squared difference of each dist from its column mean
+            for (size_t j = 0; j < dists[thisIter].size() && j < stdDev.size(); j++) {
+                double diff = dists[thisIter][j] - averages[j];
+                stdDev[j] += diff * diff;
+            }
+        }
+        for (size_t i = 0; i < stdDev.size(); i++) {
+            stdDev[i] = sqrt(stdDev[i] / (double) dists.size());
+        }
+        return stdDev;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getStandardDeviation");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
+       try{
+        //Summarizes calcDistsTotals[iter][calc][comparison].dist across every iter.
+        //mode == "average" yields the mean; any other mode yields the middle value of the sorted dists.
+        vector< vector<seqDist>  > calcAverages;
+        if (calcDistsTotals.empty()) { return calcAverages; } //nothing to summarize, and calcDistsTotals[0] would be out of range
+        for (size_t i = 0; i < calcDistsTotals[0].size(); i++) {  //copy seq1/seq2 from the first iter and start every dist at zero.
+            vector<seqDist> temp;
+            for (size_t j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            calcAverages.push_back(temp);
+        }
+        
+        if (mode == "average") {
+            for (size_t thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
+                for (size_t i = 0; i < calcAverages.size(); i++) {  //for each calc
+                    for (size_t j = 0; j < calcAverages[i].size(); j++) {  //for each comparison
+                        calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+                    }
+                }
+            }
+            for (size_t i = 0; i < calcAverages.size(); i++) {  //finds average.
+                for (size_t j = 0; j < calcAverages[i].size(); j++) {
+                    calcAverages[i][j].dist /= (float) calcDistsTotals.size();
+                }
+            }
+        }else { //find median
+            for (size_t i = 0; i < calcAverages.size(); i++) { //for each calc
+                for (size_t j = 0; j < calcAverages[i].size(); j++) {  //for each comparison
+                    vector<double> dists;
+                    for (size_t thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
+                        dists.push_back(calcDistsTotals[thisIter][i][j].dist);
+                    }
+                    sort(dists.begin(), dists.end());
+                    calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)]; //with an even count this is the upper of the two middle values
+                }
+            }
+        }
+
+        return calcAverages;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getAverages");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
+       try{
+        //Mean of calcDistsTotals[iter][calc][comparison].dist across every iter; seq1/seq2 come from the first iter.
+        //An empty calcDistsTotals returns an empty result rather than indexing calcDistsTotals[0].
+        vector< vector<seqDist>  > calcAverages;
+        if (calcDistsTotals.empty()) { return calcAverages; }
+        for (size_t i = 0; i < calcDistsTotals[0].size(); i++) {  //initialize sums to zero.
+            vector<seqDist> temp;
+            for (size_t j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            calcAverages.push_back(temp);
+        }
+        
+        for (size_t thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
+                for (size_t i = 0; i < calcAverages.size(); i++) {  //for each calc
+                    for (size_t j = 0; j < calcAverages[i].size(); j++) {  //for each comparison
+                        calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+                    }
+                }
+        }
+            
+        for (size_t i = 0; i < calcAverages.size(); i++) {  //finds average.
+                for (size_t j = 0; j < calcAverages[i].size(); j++) {
+                    calcAverages[i][j].dist /= (float) calcDistsTotals.size();
+                }
+        }
+        
+        return calcAverages;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getAverages");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
+       try{
+        //Population standard deviation (divides by the number of iters) of each [calc][comparison].dist across iters.
+        //An empty calcDistsTotals returns an empty result rather than indexing calcDistsTotals[0].
+        vector< vector<seqDist>  > stdDev;
+        if (calcDistsTotals.empty()) { return stdDev; }
+        vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
+        
+        for (size_t i = 0; i < calcDistsTotals[0].size(); i++) {  //initialize sums to zero.
+            vector<seqDist> temp;
+            for (size_t j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            stdDev.push_back(temp);
+        }
+        
+        for (size_t thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+            for (size_t i = 0; i < stdDev.size(); i++) {
+                for (size_t j = 0; j < stdDev[i].size(); j++) {
+                    stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
+                }
+            }
+        }
+        
+        for (size_t i = 0; i < stdDev.size(); i++) {  //mean of the squared differences, then its square root.
+            for (size_t j = 0; j < stdDev[i].size(); j++) {
+                stdDev[i][j].dist /= (float) calcDistsTotals.size();
+                stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
+            }
+        }
+        return stdDev;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getStandardDeviation");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
+       try{
+        //Population standard deviation (divides by the number of iters) around the caller-supplied calcAverages.
+        //calcAverages is indexed [calc][comparison] like calcDistsTotals[0]; an empty calcDistsTotals returns an empty result.
+        vector< vector<seqDist>  > stdDev;
+        if (calcDistsTotals.empty()) { return stdDev; }
+        for (size_t i = 0; i < calcDistsTotals[0].size(); i++) {  //initialize sums to zero.
+            vector<seqDist> temp;
+            for (size_t j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            stdDev.push_back(temp);
+        }
+        
+        for (size_t thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+            for (size_t i = 0; i < stdDev.size(); i++) {
+                for (size_t j = 0; j < stdDev[i].size(); j++) {
+                    stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
+                }
+            }
+        }
+        for (size_t i = 0; i < stdDev.size(); i++) {  //mean of the squared differences, then its square root.
+            for (size_t j = 0; j < stdDev[i].size(); j++) {
+                stdDev[i][j].dist /= (float) calcDistsTotals.size();
+                stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
+            }
+        }
+        return stdDev;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getStandardDeviation");
+               exit(1);
+       }
+}
+
  /**************************************************************************************************/
  bool MothurOut::isContainingOnlyDigits(string input) {
         try{
diff --git a/mothurout.h b/mothurout.h

index 0d2e86f24cb258b24e78367e1427fc7ee2bc7d31..be657f4df4b72eefdf9c0494201815be4eea968f 100644 (file)
--- a/mothurout.h
+++ b/mothurout.h
@@ -69,12 +69,13 @@ class MothurOut {
                 vector<string> binLabelsInFile;
                 vector<string> currentBinLabels;
                 string saveNextLabel, argv, sharedHeaderMode, groupMode;
-               bool printedHeaders, commandInputsConvertError;
+               bool printedHeaders, commandInputsConvertError, changedSeqNames;
                 
                 //functions from mothur.h
                 //file operations
          bool dirCheck(string&); //completes path, appends appropriate / or \, makes sure dir is writable.
-               vector<unsigned long long> divideFile(string, int&);
+               vector<unsigned long long> divideFile(string, int&); //divides splitting unevenness by sequence
+        vector<unsigned long long> divideFilePerLine(string, int&); //divides splitting unevenness at line breaks
                 int divideFile(string, int&, vector<string>&);
                 vector<unsigned long long> setFilePosEachLine(string, int&);
                 vector<unsigned long long> setFilePosFasta(string, int&);
@@ -102,6 +103,7 @@ class MothurOut {
          set<string> readAccnos(string);
          int readAccnos(string, vector<string>&);
          map<string, int> readNames(string);
+        map<string, int> readNames(string, unsigned long int&);
          int readTax(string, map<string, string>&);
          int readNames(string, map<string, string>&, map<string, int>&);
                 int readNames(string, map<string, string>&);
@@ -146,6 +148,7 @@ class MothurOut {
          string removeQuotes(string);
          string makeList(vector<string>&);
          bool isSubset(vector<string>, vector<string>); //bigSet, subset
+        int checkName(string&);
                 
                 //math operation
                 int factorial(int num);
@@ -155,6 +158,13 @@ class MothurOut {
                 unsigned int fromBase36(string);
                 int getRandomIndex(int); //highest
          double getStandardDeviation(vector<int>&);
+        vector<double> getStandardDeviation(vector< vector<double> >&);
+        vector<double> getStandardDeviation(vector< vector<double> >&, vector<double>&);
+        vector<double> getAverages(vector< vector<double> >&);
+        vector< vector<seqDist> > getStandardDeviation(vector< vector< vector<seqDist> > >&);
+        vector< vector<seqDist> > getStandardDeviation(vector< vector< vector<seqDist> > >&, vector< vector<seqDist> >&);
+        vector< vector<seqDist> > getAverages(vector< vector< vector<seqDist> > >&, string);
+        vector< vector<seqDist> > getAverages(vector< vector< vector<seqDist> > >&);
  
                 int control_pressed;
                 bool executing, runParse, jumble, gui, mothurCalling, debug;
@@ -249,6 +259,7 @@ class MothurOut {
              debug = false;
                         sharedHeaderMode = "";
              groupMode = "group";
+            changedSeqNames = false;
                 }
                 ~MothurOut();
  
diff --git a/newcommandtemplate.cpp b/newcommandtemplate.cpp

index b2426f5c1bf0cf38306bea7bf756c01743a21f5a..3c893d8b44a04feddefb5ec18bebdc9350064656 100644 (file)
--- a/newcommandtemplate.cpp
+++ b/newcommandtemplate.cpp
@@ -183,7 +183,7 @@ NewCommand::NewCommand(string option)  {
              
              ///variables for examples below that you will most likely want to put in the header for 
              //use by the other class functions.
-            string phylipfile, columnfile, namefile, fastafile, sharedfile, method;
+            string phylipfile, columnfile, namefile, fastafile, sharedfile, method, countfile;
              int processors;
              bool useTiming, allLines;
              vector<string> Estimators, Groups;
@@ -304,10 +304,13 @@ NewCommand::NewCommand(string option)  {
              //saved by mothur that is associated with the other files you are using as inputs.  
              //You can do so by adding the files associated with the namefile to the files vector and then asking parser to check.  
              //This saves our users headaches over file mismatches because they forgot to include the namefile, :)
-            if (namefile == "") {
-                               vector<string> files; files.push_back(fastafile);
-                               parser.getNameFile(files);
-                       }
+            if (countfile == "") { 
+                if (namefile == "") {
+                    vector<string> files; files.push_back(fastafile);
+                    parser.getNameFile(files);
+                }
+            }
+
                         
                 }
                 
diff --git a/pairwiseseqscommand.cpp b/pairwiseseqscommand.cpp

index 0f8602871561c20804b49bdc948da6194b25adbe..767fdb52702bd1a3652d6bf2c1e2b244958e48ca 100644 (file)
--- a/pairwiseseqscommand.cpp
+++ b/pairwiseseqscommand.cpp
@@ -581,6 +581,9 @@ void PairwiseSeqsCommand::createProcesses(string filename) {
                 
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
+            if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
                         CloseHandle(hThreadArray[i]);
                         delete pDataArray[i];
                 }
diff --git a/pairwiseseqscommand.h b/pairwiseseqscommand.h

index 33fcfca0b6d0207ed1f282a36061893805745203..06dbad975bc6f6b3035024f1260108e65470db64 100644 (file)
--- a/pairwiseseqscommand.h
+++ b/pairwiseseqscommand.h
@@ -128,7 +128,7 @@ static DWORD WINAPI MyPairwiseSquareThreadFunction(LPVOID lpParam){
                 outFile.setf(ios::fixed, ios::showpoint);
                 outFile << setprecision(4);
                 
-               pDataArray->count = pDataArray->end;
+               pDataArray->count = 0;
          
          int startTime = time(NULL);
          
@@ -162,6 +162,7 @@ static DWORD WINAPI MyPairwiseSquareThreadFunction(LPVOID lpParam){
          if(pDataArray->start == 0){    outFile << pDataArray->alignDB.getNumSeqs() << endl;    }
                 
                 for(int i=pDataArray->start;i<pDataArray->end;i++){
+            pDataArray->count++;
              
                         string name = pDataArray->alignDB.get(i).getName();
                         //pad with spaces to make compatible
@@ -201,7 +202,7 @@ static DWORD WINAPI MyPairwiseSquareThreadFunction(LPVOID lpParam){
                         }
                         
                 }
-               pDataArray->m->mothurOut(toString(pDataArray->end-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+               pDataArray->m->mothurOut(toString(pDataArray->count) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
                 
                 outFile.close();
          delete alignment;
diff --git a/parsefastaqcommand.cpp b/parsefastaqcommand.cpp

index 63ed3144ad2a2b86ed33c6f9449b8d8143d38204..89f97acf13ad45650a1f8294a3e88895beaa347c 100644 (file)
--- a/parsefastaqcommand.cpp
+++ b/parsefastaqcommand.cpp
@@ -16,7 +16,7 @@ vector<string> ParseFastaQCommand::setParameters(){
                 CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq);
                 CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
                 CommandParameter pqual("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqual);
-               CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa", "sanger", "", "", "","",false,false,true); parameters.push_back(pformat);
+               CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "sanger", "", "", "","",false,false,true); parameters.push_back(pformat);
          CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                 
@@ -36,7 +36,7 @@ string ParseFastaQCommand::getHelpString(){
                 helpString += "The fastq.info command reads a fastq file and creates a fasta and quality file.\n";
                 helpString += "The fastq.info command parameters are fastq, fasta, qfile and format; fastq is required.\n";
          helpString += "The fastq.info command should be in the following format: fastq.info(fastaq=yourFastaQFile).\n";
-               helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa or illumina, default=sanger.\n";
+               helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n";
          helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
          helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
                 helpString += "Example fastq.info(fastaq=test.fastaq).\n";
@@ -136,8 +136,8 @@ ParseFastaQCommand::ParseFastaQCommand(string option){
                         
              format = validParameter.validFile(parameters, "format", false);            if (format == "not found"){     format = "sanger";      }
              
-            if ((format != "sanger") && (format != "illumina") && (format != "solexa"))  { 
-                               m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa and illumina, aborting." ); m->mothurOutEndLine();
+            if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa"))  { 
+                               m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." ); m->mothurOutEndLine();
                                 abort=true;
                         }
  
@@ -249,21 +249,28 @@ vector<int> ParseFastaQCommand::convertQual(string qual) {
         try {
                 vector<int> qualScores;
                 
+        bool negativeScores = false;
+        
                 for (int i = 0; i < qual.length(); i++) { 
              
              int temp = 0;
              temp = int(qual[i]);
              if (format == "illumina") {
                  temp -= 64; //char '@'
+            }else if (format == "illumina1.8+") {
+                temp -= int('!'); //char '!'
              }else if (format == "solexa") {
                  temp = int(convertTable[temp]); //convert to sanger
                  temp -= int('!'); //char '!'
              }else {
                  temp -= int('!'); //char '!'
              }
+            if (temp < -5) { negativeScores = true; }
                         qualScores.push_back(temp);
                 }
                 
+        if (negativeScores) { m->mothurOut("[ERROR]: finding negative quality scores, do you have the right format selected? http://en.wikipedia.org/wiki/FASTQ_format#Encoding \n");  m->control_pressed = true;  }
+        
                 return qualScores;
         }
         catch(exception& e) {
diff --git a/parsefastaqcommand.h b/parsefastaqcommand.h

index b4d90e542a98f329dd166a93c54d57852e0f54dd..cb86bd66c0ae8597f605006893fd70371d16b5fc 100644 (file)
--- a/parsefastaqcommand.h
+++ b/parsefastaqcommand.h
@@ -21,7 +21,7 @@ public:
         ~ParseFastaQCommand() {}
         
         vector<string> setParameters();
-       string getCommandName()                 { return "parse.fastq";         }
+       string getCommandName()                 { return "fastq.info";          }
         string getCommandCategory()             { return "Sequence Processing"; }
         
         string getHelpString(); 
diff --git a/pcrseqscommand.h b/pcrseqscommand.h

index 9fc40419411ff2535f01eb150699b30227225a3b..c6f7ff9c7f6670073a7080496f80eac1cdfb2a72 100644 (file)
--- a/pcrseqscommand.h
+++ b/pcrseqscommand.h
@@ -132,9 +132,9 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                 }
          
          set<int> lengths;
-               pDataArray->count = pDataArray->fend;
+               
                 for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
-            
+            pDataArray->count++;
                         if (pDataArray->m->control_pressed) {  break; }
                         
                         Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
diff --git a/prcseqscommand.cpp b/prcseqscommand.cpp

index 5fc9f988be038bb02a724e38cc11b2d9ddf1fa3e..4d5b6d963a026433c5851de6d444edc524cd6d1a 100644 (file)
--- a/prcseqscommand.cpp
+++ b/prcseqscommand.cpp
@@ -516,6 +516,9 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
                         num += pDataArray[i]->count;
+            if (pDataArray[i]->count != pDataArray[i]->fend) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->fend) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
              for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) {        badSeqNames.insert(*it);       }
                         CloseHandle(hThreadArray[i]);
                         delete pDataArray[i];
diff --git a/preclustercommand.cpp b/preclustercommand.cpp

index 0c7895cfd3c4b5c1988977f102d5523bfe20f9e0..a1d5f48bdd77c81bb12d0095628922202a4bcacd 100644 (file)
--- a/preclustercommand.cpp
+++ b/preclustercommand.cpp
@@ -19,6 +19,8 @@ vector<string> PreClusterCommand::setParameters(){
                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
                 CommandParameter pdiffs("diffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pdiffs);
                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+        CommandParameter ptopdown("topdown", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptopdown);
+
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                 
@@ -37,11 +39,12 @@ string PreClusterCommand::getHelpString(){
                 string helpString = "";
                 helpString += "The pre.cluster command groups sequences that are within a given number of base mismatches.\n";
                 helpString += "The pre.cluster command outputs a new fasta and name file.\n";
-               helpString += "The pre.cluster command parameters are fasta, name, group, count, processors and diffs. The fasta parameter is required. \n";
+               helpString += "The pre.cluster command parameters are fasta, name, group, count, topdown, processors and diffs. The fasta parameter is required. \n";
                 helpString += "The name parameter allows you to give a list of seqs that are identical. This file is 2 columns, first column is name or representative sequence, second column is a list of its identical sequences separated by commas.\n";
                 helpString += "The group parameter allows you to provide a group file so you can cluster by group. \n";
          helpString += "The count parameter allows you to provide a count file so you can cluster by group. \n";
                 helpString += "The diffs parameter allows you to specify maximum number of mismatched bases allowed between sequences in a grouping. The default is 1.\n";
+        helpString += "The topdown parameter allows you to specify whether to cluster from largest abundance to smallest or smallest to largest.  Default=T, meanging largest to smallest.\n";
                 helpString += "The pre.cluster command should be in the following format: \n";
                 helpString += "pre.cluster(fasta=yourFastaFile, names=yourNamesFile, diffs=yourMaxDiffs) \n";
                 helpString += "Example pre.cluster(fasta=amazon.fasta, diffs=2).\n";
@@ -210,6 +213,9 @@ PreClusterCommand::PreClusterCommand(string option) {
                         m->setProcessors(temp);
                         m->mothurConvert(temp, processors);
                         
+            temp = validParameter.validFile(parameters, "topdown", false);             if(temp == "not found"){  temp = "T"; }
+                       topdown = m->isTrue(temp);
+            
              if (countfile == "") {
                  if (namefile == "") {
                      vector<string> files; files.push_back(fastafile);
@@ -440,7 +446,7 @@ int PreClusterCommand::createProcessesGroups(string newFName, string newNName, s
                         // Allocate memory for thread data.
                         string extension = toString(i) + ".temp";
                         
-                       preClusterData* tempPreCluster = new preClusterData(fastafile, namefile, groupfile, countfile, (newFName+extension), (newNName+extension), newMFile, groups, m, lines[i].start, lines[i].end, diffs, i);
+                       preClusterData* tempPreCluster = new preClusterData(fastafile, namefile, groupfile, countfile, (newFName+extension), (newNName+extension), newMFile, groups, m, lines[i].start, lines[i].end, diffs, topdown, i);
                         pDataArray.push_back(tempPreCluster);
                         processIDS.push_back(i);
                         
@@ -458,6 +464,9 @@ int PreClusterCommand::createProcessesGroups(string newFName, string newNName, s
                 
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
+            if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
                         for (int j = 0; j < pDataArray[i]->mapFileNames.size(); j++) {
                                 outputNames.push_back(pDataArray[i]->mapFileNames[j]); outputTypes["map"].push_back(pDataArray[i]->mapFileNames[j]); 
                         }
@@ -545,7 +554,8 @@ int PreClusterCommand::process(string newMapFile){
                 m->openOutputFile(newMapFile, out);
                 
                 //sort seqs by number of identical seqs
-               sort(alignSeqs.begin(), alignSeqs.end(), comparePriority);
+        if (topdown) { sort(alignSeqs.begin(), alignSeqs.end(), comparePriorityTopDown);  }
+        else {  sort(alignSeqs.begin(), alignSeqs.end(), comparePriorityDownTop);  }
                 
                 int count = 0;
                 int numSeqs = alignSeqs.size();
diff --git a/preclustercommand.h b/preclustercommand.h

index 56bf1f36deaaea02b1357b1d229d87ad96bcafaa..e0767826ecde2fcf8056bbdebfb8d1bd8ce435c4 100644 (file)
--- a/preclustercommand.h
+++ b/preclustercommand.h
@@ -29,13 +29,21 @@ struct seqPNode {
         ~seqPNode() {}
  };
  /************************************************************/
-inline bool comparePriority(seqPNode first, seqPNode second) {  
+inline bool comparePriorityTopDown(seqPNode first, seqPNode second) {  
      if (first.numIdentical > second.numIdentical) { return true;  }
      else if (first.numIdentical == second.numIdentical) { 
          if (first.seq.getName() > second.seq.getName()) { return true; }
      }
      return false; 
  }
+/************************************************************/
+// Sort predicate: a node with fewer identical sequences orders first; when the
+// counts are equal, the node whose sequence name compares greater orders first.
+inline bool comparePriorityDownTop(seqPNode first, seqPNode second) {
+    if (first.numIdentical != second.numIdentical) {
+        return first.numIdentical < second.numIdentical;
+    }
+    return first.seq.getName() > second.seq.getName();
+}
  //************************************************************/
  
  class PreClusterCommand : public Command {
@@ -71,7 +79,7 @@ private:
      CountTable ct;
      
         int diffs, length, processors;
-       bool abort, bygroup;
+       bool abort, bygroup, topdown;
         string fastafile, namefile, outputDir, groupfile, countfile;
         vector<seqPNode> alignSeqs; //maps the number of identical seqs to a sequence
         map<string, string> names; //represents the names file first column maps to second column
@@ -103,13 +111,14 @@ struct preClusterData {
         string newFName, newNName, newMName;
         MothurOut* m;
         int start;
-       int end;
+       int end, count;
         int diffs, threadID;
         vector<string> groups;
         vector<string> mapFileNames;
+    bool topdown;
         
         preClusterData(){}
-       preClusterData(string f, string n, string g, string c, string nff,  string nnf, string nmf, vector<string> gr, MothurOut* mout, int st, int en, int d, int tid) {
+       preClusterData(string f, string n, string g, string c, string nff,  string nnf, string nmf, vector<string> gr, MothurOut* mout, int st, int en, int d, bool td, int tid) {
                 fastafile = f;
                 namefile = n;
                 groupfile = g;
@@ -123,6 +132,8 @@ struct preClusterData {
                 threadID = tid;
                 groups = gr;
          countfile = c;
+        topdown = td;
+        count=0;
         }
  };
  
@@ -154,6 +165,8 @@ static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){
                 //precluster each group
                 for (int k = pDataArray->start; k < pDataArray->end; k++) {
                         
+            pDataArray->count++;
+            
                         int start = time(NULL);
                         
                         if (pDataArray->m->control_pressed) {  delete parser; return 0; }
@@ -235,9 +248,10 @@ static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){
                         pDataArray->m->openOutputFile(pDataArray->newMName+pDataArray->groups[k]+".map", out);
                         pDataArray->mapFileNames.push_back(pDataArray->newMName+pDataArray->groups[k]+".map");
                         
-                       //sort seqs by number of identical seqs
-                       sort(alignSeqs.begin(), alignSeqs.end(), comparePriority);
-                       
+            //sort seqs by number of identical seqs
+            if (pDataArray->topdown) { sort(alignSeqs.begin(), alignSeqs.end(), comparePriorityTopDown);  }
+            else {  sort(alignSeqs.begin(), alignSeqs.end(), comparePriorityDownTop);  }
+            
                         int count = 0;
                         
                         //think about running through twice...
diff --git a/primerdesigncommand.cpp b/primerdesigncommand.cpp

new file mode 100644 (file)

index 0000000..59369b3
--- /dev/null
+++ b/primerdesigncommand.cpp
@@ -0,0 +1,1241 @@
+//
+//  primerdesigncommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 1/18/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "primerdesigncommand.h"
+
+//**********************************************************************************************************************
+// Registers every parameter primer.design accepts in the `parameters` member and
+// returns the parameter names in registration order.
+vector<string> PrimerDesignCommand::setParameters(){
+    try {
+        parameters.push_back(CommandParameter("label", "String", "", "", "", "", "","",false,false));
+        parameters.push_back(CommandParameter("list", "InputTypes", "", "", "none", "none", "none","summary-list",false,true,true));
+        parameters.push_back(CommandParameter("fasta", "InputTypes", "", "", "none", "none", "none","",false,true, true));
+        parameters.push_back(CommandParameter("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true));
+        parameters.push_back(CommandParameter("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true));
+        parameters.push_back(CommandParameter("length", "Number", "", "18", "", "", "","",false,false));
+        parameters.push_back(CommandParameter("mintm", "Number", "", "-1", "", "", "","",false,false));
+        parameters.push_back(CommandParameter("maxtm", "Number", "", "-1", "", "", "","",false,false));
+        parameters.push_back(CommandParameter("processors", "Number", "", "1", "", "", "","",false,false));
+        parameters.push_back(CommandParameter("otunumber", "Number", "", "-1", "", "", "","",false,true,true));
+        parameters.push_back(CommandParameter("pdiffs", "Number", "", "0", "", "", "","",false,false,true));
+        parameters.push_back(CommandParameter("cutoff", "Number", "", "100", "", "", "","",false,false));
+        parameters.push_back(CommandParameter("inputdir", "String", "", "", "", "", "","",false,false));
+        parameters.push_back(CommandParameter("outputdir", "String", "", "", "", "", "","",false,false));
+
+        //collect the names of everything registered so far
+        vector<string> paramNames;
+        for (size_t idx = 0; idx < parameters.size(); idx++) {
+            paramNames.push_back(parameters[idx].name);
+        }
+        return paramNames;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PrimerDesignCommand", "setParameters");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Builds the help text shown for primer.design.
+// The usage line and the example include otunumber because the option constructor
+// aborts with "You must provide an OTU number" when it is missing.
+string PrimerDesignCommand::getHelpString(){
+    try {
+        string helpString = "";
+        helpString += "The primer.design command allows you to identify sequence fragments that are specific to particular OTUs.\n";
+        helpString += "The primer.design command parameters are: list, fasta, name, count, otunumber, cutoff, length, pdiffs, mintm, maxtm, processors and label.\n";
+        helpString += "The list parameter allows you to provide a list file and is required.\n";
+        helpString += "The fasta parameter allows you to provide a fasta file and is required.\n";
+        helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
+        helpString += "The count parameter allows you to provide a count file associated with your fasta file.\n";
+        helpString += "The label parameter is used to indicate the label you want to use from your list file.\n";
+        helpString += "The otunumber parameter is used to indicate the otu you want to use from your list file. It is required.\n";
+        helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+        helpString += "The length parameter is used to indicate the length of the primer. The default is 18.\n";
+        helpString += "The mintm parameter is used to indicate minimum melting temperature.\n";
+        helpString += "The maxtm parameter is used to indicate maximum melting temperature.\n";
+        helpString += "The processors parameter allows you to indicate the number of processors you want to use. Default=1.\n";
+        helpString += "The cutoff parameter allows you to set a percentage of sequences that support the base. For example: cutoff=97 would only return a sequence that only showed ambiguities for bases that were not supported by at least 97% of sequences.\n";
+        helpString += "The primer.design command should be in the following format: primer.design(list=yourListFile, fasta=yourFastaFile, name=yourNameFile, otunumber=yourOtuNumber)\n";
+        helpString += "primer.design(list=final.an.list, fasta=final.fasta, name=final.names, label=0.03, otunumber=1)\n";
+        return helpString;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PrimerDesignCommand", "getHelpString");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Returns the output file name pattern for the given output type ("fasta",
+// "summary" or "list"). Any other type is reported as an error, sets
+// m->control_pressed and yields an empty pattern.
+string PrimerDesignCommand::getOutputPattern(string type) {
+    try {
+        if (type == "fasta")   { return "[filename],[distance],otu.cons.fasta"; }
+        if (type == "summary") { return "[filename],[distance],primer.summary"; }
+        if (type == "list")    { return "[filename],pick,[extension]"; }
+
+        //no pattern is defined for this type
+        m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PrimerDesignCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Default constructor: registers the parameters and the output types, and leaves
+// the command flagged as aborted with calledHelp set.
+PrimerDesignCommand::PrimerDesignCommand(){
+    try {
+        abort = true;
+        calledHelp = true;
+        setParameters();
+
+        //every output type starts with an empty list of file names
+        const vector<string> noFiles;
+        outputTypes["summary"] = noFiles;
+        outputTypes["fasta"] = noFiles;
+        outputTypes["list"] = noFiles;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PrimerDesignCommand", "PrimerDesignCommand");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Option constructor: parses the option string, validates the parameter names,
+// resolves the input files (name, count, fasta, list) and reads the numeric
+// settings. Sets abort when validation fails or a required input is missing.
+PrimerDesignCommand::PrimerDesignCommand(string option)  {
+    try {
+        abort = false; calledHelp = false;
+
+        //help and citation requests print their text and leave the command aborted
+        if (option == "help")     { help();     abort = true; calledHelp = true; return; }
+        if (option == "citation") { citation(); abort = true; calledHelp = true; return; }
+
+        //names of the parameters this command accepts
+        vector<string> validNames = setParameters();
+
+        OptionParser parser(option);
+        map<string,string> parameters = parser.getParameters();
+
+        //abort if any supplied parameter fails validation
+        ValidParameters validParameter;
+        map<string,string>::iterator it;
+        for (it = parameters.begin(); it != parameters.end(); ++it) {
+            if (validParameter.isValidParameter(it->first, validNames, it->second) != true) { abort = true; }
+        }
+
+        //every output type starts with an empty list of file names
+        const vector<string> noFiles;
+        outputTypes["summary"] = noFiles;
+        outputTypes["fasta"] = noFiles;
+        outputTypes["list"] = noFiles;
+
+        //if the user changes the input directory command factory will send this info to us in the output parameter
+        string inputDir = validParameter.validFile(parameters, "inputdir", false);
+        if (inputDir == "not found") {
+            inputDir = "";
+        }
+        else {
+            //prefix inputdir to each file parameter that was given without a path
+            const char* fileParams[] = { "count", "fasta", "name", "list" };
+            for (int p = 0; p < 4; p++) {
+                it = parameters.find(fileParams[p]);
+                if (it == parameters.end()) { continue; }
+                if (m->hasPath(it->second) == "") { it->second = inputDir + it->second; }
+            }
+        }
+
+        //name file - optional
+        namefile = validParameter.validFile(parameters, "name", true);
+        if (namefile == "not open")       { abort = true; }
+        else if (namefile == "not found") { namefile = ""; }
+        else                              { m->setNameFile(namefile); }
+
+        //count file - optional
+        countfile = validParameter.validFile(parameters, "count", true);
+        if (countfile == "not open")       { countfile = ""; abort = true; }
+        else if (countfile == "not found") { countfile = ""; }
+        else                               { m->setCountTableFile(countfile); }
+
+        //fasta file - required, falls back to the current fasta file
+        fastafile = validParameter.validFile(parameters, "fasta", true);
+        if (fastafile == "not open") {
+            fastafile = ""; abort = true;
+        }
+        else if (fastafile == "not found") {
+            fastafile = m->getFastaFile();
+            if (fastafile != "") {
+                m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine();
+            }
+            else {
+                m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine();
+                abort = true;
+            }
+        }
+        else {
+            m->setFastaFile(fastafile);
+        }
+
+        //list file - required, falls back to the current list file
+        listfile = validParameter.validFile(parameters, "list", true);
+        if (listfile == "not open") {
+            listfile = ""; abort = true;
+        }
+        else if (listfile == "not found") {
+            listfile = m->getListFile();
+            if (listfile != "") {
+                m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine();
+            }
+            else {
+                m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine();
+                abort = true;
+            }
+        }
+        else {
+            m->setListFile(listfile);
+        }
+
+        //name and count are mutually exclusive
+        if ((namefile != "") && (countfile != "")) {
+            m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine();
+            abort = true;
+        }
+
+        //if the user changes the output directory command factory will send this info to us in the output parameter
+        outputDir = validParameter.validFile(parameters, "outputdir", false);
+        if (outputDir == "not found") {
+            outputDir = m->hasPath(listfile); //if user entered a file with a path then preserve it
+        }
+
+        //numeric settings, each with the default used when the parameter is absent
+        string temp = validParameter.validFile(parameters, "cutoff", false);
+        if (temp == "not found") { temp = "100"; }
+        m->mothurConvert(temp, cutoff);
+
+        temp = validParameter.validFile(parameters, "pdiffs", false);
+        if (temp == "not found") { temp = "0"; }
+        m->mothurConvert(temp, pdiffs);
+
+        temp = validParameter.validFile(parameters, "length", false);
+        if (temp == "not found") { temp = "18"; }
+        m->mothurConvert(temp, length);
+
+        temp = validParameter.validFile(parameters, "mintm", false);
+        if (temp == "not found") { temp = "-1"; }
+        m->mothurConvert(temp, minTM);
+
+        temp = validParameter.validFile(parameters, "maxtm", false);
+        if (temp == "not found") { temp = "-1"; }
+        m->mothurConvert(temp, maxTM);
+
+        //otunumber has no usable default: anything below 1 aborts
+        temp = validParameter.validFile(parameters, "otunumber", false);
+        if (temp == "not found") { temp = "-1"; }
+        m->mothurConvert(temp, otunumber);
+        if (otunumber < 1) {  m->mothurOut("[ERROR]: You must provide an OTU number, aborting.\n"); abort = true; }
+
+        temp = validParameter.validFile(parameters, "processors", false);
+        if (temp == "not found") { temp = m->getProcessors(); }
+        m->setProcessors(temp);
+        m->mothurConvert(temp, processors);
+
+        label = validParameter.validFile(parameters, "label", false);
+        if (label == "not found") {
+            label = "";
+            m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine();
+        }
+
+        //with neither a name nor a count file, hand the fasta file to the parser's name file lookup
+        if ((countfile == "") && (namefile == "")) {
+            vector<string> files(1, fastafile);
+            parser.getNameFile(files);
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PrimerDesignCommand", "PrimerDesignCommand");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Runs primer.design: reads the list file, writes a consensus fasta for the OTUs,
+// extracts candidate primers from the consensus sequence of OTU `otunumber`, keeps
+// those whose melting temperatures satisfy mintm/maxtm, writes the primer summary,
+// and writes a new list file without the OTUs returned by createProcesses.
+// Returns 2 when the command was aborted without a help request, otherwise 0.
+int PrimerDesignCommand::execute(){
+    try {
+
+        if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+
+        int start = time(NULL);
+        //////////////////////////////////////////////////////////////////////////////
+        //              get file inputs                                             //
+        //////////////////////////////////////////////////////////////////////////////
+
+        //reads list file and selects the label the users specified or the first label
+        getListVector();
+        if (otunumber > list->getNumBins()) {
+            m->mothurOut("[ERROR]: You selected an OTU number larger than the number of OTUs you have in your list file, quitting.\n");
+            delete list; //release the list here as every other exit path does
+            return 0;
+        }
+
+        //capture the OTU count now: it is reported at the end, after list is deleted
+        const int numBins = list->getNumBins();
+
+        map<string, int> nameMap;
+        unsigned long int numSeqs;  //used to sanity check the files. numSeqs = total seqs for namefile and uniques for count.
+                                    //list file should have all seqs if namefile was used to create it and only uniques in count file was used.
+
+        if (namefile != "")         {  nameMap = m->readNames(namefile, numSeqs);       }
+        else if (countfile != "")   {  nameMap = readCount(numSeqs);                    }
+        else { numSeqs = list->getNumSeqs();  }
+
+        //sanity check
+        if (numSeqs != list->getNumSeqs()) {
+            if (namefile != "")         {  m->mothurOut("[ERROR]: Your list file contains " + toString(list->getNumSeqs()) + " sequences, and your name file contains " + toString(numSeqs) + " sequences, aborting. Do you have the correct files? Perhaps you forgot to include the name file when you clustered? \n");   }
+            else if (countfile != "") {
+                m->mothurOut("[ERROR]: Your list file contains " + toString(list->getNumSeqs()) + " sequences, and your count file contains " + toString(numSeqs) + " unique sequences, aborting. Do you have the correct files? Perhaps you forgot to include the count file when you clustered? \n");
+            }
+            m->control_pressed = true;
+        }
+
+        if (m->control_pressed) { delete list; return 0; }
+
+        //////////////////////////////////////////////////////////////////////////////
+        //              process data                                                //
+        //////////////////////////////////////////////////////////////////////////////
+        m->mothurOut("\nFinding consensus sequences for each otu..."); cout.flush();
+
+        vector<Sequence> conSeqs = createProcessesConSeqs(nameMap, numSeqs);
+
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
+        variables["[distance]"] = list->getLabel();
+        string consFastaFile = getOutputFileName("fasta", variables);
+        outputNames.push_back(consFastaFile); outputTypes["fasta"].push_back(consFastaFile);
+        ofstream out;
+        m->openOutputFile(consFastaFile, out);
+        for (int i = 0; i < conSeqs.size(); i++) {  conSeqs[i].printSequence(out);  }
+        out.close();
+
+        m->mothurOut("Done.\n\n");
+
+        set<string> primers = getPrimer(conSeqs[otunumber-1]);
+
+        if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0; }
+
+        string consSummaryFile = getOutputFileName("summary", variables);
+        outputNames.push_back(consSummaryFile); outputTypes["summary"].push_back(consSummaryFile);
+        ofstream outSum;
+        m->openOutputFile(consSummaryFile, outSum);
+
+        outSum << "PrimerOtu: " << otunumber << " Members: " << list->get(otunumber-1) << endl << "Primers\tminTm\tmaxTm" << endl;
+
+        //find min and max melting points
+        vector<double> minTms;
+        vector<double> maxTms;
+        for (set<string>::iterator it = primers.begin(); it != primers.end();) {
+
+            double minTm, maxTm;
+            findMeltingPoint(*it, minTm, maxTm);
+
+            //a bound of -1 means the user did not set it, so that side always passes
+            bool aboveMin = (minTM == -1) || (minTm >= minTM);
+            bool belowMax = (maxTM == -1) || (maxTm <= maxTM);
+
+            if (aboveMin && belowMax) {
+                minTms.push_back(minTm);
+                maxTms.push_back(maxTm);
+                outSum << *it << '\t' << minTm << '\t' << maxTm << endl;
+                it++;
+            }else { primers.erase(it++);  } //erase because it didn't qualify
+        }
+
+        outSum << "\nOTUNumber\tPrimer\tStart\tEnd\tLength\tMismatches\tminTm\tmaxTm\n";
+        outSum.close();
+
+        //check each otu's conseq for each primer in otunumber
+        set<int> otuToRemove = createProcesses(consSummaryFile, minTms, maxTms, primers, conSeqs);
+
+        if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0; }
+
+        //print new list file
+        map<string, string> mvariables;
+        mvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
+        mvariables["[extension]"] = m->getExtension(listfile);
+        string newListFile = getOutputFileName("list", mvariables);
+        outputNames.push_back(newListFile); outputTypes["list"].push_back(newListFile);
+        ofstream outList;
+        m->openOutputFile(newListFile, outList);
+
+        outList << list->getLabel() << '\t' << (list->getNumBins()-otuToRemove.size()) << '\t';
+        for (int j = 0; j < list->getNumBins(); j++) {
+            if (m->control_pressed) { break; }
+            //good otus
+            if (otuToRemove.count(j) == 0) {
+                string bin = list->get(j);
+                if (bin != "") {  outList << bin << '\t';  }
+            }
+        }
+        outList << endl;
+        outList.close();
+
+        if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0; }
+
+        delete list;
+
+        //list must not be dereferenced past this point, so report the saved OTU count
+        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numBins) + " OTUs.\n");
+
+
+        //output files created by command
+        m->mothurOutEndLine();
+        m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+        for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+        m->mothurOutEndLine();
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PrimerDesignCommand", "execute");
+        exit(1);
+    }
+}
+//********************************************************************/
+//used http://www.biophp.org/minitools/melting_temperature/ as a reference to substitute degenerate bases 
+// in order to find the min and max Tm values.
+//Tm =  64.9°C + 41°C x (number of G’s and C’s in the primer – 16.4)/N
+
+/* A = adenine
+ * C = cytosine
+ * G = guanine
+ * T = thymine
+ * R = G A (purine)
+ * Y = T C (pyrimidine)
+ * K = G T (keto)
+ * M = A C (amino)
+ * S = G C (strong bonds)
+ * W = A T (weak bonds)
+ * B = G T C (all but A)
+ * D = G A T (all but C)
+ * H = A C T (all but G)
+ * V = G C A (all but T)
+ * N = A G C T (any) */
+
+// Computes the lowest and highest melting temperature a (possibly degenerate)
+// primer can have, using Tm = 64.9 + 41 * (numGC - 16.4) / N.
+// minTm counts only the bases that are always G or C; maxTm also counts every
+// degenerate code that is resolved to G. Results are written to minTm and maxTm;
+// the return value is always 0.
+int PrimerDesignCommand::findMeltingPoint(string primer, double& minTm, double& maxTm){
+    try {
+        int fewestGC = 0;   //G/C count when degenerate bases resolve away from G/C
+        int mostGC = 0;     //G/C count when degenerate bases resolve towards G/C
+
+        for (int pos = 0; pos < primer.length(); pos++) {
+            char base = toupper(primer[pos]);
+
+            switch (base) {
+                //G, C and S (G or C) count as G/C in both cases
+                case 'G': case 'C': case 'S':
+                    fewestGC++;
+                    mostGC++;
+                    break;
+                //these codes become A for the minimum and G for the maximum
+                case 'Y': case 'R': case 'K': case 'M': case 'D':
+                case 'V': case 'H': case 'B': case 'N':
+                    mostGC++;
+                    break;
+                //A, T, W (A or T) and any other character are never counted as G/C
+                default:
+                    break;
+            }
+        }
+
+        minTm = 64.9 + 41 * (fewestGC - 16.4) / (double) primer.length();
+        maxTm = 64.9 + 41 * (mostGC - 16.4) / (double) primer.length();
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PrimerDesignCommand", "findMeltingPoint");
+        exit(1);
+    }
+}
+//********************************************************************/
+//search for a primer over the sequence string
+// Slides a window of `length` bases over rawSequence and records every window that
+// differs from primer by at most pdiffs positions (as counted by countDiffs).
+// For each hit the window start, the exclusive end (start + length) and the number
+// of differences are appended to primerStart, primerEnd and mismatches.
+// Returns true when at least one window matched.
+bool PrimerDesignCommand::findPrimer(string rawSequence, string primer, vector<int>& primerStart, vector<int>& primerEnd, vector<int>& mismatches){
+    try {
+        bool foundAtLeastOne = false;  //innocent til proven guilty
+
+        const int seqLength = (int) rawSequence.length();
+
+        //a sequence shorter than the primer cannot contain it
+        if (seqLength < (int) primer.length()) {  return false;  }
+
+        //the windows are cut with the member length, so the sequence must also hold one
+        //full window; the former unsigned rawSequence.length()-length wrapped around here
+        //and substr() ended up throwing
+        if (seqLength < length) {  return false;  }
+
+        //signed arithmetic: cannot wrap
+        //NOTE(review): the window starting at seqLength-length is never tested - looks
+        //like an off-by-one, bound kept as is so existing results do not change; confirm
+        const int stopAt = seqLength - length;
+
+        //search for primer
+        for (int j = 0; j < stopAt; j++){
+
+            if (m->control_pressed) {  return foundAtLeastOne; }
+
+            string rawChunk = rawSequence.substr(j, length);
+
+            int numDiff = countDiffs(primer, rawChunk);
+
+            if(numDiff <= pdiffs){
+                primerStart.push_back(j);
+                primerEnd.push_back(j+length);
+                mismatches.push_back(numDiff);
+                foundAtLeastOne = true;
+            }
+        }
+
+        return foundAtLeastOne;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PrimerDesignCommand", "findPrimer");
+        exit(1);
+    }
+}
+//********************************************************************/
+//find all primers for the given sequence
+set<string> PrimerDesignCommand::getPrimer(Sequence primerSeq){
+       try {
+        set<string> primers;
+        
+        string rawSequence = primerSeq.getUnaligned();
+        
+        for (int j = 0; j < rawSequence.length()-length; j++){
+            if (m->control_pressed) { break; }
+            
+            string primer = rawSequence.substr(j, length);
+            primers.insert(primer);
+        }
+        
+        return primers;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "getPrimer");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+set<int> PrimerDesignCommand::createProcesses(string newSummaryFile, vector<double>& minTms, vector<double>& maxTms, set<string>& primers, vector<Sequence>& conSeqs) {
+       try {
+               
+               vector<int> processIDS;
+               int process = 1;
+        set<int> otusToRemove;
+        int numBinsProcessed = 0;
+               
+               //sanity check
+        int numBins = conSeqs.size();
+               if (numBins < processors) { processors = numBins; }
+               
+               //divide the otus between the processors
+               vector<linePair> lines;
+               int numOtusPerProcessor = numBins / processors;
+               for (int i = 0; i < processors; i++) {
+                       int startIndex =  i * numOtusPerProcessor;
+                       int endIndex = (i+1) * numOtusPerProcessor;
+                       if(i == (processors - 1)){      endIndex = numBins;     }
+                       lines.push_back(linePair(startIndex, endIndex));
+               }
+               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                //clear old file because we append in driver
+                m->mothurRemove(newSummaryFile + toString(getpid()) + ".temp");
+                
+                               otusToRemove = driver(newSummaryFile + toString(getpid()) + ".temp", minTms, maxTms, primers, conSeqs, lines[process].start, lines[process].end, numBinsProcessed);
+                
+                string tempFile = toString(getpid()) + ".otus2Remove.temp";
+                ofstream outTemp;
+                m->openOutputFile(tempFile, outTemp);
+                
+                outTemp << numBinsProcessed << endl;
+                outTemp << otusToRemove.size() << endl;
+                for (set<int>::iterator it = otusToRemove.begin(); it != otusToRemove.end(); it++) { outTemp << *it << endl; }
+                outTemp.close();
+                
+                               exit(0);
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
+                       }
+               }
+               
+               //do my part
+               otusToRemove = driver(newSummaryFile, minTms, maxTms, primers, conSeqs, lines[0].start, lines[0].end, numBinsProcessed);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+        
+        for (int i = 0; i < processIDS.size(); i++) {
+            string tempFile = toString(processIDS[i]) +  ".otus2Remove.temp";
+            ifstream intemp;
+            m->openInputFile(tempFile, intemp);
+            
+            int num;
+            intemp >> num; m->gobble(intemp);
+            if (num != (lines[i+1].end - lines[i+1].start)) { m->mothurOut("[ERROR]: process " + toString(processIDS[i]) + " did not complete processing all OTUs assigned to it, quitting.\n"); m->control_pressed = true; }
+            intemp >> num; m->gobble(intemp);
+            for (int k = 0; k < num; k++) {
+                int otu;
+                intemp >> otu; m->gobble(intemp);
+                otusToRemove.insert(otu); 
+            }
+            intemp.close();
+            m->mothurRemove(tempFile);
+        }
+
+        
+    #else
+               
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the primerDesignData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<primerDesignData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=1; i<processors; i++ ){
+                       // Allocate memory for thread data.
+                       string extension = toString(i) + ".temp";
+                       m->mothurRemove(newSummaryFile+extension);
+            
+                       primerDesignData* tempPrimer = new primerDesignData((newSummaryFile+extension), m, lines[i].start, lines[i].end, minTms, maxTms, primers, conSeqs, pdiffs, otunumber, length, i);
+                       pDataArray.push_back(tempPrimer);
+                       processIDS.push_back(i);
+                       
+                       //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i-1] = CreateThread(NULL, 0, MyPrimerThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
+               }
+               
+        
+               //using the main process as a worker saves time and memory
+               otusToRemove = driver(newSummaryFile, minTms, maxTms, primers, conSeqs, lines[0].start, lines[0].end, numBinsProcessed);
+               
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+            for (set<int>::iterator it = pDataArray[i]->otusToRemove.begin(); it != pDataArray[i]->otusToRemove.end(); it++) { 
+                               otusToRemove.insert(*it);  
+                       }
+            int num = pDataArray[i]->numBinsProcessed;
+            if (num != (lines[processIDS[i]].end - lines[processIDS[i]].start)) { m->mothurOut("[ERROR]: process " + toString(processIDS[i]) + " did not complete processing all OTUs assigned to it, quitting.\n"); m->control_pressed = true; }
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+               
+#endif         
+               
+               //append output files
+               for(int i=0;i<processIDS.size();i++){
+                       m->appendFiles((newSummaryFile + toString(processIDS[i]) + ".temp"), newSummaryFile);
+                       m->mothurRemove((newSummaryFile + toString(processIDS[i]) + ".temp"));
+               }
+               
+               return otusToRemove;    
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "createProcesses");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+set<int> PrimerDesignCommand::driver(string summaryFileName, vector<double>& minTms, vector<double>& maxTms, set<string>& primers, vector<Sequence>& conSeqs, int start, int end, int& numBinsProcessed){
+       try {
+        set<int> otuToRemove;
+        
+        ofstream outSum;
+        m->openOutputFileAppend(summaryFileName, outSum);
+        
+        for (int i = start; i < end; i++) {
+        
+            if (m->control_pressed) { break; }
+            
+            if (i != (otunumber-1)) {
+                int primerIndex = 0;
+                for (set<string>::iterator it = primers.begin(); it != primers.end(); it++) {
+                    vector<int> primerStarts;
+                    vector<int> primerEnds;
+                    vector<int> mismatches;
+                    
+                    bool found = findPrimer(conSeqs[i].getUnaligned(), (*it), primerStarts, primerEnds, mismatches);
+                    
+                    //if we found it report to the table
+                    if (found) {
+                        for (int j = 0; j < primerStarts.size(); j++) {
+                            outSum << (i+1) << '\t' << *it << '\t' << primerStarts[j] << '\t' << primerEnds[j] << '\t' << length << '\t' << mismatches[j] << '\t' << minTms[primerIndex] << '\t' << maxTms[primerIndex] << endl;
+                        }
+                        otuToRemove.insert(i);
+                    }
+                    primerIndex++;
+                }
+            }
+            numBinsProcessed++;
+        }
+        outSum.close();
+        
+        
+        return otuToRemove;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "driver");
+               exit(1);
+       }
+}
+/**************************************************************************************************
+ * Tally per-OTU, per-alignment-column base counts for the sequences read from fastafile,
+ * beginning at file offset start.
+ *
+ * nameMap    - sequence name -> abundance; consulted only when namefile or countfile is set
+ * fastaCount - out: reset to 0, then raised by the abundance (namefile) or by 1 (countfile or
+ *              neither) for every sequence read
+ * otuCounts  - out: rebuilt by initializeCounts() on the first sequence, then raised by each
+ *              sequence's abundance at the index of its OTU
+ * start, end - file offsets; end is only consulted on the unix-like branch, the other branch
+ *              reads until eof
+ *
+ * Returns counts[otu][column][0..4] where slots 0-3 count A, T, G, C and slot 4 anything else.
+ * When a problem is detected an [ERROR] message is printed, m->control_pressed is set and the
+ * tallies gathered so far are returned.
+ **************************************************************************************************/
+vector< vector< vector<unsigned int> > > PrimerDesignCommand::driverGetCounts(map<string, int>& nameMap, unsigned long int& fastaCount, vector<unsigned int>& otuCounts, unsigned long long& start, unsigned long long& end){
+    try {
+        vector< vector< vector<unsigned int> > > counts; //[otu][alignment column][A,T,G,C,other]
+        map<string, int> seq2Bin; //sequence name -> OTU index, filled by initializeCounts()
+        alignedLength = 0; //not declared in this function (presumably a class member); set from the first sequence
+        
+        ifstream in;
+               m->openInputFile(fastafile, in);
+        
+               in.seekg(start);
+        
+               bool done = false; //never set to true; the loop is left through break/return
+               fastaCount = 0;
+        
+               while (!done) {
+            if (m->control_pressed) { in.close(); return counts; }
+            
+                       Sequence seq(in); m->gobble(in);
+            
+                       if (seq.getName() != "") {
+                //the first sequence fixes the expected alignment length and sizes the tables
+                if (fastaCount == 0) { alignedLength = seq.getAligned().length(); initializeCounts(counts, alignedLength, seq2Bin, nameMap, otuCounts); }
+                else if (alignedLength != seq.getAligned().length()) {
+                    m->mothurOut("[ERROR]: your sequences are not all the same length. primer.design requires sequences to be aligned."); m->mothurOutEndLine(); m->control_pressed = true; break;
+                }
+                
+                int num = 1; //abundance of this sequence; stays 1 without a name or count file
+                map<string, int>::iterator itCount;
+                if (namefile != "") { 
+                    itCount = nameMap.find(seq.getName());
+                    if (itCount == nameMap.end()) {  m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your name file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; break; }
+                    else { num = itCount->second; }
+                    fastaCount+=num; //with a name file fastaCount grows by the abundance
+                }else if (countfile != "") {
+                    itCount = nameMap.find(seq.getName());
+                    if (itCount == nameMap.end()) {  m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your count file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; break; }
+                    else { num = itCount->second; }
+                    fastaCount++; //with a count file fastaCount grows by one per sequence read
+                }else {
+                    fastaCount++;
+                }
+                
+                //increment counts
+                itCount = seq2Bin.find(seq.getName()); //itCount now refers to name -> OTU index
+                if (itCount == seq2Bin.end()) {
+                    if ((namefile != "") || (countfile != "")) {
+                        m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your list file, aborting. Perhaps you forgot to include your name or count file while clustering.\n"); m->mothurOutEndLine(); m->control_pressed = true; break;
+                    }else{
+                        m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your list file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; break;
+                    }
+                }else {
+                    otuCounts[itCount->second] += num;
+                    string aligned = seq.getAligned();
+                    for (int i = 0; i < alignedLength; i++) {
+                        char base = toupper(aligned[i]);
+                        if (base == 'A') { counts[itCount->second][i][0]+=num; }
+                        else if (base == 'T') { counts[itCount->second][i][1]+=num; }
+                        else if (base == 'G') { counts[itCount->second][i][2]+=num; }
+                        else if (base == 'C') { counts[itCount->second][i][3]+=num; }
+                        else { counts[itCount->second][i][4]+=num; } //everything else lands in the fifth slot
+                    }
+                }
+
+            }
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            unsigned long long pos = in.tellg();
+            if ((pos == -1) || (pos >= end)) { break; } //stop at a failed tellg() or once this worker's byte range is exhausted
+#else
+            if (in.eof()) { break; }
+#endif
+               }
+                               
+               in.close();
+        
+        return counts;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "driverGetCounts");
+               exit(1);
+       }
+}
+/**************************************************************************************************
+ * Build one consensus sequence per OTU from fastafile.
+ *
+ * On unix-like builds the fasta file is divided into byte ranges by m->divideFile(); the extra
+ * ranges are tallied by fork()ed children that write their results to <pid>.cons_counts.temp,
+ * and the parent adds those tallies to its own. On other builds a single driverGetCounts()
+ * call does all the work.
+ *
+ * nameMap - sequence name -> abundance, forwarded to driverGetCounts()
+ * numSeqs - expected value of the summed fastaCount; a mismatch sets m->control_pressed
+ *
+ * Returns the consensus sequences labelled "Otu" + zero-padded bin number, or an empty vector
+ * when m->control_pressed is set.
+ **************************************************************************************************/
+vector<Sequence> PrimerDesignCommand::createProcessesConSeqs(map<string, int>& nameMap, unsigned long int& numSeqs) {
+       try {
+        vector< vector< vector<unsigned int> > > counts;
+        vector<unsigned int> otuCounts;
+               vector<int> processIDS;
+               int process = 1;
+        unsigned long int fastaCount = 0;
+               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
+               
+        vector<unsigned long long> positions; 
+        vector<fastaLinePair> lines;
+        positions = m->divideFile(fastafile, processors);
+        for (int i = 0; i < (positions.size()-1); i++) {       lines.push_back(fastaLinePair(positions[i], positions[(i+1)])); }
+
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               counts = driverGetCounts(nameMap, fastaCount, otuCounts, lines[process].start, lines[process].end);
+                //temp file layout: fastaCount, number of OTUs, per OTU its column count and one row of 5 tallies per column, then otuCounts
+                string tempFile = toString(getpid()) + ".cons_counts.temp";
+                ofstream outTemp;
+                m->openOutputFile(tempFile, outTemp);
+                
+                outTemp << fastaCount << endl;
+                //pass counts
+                outTemp << counts.size() << endl;
+                for (int i = 0; i < counts.size(); i++) { 
+                    outTemp << counts[i].size() << endl;
+                    for (int j = 0; j < counts[i].size(); j++) { 
+                        for (int k = 0; k < 5; k++) {  outTemp << counts[i][j][k] << '\t'; }
+                        outTemp << endl;
+                    }
+                }
+                //pass otuCounts
+                outTemp << otuCounts.size() << endl;
+                for (int i = 0; i < otuCounts.size(); i++) { outTemp << otuCounts[i] << '\t'; }
+                outTemp << endl;
+                outTemp.close();
+                
+                               exit(0);
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
+                       }
+               }
+               
+               //do my part
+               counts = driverGetCounts(nameMap, fastaCount, otuCounts, lines[0].start, lines[0].end);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+        //fold every child's tallies into the parent's counts, otuCounts and fastaCount
+        for (int i = 0; i < processIDS.size(); i++) {
+            string tempFile = toString(processIDS[i]) +  ".cons_counts.temp";
+            ifstream intemp;
+            m->openInputFile(tempFile, intemp);
+            
+            unsigned long int num;
+            intemp >> num; m->gobble(intemp); fastaCount += num;
+            intemp >> num; m->gobble(intemp); //number of OTUs the child tallied
+            if (num != counts.size()) { m->mothurOut("[ERROR]: " + tempFile + " was not built correctly by the child process, quitting.\n"); m->control_pressed = true; }
+            else {
+                //read counts
+                for (int k = 0; k < num; k++) {
+                    int alength;
+                    intemp >> alength; m->gobble(intemp);
+                    if (alength != alignedLength) {  m->mothurOut("[ERROR]: your sequences are not all the same length. primer.design requires sequences to be aligned."); m->mothurOutEndLine(); m->control_pressed = true; }
+                    else {
+                        for (int j = 0; j < alength; j++) {
+                            for (int l = 0; l < 5; l++) {  unsigned int numTemp; intemp >> numTemp; m->gobble(intemp); counts[k][j][l] += numTemp;  }
+                        }
+                    }
+                }
+                //read otuCounts
+                intemp >> num; m->gobble(intemp);
+                for (int k = 0; k < num; k++) {  
+                    unsigned int numTemp; intemp >> numTemp; m->gobble(intemp); 
+                    otuCounts[k] += numTemp; 
+                }
+            }
+            intemp.close();
+            m->mothurRemove(tempFile);
+        }
+        
+        
+#else
+        unsigned long long start = 0;
+        unsigned long long end = 1000; //placeholder: driverGetCounts() only consults end on its unix-like branch
+               counts = driverGetCounts(nameMap, fastaCount, otuCounts, start, end);   
+#endif         
+        
+        //you will have a nameMap error if there is a namefile or countfile, but if those aren't given we want to make sure the fasta and list file match.
+        if (fastaCount != numSeqs) {
+            if ((namefile == "") && (countfile == ""))   {  m->mothurOut("[ERROR]: Your list file contains " + toString(list->getNumSeqs()) + " sequences, and your fasta file contains " + toString(fastaCount) + " sequences, aborting. Do you have the correct files? Perhaps you forgot to include the name or count file? \n");   }
+            m->control_pressed = true;
+        }
+        
+               vector<Sequence> conSeqs;
+        
+        if (m->control_pressed) { return conSeqs; }
+        
+               //build consensus seqs
+        string snumBins = toString(counts.size()); //its width decides how many zeros pad each label
+        for (int i = 0; i < counts.size(); i++) {
+            if (m->control_pressed) { break; }
+            //label = "Otu" + zeros + (i+1), so every bin number is as wide as the largest one
+            string otuLabel = "Otu";
+            string sbinNumber = toString(i+1);
+            if (sbinNumber.length() < snumBins.length()) { 
+                int diff = snumBins.length() - sbinNumber.length();
+                for (int h = 0; h < diff; h++) { otuLabel += "0"; }
+            }
+            otuLabel += sbinNumber; 
+            //one consensus character per alignment column
+            string cons = "";
+            for (int j = 0; j < counts[i].size(); j++) {
+                cons += getBase(counts[i][j], otuCounts[i]);
+            }
+            Sequence consSeq(otuLabel, cons);
+            conSeqs.push_back(consSeq);
+        }
+        
+        if (m->control_pressed) { conSeqs.clear(); return conSeqs; }
+        
+        return conSeqs;
+       
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "createProcessesConSeqs");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+
+char PrimerDesignCommand::getBase(vector<unsigned int> counts, int size){  //A,T,G,C,Gap
+       try{
+               /* A = adenine
+         * C = cytosine
+         * G = guanine
+         * T = thymine
+         * R = G A (purine)
+         * Y = T C (pyrimidine)
+         * K = G T (keto)
+         * M = A C (amino)
+         * S = G C (strong bonds)
+         * W = A T (weak bonds)
+         * B = G T C (all but A)
+         * D = G A T (all but C)
+         * H = A C T (all but G)
+         * V = G C A (all but T)
+         * N = A G C T (any) */
+               
+               char conBase = 'N';
+               
+               //zero out counts that don't make the cutoff
+               float percentage = (100.0 - cutoff) / 100.0;
+        
+               for (int i = 0; i < counts.size(); i++) {
+            float countPercentage = counts[i] / (float) size;
+                       if (countPercentage < percentage) { counts[i] = 0; }
+               }
+               
+               //any
+               if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'n'; }
+               //any no gap
+               else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'N'; }
+               //all but T
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'v'; }  
+               //all but T no gap
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'V'; }  
+               //all but G
+               else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'h'; }  
+               //all but G no gap
+               else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'H'; }  
+               //all but C
+               else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'd'; }  
+               //all but C no gap
+               else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'D'; }  
+               //all but A
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'b'; }  
+               //all but A no gap
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'B'; }  
+               //W = A T (weak bonds)
+               else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'w'; }  
+               //W = A T (weak bonds) no gap
+               else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'W'; }  
+               //S = G C (strong bonds)
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 's'; }  
+               //S = G C (strong bonds) no gap
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'S'; }  
+               //M = A C (amino)
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'm'; }  
+               //M = A C (amino) no gap
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'M'; }  
+               //K = G T (keto)
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'k'; }  
+               //K = G T (keto) no gap
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'K'; }  
+               //Y = T C (pyrimidine)
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'y'; }  
+               //Y = T C (pyrimidine) no gap
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'Y'; }  
+               //R = G A (purine)
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'r'; }  
+               //R = G A (purine) no gap
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'R'; }  
+               //only A
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'a'; }  
+               //only A no gap
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'A'; }  
+               //only T
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 't'; }  
+               //only T no gap
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'T'; }  
+               //only G
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'g'; }  
+               //only G no gap
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'G'; }  
+               //only C
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'c'; }  
+               //only C no gap
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'C'; }  
+               //only gap
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = '-'; }
+               //cutoff removed all counts
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'N'; }
+               else{ m->mothurOut("[ERROR]: cannot find consensus base."); m->mothurOutEndLine(); }
+               
+               return conBase;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "getBase");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+int PrimerDesignCommand::initializeCounts(vector< vector< vector<unsigned int> > >& counts, int length, map<string, int>& seq2Bin, map<string, int>& nameMap, vector<unsigned int>& otuCounts){
+       try {
+        counts.clear();
+        otuCounts.clear();
+        seq2Bin.clear();
+        
+        //vector< vector< vector<unsigned int> > > counts - otu < spot_in_alignment < counts_for_A,T,G,C,Gap > > >
+        for (int i = 0; i < list->getNumBins(); i++) {
+            string binNames = list->get(i);
+            vector<string> names;
+            m->splitAtComma(binNames, names);
+            otuCounts.push_back(0);
+            
+            //lets be smart and only map the unique names if a name or count file was given to save search time and memory
+            if ((namefile != "") || (countfile != "")) {
+                for (int j = 0; j < names.size(); j++) {
+                    map<string, int>::iterator itNames = nameMap.find(names[j]);
+                    if (itNames != nameMap.end()) { //add name because its a unique one
+                        seq2Bin[names[j]] = i;
+                    }
+                }
+            }else { //map everyone
+                for (int j = 0; j < names.size(); j++) { seq2Bin[names[j]] = i;  }
+            }
+            
+            vector<unsigned int> temp; temp.resize(5, 0); //A,T,G,C,Gap
+            vector< vector<unsigned int> > temp2;
+            for (int j = 0; j < length; j++) {
+                temp2.push_back(temp);
+            }
+            counts.push_back(temp2);
+        }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "initializeCounts");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+map<string, int> PrimerDesignCommand::readCount(unsigned long int& numSeqs){
+       try {
+        map<string, int> nameMap;
+        
+        CountTable ct;
+        ct.readTable(countfile);
+        vector<string> namesOfSeqs = ct.getNamesOfSeqs();
+        numSeqs = ct.getNumUniqueSeqs();
+        
+        for (int i = 0; i < namesOfSeqs.size(); i++) {
+            if (m->control_pressed) { break; }
+            
+            nameMap[namesOfSeqs[i]] = ct.getNumSeqs(namesOfSeqs[i]);
+        }
+        
+        return nameMap;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "readCount");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//Positions the member `list` on the ListVector that this command should work with.
+//ListVectors are read from listfile through InputData. When `label` is empty, the label of the
+//first ListVector read is adopted and that vector is kept. Otherwise the file is scanned until
+//a vector with the requested label is found; if none is found, the vector for an earlier label
+//(lastLabel) is loaded instead and a message naming the missing label is printed.
+//Always returns 0. Exceptions are reported through m->errorOut and end the program.
+int PrimerDesignCommand::getListVector(){
+       try {
+               InputData input(listfile, "list");
+               list = input.getListVector();
+               //NOTE(review): list is dereferenced here without a NULL check, although the loop below
+               //tests list != NULL - confirm an empty list file cannot reach this point.
+               string lastLabel = list->getLabel();
+               
+               //no label requested: keep the first ListVector and remember its label
+               if (label == "") { label = lastLabel;  return 0; }
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> labels; labels.insert(label);
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+               
+               //as long as you are not at the end of the file or done with the lines you want
+               while((list != NULL) && (userLabels.size() != 0)) {
+                       if (m->control_pressed) {  return 0;  }
+                       
+                       //exact match on the requested label: keep this ListVector and stop scanning
+                       if(labels.count(list->getLabel()) == 1){
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               break;
+                       }
+                       
+                       //presumably anyLabelsToProcess reports that the requested label has been passed over - confirm.
+                       //In that case reload the ListVector for the previous label and use it instead.
+                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = list->getLabel();
+                               
+                               delete list;
+                               list = input.getListVector(lastLabel);
+                               
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               list->setLabel(saveLabel);
+                               break;
+                       }
+                       
+                       lastLabel = list->getLabel();                   
+                       
+                       //get next line to process
+                       //prevent memory leak
+                       delete list; 
+                       list = input.getListVector();
+               }
+               
+               
+               if (m->control_pressed) {  return 0;  }
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       //deleting a NULL list (end of file reached) is harmless
+                       delete list; 
+                       list = input.getListVector(lastLabel);
+               }       
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "getListVector"); 
+               exit(1);
+       }
+}
+//********************************************************************/
+/* A = adenine
+ * C = cytosine
+ * G = guanine
+ * T = thymine
+ * R = G A (purine)
+ * Y = T C (pyrimidine)
+ * K = G T (keto)
+ * M = A C (amino)
+ * S = G C (strong bonds)
+ * W = A T (weak bonds)
+ * B = G T C (all but A)
+ * D = G A T (all but C)
+ * H = A C T (all but G)
+ * V = G C A (all but T)
+ * N = A G C T (any) */
+//Counts the positions at which oligo fails to match seq. The two strings are compared
+//position by position (no alignment), case-insensitively, using IUPAC ambiguity codes:
+//  - identical characters always match;
+//  - A/C/G/T in oligo also match any ambiguity code in seq that includes that base;
+//  - an ambiguity code in oligo (R,Y,M,K,W,S,B,D,H,V) matches only the plain bases it stands for;
+//  - N in oligo matches anything; I in oligo mismatches only an N in seq;
+//  - '.' or '-' in oligo mismatches anything that is not the same character;
+//  - any other oligo character is never counted as a difference.
+//Oligo positions beyond the end of seq are counted as differences, so a seq shorter than
+//oligo is handled without reading past its end.
+//Returns the number of differences. Exceptions are reported through m->errorOut and end the program.
+int PrimerDesignCommand::countDiffs(string oligo, string seq){
+       try {
+               const size_t oligoLength = oligo.length();
+               const size_t compareLength = (seq.length() < oligoLength) ? seq.length() : oligoLength;
+               
+               //oligo bases with no counterpart in seq cannot match anything
+               int numDiffs = (int)(oligoLength - compareLength);
+               
+               for(size_t i=0;i<compareLength;i++){
+                       
+                       //toupper requires a value representable as unsigned char
+                       const char o = (char)toupper((unsigned char)oligo[i]);
+                       const char s = (char)toupper((unsigned char)seq[i]);
+                       
+                       if (o == s) { continue; }
+                       
+                       bool mismatch = false;
+                       switch (o) {
+                               //plain bases: seq may carry the base itself (handled above) or any code containing it
+                               case 'A': mismatch = (s != 'M' && s != 'R' && s != 'W' && s != 'D' && s != 'H' && s != 'V'); break;
+                               case 'C': mismatch = (s != 'Y' && s != 'M' && s != 'S' && s != 'B' && s != 'H' && s != 'V'); break;
+                               case 'G': mismatch = (s != 'R' && s != 'K' && s != 'S' && s != 'B' && s != 'D' && s != 'V'); break;
+                               case 'T': mismatch = (s != 'Y' && s != 'K' && s != 'W' && s != 'B' && s != 'D' && s != 'H'); break;
+                               
+                               //gap characters in the oligo never match a different character
+                               case '.':
+                               case '-': mismatch = true; break;
+                               
+                               //s == 'N' can only be true here when o is 'I', because o != s
+                               case 'N':
+                               case 'I': mismatch = (s == 'N'); break;
+                               
+                               //ambiguity codes in the oligo: seq must be one of the plain bases they cover
+                               case 'R': mismatch = (s != 'A' && s != 'G'); break;
+                               case 'Y': mismatch = (s != 'C' && s != 'T'); break;
+                               case 'M': mismatch = (s != 'C' && s != 'A'); break;
+                               case 'K': mismatch = (s != 'T' && s != 'G'); break;
+                               case 'W': mismatch = (s != 'T' && s != 'A'); break;
+                               case 'S': mismatch = (s != 'C' && s != 'G'); break;
+                               case 'B': mismatch = (s != 'C' && s != 'T' && s != 'G'); break;
+                               case 'D': mismatch = (s != 'A' && s != 'T' && s != 'G'); break;
+                               case 'H': mismatch = (s != 'A' && s != 'T' && s != 'C'); break;
+                               case 'V': mismatch = (s != 'A' && s != 'C' && s != 'G'); break;
+                               
+                               //unrecognized oligo characters were never counted; keep that behavior
+                               default:  break;
+                       }
+                       
+                       if (mismatch) { numDiffs++; }
+               }
+               
+               return numDiffs;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "countDiffs");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
+
diff --git a/primerdesigncommand.h b/primerdesigncommand.h

new file mode 100644 (file)

index 0000000..2879d2b
--- /dev/null
+++ b/primerdesigncommand.h
@@ -0,0 +1,219 @@
+//
+//  primerdesigncommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 1/18/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_primerdesigncommand_h
+#define Mothur_primerdesigncommand_h
+
+#include "command.hpp"
+#include "listvector.hpp"
+#include "inputdata.h"
+#include "sequence.hpp"
+#include "alignment.hpp"
+#include "needlemanoverlap.hpp"
+
+/**************************************************************************************************/
+
+//Command class behind "primer.design" (see getCommandName). Per getDescription, it identifies
+//sequence fragments that are specific to particular OTUs.
+class PrimerDesignCommand : public Command {
+public:
+    PrimerDesignCommand(string);
+    PrimerDesignCommand();
+    ~PrimerDesignCommand(){}
+    
+    vector<string> setParameters();
+    string getCommandName()                    { return "primer.design";               }
+    string getCommandCategory()                { return "OTU-Based Approaches";                } 
+    
+    string getOutputPattern(string);
+       string getHelpString(); 
+    string getCitation() { return "http://www.mothur.org/wiki/Primer.design"; }
+    string getDescription()            { return "identify sequence fragments that are specific to particular OTUs"; }
+    
+    int execute(); 
+    //prints the text returned by getHelpString()
+    void help() { m->mothurOut(getHelpString()); }     
+    
+private:
+    
+    //start/end pair held as ints
+    //NOTE(review): presumably a range of OTU indices handed to one worker - confirm against createProcesses
+    struct linePair {
+               int start;
+               int end;
+               linePair(int i, int j) : start(i), end(j) {}
+       };
+    //start/end pair held as unsigned long long
+    //NOTE(review): presumably byte offsets into the fasta file - confirm against createProcessesConSeqs
+    struct fastaLinePair {
+               unsigned long long start;
+               unsigned long long end;
+               fastaLinePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
+       };
+    
+    bool abort, allLines, large;
+    int cutoff, pdiffs, length, otunumber, processors, alignedLength;
+    //input file names; label selects which ListVector of listfile is used (see getListVector)
+    string outputDir, listfile, namefile, countfile, fastafile, label;
+    double minTM, maxTM;
+    //ListVector selected by getListVector()
+    ListVector* list;
+    vector<string> outputNames;
+
+    int initializeCounts(vector< vector< vector<unsigned int> > >& counts, int length, map<string, int>&, map<string, int>&, vector<unsigned int>&);
+    //reads countfile into a name -> count map; the reference argument receives the number of unique sequences
+    map<string, int> readCount(unsigned long int&);
+    char getBase(vector<unsigned int> counts, int size);
+    int getListVector();
+    //number of positions at which the first string (oligo) fails to match the second (seq)
+    int countDiffs(string, string);
+    set<string> getPrimer(Sequence);
+    bool findPrimer(string, string, vector<int>&, vector<int>&, vector<int>&);
+    int findMeltingPoint(string primer, double&, double&);
+    
+    set<int> createProcesses(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&);
+    set<int> driver(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&, int, int, int&);
+    vector< vector< vector<unsigned int> > > driverGetCounts(map<string, int>&, unsigned long int&, vector<unsigned int>&, unsigned long long&, unsigned long long&);
+    vector<Sequence> createProcessesConSeqs(map<string, int>&, unsigned long int&);
+    
+};
+
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct primerDesignData {
+       string summaryFileName;         //summary file the worker opens for append and writes its hits to
+       MothurOut* m;                   //used for file opening, control_pressed checks and error reporting
+       int start;                      //first index into consSeqs handled by the worker
+       int end;                        //one past the last index into consSeqs handled by the worker
+       //pdiffs: largest mismatch count still reported as a hit; otunumber: 1-based OTU that is skipped;
+       //length: primer window length; threadID: value supplied by the caller (not read by the worker function)
+       int pdiffs, threadID, otunumber, length;
+       set<string> primers;            //primers searched for in every consensus sequence
+       vector<double> minTms, maxTms;  //indexed by a primer's position when iterating `primers`
+    set<int> otusToRemove;          //output: indices of consensus sequences in which a primer was found
+    vector<Sequence> consSeqs;      //sequences searched via getUnaligned()
+    int numBinsProcessed;           //output: number of indices in [start, end) the worker visited
+       
+       //every scalar member is given a defined value so a default-constructed object never holds
+       //indeterminate ints or a wild MothurOut pointer
+       primerDesignData() : m(NULL), start(0), end(0), pdiffs(0), threadID(0), otunumber(0), length(0), numBinsProcessed(0) {}
+       primerDesignData(string sf, MothurOut* mout, int st, int en, vector<double> min, vector<double> max, set<string> pri, vector<Sequence> seqs, int d, int otun, int l, int tid)
+               : summaryFileName(sf), m(mout), start(st), end(en),
+                 pdiffs(d), threadID(tid), otunumber(otun), length(l),
+                 primers(pri), minTms(min), maxTms(max), consSeqs(seqs),
+                 numBinsProcessed(0) {}
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+//Worker-thread entry point for the Windows build; lpParam must point to a primerDesignData.
+//For every index i in [start, end) other than otunumber-1, each primer in `primers` is slid
+//along consSeqs[i].getUnaligned() in windows of `length` characters. Every window with at most
+//pdiffs mismatches is appended to the summary file as a tab-separated row
+//(i+1, primer, window start, window end, length, mismatches, minTm, maxTm) and i is added to
+//otusToRemove. numBinsProcessed is incremented once per index visited.
+//The search and the mismatch count are inlined copies of findPrimer and countDiffs (see the
+//commented-out calls below).
+//Returns 0 as the thread exit code. Exceptions are reported through errorOut and end the program.
+static DWORD WINAPI MyPrimerThreadFunction(LPVOID lpParam){ 
+       primerDesignData* pDataArray;
+       pDataArray = (primerDesignData*)lpParam;
+       
+       try {
+               ofstream outSum;
+        pDataArray->m->openOutputFileAppend(pDataArray->summaryFileName, outSum);
+        
+        for (int i = pDataArray->start; i < pDataArray->end; i++) {
+            
+            if (pDataArray->m->control_pressed) { break; }
+            
+            //the OTU identified by otunumber (1-based) is not searched
+            if (i != (pDataArray->otunumber-1)) {
+                //position of *it within primers; used to index minTms/maxTms
+                int primerIndex = 0;
+                for (set<string>::iterator it = pDataArray->primers.begin(); it != pDataArray->primers.end(); it++) {
+                    vector<int> primerStarts;
+                    vector<int> primerEnds;
+                    vector<int> mismatches;
+                    
+                    //bool found = findPrimer(conSeqs[i].getUnaligned(), (*it), primerStarts, primerEnds, mismatches);
+                    ///////////////////////////////////////////////////////////////////////////////////////////////////
+                    bool found = false;  //innocent til proven guilty
+                    
+                    string rawSequence = pDataArray->consSeqs[i].getUnaligned();
+                    string primer = *it;
+                    
+                    //look for exact match
+                    //NOTE(review): this guard uses primer.length() while the loops below use pDataArray->length;
+                    //the code assumes every primer is exactly `length` characters long - confirm.
+                    if(rawSequence.length() < primer.length()) {  found = false;  }
+                    else {
+                        //search for primer
+                        //NOTE(review): the bound stops one short of the final window
+                        //(j == rawSequence.length()-length); left unchanged because findPrimer is
+                        //presumably written the same way - confirm before altering.
+                        for (int j = 0; j < rawSequence.length()-pDataArray->length; j++){
+                            
+                            if (pDataArray->m->control_pressed) {  found = false; break; }
+                            
+                            string rawChunk = rawSequence.substr(j, pDataArray->length);
+                            
+                            //int numDiff = countDiffs(primer, rawchuck);
+                            ///////////////////////////////////////////////////////////////////////
+                            int numDiff = 0;
+                            string oligo = primer;
+                            string seq = rawChunk;
+                            
+                            for(int k=0;k<pDataArray->length;k++){
+                                
+                                oligo[k] = toupper(oligo[k]);
+                                seq[k] = toupper(seq[k]);
+                                
+                                if(oligo[k] != seq[k]){
+                                    
+                                    if((oligo[k] == 'N' || oligo[k] == 'I') && (seq[k] == 'N')) { numDiff++; }
+                                    else if(oligo[k] == 'R' && (seq[k] != 'A' && seq[k] != 'G')) { numDiff++; }
+                                    else if(oligo[k] == 'Y' && (seq[k] != 'C' && seq[k] != 'T')) { numDiff++; }
+                                    else if(oligo[k] == 'M' && (seq[k] != 'C' && seq[k] != 'A')) { numDiff++; }
+                                    else if(oligo[k] == 'K' && (seq[k] != 'T' && seq[k] != 'G')) { numDiff++; }
+                                    else if(oligo[k] == 'W' && (seq[k] != 'T' && seq[k] != 'A')) { numDiff++; }
+                                    else if(oligo[k] == 'S' && (seq[k] != 'C' && seq[k] != 'G')) { numDiff++; }
+                                    else if(oligo[k] == 'B' && (seq[k] != 'C' && seq[k] != 'T' && seq[k] != 'G')) { numDiff++; }
+                                    else if(oligo[k] == 'D' && (seq[k] != 'A' && seq[k] != 'T' && seq[k] != 'G')) { numDiff++; }
+                                    else if(oligo[k] == 'H' && (seq[k] != 'A' && seq[k] != 'T' && seq[k] != 'C')) { numDiff++; }
+                                    else if(oligo[k] == 'V' && (seq[k] != 'A' && seq[k] != 'C' && seq[k] != 'G')) { numDiff++; }
+                                    else if(oligo[k] == 'A' && (seq[k] != 'A' && seq[k] != 'M' && seq[k] != 'R' && seq[k] != 'W' && seq[k] != 'D' && seq[k] != 'H' && seq[k] != 'V')) { numDiff++; }
+                                    else if(oligo[k] == 'C' && (seq[k] != 'C' && seq[k] != 'Y' && seq[k] != 'M' && seq[k] != 'S' && seq[k] != 'B' && seq[k] != 'H' && seq[k] != 'V')) { numDiff++; }
+                                    else if(oligo[k] == 'G' && (seq[k] != 'G' && seq[k] != 'R' && seq[k] != 'K' && seq[k] != 'S' && seq[k] != 'B' && seq[k] != 'D' && seq[k] != 'V')) { numDiff++; }
+                                    else if(oligo[k] == 'T' && (seq[k] != 'T' && seq[k] != 'Y' && seq[k] != 'K' && seq[k] != 'W' && seq[k] != 'B' && seq[k] != 'D' && seq[k] != 'H')) { numDiff++; }
+                                    else if((oligo[k] == '.' || oligo[k] == '-')) { numDiff++; }
+                                }
+                            }
+                            ///////////////////////////////////////////////////////////////////////
+                            
+                            if(numDiff <= pDataArray->pdiffs){
+                                primerStarts.push_back(j);
+                                primerEnds.push_back(j+pDataArray->length);
+                                mismatches.push_back(numDiff);
+                                found = true;
+                            }
+                        }
+                    }
+                    ///////////////////////////////////////////////////////////////////////////////////////////////////
+                    
+                    //if we found it report to the table
+                    if (found) {
+                        for (int j = 0; j < primerStarts.size(); j++) {
+                            outSum << (i+1) << '\t' << *it << '\t' << primerStarts[j] << '\t' << primerEnds[j] << '\t' << pDataArray->length << '\t' << mismatches[j] << '\t' << pDataArray->minTms[primerIndex] << '\t' << pDataArray->maxTms[primerIndex] << endl;
+                        }
+                        pDataArray->otusToRemove.insert(i);
+                    }
+                    primerIndex++;
+                }
+            }
+            pDataArray->numBinsProcessed++;
+        }
+        outSum.close();
+        
+        //the function is declared to return DWORD, so the success path must return a value
+        return 0;
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "PrimerDesignCommand", "MyPrimerThreadFunction");
+               exit(1);
+       }
+} 
+#endif
+
+/**************************************************************************************************/
+
+
+
+
+
+#endif
diff --git a/qualityscores.cpp b/qualityscores.cpp

index 0b7bd0625b914d8a69b298a1b0e88187ddf6757c..4998b3d1c4dabd18b97fb8f33e36860127e757e5 100644 (file)
--- a/qualityscores.cpp
+++ b/qualityscores.cpp
@@ -30,89 +30,37 @@ QualityScores::QualityScores(ifstream& qFile){
         try {
                 
                 m = MothurOut::getInstance();
-               
-               seqName = "";
+
                 int score;
+               seqName = getSequenceName(qFile);
+               
+               if (!m->control_pressed) {
+            string qScoreString = m->getline(qFile);
+            //cout << qScoreString << endl;
+            while(qFile.peek() != '>' && qFile.peek() != EOF){
+                if (m->control_pressed) { break; }
+                string temp = m->getline(qFile);
+                //cout << temp << endl;
+                qScoreString +=  ' ' + temp;
+            }
+            //cout << "done reading " << endl; 
+            istringstream qScoreStringStream(qScoreString);
+            int count = 0;
+            while(!qScoreStringStream.eof()){
+                if (m->control_pressed) { break; }
+                string temp;
+                qScoreStringStream >> temp;  m->gobble(qScoreStringStream);
+                
+                //check temp to make sure its a number
+                if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
+                convert(temp, score);
+                
+                //cout << count << '\t' << score << endl;
+                qScores.push_back(score);
+                count++;
+            }
+        }
                 
-               qFile >> seqName; 
-               m->getline(qFile);
-               //cout << seqName << endl;      
-               if (seqName == "")      {
-                       m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
-                       m->mothurOutEndLine(); 
-               }
-               else{
-                       seqName = seqName.substr(1);
-               }
-               
-               string qScoreString = m->getline(qFile);
-               //cout << qScoreString << endl;
-               while(qFile.peek() != '>' && qFile.peek() != EOF){
-                       if (m->control_pressed) { break; }
-                       string temp = m->getline(qFile);
-                       //cout << temp << endl;
-                       qScoreString +=  ' ' + temp;
-               }
-               //cout << "done reading " << endl;      
-               istringstream qScoreStringStream(qScoreString);
-               int count = 0;
-               while(!qScoreStringStream.eof()){
-                       if (m->control_pressed) { break; }
-                       string temp;
-                       qScoreStringStream >> temp;  m->gobble(qScoreStringStream);
-                       
-                       //check temp to make sure its a number
-                       if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
-                       convert(temp, score);
-                       
-                       //cout << count << '\t' << score << endl;
-                       qScores.push_back(score);
-                       count++;
-               }
-               //qScores.pop_back();
-               
-//             string scores = "";
-//             
-//             while(!qFile.eof()){    
-//                     
-//                     qFile >> seqName; 
-//                     
-//                     //get name
-//                     if (seqName.length() != 0) { 
-//                             seqName = seqName.substr(1);
-//                             while (!qFile.eof())    {       
-//                                     char c = qFile.get(); 
-//                                     //gobble junk on line
-//                                     if (c == 10 || c == 13){        break;  }
-//                             } 
-//                             m->gobble(qFile);
-//                     }
-//                     
-//                     //get scores
-//                     while(qFile){
-//                             char letter=qFile.get();
-//                             if((letter == '>')){    qFile.putback(letter);  break;  }
-//                             else if (isprint(letter)) { scores += letter; }
-//                     }
-//                     m->gobble(qFile);
-//                     
-//                     break;
-//             }
-//             
-//             //convert scores string to qScores
-//             istringstream qScoreStringStream(scores);
-//             
-//             int score;
-//             while(!qScoreStringStream.eof()){
-//                     
-//                     if (m->control_pressed) { break; }
-//                     
-//                     qScoreStringStream >> score;
-//                     qScores.push_back(score);
-//             }
-//             
-//             qScores.pop_back();
-
                 seqLength = qScores.size();
                 //cout << "seqlength = " << seqLength << '\t' << count << endl;
                 
@@ -123,7 +71,46 @@ QualityScores::QualityScores(ifstream& qFile){
         }                                                       
         
  }
-
+//********************************************************************************************************************
+//Reads the header line of one quality-file record from qFile and returns the sequence name.
+//The first whitespace-delimited token is taken as the name and the rest of the line is consumed.
+//The token's first character is dropped and every ':' in the name is replaced by '_'
+//(setting m->changedSeqNames). If no token could be read, an error is printed,
+//m->control_pressed is set and the empty string is returned.
+string QualityScores::getSequenceName(ifstream& qFile) {
+       try {
+               string seqLabel;
+               
+        qFile >> seqLabel;
+        m->getline(qFile);
+               
+        //nothing could be read: report it and flag the run as cancelled
+        if (seqLabel.empty()) {
+            m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name.");
+            m->mothurOutEndLine();
+            m->control_pressed = true;
+            return seqLabel;
+        }
+        
+        //drop the leading character of the header token
+        seqLabel.erase(0, 1);
+        
+        //swap every ':' for '_' and record that a name was altered
+        string::size_type colonPos = seqLabel.find(':');
+        while (colonPos != string::npos) {
+            seqLabel[colonPos] = '_';
+            m->changedSeqNames = true;
+            colonPos = seqLabel.find(':', colonPos + 1);
+        }
+        
+               return seqLabel;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "QualityScores", "getSequenceName");
+               exit(1);
+       }
+}
+//********************************************************************************************************************
+//Stores name as seqName after replacing every ':' in it with '_'.
+//m->changedSeqNames is set when at least one character was replaced.
+void QualityScores::setName(string name) {
+       try {
+        
+        string::size_type colonPos = name.find(':');
+        while (colonPos != string::npos) {
+            name[colonPos] = '_';
+            m->changedSeqNames = true;
+            colonPos = name.find(':', colonPos + 1);
+        }
+        
+        seqName = name;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "QualityScores", "setName");
+               exit(1);
+       }
+}
  /**************************************************************************************************/
  
  string QualityScores::getName(){
diff --git a/qualityscores.h b/qualityscores.h

index 77c3ee0d52beefad645e7ecd87ff19327b137c2d..87699739d34ad012b121a295be53f976b122a5b7 100644 (file)
--- a/qualityscores.h
+++ b/qualityscores.h
@@ -36,7 +36,7 @@ public:
         void updateQScoreErrorMap(map<char, vector<int> >&, string, int, int, int);
         void updateForwardMap(vector<vector<int> >&, int, int, int);
         void updateReverseMap(vector<vector<int> >&, int, int, int);
-    void setName(string n) { seqName = n; }
+    void setName(string n); 
      void setScores(vector<int> qs) { qScores = qs; seqLength = qScores.size(); }
      
         
@@ -48,6 +48,8 @@ private:
         
         string seqName;
         int seqLength;
+    
+    string getSequenceName(ifstream&);
  };
         
  /**************************************************************************************************/
diff --git a/refchimeratest.cpp b/refchimeratest.cpp

index c618e788386550f40cf83c6a328342022e47f243..c084bffd40c94ef9c14a43d05de39e768dc96bea 100644 (file)
--- a/refchimeratest.cpp
+++ b/refchimeratest.cpp
@@ -24,7 +24,8 @@ RefChimeraTest::RefChimeraTest(vector<Sequence>& refs, bool aligned) : aligned(a
         referenceSeqs.resize(numRefSeqs);
         referenceNames.resize(numRefSeqs);
         for(int i=0;i<numRefSeqs;i++){
-               referenceSeqs[i] = refs[i].getAligned();
+               if (aligned) { referenceSeqs[i] = refs[i].getAligned(); }
+        else { referenceSeqs[i] = refs[i].getUnaligned(); }
                 referenceNames[i] = refs[i].getName();
         }
         
@@ -128,7 +129,7 @@ int RefChimeraTest::analyzeUnalignedQuery(string queryName, string querySeq, ofs
      
      for(int i=0;i<numRefSeqs;i++){
          double length = 0;
-        int diffs = alignQueryToReferences(querySeq, referenceSeqs[i], queryAlign[i], refAlign[i], length);
+        double diffs = alignQueryToReferences(querySeq, referenceSeqs[i], queryAlign[i], refAlign[i], length);
          if(diffs < bestRefDiffs){
              bestRefDiffs = diffs;
              bestRefLength = length;
@@ -324,7 +325,7 @@ double RefChimeraTest::alignQueryToReferences(string query, string reference, st
                         
                 int end = refLength - 1;
          int maxRow = 0;
-        double maxRowValue = -100000000000;
+        double maxRowValue = -2147483647;
          for(int i=0;i<queryLength;i++){
              if(alignMatrix[i][end] > maxRowValue){
                  maxRow = i;
@@ -334,7 +335,7 @@ double RefChimeraTest::alignQueryToReferences(string query, string reference, st
          
          end = queryLength - 1;
          int maxColumn = 0;
-        double maxColumnValue = -100000000000;
+        double maxColumnValue = -2147483647;
  
          for(int j=0;j<refLength;j++){
              if(alignMatrix[end][j] > maxColumnValue){
diff --git a/removedistscommand.cpp b/removedistscommand.cpp

new file mode 100644 (file)

index 0000000..2bb8046
--- /dev/null
+++ b/removedistscommand.cpp
@@ -0,0 +1,450 @@
+//
+//  removedistscommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 1/29/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "removedistscommand.h"
+
+//**********************************************************************************************************************
+vector<string> RemoveDistsCommand::setParameters(){    
+       try {
+               CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "PhylipColumn", "none","phylip",false,false,true); parameters.push_back(pphylip);
+        CommandParameter pcolumn("column", "InputTypes", "", "", "none", "PhylipColumn", "none","column",false,false,true); parameters.push_back(pcolumn);     
+               CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveDistsCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string RemoveDistsCommand::getHelpString(){    
+       try {
+               string helpString = "";
+               helpString += "The remove.dists command removes distances from a phylip or column file related to groups or sequences listed in an accnos file.\n";
+               helpString += "The remove.dists command parameters are accnos, phylip and column.\n";
+               helpString += "The remove.dists command should be in the following format: get.dists(accnos=yourAccnos, phylip=yourPhylip).\n";
+               helpString += "Example remove.dists(accnos=final.accnos, phylip=final.an.thetayc.0.03.lt.ave.dist).\n";
+               helpString += "Note: No spaces between parameter labels (i.e. accnos), '=' and parameters (i.e.final.accnos).\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveDistsCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string RemoveDistsCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "phylip")           {   pattern = "[filename],pick,[extension]";    }
+        else if (type == "column")      {   pattern = "[filename],pick,[extension]";    }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RemoveDistsCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+RemoveDistsCommand::RemoveDistsCommand(){      
+       try {
+               abort = true; calledHelp = true;
+               setParameters();
+               vector<string> tempOutNames;
+               outputTypes["phylip"] = tempOutNames;
+               outputTypes["column"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveDistsCommand", "RemoveDistsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+RemoveDistsCommand::RemoveDistsCommand(string option)  { //parses the option string, validates each parameter and resolves the accnos, phylip and column inputs
+       try {
+               abort = false; calledHelp = false;   
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters(); //local name -> value map for this call
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["column"] = tempOutNames;
+                       outputTypes["phylip"] = tempOutNames;
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("phylip");
+                               //user has given a phylip file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("column");
+                               //user has given a column file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["column"] = inputDir + it->second;           }
+                               }
+                               
+                it = parameters.find("accnos");
+                               //user has given an accnos file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
+                               }
+            }
+                       
+                       
+                       //check for required parameters
+                       accnosfile = validParameter.validFile(parameters, "accnos", true);
+                       if (accnosfile == "not open") { abort = true; }
+                       else if (accnosfile == "not found") {  
+                               accnosfile = m->getAccnosFile(); //fall back to the current accnos file, if any
+                               if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
+                               else { 
+                                       m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
+                                       abort = true;
+                               } 
+                       }else { m->setAccnosFile(accnosfile); } 
+                       
+                       phylipfile = validParameter.validFile(parameters, "phylip", true);
+                       if (phylipfile == "not open") { phylipfile = ""; abort = true; }
+                       else if (phylipfile == "not found") { phylipfile = ""; }        
+                       else {  m->setPhylipFile(phylipfile); }
+                       
+                       columnfile = validParameter.validFile(parameters, "column", true);
+                       if (columnfile == "not open") { columnfile = ""; abort = true; }        
+                       else if (columnfile == "not found") { columnfile = ""; }
+                       else {  m->setColumnFile(columnfile);   }
+                       
+                       if ((phylipfile == "") && (columnfile == "")) { 
+                               //is there a current file available for either of these?
+                               //give priority to column, then phylip
+                               columnfile = m->getColumnFile(); 
+                               if (columnfile != "") {  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
+                               else { 
+                                       phylipfile = m->getPhylipFile(); 
+                                       if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
+                                       else { 
+                                               m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine(); 
+                                               abort = true;
+                                       }
+                               }
+                       }
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveDistsCommand", "RemoveDistsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int RemoveDistsCommand::execute(){
+       try {
+               
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+               
+               //get names you want to keep
+               names = m->readAccnos(accnosfile);
+               
+               if (m->control_pressed) { return 0; }
+               
+               //read through the correct file and output lines you want to keep
+               if (phylipfile != "")           {               readPhylip();           }
+               if (columnfile != "")           {               readColumn();       }
+               
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
+               
+               
+               if (outputNames.size() != 0) {
+                       m->mothurOutEndLine();
+                       m->mothurOut("Output File names: "); m->mothurOutEndLine();
+                       for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+                       m->mothurOutEndLine();
+                       
+                       //set fasta file as new current fastafile
+                       string current = "";
+                       itTypes = outputTypes.find("phylip");
+                       if (itTypes != outputTypes.end()) {
+                               if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
+                       }
+                       
+                       itTypes = outputTypes.find("column");
+                       if (itTypes != outputTypes.end()) {
+                               if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
+                       }
+        }
+               
+               return 0;               
+       }
+       
+       catch(exception& e) {
+               m->errorOut(e, "RemoveDistsCommand", "execute");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+int RemoveDistsCommand::readPhylip(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(phylipfile);  }
+        map<string, string> variables; 
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(phylipfile));
+        variables["[extension]"] = m->getExtension(phylipfile);
+               string outputFileName = getOutputFileName("phylip", variables);
+               
+        ifstream in;
+        m->openInputFile(phylipfile, in);
+        
+        float distance;
+        int square, nseqs; 
+        string name;
+        unsigned int row;
+        set<unsigned int> rows; //converts names in names to a index
+        row = 0;
+        
+        string numTest;
+        in >> numTest >> name;
+        
+        if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+        else { convert(numTest, nseqs); }
+        
+        //not one we want to remove
+        if (names.count(name) == 0) { rows.insert(row); }
+        row++;
+        
+        //is the matrix square?
+        char d;
+        while((d=in.get()) != EOF){
+            
+            if(isalnum(d)){
+                square = 1;
+                in.putback(d);
+                for(int i=0;i<nseqs;i++){
+                    in >> distance;
+                }
+                break;
+            }
+            if(d == '\n'){
+                square = 0;
+                break;
+            }
+        }
+        
+        //map name to row/column        
+        if(square == 0){
+            for(int i=1;i<nseqs;i++){
+                in >> name;  
+                if (names.count(name) == 0) { rows.insert(row); }
+                row++;
+                
+                for(int j=0;j<i;j++){
+                    if (m->control_pressed) {  in.close(); return 0;  }
+                    in >> distance;
+                }
+            }
+        }
+        else{
+            for(int i=1;i<nseqs;i++){
+                in >> name;  
+                if (names.count(name) == 0) { rows.insert(row);  }
+                row++;
+                for(int j=0;j<nseqs;j++){
+                    if (m->control_pressed) {  in.close(); return 0;  }
+                    in >> distance;
+                }
+            }
+        }
+        in.close();
+        
+        if (m->control_pressed) {  return 0; }
+        
+        //read through file only printing rows and columns of seqs in names
+        ifstream inPhylip;
+        m->openInputFile(phylipfile, inPhylip);
+        
+        inPhylip >> numTest;
+        
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+        outputTypes["phylip"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+        out << names.size() << endl;
+            
+        unsigned int count = 0;
+        unsigned int keptCount = 0;
+               if(square == 0){
+            for(int i=0;i<nseqs;i++){
+                inPhylip >> name;  
+                bool ignoreRow = false;
+                
+                if (names.count(name) != 0) { ignoreRow = true; count++; }
+                else{ out << name << '\t'; keptCount++; }
+                
+                for(int j=0;j<i;j++){
+                    if (m->control_pressed) {  inPhylip.close(); out.close();  return 0;  }
+                    inPhylip >> distance;
+                    if (!ignoreRow) {
+                        //is this a column we want
+                        if(rows.count(j) != 0) {  out << distance << '\t';  }
+                    }
+                }
+                if (!ignoreRow) { out << endl; }
+            }
+        }
+        else{
+            for(int i=0;i<nseqs;i++){
+                inPhylip >> name; 
+                
+                bool ignoreRow = false;
+                
+                if (names.count(name) != 0) { ignoreRow = true; count++; }
+                else{ out << name << '\t'; keptCount++; }
+                
+                for(int j=0;j<nseqs;j++){
+                    if (m->control_pressed) {  inPhylip.close(); out.close(); return 0;  }
+                    inPhylip >> distance;
+                    if (!ignoreRow) {
+                        //is this a column we want
+                        if(rows.count(j) != 0) {  out << distance << '\t';  }
+                    }
+                }
+                if (!ignoreRow) { out << endl; }
+            }
+        }
+        inPhylip.close();
+               out.close();
+               
+               if (keptCount == 0) {  m->mothurOut("Your file contains ONLY distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine();  }
+        else if (count != names.size()) {
+            m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(count) + " of them in the phylip file."); m->mothurOutEndLine();
+            //rewrite with new number
+            m->renameFile(outputFileName, outputFileName+".temp");
+            ofstream out2;
+            m->openOutputFile(outputFileName, out2);
+            out2 << keptCount << endl;
+            
+            ifstream in3;
+            m->openInputFile(outputFileName+".temp", in3);
+            in3 >> nseqs; m->gobble(in3);
+            char buffer[4096];        
+            while (!in3.eof()) {
+                in3.read(buffer, 4096);
+                out2.write(buffer, in3.gcount());
+            }
+            in3.close();
+            out2.close();
+            m->mothurRemove(outputFileName+".temp");
+        }
+               
+               m->mothurOut("Removed " + toString(count) + " groups or sequences from your phylip file."); m->mothurOutEndLine();
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveDistsCommand", "readPhylip");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int RemoveDistsCommand::readColumn(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(columnfile);  }
+        map<string, string> variables; 
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(columnfile));
+        variables["[extension]"] = m->getExtension(columnfile);
+               string outputFileName = getOutputFileName("column", variables);
+        outputTypes["column"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+        
+        ifstream in;
+        m->openInputFile(columnfile, in);
+        
+        set<string> removeNames;
+        string firstName, secondName;
+        float distance;
+        bool wrote = false;
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { out.close(); in.close(); return 0; }
+            
+            in >> firstName >> secondName >> distance; m->gobble(in);
+            
+            //is either names in the accnos file
+            if (names.count(firstName) != 0)       { 
+                removeNames.insert(firstName);  
+                if (names.count(secondName) != 0)  { removeNames.insert(secondName);      }   }
+            else if (names.count(secondName) != 0) { 
+                removeNames.insert(secondName); 
+                if (names.count(firstName) != 0)   { removeNames.insert(firstName);     }   }
+            else {
+                wrote = true;
+                out << firstName << '\t' << secondName << '\t' << distance << endl;
+            }
+        }
+               in.close();
+               out.close();
+        
+        if (!wrote) {  m->mothurOut("Your file contains ONLY distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine();  }
+        else if (removeNames.size() != names.size()) {
+            m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(removeNames.size()) + " of them in the column file."); m->mothurOutEndLine();
+        }
+               
+               m->mothurOut("Removed " + toString(removeNames.size()) + " groups or sequences from your column file."); m->mothurOutEndLine();
+        
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveDistsCommand", "readColumn");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
diff --git a/removedistscommand.h b/removedistscommand.h

new file mode 100644 (file)

index 0000000..513a9e9
--- /dev/null
+++ b/removedistscommand.h
@@ -0,0 +1,48 @@
+//
+//  removedistscommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 1/29/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_removedistscommand_h
+#define Mothur_removedistscommand_h
+
+#include "command.hpp"
+
+class RemoveDistsCommand : public Command {
+       //remove.dists: removes distances from a phylip or column file for the names listed in an accnos file
+public:
+       
+       RemoveDistsCommand(string);     //parses an option string ("help" and "citation" are handled specially)
+       RemoveDistsCommand();           //sets abort and calledHelp; registers parameters and output types only
+       ~RemoveDistsCommand(){}
+       
+       vector<string> setParameters(); //registers the command's parameters and returns their names
+       string getCommandName()                 { return "remove.dists";                        }
+       string getCommandCategory()             { return "General";                 }
+       
+       string getHelpString(); 
+    string getOutputPattern(string);   //output file name pattern for type "phylip" or "column"
+       string getCitation() { return "http://www.mothur.org/wiki/Remove.dists"; }
+       string getDescription()         { return "removes distances from a phylip or column file related to groups or sequences listed in an accnos file"; }
+    
+       
+       int execute(); //reads the accnos names, then filters the phylip and/or column file
+       void help() { m->mothurOut(getHelpString()); }  
+       
+       
+private:
+       set<string> names; //names read from the accnos file, i.e. the entries to remove
+       string accnosfile, phylipfile, columnfile, outputDir; //empty phylipfile/columnfile means that input is not used
+       bool abort; //when true, execute() returns without doing any work
+       vector<string> outputNames; //every output file created by this run
+       
+       int readPhylip(); //filters phylipfile
+       int readColumn(); //filters columnfile
+       
+};
+
+
+#endif
diff --git a/removeotulabelscommand.cpp b/removeotulabelscommand.cpp

index 5359db8de28e3bfb6a0aa1611e6407e830ddd68b..148758a369f77f85e8c9832d69b3916e13365396 100644 (file)
--- a/removeotulabelscommand.cpp
+++ b/removeotulabelscommand.cpp
@@ -15,6 +15,9 @@ vector<string> RemoveOtuLabelsCommand::setParameters(){
          CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "FNGLT", "none","constaxonomy",false,false); parameters.push_back(pconstaxonomy);
                 CommandParameter potucorr("otucorr", "InputTypes", "", "", "none", "FNGLT", "none","otucorr",false,false); parameters.push_back(potucorr);
          CommandParameter pcorraxes("corraxes", "InputTypes", "", "", "none", "FNGLT", "none","corraxes",false,false); parameters.push_back(pcorraxes);
+        CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false, true); parameters.push_back(plist);
+        CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT", "none","shared",false,false, true); parameters.push_back(pshared);
+        CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
          CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                 
@@ -31,11 +34,12 @@ vector<string> RemoveOtuLabelsCommand::setParameters(){
  string RemoveOtuLabelsCommand::getHelpString(){        
         try {
                 string helpString = "";
-               helpString += "The remove.otulabels command can be used to remove specific otus with the output from classify.otu, otu.association, or corr.axes.\n";
-               helpString += "The remove.otulabels parameters are: constaxonomy, otucorr, corraxes, and accnos.\n";
+               helpString += "The remove.otulabels command can be used to remove specific otus with the output from classify.otu, otu.association, or corr.axes. It can also be used to select a set of otus from a shared or list file.\n";
+               helpString += "The remove.otulabels parameters are: constaxonomy, otucorr, corraxes, shared, list, label and accnos.\n";
                 helpString += "The constaxonomy parameter is input the results of the classify.otu command.\n";
          helpString += "The otucorr parameter is input the results of the otu.association command.\n";
          helpString += "The corraxes parameter is input the results of the corr.axes command.\n";
+        helpString += "The label parameter is used to analyze specific labels in your input. \n";
                 helpString += "The remove.otulabels commmand should be in the following format: \n";
                 helpString += "remove.otulabels(accnos=yourListOfOTULabels, corraxes=yourCorrAxesFile)\n";
                 return helpString;
@@ -50,9 +54,11 @@ string RemoveOtuLabelsCommand::getOutputPattern(string type) {
      try {
          string pattern = "";
          
-        if (type == "constaxonomy")            {   pattern = "[filename],pick,[extension]";    }
-        else if (type == "otucorr")    {   pattern = "[filename],pick,[extension]";    }
-        else if (type == "corraxes")        {   pattern = "[filename],pick,[extension]";    }
+        if (type == "constaxonomy")         {   pattern = "[filename],pick,[extension]";                }
+        else if (type == "otucorr")         {   pattern = "[filename],pick,[extension]";                }
+        else if (type == "corraxes")        {   pattern = "[filename],pick,[extension]";                }
+        else if (type == "list")            {   pattern = "[filename],[distance],pick,[extension]";     }
+        else if (type == "shared")          {   pattern = "[filename],[distance],pick,[extension]";     }
          else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
          
          return pattern;
@@ -71,6 +77,8 @@ RemoveOtuLabelsCommand::RemoveOtuLabelsCommand(){
                 outputTypes["constaxonomy"] = tempOutNames; 
          outputTypes["otucorr"] = tempOutNames;
          outputTypes["corraxes"] = tempOutNames;
+        outputTypes["shared"] = tempOutNames;
+        outputTypes["list"] = tempOutNames;
         }
         catch(exception& e) {
                 m->errorOut(e, "RemoveOtuLabelsCommand", "RemoveOtuLabelsCommand");
@@ -140,12 +148,31 @@ RemoveOtuLabelsCommand::RemoveOtuLabelsCommand(string option)  {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["otucorr"] = inputDir + it->second;          }
                                 }
+                
+                it = parameters.find("list");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["list"] = inputDir + it->second;             }
+                               }
+                
+                it = parameters.find("shared");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
              }
              
              vector<string> tempOutNames;
              outputTypes["constaxonomy"] = tempOutNames; 
              outputTypes["otucorr"] = tempOutNames;
              outputTypes["corraxes"] = tempOutNames;
+            outputTypes["shared"] = tempOutNames;
+            outputTypes["list"] = tempOutNames;
+
              
                         //check for parameters
              accnosfile = validParameter.validFile(parameters, "accnos", true);
@@ -171,11 +198,25 @@ RemoveOtuLabelsCommand::RemoveOtuLabelsCommand(string option)  {
                         if (otucorrfile == "not open") { otucorrfile = ""; abort = true; }
                         else if (otucorrfile == "not found") {  otucorrfile = "";  }
              
+            listfile = validParameter.validFile(parameters, "list", true);
+                       if (listfile == "not open") { listfile = ""; abort = true; }
+                       else if (listfile == "not found") {  listfile = "";  }
+            else { m->setListFile(listfile); }
+            
+            sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+                       else if (sharedfile == "not found") {  sharedfile = "";  }
+            else { m->setSharedFile(sharedfile); }
              
              //if the user changes the output directory command factory will send this info to us in the output parameter 
                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){   outputDir = "";        }
              
-            if ((constaxonomyfile == "") && (corraxesfile == "") && (otucorrfile == ""))  { m->mothurOut("You must provide one of the following: constaxonomy, corraxes or otucorr."); m->mothurOutEndLine(); abort = true; }
+            if ((constaxonomyfile == "") && (corraxesfile == "") && (otucorrfile == "") && (sharedfile == "") && (listfile == ""))  { m->mothurOut("You must provide one of the following: constaxonomy, corraxes, otucorr, shared or list."); m->mothurOutEndLine(); abort = true; }
+            
+            if ((sharedfile != "") || (listfile != "")) {
+                label = validParameter.validFile(parameters, "label", false);                  
+                if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
+            }
                 }
                 
         }
@@ -200,6 +241,8 @@ int RemoveOtuLabelsCommand::execute(){
                 if (constaxonomyfile != "")     {               readClassifyOtu();      }
                 if (corraxesfile != "")         {               readCorrAxes();         }
                 if (otucorrfile != "")          {               readOtuAssociation();   }
+        if (listfile != "")         {          readList();             }
+        if (sharedfile != "")          {               readShared();           }
          
          if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
          
@@ -209,6 +252,17 @@ int RemoveOtuLabelsCommand::execute(){
                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
                 m->mothurOutEndLine();
          
+        string current = "";
+        itTypes = outputTypes.find("list");
+        if (itTypes != outputTypes.end()) {
+            if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
+        }
+        
+        itTypes = outputTypes.find("shared");
+        if (itTypes != outputTypes.end()) {
+            if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
+        }
+        
          return 0;
      }
         catch(exception& e) {
@@ -375,6 +429,288 @@ int RemoveOtuLabelsCommand::readCorrAxes(){
         }
  }
  //**********************************************************************************************************************
+int RemoveOtuLabelsCommand::readShared(){
+       try {
+        
+        getShared();
+        
+        if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } return 0; }
+        
+        vector<string> newLabels;
+        
+        //create new "filtered" lookup
+        vector<SharedRAbundVector*> newLookup;
+        for (int i = 0; i < lookup.size(); i++) {
+            SharedRAbundVector* temp = new SharedRAbundVector();
+                       temp->setLabel(lookup[i]->getLabel());
+                       temp->setGroup(lookup[i]->getGroup());
+                       newLookup.push_back(temp);
+        }
+        
+        bool wroteSomething = false;
+        int numRemoved = 0;
+        for (int i = 0; i < lookup[0]->getNumBins(); i++) {
+            
+            if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
+            
+            //is this otu on the list
+            if (labels.count(m->currentBinLabels[i]) == 0) {
+                wroteSomething = true;
+                newLabels.push_back(m->currentBinLabels[i]);
+                for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
+                    newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
+                }
+            }else { numRemoved++; }
+        }
+        
+        string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
+        map<string, string> variables; 
+               variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[extension]"] = m->getExtension(sharedfile);
+        variables["[distance]"] = lookup[0]->getLabel();
+               string outputFileName = getOutputFileName("shared", variables); 
+        ofstream out;
+               m->openOutputFile(outputFileName, out);
+               outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+        
+               for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
+        
+        m->currentBinLabels = newLabels;
+        
+               newLookup[0]->printHeaders(out);
+               
+               for (int i = 0; i < newLookup.size(); i++) {
+                       out << newLookup[i]->getLabel() << '\t' << newLookup[i]->getGroup() << '\t';
+                       newLookup[i]->print(out);
+               }
+               out.close();
+        
+        for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; }
+        
+        if (wroteSomething == false) { m->mothurOut("Your file contains only OTUs from the .accnos file."); m->mothurOutEndLine();  }
+        
+               m->mothurOut("Removed " + toString(numRemoved) + " OTUs from your shared file."); m->mothurOutEndLine();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtuLabelsCommand", "readShared");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int RemoveOtuLabelsCommand::readList(){
+       try {
+        getListVector();
+        
+        if (m->control_pressed) { delete list; return 0;}
+        
+        ListVector newList;
+        newList.setLabel(list->getLabel());
+        int removedCount = 0;
+        bool wroteSomething = false;
+        string snumBins = toString(list->getNumBins());
+        
+        for (int i = 0; i < list->getNumBins(); i++) {
+            
+            if (m->control_pressed) { delete list; return 0;}
+            
+            //create a label for this otu
+            string otuLabel = "Otu";
+            string sbinNumber = toString(i+1);
+            if (sbinNumber.length() < snumBins.length()) { 
+                int diff = snumBins.length() - sbinNumber.length();
+                for (int h = 0; h < diff; h++) { otuLabel += "0"; }
+            }
+            otuLabel += sbinNumber; 
+            
+            if (labels.count(otuLabel) == 0) {
+                newList.push_back(list->get(i));
+            }else { removedCount++; }
+        }
+        
+        string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
+        map<string, string> variables; 
+               variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
+        variables["[extension]"] = m->getExtension(listfile);
+        variables["[distance]"] = list->getLabel();
+               string outputFileName = getOutputFileName("list", variables);
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+        
+               delete list;
+        //print new listvector
+        if (newList.getNumBins() != 0) {
+            wroteSomething = true;
+            newList.print(out);
+        }
+               out.close();
+               
+               if (wroteSomething == false) { m->mothurOut("Your file contains only OTUs from the .accnos file."); m->mothurOutEndLine();  }
+               outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
+               
+               m->mothurOut("Removed " + toString(removedCount) + " OTUs from your list file."); m->mothurOutEndLine();
+        
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RemoveOtuLabelsCommand", "readList");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+int RemoveOtuLabelsCommand::getListVector(){
+       try {
+               InputData input(listfile, "list");
+               list = input.getListVector();
+               string lastLabel = list->getLabel();
+               
+               if (label == "") { label = lastLabel;  return 0; }
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> labels; labels.insert(label);
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+               
+               //as long as you are not at the end of the file or done wih the lines you want
+               while((list != NULL) && (userLabels.size() != 0)) {
+                       if (m->control_pressed) {  return 0;  }
+                       
+                       if(labels.count(list->getLabel()) == 1){
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               break;
+                       }
+                       
+                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = list->getLabel();
+                               
+                               delete list;
+                               list = input.getListVector(lastLabel);
+                               
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               list->setLabel(saveLabel);
+                               break;
+                       }
+                       
+                       lastLabel = list->getLabel();                   
+                       
+                       //get next line to process
+                       //prevent memory leak
+                       delete list; 
+                       list = input.getListVector();
+               }
+               
+               
+               if (m->control_pressed) {  return 0;  }
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       delete list; 
+                       list = input.getListVector(lastLabel);
+               }       
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtuLabelsCommand", "getListVector");      
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int RemoveOtuLabelsCommand::getShared(){
+       try {
+               InputData input(sharedfile, "sharedfile");
+               lookup = input.getSharedRAbundVectors();
+               string lastLabel = lookup[0]->getLabel();
+               
+               if (label == "") { label = lastLabel;  return 0; }
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> labels; labels.insert(label);
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+               
+               //as long as you are not at the end of the file or done wih the lines you want
+               while((lookup[0] != NULL) && (userLabels.size() != 0)) {
+                       if (m->control_pressed) {   return 0;  }
+                       
+                       if(labels.count(lookup[0]->getLabel()) == 1){
+                               processedLabels.insert(lookup[0]->getLabel());
+                               userLabels.erase(lookup[0]->getLabel());
+                               break;
+                       }
+                       
+                       if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = lookup[0]->getLabel();
+                               
+                               for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
+                               lookup = input.getSharedRAbundVectors(lastLabel);
+                               
+                               processedLabels.insert(lookup[0]->getLabel());
+                               userLabels.erase(lookup[0]->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               lookup[0]->setLabel(saveLabel);
+                               break;
+                       }
+                       
+                       lastLabel = lookup[0]->getLabel();                      
+                       
+                       //get next line to process
+                       //prevent memory leak
+                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
+                       lookup = input.getSharedRAbundVectors();
+               }
+               
+               
+               if (m->control_pressed) {  return 0;  }
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
+                       lookup = input.getSharedRAbundVectors(lastLabel);
+               }       
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtuLabelsCommand", "getShared");  
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
  
  
  
diff --git a/removeotulabelscommand.h b/removeotulabelscommand.h

index 03d2da3392443f3ebf70a60a1b4b318fa5b24e8a..fd03d1018582d04de5efe1c11910610c0186252e 100644 (file)
--- a/removeotulabelscommand.h
+++ b/removeotulabelscommand.h
@@ -11,6 +11,9 @@
  //
  
  #include "command.hpp"
+#include "inputdata.h"
+#include "listvector.hpp"
+#include "sharedrabundvector.h"
  
  /**************************************************************************************************/
  
@@ -34,13 +37,19 @@ public:
      
  private:
      bool abort;
-    string outputDir, accnosfile, constaxonomyfile, otucorrfile, corraxesfile;
+    string outputDir, accnosfile, constaxonomyfile, otucorrfile, corraxesfile, listfile, sharedfile, label;
      vector<string> outputNames;
      set<string> labels;
+    ListVector* list;
+    vector<SharedRAbundVector*> lookup;
      
      int readClassifyOtu();
      int readOtuAssociation();
      int readCorrAxes();
+    int readList();
+    int readShared();
+    int getListVector();
+    int getShared();
  };
  
  /**************************************************************************************************/
diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp

index 5a9c0c8320b7305834cdf4bb544effd515728417..51495506c6e7a3303c6a87d0510a87e44f1810f6 100644 (file)
--- a/screenseqscommand.cpp
+++ b/screenseqscommand.cpp
@@ -14,11 +14,14 @@
  vector<string> ScreenSeqsCommand::setParameters(){     
         try {
                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
+        CommandParameter pcontigsreport("contigsreport", "InputTypes", "", "", "report", "none", "none","contigsreport",false,true,true); parameters.push_back(pcontigsreport);
+        CommandParameter palignreport("alignreport", "InputTypes", "", "", "report", "none", "none","alignreport",false,false); parameters.push_back(palignreport);
+        CommandParameter psummary("summary", "InputTypes", "", "", "report", "none", "none","summary",false,false); parameters.push_back(psummary);
          CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
          CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount);
                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup);
                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false); parameters.push_back(pqfile);
-               CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "none", "none","alignreport",false,false); parameters.push_back(palignreport);
+               
                 CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none","taxonomy",false,false); parameters.push_back(ptax);
                 CommandParameter pstart("start", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pstart);
                 CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pend);
@@ -29,8 +32,20 @@ vector<string> ScreenSeqsCommand::setParameters(){
                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
                 CommandParameter pcriteria("criteria", "Number", "", "90", "", "", "","",false,false); parameters.push_back(pcriteria);
                 CommandParameter poptimize("optimize", "Multiple", "none-start-end-maxambig-maxhomop-minlength-maxlength", "none", "", "", "","",true,false); parameters.push_back(poptimize);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+        CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+        
+        //report parameters
+        CommandParameter pminoverlap("minoverlap", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pminoverlap);
+        CommandParameter postart("ostart", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(postart);
+        CommandParameter poend("oend", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(poend);
+        CommandParameter pmismatches("mismatches", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmismatches);
+        CommandParameter pmaxn("maxn", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxn);
+        CommandParameter pminscore("minscore", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pminscore);
+        CommandParameter pmaxinsert("maxinsert", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxinsert);
+        CommandParameter pminsim("minsim", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pminsim);
+
+               
                 
                 vector<string> myArray;
                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -46,15 +61,25 @@ string ScreenSeqsCommand::getHelpString(){
         try {
                 string helpString = "";
                 helpString += "The screen.seqs command reads a fastafile and screens sequences.\n";
-               helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, count, qfile, alignreport, taxonomy, optimize, criteria and processors.\n";
+               helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, count, qfile, alignreport, contigsreport, summary, taxonomy, optimize, criteria and processors.\n";
                 helpString += "The fasta parameter is required.\n";
-               helpString += "The alignreport and taxonomy parameters allow you to remove bad seqs from taxonomy and alignreport files.\n";
+        helpString += "The contigsreport parameter allows you to use the contigsreport file to determine if a sequence is good. Screening parameters include: minoverlap, ostart, oend and mismatches. \n";
+        helpString += "The alignreport parameter allows you to use the alignreport file to determine if a sequence is good. Screening parameters include: minsim, minscore and maxinsert. \n";
+        helpString += "The summary parameter allows you to use the summary file from summary.seqs to save time processing.\n";
+               helpString += "The taxonomy parameter allows you to remove bad seqs from taxonomy files.\n";
                 helpString += "The start parameter is used to set a position the \"good\" sequences must start by. The default is -1.\n";
                 helpString += "The end parameter is used to set a position the \"good\" sequences must end after. The default is -1.\n";
                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
-               helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
+               helpString += "The maxn parameter allows you to set and maximum number of N's allowed in a sequence. \n";
+        helpString += "The minoverlap parameter allows you to set and minimum overlap. The default is -1. \n";
+        helpString += "The ostart parameter is used to set an overlap position the \"good\" sequences must start by. The default is -1. \n";
+        helpString += "The oend parameter is used to set an overlap position the \"good\" sequences must end after. The default is -1.\n";
+        helpString += "The mismatches parameter allows you to set and maximum mismatches in the contigs.report. \n";
+        helpString += "The minsim parameter allows you to set the minimum similarity to template sequences during alignment. Found in column \'SimBtwnQuery&Template\' in align.report file.\n";
+        helpString += "The minscore parameter allows you to set the minimum search score during alignment. Found in column \'SearchScore\' in align.report file.\n";
+        helpString += "The maxinsert parameter allows you to set the maximum number of insertions during alignment. Found in column \'LongestInsert\' in align.report file.\n";
                 helpString += "The processors parameter allows you to specify the number of processors to use while running the command. The default is 1.\n";
                 helpString += "The optimize and criteria parameters allow you set the start, end, maxabig, maxhomop, minlength and maxlength parameters relative to your set of sequences .\n";
                 helpString += "For example optimize=start-end, criteria=90, would set the start and end values to the position 90% of your sequences started and ended.\n";
@@ -84,6 +109,8 @@ string ScreenSeqsCommand::getOutputPattern(string type) {
          else if (type == "accnos")      {   pattern = "[filename],bad.accnos";          }
          else if (type == "qfile")       {   pattern = "[filename],good,[extension]";    }
          else if (type == "alignreport")      {   pattern = "[filename],good.align.report";    }
+        else if (type == "contigsreport")      {   pattern = "[filename],good.contigs.report";    }
+        else if (type == "summary")      {   pattern = "[filename],good.summary";    }
          else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
          
          return pattern;
@@ -103,6 +130,8 @@ ScreenSeqsCommand::ScreenSeqsCommand(){
                 outputTypes["name"] = tempOutNames;
                 outputTypes["group"] = tempOutNames;
                 outputTypes["alignreport"] = tempOutNames;
+        outputTypes["contigsreport"] = tempOutNames;
+        outputTypes["summary"] = tempOutNames;
                 outputTypes["accnos"] = tempOutNames;
                 outputTypes["qfile"] = tempOutNames;
                 outputTypes["taxonomy"] = tempOutNames;
@@ -147,7 +176,10 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                         outputTypes["qfile"] = tempOutNames;
                         outputTypes["taxonomy"] = tempOutNames;
              outputTypes["count"] = tempOutNames;
-                       
+                       outputTypes["contigsreport"] = tempOutNames;
+            outputTypes["summary"] = tempOutNames;
+
+            
                         //if the user changes the input directory command factory will send this info to us in the output parameter 
                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
                         if (inputDir == "not found"){   inputDir = "";          }
@@ -184,6 +216,22 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
                                 }
+                
+                it = parameters.find("contigsreport");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["contigsreport"] = inputDir + it->second;            }
+                               }
+                
+                it = parameters.find("summary");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["summary"] = inputDir + it->second;          }
+                               }
                                 
                                 it = parameters.find("qfile");
                                 //user has given a template file
@@ -240,6 +288,14 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                         else if (countfile == "not found") { countfile = "";  } 
                         else { m->setCountTableFile(countfile); }
              
+            contigsreport = validParameter.validFile(parameters, "contigsreport", true);
+                       if (contigsreport == "not open") { contigsreport = ""; abort = true; }
+                       else if (contigsreport == "not found") { contigsreport = "";  } 
+            
+            summaryfile = validParameter.validFile(parameters, "summary", true);
+                       if (summaryfile == "not open") { summaryfile = ""; abort = true; }
+                       else if (summaryfile == "not found") { summaryfile = "";  }     
+            
              if ((namefile != "") && (countfile != "")) {
                  m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
              }
@@ -287,16 +343,71 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                         m->setProcessors(temp);
                         m->mothurConvert(temp, processors);
                         
+            temp = validParameter.validFile(parameters, "minoverlap", false);  if (temp == "not found") { temp = "-1"; }
+                       m->mothurConvert(temp, minOverlap); 
+            
+            temp = validParameter.validFile(parameters, "ostart", false);      if (temp == "not found") { temp = "-1"; }
+                       m->mothurConvert(temp, oStart); 
+            
+            temp = validParameter.validFile(parameters, "oend", false);        if (temp == "not found") { temp = "-1"; }
+                       m->mothurConvert(temp, oEnd); 
+            
+            temp = validParameter.validFile(parameters, "mismatches", false);  if (temp == "not found") { temp = "-1"; }
+                       m->mothurConvert(temp, mismatches); 
+            
+            temp = validParameter.validFile(parameters, "maxn", false);        if (temp == "not found") { temp = "-1"; }
+                       m->mothurConvert(temp, maxN); 
+            
+            temp = validParameter.validFile(parameters, "minscore", false);    if (temp == "not found") { temp = "-1"; }
+                       m->mothurConvert(temp, minScore); 
+            
+            temp = validParameter.validFile(parameters, "maxinsert", false);   if (temp == "not found") { temp = "-1"; }
+                       m->mothurConvert(temp, maxInsert); 
+            
+            temp = validParameter.validFile(parameters, "minsim", false);      if (temp == "not found") { temp = "-1"; }
+                       m->mothurConvert(temp, minSim); 
+            
                         temp = validParameter.validFile(parameters, "optimize", false); //optimizing trumps the optimized values original value
                         if (temp == "not found"){       temp = "none";          }
                         m->splitAtDash(temp, optimize);         
+            
+            if ((contigsreport != "") && ((summaryfile != "") || ( alignreport != ""))) {
+                m->mothurOut("[ERROR]: You may only provide one of the following: contigsreport, alignreport or summary, aborting.\n"); abort=true;
+            }
+            
+            if ((alignreport != "") && ((summaryfile != "") || ( contigsreport != ""))) {
+                m->mothurOut("[ERROR]: You may only provide one of the following: contigsreport, alignreport or summary, aborting.\n"); abort=true;
+            }
+            
+            if ((summaryfile != "") && ((alignreport != "") || ( contigsreport != ""))) {
+                m->mothurOut("[ERROR]: You may only provide one of the following: contigsreport, alignreport or summary, aborting.\n"); abort=true;
+            }
                         
+            //check to make sure you have the files you need for certain screening
+            if ((contigsreport == "") && ((minOverlap != -1) || (oStart != -1) || (oEnd != -1) || (mismatches != -1))) {
+                m->mothurOut("[ERROR]: minoverlap, ostart, oend and mismatches can only be used with a contigs.report file, aborting.\n"); abort=true;
+            }
+            
+            if ((alignreport == "") && ((minScore != -1) || (maxInsert != -1) || (minSim != -1))) {
+                m->mothurOut("[ERROR]: minscore, maxinsert and minsim can only be used with a align.report file, aborting.\n"); abort=true;
+            }
+            
                         //check for invalid optimize options
                         set<string> validOptimizers;
-                       validOptimizers.insert("none"); validOptimizers.insert("start"); validOptimizers.insert("end"); validOptimizers.insert("maxambig"); validOptimizers.insert("maxhomop"); validOptimizers.insert("minlength"); validOptimizers.insert("maxlength");
+                       validOptimizers.insert("none"); validOptimizers.insert("start"); validOptimizers.insert("end"); validOptimizers.insert("maxambig"); validOptimizers.insert("maxhomop"); validOptimizers.insert("minlength"); validOptimizers.insert("maxlength"); validOptimizers.insert("maxn");
+            if (contigsreport != "")    { validOptimizers.insert("minoverlap"); validOptimizers.insert("ostart"); validOptimizers.insert("oend"); validOptimizers.insert("mismatches");  }
+            if (alignreport != "")      { validOptimizers.insert("minscore"); validOptimizers.insert("maxinsert"); validOptimizers.insert("minsim"); }
+            
                         for (int i = 0; i < optimize.size(); i++) { 
                                 if (validOptimizers.count(optimize[i]) == 0) { 
-                                       m->mothurOut(optimize[i] + " is not a valid optimizer. Valid options are start, end, maxambig, maxhomop, minlength and maxlength."); m->mothurOutEndLine();
+                                       m->mothurOut(optimize[i] + " is not a valid optimizer with your input files. Valid options are "); 
+                    string valid = "";
+                    for (set<string>::iterator it = validOptimizers.begin(); it != validOptimizers.end(); it++) {
+                        valid += (*it) + ", ";
+                    }
+                    if (valid.length() != 0) {  valid = valid.substr(0, valid.length()-2); }
+                    m->mothurOut(valid + ".");
+                    m->mothurOutEndLine();
                                         optimize.erase(optimize.begin()+i);
                                         i--;
                                 }
@@ -328,190 +439,38 @@ int ScreenSeqsCommand::execute(){
                 
                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                 
-               //if the user want to optimize we need to know the 90% mark
-               vector<unsigned long long> positions;
-               if (optimize.size() != 0) {  //get summary is paralellized so we need to divideFile, no need to do this step twice so I moved it here
-                       //use the namefile to optimize correctly
-                       if (namefile != "") { nameMap = m->readNames(namefile); }
-            else if (countfile != "") {
-                CountTable ct;
-                ct.readTable(countfile);
-                nameMap = ct.getNameMap();
-            }
-                       getSummary(positions); 
-               } 
-               else { 
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-                positions = m->divideFile(fastafile, processors);
-                for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); }
-                       #else 
-                if(processors == 1){ lines.push_back(linePair(0, 1000));  }
-                else {
-                    int numFastaSeqs = 0;
-                    positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
-                    if (positions.size() < processors) { processors = positions.size(); }
-                
-                    //figure out how many sequences you have to process
-                    int numSeqsPerProcessor = numFastaSeqs / processors;
-                    for (int i = 0; i < processors; i++) {
-                        int startIndex =  i * numSeqsPerProcessor;
-                        if(i == (processors - 1)){     numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
-                        lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
-                    }
-                }
-                       #endif
-               }
+        map<string, string> badSeqNames;
+        int start = time(NULL);
+        int numFastaSeqs = 0;
          
-        map<string, string> variables; 
-        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
-        string badAccnosFile =  getOutputFileName("accnos",variables);
-        variables["[extension]"] = m->getExtension(fastafile);
-               string goodSeqFile = getOutputFileName("fasta", variables);
-               
+        if ((contigsreport == "") && (summaryfile == "") && (alignreport == "")) {   numFastaSeqs = screenFasta(badSeqNames);  }
+        else {   numFastaSeqs = screenReports(badSeqNames);   }
                 
-               int numFastaSeqs = 0;
-               set<string> badSeqNames;
-               int start = time(NULL);
-       
-#ifdef USE_MPI 
-                       int pid, numSeqsPerProcessor; 
-                       int tag = 2001;
-                       vector<unsigned long long> MPIPos;
-                       
-                       MPI_Status status; 
-                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-                       MPI_Comm_size(MPI_COMM_WORLD, &processors); 
-       
-                       MPI_File inMPI;
-                       MPI_File outMPIGood;
-                       MPI_File outMPIBadAccnos;
-                       
-                       int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
-                       int inMode=MPI_MODE_RDONLY; 
-                       
-                       char outGoodFilename[1024];
-                       strcpy(outGoodFilename, goodSeqFile.c_str());
-
-                       char outBadAccnosFilename[1024];
-                       strcpy(outBadAccnosFilename, badAccnosFile.c_str());
-
-                       char inFileName[1024];
-                       strcpy(inFileName, fastafile.c_str());
-                       
-                       MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
-                       MPI_File_open(MPI_COMM_WORLD, outGoodFilename, outMode, MPI_INFO_NULL, &outMPIGood);
-                       MPI_File_open(MPI_COMM_WORLD, outBadAccnosFilename, outMode, MPI_INFO_NULL, &outMPIBadAccnos);
-                       
-                       if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood); MPI_File_close(&outMPIBadAccnos); return 0; }
-                       
-                       if (pid == 0) { //you are the root process 
-                               
-                               MPIPos = m->setFilePosFasta(fastafile, numFastaSeqs); //fills MPIPos, returns numSeqs
-                               
-                               //send file positions to all processes
-                               for(int i = 1; i < processors; i++) { 
-                                       MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
-                                       MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
-                               }
-                               
-                               //figure out how many sequences you have to align
-                               numSeqsPerProcessor = numFastaSeqs / processors;
-                               int startIndex =  pid * numSeqsPerProcessor;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
-
-                               //align your part
-                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIGood, outMPIBadAccnos, MPIPos, badSeqNames);
-
-                               if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood);  MPI_File_close(&outMPIBadAccnos);  return 0; }
-
-                               for (int i = 1; i < processors; i++) {
-                                       //get bad lists
-                                       int badSize;
-                                       MPI_Recv(&badSize, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
-                               }
-                       }else{ //you are a child process
-                               MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-                               MPIPos.resize(numFastaSeqs+1);
-                               MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
-
-                               //figure out how many sequences you have to align
-                               numSeqsPerProcessor = numFastaSeqs / processors;
-                               int startIndex =  pid * numSeqsPerProcessor;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
-
-                               //align your part
-                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIGood, outMPIBadAccnos, MPIPos, badSeqNames);
-
-                               if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood);  MPI_File_close(&outMPIBadAccnos); return 0; }
-                               
-                               //send bad list 
-                               int badSize = badSeqNames.size();
-                               MPI_Send(&badSize, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
-                       }
-                       
-                       //close files 
-                       MPI_File_close(&inMPI);
-                       MPI_File_close(&outMPIGood);
-                       MPI_File_close(&outMPIBadAccnos);
-                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
-                                       
-#else
-        if(processors == 1){ numFastaSeqs = driver(lines[0], goodSeqFile, badAccnosFile, fastafile, badSeqNames);      }       
-        else{ numFastaSeqs = createProcesses(goodSeqFile, badAccnosFile, fastafile, badSeqNames); }
+        if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
          
-        if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; }
-#endif         
-
-               #ifdef USE_MPI
-                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
-                                       
-                       if (pid == 0) { //only one process should fix files
-                       
-                               //read accnos file with all names in it, process 0 just has its names
-                               MPI_File inMPIAccnos;
-                               MPI_Offset size;
-                       
-                               char inFileName[1024];
-                               strcpy(inFileName, badAccnosFile.c_str());
-                       
-                               MPI_File_open(MPI_COMM_SELF, inFileName, inMode, MPI_INFO_NULL, &inMPIAccnos);  //comm, filename, mode, info, filepointer
-                               MPI_File_get_size(inMPIAccnos, &size);
-                       
-                               char* buffer = new char[size];
-                               MPI_File_read(inMPIAccnos, buffer, size, MPI_CHAR, &status);
-                       
-                               string tempBuf = buffer;
-                               if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
-                               istringstream iss (tempBuf,istringstream::in);
-
-                               delete buffer;
-                               MPI_File_close(&inMPIAccnos);
-                               
-                               badSeqNames.clear();
-                               string tempName;
-                               while (!iss.eof()) {
-                                       iss >> tempName; m->gobble(iss);
-                                       badSeqNames.insert(tempName);
-                               }
-               #endif
-                                                                                                                                                                       
+        #ifdef USE_MPI
+            int pid;
+            MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+        
+            if (pid == 0) { //only one process should fix files
+        #endif 
+                
                 if(namefile != "" && groupfile != "")   {       
                         screenNameGroupFile(badSeqNames);       
-                       if (m->control_pressed) {  m->mothurRemove(goodSeqFile); return 0; }
+                       if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } return 0; }
                 }else if(namefile != "")        {       
                         screenNameGroupFile(badSeqNames);
-                       if (m->control_pressed) {  m->mothurRemove(goodSeqFile);  return 0; }   
+                       if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } return 0; }       
                 }else if(groupfile != "")                               {       screenGroupFile(badSeqNames);           }       // this screens just the group
                 else if (countfile != "") {     screenCountFile(badSeqNames);           }
              
                  
-               if (m->control_pressed) { m->mothurRemove(goodSeqFile);  return 0; }
+               if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } return 0; }
  
-               if(alignreport != "")                                   {       screenAlignReport(badSeqNames);         }
                 if(qualfile != "")                                              {       screenQual(badSeqNames);                        }
                 if(taxonomy != "")                                              {       screenTaxonomy(badSeqNames);            }
                 
-               if (m->control_pressed) { m->mothurRemove(goodSeqFile);  return 0; }
+               if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } return 0; }
                 
                 #ifdef USE_MPI
                         }
@@ -519,8 +478,6 @@ int ScreenSeqsCommand::execute(){
  
                 m->mothurOutEndLine();
                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               m->mothurOut(goodSeqFile); m->mothurOutEndLine();       outputTypes["fasta"].push_back(goodSeqFile);
-               m->mothurOut(badAccnosFile); m->mothurOutEndLine();      outputTypes["accnos"].push_back(badAccnosFile);
                 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
                 m->mothurOutEndLine();
                 m->mothurOutEndLine();
@@ -552,120 +509,1303 @@ int ScreenSeqsCommand::execute(){
                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
                 }
          
-        itTypes = outputTypes.find("count");
-               if (itTypes != outputTypes.end()) {
-                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+        itTypes = outputTypes.find("count");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+               }
+
+               m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
+               m->mothurOutEndLine();
+
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "execute");
+               exit(1);
+       }
+}
+//***************************************************************************************************************/
+// Screens the sequences in fastafile. The output names for the passing sequences
+// ("fasta") and for the names of failing sequences ("accnos") are built with
+// getOutputFileName() and registered in outputNames / outputTypes.
+//
+// badSeqNames: in/out map of sequence name -> trash code. It is handed to
+//              driverMPI() / driver() / createProcesses(); in MPI builds rank 0
+//              clears it afterwards and refills it from the accnos file.
+//
+// Returns numFastaSeqs: in MPI builds the count produced by setFilePosFasta() on
+// rank 0 (and received by the other ranks), otherwise the value returned by
+// driver() / createProcesses(). The MPI control_pressed exits return 0.
+// Any exception is reported through m->errorOut() and the program exits.
+int ScreenSeqsCommand::runFastaScreening(map<string, string>& badSeqNames){
+       try{
+        int numFastaSeqs = 0;
+        map<string, string> variables; 
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
+        string badAccnosFile =  getOutputFileName("accnos",variables);
+        variables["[extension]"] = m->getExtension(fastafile);
+               string goodSeqFile = getOutputFileName("fasta", variables);
+               outputNames.push_back(goodSeqFile); outputTypes["fasta"].push_back(goodSeqFile);
+               outputNames.push_back(badAccnosFile); outputTypes["accnos"].push_back(badAccnosFile);
+        
+#ifdef USE_MPI 
+        int pid, numSeqsPerProcessor; 
+        int tag = 2001;
+        vector<unsigned long long> MPIPos;
+        
+        MPI_Status status; 
+        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+        MPI_Comm_size(MPI_COMM_WORLD, &processors); 
+        
+        MPI_File inMPI;
+        MPI_File outMPIGood;
+        MPI_File outMPIBadAccnos;
+        
+        int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
+        int inMode=MPI_MODE_RDONLY; 
+        
+        //NOTE(review): file names are copied into fixed 1024-byte buffers without a length check - confirm paths cannot exceed that
+        char outGoodFilename[1024];
+        strcpy(outGoodFilename, goodSeqFile.c_str());
+        
+        char outBadAccnosFilename[1024];
+        strcpy(outBadAccnosFilename, badAccnosFile.c_str());
+        
+        char inFileName[1024];
+        strcpy(inFileName, fastafile.c_str());
+        
+        MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+        MPI_File_open(MPI_COMM_WORLD, outGoodFilename, outMode, MPI_INFO_NULL, &outMPIGood);
+        MPI_File_open(MPI_COMM_WORLD, outBadAccnosFilename, outMode, MPI_INFO_NULL, &outMPIBadAccnos);
+        
+        if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood); MPI_File_close(&outMPIBadAccnos); return 0; }
+        
+        if (pid == 0) { //you are the root process 
+            
+            MPIPos = m->setFilePosFasta(fastafile, numFastaSeqs); //fills MPIPos, returns numSeqs
+            
+            //send file positions to all processes
+            //NOTE(review): MPIPos holds unsigned long long but is sent/received as MPI_LONG - confirm the sizes match on all targets
+            for(int i = 1; i < processors; i++) { 
+                MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+            }
+            
+            //figure out how many sequences you have to screen; the last rank takes the remainder
+            numSeqsPerProcessor = numFastaSeqs / processors;
+            int startIndex =  pid * numSeqsPerProcessor;
+            if(pid == (processors - 1)){       numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
+            
+            //screen your part
+            driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIGood, outMPIBadAccnos, MPIPos, badSeqNames);
+            
+            if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood);  MPI_File_close(&outMPIBadAccnos);  return 0; }
+            
+            //receive each child's bad-sequence count; the value itself is not used here
+            for (int i = 1; i < processors; i++) {
+                int badSize;
+                MPI_Recv(&badSize, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+            }
+        }else{ //you are a child process
+            MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+            MPIPos.resize(numFastaSeqs+1);
+            MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+            
+            //figure out how many sequences you have to screen; the last rank takes the remainder
+            numSeqsPerProcessor = numFastaSeqs / processors;
+            int startIndex =  pid * numSeqsPerProcessor;
+            if(pid == (processors - 1)){       numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
+            
+            //screen your part
+            driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIGood, outMPIBadAccnos, MPIPos, badSeqNames);
+            
+            if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood);  MPI_File_close(&outMPIBadAccnos); return 0; }
+            
+            //send the size of the bad list to the root
+            int badSize = badSeqNames.size();
+            MPI_Send(&badSize, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
+        }
+        
+        //close files 
+        MPI_File_close(&inMPI);
+        MPI_File_close(&outMPIGood);
+        MPI_File_close(&outMPIBadAccnos);
+        MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+        
+#else
+        if(processors == 1){ numFastaSeqs = driver(lines[0], goodSeqFile, badAccnosFile, fastafile, badSeqNames);      }       
+        else{ numFastaSeqs = createProcesses(goodSeqFile, badAccnosFile, fastafile, badSeqNames); }
+        
+        if (m->control_pressed) { m->mothurRemove(goodSeqFile); return numFastaSeqs; }
+#endif         
+        
+#ifdef USE_MPI
+        MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+        
+        if (pid == 0) { //only one process should fix files
+                       
+            //read accnos file with all names in it, process 0 just has its names
+            MPI_File inMPIAccnos;
+            MPI_Offset size;
+                       
+            char inFileName[1024];
+            strcpy(inFileName, badAccnosFile.c_str());
+                       
+            MPI_File_open(MPI_COMM_SELF, inFileName, inMode, MPI_INFO_NULL, &inMPIAccnos);  //comm, filename, mode, info, filepointer
+            MPI_File_get_size(inMPIAccnos, &size);
+                       
+            char* buffer = new char[size];
+            MPI_File_read(inMPIAccnos, buffer, size, MPI_CHAR, &status);
+                       
+            //the buffer is not NUL-terminated, so build the string from an explicit length
+            string tempBuf(buffer, size);
+            istringstream iss (tempBuf,istringstream::in);
+            
+            delete[] buffer;   //allocated with new[], so it must be released with delete[]
+            MPI_File_close(&inMPIAccnos);
+            
+            //rebuild the map from the file: one "name trashCode" pair per record
+            //NOTE(review): for an empty accnos file the loop body still runs once and stores an empty name - confirm this is harmless
+            badSeqNames.clear();
+            string tempName, trashCode;
+            while (!iss.eof()) {
+                iss >> tempName >> trashCode; m->gobble(iss);
+                badSeqNames[tempName] = trashCode;
+            }
+        }
+#endif
+        
+        
+               return numFastaSeqs;
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "runFastaScreening");
+               exit(1);
+       }
+}
+//***************************************************************************************************************/
+// Screening entry point used when a report file (summaryfile, contigsreport or
+// alignreport) drives the run. It first either summarizes the fasta file with
+// getSummary() or just splits it into work units in `lines`, then runs at most one
+// of getSummaryReport() / optimizeContigs() / optimizeAlign() (presumably to derive
+// thresholds for the requested optimizers - verify against those methods), and
+// finally dispatches to exactly one screener.
+//
+// badSeqNames: map of sequence name -> trash code, passed through to the screener.
+// Returns the value returned by screenSummary() / screenContigs() /
+// screenAlignReport(), or 0 when none of the three report files is set.
+// Any exception is reported through m->errorOut() and the program exits.
+int ScreenSeqsCommand::screenReports(map<string, string>& badSeqNames){
+       try{
+        int numFastaSeqs = 0;
+        bool summarizedFasta = false;
+        
+        //did not provide a summary file, but set a parameter that requires summarizing the fasta file
+        //or did provide a summary file, but set maxn parameter so we must summarize the fasta file 
+        vector<unsigned long long> positions;
+        if (((summaryfile == "") && ((m->inUsersGroups("maxambig", optimize)) ||(m->inUsersGroups("maxhomop", optimize)) ||(m->inUsersGroups("maxlength", optimize)) || (m->inUsersGroups("minlength", optimize)) || (m->inUsersGroups("start", optimize)) || (m->inUsersGroups("end", optimize)))) || ((summaryfile != "") && m->inUsersGroups("maxn", optimize))) {  
+            //use the namefile to optimize correctly
+            if (namefile != "") { nameMap = m->readNames(namefile); }
+            else if (countfile != "") {
+                CountTable ct;
+                ct.readTable(countfile);
+                nameMap = ct.getNameMap();
+            }
+            getSummary(positions); 
+            summarizedFasta = true;
+        } else {
+            //no fasta summary needed: only split the fasta file into work units stored in lines
+            #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                //each pair of consecutive positions from divideFile becomes one linePair
+                positions = m->divideFile(fastafile, processors);
+                for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); }
+            #else 
+                if(processors == 1){ lines.push_back(linePair(0, 1000));  }
+                else {
+                    //note: this local shadows the function-level numFastaSeqs and is only used for the split below
+                    int numFastaSeqs = 0;
+                    positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
+                    if (positions.size() < processors) { processors = positions.size(); }
+                
+                    //figure out how many sequences you have to process
+                    int numSeqsPerProcessor = numFastaSeqs / processors;
+                    for (int i = 0; i < processors; i++) {
+                        int startIndex =  i * numSeqsPerProcessor;
+                        //the last processor takes whatever is left over
+                        if(i == (processors - 1)){     numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
+                        lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
+                    }
+                }
+            #endif
+        }
+        
+        //the three branches below are mutually exclusive: only the first one whose condition holds runs
+        if ((summaryfile != "") && ((m->inUsersGroups("maxambig", optimize)) ||(m->inUsersGroups("maxhomop", optimize)) ||(m->inUsersGroups("maxlength", optimize)) || (m->inUsersGroups("minlength", optimize)) || (m->inUsersGroups("start", optimize)) || (m->inUsersGroups("end", optimize))) && !summarizedFasta) { //summarize based on summaryfile
+            if (namefile != "") { nameMap = m->readNames(namefile); }
+            else if (countfile != "") {
+                CountTable ct;
+                ct.readTable(countfile);
+                nameMap = ct.getNameMap();
+            }
+            getSummaryReport();
+        }else if ((contigsreport != "") && ((m->inUsersGroups("minoverlap", optimize)) || (m->inUsersGroups("ostart", optimize)) || (m->inUsersGroups("oend", optimize)) || (m->inUsersGroups("mismatches", optimize)))) { //optimize settings based on contigs file
+            optimizeContigs();
+        }else if ((alignreport != "") && ((m->inUsersGroups("minsim", optimize)) || (m->inUsersGroups("minscore", optimize)) || (m->inUsersGroups("maxinsert", optimize)))) { //optimize settings based on align report file
+            optimizeAlign();
+        }
+        
+        
+        //run exactly one screener; a summary file takes precedence, then the contigs report, then the align report
+        if (summaryfile != "")      {   numFastaSeqs = screenSummary(badSeqNames);  }
+        //add in any seqs that fail due to contigs report results
+        else if (contigsreport != "")    {   numFastaSeqs = screenContigs(badSeqNames);  }
+        //add in any seqs that fail due to align report
+        else if (alignreport != "")      {   numFastaSeqs = screenAlignReport(badSeqNames);  }
+        
+        return numFastaSeqs;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "screenReports");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+// Screens the align report. The header line and every row that passes the
+// maxInsert / minScore / minSim checks are copied to the "alignreport" output
+// (registered in outputNames / outputTypes); each failing sequence name is stored
+// in badSeqNames with its trash code. A threshold equal to -1 disables that check.
+// Afterwards runFastaScreening() is run; if the size of badSeqNames changed, the
+// output report is rewritten without the rows whose names are now in badSeqNames.
+//
+// badSeqNames: in/out map of sequence name -> trash code.
+// Returns the number of rows read from the align report, or 0 if control_pressed
+// was seen while reading. If the fasta and report sequence counts differ, an error
+// is printed and m->control_pressed is set.
+// Any exception is reported through m->errorOut() and the program exits.
+int ScreenSeqsCommand::screenAlignReport(map<string, string>& badSeqNames){
+       try {
+        
+        map<string, string> variables; 
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(alignreport));
+        string outSummary =  getOutputFileName("alignreport",variables);
+               outputNames.push_back(outSummary); outputTypes["alignreport"].push_back(outSummary);
+        
+        string name, TemplateName, SearchMethod, AlignmentMethod;
+        //QueryName    QueryLength     TemplateName    TemplateLength  SearchMethod    SearchScore     AlignmentMethod QueryStart      QueryEnd        TemplateStart   TemplateEnd     PairwiseAlignmentLength GapsInQuery     GapsInTemplate  LongestInsert   SimBtwnQuery&Template
+        //checking for minScore, maxInsert, minSim
+        int length, TemplateLength,     QueryStart,    QueryEnd,       TemplateStart,  TemplateEnd,    PairwiseAlignmentLength,        GapsInQuery,    GapsInTemplate, LongestInsert;
+        float SearchScore, SimBtwnQueryTemplate;
+        
+        ofstream out;
+        m->openOutputFile(outSummary, out);
+        
+        //read align report
+        ifstream in;
+        m->openInputFile(alignreport, in);
+        out << (m->getline(in)) << endl;   //copy the header line through
+        
+               int count = 0;
+        
+               while (!in.eof()) {
+            
+            if (m->control_pressed) { in.close(); out.close(); return 0; }
+            
+            //one row, in the column order listed above
+            in >> name >> length >> TemplateName >> TemplateLength >> SearchMethod >> SearchScore >> AlignmentMethod >> QueryStart >> QueryEnd >> TemplateStart >> TemplateEnd >> PairwiseAlignmentLength >> GapsInQuery >> GapsInTemplate >> LongestInsert >> SimBtwnQueryTemplate; m->gobble(in);
+
+            bool goodSeq = 1;          //      innocent until proven guilty
+            string trashCode = "";
+            if(maxInsert != -1 && maxInsert < LongestInsert)    {      goodSeq = 0; trashCode += "insert|";    }
+            if(minScore != -1 && minScore > SearchScore)               {       goodSeq = 0; trashCode += "score|";     }
+            if(minSim != -1 && minSim > SimBtwnQueryTemplate)  {       goodSeq = 0; trashCode += "sim|";       }
+            
+            if(goodSeq == 1){
+                out << name << '\t' << length << '\t' << TemplateName  << '\t' << TemplateLength  << '\t' << SearchMethod  << '\t' << SearchScore  << '\t' << AlignmentMethod  << '\t' << QueryStart  << '\t' << QueryEnd  << '\t' << TemplateStart  << '\t' << TemplateEnd  << '\t' << PairwiseAlignmentLength  << '\t' << GapsInQuery  << '\t' << GapsInTemplate  << '\t' << LongestInsert  << '\t' << SimBtwnQueryTemplate << endl;
+            }
+            else{ badSeqNames[name] = trashCode;  }
+            count++;
+        }
+        in.close();
+        out.close();
+        
+        //remember how many names were bad before the fasta screen so a change can be detected
+        size_t oldBadSeqsCount = badSeqNames.size();
+        
+        int numFastaSeqs = runFastaScreening(badSeqNames);
+        
+        if (oldBadSeqsCount != badSeqNames.size()) { //more seqs were removed by maxns
+            m->renameFile(outSummary, outSummary+".temp");
+            
+            ofstream out2;
+            m->openOutputFile(outSummary, out2);
+            
+            //re-read the report written above and drop the rows that are now bad
+            ifstream in2;
+            m->openInputFile(outSummary+".temp", in2);
+            out2 << (m->getline(in2)) << endl;   //copy the header line through
+            
+            while (!in2.eof()) {
+                
+                if (m->control_pressed) { in2.close(); out2.close(); return 0; }
+                
+                //one row, in the column order listed above
+                in2 >> name >> length >> TemplateName >> TemplateLength >> SearchMethod >> SearchScore >> AlignmentMethod >> QueryStart >> QueryEnd >> TemplateStart >> TemplateEnd >> PairwiseAlignmentLength >> GapsInQuery >> GapsInTemplate >> LongestInsert >> SimBtwnQueryTemplate; m->gobble(in2);
+                
+                if (badSeqNames.count(name) == 0) { //are you good?
+                    out2 << name << '\t' << length << '\t' << TemplateName  << '\t' << TemplateLength  << '\t' << SearchMethod  << '\t' << SearchScore  << '\t' << AlignmentMethod  << '\t' << QueryStart  << '\t' << QueryEnd  << '\t' << TemplateStart  << '\t' << TemplateEnd  << '\t' << PairwiseAlignmentLength  << '\t' << GapsInQuery  << '\t' << GapsInTemplate  << '\t' << LongestInsert  << '\t' << SimBtwnQueryTemplate << endl;            
+                }
+            }
+            in2.close();
+            out2.close();
+            m->mothurRemove(outSummary+".temp");
+        }
+        
+        //the fasta file and the align report must describe the same number of sequences
+        if (numFastaSeqs != count) {  m->mothurOut("[ERROR]: found " + toString(numFastaSeqs) + " sequences in your fasta file, and " + toString(count) + " sequences in your align report file, quitting.\n"); m->control_pressed = true; }
+        
+        return count;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "screenAlignReport");
+               exit(1);
+       }
+       
+}
+//***************************************************************************************************************/
+int ScreenSeqsCommand::screenContigs(map<string, string>& badSeqNames){
+       try{
+        map<string, string> variables; 
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(contigsreport));
+        string outSummary =  getOutputFileName("contigsreport",variables);
+               outputNames.push_back(outSummary); outputTypes["contigsreport"].push_back(outSummary);
+        
+        string name;
+        //Name Length  Overlap_Length  Overlap_Start   Overlap_End     MisMatches      Num_Ns
+        int length, OLength, thisOStart, thisOEnd, numMisMatches, numNs;
+        
+        ofstream out;
+        m->openOutputFile(outSummary, out);
+        
+        //read summary file
+        ifstream in;
+        m->openInputFile(contigsreport, in);
+        out << (m->getline(in)) << endl;   //skip headers
+        
+               int count = 0;
+        
+               while (!in.eof()) {
+            
+            if (m->control_pressed) { in.close(); out.close(); return 0; }
+            
+            //seqname  start   end     nbases  ambigs  polymer numSeqs
+            in >> name >> length >> OLength >> thisOStart >> thisOEnd >> numMisMatches >> numNs; m->gobble(in);
+            
+            bool goodSeq = 1;          //      innocent until proven guilty
+            string trashCode = "";
+            if(oStart != -1 && oStart < thisOStart)             {      goodSeq = 0;    trashCode += "ostart|";     }
+            if(oEnd != -1 && oEnd > thisOEnd)                   {      goodSeq = 0;    trashCode += "oend|";       }
+            if(maxN != -1 && maxN <    numNs)                      {   goodSeq = 0;    trashCode += "n|";          }
+            if(minOverlap != -1 && minOverlap > OLength)               {       goodSeq = 0;    trashCode += "olength|";    }
+            if(mismatches != -1 && mismatches < numMisMatches) {       goodSeq = 0;    trashCode += "mismatches|"; }
+            
+            if(goodSeq == 1){
+                out << name << '\t' << length  << '\t' << OLength  << '\t' << thisOStart  << '\t' << thisOEnd  << '\t' << numMisMatches  << '\t' << numNs << endl;     
+            }
+            else{ badSeqNames[name] = trashCode; }
+            count++;
+        }
+        in.close();
+        out.close();
+        
+        int oldBadSeqsCount = badSeqNames.size();
+        
+        int numFastaSeqs = runFastaScreening(badSeqNames);
+        
+        if (oldBadSeqsCount != badSeqNames.size()) { //more seqs were removed by maxns
+            m->renameFile(outSummary, outSummary+".temp");
+            
+            ofstream out2;
+            m->openOutputFile(outSummary, out2);
+            
+            //read summary file
+            ifstream in2;
+            m->openInputFile(outSummary+".temp", in2);
+            out2 << (m->getline(in2)) << endl;   //skip headers
+            
+            while (!in2.eof()) {
+                
+                if (m->control_pressed) { in2.close(); out2.close(); return 0; }
+                
+                //seqname      start   end     nbases  ambigs  polymer numSeqs
+                in2 >> name >> length >> OLength >> thisOStart >> thisOEnd >> numMisMatches >> numNs; m->gobble(in2);
+                
+                if (badSeqNames.count(name) == 0) { //are you good?
+                    out2 << name << '\t' << length  << '\t' << OLength  << '\t' << thisOStart  << '\t' << thisOEnd  << '\t' << numMisMatches  << '\t' << numNs << endl;                
+                }
+            }
+            in2.close();
+            out2.close();
+            m->mothurRemove(outSummary+".temp");
+        }
+        
+        if (numFastaSeqs != count) {  m->mothurOut("[ERROR]: found " + toString(numFastaSeqs) + " sequences in your fasta file, and " + toString(count) + " sequences in your contigs report file, quitting.\n"); m->control_pressed = true; }
+        
+        
+        return count;
+        
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "screenContigs");
+               exit(1);
+       }
+}
+//***************************************************************************************************************/
+int ScreenSeqsCommand::screenSummary(map<string, string>& badSeqNames){
+       try{
+        map<string, string> variables; 
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(summaryfile));
+        string outSummary =  getOutputFileName("summary",variables);
+               outputNames.push_back(outSummary); outputTypes["summary"].push_back(outSummary);
+        
+        string name;
+        int start, end, length, ambigs, polymer, numReps;
+        
+        ofstream out;
+        m->openOutputFile(outSummary, out);
+                
+        //read summary file
+        ifstream in;
+        m->openInputFile(summaryfile, in);
+        out << (m->getline(in)) << endl;   //skip headers
+         
+               int count = 0;
+        
+               while (!in.eof()) {
+            
+            if (m->control_pressed) { in.close(); out.close(); return 0; }
+            
+            //seqname  start   end     nbases  ambigs  polymer numSeqs
+            in >> name >> start >> end >> length >> ambigs >> polymer >> numReps; m->gobble(in);
+            
+            bool goodSeq = 1;          //      innocent until proven guilty
+            string trashCode = "";
+            if(startPos != -1 && startPos < start)                     {       goodSeq = 0;    trashCode += "start|"; }
+            if(endPos != -1 && endPos > end)                           {       goodSeq = 0;    trashCode += "end|"; }
+            if(maxAmbig != -1 && maxAmbig <    ambigs)         {       goodSeq = 0;    trashCode += "ambig|"; }
+            if(maxHomoP != -1 && maxHomoP < polymer)        {  goodSeq = 0;    trashCode += "homop|"; }
+            if(minLength != -1 && minLength > length)          {       goodSeq = 0;    trashCode += "<length|"; }
+            if(maxLength != -1 && maxLength < length)          {       goodSeq = 0;    trashCode += ">length|"; }
+            
+            if(goodSeq == 1){
+                out << name << '\t' << start  << '\t' << end  << '\t' << length  << '\t' << ambigs  << '\t' << polymer  << '\t' << numReps << endl;    
+            }
+            else{ badSeqNames[name] = trashCode; }
+            count++;
+        }
+        in.close();
+        out.close();
+        
+        int oldBadSeqsCount = badSeqNames.size();
+        
+        int numFastaSeqs = runFastaScreening(badSeqNames);
+        
+        if (oldBadSeqsCount != badSeqNames.size()) { //more seqs were removed by maxns
+            m->renameFile(outSummary, outSummary+".temp");
+            
+            ofstream out2;
+            m->openOutputFile(outSummary, out2);
+            
+            //read summary file
+            ifstream in2;
+            m->openInputFile(outSummary+".temp", in2);
+            out2 << (m->getline(in2)) << endl;   //skip headers
+            
+            while (!in2.eof()) {
+                
+                if (m->control_pressed) { in2.close(); out2.close(); return 0; }
+                
+                //seqname      start   end     nbases  ambigs  polymer numSeqs
+                in2 >> name >> start >> end >> length >> ambigs >> polymer >> numReps; m->gobble(in2);
+                
+                if (badSeqNames.count(name) == 0) { //are you good?
+                    out2 << name << '\t' << start  << '\t' << end  << '\t' << length  << '\t' << ambigs  << '\t' << polymer  << '\t' << numReps << endl;       
+                }
+            }
+            in2.close();
+            out2.close();
+            m->mothurRemove(outSummary+".temp");
+        }
+        
+        if (numFastaSeqs != count) {  m->mothurOut("[ERROR]: found " + toString(numFastaSeqs) + " sequences in your fasta file, and " + toString(count) + " sequences in your summary file, quitting.\n"); m->control_pressed = true; }
+        
+        
+        
+        return count;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "screenSummary");
+               exit(1);
+       }
+}
+//***************************************************************************************************************/
+int ScreenSeqsCommand::screenFasta(map<string, string>& badSeqNames){
+       try{
+        
+        
+        //if the user want to optimize we need to know the 90% mark
+               vector<unsigned long long> positions;
+               if (optimize.size() != 0) {  //get summary is paralellized so we need to divideFile, no need to do this step twice so I moved it here
+                       //use the namefile to optimize correctly
+                       if (namefile != "") { nameMap = m->readNames(namefile); }
+            else if (countfile != "") {
+                CountTable ct;
+                ct.readTable(countfile);
+                nameMap = ct.getNameMap();
+            }
+                       getSummary(positions); 
+               }else { 
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            positions = m->divideFile(fastafile, processors);
+            for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); }
+#else 
+            if(processors == 1){ lines.push_back(linePair(0, 1000));  }
+            else {
+                int numFastaSeqs = 0;
+                positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
+                if (positions.size() < processors) { processors = positions.size(); }
+                
+                //figure out how many sequences you have to process
+                int numSeqsPerProcessor = numFastaSeqs / processors;
+                for (int i = 0; i < processors; i++) {
+                    int startIndex =  i * numSeqsPerProcessor;
+                    if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
+                    lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
+                }
+            }
+#endif
+               }
+        
+        if (m->control_pressed) { return 0; }
+        
+        int numFastaSeqs = runFastaScreening(badSeqNames);
+        
+        return numFastaSeqs;
+        
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "screenFasta");
+               exit(1);
+       }
+}      
+//***************************************************************************************************************
+
+int ScreenSeqsCommand::screenNameGroupFile(map<string, string> badSeqNames){
+       try {
+               ifstream inputNames;
+               m->openInputFile(namefile, inputNames);
+               map<string, string> badSeqGroups;
+               string seqName, seqList, group;
+               map<string, string>::iterator it;
+        map<string, string> variables; 
+               variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(namefile));
+        variables["[extension]"] = m->getExtension(namefile);
+               string goodNameFile = getOutputFileName("name", variables);
+               outputNames.push_back(goodNameFile);  outputTypes["name"].push_back(goodNameFile);
+               
+               ofstream goodNameOut;   m->openOutputFile(goodNameFile, goodNameOut);
+               
+               while(!inputNames.eof()){
+                       if (m->control_pressed) { goodNameOut.close();  inputNames.close(); m->mothurRemove(goodNameFile);  return 0; }
+
+                       inputNames >> seqName; m->gobble(inputNames); inputNames >> seqList;
+                       it = badSeqNames.find(seqName);
+                               
+                       if(it != badSeqNames.end()){
+                               badSeqNames.erase(it);
+                               
+                               if(namefile != ""){
+                                       int start = 0;
+                                       for(int i=0;i<seqList.length();i++){
+                                               if(seqList[i] == ','){
+                                                       badSeqGroups[seqList.substr(start,i-start)] = it->second;
+                                                       start = i+1;
+                                               }                                       
+                                       }
+                                       badSeqGroups[seqList.substr(start,seqList.length()-start)] = it->second;
+                               }
+                       }
+                       else{
+                               goodNameOut << seqName << '\t' << seqList << endl;
+                       }
+                       m->gobble(inputNames);
+               }
+               inputNames.close();
+               goodNameOut.close();
+       
+               //we were unable to remove some of the bad sequences
+               if (badSeqNames.size() != 0) {
+                       for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
+                               m->mothurOut("Your namefile does not include the sequence " + it->first + " please correct."); 
+                               m->mothurOutEndLine();
+                       }
+               }
+
+               if(groupfile != ""){
+                       
+                       ifstream inputGroups;
+                       m->openInputFile(groupfile, inputGroups);
+            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(groupfile));
+            variables["[extension]"] = m->getExtension(groupfile);
+            string goodGroupFile = getOutputFileName("group", variables);
+                       
+                       outputNames.push_back(goodGroupFile);   outputTypes["group"].push_back(goodGroupFile);
+                       
+                       ofstream goodGroupOut;  m->openOutputFile(goodGroupFile, goodGroupOut);
+                       
+                       while(!inputGroups.eof()){
+                               if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); m->mothurRemove(goodNameFile);  m->mothurRemove(goodGroupFile); return 0; }
+
+                               inputGroups >> seqName; m->gobble(inputGroups); inputGroups >> group;
+                               
+                               it = badSeqGroups.find(seqName);
+                               
+                               if(it != badSeqGroups.end()){
+                                       badSeqGroups.erase(it);
+                               }
+                               else{
+                                       goodGroupOut << seqName << '\t' << group << endl;
+                               }
+                               m->gobble(inputGroups);
+                       }
+                       inputGroups.close();
+                       goodGroupOut.close();
+                       
+                       //we were unable to remove some of the bad sequences
+                       if (badSeqGroups.size() != 0) {
+                               for (it = badSeqGroups.begin(); it != badSeqGroups.end(); it++) {  
+                                       m->mothurOut("Your groupfile does not include the sequence " + it->first + " please correct."); 
+                                       m->mothurOutEndLine();
+                               }
+                       }
+               }
+               
+               
+               return 0;
+       
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "screenNameGroupFile");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+int ScreenSeqsCommand::getSummaryReport(){
+       try {
+               
+               vector<int> startPosition;
+               vector<int> endPosition;
+               vector<int> seqLength;
+               vector<int> ambigBases;
+               vector<int> longHomoPolymer;
+        
+#ifdef USE_MPI
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+               
+               if (pid == 0) { 
+#endif
+            
+            
+            //read summary file
+            ifstream in;
+            m->openInputFile(summaryfile, in);
+            m->getline(in);
+            
+            string name;
+            int start, end, length, ambigs, polymer, numReps;
+            
+            while (!in.eof()) {
+                
+                if (m->control_pressed) { in.close(); return 0; }
+                
+                //seqname      start   end     nbases  ambigs  polymer numSeqs
+                in >> name >> start >> end >> length >> ambigs >> polymer >> numReps; m->gobble(in);
+                
+                int num = 1;
+                               if ((namefile != "") || (countfile !="")) {
+                                       //make sure this sequence is in the namefile, else error 
+                                       map<string, int>::iterator it = nameMap.find(name);
+                                       
+                                       if (it == nameMap.end()) { m->mothurOut("[ERROR]: " + name + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                       else { num = it->second; }
+                               }
+                               
+                               //for each sequence this sequence represents
+                               for (int i = 0; i < num; i++) {
+                                       startPosition.push_back(start);
+                                       endPosition.push_back(end);
+                                       seqLength.push_back(length);
+                                       ambigBases.push_back(ambigs);
+                                       longHomoPolymer.push_back(polymer);
+                               }
+               
+            }
+            in.close();
+
+        sort(startPosition.begin(), startPosition.end());
+               sort(endPosition.begin(), endPosition.end());
+               sort(seqLength.begin(), seqLength.end());
+               sort(ambigBases.begin(), ambigBases.end());
+               sort(longHomoPolymer.begin(), longHomoPolymer.end());
+               
+               //numSeqs is the number of unique seqs, startPosition.size() is the total number of seqs, we want to optimize using all seqs
+               int criteriaPercentile  = int(startPosition.size() * (criteria / (float) 100));
+               
+               for (int i = 0; i < optimize.size(); i++) {
+                       if (optimize[i] == "start") { startPos = startPosition[criteriaPercentile]; m->mothurOut("Optimizing start to " + toString(startPos) + "."); m->mothurOutEndLine(); }
+                       else if (optimize[i] == "end") { int endcriteriaPercentile = int(endPosition.size() * ((100 - criteria) / (float) 100));  endPos = endPosition[endcriteriaPercentile]; m->mothurOut("Optimizing end to " + toString(endPos) + "."); m->mothurOutEndLine();}
+                       else if (optimize[i] == "maxambig") { maxAmbig = ambigBases[criteriaPercentile]; m->mothurOut("Optimizing maxambig to " + toString(maxAmbig) + "."); m->mothurOutEndLine(); }
+                       else if (optimize[i] == "maxhomop") { maxHomoP = longHomoPolymer[criteriaPercentile]; m->mothurOut("Optimizing maxhomop to " + toString(maxHomoP) + "."); m->mothurOutEndLine(); }
+                       else if (optimize[i] == "minlength") { int mincriteriaPercentile = int(seqLength.size() * ((100 - criteria) / (float) 100)); minLength = seqLength[mincriteriaPercentile]; m->mothurOut("Optimizing minlength to " + toString(minLength) + "."); m->mothurOutEndLine(); }
+                       else if (optimize[i] == "maxlength") { maxLength = seqLength[criteriaPercentile]; m->mothurOut("Optimizing maxlength to " + toString(maxLength) + "."); m->mothurOutEndLine(); }
+               }
+        
+#ifdef USE_MPI
+    }
+    
+    MPI_Status status; 
+    MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+    MPI_Comm_size(MPI_COMM_WORLD, &processors); 
+    
+    if (pid == 0) { 
+        //send file positions to all processes
+        for(int i = 1; i < processors; i++) { 
+            MPI_Send(&startPos, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+            MPI_Send(&endPos, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+            MPI_Send(&maxAmbig, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+            MPI_Send(&maxHomoP, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+            MPI_Send(&minLength, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+            MPI_Send(&maxLength, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+        }
+    }else {
+        MPI_Recv(&startPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+        MPI_Recv(&endPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+        MPI_Recv(&maxAmbig, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+        MPI_Recv(&maxHomoP, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+        MPI_Recv(&minLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+        MPI_Recv(&maxLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+    }
+    MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+#endif
+        return 0;
+        
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "getSummaryReport");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+int ScreenSeqsCommand::optimizeContigs(){
+       try {
+               vector<int> olengths;
+               vector<int> oStarts;
+               vector<int> oEnds;
+               vector<int> numMismatches;
+        vector<int> numNs;
+               
+        vector<unsigned long long> positions;
+        vector<linePair> contigsLines;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+               positions = m->divideFilePerLine(contigsreport, processors);
+               for (int i = 0; i < (positions.size()-1); i++) { contigsLines.push_back(linePair(positions[i], positions[(i+1)])); }    
+#else
+               if(processors == 1){ contigsLines.push_back(linePair(0, 1000));  }
+        else {
+            int numContigsSeqs = 0;
+            positions = m->setFilePosEachLine(contigsreport, numContigsSeqs); 
+            if (positions.size() < processors) { processors = positions.size(); }
+            
+            //figure out how many sequences you have to process
+            int numSeqsPerProcessor = numContigsSeqs / processors;
+            for (int i = 0; i < processors; i++) {
+                int startIndex =  i * numSeqsPerProcessor;
+                if(i == (processors - 1)){     numSeqsPerProcessor = numContigsSeqs - i * numSeqsPerProcessor;         }
+                contigsLines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
+            }
+        }
+#endif
+               
+#ifdef USE_MPI
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+               
+               if (pid == 0) { 
+                       driverContigsSummary(olengths, oStarts, oEnds, numMismatches, numNs, contigsLines[0]);
+#else
+            createProcessesContigsSummary(olengths, oStarts, oEnds, numMismatches, numNs, contigsLines); 
+            
+                       if (m->control_pressed) {  return 0; }
+#endif
+            sort(olengths.begin(), olengths.end());
+            sort(oStarts.begin(), oStarts.end());
+            sort(oEnds.begin(), oEnds.end());
+            sort(numMismatches.begin(), numMismatches.end());
+            sort(numNs.begin(), numNs.end());
+            
+            //numSeqs is the number of unique seqs, startPosition.size() is the total number of seqs, we want to optimize using all seqs
+            int criteriaPercentile     = int(oStarts.size() * (criteria / (float) 100));
+            
+            for (int i = 0; i < optimize.size(); i++) {
+                if (optimize[i] == "ostart") { oStart = oStarts[criteriaPercentile]; m->mothurOut("Optimizing ostart to " + toString(oStart) + "."); m->mothurOutEndLine(); }
+                else if (optimize[i] == "oend") { int endcriteriaPercentile = int(oEnds.size() * ((100 - criteria) / (float) 100));  oEnd = oEnds[endcriteriaPercentile]; m->mothurOut("Optimizing oend to " + toString(oEnd) + "."); m->mothurOutEndLine();}
+                else if (optimize[i] == "mismatches") { mismatches = numMismatches[criteriaPercentile]; m->mothurOut("Optimizing mismatches to " + toString(mismatches) + "."); m->mothurOutEndLine(); }
+                else if (optimize[i] == "maxn") { maxN = numNs[criteriaPercentile]; m->mothurOut("Optimizing maxn to " + toString(maxN) + "."); m->mothurOutEndLine(); }
+                else if (optimize[i] == "minoverlap") { int mincriteriaPercentile = int(olengths.size() * ((100 - criteria) / (float) 100)); minOverlap = olengths[mincriteriaPercentile]; m->mothurOut("Optimizing minoverlap to " + toString(minOverlap) + "."); m->mothurOutEndLine(); }
+
+            }
+            
+#ifdef USE_MPI
+               }
+               
+               MPI_Status status; 
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+               MPI_Comm_size(MPI_COMM_WORLD, &processors); 
+        
+               if (pid == 0) { 
+                       //send file positions to all processes
+                       for(int i = 1; i < processors; i++) { 
+                MPI_Send(&minOverlap, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+                               MPI_Send(&oStart, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+                               MPI_Send(&oEnd, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+                               MPI_Send(&mismatches, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+                MPI_Send(&maxN, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+                       }
+               }else {
+            MPI_Recv(&minOverlap, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                       MPI_Recv(&oStart, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                       MPI_Recv(&oEnd, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                       MPI_Recv(&mismatches, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+            MPI_Recv(&maxN, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+               }
+               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+#endif
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "optimizeContigs");
+               exit(1);
+       }
+}
+/**************************************************************************************/
+int ScreenSeqsCommand::driverContigsSummary(vector<int>& oLength, vector<int>& ostartPosition, vector<int>& oendPosition, vector<int>& omismatches, vector<int>& numNs, linePair filePos) {    
+       try {
+               
+        string name;
+        //Name Length  Overlap_Length  Overlap_Start   Overlap_End     MisMatches      Num_Ns
+        int length, OLength, thisOStart, thisOEnd, numMisMatches, numns;
+        
+               ifstream in;
+               m->openInputFile(contigsreport, in);
+        
+               in.seekg(filePos.start);
+        if (filePos.start == 0) { //read headers
+            m->getline(in); m->gobble(in);
+        }
+        
+               bool done = false;
+               int count = 0;
+        
+               while (!done) {
+            
+                       if (m->control_pressed) { in.close(); return 1; }
+            
+            //seqname  start   end     nbases  ambigs  polymer numSeqs
+            in >> name >> length >> OLength >> thisOStart >> thisOEnd >> numMisMatches >> numns; m->gobble(in);
+            
+            int num = 1;
+            if ((namefile != "") || (countfile !="")){
+                //make sure this sequence is in the namefile, else error 
+                map<string, int>::iterator it = nameMap.find(name);
+                
+                if (it == nameMap.end()) { m->mothurOut("[ERROR]: " + name + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
+                else { num = it->second; }
+            }
+            
+            //for each sequence this sequence represents
+            for (int i = 0; i < num; i++) {
+                ostartPosition.push_back(thisOStart);
+                oendPosition.push_back(thisOEnd);
+                oLength.push_back(OLength);
+                omismatches.push_back(numMisMatches);
+                numNs.push_back(numns);
+            }
+            
+            count++;
+                       
+                       //if((count) % 100 == 0){       m->mothurOut("Optimizing sequence: " + toString(count)); m->mothurOutEndLine();         }
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            unsigned long long pos = in.tellg();
+            if ((pos == -1) || (pos >= filePos.end)) { break; }
+#else
+            if (in.eof()) { break; }
+#endif
+               }
+               
+               in.close();
+               
+               return count;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "driverContigsSummary");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+int ScreenSeqsCommand::createProcessesContigsSummary(vector<int>& oLength, vector<int>& ostartPosition, vector<int>& oendPosition, vector<int>& omismatches, vector<int>& numNs, vector<linePair> contigsLines) {
+       try {
+        
+        int process = 1;
+               int num = 0;
+               vector<int> processIDS;
+        
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+        
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               num = driverContigsSummary(oLength, ostartPosition, oendPosition, omismatches, numNs, contigsLines[process]);
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = contigsreport + toString(getpid()) + ".num.temp";
+                               m->openOutputFile(tempFile, out);
+                               
+                               out << num << endl;
+                               out << ostartPosition.size() << endl;
+                               for (int k = 0; k < ostartPosition.size(); k++)         {               out << ostartPosition[k] << '\t';   }  out << endl;
+                               for (int k = 0; k < oendPosition.size(); k++)           {               out << oendPosition[k] << '\t';     }  out << endl;
+                               for (int k = 0; k < oLength.size(); k++)                        {               out << oLength[k] << '\t';          }  out << endl;
+                               for (int k = 0; k < omismatches.size(); k++)        {           out << omismatches[k] << '\t';      }  out << endl;
+                for (int k = 0; k < numNs.size(); k++)              {          out << numNs[k] << '\t';            }  out << endl;
+                               
+                               out.close();
+                               
+                               exit(0);
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
+                       }
+               }
+               
+               num = driverContigsSummary(oLength, ostartPosition, oendPosition, omismatches, numNs, contigsLines[0]);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+               //parent reads in and combine Filter info
+               for (int i = 0; i < processIDS.size(); i++) {
+                       string tempFilename = contigsreport + toString(processIDS[i]) + ".num.temp";
+                       ifstream in;
+                       m->openInputFile(tempFilename, in);
+                       
+                       int temp, tempNum;
+                       in >> tempNum; m->gobble(in); num += tempNum;
+                       in >> tempNum; m->gobble(in);
+                       for (int k = 0; k < tempNum; k++)                       {               in >> temp; ostartPosition.push_back(temp);             }               m->gobble(in);
+                       for (int k = 0; k < tempNum; k++)                       {               in >> temp; oendPosition.push_back(temp);               }               m->gobble(in);
+                       for (int k = 0; k < tempNum; k++)                       {               in >> temp; oLength.push_back(temp);                    }               m->gobble(in);
+                       for (int k = 0; k < tempNum; k++)                       {               in >> temp; omismatches.push_back(temp);        }               m->gobble(in);
+            for (int k = 0; k < tempNum; k++)                  {               in >> temp; numNs.push_back(temp);              }               m->gobble(in);
+            
+                       in.close();
+                       m->mothurRemove(tempFilename);
+               }
+               
+               
+#else 
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the seqSumData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //Taking advantage of shared memory to allow both threads to add info to vectors.
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               /*
+               vector<contigsSumData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=0; i<processors-1; i++ ){
+            
+                       // Allocate memory for thread data.
+                       contigsSumData* tempSum = new contigsSumData(contigsreport, m, contigsLines[i].start, contigsLines[i].end, namefile, countfile, nameMap);
+                       pDataArray.push_back(tempSum);
+                       
+                       //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i] = CreateThread(NULL, 0, MyContigsSumThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+               }
+               */
+        contigsLines[processors-1].start = 0;
+        //do your part
+               num = driverContigsSummary(oLength, ostartPosition, oendPosition, omismatches, numNs, contigsLines[processors-1]);
+        /*
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       num += pDataArray[i]->count;
+            if (pDataArray[i]->count != pDataArray[i]->end) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
+            for (int k = 0; k < pDataArray[i]->ostartPosition.size(); k++)  {  ostartPosition.push_back(pDataArray[i]->ostartPosition[k]);     }
+                       for (int k = 0; k < pDataArray[i]->oendPosition.size(); k++)    {       oendPosition.push_back(pDataArray[i]->oendPosition[k]);         }
+            for (int k = 0; k < pDataArray[i]->oLength.size(); k++)         {  oLength.push_back(pDataArray[i]->oLength[k]);                   }
+            for (int k = 0; k < pDataArray[i]->omismatches.size(); k++)     {  omismatches.push_back(pDataArray[i]->omismatches[k]);           }
+            for (int k = 0; k < pDataArray[i]->numNs.size(); k++)           {  numNs.push_back(pDataArray[i]->numNs[k]);                       }
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+        */
+#endif         
+        return num;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "createProcessesContigsSummary");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+int ScreenSeqsCommand::optimizeAlign(){ //sets minSim / minScore / maxInsert from the align report for each "minsim" / "minscore" / "maxinsert" entry in optimize; always returns 0, exits with status 1 on a caught exception
+       try {
+        //sims, scores and inserts are filled by the align summary driver, then sorted so a cutoff can be read at the percentile given by criteria
+               vector<float> sims;
+               vector<float> scores;
+               vector<int> inserts;
+               
+        vector<unsigned long long> positions;
+        vector<linePair> alignLines;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+               positions = m->divideFilePerLine(alignreport, processors);
+               for (int i = 0; i < (positions.size()-1); i++) { alignLines.push_back(linePair(positions[i], positions[(i+1)])); }      //NOTE(review): size()-1 wraps around if positions is ever empty - confirm divideFilePerLine never returns an empty vector
+#else
+               if(processors == 1){ alignLines.push_back(linePair(0, 1000));  } //on this branch driverAlignSummary reads until eof and does not consult the end value
+        else {
+            int numAlignSeqs = 0;
+            positions = m->setFilePosEachLine(alignreport, numAlignSeqs); 
+            if (positions.size() < processors) { processors = positions.size(); }
+            
+            //figure out how many sequences you have to process
+            int numSeqsPerProcessor = numAlignSeqs / processors;
+            for (int i = 0; i < processors; i++) {
+                int startIndex =  i * numSeqsPerProcessor;
+                if(i == (processors - 1)){     numSeqsPerProcessor = numAlignSeqs - i * numSeqsPerProcessor;   } //last slice also takes the remainder
+                alignLines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); //here the second field carries a sequence count, not an end offset
+            }
+        }
+#endif
+               
+#ifdef USE_MPI
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+               
+               if (pid == 0) { 
+                       driverAlignSummary(sims, scores, inserts, alignLines[0]); //NOTE(review): only alignLines[0] is summarised here - confirm that slice covers the whole report in MPI builds
+#else
+            createProcessesAlignSummary(sims, scores, inserts, alignLines); 
+            
+                       if (m->control_pressed) {  return 0; }
+#endif
+            sort(sims.begin(), sims.end());
+            sort(scores.begin(), scores.end());
+            sort(inserts.begin(), inserts.end());
+            
+            //sims.size() is the total number of seqs (the driver adds one entry per seq each unique seq represents), we want to optimize using all seqs
+            int criteriaPercentile     = int(sims.size() * (criteria / (float) 100)); //NOTE(review): equals sims.size() (one past the end) if criteria can be 100, and the lookups below assume a non-empty report - confirm criteria is range-checked upstream
+            
+            for (int i = 0; i < optimize.size(); i++) {
+                if (optimize[i] == "minsim") { int mincriteriaPercentile = int(sims.size() * ((100 - criteria) / (float) 100)); minSim = sims[mincriteriaPercentile];  m->mothurOut("Optimizing minsim to " + toString(minSim) + "."); m->mothurOutEndLine();}
+                else if (optimize[i] == "minscore") { int mincriteriaPercentile = int(scores.size() * ((100 - criteria) / (float) 100)); minScore = scores[mincriteriaPercentile];  m->mothurOut("Optimizing minscore to " + toString(minScore) + "."); m->mothurOutEndLine(); }
+                else if (optimize[i] == "maxinsert") { maxInsert = inserts[criteriaPercentile]; m->mothurOut("Optimizing maxinsert to " + toString(maxInsert) + "."); m->mothurOutEndLine(); }
+            }
+            
+#ifdef USE_MPI
+               }
+               
+               MPI_Status status; 
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+               MPI_Comm_size(MPI_COMM_WORLD, &processors); 
+        
+               if (pid == 0) { 
+                       //send the optimized cutoffs to all other processes
+                       for(int i = 1; i < processors; i++) { 
+                MPI_Send(&minSim, 1, MPI_INT, i, 2001, MPI_COMM_WORLD); //NOTE(review): minSim and minScore are assigned from vector<float> above - confirm MPI_INT matches their declared types
+                               MPI_Send(&minScore, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+                               MPI_Send(&maxInsert, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+                       }
+               }else {
+            MPI_Recv(&minSim, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                       MPI_Recv(&minScore, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                       MPI_Recv(&maxInsert, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
                 }
-
-               m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
-               m->mothurOutEndLine();
-
+               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+#endif
                 return 0;
         }
         catch(exception& e) {
-               m->errorOut(e, "ScreenSeqsCommand", "execute");
+               m->errorOut(e, "ScreenSeqsCommand", "optimizeAlign");
                 exit(1);
         }
  }
-
-//***************************************************************************************************************
-
-int ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
+/**************************************************************************************/
+int ScreenSeqsCommand::driverAlignSummary(vector<float>& sims, vector<float>& scores, vector<int>& inserts, linePair filePos) {        //appends SimBtwnQuery&Template, SearchScore and LongestInsert from each align report row starting at filePos.start to sims, scores and inserts; returns the number of rows read
         try {
-               ifstream inputNames;
-               m->openInputFile(namefile, inputNames);
-               set<string> badSeqGroups;
-               string seqName, seqList, group;
-               set<string>::iterator it;
-        map<string, string> variables; 
-               variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(namefile));
-        variables["[extension]"] = m->getExtension(namefile);
-               string goodNameFile = getOutputFileName("name", variables);
-               outputNames.push_back(goodNameFile);  outputTypes["name"].push_back(goodNameFile);
                 
-               ofstream goodNameOut;   m->openOutputFile(goodNameFile, goodNameOut);
+        string name, TemplateName, SearchMethod, AlignmentMethod;
+        //QueryName    QueryLength     TemplateName    TemplateLength  SearchMethod    SearchScore     AlignmentMethod QueryStart      QueryEnd        TemplateStart   TemplateEnd     PairwiseAlignmentLength GapsInQuery     GapsInTemplate  LongestInsert   SimBtwnQuery&Template
+        //checking for minScore, maxInsert, minSim
+        int length, TemplateLength,     QueryStart,    QueryEnd,       TemplateStart,  TemplateEnd,    PairwiseAlignmentLength,        GapsInQuery,    GapsInTemplate, LongestInsert;
+        float SearchScore, SimBtwnQueryTemplate;
+         
+               ifstream in;
+               m->openInputFile(alignreport, in);
+        
+               in.seekg(filePos.start);
+        if (filePos.start == 0) { //read headers
+            m->getline(in); m->gobble(in);
+        }
+        //only the slice that starts at offset 0 contains the header line, so other slices begin directly on a data row
+               bool done = false;
+               int count = 0;
+        
+               while (!done) { //done is never set; the loop leaves through the return or break statements below
+            
+                       if (m->control_pressed) { in.close(); return 1; } //NOTE(review): this 1 cannot be told apart from a row count of 1 by callers that add up the return value
+            
+            in >> name >> length >> TemplateName >> TemplateLength >> SearchMethod >> SearchScore >> AlignmentMethod >> QueryStart >> QueryEnd >> TemplateStart >> TemplateEnd >> PairwiseAlignmentLength >> GapsInQuery >> GapsInTemplate >> LongestInsert >> SimBtwnQueryTemplate; m->gobble(in);
+            
+            int num = 1; //number of sequences this row stands for; stays 1 when no name or count file is in use
+            if ((namefile != "") || (countfile !="")){
+                //make sure this sequence is in the namefile, else error 
+                map<string, int>::iterator it = nameMap.find(name);
+                
+                if (it == nameMap.end()) { m->mothurOut("[ERROR]: " + name + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; } //the row is still added once below; the flag is checked at the top of the next pass
+                else { num = it->second; }
+            }
+            
+            //for each sequence this sequence represents
+            for (int i = 0; i < num; i++) {
+                sims.push_back(SimBtwnQueryTemplate);
+                scores.push_back(SearchScore);
+                inserts.push_back(LongestInsert);
+            }
+            
+            count++;
+                       
+                       //if((count) % 100 == 0){       m->mothurOut("Optimizing sequence: " + toString(count)); m->mothurOutEndLine();         }
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            unsigned long long pos = in.tellg(); //a failed tellg() of -1 converts to the largest unsigned value, which the -1 comparison below also matches
+            if ((pos == -1) || (pos >= filePos.end)) { break; } //stop at end of file or once this slice's end offset is reached
+#else
+            if (in.eof()) { break; } //this branch ignores filePos.end and reads to the end of the file
+#endif
+               }
                 
-               while(!inputNames.eof()){
-                       if (m->control_pressed) { goodNameOut.close();  inputNames.close(); m->mothurRemove(goodNameFile);  return 0; }
+               in.close();
+               
+               return count;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "driverAlignSummary");
+               exit(1);
+       }
+}
  
-                       inputNames >> seqName >> seqList;
-                       it = badSeqNames.find(seqName);
+/**************************************************************************************************/
+int ScreenSeqsCommand::createProcessesAlignSummary(vector<float>& sims, vector<float>& scores, vector<int>& inserts, vector<linePair> alignLines) { //runs driverAlignSummary over the slices in alignLines, merges every result into sims, scores and inserts, and returns the summed row count
+       try {
+        
+        int process = 1; //children take slices 1..processors-1; the parent handles alignLines[0] itself
+               int num = 0;
+               vector<int> processIDS;
+        
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+        
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //remember the child pid; the parent rebuilds each temp file name from it later
+                               process++;
+                       }else if (pid == 0){
+                               num = driverAlignSummary(sims, scores, inserts, alignLines[process]);
                                 
-                       if(it != badSeqNames.end()){
-                               badSeqNames.erase(it);
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = alignreport + toString(getpid()) + ".num.temp";
+                               m->openOutputFile(tempFile, out);
                                 
-                               if(namefile != ""){
-                                       int start = 0;
-                                       for(int i=0;i<seqList.length();i++){
-                                               if(seqList[i] == ','){
-                                                       badSeqGroups.insert(seqList.substr(start,i-start));
-                                                       start = i+1;
-                                               }                                       
-                                       }
-                                       badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
-                               }
-                       }
-                       else{
-                               goodNameOut << seqName << '\t' << seqList << endl;
-                       }
-                       m->gobble(inputNames);
-               }
-               inputNames.close();
-               goodNameOut.close();
-       
-               //we were unable to remove some of the bad sequences
-               if (badSeqNames.size() != 0) {
-                       for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
-                               m->mothurOut("Your namefile does not include the sequence " + *it + " please correct."); 
-                               m->mothurOutEndLine();
-                       }
-               }
-
-               if(groupfile != ""){
-                       
-                       ifstream inputGroups;
-                       m->openInputFile(groupfile, inputGroups);
-            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(groupfile));
-            variables["[extension]"] = m->getExtension(groupfile);
-            string goodGroupFile = getOutputFileName("group", variables);
-                       
-                       outputNames.push_back(goodGroupFile);   outputTypes["group"].push_back(goodGroupFile);
-                       
-                       ofstream goodGroupOut;  m->openOutputFile(goodGroupFile, goodGroupOut);
-                       
-                       while(!inputGroups.eof()){
-                               if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); m->mothurRemove(goodNameFile);  m->mothurRemove(goodGroupFile); return 0; }
-
-                               inputGroups >> seqName >> group;
+                               out << num << endl; //temp file layout: row count, entry count, then one tab separated line each for sims, scores and inserts
+                               out << sims.size() << endl;
+                               for (int k = 0; k < sims.size(); k++)           {               out << sims[k] << '\t';         }  out << endl;
+                               for (int k = 0; k < scores.size(); k++)         {               out << scores[k] << '\t';       }  out << endl;
+                               for (int k = 0; k < inserts.size(); k++)        {               out << inserts[k] << '\t';      }  out << endl;
                                 
-                               it = badSeqGroups.find(seqName);
+                               out.close();
                                 
-                               if(it != badSeqGroups.end()){
-                                       badSeqGroups.erase(it);
-                               }
-                               else{
-                                       goodGroupOut << seqName << '\t' << group << endl;
-                               }
-                               m->gobble(inputGroups);
+                               exit(0); //the child ends here; its results reach the parent only through the temp file
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } //fork failed: interrupt the children started so far
+                               exit(0);
                         }
-                       inputGroups.close();
-                       goodGroupOut.close();
+               }
+               
+               num = driverAlignSummary(sims, scores, inserts, alignLines[0]);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp); //wait() returns when any one child ends, so it is called once per child; temp only receives the exit status
+               }
+               
+               //parent reads each child's temp file and appends its results to its own
+               for (int i = 0; i < processIDS.size(); i++) {
+                       string tempFilename = alignreport + toString(processIDS[i]) + ".num.temp";
+                       ifstream in;
+                       m->openInputFile(tempFilename, in);
                         
-                       //we were unable to remove some of the bad sequences
-                       if (badSeqGroups.size() != 0) {
-                               for (it = badSeqGroups.begin(); it != badSeqGroups.end(); it++) {  
-                                       m->mothurOut("Your groupfile does not include the sequence " + *it + " please correct."); 
-                                       m->mothurOutEndLine();
-                               }
-                       }
+                       int temp, tempNum;
+            float temp2;
+                       in >> tempNum; m->gobble(in); num += tempNum; //first value: rows the child read
+                       in >> tempNum; m->gobble(in); //second value: entries per vector, written from sims.size() and reused for all three reads below
+                       for (int k = 0; k < tempNum; k++)                       {               in >> temp2; sims.push_back(temp2);             }               m->gobble(in);
+                       for (int k = 0; k < tempNum; k++)                       {               in >> temp2; scores.push_back(temp2);           }               m->gobble(in);
+                       for (int k = 0; k < tempNum; k++)                       {               in >> temp; inserts.push_back(temp);    }               m->gobble(in);
+                         
+                       in.close();
+                       m->mothurRemove(tempFilename);
                 }
                 
                 
-               return 0;
-       
+#else 
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the seqSumData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //Taking advantage of shared memory to allow both threads to add info to vectors.
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               /*
+               vector<alignsData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=0; i<processors-1; i++ ){
+            
+                       // Allocate memory for thread data.
+                       alignsData* tempSum = new alignsData(alignreport, m, alignLines[i].start, alignLines[i].end, namefile, countfile, nameMap);
+                       pDataArray.push_back(tempSum);
+                       
+                       //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i] = CreateThread(NULL, 0, MyAlignsThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+               }*/
+               alignLines[processors-1].start = 0; //the worker threads above are commented out, so the last slice is rewound to offset 0 and, since the non-unix driver reads to eof, covers the whole file
+        //do your part
+               num = driverAlignSummary(sims, scores, inserts, alignLines[processors-1]);
+       /*
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       num += pDataArray[i]->count;
+            if (pDataArray[i]->count != pDataArray[i]->end) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
+            for (int k = 0; k < pDataArray[i]->sims.size(); k++)        {      sims.push_back(pDataArray[i]->sims[k]);         }
+                       for (int k = 0; k < pDataArray[i]->scores.size(); k++)      {   scores.push_back(pDataArray[i]->scores[k]);     }
+            for (int k = 0; k < pDataArray[i]->inserts.size(); k++)     {      inserts.push_back(pDataArray[i]->inserts[k]);   }
+               CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+        */
+#endif         
+        return num;
         }
         catch(exception& e) {
-               m->errorOut(e, "ScreenSeqsCommand", "screenNameGroupFile");
+               m->errorOut(e, "ScreenSeqsCommand", "createProcessesAlignSummary");
                 exit(1);
         }
  }
@@ -678,6 +1818,7 @@ int ScreenSeqsCommand::getSummary(vector<unsigned long long>& positions){
                 vector<int> seqLength;
                 vector<int> ambigBases;
                 vector<int> longHomoPolymer;
+        vector<int> numNs;
                 
          vector<unsigned long long> positions;
  #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
@@ -705,27 +1846,24 @@ int ScreenSeqsCommand::getSummary(vector<unsigned long long>& positions){
                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
                 
                 if (pid == 0) { 
-                       driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
+                       driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, numNs, fastafile, lines[0]);
  #else
                 int numSeqs = 0;
                 //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                         if(processors == 1){
-                               numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
+                               numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, numNs, fastafile, lines[0]);
                         }else{
-                               numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile); 
+                               numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, numNs, fastafile); 
                         }
                                 
                         if (m->control_pressed) {  return 0; }
-               //#else
-               //      numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
-               //      if (m->control_pressed) {  return 0; }
-               //#endif
  #endif
                 sort(startPosition.begin(), startPosition.end());
                 sort(endPosition.begin(), endPosition.end());
                 sort(seqLength.begin(), seqLength.end());
                 sort(ambigBases.begin(), ambigBases.end());
                 sort(longHomoPolymer.begin(), longHomoPolymer.end());
+        sort(numNs.begin(), numNs.end());
                 
                 //numSeqs is the number of unique seqs, startPosition.size() is the total number of seqs, we want to optimize using all seqs
                 int criteriaPercentile  = int(startPosition.size() * (criteria / (float) 100));
@@ -737,6 +1875,7 @@ int ScreenSeqsCommand::getSummary(vector<unsigned long long>& positions){
                         else if (optimize[i] == "maxhomop") { maxHomoP = longHomoPolymer[criteriaPercentile]; m->mothurOut("Optimizing maxhomop to " + toString(maxHomoP) + "."); m->mothurOutEndLine(); }
                         else if (optimize[i] == "minlength") { int mincriteriaPercentile = int(seqLength.size() * ((100 - criteria) / (float) 100)); minLength = seqLength[mincriteriaPercentile]; m->mothurOut("Optimizing minlength to " + toString(minLength) + "."); m->mothurOutEndLine(); }
                         else if (optimize[i] == "maxlength") { maxLength = seqLength[criteriaPercentile]; m->mothurOut("Optimizing maxlength to " + toString(maxLength) + "."); m->mothurOutEndLine(); }
+            else if (optimize[i] == "maxn") { maxN = numNs[criteriaPercentile]; m->mothurOut("Optimizing maxn to " + toString(maxN) + "."); m->mothurOutEndLine(); }
                 }
  
  #ifdef USE_MPI
@@ -755,6 +1894,7 @@ int ScreenSeqsCommand::getSummary(vector<unsigned long long>& positions){
                                 MPI_Send(&maxHomoP, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
                                 MPI_Send(&minLength, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
                                 MPI_Send(&maxLength, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+                MPI_Send(&maxN, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
                         }
                 }else {
                         MPI_Recv(&startPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
@@ -763,6 +1903,7 @@ int ScreenSeqsCommand::getSummary(vector<unsigned long long>& positions){
                         MPI_Recv(&maxHomoP, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
                         MPI_Recv(&minLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
                         MPI_Recv(&maxLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+            MPI_Recv(&maxN, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
                 }
                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
  #endif
@@ -774,7 +1915,7 @@ int ScreenSeqsCommand::getSummary(vector<unsigned long long>& positions){
         }
  }
  /**************************************************************************************/
-int ScreenSeqsCommand::driverCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename, linePair filePos) {   
+int ScreenSeqsCommand::driverCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, vector<int>& numNs, string filename, linePair filePos) {       
         try {
                 
                 ifstream in;
@@ -793,7 +1934,7 @@ int ScreenSeqsCommand::driverCreateSummary(vector<int>& startPosition, vector<in
         
                         if (current.getName() != "") {
                                 int num = 1;
-                               if (namefile != "") {
+                               if ((namefile != "") || (countfile !="")){
                                         //make sure this sequence is in the namefile, else error 
                                         map<string, int>::iterator it = nameMap.find(current.getName());
                                         
@@ -802,12 +1943,14 @@ int ScreenSeqsCommand::driverCreateSummary(vector<int>& startPosition, vector<in
                                 }
                                 
                                 //for each sequence this sequence represents
+                int numns = current.getNumNs();
                                 for (int i = 0; i < num; i++) {
                                         startPosition.push_back(current.getStartPos());
                                         endPosition.push_back(current.getEndPos());
                                         seqLength.push_back(current.getNumBases());
                                         ambigBases.push_back(current.getAmbigBases());
                                         longHomoPolymer.push_back(current.getLongHomoPolymer());
+                    numNs.push_back(numns);
                                 }
                                 
                                 count++;
@@ -832,7 +1975,7 @@ int ScreenSeqsCommand::driverCreateSummary(vector<int>& startPosition, vector<in
         }
  }
  /**************************************************************************************************/
-int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename) {
+int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, vector<int>& numNs, string filename) {
         try {
          
          int process = 1;
@@ -849,7 +1992,7 @@ int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition,
                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                 process++;
                         }else if (pid == 0){
-                               num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[process]);
+                               num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, numNs, fastafile, lines[process]);
                                 
                                 //pass numSeqs to parent
                                 ofstream out;
@@ -863,6 +2006,7 @@ int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition,
                                 for (int k = 0; k < seqLength.size(); k++)                      {               out << seqLength[k] << '\t'; }  out << endl;
                                 for (int k = 0; k < ambigBases.size(); k++)                     {               out << ambigBases[k] << '\t'; }  out << endl;
                                 for (int k = 0; k < longHomoPolymer.size(); k++)        {               out << longHomoPolymer[k] << '\t'; }  out << endl;
+                for (int k = 0; k < numNs.size(); k++) {               out << numNs[k] << '\t'; }  out << endl;
                                 
                                 out.close();
                                 
@@ -874,7 +2018,7 @@ int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition,
                         }
                 }
                 
-               num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
+               num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, numNs, fastafile, lines[0]);
                 
                 //force parent to wait until all the processes are done
                 for (int i=0;i<processIDS.size();i++) { 
@@ -896,6 +2040,7 @@ int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition,
                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; seqLength.push_back(temp);                  }               m->gobble(in);
                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; ambigBases.push_back(temp);                 }               m->gobble(in);
                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; longHomoPolymer.push_back(temp);    }               m->gobble(in);
+            for (int k = 0; k < tempNum; k++)                  {               in >> temp; numNs.push_back(temp);      }               m->gobble(in);
                                 
                         in.close();
                         m->mothurRemove(tempFilename);
@@ -917,7 +2062,7 @@ int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition,
                 for( int i=0; i<processors-1; i++ ){
              
                         // Allocate memory for thread data.
-                       sumData* tempSum = new sumData(filename, m, lines[i].start, lines[i].end, namefile, nameMap);
+                       sumData* tempSum = new sumData(filename, m, lines[i].start, lines[i].end, namefile, countfile, nameMap);
                         pDataArray.push_back(tempSum);
                         
                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
@@ -926,7 +2071,7 @@ int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition,
                 }
                 
          //do your part
-               num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[processors-1]);
+               num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, numNs, fastafile, lines[processors-1]);
           
                 //Wait until all threads have terminated.
                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
@@ -934,11 +2079,15 @@ int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition,
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
                         num += pDataArray[i]->count;
+            if (pDataArray[i]->count != pDataArray[i]->end) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
              for (int k = 0; k < pDataArray[i]->startPosition.size(); k++) {    startPosition.push_back(pDataArray[i]->startPosition[k]);       }
                         for (int k = 0; k < pDataArray[i]->endPosition.size(); k++) {   endPosition.push_back(pDataArray[i]->endPosition[k]);       }
              for (int k = 0; k < pDataArray[i]->seqLength.size(); k++) {        seqLength.push_back(pDataArray[i]->seqLength[k]);       }
              for (int k = 0; k < pDataArray[i]->ambigBases.size(); k++) {       ambigBases.push_back(pDataArray[i]->ambigBases[k]);       }
              for (int k = 0; k < pDataArray[i]->longHomoPolymer.size(); k++) {  longHomoPolymer.push_back(pDataArray[i]->longHomoPolymer[k]);       }
+            for (int k = 0; k < pDataArray[i]->numNs.size(); k++) {    numNs.push_back(pDataArray[i]->numNs[k]);       }
                         CloseHandle(hThreadArray[i]);
                         delete pDataArray[i];
                 }
@@ -954,12 +2103,12 @@ int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition,
  
  //***************************************************************************************************************
  
-int ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
+int ScreenSeqsCommand::screenGroupFile(map<string, string> badSeqNames){
         try {
                 ifstream inputGroups;
                 m->openInputFile(groupfile, inputGroups);
                 string seqName, group;
-               set<string>::iterator it;
+               map<string, string>::iterator it;
                 map<string, string> variables;
                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(groupfile));
          variables["[extension]"] = m->getExtension(groupfile);
@@ -970,7 +2119,7 @@ int ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
                 while(!inputGroups.eof()){
                         if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); m->mothurRemove(goodGroupFile); return 0; }
  
-                       inputGroups >> seqName >> group;
+                       inputGroups >> seqName; m->gobble(inputGroups); inputGroups >> group;
                         it = badSeqNames.find(seqName);
                         
                         if(it != badSeqNames.end()){
@@ -987,7 +2136,7 @@ int ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
                 //we were unable to remove some of the bad sequences
                 if (badSeqNames.size() != 0) {
                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
-                               m->mothurOut("Your groupfile does not include the sequence " + *it + " please correct."); 
+                               m->mothurOut("Your groupfile does not include the sequence " + it->first + " please correct."); 
                                 m->mothurOutEndLine();
                         }
                 }
@@ -1006,11 +2155,11 @@ int ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
         }
  }
  //***************************************************************************************************************
-int ScreenSeqsCommand::screenCountFile(set<string> badSeqNames){
+int ScreenSeqsCommand::screenCountFile(map<string, string> badSeqNames){
         try {
                 ifstream in;
                 m->openInputFile(countfile, in);
-               set<string>::iterator it;
+               map<string, string>::iterator it;
                 map<string, string> variables;
                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
          variables["[extension]"] = m->getExtension(countfile);
@@ -1046,7 +2195,7 @@ int ScreenSeqsCommand::screenCountFile(set<string> badSeqNames){
                 //we were unable to remove some of the bad sequences
                 if (badSeqNames.size() != 0) {
                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
-                               m->mothurOut("Your count file does not include the sequence " + *it + " please correct."); 
+                               m->mothurOut("Your count file does not include the sequence " + it->first + " please correct."); 
                                 m->mothurOutEndLine();
                         }
                 }
@@ -1073,79 +2222,12 @@ int ScreenSeqsCommand::screenCountFile(set<string> badSeqNames){
  }
  //***************************************************************************************************************
  
-int ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
-       try {
-               ifstream inputAlignReport;
-               m->openInputFile(alignreport, inputAlignReport);
-               string seqName, group;
-               set<string>::iterator it;
-               
-        map<string, string> variables;
-               variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(alignreport));
-        string goodAlignReportFile = getOutputFileName("alignreport", variables);
-               
-               outputNames.push_back(goodAlignReportFile);  outputTypes["alignreport"].push_back(goodAlignReportFile);
-               ofstream goodAlignReportOut;    m->openOutputFile(goodAlignReportFile, goodAlignReportOut);
-
-               while (!inputAlignReport.eof()) {               //      need to copy header
-                       char c = inputAlignReport.get();
-                       goodAlignReportOut << c;
-                       if (c == 10 || c == 13){        break;  }       
-               }
-
-               while(!inputAlignReport.eof()){
-                       if (m->control_pressed) { goodAlignReportOut.close(); inputAlignReport.close(); m->mothurRemove(goodAlignReportFile); return 0; }
-
-                       inputAlignReport >> seqName;
-                       it = badSeqNames.find(seqName);
-                       string line;            
-                       while (!inputAlignReport.eof()) {               //      need to copy header
-                               char c = inputAlignReport.get();
-                               line += c;
-                               if (c == 10 || c == 13){        break;  }       
-                       }
-                       
-                       if(it != badSeqNames.end()){
-                               badSeqNames.erase(it);
-                       }
-                       else{
-                               goodAlignReportOut << seqName << '\t' << line;
-                       }
-                       m->gobble(inputAlignReport);
-               }
-               
-               if (m->control_pressed) { goodAlignReportOut.close();  inputAlignReport.close(); m->mothurRemove(goodAlignReportFile);  return 0; }
-
-               //we were unable to remove some of the bad sequences
-               if (badSeqNames.size() != 0) {
-                       for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
-                               m->mothurOut("Your alignreport file does not include the sequence " + *it + " please correct."); 
-                               m->mothurOutEndLine();
-                       }
-               }
-
-               inputAlignReport.close();
-               goodAlignReportOut.close();
-                               
-               if (m->control_pressed) {  m->mothurRemove(goodAlignReportFile);  return 0; }
-               
-               return 0;
-       
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ScreenSeqsCommand", "screenAlignReport");
-               exit(1);
-       }
-       
-}
-//***************************************************************************************************************
-
-int ScreenSeqsCommand::screenTaxonomy(set<string> badSeqNames){
+int ScreenSeqsCommand::screenTaxonomy(map<string, string> badSeqNames){
         try {
                 ifstream input;
                 m->openInputFile(taxonomy, input);
                 string seqName, tax;
-               set<string>::iterator it;
+               map<string, string>::iterator it;
          map<string, string> variables;
                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(taxonomy));
          variables["[extension]"] = m->getExtension(taxonomy);
@@ -1157,7 +2239,7 @@ int ScreenSeqsCommand::screenTaxonomy(set<string> badSeqNames){
                 while(!input.eof()){
                         if (m->control_pressed) { goodTaxOut.close(); input.close(); m->mothurRemove(goodTaxFile); return 0; }
                         
-                       input >> seqName >> tax;
+                       input >> seqName; m->gobble(input); input >> tax;
                         it = badSeqNames.find(seqName);
                         
                         if(it != badSeqNames.end()){ badSeqNames.erase(it); }
@@ -1172,7 +2254,7 @@ int ScreenSeqsCommand::screenTaxonomy(set<string> badSeqNames){
                 //we were unable to remove some of the bad sequences
                 if (badSeqNames.size() != 0) {
                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
-                               m->mothurOut("Your taxonomy file does not include the sequence " + *it + " please correct."); 
+                               m->mothurOut("Your taxonomy file does not include the sequence " + it->first + " please correct."); 
                                 m->mothurOutEndLine();
                         }
                 }
@@ -1193,11 +2275,11 @@ int ScreenSeqsCommand::screenTaxonomy(set<string> badSeqNames){
  }
  //***************************************************************************************************************
  
-int ScreenSeqsCommand::screenQual(set<string> badSeqNames){
+int ScreenSeqsCommand::screenQual(map<string, string> badSeqNames){
         try {
                 ifstream in;
                 m->openInputFile(qualfile, in);
-               set<string>::iterator it;
+               map<string, string>::iterator it;
                 map<string, string> variables;
                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qualfile));
          variables["[extension]"] = m->getExtension(qualfile);
@@ -1251,7 +2333,7 @@ int ScreenSeqsCommand::screenQual(set<string> badSeqNames){
                 //we were unable to remove some of the bad sequences
                 if (badSeqNames.size() != 0) {
                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
-                               m->mothurOut("Your qual file does not include the sequence " + *it + " please correct."); 
+                               m->mothurOut("Your qual file does not include the sequence " + it->first + " please correct."); 
                                 m->mothurOutEndLine();
                         }
                 }
@@ -1269,7 +2351,7 @@ int ScreenSeqsCommand::screenQual(set<string> badSeqNames){
  }
  //**********************************************************************************************************************
  
-int ScreenSeqsCommand::driver(linePair filePos, string goodFName, string badAccnosFName, string filename, set<string>& badSeqNames){
+int ScreenSeqsCommand::driver(linePair filePos, string goodFName, string badAccnosFName, string filename, map<string, string>& badSeqNames){
         try {
                 ofstream goodFile;
                 m->openOutputFile(goodFName, goodFile);
@@ -1284,7 +2366,7 @@ int ScreenSeqsCommand::driver(linePair filePos, string goodFName, string badAccn
  
                 bool done = false;
                 int count = 0;
-       
+        
                 while (!done) {
                 
                         if (m->control_pressed) {  return 0; }
@@ -1292,21 +2374,31 @@ int ScreenSeqsCommand::driver(linePair filePos, string goodFName, string badAccn
                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
                         if (currSeq.getName() != "") {
                                 bool goodSeq = 1;               //      innocent until proven guilty
-                               if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
-                               if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
-                               if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
-                               if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
-                               if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
-                               if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
+                string trashCode = "";
+                //have the report files found you bad
+                map<string, string>::iterator it = badSeqNames.find(currSeq.getName());
+                if (it != badSeqNames.end()) { goodSeq = 0;  trashCode = it->second; }  
+                
+                if (summaryfile == "") { //summaryfile includes these so no need to check again
+                    if(startPos != -1 && startPos < currSeq.getStartPos())                     {       goodSeq = 0;    trashCode += "start|"; }
+                    if(endPos != -1 && endPos > currSeq.getEndPos())                           {       goodSeq = 0;    trashCode += "end|";}
+                    if(maxAmbig != -1 && maxAmbig <    currSeq.getAmbigBases())                {       goodSeq = 0;    trashCode += "ambig|";}
+                    if(maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())      {       goodSeq = 0;    trashCode += "homop|";}
+                    if(minLength != -1 && minLength > currSeq.getNumBases())           {       goodSeq = 0;    trashCode += "<length|";}
+                    if(maxLength != -1 && maxLength < currSeq.getNumBases())           {       goodSeq = 0;    trashCode += ">length|";}
+                }
+                
+                if (contigsreport == "") { //contigs report includes this so no need to check again
+                    if(maxN != -1 && maxN < currSeq.getNumNs())                     {  goodSeq = 0;    trashCode += "n|"; }
+                }
                                 
                                 if(goodSeq == 1){
                                         currSeq.printSequence(goodFile);        
+                               }else{
+                                       badAccnosFile << currSeq.getName() << '\t' << trashCode.substr(0, trashCode.length()-1) << endl;
+                                       badSeqNames[currSeq.getName()] = trashCode;
                                 }
-                               else{
-                                       badAccnosFile << currSeq.getName() << endl;
-                                       badSeqNames.insert(currSeq.getName());
-                               }
-                       count++;
+                count++;
                         }
                         
                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
@@ -1336,7 +2428,7 @@ int ScreenSeqsCommand::driver(linePair filePos, string goodFName, string badAccn
  }
  //**********************************************************************************************************************
  #ifdef USE_MPI
-int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& goodFile, MPI_File& badAccnosFile, vector<unsigned long long>& MPIPos, set<string>& badSeqNames){
+int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& goodFile, MPI_File& badAccnosFile, vector<unsigned long long>& MPIPos, map<string, string>& badSeqNames){
         try {
                 string outputString = "";
                 MPI_Status statusGood; 
@@ -1365,13 +2457,25 @@ int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File&
                         //process seq
                         if (currSeq.getName() != "") {
                                 bool goodSeq = 1;               //      innocent until proven guilty
-                               if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
-                               if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
-                               if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
-                               if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
-                               if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
-                               if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
+                string trashCode = "";
+                //have the report files found you bad
+                map<string, string>::iterator it = badSeqNames.find(currSeq.getName());
+                if (it != badSeqNames.end()) { goodSeq = 0;  trashCode = it->second; }  
+                
+                if (summaryfile == "") { //summaryfile includes these so no need to check again
+                    if(startPos != -1 && startPos < currSeq.getStartPos())                     {       goodSeq = 0;    trashCode += "start|"; }
+                    if(endPos != -1 && endPos > currSeq.getEndPos())                           {       goodSeq = 0;    trashCode += "end|";}
+                    if(maxAmbig != -1 && maxAmbig <    currSeq.getAmbigBases())                {       goodSeq = 0;    trashCode += "ambig|";}
+                    if(maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())      {       goodSeq = 0;    trashCode += "homop|";}
+                    if(minLength != -1 && minLength > currSeq.getNumBases())           {       goodSeq = 0;    trashCode += "<length|";}
+                    if(maxLength != -1 && maxLength < currSeq.getNumBases())           {       goodSeq = 0;    trashCode += ">length|";}
+                }
+                
+                if (contigsreport == "") { //contigs report includes this so no need to check again
+                    if(maxN != -1 && maxN < currSeq.getNumNs())                     {  goodSeq = 0;    trashCode += "n|"; }
+                }
                                 
+                
                                 if(goodSeq == 1){
                                         outputString =  ">" + currSeq.getName() + "\n" + currSeq.getAligned() + "\n";
                                 
@@ -1385,10 +2489,10 @@ int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File&
                                 }
                                 else{
  
-                                       badSeqNames.insert(currSeq.getName());
+                                       badSeqNames[currSeq.getName()] = trashCode;
                                         
                                         //write to bad accnos file
-                                       outputString = currSeq.getName() + "\n";
+                                       outputString = currSeq.getName() + "\t" + trashCode.substr(0, trashCode.length()-1) + "\n";
                                 
                                         length = outputString.length();
                                         char* buf3 = new char[length];
@@ -1413,7 +2517,7 @@ int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File&
  #endif
  /**************************************************************************************************/
  
-int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, string filename, set<string>& badSeqNames) {
+int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, string filename, map<string, string>& badSeqNames) {
         try {
          
          vector<int> processIDS;   
@@ -1475,10 +2579,10 @@ int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, st
          
          if (ableToOpen == 0) {
              badSeqNames.clear();
-            string tempName;
+            string tempName, trashCode;
              while (!inBad.eof()) {
-                inBad >> tempName; m->gobble(inBad);
-                badSeqNames.insert(tempName);
+                inBad >> tempName >> trashCode; m->gobble(inBad);
+                badSeqNames[tempName] = trashCode;
              }
              inBad.close();
          }
@@ -1501,7 +2605,7 @@ int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, st
              if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
              
                         // Allocate memory for thread data.
-                       sumScreenData* tempSum = new sumScreenData(startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, filename, m, lines[i].start, lines[i].end,goodFileName+extension, badAccnos+extension);
+                       sumScreenData* tempSum = new sumScreenData(startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, maxN, badSeqNames, filename, summaryfile, contigsreport, m, lines[i].start, lines[i].end,goodFileName+extension, badAccnos+extension);
                         pDataArray.push_back(tempSum);
                         
                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
@@ -1518,7 +2622,10 @@ int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, st
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
                         num += pDataArray[i]->count;
-            for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) {        badSeqNames.insert(*it);       }
+            if (pDataArray[i]->count != pDataArray[i]->end) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
+            for (map<string, string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) {        badSeqNames[it->first] = it->second;       }
                         CloseHandle(hThreadArray[i]);
                         delete pDataArray[i];
                 }
diff --git a/screenseqscommand.h b/screenseqscommand.h

index 284c9a5ac1a0bd5c90dca6b4854b725d6b428023..e2906900980ac7d1897b9e0976435401fb35c8be 100644 (file)
--- a/screenseqscommand.h
+++ b/screenseqscommand.h
@@ -43,30 +43,44 @@ private:
  
         vector<linePair> lines;
  
-       int screenNameGroupFile(set<string>);
-       int screenGroupFile(set<string>);
-    int screenCountFile(set<string>);
-       int screenAlignReport(set<string>);
-       int screenQual(set<string>);
-       int screenTaxonomy(set<string>);
-       
-       int driver(linePair, string, string, string, set<string>&);
-       int createProcesses(string, string, string, set<string>&);
+       int screenNameGroupFile(map<string, string>);
+       int screenGroupFile(map<string, string>);
+    int screenCountFile(map<string, string>);
+       int screenAlignReport(map<string, string>&);
+       int screenQual(map<string, string>);
+       int screenTaxonomy(map<string, string>);
         
+    int optimizeContigs();
+    int optimizeAlign();
+       int driver(linePair, string, string, string, map<string, string>&);
+       int createProcesses(string, string, string, map<string, string>&);
+    int screenSummary(map<string, string>&);
+    int screenContigs(map<string, string>&);
+    int runFastaScreening(map<string, string>&);
+    int screenFasta(map<string, string>&);
+    int screenReports(map<string, string>&);
+       int getSummary(vector<unsigned long long>&);
+       int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string);
+       int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, linePair);  
+       int getSummaryReport();
+    int driverContigsSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, linePair);
+    int createProcessesContigsSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<linePair>);
+    int driverAlignSummary(vector<float>&, vector<float>&, vector<int>&, linePair);
+    int createProcessesAlignSummary(vector<float>&, vector<float>&, vector<int>&, vector<linePair>);
+    
         #ifdef USE_MPI
-       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&, set<string>&);
+       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&, map<string, string>&);
         #endif
  
         bool abort;
-       string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy, countfile;
-       int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, criteria;
+       string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy, countfile, contigsreport, summaryfile;
+       int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, criteria, minOverlap, oStart, oEnd, mismatches, maxN, maxInsert;
+    float minSim, minScore;
         vector<string> outputNames;
         vector<string> optimize;
         map<string, int> nameMap;
         
-       int getSummary(vector<unsigned long long>&);
-       int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string);
-       int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, linePair);        
+    
  };
  
  /**************************************************************************************************/
@@ -79,7 +93,8 @@ struct sumData {
         vector<int> seqLength; 
         vector<int> ambigBases; 
         vector<int> longHomoPolymer; 
-       string filename, namefile; 
+    vector<int> numNs;
+       string filename, namefile, countfile; 
         unsigned long long start;
         unsigned long long end;
         int count;
@@ -88,9 +103,65 @@ struct sumData {
         
         
         sumData(){}
-       sumData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string nf, map<string, int> nam) {
+       sumData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string nf, string cf, map<string, int> nam) {
+               filename = f;
+        namefile = nf;
+        countfile = cf;
+               m = mout;
+               start = st;
+               end = en;
+               nameMap = nam;
+               count = 0;
+       }
+};
+/**************************************************************************************************/
+//Per-thread work packet for summarizing a contigs report.
+// It is handed to the worker through a single void pointer (LPVOID), so
+// everything the thread reads and everything it produces lives in this struct.
+struct contigsSumData {
+    //results filled in by the worker, one entry per represented sequence
+    vector<int> ostartPosition;
+    vector<int> oendPosition;
+    vector<int> oLength;
+    vector<int> omismatches;
+    vector<int> numNs;
+
+    //inputs
+    string filename, namefile, countfile;
+    unsigned long long start;   //file position the worker seeks to before reading
+    unsigned long long end;     //number of records the worker is asked to read
+    int count;                  //number of records the worker has started on
+    MothurOut* m;
+    map<string, int> nameMap;   //sequence name -> how many sequences that name represents
+
+    //A default-built packet used to leave start/end/count/m indeterminate; give them
+    //defined values so a worker that does count++ or reads m never touches garbage.
+    contigsSumData() : start(0), end(0), count(0), m(NULL) {}
+
+    contigsSumData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string nf, string cf, map<string, int> nam)
+        : filename(f), namefile(nf), countfile(cf), start(st), end(en), count(0), m(mout), nameMap(nam) {}
+};
+/**************************************************************************************************/
+struct alignsData {
+       vector<float> sims;
+       vector<float> scores;
+       vector<int> inserts;
+       string filename, namefile, countfile; 
+       unsigned long long start;
+       unsigned long long end;
+       int count;
+       MothurOut* m;
+       map<string, int> nameMap;
+       
+       
+       alignsData(){}
+       alignsData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string nf, string cf, map<string, int> nam) {
                 filename = f;
          namefile = nf;
+        countfile = cf;
                 m = mout;
                 start = st;
                 end = en;
@@ -98,34 +169,40 @@ struct sumData {
                 count = 0;
         }
  };
+
  /**************************************************************************************************/
  //custom data structure for threads to use.
  // This is passed by void pointer so it can be any data type
  // that can be passed using a single void pointer (LPVOID).
  struct sumScreenData {
-    int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
+    int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, maxN;
         unsigned long long start;
         unsigned long long end;
         int count;
         MothurOut* m;
         string goodFName, badAccnosFName, filename;
-    set<string> badSeqNames;
+    map<string, string> badSeqNames;
+    string summaryfile, contigsreport;
         
         
         sumScreenData(){}
-       sumScreenData(int s, int e, int a, int h, int minl, int maxl, string f, MothurOut* mout, unsigned long long st, unsigned long long en, string gf, string bf) {
+       sumScreenData(int s, int e, int a, int h, int minl, int maxl, int mn, map<string, string> bs, string f, string sum, string cont, MothurOut* mout, unsigned long long st, unsigned long long en, string gf, string bf) {
                 startPos = s;
                 endPos = e;
                 minLength = minl;
          maxLength = maxl;
                 maxAmbig = a;
                 maxHomoP = h;
+        maxN = mn;
                 filename = f;
          goodFName = gf;
          badAccnosFName = bf;
                 m = mout;
                 start = st;
                 end = en;
+        summaryfile = sum;
+        contigsreport = cont;
+        badSeqNames = bs;
                 count = 0;
         }
  };
@@ -149,9 +226,11 @@ static DWORD WINAPI MySumThreadFunction(LPVOID lpParam){
                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
                 }
                 
-               pDataArray->count = pDataArray->end;
+               
                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
                         
+            pDataArray->count++;
+            
                         if (pDataArray->m->control_pressed) { in.close();  pDataArray->count = 1; return 1; }
                         
                         Sequence current(in); pDataArray->m->gobble(in); 
@@ -159,7 +238,7 @@ static DWORD WINAPI MySumThreadFunction(LPVOID lpParam){
                         if (current.getName() != "") {
                                 
                                 int num = 1;
-                               if (pDataArray->namefile != "") {
+                               if ((pDataArray->namefile != "") || (pDataArray->countfile !="")){
                                         //make sure this sequence is in the namefile, else error 
                                         map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
                                         
@@ -168,12 +247,14 @@ static DWORD WINAPI MySumThreadFunction(LPVOID lpParam){
                                 }
                                 
                                 //for each sequence this sequence represents
+                int numns = current.getNumNs();
                                 for (int i = 0; i < num; i++) {
                                         pDataArray->startPosition.push_back(current.getStartPos());
                                         pDataArray->endPosition.push_back(current.getEndPos());
                                         pDataArray->seqLength.push_back(current.getNumBases());
                                         pDataArray->ambigBases.push_back(current.getAmbigBases());
                                         pDataArray->longHomoPolymer.push_back(current.getLongHomoPolymer());
+                    pDataArray->numNs.push_back(numns);
                                 }
              }
                 }
@@ -190,7 +271,124 @@ static DWORD WINAPI MySumThreadFunction(LPVOID lpParam){
  } 
  
  /**************************************************************************************************/
+//Thread entry point for summarizing a contigs report.
+// lpParam must point to a contigsSumData. The worker reads pDataArray->end records from
+// pDataArray->filename and, for every sequence a record represents, appends the overlap
+// start, overlap end, overlap length, mismatch count and N count to the packet's vectors.
+// Returns 0 on completion, or 1 (with count reset to 1) when control_pressed is seen.
+static DWORD WINAPI MyContigsSumThreadFunction(LPVOID lpParam){
+    contigsSumData* pDataArray = (contigsSumData*)lpParam;
+
+    try {
+        //report columns: Name  Length  Overlap_Length  Overlap_Start  Overlap_End  MisMatches  Num_Ns
+        string name;
+        int length, OLength, thisOStart, thisOEnd, numMisMatches, numns;
+
+        ifstream in;
+        pDataArray->m->openInputFile(pDataArray->filename, in);
+
+        //the worker that starts at the top of the file discards the header line first
+        if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
+            in.seekg(0);  pDataArray->m->getline(in); pDataArray->m->gobble(in);
+        }else { //this accounts for the difference in line endings.
+            in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
+        }
+
+        for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
+
+            pDataArray->count++;
+
+            if (pDataArray->m->control_pressed) { in.close();  pDataArray->count = 1; return 1; }
+
+            in >> name >> length >> OLength >> thisOStart >> thisOEnd >> numMisMatches >> numns; pDataArray->m->gobble(in);
+
+            //without a name or count file every record stands for exactly one sequence
+            int num = 1;
+            if ((pDataArray->namefile != "") || (pDataArray->countfile !="")){
+                //make sure this sequence is in the namefile, else error
+                map<string, int>::iterator it = pDataArray->nameMap.find(name);
+
+                if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + name + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
+                else { num = it->second; }
+            }
+
+            //for each sequence this sequence represents
+            //(index is j, not i, so the record counter of the outer loop is not shadowed)
+            for (int j = 0; j < num; j++) {
+                pDataArray->ostartPosition.push_back(thisOStart);
+                pDataArray->oendPosition.push_back(thisOEnd);
+                pDataArray->oLength.push_back(OLength);
+                pDataArray->omismatches.push_back(numMisMatches);
+                pDataArray->numNs.push_back(numns);
+            }
+        }
+
+        in.close();
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        //report under this function's real name so the error points at the right place
+        pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MyContigsSumThreadFunction");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+//Thread entry point for summarizing an align report.
+// lpParam must point to an alignsData. The worker reads pDataArray->end records from
+// pDataArray->filename and, for every sequence a record represents, appends the
+// similarity, search score and longest insert to the packet's vectors.
+// Returns 0 on completion, or 1 (with count reset to 1) when control_pressed is seen.
+static DWORD WINAPI MyAlignsThreadFunction(LPVOID lpParam){
+    alignsData* pDataArray = (alignsData*)lpParam;
+
+    try {
+
+        //report columns:
+        //QueryName    QueryLength     TemplateName    TemplateLength  SearchMethod    SearchScore     AlignmentMethod QueryStart      QueryEnd        TemplateStart   TemplateEnd     PairwiseAlignmentLength GapsInQuery     GapsInTemplate  LongestInsert   SimBtwnQuery&Template
+        //only SearchScore, LongestInsert and SimBtwnQuery&Template are kept (minScore, maxInsert, minSim)
+        string name, TemplateName, SearchMethod, AlignmentMethod;
+        int length, TemplateLength, QueryStart, QueryEnd, TemplateStart, TemplateEnd, PairwiseAlignmentLength, GapsInQuery, GapsInTemplate, LongestInsert;
+        float SearchScore, SimBtwnQueryTemplate;
+
+        ifstream in;
+        pDataArray->m->openInputFile(pDataArray->filename, in);
+
+        //the worker that starts at the top of the file discards the header line first
+        if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
+            in.seekg(0);  pDataArray->m->getline(in); pDataArray->m->gobble(in);
+        }else { //this accounts for the difference in line endings.
+            in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
+        }
+
+        for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
+
+            pDataArray->count++;
+
+            if (pDataArray->m->control_pressed) { in.close();  pDataArray->count = 1; return 1; }
+
+            in >> name >> length >> TemplateName >> TemplateLength >> SearchMethod >> SearchScore >> AlignmentMethod >> QueryStart >> QueryEnd >> TemplateStart >> TemplateEnd >> PairwiseAlignmentLength >> GapsInQuery >> GapsInTemplate >> LongestInsert >> SimBtwnQueryTemplate; pDataArray->m->gobble(in);
+
+            //without a name or count file every record stands for exactly one sequence
+            int num = 1;
+            if ((pDataArray->namefile != "") || (pDataArray->countfile !="")){
+                //make sure this sequence is in the namefile, else error
+                map<string, int>::iterator it = pDataArray->nameMap.find(name);
+
+                if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + name + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
+                else { num = it->second; }
+            }
+
+            //for each sequence this sequence represents
+            //(index is j, not i, so the record counter of the outer loop is not shadowed)
+            for (int j = 0; j < num; j++) {
+                pDataArray->sims.push_back(SimBtwnQueryTemplate);
+                pDataArray->scores.push_back(SearchScore);
+                pDataArray->inserts.push_back(LongestInsert);
+            }
+        }
+
+        in.close();
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MyAlignsThreadFunction");
+        exit(1);
+    }
+}
  
+/**************************************************************************************************/
  static DWORD WINAPI MySumScreenThreadFunction(LPVOID lpParam){ 
         sumScreenData* pDataArray;
         pDataArray = (sumScreenData*)lpParam;
@@ -212,29 +410,41 @@ static DWORD WINAPI MySumScreenThreadFunction(LPVOID lpParam){
                 }else { //this accounts for the difference in line endings. 
                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
                 }
-               
-               pDataArray->count = pDataArray->end;
+        
                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
                         
+            pDataArray->count++;
+            
                         if (pDataArray->m->control_pressed) { in.close(); badAccnosFile.close(); goodFile.close(); pDataArray->count = 1; return 1; }
                         
                         Sequence currSeq(in); pDataArray->m->gobble(in); 
                         
                         if (currSeq.getName() != "") {
                                 bool goodSeq = 1;               //      innocent until proven guilty
-                               if(goodSeq == 1 && pDataArray->startPos != -1 && pDataArray->startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
-                               if(goodSeq == 1 && pDataArray->endPos != -1 && pDataArray->endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
-                               if(goodSeq == 1 && pDataArray->maxAmbig != -1 && pDataArray->maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
-                               if(goodSeq == 1 && pDataArray->maxHomoP != -1 && pDataArray->maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
-                               if(goodSeq == 1 && pDataArray->minLength != -1 && pDataArray->minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
-                               if(goodSeq == 1 && pDataArray->maxLength != -1 && pDataArray->maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
+                string trashCode = "";
+                //have the report files found you bad
+                map<string, string>::iterator it = pDataArray->badSeqNames.find(currSeq.getName());
+                if (it != pDataArray->badSeqNames.end()) { goodSeq = 0;  trashCode = it->second; } //found it 
+                
+                if (pDataArray->summaryfile == "") {
+                    if(pDataArray->startPos != -1 && pDataArray->startPos < currSeq.getStartPos())                     {       goodSeq = 0;    trashCode += "start|"; }
+                    if(pDataArray->endPos != -1 && pDataArray->endPos > currSeq.getEndPos())                           {       goodSeq = 0;    trashCode += "end|"; }
+                    if(pDataArray->maxAmbig != -1 && pDataArray->maxAmbig <    currSeq.getAmbigBases())                {       goodSeq = 0;    trashCode += "ambig|"; }
+                    if(pDataArray->maxHomoP != -1 && pDataArray->maxHomoP < currSeq.getLongHomoPolymer())      {       goodSeq = 0;    trashCode += "homop|"; }
+                    if(pDataArray->minLength != -1 && pDataArray->minLength > currSeq.getNumBases())           {       goodSeq = 0;    trashCode += "<length|"; }
+                    if(pDataArray->maxLength != -1 && pDataArray->maxLength < currSeq.getNumBases())           {       goodSeq = 0;    trashCode += ">length|"; }
+                }
+                if (pDataArray->contigsreport == "") { //contigs report includes this so no need to check again
+                    if(pDataArray->maxN != -1 && pDataArray->maxN < currSeq.getNumNs())                     {  goodSeq = 0;    trashCode += "n|"; }
+                }
                                 
+                
                                 if(goodSeq == 1){
                                         currSeq.printSequence(goodFile);        
                                 }
                                 else{
-                                       badAccnosFile << currSeq.getName() << endl;
-                                       pDataArray->badSeqNames.insert(currSeq.getName());
+                                       badAccnosFile << currSeq.getName() << '\t' << trashCode.substr(0, trashCode.length()-1) << endl;
+                                       pDataArray->badSeqNames[currSeq.getName()] = trashCode;
                                 }
      
                         }               
diff --git a/seqerrorcommand.cpp b/seqerrorcommand.cpp

index 1fe60e8954acd14dc1a1e02d37c0d8bd987cc1c0..67e43aa0d4b7f6a6ff251bc45a8254ca49b8b162 100644 (file)
--- a/seqerrorcommand.cpp
+++ b/seqerrorcommand.cpp
@@ -298,7 +298,7 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                         }
              else{
                  if(reportFileName != ""){
-                    m->mothurOut("we are ignoring the report file if your sequences are not aligned.  we will check that the sequences in your fasta and and qual fileare the same length.");
+                    m->mothurOut("we are ignoring the report file if your sequences are not aligned.  we will check that the sequences in your fasta and and qual file are the same length.");
                      m->mothurOutEndLine();
                  }
              }
@@ -759,7 +759,10 @@ int SeqErrorCommand::driver(string filename, string qFileName, string rFileName,
              int numParentSeqs = -1;
              int closestRefIndex = -1;
                          
-            numParentSeqs = chimeraTest.analyzeQuery(query.getName(), query.getAligned(), outChimeraReport);
+            string querySeq = query.getAligned();
+            if (!aligned) {  querySeq = query.getUnaligned();  }
+            
+            numParentSeqs = chimeraTest.analyzeQuery(query.getName(), querySeq, outChimeraReport);
              
              closestRefIndex = chimeraTest.getClosestRefIndex();
              
diff --git a/seqsummarycommand.cpp b/seqsummarycommand.cpp

index 8f27b8c7beaf797775f680bdef5c34bcf7f980c8..fdf95ee6fc5f87ed7629de936f4b0543dc89906a 100644 (file)
--- a/seqsummarycommand.cpp
+++ b/seqsummarycommand.cpp
@@ -675,6 +675,9 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector<int>& startPosition,
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
                         num += pDataArray[i]->count;
+            if (pDataArray[i]->count != pDataArray[i]->end) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
              for (int k = 0; k < pDataArray[i]->startPosition.size(); k++) {    startPosition.push_back(pDataArray[i]->startPosition[k]);       }
                         for (int k = 0; k < pDataArray[i]->endPosition.size(); k++) {   endPosition.push_back(pDataArray[i]->endPosition[k]);       }
              for (int k = 0; k < pDataArray[i]->seqLength.size(); k++) {        seqLength.push_back(pDataArray[i]->seqLength[k]);       }
diff --git a/seqsummarycommand.h b/seqsummarycommand.h

index c4e6131524d9c64b4496df7f9d5d34a315e6b34e..552f4a08bee284bf914e6a50cd51176864b8b1d8 100644 (file)
--- a/seqsummarycommand.h
+++ b/seqsummarycommand.h
@@ -114,9 +114,10 @@ static DWORD WINAPI MySeqSumThreadFunction(LPVOID lpParam){
                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
                 }
                 
-               pDataArray->count = pDataArray->end;
                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
                         
+            pDataArray->count++;
+            
                         if (pDataArray->m->control_pressed) { in.close(); outSummary.close(); pDataArray->count = 1; return 1; }
                         
                         Sequence current(in); pDataArray->m->gobble(in); 
diff --git a/sequence.cpp b/sequence.cpp

index 96662bc36d4b77e1cb8b7a884f4e8460969033f4..224ecb10cb1529db7d6bdb10c268be047dae5834 100644 (file)
--- a/sequence.cpp
+++ b/sequence.cpp
@@ -20,6 +20,10 @@ Sequence::Sequence(string newName, string sequence) {
                 m = MothurOut::getInstance();
                 initialize();   
                 name = newName;
+        
+        for (int i = 0; i < name.length(); i++) {
+            if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+        }
                 
                 //setUnaligned removes any gap characters for us
                 setUnaligned(sequence);
@@ -36,6 +40,10 @@ Sequence::Sequence(string newName, string sequence, string justUnAligned) {
                 m = MothurOut::getInstance();
                 initialize();   
                 name = newName;
+        
+        for (int i = 0; i < name.length(); i++) {
+            if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+        }
                 
                 //setUnaligned removes any gap characters for us
                 setUnaligned(sequence);
@@ -53,11 +61,9 @@ Sequence::Sequence(istringstream& fastaString){
                 m = MothurOut::getInstance();
         
                 initialize();
-               fastaString >> name;
-               
-               if (name.length() != 0) { 
+        name = getSequenceName(fastaString);
                 
-                       name = name.substr(1);
+               if (!m->control_pressed) { 
                         string sequence;
                 
                         //read comments
@@ -84,8 +90,7 @@ Sequence::Sequence(istringstream& fastaString){
                         setUnaligned(sequence); 
                         
                         if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
-               
-               }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+               }
                 
         }
         catch(exception& e) {
@@ -100,11 +105,9 @@ Sequence::Sequence(istringstream& fastaString, string JustUnaligned){
                 m = MothurOut::getInstance();
         
                 initialize();
-               fastaString >> name;
-               
-               if (name.length() != 0) { 
+               name = getSequenceName(fastaString);
                 
-                       name = name.substr(1);
+               if (!m->control_pressed) { 
                         string sequence;
                 
                         //read comments
@@ -131,7 +134,7 @@ Sequence::Sequence(istringstream& fastaString, string JustUnaligned){
                         
                         if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
                         
-               }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+               }
                 
         }
         catch(exception& e) {
@@ -147,11 +150,9 @@ Sequence::Sequence(ifstream& fastaFile){
         try {
                 m = MothurOut::getInstance();
                 initialize();
-               fastaFile >> name;
-               
-               if (name.length() != 0) { 
+               name = getSequenceName(fastaFile);
                 
-                       name = name.substr(1); 
+               if (!m->control_pressed) { 
                         
                         string sequence;
                 
@@ -181,7 +182,7 @@ Sequence::Sequence(ifstream& fastaFile){
                         
                         if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
                         
-               }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+               }
  
         }
         catch(exception& e) {
@@ -195,13 +196,11 @@ Sequence::Sequence(ifstream& fastaFile, string& extraInfo, bool getInfo){
         try {
                 m = MothurOut::getInstance();
                 initialize();
-               fastaFile >> name;
          extraInfo = "";
                 
-               if (name.length() != 0) { 
-            
-                       name = name.substr(1); 
-                       
+               name = getSequenceName(fastaFile);
+               
+               if (!m->control_pressed) {                      
                         string sequence;
              
                         //read comments
@@ -233,8 +232,7 @@ Sequence::Sequence(ifstream& fastaFile, string& extraInfo, bool getInfo){
                         setUnaligned(sequence); 
                         
                         if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
-                       
-               }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+               }
          
         }
         catch(exception& e) {
@@ -248,10 +246,9 @@ Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
         try {
                 m = MothurOut::getInstance();
                 initialize();
-               fastaFile >> name;
+               name = getSequenceName(fastaFile);
                 
-               if (name.length() != 0) { 
-                       name = name.substr(1);
+               if (!m->control_pressed) { 
                         string sequence;
                         
                         //read comments
@@ -279,7 +276,7 @@ Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
                         
                         if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
                         
-               }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+               }
                 
         }
         catch(exception& e) {
@@ -287,7 +284,54 @@ Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
                 exit(1);
         }                                                       
  }
-
+//********************************************************************************************************************
+//Reads the next whitespace-delimited token from fastaFile and turns it into a sequence name:
+// the token's first character is dropped (presumably the FASTA '>' marker) and every ':' is
+// replaced by '_', setting m->changedSeqNames when that happens.
+// If no token could be read, an error is logged, m->control_pressed is set and "" is returned.
+string Sequence::getSequenceName(ifstream& fastaFile) {
+    try {
+        string seqName = "";
+        fastaFile >> seqName;
+
+        //nothing read: report it and ask the rest of the program to stop
+        if (seqName.length() == 0) {
+            m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine();
+            m->control_pressed = true;
+            return seqName;
+        }
+
+        seqName = seqName.substr(1);
+
+        //walk from colon to colon instead of testing every character
+        for (string::size_type pos = seqName.find(':'); pos != string::npos; pos = seqName.find(':', pos + 1)) {
+            seqName[pos] = '_';
+            m->changedSeqNames = true;
+        }
+
+        return seqName;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "Sequence", "getSequenceName");
+        exit(1);
+    }
+}
+//********************************************************************************************************************
+//Reads the next whitespace-delimited token from fastaFile and turns it into a sequence name:
+// the token's first character is dropped (presumably the FASTA '>' marker) and every ':' is
+// replaced by '_', setting m->changedSeqNames when that happens.
+// If no token could be read, an error is logged, m->control_pressed is set and "" is returned.
+string Sequence::getSequenceName(istringstream& fastaFile) {
+    try {
+        string seqName = "";
+        fastaFile >> seqName;
+
+        //nothing read: report it and ask the rest of the program to stop
+        if (seqName.length() == 0) {
+            m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine();
+            m->control_pressed = true;
+            return seqName;
+        }
+
+        seqName = seqName.substr(1);
+
+        //walk from colon to colon instead of testing every character
+        for (string::size_type pos = seqName.find(':'); pos != string::npos; pos = seqName.find(':', pos + 1)) {
+            seqName[pos] = '_';
+            m->changedSeqNames = true;
+        }
+
+        return seqName;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "Sequence", "getSequenceName");
+        exit(1);
+    }
+}
  //********************************************************************************************************************
  string Sequence::getSequenceString(ifstream& fastaFile, int& numAmbig) {
         try {
@@ -531,6 +575,15 @@ string Sequence::getUnaligned(){
  int Sequence::getNumBases(){
         return numBases;
  }
+//********************************************************************************************************************
+
+//Returns how many characters of the unaligned sequence are 'N' or 'n'.
+int Sequence::getNumNs(){
+    int numNs = 0;
+    for (string::size_type i = 0; i < unaligned.length(); i++) {
+        //toupper is only defined for values representable as unsigned char (or EOF),
+        //so convert first in case plain char is signed and the byte is >= 0x80
+        if (toupper(static_cast<unsigned char>(unaligned[i])) == 'N') { numNs++; }
+    }
+    return numNs;
+}
  
  //********************************************************************************************************************
  
diff --git a/sequence.hpp b/sequence.hpp

index db4c4f32b9992a27f63e15908d881a77d5507980..ad3a4b40c238c4260d36f7afea027b903539c8a0 100644 (file)
--- a/sequence.hpp
+++ b/sequence.hpp
@@ -48,6 +48,7 @@ public:
         string getPairwise();
         string getUnaligned();
         string getInlineSeq();
+    int getNumNs();
         int getNumBases();
         int getStartPos();
         int getEndPos();
@@ -69,6 +70,8 @@ private:
         string getCommentString(ifstream&);
         string getSequenceString(istringstream&, int&);
         string getCommentString(istringstream&);
+    string getSequenceName(ifstream&);
+    string getSequenceName(istringstream&);
         string name;
         string unaligned;
         string aligned;
diff --git a/shhhseqscommand.cpp b/shhhseqscommand.cpp

index 3e3e5863c59abc69e4175eba7cb5b7d6f0e173c8..82d956189a6f025fd57dc7e901e585456f59f8ab 100644 (file)
--- a/shhhseqscommand.cpp
+++ b/shhhseqscommand.cpp
@@ -474,6 +474,9 @@ vector<string> ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, st
                 
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
+            if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
                         for (int j = 0; j < pDataArray[i]->mapfileNames.size(); j++) {
                                 mapfileNames.push_back(pDataArray[i]->mapfileNames[j]);
                         }
diff --git a/shhhseqscommand.h b/shhhseqscommand.h

index 4da1a840c7d3c591f589ad6ea0103da4d9fbf1ce..ffd72b9ebdc161291f9705333f018cb56c09559c 100644 (file)
--- a/shhhseqscommand.h
+++ b/shhhseqscommand.h
@@ -77,7 +77,7 @@ struct shhhseqsData {
         MothurOut* m;
         int start;
         int end;
-       int sigma, threadID;
+       int sigma, threadID, count;
         vector<string> groups;
         vector<string> mapfileNames;
         
@@ -95,6 +95,7 @@ struct shhhseqsData {
                 sigma = s;
                 threadID = tid;
                 groups = gr;
+        count=0;
         }
  };
  
@@ -113,6 +114,8 @@ static DWORD WINAPI MyShhhSeqsThreadFunction(LPVOID lpParam){
                 //precluster each group
                 for (int k = pDataArray->start; k < pDataArray->end; k++) {
                         
+            pDataArray->count++;
+            
                         int start = time(NULL);
                         
                         if (pDataArray->m->control_pressed) {  return 0; }
diff --git a/splitgroupscommand.h b/splitgroupscommand.h

index b251fe55bc4fd010582c82297e8b757a0f17113e..ec22b8fedaec8752dd1b9161c6476b0223fc157e 100644 (file)
--- a/splitgroupscommand.h
+++ b/splitgroupscommand.h
@@ -28,7 +28,7 @@ public:
         ~SplitGroupCommand() {}
         
         vector<string> setParameters();
-       string getCommandName()                 { return "split.group";                         }
+       string getCommandName()                 { return "split.groups";                                }
         string getCommandCategory()             { return "Sequence Processing";         }
         
         string getHelpString(); 
diff --git a/summaryqualcommand.cpp b/summaryqualcommand.cpp

index ae5b652a3cbfa0e43aa74e495bb28eb16775d9d3..0f6f719ef568446af70438eb08579a81c7c6f3f6 100644 (file)
--- a/summaryqualcommand.cpp
+++ b/summaryqualcommand.cpp
@@ -441,7 +441,10 @@ int SummaryQualCommand::createProcessesCreateSummary(vector<int>& position, vect
                 
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
-                       numSeqs += pDataArray[i]->count;
+                       numSeqs += pDataArray[i]->numSeqs;
+            if (pDataArray[i]->count != pDataArray[i]->end) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
              int tempNum = pDataArray[i]->position.size();
              if (position.size() < tempNum) { position.resize(tempNum, 0); }
                         if (averageQ.size() < tempNum) { averageQ.resize(tempNum, 0); }
diff --git a/summaryqualcommand.h b/summaryqualcommand.h

index 49583b059f5380ce049ac759dd94d8d25b1c7193..b9edc5148ca5f7a834433815204b40f9ea317db4 100644 (file)
--- a/summaryqualcommand.h
+++ b/summaryqualcommand.h
@@ -66,7 +66,7 @@ struct seqSumQualData {
         string filename; 
         unsigned long long start;
         unsigned long long end;
-       int count;
+       int count, numSeqs;
         MothurOut* m;
      bool hasNameMap;
         map<string, int> nameMap;
@@ -101,7 +101,8 @@ static DWORD WINAPI MySeqSumQualThreadFunction(LPVOID lpParam){
                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
                 }
                 
-               int count = 0;
+               pDataArray->count = 0;
+        pDataArray->numSeqs = 0;
                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
                                 
                         if (pDataArray->m->control_pressed) { in.close(); pDataArray->count = 1; return 1; }
@@ -138,11 +139,11 @@ static DWORD WINAPI MySeqSumQualThreadFunction(LPVOID lpParam){
                                         else { pDataArray->scores.at(i)[thisScores[i]] += num; }  
                                 }
                                 
-                               count += num;
+                               pDataArray->numSeqs += num;
+                pDataArray->count++;
                         }
                 }
                 
-               pDataArray->count = count;
                 in.close();
                 
                 return 0;
diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp

index fdec2ec483c67f1d37c23477d1411b5c97e8a215..e82d1f6600e0b83f79154634ddfbda43d7542d91 100644 (file)
--- a/summarysharedcommand.cpp
+++ b/summarysharedcommand.cpp
@@ -742,6 +742,9 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
                  
                  //Close all thread handles and free memory allocations.
                  for(int i=0; i < pDataArray.size(); i++){
+                    if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
+                        m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
+                    }
                      m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
                      m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
                      
@@ -809,58 +812,10 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
  
          if (iters != 0) {
              //we need to find the average distance and standard deviation for each groups distance
-            
-            vector< vector<seqDist>  > calcAverages; calcAverages.resize(sumCalculators.size()); 
-            for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
-                calcAverages[i].resize(calcDistsTotals[0][i].size());
-                
-                for (int j = 0; j < calcAverages[i].size(); j++) {
-                    calcAverages[i][j].seq1 = calcDists[i][j].seq1;
-                    calcAverages[i][j].seq2 = calcDists[i][j].seq2;
-                    calcAverages[i][j].dist = 0.0;
-                }
-            }
-            
-            for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
-                for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
-                    for (int j = 0; j < calcAverages[i].size(); j++) {
-                        calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
-                    }
-                }
-            }
-            
-            for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
-                for (int j = 0; j < calcAverages[i].size(); j++) {
-                    calcAverages[i][j].dist /= (float) iters;
-                }
-            }
+            vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals);
              
              //find standard deviation
-            vector< vector<seqDist>  > stdDev; stdDev.resize(sumCalculators.size());
-            for (int i = 0; i < stdDev.size(); i++) {  //initialize sums to zero.
-                stdDev[i].resize(calcDistsTotals[0][i].size());
-                
-                for (int j = 0; j < stdDev[i].size(); j++) {
-                    stdDev[i][j].seq1 = calcDists[i][j].seq1;
-                    stdDev[i][j].seq2 = calcDists[i][j].seq2;
-                    stdDev[i][j].dist = 0.0;
-                }
-            }
-            
-            for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
-                for (int i = 0; i < stdDev.size(); i++) {  
-                    for (int j = 0; j < stdDev[i].size(); j++) {
-                        stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
-                    }
-                }
-            }
-            
-            for (int i = 0; i < stdDev.size(); i++) {  //finds average.
-                for (int j = 0; j < stdDev[i].size(); j++) {
-                    stdDev[i][j].dist /= (float) iters;
-                    stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
-                }
-            }
+            vector< vector<seqDist>  > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages); 
              
              //print results
              for (int i = 0; i < calcDists.size(); i++) {
diff --git a/summarysharedcommand.h b/summarysharedcommand.h

index f486d520216bd8ec4858d50cd7f2d92b7fd23537..8ba9b9fcf3e40b745cf75ddbaccd2f6df8cde652 100644 (file)
--- a/summarysharedcommand.h
+++ b/summarysharedcommand.h
@@ -112,6 +112,7 @@ struct summarySharedData {
         unsigned long long end;
         MothurOut* m;
         string sumFile;
+    int count;
         
         summarySharedData(){}
         summarySharedData(string sf, MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
@@ -121,6 +122,7 @@ struct summarySharedData {
                 end = en;
          Estimators = est;
          thisLookup = lu;
+        count=0;
         }
  };
  /**************************************************************************************************/
@@ -225,7 +227,7 @@ static DWORD WINAPI MySummarySharedThreadFunction(LPVOID lpParam){
                 
                 vector<SharedRAbundVector*> subset;
                 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
-            
+            pDataArray->count++;
                         for (int l = 0; l < k; l++) {
                                 
                                 outputFileHandle << pDataArray->thisLookup[0]->getLabel() << '\t';
diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp

index 0d014599fa794217233694f8a28ed707b99c86eb..0df53d5649881c924be60140338e5fff7c2bc840 100644 (file)
--- a/treegroupscommand.cpp
+++ b/treegroupscommand.cpp
@@ -890,6 +890,9 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                  
                  //Close all thread handles and free memory allocations.
                  for(int i=0; i < pDataArray.size(); i++){
+                    if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
+                        m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
+                    }
                      for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
                      
                      for (int k = 0; k < calcDists.size(); k++) {
@@ -917,31 +920,7 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 
          if (iters != 1) {
              //we need to find the average distance and standard deviation for each groups distance
-            
-            vector< vector<seqDist>  > calcAverages; calcAverages.resize(treeCalculators.size()); 
-            for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
-                calcAverages[i].resize(calcDistsTotals[0][i].size());
-                
-                for (int j = 0; j < calcAverages[i].size(); j++) {
-                    calcAverages[i][j].seq1 = calcDists[i][j].seq1;
-                    calcAverages[i][j].seq2 = calcDists[i][j].seq2;
-                    calcAverages[i][j].dist = 0.0;
-                }
-            }
-            
-            for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
-                for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
-                    for (int j = 0; j < calcAverages[i].size(); j++) {
-                        calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
-                    }
-                }
-            }
-            
-            for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
-                for (int j = 0; j < calcAverages[i].size(); j++) {
-                    calcAverages[i][j].dist /= (float) iters;
-                }
-            }
+            vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals);  
              
              //create average tree for each calc
              for (int i = 0; i < calcDists.size(); i++) {
diff --git a/treegroupscommand.h b/treegroupscommand.h

index 979612f4aea3aa41187ea520483afadfa05b1782..36c852bd8ef545894b9aa00245064eb112e236c6 100644 (file)
--- a/treegroupscommand.h
+++ b/treegroupscommand.h
@@ -139,6 +139,7 @@ struct treeSharedData {
         unsigned long long start;
         unsigned long long end;
         MothurOut* m;
+    int count;
         
         treeSharedData(){}
         treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
@@ -147,6 +148,7 @@ struct treeSharedData {
                 end = en;
          Estimators = est;
          thisLookup = lu;
+        count=0;
         }
  };
  /**************************************************************************************************/
@@ -249,6 +251,8 @@ static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){
                 vector<SharedRAbundVector*> subset;
                 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
                         
+            pDataArray->count++;
+            
                         for (int l = 0; l < k; l++) {
                                 
                                 if (k != l) { //we dont need to similiarity of a groups to itself
diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp

index 44121b97dfff0bb0c6611878c7e1c04e046709ea..e9bd081605b4211a26084334122ee12da83b949b 100644 (file)
--- a/trimflowscommand.cpp
+++ b/trimflowscommand.cpp
@@ -14,7 +14,7 @@
  //**********************************************************************************************************************
  vector<string> TrimFlowsCommand::setParameters(){      
         try {
-               CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","flow",false,true,true); parameters.push_back(pflow);
+               CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","flow-file",false,true,true); parameters.push_back(pflow);
                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(poligos);
                 CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "","",false,false); parameters.push_back(pmaxhomop);
                 CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pmaxflows);
@@ -63,7 +63,7 @@ string TrimFlowsCommand::getOutputPattern(string type) {
          
          if (type == "flow") {  pattern = "[filename],[tag],flow"; } 
          else if (type == "fasta") {  pattern = "[filename],flow.fasta"; } 
-        else if (type == "file") {  pattern = "[filename],[tag],flow.files"; }
+        else if (type == "file") {  pattern = "[filename],flow.files"; }
          else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
          
          return pattern;
@@ -306,7 +306,6 @@ int TrimFlowsCommand::execute(){
                 
                 if(allFiles){
                         set<string> namesAlreadyProcessed;
-            variables["[tag]"] = "";
                         flowFilesFileName = getOutputFileName("file",variables);
                         m->openOutputFile(flowFilesFileName, output);
  
@@ -342,7 +341,6 @@ int TrimFlowsCommand::execute(){
                         output.close();
                 }
                 else{
-            variables["[tag]"] = "";
                         flowFilesFileName = getOutputFileName("file",variables);
                         m->openOutputFile(flowFilesFileName, output);
                         
diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp

index 53084078f3d4c079773a4e1cb73ebb1774726b37..b208209e2a808a0f92cca9f160f4da456450e153 100644 (file)
--- a/trimseqscommand.cpp
+++ b/trimseqscommand.cpp
@@ -1152,6 +1152,9 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                 
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
+            if (pDataArray[i]->count != pDataArray[i]->lineEnd) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->lineEnd) + " sequences assigned to it, quitting. \n"); m->control_pressed = true;
+            }
                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
                  map<string, int>::iterator it2 = groupCounts.find(it->first);
                  if (it2 == groupCounts.end()) {        groupCounts[it->first] = it->second; }
diff --git a/trimseqscommand.h b/trimseqscommand.h

index 891b14dc463342e9403aab317fc0877fdd78e82c..80e1ebe1a00e2f56ec19a6f23c3ba890af1c4f10 100644 (file)
--- a/trimseqscommand.h
+++ b/trimseqscommand.h
@@ -254,7 +254,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                 
                 TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);
          
-               pDataArray->count = pDataArray->lineEnd;
+               pDataArray->count = 0;
                 for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
                                    
                         if (pDataArray->m->control_pressed) { 
@@ -281,6 +281,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                         
                         string origSeq = currSeq.getUnaligned();
                         if (origSeq != "") {
+                pDataArray->count++;
                                 
                                 int barcodeIndex = 0;
                                 int primerIndex = 0;
@@ -398,7 +399,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                      string thisGroup = "";
                      if (pDataArray->createGroup) {
                                                 if(pDataArray->barcodes.size() != 0){
-                                                       string thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
+                                                       thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
                                                         if (pDataArray->primers.size() != 0) { 
                                                                 if (pDataArray->primerNameVector[primerIndex] != "") { 
                                                                         if(thisGroup != "") {
diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp

index 5f3cffc2b1ab807b279ba3185444dc96a0a3b863..ab8afd07ffd96728e6204d2f9b2bbd2adc719787 100644 (file)
--- a/unifracunweightedcommand.cpp
+++ b/unifracunweightedcommand.cpp
@@ -481,43 +481,31 @@ int UnifracUnweightedCommand::execute() {
  int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
         try {
          //we need to find the average distance and standard deviation for each groups distance
-        
          //finds sum
-        vector<double> averages; averages.resize(numComp, 0); 
-        for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
-            for (int i = 0; i < dists[thisIter].size(); i++) {  
-                averages[i] += dists[thisIter][i];
-            }
-        }
-        
-        //finds average.
-        for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
+        vector<double> averages = m->getAverages(dists);
          
          //find standard deviation
-        vector<double> stdDev; stdDev.resize(numComp, 0);
-        
-        for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
-            for (int j = 0; j < dists[thisIter].size(); j++) {
-                stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
-            }
-        }
-        for (int i = 0; i < stdDev.size(); i++) {  
-            stdDev[i] /= (float) subsampleIters; 
-            stdDev[i] = sqrt(stdDev[i]);
-        }
+        vector<double> stdDev = m->getStandardDeviation(dists, averages);
          
          //make matrix with scores in it
-        vector< vector<double> > avedists;     avedists.resize(m->getNumGroups());
+        vector< vector<double> > avedists;     //avedists.resize(m->getNumGroups());
          for (int i = 0; i < m->getNumGroups(); i++) {
-            avedists[i].resize(m->getNumGroups(), 0.0);
+            vector<double> temp;
+            for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
+            avedists.push_back(temp);
          }
          
          //make matrix with scores in it
-        vector< vector<double> > stddists;     stddists.resize(m->getNumGroups());
+        vector< vector<double> > stddists;     //stddists.resize(m->getNumGroups());
          for (int i = 0; i < m->getNumGroups(); i++) {
-            stddists[i].resize(m->getNumGroups(), 0.0);
+            vector<double> temp;
+            for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
+            //stddists[i].resize(m->getNumGroups(), 0.0);
+            stddists.push_back(temp);
          }
          
+        if (m->debug) { m->mothurOut("[DEBUG]: about to fill matrix.\n"); }
+        
          //flip it so you can print it
          int count = 0;
          for (int r=0; r<m->getNumGroups(); r++) { 
@@ -530,6 +518,8 @@ int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& di
              }
          }
          
+        if (m->debug) { m->mothurOut("[DEBUG]: done filling matrix.\n"); }
+        
          map<string, string> variables; 
                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
          variables["[tag]"] = toString(treeNum+1);
diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp

index 47adc9a55b4ba929796fca2993138b89584bdee1..94ae125962c85c2cc0b7f100bf5178b89da89bdd 100644 (file)
--- a/unifracweightedcommand.cpp
+++ b/unifracweightedcommand.cpp
@@ -465,40 +465,28 @@ int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dist
          //we need to find the average distance and standard deviation for each groups distance
          
          //finds sum
-        vector<double> averages; averages.resize(numComp, 0); 
-        for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
-            for (int i = 0; i < dists[thisIter].size(); i++) {  
-                averages[i] += dists[thisIter][i];
-            }
-        }
-        
-        //finds average.
-        for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
+        vector<double> averages = m->getAverages(dists);        
          
          //find standard deviation
-        vector<double> stdDev; stdDev.resize(numComp, 0);
-                
-        for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
-            for (int j = 0; j < dists[thisIter].size(); j++) {
-                stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
-            }
-        }
-        for (int i = 0; i < stdDev.size(); i++) {  
-            stdDev[i] /= (float) subsampleIters; 
-            stdDev[i] = sqrt(stdDev[i]);
-        }
+        vector<double> stdDev = m->getStandardDeviation(dists, averages);
          
          //make matrix with scores in it
-        vector< vector<double> > avedists;     avedists.resize(m->getNumGroups());
+        vector< vector<double> > avedists;     //avedists.resize(m->getNumGroups());
          for (int i = 0; i < m->getNumGroups(); i++) {
-            avedists[i].resize(m->getNumGroups(), 0.0);
+            vector<double> temp;
+            for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
+            avedists.push_back(temp);
          }
          
          //make matrix with scores in it
-        vector< vector<double> > stddists;     stddists.resize(m->getNumGroups());
+        vector< vector<double> > stddists;     //stddists.resize(m->getNumGroups());
          for (int i = 0; i < m->getNumGroups(); i++) {
-            stddists[i].resize(m->getNumGroups(), 0.0);
+            vector<double> temp;
+            for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
+            //stddists[i].resize(m->getNumGroups(), 0.0);
+            stddists.push_back(temp);
          }
+
          
          //flip it so you can print it
          int count = 0;
diff --git a/venn.cpp b/venn.cpp

index 2824ca8f6921c52993857472399beced96d4c26f..66dbb8eac3ee13c069b85374503dd87cc0ae538f 100644 (file)
--- a/venn.cpp
+++ b/venn.cpp
@@ -161,7 +161,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                 //in essence you want to run it like a single 
                                 if (vCalcs[i]->getName() == "sharedsobs") {
                                         singleCalc = new Sobs();
-                    if (sharedOtus) {
+                    if (sharedOtus &&  (labels.size() != 0)) {
                          string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + ".sharedotus";
                          
                          outputNames.push_back(filenameShared);
@@ -482,7 +482,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                         subset.push_back(lookup[0]); subset.push_back(lookup[1]);
                      vector<string> labels;
                                         vector<double> sharedab =  vCalcs[i]->getValues(subset, labels);
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                          outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup() << '\t' << labels.size() << '\t';
                          for (int k = 0; k < labels.size()-1; k++) {
                              outShared << labels[k] << ",";
@@ -494,7 +494,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                         subset.clear(); 
                                         subset.push_back(lookup[0]); subset.push_back(lookup[2]);
                                         vector<double> sharedac =  vCalcs[i]->getValues(subset, labels);
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                          outShared << lookup[0]->getGroup() + "-" + lookup[2]->getGroup() << '\t'<< labels.size() << '\t';
                          for (int k = 0; k < labels.size()-1; k++) {
                              outShared << labels[k] << ",";
@@ -506,7 +506,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                         subset.clear(); 
                                         subset.push_back(lookup[1]); subset.push_back(lookup[2]);
                                         vector<double> sharedbc =  vCalcs[i]->getValues(subset, labels);
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                          outShared << lookup[1]->getGroup() + "-" + lookup[2]->getGroup() << '\t'<< labels.size() << '\t';
                          for (int k = 0; k < labels.size()-1; k++) {
                              outShared << labels[k] << ",";
@@ -519,7 +519,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                         subset.clear(); 
                                         subset.push_back(lookup[0]); subset.push_back(lookup[1]); subset.push_back(lookup[2]);
                                         vector<double> sharedabc =  vCalcs[i]->getValues(subset, labels);
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                          outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup() + "-" + lookup[2]->getGroup() << '\t'<< labels.size() << '\t';
                          for (int k = 0; k < labels.size()-1; k++) {
                              outShared << labels[k] << ",";
@@ -674,7 +674,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                         subset.push_back(lookup[0]); subset.push_back(lookup[1]);
                                         data = vCalcs[i]->getValues(subset, labels);
                                         sharedAB = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                          outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup() << '\t'<< labels.size() << '\t';
                          for (int k = 0; k < labels.size()-1; k++) {
                              outShared << labels[k] << ",";
@@ -687,7 +687,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                         subset.push_back(lookup[0]); subset.push_back(lookup[2]);
                                         data = vCalcs[i]->getValues(subset, labels);
                                         sharedAC = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                          outShared << lookup[0]->getGroup() + "-" + lookup[2]->getGroup() << '\t'<< labels.size() << '\t';
                          for (int k = 0; k < labels.size()-1; k++) {
                              outShared << labels[k] << ",";
@@ -700,7 +700,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                         subset.push_back(lookup[0]); subset.push_back(lookup[3]);
                                         data = vCalcs[i]->getValues(subset, labels);
                                         sharedAD = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                          outShared << lookup[0]->getGroup() + "-" + lookup[3]->getGroup() << '\t'<< labels.size() << '\t';
                          for (int k = 0; k < labels.size()-1; k++) {
                              outShared << labels[k] << ",";
@@ -713,7 +713,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                         subset.push_back(lookup[1]); subset.push_back(lookup[2]);
                                         data = vCalcs[i]->getValues(subset, labels);
                                         sharedBC = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                          outShared << lookup[1]->getGroup() + "-" + lookup[2]->getGroup() << '\t'<< labels.size() << '\t';
                          for (int k = 0; k < labels.size()-1; k++) {
                              outShared << labels[k] << ",";
@@ -726,7 +726,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                         subset.push_back(lookup[1]); subset.push_back(lookup[3]);
                                         data = vCalcs[i]->getValues(subset, labels);
                                         sharedBD = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                          outShared << lookup[1]->getGroup() + "-" + lookup[3]->getGroup() << '\t'<< labels.size() << '\t';
                          for (int k = 0; k < labels.size()-1; k++) {
                              outShared << labels[k] << ",";
@@ -739,7 +739,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                         subset.push_back(lookup[2]); subset.push_back(lookup[3]);
                                         data = vCalcs[i]->getValues(subset, labels);
                                         sharedCD = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                          outShared << lookup[2]->getGroup() + "-" + lookup[3]->getGroup() << '\t'<< labels.size() << '\t';
                          for (int k = 0; k < labels.size()-1; k++) {
                              outShared << labels[k] << ",";
@@ -754,7 +754,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                         subset.push_back(lookup[0]); subset.push_back(lookup[1]); subset.push_back(lookup[2]);
                                         data = vCalcs[i]->getValues(subset, labels);
                                         sharedABC = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                          outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup()+ "-" + lookup[2]->getGroup()<< '\t'<< labels.size() << '\t';
                          for (int k = 0; k < labels.size()-1; k++) {
                              outShared << labels[k] << ",";
@@ -767,7 +767,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                         subset.push_back(lookup[0]); subset.push_back(lookup[2]); subset.push_back(lookup[3]);
                                         data = vCalcs[i]->getValues(subset, labels);
                                         sharedACD = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                          outShared << lookup[0]->getGroup() + "-" + lookup[2]->getGroup()+ "-" + lookup[3]->getGroup()<< '\t'<< labels.size() << '\t';
                          for (int k = 0; k < labels.size()-1; k++) {
                              outShared << labels[k] << ",";
@@ -780,12 +780,12 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                         subset.push_back(lookup[1]); subset.push_back(lookup[2]); subset.push_back(lookup[3]);
                                         data = vCalcs[i]->getValues(subset,labels);
                                         sharedBCD = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                          outShared << lookup[1]->getGroup() + "-" + lookup[2]->getGroup()+ "-" + lookup[3]->getGroup()<< '\t'<< labels.size() << '\t';
                          for (int k = 0; k < labels.size()-1; k++) {
                              outShared << labels[k] << ",";
                          }
-                        if (labels.size() != 0) { outShared << labels[labels.size()-1]; }
+                        outShared << labels[labels.size()-1]; 
                          outShared << endl;
                      }
                 //cout << "num bcd = " << sharedBCD << endl;            
@@ -793,19 +793,19 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                         subset.push_back(lookup[0]); subset.push_back(lookup[1]); subset.push_back(lookup[3]);
                                         data = vCalcs[i]->getValues(subset, labels);
                                         sharedABD = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                          outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup()+ "-" + lookup[3]->getGroup()<< '\t'<< labels.size() << '\t';
                          for (int k = 0; k < labels.size()-1; k++) {
                              outShared << labels[k] << ",";
                          }
-                        if (labels.size() != 0) { outShared << labels[labels.size()-1]; }
+                        outShared << labels[labels.size()-1]; 
                          outShared << endl;
                      }
  //cout << "num abd = " << sharedABD << endl;
                                         //get estimate for all four
                                         data = vCalcs[i]->getValues(lookup, labels);
                                         sharedABCD = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                          outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup() + "-" + lookup[2]->getGroup()+ "-" + lookup[3]->getGroup()<< '\t'<< labels.size() << '\t';
                          for (int k = 0; k < labels.size()-1; k++) {
                              outShared << labels[k] << ",";
author	Pat Schloss <pschloss@umich.edu>
	Thu, 7 Feb 2013 15:12:18 +0000 (10:12 -0500)
committer	Pat Schloss <pschloss@umich.edu>
	Thu, 7 Feb 2013 15:12:18 +0000 (10:12 -0500)
Mothur.xcodeproj/project.pbxproj		patch \| blob \| history
aligncommand.cpp		patch \| blob \| history
aligncommand.h		patch \| blob \| history
bayesian.cpp		patch \| blob \| history
catchallcommand.cpp		patch \| blob \| history
chimeraperseuscommand.cpp		patch \| blob \| history
chimeraslayercommand.cpp		patch \| blob \| history
chimeraslayercommand.h		patch \| blob \| history
chimerauchimecommand.cpp		patch \| blob \| history
chimerauchimecommand.h		patch \| blob \| history
chopseqscommand.cpp		patch \| blob \| history
chopseqscommand.h		patch \| blob \| history
classifyseqscommand.cpp		patch \| blob \| history
classifyseqscommand.h		patch \| blob \| history
clustersplitcommand.cpp		patch \| blob \| history
commandfactory.cpp		patch \| blob \| history
commandfactory.hpp		patch \| blob \| history
consensusseqscommand.cpp		patch \| blob \| history
countgroupscommand.cpp		patch \| blob \| history
countgroupscommand.h		patch \| blob \| history
countseqscommand.cpp		patch \| blob \| history
counttable.cpp		patch \| blob \| history
distancecommand.cpp		patch \| blob \| history
distancecommand.h		patch \| blob \| history
engine.cpp		patch \| blob \| history
filterseqscommand.cpp		patch \| blob \| history
filterseqscommand.h		patch \| blob \| history
filtersharedcommand.cpp		patch \| blob \| history
flowdata.cpp		patch \| blob \| history
flowdata.h		patch \| blob \| history
getdistscommand.cpp	[new file with mode: 0644]	patch \| blob
getdistscommand.h	[new file with mode: 0644]	patch \| blob
getotulabelscommand.cpp		patch \| blob \| history
getotulabelscommand.h		patch \| blob \| history
groupmap.cpp		patch \| blob \| history
listotulabelscommand.cpp		patch \| blob \| history
listotulabelscommand.h		patch \| blob \| history
makecontigscommand.cpp		patch \| blob \| history
makecontigscommand.h		patch \| blob \| history
makefastqcommand.cpp		patch \| blob \| history
makefile		patch \| blob \| history
matrixoutputcommand.cpp		patch \| blob \| history
matrixoutputcommand.h		patch \| blob \| history
metastatscommand.cpp		patch \| blob \| history
metastatscommand.h		patch \| blob \| history
mothurout.cpp		patch \| blob \| history
mothurout.h		patch \| blob \| history
newcommandtemplate.cpp		patch \| blob \| history
pairwiseseqscommand.cpp		patch \| blob \| history
pairwiseseqscommand.h		patch \| blob \| history
parsefastaqcommand.cpp		patch \| blob \| history
parsefastaqcommand.h		patch \| blob \| history
pcrseqscommand.h		patch \| blob \| history
prcseqscommand.cpp		patch \| blob \| history
preclustercommand.cpp		patch \| blob \| history
preclustercommand.h		patch \| blob \| history
primerdesigncommand.cpp	[new file with mode: 0644]	patch \| blob
primerdesigncommand.h	[new file with mode: 0644]	patch \| blob
qualityscores.cpp		patch \| blob \| history
qualityscores.h		patch \| blob \| history
refchimeratest.cpp		patch \| blob \| history
removedistscommand.cpp	[new file with mode: 0644]	patch \| blob
removedistscommand.h	[new file with mode: 0644]	patch \| blob
removeotulabelscommand.cpp		patch \| blob \| history
removeotulabelscommand.h		patch \| blob \| history
screenseqscommand.cpp		patch \| blob \| history
screenseqscommand.h		patch \| blob \| history
seqerrorcommand.cpp		patch \| blob \| history
seqsummarycommand.cpp		patch \| blob \| history
seqsummarycommand.h		patch \| blob \| history
sequence.cpp		patch \| blob \| history
sequence.hpp		patch \| blob \| history
shhhseqscommand.cpp		patch \| blob \| history
shhhseqscommand.h		patch \| blob \| history
splitgroupscommand.h		patch \| blob \| history
summaryqualcommand.cpp		patch \| blob \| history
summaryqualcommand.h		patch \| blob \| history
summarysharedcommand.cpp		patch \| blob \| history
summarysharedcommand.h		patch \| blob \| history
treegroupscommand.cpp		patch \| blob \| history
treegroupscommand.h		patch \| blob \| history
trimflowscommand.cpp		patch \| blob \| history
trimseqscommand.cpp		patch \| blob \| history
trimseqscommand.h		patch \| blob \| history
unifracunweightedcommand.cpp		patch \| blob \| history
unifracweightedcommand.cpp		patch \| blob \| history
venn.cpp		patch \| blob \| history