]> git.donarmstrong.com Git - mothur.git/commitdiff
Merge remote-tracking branch 'mothur/master'
authorPat Schloss <pschloss@umich.edu>
Thu, 31 May 2012 16:06:33 +0000 (12:06 -0400)
committerPat Schloss <pschloss@umich.edu>
Thu, 31 May 2012 16:06:33 +0000 (12:06 -0400)
58 files changed:
Mothur.xcodeproj/project.pbxproj
Mothur.xcodeproj/xcuserdata/SarahsWork.xcuserdatad/xcschemes/Mothur.xcscheme
aligncommand.h
alignment.cpp
alignment.hpp
catchallcommand.cpp
chimeraslayercommand.cpp
chimerauchimecommand.cpp
chimerauchimecommand.h
classify.cpp
classifyotucommand.cpp
classifyseqscommand.cpp
clusterclassic.cpp
clusterclassic.h
clustersplitcommand.cpp
clustersplitcommand.h
commandfactory.cpp
cooccurrencecommand.cpp
engine.cpp
engine.hpp
getotulabelscommand.cpp [new file with mode: 0644]
getotulabelscommand.h [new file with mode: 0644]
groupmap.cpp
groupmap.h
indicatorcommand.cpp
listotulabelscommand.cpp [new file with mode: 0644]
listotulabelscommand.h [new file with mode: 0644]
makebiomcommand.cpp
makecontigscommand.cpp [new file with mode: 0644]
makecontigscommand.h [new file with mode: 0644]
makefastqcommand.cpp
makefile
matrixoutputcommand.cpp
mothurout.cpp
mothurout.h
otuassociationcommand.cpp
otuassociationcommand.h
parsefastaqcommand.cpp
phylotree.cpp
rarefact.cpp
rarefact.h
rarefactcommand.cpp
rarefactcommand.h
rarefactsharedcommand.cpp
rarefactsharedcommand.h
removeotulabelscommand.cpp [new file with mode: 0644]
removeotulabelscommand.h [new file with mode: 0644]
sequence.cpp
setdircommand.cpp
sffinfocommand.cpp
shhhercommand.cpp
shhhercommand.h
splitmatrix.cpp
splitmatrix.h
summarysharedcommand.cpp
summarysharedcommand.h
trialSwap2.cpp
trialswap2.h

index f510cfe15c1449b9878439aff18eb85cef333ec4..d98b28443921915890802e12777ab374aa31d402 100644 (file)
@@ -11,6 +11,8 @@
                219C1DE41559BCCF004209F9 /* getcoremicrobiomecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 219C1DE31559BCCD004209F9 /* getcoremicrobiomecommand.cpp */; };
                7E6BE10A12F710D8007ADDBE /* refchimeratest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */; };
                8DD76FB00486AB0100D96B5E /* mothur.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6A0FF2C0290799A04C91782 /* mothur.1 */; };
+               A70056E6156A93D000924A2D /* getotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056E5156A93D000924A2D /* getotulabelscommand.cpp */; };
+               A70056EB156AB6E500924A2D /* removeotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056EA156AB6E500924A2D /* removeotulabelscommand.cpp */; };
                A70332B712D3A13400761E33 /* makefile in Sources */ = {isa = PBXBuildFile; fileRef = A70332B512D3A13400761E33 /* makefile */; };
                A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; };
                A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; };
@@ -37,6 +39,8 @@
                A79234D713C74BF6002B08E2 /* mothurfisher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79234D613C74BF6002B08E2 /* mothurfisher.cpp */; };
                A795840D13F13CD900F201D5 /* countgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A795840C13F13CD900F201D5 /* countgroupscommand.cpp */; };
                A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; };
+               A7A0671A1562946F0095C8C5 /* listotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */; };
+               A7A0671F1562AC3E0095C8C5 /* makecontigscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */; };
                A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */; };
                A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */; };
                A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; };
                7E6BE10812F710D8007ADDBE /* refchimeratest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = refchimeratest.h; sourceTree = "<group>"; };
                7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = refchimeratest.cpp; sourceTree = "<group>"; };
                7E78911B135F3E8600E725D2 /* eachgapdistignorens.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = eachgapdistignorens.h; sourceTree = "<group>"; };
-               8DD76FB20486AB0100D96B5E /* Mothur */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = Mothur; sourceTree = BUILT_PRODUCTS_DIR; };
+               8DD76FB20486AB0100D96B5E /* mothur */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = mothur; sourceTree = BUILT_PRODUCTS_DIR; };
+               A70056E5156A93D000924A2D /* getotulabelscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getotulabelscommand.cpp; sourceTree = "<group>"; };
+               A70056E8156A93E300924A2D /* getotulabelscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getotulabelscommand.h; sourceTree = "<group>"; };
+               A70056E9156AB6D400924A2D /* removeotulabelscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removeotulabelscommand.h; sourceTree = "<group>"; };
+               A70056EA156AB6E500924A2D /* removeotulabelscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removeotulabelscommand.cpp; sourceTree = "<group>"; };
                A70332B512D3A13400761E33 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = "<group>"; };
                A713EBAA12DC7613000092AC /* readphylipvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylipvector.h; sourceTree = "<group>"; };
                A713EBAB12DC7613000092AC /* readphylipvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylipvector.cpp; sourceTree = "<group>"; };
                A795840C13F13CD900F201D5 /* countgroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = countgroupscommand.cpp; sourceTree = "<group>"; };
                A799F5B71309A3E000AEEFA0 /* makefastqcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makefastqcommand.h; sourceTree = "<group>"; };
                A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makefastqcommand.cpp; sourceTree = "<group>"; };
+               A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = listotulabelscommand.cpp; sourceTree = "<group>"; };
+               A7A0671C156294810095C8C5 /* listotulabelscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = listotulabelscommand.h; sourceTree = "<group>"; };
+               A7A0671D1562AC230095C8C5 /* makecontigscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makecontigscommand.h; sourceTree = "<group>"; };
+               A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makecontigscommand.cpp; sourceTree = "<group>"; };
                A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sortseqscommand.cpp; sourceTree = "<group>"; };
                A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sortseqscommand.h; sourceTree = "<group>"; };
                A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = otuassociationcommand.cpp; sourceTree = "<group>"; };
                1AB674ADFE9D54B511CA2CBB /* Products */ = {
                        isa = PBXGroup;
                        children = (
-                               8DD76FB20486AB0100D96B5E /* Mothur */,
+                               8DD76FB20486AB0100D96B5E /* mothur */,
                        );
                        name = Products;
                        sourceTree = "<group>";
                                A7E9B6F412D37EC400DA6239 /* getgroupscommand.cpp */,
                                A7E9B6F712D37EC400DA6239 /* getlabelcommand.h */,
                                A7E9B6F612D37EC400DA6239 /* getlabelcommand.cpp */,
+                               A70056E8156A93E300924A2D /* getotulabelscommand.h */,
+                               A70056E5156A93D000924A2D /* getotulabelscommand.cpp */,
                                A7E9B6F912D37EC400DA6239 /* getlineagecommand.h */,
                                A7E9B6F812D37EC400DA6239 /* getlineagecommand.cpp */,
                                A7E9B6FB12D37EC400DA6239 /* getlistcountcommand.h */,
                                A7E9B72B12D37EC400DA6239 /* indicatorcommand.cpp */,
                                A7E9B73C12D37EC400DA6239 /* libshuffcommand.h */,
                                A7E9B73B12D37EC400DA6239 /* libshuffcommand.cpp */,
+                               A7A0671C156294810095C8C5 /* listotulabelscommand.h */,
+                               A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */,
                                A7E9B73E12D37EC400DA6239 /* listseqscommand.h */,
                                A7E9B73D12D37EC400DA6239 /* listseqscommand.cpp */,
                                A7FA10001302E096003860FE /* mantelcommand.h */,
                                A7FA10011302E096003860FE /* mantelcommand.cpp */,
                                A724D2B4153C8600000A826F /* makebiomcommand.h */,
                                A724D2B6153C8628000A826F /* makebiomcommand.cpp */,
+                               A7A0671D1562AC230095C8C5 /* makecontigscommand.h */,
+                               A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */,
                                A799F5B71309A3E000AEEFA0 /* makefastqcommand.h */,
                                A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */,
                                A7E9B74412D37EC400DA6239 /* makegroupcommand.h */,
                                A7E9B7C512D37EC400DA6239 /* removelineagecommand.cpp */,
                                A7E9B7C812D37EC400DA6239 /* removeotuscommand.h */,
                                A7E9B7C712D37EC400DA6239 /* removeotuscommand.cpp */,
+                               A70056E9156AB6D400924A2D /* removeotulabelscommand.h */,
+                               A70056EA156AB6E500924A2D /* removeotulabelscommand.cpp */,
                                A727864212E9E28C00F86ABA /* removerarecommand.h */,
                                A727864312E9E28C00F86ABA /* removerarecommand.cpp */,
                                A7E9B7CA12D37EC400DA6239 /* removeseqscommand.h */,
                        name = Mothur;
                        productInstallPath = "$(HOME)/bin";
                        productName = mothur;
-                       productReference = 8DD76FB20486AB0100D96B5E /* Mothur */;
+                       productReference = 8DD76FB20486AB0100D96B5E /* mothur */;
                        productType = "com.apple.product-type.tool";
                };
 /* End PBXNativeTarget section */
                                A724D2B7153C8628000A826F /* makebiomcommand.cpp in Sources */,
                                219C1DE01552C4BD004209F9 /* newcommandtemplate.cpp in Sources */,
                                219C1DE41559BCCF004209F9 /* getcoremicrobiomecommand.cpp in Sources */,
+                               A7A0671A1562946F0095C8C5 /* listotulabelscommand.cpp in Sources */,
+                               A7A0671F1562AC3E0095C8C5 /* makecontigscommand.cpp in Sources */,
+                               A70056E6156A93D000924A2D /* getotulabelscommand.cpp in Sources */,
+                               A70056EB156AB6E500924A2D /* removeotulabelscommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
                        isa = XCBuildConfiguration;
                        buildSettings = {
                                ALWAYS_SEARCH_USER_PATHS = NO;
-                               CONFIGURATION_TEMP_DIR = /Users/pschloss/Desktop/mothur;
                                COPY_PHASE_STRIP = NO;
                                DEPLOYMENT_LOCATION = YES;
                                GCC_DYNAMIC_NO_PIC = NO;
                                GCC_MODEL_TUNING = G5;
                                GCC_OPTIMIZATION_LEVEL = 0;
-                               INSTALL_PATH = /Users/pschloss/Desktop/mothur/build/Debug;
-                               OBJROOT = /Users/pschloss/Desktop/mothur;
-                               PRODUCT_NAME = Mothur;
+                               PRODUCT_NAME = mothur;
                                SDKROOT = macosx10.6;
-                               SYMROOT = /Users/pschloss/Desktop/mothur;
+                               SKIP_INSTALL = NO;
                        };
                        name = Debug;
                };
                        isa = XCBuildConfiguration;
                        buildSettings = {
                                ALWAYS_SEARCH_USER_PATHS = NO;
-                               CONFIGURATION_TEMP_DIR = /Users/pschloss/Desktop/mothur;
                                DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
                                DEPLOYMENT_LOCATION = YES;
                                GCC_MODEL_TUNING = G5;
                                GCC_WARN_UNUSED_VALUE = YES;
-                               INSTALL_PATH = /Users/pschloss/Desktop/mothur/build/Debug;
-                               OBJROOT = /Users/pschloss/Desktop/mothur;
-                               PRODUCT_NAME = Mothur;
-                               SYMROOT = /Users/pschloss/Desktop/mothur;
+                               PRODUCT_NAME = mothur;
+                               SKIP_INSTALL = NO;
                        };
                        name = Release;
                };
                                GCC_OPTIMIZATION_LEVEL = 0;
                                GCC_PREPROCESSOR_DEFINITIONS = (
                                        "MOTHUR_FILES=\"\\\"../release\\\"\"",
-                                       "VERSION=\"\\\"1.24.0\\\"\"",
-                                       "RELEASE_DATE=\"\\\"3/12/2012\\\"\"",
+                                       "VERSION=\"\\\"1.25.0\\\"\"",
+                                       "RELEASE_DATE=\"\\\"5/01/2012\\\"\"",
                                );
                                "GCC_VERSION[arch=*]" = "";
                                GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
                                GCC_WARN_ABOUT_RETURN_TYPE = YES;
                                GCC_WARN_UNUSED_VARIABLE = YES;
                                HEADER_SEARCH_PATHS = "";
-                               INSTALL_PATH = "";
+                               INSTALL_PATH = TARGET_BUILD_DIR;
                                MACH_O_TYPE = mh_execute;
                                ONLY_ACTIVE_ARCH = YES;
                                OTHER_CPLUSPLUSFLAGS = (
                                        "-lreadline",
                                );
                                SDKROOT = macosx10.6;
+                               SKIP_INSTALL = NO;
                                USER_HEADER_SEARCH_PATHS = "";
                        };
                        name = Debug;
                                GCC_WARN_UNUSED_PARAMETER = YES;
                                GCC_WARN_UNUSED_VALUE = YES;
                                GCC_WARN_UNUSED_VARIABLE = YES;
-                               INSTALL_PATH = "";
+                               INSTALL_PATH = TARGET_BUILD_DIR;
                                MACH_O_TYPE = mh_execute;
                                OTHER_CPLUSPLUSFLAGS = (
                                        "-DUSE_READLINE",
                                        "-lreadline",
                                );
                                SDKROOT = macosx10.6;
+                               SKIP_INSTALL = NO;
                        };
                        name = Release;
                };
index 5184c66900b60315219f5f452a689821e00f4bfd..3ced6e4a7c610f456b22a59d421b7ca434d8e6bb 100644 (file)
@@ -14,7 +14,7 @@
             <BuildableReference
                BuildableIdentifier = "primary"
                BlueprintIdentifier = "8DD76FA90486AB0100D96B5E"
-               BuildableName = "Mothur"
+               BuildableName = "mothur"
                BlueprintName = "Mothur"
                ReferencedContainer = "container:Mothur.xcodeproj">
             </BuildableReference>
@@ -32,7 +32,7 @@
          <BuildableReference
             BuildableIdentifier = "primary"
             BlueprintIdentifier = "8DD76FA90486AB0100D96B5E"
-            BuildableName = "Mothur"
+            BuildableName = "mothur"
             BlueprintName = "Mothur"
             ReferencedContainer = "container:Mothur.xcodeproj">
          </BuildableReference>
@@ -50,7 +50,7 @@
          <BuildableReference
             BuildableIdentifier = "primary"
             BlueprintIdentifier = "8DD76FA90486AB0100D96B5E"
-            BuildableName = "Mothur"
+            BuildableName = "mothur"
             BlueprintName = "Mothur"
             ReferencedContainer = "container:Mothur.xcodeproj">
          </BuildableReference>
@@ -68,7 +68,7 @@
          <BuildableReference
             BuildableIdentifier = "primary"
             BlueprintIdentifier = "8DD76FA90486AB0100D96B5E"
-            BuildableName = "Mothur"
+            BuildableName = "mothur"
             BlueprintName = "Mothur"
             ReferencedContainer = "container:Mothur.xcodeproj">
          </BuildableReference>
index 86bb4d7a4377904f0ca3c2712204d5932fdb46f6..7eeaa1eb7e7bc3e2dc28d144d646b2190eb34983 100644 (file)
@@ -1,6 +1,5 @@
 #ifndef ALIGNCOMMAND_H
 #define ALIGNCOMMAND_H
-//test
 /*
  *  aligncommand.h
  *  Mothur
index 235216e8e59c82be596004559445c021312acb40..9ab17004bc146bc97168373995c98b932dcb9c3f 100644 (file)
@@ -15,7 +15,7 @@
 
 /**************************************************************************************************/
 
-Alignment::Alignment() {       /*      do nothing      */      }
+Alignment::Alignment() {       m = MothurOut::getInstance(); /*        do nothing      */      }
 
 /**************************************************************************************************/
 
@@ -53,7 +53,8 @@ void Alignment::resize(int A) {
 
 void Alignment::traceBack(){                   //      This traceback routine is used by the dynamic programming algorithms
        try {   
-                                                                       //      to fill the values of seqAaln and seqBaln
+               BBaseMap.clear();
+        ABaseMap.clear(); //   to fill the values of seqAaln and seqBaln
                seqAaln = "";
                seqBaln = "";
                int row = lB-1;
@@ -65,31 +66,51 @@ void Alignment::traceBack(){                        //      This traceback routine is used by the dynamic
                //      matrix
                
                if(currentCell.prevCell == 'x'){        seqAaln = seqBaln = "NOALIGNMENT";              }//If there's an 'x' in the bottom-
-               else{                                                                                           //      right corner bail out because it means nothing got aligned
+               else{   //      right corner bail out because it means nothing got aligned
+            int count = 0;
                        while(currentCell.prevCell != 'x'){                             //      while the previous cell isn't an 'x', keep going...
                                
                                if(currentCell.prevCell == 'u'){                        //      if the pointer to the previous cell is 'u', go up in the
                                        seqAaln = '-' + seqAaln;                                //      matrix.  this indicates that we need to insert a gap in
                                        seqBaln = seqB[row] + seqBaln;                  //      seqA and a base in seqB
+                    BBaseMap[row] = count;
                                        currentCell = alignment[--row][column];
                                }
                                else if(currentCell.prevCell == 'l'){           //      if the pointer to the previous cell is 'l', go to the left
                                        seqBaln = '-' + seqBaln;                                //      in the matrix.  this indicates that we need to insert a gap
                                        seqAaln = seqA[column] + seqAaln;               //      in seqB and a base in seqA
+                    ABaseMap[column] = count;
                                        currentCell = alignment[row][--column];
                                }
                                else{
                                        seqAaln = seqA[column] + seqAaln;               //      otherwise we need to go diagonally up and to the left,
                                        seqBaln = seqB[row] + seqBaln;                  //      here we add a base to both alignments
+                    BBaseMap[row] = count;
+                    ABaseMap[column] = count;
                                        currentCell = alignment[--row][--column];
                                }
+                count++;
                        }
                }
                
-               pairwiseLength = seqAaln.length();
+       
+        pairwiseLength = seqAaln.length();
                seqAstart = 1;  seqAend = 0;
                seqBstart = 1;  seqBend = 0;
-               
+        //flip maps since we now know the total length
+        map<int, int> newAMap;
+        for (map<int, int>::iterator it = ABaseMap.begin(); it != ABaseMap.end(); it++) {
+            int spot = it->second;
+            newAMap[pairwiseLength-spot-1] = it->first-1;
+        }
+        ABaseMap = newAMap;
+        map<int, int> newBMap;
+        for (map<int, int>::iterator it = BBaseMap.begin(); it != BBaseMap.end(); it++) {
+            int spot = it->second;
+            newBMap[pairwiseLength-spot-1] = it->first-1;
+        }
+               BBaseMap = newBMap;
+        
                for(int i=0;i<seqAaln.length();i++){
                        if(seqAaln[i] != '-' && seqBaln[i] == '-')              {       seqAstart++;    }
                        else if(seqAaln[i] == '-' && seqBaln[i] != '-') {       seqBstart++;    }
@@ -156,7 +177,16 @@ int Alignment::getCandidateEndPos(){
 int Alignment::getTemplateStartPos(){
        return seqBstart;                                                                       //      this is called to report the quality of the alignment
 }
+/**************************************************************************************************/
+
+map<int, int> Alignment::getSeqAAlnBaseMap(){
+       return ABaseMap;                                                                        
+}
+/**************************************************************************************************/
 
+map<int, int> Alignment::getSeqBAlnBaseMap(){
+       return BBaseMap;                                                                        
+}
 /**************************************************************************************************/
 
 int Alignment::getTemplateEndPos(){
index 7a943fd0037430991a7baab418474e78a496d403..9d2197f5eaa7248b9bb8490c87c804f85931d095 100644 (file)
@@ -30,6 +30,8 @@ public:
 //     float getAlignmentScore();
        string getSeqAAln();
        string getSeqBAln();
+    map<int, int> getSeqAAlnBaseMap();
+    map<int, int> getSeqBAlnBaseMap();
        int getCandidateStartPos();
        int getCandidateEndPos();
        int getTemplateStartPos();
@@ -49,6 +51,8 @@ protected:
        int pairwiseLength;
        int nRows, nCols, lA, lB;
        vector<vector<AlignmentCell> > alignment;
+    map<int, int> ABaseMap;
+    map<int, int> BBaseMap;
        MothurOut* m;
 };
 
index cc99e6d992cb182f7495e412b674298a643981f6..329b56a2c24dcf024ca66cab0f160b76438c0922 100644 (file)
@@ -175,16 +175,14 @@ int CatchAllCommand::execute() {
                path = m->getFullPathName(path);
                
         if (m->debug) { m->mothurOut("[DEBUG]: mothur's path = " + path + "\n"); }
-        
+       
                savedOutputDir = outputDir;
                string catchAllCommandExe = "";
         string catchAllTest = "";
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-                       catchAllCommandExe += "mono " + path + "CatchAllcmdL.exe ";
                        if (outputDir == "") { outputDir = "./"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file whereever the exe is and not the cwd.
             catchAllTest = path + "CatchAllcmdL.exe";
                #else
-                       catchAllCommandExe += "\"" + path + "CatchAllcmdW.exe\"" + " ";
                        if (outputDir == "") { outputDir = ".\\"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file whereever the exe is and not the cwd.
             catchAllTest = path + "CatchAllcmdW.exe";
                #endif
@@ -193,12 +191,34 @@ int CatchAllCommand::execute() {
                ifstream in;
                catchAllTest = m->getFullPathName(catchAllTest);
                int ableToOpen = m->openInputFile(catchAllTest, in, "no error"); in.close();
-               if(ableToOpen == 1) {   m->mothurOut("[ERROR]: " + catchAllTest + " file does not exist. mothur requires the catchall executable to run the catchall command."); m->mothurOutEndLine(); m->control_pressed = true; return 0;  }
+               if(ableToOpen == 1) {   
+            m->mothurOut(catchAllTest + " file does not exist. Checking path... \n");
+            //check to see if uchime is in the path??
+            
+            string programName = "CatchAllcmdW.exe";
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            programName = "CatchAllcmdL.exe";
+#endif
+            string cLocation = m->findProgramPath(programName);
+            
+            ifstream in2;
+            ableToOpen = m->openInputFile(cLocation, in2, "no error"); in2.close();
+
+            if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + cLocation + " file does not exist. mothur requires the catchall executable."); m->mothurOutEndLine();  return 0; } 
+            else {  m->mothurOut("Found catchall in your path, using " + cLocation + "\n"); catchAllTest = cLocation; }
+        }
+        catchAllTest = m->getFullPathName(catchAllTest);
+        
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+        catchAllCommandExe += "mono " + catchAllTest + " ";
+#else
+        catchAllCommandExe += "\"" + catchAllTest + "\" ";
+#endif
         
                //prepare full output directory
                outputDir = m->getFullPathName(outputDir);
                
-        if (m->debug) { m->mothurOut("[DEBUG]: catchall location = " + catchAllCommandExe + ".\n [DEBUG]: outputDir = " + outputDir + ".\n"); }
+        if (m->debug) { m->mothurOut("[DEBUG]: catchall location = " + catchAllCommandExe + "\n[DEBUG]: outputDir = " + outputDir + "\n"); }
         
                vector<string> inputFileNames;
                if (sharedfile != "") { inputFileNames = parseSharedFile(sharedfile);   }
@@ -219,7 +239,7 @@ int CatchAllCommand::execute() {
                        string summaryfilename = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "catchall.summary";
                        summaryfilename = m->getFullPathName(summaryfilename);
                        
-            if (m->debug) { m->mothurOut("[DEBUG]: Input File = " + inputFileNames[p] + ".\n [DEBUG]: inputdata address = " + toString(&input) + ".\n [DEBUG]: sabund address = " + toString(&sabund) + ".\n"); } 
+            if (m->debug) { m->mothurOut("[DEBUG]: Input File = " + inputFileNames[p] + ".\n[DEBUG]: inputdata address = " + toString(&input) + ".\n[DEBUG]: sabund address = " + toString(&sabund) + ".\n"); } 
             
                        ofstream out;
                        m->openOutputFile(summaryfilename, out);        
index 2c435cadb11c1bc413b982676ac58082d4683992..e2b93316a4ade033f6ce4db3988cfd09cbf432e9 100644 (file)
@@ -633,6 +633,7 @@ int ChimeraSlayerCommand::execute(){
 #endif
                                
                                totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, trimFastaFileName);
+                m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine();
 #ifdef USE_MPI 
                                }
                                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait
@@ -641,7 +642,7 @@ int ChimeraSlayerCommand::execute(){
        
                        if (parser != NULL) { delete parser; } 
                        
-                       m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.");       m->mothurOutEndLine();
+            m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.");   m->mothurOutEndLine();
                }
                
                //set accnos file as new current accnosfile
index e28775f016c4e992d2d8f2d23904c93c6e3f74ef..be0421a73097bc47aa98934ba4ae59a320e02010 100644 (file)
@@ -481,8 +481,26 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                        ifstream in;
                        uchimeCommand = m->getFullPathName(uchimeCommand);
                        int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
-                       if(ableToOpen == 1) {   m->mothurOut("[ERROR]: " + uchimeCommand + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
-               }
+                       if(ableToOpen == 1) {   
+                m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
+                //check to see if uchime is in the path??
+                
+                string uLocation = m->findProgramPath("uchime");
+                
+                
+                ifstream in2;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
+#else
+                ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
+#endif
+
+                if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } 
+                else {  m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
+            }else {  uchimeLocation = uchimeCommand; }
+            
+            uchimeLocation = m->getFullPathName(uchimeLocation);
+        }
        }
        catch(exception& e) {
                m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
@@ -1010,16 +1028,10 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
                                
                vector<char*> cPara;
                
-               string path = m->argv;
-               string tempPath = path;
-               for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
-               path = path.substr(0, (tempPath.find_last_of('m')));
-               
-               string uchimeCommand = path;
+               string uchimeCommand = uchimeLocation;
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-               uchimeCommand += "uchime ";
+               uchimeCommand += " ";
 #else
-               uchimeCommand += "uchime";
                uchimeCommand = "\"" + uchimeCommand + "\"";
 #endif
                
@@ -1258,6 +1270,7 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
 #else
                commandString = "\"" + commandString + "\"";
 #endif
+        if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
                system(commandString.c_str());
                
                //free memory
@@ -1430,7 +1443,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                        // Allocate memory for thread data.
                        string extension = toString(i) + ".temp";
                        
-                       uchimeData* tempUchime = new uchimeData(outputFileName+extension, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
+                       uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
                        tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
                        tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
                        
@@ -1562,7 +1575,7 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o
                        // Allocate memory for thread data.
                        string extension = toString(i) + ".temp";
                        
-                       uchimeData* tempUchime = new uchimeData(outputFName+extension, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
+                       uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
                        tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
                        tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
                        
index b7da889701f3ac2bc7883cf5c39356550eb2e25d..dfb410f1acf31390df8c511b87e5a0f9dce74283 100644 (file)
@@ -45,7 +45,7 @@ private:
        int createProcesses(string, string, string, string, int&);
                
        bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract;
-       string fastafile, groupfile, templatefile, outputDir, namefile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract;
+       string fastafile, groupfile, templatefile, outputDir, namefile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, uchimeLocation;
        int processors;
        
        
@@ -74,7 +74,7 @@ struct uchimeData {
        string namefile; 
        string groupfile;
        string outputFName;
-       string accnos, alns, filename, templatefile;
+       string accnos, alns, filename, templatefile, uchimeLocation;
        MothurOut* m;
        int start;
        int end;
@@ -84,7 +84,7 @@ struct uchimeData {
        string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract;
        
        uchimeData(){}
-       uchimeData(string o, string t, string file, string f, string n, string g, string ac,  string al, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
+       uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac,  string al, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
                fastafile = f;
                namefile = n;
                groupfile = g;
@@ -100,6 +100,7 @@ struct uchimeData {
                groups = gr;
                count = 0;
                numChimeras = 0;
+        uchimeLocation = uloc;
        }
        void setBooleans(bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract) {
                useAbskew = Abskew;
@@ -183,18 +184,8 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
                        
                        vector<char*> cPara;
                        
-                       string path = pDataArray->m->argv;
-                       string tempPath = path;
-                       for (int j = 0; j < path.length(); j++) { tempPath[j] = tolower(path[j]); }
-                       path = path.substr(0, (tempPath.find_last_of('m')));
-                       
-                       string uchimeCommand = path;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-                       uchimeCommand += "uchime ";
-#else
-                       uchimeCommand += "uchime";
-                       uchimeCommand = "\"" + uchimeCommand + "\"";
-#endif                 
+            string uchimeCommand = pDataArray->uchimeLocation;
+            uchimeCommand = "\"" + uchimeCommand + "\"";
                        
                        char* tempUchime;
                        tempUchime= new char[uchimeCommand.length()+1]; 
@@ -403,10 +394,10 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
                        
                        //uchime_main(numArgs, uchimeParameters); 
                        //cout << "commandString = " << commandString << endl;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-#else
                        commandString = "\"" + commandString + "\"";
-#endif
+            
+            if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
+            
                        system(commandString.c_str());
                        
                        //free memory
@@ -505,11 +496,14 @@ static DWORD WINAPI MyUchimeSeqsThreadFunction(LPVOID lpParam){
                
                vector<char*> cPara;
                
-               char* tempUchime;
-               tempUchime= new char[8]; 
-               *tempUchime = '\0';
-               strncat(tempUchime, "uchime ", 7); 
-               cPara.push_back(tempUchime);
+               string uchimeCommand = pDataArray->uchimeLocation;
+        uchimeCommand = "\"" + uchimeCommand + "\"";
+        
+        char* tempUchime;
+        tempUchime= new char[uchimeCommand.length()+1]; 
+        *tempUchime = '\0';
+        strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
+        cPara.push_back(tempUchime);
                
                char* tempIn = new char[8]; 
                *tempIn = '\0'; strncat(tempIn, "--input", 7);
@@ -722,6 +716,7 @@ static DWORD WINAPI MyUchimeSeqsThreadFunction(LPVOID lpParam){
                
                //uchime_main(numArgs, uchimeParameters); 
                //cout << "commandString = " << commandString << endl;
+        if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
                system(commandString.c_str());
                
                //free memory
index 2d01183479a504c798a18071df045f723643f850..7726b3e00cca9de19971b70656f9581e43e97c8a 100644 (file)
@@ -249,7 +249,8 @@ int Classify::readTaxonomy(string file) {
                
                m->mothurOutEndLine();
                m->mothurOut("Reading in the " + file + " taxonomy...\t");      cout.flush();
-
+        if (m->debug) { m->mothurOut("[DEBUG]: Taxonomies read in...\n"); }
+        
 #ifdef USE_MPI 
                int pid, num, processors;
                vector<unsigned long long> positions;
@@ -298,6 +299,7 @@ int Classify::readTaxonomy(string file) {
                        istringstream iss (tempBuf,istringstream::in);
                        iss >> name; m->gobble(iss);
             iss >> taxInfo;
+            if (m->debug) { m->mothurOut("[DEBUG]: name = " + name + " tax = " + taxInfo + "\n"); }
                        taxonomy[name] = taxInfo;
                        phyloTree->addSeqToTree(name, taxInfo);
                }
@@ -312,7 +314,9 @@ int Classify::readTaxonomy(string file) {
                while (!inTax.eof()) {
                        inTax >> name; m->gobble(inTax);
             inTax >> taxInfo;
-               
+            
+            if (m->debug) {  m->mothurOut("[DEBUG]: name = '" + name + "' tax = '" + taxInfo + "'\n");  }
+
                        taxonomy[name] = taxInfo;
                        
                        phyloTree->addSeqToTree(name, taxInfo);
@@ -321,6 +325,8 @@ int Classify::readTaxonomy(string file) {
                }
                inTax.close();
 #endif 
+        
+        
        
                phyloTree->assignHeirarchyIDs(0);
                
index c8896375b016ef6c45462351a18e8e520bfe75c2..ef41ee1bab8853f6741938005f775076b60723ab 100644 (file)
@@ -475,6 +475,9 @@ vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th
                                }
                                
                        }
+            
+            //phylotree adds an extra unknown so we want to remove that
+            if (bestChild.name == "unknown") { bestChildSize--; }
                                
                        //is this taxonomy above cutoff
                        int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
@@ -541,7 +544,9 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                        taxaSum = new PhyloSummary(groupfile);
                }
                
+
                //for each bin in the list vector
+        string snumBins = toString(processList->getNumBins());
                for (int i = 0; i < processList->getNumBins(); i++) {
                        
                        if (m->control_pressed) { break; }
@@ -552,7 +557,15 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                        if (m->control_pressed) { out.close();  return 0; }
                        
                        //output to new names file
-                       out << (i+1) << '\t' << size << '\t' << conTax << endl;
+            string binLabel = "Otu";
+            string sbinNumber = toString(i+1);
+            if (sbinNumber.length() < snumBins.length()) { 
+                int diff = snumBins.length() - sbinNumber.length();
+                for (int h = 0; h < diff; h++) { binLabel += "0"; }
+            }
+            binLabel += sbinNumber;
+
+                       out << binLabel << '\t' << size << '\t' << conTax << endl;
                        
                        string noConfidenceConTax = conTax;
                        m->removeConfidences(noConfidenceConTax);
index 0504e6635802c3f6e3e186e4568675906774b3ff..4e244905ff6b755fd064617368506b9160770048 100644 (file)
@@ -467,7 +467,6 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
             }
                        
                }
-               
        }
        catch(exception& e) {
                m->errorOut(e, "ClassifySeqsCommand", "ClassifySeqsCommand");
@@ -486,7 +485,7 @@ ClassifySeqsCommand::~ClassifySeqsCommand(){
 int ClassifySeqsCommand::execute(){
        try {
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
-               
+        
                if(method == "bayesian"){       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip);             }
                else if(method == "knn"){       classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand());                               }
                else {
@@ -507,8 +506,8 @@ int ClassifySeqsCommand::execute(){
                        string RippedTaxName = m->getRootName(m->getSimpleName(baseTName));
                        RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
                        if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
-                       RippedTaxName +=  "."; 
-               
+                       if (RippedTaxName != "") { RippedTaxName +=  "."; }
+           
                        if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
                        string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "taxonomy";
                        string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "flip.accnos";
index 1ce81c4f2affda469164709f862bb36ff023c163..2d1b9a6b781960a824fddafc03b904e8039179e1 100644 (file)
@@ -19,6 +19,10 @@ ClusterClassic::ClusterClassic(float c, string f, bool s) : method(f), smallDist
                cutoff = c;
                aboveCutoff = cutoff + 10000.0;
                m = MothurOut::getInstance();
+        if(method == "furthest")        {   tag = "fn";   }
+        else if (method == "average")   {   tag = "an";   }
+        else if (method == "weighted")  {   tag = "wn";   }        
+        else if (method == "nearest")   {   tag = "nn";   }
        }
        catch(exception& e) {
                m->errorOut(e, "ClusterClassic", "ClusterClassic");
@@ -36,7 +40,12 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) {
                ifstream fileHandle;
                m->openInputFile(filename, fileHandle);
                
-               fileHandle >> nseqs >> name;
+        string numTest;
+               fileHandle >> numTest >> name;
+        
+        if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+        else { convert(numTest, nseqs); }
+
 
                matrixNames.push_back(name);
 
index 110c0408e5107e9acebd322c7a1070f364c13a7d..932c806ec42ac52a4a90670239d0c4a6a27b1efb 100644 (file)
@@ -1,8 +1,7 @@
-#ifndef CLUSTER_H
-#define CLUSTER_H
+#ifndef CLUSTERCLASSIC_H
+#define CLUSTERCLASSIC_H
 
 
-#include "mothur.h"
 #include "mothurout.h"
 #include "listvector.hpp"
 #include "rabundvector.hpp"
@@ -28,7 +27,7 @@ public:
        int getNSeqs() { return nseqs; }        
        ListVector* getListVector() { return list; }
        RAbundVector* getRAbundVector() { return rabund; }              
-       string getTag();
+       string getTag() { return tag; }
        void setMapWanted(bool m);  
        map<string, int> getSeqtoBin()  {  return seq2Bin;      }
 
@@ -57,7 +56,7 @@ private:
        bool mapWanted, sim;
        double cutoff, aboveCutoff;
        map<string, int> seq2Bin;
-       string method;
+       string method, tag;
        
        MothurOut* m;
 };
index 9cdd7b065b857e5ddb7eaacbc27e4c3af84a43cb..2eee59f6165e28c50f86fd6ab9b1db0920135040 100644 (file)
@@ -10,7 +10,6 @@
 #include "clustersplitcommand.h"
 
 
-
 //**********************************************************************************************************************
 vector<string> ClusterSplitCommand::setParameters(){   
        try {
@@ -29,6 +28,7 @@ vector<string> ClusterSplitCommand::setParameters(){
                CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
                CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
                CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
+        CommandParameter pclassic("classic", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pclassic);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                        
@@ -62,6 +62,7 @@ string ClusterSplitCommand::getHelpString(){
                helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
                helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=3, meaning use the first taxon in each list. \n";
                helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
+        helpString += "The classic parameter allows you to indicate that you want to run your files with cluster.classic.  It is only valid with splitmethod=fasta. Default=f.\n";
 #ifdef USE_MPI
                helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
 #endif
@@ -266,7 +267,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
                        large = m->isTrue(temp);
-                       
+            
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
@@ -277,6 +278,11 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                                else {  splitmethod = temp; }
                        }
                        
+            temp = validParameter.validFile(parameters, "classic", false);                     if (temp == "not found") { temp = "F"; }
+                       classic = m->isTrue(temp);
+            
+            if ((splitmethod != "fasta") && classic) { m->mothurOut("splitmethod must be fasta to use cluster.classic.\n"); abort=true; }
+
                        temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "0.25"; }
                        m->mothurConvert(temp, cutoff); 
                        cutoff += (5 / (precision * 10.0));  
@@ -374,7 +380,7 @@ int ClusterSplitCommand::execute(){
                SplitMatrix* split;
                if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
                else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
-               else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
+               else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, classic, outputDir);     }
                else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
                
                split->split();
@@ -1021,148 +1027,20 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
                //cluster each distance file
                for (int i = 0; i < distNames.size(); i++) {
             
-            Cluster* cluster = NULL;
-            SparseMatrix* matrix = NULL;
-            ListVector* list = NULL;
-            ListVector oldList;
-            RAbundVector* rabund = NULL;
-            
-                       if (m->control_pressed) { return listFileNames; }
-                       
                        string thisNamefile = distNames[i].begin()->second;
                        string thisDistFile = distNames[i].begin()->first;
-                                               
-                       #ifdef USE_MPI
-                               int pid;
-                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-                               
-                               //output your files too
-                               if (pid != 0) {
-                                       cout << endl << "Reading " << thisDistFile << endl;
-                               }
-                       #endif
-                       
-                       m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
-                       
-                       ReadMatrix* read = new ReadColumnMatrix(thisDistFile);  
-                       read->setCutoff(cutoff);
-
-                       NameAssignment* nameMap = new NameAssignment(thisNamefile);
-                       nameMap->readMap();
-                       read->read(nameMap);
-                       
-                       if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
-                       
-                       list = read->getListVector();
-                       oldList = *list;
-                       matrix = read->getMatrix();
-                       
-                       delete read;  read = NULL;
-                       delete nameMap; nameMap = NULL;
-                       
-                       
-                       #ifdef USE_MPI
-                               //output your files too
-                               if (pid != 0) {
-                                       cout << endl << "Clustering " << thisDistFile << endl;
-                               }
-                       #endif
                        
-                       m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
-               
-                       rabund = new RAbundVector(list->getRAbundVector());
-                       
-                       //create cluster
-                       if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
-                       else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
-                       else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
-                       tag = cluster->getTag();
-               
-                       if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
-                       fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
-                       
-                       ofstream listFile;
-                       m->openOutputFile(fileroot+ tag + ".list",      listFile);
-               
-                       listFileNames.push_back(fileroot+ tag + ".list");
-                               
-                       float previousDist = 0.00000;
-                       float rndPreviousDist = 0.00000;
-                       
-                       oldList = *list;
-
-                       print_start = true;
-                       start = time(NULL);
-                       double saveCutoff = cutoff;
-               
-                       while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
-               
-                               if (m->control_pressed) { //clean up
-                                       delete matrix; delete list;     delete cluster; delete rabund;
-                                       listFile.close();
-                                       for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
-                                       listFileNames.clear(); return listFileNames;
-                               }
-               
-                               cluster->update(saveCutoff);
-       
-                               float dist = matrix->getSmallDist();
-                               float rndDist;
-                               if (hard) {
-                                       rndDist = m->ceilDist(dist, precision); 
-                               }else{
-                                       rndDist = m->roundDist(dist, precision); 
-                               }
-
-                               if(previousDist <= 0.0000 && dist != previousDist){
-                                       oldList.setLabel("unique");
-                                       oldList.print(listFile);
-                                       if (labels.count("unique") == 0) {  labels.insert("unique");  }
-                               }
-                               else if(rndDist != rndPreviousDist){
-                                       oldList.setLabel(toString(rndPreviousDist,  length-1));
-                                       oldList.print(listFile);
-                                       if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
-                               }
-               
-                               previousDist = dist;
-                               rndPreviousDist = rndDist;
-                               oldList = *list;
-                       }
+                       string listFileName = "";
+            if (classic)    {  listFileName = clusterClassicFile(thisDistFile, thisNamefile, labels, smallestCutoff);   }
+            else            {  listFileName = clusterFile(thisDistFile, thisNamefile, labels, smallestCutoff);          }
 
-               
-                       if(previousDist <= 0.0000){
-                               oldList.setLabel("unique");
-                               oldList.print(listFile);
-                               if (labels.count("unique") == 0) { labels.insert("unique"); }
-                       }
-                       else if(rndPreviousDist<cutoff){
-                               oldList.setLabel(toString(rndPreviousDist,  length-1));
-                               oldList.print(listFile);
-                               if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
-                       }
-       
-                       delete matrix; delete list;     delete cluster; delete rabund; 
-            matrix = NULL; list = NULL; cluster = NULL; rabund = NULL;
-                       listFile.close();
-                       
                        if (m->control_pressed) { //clean up
                                for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
                                listFileNames.clear(); return listFileNames;
                        }
-                       
-                       m->mothurRemove(thisDistFile);
-                       m->mothurRemove(thisNamefile);
-                       
-                       if (saveCutoff != cutoff) { 
-                               if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
-                               else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
-                       
-                               m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();  
-                       }
-                       
-                       if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
-               }
+            
+            listFileNames.push_back(listFileName);
+        }
                
                cutoff = smallestCutoff;
                                        
@@ -1177,6 +1055,269 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
 
 }
 //**********************************************************************************************************************
+string ClusterSplitCommand::clusterClassicFile(string thisDistFile, string thisNamefile, set<string>& labels, double& smallestCutoff){
+       try {
+        string listFileName = "";
+        
+        ListVector* list = NULL;
+        ListVector oldList;
+        RAbundVector* rabund = NULL;
+        
+#ifdef USE_MPI
+        int pid;
+        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+        
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Reading " << thisDistFile << endl;
+        }
+#endif
+
+        m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
+        
+        NameAssignment* nameMap = new NameAssignment(thisNamefile);
+        nameMap->readMap();
+                               
+               //reads phylip file storing data in 2D vector, also fills list and rabund
+        bool sim = false;
+               ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim);
+               cluster->readPhylipFile(thisDistFile, nameMap);
+               tag = cluster->getTag();
+        
+               if (m->control_pressed) { delete cluster; return 0; }
+               
+               list = cluster->getListVector();
+               rabund = cluster->getRAbundVector();
+        
+               if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
+               fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
+        listFileName = fileroot+ tag + ".list";
+        
+        ofstream listFile;
+               m->openOutputFile(fileroot+ tag + ".list",      listFile);
+               
+               float previousDist = 0.00000;
+               float rndPreviousDist = 0.00000;
+               oldList = *list;
+               
+#ifdef USE_MPI
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Clustering " << thisDistFile << endl;
+        }
+#endif
+
+        m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
+        
+               while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 1)){
+                       if (m->control_pressed) { delete cluster; delete list; delete rabund; listFile.close();  return listFileName;  }
+            
+                       cluster->update(cutoff);
+            
+                       float dist = cluster->getSmallDist();
+                       float rndDist;
+                       if (hard) {
+                               rndDist = m->ceilDist(dist, precision); 
+                       }else{
+                               rndDist = m->roundDist(dist, precision); 
+                       }
+            
+            if(previousDist <= 0.0000 && dist != previousDist){
+                oldList.setLabel("unique");
+                oldList.print(listFile);
+                if (labels.count("unique") == 0) {  labels.insert("unique");  }
+            }
+            else if(rndDist != rndPreviousDist){
+                oldList.setLabel(toString(rndPreviousDist,  length-1));
+                oldList.print(listFile);
+                if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+            }
+
+            
+                       previousDist = dist;
+                       rndPreviousDist = rndDist;
+                       oldList = *list;
+               }
+        
+               if(previousDist <= 0.0000){
+            oldList.setLabel("unique");
+            oldList.print(listFile);
+            if (labels.count("unique") == 0) { labels.insert("unique"); }
+        }
+        else if(rndPreviousDist<cutoff){
+            oldList.setLabel(toString(rndPreviousDist,  length-1));
+            oldList.print(listFile);
+            if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+        }
+
+        
+               listFile.close();
+               
+               delete cluster; delete nameMap; delete list; delete rabund;
+
+        
+        return listFileName;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "clusterClassicFile");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+string ClusterSplitCommand::clusterFile(string thisDistFile, string thisNamefile, set<string>& labels, double& smallestCutoff){
+       try {
+        string listFileName = "";
+        
+        Cluster* cluster = NULL;
+        SparseMatrix* matrix = NULL;
+        ListVector* list = NULL;
+        ListVector oldList;
+        RAbundVector* rabund = NULL;
+        
+        if (m->control_pressed) { return listFileName; }
+        
+#ifdef USE_MPI
+        int pid;
+        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+        
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Reading " << thisDistFile << endl;
+        }
+#endif
+        
+        m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
+        
+        ReadMatrix* read = new ReadColumnMatrix(thisDistFile);         
+        read->setCutoff(cutoff);
+        
+        NameAssignment* nameMap = new NameAssignment(thisNamefile);
+        nameMap->readMap();
+        read->read(nameMap);
+        
+        if (m->control_pressed) {  delete read; delete nameMap; return listFileName; }
+        
+        list = read->getListVector();
+        oldList = *list;
+        matrix = read->getMatrix();
+        
+        delete read;  read = NULL;
+        delete nameMap; nameMap = NULL;
+        
+        
+#ifdef USE_MPI
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Clustering " << thisDistFile << endl;
+        }
+#endif
+        
+        m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
+               
+        rabund = new RAbundVector(list->getRAbundVector());
+        
+        //create cluster
+        if (method == "furthest")      {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
+        else if(method == "nearest"){  cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
+        else if(method == "average"){  cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
+        tag = cluster->getTag();
+               
+        if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
+        fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
+        
+        ofstream listFile;
+        m->openOutputFile(fileroot+ tag + ".list",     listFile);
+               
+        listFileName = fileroot+ tag + ".list";
+        
+        float previousDist = 0.00000;
+        float rndPreviousDist = 0.00000;
+        
+        oldList = *list;
+        
+        print_start = true;
+        start = time(NULL);
+        double saveCutoff = cutoff;
+               
+        while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
+            
+            if (m->control_pressed) { //clean up
+                delete matrix; delete list;    delete cluster; delete rabund;
+                listFile.close();
+                m->mothurRemove(listFileName);         
+                return listFileName;
+            }
+            
+            cluster->update(saveCutoff);
+            
+            float dist = matrix->getSmallDist();
+            float rndDist;
+            if (hard) {
+                rndDist = m->ceilDist(dist, precision); 
+            }else{
+                rndDist = m->roundDist(dist, precision); 
+            }
+            
+            if(previousDist <= 0.0000 && dist != previousDist){
+                oldList.setLabel("unique");
+                oldList.print(listFile);
+                if (labels.count("unique") == 0) {  labels.insert("unique");  }
+            }
+            else if(rndDist != rndPreviousDist){
+                oldList.setLabel(toString(rndPreviousDist,  length-1));
+                oldList.print(listFile);
+                if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+            }
+            
+            previousDist = dist;
+            rndPreviousDist = rndDist;
+            oldList = *list;
+        }
+        
+               
+        if(previousDist <= 0.0000){
+            oldList.setLabel("unique");
+            oldList.print(listFile);
+            if (labels.count("unique") == 0) { labels.insert("unique"); }
+        }
+        else if(rndPreviousDist<cutoff){
+            oldList.setLabel(toString(rndPreviousDist,  length-1));
+            oldList.print(listFile);
+            if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+        }
+        
+        delete matrix; delete list;    delete cluster; delete rabund; 
+        matrix = NULL; list = NULL; cluster = NULL; rabund = NULL;
+        listFile.close();
+        
+        if (m->control_pressed) { //clean up
+            m->mothurRemove(listFileName);     
+            return listFileName;
+        }
+        
+        m->mothurRemove(thisDistFile);
+        m->mothurRemove(thisNamefile);
+        
+        if (saveCutoff != cutoff) { 
+            if (hard)  {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
+            else               {       saveCutoff = m->roundDist(saveCutoff, precision);  }
+                       
+            m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();  
+        }
+        
+        if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
+        
+        return listFileName;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "clusterFile");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 
 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
        try{
index 0e32ffaea7710189859f458de4adc897362e12ef..a82d02270ecc37f1c85e93c9fe51b70e29288081 100644 (file)
@@ -23,6 +23,7 @@
 #include "readmatrix.hpp"
 #include "inputdata.h"
 #include "clustercommand.h"
+#include "clusterclassic.h"
 
 class ClusterSplitCommand : public Command {
        
@@ -48,13 +49,15 @@ private:
        string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, distfile, format, showabund, timing, splitmethod, taxFile, fastafile;
        double cutoff, splitcutoff;
        int precision, length, processors, taxLevelCutoff;
-       bool print_start, abort, hard, large;
+       bool print_start, abort, hard, large, classic;
        time_t start;
        ofstream outList, outRabund, outSabund;
        
        void printData(ListVector*);
        vector<string> createProcesses(vector< map<string, string> >, set<string>&);
        vector<string> cluster(vector< map<string, string> >, set<string>&);
+    string clusterFile(string, string, set<string>&, double&);
+    string clusterClassicFile(string, string, set<string>&, double&);
        int mergeLists(vector<string>, map<float, int>, ListVector*);
        map<float, int> completeListFile(vector<string>, string, set<string>&, ListVector*&);
        int createMergedDistanceFile(vector< map<string, string> >);
index d70c10a597d265c631665c7ab61ab9ea1ce940a7..2822d0b9920a0f5cc5005cf01ce653b38b9ede6d 100644 (file)
 #include "createdatabasecommand.h"
 #include "makebiomcommand.h"
 #include "getcoremicrobiomecommand.h"
+#include "listotulabelscommand.h"
+#include "getotulabelscommand.h"
+#include "removeotulabelscommand.h"
+#include "makecontigscommand.h"
 
 /*******************************************************/
 
@@ -284,7 +288,11 @@ CommandFactory::CommandFactory(){
     commands["pcr.seqs"]            = "pcr.seqs";
     commands["create.database"]     = "create.database";
     commands["make.biom"]           = "make.biom";
-    commands["get.coremicrobiome"]   = "get.coremicrobiome";
+    commands["get.coremicrobiome"]  = "get.coremicrobiome";
+    commands["list.otulabels"]      = "list.otulabels";
+    commands["get.otulabels"]       = "get.otulabels";
+    commands["remove.otulabels"]    = "remove.otulabels";
+    commands["make.contigs"]        = "make.contigs";
        commands["quit"]                                = "MPIEnabled"; 
 
 }
@@ -496,6 +504,10 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "create.database")       {      command = new CreateDatabaseCommand(optionString);          }
         else if(commandName == "make.biom")             {      command = new MakeBiomCommand(optionString);                }
         else if(commandName == "get.coremicrobiome")    {      command = new GetCoreMicroBiomeCommand(optionString);       }
+        else if(commandName == "list.otulabels")        {      command = new ListOtuLabelsCommand(optionString);           }
+        else if(commandName == "get.otulabels")         {      command = new GetOtuLabelsCommand(optionString);            }
+        else if(commandName == "remove.otulabels")      {      command = new RemoveOtuLabelsCommand(optionString);         }
+        else if(commandName == "make.contigs")          {      command = new MakeContigsCommand(optionString);             }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -648,6 +660,10 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
         else if(commandName == "create.database")       {      pipecommand = new CreateDatabaseCommand(optionString);          }
         else if(commandName == "make.biom")             {      pipecommand = new MakeBiomCommand(optionString);                }
         else if(commandName == "get.coremicrobiome")    {      pipecommand = new GetCoreMicroBiomeCommand(optionString);       }
+        else if(commandName == "list.otulabels")        {      pipecommand = new ListOtuLabelsCommand(optionString);           }
+        else if(commandName == "get.otulabels")         {      pipecommand = new GetOtuLabelsCommand(optionString);            }
+        else if(commandName == "remove.otulabels")      {      pipecommand = new RemoveOtuLabelsCommand(optionString);         }
+        else if(commandName == "make.contigs")          {      pipecommand = new MakeContigsCommand(optionString);             }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -786,6 +802,10 @@ Command* CommandFactory::getCommand(string commandName){
         else if(commandName == "create.database")       {      shellcommand = new CreateDatabaseCommand();         }
         else if(commandName == "make.biom")             {      shellcommand = new MakeBiomCommand();               }
         else if(commandName == "get.coremicrobiome")    {      shellcommand = new GetCoreMicroBiomeCommand();      }
+        else if(commandName == "list.otulabels")        {      shellcommand = new ListOtuLabelsCommand();          }
+        else if(commandName == "get.otulabels")         {      shellcommand = new GetOtuLabelsCommand();           }
+        else if(commandName == "remove.otulabels")      {      shellcommand = new RemoveOtuLabelsCommand();        }
+        else if(commandName == "make.contigs")          {      shellcommand = new MakeContigsCommand();            }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
index 75b87fae4303f161a1f75fb632e831672a80e29d..e4c915d6a14e4a9755a17d64c073fdda1440803b 100644 (file)
@@ -180,7 +180,7 @@ int CooccurrenceCommand::execute(){
         m->openOutputFile(outputFileName, out);
         outputNames.push_back(outputFileName);  outputTypes["summary"].push_back(outputFileName);
         out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
-        out << "metric\tlabel\tScore\tpValue\n";
+        out << "metric\tlabel\tScore\tzScore\tstandardDeviation\n";
 
                //as long as you are not at the end of the file or done wih the lines you want
                while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
@@ -272,6 +272,12 @@ int CooccurrenceCommand::execute(){
 int CooccurrenceCommand::getCooccurrence(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
     try {
         int numOTUS = thisLookUp[0]->getNumBins();
+        
+        if(numOTUS < 2) {
+            m->mothurOut("Not enough OTUs for co-occurrence analysis, skipping"); m->mothurOutEndLine();
+            return 0;
+        }
+        
         vector< vector<int> > co_matrix; co_matrix.resize(thisLookUp[0]->getNumBins());
         for (int i = 0; i < thisLookUp[0]->getNumBins(); i++) { co_matrix[i].resize((thisLookUp.size()), 0); }
         vector<int> columntotal; columntotal.resize(thisLookUp.size(), 0);
@@ -530,12 +536,20 @@ int CooccurrenceCommand::getCooccurrence(vector<SharedRAbundVector*>& thisLookUp
         
         m->mothurOutEndLine(); m->mothurOut("average metric score: " + toString(nullMean)); m->mothurOutEndLine();
         
+        //calc_p_value is not a statistical p-value, it's just the average that are either > or < the initscore.
+        //All it does is show what is expected in a competitively structured community
+        //zscore is output so p-value can be looked up in a ztable
         double pvalue = 0.0;
         if (metric == "cscore" || metric == "checker") { pvalue = trial.calc_pvalue_greaterthan (stats, initscore); }
         else{ pvalue = trial.calc_pvalue_lessthan (stats, initscore); }
+
+        double sd = trial.getSD(runs, stats, nullMean);
+
+        double zscore = trial.get_zscore(sd, nullMean, initscore);
         
-        m->mothurOut("pvalue: " + toString(pvalue)); m->mothurOutEndLine();
-        out << metric << '\t' << thisLookUp[0]->getLabel() << '\t' << nullMean << '\t' << pvalue << endl;
+        m->mothurOut("zscore: " + toString(zscore)); m->mothurOutEndLine();
+        m->mothurOut("standard deviation: " + toString(sd)); m->mothurOutEndLine();
+        out << metric << '\t' << thisLookUp[0]->getLabel() << '\t' << nullMean << '\t' << zscore << '\t' << sd << endl;
         
         return 0;
     }
index 24adcb53b59f39f049269d74b4da953a5f387262..48f782ab2a590d71a292e7b83125ba8ce0f3fffe 100644 (file)
@@ -26,71 +26,7 @@ Engine::Engine(){
        }
 }
 /***********************************************************************/
-string Engine::findMothursPath(){
-       try { 
-               
-               string envPath = getenv("PATH");
-               string mothurPath = "";
-               
-               //delimiting path char
-               char delim;
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-                       delim = ':';
-               #else
-                       delim = ';';
-               #endif
-               
-               //break apart path variable by ':'
-               vector<string> dirs;
-               mout->splitAtChar(envPath, dirs, delim);
-               
-               //get path related to mothur
-               for (int i = 0; i < dirs.size(); i++) {
-                       //to lower so we can find it
-                       string tempLower = "";
-                       for (int j = 0; j < dirs[i].length(); j++) {  tempLower += tolower(dirs[i][j]);  }
-                       
-                       //is this mothurs path?
-                       if (tempLower.find("mothur") != -1) {  mothurPath = dirs[i]; break;  }
-               }
-               
-               if (mothurPath != "") {
-                       //add mothur so it looks like what argv would look like
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-                               mothurPath += "/mothur";
-                       #else
-                               mothurPath += "\\mothur";
-                       #endif
-               }else {
-                       //okay mothur is not in the path, so the folder mothur is in must be in the path
-                       //lets find out which one
-                       
-                       //get path related to mothur
-                       for (int i = 0; i < dirs.size(); i++) {
-                                                               
-                               //is this mothurs path?
-                               ifstream in;
-                               string tempIn = dirs[i];
-                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-                                       tempIn += "/mothur";
-                               #else
-                                       tempIn += "\\mothur";
-                               #endif
-                               mout->openInputFile(tempIn, in, "");
-                               
-                               //if this file exists
-                               if (in) { in.close(); mothurPath = tempIn;  break;  }
-                       }
-               }
-               
-               return mothurPath;
-               
-       }
-       catch(exception& e) {
-               mout->errorOut(e, "Engine", "findMothursPath");
-               exit(1);
-       }
-}
+
 /***********************************************************************/
 
 InteractEngine::InteractEngine(string path){
@@ -99,7 +35,7 @@ InteractEngine::InteractEngine(string path){
        string temppath = path.substr(0, (path.find_last_of("othur")-5));
        
        //this will happen if you set the path variable to contain mothur's exe location
-       if (temppath == "") { path = findMothursPath(); }
+       if (temppath == "") { path = mout->findProgramPath("mothur"); }
        
        mout->argv = path;
 }
@@ -266,7 +202,7 @@ BatchEngine::BatchEngine(string path, string batchFileName){
                string temppath = path.substr(0, (path.find_last_of("othur")-5));
        
                //this will happen if you set the path variable to contain mothur's exe location
-               if (temppath == "") { path = findMothursPath(); }
+               if (temppath == "") { path = mout->findProgramPath("mothur"); }
                
                mout->argv = path;
                                
@@ -434,7 +370,7 @@ ScriptEngine::ScriptEngine(string path, string commandString){
                string temppath = path.substr(0, (path.find_last_of("othur")-5));
 
                //this will happen if you set the path variable to contain mothur's exe location
-               if (temppath == "") { path = findMothursPath(); }
+               if (temppath == "") { path = mout->findProgramPath("mothur"); }
                
                mout->argv = path;
                                
index 5311da9e1e3aafb3c447b2e51ec411cb29c79d14..460e245bf5312d81f5b15d70387cf9ec63d472f6 100644 (file)
@@ -33,7 +33,6 @@ protected:
        vector<string> options;
        CommandFactory* cFactory;
        MothurOut* mout;
-       string findMothursPath();
 };
 
 
diff --git a/getotulabelscommand.cpp b/getotulabelscommand.cpp
new file mode 100644 (file)
index 0000000..40ad69f
--- /dev/null
@@ -0,0 +1,381 @@
+//
+//  getotulabelscommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 5/21/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "getotulabelscommand.h"
+
+//**********************************************************************************************************************
+vector<string> GetOtuLabelsCommand::setParameters(){   
+       try {
+        CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
+        CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pconstaxonomy);
+               CommandParameter potucorr("otucorr", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(potucorr);
+        CommandParameter pcorraxes("corraxes", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pcorraxes);
+        CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtuLabelsCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string GetOtuLabelsCommand::getHelpString(){   
+       try {
+               string helpString = "";
+               helpString += "The get.otulabels command can be used to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n";
+               helpString += "The get.otulabels parameters are: constaxonomy, otucorr, corraxes, and accnos.\n";
+               helpString += "The constaxonomy parameter is input the results of the classify.otu command.\n";
+        helpString += "The otucorr parameter is input the results of the otu.association command.\n";
+        helpString += "The corraxes parameter is input the results of the corr.axes command.\n";
+               helpString += "The get.otulabels commmand should be in the following format: \n";
+               helpString += "get.otulabels(accnos=yourListOfOTULabels, corraxes=yourCorrAxesFile)\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtuLabelsCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+GetOtuLabelsCommand::GetOtuLabelsCommand(){    
+       try {
+               abort = true; calledHelp = true;
+               setParameters();
+        vector<string> tempOutNames;
+               outputTypes["contaxonomy"] = tempOutNames; 
+        outputTypes["otu.corr"] = tempOutNames;
+        outputTypes["corr.axes"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtuLabelsCommand", "GetOtuLabelsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+GetOtuLabelsCommand::GetOtuLabelsCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;   
+        
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       //valid paramters for this command
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                
+                //edit file types below to include only the types you added as parameters
+                
+                               string path;
+                it = parameters.find("constaxonomy");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["constaxonomy"] = inputDir + it->second;             }
+                               }
+                
+                it = parameters.find("accnos");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
+                               }
+                
+                it = parameters.find("corraxes");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["corraxes"] = inputDir + it->second;         }
+                               }
+                
+                it = parameters.find("otucorr");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["otucorr"] = inputDir + it->second;          }
+                               }
+            }
+            
+            vector<string> tempOutNames;
+            outputTypes["contaxonomy"] = tempOutNames; 
+            outputTypes["otu.corr"] = tempOutNames;
+            outputTypes["corr.axes"] = tempOutNames;
+            
+                       //check for parameters
+            accnosfile = validParameter.validFile(parameters, "accnos", true);
+                       if (accnosfile == "not open") { abort = true; }
+                       else if (accnosfile == "not found") {  
+                               accnosfile = m->getAccnosFile(); 
+                               if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
+                               else { 
+                                       m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
+                                       abort = true;
+                               } 
+                       }else { m->setAccnosFile(accnosfile); } 
+                       
+                       constaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true);
+                       if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; }
+                       else if (constaxonomyfile == "not found") {  constaxonomyfile = "";  }
+            
+            corraxesfile = validParameter.validFile(parameters, "corraxes", true);
+                       if (corraxesfile == "not open") { corraxesfile = ""; abort = true; }
+                       else if (corraxesfile == "not found") {  corraxesfile = "";  }
+            
+            otucorrfile = validParameter.validFile(parameters, "otucorr", true);
+                       if (otucorrfile == "not open") { otucorrfile = ""; abort = true; }
+                       else if (otucorrfile == "not found") {  otucorrfile = "";  }
+
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){   outputDir = "";        }
+            
+            if ((constaxonomyfile == "") && (corraxesfile == "") && (otucorrfile == ""))  { m->mothurOut("You must provide one of the following: constaxonomy, corraxes or otucorr."); m->mothurOutEndLine(); abort = true; }
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtuLabelsCommand", "GetOtuLabelsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int GetOtuLabelsCommand::execute(){
+       try {
+               
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        //get labels you want to keep
+               readAccnos();
+               
+               if (m->control_pressed) { return 0; }
+               
+               //read through the correct file and output lines you want to keep
+               if (constaxonomyfile != "")     {               readClassifyOtu();      }
+               if (corraxesfile != "")         {               readCorrAxes();         }
+               if (otucorrfile != "")          {               readOtuAssociation();   }
+        
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
+        
+        //output files created by command
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtuLabelsCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int GetOtuLabelsCommand::readClassifyOtu(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(constaxonomyfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(constaxonomyfile)) + "pick.taxonomy";
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               ifstream in;
+               m->openInputFile(constaxonomyfile, in);
+               
+               bool wroteSomething = false;
+               int selectedCount = 0;
+               
+        //read headers
+        string headers = m->getline(in);
+        out << headers << endl;
+        
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { break; }
+            
+            string otu = ""; string tax = "unknown";
+            int size = 0;
+            
+            in >> otu >> size >> tax; m->gobble(in);
+            
+            if (labels.count(otu) != 0) {
+                               wroteSomething = true;
+                               selectedCount++;
+                
+                out << otu << '\t' << size << '\t' << tax << endl;
+            }
+        }
+        in.close();
+        out.close();
+               
+               if (wroteSomething == false) { m->mothurOut("Your file does not contain any labels from the .accnos file."); m->mothurOutEndLine();  }
+               outputNames.push_back(outputFileName);  outputTypes["constaxonomy"].push_back(outputFileName);
+               
+               m->mothurOut("Selected " + toString(selectedCount) + " otus from your constaxonomy file."); m->mothurOutEndLine();
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtuLabelsCommand", "readClassifyOtu");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int GetOtuLabelsCommand::readOtuAssociation(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(otucorrfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(otucorrfile)) + "pick.corr";
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               ifstream in;
+               m->openInputFile(otucorrfile, in);
+               
+               bool wroteSomething = false;
+               int selectedCount = 0;
+               
+        //read headers
+        string headers = m->getline(in);
+        out << headers << endl;
+        
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { break; }
+            
+            string otu1 = ""; 
+            string otu2 = ""; 
+            in >> otu1 >> otu2;
+            string line = m->getline(in); m->gobble(in);
+            
+            if ((labels.count(otu1) != 0) && (labels.count(otu2) != 0)){
+                               wroteSomething = true;
+                               selectedCount++;
+                
+                out << otu1 << '\t' << otu2 << '\t' << line << endl;
+            }
+        }
+        in.close();
+        out.close();
+               
+               if (wroteSomething == false) { m->mothurOut("Your file does not contain any labels from the .accnos file."); m->mothurOutEndLine();  }
+               outputNames.push_back(outputFileName);  outputTypes["otu.corr"].push_back(outputFileName);
+               
+               m->mothurOut("Selected " + toString(selectedCount) + " lines from your otu.corr file."); m->mothurOutEndLine();
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtuLabelsCommand", "readOtuAssociation");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int GetOtuLabelsCommand::readCorrAxes(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(corraxesfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(corraxesfile)) + "pick.axes";
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+        
+               ifstream in;
+               m->openInputFile(corraxesfile, in);
+               
+               bool wroteSomething = false;
+               int selectedCount = 0;
+               
+        //read headers
+        string headers = m->getline(in);
+        out << headers << endl;
+        
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { break; }
+            
+            string otu = ""; 
+            in >> otu;
+            string line = m->getline(in); m->gobble(in);
+            
+            if (labels.count(otu) != 0) {
+                               wroteSomething = true;
+                               selectedCount++;
+                
+                out << otu << '\t' << line << endl;
+            }
+        }
+        in.close();
+        out.close();
+               
+               if (wroteSomething == false) { m->mothurOut("Your file does not contain any labels from the .accnos file."); m->mothurOutEndLine();  }
+               outputNames.push_back(outputFileName);  outputTypes["corr.axes"].push_back(outputFileName);
+               
+               m->mothurOut("Selected " + toString(selectedCount) + " lines from your corr.axes file."); m->mothurOutEndLine();
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtuLabelsCommand", "readCorrAxes");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+int GetOtuLabelsCommand::readAccnos(){
+       try {
+               
+               ifstream in;
+               m->openInputFile(accnosfile, in);
+               string name;
+               
+               while(!in.eof()){
+                       in >> name;
+            
+                       labels.insert(name);
+                       
+                       m->gobble(in);
+               }
+               in.close();     
+               
+               return 0;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtuLabelsCommand", "readAccnos");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
diff --git a/getotulabelscommand.h b/getotulabelscommand.h
new file mode 100644 (file)
index 0000000..bad0f55
--- /dev/null
@@ -0,0 +1,53 @@
+#ifndef Mothur_getotulabelscommand_h
+#define Mothur_getotulabelscommand_h
+
+//
+//  getotulabelscommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 5/21/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+
+#include "command.hpp"
+
+/**************************************************************************************************/
+
+class GetOtuLabelsCommand : public Command {
+public:
+    GetOtuLabelsCommand(string);
+    GetOtuLabelsCommand();
+    ~GetOtuLabelsCommand(){}
+    
+    vector<string> setParameters();
+    string getCommandName()                    { return "get.otulabels";          }
+    string getCommandCategory()                { return "OTU-Based Approaches";        } 
+    string getHelpString();    
+    string getCitation() { return "http://www.mothur.org/wiki/Get.otulabels"; }
+    string getDescription()            { return "Can be used with output from classify.otu, otu.association, or corr.axes to select specific otus."; }
+    
+    int execute(); 
+    void help() { m->mothurOut(getHelpString()); }     
+    
+private:
+    bool abort;
+    string outputDir, accnosfile, constaxonomyfile, otucorrfile, corraxesfile;
+    vector<string> outputNames;
+    set<string> labels;
+    
+    int readClassifyOtu();
+    int readOtuAssociation();
+    int readCorrAxes();
+    int readAccnos();
+    
+};
+
+/**************************************************************************************************/
+
+
+
+
+
+
+#endif
index 481fd1decfc4516eb5e1cf618c6b4d40cffbc8a9..92a43e965044c06c2fdcaca78d50779440b301ab 100644 (file)
@@ -73,7 +73,35 @@ int GroupMap::readDesignMap() {
                m->setAllGroups(namesOfGroups);
                return error;
 }
-
+/************************************************************/
+int GroupMap::readDesignMap(string filename) {
+    groupFileName = filename;
+       m->openInputFile(filename, fileHandle);
+       index = 0;
+    string seqName, seqGroup;
+    int error = 0;
+    
+    while(fileHandle){
+        fileHandle >> seqName; m->gobble(fileHandle);          //read from first column
+        fileHandle >> seqGroup;                        //read from second column
+        
+        if (m->control_pressed) {  fileHandle.close();  return 1; }
+        
+        setNamesOfGroups(seqGroup);
+        
+        it = groupmap.find(seqName);
+        
+        if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine();  }
+        else {
+            groupmap[seqName] = seqGroup;      //store data in map
+            seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+        }
+        m->gobble(fileHandle);
+    }
+    fileHandle.close();
+    m->setAllGroups(namesOfGroups);
+    return error;
+}
 /************************************************************/
 int GroupMap::getNumGroups() { return namesOfGroups.size();    }
 /************************************************************/
index 29c174134eee5689e5210775dbde2f220010d551..567165de820fd480ed2a4ecf8ff785bc9ec1c92c 100644 (file)
 
 class GroupMap {
 public:
-       GroupMap() {};
+       GroupMap() { m = MothurOut::getInstance(); }
        GroupMap(string);
        ~GroupMap();
        int readMap();
        int readDesignMap();
+    int readDesignMap(string);
        int getNumGroups();
        bool isValidGroup(string);  //return true if string is a valid group
        string getGroup(string);
index 97f480e749b7042a07a39d0f48890ff137d49dd0..8d1d7f7a6864b261a0a31611a1e02270730cfd32 100644 (file)
@@ -240,7 +240,6 @@ int IndicatorCommand::execute(){
                        util.setGroups(Groups, nameGroups);
                        designMap->setNamesOfGroups(nameGroups);
                        
-                       //loop through the Groups and fill Globaldata's Groups with the design file info
                        vector<string> namesSeqs = designMap->getNamesSeqs(Groups);
                        m->setGroups(namesSeqs);
                }
diff --git a/listotulabelscommand.cpp b/listotulabelscommand.cpp
new file mode 100644 (file)
index 0000000..93b1510
--- /dev/null
@@ -0,0 +1,387 @@
+//
+//  listotucommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 5/15/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "listotulabelscommand.h"
+#include "inputdata.h"
+
+//**********************************************************************************************************************
+vector<string> ListOtuLabelsCommand::setParameters(){  
+       try {
+        CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(pshared);
+               CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(prelabund);
+        CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
+               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
+        //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ListOtuLabelsCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string ListOtuLabelsCommand::getHelpString(){  
+       try {
+               string helpString = "";
+               helpString += "The list.otulabels lists otu labels from shared or relabund file. The results can be used by the get.otulabels to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n";
+               helpString += "The list.otulabels parameters are: shared, relabund, label and groups.\n";
+               helpString += "The label parameter is used to analyze specific labels in your input.\n";
+               helpString += "The groups parameter allows you to specify which of the groups you would like analyzed.\n";
+               helpString += "The list.otulabels commmand should be in the following format: \n";
+               helpString += "list.otulabels(shared=yourSharedFile, groups=yourGroup1-yourGroup2)\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ListOtuLabelsCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+ListOtuLabelsCommand::ListOtuLabelsCommand(){  
+       try {
+               abort = true; calledHelp = true;
+               setParameters();
+        vector<string> tempOutNames;
+               outputTypes["otulabels"] = tempOutNames; 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ListOtuLabelsCommand", "ListOtuLabelsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+ListOtuLabelsCommand::ListOtuLabelsCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;   
+               allLines = 1;
+        
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       //valid paramters for this command
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                
+                //edit file types below to include only the types you added as parameters
+                
+                               string path;
+                it = parameters.find("relabund");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
+                               }
+                
+                it = parameters.find("shared");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
+            }
+            
+            vector<string> tempOutNames;
+            outputTypes["otulabels"] = tempOutNames; 
+            
+                       //check for parameters
+            sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { abort = true; }
+                       else if (sharedfile == "not found") { sharedfile = ""; }
+                       else { inputFileName = sharedfile; format = "sharedfile"; m->setSharedFile(sharedfile); }
+                       
+                       relabundfile = validParameter.validFile(parameters, "relabund", true);
+                       if (relabundfile == "not open") { abort = true; }
+                       else if (relabundfile == "not found") { relabundfile = ""; }
+                       else { inputFileName = relabundfile; format = "relabund"; m->setRelAbundFile(relabundfile); }
+            
+            if ((relabundfile == "") && (sharedfile == "")) { 
+                               //is there are current file available for either of these?
+                               //give priority to shared, then relabund
+                               sharedfile = m->getSharedFile(); 
+                               if (sharedfile != "") {  inputFileName = sharedfile; format="sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                               else { 
+                                       relabundfile = m->getRelAbundFile(); 
+                                       if (relabundfile != "") {  inputFileName = relabundfile; format="relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
+                                       else { 
+                                               m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine(); 
+                                               abort = true;
+                                       }
+                               }
+                       }
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = m->hasPath(inputFileName); //if user entered a file with a path then preserve it    
+                       }
+            
+            string groups = validParameter.validFile(parameters, "groups", false);                     
+                       if (groups == "not found") { groups = ""; }
+                       else { m->splitAtDash(groups, Groups); }
+                       m->setGroups(Groups);
+            
+            string label = validParameter.validFile(parameters, "label", false);                       
+                       if (label == "not found") { label = ""; }
+                       else { 
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }                       
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ListOtuLabelsCommand", "ListOtuLabelsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int ListOtuLabelsCommand::execute(){
+       try {
+               
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        InputData input(inputFileName, format);
+        
+        if (format == "relabund") {
+            vector<SharedRAbundFloatVector*> lookup = input.getSharedRAbundFloatVectors();
+            string lastLabel = lookup[0]->getLabel();
+            
+            //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+            set<string> processedLabels;
+            set<string> userLabels = labels;
+            
+            //as long as you are not at the end of the file or done wih the lines you want
+            while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+                
+                if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+                
+                if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                 
+                    
+                    m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                    
+                    createList(lookup);
+                    
+                    processedLabels.insert(lookup[0]->getLabel());
+                    userLabels.erase(lookup[0]->getLabel());
+                }
+                
+                if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                    string saveLabel = lookup[0]->getLabel();
+                    
+                    for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
+                    lookup = input.getSharedRAbundFloatVectors(lastLabel);
+                    m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                    
+                    createList(lookup);
+                    
+                    processedLabels.insert(lookup[0]->getLabel());
+                    userLabels.erase(lookup[0]->getLabel());
+                    
+                    //restore real lastlabel to save below
+                    lookup[0]->setLabel(saveLabel);
+                }
+                
+                lastLabel = lookup[0]->getLabel();
+                //prevent memory leak
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+                
+                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
+                
+                //get next line to process
+                lookup = input.getSharedRAbundFloatVectors();                          
+            }
+            
+            if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
+            
+            //output error messages about any remaining user labels
+            set<string>::iterator it;
+            bool needToRun = false;
+            for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                m->mothurOut("Your file does not include the label " + *it); 
+                if (processedLabels.count(lastLabel) != 1) {
+                    m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                    needToRun = true;
+                }else {
+                    m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                }
+            }
+            
+            //run last label if you need to
+            if (needToRun == true)  {
+                for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
+                lookup = input.getSharedRAbundFloatVectors(lastLabel);
+                
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                createList(lookup);
+                
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+            }
+        }else {
+            
+            vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+            string lastLabel = lookup[0]->getLabel();
+            
+            //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+            set<string> processedLabels;
+            set<string> userLabels = labels;
+            
+            //as long as you are not at the end of the file or done wih the lines you want
+            while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+                
+                if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+                
+                if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                 
+                    
+                    m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                    
+                    createList(lookup);
+                    
+                    processedLabels.insert(lookup[0]->getLabel());
+                    userLabels.erase(lookup[0]->getLabel());
+                }
+                
+                if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                    string saveLabel = lookup[0]->getLabel();
+                    
+                    for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
+                    lookup = input.getSharedRAbundVectors(lastLabel);
+                    m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                    
+                    createList(lookup);
+                    
+                    processedLabels.insert(lookup[0]->getLabel());
+                    userLabels.erase(lookup[0]->getLabel());
+                    
+                    //restore real lastlabel to save below
+                    lookup[0]->setLabel(saveLabel);
+                }
+                
+                lastLabel = lookup[0]->getLabel();
+                //prevent memory leak
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+                
+                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
+                
+                //get next line to process
+                lookup = input.getSharedRAbundVectors();                               
+            }
+            
+            if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
+            
+            //output error messages about any remaining user labels
+            set<string>::iterator it;
+            bool needToRun = false;
+            for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                m->mothurOut("Your file does not include the label " + *it); 
+                if (processedLabels.count(lastLabel) != 1) {
+                    m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                    needToRun = true;
+                }else {
+                    m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                }
+            }
+            
+            //run last label if you need to
+            if (needToRun == true)  {
+                for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
+                lookup = input.getSharedRAbundVectors(lastLabel);
+                
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                createList(lookup);
+                
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+            }
+        }
+        
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
+        
+        //output files created by command
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+        return 0;
+               
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ListOtuLabelsCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int ListOtuLabelsCommand::createList(vector<SharedRAbundVector*>& lookup){
+       try {
+        
+        string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + ".otu.labels";
+        outputNames.push_back(outputFileName);  outputTypes["otulabels"].push_back(outputFileName);
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+        
+        for (int i = 0; i < m->currentBinLabels.size(); i++) {  out << m->currentBinLabels[i] << endl;  }
+        
+        out.close();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ListOtuLabelsCommand", "createTable");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+int ListOtuLabelsCommand::createList(vector<SharedRAbundFloatVector*>& lookup){
+       try {
+        
+        string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + ".otu.labels";
+        outputNames.push_back(outputFileName);  outputTypes["accnos"].push_back(outputFileName);
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+        
+        for (int i = 0; i < m->currentBinLabels.size(); i++) {  out << m->currentBinLabels[i] << endl;  }
+        
+        out.close();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ListOtuLabelsCommand", "createTable");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
diff --git a/listotulabelscommand.h b/listotulabelscommand.h
new file mode 100644 (file)
index 0000000..dbfd5d4
--- /dev/null
@@ -0,0 +1,54 @@
+#ifndef Mothur_listotucommand_h
+#define Mothur_listotucommand_h
+
+//
+//  listotucommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 5/15/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+
+#include "command.hpp"
+#include "sharedrabundvector.h"
+
+/**************************************************************************************************/
+
+class ListOtuLabelsCommand : public Command {
+public:
+    ListOtuLabelsCommand(string);
+    ListOtuLabelsCommand();
+    ~ListOtuLabelsCommand(){}
+    
+    vector<string> setParameters();
+    string getCommandName()                    { return "list.otulabels";          }
+    string getCommandCategory()                { return "OTU-Based Approaches";        } 
+    //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
+    string getHelpString();    
+    string getCitation() { return "http://www.mothur.org/wiki/List.otulabels"; }
+    string getDescription()            { return "lists otu labels from shared or relabund file. Can be used by get.otulabels with output from classify.otu, otu.association, or corr.axes to select specific otus."; }
+    
+    int execute(); 
+    void help() { m->mothurOut(getHelpString()); }     
+    
+private:
+    bool abort, allLines;
+    string outputDir, sharedfile, relabundfile, label, inputFileName, format;
+    vector<string> outputNames;
+    vector<string> Groups;
+    set<string> labels;
+    
+    int createList(vector<SharedRAbundFloatVector*>&);
+    int createList(vector<SharedRAbundVector*>&);
+
+};
+
+/**************************************************************************************************/
+
+
+
+
+
+
+#endif
index acd0500963d096acfb5d1ea453cc9186feafe783..03e7baf2b9888fc7f52cbe6076ea8fdfa472b584 100644 (file)
@@ -165,7 +165,7 @@ MakeBiomCommand::MakeBiomCommand(string option) {
                        for (it = parameters.begin(); it != parameters.end(); it++) { 
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
-                       
+
                        //initialize outputTypes
                        vector<string> tempOutNames;
                        outputTypes["biom"] = tempOutNames;
@@ -612,7 +612,7 @@ vector<string> MakeBiomCommand::parseTax(string tax, vector<string>& scores) {
                         scores.push_back(confidenceScore);
                                        }else{ scores.push_back("null"); }
                                }
-                       }
+                       }else{ scores.push_back("null"); }
                        
             //strip "" if they are there
             pos = taxon.find("\"");
diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp
new file mode 100644 (file)
index 0000000..523edd5
--- /dev/null
@@ -0,0 +1,686 @@
+//
+//  makecontigscommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 5/15/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "makecontigscommand.h"
+
+//**********************************************************************************************************************
+vector<string> MakeContigsCommand::setParameters(){    
+       try {
+               CommandParameter pfasta("ffastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
+        CommandParameter prfasta("rfastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(prfasta);
+               CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "",false,false); parameters.push_back(palign);
+               CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
+               CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
+               CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
+               CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
+        CommandParameter pthreshold("threshold", "Number", "", "40", "", "", "",false,false); parameters.push_back(pthreshold);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string MakeContigsCommand::getHelpString(){    
+       try {
+               string helpString = "";
+               helpString += "The make.contigs command reads a forward fastq file and a reverse fastq file and outputs new fasta and quality files.\n";
+               helpString += "The make.contigs command parameters are ffastq, rfastq, align, match, mismatch, gapopen, gapextend and processors.\n";
+               helpString += "The ffastq and rfastq parameter is required.\n";
+               helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n";
+               helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
+               helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
+               helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
+               helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
+        helpString += "The threshold parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=40.\n";
+        helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
+        helpString += "The make.contigs command should be in the following format: \n";
+               helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
+               helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "getHelpString");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+MakeContigsCommand::MakeContigsCommand(){      
+       try {
+               abort = true; calledHelp = true; 
+               setParameters();
+               vector<string> tempOutNames;
+               outputTypes["fasta"] = tempOutNames;
+               outputTypes["qfile"] = tempOutNames;
+        outputTypes["mismatch"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+MakeContigsCommand::MakeContigsCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;   
+        
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string, string> parameters = parser.getParameters(); 
+                       
+                       ValidParameters validParameter("pairwise.seqs");
+                       map<string, string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["fasta"] = tempOutNames;
+                       outputTypes["qfile"] = tempOutNames;
+            outputTypes["mismatch"] = tempOutNames;
+                       
+            
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else { 
+                               string path;
+                it = parameters.find("ffastq");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["ffastq"] = inputDir + it->second;           }
+                               }
+                
+                it = parameters.find("rfastq");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["rfastq"] = inputDir + it->second;           }
+                               }
+            }
+            
+            ffastqfile = validParameter.validFile(parameters, "ffastq", true);
+                       if (ffastqfile == "not open") { ffastqfile = ""; abort = true; }        
+                       else if (ffastqfile == "not found") { ffastqfile = ""; abort=true;  m->mothurOut("The ffastq parameter is required.\n"); }
+                       
+                       rfastqfile = validParameter.validFile(parameters, "rfastq", true);
+                       if (rfastqfile == "not open") { rfastqfile = ""; abort = true; }        
+                       else if (rfastqfile == "not found") { rfastqfile = ""; abort=true;  m->mothurOut("The rfastq parameter is required.\n"); }
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(ffastqfile);             }
+                       
+
+                       //check for optional parameter and set defaults
+                       // ...at some point should added some additional type checking...
+                       string temp;
+                       temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
+                       m->mothurConvert(temp, match);  
+                       
+                       temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
+                       m->mothurConvert(temp, misMatch);  
+            if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
+                       
+                       temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
+                       m->mothurConvert(temp, gapOpen);  
+            if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
+                       
+                       temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
+                       m->mothurConvert(temp, gapExtend); 
+            if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
+                       
+            temp = validParameter.validFile(parameters, "threshold", false);   if (temp == "not found"){       temp = "40";                    }
+                       m->mothurConvert(temp, threshold); 
+            if ((threshold < 0) || (threshold > 40)) { m->mothurOut("[ERROR]: threshold must be between 0 and 40.\n"); abort=true; }
+
+                       temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
+                       m->setProcessors(temp);
+                       m->mothurConvert(temp, processors);
+                       
+                       align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
+                       if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
+        }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int MakeContigsCommand::execute(){
+       try {
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        //read ffastq and rfastq files creating fasta and qual files.
+        //this function will create a forward and reverse, fasta and qual files for each processor.
+        //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual
+        int numReads = 0;
+        int start = time(NULL);
+        longestBase = 1000;
+        m->mothurOut("Reading fastq data...\n"); 
+        vector< vector<string> > files = readFastqFiles(numReads);  
+        m->mothurOut("Done.\n");
+    
+        if (m->control_pressed) { return 0; }
+        
+        string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.fasta";
+        string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.qual";
+        string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.mismatches";
+        outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
+        outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile);
+        outputNames.push_back(outMisMatchFile); outputTypes["mismatch"].push_back(outMisMatchFile);
+        
+        m->mothurOut("Making contigs...\n"); 
+        createProcesses(files, outFastaFile, outQualFile, outMisMatchFile);
+        m->mothurOut("Done.\n");
+        
+        //remove temp fasta and qual files
+        for (int i = 0; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }  }
+        
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); }  return 0; }
+        
+        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
+        
+        string currentFasta = "";
+               itTypes = outputTypes.find("fasta");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
+               }
+        
+        string currentQual = "";
+               itTypes = outputTypes.find("qfile");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { currentQual = (itTypes->second)[0]; m->setQualFile(currentQual); }
+               }
+               
+        //output files created by command
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputQual, string outputMisMatches) {
+       try {
+               int num = 0;
+               vector<int> processIDS;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+               int process = 0;
+               
+               //loop through and create all the processes you want
+               while (process != processors-1) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               num = driver(files[process], outputFasta + toString(getpid()) + ".temp", outputQual + toString(getpid()) + ".temp", outputMisMatches + toString(getpid()) + ".temp");
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = outputFasta + toString(getpid()) + ".num.temp";
+                               m->openOutputFile(tempFile, out);
+                               out << num << endl;
+                               out.close();
+                               
+                               exit(0);
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
+                       }
+               }
+               
+               //do my part
+               num = driver(files[processors-1], outputFasta, outputQual, outputMisMatches);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+        
+               for (int i = 0; i < processIDS.size(); i++) {
+                       ifstream in;
+                       string tempFile =  outputFasta + toString(processIDS[i]) + ".num.temp";
+                       m->openInputFile(tempFile, in);
+                       if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+                       in.close(); m->mothurRemove(tempFile);
+        }
+    #else
+        
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the contigsData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<contigsData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=0; i<processors-1; i++ ){
+                       string extension = toString(i) + ".temp";
+                       
+                       contigsData* tempcontig = new contigsData(files[i], (outputFasta + extension), (outputQual + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, threshold, i);
+                       pDataArray.push_back(tempcontig);
+                       processIDS.push_back(i);
+            
+                       hThreadArray[i] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+               }
+               
+        num = driver(files[processors-1], outputFasta, outputQual, outputMisMatches);          
+        
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       num += pDataArray[i]->count;
+            CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+        }
+                               
+    #endif     
+        
+        for (int i = 0; i < processIDS.size(); i++) {
+                       m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
+                       m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
+                       
+                       m->appendFiles((outputQual + toString(processIDS[i]) + ".temp"), outputQual);
+                       m->mothurRemove((outputQual + toString(processIDS[i]) + ".temp"));
+            
+            m->appendFiles((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
+                       m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
+               }
+               
+               return num;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "createProcesses");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputQual, string outputMisMatches){
+    try {
+        
+        Alignment* alignment;
+        if(align == "gotoh")                   {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
+               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
+        
+        int num = 0;
+        string thisffastafile = files[0];
+        string thisfqualfile = files[1];
+        string thisrfastafile = files[2];
+        string thisrqualfile = files[3];
+        
+        if (m->debug) {  m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
+        
+        ifstream inFFasta, inRFasta, inFQual, inRQual;
+        m->openInputFile(thisffastafile, inFFasta);
+        m->openInputFile(thisfqualfile, inFQual);
+        m->openInputFile(thisrfastafile, inRFasta);
+        m->openInputFile(thisrqualfile, inRQual);
+        
+        ofstream outFasta, outQual, outMisMatch;
+        m->openOutputFile(outputFasta, outFasta);
+        m->openOutputFile(outputQual, outQual);
+        m->openOutputFile(outputMisMatches, outMisMatch);
+        outMisMatch << "Name\tLength\tMisMatches\n";
+        
+        while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) {
+            
+            if (m->control_pressed) { break; }
+            
+            //read seqs and quality info
+            Sequence fSeq(inFFasta); m->gobble(inFFasta);
+            Sequence rSeq(inRFasta); m->gobble(inRFasta);
+            QualityScores fQual(inFQual); m->gobble(inFQual);
+            QualityScores rQual(inRQual); m->gobble(inRQual);
+            
+            //flip the reverse reads
+            rSeq.reverseComplement();
+            rQual.flipQScores();
+            
+            //pairwise align
+            alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
+            map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
+            map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
+            fSeq.setAligned(alignment->getSeqAAln());
+            rSeq.setAligned(alignment->getSeqBAln());
+            int length = fSeq.getAligned().length();
+        
+            //traverse alignments merging into one contiguous seq
+            string contig = "";
+            vector<int> contigScores; 
+            int numMismatches = 0;
+            string seq1 = fSeq.getAligned();
+            string seq2 = rSeq.getAligned();
+            vector<int> scores1 = fQual.getQualityScores();
+            vector<int> scores2 = rQual.getQualityScores();
+
+           // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
+            int overlapStart = fSeq.getStartPos();
+            int seq2Start = rSeq.getStartPos();
+            //bigger of the 2 starting positions is the location of the overlapping start
+            if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
+                overlapStart = seq2Start; 
+                for (int i = 0; i < overlapStart; i++) {
+                    contig += seq1[i];
+                    contigScores.push_back(scores1[ABaseMap[i]]);
+                }
+            }else { //seq1 starts later so take from 0 to overlapStart from seq2
+                for (int i = 0; i < overlapStart; i++) {
+                    contig += seq2[i];
+                    contigScores.push_back(scores2[BBaseMap[i]]);
+                }
+            }
+            
+            int seq1End = fSeq.getEndPos();
+            int seq2End = rSeq.getEndPos();
+            int overlapEnd = seq1End;
+            if (seq2End < overlapEnd) { overlapEnd = seq2End; }  //smallest end position is where overlapping ends
+            
+            for (int i = overlapStart; i < overlapEnd; i++) {
+                if (seq1[i] == seq2[i]) { //match, add base and choose highest score
+                    contig += seq1[i];
+                    contigScores.push_back(scores1[ABaseMap[i]]);
+                    if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; }
+                }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base
+                    if (scores2[BBaseMap[i]] < threshold) { } //
+                    else {
+                        contig += seq2[i];
+                        contigScores.push_back(scores2[BBaseMap[i]]);
+                    }
+                }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below threshold. In that case eliminate base
+                    if (scores1[ABaseMap[i]] < threshold) { } //
+                    else {
+                        contig += seq1[i];
+                        contigScores.push_back(scores1[ABaseMap[i]]);
+                    }
+                }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
+                    char c = seq1[i];
+                    contigScores.push_back(scores1[ABaseMap[i]]);
+                    if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[contigScores.size()-1] = scores2[BBaseMap[i]]; c = seq2[i]; }
+                    contig += c;
+                    numMismatches++;
+                }else { //should never get here
+                    m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
+                }
+            }
+            
+            if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2
+                for (int i = overlapEnd; i < length; i++) {
+                    contig += seq2[i];
+                    contigScores.push_back(scores2[BBaseMap[i]]);
+                }
+            }else { //seq2 ends before seq1 so take from overlap to length from seq1
+                for (int i = overlapEnd; i < length; i++) {
+                    contig += seq1[i];
+                    contigScores.push_back(scores1[ABaseMap[i]]);
+                }
+
+            }
+            //if (num < 5) { cout << overlapStart << '\t' << overlapEnd << endl << seq1 << endl << seq2 << endl<< contig << endl; }
+            //output
+            outFasta << ">" << fSeq.getName() << endl << contig << endl;
+            outQual << ">" << fSeq.getName() << endl;
+            for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
+            outQual << endl;
+            outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
+            
+            num++;
+            
+                       //report progress
+                       if((num) % 1000 == 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
+               }
+        
+               //report progress
+               if((num) % 1000 != 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
+        
+        inFFasta.close();
+        inFQual.close();
+        inRFasta.close();
+        inRQual.close();
+        outFasta.close();
+        outQual.close();
+        outMisMatch.close();
+        delete alignment;
+        
+        if (m->control_pressed) { m->mothurRemove(outputQual); m->mothurRemove(outputFasta);  m->mothurRemove(outputMisMatches);}
+        
+        return num;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "driver");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
+    try {
+        vector< vector<string> > files;
+        
+        //maps processors number to file pointer
+        map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
+        map<int, vector<ofstream*> >::iterator it;
+        
+        //create files to write to
+        for (int i = 0; i < processors; i++) {
+            vector<ofstream*> temp;
+            ofstream* outFF = new ofstream;     temp.push_back(outFF);
+            ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
+            ofstream* outRF = new ofstream;     temp.push_back(outRF);
+            ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
+            tempfiles[i] = temp;
+            
+            vector<string> names;
+            string ffastafilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "ffasta.temp";
+            string rfastafilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rfasta.temp";
+            string fqualfilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "fqual.temp";
+            string rqualfilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rqual.temp";
+            names.push_back(ffastafilename); names.push_back(fqualfilename);
+            names.push_back(rfastafilename); names.push_back(rqualfilename);
+            files.push_back(names);
+            
+            m->openOutputFile(ffastafilename, *outFF);
+            m->openOutputFile(rfastafilename, *outRF);
+            m->openOutputFile(fqualfilename, *outFQ);
+            m->openOutputFile(rqualfilename, *outRQ);
+        }
+        
+        if (m->control_pressed) {
+            //close files, delete ofstreams
+            for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
+            //remove files
+            for (int i = 0; i < files.size(); i++) {  
+                for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
+            }
+        }
+        
+        ifstream inForward;
+        m->openInputFile(ffastqfile, inForward);
+        
+        ifstream inReverse;
+        m->openInputFile(rfastqfile, inReverse);
+        
+        count = 0;
+        while ((!inForward.eof()) && (!inReverse.eof())) {
+            
+            if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
+            
+            //get a read from forward and reverse fastq files
+            fastqRead fread = readFastq(inForward);
+            fastqRead rread = readFastq(inReverse);
+            checkReads(fread, rread);
+            
+            if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
+            
+            //if the reads are okay write to output files
+            int process = count % processors;
+            
+            *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
+            *(tempfiles[process][1]) << ">" << fread.name << endl;
+            for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
+            *(tempfiles[process][1]) << endl;
+            *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
+            *(tempfiles[process][3]) << ">" << rread.name << endl;
+            for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
+            *(tempfiles[process][3]) << endl;
+            
+            count++;
+            
+            //report progress
+                       if((count) % 10000 == 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+                       
+               }
+               //report progress
+               if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+               
+        
+        
+        //close files, delete ofstreams
+        for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
+        inForward.close();
+        inReverse.close();
+        
+        //adjust for really large processors or really small files
+        if (count < processors) { 
+            for (int i = count; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
+            files.resize(count);
+            processors = count; 
+        }
+        
+        return files;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+fastqRead MakeContigsCommand::readFastq(ifstream& in){
+    try {
+        fastqRead read;
+        
+        //read sequence name
+        string name = m->getline(in); m->gobble(in);
+        if (name == "") {  m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        else { name = name.substr(1); }
+        
+        //read sequence
+        string sequence = m->getline(in); m->gobble(in);
+        if (sequence == "") {  m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        
+        //read sequence name
+        string name2 = m->getline(in); m->gobble(in);
+        if (name2 == "") {  m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        else { name2 = name2.substr(1);  }
+        
+        //read quality scores
+        string quality = m->getline(in); m->gobble(in);
+        if (quality == "") {  m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        
+        //sanity check sequence length and number of quality scores match
+        if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; return read; } }
+        if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        
+        vector<int> qualScores;
+               int controlChar = int('@');
+               for (int i = 0; i < quality.length(); i++) { 
+                       int temp = int(quality[i]);
+                       temp -= controlChar;
+                       
+                       qualScores.push_back(temp);
+               }
+
+        read.name = name;
+        read.sequence = sequence;
+        read.scores = qualScores;
+
+        return read;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeContigsCommand", "readFastq");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse){
+    try {
+        bool good = true;
+        
+        //fix names
+        if ((forward.name.length() > 2) && (reverse.name.length() > 2)) {
+            forward.name = forward.name.substr(0, forward.name.length()-2);
+            reverse.name = reverse.name.substr(0, reverse.name.length()-2);
+        }else { good = false; m->control_pressed = true; }
+        
+        //do names match
+        if (forward.name != reverse.name) {
+            m->mothurOut("[ERROR]: read " + forward.name + " from " + ffastqfile + ", but read " + reverse.name + " from " + rfastqfile + ".\n");
+            good = false; m->control_pressed = true;
+        }
+        
+        //do sequence lengths match
+        if (forward.sequence.length() != reverse.sequence.length()) {
+            m->mothurOut("[ERROR]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffastqfile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfastqfile + ".\n");
+            good = false; m->control_pressed = true;
+        }
+        
+        //do number of qual scores match 
+        if (forward.scores.size() != reverse.scores.size()) {
+            m->mothurOut("[ERROR]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffastqfile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfastqfile + ".\n");
+            good = false; m->control_pressed = true;
+        }
+
+        return good;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeContigsCommand", "readFastq");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+
+
+
+
diff --git a/makecontigscommand.h b/makecontigscommand.h
new file mode 100644 (file)
index 0000000..cc08842
--- /dev/null
@@ -0,0 +1,228 @@
+#ifndef Mothur_makecontigscommand_h
+#define Mothur_makecontigscommand_h
+
+//
+//  makecontigscommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 5/15/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "command.hpp"
+#include "sequence.hpp"
+#include "qualityscores.h"
+#include "alignment.hpp"
+#include "gotohoverlap.hpp"
+#include "needlemanoverlap.hpp"
+#include "blastalign.hpp"
+#include "noalign.hpp"
+
+
+struct fastqRead {
+       vector<int> scores;
+       string name;
+       string sequence;
+       
+       fastqRead() { name = ""; sequence = ""; scores.clear(); };
+       fastqRead(string n, string s, vector<int> sc) : name(n), sequence(s), scores(sc) {};
+       ~fastqRead() {};
+};
+
+/**************************************************************************************************/
+
+class MakeContigsCommand : public Command {
+public:
+    MakeContigsCommand(string);
+    MakeContigsCommand();
+    ~MakeContigsCommand(){}
+    
+    vector<string> setParameters();
+    string getCommandName()                    { return "make.contigs";                        }
+    string getCommandCategory()                { return "Sequence Processing";         } 
+    //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
+    string getHelpString();    
+    string getCitation() { return "http://www.mothur.org/wiki/Make.contigs"; }
+    string getDescription()            { return "description"; }
+    
+    int execute(); 
+    void help() { m->mothurOut(getHelpString()); }     
+    
+private:
+    bool abort;
+    string outputDir, ffastqfile, rfastqfile, align;
+       float match, misMatch, gapOpen, gapExtend;
+       int processors, longestBase, threshold;
+    vector<string> outputNames;
+    
+    fastqRead readFastq(ifstream&);
+    vector< vector<string> > readFastqFiles(int&);
+    bool checkReads(fastqRead&, fastqRead&);
+    int createProcesses(vector< vector<string> >, string, string, string);
+    int driver(vector<string>, string, string, string);
+};
+
+/**************************************************************************************************/
+
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct contigsData {
+       string outputFasta; 
+       string outputQual; 
+       string outputMisMatches;
+       string align;
+    vector<string> files;
+       MothurOut* m;
+       float match, misMatch, gapOpen, gapExtend;
+       int count, threshold, threadID;
+       
+       contigsData(){}
+       contigsData(vector<string> f, string of, string oq, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int tid) {
+        files = f;
+               outputFasta = of;
+        outputQual = oq;
+        outputMisMatches = om;
+        m = mout;
+               match = ma; 
+               misMatch = misMa;
+               gapOpen = gapO; 
+               gapExtend = gapE; 
+        threshold = thr;
+               align = al;
+               count = 0;
+               threadID = tid;
+       }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ 
+       contigsData* pDataArray;
+       pDataArray = (contigsData*)lpParam;
+       
+       try {
+        int longestBase = 1000;
+        Alignment* alignment;
+        if(pDataArray->align == "gotoh")                       {       alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, longestBase);                 }
+               else if(pDataArray->align == "needleman")       {       alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, longestBase);                            }
+        
+        int num = 0;
+        string thisffastafile = pDataArray->files[0];
+        string thisfqualfile = pDataArray->files[1];
+        string thisrfastafile = pDataArray->files[2];
+        string thisrqualfile = pDataArray->files[3];
+        
+        if (pDataArray->m->debug) {  pDataArray->m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
+        
+        ifstream inFFasta, inRFasta, inFQual, inRQual;
+        pDataArray->m->openInputFile(thisffastafile, inFFasta);
+        pDataArray->m->openInputFile(thisfqualfile, inFQual);
+        pDataArray->m->openInputFile(thisrfastafile, inRFasta);
+        pDataArray->m->openInputFile(thisrqualfile, inRQual);
+        
+        ofstream outFasta, outQual, outMisMatch;
+        pDataArray->m->openOutputFile(pDataArray->outputFasta, outFasta);
+        pDataArray->m->openOutputFile(pDataArray->outputQual, outQual);
+        pDataArray->m->openOutputFile(pDataArray->outputMisMatches, outMisMatch);
+        outMisMatch << "Name\tLength\tMisMatches\n";
+        
+        while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) {
+            
+            if (pDataArray->m->control_pressed) { break; }
+            
+            //read seqs and quality info
+            Sequence fSeq(inFFasta); pDataArray->m->gobble(inFFasta);
+            Sequence rSeq(inRFasta); pDataArray->m->gobble(inRFasta);
+            QualityScores fQual(inFQual); pDataArray->m->gobble(inFQual);
+            QualityScores rQual(inRQual); pDataArray->m->gobble(inRQual);
+            
+            //flip the reverse reads
+            rSeq.reverseComplement();
+            rQual.flipQScores();
+            
+            //pairwise align
+            alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
+            map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
+            map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
+            fSeq.setAligned(alignment->getSeqAAln());
+            rSeq.setAligned(alignment->getSeqBAln());
+            int length = fSeq.getAligned().length();
+            
+            //traverse alignments merging into one contiguous seq
+            string contig = "";
+            vector<int> contigScores; 
+            int numMismatches = 0;
+            string seq1 = fSeq.getAligned();
+            string seq2 = rSeq.getAligned();
+            
+            vector<int> scores1 = fQual.getQualityScores();
+            vector<int> scores2 = rQual.getQualityScores();
+            
+            for (int i = 0; i < length; i++) {
+                if (seq1[i] == seq2[i]) { //match, add base and choose highest score
+                    contig += seq1[i];
+                    contigScores.push_back(scores1[ABaseMap[i]]);
+                    if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; }
+                }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below threshold. In that case eliminate base
+                    if (scores2[BBaseMap[i]] >= pDataArray->threshold)) {
+                        contig += seq2[i];
+                        contigScores.push_back(scores2[BBaseMap[i]]);
+                    }
+                }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below threshold. In that case eliminate base
+                    if (scores1[ABaseMap[i]] >= pDataArray->threshold) { 
+                        contig += seq1[i];
+                        contigScores.push_back(scores1[ABaseMap[i]]);
+                    }
+                }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
+                    char c = seq1[i];
+                    contigScores.push_back(scores1[ABaseMap[i]]);
+                    if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; c = seq2[i]; }
+                    contig += c;
+                    numMismatches++;
+                }else { //should never get here
+                    pDataArray->m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
+                }
+            }
+            
+            //output
+            outFasta << ">" << fSeq.getName() << endl << contig << endl;
+            outQual << ">" << fSeq.getName() << endl;
+            for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
+            outQual << endl;
+            outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
+            
+            num++;
+            
+                       //report progress
+                       if((num) % 1000 == 0){  pDataArray->m->mothurOut(toString(num)); pDataArray->m->mothurOutEndLine();             }
+               }
+        
+               //report progress
+               if((num) % 1000 != 0){  pDataArray->m->mothurOut(toString(num)); pDataArray->m->mothurOutEndLine();             }
+        
+        inFFasta.close();
+        inFQual.close();
+        inRFasta.close();
+        inRQual.close();
+        outFasta.close();
+        outQual.close();
+        outMisMatch.close();
+        delete alignment;
+        
+        if (pDataArray->m->control_pressed) { pDataArray->m->mothurRemove(pDataArray->outputQual); pDataArray->m->mothurRemove(pDataArray->outputFasta);  pDataArray->m->mothurRemove(pDataArray->outputMisMatches);}
+        
+        return 0;
+               
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "AlignCommand", "MyContigsThreadFunction");
+               exit(1);
+       }
+} 
+#endif
+
+
+#endif
index 8e1a7f52882fe1cf8e3b12a60538915306dacb46..0f6e030bb0d1fec3f7cf99614fa0827c66de2bb5 100644 (file)
@@ -201,7 +201,7 @@ string MakeFastQCommand::convertQual(vector<int> qual) {
        try {
                string qualScores;
                
-               int controlChar = int('!');
+               int controlChar = int('@');
                
                for (int i = 0; i < qual.size(); i++) { 
                        int temp = qual[i] + controlChar;
index e06c334636410c7d5b31512fd3d1440c26711dad..6f8a42343be48654dc77ef2e021caf601a02ff2e 100644 (file)
--- a/makefile
+++ b/makefile
@@ -15,8 +15,8 @@ USEREADLINE ?= yes
 CYGWIN_BUILD ?= no
 USECOMPRESSION ?= no
 MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"4/30/2012\""
-VERSION = "\"1.25.0\""
+RELEASE_DATE = "\"5/14/2012\""
+VERSION = "\"1.25.1\""
 FORTAN_COMPILER = gfortran
 FORTRAN_FLAGS = 
 
index c879d1b621b871a5f7a9e3dbf48df70a7d928320..87929a4d9f8d8c91998e6e9d9760a22c67c60df0 100644 (file)
@@ -434,18 +434,18 @@ void MatrixOutputCommand::printSims(ostream& out, vector< vector<double> >& simM
                out << simMatrix.size() << endl;
                
                if (output == "lt") {
-                       for (int m = 0; m < simMatrix.size(); m++)      {
-                               out << lookup[m]->getGroup() << '\t';
-                               for (int n = 0; n < m; n++)     {
-                                       out << simMatrix[m][n] << '\t'; 
+                       for (int b = 0; b < simMatrix.size(); b++)      {
+                               out << lookup[b]->getGroup() << '\t';
+                               for (int n = 0; n < b; n++)     {
+                                       out << simMatrix[b][n] << '\t'; 
                                }
                                out << endl;
                        }
                }else{
-                       for (int m = 0; m < simMatrix.size(); m++)      {
-                               out << lookup[m]->getGroup() << '\t';
-                               for (int n = 0; n < simMatrix[m].size(); n++)   {
-                                       out << simMatrix[m][n] << '\t'; 
+                       for (int b = 0; b < simMatrix.size(); m++)      {
+                               out << lookup[b]->getGroup() << '\t';
+                               for (int n = 0; n < simMatrix[b].size(); n++)   {
+                                       out << simMatrix[b][n] << '\t'; 
                                }
                                out << endl;
                        }
index 61b39514e43b79bc146eb3e37ed4dc8d41017a62..6ecb86f94e5533165551ed5ae571f0c51f3c9ff9 100644 (file)
@@ -117,6 +117,81 @@ void MothurOut::clearCurrentFiles()  {
                exit(1);
        }
 }
+/***********************************************************************/
+string MothurOut::findProgramPath(string programName){
+       try { 
+               
+               string envPath = getenv("PATH");
+               string pPath = "";
+               
+               //delimiting path char
+               char delim;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+        delim = ':';
+#else
+        delim = ';';
+#endif
+               
+               //break apart path variable by ':'
+               vector<string> dirs;
+               splitAtChar(envPath, dirs, delim);
+               
+        if (debug) { mothurOut("[DEBUG]: dir's in path: \n"); }
+        
+               //get path related to mothur
+               for (int i = 0; i < dirs.size(); i++) {
+            
+            if (debug) { mothurOut("[DEBUG]: " + dirs[i] + "\n"); }
+            
+                       //to lower so we can find it
+                       string tempLower = "";
+                       for (int j = 0; j < dirs[i].length(); j++) {  tempLower += tolower(dirs[i][j]);  }
+                       
+                       //is this mothurs path?
+                       if (tempLower.find(programName) != -1) {  pPath = dirs[i]; break;  }
+               }
+        
+               if (debug) { mothurOut("[DEBUG]: programPath = " + pPath + "\n"); }
+        
+               if (pPath != "") {
+                       //add programName so it looks like what argv would look like
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            pPath += "/" + programName;
+#else
+            pPath += "\\" + programName;
+#endif
+               }else {
+                       //okay programName is not in the path, so the folder programName is in must be in the path
+                       //lets find out which one
+                       
+                       //get path related to the program
+                       for (int i = 0; i < dirs.size(); i++) {
+                
+                if (debug) { mothurOut("[DEBUG]: looking in " + dirs[i] + " for " + programName + " \n"); }
+                
+                               //is this the programs path?
+                               ifstream in;
+                               string tempIn = dirs[i];
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                tempIn += "/" + programName;
+#else
+                tempIn += "\\" + programName;
+#endif
+                               openInputFile(tempIn, in, "");
+                               
+                               //if this file exists
+                               if (in) { in.close(); pPath = tempIn; if (debug) { mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break;   }
+                       }
+               }
+               
+               return pPath;
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "findProgramPath");
+               exit(1);
+       }
+}
 /*********************************************************************************************/
 void MothurOut::setFileName(string filename)  {
        try {
index 0e31f8414a3a3477c1d8ab928d79aee0ccdc9957..cc8bfb6b101b13b0ebe45a2eb2f5472600d6e0fc 100644 (file)
@@ -82,6 +82,7 @@ class MothurOut {
                int appendFiles(string, string);
                int renameFile(string, string); //oldname, newname
                string getFullPathName(string);
+        string findProgramPath(string programName);
                string hasPath(string);
                string getExtension(string);
                string getPathName(string);
index 0d41e63f3526bc9af4e3933d5f3337d62ebd0f2a..93f46ba1e716f80e321245832eb11eabb2bda733 100644 (file)
@@ -15,6 +15,7 @@ vector<string> OTUAssociationCommand::setParameters(){
        try {
                CommandParameter pshared("shared", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pshared);
                CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(prelabund);
+        CommandParameter pmetadata("metadata", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none",false,false); parameters.push_back(pmetadata);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                CommandParameter pmethod("method", "Multiple", "pearson-spearman-kendall", "pearson", "", "", "",false,false); parameters.push_back(pmethod);
@@ -35,7 +36,8 @@ string OTUAssociationCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The otu.association command reads a shared or relabund file and calculates the correlation coefficients between otus.\n";
-               helpString += "The otu.association command parameters are shared, relabund, groups, method and label.  The shared or relabund parameter is required.\n";
+        helpString += "If you provide a metadata file, mothur will calculate te correlation bewteen the metadata and the otus.\n";
+               helpString += "The otu.association command parameters are shared, relabund, metadata, groups, method and label.  The shared or relabund parameter is required.\n";
                helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
                helpString += "The label parameter allows you to select what distances level you would like used, and are also separated by dashes.\n";
                helpString += "The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n";
@@ -110,6 +112,14 @@ OTUAssociationCommand::OTUAssociationCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
                                }
+                
+                it = parameters.find("metadata");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["metadata"] = inputDir + it->second;         }
+                               }
                        }
                        
                        
@@ -124,6 +134,10 @@ OTUAssociationCommand::OTUAssociationCommand(string option)  {
                        else if (relabundfile == "not found") { relabundfile = ""; }
                        else { inputFileName = relabundfile; m->setRelAbundFile(relabundfile); }
                        
+            metadatafile = validParameter.validFile(parameters, "metadata", true);
+                       if (metadatafile == "not open") { abort = true; metadatafile = ""; }
+                       else if (metadatafile == "not found") { metadatafile = ""; }
+            
                        groups = validParameter.validFile(parameters, "groups", false);                 
                        if (groups == "not found") { groups = "";  pickedGroups = false;  }
                        else { 
@@ -176,6 +190,8 @@ int OTUAssociationCommand::execute(){
        try {
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        if (metadatafile != "") {  readMetadata(); }
                
                //function are identical just different datatypes
                if (sharedfile != "")                   {  processShared();             } 
@@ -201,6 +217,15 @@ int OTUAssociationCommand::processShared(){
                InputData* input = new InputData(sharedfile, "sharedfile");
                vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
                string lastLabel = lookup[0]->getLabel();
+        
+        if (metadatafile != "") { 
+            getMetadata();  
+            bool error = false;
+            if (metadata[0].size() != lookup.size()) { m->mothurOut("[ERROR]: You have selected to use " + toString(metadata[0].size()) + " data rows from the metadata file, but " + toString(lookup.size()) + " from the shared file.\n");  m->control_pressed = true; error=true;}
+            if (error) {
+                //maybe add extra info here?? compare groups in each file??
+            }
+        }
                
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> processedLabels;
@@ -289,29 +314,50 @@ int OTUAssociationCommand::process(vector<SharedRAbundVector*>& lookup){
                out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
                
                //column headings
-               out << "OTUA\tOTUB\t" << method << "Coef\tSignificance\n";
+               if (metadatafile == "") { out << "OTUA\tOTUB\t" << method << "Coef\tSignificance\n"; }
+        else { out << "OTUA\tMetadata\t" << method << "Coef\tSignificance\n";  }
+
                
                vector< vector<double> > xy; xy.resize(lookup[0]->getNumBins());
                for (int i = 0; i < lookup[0]->getNumBins(); i++) { for (int j = 0; j < lookup.size(); j++) { xy[i].push_back(lookup[j]->getAbundance(i)); } }
                
                LinearAlgebra linear;
-               for (int i = 0; i < xy.size(); i++) {
-                       
-                       for (int k = 0; k < i; k++) {
-                               
-                               if (m->control_pressed) { out.close(); return 0; }
+        if (metadatafile == "") {//compare otus
+            for (int i = 0; i < xy.size(); i++) {
+                
+                for (int k = 0; k < i; k++) {
+                    
+                    if (m->control_pressed) { out.close(); return 0; }
+                    
+                    double coef = 0.0;
+                    double sig = 0.0;
+                    if (method == "spearman")          {   coef = linear.calcSpearman(xy[i], xy[k], sig);      }
+                    else if (method == "pearson")      {       coef = linear.calcPearson(xy[i], xy[k], sig);   }
+                    else if (method == "kendall")      {       coef = linear.calcKendall(xy[i], xy[k], sig);   }                   
+                    else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
+                    
+                    out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl;
+                }
+            }
+               }else { //compare otus to metadata
+            for (int i = 0; i < xy.size(); i++) {
+                
+                for (int k = 0; k < metadata.size(); k++) {
+                    
+                    if (m->control_pressed) { out.close(); return 0; }
+                    
+                    double coef = 0.0;
+                    double sig = 0.0;
+                    if (method == "spearman")          {   coef = linear.calcSpearman(xy[i], metadata[k], sig);        }
+                    else if (method == "pearson")      {       coef = linear.calcPearson(xy[i], metadata[k], sig);     }
+                    else if (method == "kendall")      {       coef = linear.calcKendall(xy[i], metadata[k], sig);     }                   
+                    else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
+                    
+                    out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl;
+                }
+            }
 
-                               double coef = 0.0;
-                               double sig = 0.0;
-                               if (method == "spearman")               {   coef = linear.calcSpearman(xy[i], xy[k], sig);      }
-                               else if (method == "pearson")   {       coef = linear.calcPearson(xy[i], xy[k], sig);   }
-                               else if (method == "kendall")   {       coef = linear.calcKendall(xy[i], xy[k], sig);   }                   
-                               else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
-                       
-                out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl;
-                       }
-               }
-               
+        }
                out.close();
                
                
@@ -329,6 +375,17 @@ int OTUAssociationCommand::processRelabund(){
                InputData* input = new InputData(relabundfile, "relabund");
                vector<SharedRAbundFloatVector*> lookup = input->getSharedRAbundFloatVectors();
                string lastLabel = lookup[0]->getLabel();
+        
+        if (metadatafile != "") { 
+            getMetadata(); 
+            bool error = false;
+            if (metadata[0].size() != lookup.size()) { m->mothurOut("[ERROR]: You have selected to use " + toString(metadata[0].size()) + " data rows from the metadata file, but " + toString(lookup.size()) + " from the relabund file.\n");  m->control_pressed = true; error=true;}
+            if (error) {
+                //maybe add extra info here?? compare groups in each file??
+            }
+        }
+        
+        
                
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> processedLabels;
@@ -417,28 +474,49 @@ int OTUAssociationCommand::process(vector<SharedRAbundFloatVector*>& lookup){
                out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
                
                //column headings
-               out << "OTUA\tOTUB\t" << method << "Coef\tSignificance\n";
+               if (metadatafile == "") { out << "OTUA\tOTUB\t" << method << "Coef\tSignificance\n"; }
+        else { out << "OTUA\tMetadata\t" << method << "Coef\tSignificance\n";  }
                
                vector< vector<double> > xy; xy.resize(lookup[0]->getNumBins());
                for (int i = 0; i < lookup[0]->getNumBins(); i++) { for (int j = 0; j < lookup.size(); j++) { xy[i].push_back(lookup[j]->getAbundance(i)); } }
                
                LinearAlgebra linear;
-               for (int i = 0; i < xy.size(); i++) {
-                       
-                       for (int k = 0; k < i; k++) {
-                               
-                               if (m->control_pressed) { out.close(); return 0; }
-                               
-                               double coef = 0.0;
-                               double sig = 0.0;
-                               if (method == "spearman")               {   coef = linear.calcSpearman(xy[i], xy[k], sig);      }
-                               else if (method == "pearson")   {       coef = linear.calcPearson(xy[i], xy[k], sig);   }
-                               else if (method == "kendall")   {       coef = linear.calcKendall(xy[i], xy[k], sig);   }                   
-                               else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
-                               
-                out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl; 
-                       }
-               }
+        if (metadatafile == "") {//compare otus
+            for (int i = 0; i < xy.size(); i++) {
+                
+                for (int k = 0; k < i; k++) {
+                    
+                    if (m->control_pressed) { out.close(); return 0; }
+                    
+                    double coef = 0.0;
+                    double sig = 0.0;
+                    if (method == "spearman")          {   coef = linear.calcSpearman(xy[i], xy[k], sig);      }
+                    else if (method == "pearson")      {       coef = linear.calcPearson(xy[i], xy[k], sig);   }
+                    else if (method == "kendall")      {       coef = linear.calcKendall(xy[i], xy[k], sig);   }                   
+                    else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
+                    
+                    out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl;
+                }
+            }
+               }else { //compare otus to metadata
+            for (int i = 0; i < xy.size(); i++) {
+                
+                for (int k = 0; k < metadata.size(); k++) {
+                    
+                    if (m->control_pressed) { out.close(); return 0; }
+                    
+                    double coef = 0.0;
+                    double sig = 0.0;
+                    if (method == "spearman")          {   coef = linear.calcSpearman(xy[i], metadata[k], sig);        }
+                    else if (method == "pearson")      {       coef = linear.calcPearson(xy[i], metadata[k], sig);     }
+                    else if (method == "kendall")      {       coef = linear.calcKendall(xy[i], metadata[k], sig);     }                   
+                    else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
+                    
+                    out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl;
+                }
+            }
+            
+        }
                
                out.close();
                
@@ -451,6 +529,120 @@ int OTUAssociationCommand::process(vector<SharedRAbundFloatVector*>& lookup){
        }
 }
 /*****************************************************************/
+int OTUAssociationCommand::readMetadata(){
+       try {
+               ifstream in;
+               m->openInputFile(metadatafile, in);
+               
+               string headerLine = m->getline(in); m->gobble(in);
+               istringstream iss (headerLine,istringstream::in);
+               
+               //read the first label, because it refers to the groups
+               string columnLabel;
+               iss >> columnLabel; m->gobble(iss); 
+               
+               //save names of columns you are reading
+               while (!iss.eof()) {
+                       iss >> columnLabel; m->gobble(iss);
+                       metadataLabels.push_back(columnLabel);
+               }
+               int count = metadataLabels.size();
+        
+               //read rest of file
+               while (!in.eof()) {
+                       
+                       if (m->control_pressed) { in.close(); return 0; }
+                       
+                       string group = "";
+                       in >> group; m->gobble(in);
+            
+                       SharedRAbundFloatVector* tempLookup = new SharedRAbundFloatVector();
+                       tempLookup->setGroup(group);
+                       tempLookup->setLabel("1");
+                       
+                       for (int i = 0; i < count; i++) {
+                               float temp = 0.0;
+                               in >> temp; 
+                               tempLookup->push_back(temp, group);
+                       }
+                       
+                       metadataLookup.push_back(tempLookup);
+                       
+                       m->gobble(in);
+               }
+               in.close();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "OTUAssociationCommand", "readMetadata");        
+               exit(1);
+       }
+}
+/*****************************************************************/
+//eliminate groups user did not pick, remove zeroed out otus, fill metadata vector.
+int OTUAssociationCommand::getMetadata(){
+       try {
+        
+               vector<string> mGroups = m->getGroups();
+        
+               bool remove = false;
+               for (int i = 0; i < metadataLookup.size(); i++) {
+                       //if this sharedrabund is not from a group the user wants then delete it.
+                       if (!(m->inUsersGroups(metadataLookup[i]->getGroup(), mGroups))) { 
+                               delete metadataLookup[i]; metadataLookup[i] = NULL;
+                               metadataLookup.erase(metadataLookup.begin()+i); 
+                               i--; 
+                               remove = true;
+                       }
+               }
+        
+        vector<SharedRAbundFloatVector*> newLookup;
+               for (int i = 0; i < metadataLookup.size(); i++) {
+                       SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
+                       temp->setLabel(metadataLookup[i]->getLabel());
+                       temp->setGroup(metadataLookup[i]->getGroup());
+                       newLookup.push_back(temp);
+               }
+               
+               //for each bin
+        vector<string> newBinLabels;
+               for (int i = 0; i < metadataLookup[0]->getNumBins(); i++) {
+                       if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+                       
+                       //look at each sharedRabund and make sure they are not all zero
+                       bool allZero = true;
+                       for (int j = 0; j < metadataLookup.size(); j++) {
+                               if (metadataLookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
+                       }
+                       
+                       //if they are not all zero add this bin
+                       if (!allZero) {
+                               for (int j = 0; j < metadataLookup.size(); j++) {
+                                       newLookup[j]->push_back(metadataLookup[j]->getAbundance(i), metadataLookup[j]->getGroup());
+                               }
+                newBinLabels.push_back(metadataLabels[i]);
+                       }
+               }
+               
+        metadataLabels = newBinLabels;
+        
+               for (int j = 0; j < metadataLookup.size(); j++) {  delete metadataLookup[j];  } 
+        metadataLookup.clear();
+               
+        metadata.resize(newLookup[0]->getNumBins());
+               for (int i = 0; i < newLookup[0]->getNumBins(); i++) { for (int j = 0; j < newLookup.size(); j++) { metadata[i].push_back(newLookup[j]->getAbundance(i)); } }
+        
+        for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  }
+               
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "OTUAssociationCommand", "getMetadata"); 
+               exit(1);
+       }
+}
+/*****************************************************************/
 
 
 
index 7aaa88a61c37a21ec6a9af5d1bb8f0c90066c222..f1d047d97158c966bbeae80985b51c4725648885 100644 (file)
@@ -33,15 +33,19 @@ public:
        void help() { m->mothurOut(getHelpString()); }  
 private:
        
-       string sharedfile, relabundfile, groups, label, inputFileName, outputDir, method;
+       string sharedfile, relabundfile, metadatafile, groups, label, inputFileName, outputDir, method;
        bool abort, pickedGroups, allLines;
        set<string> labels;
+    vector<SharedRAbundFloatVector*> metadataLookup;
+    vector< vector< double> > metadata;
        
-       vector<string> outputNames, Groups;
+       vector<string> outputNames, Groups, metadataLabels;
        int processShared();
        int process(vector<SharedRAbundVector*>&);
        int processRelabund();
        int process(vector<SharedRAbundFloatVector*>&);
+    int readMetadata();
+    int getMetadata();
        
 };
 
index e6d7ce6d1aa30da836bb88f2833bd666ae11bd85..06ea359e6740bd822691e9f8ab6e3e53454cc724 100644 (file)
@@ -216,7 +216,7 @@ vector<int> ParseFastaQCommand::convertQual(string qual) {
        try {
                vector<int> qualScores;
                
-               int controlChar = int('!');
+               int controlChar = int('@');
                
                for (int i = 0; i < qual.length(); i++) { 
                        int temp = int(qual[i]);
index a9ef6cb0d85f44bde915606eb452db993ec40d8d..4ed3d8c1c02e1c5a1ee93b9dd6143309f5606f5c 100644 (file)
@@ -619,13 +619,15 @@ bool PhyloTree::ErrorCheck(vector<string> templateFileNames){
                map<string, int>::iterator itFind;
                map<string, int> taxonomyFileNames = name2Taxonomy;
                
+        if (m->debug) { m->mothurOut("[DEBUG]: in error check. Numseqs in template = " + toString(templateFileNames.size()) + ". Numseqs in taxonomy = " + toString(taxonomyFileNames.size()) + ".\n"); }
+        
                for (int i = 0; i < templateFileNames.size(); i++) {
                        itFind = taxonomyFileNames.find(templateFileNames[i]);
                        
                        if (itFind != taxonomyFileNames.end()) { //found it so erase it
                                taxonomyFileNames.erase(itFind);
                        }else {
-                               m->mothurOut(templateFileNames[i] + " is in your template file and is not in your taxonomy file. Please correct."); m->mothurOutEndLine();
+                               m->mothurOut("'" +templateFileNames[i] + "' is in your template file and is not in your taxonomy file. Please correct."); m->mothurOutEndLine();
                                okay = false;
                        }
                        
index 6a9cb3180172b8aa06aed0039d252de8bdd8873d..0454d9e253473b368af34f35021dabbc4a50ae1b 100644 (file)
@@ -86,12 +86,12 @@ int Rarefact::driver(RarefactionCurveData* rcd, int increment, int nIters = 1000
                                lookup->set(binNumber, abundance);
                                rank->set(abundance, rank->get(abundance)+1);
 
-                               if((i == 0) || (i+1) % increment == 0){
+                               if((i == 0) || ((i+1) % increment == 0) || (ends.count(i+1) != 0)){
                                        rcd->updateRankData(rank);
                                }
                        }
        
-                       if(numSeqs % increment != 0){
+                       if((numSeqs % increment != 0) || (ends.count(numSeqs) != 0)){
                                rcd->updateRankData(rank);
                        }
 
index 5e42a787bb77f44a7bb989c738c45b8449ed6e29..f6bc35551f50858e47a7999ce198145bcb81a4bb 100644 (file)
@@ -10,8 +10,8 @@
 class Rarefact {
        
 public:
-       Rarefact(OrderVector* o, vector<Display*> disp, int p) :
-                       numSeqs(o->getNumSeqs()), order(o), displays(disp), label(o->getLabel()), processors(p)  { m = MothurOut::getInstance(); }
+       Rarefact(OrderVector* o, vector<Display*> disp, int p, set<int> en) :
+                       numSeqs(o->getNumSeqs()), order(o), displays(disp), label(o->getLabel()), processors(p), ends(en)  { m = MothurOut::getInstance(); }
        Rarefact(vector<SharedRAbundVector*> shared, vector<Display*> disp) :
                                         lookup(shared), displays(disp) {  m = MothurOut::getInstance(); }
 
@@ -25,6 +25,7 @@ private:
        vector<Display*> displays;
        int numSeqs, numGroupComb, processors;
        string label;
+    set<int> ends;
        void mergeVectors(SharedRAbundVector*, SharedRAbundVector*);
        vector<SharedRAbundVector*> lookup; 
        MothurOut* m;
index 82ff7faef0d8886d21fc6b642f34c88c1b143ea1..652ff4e49ecf163fc71dad48b40eb1485012b105 100644 (file)
@@ -282,9 +282,15 @@ int RareFactCommand::execute(){
        
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
+        map<string, set<int> > labelToEnds;
                if ((format != "sharedfile")) { inputFileNames.push_back(inputfile);  }
-               else {  inputFileNames = parseSharedFile(sharedfile);  format = "rabund"; }
-                               
+               else {  inputFileNames = parseSharedFile(sharedfile, labelToEnds);  format = "rabund"; }
+               for (map<string, set<int> >::iterator it = labelToEnds.begin(); it != labelToEnds.end(); it++) {
+            cout << it->first << endl;
+            for (set<int>::iterator its = (it->second).begin(); its != (it->second).end(); its++) {
+                cout << (*its) << endl;
+            }
+        }
                if (m->control_pressed) { return 0; }
                
                map<int, string> file2Group; //index in outputNames[i] -> group
@@ -378,7 +384,10 @@ int RareFactCommand::execute(){
                                if(allLines == 1 || labels.count(order->getLabel()) == 1){
                                        
                                        m->mothurOut(order->getLabel()); m->mothurOutEndLine();
-                                       rCurve = new Rarefact(order, rDisplays, processors);
+                    map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
+                    set<int> ends;
+                    if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
+                                       rCurve = new Rarefact(order, rDisplays, processors, ends);
                                        rCurve->getCurve(freq, nIters);
                                        delete rCurve;
                                        
@@ -393,7 +402,11 @@ int RareFactCommand::execute(){
                                        order = (input->getOrderVector(lastLabel));
                                        
                                        m->mothurOut(order->getLabel()); m->mothurOutEndLine();
-                                       rCurve = new Rarefact(order, rDisplays, processors);
+                                       map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
+                    set<int> ends;
+                    if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
+                                       rCurve = new Rarefact(order, rDisplays, processors, ends);
+
                                        rCurve->getCurve(freq, nIters);
                                        delete rCurve;
                                        
@@ -433,7 +446,11 @@ int RareFactCommand::execute(){
                                order = (input->getOrderVector(lastLabel));
                                
                                m->mothurOut(order->getLabel()); m->mothurOutEndLine();
-                               rCurve = new Rarefact(order, rDisplays, processors);
+                               map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
+                set<int> ends;
+                if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
+                rCurve = new Rarefact(order, rDisplays, processors, ends);
+
                                rCurve->getCurve(freq, nIters);
                                delete rCurve;
                                
@@ -474,17 +491,38 @@ vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map
                
                //find different types of files
                map<string, map<string, string> > typesFiles;
+        map<string, vector< vector<string> > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci.
+        vector<string> groupNames;
                for (int i = 0; i < outputNames.size(); i++) {
+            
                        string extension = m->getExtension(outputNames[i]);
-                       
+            string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
+                       m->mothurRemove(combineFileName); //remove old file
+            
                        ifstream in;
                        m->openInputFile(outputNames[i], in);
                        
                        string labels = m->getline(in);
-                       string newLine = labels.substr(0, labels.find_first_of('\t'));
-                       
-                       newLine += "\tGroup" + labels.substr(labels.find_first_of('\t'));
             
+                       istringstream iss (labels,istringstream::in);
+            string newLabel = ""; vector<string> theseLabels;
+            while(!iss.eof()) {  iss >> newLabel; m->gobble(iss); theseLabels.push_back(newLabel); }
+            vector< vector<string> > allLabels;
+            vector<string> thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping
+            for (int j = 1; j < theseLabels.size()-1; j++) {
+                if (theseLabels[j+1] == "lci") {
+                    thisSet.push_back(theseLabels[j]); 
+                    thisSet.push_back(theseLabels[j+1]); 
+                    thisSet.push_back(theseLabels[j+2]);
+                    j++; j++;
+                }else{ //no lci or hci for this calc.
+                    thisSet.push_back(theseLabels[j]); 
+                }
+                allLabels.push_back(thisSet); 
+                thisSet.clear();
+            }
+            fileLabels[combineFileName] = allLabels;
+                    
             map<string, map<string, string> >::iterator itfind = typesFiles.find(extension);
             if (itfind != typesFiles.end()) {
                 (itfind->second)[outputNames[i]] = file2Group[i];
@@ -493,62 +531,57 @@ vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map
                 temp[outputNames[i]] = file2Group[i];
                 typesFiles[extension] = temp;
             }
-                       
-                       string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
-                       
-                       //print headers
-                       ofstream out;
-                       m->openOutputFile(combineFileName, out);
-                       out << newLine << endl;
-                       out.close();
-                       
+            if (!(m->inUsersGroups(file2Group[i], groupNames))) {  groupNames.push_back(file2Group[i]); }
                }
                
                //for each type create a combo file
-               map<int, int> lineToNumber; 
+               
                for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
                        
                        ofstream out;
                        string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
                        m->openOutputFileAppend(combineFileName, out);
                        newFileNames.push_back(combineFileName);
-                       map<string, string> thisTypesFiles = it->second;
-               
+                       map<string, string> thisTypesFiles = it->second; //it->second maps filename to group
+            set<int> numSampledSet;
+            
                        //open each type summary file
-                       map<string, vector<string> > files; //maps file name to lines in file
+                       map<string, map<int, vector< vector<string> > > > files; //maps file name to lines in file
                        int maxLines = 0;
-                       int numColumns = 0;
                        for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
                 
                 string thisfilename = itFileNameGroup->first;
                 string group = itFileNameGroup->second;
-               
+                
                                ifstream temp;
                                m->openInputFile(thisfilename, temp);
                                
                                //read through first line - labels
                                m->getline(temp);       m->gobble(temp);
                                
-                               vector<string> thisFilesLines;
-                               
-                               thisFilesLines.push_back(group);
-                               int count = 1;
+                               map<int, vector< vector<string> > > thisFilesLines;
                                while (!temp.eof()){
-                               
-                                       string thisLine = m->getline(temp);
-                                       
-                                       string numSampled = thisLine.substr(0, thisLine.find_first_of('\t'));
-                                       int num = 0;
-                                       convert(numSampled, num);
-                                       numColumns = m->getNumChar(thisLine, '\t');
-                                       lineToNumber[count] = num;
-                                       count++;
-                                                                       
-                                       thisFilesLines.push_back(thisLine);
-                                       m->gobble(temp);
+                    int numSampled = 0;
+                    temp >> numSampled; m->gobble(temp);
+                
+                    vector< vector<string> > theseReads;
+                    vector<string> thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear();
+                    for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
+                        vector<string> reads;
+                        string next = "";
+                        for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
+                            temp >> next; m->gobble(temp);
+                            reads.push_back(next);
+                        }
+                        theseReads.push_back(reads);
+                    }
+                    thisFilesLines[numSampled] = theseReads;
+                    m->gobble(temp);
+                   
+                    numSampledSet.insert(numSampled);
                                }
                                
-                               files[thisfilename] = thisFilesLines;
+                               files[group] = thisFilesLines;
                                
                                //save longest file for below
                                if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
@@ -557,34 +590,46 @@ vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map
                                m->mothurRemove(thisfilename);
                        }
                        
-                       
+            //output new labels line
+            out << fileLabels[combineFileName][0][0] << '\t';
+            for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
+                for (int n = 0; n < groupNames.size(); n++) { // for each group
+                    for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
+                        out << fileLabels[combineFileName][k][l] << '-' << groupNames[n] << '\t';
+                    }
+                }
+            }
+                       out << endl;
+            
                        //for each label
-                       for (int k = 1; k < maxLines; k++) {
+                       for (set<int>::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) {
                                
-                               //grab data for each group
-                               for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
-                    
-                                       string thisfilename = itFileNameGroup->first;
-                                       map<int, int>::iterator itLine = lineToNumber.find(k);
-                                       if (itLine != lineToNumber.end()) {
-                                               string output = toString(itLine->second);
-                                               if (k < files[thisfilename].size()) {
-                                                       string line = files[thisfilename][k];
-                                                       output = line.substr(0, line.find_first_of('\t'));
-                                                       output += '\t' + files[thisfilename][0] + '\t' + line.substr(line.find_first_of('\t'));
-                                               }else{
-                                                       output += '\t' + files[thisfilename][0] + '\t';
-                                                       for (int h = 0; h < numColumns; h++) {
-                                                               output += "NA\t";
-                                                       }
-                                               }
-                                               out << output << endl;
-                                       }else { m->mothurOut("[ERROR]: parsing results, cant find " + toString(k)); m->mothurOutEndLine(); }
-                               }
+                out << (*itNumSampled) << '\t';
+                               
+                if (m->control_pressed) { break; }
+                
+                for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk
+                                   //grab data for each group
+                    for (map<string, map<int, vector< vector<string> > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) {
+                        
+                        string group = itFileNameGroup->first;
+                       
+                        map<int, vector< vector<string> > >::iterator itLine = files[group].find(*itNumSampled);
+                        if (itLine != files[group].end()) { 
+                            for (int l = 0; l < (itLine->second)[k].size(); l++) { 
+                                out << (itLine->second)[k][l] << '\t';
+                               
+                            }                             
+                        }else { 
+                            for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { 
+                                out << "NA" << '\t';
+                            } 
+                        }
+                    }
+                }
+                out << endl;
                        }       
-                       
                        out.close();
-                       
                }
                
                //return combine file name
@@ -597,7 +642,7 @@ vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map
        }
 }
 //**********************************************************************************************************************
-vector<string> RareFactCommand::parseSharedFile(string filename) {
+vector<string> RareFactCommand::parseSharedFile(string filename, map<string, set<int> >& label2Ends) {
        try {
                vector<string> filenames;
                
@@ -629,6 +674,7 @@ vector<string> RareFactCommand::parseSharedFile(string filename) {
                                m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
                                rav.print(*(filehandles[lookup[i]->getGroup()]));
                                (*(filehandles[lookup[i]->getGroup()])).close();
+                label2Ends[lookup[i]->getLabel()].insert(rav.getNumSeqs());
                        }
                
                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
index 6aaa3de401e07f653abcd29cff4e98c868a94e4b..0e77a277e6ad7cda962a4c9476945f6b2aa6b557 100644 (file)
@@ -50,7 +50,7 @@ private:
        vector<string> groups;
        string outputDir;
        
-       vector<string> parseSharedFile(string);
+       vector<string> parseSharedFile(string, map<string, set<int> >&);
        vector<string> createGroupFile(vector<string>&, map<int, string>);
 };
 
index 64bdbb4ceebd18f5abeff63b26fbb4d4ac3dc924..726ddd61b75c6185cca6b4ebac108b7dd1b4d44f 100644 (file)
 #include "rarefactsharedcommand.h"
 #include "sharedsobs.h"
 #include "sharednseqs.h"
+#include "sharedutilities.h"
 
 //**********************************************************************************************************************
 vector<string> RareFactSharedCommand::setParameters(){ 
        try {
                CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
+        CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pdesign);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
                CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
                CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "",true,false); parameters.push_back(pcalc);
                CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pjumble);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
+        CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
@@ -39,7 +42,9 @@ string RareFactSharedCommand::getHelpString(){
                string helpString = "";
                ValidCalculators validCalculator;
                helpString += "The collect.shared command parameters are shared, label, freq, calc and groups.  shared is required if there is no current sharedfile. \n";
-               helpString += "The rarefaction.shared command parameters are shared, label, iters, groups, jumble and calc.  shared is required if there is no current sharedfile. \n";
+               helpString += "The rarefaction.shared command parameters are shared, design, label, iters, groups, sets, jumble and calc.  shared is required if there is no current sharedfile. \n";
+        helpString += "The design parameter allows you to assign your groups to sets. If provided mothur will run rarefaction.shared on a per set basis. \n";
+        helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
                helpString += "The rarefaction command should be in the following format: \n";
                helpString += "rarefaction.shared(label=yourLabel, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n";
                helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
@@ -114,6 +119,15 @@ RareFactSharedCommand::RareFactSharedCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["shared"] = inputDir + it->second;           }
                                }
+                
+                it = parameters.find("design");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["design"] = inputDir + it->second;           }
+                               }
+
                        }
                        
                        //get shared file
@@ -125,6 +139,11 @@ RareFactSharedCommand::RareFactSharedCommand(string option)  {
                                if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
                                else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
                        }else { m->setSharedFile(sharedfile); }
+            
+            designfile = validParameter.validFile(parameters, "design", true);
+                       if (designfile == "not open") { abort = true; designfile = ""; }
+                       else if (designfile == "not found") {   designfile = "";        }
+                       else { m->setDesignFile(designfile); }
                        
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
@@ -159,6 +178,12 @@ RareFactSharedCommand::RareFactSharedCommand(string option)  {
                                m->splitAtDash(groups, Groups);
                        }
                        m->setGroups(Groups);
+            
+            string sets = validParameter.validFile(parameters, "sets", false);                 
+                       if (sets == "not found") { sets = ""; }
+                       else { 
+                               m->splitAtDash(sets, Sets);
+                       }
                        
                        string temp;
                        temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
@@ -186,11 +211,73 @@ int RareFactSharedCommand::execute(){
        try {
        
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
-       
-               string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        
+        GroupMap designMap;
+        if (designfile == "") { //fake out designMap to run with process
+            process(designMap, "");
+        }else {
+            designMap.readDesignMap(designfile);
+            
+            //fill Sets - checks for "all" and for any typo groups
+                       SharedUtil util;
+                       vector<string> nameSets = designMap.getNamesOfGroups();
+                       util.setGroups(Sets, nameSets);
+            
+            for (int i = 0; i < Sets.size(); i++) {
+                process(designMap, Sets[i]);
+            }
+        }
+                    
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RareFactSharedCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
+       try {
+        Rarefact* rCurve;
+        vector<Display*> rDisplays;
+        
+        InputData input(sharedfile, "sharedfile");
+               lookup = input.getSharedRAbundVectors();
+        if (lookup.size() < 2) { 
+                       m->mothurOut("I cannot run the command without at least 2 valid groups."); 
+            for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+            return 0;
+        }
+        
+        string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        
+        vector<string> newGroups = m->getGroups();
+        if (thisSet != "") {  //make groups only filled with groups from this set so that's all inputdata will read
+            vector<string> thisSets; thisSets.push_back(thisSet);
+            newGroups = designMap.getNamesSeqs(thisSets);
+            fileNameRoot += thisSet + ".";
+        }
+        
+        vector<SharedRAbundVector*> subset;
+        if (thisSet == "") {  subset.clear(); subset = lookup;  }
+        else {//fill subset with this sets groups
+            subset.clear();
+            for (int i = 0; i < lookup.size(); i++) {
+                if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
+                    subset.push_back(lookup[i]);
+                }
+            }
+        }
                
                ValidCalculators validCalculator;
-                       
                for (int i=0; i<Estimators.size(); i++) {
                        if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) { 
                                if (Estimators[i] == "sharedobserved") { 
@@ -204,82 +291,90 @@ int RareFactSharedCommand::execute(){
                }
                
                //if the users entered no valid calculators don't execute command
-               if (rDisplays.size() == 0) { return 0; }
-                       
-               input = new InputData(sharedfile, "sharedfile");
-               lookup = input->getSharedRAbundVectors();
-               string lastLabel = lookup[0]->getLabel();
+               if (rDisplays.size() == 0) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
                
                if (m->control_pressed) { 
-                       m->clearGroups(); 
-                       delete input;
                        for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
                        for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
                        return 0;
                }
-
-
-               if (lookup.size() < 2) { 
-                       m->mothurOut("I cannot run the command without at least 2 valid groups."); 
-                       for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
-                       return 0;
-               }
-               
+        
+        
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+        string lastLabel = subset[0]->getLabel();
                set<string> processedLabels;
                set<string> userLabels = labels;
-       
+        
                //as long as you are not at the end of the file or done wih the lines you want
-               while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+               while((subset[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        if (m->control_pressed) { 
-                               m->clearGroups(); 
-                               delete input;
                                for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
                                for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
                                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
                                return 0;
                        }
                        
-                       if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
-                               m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                               rCurve = new Rarefact(lookup, rDisplays);
+                       if(allLines == 1 || labels.count(subset[0]->getLabel()) == 1){
+                               m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
+                               rCurve = new Rarefact(subset, rDisplays);
                                rCurve->getSharedCurve(freq, nIters);
                                delete rCurve;
-                       
-                               processedLabels.insert(lookup[0]->getLabel());
-                               userLabels.erase(lookup[0]->getLabel());
+                
+                               processedLabels.insert(subset[0]->getLabel());
+                               userLabels.erase(subset[0]->getLabel());
                        }
                        
-                       if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
-                                       string saveLabel = lookup[0]->getLabel();
-                       
-                                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
-                                       lookup = input->getSharedRAbundVectors(lastLabel);
+                       if ((m->anyLabelsToProcess(subset[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                string saveLabel = subset[0]->getLabel();
+                
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
+                lookup = input.getSharedRAbundVectors(lastLabel);
+                
+                if (thisSet == "") {  subset.clear(); subset = lookup;  }
+                else {//fill subset with this sets groups
+                    subset.clear();
+                    for (int i = 0; i < lookup.size(); i++) {
+                        if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
+                            subset.push_back(lookup[i]);
+                        }
+                    }
+                }
 
-                                       m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                                       rCurve = new Rarefact(lookup, rDisplays);
-                                       rCurve->getSharedCurve(freq, nIters);
-                                       delete rCurve;
-
-                                       processedLabels.insert(lookup[0]->getLabel());
-                                       userLabels.erase(lookup[0]->getLabel());
-                                       
-                                       //restore real lastlabel to save below
-                                       lookup[0]->setLabel(saveLabel);
+                m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
+                rCurve = new Rarefact(subset, rDisplays);
+                rCurve->getSharedCurve(freq, nIters);
+                delete rCurve;
+                
+                processedLabels.insert(subset[0]->getLabel());
+                userLabels.erase(subset[0]->getLabel());
+                
+                //restore real lastlabel to save below
+                subset[0]->setLabel(saveLabel);
                        }
-                               
+            
                        
-                       lastLabel = lookup[0]->getLabel();
+                       lastLabel = subset[0]->getLabel();
                        
                        //get next line to process
                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
-                       lookup = input->getSharedRAbundVectors();
+                       lookup = input.getSharedRAbundVectors();
+            
+            if (lookup[0] != NULL) {
+                if (thisSet == "") {  subset.clear(); subset = lookup;  }
+                else {//fill subset with this sets groups
+                    subset.clear();
+                    for (int i = 0; i < lookup.size(); i++) {
+                        if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
+                            subset.push_back(lookup[i]);
+                        }
+                    }
+                }
+            }else {  subset.clear(); subset.push_back(NULL); }
+
                }
                
                if (m->control_pressed) { 
-                       m->clearGroups(); 
-                       delete input;
                        for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
                        for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
                        return 0;
@@ -299,8 +394,6 @@ int RareFactSharedCommand::execute(){
                }
                
                if (m->control_pressed) { 
-                       m->clearGroups(); 
-                       delete input; 
                        for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
                        for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
                        return 0;
@@ -309,33 +402,33 @@ int RareFactSharedCommand::execute(){
                //run last label if you need to
                if (needToRun == true)  {
                        for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i]; }  } 
-                       lookup = input->getSharedRAbundVectors(lastLabel);
-
-                       m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                       rCurve = new Rarefact(lookup, rDisplays);
+                       lookup = input.getSharedRAbundVectors(lastLabel);
+            
+            if (thisSet == "") {  subset.clear(); subset = lookup;  }
+            else {//fill subset with this sets groups
+                subset.clear();
+                for (int i = 0; i < lookup.size(); i++) {
+                    if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
+                        subset.push_back(lookup[i]);
+                    }
+                }
+            }
+            
+                       m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
+                       rCurve = new Rarefact(subset, rDisplays);
                        rCurve->getSharedCurve(freq, nIters);
                        delete rCurve;
                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
                }
                
                for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }       
-               m->clearGroups(); 
-               delete input;
-               
-               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
-               
-               m->mothurOutEndLine();
-               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
-               m->mothurOutEndLine();
 
-               return 0;
-       }
+        
+        return 0;
+    }
        catch(exception& e) {
-               m->errorOut(e, "RareFactSharedCommand", "execute");
+               m->errorOut(e, "RareFactSharedCommand", "process");
                exit(1);
        }
 }
-
-
 //**********************************************************************************************************************
index 2112e8d78565712a490de69bedd2ffb44844a768..1a2d944894feabe26b4103e5431c0da08c6574b4 100644 (file)
@@ -36,17 +36,16 @@ public:
 private:
        
        vector<SharedRAbundVector*> lookup;
-       InputData* input;
-       Rarefact* rCurve;
-       vector<Display*> rDisplays;
        int nIters;
        string format;
        float freq;
        
        bool abort, allLines, jumble;
        set<string> labels; //holds labels to be used
-       string label, calc, groups, outputDir, sharedfile;
-       vector<string>  Estimators, Groups, outputNames;
+       string label, calc, groups, outputDir, sharedfile, designfile;
+       vector<string>  Estimators, Groups, outputNames, Sets;
+    
+    int process(GroupMap&, string);
 
 };
 
diff --git a/removeotulabelscommand.cpp b/removeotulabelscommand.cpp
new file mode 100644 (file)
index 0000000..77305fb
--- /dev/null
@@ -0,0 +1,380 @@
+//
+//  removeotulabels.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 5/21/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "removeotulabelscommand.h"
+
+//**********************************************************************************************************************
+vector<string> RemoveOtuLabelsCommand::setParameters(){        
+       try {
+        CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
+        CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pconstaxonomy);
+               CommandParameter potucorr("otucorr", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(potucorr);
+        CommandParameter pcorraxes("corraxes", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pcorraxes);
+        CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtuLabelsCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string RemoveOtuLabelsCommand::getHelpString(){        
+       try {
+               string helpString = "";
+               helpString += "The remove.otulabels command can be used to remove specific otus with the output from classify.otu, otu.association, or corr.axes.\n";
+               helpString += "The remove.otulabels parameters are: constaxonomy, otucorr, corraxes, and accnos.\n";
+               helpString += "The constaxonomy parameter is input the results of the classify.otu command.\n";
+        helpString += "The otucorr parameter is input the results of the otu.association command.\n";
+        helpString += "The corraxes parameter is input the results of the corr.axes command.\n";
+               helpString += "The remove.otulabels commmand should be in the following format: \n";
+               helpString += "remove.otulabels(accnos=yourListOfOTULabels, corraxes=yourCorrAxesFile)\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtuLabelsCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+RemoveOtuLabelsCommand::RemoveOtuLabelsCommand(){      
+       try {
+               abort = true; calledHelp = true;
+               setParameters();
+        vector<string> tempOutNames;
+               outputTypes["contaxonomy"] = tempOutNames; 
+        outputTypes["otu.corr"] = tempOutNames;
+        outputTypes["corr.axes"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtuLabelsCommand", "RemoveOtuLabelsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+RemoveOtuLabelsCommand::RemoveOtuLabelsCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;   
+        
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       //valid paramters for this command
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                
+                //edit file types below to include only the types you added as parameters
+                
+                               string path;
+                it = parameters.find("constaxonomy");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["constaxonomy"] = inputDir + it->second;             }
+                               }
+                
+                it = parameters.find("accnos");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
+                               }
+                
+                it = parameters.find("corraxes");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["corraxes"] = inputDir + it->second;         }
+                               }
+                
+                it = parameters.find("otucorr");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["otucorr"] = inputDir + it->second;          }
+                               }
+            }
+            
+            vector<string> tempOutNames;
+            outputTypes["contaxonomy"] = tempOutNames; 
+            outputTypes["otu.corr"] = tempOutNames;
+            outputTypes["corr.axes"] = tempOutNames;
+            
+                       //check for parameters
+            accnosfile = validParameter.validFile(parameters, "accnos", true);
+                       if (accnosfile == "not open") { abort = true; }
+                       else if (accnosfile == "not found") {  
+                               accnosfile = m->getAccnosFile(); 
+                               if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
+                               else { 
+                                       m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
+                                       abort = true;
+                               } 
+                       }else { m->setAccnosFile(accnosfile); } 
+                       
+                       constaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true);
+                       if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; }
+                       else if (constaxonomyfile == "not found") {  constaxonomyfile = "";  }
+            
+            corraxesfile = validParameter.validFile(parameters, "corraxes", true);
+                       if (corraxesfile == "not open") { corraxesfile = ""; abort = true; }
+                       else if (corraxesfile == "not found") {  corraxesfile = "";  }
+            
+            otucorrfile = validParameter.validFile(parameters, "otucorr", true);
+                       if (otucorrfile == "not open") { otucorrfile = ""; abort = true; }
+                       else if (otucorrfile == "not found") {  otucorrfile = "";  }
+            
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){   outputDir = "";        }
+            
+            if ((constaxonomyfile == "") && (corraxesfile == "") && (otucorrfile == ""))  { m->mothurOut("You must provide one of the following: constaxonomy, corraxes or otucorr."); m->mothurOutEndLine(); abort = true; }
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtuLabelsCommand", "RemoveOtuLabelsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int RemoveOtuLabelsCommand::execute(){
+       try {
+               
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        //get labels you want to keep
+               readAccnos();
+               
+               if (m->control_pressed) { return 0; }
+               
+               //read through the correct file and output lines you want to keep
+               if (constaxonomyfile != "")     {               readClassifyOtu();      }
+               if (corraxesfile != "")         {               readCorrAxes();         }
+               if (otucorrfile != "")          {               readOtuAssociation();   }
+        
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
+        
+        //output files created by command
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtuLabelsCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int RemoveOtuLabelsCommand::readClassifyOtu(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(constaxonomyfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(constaxonomyfile)) + "pick.taxonomy";
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               ifstream in;
+               m->openInputFile(constaxonomyfile, in);
+               
+               bool wroteSomething = false;
+               int removedCount = 0;
+               
+        //read headers
+        string headers = m->getline(in);
+        out << headers << endl;
+        
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { break; }
+            
+            string otu = ""; string tax = "unknown";
+            int size = 0;
+            
+            in >> otu >> size >> tax; m->gobble(in);
+            
+            if (labels.count(otu) == 0) {
+                               wroteSomething = true;
+                out << otu << '\t' << size << '\t' << tax << endl;
+            }else {  removedCount++;  }
+        }
+        in.close();
+        out.close();
+               
+               if (wroteSomething == false) { m->mothurOut("Your file only contains labels from the .accnos file."); m->mothurOutEndLine();  }
+               outputNames.push_back(outputFileName);  outputTypes["constaxonomy"].push_back(outputFileName);
+               
+               m->mothurOut("Removed " + toString(removedCount) + " otus from your constaxonomy file."); m->mothurOutEndLine();
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtuLabelsCommand", "readClassifyOtu");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int RemoveOtuLabelsCommand::readOtuAssociation(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(otucorrfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(otucorrfile)) + "pick.corr";
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               ifstream in;
+               m->openInputFile(otucorrfile, in);
+               
+               bool wroteSomething = false;
+               int removedCount = 0;
+               
+        //read headers
+        string headers = m->getline(in);
+        out << headers << endl;
+        
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { break; }
+            
+            string otu1 = ""; 
+            string otu2 = ""; 
+            in >> otu1 >> otu2;
+            string line = m->getline(in); m->gobble(in);
+            
+            if ((labels.count(otu1) == 0) && (labels.count(otu2) == 0)){
+                               wroteSomething = true;
+                
+                out << otu1 << '\t' << otu2 << '\t' << line << endl;
+            }else { removedCount++; }
+        }
+        in.close();
+        out.close();
+               
+               if (wroteSomething == false) { m->mothurOut("Your file only contains labels from the .accnos file."); m->mothurOutEndLine();  }
+               outputNames.push_back(outputFileName);  outputTypes["otu.corr"].push_back(outputFileName);
+               
+               m->mothurOut("Removed " + toString(removedCount) + " lines from your otu.corr file."); m->mothurOutEndLine();
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtuLabelsCommand", "readOtuAssociation");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int RemoveOtuLabelsCommand::readCorrAxes(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(corraxesfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(corraxesfile)) + "pick.axes";
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+        
+               ifstream in;
+               m->openInputFile(corraxesfile, in);
+               
+               bool wroteSomething = false;
+               int removedCount = 0;
+               
+        //read headers
+        string headers = m->getline(in);
+        out << headers << endl;
+        
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { break; }
+            
+            string otu = ""; 
+            in >> otu;
+            string line = m->getline(in); m->gobble(in);
+            
+            if (labels.count(otu) == 0) {
+                               wroteSomething = true;
+                
+                out << otu << '\t' << line << endl;
+            }else { removedCount++; }
+        }
+        in.close();
+        out.close();
+               
+               if (wroteSomething == false) { m->mothurOut("Your file only contains labels from the .accnos file."); m->mothurOutEndLine();  }
+               outputNames.push_back(outputFileName);  outputTypes["corr.axes"].push_back(outputFileName);
+               
+               m->mothurOut("Removed " + toString(removedCount) + " lines from your corr.axes file."); m->mothurOutEndLine();
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtuLabelsCommand", "readCorrAxes");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+int RemoveOtuLabelsCommand::readAccnos(){
+       try {
+               
+               ifstream in;
+               m->openInputFile(accnosfile, in);
+               string name;
+               
+               while(!in.eof()){
+                       in >> name;
+            
+                       labels.insert(name);
+                       
+                       m->gobble(in);
+               }
+               in.close();     
+               
+               return 0;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtuLabelsCommand", "readAccnos");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
+
diff --git a/removeotulabelscommand.h b/removeotulabelscommand.h
new file mode 100644 (file)
index 0000000..da2441d
--- /dev/null
@@ -0,0 +1,55 @@
+#ifndef Mothur_removeotulabelscommand_h
+#define Mothur_removeotulabelscommand_h
+
+
+//
+//  removeotulabelscommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 5/21/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "command.hpp"
+
+/**************************************************************************************************/
+
+class RemoveOtuLabelsCommand : public Command {
+public:
+    RemoveOtuLabelsCommand(string);
+    RemoveOtuLabelsCommand();
+    ~RemoveOtuLabelsCommand(){}
+    
+    vector<string> setParameters();
+    string getCommandName()                    { return "remove.otulabels";          }
+    string getCommandCategory()                { return "OTU-Based Approaches";        } 
+    string getHelpString();    
+    string getCitation() { return "http://www.mothur.org/wiki/Get.otulabels"; }
+    string getDescription()            { return "Can be used with output from classify.otu, otu.association, or corr.axes to remove specific otus."; }
+    
+    int execute(); 
+    void help() { m->mothurOut(getHelpString()); }     
+    
+private:
+    bool abort;
+    string outputDir, accnosfile, constaxonomyfile, otucorrfile, corraxesfile;
+    vector<string> outputNames;
+    set<string> labels;
+    
+    int readClassifyOtu();
+    int readOtuAssociation();
+    int readCorrAxes();
+    int readAccnos();
+    
+};
+
+/**************************************************************************************************/
+
+
+
+
+
+
+
+
+#endif
index d877f4b93f33d8123ff6a1a9f7f839dc680fcf61..a1e787bab8cd24ebb352593055e7d2fc10b406de 100644 (file)
@@ -603,7 +603,7 @@ int Sequence::getLongHomoPolymer(){
 int Sequence::getStartPos(){
        if(startPos == -1){
                for(int j = 0; j < alignmentLength; j++) {
-                       if(aligned[j] != '.'){
+                       if((aligned[j] != '.')&&(aligned[j] != '-')){
                                startPos = j + 1;
                                break;
                        }
@@ -668,7 +668,7 @@ int Sequence::filterFromPos(int end){
 int Sequence::getEndPos(){
        if(endPos == -1){
                for(int j=alignmentLength-1;j>=0;j--){
-                       if(aligned[j] != '.'){
+                       if((aligned[j] != '.')&&(aligned[j] != '-')){
                                endPos = j + 1;
                                break;
                        }
index b0d12b87bc49f4d3b6e9cae63558ba622b199701..ca8c62d43821652d6bf4ea868e9444d54c256ff4 100644 (file)
@@ -92,6 +92,8 @@ SetDirectoryCommand::SetDirectoryCommand(string option)  {
                        if (temp == "not found") {  debug = false;  nodebug=true; }
             else {  debug = m->isTrue(temp); }
             m->debug = debug;
+            
+            if (debug) { m->mothurOut("Setting [DEBUG] flag.\n"); }
                                
                        if ((input == "") && (output == "") && (tempdefault == "") && nodebug) {        
                                m->mothurOut("You must provide either an input, output, tempdefault or debug for the set.outdir command."); m->mothurOutEndLine(); abort = true;
index 20caead668196fc75d10de1275956b7955bdd3b6..f267ba4b7859ed091f749456cb5495799ffc9ec0 100644 (file)
@@ -306,6 +306,7 @@ int SffInfoCommand::execute(){
                        
                        int start = time(NULL);
                        
+            filenames[s] = m->getFullPathName(filenames[s]);
                        m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
                        
                        string accnos = "";
index 803b6cb7955af18d441d6c160e5aafb797c3a057..be410d9d9f4ecec2fc885a239c9ba57b5f409e89 100644 (file)
@@ -18,6 +18,7 @@ vector<string> ShhherCommand::setParameters(){
                CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
+        CommandParameter plarge("large", "Number", "", "-1", "", "", "",false,false); parameters.push_back(plarge);
                CommandParameter psigma("sigma", "Number", "", "60", "", "", "",false,false); parameters.push_back(psigma);
                CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "",false,false); parameters.push_back(pmindelta);
                CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder);
@@ -136,7 +137,7 @@ ShhherCommand::ShhherCommand(string option) {
                        flowFileName = validParameter.validFile(parameters, "flow", true);
                        flowFilesFileName = validParameter.validFile(parameters, "file", true);
                        if (flowFileName == "not found" && flowFilesFileName == "not found") {
-                               m->mothurOut("values for either flow or file must be provided for the shhh.seqs command.");
+                               m->mothurOut("values for either flow or file must be provided for the shhh.flows command.");
                                m->mothurOutEndLine();
                                abort = true; 
                        }
@@ -149,14 +150,21 @@ ShhherCommand::ShhherCommand(string option) {
                        else{
                                ofstream temp;
                 
-                string thisoutputDir = m->hasPath(flowFilesFileName); //if user entered a file with a path then preserve it
+                string thisoutputDir = outputDir;
+                if (outputDir == "") {  thisoutputDir =  m->hasPath(flowFilesFileName); } //if user entered a file with a path then preserve it
                 
-                               //flow.files = 9 character offset
-                               compositeFASTAFileName = thisoutputDir + m->getRootName(m->getSimpleName(flowFilesFileName)) + "shhh.fasta";
+                               //we want to rip off .files, and also .flow if its there
+                string fileroot = m->getRootName(m->getSimpleName(flowFilesFileName));
+                if (fileroot[fileroot.length()-1] == '.') {  fileroot = fileroot.substr(0, fileroot.length()-1); } //rip off dot
+                string extension = m->getExtension(fileroot);
+                if (extension == ".flow") { fileroot = m->getRootName(fileroot);  }
+                else { fileroot += "."; } //add back if needed
+                
+                               compositeFASTAFileName = thisoutputDir + fileroot + "shhh.fasta";
                                m->openOutputFile(compositeFASTAFileName, temp);
                                temp.close();
                                
-                               compositeNamesFileName = thisoutputDir + m->getRootName(m->getSimpleName(flowFilesFileName)) + "shhh.names";
+                               compositeNamesFileName = thisoutputDir + fileroot + "shhh.names";
                                m->openOutputFile(compositeNamesFileName, temp);
                                temp.close();
                        }
@@ -307,6 +315,17 @@ ShhherCommand::ShhherCommand(string option) {
 
                        temp = validParameter.validFile(parameters, "maxiter", false);  if (temp == "not found"){       temp = "1000";          }
                        m->mothurConvert(temp, maxIters); 
+            
+            temp = validParameter.validFile(parameters, "large", false);       if (temp == "not found"){       temp = "0";             }
+                       m->mothurConvert(temp, largeSize); 
+            if (largeSize != 0) { large = true; }
+            else { large = false;  }
+            if (largeSize < 0) {  m->mothurOut("The value of the large cannot be negative.\n"); }
+            
+#ifdef USE_MPI
+            if (large) { m->mothurOut("The large parameter is not available with the MPI-Enabled version.\n"); large=false; }                          
+#endif
+            
 
                        temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
                        m->mothurConvert(temp, sigma); 
@@ -2014,170 +2033,254 @@ int ShhherCommand::createProcesses(vector<string> filenames){
 }
 /**************************************************************************************************/
 
-int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName, int start, int end){
+vector<string> ShhherCommand::parseFlowFiles(string filename){
     try {
+        vector<string> files;
+        int count = 0;
         
-        for(int i=start;i<end;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       string flowFileName = filenames[i];
-            
-                       m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(filenames.size()) + ")\t<<<<<\n");
-                       m->mothurOut("Reading flowgrams...\n");
-                       
-            vector<string> seqNameVector;
-            vector<int> lengths;
-            vector<short> flowDataIntI;
-            vector<double> flowDataPrI;
-            map<string, int> nameMap;
-            vector<short> uniqueFlowgrams;
-            vector<int> uniqueCount;
-            vector<int> mapSeqToUnique;
-            vector<int> mapUniqueToSeq;
-            vector<int> uniqueLengths;
-            int numFlowCells;
-            
-            int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       m->mothurOut("Identifying unique flowgrams...\n");
-                       int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       m->mothurOut("Calculating distances between flowgrams...\n");
-            string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
-            unsigned long long begTime = time(NULL);
-            double begClock = clock();
+        ifstream in;
+        m->openInputFile(filename, in);
         
-            flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);    
-            
-            m->mothurOutEndLine();
-            m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
-
+        int thisNumFLows = 0;
+        in >> thisNumFLows; m->gobble(in);
+        
+        while (!in.eof()) {
+            if (m->control_pressed) { break; }
             
-                       string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
-                       createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       m->mothurOut("\nClustering flowgrams...\n");
-            string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
-                       cluster(listFileName, distFileName, namesFileName);
-                       
-                       if (m->control_pressed) { break; }
+            ofstream out;
+            string outputFileName = filename + toString(count) + ".temp";
+            m->openOutputFile(outputFileName, out);
+            out << thisNumFLows << endl;
+            files.push_back(outputFileName);
+            
+            int numLinesWrote = 0;
+            for (int i = 0; i < largeSize; i++) {
+                if (in.eof()) { break; }
+                string line = m->getline(in); m->gobble(in);
+                out << line << endl;
+                numLinesWrote++;
+            }
+            out.close();
             
-            vector<int> otuData;
-            vector<int> cumNumSeqs;
-            vector<int> nSeqsPerOTU;
-            vector<vector<int> > aaP;  //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
-            vector<vector<int> > aaI;  //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
-            vector<int> seqNumber;             //tMaster->anP:         the sequence id number sorted by OTU
-            vector<int> seqIndex;              //tMaster->anI;         the index that corresponds to seqNumber
+            if (numLinesWrote == 0) {  m->mothurRemove(outputFileName); files.pop_back();  }
+            count++;
+        }
+        in.close();
+        
+        if (m->control_pressed) { for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }  files.clear(); }
+        
+        m->mothurOut("\nDivided " + filename + " into " + toString(files.size()) + " files.\n\n"); 
+        
+        return files;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "parseFlowFiles");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
 
-                       
-                       int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
+int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName, int start, int end){
+    try {
+        
+        for(int i=start;i<end;i++){
                        
                        if (m->control_pressed) { break; }
                        
-                       m->mothurRemove(distFileName);
-                       m->mothurRemove(namesFileName);
-                       m->mothurRemove(listFileName);
-                       
-            vector<double> dist;               //adDist - distance of sequences to centroids
-            vector<short> change;              //did the centroid sequence change? 0 = no; 1 = yes
-            vector<int> centroids;             //the representative flowgram for each cluster m
-            vector<double> weight;
-            vector<double> singleTau;  //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
-            vector<int> nSeqsBreaks;
-            vector<int> nOTUsBreaks;
-            
-                       dist.assign(numSeqs * numOTUs, 0);
-            change.assign(numOTUs, 1);
-            centroids.assign(numOTUs, -1);
-            weight.assign(numOTUs, 0);
-            singleTau.assign(numSeqs, 1.0);
+            vector<string> theseFlowFileNames; theseFlowFileNames.push_back(filenames[i]);
+            if (large) {  theseFlowFileNames = parseFlowFiles(filenames[i]);  }
             
-            nSeqsBreaks.assign(2, 0);
-            nOTUsBreaks.assign(2, 0);
+            if (m->control_pressed) { break; }
             
-            nSeqsBreaks[0] = 0;
-            nSeqsBreaks[1] = numSeqs;
-            nOTUsBreaks[1] = numOTUs;
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       double maxDelta = 0;
-                       int iter = 0;
-                       
-                       begClock = clock();
-                       begTime = time(NULL);
+            double begClock = clock();
+            unsigned long long begTime;
             
-                       m->mothurOut("\nDenoising flowgrams...\n");
-                       m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
-                       
-                       while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
-                               
-                               if (m->control_pressed) { break; }
-                               
-                               double cycClock = clock();
-                               unsigned long long cycTime = time(NULL);
-                               fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
-                               
-                               if (m->control_pressed) { break; }
+            for (int g = 0; g < theseFlowFileNames.size(); g++) {
                 
-                               calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
-                               
-                               if (m->control_pressed) { break; }
+                string flowFileName = theseFlowFileNames[g];
+                m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(filenames.size()) + ")\t<<<<<\n");
+                m->mothurOut("Reading flowgrams...\n");
                 
-                               maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);  
+                vector<string> seqNameVector;
+                vector<int> lengths;
+                vector<short> flowDataIntI;
+                vector<double> flowDataPrI;
+                map<string, int> nameMap;
+                vector<short> uniqueFlowgrams;
+                vector<int> uniqueCount;
+                vector<int> mapSeqToUnique;
+                vector<int> mapUniqueToSeq;
+                vector<int> uniqueLengths;
+                int numFlowCells;
+                
+                int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
                 
                 if (m->control_pressed) { break; }
                 
-                               double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight); 
+                m->mothurOut("Identifying unique flowgrams...\n");
+                int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
                 
                 if (m->control_pressed) { break; }
                 
-                               checkCentroids(numOTUs, centroids, weight);
-                               
-                               if (m->control_pressed) { break; }
-                               
-                               calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU,  dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
-                               
-                               if (m->control_pressed) { break; }
-                               
-                               iter++;
-                               
-                               m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
+                m->mothurOut("Calculating distances between flowgrams...\n");
+                string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
+                begTime = time(NULL);
+               
                 
-                       }       
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       m->mothurOut("\nFinalizing...\n");
-                       fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       vector<int> otuCounts(numOTUs, 0);
-                       for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
-                       
-                       calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber); 
-            
-            if (m->control_pressed) { break; }
+                flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);        
+                
+                m->mothurOutEndLine();
+                m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
+                
+                
+                string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
+                createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
+                
+                if (m->control_pressed) { break; }
+                
+                m->mothurOut("\nClustering flowgrams...\n");
+                string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
+                cluster(listFileName, distFileName, namesFileName);
+                
+                if (m->control_pressed) { break; }
+                
+                vector<int> otuData;
+                vector<int> cumNumSeqs;
+                vector<int> nSeqsPerOTU;
+                vector<vector<int> > aaP;      //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
+                vector<vector<int> > aaI;      //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
+                vector<int> seqNumber;         //tMaster->anP:         the sequence id number sorted by OTU
+                vector<int> seqIndex;          //tMaster->anI;         the index that corresponds to seqNumber
+                
+                
+                int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
+                
+                if (m->control_pressed) { break; }
+                
+                m->mothurRemove(distFileName);
+                m->mothurRemove(namesFileName);
+                m->mothurRemove(listFileName);
+                
+                vector<double> dist;           //adDist - distance of sequences to centroids
+                vector<short> change;          //did the centroid sequence change? 0 = no; 1 = yes
+                vector<int> centroids;         //the representative flowgram for each cluster m
+                vector<double> weight;
+                vector<double> singleTau;      //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
+                vector<int> nSeqsBreaks;
+                vector<int> nOTUsBreaks;
+                
+                dist.assign(numSeqs * numOTUs, 0);
+                change.assign(numOTUs, 1);
+                centroids.assign(numOTUs, -1);
+                weight.assign(numOTUs, 0);
+                singleTau.assign(numSeqs, 1.0);
+                
+                nSeqsBreaks.assign(2, 0);
+                nOTUsBreaks.assign(2, 0);
+                
+                nSeqsBreaks[0] = 0;
+                nSeqsBreaks[1] = numSeqs;
+                nOTUsBreaks[1] = numOTUs;
+                
+                if (m->control_pressed) { break; }
+                
+                double maxDelta = 0;
+                int iter = 0;
+                
+                begClock = clock();
+                begTime = time(NULL);
+                
+                m->mothurOut("\nDenoising flowgrams...\n");
+                m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
+                
+                while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
+                    
+                    if (m->control_pressed) { break; }
+                    
+                    double cycClock = clock();
+                    unsigned long long cycTime = time(NULL);
+                    fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
+                    
+                    if (m->control_pressed) { break; }
+                    
+                    calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
+                    
+                    if (m->control_pressed) { break; }
+                    
+                    maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);  
+                    
+                    if (m->control_pressed) { break; }
+                    
+                    double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight); 
+                    
+                    if (m->control_pressed) { break; }
+                    
+                    checkCentroids(numOTUs, centroids, weight);
+                    
+                    if (m->control_pressed) { break; }
+                    
+                    calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU,  dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
+                    
+                    if (m->control_pressed) { break; }
+                    
+                    iter++;
+                    
+                    m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
+                    
+                }      
+                
+                if (m->control_pressed) { break; }
+                
+                m->mothurOut("\nFinalizing...\n");
+                fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
+                
+                if (m->control_pressed) { break; }
+                
+                setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
+                
+                if (m->control_pressed) { break; }
+                
+                vector<int> otuCounts(numOTUs, 0);
+                for(int i=0;i<numSeqs;i++)     {       otuCounts[otuData[i]]++;        }
+                
+                calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);        
+                
+                if (m->control_pressed) { break; }
+                
+                if ((large) && (g == 0)) {  flowFileName = filenames[i]; theseFlowFileNames[0] = filenames[i]; }
+                string thisOutputDir = outputDir;
+                if (outputDir == "") {  thisOutputDir = m->hasPath(flowFileName);  }
+                string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.qual";
+                string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.fasta";
+                string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.names";
+                string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.counts";
+                string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
+                string groupFileName = fileRoot + "shhh.groups";
+
+                
+                writeQualities(numOTUs, numFlowCells, qualityFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->control_pressed) { break; }
+                writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, fastaFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);if (m->control_pressed) { break; }
+                writeNames(thisCompositeNamesFileName, numOTUs, nameFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);                             if (m->control_pressed) { break; }
+                writeClusters(otuCountsFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);                 if (m->control_pressed) { break; }
+                writeGroups(groupFileName, fileRoot, numSeqs, seqNameVector);                                          if (m->control_pressed) { break; }
+                
+                if (large) {
+                    if (g > 0) {
+                        m->appendFiles(qualityFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.qual"));
+                        m->mothurRemove(qualityFileName);
+                        m->appendFiles(fastaFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.fasta"));
+                        m->mothurRemove(fastaFileName);
+                        m->appendFiles(nameFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.names"));
+                        m->mothurRemove(nameFileName);
+                        m->appendFiles(otuCountsFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.counts"));
+                        m->mothurRemove(otuCountsFileName);
+                        m->appendFiles(groupFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.groups"));
+                        m->mothurRemove(groupFileName);
+                    }
+                    m->mothurRemove(theseFlowFileNames[g]);
+                }
+                       }
             
-                       writeQualities(numOTUs, numFlowCells, flowFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->control_pressed) { break; }
-                       writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, flowFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);if (m->control_pressed) { break; }
-                       writeNames(thisCompositeNamesFileName, numOTUs, flowFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);                              if (m->control_pressed) { break; }
-                       writeClusters(flowFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);                       if (m->control_pressed) { break; }
-                       writeGroups(flowFileName, numSeqs, seqNameVector);                                              if (m->control_pressed) { break; }
-                       
                        m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
                }
                
@@ -2977,14 +3080,11 @@ void ShhherCommand::setOTUs(int numOTUs, int numSeqs, vector<int>& seqNumber, ve
 }
 /**************************************************************************************************/
 
-void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string filename, vector<int> otuCounts, vector<int>& nSeqsPerOTU, vector<int>& seqNumber,
+void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string qualityFileName, vector<int> otuCounts, vector<int>& nSeqsPerOTU, vector<int>& seqNumber,
                                    vector<double>& singleTau, vector<short>& flowDataIntI, vector<short>& uniqueFlowgrams, vector<int>& cumNumSeqs,
                                    vector<int>& mapUniqueToSeq, vector<string>& seqNameVector, vector<int>& centroids, vector<vector<int> >& aaI){
        
        try {
-               string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir = m->hasPath(filename);  }
-               string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.qual";
         
                ofstream qualityFile;
                m->openOutputFile(qualityFileName, qualityFile);
@@ -3088,11 +3188,9 @@ void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string filenam
 
 /**************************************************************************************************/
 
-void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTUs, int numFlowCells, string filename, vector<int> otuCounts, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& centroids){
+void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTUs, int numFlowCells, string fastaFileName, vector<int> otuCounts, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& centroids){
        try {
-               string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir = m->hasPath(filename);  }
-               string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.fasta";
+               
                ofstream fastaFile;
                m->openOutputFile(fastaFileName, fastaFile);
                
@@ -3117,7 +3215,8 @@ void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTU
                                        }
                                }
                                
-                               fastaFile << newSeq.substr(4) << endl;
+                               if (newSeq.length() >= 4) {  fastaFile << newSeq.substr(4) << endl;  }
+                else {  fastaFile << "NNNN" << endl;  }
                        }
                }
                fastaFile.close();
@@ -3136,11 +3235,9 @@ void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTU
 
 /**************************************************************************************************/
 
-void ShhherCommand::writeNames(string thisCompositeNamesFileName, int numOTUs, string filename, vector<int> otuCounts, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU){
+void ShhherCommand::writeNames(string thisCompositeNamesFileName, int numOTUs, string nameFileName, vector<int> otuCounts, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU){
        try {
-               string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir = m->hasPath(filename);  }
-               string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.names";
+               
                ofstream nameFile;
                m->openOutputFile(nameFileName, nameFile);
                
@@ -3174,13 +3271,9 @@ void ShhherCommand::writeNames(string thisCompositeNamesFileName, int numOTUs, s
 
 /**************************************************************************************************/
 
-void ShhherCommand::writeGroups(string filename, int numSeqs, vector<string>& seqNameVector){
+void ShhherCommand::writeGroups(string groupFileName, string fileRoot, int numSeqs, vector<string>& seqNameVector){
        try {
-               string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir = m->hasPath(filename);  }
-               string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(filename));
-               string groupFileName = fileRoot + "shhh.groups";
-               ofstream groupFile;
+        ofstream groupFile;
                m->openOutputFile(groupFileName, groupFile);
                
                for(int i=0;i<numSeqs;i++){
@@ -3199,11 +3292,8 @@ void ShhherCommand::writeGroups(string filename, int numSeqs, vector<string>& se
 
 /**************************************************************************************************/
 
-void ShhherCommand::writeClusters(string filename, int numOTUs, int numFlowCells, vector<int> otuCounts, vector<int>& centroids, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU, vector<int>& lengths, vector<short>& flowDataIntI){
+void ShhherCommand::writeClusters(string otuCountsFileName, int numOTUs, int numFlowCells, vector<int> otuCounts, vector<int>& centroids, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU, vector<int>& lengths, vector<short>& flowDataIntI){
        try {
-               string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir = m->hasPath(filename);  }
-               string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.counts";
                ofstream otuCountsFile;
                m->openOutputFile(otuCountsFileName, otuCountsFile);
                
@@ -3242,7 +3332,9 @@ void ShhherCommand::writeClusters(string filename, int numOTUs, int numFlowCells
                                                        //otuCountsFile << base;
                                                }
                                        }
-                                       otuCountsFile << newSeq.substr(4) << endl;
+                                       
+                    if (newSeq.length() >= 4) {  otuCountsFile << newSeq.substr(4) << endl;  }
+                    else {  otuCountsFile << "NNNN" << endl;  }
                                }
                                otuCountsFile << endl;
                        }
index c9772af6f0e87e32d24518ee0419885770e11998..5ef43df17099c0b269c73c60dd6cf02e355b6176 100644 (file)
@@ -56,10 +56,10 @@ private:
                linePair(int i, int j) : start(i), end(j) {}
        };
     
-       int abort;
+       bool abort, large;
        string outputDir, flowFileName, flowFilesFileName, lookupFileName, compositeFASTAFileName, compositeNamesFileName;
 
-       int processors, maxIters;
+       int processors, maxIters, largeSize;
        float cutoff, sigma, minDelta;
        string flowOrder;
     
@@ -68,6 +68,7 @@ private:
        vector<double> jointLookUp;
     vector<string> flowFileVector;
        
+    vector<string> parseFlowFiles(string);
     int driver(vector<string>, string, string, int, int);
     int createProcesses(vector<string>);
     int getFlowData(string, vector<string>&, vector<int>&, vector<short>&, map<string, int>&, int&);
@@ -90,7 +91,7 @@ private:
     void writeQualities(int, int, string, vector<int>, vector<int>&, vector<int>&, vector<double>&, vector<short>&, vector<short>&, vector<int>&, vector<int>&, vector<string>&, vector<int>&, vector<vector<int> >&);
     void writeSequences(string, int, int, string, vector<int>, vector<short>&, vector<string>&, vector<vector<int> >&, vector<int>&);
     void writeNames(string, int, string, vector<int>, vector<string>&, vector<vector<int> >&, vector<int>&);
-    void writeGroups(string, int, vector<string>&);
+    void writeGroups(string, string, int, vector<string>&);
     void writeClusters(string, int, int, vector<int>, vector<int>&, vector<short>&, vector<string>&, vector<vector<int> >&, vector<int>&, vector<int>&, vector<short>&);
     
        void getSingleLookUp();
index dabcd0469461f25a79ac707cf0e785c19974cc1e..59e49a79e9e702cfcf70c47ffee3a42da8a0b537 100644 (file)
@@ -25,7 +25,7 @@ SplitMatrix::SplitMatrix(string distfile, string name, string tax, float c, stri
 }
 /***********************************************************************/
 
-SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float cu, string t, int p, string output){
+SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float cu, string t, int p, bool cl, string output){
        m = MothurOut::getInstance();
        fastafile = ffile;
        namefile = name;
@@ -34,6 +34,7 @@ SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float c
        distCutoff = cu; //for fasta method if you are creating distance matrix you need a cutoff for that
        method = t;
        processors = p;
+    classic = cl;
        outputDir = output;
 }
 
@@ -185,7 +186,9 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                //process each distance file
                for (int i = 0; i < numGroups; i++) { 
                        
-                       string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(distCutoff);
+                       string options = "";
+            if (classic) { options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", output=lt"; }
+            else { options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(distCutoff); }
                        if (outputDir != "") { options += ", outputdir=" + outputDir; }
                        
                        Command* command = new DistanceCommand(options);
@@ -229,7 +232,9 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                for(int i=0;i<numGroups;i++){
                        string tempNameFile = namefile + "." + toString(i) + ".temp";
                        if (outputDir == "") { outputDir = m->hasPath(fastafile); }
-                       string tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist";
+                       string tempDistFile = "";
+            if (classic) { tempDistFile =  outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "phylip.dist";}
+            else { tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist"; }
 
                        //if there are valid distances
                        ifstream fileHandle;
index 4ff316073789b694916a75fae5d2081a813d96c7..b8aa55119c810871988e76ca156b1746bd4eeb4a 100644 (file)
@@ -20,7 +20,7 @@ class SplitMatrix  {
        public:
 
                SplitMatrix(string, string, string, float, string, bool); //column formatted distance file, namesfile, cutoff, method, large
-               SplitMatrix(string, string, string, float, float, string, int, string); //fastafile, namefile, taxFile, taxcutoff, cutoff, method, processors, outputDir
+               SplitMatrix(string, string, string, float, float, string, int, bool, string); //fastafile, namefile, taxFile, taxcutoff, cutoff, method, processors, classic, outputDir
                
                ~SplitMatrix();
                int split();
@@ -33,7 +33,7 @@ class SplitMatrix  {
                string distFile, namefile, singleton, method, taxFile, fastafile, outputDir;
                vector< map< string, string> > dists;
                float cutoff, distCutoff;
-               bool large;
+               bool large, classic;
                int processors;
                                
                int splitDistance();
index 8c4ea0d6cd5b4cf26e15ec3460da8e5fd74e5fb6..77961f1c807387080b914af5059fea03d3844ff4 100644 (file)
@@ -8,15 +8,19 @@
  */
 
 #include "summarysharedcommand.h"
+#include "subsample.h"
 
 //**********************************************************************************************************************
 vector<string> SummarySharedCommand::setParameters(){  
        try {
                CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
+        CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
                CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdistance);
                CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
+        CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
                CommandParameter pall("all", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pall);
+        CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
@@ -36,11 +40,14 @@ string SummarySharedCommand::getHelpString(){
        try {
                string helpString = "";
                ValidCalculators validCalculator;
-               helpString += "The summary.shared command parameters are shared, label, calc, distance, processors and all.  shared is required if there is no current sharedfile.\n";
+               helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all.  shared is required if there is no current sharedfile.\n";
                helpString += "The summary.shared command should be in the following format: \n";
                helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
                helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
                helpString +=  validCalculator.printCalc("sharedsummary");
+        helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
+        helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
+        helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
                helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
                helpString += "The default value for groups is all the groups in your groupfile.\n";
                helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
@@ -162,6 +169,21 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                        temp = validParameter.validFile(parameters, "distance", false);                                 if (temp == "not found") { temp = "false"; }
                        createPhylip = m->isTrue(temp);
                        
+            temp = validParameter.validFile(parameters, "iters", false);                       if (temp == "not found") { temp = "1000"; }
+                       m->mothurConvert(temp, iters); 
+            
+            output = validParameter.validFile(parameters, "output", false);            if(output == "not found"){      output = "lt"; }
+                       if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
+            
+            temp = validParameter.validFile(parameters, "subsample", false);           if (temp == "not found") { temp = "F"; }
+                       if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
+            else {  
+                if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
+                else { subsample = false; }
+            }
+            
+            if (subsample == false) { iters = 1; }
+            
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors); 
@@ -463,214 +485,374 @@ int SummarySharedCommand::execute(){
                exit(1);
        }
 }
-
 /***********************************************************/
-int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
+int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
        try {
-        vector< vector<seqDist> > calcDists;  //vector containing vectors that contains the summary results for each group compare
-        calcDists.resize(sumCalculators.size()); //one for each calc, this will be used to make .dist files
+               
+               out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+               
+               //output num seqs
+               out << simMatrix.size() << endl;
+               
+               if (output == "lt") {
+                       for (int b = 0; b < simMatrix.size(); b++)      {
+                               out << lookup[b]->getGroup() << '\t';
+                               for (int n = 0; n < b; n++)     {
+                    if (m->control_pressed) { return 0; }
+                                       out << simMatrix[b][n] << '\t'; 
+                               }
+                               out << endl;
+                       }
+               }else{
+                       for (int b = 0; b < simMatrix.size(); m++)      {
+                               out << lookup[b]->getGroup() << '\t';
+                               for (int n = 0; n < simMatrix[b].size(); n++)   {
+                    if (m->control_pressed) { return 0; }
+                                       out << simMatrix[b][n] << '\t'; 
+                               }
+                               out << endl;
+                       }
+               }
         
+        return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SummarySharedCommand", "printSims");
+               exit(1);
+       }
+}
+/***********************************************************/
+int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
+       try {
+        vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
+        vector< vector<seqDist>  > calcDists; calcDists.resize(sumCalculators.size());                 
         
-        if(processors == 1){
-            driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
-            m->appendFiles((sumFileName + ".temp"), sumFileName);
-            m->mothurRemove((sumFileName + ".temp"));
-            if (mult) {
-                m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
-                m->mothurRemove((sumAllFileName + ".temp"));
-            }
-        }else{
+        for (int thisIter = 0; thisIter < iters; thisIter++) {
             
-            int process = 1;
-            vector<int> processIDS;
+            vector<SharedRAbundVector*> thisItersLookup = thisLookup;
             
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-            //loop through and create all the processes you want
-            while (process != processors) {
-                int pid = fork();
+            if (subsample) {
+                SubSample sample;
+                vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
+                
+                //make copy of lookup so we don't get access violations
+                vector<SharedRAbundVector*> newLookup;
+                for (int k = 0; k < thisItersLookup.size(); k++) {
+                    SharedRAbundVector* temp = new SharedRAbundVector();
+                    temp->setLabel(thisItersLookup[k]->getLabel());
+                    temp->setGroup(thisItersLookup[k]->getGroup());
+                    newLookup.push_back(temp);
+                }
                 
-                if (pid > 0) {
-                    processIDS.push_back(pid); 
-                    process++;
-                }else if (pid == 0){
-                    driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
+                //for each bin
+                for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+                    if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+                    for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+                }
+                
+                tempLabels = sample.getSample(newLookup, subsampleSize);
+                thisItersLookup = newLookup;
+            }
+        
+            
+            if(processors == 1){
+                driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
+                m->appendFiles((sumFileName + ".temp"), sumFileName);
+                m->mothurRemove((sumFileName + ".temp"));
+                if (mult) {
+                    m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
+                    m->mothurRemove((sumAllFileName + ".temp"));
+                }
+            }else{
+                
+                int process = 1;
+                vector<int> processIDS;
+                
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                //loop through and create all the processes you want
+                while (process != processors) {
+                    int pid = fork();
                     
-                    //only do this if you want a distance file
-                    if (createPhylip) {
-                        string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
-                        ofstream outtemp;
-                        m->openOutputFile(tempdistFileName, outtemp);
+                    if (pid > 0) {
+                        processIDS.push_back(pid); 
+                        process++;
+                    }else if (pid == 0){
+                        driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
                         
-                        for (int i = 0; i < calcDists.size(); i++) {
-                            outtemp << calcDists[i].size() << endl;
+                        //only do this if you want a distance file
+                        if (createPhylip) {
+                            string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
+                            ofstream outtemp;
+                            m->openOutputFile(tempdistFileName, outtemp);
                             
-                            for (int j = 0; j < calcDists[i].size(); j++) {
-                                outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
+                            for (int i = 0; i < calcDists.size(); i++) {
+                                outtemp << calcDists[i].size() << endl;
+                                
+                                for (int j = 0; j < calcDists[i].size(); j++) {
+                                    outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
+                                }
                             }
+                            outtemp.close();
                         }
-                        outtemp.close();
+                        
+                        exit(0);
+                    }else { 
+                        m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                        for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                        exit(0);
                     }
-                    
-                    exit(0);
-                }else { 
-                    m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
-                    for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
-                    exit(0);
                 }
-            }
-            
-            //parent do your part
-            driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
-            m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
-            m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
-            if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
-            
-            //force parent to wait until all the processes are done
-            for (int i = 0; i < processIDS.size(); i++) {
-                int temp = processIDS[i];
-                wait(&temp);
-            }
-            
-            for (int i = 0; i < processIDS.size(); i++) {
-                m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
-                m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
-                if (mult) {    m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp"));  }
                 
-                if (createPhylip) {
-                    string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
-                    ifstream intemp;
-                    m->openInputFile(tempdistFileName, intemp);
+                //parent do your part
+                driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
+                m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
+                m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
+                if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
+                
+                //force parent to wait until all the processes are done
+                for (int i = 0; i < processIDS.size(); i++) {
+                    int temp = processIDS[i];
+                    wait(&temp);
+                }
+                
+                for (int i = 0; i < processIDS.size(); i++) {
+                    m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
+                    m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
+                    if (mult) {        m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp"));  }
                     
-                    for (int k = 0; k < calcDists.size(); k++) {
-                        int size = 0;
-                        intemp >> size; m->gobble(intemp);
+                    if (createPhylip) {
+                        string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
+                        ifstream intemp;
+                        m->openInputFile(tempdistFileName, intemp);
                         
-                        for (int j = 0; j < size; j++) {
-                            int seq1 = 0;
-                            int seq2 = 0;
-                            float dist = 1.0;
+                        for (int k = 0; k < calcDists.size(); k++) {
+                            int size = 0;
+                            intemp >> size; m->gobble(intemp);
                             
-                            intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
-                            
-                            seqDist tempDist(seq1, seq2, dist);
-                            calcDists[k].push_back(tempDist);
+                            for (int j = 0; j < size; j++) {
+                                int seq1 = 0;
+                                int seq2 = 0;
+                                float dist = 1.0;
+                                
+                                intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
+                                
+                                seqDist tempDist(seq1, seq2, dist);
+                                calcDists[k].push_back(tempDist);
+                            }
                         }
+                        intemp.close();
+                        m->mothurRemove(tempdistFileName);
                     }
-                    intemp.close();
-                    m->mothurRemove(tempdistFileName);
                 }
-            }
 #else
-            //////////////////////////////////////////////////////////////////////////////////////////////////////
-            //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
-            //Above fork() will clone, so memory is separate, but that's not the case with windows, 
-            //Taking advantage of shared memory to pass results vectors.
-            //////////////////////////////////////////////////////////////////////////////////////////////////////
-
-            vector<summarySharedData*> pDataArray; 
-            DWORD   dwThreadIdArray[processors-1];
-            HANDLE  hThreadArray[processors-1]; 
-            
-            //Create processor worker threads.
-            for( int i=1; i<processors; i++ ){
+                //////////////////////////////////////////////////////////////////////////////////////////////////////
+                //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
+                //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+                //Taking advantage of shared memory to pass results vectors.
+                //////////////////////////////////////////////////////////////////////////////////////////////////////
                 
-                //make copy of lookup so we don't get access violations
-                vector<SharedRAbundVector*> newLookup;
-                for (int k = 0; k < thisLookup.size(); k++) {
-                    SharedRAbundVector* temp = new SharedRAbundVector();
-                    temp->setLabel(thisLookup[k]->getLabel());
-                    temp->setGroup(thisLookup[k]->getGroup());
-                    newLookup.push_back(temp);
+                vector<summarySharedData*> pDataArray; 
+                DWORD   dwThreadIdArray[processors-1];
+                HANDLE  hThreadArray[processors-1]; 
+                
+                //Create processor worker threads.
+                for( int i=1; i<processors; i++ ){
+                    
+                    //make copy of lookup so we don't get access violations
+                    vector<SharedRAbundVector*> newLookup;
+                    for (int k = 0; k < thisLookup.size(); k++) {
+                        SharedRAbundVector* temp = new SharedRAbundVector();
+                        temp->setLabel(thisLookup[k]->getLabel());
+                        temp->setGroup(thisLookup[k]->getGroup());
+                        newLookup.push_back(temp);
+                    }
+                    
+                    //for each bin
+                    for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
+                        if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+                        for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
+                    }
+                    
+                    // Allocate memory for thread data.
+                    summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
+                    pDataArray.push_back(tempSum);
+                    processIDS.push_back(i);
+                    
+                    hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
                 }
                 
-                //for each bin
-                for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
-                    if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
-                    for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
+                //parent do your part
+                driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);   
+                m->appendFiles((sumFileName + "0.temp"), sumFileName);
+                m->mothurRemove((sumFileName + "0.temp"));
+                if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
+                
+                //Wait until all threads have terminated.
+                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+                
+                //Close all thread handles and free memory allocations.
+                for(int i=0; i < pDataArray.size(); i++){
+                    m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
+                    m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
+                    
+                    for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
+                    
+                    if (createPhylip) {
+                        for (int k = 0; k < calcDists.size(); k++) {
+                            int size = pDataArray[i]->calcDists[k].size();
+                            for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
+                        }
+                    }
+                    
+                    CloseHandle(hThreadArray[i]);
+                    delete pDataArray[i];
                 }
+                
+#endif
+            }
+            calcDistsTotals.push_back(calcDists);
+            
+            if (subsample) {  
+                
+                //clean up memory
+                for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
+                thisItersLookup.clear();
+                for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
+            }
+               }
 
-                // Allocate memory for thread data.
-                summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
-                pDataArray.push_back(tempSum);
-                processIDS.push_back(i);
+        if (iters != 1) {
+            //we need to find the average distance and standard deviation for each groups distance
+            
+            vector< vector<seqDist>  > calcAverages; calcAverages.resize(sumCalculators.size()); 
+            for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
+                calcAverages[i].resize(calcDistsTotals[0][i].size());
                 
-                hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
+                for (int j = 0; j < calcAverages[i].size(); j++) {
+                    calcAverages[i][j].seq1 = calcDists[i][j].seq1;
+                    calcAverages[i][j].seq2 = calcDists[i][j].seq2;
+                    calcAverages[i][j].dist = 0.0;
+                }
             }
             
-            //parent do your part
-            driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);   
-            m->appendFiles((sumFileName + "0.temp"), sumFileName);
-            m->mothurRemove((sumFileName + "0.temp"));
-            if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
+            for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
+                for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
+                    for (int j = 0; j < calcAverages[i].size(); j++) {
+                        calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+                    }
+                }
+            }
             
-            //Wait until all threads have terminated.
-            WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+            for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
+                for (int j = 0; j < calcAverages[i].size(); j++) {
+                    calcAverages[i][j].dist /= (float) iters;
+                }
+            }
             
-            //Close all thread handles and free memory allocations.
-            for(int i=0; i < pDataArray.size(); i++){
-                m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
-                m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
+            //find standard deviation
+            vector< vector<seqDist>  > stdDev; stdDev.resize(sumCalculators.size());
+            for (int i = 0; i < stdDev.size(); i++) {  //initialize sums to zero.
+                stdDev[i].resize(calcDistsTotals[0][i].size());
                 
-                for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
-                
-                if (createPhylip) {
-                    for (int k = 0; k < calcDists.size(); k++) {
-                        int size = pDataArray[i]->calcDists[k].size();
-                        for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
+                for (int j = 0; j < stdDev[i].size(); j++) {
+                    stdDev[i][j].seq1 = calcDists[i][j].seq1;
+                    stdDev[i][j].seq2 = calcDists[i][j].seq2;
+                    stdDev[i][j].dist = 0.0;
+                }
+            }
+            
+            for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+                for (int i = 0; i < stdDev.size(); i++) {  
+                    for (int j = 0; j < stdDev[i].size(); j++) {
+                        stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
                     }
                 }
-              
-                CloseHandle(hThreadArray[i]);
-                delete pDataArray[i];
             }
-
-#endif
-        }
-              
-        if (createPhylip) {
+            
+            for (int i = 0; i < stdDev.size(); i++) {  //finds average.
+                for (int j = 0; j < stdDev[i].size(); j++) {
+                    stdDev[i][j].dist /= (float) iters;
+                    stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
+                }
+            }
+            
+            //print results
             for (int i = 0; i < calcDists.size(); i++) {
-                if (m->control_pressed) { break; }
-                               
-                string distFileName = outputDir + m->getRootName(m->getSimpleName(sumFileName)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
-                outputNames.push_back(distFileName);
-                ofstream outDist;
-                m->openOutputFile(distFileName, outDist);
-                outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
-                
-                //initialize matrix
-                vector< vector<float> > matrix; //square matrix to represent the distance
+                vector< vector<double> > matrix; //square matrix to represent the distance
                 matrix.resize(thisLookup.size());
                 for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
                 
+                vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
+                stdmatrix.resize(thisLookup.size());
+                for (int k = 0; k < thisLookup.size(); k++) {  stdmatrix[k].resize(thisLookup.size(), 0.0); }
+                
                 
-                for (int j = 0; j < calcDists[i].size(); j++) {
-                    int row = calcDists[i][j].seq1;
-                    int column = calcDists[i][j].seq2;
-                    float dist = calcDists[i][j].dist;
+                for (int j = 0; j < calcAverages[i].size(); j++) {
+                    int row = calcAverages[i][j].seq1;
+                    int column = calcAverages[i][j].seq2;
+                    float dist = calcAverages[i][j].dist;
+                    float stdDist = stdDev[i][j].dist;
                     
                     matrix[row][column] = dist;
                     matrix[column][row] = dist;
+                    stdmatrix[row][column] = stdDist;
+                    stdmatrix[column][row] = stdDist;
                 }
                 
-                //output to file
-                outDist << thisLookup.size() << endl;
-                for (int r=0; r<thisLookup.size(); r++) { 
-                    //output name
-                    string name = thisLookup[r]->getGroup();
-                    if (name.length() < 10) { //pad with spaces to make compatible
-                        while (name.length() < 10) {  name += " ";  }
+                string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + ".ave.dist";
+                outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
+                ofstream outAve;
+                m->openOutputFile(distFileName, outAve);
+                outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
+                
+                printSims(outAve, matrix);
+                
+                outAve.close();
+                
+                distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + ".std.dist";
+                outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
+                ofstream outSTD;
+                m->openOutputFile(distFileName, outSTD);
+                outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
+                
+                printSims(outSTD, stdmatrix);
+                
+                outSTD.close();
+                
+            }
+        }else {
+            if (createPhylip) {
+                for (int i = 0; i < calcDists.size(); i++) {
+                    if (m->control_pressed) { break; }
+                    
+                    //initialize matrix
+                    vector< vector<double> > matrix; //square matrix to represent the distance
+                    matrix.resize(thisLookup.size());
+                    for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
+                    
+                    for (int j = 0; j < calcDists[i].size(); j++) {
+                        int row = calcDists[i][j].seq1;
+                        int column = calcDists[i][j].seq2;
+                        double dist = calcDists[i][j].dist;
+                        
+                        matrix[row][column] = dist;
+                        matrix[column][row] = dist;
                     }
-                    outDist << name << '\t';
-                                       
-                    //output distances
-                    for (int l = 0; l < r; l++) {      outDist  << matrix[r][l] << '\t';  }
-                    outDist << endl;
+                    
+                    string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + ".dist";
+                    outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
+                    ofstream outDist;
+                    m->openOutputFile(distFileName, outDist);
+                    outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
+                    
+                    printSims(outDist, matrix);
+                    
+                    outDist.close();
                 }
-                
-                outDist.close();
             }
         }
-               return 0;
+
+        return 0;
        }
        catch(exception& e) {
                m->errorOut(e, "SummarySharedCommand", "process");
index fbfea7bd98fac33bb8570934b6bcd9e9d99ceaba..99ed5d039c9e95e6404731d092b81c2f824a0309 100644 (file)
@@ -84,15 +84,16 @@ private:
        vector<Calculator*> sumCalculators;     
        InputData* input;
        
-       bool abort, allLines, mult, all, createPhylip;
+       bool abort, allLines, mult, all, createPhylip, subsample;
        set<string> labels; //holds labels to be used
-       string label, calc, groups, sharedfile;
+       string label, calc, groups, sharedfile, output;
        vector<string>  Estimators, Groups, outputNames;
        vector<SharedRAbundVector*> lookup;
        string format, outputDir;
-       int numGroups, processors;
+       int numGroups, processors, subsampleSize, iters;
        int process(vector<SharedRAbundVector*>, string, string);
        int driver(vector<SharedRAbundVector*>, int, int, string, string, vector< vector<seqDist> >&);
+    int printSims(ostream&, vector< vector<double> >&);
 
 };
 
index 2d25e835cbd5608bc1c4f4c6cc67397155f2c834..71c3027db2c8bbe97265cb40af839c6a81a17b33 100644 (file)
@@ -291,6 +291,34 @@ double TrialSwap2::t_test (double initialscore, int runs, double nullMean, vecto
     }
 }
 /**************************************************************************************************/
+double TrialSwap2::getSD (int runs, vector<double> scorevec, double nullMean)
+{
+    try{
+        double sum = 0;
+        for(int i=0;i<runs;i++)
+            {
+                if (m->control_pressed) { return 0; }
+                sum += pow((scorevec[i] - nullMean),2);
+            }
+        return sqrt( (1/double(runs)) * sum );
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "getSD");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+double TrialSwap2::get_zscore (double sd, double nullMean, double initscore)
+{
+    try {
+        return (initscore - nullMean) / sd;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "get_zscore");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
 int TrialSwap2::print_matrix(vector<vector<int> > &matrix, int nrows, int ncols)
 {
     try {
index 924938b1a16297015a3bdb14d07db0f91fc7b5bc..455221968f7d513641b0095f457a44b3778401c1 100644 (file)
@@ -26,6 +26,8 @@ public:
     double calc_vratio (int, int, vector<int>, vector<int>);
     int calc_checker (vector<vector<int> > &, vector<int>, int, int);
     double calc_c_score (vector<vector<int> > &, vector<int>, int, int);
+    double get_zscore (double, double, double);
+    double getSD (int, vector<double>, double);
     
     
 private:
@@ -35,7 +37,7 @@ private:
     int print_matrix(vector<vector<int> > &, int, int);
     
     
-    
+        
 };
 #endif