]> git.donarmstrong.com Git - mothur.git/commitdiff
Merge remote-tracking branch 'mothur/master'
authorPat Schloss <pschloss@umich.edu>
Mon, 18 Feb 2013 15:16:15 +0000 (10:16 -0500)
committerPat Schloss <pschloss@umich.edu>
Mon, 18 Feb 2013 15:16:15 +0000 (10:16 -0500)
29 files changed:
.gitignore
Mothur.xcodeproj/project.pbxproj
Mothur.xcodeproj/project.xcworkspace/xcuserdata/SarahsWork.xcuserdatad/UserInterfaceState.xcuserstate [deleted file]
clustersplitcommand.cpp
collectcommand.cpp
commandfactory.cpp
flowdata.cpp
makecontigscommand.cpp
makecontigscommand.h
makefile
matrixoutputcommand.cpp
mergetaxsummarycommand.cpp [new file with mode: 0644]
mergetaxsummarycommand.h [new file with mode: 0644]
mothur.h
mothurout.cpp
newcommandtemplate.cpp
optionparser.cpp
parsefastaqcommand.cpp
pcrseqscommand.h
prcseqscommand.cpp
qualityscores.cpp
seqerrorcommand.cpp
sffinfocommand.cpp
shhhercommand.cpp
shhhercommand.h
treegroupscommand.cpp
trimflowscommand.cpp
trimoligos.cpp
trimoligos.h

index ac6f4409bf8be051d0665c0743bf853f42579f25..853fd84a4e280293f81727c25057644c758cd2c0 100644 (file)
@@ -1,3 +1,4 @@
 .DS_Store
 *.zip
 *.pbxproj
+*.xcuserdata
\ No newline at end of file
index c78e3fbb4fbf64ebf2a4f44509592b80bd06a86f..600f6d701955db37eaf0e362a4406ec501bdfcf1 100644 (file)
@@ -58,6 +58,7 @@
                A7876A26152A017C00A0AE86 /* subsample.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7876A25152A017C00A0AE86 /* subsample.cpp */; };
                A79234D713C74BF6002B08E2 /* mothurfisher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79234D613C74BF6002B08E2 /* mothurfisher.cpp */; };
                A795840D13F13CD900F201D5 /* countgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A795840C13F13CD900F201D5 /* countgroupscommand.cpp */; };
+               A799314B16CBD0CD0017E888 /* mergetaxsummarycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799314A16CBD0CD0017E888 /* mergetaxsummarycommand.cpp */; };
                A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; };
                A79EEF8616971D4A0006DEC1 /* filtersharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79EEF8516971D4A0006DEC1 /* filtersharedcommand.cpp */; };
                A7A0671A1562946F0095C8C5 /* listotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */; };
                        outputFiles = (
                                "$(TARGET_BUILD_DIR)/$(INPUT_FILE_BASE).o",
                        );
-                       script = "/usr/local/gfortran/bin/gfortran -g -m64 -c ${PROJECT_DIR}/${INPUT_FILE_NAME} -o ${TARGET_BUILD_DIR}/${INPUT_FILE_BASE}.o";
+                       script = "/usr/local/bin/gfortran -g -m64 -c ${PROJECT_DIR}/${INPUT_FILE_NAME} -o ${TARGET_BUILD_DIR}/${INPUT_FILE_BASE}.o";
                };
 /* End PBXBuildRule section */
 
                A79234D613C74BF6002B08E2 /* mothurfisher.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mothurfisher.cpp; sourceTree = "<group>"; };
                A795840B13F13CD900F201D5 /* countgroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = countgroupscommand.h; sourceTree = "<group>"; };
                A795840C13F13CD900F201D5 /* countgroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = countgroupscommand.cpp; sourceTree = "<group>"; };
+               A799314816CBD0BC0017E888 /* mergetaxsummarycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mergetaxsummarycommand.h; sourceTree = "<group>"; };
+               A799314A16CBD0CD0017E888 /* mergetaxsummarycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mergetaxsummarycommand.cpp; sourceTree = "<group>"; };
                A799F5B71309A3E000AEEFA0 /* makefastqcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makefastqcommand.h; sourceTree = "<group>"; };
                A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makefastqcommand.cpp; sourceTree = "<group>"; };
                A79EEF8516971D4A0006DEC1 /* filtersharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = filtersharedcommand.cpp; sourceTree = "<group>"; };
                                A7E9B75312D37EC400DA6239 /* mergefilecommand.cpp */,
                                A71FE12A12EDF72400963CA7 /* mergegroupscommand.h */,
                                A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */,
+                               A799314816CBD0BC0017E888 /* mergetaxsummarycommand.h */,
+                               A799314A16CBD0CD0017E888 /* mergetaxsummarycommand.cpp */,
                                A7E9B75812D37EC400DA6239 /* metastatscommand.h */,
                                A7E9B75712D37EC400DA6239 /* metastatscommand.cpp */,
                                A7E9B75A12D37EC400DA6239 /* mgclustercommand.h */,
                                A74C06E916A9C0A9008390A3 /* primerdesigncommand.cpp in Sources */,
                                A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */,
                                A7B0231516B8244C006BA09E /* removedistscommand.cpp in Sources */,
+                               A799314B16CBD0CD0017E888 /* mergetaxsummarycommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
                                GCC_MODEL_TUNING = "";
                                GCC_OPTIMIZATION_LEVEL = 3;
                                GCC_PREPROCESSOR_DEFINITIONS = (
-                                       "VERSION=\"\\\"1.28.0\\\"\"",
-                                       "RELEASE_DATE=\"\\\"11/2/2012\\\"\"",
+                                       "VERSION=\"\\\"1.29.2\\\"\"",
+                                       "RELEASE_DATE=\"\\\"2/12/2013\\\"\"",
                                );
                                GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
                                GCC_WARN_ABOUT_RETURN_TYPE = YES;
diff --git a/Mothur.xcodeproj/project.xcworkspace/xcuserdata/SarahsWork.xcuserdatad/UserInterfaceState.xcuserstate b/Mothur.xcodeproj/project.xcworkspace/xcuserdata/SarahsWork.xcuserdatad/UserInterfaceState.xcuserstate
deleted file mode 100644 (file)
index b4934c1..0000000
Binary files a/Mothur.xcodeproj/project.xcworkspace/xcuserdata/SarahsWork.xcuserdatad/UserInterfaceState.xcuserstate and /dev/null differ
index 87d26cea75033c911d4db315265123fdd3dd49de..92f0e486898f80b2c6f85bb984a5b7662bc18340 100644 (file)
@@ -947,7 +947,7 @@ vector<string>  ClusterSplitCommand::createProcesses(vector< map<string, string>
             if ((processToAssign-1) == 1) { m->mothurOut(distName[i].begin()->first + "\n"); }
         }
         
-        //not lets reverse the order of ever other process, so we balance big files running with little ones
+        //now lets reverse the order of ever other process, so we balance big files running with little ones
         for (int i = 0; i < processors; i++) {
             //cout << i << endl;
             int remainder = ((i+1) % processors);
index de92e494ac7565953716e1ab413ffc13ee829913..b892a91bee89e9b6d01ac7bda8e40833fd4db3b0 100644 (file)
@@ -67,8 +67,8 @@ string CollectCommand::getHelpString(){
                helpString += "The collect.single command parameters are list, sabund, rabund, shared, label, freq, calc and abund.  list, sabund, rabund or shared is required unless you have a valid current file. \n";
                helpString += "The collect.single command should be in the following format: \n";
                helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
-               helpString += "collect.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n";
-               helpString += "Example collect(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-chao-ace-jack).\n";
+               helpString += "collect.single(label=yourLabel, freq=yourFreq, calc=yourEstimators).\n";
+               helpString += "Example collect(label=unique-.01-.03, freq=10, calc=sobs-chao-ace-jack).\n";
                helpString += "The default values for freq is 100, and calc are sobs-chao-ace-jack-shannon-npshannon-simpson.\n";
                helpString += validCalculator.printCalc("single");
                helpString += "The label parameter is used to analyze specific labels in your input.\n";
index 42e99d1331b3694792eed47e94ab9d21b33f9545..193c53c4d610111cf369084f1a153400bfbe38e5 100644 (file)
 #include "primerdesigncommand.h"
 #include "getdistscommand.h"
 #include "removedistscommand.h"
+#include "mergetaxsummarycommand.h"
 
 /*******************************************************/
 
@@ -303,6 +304,7 @@ CommandFactory::CommandFactory(){
     commands["primer.design"]          = "primer.design";
     commands["get.dists"]           = "get.dists";
     commands["remove.dists"]        = "remove.dists";
+    commands["merge.taxsummary"]    = "merge.taxsummary";
     
 
 }
@@ -522,6 +524,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "primer.design")         {      command = new PrimerDesignCommand(optionString);            }
         else if(commandName == "get.dists")             {      command = new GetDistsCommand(optionString);                }
         else if(commandName == "remove.dists")          {      command = new RemoveDistsCommand(optionString);             }
+        else if(commandName == "merge.taxsummary")      {      command = new MergeTaxSummaryCommand(optionString);         }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -682,6 +685,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
         else if(commandName == "primer.design")         {      pipecommand = new PrimerDesignCommand(optionString);            }
         else if(commandName == "get.dists")             {      pipecommand = new GetDistsCommand(optionString);                }
         else if(commandName == "remove.dists")          {      pipecommand = new RemoveDistsCommand(optionString);             }
+        else if(commandName == "merge.taxsummary")      {      pipecommand = new MergeTaxSummaryCommand(optionString);         }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -828,6 +832,7 @@ Command* CommandFactory::getCommand(string commandName){
         else if(commandName == "primer.design")         {      shellcommand = new PrimerDesignCommand();           }
         else if(commandName == "get.dists")             {      shellcommand = new GetDistsCommand();               }
         else if(commandName == "remove.dists")          {      shellcommand = new RemoveDistsCommand();            }
+        else if(commandName == "merge.taxsummary")      {      shellcommand = new MergeTaxSummaryCommand();        }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
index 5dc7dc3e6c780171f1ee5cacdb1d766864f18a18..f4605b6dbe1613fdb3e1a6beb1929a3585ff5fe7 100644 (file)
@@ -50,7 +50,7 @@ bool FlowData::getNext(ifstream& flowFile){
             translateFlow();
             m->gobble(flowFile);
                }
-            
+           
                if(flowFile){   return 1;       }
                else            {       return 0;       }
        }
@@ -86,16 +86,18 @@ string FlowData::getSequenceName(ifstream& flowFile) {
 void FlowData::updateEndFlow(){
        try{
                
+        if (baseFlow.length() > 4) { return; }
+        
                //int currLength = 0;
                float maxIntensity = (float) maxHomoP + 0.49;
                
                int deadSpot = 0;
-                               
+                       
                while(deadSpot < endFlow){
                        int signal = 0;
                        int noise = 0;
                        
-                       for(int i=0;i<4;i++){
+                       for(int i=0;i<baseFlow.length();i++){
                                float intensity = flowData[i + deadSpot];
                                if(intensity > signalIntensity){
                                        signal++;
@@ -110,7 +112,7 @@ void FlowData::updateEndFlow(){
                                break;
                        }
                
-                       deadSpot += 4;
+                       deadSpot += baseFlow.length();
                }
                endFlow = deadSpot;
 
@@ -129,13 +131,13 @@ void FlowData::translateFlow(){
                sequence = "";
                for(int i=0;i<endFlow;i++){
                        int intensity = (int)(flowData[i] + 0.5);
-                       char base = baseFlow[i % 4];
+                       char base = baseFlow[i % baseFlow.length()];
                        
                        for(int j=0;j<intensity;j++){
                                sequence += base;
                        }
                }
-
+        
                if(sequence.size() > 4){
                        sequence = sequence.substr(4);
                }
index 3474c57abec72b362dc9af4c27336473899beb96..c4866370bce4be7db8e5e67d7dec799b895080f2 100644 (file)
@@ -27,6 +27,7 @@ vector<string> MakeContigsCommand::setParameters(){
 
         CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
         CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
+        CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap);
                CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch);
                CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
                CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
@@ -74,6 +75,7 @@ string MakeContigsCommand::getHelpString(){
         helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=25.\n";
         helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
         helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
+        helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n";
         helpString += "The make.contigs command should be in the following format: \n";
                helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
                helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
@@ -318,6 +320,9 @@ MakeContigsCommand::MakeContigsCommand(string option)  {
 
             temp = validParameter.validFile(parameters, "allfiles", false);            if (temp == "not found") { temp = "F"; }
                        allFiles = m->isTrue(temp);
+            
+            temp = validParameter.validFile(parameters, "trimoverlap", false);         if (temp == "not found") { temp = "F"; }
+                       trimOverlap = m->isTrue(temp);
                        
                        align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
                        if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
@@ -734,7 +739,7 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
             }
 
                                  
-                       contigsData* tempcontig = new contigsData(files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createGroup, allFiles, h);
+                       contigsData* tempcontig = new contigsData(files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createGroup, allFiles, trimOverlap, h);
                        pDataArray.push_back(tempcontig);
             
                        hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
@@ -919,6 +924,7 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
             // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
             int overlapStart = fSeq.getStartPos();
             int seq2Start = rSeq.getStartPos();
+            
             //bigger of the 2 starting positions is the location of the overlapping start
             if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
                 overlapStart = seq2Start; 
@@ -968,6 +974,8 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
                 for (int i = overlapEnd; i < length; i++) {  contig += seq1[i]; }
             }
             
+            if (trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart);  if (contig.length() == 0) { trashCode += "l"; } }
+            
             if(trashCode.length() == 0){
                 bool ignore = false;
                 
index a23d397d202ab1fd71ecb250c7c7246bcb82db3c..2fbc09e9fa5f464bcf56e1f5f324055f76d893a3 100644 (file)
@@ -59,7 +59,7 @@ public:
     void help() { m->mothurOut(getHelpString()); }     
     
 private:
-    bool abort, allFiles, createGroup;
+    bool abort, allFiles, createGroup, trimOverlap;
     string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, file, format;
        float match, misMatch, gapOpen, gapExtend;
        int processors, longestBase, insert, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, deltaq;
@@ -106,7 +106,7 @@ struct contigsData {
        MothurOut* m;
        float match, misMatch, gapOpen, gapExtend;
        int count, insert, threadID, pdiffs, bdiffs, tdiffs, deltaq;
-    bool allFiles, createGroup, done;
+    bool allFiles, createGroup, done, trimOverlap;
     map<string, int> groupCounts; 
     map<string, string> groupMap;
     vector<string> primerNameVector;   
@@ -115,7 +115,7 @@ struct contigsData {
        map<int, oligosPair> primers;
        
        contigsData(){}
-       contigsData(vector<string> f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, map<int, oligosPair> br, map<int, oligosPair> pr, vector<vector<string> > ffn, vector<string>bnv, vector<string> pnv, int pdf, int bdf, int tdf, bool cg, bool all, int tid) {
+       contigsData(vector<string> f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, map<int, oligosPair> br, map<int, oligosPair> pr, vector<vector<string> > ffn, vector<string>bnv, vector<string> pnv, int pdf, int bdf, int tdf, bool cg, bool all, bool to, int tid) {
         files = f;
                outputFasta = of;
         outputMisMatches = om;
@@ -137,6 +137,7 @@ struct contigsData {
         bdiffs = bdf;
         tdiffs = tdf;
         allFiles = all;
+        trimOverlap = to;
         createGroup = cg;
                threadID = tid;
         deltaq = delt;
@@ -310,6 +311,8 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
                 for (int i = overlapEnd; i < length; i++) {  contig += seq1[i]; }
             }
 
+            if (pDataArray->trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart); if (contig.length() == 0) { trashCode += "l"; } }
+            
             if(trashCode.length() == 0){
                 bool ignore = false;
                 if (pDataArray->createGroup) {
index d4035c8a82f24ea2fa3c231a887375160125ce9e..3ce1686327126b12c45afb51599b2e5a066e19aa 100644 (file)
--- a/makefile
+++ b/makefile
@@ -15,8 +15,8 @@ USEREADLINE ?= yes
 CYGWIN_BUILD ?= no
 USECOMPRESSION ?= no
 MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"1/23/2013\""
-VERSION = "\"1.29.1\""
+RELEASE_DATE = "\"2/12/2013\""
+VERSION = "\"1.29.2\""
 FORTAN_COMPILER = gfortran
 FORTRAN_FLAGS = 
 
index 95b881190d085135539601c17836ce7d123592da..310e32bf23e048c73ae37579ac2e8e2d1cb8b0ff 100644 (file)
@@ -18,7 +18,7 @@ vector<string> MatrixOutputCommand::setParameters(){
         CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
                CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
                CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc);
-               CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
+               CommandParameter poutput("output", "Multiple", "lt-square-column", "lt", "", "", "","",false,false); parameters.push_back(poutput);
         CommandParameter pmode("mode", "Multiple", "average-median", "average", "", "", "","",false,false); parameters.push_back(pmode);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
         CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
@@ -45,7 +45,7 @@ string MatrixOutputCommand::getHelpString(){
         helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
                helpString += "The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels).\n";
-               helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
+               helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, column and square. The default is lt.\n";
         helpString += "The mode parameter allows you to specify if you want the average or the median values reported when subsampling. Options are average, and median. The default is average.\n";
                helpString += "Example dist.shared(groups=A-B-C, calc=jabund-sorabund).\n";
                helpString += "The default value for groups is all the groups in your groupfile.\n";
@@ -156,7 +156,7 @@ MatrixOutputCommand::MatrixOutputCommand(string option)  {
                        }
                        
                        output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "lt"; }
-                       if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
+                       if ((output != "lt") && (output != "square") && (output != "column")) { m->mothurOut(output + " is not a valid output form. Options are lt, column and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
             
             mode = validParameter.validFile(parameters, "mode", false);                if(mode == "not found"){        mode = "average"; }
                        if ((mode != "average") && (mode != "median")) { m->mothurOut(mode + " is not a valid mode. Options are average and medina. I will use average."); m->mothurOutEndLine(); output = "average"; }
@@ -449,11 +449,9 @@ void MatrixOutputCommand::printSims(ostream& out, vector< vector<double> >& simM
        try {
                
                out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
-               
-               //output num seqs
-               out << simMatrix.size() << endl;
-               
+                               
                if (output == "lt") {
+            out << simMatrix.size() << endl;
                        for (int b = 0; b < simMatrix.size(); b++)      {
                                out << lookup[b]->getGroup() << '\t';
                                for (int n = 0; n < b; n++)     {
@@ -461,7 +459,14 @@ void MatrixOutputCommand::printSims(ostream& out, vector< vector<double> >& simM
                                }
                                out << endl;
                        }
+        }else if (output == "column") {
+            for (int b = 0; b < simMatrix.size(); b++) {
+                for (int n = 0; n < b; n++)    {
+                    out << lookup[b]->getGroup() << '\t' << lookup[n]->getGroup() << '\t' << simMatrix[b][n] << endl;
+                }
+            }
                }else{
+            out << simMatrix.size() << endl;
                        for (int b = 0; b < simMatrix.size(); b++)      {
                                out << lookup[b]->getGroup() << '\t';
                                for (int n = 0; n < simMatrix[b].size(); n++)   {
diff --git a/mergetaxsummarycommand.cpp b/mergetaxsummarycommand.cpp
new file mode 100644 (file)
index 0000000..852e0e0
--- /dev/null
@@ -0,0 +1,373 @@
+//
+//  mergetaxsummarycommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 2/13/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "mergetaxsummarycommand.h"
+
+
+//**********************************************************************************************************************
+vector<string> MergeTaxSummaryCommand::setParameters(){        
+       try {
+               CommandParameter pinput("input", "String", "", "", "", "", "","",false,true,true); parameters.push_back(pinput);
+               CommandParameter poutput("output", "String", "", "", "", "", "","",false,true,true); parameters.push_back(poutput);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MergeTaxSummaryCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string MergeTaxSummaryCommand::getHelpString(){        
+       try {
+               string helpString = "";
+               helpString += "The merge.taxsummary command takes a list of tax.summary files separated by dashes and merges them into one file."; 
+               helpString += "The merge.taxsummary command parameters are input and output."; 
+               helpString += "Example merge.taxsummary(input=small.tax.summary-large.tax.summary, output=all.tax.summary).";
+               helpString += "Note: No spaces between parameter labels (i.e. output), '=' and parameters (i.e.yourOutputFileName).\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MergeTaxSummaryCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+MergeTaxSummaryCommand::MergeTaxSummaryCommand(){      
+       try {
+               abort = true; calledHelp = true; 
+               setParameters();
+               vector<string> tempOutNames;
+               outputTypes["taxsummary"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MergeTaxSummaryCommand", "MergeTaxSummaryCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+MergeTaxSummaryCommand::MergeTaxSummaryCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;   
+               
+               if(option == "help") { help();  abort = true; calledHelp = true;    }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;   }
+               else {
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["taxsummary"] = tempOutNames;
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       
+                       string fileList = validParameter.validFile(parameters, "input", false);                 
+                       if(fileList == "not found") { m->mothurOut("you must enter two or more file names"); m->mothurOutEndLine();  abort=true;  }
+                       else{   m->splitAtDash(fileList, fileNames);    }
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       string outputDir = validParameter.validFile(parameters, "outputdir", false);            if (outputDir == "not found")   {       outputDir = "";         }
+                       
+                       
+                       numInputFiles = fileNames.size();
+                       ifstream testFile;
+                       if(numInputFiles == 0){
+                               m->mothurOut("you must enter two or more file names and you entered " + toString(fileNames.size()) +  " file names"); m->mothurOutEndLine();
+                               abort=true;  
+                       }
+                       else{
+                               for(int i=0;i<numInputFiles;i++){
+                                       if (inputDir != "") {
+                        string path = m->hasPath(fileNames[i]);
+                        //if the user has not given a path then, add inputdir. else leave path alone.
+                        if (path == "") {      fileNames[i] = inputDir + fileNames[i];         }
+                    }
+                    
+                    int ableToOpen;
+                    ifstream in;
+                    ableToOpen = m->openInputFile(fileNames[i], in, "noerror");
+                    in.close();        
+                    
+                    //if you can't open it, try default location
+                    if (ableToOpen == 1) {
+                        if (m->getDefaultPath() != "") { //default path is set
+                            string tryPath = m->getDefaultPath() + m->getSimpleName(fileNames[i]);
+                            m->mothurOut("Unable to open " + fileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                            ifstream in2;
+                            ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                            in2.close();
+                            fileNames[i] = tryPath;
+                        }
+                    }
+                    
+                    //if you can't open it, try output location
+                    if (ableToOpen == 1) {
+                        if (m->getOutputDir() != "") { //default path is set
+                            string tryPath = m->getOutputDir() + m->getSimpleName(fileNames[i]);
+                            m->mothurOut("Unable to open " + fileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                            ifstream in2;
+                            ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                            in2.close();
+                            fileNames[i] = tryPath;
+                        }
+                    }
+                    
+                    
+                    
+                    if (ableToOpen == 1) { 
+                        m->mothurOut("Unable to open " + fileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
+                        //erase from file list
+                        fileNames.erase(fileNames.begin()+i);
+                        i--;
+                    }
+                               }
+                       }   
+                       
+                       outputFileName = validParameter.validFile(parameters, "output", false);                 
+                       if (outputFileName == "not found") { m->mothurOut("you must enter an output file name"); m->mothurOutEndLine();  abort=true;  }
+                       else if (outputDir != "") { outputFileName = outputDir + m->getSimpleName(outputFileName);   }
+            
+               }
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MergeTaxSummaryCommand", "MergeTaxSummaryCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int MergeTaxSummaryCommand::execute(){
+       try {
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+               
+        outputFileName = m->getFullPathName(outputFileName);
+               m->mothurRemove(outputFileName);
+        
+        vector<rawTaxNode> tree;
+        tree.push_back(rawTaxNode("Root"));
+               tree[0].rank = "0";
+        bool hasGroups = true;
+        set<string> groups;
+       
+        for (int i = 0; i < fileNames.size(); i++) {
+            
+            ifstream in;
+            m->openInputFile(fileNames[i], in);
+            string temp = m->getline(in);
+            vector<string> headers = m->splitWhiteSpace(temp);
+            
+            vector<string> thisFilesGroups;
+            if (headers.size() == 5) { hasGroups = false; }
+            else {  for (int j = 5; j < headers.size(); j++) { groups.insert(headers[j]); thisFilesGroups.push_back(headers[j]); } }
+            
+            int level, daugterLevels, total;
+            string rankId, tax; 
+            map<int, int> levelToCurrentNode;
+            levelToCurrentNode[0] = 0;
+            while (!in.eof()) {
+                
+                if (m->control_pressed) {   return 0;  }
+                
+                in >> level >> rankId >> tax >> daugterLevels >> total; m->gobble(in);
+                map<string, int> groupCounts;
+                if (thisFilesGroups.size() != 0) {  
+                    for (int j = 0; j < thisFilesGroups.size(); j++) {  
+                        int tempNum; in >> tempNum; m->gobble(in);
+                        groupCounts[thisFilesGroups[j]] = tempNum; 
+                    } 
+                }
+                
+                if (level == 0) {}
+                else { 
+                    map<int, int>::iterator itParent = levelToCurrentNode.find(level-1);
+                    int parent = 0;
+                    if (itParent == levelToCurrentNode.end()) { m->mothurOut("[ERROR]: situation I didnt expect.\n"); }
+                    else { parent = itParent->second; }
+                    
+                    levelToCurrentNode[level] = addTaxToTree(tree, level, parent, tax, total, groupCounts);
+                } 
+            }
+            in.close();
+        }
+        
+        if (!hasGroups && (groups.size() != 0)) { groups.clear();  m->mothurOut("[WARNING]: not all files contain group breakdown, ignoring group counts.\n");  }
+        
+        ofstream out;
+        m->openOutputFile(outputFileName, out);
+        print(out, tree, groups);
+                       
+               if (m->control_pressed) {  m->mothurRemove(outputFileName); return 0;  }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               m->mothurOut(outputFileName); m->mothurOutEndLine();    outputNames.push_back(outputFileName); outputTypes["taxsummary"].push_back(outputFileName);
+               m->mothurOutEndLine();
+        
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MergeTaxSummaryCommand", "execute");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+int MergeTaxSummaryCommand::addTaxToTree(vector<rawTaxNode>& tree, int level, int currentNode, string taxon, int total, map<string, int> groups){
+       try {
+               map<string, int>::iterator childPointer;
+               
+        childPointer = tree[currentNode].children.find(taxon);
+        int nodeToIncrement = 0;
+                       
+        if(childPointer != tree[currentNode].children.end()){  //if the node already exists, increment counts
+            nodeToIncrement = childPointer->second;
+            tree[nodeToIncrement].total += total;
+            
+            for (map<string, int>::iterator itGroups = groups.begin(); itGroups != groups.end(); itGroups++) {
+                map<string, int>::iterator it = tree[nodeToIncrement].groupCount.find(itGroups->first);
+                if (it == tree[nodeToIncrement].groupCount.end()) { tree[nodeToIncrement].groupCount[itGroups->first] = itGroups->second; }
+                else {   it->second += itGroups->second;  }
+            }
+        }
+        else{                                                                                  //otherwise, create it
+            tree.push_back(rawTaxNode(taxon));
+            tree[currentNode].children[taxon] = tree.size()-1;
+            tree[tree.size()-1].parent = currentNode;
+            nodeToIncrement = tree.size()-1;
+            tree[nodeToIncrement].total = total;
+            tree[nodeToIncrement].level = level;
+            for (map<string, int>::iterator itGroups = groups.begin(); itGroups != groups.end(); itGroups++) {
+                 tree[nodeToIncrement].groupCount[itGroups->first] = itGroups->second; 
+            }
+        }
+        
+               return nodeToIncrement;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MergeTaxSummaryCommand", "addSeqToTree");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+int MergeTaxSummaryCommand::assignRank(int index, vector<rawTaxNode>& tree){
+       try {
+               map<string,int>::iterator it;
+               int counter = 1;
+               
+               for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
+            if (m->control_pressed) { return 0; }
+                       tree[it->second].rank = tree[index].rank + '.' + toString(counter);
+                       counter++;
+            
+                       assignRank(it->second, tree);
+               }
+        
+        return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MergeTaxSummaryCommand", "assignRank");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+int MergeTaxSummaryCommand::print(ofstream& out, vector<rawTaxNode>& tree, set<string> groups){
+       try {
+               
+               assignRank(0, tree); 
+        vector<string> mGroups;
+               //print labels
+               out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t";
+               for (set<string>::iterator it = groups.begin(); it != groups.end(); it++) { out << (*it) << '\t'; }             
+               out << endl;
+        
+        for (set<string>::iterator it2 = groups.begin(); it2 != groups.end(); it2++) {  tree[0].groupCount[*it2] = 0;  }
+            
+        map<string,int>::iterator it;
+               for(it=tree[0].children.begin();it!=tree[0].children.end();it++){   
+            tree[0].total += tree[it->second].total;
+                       for (set<string>::iterator it2 = groups.begin(); it2 != groups.end(); it2++) { 
+                map<string, int>:: iterator itGroups = tree[it->second].groupCount.find(*it2);
+                if (itGroups != tree[it->second].groupCount.end()) { 
+                    tree[0].groupCount[*it2] += itGroups->second;
+                }
+            }
+               }
+
+               
+               //print root
+               out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << tree[0].children.size() << "\t" << tree[0].total << "\t";
+               
+        for (set<string>::iterator it = groups.begin(); it != groups.end(); it++) { 
+            map<string, int>:: iterator itGroups = tree[0].groupCount.find(*it);
+            int num = 0;
+            if (itGroups != tree[0].groupCount.end()) { num = itGroups->second; }
+            out << num << '\t';
+        }
+        out << endl;
+               
+               //print rest
+               print(0, out, tree, groups);
+        
+        return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MergeTaxSummaryCommand", "print");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+int MergeTaxSummaryCommand::print(int i, ofstream& out, vector<rawTaxNode>& tree, set<string> groups){
+       try {
+               map<string,int>::iterator it;
+               for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
+                       
+            //print root
+            out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << tree[it->second].children.size() << "\t" << tree[it->second].total << "\t";
+            
+            for (set<string>::iterator it2 = groups.begin(); it2 != groups.end(); it2++) { 
+                map<string, int>:: iterator itGroups = tree[it->second].groupCount.find(*it2);
+                int num = 0;
+                if (itGroups != tree[it->second].groupCount.end()) { num = itGroups->second; }
+                out << num << '\t';
+            }
+            out << endl;
+
+                       print(it->second, out, tree, groups);
+               }
+        
+        return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MergeTaxSummaryCommand", "print");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
diff --git a/mergetaxsummarycommand.h b/mergetaxsummarycommand.h
new file mode 100644 (file)
index 0000000..d39617a
--- /dev/null
@@ -0,0 +1,47 @@
+//
+//  mergetaxsummarycommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 2/13/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_mergetaxsummarycommand_h
+#define Mothur_mergetaxsummarycommand_h
+
+#include "mothur.h"
+#include "command.hpp"
+#include "phylosummary.h"
+
+class MergeTaxSummaryCommand : public Command {
+public:
+       MergeTaxSummaryCommand(string);
+       MergeTaxSummaryCommand();
+       ~MergeTaxSummaryCommand(){}
+       
+       vector<string> setParameters();
+       string getCommandName()                 { return "merge.taxsummary";    }
+       string getCommandCategory()             { return "Phylotype Analysis";          }
+       string getHelpString(); 
+    string getOutputPattern(string){ return "";  }     
+       string getCitation() { return "http://www.mothur.org/wiki/Merge.taxsummary"; }
+       string getDescription()         { return "merges tax summary files creating one file"; }
+    
+       
+       int execute(); 
+       void help() { m->mothurOut(getHelpString()); }  
+       
+private:
+       vector<string> fileNames, outputNames;
+       string outputFileName;
+       int numInputFiles;
+       bool abort;
+       
+    int addTaxToTree(vector<rawTaxNode>&, int, int, string, int, map<string, int>);
+    int assignRank(int index, vector<rawTaxNode>& tree);
+    int print(ofstream& out, vector<rawTaxNode>& tree, set<string> groups);
+    int print(int, ofstream& out, vector<rawTaxNode>& tree, set<string> groups);
+};
+
+
+#endif
index cd14056b21ba82f9d6c07e650fcf210616e7a882..98b34c31c58c88b614412be4e9ba6874ec1cbe36 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -170,7 +170,6 @@ inline bool compareIndexes(PDistCell left, PDistCell right){
        return (left.index > right.index);      
 }
 //********************************************************************************************************************
-//sorts highest to lowest
 inline bool compareSpearman(spearmanRank left, spearmanRank right){
        return (left.score < right.score);      
 } 
@@ -185,11 +184,7 @@ inline bool compareSeqPriorityNodes(seqPriorityNode left, seqPriorityNode right)
     }
     return false;      
 } 
-//********************************************************************************************************************
-//sorts lowest to highest
-inline bool compareSpearmanReverse(spearmanRank left, spearmanRank right){
-       return (left.score < right.score);      
-} 
 /************************************************************/
 //sorts lowest to highest
 inline bool compareDistLinePairs(distlinePair left, distlinePair right){
index dc77490e4b9bba1bc9bf6e4c16b0f402985c73d1..9f678fef98505cae0de916b1db66914d5f41074f 100644 (file)
@@ -453,7 +453,7 @@ void MothurOut::errorOut(exception& e, string object, string function) {
         if (object == "cluster"){
             mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory.  There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt.  \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
         }else {
-            mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory.  This is most commonly caused by trying to process a dataset too large, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G.  If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue.  Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
+            mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory.  This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G.  If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue.  If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
         }
     }
 }
@@ -951,7 +951,7 @@ string MothurOut::getFullPathName(string fileName){
                                }
                        
                                for (int i = index; i >= 0; i--) {
-                                       newFileName = dirs[i] +  "\\\\" + newFileName;          
+                                       newFileName = dirs[i] +  "\\" + newFileName;            
                                }
                                
                                return newFileName;
@@ -2776,6 +2776,7 @@ void MothurOut::splitAtDash(string& estim, set<string>& container) {
                string individual = "";
                int estimLength = estim.size();
                bool prevEscape = false;
+        /*
                for(int i=0;i<estimLength;i++){
                        if(prevEscape){
                                individual += estim[i];
@@ -2796,7 +2797,25 @@ void MothurOut::splitAtDash(string& estim, set<string>& container) {
                                }
                        }
                }
-               container.insert(individual);
+               */
+        
+        for(int i=0;i<estimLength;i++){
+            if(estim[i] == '-'){
+                if (prevEscape) {  individual += estim[i]; prevEscape = false;  } //add in dash because it was escaped.
+                else {
+                    container.insert(individual);
+                    individual = "";
+                }
+            }else if(estim[i] == '\\'){
+                if (i < estimLength-1) { 
+                    if (estim[i+1] == '-') { prevEscape=true; }  //are you a backslash before a dash, if yes ignore
+                    else { individual += estim[i]; prevEscape = false;  } //if no, add in
+                }else { individual += estim[i]; }
+            }else {
+                individual += estim[i];
+            }
+        }
+        container.insert(individual);
         
        }
        catch(exception& e) {
@@ -2812,6 +2831,7 @@ void MothurOut::splitAtDash(string& estim, set<int>& container) {
                int lineNum;
                int estimLength = estim.size();
                bool prevEscape = false;
+        /*
                for(int i=0;i<estimLength;i++){
                        if(prevEscape){
                                individual += estim[i];
@@ -2832,7 +2852,26 @@ void MothurOut::splitAtDash(string& estim, set<int>& container) {
                                        prevEscape = false;
                                }
                        }
-               }
+               }*/
+        
+        for(int i=0;i<estimLength;i++){
+            if(estim[i] == '-'){
+                if (prevEscape) {  individual += estim[i]; prevEscape = false;  } //add in dash because it was escaped.
+                else {
+                    convert(individual, lineNum); //convert the string to int
+                    container.insert(lineNum);
+                    individual = "";
+                }
+            }else if(estim[i] == '\\'){
+                if (i < estimLength-1) { 
+                    if (estim[i+1] == '-') { prevEscape=true; }  //are you a backslash before a dash, if yes ignore
+                    else { individual += estim[i]; prevEscape = false;  } //if no, add in
+                }else { individual += estim[i]; }
+            }else {
+                individual += estim[i];
+            }
+        }
+        
                convert(individual, lineNum); //convert the string to int
                container.insert(lineNum);
        }
index 3c893d8b44a04feddefb5ec18bebdc9350064656..b9dc986a7fa10f933cff0add5092e76a4ce62e5c 100644 (file)
@@ -6,7 +6,7 @@
 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
 //
 
-
+//
 #include "newcommandtemplate.h"
 
 //**********************************************************************************************************************
index a6d5c0a668be5ebdc259a3108d4ff11341f7cd00..d9d9145f1ffcf73caa4c502fb19acec23476929e 100644 (file)
@@ -116,6 +116,8 @@ map<string, string> OptionParser::getParameters() {
                         it->second = m->getDesignFile();
                     }else if (it->first == "sff") {
                         it->second = m->getSFFFile();
+                    }else if (it->first == "flow") {
+                            it->second = m->getFlowFile();
                     }else if (it->first == "oligos") {
                         it->second = m->getOligosFile();
                     }else if (it->first == "accnos") {
index 89f97acf13ad45650a1f8294a3e88895beaa347c..704e752341320dd75b9f213658d74530583ec9cb 100644 (file)
@@ -183,7 +183,10 @@ int ParseFastaQCommand::execute(){
                        string name = m->getline(in); m->gobble(in);
                        if (name == "") {  m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
                        else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       else { name = name.substr(1); }
+                       else { 
+                name = name.substr(1); 
+                for (int i = 0; i < name.length(); i++) { if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; } }
+            }
                        
                        //read sequence
                        string sequence = m->getline(in); m->gobble(in);
@@ -193,7 +196,10 @@ int ParseFastaQCommand::execute(){
                        string name2 = m->getline(in); m->gobble(in);
                        if (name2 == "") {  m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
                        else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       else { name2 = name2.substr(1);  }
+                       else { 
+                name2 = name2.substr(1);  
+                for (int i = 0; i < name2.length(); i++) { if (name2[i] == ':') { name2[i] = '_'; m->changedSeqNames = true; } }
+            }
                        
                        //read quality scores
                        string quality = m->getline(in); m->gobble(in);
index c6f7ff9c7f6670073a7080496f80eac1cdfb2a72..16c1e29be5f9cbba00180a55a746bbe923b3cbdb 100644 (file)
@@ -48,10 +48,10 @@ private:
        bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
     bool abort, keepprimer, keepdots;
        string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch;
-       int start, end, processors, length;
+       int start, end, processors, length, pdiffs;
        
     vector<string> revPrimer, outputNames;
-       vector<string> primers;
+       map<string, int> primers;
     
     int writeAccnos(set<string>);
     int readName(set<string>&);
@@ -62,10 +62,7 @@ private:
     bool readEcoli();
        int driverPcr(string, string, string, set<string>&, linePair);  
        int createProcesses(string, string, string, set<string>&);
-    bool findForward(Sequence&, int&, int&);
-    bool findReverse(Sequence&, int&, int&);
     bool isAligned(string, map<int, int>&);
-    bool compareDNASeq(string, string);
     string reverseOligo(string);
 };
 
@@ -78,16 +75,16 @@ struct pcrData {
     string goodFasta, badFasta, oligosfile, ecolifile, nomatch;
        unsigned long long fstart;
        unsigned long long fend;
-       int count, start, end, length;
+       int count, start, end, length, pdiffs;
        MothurOut* m;
-       vector<string> primers;
+       map<string, int> primers;
     vector<string> revPrimer;
     set<string> badSeqNames;
     bool keepprimer, keepdots;
        
        
        pcrData(){}
-       pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, vector<string> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, unsigned long long fst, unsigned long long fen) {
+       pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, map<string, int> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
                filename = f;
         goodFasta = gf;
         badFasta = bfn;
@@ -104,6 +101,7 @@ struct pcrData {
         length = l;
                fstart = fst;
         fend = fen;
+        pdiffs = pd;
                count = 0;
        }
 };
@@ -132,6 +130,9 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                }
         
         set<int> lengths;
+        //pdiffs, bdiffs, primers, barcodes, revPrimers
+        map<string, int> faked;
+        TrimOligos trim(pDataArray->pdiffs, 0, pDataArray->primers, faked, pDataArray->revPrimer);
                
                for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
             pDataArray->count++;
@@ -160,62 +161,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                     //process primers
                     if (pDataArray->primers.size() != 0) {
                         int primerStart = 0; int primerEnd = 0;
-                        //bool good = findForward(currSeq, primerStart, primerEnd);
-                        ///////////////////////////////////////////////////////////////
-                        bool good = false;
-                        string rawSequence = currSeq.getUnaligned();
-                        
-                        for(int j=0;j<pDataArray->primers.size();j++){
-                            string oligo = pDataArray->primers[j];
-                            
-                            if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
-                            
-                            if(rawSequence.length() < oligo.length()) {  break;  }
-                            
-                            //search for primer
-                            int olength = oligo.length();
-                            for (int l = 0; l < rawSequence.length()-olength; l++){
-                                if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
-                                string rawChunk = rawSequence.substr(l, olength);
-                                //compareDNASeq(oligo, rawChunk)
-                                ////////////////////////////////////////////////////////
-                                bool success = 1;
-                                for(int k=0;k<olength;k++){
-                                    
-                                    if(oligo[k] != rawChunk[k]){
-                                        if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C')   {       success = 0;    }
-                                        else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N'))                          {       success = 0;    }
-                                        else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C'))   {       success = 0;    }
-                                        else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G'))   {       success = 0;    }                       
-                                        
-                                        if(success == 0)       {       break;   }
-                                    }
-                                    else{
-                                        success = 1;
-                                    }
-                                }
-                                
-                                ////////////////////////////////////////////////////////////////////
-                                if(success) {
-                                    primerStart = j;
-                                    primerEnd = primerStart + olength;
-                                    good = true; break;
-                                }
-                            }
-                            if (good) { break; }
-                        }      
-                        
-                        if (!good) { primerStart = 0; primerEnd = 0; }
-                        ///////////////////////////////////////////////////////////////
-                        
+                        bool good = trim.findForward(currSeq, primerStart, primerEnd);
                         
                         if(!good){     if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "f";     }
                         else{
@@ -229,6 +175,15 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                                     if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
                                 }
+                                ///////////////////////////////////////////////////////////////
+                                mapAligned.clear();
+                                string seq = currSeq.getAligned(); 
+                                int countBases = 0;
+                                for (int k = 0; k < seq.length(); k++) {
+                                    if (!isalpha(seq[k])) { ; }
+                                    else { mapAligned[countBases] = k; countBases++; } 
+                                }                                                   
+                                ///////////////////////////////////////////////////////////////
                             }else { 
                                 if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
@@ -239,60 +194,8 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                     //process reverse primers
                     if (pDataArray->revPrimer.size() != 0) {
                         int primerStart = 0; int primerEnd = 0;
-                        bool good = false;
-                        //findReverse(currSeq, primerStart, primerEnd);
-                         ///////////////////////////////////////////////////////////////
-                        string rawSequence = currSeq.getUnaligned();
-                        
-                        for(int j=0;j<pDataArray->revPrimer.size();j++){
-                            string oligo = pDataArray->revPrimer[j];
-                            if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
-                            if(rawSequence.length() < oligo.length()) {  break;  }
-                            
-                            //search for primer
-                            int olength = oligo.length();
-                            for (int l = rawSequence.length()-olength; l >= 0; l--){
-                                
-                                string rawChunk = rawSequence.substr(l, olength);
-                                //compareDNASeq(oligo, rawChunk)
-                                ////////////////////////////////////////////////////////
-                                bool success = 1;
-                                for(int k=0;k<olength;k++){
-                                    
-                                    if(oligo[k] != rawChunk[k]){
-                                        if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C')   {       success = 0;    }
-                                        else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N'))                          {       success = 0;    }
-                                        else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C'))   {       success = 0;    }
-                                        else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G'))   {       success = 0;    }                       
-                                        
-                                        if(success == 0)       {       break;   }
-                                    }
-                                    else{
-                                        success = 1;
-                                    }
-                                }
-                                
-                                ////////////////////////////////////////////////////////////////////
-                                if(success) {
-                                    primerStart = j;
-                                    primerEnd = primerStart + olength;
-                                    good = true; break;
-                                }
-                            }
-                            if (good) { break; }
-                        }      
-                        
-                        if (!good) { primerStart = 0; primerEnd = 0; }
-
-                         ///////////////////////////////////////////////////////////////
+                        bool good = trim.findReverse(currSeq, primerStart, primerEnd);
+                         
                         if(!good){     if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r";     }
                         else{ 
                             //are you aligned
index 4d5b6d963a026433c5851de6d444edc524cd6d1a..5fa71bef6b3474cc516491044c41729a318d449f 100644 (file)
@@ -21,6 +21,8 @@ vector<string> PcrSeqsCommand::setParameters(){
                CommandParameter pstart("start", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pstart);
                CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pend);
                CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "","",false,false); parameters.push_back(pnomatch);
+        CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
+
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
                CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepprimer);
         CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pkeepdots);
@@ -41,7 +43,7 @@ string PcrSeqsCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The pcr.seqs command reads a fasta file.\n";
-        helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, processors, keepprimer and keepdots.\n";
+        helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, pdiffs, processors, keepprimer and keepdots.\n";
                helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
         helpString += "The start parameter allows you to provide a starting position to trim to.\n";
         helpString += "The end parameter allows you to provide a ending position to trim from.\n";
@@ -49,6 +51,7 @@ string PcrSeqsCommand::getHelpString(){
         helpString += "The processors parameter allows you to use multiple processors.\n";
         helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
         helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
+        helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
                helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
                helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
                return helpString;
@@ -262,6 +265,9 @@ PcrSeqsCommand::PcrSeqsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors); 
+            
+            temp = validParameter.validFile(parameters, "pdiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, pdiffs);
                        
             nomatch = validParameter.validFile(parameters, "nomatch", false);  if (nomatch == "not found") { nomatch = "reject"; }
                        
@@ -319,7 +325,7 @@ int PcrSeqsCommand::execute(){
                
                
         length = 0;
-               if(oligosfile != ""){    readOligos();     }  if (m->control_pressed) {  return 0; }
+               if(oligosfile != ""){    readOligos();     if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(primers.size()) + ", revprimers = " + toString(revPrimer.size()) + ".\n"); } }  if (m->control_pressed) {  return 0; }
         if(ecolifile != "") {    readEcoli();      }  if (m->control_pressed) {  return 0; }
         
         vector<unsigned long long> positions; 
@@ -499,7 +505,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
             if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
             
                        // Allocate memory for thread data.
-                       pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end);
+                       pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
                        pDataArray.push_back(tempPcr);
                        
                        //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
@@ -561,6 +567,10 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
                int count = 0;
         set<int> lengths;
         
+        //pdiffs, bdiffs, primers, barcodes, revPrimers
+        map<string, int> faked;
+        TrimOligos trim(pdiffs, 0, primers, faked, revPrimer);
+        
                while (!done) {
             
                        if (m->control_pressed) {  break; }
@@ -570,6 +580,8 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
             string trashCode = "";
                        if (currSeq.getName() != "") {
                 
+                if (m->debug) { m->mothurOut("[DEBUG]: seq name = " + currSeq.getName() + ".\n"); } 
+                
                 bool goodSeq = true;
                 if (oligosfile != "") {
                     map<int, int> mapAligned;
@@ -578,7 +590,7 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
                     //process primers
                     if (primers.size() != 0) {
                         int primerStart = 0; int primerEnd = 0;
-                        bool good = findForward(currSeq, primerStart, primerEnd);
+                        bool good = trim.findForward(currSeq, primerStart, primerEnd);
                         
                         if(!good){     if (nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
                         else{
@@ -590,8 +602,9 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
                                 } 
                                 else                {  
                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
-                                    else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
+                                    else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                             }
                                 }
+                                isAligned(currSeq.getAligned(), mapAligned);
                             }else { 
                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
@@ -602,10 +615,10 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
                     //process reverse primers
                     if (revPrimer.size() != 0) {
                         int primerStart = 0; int primerEnd = 0;
-                        bool good = findReverse(currSeq, primerStart, primerEnd);
+                        bool good = trim.findReverse(currSeq, primerStart, primerEnd);
                         if(!good){     if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
                         else{ 
-                            //are you aligned
+                              //are you aligned
                             if (aligned) { 
                                 if (!keepprimer)    {  
                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
@@ -700,75 +713,9 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
        }
 }
 //********************************************************************/
-bool PcrSeqsCommand::findForward(Sequence& seq, int& primerStart, int& primerEnd){
-       try {
-               
-               string rawSequence = seq.getUnaligned();
-               
-               for(int j=0;j<primers.size();j++){
-                       string oligo = primers[j];
-                       
-                       if(rawSequence.length() < oligo.length()) {  break;  }
-                       
-                       //search for primer
-            int olength = oligo.length();
-            for (int j = 0; j < rawSequence.length()-olength; j++){
-                if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
-                string rawChunk = rawSequence.substr(j, olength);
-                if(compareDNASeq(oligo, rawChunk)) {
-                    primerStart = j;
-                    primerEnd = primerStart + olength;
-                    return true;
-                }
-                
-            }
-        }      
-               
-        primerStart = 0; primerEnd = 0;
-               return false;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimOligos", "stripForward");
-               exit(1);
-       }
-}
-//******************************************************************/
-bool PcrSeqsCommand::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
-       try {
-               
-               string rawSequence = seq.getUnaligned();
-               
-               for(int i=0;i<revPrimer.size();i++){
-                       string oligo = revPrimer[i];
-                       if(rawSequence.length() < oligo.length()) {  break;  }
-                       
-                       //search for primer
-            int olength = oligo.length();
-            for (int j = rawSequence.length()-olength; j >= 0; j--){
-                 if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
-                string rawChunk = rawSequence.substr(j, olength);
-            
-                if(compareDNASeq(oligo, rawChunk)) {
-                    primerStart = j;
-                    primerEnd = primerStart + olength;
-                    return true;
-                }
-                
-            }
-               }       
-               
-        primerStart = 0; primerEnd = 0;
-               return false;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "PcrSeqsCommand", "findReverse");
-               exit(1);
-       }
-}
-//********************************************************************/
 bool PcrSeqsCommand::isAligned(string seq, map<int, int>& aligned){
        try {
+        aligned.clear();
         bool isAligned = false;
         
         int countBases = 0;
@@ -832,6 +779,7 @@ bool PcrSeqsCommand::readOligos(){
                m->openInputFile(oligosfile, inOligos);
                
                string type, oligo, group;
+        int primerCount = 0;
                
                while(!inOligos.eof()){
             
@@ -859,7 +807,7 @@ bool PcrSeqsCommand::readOligos(){
                         if (c == 10 || c == 13){       break;  } 
                         else if (c == 32 || c == 9){;} //space or tab
                                        } 
-                                       primers.push_back(oligo);
+                                       primers[oligo] = primerCount; primerCount++;
                 }else if(type == "REVERSE"){
                     string oligoRC = reverseOligo(oligo);
                     revPrimer.push_back(oligoRC);
@@ -935,43 +883,6 @@ int PcrSeqsCommand::writeAccnos(set<string> badNames){
                exit(1);
        }
     
-}
-//******************************************************************/
-bool PcrSeqsCommand::compareDNASeq(string oligo, string seq){
-       try {
-               bool success = 1;
-               int length = oligo.length();
-               
-               for(int i=0;i<length;i++){
-                       
-                       if(oligo[i] != seq[i]){
-                               if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
-                               else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
-                               else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
-                               else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
-                               else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
-                               else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
-                               
-                               if(success == 0)        {       break;   }
-                       }
-                       else{
-                               success = 1;
-                       }
-               }
-               
-               return success;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "PcrSeqsCommand", "compareDNASeq");
-               exit(1);
-       }
-       
 }
 //***************************************************************************************************************
 int PcrSeqsCommand::readName(set<string>& names){
index 4998b3d1c4dabd18b97fb8f33e36860127e757e5..33ca1728052e4db59372fc2d8129e5bcbaf729c5 100644 (file)
@@ -34,13 +34,17 @@ QualityScores::QualityScores(ifstream& qFile){
                int score;
                seqName = getSequenceName(qFile);
                
+        if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "'\n.");  }
+        
                if (!m->control_pressed) {
             string qScoreString = m->getline(qFile);
-            //cout << qScoreString << endl;
+            
+            if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + qScoreString + "'\n.");  }
+            
             while(qFile.peek() != '>' && qFile.peek() != EOF){
                 if (m->control_pressed) { break; }
                 string temp = m->getline(qFile);
-                //cout << temp << endl;
+                if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + temp + "'\n.");  }
                 qScoreString +=  ' ' + temp;
             }
             //cout << "done reading " << endl; 
@@ -51,6 +55,8 @@ QualityScores::QualityScores(ifstream& qFile){
                 string temp;
                 qScoreStringStream >> temp;  m->gobble(qScoreStringStream);
                 
+                if (m->debug) { m->mothurOut("[DEBUG]: score " + toString(qScores.size()) + " = '" + temp + "'\n.");  }
+                
                 //check temp to make sure its a number
                 if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
                 convert(temp, score);
index 67e43aa0d4b7f6a6ff251bc45a8254ca49b8b162..0a6eae93df72e8556554bef32c5a52074a4d0a54 100644 (file)
@@ -1381,6 +1381,10 @@ int SeqErrorCommand::setLines(string filename, string qfilename, string rfilenam
                                        
                                        string sname = "";  nameStream >> sname;
                                        sname = sname.substr(1);
+                    
+                    for (int i = 0; i < sname.length(); i++) {
+                        if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
+                    }
                                        
                                        map<string, int>::iterator it = firstSeqNames.find(sname);
                                        
@@ -1441,6 +1445,10 @@ int SeqErrorCommand::setLines(string filename, string qfilename, string rfilenam
                     istringstream nameStream(input);
                     string sname = "";  nameStream >> sname;
                     
+                    for (int i = 0; i < sname.length(); i++) {
+                        if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
+                    }
+                    
                     map<string, int>::iterator it = firstSeqNamesReport.find(sname);
                 
                     if(it != firstSeqNamesReport.end()) { //this is the start of a new chunk
index a1b7066482b973bcbaeebefdc718c5c96d70aae9..0148ab19f407b91d293eebb547927d4c381fd6d0 100644 (file)
@@ -1039,7 +1039,11 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr
         
         if (trim) {
             if(header.clipQualRight < header.clipQualLeft){
-                seq = "NNNN";
+                if (header.clipQualRight == 0) { //don't trim right
+                    seq = seq.substr(header.clipQualLeft-1);
+                }else {
+                    seq = "NNNN";
+                }
             }
             else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
@@ -1260,7 +1264,11 @@ int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& head
                
         if (trim) {
                        if(header.clipQualRight < header.clipQualLeft){
-                               seq = "NNNN";
+                               if (header.clipQualRight == 0) { //don't trim right
+                    seq = seq.substr(header.clipQualLeft-1);
+                }else {
+                    seq = "NNNN";
+                }
                        }
                        else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
                                seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
@@ -1296,8 +1304,13 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade
                
                if (trim) {
                        if(header.clipQualRight < header.clipQualLeft){
-                               out << ">" << header.name << " xy=" << header.xy << endl;
-                               out << "0\t0\t0\t0";
+                if (header.clipQualRight == 0) { //don't trim right
+                    out << ">" << header.name << " xy=" << header.xy << " length=" << (read.qualScores.size()-header.clipQualLeft) << endl;
+                    for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';      }       
+                }else {
+                    out << ">" << header.name << " xy=" << header.xy << endl;
+                    out << "0\t0\t0\t0";
+                }
                        }
                        else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
                                out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
@@ -1325,6 +1338,8 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade
 //**********************************************************************************************************************
 int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
        try {
+        if (header.clipQualRight == 0) { header.clipQualRight = read.flowgram.size(); }
+        
                if(header.clipQualRight > header.clipQualLeft){
                        
                        int rightIndex = 0;
index a859d145b49e261bace85fb973d3aa9466bc1123..30815a077c19ff7663d498223c49e73ddc268809 100644 (file)
@@ -21,8 +21,7 @@ vector<string> ShhherCommand::setParameters(){
         CommandParameter plarge("large", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(plarge);
                CommandParameter psigma("sigma", "Number", "", "60", "", "", "","",false,false); parameters.push_back(psigma);
                CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "","",false,false); parameters.push_back(pmindelta);
-               CommandParameter porder("order", "String", "", "", "", "", "","",false,false); parameters.push_back(porder);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+        CommandParameter porder("order", "Multiple", "A-B", "A", "", "", "","",false,false, true); parameters.push_back(porder);               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
@@ -39,6 +38,11 @@ string ShhherCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The shhh.flows command reads a file containing flowgrams and creates a file of corrected sequences.\n";
+        helpString += "The shhh.flows command parameters are flow, file, lookup, cutoff, processors, large, maxiter, sigma, mindelta and order.\n";
+        helpString += "The flow parameter is used to input your flow file.\n";
+        helpString += "The file parameter is used to input the *flow.files file created by trim.flows.\n";
+        helpString += "The lookup parameter is used specify the lookup file you would like to use. http://www.mothur.org/wiki/Lookup_files.\n";
+        helpString += "The order parameter options are A or B.  A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n";
                return helpString;
        }
        catch(exception& e) {
@@ -357,11 +361,18 @@ ShhherCommand::ShhherCommand(string option) {
                        temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
                        m->mothurConvert(temp, sigma); 
                        
-                       flowOrder = validParameter.validFile(parameters, "order", false);
-                       if (flowOrder == "not found"){ flowOrder = "TACG";              }
-                       else if(flowOrder.length() != 4){
-                               m->mothurOut("The value of the order option must be four bases long\n");
-                       }
+                       temp = validParameter.validFile(parameters, "order", false);  if (temp == "not found"){         temp = "A";     }
+            if (temp.length() > 1) {  m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A or B. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n");  abort=true;
+            }
+            else {
+                if (toupper(temp[0]) == 'A') {  flowOrder = "TACG";   }
+                else if(toupper(temp[0]) == 'B'){ 
+                    flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC";   }
+                else {
+                    m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A or B. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n");  abort=true;
+                }
+            }
+
                        
                }
 #ifdef USE_MPI
@@ -1777,7 +1788,7 @@ void ShhherCommand::writeSequences(vector<int> otuCounts){
                 
                 for(int j=0;j<numFlowCells;j++){
                     
-                    char base = flowOrder[j % 4];
+                    char base = flowOrder[j % flowOrder.length()];
                     for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
                         newSeq += base;
                     }
@@ -1895,7 +1906,7 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
                 
                 otuCountsFile << "ideal\t";
                 for(int j=8;j<numFlowCells;j++){
-                    char base = bases[j % 4];
+                    char base = bases[j % bases.length()];
                     for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
                         otuCountsFile << base;
                     }
@@ -1909,7 +1920,7 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
                     string newSeq = "";
                     
                     for(int k=0;k<lengths[sequence];k++){
-                        char base = bases[k % 4];
+                        char base = bases[k % bases.length()];
                         int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
                         
                         for(int s=0;s<freq;s++){
@@ -1947,10 +1958,10 @@ int ShhherCommand::execute(){
         if (numFiles < processors) { processors = numFiles; }
         
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-        if (processors == 1) { driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size()); }
+        if (processors == 1) { driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName); }
         else { createProcesses(flowFileVector); } //each processor processes one file
 #else
-        driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size());
+        driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName);
 #endif
         
                if(compositeFASTAFileName != ""){
@@ -1971,6 +1982,40 @@ int ShhherCommand::execute(){
        }
 }
 #endif
+//********************************************************************************************************************
+//sorts biggest to smallest
+inline bool compareFileSizes(string left, string right){
+    
+    FILE * pFile;
+    long leftsize = 0;
+    
+    //get num bytes in file
+    string filename = left;
+    pFile = fopen (filename.c_str(),"rb");
+    string error = "Error opening " + filename;
+    if (pFile==NULL) perror (error.c_str());
+    else{
+        fseek (pFile, 0, SEEK_END);
+        leftsize=ftell (pFile);
+        fclose (pFile);
+    }
+    
+    FILE * pFile2;
+    long rightsize = 0;
+    
+    //get num bytes in file
+    filename = right;
+    pFile2 = fopen (filename.c_str(),"rb");
+    error = "Error opening " + filename;
+    if (pFile2==NULL) perror (error.c_str());
+    else{
+        fseek (pFile2, 0, SEEK_END);
+        rightsize=ftell (pFile2);
+        fclose (pFile2);
+    }
+    
+    return (leftsize > rightsize);     
+} 
 /**************************************************************************************************/
 
 int ShhherCommand::createProcesses(vector<string> filenames){
@@ -1981,9 +2026,31 @@ int ShhherCommand::createProcesses(vector<string> filenames){
                
                //sanity check
                if (filenames.size() < processors) { processors = filenames.size(); }
-               
+        
+        //sort file names by size to divide load better
+        sort(filenames.begin(), filenames.end(), compareFileSizes);
+        
+        vector < vector <string> > dividedFiles; //dividedFiles[1] = vector of filenames for process 1...
+        dividedFiles.resize(processors);
+        
+        //for each file, figure out which process will complete it
+        //want to divide the load intelligently so the big files are spread between processes
+        for (int i = 0; i < filenames.size(); i++) { 
+            int processToAssign = (i+1) % processors; 
+            if (processToAssign == 0) { processToAssign = processors; }
+            
+            dividedFiles[(processToAssign-1)].push_back(filenames[i]);
+        }
+        
+        //now lets reverse the order of ever other process, so we balance big files running with little ones
+        for (int i = 0; i < processors; i++) {
+            int remainder = ((i+1) % processors);
+            if (remainder) {  reverse(dividedFiles[i].begin(), dividedFiles[i].end());  }
+        }
+        
+                       
                //divide the groups between the processors
-               vector<linePair> lines;
+               /*vector<linePair> lines;
         vector<int> numFilesToComplete;
                int numFilesPerProcessor = filenames.size() / processors;
                for (int i = 0; i < processors; i++) {
@@ -1992,7 +2059,7 @@ int ShhherCommand::createProcesses(vector<string> filenames){
                        if(i == (processors - 1)){      endIndex = filenames.size();    }
                        lines.push_back(linePair(startIndex, endIndex));
             numFilesToComplete.push_back((endIndex-startIndex));
-               }
+               }*/
                
         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
                
@@ -2004,7 +2071,7 @@ int ShhherCommand::createProcesses(vector<string> filenames){
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               num = driver(filenames, compositeFASTAFileName + toString(getpid()) + ".temp", compositeNamesFileName  + toString(getpid()) + ".temp", lines[process].start, lines[process].end);
+                               num = driver(dividedFiles[process], compositeFASTAFileName + toString(getpid()) + ".temp", compositeNamesFileName  + toString(getpid()) + ".temp");
                 
                 //pass numSeqs to parent
                                ofstream out;
@@ -2022,7 +2089,7 @@ int ShhherCommand::createProcesses(vector<string> filenames){
                }
                
                //do my part
-               driver(filenames, compositeFASTAFileName, compositeNamesFileName, lines[0].start, lines[0].end);
+               driver(dividedFiles[0], compositeFASTAFileName, compositeNamesFileName);
                
                //force parent to wait until all the processes are done
                for (int i=0;i<processIDS.size();i++) { 
@@ -2081,8 +2148,8 @@ int ShhherCommand::createProcesses(vector<string> filenames){
                        if (!in.eof()) { 
                 int tempNum = 0; 
                 in >> tempNum; 
-                if (tempNum != numFilesToComplete[i+1]) {
-                    m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(numFilesToComplete[i+1]) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches.  The flow files may be too large to process with multiple processors. \n");
+                if (tempNum != dividedFiles[i+1].size()) {
+                    m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(dividedFiles[i+1].size()) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches.  The flow files may be too large to process with multiple processors. \n");
                 }
             }
                        in.close(); m->mothurRemove(tempFile);
@@ -2152,12 +2219,12 @@ vector<string> ShhherCommand::parseFlowFiles(string filename){
 }
 /**************************************************************************************************/
 
-int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName, int start, int end){
+int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName){
     try {
         
         int numCompleted = 0;
         
-        for(int i=start;i<end;i++){
+        for(int i=0;i<filenames.size();i++){
                        
                        if (m->control_pressed) { break; }
                        
@@ -3307,7 +3374,7 @@ void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTU
                                
                                for(int j=0;j<numFlowCells;j++){
                                        
-                                       char base = flowOrder[j % 4];
+                                       char base = flowOrder[j % flowOrder.length()];
                                        for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
                                                newSeq += base;
                                        }
@@ -3408,7 +3475,7 @@ void ShhherCommand::writeClusters(string otuCountsFileName, int numOTUs, int num
                                
                                otuCountsFile << "ideal\t";
                                for(int j=8;j<numFlowCells;j++){
-                                       char base = bases[j % 4];
+                                       char base = bases[j % bases.length()];
                                        for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
                                                otuCountsFile << base;
                                        }
@@ -3422,7 +3489,7 @@ void ShhherCommand::writeClusters(string otuCountsFileName, int numOTUs, int num
                                        string newSeq = "";
                                        
                                        for(int k=0;k<lengths[sequence];k++){
-                                               char base = bases[k % 4];
+                                               char base = bases[k % bases.length()];
                                                int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
                         
                                                for(int s=0;s<freq;s++){
index 3b757241eacee655570ef823bfed96bfd39abe64..c787fe41d5c0f2a30975feb9461bdaa3194aae18 100644 (file)
@@ -70,7 +70,7 @@ private:
     vector<string> flowFileVector;
        
     vector<string> parseFlowFiles(string);
-    int driver(vector<string>, string, string, int, int);
+    int driver(vector<string>, string, string);
     int createProcesses(vector<string>);
     int getFlowData(string, vector<string>&, vector<int>&, vector<short>&, map<string, int>&, int&);
     int getUniques(int, int, vector<short>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
index 0df53d5649881c924be60140338e5fff7c2bc840..b793dc885ef9213f290f118be55066a0e2837086 100644 (file)
@@ -644,6 +644,7 @@ int TreeGroupCommand::makeSimsShared() {
             }else {
                 m->clearGroups();
                 Groups.clear();
+                m->Treenames.clear();
                 vector<SharedRAbundVector*> temp;
                 for (int i = 0; i < lookup.size(); i++) {
                     if (lookup[i]->getNumSeqs() < subsampleSize) { 
@@ -652,6 +653,7 @@ int TreeGroupCommand::makeSimsShared() {
                     }else { 
                         Groups.push_back(lookup[i]->getGroup()); 
                         temp.push_back(lookup[i]);
+                        m->Treenames.push_back(lookup[i]->getGroup());
                     }
                 } 
                 lookup = temp;
@@ -917,11 +919,15 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
             }
                }
-               
+        
+               if (m->debug) {  m->mothurOut("[DEBUG]: done with iters.\n"); }
+            
         if (iters != 1) {
             //we need to find the average distance and standard deviation for each groups distance
             vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals);  
             
+            if (m->debug) {  m->mothurOut("[DEBUG]: found averages.\n"); }
+            
             //create average tree for each calc
             for (int i = 0; i < calcDists.size(); i++) {
                 vector< vector<double> > matrix; //square matrix to represent the distance
@@ -951,6 +957,8 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 if (newTree != NULL) { writeTree(outputFile, newTree); }                
             }
             
+            if (m->debug) {  m->mothurOut("[DEBUG]: done averages trees.\n"); }
+            
             //create all trees for each calc and find their consensus tree
             for (int i = 0; i < calcDists.size(); i++) {
                 if (m->control_pressed) { break; }
@@ -982,7 +990,7 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                         int row = calcDistsTotals[myIter][i][j].seq1;
                         int column = calcDistsTotals[myIter][i][j].seq2;
                         double dist = calcDistsTotals[myIter][i][j].dist;
-                        
+                       
                         matrix[row][column] = dist;
                         matrix[column][row] = dist;
                     }
@@ -997,11 +1005,15 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 outAll.close();
                 if (m->control_pressed) { for (int k = 0; k < trees.size(); k++) { delete trees[k]; } }
                 
+                if (m->debug) {  m->mothurOut("[DEBUG]: done all trees.\n"); }
+                
                 Consensus consensus;
                 //clear old tree names if any
                 m->Treenames.clear(); m->Treenames = m->getGroups(); //may have changed if subsample eliminated groups
                 Tree* conTree = consensus.getTree(trees);
                 
+                if (m->debug) {  m->mothurOut("[DEBUG]: done cons tree.\n"); }
+                
                 //create a new filename
                 variables["[tag]"] = "cons";
                 string conFile = getOutputFileName("tree",variables);                          
index e9bd081605b4211a26084334122ee12da83b949b..61b19b8dbaa61546c0e984e444fa8ddda5b366f1 100644 (file)
@@ -28,7 +28,7 @@ vector<string> TrimFlowsCommand::setParameters(){
                CommandParameter psignal("signal", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(psignal);
                CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "","",false,false); parameters.push_back(pnoise);
                CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "","",false,false); parameters.push_back(pallfiles);
-               CommandParameter porder("order", "String", "", "TACG", "", "", "","",false,false); parameters.push_back(porder);
+        CommandParameter porder("order", "Multiple", "A-B", "A", "", "", "","",false,false, true); parameters.push_back(porder);
                CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfasta);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
@@ -214,12 +214,18 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
        
-                       flowOrder = validParameter.validFile(parameters, "order", false);
-                       if (flowOrder == "not found"){ flowOrder = "TACG";              }
-                       else if(flowOrder.length() != 4){
-                               m->mothurOut("The value of the order option must be four bases long\n");
-                       }
-
+                       temp = validParameter.validFile(parameters, "order", false);  if (temp == "not found"){         temp = "A";     }
+            if (temp.length() > 1) {  m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A or B. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n");  abort=true;
+            }
+            else {
+                if (toupper(temp[0]) == 'A') {  flowOrder = "TACG";   }
+                else if(toupper(temp[0]) == 'B'){ 
+                    flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC";   }
+                else {
+                    m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A or B. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n");  abort=true;
+                }
+            }
+            
                        if(oligoFileName == "") {       allFiles = 0;           }
                        else                                    {       allFiles = 1;           }
 
@@ -228,7 +234,6 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
             numLinkers = 0;
             numSpacers = 0;
                }
-               
        }
        catch(exception& e) {
                m->errorOut(e, "TrimFlowsCommand", "TrimFlowsCommand");
index ecdca9bfe60f44c972d821b5f82c7353ee9a9561..a53f4e85388f0f7bbb6fda40c2074b4f555e29d3 100644 (file)
@@ -144,6 +144,24 @@ TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, v
                barcodes = br;
                primers = pr;
                revPrimer = r;
+        
+        maxFBarcodeLength = 0;
+        for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
+            string oligo = it->first;
+            if(oligo.length() > maxFBarcodeLength){
+                maxFBarcodeLength = oligo.length();
+            }
+        }
+        maxRBarcodeLength = maxFBarcodeLength;
+        
+        maxFPrimerLength = 0;
+        for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
+            string oligo = it->first;
+            if(oligo.length() > maxFPrimerLength){
+                maxFPrimerLength = oligo.length();
+            }
+        }
+        maxRPrimerLength = maxFPrimerLength;
        }
        catch(exception& e) {
                m->errorOut(e, "TrimOligos", "TrimOligos");
@@ -152,7 +170,135 @@ TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, v
 }
 /********************************************************************/
 TrimOligos::~TrimOligos() {}
+//********************************************************************/
+bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
+       try {
+               
+               string rawSequence = seq.getUnaligned();
+               
+               for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
+                       string oligo = it->first;
+                       
+                       if(rawSequence.length() < oligo.length()) {  break;  }
+                       
+                       //search for primer
+            int olength = oligo.length();
+            for (int j = 0; j < rawSequence.length()-olength; j++){
+                if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
+                string rawChunk = rawSequence.substr(j, olength);
+                if(compareDNASeq(oligo, rawChunk)) {
+                    primerStart = j;
+                    primerEnd = primerStart + olength;
+                    return true;
+                }
+                
+            }
+        }      
+               
+        primerStart = 0; primerEnd = 0;
+        //if you don't want to allow for diffs
+               if (pdiffs == 0) { return false;  }
+               else { //try aligning and see if you can find it
+                                               
+                       //can you find the barcode
+                       int minDiff = 1e6;
+                       int minCount = 1;
+                       
+            Alignment* alignment;
+            if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));  }
+            else{ alignment = NULL; } 
+            
+                       for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
+                
+                string prim = it->first;
+                //search for primer
+                int olength = prim.length();
+                if (rawSequence.length() < olength) {} //ignore primers too long for this seq
+                else{
+                    for (int j = 0; j < rawSequence.length()-olength; j++){
+                        
+                        string oligo = it->first;
+
+                        if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
+                               
+                        string rawChunk = rawSequence.substr(j, olength+pdiffs);
+                        
+                        //use needleman to align first primer.length()+numdiffs of sequence to each barcode
+                        alignment->align(oligo, rawChunk);
+                        oligo = alignment->getSeqAAln();
+                        string temp = alignment->getSeqBAln();
+                               
+                        int alnLength = oligo.length();
+                               
+                        for(int i=oligo.length()-1;i>=0;i--){
+                            if(oligo[i] != '-'){       alnLength = i+1;        break;  }
+                        }
+                        oligo = oligo.substr(0,alnLength);
+                        temp = temp.substr(0,alnLength);
+                               
+                        int numDiff = countDiffs(oligo, temp);
+                               
+                        if(numDiff < minDiff){
+                            minDiff = numDiff;
+                            minCount = 1;
+                            primerStart = j;
+                            primerEnd = primerStart + alnLength;
+                        }else if(numDiff == minDiff){ minCount++; }
+                    }
+                }
+                       }
+                       
+            if (alignment != NULL) {  delete alignment;  }
+            
+                       if(minDiff > pdiffs)    {       primerStart = 0; primerEnd = 0; return false;           }       //no good matches
+                       else if(minCount > 1)   {       primerStart = 0; primerEnd = 0; return false;   }       //can't tell the difference between multiple primers
+                       else{  return true; }
+               }
+       
+        primerStart = 0; primerEnd = 0;
+               return false;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "stripForward");
+               exit(1);
+       }
+}
+//******************************************************************/
+bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
+       try {
+               
+               string rawSequence = seq.getUnaligned();
+               
+               for(int i=0;i<revPrimer.size();i++){
+                       string oligo = revPrimer[i];
+                       if(rawSequence.length() < oligo.length()) {  break;  }
+                       
+                       //search for primer
+            int olength = oligo.length();
+            for (int j = rawSequence.length()-olength; j >= 0; j--){
+                if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
+                string rawChunk = rawSequence.substr(j, olength);
+                
+                if(compareDNASeq(oligo, rawChunk)) {
+                    primerStart = j;
+                    primerEnd = primerStart + olength;
+                    return true;
+                }
+                
+            }
+               }       
+               
+        primerStart = 0; primerEnd = 0;
+               return false;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PcrSeqsCommand", "findReverse");
+               exit(1);
+       }
+}
 //*******************************************************************/
+
 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
        try {
                
index 8aa44bdd365356f195069e4f11528300844fbc7a..d72b8146d36bc659ec13ddffc28463ef4a7b8204 100644 (file)
@@ -50,6 +50,11 @@ class TrimOligos {
     
         bool stripSpacer(Sequence&);
         bool stripSpacer(Sequence&, QualityScores&);
+    
+        //seq, primerStart, primerEnd
+        bool findForward(Sequence&, int&, int&);
+        bool findReverse(Sequence&, int&, int&);
+    
                                
        
        private: