]> git.donarmstrong.com Git - mothur.git/commitdiff
Merge remote-tracking branch 'mothur/master'
authorPat Schloss <pschloss@umich.edu>
Wed, 4 Apr 2012 19:26:37 +0000 (15:26 -0400)
committerPat Schloss <pschloss@umich.edu>
Wed, 4 Apr 2012 19:26:37 +0000 (15:26 -0400)
161 files changed:
Mothur.xcodeproj/project.pbxproj
aligncommand.cpp
aligncommand.h
alignmentdb.cpp
alignmentdb.h
bayesian.cpp
bellerophon.cpp
blastalign.hpp
blastdb.cpp
blastdb.hpp
catchallcommand.cpp
chimeraccodecommand.cpp
chimeracheckcommand.cpp
chimeraperseuscommand.cpp
chimeraperseuscommand.h
chimerapintailcommand.cpp
chimeraslayercommand.cpp
chimeraslayercommand.h
chimerauchimecommand.cpp
chimerauchimecommand.h
chopseqscommand.cpp
classifyseqscommand.cpp
classifyseqscommand.h
classifytreecommand.cpp [new file with mode: 0644]
classifytreecommand.h [new file with mode: 0644]
clusterclassic.cpp
clustersplitcommand.cpp
clustersplitcommand.h
commandfactory.cpp
cooccurrencecommand.cpp [new file with mode: 0644]
cooccurrencecommand.h [new file with mode: 0644]
corraxescommand.cpp
countseqscommand.cpp
createdatabasecommand.cpp [new file with mode: 0644]
createdatabasecommand.h [new file with mode: 0644]
database.hpp
decalc.cpp
deconvolutecommand.cpp
distancecommand.cpp
distancecommand.h
distancedb.cpp
distancedb.hpp
eachgapdist.h
engine.cpp
filterseqscommand.cpp
filterseqscommand.h
fisher2.c [deleted file]
fisher2.h [deleted file]
flowdata.cpp
formatcolumn.cpp
formatphylip.cpp
getsharedotucommand.cpp
hcluster.cpp
ignoregaps.h
indicatorcommand.cpp
kmerdb.hpp
linearalgebra.cpp
linearalgebra.h
makefile
maligner.cpp
matrixoutputcommand.cpp
matrixoutputcommand.h
metastats.h [deleted file]
metastats2.c [deleted file]
metastatscommand.cpp
metastatscommand.h
mothur.cpp
mothur.h
mothurmetastats.cpp
mothurmetastats.h
mothurout.cpp
mothurout.h
myPerseus.cpp
myseqdist.cpp
myutils.cpp
nmdscommand.cpp
nseqs.h
onegapdist.h
onegapignore.h
optionparser.cpp
otuassociationcommand.cpp
pairwiseseqscommand.cpp
pairwiseseqscommand.h
parsefastaqcommand.cpp
parsimony.cpp
pcrseqscommand.h [new file with mode: 0644]
phylodiversity.cpp
phylodiversitycommand.cpp
phylotree.cpp
phylotypecommand.cpp
pintail.cpp
prcseqscommand.cpp [new file with mode: 0644]
preclustercommand.cpp
preclustercommand.h
qualityscores.h
rarefact.cpp
rarefactcommand.cpp
rarefactcommand.h
referencedb.cpp
screenseqscommand.cpp
screenseqscommand.h
seqerrorcommand.cpp
seqerrorcommand.h
seqnoise.cpp
seqsummarycommand.cpp
seqsummarycommand.h
sequence.cpp
sequence.hpp
sequenceparser.cpp
setdircommand.cpp
sffinfocommand.cpp
sffinfocommand.h
sharedace.cpp
sharedanderbergs.cpp
sharedbraycurtis.cpp
sharedchao1.cpp
sharedcommand.cpp
sharedjclass.cpp
sharedkulczynski.cpp
sharedkulczynskicody.cpp
sharedlennon.cpp
sharedmorisitahorn.cpp
sharedochiai.cpp
sharedrabundfloatvector.cpp
sharedrabundvector.cpp
sharedsobs.cpp
sharedsobscollectsummary.cpp
sharedsorclass.cpp
sharedthetan.cpp
sharedthetayc.cpp
shhhercommand.cpp
shhhercommand.h
shhhseqscommand.cpp
shhhseqscommand.h
sortseqscommand.cpp [new file with mode: 0644]
sortseqscommand.h [new file with mode: 0644]
subsample.cpp [new file with mode: 0644]
subsample.h [new file with mode: 0644]
subsamplecommand.cpp
subsamplecommand.h
suffixdb.hpp
suffixnodes.hpp
suffixtree.cpp
suffixtree.hpp
summaryqualcommand.cpp
summaryqualcommand.h
summarysharedcommand.cpp
summarysharedcommand.h
tree.cpp
trialSwap2.cpp [new file with mode: 0644]
trialswap2.h [new file with mode: 0644]
trimflowscommand.cpp
trimflowscommand.h
trimoligos.cpp
trimoligos.h
trimseqscommand.cpp
trimseqscommand.h
unifracweightedcommand.cpp
unweighted.cpp
uvest.cpp
weighted.cpp

index df21610099f7d7de8642c1beddeba3f00f90dca9..b181f5b68fee47805e3691c67d4ba5fd8af82f3e 100644 (file)
@@ -3,7 +3,7 @@
        archiveVersion = 1;
        classes = {
        };
-       objectVersion = 45;
+       objectVersion = 46;
        objects = {
 
 /* Begin PBXBuildFile section */
                A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; };
                A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; };
                A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; };
+               A76CDD821510F143004C8458 /* prcseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A76CDD811510F143004C8458 /* prcseqscommand.cpp */; };
                A7730EFF13967241007433A3 /* countseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7730EFE13967241007433A3 /* countseqscommand.cpp */; };
                A774101414695AF60098E6AC /* shhhseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A774101314695AF60098E6AC /* shhhseqscommand.cpp */; };
                A774104814696F320098E6AC /* myseqdist.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A774104614696F320098E6AC /* myseqdist.cpp */; };
                A77410F614697C300098E6AC /* seqnoise.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77410F414697C300098E6AC /* seqnoise.cpp */; };
                A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; };
                A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; };
+               A77EBD2F1523709100ED407C /* createdatabasecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */; };
+               A7876A26152A017C00A0AE86 /* subsample.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7876A25152A017C00A0AE86 /* subsample.cpp */; };
                A79234D713C74BF6002B08E2 /* mothurfisher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79234D613C74BF6002B08E2 /* mothurfisher.cpp */; };
                A795840D13F13CD900F201D5 /* countgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A795840C13F13CD900F201D5 /* countgroupscommand.cpp */; };
                A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; };
+               A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */; };
                A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */; };
                A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; };
                A7BF221414587886000AD524 /* myPerseus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF221214587886000AD524 /* myPerseus.cpp */; };
                A7BF2232145879B2000AD524 /* chimeraperseuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */; };
+               A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; };
+               A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */; };
                A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; };
                A7E9B88212D37EC400DA6239 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65112D37EC300DA6239 /* aligncommand.cpp */; };
                A7E9B88312D37EC400DA6239 /* alignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65312D37EC300DA6239 /* alignment.cpp */; };
                A7E9B8C412D37EC400DA6239 /* fastamap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6DE12D37EC400DA6239 /* fastamap.cpp */; };
                A7E9B8C512D37EC400DA6239 /* fileoutput.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6E012D37EC400DA6239 /* fileoutput.cpp */; };
                A7E9B8C612D37EC400DA6239 /* filterseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6E312D37EC400DA6239 /* filterseqscommand.cpp */; };
-               A7E9B8C712D37EC400DA6239 /* fisher2.c in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6E512D37EC400DA6239 /* fisher2.c */; };
                A7E9B8C812D37EC400DA6239 /* flowdata.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6E712D37EC400DA6239 /* flowdata.cpp */; };
                A7E9B8C912D37EC400DA6239 /* formatcolumn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6E912D37EC400DA6239 /* formatcolumn.cpp */; };
                A7E9B8CA12D37EC400DA6239 /* formatphylip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6EC12D37EC400DA6239 /* formatphylip.cpp */; };
                A7E9B8FB12D37EC400DA6239 /* memeuclidean.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74F12D37EC400DA6239 /* memeuclidean.cpp */; };
                A7E9B8FC12D37EC400DA6239 /* mempearson.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75112D37EC400DA6239 /* mempearson.cpp */; };
                A7E9B8FD12D37EC400DA6239 /* mergefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75312D37EC400DA6239 /* mergefilecommand.cpp */; };
-               A7E9B8FE12D37EC400DA6239 /* metastats2.c in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75612D37EC400DA6239 /* metastats2.c */; };
                A7E9B8FF12D37EC400DA6239 /* metastatscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75712D37EC400DA6239 /* metastatscommand.cpp */; };
                A7E9B90012D37EC400DA6239 /* mgclustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75912D37EC400DA6239 /* mgclustercommand.cpp */; };
                A7E9B90112D37EC400DA6239 /* mothur.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75B12D37EC400DA6239 /* mothur.cpp */; };
                A7E9B98D12D37EC400DA6239 /* weighted.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87C12D37EC400DA6239 /* weighted.cpp */; };
                A7E9B98E12D37EC400DA6239 /* weightedlinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */; };
                A7E9B98F12D37EC400DA6239 /* whittaker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87F12D37EC400DA6239 /* whittaker.cpp */; };
+               A7EEB0F514F29BFE00344B83 /* classifytreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */; };
                A7F9F5CF141A5E500032F693 /* sequenceparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7F9F5CE141A5E500032F693 /* sequenceparser.cpp */; };
                A7FA10021302E097003860FE /* mantelcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FA10011302E096003860FE /* mantelcommand.cpp */; };
                A7FA2AC714A0E881007C09A6 /* bsplvb.f in Sources */ = {isa = PBXBuildFile; fileRef = A7FA2ABC14A0E881007C09A6 /* bsplvb.f */; };
                A754149614840CF7005850D1 /* summaryqualcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = summaryqualcommand.cpp; sourceTree = "<group>"; };
                A75790571301749D00A30DAB /* homovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = homovacommand.h; sourceTree = "<group>"; };
                A75790581301749D00A30DAB /* homovacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = homovacommand.cpp; sourceTree = "<group>"; };
+               A76CDD7F1510F09A004C8458 /* pcrseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pcrseqscommand.h; sourceTree = "<group>"; };
+               A76CDD811510F143004C8458 /* prcseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = prcseqscommand.cpp; sourceTree = "<group>"; };
                A7730EFD13967241007433A3 /* countseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = countseqscommand.h; sourceTree = "<group>"; };
                A7730EFE13967241007433A3 /* countseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = countseqscommand.cpp; sourceTree = "<group>"; };
                A774101214695AF60098E6AC /* shhhseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = shhhseqscommand.h; sourceTree = "<group>"; };
                A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getcommandinfocommand.cpp; sourceTree = "<group>"; };
                A77A221D139001B600B0BE70 /* deuniquetreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniquetreecommand.h; sourceTree = "<group>"; };
                A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniquetreecommand.cpp; sourceTree = "<group>"; };
+               A77EBD2C1523707F00ED407C /* createdatabasecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = createdatabasecommand.h; sourceTree = "<group>"; };
+               A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = createdatabasecommand.cpp; sourceTree = "<group>"; };
+               A7876A25152A017C00A0AE86 /* subsample.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = subsample.cpp; sourceTree = "<group>"; };
+               A7876A28152A018B00A0AE86 /* subsample.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = subsample.h; sourceTree = "<group>"; };
                A79234D513C74BF6002B08E2 /* mothurfisher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mothurfisher.h; sourceTree = "<group>"; };
                A79234D613C74BF6002B08E2 /* mothurfisher.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mothurfisher.cpp; sourceTree = "<group>"; };
                A795840B13F13CD900F201D5 /* countgroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = countgroupscommand.h; sourceTree = "<group>"; };
                A795840C13F13CD900F201D5 /* countgroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = countgroupscommand.cpp; sourceTree = "<group>"; };
                A799F5B71309A3E000AEEFA0 /* makefastqcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makefastqcommand.h; sourceTree = "<group>"; };
                A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makefastqcommand.cpp; sourceTree = "<group>"; };
+               A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sortseqscommand.cpp; sourceTree = "<group>"; };
+               A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sortseqscommand.h; sourceTree = "<group>"; };
                A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = otuassociationcommand.cpp; sourceTree = "<group>"; };
                A7A3C8C814D041AD00B1BFBE /* otuassociationcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = otuassociationcommand.h; sourceTree = "<group>"; };
                A7A61F1A130035C800E05B6B /* LICENSE */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = LICENSE; sourceTree = "<group>"; };
                A7BF221314587886000AD524 /* myPerseus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myPerseus.h; sourceTree = "<group>"; };
                A7BF2230145879B2000AD524 /* chimeraperseuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraperseuscommand.h; sourceTree = "<group>"; };
                A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeraperseuscommand.cpp; sourceTree = "<group>"; };
+               A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = cooccurrencecommand.cpp; sourceTree = "<group>"; };
+               A7C3DC0A14FE457500FE1924 /* cooccurrencecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cooccurrencecommand.h; sourceTree = "<group>"; };
+               A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = trialSwap2.cpp; sourceTree = "<group>"; };
+               A7C3DC0E14FE469500FE1924 /* trialswap2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = trialswap2.h; sourceTree = "<group>"; };
                A7DAAFA3133A254E003956EB /* commandparameter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = commandparameter.h; sourceTree = "<group>"; };
                A7E9B64F12D37EC300DA6239 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = "<group>"; };
                A7E9B65012D37EC300DA6239 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = "<group>"; };
                A7E9B6E212D37EC400DA6239 /* filters.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = filters.h; sourceTree = "<group>"; };
                A7E9B6E312D37EC400DA6239 /* filterseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = filterseqscommand.cpp; sourceTree = "<group>"; };
                A7E9B6E412D37EC400DA6239 /* filterseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = filterseqscommand.h; sourceTree = "<group>"; };
-               A7E9B6E512D37EC400DA6239 /* fisher2.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = fisher2.c; sourceTree = "<group>"; };
-               A7E9B6E612D37EC400DA6239 /* fisher2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fisher2.h; sourceTree = "<group>"; };
                A7E9B6E712D37EC400DA6239 /* flowdata.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = flowdata.cpp; sourceTree = "<group>"; };
                A7E9B6E812D37EC400DA6239 /* flowdata.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = flowdata.h; sourceTree = "<group>"; };
                A7E9B6E912D37EC400DA6239 /* formatcolumn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = formatcolumn.cpp; sourceTree = "<group>"; };
                A7E9B75212D37EC400DA6239 /* mempearson.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mempearson.h; sourceTree = "<group>"; };
                A7E9B75312D37EC400DA6239 /* mergefilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mergefilecommand.cpp; sourceTree = "<group>"; };
                A7E9B75412D37EC400DA6239 /* mergefilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mergefilecommand.h; sourceTree = "<group>"; };
-               A7E9B75512D37EC400DA6239 /* metastats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = metastats.h; sourceTree = "<group>"; };
-               A7E9B75612D37EC400DA6239 /* metastats2.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = metastats2.c; sourceTree = "<group>"; };
                A7E9B75712D37EC400DA6239 /* metastatscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = metastatscommand.cpp; sourceTree = "<group>"; };
                A7E9B75812D37EC400DA6239 /* metastatscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = metastatscommand.h; sourceTree = "<group>"; };
                A7E9B75912D37EC400DA6239 /* mgclustercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mgclustercommand.cpp; sourceTree = "<group>"; };
                A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = weightedlinkage.cpp; sourceTree = SOURCE_ROOT; };
                A7E9B87F12D37EC400DA6239 /* whittaker.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = whittaker.cpp; sourceTree = "<group>"; };
                A7E9B88012D37EC400DA6239 /* whittaker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = whittaker.h; sourceTree = "<group>"; };
+               A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifytreecommand.cpp; sourceTree = "<group>"; };
+               A7EEB0F714F29C1B00344B83 /* classifytreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifytreecommand.h; sourceTree = "<group>"; };
                A7F9F5CD141A5E500032F693 /* sequenceparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sequenceparser.h; sourceTree = "<group>"; };
                A7F9F5CE141A5E500032F693 /* sequenceparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sequenceparser.cpp; sourceTree = "<group>"; };
                A7FA10001302E096003860FE /* mantelcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mantelcommand.h; sourceTree = "<group>"; };
                                A7E9B82D12D37EC400DA6239 /* singlelinkage.cpp */,
                                A7E9B83012D37EC400DA6239 /* slibshuff.cpp */,
                                A7E9B83112D37EC400DA6239 /* slibshuff.h */,
+                               A7876A28152A018B00A0AE86 /* subsample.h */,
+                               A7876A25152A017C00A0AE86 /* subsample.cpp */,
+                               A7C3DC0E14FE469500FE1924 /* trialswap2.h */,
+                               A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */,
                                A7FF19F0140FFDA500AD216D /* trimoligos.h */,
                                A7FF19F1140FFDA500AD216D /* trimoligos.cpp */,
                                A7E9B87412D37EC400DA6239 /* validcalculator.cpp */,
                                A7E9B69012D37EC400DA6239 /* classifyotucommand.cpp */,
                                A7E9B69312D37EC400DA6239 /* classifyseqscommand.h */,
                                A7E9B69212D37EC400DA6239 /* classifyseqscommand.cpp */,
+                               A7EEB0F714F29C1B00344B83 /* classifytreecommand.h */,
+                               A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */,
                                A7E9B69712D37EC400DA6239 /* clearcutcommand.h */,
                                A7E9B69612D37EC400DA6239 /* clearcutcommand.cpp */,
                                A73DDBB813C4A0D1006AAE38 /* clearmemorycommand.h */,
                                A7E9B6B512D37EC400DA6239 /* consensuscommand.cpp */,
                                A7E9B6B812D37EC400DA6239 /* consensusseqscommand.h */,
                                A7E9B6B712D37EC400DA6239 /* consensusseqscommand.cpp */,
+                               A7C3DC0A14FE457500FE1924 /* cooccurrencecommand.h */,
+                               A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */,
                                A7E9B6BA12D37EC400DA6239 /* corraxescommand.h */,
                                A7E9B6B912D37EC400DA6239 /* corraxescommand.cpp */,
                                A795840B13F13CD900F201D5 /* countgroupscommand.h */,
                                A795840C13F13CD900F201D5 /* countgroupscommand.cpp */,
                                A7730EFD13967241007433A3 /* countseqscommand.h */,
                                A7730EFE13967241007433A3 /* countseqscommand.cpp */,
+                               A77EBD2C1523707F00ED407C /* createdatabasecommand.h */,
+                               A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */,
                                A7E9B6C412D37EC400DA6239 /* deconvolutecommand.h */,
                                A7E9B6C312D37EC400DA6239 /* deconvolutecommand.cpp */,
                                A7E9B6C612D37EC400DA6239 /* degapseqscommand.h */,
                                A7FC486612D795D60055BC5C /* pcacommand.cpp */,
                                A7E9B78812D37EC400DA6239 /* pcoacommand.h */,
                                A7E9B78712D37EC400DA6239 /* pcoacommand.cpp */,
+                               A76CDD7F1510F09A004C8458 /* pcrseqscommand.h */,
+                               A76CDD811510F143004C8458 /* prcseqscommand.cpp */,
                                A7E9B78C12D37EC400DA6239 /* phylodiversitycommand.h */,
                                A7E9B78B12D37EC400DA6239 /* phylodiversitycommand.cpp */,
                                A7E9B79212D37EC400DA6239 /* phylotypecommand.h */,
                                A7E9B82712D37EC400DA6239 /* shhhercommand.cpp */,
                                A774101214695AF60098E6AC /* shhhseqscommand.h */,
                                A774101314695AF60098E6AC /* shhhseqscommand.cpp */,
+                               A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */,
+                               A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */,
                                A7E9B84012D37EC400DA6239 /* splitabundcommand.h */,
                                A7E9B83F12D37EC400DA6239 /* splitabundcommand.cpp */,
                                A7E9B84212D37EC400DA6239 /* splitgroupscommand.h */,
                        isa = PBXGroup;
                        children = (
                                A7D161E7149F7F50000523E8 /* fortran */,
-                               A7E9B6E512D37EC400DA6239 /* fisher2.c */,
-                               A7E9B6E612D37EC400DA6239 /* fisher2.h */,
-                               A7E9B75512D37EC400DA6239 /* metastats.h */,
-                               A7E9B75612D37EC400DA6239 /* metastats2.c */,
                                A79234D513C74BF6002B08E2 /* mothurfisher.h */,
                                A79234D613C74BF6002B08E2 /* mothurfisher.cpp */,
                                A73DDC3613C4BF64006AAE38 /* mothurmetastats.h */,
                08FB7793FE84155DC02AAC07 /* Project object */ = {
                        isa = PBXProject;
                        attributes = {
+                               LastUpgradeCheck = 0420;
                                ORGANIZATIONNAME = "Schloss Lab";
                        };
                        buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */;
-                       compatibilityVersion = "Xcode 3.1";
+                       compatibilityVersion = "Xcode 3.2";
                        developmentRegion = English;
                        hasScannedForEncodings = 1;
                        knownRegions = (
                                A7E9B8C412D37EC400DA6239 /* fastamap.cpp in Sources */,
                                A7E9B8C512D37EC400DA6239 /* fileoutput.cpp in Sources */,
                                A7E9B8C612D37EC400DA6239 /* filterseqscommand.cpp in Sources */,
-                               A7E9B8C712D37EC400DA6239 /* fisher2.c in Sources */,
                                A7E9B8C812D37EC400DA6239 /* flowdata.cpp in Sources */,
                                A7E9B8C912D37EC400DA6239 /* formatcolumn.cpp in Sources */,
                                A7E9B8CA12D37EC400DA6239 /* formatphylip.cpp in Sources */,
                                A7E9B8FB12D37EC400DA6239 /* memeuclidean.cpp in Sources */,
                                A7E9B8FC12D37EC400DA6239 /* mempearson.cpp in Sources */,
                                A7E9B8FD12D37EC400DA6239 /* mergefilecommand.cpp in Sources */,
-                               A7E9B8FE12D37EC400DA6239 /* metastats2.c in Sources */,
                                A7E9B8FF12D37EC400DA6239 /* metastatscommand.cpp in Sources */,
                                A7E9B90012D37EC400DA6239 /* mgclustercommand.cpp in Sources */,
                                A7E9B90112D37EC400DA6239 /* mothur.cpp in Sources */,
                                A7FA2B1614A0EBEA007C09A6 /* sslvrg.f in Sources */,
                                A7FA2B5B14A0F0C2007C09A6 /* intrv.f in Sources */,
                                A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */,
+                               A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */,
+                               A7EEB0F514F29BFE00344B83 /* classifytreecommand.cpp in Sources */,
+                               A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */,
+                               A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */,
+                               A76CDD821510F143004C8458 /* prcseqscommand.cpp in Sources */,
+                               A77EBD2F1523709100ED407C /* createdatabasecommand.cpp in Sources */,
+                               A7876A26152A017C00A0AE86 /* subsample.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
                                ALWAYS_SEARCH_USER_PATHS = NO;
                                COPY_PHASE_STRIP = NO;
                                GCC_DYNAMIC_NO_PIC = NO;
-                               GCC_ENABLE_FIX_AND_CONTINUE = YES;
                                GCC_MODEL_TUNING = G5;
-                               GCC_OPTIMIZATION_LEVEL = 3;
+                               GCC_OPTIMIZATION_LEVEL = 0;
                                INSTALL_PATH = /usr/local/bin;
                                PRODUCT_NAME = Mothur;
                                SDKROOT = macosx10.6;
                                GCC_ENABLE_SSE3_EXTENSIONS = NO;
                                GCC_ENABLE_SSE41_EXTENSIONS = NO;
                                GCC_ENABLE_SSE42_EXTENSIONS = NO;
-                               GCC_OPTIMIZATION_LEVEL = 3;
+                               GCC_OPTIMIZATION_LEVEL = 0;
                                GCC_PREPROCESSOR_DEFINITIONS = (
                                        "MOTHUR_FILES=\"\\\"../release\\\"\"",
-                                       "VERSION=\"\\\"1.23.0\\\"\"",
-                                       "RELEASE_DATE=\"\\\"1/9/2012\\\"\"",
+                                       "VERSION=\"\\\"1.24.0\\\"\"",
+                                       "RELEASE_DATE=\"\\\"3/12/2012\\\"\"",
                                );
                                "GCC_VERSION[arch=*]" = "";
                                GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
                                        "-lncurses",
                                        "-lreadline",
                                );
-                               PREBINDING = NO;
                                SDKROOT = macosx10.6;
                                USER_HEADER_SEARCH_PATHS = "";
                        };
                                GCC_C_LANGUAGE_STANDARD = gnu99;
                                GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
                                GCC_MODEL_TUNING = "";
-                               GCC_OPTIMIZATION_LEVEL = 3;
+                               GCC_OPTIMIZATION_LEVEL = 0;
                                GCC_PREPROCESSOR_DEFINITIONS = (
-                                       "VERSION=\"\\\"1.19.0\\\"\"",
-                                       "RELEASE_DATE=\"\\\"5/9/2011\\\"\"",
+                                       "VERSION=\"\\\"1.24.0\\\"\"",
+                                       "RELEASE_DATE=\"\\\"3/12/2012\\\"\"",
                                );
                                GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
                                GCC_WARN_ABOUT_RETURN_TYPE = YES;
                                        "-lncurses",
                                        "-lreadline",
                                );
-                               PREBINDING = NO;
                                SDKROOT = macosx10.6;
                        };
                        name = Release;
index f03a9017d4867f1d8634a0ef72a25f1590bf13b6..8215de301b646f4d4571944de85dddb40478e7e5 100644 (file)
@@ -422,7 +422,7 @@ int AlignCommand::execute(){
 #else
 
                        vector<unsigned long long> positions; 
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        positions = m->divideFile(candidateFileNames[s], processors);
                        for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(new linePair(positions[i], positions[(i+1)]));  }
                #else
@@ -430,7 +430,8 @@ int AlignCommand::execute(){
                                lines.push_back(new linePair(0, 1000));
                        }else {
                                positions = m->setFilePosFasta(candidateFileNames[s], numFastaSeqs); 
-                               
+                               if (positions.size() < processors) { processors = positions.size(); }
+                
                                //figure out how many sequences you have to process
                                int numSeqsPerProcessor = numFastaSeqs / processors;
                                for (int i = 0; i < processors; i++) {
@@ -617,7 +618,7 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                        }
                        delete candidateSeq;
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = inFASTA.tellg();
                                if ((pos == -1) || (pos >= filePos->end)) { break; }
                        #else
@@ -826,7 +827,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
        try {
                int num = 0;
                processIDS.resize(0);
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                
                //loop through and create all the processes you want
index b455761ed6fdcf2e8c11a6bcbb52b1704caee464..c7ce14406c570f50a3510532e55ba7b5469c5c62 100644 (file)
@@ -120,7 +120,7 @@ struct alignData {
 };
 
 /**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
 static DWORD WINAPI MyAlignThreadFunction(LPVOID lpParam){ 
        alignData* pDataArray;
index df4eaec783dbb81e33c09c3d97473c02425ba1ce..f59c5db7d6e6ef2d039ccdda8b48a6ea3fb6428a 100644 (file)
 #include "blastdb.hpp"
 #include "referencedb.h"
 
-/**************************************************************************************************/
-//deep copy
-AlignmentDB::AlignmentDB(const AlignmentDB& adb) : numSeqs(adb.numSeqs), longest(adb.longest), method(adb.method), emptySequence(adb.emptySequence), threadID(adb.threadID) {
-       try {
-               
-               m = MothurOut::getInstance();
-               if (adb.method == "blast") {
-                       search = new BlastDB(*((BlastDB*)adb.search));
-               }else if(adb.method == "kmer") {
-                       search = new KmerDB(*((KmerDB*)adb.search));
-               }else if(adb.method == "suffix") {
-                       search = new SuffixDB(*((SuffixDB*)adb.search));
-               }else {
-                       m->mothurOut("[ERROR]: cannot create copy of alignment database, unrecognized method - " + adb.method); m->mothurOutEndLine();
-               }
-               
-               for (int i = 0; i < adb.templateSequences.size(); i++) {
-                       Sequence temp(adb.templateSequences[i]);
-                       templateSequences.push_back(temp);
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "AlignmentDB", "AlignmentDB");
-               exit(1);
-       }
-       
-}
 /**************************************************************************************************/
 AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int tid){          //      This assumes that the template database is in fasta format, may 
        try {                                                                                   //      need to alter this in the future?
index 900aadc6eb8be36b9c0193f8d255a2fdc8312269..537af8d21548e118a8d7470133be1acd4aab11fb 100644 (file)
@@ -22,7 +22,6 @@ public:
 
        AlignmentDB(string, string, int, float, float, float, float, int);  //reads fastafile passed in and stores sequences
        AlignmentDB(string);
-       AlignmentDB(const AlignmentDB& adb);
        ~AlignmentDB();
        
        Sequence findClosestSequence(Sequence*);
index f7ea6e4351868a20a191169b995e94faff6fa053..54a123c5a7835d7c1a2c48d36fe3ec7070462027 100644 (file)
@@ -111,10 +111,9 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
                                //initialze probabilities
                                wordGenusProb.resize(numKmers);
                                WordPairDiffArr.resize(numKmers);
-                       //cout << numKmers << '\t' << genusNodes.size() << endl;
+                       
                                for (int j = 0; j < wordGenusProb.size(); j++) {        wordGenusProb[j].resize(genusNodes.size());             }
-                       //cout << numKmers << '\t' << genusNodes.size() << endl;        
-                               ofstream out;
+                    ofstream out;
                                ofstream out2;
                                
                                #ifdef USE_MPI
@@ -505,7 +504,7 @@ map<string, int> Bayesian::parseTaxMap(string newTax) {
                exit(1);
        }
 }
-/**************************************************************************************************/
+**************************************************************************************************/
 void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string inNumName) {
        try{
                
@@ -606,7 +605,7 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                                istringstream iss (tempBuf,istringstream::in);
                                float probTemp;
                                iss >> zeroCountProb[i] >> numbers[i] >> probTemp; 
-                               WordPairDiffArr[i].prob = tempProb;
+                               WordPairDiffArr[i].prob = probTemp;
 
                        }
                        
index 9dd21a4446e8d0ebd60ad7438dabe73d7a293128..833cfb907d6d4bd2aed8acb3d8522932ac3b2e7d 100644 (file)
@@ -314,7 +314,7 @@ int Bellerophon::getChimeras() {
        #else
        
                //divide breakpoints between processors
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        if(processors == 1){ 
                                lines.push_back(linePair(0, iters));    
                                
@@ -356,7 +356,7 @@ int Bellerophon::getChimeras() {
 
 int Bellerophon::createProcesses(vector<int> mid) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 0;
                int exitCommand = 1;
                vector<int> processIDS;
index 5b78da6b635770b53453cf9b6ec84493b17bf71a..31688bd86d77386d384fef73bb9c3289e681ada3 100644 (file)
@@ -1,3 +1,7 @@
+#ifndef BlastAlignment_H
+#define BlastAlignment_H
+
+
 /*
  *  blastalign.hpp
  *  
@@ -36,3 +40,7 @@ private:
        float gapExtend;
 };
 
+#endif
+
+
+
index bc646dfaa498750448ba741f5649450a185415dd..2eced7e14dc6fa42e0147747fc622bfa088250f7 100644 (file)
@@ -24,7 +24,7 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) {
                int randNumber = rand();
                //int randNumber = 12345;
                string pid = "";
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                pid += getpid();        
 #else
                pid += toString(threadID);      
@@ -42,7 +42,7 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) {
                        for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
                        path = path.substr(0, (tempPath.find_last_of('m')));
                        
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        path += "blast/bin/";   
 #else
                        path += "blast\\bin\\";
@@ -51,7 +51,7 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) {
                
                
                string formatdbCommand;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                formatdbCommand = path + "formatdb";    //      format the database, -o option gives us the ability
 #else
                formatdbCommand = path + "formatdb.exe";
@@ -64,7 +64,7 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) {
                if(ableToOpen == 1) {   m->mothurOut("[ERROR]: " + formatdbCommand + " file does not exist. mothur requires formatdb.exe."); m->mothurOutEndLine(); m->control_pressed = true; }
                
                string blastCommand;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                blastCommand = path + "blastall";       //      format the database, -o option gives us the ability
 #else
                blastCommand = path + "blastall.exe";
@@ -80,7 +80,7 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) {
                
                
                string megablastCommand;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                megablastCommand = path + "megablast";  //      format the database, -o option gives us the ability
 #else
                megablastCommand = path + "megablast.exe";
@@ -114,7 +114,7 @@ BlastDB::BlastDB(string b, int tid) : Database() {
                        for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
                        path = path.substr(0, (tempPath.find_last_of('m')));
                        
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        path += "blast/bin/";   
 #else
                        path += "blast\\bin\\";
@@ -123,7 +123,7 @@ BlastDB::BlastDB(string b, int tid) : Database() {
                
                int randNumber = rand();
                string pid = "";
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                pid += getpid();        
 #else
                pid += toString(threadID);      
@@ -134,7 +134,7 @@ BlastDB::BlastDB(string b, int tid) : Database() {
                blastFileName = pid + toString(randNumber) + ".blast";
                
                string formatdbCommand;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                formatdbCommand = path + "formatdb";    //      format the database, -o option gives us the ability
 #else
                formatdbCommand = path + "formatdb.exe";
@@ -149,7 +149,7 @@ BlastDB::BlastDB(string b, int tid) : Database() {
                if(ableToOpen == 1) {   m->mothurOut("[ERROR]: " +  formatdbCommand + " file does not exist. mothur requires formatdb.exe."); m->mothurOutEndLine(); m->control_pressed = true; }
                
                string blastCommand;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                blastCommand = path + "blastall";       //      format the database, -o option gives us the ability
 #else
                blastCommand = path + "blastall.exe";
@@ -165,7 +165,7 @@ BlastDB::BlastDB(string b, int tid) : Database() {
                
                
                string megablastCommand;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                megablastCommand = path + "megablast";  //      format the database, -o option gives us the ability
 #else
                megablastCommand = path + "megablast.exe";
@@ -226,7 +226,7 @@ vector<int> BlastDB::findClosestSequences(Sequence* seq, int n) {
                //      long.  With this setting, it seems comparable in speed to the suffix tree approach.
                
                string blastCommand;
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                
                        blastCommand = path + "blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);
                        blastCommand += (" -i " + (queryFileName+pid+toString(randNumber)) + " -o " + blastFileName+pid+toString(randNumber));
@@ -289,7 +289,7 @@ vector<int> BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) {
                //      long.  With this setting, it seems comparable in speed to the suffix tree approach.
 //7000004128189528left 0       100             66      0       0       1       66      61      126     1e-31    131    
                string blastCommand;
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        blastCommand = path + "megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
                        blastCommand += (" -i " + (queryFileName+pid+toString(randNumber)) + " -o " + blastFileName+pid+toString(randNumber));
                #else
@@ -363,7 +363,7 @@ void BlastDB::generateDB() {
                        
                string formatdbCommand;
                
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        formatdbCommand = path + "formatdb -p F -o T -i " + dbFileName; //      format the database, -o option gives us the ability
                #else
                        //formatdbCommand = path + "blast\\bin\\formatdb -p F -o T -i " + dbFileName;   //      format the database, -o option gives us the ability
index e2f4f57180560ae287c644a527553abde89e45e6..50a8379b68ec89130b7df1892aab4538daa77a1e 100644 (file)
@@ -18,8 +18,6 @@ class BlastDB : public Database {
 public:
        BlastDB(string, float, float, float, float, string, int);
        BlastDB(string, int);
-       BlastDB(const BlastDB& bdb) : dbFileName(bdb.dbFileName), queryFileName(bdb.queryFileName), blastFileName(bdb.blastFileName), path(bdb.path),
-                                                                       count(bdb.count), gapOpen(bdb.gapOpen), gapExtend(bdb.gapExtend), match(bdb.match), misMatch(bdb.misMatch), Database(bdb) {}
        ~BlastDB();
        
        void generateDB();
index 857f68431c54b8a0076577ae74b70ecfba6e9773..bf866cbedca07b7ee8b732eec683c64520ee2017 100644 (file)
@@ -176,7 +176,7 @@ int CatchAllCommand::execute() {
                
                savedOutputDir = outputDir;
                string catchAllCommandExe = ""; 
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        catchAllCommandExe += "mono " + path + "CatchAllcmdL.exe ";
                        if (outputDir == "") { outputDir = "./"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file whereever the exe is and not the cwd.
                #else
@@ -224,7 +224,7 @@ int CatchAllCommand::execute() {
                                                                                        
                                                //create system command
                                                string catchAllCommand = "";
-                                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                                        catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
                                                #else
                                                        if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
@@ -269,7 +269,7 @@ int CatchAllCommand::execute() {
                                                                                        
                                                //create system command
                                                string catchAllCommand = "";
-                                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                                        catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
                                                #else
                                                        if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
@@ -334,7 +334,7 @@ int CatchAllCommand::execute() {
                                
                                //create system command
                                string catchAllCommand = "";
-                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                        catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1";
                                #else
                                        if (outputPath.length() > 0) { outputPath = outputPath.substr(0, outputPath.length()-1); }
index af6e33a1254adc623948604100f8b9109e6c18e6..fc98e0fc45c3b576a3e2420ac415c016266b4c1c 100644 (file)
@@ -393,7 +393,7 @@ int ChimeraCcodeCommand::execute(){
                        
                        
                        //break up file
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                vector<unsigned long long> positions = m->divideFile(fastaFileNames[s], processors);
                        
                                for (int i = 0; i < (positions.size()-1); i++) {
@@ -524,7 +524,7 @@ int ChimeraCcodeCommand::driver(linePair* filePos, string outputFName, string fi
                        }
                        delete candidateSeq;
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = inFASTA.tellg();
                                if ((pos == -1) || (pos >= filePos->end)) { break; }
                        #else
@@ -611,7 +611,7 @@ int ChimeraCcodeCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File
 
 int ChimeraCcodeCommand::createProcesses(string outputFileName, string filename, string accnos) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 0;
                int num = 0;
                
index 9f53b1753903a44b469f0766a261894cb3177476..cc486d3ab690fd655c1de8a22ae406eafc2c242f 100644 (file)
@@ -427,7 +427,7 @@ int ChimeraCheckCommand::execute(){
                        
                        
                        //break up file
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                vector<unsigned long long> positions = m->divideFile(fastaFileNames[i], processors);
                        
                                for (int s = 0; s < (positions.size()-1); s++) {
@@ -522,7 +522,7 @@ int ChimeraCheckCommand::driver(linePair* filePos, string outputFName, string fi
                        }
                        delete candidateSeq;
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = inFASTA.tellg();
                                if ((pos == -1) || (pos >= filePos->end)) { break; }
                        #else
@@ -599,7 +599,7 @@ int ChimeraCheckCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File
 
 int ChimeraCheckCommand::createProcesses(string outputFileName, string filename) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 0;
                int num = 0;
                
index 8eaf536040a5703bbd7d9f4b93329f79cf6c6830..e7294a854ec4134ccad5e17aa0ed8abb51876d09 100644 (file)
@@ -466,14 +466,15 @@ string ChimeraPerseusCommand::getNamesFile(string& inputFile){
                string inputString = "fasta=" + inputFile;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
-               
+               m->mothurCalling = true;
+        
                Command* uniqueCommand = new DeconvoluteCommand(inputString);
                uniqueCommand->execute();
                
                map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
                
                delete uniqueCommand;
-               
+               m->mothurCalling = false;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                
                nameFile = filenames["name"][0];
@@ -533,6 +534,7 @@ vector<seqData> ChimeraPerseusCommand::loadSequences(SequenceParser& parser, str
                
                vector<seqData> sequences;
                bool error = false;
+        alignLength = 0;
                
                for (int i = 0; i < thisGroupsSeqs.size(); i++) {
                
@@ -543,6 +545,7 @@ vector<seqData> ChimeraPerseusCommand::loadSequences(SequenceParser& parser, str
                        else {
                                int num = m->getNumNames(it->second);
                                sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
+                if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
                        }
                }
                
@@ -570,7 +573,8 @@ vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
                bool error = false;
                ifstream in;
                m->openInputFile(inputFile, in);
-               
+               alignLength = 0;
+        
                while (!in.eof()) {
                        
                        if (m->control_pressed) { in.close(); return sequences; }
@@ -581,6 +585,7 @@ vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
                        if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
                        else {
                                sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second));
+                if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
                        }
                }
                in.close();
@@ -625,7 +630,7 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                }
                
                int numSeqs = sequences.size();
-               int alignLength = sequences[0].sequence.size();
+               //int alignLength = sequences[0].sequence.size();
                
                ofstream chimeraFile;
                ofstream accnosFile;
@@ -641,7 +646,7 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                
                for(int i=0;i<numSeqs;i++){     
                        if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
-                       
+    
                        vector<bool> restricted = chimeras;
                        
                        vector<vector<int> > leftDiffs(numSeqs);
@@ -662,7 +667,9 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                        
                        string dummyA, dummyB;
                        
-                       if(comparisons >= 2){   
+            if (sequences[i].sequence.size() < 3) { 
+                chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
+            }else if(comparisons >= 2){        
                                minMismatchToChimera = myPerseus.getChimera(sequences, leftDiffs, rightDiffs, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight, restricted);
                                if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
 
@@ -762,7 +769,7 @@ int ChimeraPerseusCommand::createProcessesGroups(SequenceParser& parser, string
                        lines.push_back(linePair(startIndex, endIndex));
                }
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)          
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
                
                //loop through and create all the processes you want
                while (process != processors) {
index 84563ac6ecc006ca7150f7f5e025b340da964fdf..01f5768a0c070c70aa6c7264b6d482b3636d7148 100644 (file)
@@ -44,7 +44,7 @@ private:
        
        bool abort;
        string fastafile, groupfile, outputDir, namefile;
-       int processors;
+       int processors, alignLength;
        double cutoff, alpha, beta;
        
        vector<string> outputNames;
@@ -98,7 +98,7 @@ struct perseusData {
        }
 };
 /**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
 static DWORD WINAPI MyPerseusThreadFunction(LPVOID lpParam){ 
        perseusData* pDataArray;
index 3eb6589a83beebf79f8cf812ce519b35d0bcc37e..7311173265d55bbac59af028922de1f7f7e5d447 100644 (file)
@@ -488,7 +488,7 @@ int ChimeraPintailCommand::execute(){
                #else
                                                
                        //break up file
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                vector<unsigned long long> positions = m->divideFile(fastaFileNames[s], processors);
                        
                                for (int i = 0; i < (positions.size()-1); i++) {
@@ -611,7 +611,7 @@ int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string
                        }
                        delete candidateSeq;
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = inFASTA.tellg();
                                if ((pos == -1) || (pos >= filePos->end)) { break; }
                        #else
@@ -697,7 +697,7 @@ int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fi
 
 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 0;
                int num = 0;
                
index cfcad1514ca629823081f54e35156934275644b1..2c435cadb11c1bc413b982676ac58082d4683992 100644 (file)
@@ -495,14 +495,14 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
                        else {
                                //add / to name if needed
                                string lastChar = blastlocation.substr(blastlocation.length()-1);
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                if (lastChar != "/") { blastlocation += "/"; }
 #else
                                if (lastChar != "\\") { blastlocation += "\\"; }        
 #endif
                                blastlocation = m->getFullPathName(blastlocation);
                                string formatdbCommand = "";
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                formatdbCommand = blastlocation + "formatdb";   
 #else
                                formatdbCommand = blastlocation + "formatdb.exe";
@@ -515,7 +515,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
                                if(ableToOpen == 1) {   m->mothurOut("[ERROR]: " + formatdbCommand + " file does not exist. mothur requires formatdb.exe to run chimera.slayer."); m->mothurOutEndLine(); abort = true; }
                                
                                string blastCommand = "";
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                blastCommand = blastlocation + "megablast";     
 #else
                                blastCommand = blastlocation + "megablast.exe";
@@ -533,7 +533,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
                        if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
 
                        //until we resolve the issue 10-18-11
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
                        //processors=1;
 #endif
@@ -594,13 +594,14 @@ int ChimeraSlayerCommand::execute(){
 #else
                                //break up file
                                vector<unsigned long long> positions; 
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                positions = m->divideFile(thisFastaName, processors);
                                for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(linePair(positions[i], positions[(i+1)]));      }
 #else
                                if (processors == 1) {  lines.push_back(linePair(0, 1000)); }
                                else {
                                        positions = m->setFilePosFasta(thisFastaName, numSeqs); 
+                    if (positions.size() < processors) { processors = positions.size(); }
                                        
                                        //figure out how many sequences you have to process
                                        int numSeqsPerProcessor = numSeqs / processors;
@@ -900,6 +901,16 @@ int ChimeraSlayerCommand::deconvoluteResults(SequenceParser* parser, string outp
                map<string, string> uniqueNames = parser->getAllSeqsMap();
                map<string, string>::iterator itUnique;
                int total = 0;
+        
+        if (trimera) { //add in more potential uniqueNames
+            map<string, string> newUniqueNames = uniqueNames;
+            for (map<string, string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) {
+                newUniqueNames[(it->first)+"_LEFT"] = (it->first)+"_LEFT";
+                newUniqueNames[(it->first)+"_RIGHT"] = (it->first)+"_RIGHT";
+            }
+            uniqueNames = newUniqueNames;
+            newUniqueNames.clear();
+        }
                
                //edit accnos file
                ifstream in2; 
@@ -1147,14 +1158,15 @@ string ChimeraSlayerCommand::getNamesFile(string& inputFile){
                string inputString = "fasta=" + inputFile;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
-               
+               m->mothurCalling = true;
+        
                Command* uniqueCommand = new DeconvoluteCommand(inputString);
                uniqueCommand->execute();
                
                map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
                
                delete uniqueCommand;
-               
+               m->mothurCalling = false;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                
                nameFile = filenames["name"][0];
@@ -1187,7 +1199,7 @@ int ChimeraSlayerCommand::driverGroups(string outputFName, string accnos, string
                        m->mothurOutEndLine(); m->mothurOut("Checking sequences from group: " + fileGroup[thisFastaName] + "."); m->mothurOutEndLine(); 
                        
                        lines.clear();
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        int proc = 1;
                        vector<unsigned long long> positions = m->divideFile(thisFastaName, proc);
                        lines.push_back(linePair(positions[0], positions[1]));  
@@ -1244,7 +1256,7 @@ int ChimeraSlayerCommand::createProcessesGroups(string outputFName, string accno
                        breakUp.push_back(thisFileToPriority);
                }
                                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                //loop through and create all the processes you want
                while (process != processors) {
                        int pid = fork();
@@ -1447,7 +1459,7 @@ int ChimeraSlayerCommand::driver(linePair filePos, string outputFName, string fi
                                count++;
                        }
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = inFASTA.tellg();
                                if ((pos == -1) || (pos >= filePos.end)) { break; }
                        #else
@@ -1628,7 +1640,7 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename
                int num = 0;
                processIDS.clear();
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                //loop through and create all the processes you want
                while (process != processors) {
                        int pid = fork();
index 6f3455debc4b6f161102a7bd2a75e797d3c639f0..2c6fec8d8d1082ea270f5e2c2a8832a7b57033a4 100644 (file)
@@ -172,7 +172,7 @@ struct slayerData {
 };
 
 /**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
 static DWORD WINAPI MySlayerThreadFunction(LPVOID lpParam){ 
        slayerData* pDataArray;
index 01289a0d0edf66969827d65a11a703c24da86845..f238094c9563587be29064a8f3fdbce54314c0f9 100644 (file)
@@ -461,7 +461,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                        path = path.substr(0, (tempPath.find_last_of('m')));
                        
                        string uchimeCommand;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
 #else
                        uchimeCommand = path + "uchime.exe";
@@ -927,14 +927,15 @@ string ChimeraUchimeCommand::getNamesFile(string& inputFile){
                string inputString = "fasta=" + inputFile;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
-               
+               m->mothurCalling = true;
+        
                Command* uniqueCommand = new DeconvoluteCommand(inputString);
                uniqueCommand->execute();
                
                map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
                
                delete uniqueCommand;
-               
+               m->mothurCalling = false;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                
                nameFile = filenames["name"][0];
@@ -1005,7 +1006,7 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
                path = path.substr(0, (tempPath.find_last_of('m')));
                
                string uchimeCommand = path;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                uchimeCommand += "uchime ";
 #else
                uchimeCommand += "uchime";
@@ -1243,7 +1244,7 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
                
                //uchime_main(numArgs, uchimeParameters); 
                //cout << "commandString = " << commandString << endl;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
                commandString = "\"" + commandString + "\"";
 #endif
@@ -1309,7 +1310,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                int num = 0;
                vector<string> files;
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)          
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
                //break up file into multiple files
                m->divideFile(filename, processors, files);
                
@@ -1492,7 +1493,7 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o
                        lines.push_back(linePair(startIndex, endIndex));
                }
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)          
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
                                
                //loop through and create all the processes you want
                while (process != processors) {
index b401ae85ae3c22e6d587fd27e649d27db9c6d939..499b18298dfffc66657f62fd00d48d7c3846bb15 100644 (file)
@@ -141,7 +141,7 @@ struct uchimeData {
 };
 
 /**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
 static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){ 
        uchimeData* pDataArray;
@@ -189,7 +189,7 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
                        path = path.substr(0, (tempPath.find_last_of('m')));
                        
                        string uchimeCommand = path;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        uchimeCommand += "uchime ";
 #else
                        uchimeCommand += "uchime";
@@ -403,7 +403,7 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
                        
                        //uchime_main(numArgs, uchimeParameters); 
                        //cout << "commandString = " << commandString << endl;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
                        commandString = "\"" + commandString + "\"";
 #endif
index 68576cdb521efc994bedf2d3b2d40cc98a740b9c..4e06201cd352104b5ee9c9d1c41764d16473003e 100644 (file)
@@ -233,7 +233,7 @@ string ChopSeqsCommand::getChopped(Sequence seq) {
                                        
                                        for (int i = 0; i < temp.length(); i++) {
                                                //eliminate N's
-                                               if (toupper(temp[i]) == 'N') { temp[i] == '.'; }
+                                               if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
                                                
                                                numBasesCounted++; 
                                                
@@ -255,7 +255,7 @@ string ChopSeqsCommand::getChopped(Sequence seq) {
                                        
                                        for (int i = (temp.length()-1); i >= 0; i--) {
                                                //eliminate N's
-                                               if (toupper(temp[i]) == 'N') { temp[i] == '.'; }
+                                               if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
                                                
                                                numBasesCounted++; 
 
@@ -283,7 +283,7 @@ string ChopSeqsCommand::getChopped(Sequence seq) {
                                        for (int i = 0; i < temp.length(); i++) {
                                                //eliminate N's
                                                if (toupper(temp[i]) == 'N') { 
-                                                       temp[i] == '.'; 
+                                                       temp[i] = '.'; 
                                                        tempLength--;
                                                        if (tempLength < numbases) { stopSpot = 0; break; }
                                                }
@@ -309,7 +309,7 @@ string ChopSeqsCommand::getChopped(Sequence seq) {
                                        for (int i = (temp.length()-1); i >= 0; i--) {
                                                //eliminate N's
                                                if (toupper(temp[i]) == 'N') { 
-                                                       temp[i] == '.'; 
+                                                       temp[i] = '.'; 
                                                        tempLength--;
                                                        if (tempLength < numbases) { stopSpot = 0; break; }
                                                }
index 328cd58f1f32bc265de857de4ede5aa557c2cd94..0504e6635802c3f6e3e186e4568675906774b3ff 100644 (file)
@@ -457,10 +457,14 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                search = "kmer";
                        }
                        
-                       if (namefileNames.size() == 0){
-                               vector<string> files; files.push_back(fastaFileNames[fastaFileNames.size()-1]); 
-                               parser.getNameFile(files);
-                       }
+            if (!abort) {
+                if (namefileNames.size() == 0){
+                    if (fastaFileNames.size() != 0) {
+                        vector<string> files; files.push_back(fastaFileNames[fastaFileNames.size()-1]); 
+                        parser.getNameFile(files);
+                    }
+                }
+            }
                        
                }
                
@@ -615,7 +619,7 @@ int ClassifySeqsCommand::execute(){
 #else
                
                        vector<unsigned long long> positions; 
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        positions = m->divideFile(fastaFileNames[s], processors);
                        for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(new linePair(positions[i], positions[(i+1)]));  }
 #else
@@ -623,6 +627,7 @@ int ClassifySeqsCommand::execute(){
                                lines.push_back(new linePair(0, 1000));
                        }else {
                                positions = m->setFilePosFasta(fastaFileNames[s], numFastaSeqs); 
+                if (positions.size() < processors) { processors = positions.size(); }
                                
                                //figure out how many sequences you have to process
                                int numSeqsPerProcessor = numFastaSeqs / processors;
@@ -821,7 +826,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                int num = 0;
                processIDS.clear();
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                
                //loop through and create all the processes you want
@@ -881,7 +886,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                        string extension = "";
                        if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
                        
-                       classifyData* tempclass = new classifyData((accnos + extension), probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i, flipThreshold);
+                       classifyData* tempclass = new classifyData((accnos + extension), probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i, flip);
                        pDataArray.push_back(tempclass);
                        
                        //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
@@ -905,16 +910,35 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                }
                
        #endif  
-               
+        vector<string> nonBlankAccnosFiles;
+               if (!(m->isBlank(accnos))) { nonBlankAccnosFiles.push_back(accnos); }
+               else { m->mothurRemove(accnos); } //remove so other files can be renamed to it
+        
                for(int i=0;i<processIDS.size();i++){
                        appendTaxFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
                        appendTaxFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
-                       appendTaxFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
+            if (!(m->isBlank(accnos + toString(processIDS[i]) + ".temp"))) {
+                               nonBlankAccnosFiles.push_back(accnos + toString(processIDS[i]) + ".temp");
+                       }else { m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));  }
+
                        m->mothurRemove((m->getFullPathName(taxFileName) + toString(processIDS[i]) + ".temp"));
                        m->mothurRemove((m->getFullPathName(tempTaxFile) + toString(processIDS[i]) + ".temp"));
-                       m->mothurRemove((m->getFullPathName(accnos) + toString(processIDS[i]) + ".temp"));
                }
                
+        //append accnos files
+               if (nonBlankAccnosFiles.size() != 0) { 
+                       rename(nonBlankAccnosFiles[0].c_str(), accnos.c_str());
+                       
+                       for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
+                               appendTaxFiles(nonBlankAccnosFiles[h], accnos);
+                               m->mothurRemove(nonBlankAccnosFiles[h]);
+                       }
+               }else { //recreate the accnosfile if needed
+                       ofstream out;
+                       m->openOutputFile(accnos, out);
+                       out.close();
+               }
+
                return num;
                
        }
@@ -1002,7 +1026,7 @@ int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempT
                        }
                        delete candidateSeq;
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = inFASTA.tellg();
                                if ((pos == -1) || (pos >= filePos->end)) { break; }
                        #else
index 0bf4a9154412080d28fa065ab33789279832fbeb..acee70c5cb474dee4e25eea4a6dcf6b5bd55fa06 100644 (file)
@@ -10,7 +10,7 @@
  *
  */
 
-#include "mothur.h"
+
 #include "command.hpp"
 #include "classify.h"
 #include "referencedb.h"
@@ -130,7 +130,7 @@ struct classifyData {
 };
 
 /**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
 static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){ 
        classifyData* pDataArray;
@@ -163,7 +163,7 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
                //make classify
                Classify* myclassify;
                if(pDataArray->method == "bayesian"){   myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip);         }
-               else if(pDataArray->method == "knn"){   myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID, pDataArray->flipThreshold);                                }
+               else if(pDataArray->method == "knn"){   myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID);                           }
                else {
                        pDataArray->m->mothurOut(pDataArray->search + " is not a valid method option. I will run the command using bayesian.");
                        pDataArray->m->mothurOutEndLine();
diff --git a/classifytreecommand.cpp b/classifytreecommand.cpp
new file mode 100644 (file)
index 0000000..9ec4e6f
--- /dev/null
@@ -0,0 +1,579 @@
+//
+//  classifytreecommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 2/20/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "classifytreecommand.h"
+#include "phylotree.h"
+
+//**********************************************************************************************************************
+vector<string> ClassifyTreeCommand::setParameters(){
+       // Registers the parameters classify.tree accepts by appending them to the
+       // `parameters` member, then returns the name of every entry held in
+       // `parameters`, in stored order.
+       try {
+               // input files
+               CommandParameter treeParam("tree", "InputTypes", "", "", "", "", "none",false,true);
+               parameters.push_back(treeParam);
+               CommandParameter taxonomyParam("taxonomy", "InputTypes", "", "", "", "", "none",false,true);
+               parameters.push_back(taxonomyParam);
+               CommandParameter nameParam("name", "InputTypes", "", "", "", "", "none",false,false);
+               parameters.push_back(nameParam);
+               CommandParameter groupParam("group", "InputTypes", "", "", "", "", "none",false,false);
+               parameters.push_back(groupParam);
+
+               // numeric option; "51" matches the default stated in the help text
+               CommandParameter cutoffParam("cutoff", "Number", "", "51", "", "", "",false,true);
+               parameters.push_back(cutoffParam);
+
+               // directory overrides
+               CommandParameter inputDirParam("inputdir", "String", "", "", "", "", "",false,false);
+               parameters.push_back(inputDirParam);
+               CommandParameter outputDirParam("outputdir", "String", "", "", "", "", "",false,false);
+               parameters.push_back(outputDirParam);
+
+               // collect the name of each stored parameter for the caller
+               vector<string> paramNames;
+               for (int idx = 0; idx < parameters.size(); idx++) {
+                       paramNames.push_back(parameters[idx].name);
+               }
+               return paramNames;
+       }
+       catch(exception& e) {
+               // report through the shared output object, then terminate
+               m->errorOut(e, "ClassifyTreeCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string ClassifyTreeCommand::getHelpString(){
+       // Builds and returns the user-facing help text for classify.tree:
+       // what the command does, what it writes, its parameters and an example call.
+       try {
+               string helpString = "";
+               helpString += "The classify.tree command reads a tree and taxonomy file and outputs the consensus taxonomy for each node on the tree. \n";
+               helpString += "If you provide a group file, the consensus for each group will also be provided. \n";
+               helpString += "The new tree contains labels at each internal node.  The label is the node number so you can relate the tree to the summary file.\n";
+               helpString += "The summary file lists the consensus taxonomy for the descendants of each node.\n";
+               // cutoff is listed with the other parameters because setParameters registers it
+               helpString += "The classify.tree command parameters are tree, group, name, taxonomy and cutoff. The tree and taxonomy files are required.\n";
+               helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
+               helpString += "The classify.tree command should be used in the following format: classify.tree(tree=test.tre, group=test.group, taxonomy=test.taxonomy)\n";
+               helpString += "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               // report through the shared output object, then terminate
+               m->errorOut(e, "ClassifyTreeCommand", "getHelpString");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+ClassifyTreeCommand::ClassifyTreeCommand(){
+       // Default constructor: flags the command as aborted with help called,
+       // registers its parameters and declares its two output types with no files.
+       // NOTE(review): presumably used when only command metadata is wanted — confirm against callers.
+       try {
+               abort = true;
+               calledHelp = true;
+
+               setParameters();
+
+               // each output type starts with an empty list of file names
+               vector<string> noFiles;
+               outputTypes["tree"] = noFiles;
+               outputTypes["summary"] = noFiles;
+       }
+       catch(exception& e) {
+               // report through the shared output object, then terminate
+               m->errorOut(e, "ClassifyTreeCommand", "ClassifyTreeCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+ClassifyTreeCommand::ClassifyTreeCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;   
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string, string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string, string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       m->runParse = true;
+                       m->clearGroups();
+                       m->clearAllGroups();
+                       m->Treenames.clear();
+                       m->names.clear();
+                       
+                       vector<string> tempOutNames;
+                       outputTypes["tree"] = tempOutNames;
+                       outputTypes["summary"] = tempOutNames;
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("tree");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["tree"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("name");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("group");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("taxonomy");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
+                               }
+                       }
+                       
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
+            
+                       //check for required parameters
+                       treefile = validParameter.validFile(parameters, "tree", true);
+                       if (treefile == "not open") { treefile = ""; abort = true; }
+                       else if (treefile == "not found") { treefile = ""; 
+                treefile = m->getTreeFile(); 
+                if (treefile != "") {  m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
+                else { m->mothurOut("No valid current files. You must provide a tree file."); m->mothurOutEndLine(); abort = true; }
+            }else { m->setTreeFile(treefile); }        
+            
+            taxonomyfile = validParameter.validFile(parameters, "taxonomy", true);
+                       if (taxonomyfile == "not open") { taxonomyfile = ""; abort = true; }
+                       else if (taxonomyfile == "not found") { taxonomyfile = ""; 
+                taxonomyfile = m->getTaxonomyFile(); 
+                if (taxonomyfile != "") {  m->mothurOut("Using " + taxonomyfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
+                else { m->mothurOut("No valid current files. You must provide a taxonomy file."); m->mothurOutEndLine(); abort = true; }
+            }else { m->setTaxonomyFile(taxonomyfile); }        
+                       
+                       namefile = validParameter.validFile(parameters, "name", true);
+                       if (namefile == "not open") { namefile = ""; abort = true; }
+                       else if (namefile == "not found") { namefile = ""; }
+                       else { m->setNameFile(namefile); }
+                       
+                       groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not open") { groupfile = ""; abort = true; }
+                       else if (groupfile == "not found") { groupfile = ""; }
+                       else { m->setGroupFile(groupfile); }
+            
+            string temp = validParameter.validFile(parameters, "cutoff", false);                       if (temp == "not found") { temp = "51"; }
+                       m->mothurConvert(temp, cutoff); 
+                       
+                       if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
+            
+            if (namefile == "") {
+                               vector<string> files; files.push_back(treefile);
+                               parser.getNameFile(files);
+                       }
+                       
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyTreeCommand", "ClassifyTreeCommand");           
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+// Runs classify.tree: reads the tree (plus optional group and name files), drops group-file
+// sequences that are absent from the tree, reads the taxonomy file and writes the consensus
+// taxonomy outputs via getClassifications().
+// Returns 0 on success, on a help request and on user interrupt; returns 2 when option
+// parsing aborted for any reason other than a help request.
+int ClassifyTreeCommand::execute(){
+    try {
+
+        if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+
+        cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
+
+        int start = time(NULL);
+
+        /***************************************************/
+        //    reading tree info                            //
+        /***************************************************/
+        m->setTreeFile(treefile);
+        if (groupfile != "") {
+            //read in group map info.
+            tmap = new TreeMap(groupfile);
+            tmap->readMap();
+        }else{ //fake out by putting everyone in one group
+            Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
+            tmap = new TreeMap();
+
+            for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
+        }
+
+        if (namefile != "") { readNamesFile(); }
+
+        read = new ReadNewickTree(treefile);
+        int readOk = read->read(tmap);
+
+        if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
+
+        read->AssembleTrees();
+        vector<Tree*> T = read->getTrees();
+        Tree* outputTree = T[0]; //only the first tree is classified
+        delete read;
+
+        //make sure all files match
+        //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
+        int numNamesInTree = m->Treenames.size();
+        if ((namefile != "") && (numUniquesInName == m->Treenames.size())) { numNamesInTree = nameMap.size(); }
+
+        //output any names that are in group file but not in tree
+        if (numNamesInTree < tmap->getNumSeqs()) {
+            for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
+                //is that name in the tree?
+                bool inTree = false;
+                for (int j = 0; j < m->Treenames.size(); j++) {
+                    if (tmap->namesOfSeqs[i] == m->Treenames[j]) { inTree = true; break; } //found it
+                }
+
+                if (m->control_pressed) {
+                    delete tmap; for (int k = 0; k < T.size(); k++) { delete T[k]; }
+                    for (int k = 0; k < outputNames.size(); k++) {  m->mothurRemove(outputNames[k]); } outputTypes.clear();
+                    m->clearGroups();
+                    return 0;
+                }
+
+                //then you did not find it so report it
+                if (!inTree) {
+                    //if it is in your namefile then don't remove
+                    map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
+
+                    if (it == nameMap.end()) {
+                        m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
+                        tmap->removeSeq(tmap->namesOfSeqs[i]);
+                        i--; //need this because removeSeq removes name from namesOfSeqs
+                    }
+                }
+            }
+        }
+
+        //free every tree handed back by getTrees(), not just the one we classify
+        if (m->control_pressed) {
+            for (int k = 0; k < T.size(); k++) { delete T[k]; }
+            delete tmap;
+            return 0;
+        }
+
+        readTaxonomyFile();
+
+
+        /***************************************************/
+        //         get concensus taxonomies                //
+        /***************************************************/
+        getClassifications(outputTree);
+        for (int k = 0; k < T.size(); k++) { delete T[k]; } //outputTree is T[0], released here with the rest
+        delete tmap;
+
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);    } return 0; }
+
+        //set tree file as new current treefile
+        if (treefile != "") {
+            string current = "";
+            itTypes = outputTypes.find("tree");
+            if (itTypes != outputTypes.end()) {
+                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTreeFile(current); }
+            }
+        }
+
+        m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to find the concensus taxonomies."); m->mothurOutEndLine();
+        m->mothurOutEndLine();
+        m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+        for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+        m->mothurOutEndLine();
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClassifyTreeCommand", "execute");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+//traverse the tree, finding the consensus taxonomy at each node
+//label each node with a number that relates it to the output summary file
+//report all consensus taxonomies to file
+// Writes "<root>taxonomy.summary" (one consensus taxonomy per internal node, and per group when
+// a group file was given) and "<root>taxonomy.tre" (the tree with each internal node labelled
+// i+1 so it can be matched to the summary). Both file names are appended to outputNames and
+// outputTypes. Always returns 0, including when the user interrupts.
+int ClassifyTreeCommand::getClassifications(Tree*& T){
+    try {
+
+        //both outputs share one directory and root name: outputDir if given, else the tree file's path
+        string thisOutputDir = outputDir;
+        if (outputDir == "") {  thisOutputDir += m->hasPath(treefile);  }
+        string outputRoot = thisOutputDir + m->getRootName(m->getSimpleName(treefile));
+        string outputFileName = outputRoot + "taxonomy.summary";
+        string outputTreeFileName = outputRoot + "taxonomy.tre";
+        outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
+
+        ofstream out;
+        m->openOutputFile(outputFileName, out);
+        out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+
+        //print headings
+        out << "TreeNode\t";
+        if (groupfile != "") { out << "Group\t"; }
+        out << "NumRep\tTaxonomy" << endl;
+
+        //create a map from tree node index to names of descendants, save time later
+        //children are looked up by index in this map, so it is filled in increasing node order
+        map<int, map<string, set<string> > > nodeToDescendants; //node# -> (groupName -> groupMembers)
+        for (int i = 0; i < T->getNumNodes(); i++) {
+            if (m->control_pressed) { out.close(); return 0; }
+
+            nodeToDescendants[i] = getDescendantList(T, i, nodeToDescendants);
+        }
+
+        //for each internal node (indices below getNumLeaves() are skipped)
+        for (int i = T->getNumLeaves(); i < T->getNumNodes(); i++) {
+
+            if (m->control_pressed) { out.close(); return 0; }
+
+            int size = 0;
+            map<string, set<string> >& groups = nodeToDescendants[i];
+
+            if (groupfile != "") {
+                //one line per real group; the "AllGroups" entry is only reported when no group file is used
+                for (map<string, set<string> >::iterator itGroups = groups.begin(); itGroups != groups.end(); itGroups++) {
+                    if (itGroups->first != "AllGroups") {
+                        string tax = getTaxonomy(itGroups->second, size);
+                        out << (i+1) << '\t' << itGroups->first << '\t' << size << '\t' << tax << endl;
+                    }
+                }
+            }else {
+                string tax = getTaxonomy(groups["AllGroups"], size);
+                out << (i+1) << '\t' << size << '\t' << tax << endl;
+            }
+
+            T->tree[i].setLabel((i+1));
+        }
+        out.close();
+
+        ofstream outTree;
+        m->openOutputFile(outputTreeFileName, outTree);
+        outputNames.push_back(outputTreeFileName); outputTypes["tree"].push_back(outputTreeFileName);
+        T->print(outTree, "both");
+        outTree.close();
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClassifyTreeCommand", "getClassifications");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Builds the consensus taxonomy string for the sequences in `names`.
+//   names - sequence names to summarise
+//   size  - output: number of sequences actually added to the consensus (reset to 0 on entry)
+// Returns taxon names, each followed by its percentage in parentheses, for every level whose
+// most common taxon reaches `cutoff`; remaining levels up to the PhyloTree's max level are
+// padded with "unclassified;". Returns "no_consensus;" if nothing was produced, or the empty
+// string if the user interrupts while sequences are being added.
+string ClassifyTreeCommand::getTaxonomy(set<string> names, int& size) {
+    try{
+        string conTax = "";
+        size = 0;
+
+        //create a tree containing sequences from this bin
+        PhyloTree* phylo = new PhyloTree();
+
+        for (set<string>::iterator it = names.begin(); it != names.end(); it++) {
+
+            //if namesfile include the names
+            if (namefile != "") {
+
+                //is this sequence in the name file - namemap maps seqName -> repSeqName
+                map<string, string>::iterator it2 = nameMap.find(*it);
+
+                if (it2 == nameMap.end()) { //this name is not in name file, skip it
+                    m->mothurOut((*it) + " is not in your name file.  I will not include it in the consensus."); m->mothurOutEndLine();
+                }else{
+
+                    //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
+                    map<string, string>::iterator itTax = taxMap.find((it2->second));
+
+                    if (itTax == taxMap.end()) { //this name is not in taxonomy file, skip it
+
+                        if ((*it) != (it2->second)) { m->mothurOut((*it) + " is represented by " +  it2->second + " and is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
+                        else {  m->mothurOut((*it) + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
+                    }else{
+                        //add one entry per sequence this name stands for.
+                        //NOTE(review): readNamesFile keys nameCount by the representative name, so this
+                        //yields 0 if a tree leaf is not itself a representative - confirm that cannot happen.
+                        int num = nameCount[(*it)];
+                        //the second argument must be the taxonomy string, as in the no-namefile branch below
+                        for (int i = 0; i < num; i++) {  phylo->addSeqToTree((*it)+toString(i), itTax->second);  }
+                        size += num;
+                    }
+                }
+
+            }else{
+                //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
+                map<string, string>::iterator itTax = taxMap.find((*it));
+
+                if (itTax == taxMap.end()) { //this name is not in taxonomy file, skip it
+                    m->mothurOut((*it) + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
+                }else{
+                    //add seq to tree
+                    phylo->addSeqToTree((*it), itTax->second);
+                    size++;
+                }
+            }
+
+            if (m->control_pressed) { delete phylo; return conTax; }
+
+        }
+
+        //build tree
+        phylo->assignHeirarchyIDs(0);
+
+        TaxNode currentNode = phylo->get(0);
+        int myLevel = 0;
+        //at each level
+        while (currentNode.children.size() != 0) { //you still have more to explore
+
+            TaxNode bestChild;
+            int bestChildSize = 0;
+
+            //go through children
+            for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
+
+                TaxNode temp = phylo->get(itChild->second);
+
+                //select child with largest accesions - most seqs assigned to it
+                if (temp.accessions.size() > bestChildSize) {
+                    bestChild = temp;
+                    bestChildSize = temp.accessions.size();
+                }
+
+            }
+
+            //is this taxonomy above cutoff
+            int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
+
+            if (consensusConfidence >= cutoff) { //if yes, add it
+                conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
+                myLevel++;
+            }else{ //if no, quit
+                break;
+            }
+
+            //move down a level
+            currentNode = bestChild;
+        }
+
+        //pad the levels we could not resolve
+        while (myLevel != phylo->getMaxLevel()) {
+            conTax += "unclassified;";
+            myLevel++;
+        }
+        if (conTax == "") {  conTax = "no_consensus;";  }
+
+        delete phylo;
+
+        return conTax;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClassifyTreeCommand", "getTaxonomy");
+        exit(1);
+    }
+}
+
+//**********************************************************************************************************************
+// Returns, for node i of T, a map from group name to the names of the leaves below that node.
+// A leaf yields two entries, its own group (from tmap) and "AllGroups", each holding only its
+// own name. An internal node yields the per-group union of its two children's entries, read
+// from `descendants` by child index, so the children must already be stored there.
+// NOTE(review): `descendants` is taken by value, so the whole node map is copied on every call.
+map<string, set<string> > ClassifyTreeCommand::getDescendantList(Tree*& T, int i, map<int, map<string, set<string> > > descendants){
+    try {
+        map<string, set<string> > result;
+
+        int leftChild = T->tree[i].getLChild();
+        int rightChild = T->tree[i].getRChild();
+
+        //a leaf's only descendant is itself
+        if (leftChild == -1) {
+            string leafName = T->tree[i].getName();
+            string leafGroup = tmap->getGroup(leafName);
+
+            set<string> justMe;
+            justMe.insert(leafName);
+
+            result[leafGroup] = justMe;
+            result["AllGroups"] = justMe;
+            return result;
+        }
+
+        //internal node: start from the left child's groups, then fold in the right child's
+        result = descendants[leftChild];
+
+        map<string, set<string> >& rightGroups = descendants[rightChild];
+        for (map<string, set<string> >::iterator itGroup = rightGroups.begin(); itGroup != rightGroups.end(); itGroup++) {
+            //operator[] creates the group when it is new; the range insert then merges or fills it
+            result[itGroup->first].insert((itGroup->second).begin(), (itGroup->second).end());
+        }
+
+        return result;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClassifyTreeCommand", "getDescendantList");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Loads taxonomyfile into taxMap (sequence name -> taxonomy string). Each record is two
+// whitespace-separated tokens; a taxonomy containing '(' is passed through
+// m->removeConfidences() before it is stored.
+// Always returns 0; on user interrupt taxMap is cleared first.
+int ClassifyTreeCommand::readTaxonomyFile() {
+    try {
+
+        ifstream in;
+        m->openInputFile(taxonomyfile, in);
+
+        string name, tax;
+
+        while(!in.eof()){
+            in >> name >> tax;
+
+            //an empty file or a last line with no taxonomy leaves the stream failed;
+            //stop rather than store an empty or stale record
+            if (in.fail()) { break; }
+            m->gobble(in);
+
+            //are there confidence scores, if so remove them
+            if (tax.find('(') != string::npos) {  m->removeConfidences(tax); }
+
+            taxMap[name] = tax;
+
+            if (m->control_pressed) { in.close(); taxMap.clear(); return 0; }
+        }
+        in.close();
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClassifyTreeCommand", "readTaxonomyFile");
+        exit(1);
+    }
+}
+
+/*****************************************************************/
+// Loads namefile. Each record is "<representative> <comma-separated names>". Fills
+//   nameMap          : every listed name -> its representative
+//   nameCount        : representative    -> number of names listed for it
+//   numUniquesInName : number of records read (execute() compares it with the tree's name count)
+// Always returns 0; on user interrupt nameMap is cleared first.
+int ClassifyTreeCommand::readNamesFile() {
+    try {
+        ifstream inNames;
+        m->openInputFile(namefile, inNames);
+
+        string name, names;
+
+        //counted here because execute() reads it and nothing else in view assigns it
+        numUniquesInName = 0;
+
+        while(!inNames.eof()){
+            inNames >> name;            //read from first column  A
+            inNames >> names;           //read from second column  A,B,C,D
+
+            //an empty file or a last line with no name list leaves the stream failed;
+            //stop rather than store an empty or stale record
+            if (inNames.fail()) { break; }
+            m->gobble(inNames);
+
+            //parse names into vector
+            vector<string> theseNames;
+            m->splitAtComma(names, theseNames);
+
+            for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = name;  }
+            nameCount[name] = theseNames.size();
+            numUniquesInName++;
+
+            if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; }
+        }
+        inNames.close();
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClassifyTreeCommand", "readNamesFile");
+        exit(1);
+    }
+}
+
+/*****************************************************************/
+
+
diff --git a/classifytreecommand.h b/classifytreecommand.h
new file mode 100644 (file)
index 0000000..026e4ba
--- /dev/null
@@ -0,0 +1,53 @@
+#ifndef Mothur_classifytreecommand_h
+#define Mothur_classifytreecommand_h
+
+//
+//  classifytreecommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 2/20/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "command.hpp"
+#include "readtree.h"
+#include "treemap.h"
+
+// The "classify.tree" command: finds the consensus taxonomy for the descendants of each tree node.
+class ClassifyTreeCommand : public Command {
+public:
+    ClassifyTreeCommand(string);
+    ClassifyTreeCommand();
+    ~ClassifyTreeCommand(){}
+
+    vector<string> setParameters();
+
+    string getCommandName() {
+        return "classify.tree";
+    }
+    string getCommandCategory() {
+        return "Phylotype Analysis";
+    }
+    string getHelpString();
+    string getCitation() {
+        return "http://www.mothur.org/wiki/Classify.tree";
+    }
+    string getDescription() {
+        return "Find the consensus taxonomy for the descendant of each tree node";
+    }
+
+    int execute();
+    void help() {
+        m->mothurOut(getHelpString());
+    }
+
+private:
+    ReadTree* read;                 // tree reader, created and deleted inside execute()
+    TreeMap* tmap;                  // sequence -> group map, created and deleted inside execute()
+
+    // input file names and output directory taken from the command parameters
+    string treefile;
+    string taxonomyfile;
+    string groupfile;
+    string namefile;
+    string outputDir;
+
+    bool abort;                     // set when option parsing fails; execute() returns early
+    vector<string> outputNames;     // every output file created by this run
+
+    int numUniquesInName;           // compared with the number of tree names in execute()
+    int cutoff;                     // consensus percentage threshold; the constructor requires 51..100
+
+    map<string, string> nameMap;    // sequence name -> representative sequence name
+    map<string, int> nameCount;     // representative name -> number of names listed for it
+    map<string, string> taxMap;     // sequence name -> taxonomy string
+
+    int getClassifications(Tree*&);
+    map<string, set<string> > getDescendantList(Tree*&, int, map<int, map<string, set<string> > >);
+    string getTaxonomy(set<string>, int&);
+    int readNamesFile();
+    int readTaxonomyFile();
+
+};
+
+
+
+#endif
index 0048dc6053d0bab06130eeaa6ab5d10b576aeb48..1ce81c4f2affda469164709f862bb36ff023c163 100644 (file)
@@ -434,11 +434,11 @@ void ClusterClassic::print() {
 try {
                //update location of seqs in smallRow since they move to smallCol now
                for (int i = 0; i < dMatrix.size(); i++) {
-                       cout << "row = " << i << '\t';
+                       m->mothurOut("row = " + toString(i) + "\t");
                        for (int j = 0; j < dMatrix[i].size(); j++) {
-                               cout << dMatrix[i][j] << '\t';
+                               m->mothurOut(toString(dMatrix[i][j]) + "\t");
                        }
-                       cout << endl;
+                       m->mothurOutEndLine();
                }
        }
        catch(exception& e) {
index 34caf654124886f9cd23638a149b2b3487ca53e2..8a19f1d4f1b7991e8a5155ed5156e638276d860e 100644 (file)
@@ -8,12 +8,7 @@
  */
 
 #include "clustersplitcommand.h"
-#include "readcluster.h"
-#include "splitmatrix.h"
-#include "readphylip.h"
-#include "readcolumn.h"
-#include "readmatrix.hpp"
-#include "inputdata.h"
+
 
 
 //**********************************************************************************************************************
@@ -555,74 +550,16 @@ int ClusterSplitCommand::execute(){
                MPI_Barrier(MPI_COMM_WORLD);
                
        #else
-               
+               ///////////////////// WINDOWS CAN ONLY USE 1 PROCESSORS ACCESS VIOLATION UNRESOLVED ///////////////////////
                //sanity check
                if (processors > distName.size()) { processors = distName.size(); }
                
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                if(processors == 1){
                                        listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
                                }else{
-                                       
-                                       //cout << processors << '\t' << distName.size() << endl;
-                                       vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
-                                       dividedNames.resize(processors);
-                                       
-                                       //for each file group figure out which process will complete it
-                                       //want to divide the load intelligently so the big files are spread between processes
-                                       for (int i = 0; i < distName.size(); i++) { 
-                                               //cout << i << endl;
-                                               int processToAssign = (i+1) % processors; 
-                                               if (processToAssign == 0) { processToAssign = processors; }
-                                               
-                                               dividedNames[(processToAssign-1)].push_back(distName[i]);
-                                       }
-                                       
-                                       //not lets reverse the order of ever other process, so we balance big files running with little ones
-                                       for (int i = 0; i < processors; i++) {
-                                               //cout << i << endl;
-                                               int remainder = ((i+1) % processors);
-                                               if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
-                                       }
-                                       
-                                       createProcesses(dividedNames);
-                                                       
-                                       if (m->control_pressed) { return 0; }
-
-                                       //get list of list file names from each process
-                                       for(int i=0;i<processors;i++){
-                                               string filename = toString(processIDS[i]) + ".temp";
-                                               ifstream in;
-                                               m->openInputFile(filename, in);
-                                               
-                                               in >> tag; m->gobble(in);
-                                               
-                                               while(!in.eof()) {
-                                                       string tempName;
-                                                       in >> tempName; m->gobble(in);
-                                                       listFileNames.push_back(tempName);
-                                               }
-                                               in.close();
-                                               m->mothurRemove((toString(processIDS[i]) + ".temp"));
-                                               
-                                               //get labels
-                                               filename = toString(processIDS[i]) + ".temp.labels";
-                                               ifstream in2;
-                                               m->openInputFile(filename, in2);
-                                               
-                                               float tempCutoff;
-                                               in2 >> tempCutoff; m->gobble(in2);
-                                               if (tempCutoff < cutoff) { cutoff = tempCutoff; }
-                                               
-                                               while(!in2.eof()) {
-                                                       string tempName;
-                                                       in2 >> tempName; m->gobble(in2);
-                                                       if (labels.count(tempName) == 0) { labels.insert(tempName); }
-                                               }
-                                               in2.close();
-                                               m->mothurRemove((toString(processIDS[i]) + ".temp.labels"));
-                                       }
-                               }
+                                       listFileNames = createProcesses(distName, labels);
+                }
                #else
                                listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
                #endif
@@ -904,12 +841,35 @@ void ClusterSplitCommand::printData(ListVector* oldList){
        }
 }
 //**********************************************************************************************************************
-int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
+vector<string>  ClusterSplitCommand::createProcesses(vector< map<string, string> > distName, set<string>& labels){
        try {
+        
+        vector<string> listFiles;
+        vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
+        dividedNames.resize(processors);
+        
+        //for each file group figure out which process will complete it
+        //want to divide the load intelligently so the big files are spread between processes
+        for (int i = 0; i < distName.size(); i++) { 
+            //cout << i << endl;
+            int processToAssign = (i+1) % processors; 
+            if (processToAssign == 0) { processToAssign = processors; }
+            
+            dividedNames[(processToAssign-1)].push_back(distName[i]);
+            if ((processToAssign-1) == 1) { m->mothurOut(distName[i].begin()->first + "\n"); }
+        }
+        
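+        //now let's reverse the order of every other process, so we balance big files running with little ones (NOTE(review): the test below is (i+1) % processors, which is nonzero for every process except the last -- confirm this matches the "every other" intent)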
+        for (int i = 0; i < processors; i++) {
+            //cout << i << endl;
+            int remainder = ((i+1) % processors);
+            if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
+        }
+        
+        if (m->control_pressed) { return listFiles; }
        
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               int exitCommand = 1;
+       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+               int process = 1;
                processIDS.clear();
                
                //loop through and create all the processes you want
@@ -950,14 +910,99 @@ int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> >
                        }
                }
                
+        //do your part
+        listFiles = cluster(dividedNames[0], labels);
+        
                //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
+               for (int i=0;i< processIDS.size();i++) { 
                        int temp = processIDS[i];
                        wait(&temp);
                }
+        
+        //get list of list file names from each process
+        for(int i=0;i<processIDS.size();i++){
+            string filename = toString(processIDS[i]) + ".temp";
+            ifstream in;
+            m->openInputFile(filename, in);
+            
+            in >> tag; m->gobble(in);
+            
+            while(!in.eof()) {
+                string tempName;
+                in >> tempName; m->gobble(in);
+                listFiles.push_back(tempName);
+            }
+            in.close();
+            m->mothurRemove((toString(processIDS[i]) + ".temp"));
+            
+            //get labels
+            filename = toString(processIDS[i]) + ".temp.labels";
+            ifstream in2;
+            m->openInputFile(filename, in2);
+            
+            float tempCutoff;
+            in2 >> tempCutoff; m->gobble(in2);
+            if (tempCutoff < cutoff) { cutoff = tempCutoff; }
+            
+            while(!in2.eof()) {
+                string tempName;
+                in2 >> tempName; m->gobble(in2);
+                if (labels.count(tempName) == 0) { labels.insert(tempName); }
+            }
+            in2.close();
+            m->mothurRemove((toString(processIDS[i]) + ".temp.labels"));
+        }
+        
+
+    #else
+       
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //The Windows version uses threads that share memory, so be careful when passing variables through the clusterData struct.
+               //The fork() used above clones the process, so memory is separate, but that's not the case with Windows threads.
+               //Taking advantage of shared memory to allow both threads to add labels.
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
                
-               return exitCommand;
+               vector<clusterData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=1; i<processors; i++ ){
+                       // Allocate memory for thread data.
+                       clusterData* tempCluster = new clusterData(dividedNames[i], m, cutoff, method, outputDir, hard, precision, length, i);
+                       pDataArray.push_back(tempCluster);
+                       processIDS.push_back(i);
+            
+                       //MyClusterThreadFunction is in the header. It must be global or static to work with the threads.
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i-1] = CreateThread(NULL, 0, MyClusterThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);  
+            
+               }
+        
+        //do your part
+        listFiles = cluster(dividedNames[0], labels);
+        
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+            //get tag
+            tag = pDataArray[i]->tag;
+            //get listfiles created
+            for(int j=0; j < pDataArray[i]->listFiles.size(); j++){ listFiles.push_back(pDataArray[i]->listFiles[j]); }
+            //get labels
+            set<string>::iterator it;
+            for(it = pDataArray[i]->labels.begin(); it != pDataArray[i]->labels.end(); it++){ labels.insert(*it); }
+                       //check cutoff
+            if (pDataArray[i]->cutoff < cutoff) { cutoff = pDataArray[i]->cutoff; }
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+
        #endif          
+        
+        return listFiles;
        
        }
        catch(exception& e) {
@@ -969,18 +1014,19 @@ int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> >
 
 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
        try {
-               Cluster* cluster;
-               SparseMatrix* matrix;
-               ListVector* list;
-               ListVector oldList;
-               RAbundVector* rabund;
                
                vector<string> listFileNames;
-               
                double smallestCutoff = cutoff;
                
                //cluster each distance file
                for (int i = 0; i < distNames.size(); i++) {
+            
+            Cluster* cluster = NULL;
+            SparseMatrix* matrix = NULL;
+            ListVector* list = NULL;
+            ListVector oldList;
+            RAbundVector* rabund = NULL;
+            
                        if (m->control_pressed) { return listFileNames; }
                        
                        string thisNamefile = distNames[i].begin()->second;
@@ -1011,8 +1057,8 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
                        oldList = *list;
                        matrix = read->getMatrix();
                        
-                       delete read; 
-                       delete nameMap; 
+                       delete read;  read = NULL;
+                       delete nameMap; nameMap = NULL;
                        
                        
                        #ifdef USE_MPI
@@ -1097,6 +1143,7 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
                        }
        
                        delete matrix; delete list;     delete cluster; delete rabund; 
+            matrix = NULL; list = NULL; cluster = NULL; rabund = NULL;
                        listFile.close();
                        
                        if (m->control_pressed) { //clean up
index c4b236cb12dbfc535c1de5da7840b39f21176fb7..0e32ffaea7710189859f458de4adc897362e12ef 100644 (file)
 #include "listvector.hpp"
 #include "cluster.hpp"
 #include "sparsematrix.hpp"
-
+#include "readcluster.h"
+#include "splitmatrix.h"
+#include "readphylip.h"
+#include "readcolumn.h"
+#include "readmatrix.hpp"
+#include "inputdata.h"
+#include "clustercommand.h"
 
 class ClusterSplitCommand : public Command {
        
@@ -47,12 +53,208 @@ private:
        ofstream outList, outRabund, outSabund;
        
        void printData(ListVector*);
-       int createProcesses(vector < vector < map<string, string> > >);
+       vector<string> createProcesses(vector< map<string, string> >, set<string>&);
        vector<string> cluster(vector< map<string, string> >, set<string>&);
        int mergeLists(vector<string>, map<float, int>, ListVector*);
        map<float, int> completeListFile(vector<string>, string, set<string>&, ListVector*&);
        int createMergedDistanceFile(vector< map<string, string> >);
 };
 
+/////////////////not working for Windows////////////////////////////////////////////////////////////
+// Getting an access violation error.  This is most likely caused by the
+// threads stepping on each other's structures, as I can run the thread function and the cluster function
+// separately without errors occurring.  I suspect it may be in the use of the
+// static class mothurOut, but I can't pinpoint the problem.  All other objects are made new
+// within the thread.  MothurOut is used by almost all the classes in mothur, so if this were
+// really the cause I would expect to see all the Windows threaded commands have issues, but not
+// all do. So far, shhh.flows and trim.flows have similar problems. Another thought: could it have
+// anything to do with mothur's use of copy constructors in many of our data structures, i.e. listvector
+// is copied by nameassignment and passed to read, which passes it to the thread?  -westcott 2-8-12
+////////////////////////////////////////////////////////////////////////////////////////////////////
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct clusterData {
+       set<string> labels;
+       vector < map<string, string> > distNames; 
+       string method; 
+    MothurOut* m;
+       double cutoff, precision;
+    string tag, outputDir;
+    vector<string> listFiles;
+    bool hard;
+    int length, threadID;
+       
+       
+       clusterData(){}
+       clusterData(vector < map<string, string> > dv, MothurOut* mout, double cu, string me, string ou, bool hd, double pre, int len, int th) {
+               distNames = dv;
+               m = mout;
+               cutoff = cu;
+        method = me;
+               outputDir = ou;
+        hard = hd;
+        precision = pre;
+        length = len;
+        threadID = th;
+       }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyClusterThreadFunction(LPVOID lpParam){ 
+       clusterData* pDataArray;
+       pDataArray = (clusterData*)lpParam;
+       
+       try {
+               cout << "starting " << endl;            
+               
+               double smallestCutoff = pDataArray->cutoff;
+               
+               //cluster each distance file
+               for (int i = 0; i < pDataArray->distNames.size(); i++) {
+            
+            Cluster* mycluster = NULL;
+            SparseMatrix* mymatrix = NULL;
+            ListVector* mylist = NULL;
+            ListVector myoldList;
+            RAbundVector* myrabund = NULL;
+                        
+                       if (pDataArray->m->control_pressed) { break; }
+                       
+                       string thisNamefile = pDataArray->distNames[i].begin()->second;
+                       string thisDistFile = pDataArray->distNames[i].begin()->first;
+            cout << thisNamefile << '\t' << thisDistFile << endl;      
+                       pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Reading " + thisDistFile); pDataArray->m->mothurOutEndLine();
+                       
+                       ReadMatrix* myread = new ReadColumnMatrix(thisDistFile);        
+                       myread->setCutoff(pDataArray->cutoff);
+                       NameAssignment* mynameMap = new NameAssignment(thisNamefile);
+                       mynameMap->readMap();
+            cout << "done reading " << thisNamefile << endl;  
+                       myread->read(mynameMap);
+                       cout << "done reading " << thisDistFile << endl;  
+                       if (pDataArray->m->control_pressed) {  delete myread; delete mynameMap; break; }
+            
+                       mylist = myread->getListVector();
+                       myoldList = *mylist;
+                       mymatrix = myread->getMatrix();
+            cout << "here" << endl;    
+                       delete myread; myread = NULL;
+                       delete mynameMap; mynameMap = NULL;
+                       
+            pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Clustering " + thisDistFile); pDataArray->m->mothurOutEndLine();
+            
+                       myrabund = new RAbundVector(mylist->getRAbundVector());
+                        cout << "here" << endl;        
+                       //create cluster
+                       if (pDataArray->method == "furthest")   {       mycluster = new CompleteLinkage(myrabund, mylist, mymatrix, pDataArray->cutoff, pDataArray->method); }
+                       else if(pDataArray->method == "nearest"){       mycluster = new SingleLinkage(myrabund, mylist, mymatrix, pDataArray->cutoff, pDataArray->method); }
+                       else if(pDataArray->method == "average"){       mycluster = new AverageLinkage(myrabund, mylist, mymatrix, pDataArray->cutoff, pDataArray->method);     }
+                       pDataArray->tag = mycluster->getTag();
+             cout << "here" << endl;   
+                       if (pDataArray->outputDir == "") { pDataArray->outputDir += pDataArray->m->hasPath(thisDistFile); }
+                       string fileroot = pDataArray->outputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisDistFile));
+                        cout << "here" << endl;        
+                       ofstream listFile;
+                       pDataArray->m->openOutputFile(fileroot+ pDataArray->tag + ".list",      listFile);
+             cout << "here" << endl;   
+                       pDataArray->listFiles.push_back(fileroot+ pDataArray->tag + ".list");
+            
+                       float previousDist = 0.00000;
+                       float rndPreviousDist = 0.00000;
+                       
+                       myoldList = *mylist;
+        
+                       bool print_start = true;
+                       int start = time(NULL);
+                       double saveCutoff = pDataArray->cutoff;
+            
+                       while (mymatrix->getSmallDist() < pDataArray->cutoff && mymatrix->getNNodes() > 0){
+                
+                               if (pDataArray->m->control_pressed) { //clean up
+                                       delete mymatrix; delete mylist; delete mycluster; delete myrabund;
+                                       listFile.close();
+                                       for (int i = 0; i < pDataArray->listFiles.size(); i++) {        pDataArray->m->mothurRemove(pDataArray->listFiles[i]);  }
+                                       pDataArray->listFiles.clear(); break;
+                               }
+                
+                               mycluster->update(saveCutoff);
+                
+                               float dist = mymatrix->getSmallDist();
+                               float rndDist;
+                               if (pDataArray->hard) {
+                                       rndDist = pDataArray->m->ceilDist(dist, pDataArray->precision); 
+                               }else{
+                                       rndDist = pDataArray->m->roundDist(dist, pDataArray->precision); 
+                               }
+                
+                               if(previousDist <= 0.0000 && dist != previousDist){
+                                       myoldList.setLabel("unique");
+                                       myoldList.print(listFile);
+                                       if (pDataArray->labels.count("unique") == 0) {  pDataArray->labels.insert("unique");  }
+                               }
+                               else if(rndDist != rndPreviousDist){
+                                       myoldList.setLabel(toString(rndPreviousDist,  pDataArray->length-1));
+                                       myoldList.print(listFile);
+                                       if (pDataArray->labels.count(toString(rndPreviousDist,  pDataArray->length-1)) == 0) { pDataArray->labels.insert(toString(rndPreviousDist,  pDataArray->length-1)); }
+                               }
+                       
+                               previousDist = dist;
+                               rndPreviousDist = rndDist;
+                               myoldList = *mylist;
+                       }
+            
+             cout << "here2" << endl;  
+                       if(previousDist <= 0.0000){
+                               myoldList.setLabel("unique");
+                               myoldList.print(listFile);
+                               if (pDataArray->labels.count("unique") == 0) { pDataArray->labels.insert("unique"); }
+                       }
+                       else if(rndPreviousDist<pDataArray->cutoff){
+                               myoldList.setLabel(toString(rndPreviousDist,  pDataArray->length-1));
+                               myoldList.print(listFile);
+                               if (pDataArray->labels.count(toString(rndPreviousDist,  pDataArray->length-1)) == 0) { pDataArray->labels.insert(toString(rndPreviousDist,  pDataArray->length-1)); }
+                       }
+            
+                       delete mymatrix; delete mylist; delete mycluster; delete myrabund; 
+            mymatrix = NULL; mylist = NULL; mycluster = NULL; myrabund = NULL;
+                       listFile.close();
+                       
+                       if (pDataArray->m->control_pressed) { //clean up
+                               for (int i = 0; i < pDataArray->listFiles.size(); i++) {        pDataArray->m->mothurRemove(pDataArray->listFiles[i]);  }
+                               pDataArray->listFiles.clear(); break;
+                       }
+                        cout << "here3" << endl;       
+                       pDataArray->m->mothurRemove(thisDistFile);
+                       pDataArray->m->mothurRemove(thisNamefile);
+                        cout << "here4" << endl;       
+                       if (saveCutoff != pDataArray->cutoff) { 
+                               if (pDataArray->hard)   {  saveCutoff = pDataArray->m->ceilDist(saveCutoff, pDataArray->precision);     }
+                               else            {       saveCutoff = pDataArray->m->roundDist(saveCutoff, pDataArray->precision);  }
+                
+                               pDataArray->m->mothurOut("Cutoff was " + toString(pDataArray->cutoff) + " changed cutoff to " + toString(saveCutoff)); pDataArray->m->mothurOutEndLine();  
+                       }
+                        cout << "here5" << endl;       
+                       if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
+               }
+               
+               pDataArray->cutoff = smallestCutoff;
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "ClusterSplitCommand", "MyClusterThreadFunction");
+               exit(1);
+       }
+} 
+#endif
+
+
+
+
 #endif
 
index d813e1b5e22f9184de49151d3183b528c1dfc7cd..b61c2d2dc820c7cf05b7f82d3983017f2d6d4c55 100644 (file)
 #include "shhhseqscommand.h"
 #include "summaryqualcommand.h"
 #include "otuassociationcommand.h"
+#include "sortseqscommand.h"
+#include "classifytreecommand.h"
+#include "cooccurrencecommand.h"
+#include "pcrseqscommand.h"
+#include "createdatabasecommand.h"
 
 /*******************************************************/
 
@@ -275,6 +280,11 @@ CommandFactory::CommandFactory(){
        commands["summary.qual"]                = "summary.qual";
        commands["shhh.seqs"]                   = "shhh.seqs";
        commands["otu.association"]             = "otu.association";
+    commands["sort.seqs"]           = "sort.seqs";
+    commands["classify.tree"]       = "classify.tree";
+    commands["cooccurrence"]        = "cooccurrence";
+    commands["pcr.seqs"]            = "pcr.seqs";
+    commands["create.database"]     = "create.database";
        commands["quit"]                                = "MPIEnabled"; 
 
 }
@@ -305,6 +315,7 @@ CommandFactory::~CommandFactory(){
 //This function calls the appropriate command functions based on user input.
 Command* CommandFactory::getCommand(string commandName, string optionString){
        try {
+        
                delete command;   //delete the old command
                
                //user has opted to redirect output from dir where input files are located to some other place
@@ -436,6 +447,11 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "chimera.perseus")               {       command = new ChimeraPerseusCommand(optionString);                      }
                else if(commandName == "shhh.seqs")                             {       command = new ShhhSeqsCommand(optionString);                            }
                else if(commandName == "otu.association")               {       command = new OTUAssociationCommand(optionString);                      }
+        else if(commandName == "sort.seqs")             {      command = new SortSeqsCommand(optionString);                }
+        else if(commandName == "classify.tree")         {      command = new ClassifyTreeCommand(optionString);            }
+        else if(commandName == "cooccurrence")          {      command = new CooccurrenceCommand(optionString);            }
+        else if(commandName == "pcr.seqs")              {      command = new PcrSeqsCommand(optionString);                 }
+        else if(commandName == "create.database")       {      command = new CreateDatabaseCommand(optionString);          }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -581,6 +597,11 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
                else if(commandName == "chimera.perseus")               {       pipecommand = new ChimeraPerseusCommand(optionString);                  }
                else if(commandName == "shhh.seqs")                             {       pipecommand = new ShhhSeqsCommand(optionString);                                }
                else if(commandName == "otu.association")               {       pipecommand = new OTUAssociationCommand(optionString);                  }
+        else if(commandName == "sort.seqs")             {      pipecommand = new SortSeqsCommand(optionString);                }
+        else if(commandName == "classify.tree")         {      pipecommand = new ClassifyTreeCommand(optionString);            }
+        else if(commandName == "cooccurrence")          {      pipecommand = new CooccurrenceCommand(optionString);            }
+        else if(commandName == "pcr.seqs")              {      pipecommand = new PcrSeqsCommand(optionString);                 }
+        else if(commandName == "create.database")       {      pipecommand = new CreateDatabaseCommand(optionString);          }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -714,6 +735,11 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "chimera.perseus")               {       shellcommand = new ChimeraPerseusCommand();                     }
                else if(commandName == "shhh.seqs")                             {       shellcommand = new ShhhSeqsCommand();                           }
                else if(commandName == "otu.association")               {       shellcommand = new OTUAssociationCommand();                     }
+        else if(commandName == "sort.seqs")             {      shellcommand = new SortSeqsCommand();               }
+        else if(commandName == "classify.tree")         {      shellcommand = new ClassifyTreeCommand();           }
+        else if(commandName == "cooccurrence")          {      shellcommand = new CooccurrenceCommand();           }
+        else if(commandName == "pcr.seqs")              {      shellcommand = new PcrSeqsCommand();                }
+        else if(commandName == "create.database")       {      shellcommand = new CreateDatabaseCommand();         }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
@@ -739,7 +765,7 @@ Command* CommandFactory::getCommand(){
                exit(1);
        }
 }
-/***********************************************************************/
+***********************************************************************/
 bool CommandFactory::isValidCommand(string command) {
        try {   
        
diff --git a/cooccurrencecommand.cpp b/cooccurrencecommand.cpp
new file mode 100644 (file)
index 0000000..6864f79
--- /dev/null
@@ -0,0 +1,432 @@
+/*
+ *  cooccurrencecommand.cpp
+ *  Mothur
+ *
+ *  Created by kiverson on 1/2/12.
+ *  Copyright 2012 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "cooccurrencecommand.h"
+
+//**********************************************************************************************************************
+vector<string> CooccurrenceCommand::setParameters() {  
+       try { 
+               CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);             
+               CommandParameter pmetric("metric", "Multiple", "cscore-checker-combo-vratio", "cscore", "", "", "",false,false); parameters.push_back(pmetric);
+               CommandParameter pmatrix("matrixmodel", "Multiple", "sim1-sim2-sim3-sim4-sim5-sim6-sim7-sim8-sim9", "sim2", "", "", "",false,false); parameters.push_back(pmatrix);
+        CommandParameter pruns("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pruns);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
+        CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
+
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CooccurrenceCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string CooccurrenceCommand::getHelpString(){   
+       try {
+               string helpString = "The cooccurrence command calculates four metrics and tests their significance to assess whether presence-absence patterns are different than what one would expect by chance.";
+        helpString += "The cooccurrence command parameters are shared, metric, matrixmodel, iters, label and groups.";
+        helpString += "The matrixmodel parameter options are sim1, sim2, sim3, sim4, sim5, sim6, sim7, sim8 and sim9. Default=sim2";
+        helpString += "The metric parameter options are cscore, checker, combo and vratio. Default=cscore";
+        helpString += "The label parameter is used to analyze specific labels in your input.\n";
+               helpString += "The groups parameter allows you to specify which of the groups you would like analyzed.\n";
+        helpString += "The cooccurrence command should be in the following format: \n";
+               helpString += "cooccurrence(shared=yourSharedFile) \n";
+               helpString += "Example cooccurrence(shared=final.an.shared).\n";
+               helpString += "Note: No spaces between parameter labels (i.e. shared), '=' and parameters (i.e.yourShared).\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CooccurrenceCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+CooccurrenceCommand::CooccurrenceCommand(){    
+       try {
+               abort = true; calledHelp = true; 
+               setParameters();
+        vector<string> tempOutNames;
+               outputTypes["summary"] = tempOutNames;
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CooccurrenceCommand", "CooccurrenceCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+CooccurrenceCommand::CooccurrenceCommand(string option) {
+       try {
+               abort = false; calledHelp = false;   
+               allLines = 1;
+                               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       map<string,string>::iterator it;
+                       
+                       ValidParameters validParameter;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("shared");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
+                       }
+               
+            vector<string> tempOutNames;
+            outputTypes["summary"] = tempOutNames;
+               
+               //check for optional parameter and set defaults
+                       // ...at some point should added some additional type checking...
+                       label = validParameter.validFile(parameters, "label", false);                   
+                       if (label == "not found") { label = ""; }
+                       else { 
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }
+                       
+                       //get shared file
+                       sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
+                       else if (sharedfile == "not found") { 
+                               //if there is a current shared file, use it
+                               sharedfile = m->getSharedFile(); 
+                               if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       }else { m->setSharedFile(sharedfile); }
+                       
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
+
+                       
+                       metric = validParameter.validFile(parameters, "metric", false);                         if (metric == "not found") { metric = "cscore"; }
+                       
+                       if ((metric != "cscore") && (metric != "checker") && (metric != "combo") && (metric != "vratio")) {
+                               m->mothurOut("[ERROR]: " + metric + " is not a valid metric option for the cooccurrence command. Choices are cscore, checker, combo, vratio."); m->mothurOutEndLine(); abort = true; 
+                       }
+                       
+                       matrix = validParameter.validFile(parameters, "matrix", false);                         if (matrix == "not found") { matrix = "sim2"; }
+                       
+                       if ((matrix != "sim1") && (matrix != "sim2") && (matrix != "sim3") && (matrix != "sim4") && (matrix != "sim5" ) && (matrix != "sim6" ) && (matrix != "sim7" ) && (matrix != "sim8" ) && (matrix != "sim9" )) {
+                               m->mothurOut("[ERROR]: " + matrix + " is not a valid matrix option for the cooccurrence command. Choices are sim1, sim2, sim3, sim4, sim5, sim6, sim7, sim8, sim9."); m->mothurOutEndLine(); abort = true; 
+                       }
+            
+            groups = validParameter.validFile(parameters, "groups", false);                    
+                       if (groups == "not found") { groups = "";   }
+                       else { 
+                               m->splitAtDash(groups, Groups); 
+                       }                       
+                       m->setGroups(Groups);
+            
+            string temp = validParameter.validFile(parameters, "iters", false);                        if (temp == "not found") { temp = "1000"; }
+                       m->mothurConvert(temp, runs); 
+
+               }
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CooccurrenceCommand", "CooccurrenceCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int CooccurrenceCommand::execute(){ // runs getCooccurrence() on each requested label of the shared file and writes one summary row per label; returns 0 normally or after help/interrupt, 2 when aborted
+       try {
+       
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+               
+               InputData* input = new InputData(sharedfile, "sharedfile");
+               vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors(); // first label's data, one vector per group
+               string lastLabel = lookup[0]->getLabel();
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> processedLabels; // labels already analyzed
+               set<string> userLabels = labels; // requested labels still waiting to be matched
+
+        ofstream out;
+               string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "cooccurence.summary";
+        m->openOutputFile(outputFileName, out);
+        outputNames.push_back(outputFileName);  outputTypes["summary"].push_back(outputFileName);
+        out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+        out << "metric\tlabel\tScore\tpValue\n"; // header row of the summary file
+
+               //as long as you are not at the end of the file or done with the lines you want
+               while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+                       
+                       if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } delete input; out.close(); m->mothurRemove(outputFileName); return 0; }
+       
+                       if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
+
+                               m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                               
+                               getCooccurrence(lookup, out);
+                               
+                               processedLabels.insert(lookup[0]->getLabel());
+                               userLabels.erase(lookup[0]->getLabel());
+                       }
+                       
+                       if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { // a requested label was skipped over: analyze the previous label instead
+                               string saveLabel = lookup[0]->getLabel();
+                       
+                               for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
+                               lookup = input->getSharedRAbundVectors(lastLabel); // reload the data for the previous label
+                               m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                               getCooccurrence(lookup, out);
+                               
+                               processedLabels.insert(lookup[0]->getLabel());
+                               userLabels.erase(lookup[0]->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               lookup[0]->setLabel(saveLabel);
+                       }
+                       
+                       lastLabel = lookup[0]->getLabel();
+                       //prevent memory leak
+                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+                       
+                       if (m->control_pressed) {  outputTypes.clear(); delete input; out.close(); m->mothurRemove(outputFileName); return 0; }
+
+                       //get next line to process
+                       lookup = input->getSharedRAbundVectors();                               
+               }
+               
+               if (m->control_pressed) { delete input; out.close(); m->mothurRemove(outputFileName); return 0; }
+
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false; // set when the last label read still has to be analyzed
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+       
+               //run last label if you need to
+               if (needToRun == true)  {
+                       for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
+                       lookup = input->getSharedRAbundVectors(lastLabel);
+                       
+                       m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                       
+                       getCooccurrence(lookup, out);
+                       
+                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+               }
+       
+        out.close(); 
+        
+               //release the input reader and reset the groups parameter
+               delete input; 
+        m->clearGroups(); 
+
+        m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               m->mothurOut(outputFileName); m->mothurOutEndLine();    
+               m->mothurOutEndLine();
+        
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CooccurrenceCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int CooccurrenceCommand::getCooccurrence(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
+       try {
+        int numOTUS = thisLookUp[0]->getNumBins();
+        vector< vector<int> > initmatrix; initmatrix.resize(thisLookUp.size());
+        vector< vector<int> > co_matrix; co_matrix.resize(thisLookUp[0]->getNumBins());
+        for (int i = 0; i < thisLookUp[0]->getNumBins(); i++) { co_matrix[i].resize((thisLookUp.size()), 0); }
+        for (int i = 0; i < thisLookUp.size(); i++) { initmatrix[i].resize((thisLookUp[i]->getNumBins()), 0); }
+        vector<int> columntotal; columntotal.resize(thisLookUp.size(), 0);
+        vector<int> rowtotal; rowtotal.resize(numOTUS, 0);
+        
+        int rowcount = 0;
+        for (int i = 0; i < thisLookUp.size(); i++) {
+                       for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
+                               if (m->control_pressed) { return 0; }                   
+                               int abund = thisLookUp[i]->getAbundance(j);
+                               
+                               if(abund > 0) {
+                                   initmatrix[i][j] = 1;
+                    co_matrix[j][i] = 1;
+                    rowcount++;
+                    columntotal[j]++;
+                               }
+                       }
+            rowtotal[i] = rowcount;
+            rowcount = 0;
+        }
+        
+        //nrows is ncols of inital matrix. All the functions need this value. They assume the transposition has already taken place and nrows and ncols refer to that matrix.
+        //comatrix and initmatrix are still vectors of vectors of ints as in the original script. The abundancevector is only what was read in ie not a co-occurrence matrix!
+        int ncols = numOTUS;//rows of inital matrix
+        int nrows = thisLookUp.size();//groups
+        double initscore = 0.0;
+        //transpose matrix
+        int newmatrows = ncols;
+        int newmatcols = nrows;
+      
+        //swap for transposed matrix
+        nrows = newmatrows;//ncols;
+        ncols = newmatcols;//nrows;
+        
+        vector<int> initcolumntotal; initcolumntotal.resize(ncols, 0);
+        vector<int> initrowtotal; initrowtotal.resize(nrows, 0);
+        vector<double> stats;
+               
+        TrialSwap2 trial;
+        
+        initcolumntotal = rowtotal;
+        initrowtotal = columntotal;
+        trial.update_row_col_totals(co_matrix, rowtotal, columntotal);
+        
+        if (metric == "cscore")         { initscore = trial.calc_c_score(co_matrix, rowtotal);    }
+        else if (metric == "checker")   { initscore = trial.calc_checker(co_matrix, rowtotal);    }
+        else if (metric == "vratio")    { initscore = trial.calc_vratio(rowtotal, columntotal);   }
+        else if (metric == "combo")     { initscore = trial.calc_combo(co_matrix);                }
+        else                            {  m->mothurOut("[ERROR]: No metric selected!\n");  m->control_pressed = true; return 1;            }
+        
+        m->mothurOut("Initial c score: " + toString(initscore)); m->mothurOutEndLine();
+        
+        //nullmatrix burn in
+        for(int i=0;i<10000;i++) {
+            if (m->control_pressed) { return 0; }
+            if (matrix == "sim1") {
+                trial.sim1(co_matrix);
+            }else if (matrix == "sim2") {
+                trial.sim2(co_matrix);
+            }else if (matrix == "sim3") {
+                trial.sim3(initmatrix);
+                co_matrix = initmatrix;
+            }else if (matrix == "sim4") {
+                trial.sim4(columntotal, rowtotal, co_matrix);
+            }else if (matrix == "sim5") {
+                trial.sim5(initcolumntotal, initrowtotal, initmatrix);
+                trial.transpose_matrix(initmatrix,co_matrix);
+            }else if (matrix == "sim6") {
+                trial.sim6(columntotal, co_matrix);
+            }else if (matrix == "sim7") {
+                trial.sim7(initcolumntotal, initmatrix);          
+                co_matrix = initmatrix;
+            }else if (matrix == "sim8") {
+                trial.sim8(columntotal, rowtotal, co_matrix);
+            }else if (matrix == "sim9") {
+                trial.swap_checkerboards (co_matrix);
+            }else{
+                m->mothurOut("[ERROR]: No model selected! \n");
+                m->control_pressed = true;
+            }
+        }
+                
+        //run
+        for(int i=0;i<runs;i++) {
+            if (m->control_pressed) { return 0; }
+            //calc metric of nullmatrix
+            if (matrix == "sim1") {
+                trial.sim1(co_matrix);
+            }else if (matrix == "sim2") {
+                trial.sim2(co_matrix);
+            }else if (matrix == "sim3") {
+                trial.sim3(initmatrix);
+                co_matrix = initmatrix;
+            }else if (matrix == "sim4") {
+                trial.sim4(columntotal, rowtotal, co_matrix);
+            }else if (matrix == "sim5") {
+                trial.sim5(initcolumntotal, initrowtotal, initmatrix);
+                trial.transpose_matrix(initmatrix,co_matrix);
+            }else if (matrix == "sim6") {
+                trial.sim6(columntotal, co_matrix);
+            }else if (matrix == "sim7") {
+                trial.sim7(initcolumntotal, initmatrix);          
+                co_matrix = initmatrix;
+            }else if (matrix == "sim8") {
+                trial.sim8(columntotal, rowtotal, co_matrix);
+            }else if (matrix == "sim9") {
+                trial.swap_checkerboards (co_matrix);
+            }else{
+                 m->mothurOut("[ERROR]: No model selected! \n");
+                 m->control_pressed = true;
+            }
+            //
+            //            
+            trial.update_row_col_totals(co_matrix, rowtotal, columntotal); 
+            
+            if (metric == "cscore") { 
+                stats.push_back(trial.calc_c_score(co_matrix, rowtotal));
+            }else if (metric == "checker") { 
+                stats.push_back(trial.calc_checker(co_matrix, rowtotal));
+            }else if (metric == "vratio") { 
+                stats.push_back(trial.calc_vratio(rowtotal, columntotal));
+            }else if (metric == "combo") { 
+                stats.push_back(trial.calc_combo(co_matrix));
+            }else {
+                m->mothurOut("[ERROR]: No metric selected!\n");
+                m->control_pressed = true;
+                return 1;
+            }
+            
+        }
+
+        double total = 0.0;
+        for (int i=0; i<stats.size();i++)   {   total+=stats[i];   }
+        
+        double nullMean = double (total/(double)stats.size()); 
+        
+        m->mothurOutEndLine(); m->mothurOut("average metric score: " + toString(nullMean)); m->mothurOutEndLine();
+        
+        double pvalue = 0.0;
+        if (metric == "cscore" || metric == "checker") {    pvalue = trial.calc_pvalue_greaterthan (stats, initscore);   }
+        else{   pvalue = trial.calc_pvalue_lessthan (stats, initscore); }
+        
+        m->mothurOut("pvalue: " + toString(pvalue)); m->mothurOutEndLine();
+        out << metric << '\t' << thisLookUp[0]->getLabel() << '\t' << nullMean << '\t' << pvalue << endl;
+        
+        return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CooccurrenceCommand", "Cooccurrence");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
diff --git a/cooccurrencecommand.h b/cooccurrencecommand.h
new file mode 100644 (file)
index 0000000..8f60e08
--- /dev/null
@@ -0,0 +1,53 @@
+#ifndef COOCCURRENCECOMMAND_H
+#define COOCCURRENCECOMMAND_H
+
+/*
+ *  cooccurrencecommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 11/10/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+#include "command.hpp"
+#include "trialswap2.h"
+#include "inputdata.h"
+#include "sharedrabundvector.h"
+
+
+class CooccurrenceCommand : public Command {
+       
+public:
+       
+       CooccurrenceCommand(string);    
+       CooccurrenceCommand();
+       ~CooccurrenceCommand(){}
+       
+       vector<string> setParameters();
+       string getCommandName()                 { return "Cooccurrence";                        }
+       string getCommandCategory()             { return "Hypothesis Testing";  }
+       string getHelpString(); 
+       string getCitation() { return "http://www.mothur.org/wiki/Cooccurrence"; }
+       string getDescription()         { return "calculates four metrics and tests their significance to assess whether presence-absence patterns are different than what one would expect by chance."; }
+       
+       int execute(); 
+       void help() { m->mothurOut(getHelpString()); }  
+       
+       
+private:
+    string metric, matrix, outputDir;
+    string label, sharedfile, groups;
+    bool abort, allLines;
+    set<string> labels;
+    vector<string> outputNames, Groups;
+    int runs;
+    
+    int getCooccurrence(vector<SharedRAbundVector*>&, ofstream&);
+       
+};
+
+#endif
+
+
index 10669e580998797222fd785b6d80608fd87ba036..c27eb4b9ac335112f19244c708cee9bb8630feda 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "corraxescommand.h"
 #include "sharedutilities.h"
+#include "linearalgebra.h"
 
 //**********************************************************************************************************************
 vector<string> CorrAxesCommand::setParameters(){       
@@ -304,6 +305,8 @@ int CorrAxesCommand::execute(){
 int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& out) {
    try {
           
+       LinearAlgebra linear;
+       
           //find average of each axis - X
           vector<float> averageAxes; averageAxes.resize(numaxes, 0.0);
           for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
@@ -318,7 +321,7 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
           //for each otu
           for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                   
-                  if (metadatafile == "") {  out << i+1;       }
+                  if (metadatafile == "") {  out << m->currentBinLabels[i];    }
                   else {  out << metadataLabels[i];            }
                                   
                   //find the averages this otu - Y
@@ -355,11 +358,7 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
                           rValues[k] = r;
                           out << '\t' << r; 
                
-               //signifigance calc - http://faculty.vassar.edu/lowry/ch4apx.html
-               double temp =  (1- (r*r)) / (double) (lookupFloat.size()-2);
-               temp = sqrt(temp);
-               double sig = r / temp;
-               if (isnan(sig) || isinf(sig)) { sig = 0.0; }
+               double sig = linear.calcPearsonSig(lookupFloat.size(), r);
                
                out << '\t' << sig;
                   }
@@ -382,6 +381,9 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
 int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& out) {
        try {
                
+        LinearAlgebra linear;
+        vector<double> sf; 
+        
                //format data
                vector< map<float, int> > tableX; tableX.resize(numaxes);
                map<float, int>::iterator itTable;
@@ -421,6 +423,7 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                        
                        vector<spearmanRank> ties;
                        int rankTotal = 0;
+            double sfTemp = 0.0;
                        for (int j = 0; j < scores[i].size(); j++) {
                                rankTotal += (j+1);
                                ties.push_back(scores[i][j]);
@@ -432,6 +435,8 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                                                        float thisrank = rankTotal / (float) ties.size();
                                                        rankAxes[ties[k].name].push_back(thisrank);
                                                }
+                        int t = ties.size();
+                        sfTemp += (t*t*t-t);
                                                ties.clear();
                                                rankTotal = 0;
                                        }
@@ -444,13 +449,14 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                                        }
                                }
                        }
+            sf.push_back(sfTemp);
                }
                
                                
                //for each otu
                for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                        
-                       if (metadatafile == "") {  out << i+1;  }
+                       if (metadatafile == "") {  out << m->currentBinLabels[i];       }
                        else {  out << metadataLabels[i];               }
                        
                        //find the ranks of this otu - Y
@@ -478,6 +484,7 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                        
                        sort(otuScores.begin(), otuScores.end(), compareSpearman);
                        
+            double sg = 0.0;
                        map<string, float> rankOtus;
                        vector<spearmanRank> ties;
                        int rankTotal = 0;
@@ -492,6 +499,8 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                                                        float thisrank = rankTotal / (float) ties.size();
                                                        rankOtus[ties[k].name] = thisrank;
                                                }
+                        int t = ties.size();
+                        sg += (t*t*t-t);
                                                ties.clear();
                                                rankTotal = 0;
                                        }
@@ -532,12 +541,7 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                                
                                pValues[j] = p;
                 
-                //signifigance calc - http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
-                double temp = (lookupFloat.size()-2) / (double) (1- (p*p));
-                temp = sqrt(temp);
-                double sig = p*temp;
-                if (isnan(sig) || isinf(sig)) { sig = 0.0; }
-                
+                double sig = linear.calcSpearmanSig(n, sf[j], sg, di);            
                 out  << '\t' << sig;
                 
                        }
@@ -560,6 +564,8 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
 int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& out) {
        try {
                
+        LinearAlgebra linear;
+        
                //format data
                vector< vector<spearmanRank> > scores; scores.resize(numaxes);
                for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) {
@@ -603,7 +609,7 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                //for each otu
                for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                
-                       if (metadatafile == "") {  out << i+1;  }
+                       if (metadatafile == "") {  out << m->currentBinLabels[i];       }
                        else {  out << metadataLabels[i];               }
                        
                        //find the ranks of this otu - Y
@@ -678,14 +684,7 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                                out << '\t' << p;
                                pValues[j] = p;
                 
-                //calc signif - zA - http://en.wikipedia.org/wiki/Kendall_tau_rank_correlation_coefficient#Significance_tests
-                double numer = 3.0 * (numCoor - numDisCoor);
-                int n = scores[j].size();
-                double denom = n * (n-1) * (2*n + 5) / (double) 2.0;
-                denom = sqrt(denom);
-                double sig = numer / denom;
-                
-                if (isnan(sig) || isinf(sig)) { sig = 0.0; }
+                double sig = linear.calcKendallSig(scores[j].size(), p);
                 
                 out << '\t' << sig;
                        }
index 9cdd033ac29609d6f763f52597a4809fea204326..e83c6035731aa5509655dcd25a7a45995c1a0004 100644 (file)
@@ -174,7 +174,8 @@ int CountSeqsCommand::execute(){
                //open input file
                ifstream in;
                m->openInputFile(namefile, in);
-               
+        
+               int total = 0;
                while (!in.eof()) {
                        if (m->control_pressed) { break; }
                        
@@ -217,7 +218,7 @@ int CountSeqsCommand::execute(){
                                out << firstCol << '\t' << names.size() << endl;
                        }
                        
-                       
+                       total += names.size();
                }
                in.close();
                
@@ -225,6 +226,8 @@ int CountSeqsCommand::execute(){
                
                if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
                
+        m->mothurOutEndLine();
+               m->mothurOut("Total number of sequences: " + toString(total)); m->mothurOutEndLine();
                m->mothurOutEndLine();
                m->mothurOut("Output File Name: "); m->mothurOutEndLine();
                m->mothurOut(outputFileName); m->mothurOutEndLine();    
diff --git a/createdatabasecommand.cpp b/createdatabasecommand.cpp
new file mode 100644 (file)
index 0000000..1da67e6
--- /dev/null
@@ -0,0 +1,502 @@
+//
+//  createdatabasecommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 3/28/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "createdatabasecommand.h"
+#include "inputdata.h"
+
+//**********************************************************************************************************************
+vector<string> CreateDatabaseCommand::setParameters(){
+       try {
+               //input file parameters; the registration order is also the order of the returned names
+               parameters.push_back(CommandParameter("repfasta", "InputTypes", "", "", "none", "none", "none", false, true));
+               parameters.push_back(CommandParameter("repname", "InputTypes", "", "", "none", "none", "none", false, true));
+               parameters.push_back(CommandParameter("contaxonomy", "InputTypes", "", "", "none", "none", "none", false, true));
+               parameters.push_back(CommandParameter("list", "InputTypes", "", "", "none", "none", "none", false, true));
+               parameters.push_back(CommandParameter("group", "InputTypes", "", "", "none", "none", "none", false, false));
+
+               //plain string options
+               parameters.push_back(CommandParameter("label", "String", "", "", "", "", "", false, false));
+               parameters.push_back(CommandParameter("inputdir", "String", "", "", "", "", "", false, false));
+               parameters.push_back(CommandParameter("outputdir", "String", "", "", "", "", "", false, false));
+
+               //hand back the name of every parameter held in the parameters list
+               vector<string> parameterNames;
+               int index = 0;
+               while (index < parameters.size()) {
+                       parameterNames.push_back(parameters[index].name);
+                       index++;
+               }
+               return parameterNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string CreateDatabaseCommand::getHelpString(){
+       try {
+               //Builds the text shown for create.database help.
+               //The parameter names quoted here have to match the ones registered in setParameters
+               //(repfasta, repname, contaxonomy, list, group, label), otherwise a user copying the example gets an invalid parameter.
+               string helpString = "";
+               helpString += "The create.database command reads a listfile, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, and creates a database file.\n";
+               helpString += "The create.database command parameters are repfasta, list, repname, contaxonomy, group and label. List, repfasta, repname, and contaxonomy are required.\n";
+               helpString += "The repfasta file is the fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
+               helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
+               helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile).\n";
+               helpString += "The group file is optional and will just give you the abundance breakdown by group.\n";
+               helpString += "The label parameter allows you to specify a label to be used from your listfile.\n";
+               helpString += "NOTE: Make SURE the repfasta, repname and contaxonomy are for the same label as the listfile.\n";
+               helpString += "The create.database command should be in the following format: \n";
+               helpString += "create.database(repfasta=yourFastaFileFromGetOTURep, repname=yourNameFileFromGetOTURep, contaxonomy=yourConTaxFileFromClassifyOTU, list=yourListFile) \n";
+               helpString += "Example: create.database(repfasta=final.an.0.03.rep.fasta, repname=final.an.0.03.rep.names, list=final.an.list, label=0.03, contaxonomy=final.an.0.03.cons.taxonomy) \n";
+               helpString += "Note: No spaces between parameter labels (i.e. repfasta), '=' and parameters (i.e.yourFastaFileFromGetOTURep).\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+CreateDatabaseCommand::CreateDatabaseCommand(){
+       try {
+               //an object built without an option string is flagged as aborted and as a help call,
+               //so execute() returns immediately without doing any work
+               calledHelp = true;
+               abort = true;
+
+               setParameters();
+
+               //register the only output type this command produces, with no files yet
+               outputTypes["database"] = vector<string>();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "CreateDatabaseCommand");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+CreateDatabaseCommand::CreateDatabaseCommand(string option)  {
+       try{
+               //Parses the option string of a create.database call and fills in the file names, label and output directory.
+               //Any invalid or missing required parameter sets abort, which makes execute() return without running.
+               abort = false; calledHelp = false;
+
+               //allow user to run help
+               if (option == "help") {
+                       help(); abort = true; calledHelp = true;
+               }else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               else {
+                       vector<string> myArray = setParameters();
+
+                       //user supplied name=value pairs; this local map hides the member parameter list filled by setParameters
+                       OptionParser parser(option);
+                       map<string, string> parameters = parser.getParameters();
+
+                       ValidParameters validParameter;
+                       map<string, string>::iterator it;
+
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) {
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["database"] = tempOutNames;
+
+                       //if the user changes the input directory command factory will send this info to us in the output parameter
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               //every input file the user named without a path of its own is looked for in inputdir,
+                               //files given with a path are left alone
+                               const char* fileParameters[] = { "list", "repname", "contaxonomy", "repfasta", "group" };
+                               const int numFileParameters = sizeof(fileParameters) / sizeof(fileParameters[0]);
+                               for (int i = 0; i < numFileParameters; i++) {
+                                       it = parameters.find(fileParameters[i]);
+                                       if (it != parameters.end()) {
+                                               string path = m->hasPath(it->second);
+                                               if (path == "") {       it->second = inputDir + it->second;             }
+                                       }
+                               }
+                       }
+
+                       //if the user changes the output directory command factory will send this info to us in the output parameter
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
+
+                       //check for required parameters
+                       listfile = validParameter.validFile(parameters, "list", true);
+                       if (listfile == "not found") {
+                               //if there is a current list file, use it
+                               listfile = m->getListFile();
+                               if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       }
+                       else if (listfile == "not open") { abort = true; }
+                       else { m->setListFile(listfile); }
+
+                       //contaxonomy, repfasta and repname are required and, unlike list, have no current file to fall back on
+                       contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true);
+                       if (contaxonomyfile == "not found") {
+                               contaxonomyfile = "";  m->mothurOut("The contaxonomy parameter is required, aborting."); m->mothurOutEndLine(); abort = true;
+                       }
+                       else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
+
+                       repfastafile = validParameter.validFile(parameters, "repfasta", true);
+                       if (repfastafile == "not found") {
+                               repfastafile = "";  m->mothurOut("The repfasta parameter is required, aborting."); m->mothurOutEndLine(); abort = true;
+                       }
+                       else if (repfastafile == "not open") { repfastafile = ""; abort = true; }
+
+                       repnamesfile = validParameter.validFile(parameters, "repname", true);
+                       if (repnamesfile == "not found") {
+                               repnamesfile = "";  m->mothurOut("The repname parameter is required, aborting."); m->mothurOutEndLine(); abort = true;
+                       }
+                       else if (repnamesfile == "not open") { repnamesfile = ""; abort = true; }
+
+                       //the group file is optional, only a file that was named but could not be opened is an error
+                       groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not open") { groupfile = ""; abort = true; }
+                       else if (groupfile == "not found") { groupfile = ""; }
+                       else { m->setGroupFile(groupfile); }
+
+                       //check for optional parameter and set defaults
+                       // ...at some point should added some additional type checking...
+                       label = validParameter.validFile(parameters, "label", false);
+                       if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your listfile.\n");}
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "CreateDatabaseCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int CreateDatabaseCommand::execute(){
+       try {
+               //Cross-checks the contaxonomy, repfasta, repname and list files against each other and, when they agree,
+               //writes one tab separated line per OTU: number, abundance (total or per group), representative name, representative sequence, consensus taxonomy.
+               //Returns 2 when the command was aborted by bad parameters, 0 otherwise (errors are reported through mothurOut and control_pressed).
+
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+
+               //taxonomies holds the taxonomy info for each Otu
+               //classifyOtuSizes holds the size info of each Otu to help with error checking
+               vector<string> taxonomies;
+               vector<int> classifyOtuSizes = readTax(taxonomies);
+
+               if (m->control_pressed) { return 0; }
+
+               vector<Sequence> seqs;
+               vector<int> repOtusSizes = readFasta(seqs);
+
+               if (m->control_pressed) { return 0; }
+
+               //names redundants to uniques. backwards to how we normally do it, but each bin in the list file will be a key entry in the map.
+               map<string, string> repNames;
+               int numUniqueNamesFile = readNames(repNames);
+
+               //are there the same number of otus in the fasta and name files
+               if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file.  These should match.\n"); m->control_pressed = true; }
+
+               if (m->control_pressed) { return 0; }
+
+               //are there the same number of OTUs in the tax and fasta file
+               if (classifyOtuSizes.size() != repOtusSizes.size()) { m->mothurOut("[ERROR]: you have " + toString(classifyOtuSizes.size()) + " taxonomies in your contaxonomy file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file.  These should match.\n"); m->control_pressed = true; }
+
+               if (m->control_pressed) { return 0; }
+
+               //at this point we have the same number of OTUs. Are the sizes we have found so far accurate?
+               for (int i = 0; i < classifyOtuSizes.size(); i++) {
+                       if (classifyOtuSizes[i] != repOtusSizes[i]) {
+                               m->mothurOut("[ERROR]: OTU size info does not match for bin " + toString(i+1) + ". The contaxonomy file indicated the OTU represented " + toString(classifyOtuSizes[i]) + " sequences, but the repfasta file had " + toString(repOtusSizes[i]) + ".  These should match. Make sure you are using files for the same distance.\n"); m->control_pressed = true;
+                       }
+               }
+
+               if (m->control_pressed) { return 0; }
+
+               //at this point we are fairly sure the repfasta, repnames and contaxonomy files match so lets proceed with the listfile
+               ListVector* list = getList();
+
+               if (m->control_pressed) { delete list; return 0; }
+
+               //each bin of the list is used below as an index into classifyOtuSizes, seqs and taxonomies (all the same length by now),
+               //so a list with more bins than that would read past the end of those vectors
+               if (list->getNumBins() > static_cast<int>(classifyOtuSizes.size())) {
+                       m->mothurOut("[ERROR]: your listfile contains " + toString(list->getNumBins()) + " OTUs, but your repfasta, repname and contaxonomy files only contain " + toString(classifyOtuSizes.size()) + ". Make sure you are using files for the same distance.\n");
+                       m->control_pressed = true; delete list; return 0;
+               }
+
+               GroupMap* groupmap = NULL;
+               if (groupfile != "") {
+                       groupmap = new GroupMap(groupfile);
+                       groupmap->readMap();
+               }
+
+               if (m->control_pressed) { delete list; if (groupfile != "") { delete groupmap; } return 0; }
+
+               if (outputDir == "") { outputDir += m->hasPath(listfile); }
+               string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + "database";
+               outputNames.push_back(outputFileName); outputTypes["database"].push_back(outputFileName);
+
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+
+               //fetch the group names once; they give both the header columns and the order of the per group counts
+               vector<string> groupNames;
+               if (groupfile != "") { groupNames = groupmap->getNamesOfGroups(); }
+
+               string header = "OTUNumber\tAbundance\t";
+               if (groupfile != "") {
+                       header = "OTUNumber\t";
+                       for (int i = 0; i < groupNames.size(); i++) { header += groupNames[i] + '\t'; }
+               }
+               header += "repSeqName\trepSeq\tOTUConTaxonomy";
+               out << header << endl;
+
+               for (int i = 0; i < list->getNumBins(); i++) {
+
+                       if (m->control_pressed) { break; }
+
+                       out << (i+1) << '\t';
+
+                       vector<string> binNames;
+                       string bin = list->get(i);
+
+                       //repNames is keyed by the full bin string, so the bin has to match the repname file exactly
+                       map<string, string>::iterator it = repNames.find(bin);
+                       if (it == repNames.end()) {
+                               m->mothurOut("[ERROR]: OTU " + toString(i+1) + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+                       }
+
+                       m->splitAtComma(bin, binNames);
+
+                       //sanity check
+                       if (binNames.size() != classifyOtuSizes[i]) {
+                               m->mothurOut("[ERROR]: OTU " + toString(i+1) + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+                       }
+
+                       //output abundances
+                       if (groupfile != "") {
+                               map<string, int> counts;
+                               //initialize counts to 0
+                               for (int j = 0; j < groupNames.size(); j++) { counts[groupNames[j]] = 0; }
+
+                               //find abundances by group
+                               bool error = false;
+                               for (int j = 0; j < binNames.size(); j++) {
+                                       string group = groupmap->getGroup(binNames[j]);
+                                       if (group == "not found") {
+                                               m->mothurOut("[ERROR]: " + binNames[j] + " is not in your groupfile, please correct.\n");
+                                               error = true;
+                                       }else { counts[group]++; }
+                               }
+
+                               //output counts
+                               for (int j = 0; j < groupNames.size(); j++) { out << counts[groupNames[j]] << '\t';  }
+
+                               //keep going so every missing name of this bin is reported, the loop stops at the next bin
+                               if (error) { m->control_pressed = true; }
+                       }else { out << binNames.size() << '\t'; }
+
+                       //output repSeq
+                       out << it->second << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
+               }
+               out.close();
+
+               delete list;
+               if (groupfile != "") { delete groupmap; }
+
+               //a partially written database file is removed when anything went wrong
+               if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
+
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               m->mothurOut(outputFileName); m->mothurOutEndLine();
+               m->mothurOutEndLine();
+
+               return 0;
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<int> CreateDatabaseCommand::readTax(vector<string>& taxonomies){
+       try {
+               //Reads the contaxonomy file: one header line, then whitespace separated records of OTU label, OTU size and taxonomy.
+               //The taxonomy of each record is appended to taxonomies and the matching sizes are returned, in file order.
+               //On an unreadable record an error is printed and m->control_pressed is set.
+
+               vector<int> sizes;
+
+               ifstream in;
+               m->openInputFile(contaxonomyfile, in);
+
+               //read headers
+               m->getline(in);
+
+               while (!in.eof()) {
+
+                       if (m->control_pressed) { break; }
+
+                       string otu = ""; string tax = "unknown";
+                       int size = 0;
+
+                       in >> otu >> size >> tax;
+
+                       if (in.fail()) {
+                               //a size column that is not a number fails the stream without reaching the end of the file,
+                               //without this check the loop would never end and would keep adding empty records
+                               if (!in.eof()) {
+                                       m->mothurOut("[ERROR]: your contaxonomy file is not in the right format. Each line after the headers should contain an OTU label, the OTU size and a taxonomy.\n"); m->control_pressed = true; break;
+                               }
+                               //the file ended before another record started, there is nothing to store
+                               if (otu == "") { break; }
+                       }
+                       m->gobble(in);
+
+                       sizes.push_back(size);
+                       taxonomies.push_back(tax);
+               }
+               in.close();
+
+               return sizes;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "readTax");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<int> CreateDatabaseCommand::readFasta(vector<Sequence>& seqs){
+       try {
+               //Reads the repfasta file. Every sequence read is appended to seqs and the OTU size
+               //found in its extra header information is collected in the returned vector, in file order.
+               vector<int> otuSizes;
+
+               ifstream fastaIn;
+               m->openInputFile(repfastafile, fastaIn);
+
+               for (;;) {
+                       if (fastaIn.eof()) { break; }
+                       if (m->control_pressed) { break; }
+
+                       //the Sequence constructor fills headerInfo while reading the record
+                       string headerInfo;
+                       Sequence repSeq(fastaIn, headerInfo, true);
+                       m->gobble(fastaIn);
+
+                       //headerInfo is expected to be binNumber|size, ie. 1|200. binNumber|size|group means the wrong repfasta file was given
+                       vector<string> pieces;
+                       m->splitAtChar(headerInfo, pieces, '|');
+                       if (pieces.size() != 2) {
+                               m->mothurOut("[ERROR]: your repfasta file is not the right format.  The create database command is designed to be used with the output from get.oturep.  When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n");
+                               m->control_pressed = true;
+                               break;
+                       }
+
+                       int otuSize = 0;
+                       m->mothurConvert(pieces[1], otuSize);
+
+                       otuSizes.push_back(otuSize);
+                       seqs.push_back(repSeq);
+               }
+               fastaIn.close();
+
+               return otuSizes;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "readFasta");
+               exit(1);
+       }
+}
+/**********************************************************************************************************************/
+int CreateDatabaseCommand::readNames(map<string, string>& nameMap) {
+       try {
+               //Reads the two column repname file into nameMap, keyed by the SECOND column with the first column as value.
+               //Returns the number of entries held by nameMap afterwards.
+               ifstream namesIn;
+               m->openInputFile(repnamesfile, namesIn);
+
+               for (;;) {
+                       if (namesIn.eof()) { break; }
+                       if (m->control_pressed) { break; }
+
+                       string representative;
+                       string members;
+                       namesIn >> representative;
+                       namesIn >> members;
+                       m->gobble(namesIn);
+
+                       nameMap[members] = representative;
+               }
+               namesIn.close();
+
+               return nameMap.size();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "readNames");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//Reads the list file and returns the ListVector to build the database from.
+//With no label requested, the first ListVector in the file is returned and label is set to its label.
+//Otherwise the file is scanned for the requested label; when it is not found a neighbouring label is used instead (see below).
+//The returned object is heap allocated and is deleted by the caller (execute).
+//NOTE(review): the result of getListVector() is dereferenced right away without a NULL check - confirm what InputData returns for an empty list file.
+ListVector* CreateDatabaseCommand::getList(){
+       try {
+               InputData* input = new InputData(listfile, "list");
+               ListVector* list = input->getListVector();
+               string lastLabel = list->getLabel();
+               
+               //no label requested: adopt the label of the first ListVector and use it
+               if (label == "") { label = lastLabel; delete input; return list; }
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> labels; labels.insert(label);
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+               
+               //as long as you are not at the end of the file or done wih the lines you want
+               while((list != NULL) && (userLabels.size() != 0)) {
+                       if (m->control_pressed) {  delete input; return list;  }
+                       
+                       //exact match: keep the current ListVector
+                       if(labels.count(list->getLabel()) == 1){
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               break;
+                       }
+                       
+                       //presumably anyLabelsToProcess reports that the requested label has been passed - confirm against MothurOut.
+                       //In that case the previous label's ListVector is re-read and used, unless that label was already processed.
+                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = list->getLabel();
+                               
+                               delete list;
+                               list = input->getListVector(lastLabel);
+                               
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               list->setLabel(saveLabel);
+                               break;
+                       }
+                       
+                       lastLabel = list->getLabel();                   
+                       
+                       //get next line to process
+                       //prevent memory leak
+                       delete list; 
+                       list = input->getListVector();
+               }
+               
+               
+               if (m->control_pressed) { delete input; return list;  }
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               //(the requested label was never reached, so the ListVector of the last label seen is loaded instead)
+               if (needToRun == true)  {
+                       delete list;
+                       list = input->getListVector(lastLabel);
+               }       
+               
+               delete input;
+
+        return list;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "getList");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
diff --git a/createdatabasecommand.h b/createdatabasecommand.h
new file mode 100644 (file)
index 0000000..643ff6e
--- /dev/null
@@ -0,0 +1,50 @@
+#ifndef Mothur_createdatabasecommand_h
+#define Mothur_createdatabasecommand_h
+
+//
+//  createdatabasecommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 3/28/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "command.hpp"
+#include "listvector.hpp"
+#include "sequence.hpp"
+
+//Command class behind create.database: combines a list file with the representative fasta and name files,
+//the consensus taxonomy file and an optional group file into a single tab separated "database" file.
+class CreateDatabaseCommand : public Command {
+public:
+       //builds the command from the user's option string ("help" and "citation" are handled as special cases)
+       CreateDatabaseCommand(string);
+       //builds a non-runnable instance (abort and calledHelp are set)
+       CreateDatabaseCommand();
+       ~CreateDatabaseCommand(){}
+       
+       //registers the command's parameters and returns their names
+       vector<string> setParameters();
+       string getCommandName()                 { return "create.database";             }
+       string getCommandCategory()             { return "OTU-Based Approaches"; }
+       string getHelpString(); 
+       string getCitation() { return "http://www.mothur.org/wiki/Create.database"; }
+       string getDescription()         { return "creates database file that includes, abundances across groups, representative sequences, and taxonomy for each OTU"; }
+    
+       
+       //runs the command; returns 2 when aborted because of bad parameters, 0 otherwise
+       int execute(); 
+       void help() { m->mothurOut(getHelpString()); }  
+       
+private:
+       
+       //set when the command must not run (help/citation request or invalid parameters)
+       bool abort;
+       //input file names, the list label to use and the output directory, filled in by the constructor
+       string listfile, groupfile, repfastafile, repnamesfile, contaxonomyfile, label, outputDir;
+       
+       //names of the files written by execute
+       vector<string> outputNames;
+               
+       //reads repfastafile: appends the sequences to the argument, returns the OTU sizes found in the headers
+       vector<int> readFasta(vector<Sequence>&);
+    //reads contaxonomyfile: appends the taxonomies to the argument, returns the OTU sizes
+    vector<int> readTax(vector<string>&);
+    //reads repnamesfile into the map (second column -> first column), returns the map's size
+    int readNames(map<string, string>&); 
+       //returns the ListVector for the requested label; the caller deletes it
+       ListVector* getList();
+       
+};
+
+
+
+
+#endif
index b2817a779dc1b95070ef77fbe6e8bf5b11159a3c..49f39035e0aa95d6739616c0a6804858b53cdf9e 100644 (file)
@@ -45,7 +45,6 @@ class Database {
 
 public:
        Database();
-       Database(const Database& db) : numSeqs(db.numSeqs), longest(db.longest), searchScore(db.searchScore), results(db.results), Scores(db.Scores) { m = MothurOut::getInstance(); }
        virtual ~Database();
        virtual void generateDB() = 0; 
        virtual void addSequence(Sequence) = 0;  //add sequence to search engine
index 973340ca0589c87dcf36176638f99f959cd484cc..98545f8e24a72dca332bf7571fc0e5e9e6936988 100644 (file)
@@ -599,7 +599,7 @@ cout << largest->second << '\t' << largest->first->score << '\t' << largest->fir
        }
 }
 
-//***************************************************************************************************************
+***************************************************************************************************************
 //used by removeObviousOutliers which was attempt to increase sensitivity of chimera detection...not currently used...
 int DeCalculator::findLargestContrib(vector<int> seen) {
        try{
@@ -624,7 +624,7 @@ int DeCalculator::findLargestContrib(vector<int> seen) {
                exit(1);
        }
 }
-//***************************************************************************************************************
+***************************************************************************************************************
 void DeCalculator::removeContrib(int bad, vector<quanMember>& quan) {
        try{
        
index 3541e00d064233c4da10619789291222290b33f1..3d0c0d51acaac8844b71a3efa40c7925674ad4c5 100644 (file)
@@ -154,7 +154,10 @@ int DeconvoluteCommand::execute() {
                
                map<string, string> nameMap;
                map<string, string>::iterator itNames;
-               if (oldNameMapFName != "")  {  m->readNames(oldNameMapFName, nameMap); }
+               if (oldNameMapFName != "")  {  
+            m->readNames(oldNameMapFName, nameMap); 
+            if (oldNameMapFName == outNameFile){ outNameFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + "unique.names";   }
+        }
                
                if (m->control_pressed) { return 0; }
                
index 65edcf75366d4096b1256266b05296146c30167b..79b8fe7c10544dafd2675a963a2229eb2e3e7046 100644 (file)
@@ -380,7 +380,7 @@ int DistanceCommand::execute(){
                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
 #else          
                                
-       //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+       //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                //if you don't need to fork anything
                if(processors == 1){
                        if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
@@ -507,7 +507,7 @@ int DistanceCommand::execute(){
 /**************************************************************************************************/
 void DistanceCommand::createProcesses(string filename) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                processIDS.clear();
                
@@ -1014,7 +1014,7 @@ int DistanceCommand::convertMatrix(string outputFile) {
                string outfile = m->getRootName(outputFile) + "sorted.dist.temp";
                
                //use the unix sort 
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        string command = "sort -n " + outputFile + " -o " + outfile;
                        system(command.c_str());
                #else //sort using windows sort
@@ -1094,7 +1094,7 @@ int DistanceCommand::convertMatrix(string outputFile) {
                exit(1);
        }
 }
-/**************************************************************************************************
+**************************************************************************************************
 int DistanceCommand::convertToLowerTriangle(string outputFile) {
        try{
 
@@ -1102,7 +1102,7 @@ int DistanceCommand::convertToLowerTriangle(string outputFile) {
                string outfile = m->getRootName(outputFile) + "sorted.dist.temp";
                
                //use the unix sort 
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        string command = "sort -n " + outputFile + " -o " + outfile;
                        system(command.c_str());
                #else //sort using windows sort
@@ -1188,7 +1188,7 @@ int DistanceCommand::convertToLowerTriangle(string outputFile) {
                exit(1);
        }
 }
-/**************************************************************************************************/
+**************************************************************************************************/
 //its okay if the column file does not contain all the names in the fasta file, since some distance may have been above a cutoff,
 //but no sequences can be in the column file that are not in oldfasta. also, if a distance is above the cutoff given then remove it.
 //also check to make sure the 2 files have the same alignment length.
index f55f7144fa548fe30eae2b075a8abcf8fd4584f0..91f0ed50b3003345454690b7aecc049c449854d8 100644 (file)
@@ -53,7 +53,7 @@ struct distanceData {
 };
 
 /**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
 static DWORD WINAPI MyDistThreadFunction(LPVOID lpParam){ 
        distanceData* pDataArray;
index 2cbf5cad6edf5df5b62b7c345cf09032eaf7e10f..27e278574493c89d0197d99cbe71c0c3dd03d0d9 100644 (file)
 #include "onegapignore.h"
 
 
-/**************************************************************************************************/
-DistanceDB::DistanceDB(const DistanceDB& ddb) : data(ddb.data), templateSeqsLength(ddb.templateSeqsLength), templateAligned(ddb.templateAligned), Database(ddb) { 
-       distCalculator = new oneGapIgnoreTermGapDist(); 
-}
 /**************************************************************************************************/
 DistanceDB::DistanceDB() : Database() { 
        try {
index d7e05db85afed819b9f2d26c99d06c1426ca19f9..2624d6d6440190520e02af09c43264bf365c9f33 100644 (file)
@@ -19,7 +19,6 @@ class DistanceDB : public Database {
 public:
        
        DistanceDB();
-       DistanceDB(const DistanceDB& ddb); 
        ~DistanceDB() { delete distCalculator; }
        
        void generateDB() {} //doesn't generate a search db 
index fe9bc3099d7a1dcdd347bfe98a23e1bceb44768a..1aca10c4bd208f6cf6c0dbfd9f97fdde97afe576 100644 (file)
@@ -19,7 +19,6 @@ class eachGapDist : public Dist {
 public:
        
        eachGapDist() {}
-       eachGapDist(const eachGapDist& ddb) {}
        
        void calcDist(Sequence A, Sequence B){          
                int diff = 0;
index ea1c0e14dc9f89fe0157d24828a2e9552297389c..ffbe324acd490ec6eb621ce0fe8d26b3b38d3ac0 100644 (file)
@@ -34,7 +34,7 @@ string Engine::findMothursPath(){
                
                //delimiting path char
                char delim;
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        delim = ':';
                #else
                        delim = ';';
@@ -56,7 +56,7 @@ string Engine::findMothursPath(){
                
                if (mothurPath != "") {
                        //add mothur so it looks like what argv would look like
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                mothurPath += "/mothur";
                        #else
                                mothurPath += "\\mothur";
@@ -71,7 +71,7 @@ string Engine::findMothursPath(){
                                //is this mothurs path?
                                ifstream in;
                                string tempIn = dirs[i];
-                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                        tempIn += "/mothur";
                                #else
                                        tempIn += "\\mothur";
@@ -219,7 +219,7 @@ bool InteractEngine::getInput(){
 string Engine::getCommand()  {
        try {
        
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        #ifdef USE_READLINE
                                char* nextCommand = NULL;
                                nextCommand = readline("mothur > ");
index 82c73f3d439c1d3cc154c8dac67398075134a1e8..a7d42b331bc3044de372c030175da4663f9e3a9c 100644 (file)
@@ -125,7 +125,12 @@ FilterSeqsCommand::FilterSeqsCommand(string option)  {
                        fasta = validParameter.validFile(parameters, "fasta", false);
                        if (fasta == "not found") {                             
                                fasta = m->getFastaFile(); 
-                               if (fasta != "") { fastafileNames.push_back(fasta);  m->mothurOut("Using " + fasta + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
+                               if (fasta != "") { 
+                    fastafileNames.push_back(fasta);  
+                    m->mothurOut("Using " + fasta + " as input file for the fasta parameter."); m->mothurOutEndLine();
+                    string simpleName = m->getSimpleName(fasta);
+                    filterFileName += simpleName.substr(0, simpleName.find_first_of('.'));
+                }
                                else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
                        }
                        else { 
@@ -420,9 +425,22 @@ int FilterSeqsCommand::filterSequences() {
                                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                                
 #else
-                       
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       vector<unsigned long long> positions = m->divideFile(fastafileNames[s], processors);
+            
+            vector<unsigned long long> positions;
+            if (savedPositions.size() != 0) { positions = savedPositions[s]; }
+            else {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                               positions = m->divideFile(fastafileNames[s], processors);
+#else
+                if(processors != 1){
+                    int numFastaSeqs = 0;
+                    positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs); 
+                    if (positions.size() < processors) { processors = positions.size(); }
+                }
+#endif
+            }
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                       //vector<unsigned long long> positions = m->divideFile(fastafileNames[s], processors);
                        
                        for (int i = 0; i < (positions.size()-1); i++) {
                                lines.push_back(new linePair(positions[i], positions[(i+1)]));
@@ -432,23 +450,31 @@ int FilterSeqsCommand::filterSequences() {
                                        int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
                                        numSeqs += numFastaSeqs;
                                }else{
-                                       int numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s]); 
+                                       int numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta); 
                                        numSeqs += numFastaSeqs;
-                               
-                                       rename((fastafileNames[s] + toString(processIDS[0]) + ".temp").c_str(), filteredFasta.c_str());
-                               
-                                       //append fasta files
-                                       for(int i=1;i<processors;i++){
-                                               m->appendFiles((fastafileNames[s] + toString(processIDS[i]) + ".temp"), filteredFasta);
-                                               m->mothurRemove((fastafileNames[s] + toString(processIDS[i]) + ".temp"));
-                                       }
                                }
                                
                                if (m->control_pressed) {  return 1; }
                #else
-                               lines.push_back(new linePair(0, 1000));
+            if(processors == 1){
+                lines.push_back(new linePair(0, 1000));
                                int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
                                numSeqs += numFastaSeqs;
+            }else {
+                int numFastaSeqs = positions.size()-1;
+                //positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs); 
+                
+                //figure out how many sequences you have to process
+                int numSeqsPerProcessor = numFastaSeqs / processors;
+                for (int i = 0; i < processors; i++) {
+                    int startIndex =  i * numSeqsPerProcessor;
+                    if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
+                    lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
+                }
+                
+                numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta); 
+                numSeqs += numFastaSeqs;
+            }
 
                                if (m->control_pressed) {  return 1; }
                #endif
@@ -570,7 +596,7 @@ int FilterSeqsCommand::driverRunFilter(string F, string outputFilename, string i
                                count++;
                        }
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = in.tellg();
                                if ((pos == -1) || (pos >= filePos->end)) { break; }
                        #else
@@ -596,12 +622,15 @@ int FilterSeqsCommand::driverRunFilter(string F, string outputFilename, string i
 }
 /**************************************************************************************************/
 
-int FilterSeqsCommand::createProcessesRunFilter(string F, string filename) {
+int FilterSeqsCommand::createProcessesRunFilter(string F, string filename, string filteredFastaName) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
+        
+        int process = 1;
                int num = 0;
                processIDS.clear();
+        
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+               
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -629,8 +658,10 @@ int FilterSeqsCommand::createProcessesRunFilter(string F, string filename) {
                        }
                }
                
+        num = driverRunFilter(F, filteredFastaName, filename, lines[0]);
+        
                //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
+               for (int i=0;i<processIDS.size();i++) { 
                        int temp = processIDS[i];
                        wait(&temp);
                }       
@@ -641,11 +672,56 @@ int FilterSeqsCommand::createProcessesRunFilter(string F, string filename) {
                        m->openInputFile(tempFile, in);
                        if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
                        in.close(); m->mothurRemove(tempFile);
+            
+            m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filteredFastaName);
+            m->mothurRemove((filename + toString(processIDS[i]) + ".temp"));
                }
-
-               
-               return num;
-#endif         
+               
+#else
+        
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the filterData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //Taking advantage of shared memory to allow both threads to add info to F.
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<filterRunData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=0; i<processors-1; i++){
+                       
+            string extension = "";
+                       if (i != 0) { extension = toString(i) + ".temp"; }
+            
+                       filterRunData* tempFilter = new filterRunData(filter, filename, (filteredFastaName + extension), m, lines[i]->start, lines[i]->end, alignmentLength, i);
+                       pDataArray.push_back(tempFilter);
+                       processIDS.push_back(i);
+            
+                       hThreadArray[i] = CreateThread(NULL, 0, MyRunFilterThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+               }
+        
+        num = driverRunFilter(F, (filteredFastaName + toString(processors-1) + ".temp"), filename, lines[processors-1]);
+        
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       num += pDataArray[i]->count;
+            CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+        
+        for (int i = 1; i < processors; i++) {
+            m->appendFiles((filteredFastaName + toString(i) + ".temp"), filteredFastaName);
+            m->mothurRemove((filteredFastaName + toString(i) + ".temp"));
+               }
+#endif 
+        
+        return num;
+        
        }
        catch(exception& e) {
                m->errorOut(e, "FilterSeqsCommand", "createProcessesRunFilter");
@@ -740,9 +816,9 @@ string FilterSeqsCommand::createFilter() {
                                
 #else
                                
-               
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                               vector<unsigned long long> positions = m->divideFile(fastafileNames[s], processors);
+                vector<unsigned long long> positions;
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                               positions = m->divideFile(fastafileNames[s], processors);
                                for (int i = 0; i < (positions.size()-1); i++) {
                                        lines.push_back(new linePair(positions[i], positions[(i+1)]));
                                }       
@@ -754,14 +830,32 @@ string FilterSeqsCommand::createFilter() {
                                        int numFastaSeqs = createProcessesCreateFilter(F, fastafileNames[s]); 
                                        numSeqs += numFastaSeqs;
                                }
-                               
-                               if (m->control_pressed) {  return filterString; }
                #else
-                               lines.push_back(new linePair(0, 1000));
-                               int numFastaSeqs = driverCreateFilter(F, fastafileNames[s], lines[0]);
-                               numSeqs += numFastaSeqs;
-                               if (m->control_pressed) {  return filterString; }
+                if(processors == 1){
+                    lines.push_back(new linePair(0, 1000));
+                    int numFastaSeqs = driverCreateFilter(F, fastafileNames[s], lines[0]);
+                    numSeqs += numFastaSeqs;
+                               }else {
+                    int numFastaSeqs = 0;
+                    positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs); 
+                    if (positions.size() < processors) { processors = positions.size(); }
+                    
+                    //figure out how many sequences you have to process
+                    int numSeqsPerProcessor = numFastaSeqs / processors;
+                    for (int i = 0; i < processors; i++) {
+                        int startIndex =  i * numSeqsPerProcessor;
+                        if(i == (processors - 1)){     numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
+                        lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
+                    }
+                    
+                    numFastaSeqs = createProcessesCreateFilter(F, fastafileNames[s]); 
+                                       numSeqs += numFastaSeqs;
+                }
                #endif
+                //save the file positions so we can reuse them in the runFilter function
+                savedPositions[s] = positions;
+                
+                               if (m->control_pressed) {  return filterString; }
 #endif
                        
                        }
@@ -848,7 +942,7 @@ string FilterSeqsCommand::createFilter() {
        
        MPI_Barrier(MPI_COMM_WORLD);
 #endif
-                               
+            
                return filterString;
        }
        catch(exception& e) {
@@ -882,7 +976,7 @@ int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair*
                                        count++;
                        }
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = in.tellg();
                                if ((pos == -1) || (pos >= filePos->end)) { break; }
                        #else
@@ -954,11 +1048,12 @@ int FilterSeqsCommand::MPICreateFilter(int start, int num, Filters& F, MPI_File&
 
 int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 1;
+        int process = 1;
                int num = 0;
                processIDS.clear();
-               
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                               
                //loop through and create all the processes you want
                while (process != processors) {
                        int pid = fork();
@@ -1033,8 +1128,50 @@ int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename)
                        m->mothurRemove(tempFilename);
                }
                
-               return num;
-#endif         
+               
+#else
+        
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the filterData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //Taking advantage of shared memory to allow both threads to add info to F.
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<filterData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors];
+               HANDLE  hThreadArray[processors]; 
+               
+               //Create processor worker threads.
+               for( int i=0; i<processors; i++ ){
+                       
+                       filterData* tempFilter = new filterData(filename, m, lines[i]->start, lines[i]->end, alignmentLength, trump, vertical, soft, hard, i);
+                       pDataArray.push_back(tempFilter);
+                       processIDS.push_back(i);
+            
+                       hThreadArray[i] = CreateThread(NULL, 0, MyCreateFilterThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+               }
+        
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       num += pDataArray[i]->count;
+            F.mergeFilter(pDataArray[i]->F.getFilter());
+            
+                       for (int k = 0; k < alignmentLength; k++) {      F.a[k] += pDataArray[i]->F.a[k];       }
+                       for (int k = 0; k < alignmentLength; k++) {      F.t[k] += pDataArray[i]->F.t[k];       }
+                       for (int k = 0; k < alignmentLength; k++) {      F.g[k] += pDataArray[i]->F.g[k];       }
+                       for (int k = 0; k < alignmentLength; k++) {      F.c[k] += pDataArray[i]->F.c[k];       }
+                       for (int k = 0; k < alignmentLength; k++) {      F.gap[k] += pDataArray[i]->F.gap[k];   }
+
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+               
+#endif 
+        return num;
+        
        }
        catch(exception& e) {
                m->errorOut(e, "FilterSeqsCommand", "createProcessesCreateFilter");
index 3bf36c040231c5f801281f9d553c60bed01101c8..4405c33f52165d5c89cbe47e8513147cb14dd879 100644 (file)
@@ -40,6 +40,7 @@ private:
 \r
        vector<linePair*> lines;\r
        vector<int> processIDS;\r
+    map<int, vector<unsigned long long> > savedPositions;\r
 \r
        string vertical, filter, fasta, hard, outputDir, filterFileName;\r
        vector<string> fastafileNames;  \r
@@ -55,7 +56,7 @@ private:
        string createFilter();\r
        int filterSequences();\r
        int createProcessesCreateFilter(Filters&, string);\r
-       int createProcessesRunFilter(string, string);\r
+       int createProcessesRunFilter(string, string, string);\r
        int driverRunFilter(string, string, string, linePair*);\r
        int driverCreateFilter(Filters& F, string filename, linePair* line);\r
        #ifdef USE_MPI\r
@@ -65,4 +66,179 @@ private:
        \r
 };\r
 \r
+\r
+/**************************************************************************************************/\r
+//Per-thread work item for MyCreateFilterThreadFunction: carries the fasta filename, start offset, filter options and a private Filters F; the thread leaves its tally in count.\r
+// Handed to the thread as a single void pointer (LPVOID) and cast back to filterData* inside the thread function.\r
+// NOTE: the thread function uses 'end' as the number of sequences to read, not as a file offset; filterData() leaves the scalar members unset.\r
+struct filterData {\r
+       Filters F;\r
+    int count, tid, alignmentLength;\r
+    unsigned long long start, end;\r
+    MothurOut* m;\r
+    string filename, vertical, hard;\r
+    char trump;\r
+    float soft;\r
+       \r
+       filterData(){}\r
+       filterData(string fn, MothurOut* mout, unsigned long long st, unsigned long long en, int aLength, char tr, string vert, float so, string ha, int t) {\r
+        filename = fn;\r
+               m = mout;\r
+               start = st;\r
+               end = en;\r
+        tid = t;\r
+        trump = tr;\r
+        alignmentLength = aLength;\r
+        vertical = vert;\r
+        soft = so;\r
+        hard = ha;\r
+               count = 0;\r
+       }\r
+};\r
+/**************************************************************************************************/\r
+//Per-thread work item for MyRunFilterThreadFunction: the filter mask string, the input fasta filename and the output filename this thread writes.\r
+// Handed to the thread as a single void pointer (LPVOID) and cast back to filterRunData* inside the thread function.\r
+// NOTE: the thread function uses 'end' as the number of sequences to read, not as a file offset; filterRunData() leaves the scalar members unset.\r
+struct filterRunData {\r
+    int count, tid, alignmentLength;\r
+    unsigned long long start, end;\r
+    MothurOut* m;\r
+    string filename;\r
+    string filter, outputFilename;\r
+       \r
+       filterRunData(){}\r
+       filterRunData(string f, string fn, string ofn, MothurOut* mout, unsigned long long st, unsigned long long en, int aLength, int t) {\r
+        filter = f;\r
+        outputFilename = ofn;\r
+        filename = fn;\r
+               m = mout;\r
+               start = st;\r
+               end = en;\r
+        tid = t;\r
+        alignmentLength = aLength;\r
+               count = 0;\r
+       }\r
+};\r
+\r
+/**************************************************************************************************/\r
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)\r
+#else\r
+static DWORD WINAPI MyCreateFilterThreadFunction(LPVOID lpParam){ \r
+       filterData* pDataArray;\r
+       pDataArray = (filterData*)lpParam;\r
+       \r
+       try {\r
+\r
+               if (pDataArray->soft != 0)                      {  pDataArray->F.setSoft(pDataArray->soft);             }\r
+               if (pDataArray->trump != '*')           {  pDataArray->F.setTrump(pDataArray->trump);   }\r
+               \r
+               pDataArray->F.setLength(pDataArray->alignmentLength);\r
+               \r
+               if(pDataArray->trump != '*' || pDataArray->m->isTrue(pDataArray->vertical) || pDataArray->soft != 0){\r
+                       pDataArray->F.initialize();\r
+               }\r
+               \r
+               if(pDataArray->hard.compare("") != 0)   {       pDataArray->F.doHard(pDataArray->hard);         }\r
+               else                                            {       pDataArray->F.setFilter(string(pDataArray->alignmentLength, '1'));      }\r
+        \r
+               ifstream in;\r
+               pDataArray->m->openInputFile(pDataArray->filename, in);\r
+        \r
+               //a chunk whose start offset is 0 or 1 is read from the very beginning of the file\r
+               if ((pDataArray->start == 0) || (pDataArray->start == 1)) {\r
+                       in.seekg(0);\r
+               }else { //otherwise seek to one byte before start and skip whitespace; this accounts for the difference in line endings. \r
+                       in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); \r
+               }\r
+               \r
+               pDataArray->count = pDataArray->end;\r
+               for(int i = 0; i < pDataArray->end; i++){ //end holds the number of sequences to process (a count, not an offset), so it is the loop bound\r
+                       \r
+                       if (pDataArray->m->control_pressed) { in.close(); pDataArray->count = 1; return 1; }\r
+                       \r
+                       Sequence current(in); pDataArray->m->gobble(in); \r
+                       \r
+                       if (current.getName() != "") {\r
+                               if (current.getAligned().length() != pDataArray->alignmentLength) { pDataArray->m->mothurOut("Sequences are not all the same length, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true;  }\r
+                \r
+                if(pDataArray->trump != '*')                   {       pDataArray->F.doTrump(current);         }\r
+                if(pDataArray->m->isTrue(pDataArray->vertical) || pDataArray->soft != 0)       {       pDataArray->F.getFreqs(current);        }\r
+                       }\r
+            \r
+            //report progress every 100 sequences (the final total is printed after the loop)\r
+                       if((i) % 100 == 0){     pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine();               }\r
+               }\r
+               \r
+        if((pDataArray->count) % 100 != 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }\r
+        \r
+               in.close();\r
+               \r
+               return 0;\r
+               \r
+       }\r
+       catch(exception& e) {\r
+               pDataArray->m->errorOut(e, "FilterSeqsCommand", "MyCreateFilterThreadFunction");\r
+               exit(1);\r
+       }\r
+} \r
+/**************************************************************************************************/\r
+static DWORD WINAPI MyRunFilterThreadFunction(LPVOID lpParam){ \r
+       filterRunData* pDataArray;\r
+       pDataArray = (filterRunData*)lpParam;\r
+       \r
+       try {\r
+        \r
+        ofstream out;\r
+               pDataArray->m->openOutputFile(pDataArray->outputFilename, out);\r
+\r
+               ifstream in;\r
+               pDataArray->m->openInputFile(pDataArray->filename, in);\r
+        \r
+               //a chunk whose start offset is 0 or 1 is read from the very beginning of the file\r
+               if ((pDataArray->start == 0) || (pDataArray->start == 1)) {\r
+                       in.seekg(0);\r
+               }else { //otherwise seek to one byte before start and skip whitespace; this accounts for the difference in line endings. \r
+                       in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); \r
+               }\r
+               \r
+               pDataArray->count = pDataArray->end;\r
+               for(int i = 0; i < pDataArray->end; i++){ //end holds the number of sequences to process (a count, not an offset), so it is the loop bound\r
+                       \r
+                       if (pDataArray->m->control_pressed) { in.close(); out.close(); pDataArray->count = 1; return 1; }\r
+                       \r
+                       Sequence seq(in); pDataArray->m->gobble(in);\r
+            if (seq.getName() != "") {\r
+                string align = seq.getAligned();\r
+                string filterSeq = "";\r
+                \r
+                for(int j=0;j<pDataArray->alignmentLength;j++){\r
+                    if(pDataArray->filter[j] == '1'){\r
+                        filterSeq += align[j];\r
+                    }\r
+                }\r
+                \r
+                out << '>' << seq.getName() << endl << filterSeq << endl;\r
+            }\r
+            \r
+            //report progress every 100 sequences (the final total is printed after the loop)\r
+                       if((i) % 100 == 0){     pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine();               }\r
+               }\r
+               \r
+        if((pDataArray->count) % 100 != 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }\r
+        \r
+               in.close();\r
+        out.close();\r
+               \r
+               return 0;\r
+               \r
+       }\r
+       catch(exception& e) {\r
+               pDataArray->m->errorOut(e, "FilterSeqsCommand", "MyRunFilterThreadFunction");\r
+               exit(1);\r
+       }\r
+} \r
+/**************************************************************************************************/\r
+#endif\r
+\r
+\r
 #endif\r
diff --git a/fisher2.c b/fisher2.c
deleted file mode 100644 (file)
index c861834..0000000
--- a/fisher2.c
+++ /dev/null
@@ -1,2158 +0,0 @@
-#include "fisher2.h"
-
-
-static void f2xact(int *nrow, int *ncol, double *table, int *ldtabl,
-                 double *expect, double *percnt, double *emin, double
-                 *prt, double *pre, double *fact, int *ico, int
-                 *iro, int *kyy, int *idif, int *irn, int *key,
-                 int *ldkey, int *ipoin, double *stp, int *ldstp,
-                 int *ifrq, double *dlp, double *dsp, double *tm,
-                 int *key2, int *iwk, double *rwk);
-static void f3xact(int *nrow, int *irow, int *ncol,    int *icol,
-                 double *dlp, int *mm, double *fact, int *ico, int
-                 *iro, int *it, int *lb, int *nr, int *nt, int
-                 *nu, int *itc, int *ist, double *stv, double *alen,
-                 const double *tol);
-static void f4xact(int *nrow, int *irow, int *ncol, int *icol,
-                 double *dsp, double *fact, int *icstk, int *ncstk,
-                 int *lstk, int *mstk, int *nstk, int *nrstk, int
-                 *irstk, double *ystk, const double *tol);
-static void f5xact(double *pastp, const double *tol, int *kval, int *key,
-                 int *ldkey, int *ipoin, double *stp, int *ldstp,
-                 int *ifrq, int *npoin, int *nr, int *nl, int
-                 *ifreq, int *itop, int *ipsh);
-static void f6xact(int *nrow, int *irow, int *iflag, int *kyy,
-                  int *key, int *ldkey, int *last, int *ipn);
-static void f7xact(int *nrow, int *imax, int *idif, int *k, int *ks,
-                  int *iflag);
-static void f8xact(int *irow, int *is, int *i1, int *izero, int *myNew);
-static double f9xact(int *n, int *mm, int *ir, double *fact);
-static void f10act(int *nrow, int *irow, int *ncol, int *icol,
-                 double *val, int *xmin, double *fact, int *nd,
-                 int *ne, int *m);
-static void f11act(int *irow, int *i1, int *i2, int *myNew);
-static void prterr(int icode, char *mes);
-static int iwork(int iwkmax, int *iwkpt, int number, int itype);
-// void fexact(int *nrow, int *ncol, double *table, int *ldtabl,
-//       double *expect, double *percnt, double *emin, double *prt,
-//       double *pre, /* myNew in C : */ int *workspace);
- static void isort(int *n, int *ix);
- static double gammds(double *y, double *p, int *ifault);
- static double alogam(double *x, int *ifault);
-
-
-/* The only public function : */
-void fexact(int *nrow, int *ncol, double *table, int *ldtabl,
-       double *expect, double *percnt, double *emin, double *prt,
-       double *pre, /* myNew in C : */ int *workspace) {
-
-/*
-  ALGORITHM 643, COLLECTED ALGORITHMS FROM ACM.
-  THIS WORK PUBLISHED IN TRANSACTIONS ON MATHEMATICAL SOFTWARE,
-  VOL. 19, NO. 4, DECEMBER, 1993, PP. 484-488.
-  -----------------------------------------------------------------------
-  Name:              FEXACT
-  Purpose:    Computes Fisher's exact test probabilities and a hybrid
-             approximation to Fisher exact test probabilities for a
-             contingency table using the network algorithm.
-  Usage:      CALL FEXACT (NROW, NCOL, TABLE, LDTABL, EXPECT, PERCNT,
-                           EMIN, PRT, PRE)
-  Arguments:
-    NROW    - The number of rows in the table.                 (Input)
-    NCOL    - The number of columns in the table.              (Input)
-    TABLE   - NROW by NCOL matrix containing the contingency
-              table.                                           (Input)
-    LDTABL  - Leading dimension of TABLE exactly as specified
-              in the dimension statement in the calling
-             program.                                          (Input)
-    EXPECT  - Expected value used in the hybrid algorithm for
-             deciding when to use asymptotic theory
-             probabilities.                                    (Input)
-             If EXPECT <= 0.0 then asymptotic theory probabilities
-             are not used and Fisher exact test probabilities are
-             computed.  Otherwise, if PERCNT or more of the cells in
-             the remaining table have estimated expected values of
-             EXPECT or more, with no remaining cell having expected
-             value less than EMIN, then asymptotic chi-squared
-             probabilities are used.  See the algorithm section of the
-             manual document for details.
-             Use EXPECT = 5.0 to obtain the 'Cochran' condition.
-    PERCNT  - Percentage of remaining cells that must have
-              estimated expected values greater than EXPECT
-             before asymptotic probabilities can be used.      (Input)
-             See argument EXPECT for details.
-             Use PERCNT = 80.0 to obtain the 'Cochran' condition.
-    EMIN    - Minimum cell estimated expected value allowed for
-             asymptotic chi-squared probabilities to be used.  (Input)
-             See argument EXPECT for details.
-             Use EMIN = 1.0 to obtain the 'Cochran' condition.
-    PRT     - Probability of the observed table for fixed
-              marginal totals.                                 (Output)
-    PRE     - Table p-value.                                   (Output)
-             PRE is the probability of a more extreme table,
-             where `extreme' is in a probabilistic sense.
-             If EXPECT < 0 then the Fisher exact probability
-             is returned.  Otherwise, an approximation to the
-             Fisher exact probability is computed based upon
-             asymptotic chi-squared probabilities for ``large''
-             table expected values.  The user defines ``large''
-             through the arguments EXPECT, PERCNT, and EMIN.
-
-  Remarks:
-  1. For many problems one megabyte or more of workspace can be
-     required. If the environment supports it, the user should begin
-     by increasing the workspace used to 200,000 units.
-  2. In FEXACT, LDSTP = 30*LDKEY.  The proportion of table space used
-     by STP may be changed by changing the line MULT = 30 below to
-     another value.
-  3. FEXACT may be converted to single precision by setting IREAL = 3,
-     and converting all DOUBLE PRECISION specifications (except the
-     specifications for RWRK, IWRK, and DWRK) to REAL. This will
-     require changing the names and specifications of the intrinsic
-     functions ALOG, AMAX1, AMIN1, EXP, and REAL.  In addition, the
-     machine specific constants will need to be changed, and the name
-     DWRK will need to be changed to RWRK in the call to F2XACT.
-  4. Machine specific constants are specified and documented in F2XACT.
-     A missing value code is specified in both FEXACT and F2XACT.
-  5. Although not a restriction, it is not generally practical to call
-     this routine with large tables which are not sparse and in
-     which the 'hybrid' algorithm has little effect.  For example,
-     although it is feasible to compute exact probabilities for the
-     table
-           1 8 5 4 4 2 2
-           5 3 3 4 3 1 0
-          10 1 4 0 0 0 0,
-     computing exact probabilities for a similar table which has been
-     enlarged by the addition of an extra row (or column) may not be
-     feasible.
-  -----------------------------------------------------------------------
-  */
-
-    /* CONSTANT Parameters : */
-
-    /* To increase the length of the table of past path lengths relative
-       to the length of the hash table, increase MULT.
-    */
-    const int mult = 30;
-    /* AMISS is a missing value indicator which is returned when the
-       probability is not defined.
-    */
-    const double amiss = -12345.;
-    /*
-      Set IREAL = 4 for DOUBLE PRECISION
-      Set IREAL = 3 for SINGLE PRECISION
-    */
-#define i_real 4
-#define i_int  2
-
-    /* System generated locals */
-    int ikh;
-    /* Local variables */
-    int nco, nro, ntot, numb, iiwk, irwk;
-    int i, j, k, kk, ldkey, ldstp, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10;
-    int i3a, i3b, i3c, i9a, iwkmax, iwkpt;
-
-    /* Workspace Allocation (freed at end) */
-    double *equiv;
-    iwkmax = 2 * (int) (*workspace / 2);
-//    equiv = (double *) R_alloc(iwkmax / 2, sizeof(double));
-    equiv = (double *) calloc(iwkmax / 2, sizeof(double));
-
-    /* The check could never happen with Calloc!
-    equiv = Calloc(iwkmax / 2, double);
-    if (!equiv) {
-       prterr(0, "Can not allocate specified workspace");
-    } */
-
-#define dwrk (equiv)
-#define iwrk ((int *)equiv)
-#define rwrk ((float *)equiv)
-
-    /* Parameter adjustments */
-    table -= *ldtabl + 1;
-
-    /* Function Body */
-    iwkpt = 0;
-
-    if (*nrow > *ldtabl)
-       prterr(1, "NROW must be less than or equal to LDTABL.");
-
-    ntot = 0;
-    for (i = 1; i <= *nrow; ++i) {
-       for (j = 1; j <= *ncol; ++j) {
-           if (table[i + j * *ldtabl] < 0.)
-               prterr(2, "All elements of TABLE must be positive.");
-           ntot = (int) (ntot + table[i + j * *ldtabl]);
-       }
-    }
-    if (ntot == 0) {
-       prterr(3, "All elements of TABLE are zero.\n"
-              "PRT and PRE are set to missing values.");
-       *prt = amiss;
-       *pre = amiss;
-       goto L_End;
-    }
-
-    nco = max(*nrow, *ncol);
-    nro = *nrow + *ncol - nco;/* = min(*nrow, *ncol) */
-    k = *nrow + *ncol + 1;
-    kk = k * nco;
-
-    ikh = ntot + 1;
-    i1  = iwork(iwkmax, &iwkpt, ikh, i_real);
-    i2  = iwork(iwkmax, &iwkpt, nco, i_int);
-    i3  = iwork(iwkmax, &iwkpt, nco, i_int);
-    i3a = iwork(iwkmax, &iwkpt, nco, i_int);
-    i3b = iwork(iwkmax, &iwkpt, nro, i_int);
-    i3c = iwork(iwkmax, &iwkpt, nro, i_int);
-    ikh = max(k * 5 + (kk << 1), nco * 7 + 800);
-    iiwk= iwork(iwkmax, &iwkpt, ikh, i_int);
-    ikh = max(nco + 401, k);
-    irwk= iwork(iwkmax, &iwkpt, ikh, i_real);
-
-    /* NOTE:
-       What follows below splits the remaining amount iwkmax - iwkpt of
-       (int) workspace into hash tables as follows.
-           type  size       index
-          INT   2 * ldkey  i4 i5 i10
-          REAL  2 * ldkey  i8 i9 i9a
-          REAL  2 * ldstp  i6
-          INT   6 * ldstp  i7
-       Hence, we need ldkey times
-           3 * 2 + 3 * 2 * s + 2 * mult * s + 6 * mult
-       chunks of integer memory, where s = sizeof(REAL) / sizeof(INT).
-       If doubles are used and are twice as long as ints, this gives
-           18 + 10 * mult
-       so that the value of ldkey can be obtained by dividing available
-       (int) workspace by this number.
-
-       In fact, because iwork() can actually use s * n + s - 1 int chunks
-       when allocating a REAL, we use ldkey = available / numb - 1.
-
-       FIXME:
-       Can we always assume that sizeof(double) / sizeof(int) is 2?
-       */
-    
-    if (i_real == 4) {         /* Double precision reals */
-       numb = 18 + 10 * mult;
-    } else {                   /* Single precision reals */
-       numb = (mult << 3) + 12;
-    }
-    ldkey = (iwkmax - iwkpt) / numb - 1;
-    ldstp = mult * ldkey;
-    ikh = ldkey << 1;  i4  = iwork(iwkmax, &iwkpt, ikh, i_int);
-    ikh = ldkey << 1;  i5  = iwork(iwkmax, &iwkpt, ikh, i_int);
-    ikh = ldstp << 1;  i6  = iwork(iwkmax, &iwkpt, ikh, i_real);
-    ikh = ldstp * 6;   i7  = iwork(iwkmax, &iwkpt, ikh, i_int);
-    ikh = ldkey << 1;  i8  = iwork(iwkmax, &iwkpt, ikh, i_real);
-    ikh = ldkey << 1;  i9  = iwork(iwkmax, &iwkpt, ikh, i_real);
-    ikh = ldkey << 1;  i9a = iwork(iwkmax, &iwkpt, ikh, i_real);
-    ikh = ldkey << 1;  i10 = iwork(iwkmax, &iwkpt, ikh, i_int);
-
-    /* To convert to double precision, change RWRK to DWRK in the next CALL.
-     */
-    f2xact(nrow,
-          ncol,
-          &table[*ldtabl + 1],
-          ldtabl,
-          expect,
-          percnt,
-          emin,
-          prt,
-          pre,
-          dwrk + i1,
-          iwrk + i2,
-          iwrk + i3,
-          iwrk + i3a,
-          iwrk + i3b,
-          iwrk + i3c,
-          iwrk + i4,
-          &ldkey,
-          iwrk + i5,
-          dwrk + i6,
-          &ldstp,
-          iwrk + i7,
-          dwrk + i8,
-          dwrk + i9,
-          dwrk + i9a,
-          iwrk + i10,
-          iwrk + iiwk,
-          dwrk + irwk);
-
-L_End:
-    /* Free(equiv); */
-    free(equiv);
-    return;
-}
-
-#undef rwrk
-#undef iwrk
-#undef dwrk
-
-
-/*
-  -----------------------------------------------------------------------
-  Name:                F2XACT
-  Purpose:     Computes Fisher's exact test for a contingency table,
-               routine with workspace variables specified.
-  Usage:       F2XACT (NROW, NCOL, TABLE, LDTABL, EXPECT, PERCNT,
-                       EMIN, PRT, PRE, FACT, ICO, IRO, KYY, IDIF,
-                       IRN, KEY, LDKEY, IPOIN, STP, LDSTP, IFRQ,
-                       DLP, DSP, TM, KEY2, IWK, RWK)
-  -----------------------------------------------------------------------
-  */
-void
-f2xact(int *nrow, int *ncol, double *table, int *ldtabl,
-       double *expect, double *percnt, double *emin, double *prt,
-       double *pre, double *fact, int *ico, int *iro, int *kyy,
-       int *idif, int *irn, int *key, int *ldkey, int *ipoin,
-       double *stp, int *ldstp, int *ifrq, double *dlp, double *dsp,
-       double *tm, int *key2, int *iwk, double *rwk)
-{
-    /* IMAX is the largest representable int on the machine. */
-    const int imax = SINT_MAX;
-//     const int imax = 2147483647; //xx: I DON'T like this, and
-// thanks to the hint from Jason Turner I don't do it anymore. (R.D-U).
-
-    /* AMISS is a missing value indicator which is returned when the
-       probability is not defined. */
-    const double amiss = -12345.;
-       
-    /* TOL is chosen as the square root of the smallest relative spacing. */
-#ifndef Macintosh
-    const  static double tol = 3.45254e-7;
-#else
-    static double tol = 3.45254e-7;
-#endif    
-    /* EMX is a large positive value used in comparing expected values. */
-    const static double emx = 1e30;
-
-    /* Local variables {{any really need to be static ???}} */
-    static int kval, kmax, jkey, last, ipsh, itmp, itop, jstp, ntot,
-       jstp2, jstp3, jstp4, i, ii, j, k, n, iflag, ncell, ifreq, chisq,
-       ikkey, ikstp, ikstp2, k1, kb, kd, ks,
-       i31, i32, i33, i34, i35, i36, i37, i38, i39,
-       i41, i42, i43, i44, i45, i46, i47, i48, i310, i311,
-       nco, nrb, ipn, ipo, itp, nro, nro2;
-    static double dspt, dd, df,ddf, drn,dro, emn, obs, obs2, obs3,
-       pastp, pv, tmp;
-    double d1;
-#ifndef USING_R
-    double d2;
-    static int ifault;
-#endif
-    int nr_gt_nc=0;
-
-    /* Parameter adjustments */
-    table -= *ldtabl + 1;
-    --ico;
-    --iro;
-    --kyy;
-    --idif;
-    --irn;
-    --key;
-    --ipoin;
-    --stp;
-    --ifrq;
-    --dlp;
-    --dsp;
-    --tm;
-    --key2;
-    --iwk;
-    --rwk;
-
-
-    /* Check table dimensions */
-    if (*nrow > *ldtabl)
-       prterr(1, "NROW must be less than or equal to LDTABL.");
-    if (*ncol <= 1)
-       prterr(4, "NCOL must be at least 2");
-
-    /* Initialize KEY array */
-    for (i = 1; i <= *ldkey << 1; ++i) {
-       key[i] = -9999;
-       key2[i] = -9999;
-    }
-    /* Initialize parameters */
-    *pre = 0.;
-    itop = 0;
-    if (*expect > 0.)
-       emn = *emin;
-    else
-       emn = emx;
- if (*nrow > *ncol){
-    nr_gt_nc =  1;
-}
-else{
-        nr_gt_nc =  0;
-}
-    /* nco := max(nrow, ncol) : */
-    if(nr_gt_nc)
-       nco = *nrow;
-    else
-       nco = *ncol;
-    /* Initialize pointers for workspace */
-    /* f3xact */
-    i31 = 1;
-    i32 = i31 + nco;
-    i33 = i32 + nco;
-    i34 = i33 + nco;
-    i35 = i34 + nco;
-    i36 = i35 + nco;
-    i37 = i36 + nco;
-    i38 = i37 + nco;
-    i39 = i38 + 400;
-    i310 = 1;
-    i311 = 401;
-    /* f4xact */
-    k = *nrow + *ncol + 1;
-    i41 = 1;
-    i42 = i41 + k;
-    i43 = i42 + k;
-    i44 = i43 + k;
-    i45 = i44 + k;
-    i46 = i45 + k;
-    i47 = i46 + k * nco;
-    i48 = 1;
-
-    /* Compute row marginals and total */
-    ntot = 0;
-    for (i = 1; i <= *nrow; ++i) {
-       iro[i] = 0;
-       for (j = 1; j <= *ncol; ++j) {
-           if (table[i + j * *ldtabl] < -1e-4)
-               prterr(2, "All elements of TABLE must be positive.");
-           iro[i] += (int) table[i + j * *ldtabl];
-       }
-       ntot += iro[i];
-    }
-
-    if (ntot == 0) {
-       prterr(3, "All elements of TABLE are zero.\n"
-              "PRT and PRE are set to missing values.");
-       *pre = *prt = amiss;
-       return;
-    }
-
-    /* Column marginals */
-    for (i = 1; i <= *ncol; ++i) {
-       ico[i] = 0;
-       for (j = 1; j <= *nrow; ++j)
-           ico[i] += (int) table[j + i * *ldtabl];
-    }
-
-    /* sort marginals */
-    isort(nrow, &iro[1]);
-    isort(ncol, &ico[1]);
-
-    /* Determine row and column marginals.
-       Define max(nrow,ncol) =: nco >= nro := min(nrow,ncol)
-       nco is defined above
-
-       Swap marginals if necessary to  ico[1:nco] & iro[1:nro]
-     */
-    if (nr_gt_nc) {
-       nro = *ncol;
-       /* Swap marginals */
-       for (i = 1; i <= nco; ++i) {
-           itmp = iro[i];
-           if (i <= nro)
-               iro[i] = ico[i];
-           ico[i] = itmp;
-       }
-    } else
-       nro = *nrow;
-
-
-    /* Get multipliers for stack */
-    kyy[1] = 1;
-    for (i = 2; i <= nro; ++i) {
-       /* Hash table multipliers */
-       if (iro[i - 1] + 1 <= imax / kyy[i - 1]) {
-           kyy[i] = kyy[i - 1] * (iro[i - 1] + 1);
-           j /= kyy[i - 1];
-       } else
-           goto L_ERR_5;
-    }
-    /* Maximum product */
-    if (iro[nro - 1] + 1 <= imax / kyy[nro - 1]) {
-       kmax = (iro[nro] + 1) * kyy[nro - 1];
-    } else {
-    L_ERR_5:
-       prterr(5, "The hash table key cannot be computed because "
-              "the largest key\n"
-              "is larger than the largest representable int.\n"
-              "The algorithm cannot proceed.\n"
-              "Reduce the workspace size, or use `exact = FALSE'.");
-       return;
-    }
-
-    /* Compute log factorials */
-    fact[0] = 0.;
-    fact[1] = 0.;
-    if(ntot >= 2) fact[2] = log(2.); 
-/* MM: old code assuming log() to be SLOW */
-    for (i = 3; i <= ntot; i += 2) {
-       fact[i] = fact[i - 1] + log((double) i);
-       j = i + 1;
-       if (j <= ntot)
-           fact[j] = fact[i] + fact[2] + fact[j / 2] - fact[j / 2 - 1];
-    }
-    /* Compute obs := observed path length */
-    obs = tol;
-    ntot = 0;
-    for (j = 1; j <= nco; ++j) {
-       dd = 0.;
-       for (i = 1; i <= nro; ++i) {
-           if (nr_gt_nc) {
-               dd += fact[(int) table[j + i * *ldtabl]];
-               ntot +=    (int) table[j + i * *ldtabl];
-           } else {
-               dd += fact[(int) table[i + j * *ldtabl]];
-               ntot +=    (int) table[i + j * *ldtabl];
-           }
-       }
-       obs += fact[ico[j]] - dd;
-    }
-    /* Denominator of observed table: DRO */
-    dro = f9xact(&nro, &ntot, &iro[1], fact);
-    *prt = exp(obs - dro);
-    /* Initialize pointers */
-    k = nco;
-    last = *ldkey + 1;
-    jkey = *ldkey + 1;
-    jstp = *ldstp + 1;
-    jstp2 = *ldstp * 3 + 1;
-    jstp3 = (*ldstp << 2) + 1;
-    jstp4 = *ldstp * 5 + 1;
-    ikkey = 0;
-    ikstp = 0;
-    ikstp2 = *ldstp << 1;
-    ipo = 1;
-    ipoin[1] = 1;
-    stp[1] = 0.;
-    ifrq[1] = 1;
-    ifrq[ikstp2 + 1] = -1;
-
-Outer_Loop:
-    kb = nco - k + 1;
-    ks = 0;
-    n = ico[kb];
-    kd = nro + 1;
-    kmax = nro;
-    /* IDIF is the difference in going to the daughter */
-    for (i = 1; i <= nro; ++i)
-       idif[i] = 0;
-
-    /* Generate the first daughter */
-    do {
-       --kd;
-       ntot = min(n, iro[kd]);
-       idif[kd] = ntot;
-       if (idif[kmax] == 0)
-           --kmax;
-       n -= ntot;
-    }
-    while (n > 0 && kd != 1);
-
-    if (n != 0) {
-       goto L310;
-    }
-
-    k1 = k - 1;
-    n = ico[kb];
-    ntot = 0;
-    for (i = kb + 1; i <= nco; ++i)
-       ntot += ico[i];
-
-
-L150:
-    /* Arc to daughter length=ICO(KB) */
-    for (i = 1; i <= nro; ++i)
-       irn[i] = iro[i] - idif[i];
-
-    /* Sort irn */
-    if (k1 > 1) {
-       if (nro == 2) {
-           if (irn[1] > irn[2]) {
-               ii = irn[1];
-               irn[1] = irn[2];
-               irn[2] = ii;
-           }
-       } else if (nro == 3) {
-           ii = irn[1];
-           if (ii > irn[3]) {
-               if (ii > irn[2]) {
-                   if (irn[2] > irn[3]) {
-                       irn[1] = irn[3];
-                       irn[3] = ii;
-                   } else {
-                       irn[1] = irn[2];
-                       irn[2] = irn[3];
-                       irn[3] = ii;
-                   }
-               } else {
-                   irn[1] = irn[3];
-                   irn[3] = irn[2];
-                   irn[2] = ii;
-               }
-           } else if (ii > irn[2]) {
-               irn[1] = irn[2];
-               irn[2] = ii;
-           } else if (irn[2] > irn[3]) {
-               ii = irn[2];
-               irn[2] = irn[3];
-               irn[3] = ii;
-           }
-       } else {
-           for (j = 2; j <= nro; ++j) {
-               i = j - 1;
-               ii = irn[j];
-
-               while (ii < irn[i]) {
-                   irn[i + 1] = irn[i];
-                   --i;
-                   if (i == 0)
-                       break;
-               }
-               irn[i + 1] = ii;
-           }
-       }
-       /* Adjust start for zero */
-       for (i = 1; i <= nro; ++i) {
-           if (irn[i] != 0)
-               break;
-       }
-
-       nrb = i;
-       nro2 = nro - i + 1;
-    } else {
-       nrb = 1;
-       nro2 = nro;
-    }
-    /* Some table values */
-    ddf = f9xact(&nro, &n, &idif[1], fact);
-    drn = f9xact(&nro2, &ntot, &irn[nrb], fact) - dro + ddf;
-    /* Get hash value */
-    if (k1 > 1) {
-       kval = irn[1] + irn[2] * kyy[2];
-       for (i = 3; i <= nro; ++i) {
-           kval += irn[i] * kyy[i];
-       }
-       /* Get hash table entry */
-       i = kval % (*ldkey << 1) + 1;
-       /* Search for unused location */
-       for (itp = i; itp <= *ldkey << 1; ++itp) {
-           ii = key2[itp];
-           if (ii == kval) {
-               goto L240;
-           } else if (ii < 0) {
-               key2[itp] = kval;
-               dlp[itp] = 1.;
-               dsp[itp] = 1.;
-               goto L240;
-           }
-       }
-
-       for (itp = 1; itp <= i - 1; ++itp) {
-           ii = key2[itp];
-           if (ii == kval) {
-               goto L240;
-           } else if (ii < 0) {
-               key2[itp] = kval;
-               dlp[itp] = 1.;
-               goto L240;
-           }
-       }
-
-       /* KH
-          prterr(6, "LDKEY is too small.\n"
-          "It is not possible to give the value of LDKEY required,\n"
-          "but you could try doubling LDKEY (and possibly LDSTP).");
-          */
-       prterr(6, "LDKEY is too small for this problem.\n"
-              "Try increasing the size of the workspace.");
-    }
-
-L240:
-    ipsh = (1);
-    /* Recover pastp */
-    ipn = ipoin[ipo + ikkey];
-    pastp = stp[ipn + ikstp];
-    ifreq = ifrq[ipn + ikstp];
-    /* Compute shortest and longest path */
-    if (k1 > 1) {
-       obs2 = obs - fact[ico[kb + 1]] - fact[ico[kb + 2]] - ddf;
-       for (i = 3; i <= k1; ++i) {
-           obs2 -= fact[ico[kb + i]];
-       }
-       if (dlp[itp] > 0.) {
-           dspt = obs - obs2 - ddf;
-           /* Compute longest path */
-           dlp[itp] = 0.;
-           f3xact(&nro2, &irn[nrb], &k1, &ico[kb + 1], &dlp[itp],
-                  &ntot, fact, &iwk[i31], &iwk[i32], &iwk[i33],
-                  &iwk[i34], &iwk[i35], &iwk[i36], &iwk[i37],
-                  &iwk[i38], &iwk[i39], &rwk[i310], &rwk[i311], &tol);
-           dlp[itp] = min(0., dlp[itp]);
-           /* Compute shortest path */
-           dsp[itp] = dspt;
-           f4xact(&nro2, &irn[nrb], &k1, &ico[kb + 1], &dsp[itp], fact,
-                  &iwk[i47], &iwk[i41], &iwk[i42], &iwk[i43],
-                  &iwk[i44], &iwk[i45], &iwk[i46], &rwk[i48], &tol);
-           dsp[itp] = min(0., dsp[itp] - dspt);
-           /* Use chi-squared approximation? */
-           if ((irn[nrb] * ico[kb + 1]) > ntot * emn) {
-               ncell = 0.;
-               for (i = 0; i < nro2; ++i)
-                   for (j = 1; j <= k1; ++j)
-                       if (irn[nrb + i] * ico[kb + j] >= ntot * *expect)
-                           ncell++;
-
-               if (ncell * 100 >= k1 * nro2 * *percnt) {
-                   tmp = 0.;
-                   for (i = 0; i < nro2; ++i)
-                       tmp += (fact[irn[nrb + i]] -
-                               fact[irn[nrb + i] - 1]);
-                   tmp *= k1 - 1;
-                   for (j = 1; j <= k1; ++j)
-                       tmp += (nro2 - 1) * (fact[ico[kb + j]] -
-                                            fact[ico[kb + j] - 1]);
-                   df = (double) ((nro2 - 1) * (k1 - 1));
-                   tmp += df * 1.83787706640934548356065947281;
-                   tmp -= (nro2 * k1 - 1) * (fact[ntot] - fact[ntot - 1]);
-                   tm[itp] = (obs - dro) * -2. - tmp;
-               } else {
-                   /* tm(itp) set to a flag value */
-                   tm[itp] = -9876.;
-               }
-           } else {
-               tm[itp] = -9876.;
-           }
-       }
-       obs3 = obs2 - dlp[itp];
-       obs2 -= dsp[itp];
-       if (tm[itp] == -9876.) {
-           chisq = (0);
-       } else {
-           chisq = (1);
-           tmp = tm[itp];
-       }
-    } else {
-       obs2 = obs - drn - dro;
-       obs3 = obs2;
-    }
-
-L300:
-    /* Process node with new PASTP */
-    if (pastp <= obs3) {
-       /* Update pre */
-       *pre += (double) ifreq * exp(pastp + drn);
-    } else if (pastp < obs2) {
-       if (chisq) {
-           df = (double) ((nro2 - 1) * (k1 - 1));
-#ifdef USING_R
-           pv = pgamma(fmax2(0., tmp + (pastp + drn) * 2.) / 2.,
-                       df / 2., /*scale = */ 1.,
-                       /*lower_tail = */FALSE, /*log_p = */ FALSE);
-#else
-           d1 = max(0., tmp + (pastp + drn) * 2.) / 2.;
-           d2 = df / 2.;
-           pv = 1. - gammds(&d1, &d2, &ifault);
-#endif
-           *pre += (double) ifreq * exp(pastp + drn) * pv;
-       } else {
-           /* Put daughter on queue */
-           d1 = pastp + ddf;
-           f5xact(&d1, &tol, &kval, &key[jkey], ldkey, &ipoin[jkey],
-                  &stp[jstp], ldstp, &ifrq[jstp], &ifrq[jstp2],
-                  &ifrq[jstp3], &ifrq[jstp4], &ifreq, &itop, &ipsh);
-           ipsh = (0);
-       }
-    }
-    /* Get next PASTP on chain */
-    ipn = ifrq[ipn + ikstp2];
-    if (ipn > 0) {
-       pastp = stp[ipn + ikstp];
-       ifreq = ifrq[ipn + ikstp];
-       goto L300;
-    }
-    /* Generate a new daughter node */
-    f7xact(&kmax, &iro[1], &idif[1], &kd, &ks, &iflag);
-    if (iflag != 1) {
-       goto L150;
-    }
-
-L310:
-    /* Go get a new mother from stage K */
-    do {
-       iflag = 1;
-       f6xact(&nro, &iro[1], &iflag, &kyy[1], &key[ikkey + 1], ldkey,
-              &last, &ipo);
-       /* Update pointers */
-       if (iflag != 3)
-           goto Outer_Loop;
-       /* else  iflag == 3 : no additional nodes to process */
-       --k;
-       itop = 0;
-       ikkey = jkey - 1;
-       ikstp = jstp - 1;
-       ikstp2 = jstp2 - 1;
-       jkey = *ldkey - jkey + 2;
-       jstp = *ldstp - jstp + 2;
-       jstp2 = (*ldstp << 1) + jstp;
-       for (i = 1; i <= *ldkey << 1; ++i)
-           key2[i] = -9999;
-
-    } while (k >= 2);
-}
-
-/*
-  -----------------------------------------------------------------------
-  Name:              F3XACT
-  Purpose:    Computes the longest path length for a given table.
-  Usage:      F3XACT (NROW, IROW, NCOL, ICOL, DLP, MM, FACT, ICO, IRO,
-                     IT, LB, NR, NT, NU, ITC, IST, STV, ALEN, TOL)
-  Arguments:
-    NROW    - The number of rows in the table.                 (Input)
-    IROW    - Vector of length NROW containing the row sums
-              for the table.                                   (Input)
-    NCOL    - The number of columns in the table.              (Input)
-    ICOL    - Vector of length K containing the column sums
-              for the table.                                   (Input)
-    DLP     - The longest path for the table.                  (Output)
-    MM     - The total count in the table.                     (Output)
-    FACT    - Vector containing the logarithms of factorials.  (Input)
-    ICO     - Work vector of length MAX(NROW,NCOL).
-    IRO     - Work vector of length MAX(NROW,NCOL).
-    IT     - Work vector of length MAX(NROW,NCOL).
-    LB     - Work vector of length MAX(NROW,NCOL).
-    NR     - Work vector of length MAX(NROW,NCOL).
-    NT     - Work vector of length MAX(NROW,NCOL).
-    NU     - Work vector of length MAX(NROW,NCOL).
-    ITC     - Work vector of length 400.
-    IST     - Work vector of length 400.
-    STV     - Work vector of length 400.
-    ALEN    - Work vector of length MAX(NROW,NCOL).
-    TOL     - Tolerance.                                       (Input)
-  -----------------------------------------------------------------------
-  */
-
-void
-f3xact(int *nrow, int *irow, int *ncol, int *icol, double *dlp,
-       int *mm, double *fact, int *ico, int *iro, int *it,
-       int *lb, int *nr, int *nt, int *nu, int *itc, int *ist,
-       double *stv, double *alen, const double *tol)
-{
-    /* Initialized data */
-    static int ldst = 200;
-    static int nst = 0;
-    static int nitc = 0;
-
-    /* Local variables */
-    static int xmin;
-    static int i, k;
-    static double v;
-    static int n11, n12, ii, nn, ks, ic1, ic2, nc1, nn1;
-    static int nr1, nco;
-    static double val;
-    static int nct, ipn, irl, key, lev, itp, nro;
-    static double vmn;
-    static int nrt, kyy, nc1s;
-
-    /* Parameter adjustments */
-    --stv;
-    --ist;
-    --itc;
-    --nu;
-    --nt;
-    --nr;
-    --lb;
-    --it;
-    --iro;
-    --ico;
-    --icol;
-    --irow;
-
-    /* Function Body */
-    for (i = 0; i <= *ncol; ++i) {
-       alen[i] = 0.;
-    }
-    for (i = 1; i <= 400; ++i) {
-       ist[i] = -1;
-    }
-    /* nrow is 1 */
-    if (*nrow <= 1) {
-       if (*nrow > 0) {
-           *dlp -= fact[icol[1]];
-           for (i = 2; i <= *ncol; ++i) {
-               *dlp -= fact[icol[i]];
-           }
-       }
-       return;
-    }
-    /* ncol is 1 */
-    if (*ncol <= 1) {
-       if (*ncol > 0) {
-           *dlp = *dlp - fact[irow[1]] - fact[irow[2]];
-           for (i = 3; i <= *nrow; ++i) {
-               *dlp -= fact[irow[i]];
-           }
-       }
-       return;
-    }
-    /* 2 by 2 table */
-    if (*nrow * *ncol == 4) {
-       n11 = (irow[1] + 1) * (icol[1] + 1) / (*mm + 2);
-       n12 = irow[1] - n11;
-       *dlp = *dlp - fact[n11] - fact[n12] - fact[icol[1] - n11]
-           - fact[icol[2] - n12];
-       return;
-    }
-    /* Test for optimal table */
-    val = 0.;
-    xmin = (0);
-    if (irow[*nrow] <= irow[1] + *ncol) {
-       f10act(nrow, &irow[1], ncol, &icol[1], &val, &xmin, fact,
-              &lb[1], &nu[1], &nr[1]);
-    }
-    if (! xmin) {
-       if (icol[*ncol] <= icol[1] + *nrow) {
-           f10act(ncol, &icol[1], nrow, &irow[1], &val, &xmin, fact,
-                  &lb[1], &nu[1], &nr[1]);
-       }
-    }
-
-    if (xmin) {
-       *dlp -= val;
-       return;
-    }
-    /* Setup for dynamic programming */
-    nn = *mm;
-    /* Minimize ncol */
-    if (*nrow >= *ncol) {
-       nro = *nrow;
-       nco = *ncol;
-       for (i = 1; i <= *nrow; ++i) {
-           iro[i] = irow[i];
-       }
-       ico[1] = icol[1];
-       nt[1] = nn - ico[1];
-       for (i = 2; i <= *ncol; ++i) {
-           ico[i] = icol[i];
-           nt[i] = nt[i - 1] - ico[i];
-       }
-    } else {
-       nro = *ncol;
-       nco = *nrow;
-       ico[1] = irow[1];
-       nt[1] = nn - ico[1];
-       for (i = 2; i <= *nrow; ++i) {
-           ico[i] = irow[i];
-           nt[i] = nt[i - 1] - ico[i];
-       }
-       for (i = 1; i <= *ncol; ++i)
-           iro[i] = icol[i];
-    }
-    /* Initialize pointers */
-    vmn = 1e10;
-    nc1s = nco - 1;
-    irl = 1;
-    ks = 0;
-    k = ldst;
-    kyy = ico[nco] + 1;
-
-LnewNode: /* Setup to generate new node */
-
-    lev = 1;
-    nr1 = nro - 1;
-    nrt = iro[irl];
-    nct = ico[1];
-    lb[1] = (int) ((double) ((nrt + 1) * (nct + 1)) /
-                   (double) (nn + nr1 * nc1s + 1) - *tol) - 1;
-    nu[1] = (int) ((double) ((nrt + nc1s) * (nct + nr1)) /
-                   (double) (nn + nr1 + nc1s)) - lb[1] + 1;
-    nr[1] = nrt - lb[1];
-
-LoopNode: /* Generate a node */
-    --nu[lev];
-    if (nu[lev] == 0) {
-       if (lev == 1)
-           goto L200;
-
-       --lev;
-       goto LoopNode;
-    }
-    ++lb[lev];
-    --nr[lev];
-L120:
-    alen[lev] = alen[lev - 1] + fact[lb[lev]];
-    if (lev < nc1s) {
-       nn1 = nt[lev];
-       nrt = nr[lev];
-       ++lev;
-       nc1 = nco - lev;
-       nct = ico[lev];
-       lb[lev] = (int) ((double) ((nrt + 1) * (nct + 1)) /
-                         (double) (nn1 + nr1 * nc1 + 1) - *tol);
-       nu[lev] = (int) ((double) ((nrt + nc1) * (nct + nr1)) /
-                         (double) (nn1 + nr1 + nc1) - lb[lev] + 1);
-       nr[lev] = nrt - lb[lev];
-       goto L120;
-    }
-    alen[nco] = alen[lev] + fact[nr[lev]];
-    lb[nco] = nr[lev];
-
-    v = val + alen[nco];
-    if (nro == 2) {
-       /* Only 1 row left */
-       v = v + fact[ico[1] - lb[1]] + fact[ico[2] - lb[2]];
-       for (i = 3; i <= nco; ++i) {
-           v += fact[ico[i] - lb[i]];
-       }
-       if (v < vmn) {
-           vmn = v;
-       }
-    } else if (nro == 3 && nco == 2) {
-       /* 3 rows and 2 columns */
-       nn1 = nn - iro[irl] + 2;
-       ic1 = ico[1] - lb[1];
-       ic2 = ico[2] - lb[2];
-       n11 = (iro[irl + 1] + 1) * (ic1 + 1) / nn1;
-       n12 = iro[irl + 1] - n11;
-       v = v + fact[n11] + fact[n12] + fact[ic1 - n11]
-           + fact[ic2 - n12];
-       if (v < vmn) {
-           vmn = v;
-       }
-    } else {
-       /* Column marginals are new node */
-       for (i = 1; i <= nco; ++i) {
-           it[i] = ico[i] - lb[i];
-       }
-       /* Sort column marginals */
-       if (nco == 2) {
-           if (it[1] > it[2]) {
-               ii = it[1];
-               it[1] = it[2];
-               it[2] = ii;
-           }
-       } else if (nco == 3) {
-           ii = it[1];
-           if (ii > it[3]) {
-               if (ii > it[2]) {
-                   if (it[2] > it[3]) {
-                       it[1] = it[3];
-                       it[3] = ii;
-                   } else {
-                       it[1] = it[2];
-                       it[2] = it[3];
-                       it[3] = ii;
-                   }
-               } else {
-                   it[1] = it[3];
-                   it[3] = it[2];
-                   it[2] = ii;
-               }
-           } else if (ii > it[2]) {
-               it[1] = it[2];
-               it[2] = ii;
-           } else if (it[2] > it[3]) {
-               ii = it[2];
-               it[2] = it[3];
-               it[3] = ii;
-           }
-       } else {
-           isort(&nco, &it[1]);
-       }
-       /* Compute hash value */
-       key = it[1] * kyy + it[2];
-       for (i = 3; i <= nco; ++i) {
-           key = it[i] + key * kyy;
-       }
-       if(key < 0)
-               //PROBLEM "Bug in FEXACT: gave negative key" RECOVER(NULL_ENTRY);
-               printf("Bug in FEXACT: gave negative key \n"); //xx:another one of my ugly kluges (R.D-U)
-
-       /* Table index */
-       ipn = key % ldst + 1;
-
-       /* Find empty position */
-       for (itp = ipn, ii = ks + ipn; itp <= ldst; ++itp, ++ii) {
-           if (ist[ii] < 0) {
-               goto L180;
-           } else if (ist[ii] == key) {
-               goto L190;
-           }
-       }
-
-       for (itp = 1, ii = ks + 1; itp <= ipn - 1; ++itp, ++ii) {
-           if (ist[ii] < 0) {
-               goto L180;
-           } else if (ist[ii] == key) {
-               goto L190;
-           }
-       }
-
-       prterr(30, "Stack length exceeded in f3xact.\n"
-              "This problem should not occur.");
-
-L180: /* Push onto stack */
-       ist[ii] = key;
-       stv[ii] = v;
-       ++nst;
-       ii = nst + ks;
-       itc[ii] = itp;
-       goto LoopNode;
-
-L190: /* Marginals already on stack */
-       stv[ii] = min(v, stv[ii]);
-    }
-    goto LoopNode;
-
-
-L200: /* Pop item from stack */
-    if (nitc > 0) {
-       /* Stack index */
-       itp = itc[nitc + k] + k;
-       --nitc;
-       val = stv[itp];
-       key = ist[itp];
-       ist[itp] = -1;
-       /* Compute marginals */
-       for (i = nco; i >= 2; --i) {
-           ico[i] = key % kyy;
-           key /= kyy;
-       }
-       ico[1] = key;
-       /* Set up nt array */
-       nt[1] = nn - ico[1];
-       for (i = 2; i <= nco; ++i)
-           nt[i] = nt[i - 1] - ico[i];
-
-       /* Test for optimality (L90) */
-       xmin = (0);
-       if (iro[nro] <= iro[irl] + nco) {
-           f10act(&nro, &iro[irl], &nco, &ico[1], &val, &xmin, fact,
-                  &lb[1], &nu[1], &nr[1]);
-       }
-       if (!xmin && ico[nco] <= ico[1] + nro)
-           f10act(&nco, &ico[1], &nro, &iro[irl], &val, &xmin, fact,
-                  &lb[1], &nu[1], &nr[1]);
-       if (xmin) {
-           if (vmn > val)
-               vmn = val;
-           goto L200;
-       }
-       else goto LnewNode;
-
-    } else if (nro > 2 && nst > 0) {
-       /* Go to next level */
-       nitc = nst;
-       nst = 0;
-       k = ks;
-       ks = ldst - ks;
-       nn -= iro[irl];
-       ++irl;
-       --nro;
-       goto L200;
-    }
-
-    *dlp -= vmn;
-}
-
-/*
-  -----------------------------------------------------------------------
-  Name:              F4XACT
-  Purpose:    Computes the longest path length for a given table.
-  Usage:      CALL F4XACT (NROW, IROW, NCOL, ICOL, DSP, FACT, ICSTK,
-                         NCSTK, LSTK, MSTK, NSTK, NRSTK, IRSTK, YSTK,
-                         TOL)
-  Arguments:
-     NROW   - The number of rows in the table. (Input)
-     IROW   - Vector of length NROW containing the row sums for the
-             table.  (Input)
-     NCOL   - The number of columns in the table.  (Input)
-     ICOL   - Vector of length K containing the column sums for the
-             table.  (Input)
-     DSP    - The shortest path for the table. (Output)
-     FACT   - Vector containing the logarithms of factorials.  (Input)
-     ICSTK  - NCOL by NROW+NCOL+1 work array.
-     NCSTK  - Work vector of length NROW+NCOL+1.
-     LSTK   - Work vector of length NROW+NCOL+1.
-     MSTK   - Work vector of length NROW+NCOL+1.
-     NSTK   - Work vector of length NROW+NCOL+1.
-     NRSTK  - Work vector of length NROW+NCOL+1.
-     IRSTK  - NROW by MAX(NROW,NCOL) work array.
-     YSTK   - Work vector of length NROW+NCOL+1.
-     TOL    - Tolerance.  (Input)
-  -----------------------------------------------------------------------
-  */
-
-void
-f4xact(int *nrow, int *irow, int *ncol, int *icol, double *dsp,
-       double *fact, int *icstk, int *ncstk, int *lstk, int *mstk,
-       int *nstk, int *nrstk, int *irstk, double *ystk, const double *tol)
-{
-    /* System generated locals */
-    int ikh;
-
-    /* Local variables */
-    int i, j, k, l, m, n, mn, ic1, ir1, ict, irt, istk, nco, nro;
-    double y, amx;
-
-    /* Parameter adjustments */
-    irstk -= *nrow + 1;
-    --irow;
-    icstk -= *ncol + 1;
-    --icol;
-    --ncstk;
-    --lstk;
-    --mstk;
-    --nstk;
-    --nrstk;
-    --ystk;
-
-    /* Function Body */
-    /* Take care of the easy cases first */
-    if (*nrow == 1) {
-       for (i = 1; i <= *ncol; ++i) {
-           *dsp -= fact[icol[i]];
-       }
-       return;
-    }
-    if (*ncol == 1) {
-       for (i = 1; i <= *nrow; ++i) {
-           *dsp -= fact[irow[i]];
-       }
-       return;
-    }
-    if (*nrow * *ncol == 4) {
-       if (irow[2] <= icol[2]) {
-           *dsp = *dsp - fact[irow[2]] - fact[icol[1]]
-               - fact[icol[2] - irow[2]];
-       } else {
-           *dsp = *dsp - fact[icol[2]] - fact[irow[1]]
-               - fact[irow[2] - icol[2]];
-       }
-       return;
-    }
-    /* initialization before loop */
-    for (i = 1; i <= *nrow; ++i) {
-       irstk[i + *nrow] = irow[*nrow - i + 1];
-    }
-    for (j = 1; j <= *ncol; ++j) {
-       icstk[j + *ncol] = icol[*ncol - j + 1];
-    }
-
-    nro = *nrow;
-    nco = *ncol;
-    nrstk[1] = nro;
-    ncstk[1] = nco;
-    ystk[1] = 0.;
-    y = 0.;
-    istk = 1;
-    l = 1;
-    amx = 0.;
-
-    /* First LOOP */
-    do {
-       ir1 = irstk[istk * *nrow + 1];
-       ic1 = icstk[istk * *ncol + 1];
-       if (ir1 > ic1) {
-           if (nro >= nco) {
-               m = nco - 1;    n = 2;
-           } else {
-               m = nro;        n = 1;
-           }
-       } else if (ir1 < ic1) {
-           if (nro <= nco) {
-               m = nro - 1;    n = 1;
-           } else {
-               m = nco;        n = 2;
-           }
-       } else {
-           if (nro <= nco) {
-               m = nro - 1;    n = 1;
-           } else {
-               m = nco - 1;    n = 2;
-           }
-       }
-
-    L60:
-       if (n == 1) {
-           i = l; j = 1;
-       } else {
-           i = 1; j = l;
-       }
-
-       irt = irstk[i + istk * *nrow];
-       ict = icstk[j + istk * *ncol];
-       mn = irt;
-       if (mn > ict) {
-           mn = ict;
-       }
-       y += fact[mn];
-       if (irt == ict) {
-           --nro;
-           --nco;
-           f11act(&irstk[istk * *nrow + 1], &i, &nro,
-                  &irstk[(istk + 1) * *nrow + 1]);
-           f11act(&icstk[istk * *ncol + 1], &j, &nco,
-                  &icstk[(istk + 1) * *ncol + 1]);
-       } else if (irt > ict) {
-           --nco;
-           f11act(&icstk[istk * *ncol + 1], &j, &nco,
-                  &icstk[(istk + 1) * *ncol + 1]);
-           ikh = irt - ict;
-           f8xact(&irstk[istk * *nrow + 1], &ikh, &i,
-                  &nro, &irstk[(istk + 1) * *nrow + 1]);
-       } else {
-           --nro;
-           f11act(&irstk[istk * *nrow + 1], &i, &nro,
-                  &irstk[(istk + 1) * *nrow + 1]);
-           ikh = ict - irt;
-           f8xact(&icstk[istk * *ncol + 1], &ikh, &j,
-                  &nco, &icstk[(istk + 1) * *ncol + 1]);
-       }
-
-       if (nro == 1) {
-           for (k = 1; k <= nco; ++k) {
-               y += fact[icstk[k + (istk + 1) * *ncol]];
-           }
-           break;
-       }
-       if (nco == 1) {
-           for (k = 1; k <= nro; ++k) {
-               y += fact[irstk[k + (istk + 1) * *nrow]];
-           }
-           break;
-       }
-
-       lstk[istk] = l;
-       mstk[istk] = m;
-       nstk[istk] = n;
-       ++istk;
-       nrstk[istk] = nro;
-       ncstk[istk] = nco;
-       ystk[istk] = y;
-       l = 1;
-    } while(1);/* end do */
-
-/* L90:*/
-    if (y > amx) {
-       amx = y;
-       if (*dsp - amx <= *tol) {
-           *dsp = 0.;
-           return;
-       }
-    }
-
-L100:
-    --istk;
-    if (istk == 0) {
-       *dsp -= amx;
-       if (*dsp - amx <= *tol) {
-           *dsp = 0.;
-       }
-       return;
-    }
-    l = lstk[istk] + 1;
-
-/* L110: */
-    for(;; ++l) {
-       if (l > mstk[istk])     goto L100;
-
-       n = nstk[istk];
-       nro = nrstk[istk];
-       nco = ncstk[istk];
-       y = ystk[istk];
-       if (n == 1) {
-           if (irstk[l     + istk * *nrow] <
-               irstk[l - 1 + istk * *nrow])    goto L60;
-       }
-       else if (n == 2) {
-           if (icstk[l     + istk * *ncol] <
-               icstk[l - 1 + istk * *ncol])    goto L60;
-       }
-    }
-}
-
-/*
-  -----------------------------------------------------------------------
-  Name:              F5XACT
-  Purpose:    Put node on stack in network algorithm.
-  Usage:      CALL F5XACT (PASTP, TOL, KVAL, KEY, LDKEY, IPOIN, STP,
-                         LDSTP, IFRQ, NPOIN, NR, NL, IFREQ, ITOP,
-                         IPSH)
-  Arguments:
-     PASTP  - The past path length.                            (Input)
-     TOL    - Tolerance for equivalence of past path lengths.          (Input)
-     KVAL   - Key value.                                       (Input)
-     KEY    - Vector of length LDKEY containing the key values.        (in/out)
-     LDKEY  - Length of vector KEY.                            (Input)
-     IPOIN  - Vector of length LDKEY pointing to the
-             linked list of past path lengths.                 (in/out)
-     STP    - Vector of length LSDTP containing the
-             linked lists of past path lengths.                (in/out)
-     LDSTP  - Length of vector STP.                            (Input)
-     IFRQ   - Vector of length LDSTP containing the past path
-             frequencies.                                      (in/out)
-     NPOIN  - Vector of length LDSTP containing the pointers to
-             the next past path length.                        (in/out)
-     NR            - Vector of length LDSTP containing the right object
-             pointers in the tree of past path lengths.        (in/out)
-     NL            - Vector of length LDSTP containing the left object
-             pointers in the tree of past path lengths.        (in/out)
-     IFREQ  - Frequency of the current path length.             (Input)
-     ITOP   - Pointer to the top of STP.                       (Input)
-     IPSH   - Option parameter.                                        (Input)
-             If IPSH is true, the past path length is found in the
-             table KEY.  Otherwise the location of the past path
-             length is assumed known and to have been found in
-             a previous call. ==>>>>> USING "static" variables
-  -----------------------------------------------------------------------
-  */
-
-void
-f5xact(double *pastp, const double *tol, int *kval, int *key, int *ldkey,
-       int *ipoin, double *stp, int *ldstp, int *ifrq, int *npoin,
-       int *nr, int *nl, int *ifreq, int *itop, int *ipsh)
-{
-    /* Local variables */
-    static int itmp, ird, ipn, itp;
-    double test1, test2;
-
-    /* Parameter adjustments */
-    --nl;
-    --nr;
-    --npoin;
-    --ifrq;
-    --stp;
-    --ipoin;
-    --key;
-
-    /* Function Body */
-    if (*ipsh) {
-       /* Convert KVAL to int in range 1, ..., LDKEY. */
-       ird = *kval % *ldkey + 1;
-       /* Search for an unused location */
-       for (itp = ird; itp <= *ldkey; ++itp) {
-           if (key[itp] == *kval) {
-               goto L40;
-           }
-           if (key[itp] < 0) {
-               goto L30;
-           }
-       }
-       for (itp = 1; itp <= ird - 1; ++itp) {
-           if (key[itp] == *kval) {
-               goto L40;
-           }
-           if (key[itp] < 0) {
-               goto L30;
-           }
-       }
-       /* Return if KEY array is full */
-       /* KH
-         prterr(6, "LDKEY is too small for this problem.\n"
-         "It is not possible to estimate the value of LDKEY "
-         "required,\n"
-         "but twice the current value may be sufficient.");
-         */
-       prterr(6, "LDKEY is too small for this problem.\n"
-              "Try increasing the size of the workspace.");
-
-       /* Update KEY */
-L30:
-       key[itp] = *kval;
-       ++(*itop);
-       ipoin[itp] = *itop;
-       /* Return if STP array full */
-       if (*itop > *ldstp) {
-           /* KH
-              prterr(7, "LDSTP is too small for this problem.\n"
-              "It is not possible to estimate the value of LDSTP "
-              "required,\n"
-              "but twice the current value may be sufficient.");
-              */
-           prterr(7, "LDSTP is too small for this problem.\n"
-                  "Try increasing the size of the workspace.");
-       }
-       /* Update STP, etc. */
-       npoin[*itop] = -1;
-       nr[*itop] = -1;
-       nl[*itop] = -1;
-       stp[*itop] = *pastp;
-       ifrq[*itop] = *ifreq;
-       return;
-    }
-
-    /* Find location, if any, of pastp */
-L40:
-    ipn = ipoin[itp];
-    test1 = *pastp - *tol;
-    test2 = *pastp + *tol;
-
-L50:
-    if (stp[ipn] < test1) {
-       ipn = nl[ipn];
-       if (ipn > 0) {
-           goto L50;
-       }
-    } else if (stp[ipn] > test2) {
-       ipn = nr[ipn];
-       if (ipn > 0) {
-           goto L50;
-       }
-    } else {
-       ifrq[ipn] += *ifreq;
-       return;
-    }
-    /* Return if STP array full */
-    ++(*itop);
-    if (*itop > *ldstp) {
-       /*
-         prterr(7, "LDSTP is too small for this problem.\n"
-         "It is not possible to estimate the value of LDSTP "
-         "required,\n"
-         "but twice the current value may be sufficient.");
-         */
-       prterr(7, "LDSTP is too small for this problem.\n"
-              "Try increasing the size of the workspace.");
-       return;
-    }
-    /* Find location to add value */
-    ipn = ipoin[itp];
-    itmp = ipn;
-
-L60:
-    if (stp[ipn] < test1) {
-       itmp = ipn;
-       ipn = nl[ipn];
-       if (ipn > 0) {
-           goto L60;
-       } else {
-           nl[itmp] = *itop;
-       }
-    } else if (stp[ipn] > test2) {
-       itmp = ipn;
-       ipn = nr[ipn];
-       if (ipn > 0) {
-           goto L60;
-       } else {
-           nr[itmp] = *itop;
-       }
-    }
-    /* Update STP, etc. */
-    npoin[*itop] = npoin[itmp];
-    npoin[itmp] = *itop;
-    stp[*itop] = *pastp;
-    ifrq[*itop] = *ifreq;
-    nl[*itop] = -1;
-    nr[*itop] = -1;
-}
-
-/*
-  -----------------------------------------------------------------------
-  Name:              F6XACT
-  Purpose:    Pop a node off the stack.
-  Usage:      CALL F6XACT (NROW, IROW, IFLAG, KYY, KEY, LDKEY, LAST, IPN)
-  Arguments:
-    NROW    - The number of rows in the table.                 (Input)
-    IROW    - Vector of length nrow containing the row sums on
-              output.                                          (Output)
-    IFLAG   - Set to 3 if there are no additional nodes to process.
-                                                               (Output)
-    KYY     - Constant mutlipliers used in forming the hash
-              table key.                                       (Input)
-    KEY     - Vector of length LDKEY containing the hash table
-              keys.                                            (In/out)
-    LDKEY   - Length of vector KEY.                            (Input)
-    LAST    - Index of the last key popped off the stack.      (In/out)
-    IPN     - Pointer to the linked list of past path lengths. (Output)
-  -----------------------------------------------------------------------
-  */
-void
-f6xact(int *nrow, int *irow, int *iflag, int *kyy, int *key, int
-       *ldkey, int *last, int *ipn)
-{
-    int kval, j;
-
-    /* Parameter adjustments */
-    --key;
-    --kyy;
-    --irow;
-
-    /* Function Body */
-L10:
-    ++(*last);
-    if (*last <= *ldkey) {
-       if (key[*last] < 0) {
-           goto L10;
-       }
-       /* Get KVAL from the stack */
-       kval = key[*last];
-       key[*last] = -9999;
-       for (j = *nrow; j >= 2; --j) {
-           irow[j] = kval / kyy[j];
-           kval -= irow[j] * kyy[j];
-       }
-       irow[1] = kval;
-       *ipn = *last;
-    } else {
-       *last = 0;
-       *iflag = 3;
-    }
-    return;
-}
-
-/*
-  -----------------------------------------------------------------------
-  Name:              F7XACT
-  Purpose:    Generate the new nodes for given marinal totals.
-  Usage:      CALL F7XACT (NROW, IMAX, IDIF, K, KS, IFLAG)
-  Arguments:
-    NROW    - The number of rows in the table.                 (Input)
-    IMAX    - The row marginal totals.                         (Input)
-    IDIF    - The column counts for the new column.            (in/out)
-    K      - Indicator for the row to decrement.               (in/out)
-    KS     - Indicator for the row to increment.               (in/out)
-    IFLAG   - Status indicator.                                        (Output)
-             If IFLAG is zero, a new table was generated.  For
-             IFLAG = 1, no additional tables could be generated.
-  -----------------------------------------------------------------------
-  */
-
-void
-f7xact(int *nrow, int *imax, int *idif, int *k, int *ks,
-       int *iflag)
-    
-{
-    int i, m, k1, mm;
-
-    /* Parameter adjustments */
-    --idif;
-    --imax;
-
-    /* Function Body */
-    *iflag = 0;
-    /* Find node which can be incremented, ks */
-    if (*ks == 0)
-       do {
-           ++(*ks);
-       } while (idif[*ks] == imax[*ks]);
-
-    /* Find node to decrement (>ks) */
-    if (idif[*k] > 0 && *k > *ks) {
-       --idif[*k];
-       do {
-           --(*k);
-       } while(imax[*k] == 0);
-
-       m = *k;
-
-       /* Find node to increment (>=ks) */
-       while (idif[m] >= imax[m]) {
-           --m;
-       }
-       ++idif[m];
-       /* Change ks */
-       if (m == *ks) {
-           if (idif[m] == imax[m]) {
-               *ks = *k;
-           }
-       }
-    }
-    else {
- Loop:
-       /* Check for finish */
-       for (k1 = *k + 1; k1 <= *nrow; ++k1) {
-           if (idif[k1] > 0) {
-               goto L70;
-           }
-       }
-       *iflag = 1;
-       return;
-
- L70:
-       /* Reallocate counts */
-       mm = 1;
-       for (i = 1; i <= *k; ++i) {
-           mm += idif[i];
-           idif[i] = 0;
-       }
-       *k = k1;
-
-       do {
-           --(*k);
-           m = min(mm, imax[*k]);
-           idif[*k] = m;
-           mm -= m;
-       } while (mm > 0 && *k != 1);
-
-       /* Check that all counts reallocated */
-       if (mm > 0) {
-           if (k1 != *nrow) {
-               *k = k1;
-               goto Loop;
-           }
-           *iflag = 1;
-           return;
-       }
-       /* Get ks */
-       --idif[k1];
-       *ks = 0;
-       do {
-           ++(*ks);
-           if (*ks > *k) {
-               return;
-           }
-       } while (idif[*ks] >= imax[*ks]);
-    }
-}
-
-/*
-  -----------------------------------------------------------------------
-  Name:              F8XACT
-  Purpose:    Routine for reducing a vector when there is a zero
-             element.
-  Usage:      CALL F8XACT (IROW, IS, I1, IZERO, NEW)
-  Arguments:
-     IROW   - Vector containing the row counts.                        (Input)
-     IS            - Indicator.                                        (Input)
-     I1            - Indicator.                                        (Input)
-     IZERO  - Position of the zero.                            (Input)
-     NEW    - Vector of new row counts.                                (Output)
-  -----------------------------------------------------------------------
-  */
-
-void
-f8xact(int *irow, int *is, int *i1, int *izero, int *myNew)
-{
-    int i;
-
-    /* Parameter adjustments */
-    --myNew;
-    --irow;
-
-    /* Function Body */
-    for (i = 1; i < *i1; ++i)
-       myNew[i] = irow[i];
-
-    for (i = *i1; i <= *izero - 1; ++i) {
-       if (*is >= irow[i + 1])
-           break;
-       myNew[i] = irow[i + 1];
-    }
-
-    myNew[i] = *is;
-
-    for(;;) {
-       ++i;
-       if (i > *izero)
-           return;
-       myNew[i] = irow[i];
-    }
-}
-
-/*
-  -----------------------------------------------------------------------
-  Name:              F9XACT
-  Purpose:    Computes the log of a multinomial coefficient.
-  Usage:      F9XACT(N, MM, IR, FACT)
-  Arguments:
-     N     - Length of IR.                                     (Input)
-     MM            - Number for factorial in numerator.                (Input)
-     IR            - Vector of length N containing the numbers for
-              the denominator of the factorial.                        (Input)
-     FACT   - Table of log factorials.                         (Input)
-     F9XACT - The log of the multinomal coefficient.           (Output)
-  -----------------------------------------------------------------------
-  */
-
-double
-f9xact(int *n, int *mm, int *ir, double *fact)
-{
-    double d;
-    int k;
-
-    d = fact[*mm];
-    for (k = 0; k < *n; k++)
-       d -= fact[ir[k]];
-    return d;
-}
-
-/*
-  -----------------------------------------------------------------------
-  Name:            F10ACT
-  Purpose:  Computes the shortest path length for special tables.
-  Usage:    F10ACT (NROW, IROW, NCOL, ICOL, VAL, XMIN, FACT, ND, NE, M)
-  Arguments:
-     NROW   - The number of rows in the table.                 (Input)
-     IROW   - Vector of length NROW containing the row totals. (Input)
-     NCOL   - The number of columns in the table.              (Input)
-     ICO    - Vector of length NCOL containing the column totals.(Input)
-     VAL    - The shortest path.                               (Output)
-     XMIN   - Set to true if shortest path obtained.           (Output)
-     FACT   - Vector containing the logarithms of factorials.   (Input)
-     ND            - Workspace vector of length NROW.                  (Input)
-     NE            - Workspace vector of length NCOL.                  (Input)
-     M     - Workspace vector of length NCOL.                  (Input)
-
-  Chapter:    STAT/LIBRARY Categorical and Discrete Data Analysis
-  -----------------------------------------------------------------------
-  */
-
-void
-f10act(int *nrow, int *irow, int *ncol, int *icol, double *val,
-       int *xmin, double *fact, int *nd, int *ne, int *m)
-{
-    /* Local variables */
-    int i, is, ix, nrw1;
-
-    /* Parameter adjustments */
-    --m;
-    --ne;
-    --nd;
-    --icol;
-    --irow;
-
-    /* Function Body */
-    for (i = 1; i <= *nrow - 1; ++i)
-       nd[i] = 0;
-
-    is = icol[1] / *nrow;
-    ix = icol[1] - *nrow * is;
-    ne[1] = is;
-    m[1] = ix;
-    if (ix != 0)
-       ++nd[ix];
-
-    for (i = 2; i <= *ncol; ++i) {
-       ix = icol[i] / *nrow;
-       ne[i] = ix;
-       is += ix;
-       ix = icol[i] - *nrow * ix;
-       m[i] = ix;
-       if (ix != 0)
-           ++nd[ix];
-    }
-
-    for (i = *nrow - 2; i >= 1; --i)
-       nd[i] += nd[i + 1];
-
-    ix = 0;
-    nrw1 = *nrow + 1;
-    for (i = *nrow; i >= 2; --i) {
-       ix = ix + is + nd[nrw1 - i] - irow[i];
-       if (ix < 0)
-           return;
-    }
-
-    for (i = 1; i <= *ncol; ++i) {
-       ix = ne[i];
-       is = m[i];
-       *val = *val + is * fact[ix + 1] + (*nrow - is) * fact[ix];
-    }
-    *xmin = (1);
-
-    return;
-}
-
-/*
-  -----------------------------------------------------------------------
-  Name:              F11ACT
-  Purpose:    Routine for revising row totals.
-  Usage:      CALL F11ACT (IROW, I1, I2, NEW)
-  Arguments:
-     IROW   - Vector containing the row totals.        (Input)
-     I1            - Indicator.                        (Input)
-     I2            - Indicator.                        (Input)
-     NEW    - Vector containing the row totals.        (Output)
-  -----------------------------------------------------------------------
-  */
-void
-f11act(int *irow, int *i1, int *i2, int *myNew)
-{
-    int i;
-
-    /* Parameter adjustments */
-    --myNew;
-    --irow;
-
-    for (i = 1; i <= (*i1 - 1); ++i)   myNew[i] = irow[i];
-    for (i = *i1; i <= *i2; ++i)       myNew[i] = irow[i + 1];
-
-    return;
-}
-
-/*
-  -----------------------------------------------------------------------
-  Name:              prterr
-  Purpose:    Print an error message and stop.
-  Usage:      prterr(icode, mes)
-  Arguments:
-     icode  - Integer code for the error message.              (Input)
-     mes    - Character string containing the error message.   (Input)
-  -----------------------------------------------------------------------
-  */
-void
-prterr(int icode, char *mes)
-{
-//    PROBLEM "FEXACT error %d.\n%s", icode, mes RECOVER(NULL_ENTRY);
-//   printf("FEXACT error %d.\n%s", icode, mes RECOVER(NULL_ENTRY));
-   printf("FEXACT error %d.\n", icode); //xx:another one of my ugly kluges
-   return;
-}
-
-/*
-  -----------------------------------------------------------------------
-  Name:              iwork
-  Purpose:    Routine for allocating workspace.
-  Usage:      iwork (iwkmax, iwkpt, number, itype)
-  Arguments:
-     iwkmax - Maximum (int) amount of workspace.               (Input)
-     iwkpt  - Amount of (int) workspace currently allocated.   (in/out)
-     number - Number of elements of workspace desired.         (Input)
-     itype  - Workspace type.                                  (Input)
-             ITYPE  TYPE
-               2    integer
-               3    float
-               4    double
-     iwork(): Index in rwrk, dwrk, or iwrk of the beginning of
-              the first free element in the workspace array.   (Output)
-  -----------------------------------------------------------------------
-  */
-int
-iwork(int iwkmax, int *iwkpt, int number, int itype)
-{
-    int i;
-
-    i = *iwkpt;
-    if (itype == 2 || itype == 3)
-       *iwkpt += number;
-    else { /* double */
-       if (i % 2 != 0)
-           ++i;
-       *iwkpt += (number << 1);
-       i /= 2;
-    }
-    if (*iwkpt > iwkmax)
-       prterr(40, "Out of workspace.");
-
-    return i;
-}
-
-#ifndef USING_R
-
-void isort(int *n, int *ix)
-{
-/*
-  -----------------------------------------------------------------------
-  Name:              ISORT
-  Purpose:    Shell sort for an int vector.
-  Usage:      CALL ISORT (N, IX)
-  Arguments:
-     N     - Lenth of vector IX.       (Input)
-     IX            - Vector to be sorted.      (in/out)
-  -----------------------------------------------------------------------
-  */
-    static int ikey, i, j, m, il[10], kl, it, iu[10], ku;
-
-    /* Parameter adjustments */
-    --ix;
-
-    /* Function Body */
-    m = 1;
-    i = 1;
-    j = *n;
-
-L10:
-    if (i >= j) {
-       goto L40;
-    }
-    kl = i;
-    ku = j;
-    ikey = i;
-    ++j;
-    /* Find element in first half */
-L20:
-    ++i;
-    if (i < j) {
-       if (ix[ikey] > ix[i]) {
-           goto L20;
-       }
-    }
-    /* Find element in second half */
-L30:
-    --j;
-    if (ix[j] > ix[ikey]) {
-       goto L30;
-    }
-    /* Interchange */
-    if (i < j) {
-       it = ix[i];
-       ix[i] = ix[j];
-       ix[j] = it;
-       goto L20;
-    }
-    it = ix[ikey];
-    ix[ikey] = ix[j];
-    ix[j] = it;
-    /* Save upper and lower subscripts of the array yet to be sorted */
-    if (m < 11) {
-       if (j - kl < ku - j) {
-           il[m - 1] = j + 1;
-           iu[m - 1] = ku;
-           i = kl;
-           --j;
-       } else {
-           il[m - 1] = kl;
-           iu[m - 1] = j - 1;
-           i = j + 1;
-           j = ku;
-       }
-       ++m;
-       goto L10;
-    } else {
-       prterr(20, "This should never occur.");
-    }
-    /* Use another segment */
-L40:
-    --m;
-    if (m == 0) {
-       return;
-    }
-    i = il[m - 1];
-    j = iu[m - 1];
-    goto L10;
-}
-
-double gammds(double *y, double *p, int *ifault)
-{
-/*
-  -----------------------------------------------------------------------
-  Name:              GAMMDS
-  Purpose:    Cumulative distribution for the gamma distribution.
-  Usage:      PGAMMA (Q, ALPHA,IFAULT)
-  Arguments:
-     Q     - Value at which the distribution is desired.  (Input)
-     ALPHA  - Parameter in the gamma distribution.  (Input)
-     IFAULT - Error indicator. (Output)
-              IFAULT  DEFINITION
-                0     No error
-                1     An argument is misspecified.
-                2     A numerical error has occurred.
-     PGAMMA - The cdf for the gamma distribution with parameter alpha
-             evaluated at Q.  (Output)
-  -----------------------------------------------------------------------
-
-  Algorithm AS 147 APPL. Statist. (1980) VOL. 29, P. 113
-
-  Computes the incomplete gamma integral for positive parameters Y, P
-  using and infinite series.
-  */
-
-    static double a, c, f, g;
-    static int ifail;
-
-    /* Checks for the admissibility of arguments and value of F */
-    *ifault = 1;
-    g = 0.;
-    if (*y <= 0. || *p <= 0.) {
-       return g;
-    }
-    *ifault = 2;
-
-    /*
-      ALOGAM is natural log of gamma function no need to test ifail as
-      an error is impossible
-      */
-
-    a = *p + 1.;
-    f = exp(*p * log(*y) - alogam(&a, &ifail) - *y);
-    if (f == 0.) {
-       return g;
-    }
-    *ifault = 0;
-
-    /* Series begins */
-    c = 1.;
-    g = 1.;
-    a = *p;
-L10:
-    a += 1.;
-    c = c * *y / a;
-    g += c;
-    if (c / g > 1e-6) {
-       goto L10;
-    }
-    g *= f;
-    return g;
-}
-
-/*
-  -----------------------------------------------------------------------
-  Name:              ALOGAM
-  Purpose:    Value of the log-gamma function.
-  Usage:      ALOGAM (X, IFAULT)
-  Arguments:
-     X     - Value at which the log-gamma function is to be evaluated.
-             (Input)
-     IFAULT  - Error indicator.         (Output)
-              IFAULT  DEFINITION
-                0     No error
-                1     X < 0
-     ALGAMA - The value of the log-gamma function at XX.  (Output)
-  -----------------------------------------------------------------------
-
-  Algorithm ACM 291, Comm. ACM. (1966) Vol. 9, P. 684
-
-  Evaluates natural logarithm of gamma(x) for X greater than zero.
-  */
-
-double alogam(double *x, int *ifault)
-{
-    /* Initialized data */
-       //printf("alogam x = %f\t%d\n",*x,*ifault);
-    static double a1 = .918938533204673;
-    static double a2 = 5.95238095238e-4;
-    static double a3 = 7.93650793651e-4;
-    static double a4 = .002777777777778;
-    static double a5 = .083333333333333;
-
-    /* Local variables */
-    static double f, y, z;
-
-    *ifault = 1;
-    if (*x < 0.) {
-       return(0.);
-    }
-    *ifault = 0;
-    y = *x;
-    f = 0.;
-    if (y >= 7.) {
-       goto L30;
-    }
-    f = y;
-L10:
-    y += 1.;
-    if (y >= 7.) {
-       goto L20;
-    }
-    f *= y;
-    goto L10;
-L20:
-    f = -log(f);
-L30:
-    z = 1. / (y * y);
-       
-       //printf("returning %f\n",(f + (y - .5) * log(y) - y + a1 + (((-a2 * z + a3) * z - a4) * z + a5) / y));
-    return(f + (y - .5) * log(y) - y + a1 +
-          (((-a2 * z + a3) * z - a4) * z + a5) / y);
-}
-
-
-#endif /* not USING_R */
-
diff --git a/fisher2.h b/fisher2.h
deleted file mode 100644 (file)
index 905a1a0..0000000
--- a/fisher2.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef GUARD_fisher2
-#define GUARD_fisher2
-
-#include <stdlib.h>
-#include <stdio.h> 
-#include <math.h>
-#include <limits.h> 
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define SINT_MAX INT_MAX
-
-#define        max(a, b)               ((a) < (b) ? (b) : (a))
-#define        min(a, b)               ((a) > (b) ? (b) : (a))
-
-
-void fexact(int *, int *, double *, int *,
-       double *, double *, double *, double *,
-       double *, int *);
-          
-#ifdef __cplusplus        
-}
-#endif
-
-#endif 
-
index 6d6c89bc88fad0f6a474f5d466bc6c7d1d082e47..1420f84b992a54ed6dfb6d3a28d8452a083691de 100644 (file)
@@ -135,7 +135,8 @@ void FlowData::capFlows(int mF){
        try{
                
                maxFlows = mF;
-               if(endFlow > maxFlows){ endFlow = maxFlows;     }               
+               if(endFlow > maxFlows){ endFlow = maxFlows;     }       
+        translateFlow();
                
        }
        catch(exception& e) {
index d43f85d16907d92583ba95069002661253569a6c..6b29f90464f3259c6c625421d9ee1cadbc8b2ef8 100644 (file)
@@ -82,7 +82,7 @@ int FormatColumnMatrix::read(NameAssignment* nameMap){
                string outfile = m->getRootName(squareFile) + "sorted.dist.temp";
                
                //use the unix sort 
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        string command = "sort -n " + squareFile + " -o " + outfile;
                        system(command.c_str());
                #else //sort using windows sort
index de49daa32a31c6aa2a4bfbac97257ca993656a03..60591178836ee793a83cbd3ec75776b463af4ead 100644 (file)
@@ -96,7 +96,7 @@ int FormatPhylipMatrix::read(NameAssignment* nameMap){
                                string outfile = m->getRootName(tempFile) + "sorted.dist.temp";
                                
                                //use the unix sort 
-                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                        string command = "sort -n " + tempFile + " -o " + outfile;
                                        system(command.c_str());
                                #else //sort using windows sort
index 2312649d1cff739956638ba2ad8788949669f1d5..02d8413675bade0cf79f8781987cc45078b74db2 100644 (file)
@@ -46,8 +46,8 @@ string GetSharedOTUCommand::getHelpString(){
                helpString += "The output parameter allows you to output the list of names without the group and bin number added. \n";
                helpString += "With this option you can use the names file as an input in get.seqs and remove.seqs commands. To do this enter output=accnos. \n";
                helpString += "The get.sharedseqs command outputs a .names file for each distance level containing a list of sequences in the OTUs shared by the groups specified.\n";
-               helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(label=yourLabels, groups=yourGroups, fasta=yourFastafile, output=yourOutput).\n";
-               helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group=forest-pasture, fasta=amazon.fasta, output=accnos).\n";
+               helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(list=yourListFile, group=yourGroupFile, label=yourLabels, unique=yourGroups, fasta=yourFastafile, output=yourOutput).\n";
+               helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group= amazon.groups, unique=forest-pasture, fasta=amazon.fasta, output=accnos).\n";
                helpString += "The output to the screen is the distance and the number of otus at that distance for the groups you specified.\n";
                helpString += "The default value for label is all labels in your inputfile. The default for groups is all groups in your file.\n";
                helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
index f0514655a35212215a49fbdb7b1c14c5a3415352..6cd45314194fccce192c7654e2d8994aff6766c5 100644 (file)
@@ -752,7 +752,7 @@ seqDist HCluster::getNextDist(char* buffer, int& index, int size){
                exit(1);
        }
 }
-/***********************************************************************/
+***********************************************************************/
 int HCluster::processFile() {
        try {
                string firstName, secondName;
index ccc6b96ee9d6f0e5f3761f9e969741f5e08defdd..894f92d9644182ece0a7d18e491384d2058f2d54 100644 (file)
@@ -21,7 +21,6 @@ class ignoreGaps : public Dist {
 public:
        
        ignoreGaps() {}
-       ignoreGaps(const ignoreGaps& ddb) {}
        
        void calcDist(Sequence A, Sequence B){          
                int diff = 0;
index 1864f7473b0e21ff06d84ce76968f81374189954..01d8c2e8634d65c31f11515f89ecfa950b81a1ae 100644 (file)
@@ -1120,7 +1120,7 @@ vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundFloatVecto
        try {
                vector<float> pvalues;
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                if(processors == 1){
                        pvalues = driver(groupings, groupingsMap, num, indicatorValues, iters);
                        for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
@@ -1234,7 +1234,7 @@ vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundVector*> >
        try {
                vector<float> pvalues;
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                if(processors == 1){
                        pvalues = driver(groupings, groupingsMap, num, indicatorValues, iters);
                        for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
index 4ae00b91311677488c35961186dfbf072ba005f3..ead3d7e0b20d7b0ee070caebaad74aedeaae7dd3 100644 (file)
@@ -26,7 +26,6 @@ class KmerDB : public Database {
        
 public:
        KmerDB(string, int);
-       KmerDB(const KmerDB& kdb) : kmerSize(kdb.kmerSize), maxKmer(kdb.maxKmer), count(kdb.count), kmerDBName(kdb.kmerDBName), kmerLocations(kdb.kmerLocations), Database(kdb) {}
        KmerDB();
        ~KmerDB();
        
index 0269fc49d3ddbf5c661fc99d21e2ef5ac4e5a1f3..effb1ad44e04abd9c1bdd8523ca3b05f4253de94 100644 (file)
@@ -9,6 +9,13 @@
 
 #include "linearalgebra.h"
 
+// This class references functions used from "Numerical Recipes in C++" //
+
+/*********************************************************************************************************************************/
+inline double SQR(const double a)
+{
+    const double squared = a*a; //the square of the argument
+    return squared;
+}
 /*********************************************************************************************************************************/
 
 inline double SIGN(const double a, const double b)
@@ -16,6 +23,235 @@ inline double SIGN(const double a, const double b)
     return b>=0 ? (a>=0 ? a:-a) : (a>=0 ? -a:a);
 }
 /*********************************************************************************************************************************/
+//Numerical Recipes pg. 245 - Returns the complementary error function erfc(x) with fractional error everywhere less than 1.2 x 10^-7.
+//The polynomial is evaluated for |x|; negative x is handled at the end through 2.0 - ans.
+double LinearAlgebra::erfcc(double x){
+    try {
+        const double z = fabs(x);
+        const double t = 1.0/(1.0+0.5*z);
+        
+        //nested polynomial in t inside the exponential, scaled by t
+        const double ans = t*exp(-z*z-1.26551223+t*(1.00002368+t*(0.37409196+t*(0.09678418+
+            t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+
+            t*(-0.82215223+t*0.17087277)))))))));
+        
+        return (x >= 0.0 ? ans : 2.0 - ans);
+    }
+       catch(exception& e) {
+               //errors are reported under this function's own name
+               m->errorOut(e, "LinearAlgebra", "erfcc");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+//Numerical Recipes pg. 232 (betai) - combines a gammln-based prefactor with the betacf iteration.
+//Returns 0.0 and sets control_pressed when x lies outside [0,1].
+double LinearAlgebra::betai(const double a, const double b, const double x) {
+    try {
+        //x must lie in [0,1]; anything else is reported and flagged
+        if (x < 0.0 || x > 1.0) {
+            m->mothurOut("[ERROR]: bad x in betai.\n");
+            m->control_pressed = true;
+            return 0.0;
+        }
+        
+        //prefactor stays zero at the two endpoints x == 0 and x == 1
+        double prefactor = 0.0;
+        if (!(x == 0.0 || x == 1.0)) {
+            prefactor = exp(gammln(a+b)-gammln(a)-gammln(b)+a*log(x)+b*log(1.0-x));
+        }
+        
+        //below the threshold call betacf directly, otherwise call it with a/b swapped and 1-x
+        if (x < (a+1.0) / (a+b+2.0)) { return prefactor*betacf(a,b,x)/a; }
+        return 1.0-prefactor*betacf(b,a,1.0-x)/b;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "betai");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+//Numerical Recipes pg. 219 (gammln) - six-term series approximation evaluated at xx.
+double LinearAlgebra::gammln(const double xx) {
+    try {
+        static const double cof[6]={76.18009172947146,-86.50532032941677,24.01409824083091,
+            -1.231739572450155,0.120858003e-2,-0.536382e-5};
+        
+        //leading term built from xx + 5.5
+        const double shifted = xx+5.5;
+        const double tmp = shifted - (xx+0.5)*log(shifted);
+        
+        //accumulate cof[j] / (xx + j + 1) on top of 1.0
+        double denominator = xx;
+        double series = 1.0;
+        for (int j=0;j<6;j++) {
+            denominator += 1.0;
+            series += cof[j]/denominator;
+        }
+        
+        return -tmp+log(2.5066282746310005*series/xx);
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "gammln");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+//Numerical Recipes pg. 223 (gammp) - returns gser's result when x < a+1, otherwise 1 - gcf's result.
+//Returns 0.0 and sets control_pressed when x < 0 or a <= 0.
+double LinearAlgebra::gammp(const double a, const double x) {
+    try {
+        if (x < 0.0 || a <= 0.0) {
+            m->mothurOut("[ERROR]: Invalid arguments in routine GAMMP\n");
+            m->control_pressed = true;
+            return 0.0;
+        }
+        
+        double lnGamma; //filled in by gser/gcf, not used further here
+        
+        if (x < (a+1.0)) {
+            double seriesValue;
+            gser(seriesValue,a,x,lnGamma);
+            return seriesValue;
+        }
+        
+        double fractionValue;
+        gcf(fractionValue,a,x,lnGamma);
+        return 1.0-fractionValue;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "gammp");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+//Numerical Recipes pg. 223 (gammq) - returns 1 - gser's result when x < a+1, otherwise gcf's result.
+//Returns 0.0 and sets control_pressed when x < 0 or a <= 0.
+double LinearAlgebra::gammq(const double a, const double x) {
+    try {
+        double gamser,gammcf,gln;
+        
+        //reject arguments outside the supported domain and flag the error
+        if (x < 0.0 || a <= 0.0) { m->mothurOut("[ERROR]: Invalid arguments in routine GAMMQ\n"); m->control_pressed = true; return 0.0; }
+        
+        if (x < (a+1.0)) {
+            //gser branch
+            gser(gamser,a,x,gln);
+            return 1.0-gamser;
+        }
+        
+        //gcf branch
+        gcf(gammcf,a,x,gln);
+        return gammcf;
+    }
+       catch(exception& e) {
+               //errors are reported under this function's own name
+               m->errorOut(e, "LinearAlgebra", "gammq");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+//Numerical Recipes pg. 224 (gcf) - iterative evaluation; the result is written to gammcf, gammln(a) is written to gln, and the return value is always 0.0.
+double LinearAlgebra::gcf(double& gammcf, const double a, const double x, double& gln){
+    try {
+        const int ITMAX=100; //maximum number of iterations
+        const double EPS=numeric_limits<double>::epsilon(); //convergence tolerance
+        const double FPMIN=numeric_limits<double>::min()/EPS; //smallest magnitude allowed for c and d
+        int i;
+        double an,b,c,d,del,h;
+        
+        gln=gammln(a);
+        b=x+1.0-a;
+        c=1.0/FPMIN;
+        d=1.0/b;
+        h=d;
+        for (i=1;i<=ITMAX;i++) {
+            an = -i*(i-a);
+            b += 2.0;
+            d=an*d+b;
+            if (fabs(d) < FPMIN) { d=FPMIN; } //keep d away from zero before it is inverted below
+            c=b+an/c;
+            if (fabs(c) < FPMIN) { c=FPMIN; } //keep c away from zero; it is a divisor in the next pass
+            d=1.0/d;
+            del=d*c;
+            h *= del;
+            if (fabs(del-1.0) <= EPS) break; //converged: the latest factor equals 1 to within EPS
+        }
+        if (i > ITMAX)  { m->mothurOut("[ERROR]: a too large, ITMAX too small in gcf\n"); m->control_pressed = true; } //loop ended without the break above
+        gammcf=exp(-x+a*log(x)-gln)*h; //assigned even after a failed convergence
+        
+        return 0.0; //carries no information; results are in gammcf and gln
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "gcf");
+               exit(1);
+       }
+
+}
+/*********************************************************************************************************************************/
+//Numerical Recipes pg. 223 (gser) - series evaluation; the result is written to gamser and gammln(a) to gln.
+//The return value is always 0 and carries no information.
+//gamser is assigned on every path: it is 0.0 when x <= 0 and when the series fails to converge.
+double LinearAlgebra::gser(double& gamser, const double a, const double x, double& gln) {
+    try {
+        const int ITMAX = 100; //maximum number of series terms
+        const double EPS = numeric_limits<double>::epsilon();
+        
+        gln=gammln(a);
+        gamser=0.0; //defined value for the early-exit and error paths below
+        
+        if (x <= 0.0) {
+            if (x < 0.0) {  m->mothurOut("[ERROR]: x less than 0 in routine GSER\n"); m->control_pressed = true;  }
+            return 0.0;
+        }
+        
+        double ap=a;
+        double del=1.0/a;
+        double sum=del;
+        for (int n=0;n<ITMAX;n++) {
+            ++ap;
+            del *= x/ap;
+            sum += del;
+            if (fabs(del) < fabs(sum)*EPS) { //latest term is negligible relative to the sum
+                gamser=sum*exp(-x+a*log(x)-gln);
+                return 0.0;
+            }
+        }
+        
+        //no convergence within ITMAX terms: report it and flag the run, as gcf and betacf do
+        m->mothurOut("[ERROR]: a too large, ITMAX too small in routine GSER\n");
+        m->control_pressed = true;
+        return 0.0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "gser");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+//Numerical Recipes pg. 233 (betacf) - iterative evaluation used by betai; returns the accumulated product h.
+double LinearAlgebra::betacf(const double a, const double b, const double x) {
+    try {
+        const int MAXIT = 100; //maximum number of iterations
+        const double EPS = numeric_limits<double>::epsilon(); //convergence tolerance
+        const double FPMIN = numeric_limits<double>::min() / EPS; //smallest magnitude allowed for c and d
+        int m1, m2;
+        double aa, c, d, del, h, qab, qam, qap;
+        
+        qab=a+b;
+        qap=a+1.0;
+        qam=a-1.0;
+        c=1.0;
+        d=1.0-qab*x/qap;
+        if (fabs(d) < FPMIN) d=FPMIN; //keep d away from zero before it is inverted
+        d=1.0/d;
+        h=d;
+        for (m1=1;m1<=MAXIT;m1++) {
+            m2=2*m1;
+            aa=m1*(b-m1)*x/((qam+m2)*(a+m2)); //coefficient for the first of the two updates in this pass
+            d=1.0+aa*d;
+            if (fabs(d) < FPMIN) d=FPMIN;
+            c=1.0+aa/c;
+            if (fabs(c) < FPMIN) c=FPMIN;
+            d=1.0/d;
+            h *= d*c;
+            aa = -(a+m1)*(qab+m1)*x/((a+m2)*(qap+m2)); //coefficient for the second update in this pass
+            d=1.0+aa*d;
+            if (fabs(d) < FPMIN) d=FPMIN;
+            c=1.0+aa/c;
+            if (fabs(c) < FPMIN) c=FPMIN;
+            d=1.0/d;
+            del=d*c;
+            h *= del;
+            if (fabs(del-1.0) < EPS) break; //converged: the latest factor equals 1 to within EPS
+        }
+        
+        if (m1 > MAXIT) { m->mothurOut("[ERROR]: a or b too big or MAXIT too small in betacf."); m->mothurOutEndLine(); m->control_pressed = true; } //loop ended without the break above
+        return h; //returned even after a failed convergence
+        
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "betacf");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
 
 vector<vector<double> > LinearAlgebra::matrix_mult(vector<vector<double> > first, vector<vector<double> > second){
        try {
@@ -798,14 +1034,7 @@ double LinearAlgebra::calcKendall(vector<double>& x, vector<double>& y, double&
                                
                double p = (numCoor - numDisCoor) / (float) count;
                
-               //calc signif - zA - http://en.wikipedia.org/wiki/Kendall_tau_rank_correlation_coefficient#Significance_tests
-               double numer = 3.0 * (numCoor - numDisCoor);
-        int n = xscores.size();
-        double denom = n * (n-1) * (2*n + 5) / (double) 2.0;
-        denom = sqrt(denom);
-        sig = numer / denom;
-               
-               if (isnan(sig) || isinf(sig)) { sig = 0.0; }
+               sig = calcKendallSig(x.size(), p);
                
                return p;
        }
@@ -814,12 +1043,200 @@ double LinearAlgebra::calcKendall(vector<double>& x, vector<double>& y, double&
                exit(1);
        }
 }
+double LinearAlgebra::ran0(int& idum) //advances the seed idum in place and returns AM*idum for the new seed
+{
+    const int IA=16807,IM=2147483647,IQ=127773; //IQ equals IM/IA
+    const int IR=2836,MASK=123459876; //IR equals IM%IA
+    const double AM=1.0/double(IM);
+    int k;
+    double ans;
+    
+    idum ^= MASK; //XOR with MASK; undone by the second XOR below
+    k=idum/IQ;
+    idum=IA*(idum-k*IQ)-IR*k; //Schrage's method: IA*idum mod IM computed from IQ and IR
+    if (idum < 0) idum += IM; //bring a negative result back into range
+    ans=AM*idum;
+    idum ^= MASK; //re-apply the mask before handing idum back to the caller
+    return ans;
+}
+
+double LinearAlgebra::ran1(int &idum) //advances idum in place and returns a value drawn through the static table iv, capped at RNMX
+{
+       const int IA=16807,IM=2147483647,IQ=127773,IR=2836,NTAB=32; //IQ equals IM/IA, IR equals IM%IA
+       const int NDIV=(1+(IM-1)/NTAB); //divisor that maps iy to a table index
+       const double EPS=3.0e-16,AM=1.0/IM,RNMX=(1.0-EPS);
+       static int iy=0; //persists between calls; zero triggers the table fill below
+       static vector<int> iv(NTAB); //table of NTAB generator outputs, persists between calls
+       int j,k;
+       double temp;
+    
+       if (idum <= 0 || !iy) { //(re)initialise when idum is non-positive or iy is still zero
+               if (-idum < 1) idum=1; //NOTE(review): a positive idum reaching this branch is also reset to 1 - confirm intended
+               else idum = -idum;
+               for (j=NTAB+7;j>=0;j--) { //the first 8 values (j >= NTAB) are discarded, the rest fill iv
+                       k=idum/IQ;
+                       idum=IA*(idum-k*IQ)-IR*k;
+                       if (idum < 0) idum += IM;
+                       if (j < NTAB) iv[j] = idum;
+               }
+               iy=iv[0];
+       }
+       k=idum/IQ;
+       idum=IA*(idum-k*IQ)-IR*k; //same generator step as in the fill loop
+       if (idum < 0) idum += IM;
+       j=iy/NDIV; //table slot selected by the previous output
+       iy=iv[j]; //output comes from the table ...
+       iv[j] = idum; //... and the slot is refilled with the fresh value
+       if ((temp=AM*iy) > RNMX) return RNMX; //never return more than RNMX
+       else return temp;
+}
+
+double LinearAlgebra::ran2(int &idum) //combines two generator sequences (idum and the static idum2) through the static table iv; result is capped at RNMX
+{
+       const int IM1=2147483563,IM2=2147483399;
+       const int IA1=40014,IA2=40692,IQ1=53668,IQ2=52774; //IQ1 equals IM1/IA1, IQ2 equals IM2/IA2
+       const int IR1=12211,IR2=3791,NTAB=32,IMM1=IM1-1; //IR1 equals IM1%IA1, IR2 equals IM2%IA2
+       const int NDIV=1+IMM1/NTAB; //divisor that maps iy to a table index
+       const double EPS=3.0e-16,RNMX=1.0-EPS,AM=1.0/double(IM1);
+       static int idum2=123456789,iy=0; //second sequence state and last output, persist between calls
+       static vector<int> iv(NTAB); //table of NTAB values from the first sequence, persists between calls
+       int j,k;
+       double temp;
+    
+       if (idum <= 0) { //a non-positive seed (re)initialises the generator
+               idum=(idum==0 ? 1 : -idum);
+               idum2=idum;
+               for (j=NTAB+7;j>=0;j--) { //the first 8 values (j >= NTAB) are discarded, the rest fill iv
+                       k=idum/IQ1;
+                       idum=IA1*(idum-k*IQ1)-k*IR1;
+                       if (idum < 0) idum += IM1;
+                       if (j < NTAB) iv[j] = idum;
+               }
+               iy=iv[0];
+       }
+       k=idum/IQ1;
+       idum=IA1*(idum-k*IQ1)-k*IR1; //step of the first sequence
+       if (idum < 0) idum += IM1;
+       k=idum2/IQ2;
+       idum2=IA2*(idum2-k*IQ2)-k*IR2; //step of the second sequence
+       if (idum2 < 0) idum2 += IM2;
+       j=iy/NDIV; //table slot selected by the previous output
+       iy=iv[j]-idum2; //combine the table entry with the second sequence
+       iv[j] = idum; //refill the slot with the fresh first-sequence value
+       if (iy < 1) iy += IMM1; //keep iy at 1 or above
+       if ((temp=AM*iy) > RNMX) return RNMX; //never return more than RNMX
+       else return temp;
+}
+
+double LinearAlgebra::ran3(int &idum) //subtractive generator over the static table ma; returns mj*FAC
+{
+       static int inext,inextp; //the two read positions in ma, persist between calls
+       static int iff=0; //zero until the table has been initialised once
+       const int MBIG=1000000000,MSEED=161803398,MZ=0;
+       const double FAC=(1.0/MBIG);
+       static vector<int> ma(56); //only indices 1..55 are used below
+       int i,ii,k,mj,mk;
+    
+       if (idum < 0 || iff == 0) { //initialise on a negative seed or on the very first call
+               iff=1;
+               mj=labs(MSEED-labs(idum));
+               mj %= MBIG;
+               ma[55]=mj;
+               mk=1;
+               for (i=1;i<=54;i++) { //fill the remaining slots in the order given by (21*i) % 55
+                       ii=(21*i) % 55;
+                       ma[ii]=mk;
+                       mk=mj-mk;
+                       if (mk < int(MZ)) mk += MBIG;
+                       mj=ma[ii];
+               }
+               for (k=0;k<4;k++) //four mixing passes over the whole table
+                       for (i=1;i<=55;i++) {
+                               ma[i] -= ma[1+(i+30) % 55];
+                               if (ma[i] < int(MZ)) ma[i] += MBIG;
+                       }
+               inext=0;
+               inextp=31;
+               idum=1; //seed is made positive so later calls skip initialisation
+       }
+       if (++inext == 56) inext=1; //wrap from 56 back to 1
+       if (++inextp == 56) inextp=1; //wrap from 56 back to 1
+       mj=ma[inext]-ma[inextp];
+       if (mj < int(MZ)) mj += MBIG; //keep the new value non-negative
+       ma[inext]=mj; //store it back for future calls
+       return mj*FAC;
+}
+
+double LinearAlgebra::ran4(int &idum) //hashes (idums, idum) with psdes, builds a float from the result and increments idum
+{
+#if defined(vax) || defined(_vax_) || defined(__vax__) || defined(VAX)
+       static const unsigned long jflone = 0x00004080;
+       static const unsigned long jflmsk = 0xffff007f;
+#else
+       static const unsigned long jflone = 0x3f800000; //presumably the IEEE-754 single-precision bit pattern of 1.0f - TODO confirm
+       static const unsigned long jflmsk = 0x007fffff; //presumably the 23 mantissa bits of an IEEE-754 float - TODO confirm
+#endif
+       unsigned long irword,itemp,lword;
+       static int idums = 0; //remembered seed, persists between calls
+    
+       if (idum < 0) { //a negative idum installs a new seed and restarts the counter at 1
+               idums = -idum;
+               idum=1;
+       }
+       irword=idum;
+       lword=idums;
+       psdes(lword,irword); //both words are modified in place
+       itemp=jflone | (jflmsk & irword); //keep the masked bits of irword and OR in jflone
+       ++idum; //advance the counter for the next call
+       return (*(float *)&itemp)-1.0; //NOTE(review): reads the storage of an unsigned long as a float - depends on the width/byte order of unsigned long and breaks strict aliasing; confirm, or copy the low 32 bits with memcpy
+}
+
+void LinearAlgebra::psdes(unsigned long &lword, unsigned long &irword) //mixes the two words in place over NITER rounds
+{
+       const int NITER=4; //number of mixing rounds
+       static const unsigned long c1[NITER]={
+               0xbaa96887L, 0x1e17d32cL, 0x03bcdc3cL, 0x0f33d1b2L};
+       static const unsigned long c2[NITER]={
+               0x4b0f3b58L, 0xe874f0c3L, 0x6955c5a6L, 0x55a7ca46L};
+       unsigned long i,ia,ib,iswap,itmph=0,itmpl=0;
+    
+       for (i=0;i<NITER;i++) {
+               ia=(iswap=irword) ^ c1[i]; //remember the incoming irword in iswap
+               itmpl = ia & 0xffff; //low 16 bits
+               itmph = ia >> 16; //NOTE(review): holds more than 16 bits if unsigned long is wider than 32 bits - confirm intended width
+               ib=itmpl*itmpl+ ~(itmph*itmph);
+               irword=lword ^ (((ia = (ib >> 16) |
+                          ((ib & 0xffff) << 16)) ^ c2[i])+itmpl*itmph);
+               lword=iswap; //the previous irword becomes the new lword
+       }
+}
+/*********************************************************************************************************************************/
+//Significance for a Kendall coefficient r computed from n observations, obtained through erfcc.
+//A NaN or infinite result is reported as 0.0.
+double LinearAlgebra::calcKendallSig(double n, double r){
+    try {
+        //variance term that depends only on n
+        const double variance = (4.0*n+10.0)/(9.0*n*(n-1.0));
+        const double zScore = r/sqrt(variance);
+        const double probability = erfcc(fabs(zScore)/1.4142136);
+        
+        if (isnan(probability) || isinf(probability)) { return 0.0; }
+        return probability;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "calcKendallSig");
+               exit(1);
+       }
+}
+
 /*********************************************************************************************************************************/
 double LinearAlgebra::calcSpearman(vector<double>& x, vector<double>& y, double& sig){
        try {
                if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
                
                //format data
+        double sf = 0.0; //f^3 - f where f is the number of ties in x;
+        double sg = 0.0; //f^3 - f where f is the number of ties in y;
                map<float, int> tableX; 
                map<float, int>::iterator itTable;
                vector<spearmanRank> xscores; 
@@ -864,6 +1281,8 @@ double LinearAlgebra::calcSpearman(vector<double>& x, vector<double>& y, double&
                                                float thisrank = rankTotal / (float) xties.size();
                                                rankx[xties[k].name] = thisrank;
                                        }
+                    int t = xties.size();
+                    sf += (t*t*t-t);
                                        xties.clear();
                                        rankTotal = 0;
                                }
@@ -914,6 +1333,8 @@ double LinearAlgebra::calcSpearman(vector<double>& x, vector<double>& y, double&
                                                float thisrank = rankTotal / (float) yties.size();
                                                rank[yties[k].name] = thisrank;
                                        }
+                    int t = yties.size();
+                    sg += (t*t*t-t);
                                        yties.clear();
                                        rankTotal = 0;
                                }
@@ -942,19 +1363,52 @@ double LinearAlgebra::calcSpearman(vector<double>& x, vector<double>& y, double&
                                
                p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2)));
                
-               //signifigance calc - http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
-               double temp = (x.size()-2) / (double) (1- (p*p));
-               temp = sqrt(temp);
-               sig = p*temp;
-               if (isnan(sig) || isinf(sig)) { sig = 0.0; }
-                               
+               //Numerical Recipes 646
+        sig = calcSpearmanSig(n, sf, sg, di);
+               
                return p;
        }
        catch(exception& e) {
                m->errorOut(e, "LinearAlgebra", "calcSpearman");
                exit(1);
        }
-}              
+}
+/*********************************************************************************************************************************/
+//Significance for a Spearman comparison of n observations.
+//sf and sg are the tie sums for the two variables, d is the sum of squared rank differences.
+//Two probabilities are computed (probd via erfcc, probrs via betai) and the smaller one is returned;
+//a NaN or infinite result is reported as 0.0.
+double LinearAlgebra::calcSpearmanSig(double n, double sf, double sg, double d){
+    try {
+        const double en3n = n*n*n-n;
+        const double aved = en3n/6.0-(sf+sg)/12.0;
+        const double tieFactor = (1.0-sf/en3n)*(1.0-sg/en3n);
+        const double vard = ((n-1.0)*n*n*SQR(n+1.0)/36.0)*tieFactor;
+        
+        //probability based on the deviation of d from aved
+        const double zd = (d-aved)/sqrt(vard);
+        const double probd = erfcc(fabs(zd)/1.4142136);
+        
+        //probability based on the coefficient rs; stays 0.0 unless (rs+1)*(1-rs) is positive
+        const double rs = (1.0-(6.0/en3n)*(d+(sf+sg)/12.0))/sqrt(tieFactor);
+        const double rsFactor = (rs+1.0)*(1.0-rs);
+        double probrs = 0.0;
+        if (rsFactor > 0.0) {
+            const double df = n-2.0;
+            const double t = rs*sqrt(df/rsFactor);
+            probrs = betai(0.5*df,0.5,df/(df+t*t));
+        }
+        
+        //smaller of probd and probrs is sig
+        const double smaller = (probd < probrs) ? probd : probrs;
+        if (isnan(smaller) || isinf(smaller)) { return 0.0; }
+        return smaller;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "calcSpearmanSig");
+               exit(1);
+       }
+}
 /*********************************************************************************************************************************/
 double LinearAlgebra::calcPearson(vector<double>& x, vector<double>& y, double& sig){
        try {
@@ -988,11 +1442,8 @@ double LinearAlgebra::calcPearson(vector<double>& x, vector<double>& y, double&
                                
                r = numerator / denom;
                
-               //signifigance calc - http://faculty.vassar.edu/lowry/ch4apx.html
-               double temp =  (1- (r*r)) / (double) (x.size()-2);
-               temp = sqrt(temp);
-               sig = r / temp;
-               if (isnan(sig) || isinf(sig)) { sig = 0.0; }
+               //Numerical Recipes pg.644
+        sig = calcPearsonSig(x.size(), r);
                
                return r;
        }
@@ -1000,7 +1451,31 @@ double LinearAlgebra::calcPearson(vector<double>& x, vector<double>& y, double&
                m->errorOut(e, "LinearAlgebra", "calcPearson");
                exit(1);
        }
-}                      
+}
+/*********************************************************************************************************************************/
+//Significance for a Pearson coefficient r computed from n observations, obtained through erfcc.
+//A NaN or infinite result is reported as 0.0.
+double LinearAlgebra::calcPearsonSig(double n, double r){
+    try {
+        const double TINY = 1.0e-20; //keeps the ratio below finite and positive when r is exactly 1 or -1
+        
+        //Fisher's z transformation
+        const double fisherZ = 0.5*log((1.0+r+TINY)/(1.0-r+TINY));
+        
+        //the betai based calculation below was giving an error in betacf with sop files:
+        //    int df = n-2;
+        //    double t = r*sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)));
+        //    sig = betai(0.5+df, 0.5, df/(df+t*t));
+        //Numerical Recipes says the erfcc form gives approximately the same result
+        const double probability = erfcc(fabs(fisherZ*sqrt(n-1.0))/1.4142136);
+        
+        if (isnan(probability) || isinf(probability)) { return 0.0; }
+        return probability;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "calcPearsonSig");
+               exit(1);
+       }
+}
 /*********************************************************************************************************************************/
 
 vector<vector<double> > LinearAlgebra::getObservedEuclideanDistance(vector<vector<double> >& relAbundData){
index 321f46a1e283b6f4113b8dfd8242b0dfa190cc4e..ecb635f70672c8a8928090935fa8af724a5a819b 100644 (file)
@@ -33,12 +33,32 @@ public:
        double calcPearson(vector<double>&, vector<double>&, double&);
        double calcSpearman(vector<double>&, vector<double>&, double&);
        double calcKendall(vector<double>&, vector<double>&, double&);
-       
-
+    
+       double calcSpearmanSig(double, double, double, double); //length, f^3 - f where f is the number of ties in x, f^3 - f where f is the number of ties in y, sum of squared diffs in ranks. - designed to find the significance of one score.
+    double calcPearsonSig(double, double); //length, coeff.
+    double calcKendallSig(double, double); //length, coeff.
+    
+    
 private:
        MothurOut* m;
        
        double pythag(double, double);
+    double betacf(const double, const double, const double);
+    double betai(const double, const double, const double);
+    double gammln(const double);
+    double gammp(const double, const double);
+    double gammq(const double, const double);
+    double gser(double&, const double, const double, double&);
+    double gcf(double&, const double, const double, double&);
+    double erfcc(double);
+    
+    double ran0(int&); //for testing 
+    double ran1(int&); //for testing
+    double ran2(int&); //for testing
+    double ran3(int&); //for testing
+    double ran4(int&); //for testing
+    void psdes(unsigned long &, unsigned long &); //for testing
+    
 };
 
 #endif
index 476070eb2fbb654d054238c45ee335b2ce8fe0eb..d8e6dcd603913805b0355d3f69173f25cf7803b2 100644 (file)
--- a/makefile
+++ b/makefile
@@ -15,9 +15,10 @@ USEREADLINE ?= yes
 CYGWIN_BUILD ?= no
 USECOMPRESSION ?= no
 MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"1/9/2012\""
-VERSION = "\"1.23.0\""
+RELEASE_DATE = "\"3/16/2012\""
+VERSION = "\"1.24.1\""
 FORTAN_COMPILER = gfortran
+FORTRAN_FLAGS = 
 
 # Optimize to level 3:
 CXXFLAGS += -O3 
@@ -38,7 +39,8 @@ ifeq  ($(strip $(64BIT_VERSION)),yes)
        #if you are a linux user use the following line
        #CXXFLAGS += -mtune=native -march=native -m64
        
-        CXXFLAGS += -DBIT_VERSION
+       CXXFLAGS += -DBIT_VERSION
+    FORTRAN_FLAGS = -m64
 endif
 
 
@@ -97,12 +99,12 @@ mothur : fortranSource $(OBJECTS) uchime
        $(CXX) $(LDFLAGS) $(TARGET_ARCH) -o $@ $(OBJECTS) $(LIBS)
        
        strip mothur
-
 uchime:
        cd uchime_src && ./mk && mv uchime .. && cd ..
        
 fortranSource:
-       ${FORTAN_COMPILER} -c *.f
+       ${FORTAN_COMPILER} -c $(FORTRAN_FLAGS) *.f
 
 install : mothur
 #      cp mothur ../Release/mothur
index f31e4821fe52cb4160af0c6dbc30c0bf989a1cf6..3d1d1c9fa5ac995a65265b62ed4c59f25ed6679d 100644 (file)
@@ -612,7 +612,7 @@ vector<trace_struct> Maligner::extractHighestPath(vector<vector<score_struct> >
        }
 }
 
-//***************************************************************************************************************
+***************************************************************************************************************
 
 vector<trace_struct> Maligner::mapTraceRegionsToAlignment(vector<score_struct> path, vector<Sequence*> seqs) {
        try {
index d1cf54ba03216996b3335e030e6db7d80efd7222..73c38f7ca3b06cd5b38b45b070beb2c8fd2f0a8b 100644 (file)
@@ -8,58 +8,20 @@
  */
 
 #include "matrixoutputcommand.h"
-#include "sharedsobscollectsummary.h"
-#include "sharedchao1.h"
-#include "sharedace.h"
-#include "sharednseqs.h"
-#include "sharedjabund.h"
-#include "sharedsorabund.h"
-#include "sharedjclass.h"
-#include "sharedsorclass.h"
-#include "sharedjest.h"
-#include "sharedsorest.h"
-#include "sharedthetayc.h"
-#include "sharedthetan.h"
-#include "sharedkstest.h"
-#include "whittaker.h"
-#include "sharedochiai.h"
-#include "sharedanderbergs.h"
-#include "sharedkulczynski.h"
-#include "sharedkulczynskicody.h"
-#include "sharedlennon.h"
-#include "sharedmorisitahorn.h"
-#include "sharedbraycurtis.h"
-#include "sharedjackknife.h"
-#include "whittaker.h"
-#include "odum.h"
-#include "canberra.h"
-#include "structeuclidean.h"
-#include "structchord.h"
-#include "hellinger.h"
-#include "manhattan.h"
-#include "structpearson.h"
-#include "soergel.h"
-#include "spearman.h"
-#include "structkulczynski.h"
-#include "structchi2.h"
-#include "speciesprofile.h"
-#include "hamming.h"
-#include "gower.h"
-#include "memchi2.h"
-#include "memchord.h"
-#include "memeuclidean.h"
-#include "mempearson.h"
+#include "subsample.h"
 
 //**********************************************************************************************************************
 vector<string> MatrixOutputCommand::setParameters(){   
        try {
                CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
+        CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", "jclass-thetayc", "", "", "",true,false); parameters.push_back(pcalc);
                CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+        CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
+        CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
@@ -76,9 +38,11 @@ string MatrixOutputCommand::getHelpString(){
        try {
                string helpString = "";
                ValidCalculators validCalculator;
-               helpString += "The dist.shared command parameters are shared, groups, calc, output, processors and label.  shared is a required, unless you have a valid current file.\n";
+               helpString += "The dist.shared command parameters are shared, groups, calc, output, processors, subsample, iters and label.  shared is a required, unless you have a valid current file.\n";
                helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n";
                helpString += "The group names are separated by dashes. The label parameter allows you to select what distance levels you would like distance matrices created for, and is also separated by dashes.\n";
+        helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
+        helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
                helpString += "The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels).\n";
                helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
                helpString += "Example dist.shared(groups=A-B-C, calc=jabund-sorabund).\n";
@@ -101,6 +65,7 @@ MatrixOutputCommand::MatrixOutputCommand(){
                setParameters();
                vector<string> tempOutNames;
                outputTypes["phylip"] = tempOutNames;
+        outputTypes["subsample"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "MatrixOutputCommand", "MatrixOutputCommand");
@@ -135,6 +100,7 @@ MatrixOutputCommand::MatrixOutputCommand(string option)  {
                        //initialize outputTypes
                        vector<string> tempOutNames;
                        outputTypes["phylip"] = tempOutNames;
+            outputTypes["subsample"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -199,7 +165,19 @@ MatrixOutputCommand::MatrixOutputCommand(string option)  {
                                //remove citation from list of calcs
                                for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
                        }
-
+            
+            temp = validParameter.validFile(parameters, "iters", false);                       if (temp == "not found") { temp = "1000"; }
+                       m->mothurConvert(temp, iters); 
+            
+            temp = validParameter.validFile(parameters, "subsample", false);           if (temp == "not found") { temp = "F"; }
+                       if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
+            else {  
+                if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
+                else { subsample = false; }
+            }
+            
+            if (subsample == false) { iters = 1; }
+            
                        if (abort == false) {
                        
                                ValidCalculators validCalculator;
@@ -329,6 +307,32 @@ int MatrixOutputCommand::execute(){
                        lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
                        lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
                }       
+        
+        if (subsample) { 
+            if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
+                subsampleSize = lookup[0]->getNumSeqs();
+                for (int i = 1; i < lookup.size(); i++) {
+                    int thisSize = lookup[i]->getNumSeqs();
+                    
+                    if (thisSize < subsampleSize) {    subsampleSize = thisSize;       }
+                }
+            }else {
+                m->clearGroups();
+                Groups.clear();
+                vector<SharedRAbundVector*> temp;
+                for (int i = 0; i < lookup.size(); i++) {
+                    if (lookup[i]->getNumSeqs() < subsampleSize) { 
+                        m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
+                        delete lookup[i];
+                    }else { 
+                        Groups.push_back(lookup[i]->getGroup()); 
+                        temp.push_back(lookup[i]);
+                    }
+                } 
+                lookup = temp;
+                m->setGroups(Groups);
+            }
+        }
                
                if (m->control_pressed) { delete input; for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->clearGroups(); return 0;  }
                                
@@ -421,7 +425,7 @@ int MatrixOutputCommand::execute(){
        }
 }
 /***********************************************************/
-void MatrixOutputCommand::printSims(ostream& out, vector< vector<float> >& simMatrix) {
+void MatrixOutputCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
        try {
                
                out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
@@ -457,111 +461,307 @@ int MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
        try {
                EstOutput data;
                vector<SharedRAbundVector*> subset;
-               vector< vector<seqDist> > calcDists; calcDists.resize(matrixCalculators.size()); //one for each calc, this will be used to make .dist files
-               
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               if(processors == 1){
-                       driver(thisLookup, 0, numGroups, calcDists);
-               }else{
-                       int process = 1;
-                       vector<int> processIDS;
-                       
-                       //loop through and create all the processes you want
-                       while (process != processors) {
-                               int pid = fork();
-                               
-                               if (pid > 0) {
-                                       processIDS.push_back(pid); 
-                                       process++;
-                               }else if (pid == 0){
-                                       driver(thisLookup, lines[process].start, lines[process].end, calcDists);   
-                                       
-                                       string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(getpid()) + ".dist";
-                                       ofstream outtemp;
-                                       m->openOutputFile(tempdistFileName, outtemp);
-                                               
-                                       for (int i = 0; i < calcDists.size(); i++) {
-                                               outtemp << calcDists[i].size() << endl;
-                                                       
-                                               for (int j = 0; j < calcDists[i].size(); j++) {
-                                                       outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
-                                               }
-                                       }
-                                       outtemp.close();
-                                                                       
-                                       exit(0);
-                               }else { 
-                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
-                                       for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
-                                       exit(0);
-                               }
-                       }
-                       
-                       //parent do your part
-                       driver(thisLookup, lines[0].start, lines[0].end, calcDists);   
-                                               
-                       //force parent to wait until all the processes are done
-                       for (int i = 0; i < processIDS.size(); i++) {
-                               int temp = processIDS[i];
-                               wait(&temp);
-                       }
-                       
-                       for (int i = 0; i < processIDS.size(); i++) {
-                               string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(processIDS[i]) +  ".dist";
-                               ifstream intemp;
-                               m->openInputFile(tempdistFileName, intemp);
-                                       
-                               for (int k = 0; k < calcDists.size(); k++) {
-                                       int size = 0;
-                                       intemp >> size; m->gobble(intemp);
-                                               
-                                       for (int j = 0; j < size; j++) {
-                                               int seq1 = 0;
-                                               int seq2 = 0;
-                                               float dist = 1.0;
-                                                       
-                                               intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
-                                                       
-                                               seqDist tempDist(seq1, seq2, dist);
-                                               calcDists[k].push_back(tempDist);
-                                       }
-                               }
-                               intemp.close();
-                               m->mothurRemove(tempdistFileName);
-                       }
-                       
+               vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
+
+        vector< vector<seqDist>  > calcDists; calcDists.resize(matrixCalculators.size());              
+
+        for (int thisIter = 0; thisIter < iters; thisIter++) {
+            
+            vector<SharedRAbundVector*> thisItersLookup = thisLookup;
+            
+            if (subsample) {
+                SubSample sample;
+                vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
+                
+                //make copy of lookup so we don't get access violations
+                vector<SharedRAbundVector*> newLookup;
+                for (int k = 0; k < thisItersLookup.size(); k++) {
+                    SharedRAbundVector* temp = new SharedRAbundVector();
+                    temp->setLabel(thisItersLookup[k]->getLabel());
+                    temp->setGroup(thisItersLookup[k]->getGroup());
+                    newLookup.push_back(temp);
+                }
+                
+                //for each bin
+                for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+                    if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+                    for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+                }
+                
+                tempLabels = sample.getSample(newLookup, subsampleSize);
+                thisItersLookup = newLookup;
+            }
+        
+            if(processors == 1){
+                driver(thisItersLookup, 0, numGroups, calcDists);
+            }else{
+                int process = 1;
+                vector<int> processIDS;
+                
+                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                //loop through and create all the processes you want
+                while (process != processors) {
+                    int pid = fork();
+                    
+                    if (pid > 0) {
+                        processIDS.push_back(pid); 
+                        process++;
+                    }else if (pid == 0){
+                        
+                        driver(thisItersLookup, lines[process].start, lines[process].end, calcDists);   
+                        
+                        string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(getpid()) + ".dist";
+                        ofstream outtemp;
+                        m->openOutputFile(tempdistFileName, outtemp);
+                            
+                        for (int i = 0; i < calcDists.size(); i++) {
+                            outtemp << calcDists[i].size() << endl;
+                                
+                            for (int j = 0; j < calcDists[i].size(); j++) {
+                                outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
+                            }
+                        }
+                        outtemp.close();
+                                        
+                        exit(0);
+                    }else { 
+                        m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                        for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                        exit(0);
+                    }
+                }
+                
+                //parent do your part
+                driver(thisItersLookup, lines[0].start, lines[0].end, calcDists);   
+                            
+                //force parent to wait until all the processes are done
+                for (int i = 0; i < processIDS.size(); i++) {
+                    int temp = processIDS[i];
+                    wait(&temp);
+                }
+                
+                for (int i = 0; i < processIDS.size(); i++) {
+                    string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(processIDS[i]) +  ".dist";
+                    ifstream intemp;
+                    m->openInputFile(tempdistFileName, intemp);
+                        
+                    for (int k = 0; k < calcDists.size(); k++) {
+                        int size = 0;
+                        intemp >> size; m->gobble(intemp);
+                            
+                        for (int j = 0; j < size; j++) {
+                            int seq1 = 0;
+                            int seq2 = 0;
+                            float dist = 1.0;
+                                
+                            intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
+                                
+                            seqDist tempDist(seq1, seq2, dist);
+                            calcDists[k].push_back(tempDist);
+                        }
+                    }
+                    intemp.close();
+                    m->mothurRemove(tempdistFileName);
+                }
+                #else
+                //////////////////////////////////////////////////////////////////////////////////////////////////////
+                //Windows version shared memory, so be careful when passing variables through the distSharedData struct. 
+                //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+                //Taking advantage of shared memory to pass results vectors.
+                //////////////////////////////////////////////////////////////////////////////////////////////////////
+                
+                vector<distSharedData*> pDataArray; 
+                DWORD   dwThreadIdArray[processors-1];
+                HANDLE  hThreadArray[processors-1]; 
+                
+                //Create processor worker threads.
+                for( int i=1; i<processors; i++ ){
+                    
+                    //make copy of lookup so we don't get access violations
+                    vector<SharedRAbundVector*> newLookup;
+                    for (int k = 0; k < thisItersLookup.size(); k++) {
+                        SharedRAbundVector* temp = new SharedRAbundVector();
+                        temp->setLabel(thisItersLookup[k]->getLabel());
+                        temp->setGroup(thisItersLookup[k]->getGroup());
+                        newLookup.push_back(temp);
+                    }
+                    
+                    //for each bin
+                    for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+                        if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+                        for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+                    }
+                    
+                    // Allocate memory for thread data.
+                    distSharedData* tempSum = new distSharedData(m, lines[i].start, lines[i].end, Estimators, newLookup);
+                    pDataArray.push_back(tempSum);
+                    processIDS.push_back(i);
+                    
+                    hThreadArray[i-1] = CreateThread(NULL, 0, MyDistSharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
+                }
+                
+                //parent do your part
+                driver(thisItersLookup, lines[0].start, lines[0].end, calcDists);   
+                           
+                //Wait until all threads have terminated.
+                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+                
+                //Close all thread handles and free memory allocations.
+                for(int i=0; i < pDataArray.size(); i++){
+                    for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
+                    
+                    for (int k = 0; k < calcDists.size(); k++) {
+                        int size = pDataArray[i]->calcDists[k].size();
+                        for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
+                    }
+                    
+                    CloseHandle(hThreadArray[i]);
+                    delete pDataArray[i];
+                }
+
+                #endif
+            }
+            
+            calcDistsTotals.push_back(calcDists);
+            
+            if (subsample) {  
+                
+                //clean up memory
+                for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
+                thisItersLookup.clear();
+                for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
+            }
                }
-#else
-               driver(thisLookup, 0, numGroups, calcDists);
-#endif
                
-               for (int i = 0; i < calcDists.size(); i++) {
-                       if (m->control_pressed) { break; }
-                               
-                       //initialize matrix
-                       vector< vector<float> > matrix; //square matrix to represent the distance
-                       matrix.resize(thisLookup.size());
-                       for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
-                               
-                       for (int j = 0; j < calcDists[i].size(); j++) {
-                               int row = calcDists[i][j].seq1;
-                               int column = calcDists[i][j].seq2;
-                               float dist = calcDists[i][j].dist;
-                                       
-                               matrix[row][column] = dist;
-                               matrix[column][row] = dist;
-                       }
-                       
-                       string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + ".dist";
-                       outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
-                       ofstream outDist;
-                       m->openOutputFile(distFileName, outDist);
-                       outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
-                       
-                       printSims(outDist, matrix);
-                       
-                       outDist.close();
-               }
+        if (iters != 1) {
+            //Several subsampling iterations were run: report, per calculator, the mean distance (.ave.dist) and its standard deviation (.std.dist) for every pair of groups.
+            //NOTE: calcDists is emptied at the end of every subsampled iteration, so the pair indices and distances must be read from calcDistsTotals, never from calcDists.
+            vector< vector<seqDist>  > calcAverages; calcAverages.resize(matrixCalculators.size()); 
+            for (int i = 0; i < calcAverages.size(); i++) {  //one slot per group pair, sums start at zero.
+                calcAverages[i].resize(calcDistsTotals[0][i].size());
+                
+                for (int j = 0; j < calcAverages[i].size(); j++) {
+                    calcAverages[i][j].seq1 = calcDistsTotals[0][i][j].seq1; //first iteration supplies the (row, column) of each pair
+                    calcAverages[i][j].seq2 = calcDistsTotals[0][i][j].seq2;
+                    calcAverages[i][j].dist = 0.0;
+                }
+            }
+            
+            for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
+                for (int i = 0; i < calcAverages.size(); i++) {  //for each calculator, accumulate this iteration's dists.
+                    for (int j = 0; j < calcAverages[i].size(); j++) {
+                        calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+                    }
+                }
+            }
+            
+            for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
+                for (int j = 0; j < calcAverages[i].size(); j++) {
+                    calcAverages[i][j].dist /= (float) iters;
+                }
+            }
+            
+            //find standard deviation
+            vector< vector<seqDist>  > stdDev; stdDev.resize(matrixCalculators.size());
+            for (int i = 0; i < stdDev.size(); i++) {  //initialize sums to zero.
+                stdDev[i].resize(calcDistsTotals[0][i].size());
+                
+                for (int j = 0; j < stdDev[i].size(); j++) {
+                    stdDev[i][j].seq1 = calcDistsTotals[0][i][j].seq1; //same pair layout as calcAverages
+                    stdDev[i][j].seq2 = calcDistsTotals[0][i][j].seq2;
+                    stdDev[i][j].dist = 0.0;
+                }
+            }
+            
+            for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+                for (int i = 0; i < stdDev.size(); i++) {  
+                    for (int j = 0; j < stdDev[i].size(); j++) {
+                        stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
+                    }
+                }
+            }
+
+            for (int i = 0; i < stdDev.size(); i++) {  //divide the summed squares by iters and take the square root.
+                for (int j = 0; j < stdDev[i].size(); j++) {
+                    stdDev[i][j].dist /= (float) iters;
+                    stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
+                }
+            }
+            
+            //print results
+            for (int i = 0; i < calcAverages.size(); i++) {
+                vector< vector<double> > matrix; //square matrix to represent the distance
+                matrix.resize(thisLookup.size());
+                for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
+                
+                vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
+                stdmatrix.resize(thisLookup.size());
+                for (int k = 0; k < thisLookup.size(); k++) {  stdmatrix[k].resize(thisLookup.size(), 0.0); }
+
+            
+                for (int j = 0; j < calcAverages[i].size(); j++) {
+                    int row = calcAverages[i][j].seq1;
+                    int column = calcAverages[i][j].seq2;
+                    float dist = calcAverages[i][j].dist;
+                    float stdDist = stdDev[i][j].dist;
+                    
+                    matrix[row][column] = dist;   //fill both triangles so lt and square output agree
+                    matrix[column][row] = dist;
+                    stdmatrix[row][column] = stdDist;
+                    stdmatrix[column][row] = stdDist;
+                }
+            
+                string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + ".ave.dist";
+                outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
+                ofstream outAve;
+                m->openOutputFile(distFileName, outAve);
+                outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
+                
+                printSims(outAve, matrix);
+                
+                outAve.close();
+                
+                distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + ".std.dist";
+                outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
+                ofstream outSTD;
+                m->openOutputFile(distFileName, outSTD);
+                outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
+                
+                printSims(outSTD, stdmatrix);
+                
+                outSTD.close();
+
+            }
+        }else {
+            //Exactly one iteration ran: print its distances directly. Read them from calcDistsTotals[0] because calcDists has already been emptied when subsampling.
+            for (int i = 0; i < calcDistsTotals[0].size(); i++) {
+                if (m->control_pressed) { break; }
+                
+                //initialize matrix
+                vector< vector<double> > matrix; //square matrix to represent the distance
+                matrix.resize(thisLookup.size());
+                for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
+                
+                for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                    int row = calcDistsTotals[0][i][j].seq1;
+                    int column = calcDistsTotals[0][i][j].seq2;
+                    double dist = calcDistsTotals[0][i][j].dist;
+                    
+                    matrix[row][column] = dist;
+                    matrix[column][row] = dist;
+                }
+                
+                string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + ".dist";
+                outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
+                ofstream outDist;
+                m->openOutputFile(distFileName, outDist);
+                outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
+                
+                printSims(outDist, matrix);
+                
+                outDist.close();
+            }
+        }
                
                return 0;
        }
@@ -573,7 +773,6 @@ int MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
 /**************************************************************************************************/
 int MatrixOutputCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, vector< vector<seqDist> >& calcDists) { 
        try {
-               
                vector<SharedRAbundVector*> subset;
                for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
                        
index ae38b46c5c1d60a1c6c97b5a618f1eca8d59a417..8af539ba01ae59ed07027d63f2bd9d1859b4c217 100644 (file)
 #include "inputdata.h"
 #include "groupmap.h"
 #include "validcalculator.h"
+#include "sharedsobscollectsummary.h"
+#include "sharedchao1.h"
+#include "sharedace.h"
+#include "sharednseqs.h"
+#include "sharedjabund.h"
+#include "sharedsorabund.h"
+#include "sharedjclass.h"
+#include "sharedsorclass.h"
+#include "sharedjest.h"
+#include "sharedsorest.h"
+#include "sharedthetayc.h"
+#include "sharedthetan.h"
+#include "sharedkstest.h"
+#include "whittaker.h"
+#include "sharedochiai.h"
+#include "sharedanderbergs.h"
+#include "sharedkulczynski.h"
+#include "sharedkulczynskicody.h"
+#include "sharedlennon.h"
+#include "sharedmorisitahorn.h"
+#include "sharedbraycurtis.h"
+#include "sharedjackknife.h"
+#include "whittaker.h"
+#include "odum.h"
+#include "canberra.h"
+#include "structeuclidean.h"
+#include "structchord.h"
+#include "hellinger.h"
+#include "manhattan.h"
+#include "structpearson.h"
+#include "soergel.h"
+#include "spearman.h"
+#include "structkulczynski.h"
+#include "structchi2.h"
+#include "speciesprofile.h"
+#include "hamming.h"
+#include "gower.h"
+#include "memchi2.h"
+#include "memchord.h"
+#include "memeuclidean.h"
+#include "mempearson.h"
+
 
 // aka. dist.shared()
 
@@ -46,7 +88,7 @@ private:
        };
        vector<linePair> lines;
        
-       void printSims(ostream&, vector< vector<float> >&);
+       void printSims(ostream&, vector< vector<double> >&);
        int process(vector<SharedRAbundVector*>);
        
        vector<Calculator*> matrixCalculators;
@@ -54,10 +96,10 @@ private:
        InputData* input;
        vector<SharedRAbundVector*> lookup;
        string exportFileName, output, sharedfile;
-       int numGroups, processors;
+       int numGroups, processors, iters, subsampleSize;
        ofstream out;
 
-       bool abort, allLines;
+       bool abort, allLines, subsample;
        set<string> labels; //holds labels to be used
        string outputFile, calc, groups, label, outputDir;
        vector<string>  Estimators, Groups, outputNames; //holds estimators to be used
@@ -66,6 +108,166 @@ private:
 
 };
        
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+//Parameter/result block handed to MyDistSharedThreadFunction through a single void pointer (LPVOID).
+//The creator fills in the inputs; the thread function writes its results into calcDists.
+struct distSharedData {
+    vector<SharedRAbundVector*> thisLookup;  //all groups; the thread function indexes this by row (k) and column (l)
+    vector< vector<seqDist> > calcDists;     //output: one list of seqDist(l, k, value) per calculator, filled by the thread function
+    vector<string>  Estimators;              //names of the calculators to run, e.g. "thetayc"
+    unsigned long long start;                //first row (k) handled by this thread
+    unsigned long long end;                  //one past the last row (k) handled by this thread
+    MothurOut* m;                            //used by the thread function for control_pressed and errorOut
+    
+    //default state is an empty, inert block rather than indeterminate values
+    distSharedData() : start(0), end(0), m(NULL) {}
+    
+    //initializer order follows the member declaration order above
+    distSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu)
+        : thisLookup(lu), Estimators(est), start(st), end(en), m(mout) {}
+};
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+//Thread entry point (Windows-only branch) that computes pairwise calculator values for a range of rows.
+//  lpParam - pointer to a distSharedData block; Estimators, thisLookup, start, end and m are read,
+//            and calcDists is resized to one list per constructed calculator and filled with
+//            seqDist(l, k, value) for every k in [start, end) and every l < k.
+//  returns - 0 on completion, 1 if m->control_pressed was seen after a calculation.
+//  A std::exception is reported through m->errorOut and the process exits.
+static DWORD WINAPI MyDistSharedThreadFunction(LPVOID lpParam){ 
+    distSharedData* pDataArray;
+    pDataArray = (distSharedData*)lpParam;
+    
+    try {
+        
+        //build this thread's own calculator instances from the requested estimator names
+        vector<Calculator*> matrixCalculators;
+        ValidCalculators validCalculator;
+        for (size_t i = 0; i < pDataArray->Estimators.size(); i++) {
+            const string& name = pDataArray->Estimators[i];
+            if (validCalculator.isValidCalculator("matrix", name) == true) { 
+                if (name == "sharedsobs") { 
+                    matrixCalculators.push_back(new SharedSobsCS());
+                }else if (name == "sharedchao") { 
+                    matrixCalculators.push_back(new SharedChao1());
+                }else if (name == "sharedace") { 
+                    matrixCalculators.push_back(new SharedAce());
+                }else if (name == "jabund") { 
+                    matrixCalculators.push_back(new JAbund());
+                }else if (name == "sorabund") { 
+                    matrixCalculators.push_back(new SorAbund());
+                }else if (name == "jclass") { 
+                    matrixCalculators.push_back(new Jclass());
+                }else if (name == "sorclass") { 
+                    matrixCalculators.push_back(new SorClass());
+                }else if (name == "jest") { 
+                    matrixCalculators.push_back(new Jest());
+                }else if (name == "sorest") { 
+                    matrixCalculators.push_back(new SorEst());
+                }else if (name == "thetayc") { 
+                    matrixCalculators.push_back(new ThetaYC());
+                }else if (name == "thetan") { 
+                    matrixCalculators.push_back(new ThetaN());
+                }else if (name == "kstest") { 
+                    matrixCalculators.push_back(new KSTest());
+                }else if (name == "sharednseqs") { 
+                    matrixCalculators.push_back(new SharedNSeqs());
+                }else if (name == "ochiai") { 
+                    matrixCalculators.push_back(new Ochiai());
+                }else if (name == "anderberg") { 
+                    matrixCalculators.push_back(new Anderberg());
+                }else if (name == "kulczynski") { 
+                    matrixCalculators.push_back(new Kulczynski());
+                }else if (name == "kulczynskicody") { 
+                    matrixCalculators.push_back(new KulczynskiCody());
+                }else if (name == "lennon") { 
+                    matrixCalculators.push_back(new Lennon());
+                }else if (name == "morisitahorn") { 
+                    matrixCalculators.push_back(new MorHorn());
+                }else if (name == "braycurtis") { 
+                    matrixCalculators.push_back(new BrayCurtis());
+                }else if (name == "whittaker") { 
+                    matrixCalculators.push_back(new Whittaker());
+                }else if (name == "odum") { 
+                    matrixCalculators.push_back(new Odum());
+                }else if (name == "canberra") { 
+                    matrixCalculators.push_back(new Canberra());
+                }else if (name == "structeuclidean") { 
+                    matrixCalculators.push_back(new StructEuclidean());
+                }else if (name == "structchord") { 
+                    matrixCalculators.push_back(new StructChord());
+                }else if (name == "hellinger") { 
+                    matrixCalculators.push_back(new Hellinger());
+                }else if (name == "manhattan") { 
+                    matrixCalculators.push_back(new Manhattan());
+                }else if (name == "structpearson") { 
+                    matrixCalculators.push_back(new StructPearson());
+                }else if (name == "soergel") { 
+                    matrixCalculators.push_back(new Soergel());
+                }else if (name == "spearman") { 
+                    matrixCalculators.push_back(new Spearman());
+                }else if (name == "structkulczynski") { 
+                    matrixCalculators.push_back(new StructKulczynski());
+                }else if (name == "speciesprofile") { 
+                    matrixCalculators.push_back(new SpeciesProfile());
+                }else if (name == "hamming") { 
+                    matrixCalculators.push_back(new Hamming());
+                }else if (name == "structchi2") { 
+                    matrixCalculators.push_back(new StructChi2());
+                }else if (name == "gower") { 
+                    matrixCalculators.push_back(new Gower());
+                }else if (name == "memchi2") { 
+                    matrixCalculators.push_back(new MemChi2());
+                }else if (name == "memchord") { 
+                    matrixCalculators.push_back(new MemChord());
+                }else if (name == "memeuclidean") { 
+                    matrixCalculators.push_back(new MemEuclidean());
+                }else if (name == "mempearson") { 
+                    matrixCalculators.push_back(new MemPearson());
+                }
+            }
+        }
+        
+        //one result list per calculator that was actually constructed
+        pDataArray->calcDists.resize(matrixCalculators.size());
+        
+        //rows and columns are used as int indexes (seqDist is built from them), so narrow once, explicitly
+        const int numGroups = static_cast<int>(pDataArray->thisLookup.size());
+        const int firstRow = static_cast<int>(pDataArray->start);
+        const int lastRow = static_cast<int>(pDataArray->end);
+        bool cancelled = false;
+        
+        for (int k = firstRow; (k < lastRow) && !cancelled; k++) { // pass cdd each set of groups to compare
+            
+            //l < k, so a group is never compared to itself
+            for (int l = 0; (l < k) && !cancelled; l++) {
+                
+                //the pair being compared; handed to calculators that only need the two groups
+                vector<SharedRAbundVector*> pairOnly;
+                pairOnly.push_back(pDataArray->thisLookup[k]); pairOnly.push_back(pDataArray->thisLookup[l]); 
+                
+                //the pair followed by every other group, for calculators that need everyone to calc for a pair.
+                //built at most once per pair so a second such calculator does not see the other groups twice,
+                //and so pair-only calculators never receive the extra groups.
+                vector<SharedRAbundVector*> pairAndRest;
+                
+                for (size_t i = 0; i < matrixCalculators.size(); i++) {
+                    
+                    bool needsAll = matrixCalculators[i]->getNeedsAll();
+                    if (needsAll && pairAndRest.empty()) {
+                        pairAndRest = pairOnly;
+                        for (int w = 0; w < numGroups; w++) {
+                            if ((w != k) && (w != l)) { pairAndRest.push_back(pDataArray->thisLookup[w]); }
+                        }
+                    }
+                    
+                    vector<double> tempdata = matrixCalculators[i]->getValues(needsAll ? pairAndRest : pairOnly); //saves the calculator outputs
+                    
+                    //stop without recording this value, but fall through to the cleanup below
+                    if (pDataArray->m->control_pressed) { cancelled = true; break; }
+                    
+                    seqDist temp(l, k, tempdata[0]);
+                    pDataArray->calcDists[i].push_back(temp);
+                }
+            }
+        }
+        
+        //the calculators are owned by this function; free them on both the normal and the cancelled path
+        for (size_t i = 0; i < matrixCalculators.size(); i++) {  delete matrixCalculators[i]; }
+        
+        return cancelled ? 1 : 0;
+        
+    }
+    catch(exception& e) {
+        pDataArray->m->errorOut(e, "MatrixOutputCommand", "MyDistSharedThreadFunction");
+        exit(1);
+    }
+} 
+#endif
        
 #endif
 
diff --git a/metastats.h b/metastats.h
deleted file mode 100644 (file)
index 5bab288..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef METASTATS2
-#define METASTATS2
-
-/*
- *  metastats.h
- *  Mothur
- *
- *  Created by westcott on 9/16/10.
- *  Copyright 2010 Schloss Lab. All rights reserved.
- *
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <time.h>
-#include <math.h>
-#include "fisher2.h"
-
-void testp(double *permuted_ttests,int *B,double *permuted,double 
-          *Imatrix,int *nc,int *nr,int *g,double *Tinitial,double *ps);
-void permute_matrix(double *Imatrix,int *nc,int *nr,double 
-                   *permuted,int *g,double *trial_ts,double *Tinitial,double 
-                   *counter);
-void permute_array(int *array, int n);
-void calc_twosample_ts(double *Pmatrix,int *g,int *nc,int *nr,double 
-                      *Ts,double *Tinitial,double *counter1);
-void meanvar(double *pmatrix,int *g,int *nr,int *nc,double *storage);
-void start(double *Imatrix,int *g,int *nr,int *nc,double *testing,
-                        double storage[][9]);
-
-int metastat_main (char*, int, int, double, int, double**, int);
-
-#ifdef __cplusplus        
-}
-#endif
-
-#endif
-
-
-
diff --git a/metastats2.c b/metastats2.c
deleted file mode 100644 (file)
index 70edc3d..0000000
+++ /dev/null
@@ -1,559 +0,0 @@
-
-#include "metastats.h"
-
-//The following code has been modified using the original Metastats program from White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).
-
-int metastat_main (char* outputFileName, int numRows, int numCols, double threshold, int numPermutations, double** data, int secondGroupingStart){
-       
-       int size,c=0,i=0,j=0,counter=0, bflag=0; 
-       int B=numPermutations;
-       int row = numRows;
-       int col = numCols;
-       int g = secondGroupingStart;
-       double thresh=threshold;
-       double min=0; 
-       
-       char output[1024];
-       strcpy(output, outputFileName);
-       FILE *out;
-       
-       if (g>=col || g<=0){
-               printf("Check your g value\n"); 
-       }
-       
-       // Initialize the matrices
-       size = row*col;
-       double matrix[row][col];
-       double pmatrix[size],permuted[size];  
-       double storage[row][9];
-       
-       for (i=0;i<row;i++){
-               for (j =0;j<9;j++){
-                       storage[i][j]=0;                
-               }       
-       }
-       
-       for(i=0; i<row; i++){
-               for(j=0; j<col;j++){
-                       matrix[i][j]=data[i][j];
-                       pmatrix[c]=0; // initializing to zero
-                       permuted[c]=0;
-                       c++;
-               }
-       }
-       
-       // Produces the sum of each column
-       double total[col],total1=0,total2=0;
-       double ratio[col];
-       
-       for(i=0;i<col;i++){
-               total[i]=0;
-               ratio[i]=0; }
-       
-       for(i=0; i<col; i++){
-               for(j=0;j<row;j++){
-                       total[i]=total[i]+matrix[j][i];                 
-               }
-       }
-       
-       for(i=0;i<g-1;i++){
-               total1=total1+total[i];}
-       
-       for(i=g-1;i<col;i++){
-               total2=total2+total[i];}
-       
-       
-       // Creates the ratios by first finding the minimum of totals
-       min = total[0];
-       if (col==2){
-               if (total[0]<total[1]){
-                       min = total[1];}        
-       }
-       if (col >2){
-               for(i=1;i<col;i++){
-                       if (min > total[i]){
-                               min = total[i];}
-               }
-       }
-       if (min<=0){
-               printf("Error, the sum of one of the columns <= 0.");
-               return 0;       
-       }
-       
-       
-       // Ratio time...
-       for(i=0;i<col;i++){
-               ratio[i]=total[i]/min;
-       }
-       
-       //Change matrix into an array as received by R for compatibility.
-       
-       c=0;
-       for(i=0;i<col;i++){
-               for(j=0;j<row;j++){
-                       pmatrix[c]=matrix[j][i];
-                       c++;
-               }
-       }
-       
-       if(row == 1){
-               for (i =0; i<col;i++){
-                       pmatrix[i]=pmatrix[i]/ratio[i];
-               }
-       }
-       else {
-               counter = 0;
-               j=-1;
-               for (i=0; i<size; i++) {
-                       if (counter % row == 0) {
-                               j++;
-                       }
-                       pmatrix[i]=pmatrix[i]/ratio[j];
-                       counter++; 
-               }   
-       }
-       // pass everything to the rest of the code using pointers. then 
-       // write to output file. below pointers for most of the values are 
-       // created to send everything by reference.
-       
-       int ptt_size, *permutes,*nc,*nr,*gvalue;
-       
-       nc = &col;
-       nr = &row;
-       gvalue = &g;
-       
-       permutes = &B;
-       ptt_size = B*row;
-       
-       //changing ptt_size to row
-       double permuted_ttests[row], pvalues[row], tinitial[row];
-       
-       for(i=0;i<row;i++){
-               permuted_ttests[i]=0;}
-       
-       for(i=0;i<row;i++){
-               pvalues[i]=0;
-               tinitial[i]=0; }
-       
-       // Find the initial values for the matrix.
-       start(pmatrix,gvalue,nr,nc,tinitial,storage);
-       
-       // Start the calculations.
-       
-       if ( (col==2) || ((g-1)<8) || ((col-g+1) < 8) ){  
-               
-               double fish[row], fish2[row];
-               for(i=0;i<row;i++){
-                       fish[i]=0;
-                       fish2[i]=0;}
-               
-               for(i=0;i<row;i++){
-                       
-                       for(j=0;j<g-1;j++){
-                               fish[i]=fish[i]+matrix[i][j];
-                       }
-                       
-                       for(j=g-1;j<col;j++){ 
-                               fish2[i]=fish2[i]+matrix[i][j];
-                       }
-                       
-                       double  f11,f12,f21,f22;
-                       
-                       f11=fish[i];
-                       f12=fish2[i];
-                       
-                       f21=total1-f11;
-                       f22=total2-f12;
-                       
-                       double data[] = {f11, f12, f21, f22};
-                       
-                       // CONTINGENGCY TABLE:
-                       //   f11   f12
-                       //   f21   f22
-                       
-                       int *nr, *nc, *ldtabl, *work;
-                       int nrow=2, ncol=2, ldtable=2, workspace=100000;
-                       double *expect, *prc, *emin,*prt,*pre;
-                       double e=0, prc1=0, emin1=0, prt1=0, pre1=0;
-                       
-                       nr = &nrow;
-                       nc = &ncol;
-                       ldtabl=&ldtable;
-                       work = &workspace;
-                       
-                       expect = &e;
-                       prc = &prc1;
-                       emin=&emin1;
-                       prt=&prt1;
-                       pre=&pre1;
-                       
-                       //MothurFisher fishtere;
-                       //double mothurFex = fishtere.fexact(f11, f12, f21, f22);
-                       
-                       fexact(nr,nc,data, ldtabl,expect,prc,emin,prt,pre,work);
-                       
-                       if (*pre>.999999999){
-                               *pre=1;
-                       }
-                       
-                       //printf("feaxt = %f\t%f\t%f\t%f\t%f\t%f\n", *expect, *pre, f11, f12, f21, f22);
-                       storage[i][8] = *pre;
-                       pvalues[i]=*pre;
-               }
-       }
-       else{
-               
-               testp(permuted_ttests, permutes, permuted,pmatrix, nc, nr, gvalue,tinitial,pvalues);
-               
-               // Checks to make sure the matrix isn't sparse.
-               double sparse[row], sparse2[row];
-               for(i=0;i<row;i++){
-                       sparse[i]=0;
-                       sparse2[i]=0;}
-               
-               c=0;    
-               for(i=0;i<row;i++){
-                       
-                       for(j=0;j<g-1;j++){
-                               sparse[i]=sparse[i]+matrix[i][j];
-                       }
-                       
-                       if(sparse[i] < (double)(g-1)){
-                               c++;
-                       }
-                       for(j=g-1;j<col;j++){ // ?<= for col
-                               sparse2[i]=sparse2[i]+matrix[i][j];
-                       }
-                       
-                       if( (sparse2[i] <(double)(col-g+1))) {
-                               c++;
-                       }
-                       
-                       if (c==2){
-                               c=0;
-                               
-                               double  f11,f12,f21,f22;
-                               
-                               f11=sparse[i];
-                               sparse[i]=0;
-                               
-                               f12=sparse2[i];
-                               sparse2[i]=0;
-                               
-                               f21=total1-f11;
-                               f22=total2-f12;
-                               
-                               double data[] = {f11, f12, f21, f22};
-                               
-                               int *nr, *nc, *ldtabl, *work;
-                               int nrow=2, ncol=2, ldtable=2, workspace=10000000; // I added two zeros for larger data sets
-                               double *expect, *prc, *emin,*prt,*pre;
-                               double e=0, prc1=0, emin1=0, prt1=0, pre1=0;
-                               
-                               nr = &nrow;
-                               nc = &ncol;
-                               ldtabl=&ldtable;
-                               work = &workspace;
-                               
-                               expect = &e;
-                               prc = &prc1;
-                               emin=&emin1;
-                               prt=&prt1;
-                               pre=&pre1;
-                               
-                               fexact(nr,nc,data, ldtabl,expect,prc,emin,prt,pre,work);
-                               
-                               if (*pre>.999999999){
-                                       *pre=1;
-                               }
-                               storage[i][8] = *pre;
-                               pvalues[i]=*pre;
-                       }
-               }  
-               // End of else statement
-               bflag = 1;
-       }
-       
-       // Calculates the mean of counts (not normalized)
-       double temp[row][2];
-       
-       for(j=0;j<row;j++){
-               for(i=0;i<2;i++){
-                       temp[j][i]=0;
-               }
-       }
-       
-       for (j=0;j<row;j++){
-               for (i=1; i<=(g-1); i++){
-                       temp[j][0]=temp[j][0]+matrix[j][i-1];
-               }
-               temp[j][0]= (double) temp[j][0]/(g-1);
-               for(i=g;i<=col;i++){
-                       temp[j][1]=temp[j][1]+matrix[j][i-1];
-               }
-               temp[j][1]= (double) temp[j][1]/(col-g+1);
-       }
-       
-       for(i=0;i<row;i++){
-               storage[i][3]=temp[i][0];
-               storage[i][7]=temp[i][1];
-               storage[i][8]=pvalues[i];
-       }
-       
-       // BACKUP checks
-       
-       for (i=0;i<row;i++){
-               if(pvalues[i]<thresh){
-                       printf("Feature %d is significant, p = %.10lf \n",i+1,pvalues[i]);
-               }       
-       }
-       
-       // And now we write the files to a text file.
-       struct tm *local;
-       time_t t;
-       t = time(NULL);
-       local = localtime(&t);
-       
-       out = fopen(output,"w");
-       
-       fprintf(out,"Local time and date of test: %s\n", asctime(local));
-       fprintf(out,"# rows = %d, # col = %d, g = %d\n\n",row,col,g);
-       if (bflag == 1){
-               fprintf(out,"%d permutations\n\n",B);   
-       }
-       
-       //output column headings - not really sure... documentation labels 9 columns, there are 10 in the output file
-       //storage 0 = meanGroup1 - line 529, 1 = varGroup1 - line 532, 2 = err rate1 - line 534, 3 = mean of counts group1?? - line 291, 4 = meanGroup2 - line 536, 5 = varGroup2 - line 539, 6 = err rate2 - line 541, 7 = mean of counts group2?? - line 292, 8 = pvalues - line 293
-       fprintf(out, "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean_of_counts(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tmean_of_counts(group1)\tp-value\n");
-    
-       for(i=0; i<row; i++){
-               fprintf(out,"%d",(i+1));
-               
-               for(j=0; j<9;j++){
-                       fprintf(out,"\t%.12lf",storage[i][j]);
-               }
-               fprintf(out,"\n");
-       }  
-       
-       fprintf(out,"\n \n");
-       
-       // fclose(jobj);
-       fclose(out);
-       
-       return 0;
-}
-
-void testp(double *permuted_ttests,int *B,double *permuted,
-                  double *Imatrix,int *nc,int *nr,int *g,double *Tinitial,double 
-                  *ps) {
-       
-       double Tvalues[*nr];
-       int a, b, n, j;
-       
-       a = *B;
-       b = *nr;
-       n = a*b;
-       
-       double counter[b];
-       
-       for(j=0;j<b;j++){
-               counter[j]=0;
-       }    
-       
-       for (j=1; j<=*B; j++){
-               permute_matrix(Imatrix,nc,nr,permuted,g,Tvalues,Tinitial,counter);
-               // for(i=0;i<*nr;i++){
-               //   permuted_ttests[k]=fabs(Tvalues[i]);
-               //    k++;
-    }
-       
-       
-       for(j=0;j<*nr;j++){
-               ps[j]=((counter[j]+1)/(double)(a+1));
-       }
-}      
-
-void permute_matrix(double *Imatrix,int *nc,int *nr,double *permuted,
-                                       int *g,double *trial_ts,double *Tinitial,double *counter1){
-       
-       int i=0,j=0,n=0,a=0,b=0,f=0,c=0,k=0;
-       
-       a = *nr; // number of rows
-       b = *nc;
-       n = a*b;
-       
-       int y[b];
-       
-       for (i=1; i<=*nc; i++){
-               y[i-1] = i;
-       }
-       
-       permute_array(y, b); 
-       
-       for (i=0; i<*nc; i++){
-               f = y[i]; //column number
-               c=1;
-               c*=(f-1);
-               c*=a;
-               if (f == 1){
-                       c = 0;
-               } // starting value position in the Imatrix
-               for(j=1; j<=*nr; j++){
-                       permuted[k] = Imatrix[c];
-                       c++;
-                       k++;
-               }
-       }
-       
-       calc_twosample_ts(permuted,g,nc,nr,trial_ts,Tinitial,counter1);
-}
-
-void permute_array(int *array, int n) {
-       static int seeded = 0;
-       int i;
-       
-       if (! seeded) {
-               seeded = 1;
-               srand(time(NULL));
-       }
-       
-       for (i = 0; i < n; i++) {
-               int selection = rand() % (n - i);
-               int tmp = array[i + selection];
-               array[i + selection] = array[i];
-               array[i] = tmp;
-       }
-}
-
-void calc_twosample_ts(double *Pmatrix,int *g,int *nc,int *nr,
-                                          double *Ts,double *Tinitial,double *counter) {
-       int i,a;
-       a = *nr;
-       a*=4;
-       
-       double C1[*nr][3], C2[*nr][3], storage[a],tool[a];
-       double nrows,ncols,gvalue, xbardiff=0, denom=0;
-       
-       nrows = (double) *nr;
-       ncols = (double) *nc;
-       gvalue= (double) *g;
-       
-    meanvar(Pmatrix,g,nr,nc,storage);
-    for(i=0;i<=a-1;i++){
-               tool[i]=storage[i];
-    }
-    for (i=0; i<*nr;i++){
-               C1[i][0]=tool[i];
-               C1[i][1]=tool[i+*nr+*nr];
-               C1[i][2]=C1[i][1]/(gvalue-1);
-               
-               C2[i][0]=tool[i+*nr];
-               C2[i][1]=tool[i+*nr+*nr+*nr]; // var group 2 
-               C2[i][2]=C2[i][1]/(ncols-gvalue+1);
-    }
-    
-    for (i=0; i<*nr; i++){
-               xbardiff = C1[i][0]-C2[i][0];
-               denom = sqrt(C1[i][2]+C2[i][2]);
-               Ts[i]=fabs(xbardiff/denom);
-               if (fabs(Ts[i])>(fabs(Tinitial[i])+.0000000000001)){ //13th place
-                       counter[i]++;
-               }
-    }  
-}
-
-void meanvar(double *pmatrix,int *g,int *nr,int *nc,double *store){
-       double temp[*nr], temp2[*nr],var[*nr],var2[*nr],a,b;
-       
-       int i,m,k,l,n;
-       
-       a = (double) *g-1;          
-       b = (double) (*nc-a);
-       
-       for (i = 0; i<*nr; i++){
-               temp[i]=0;
-               temp2[i]=0;
-               var[i]=0;
-               var2[i]=0;
-       }
-       
-       k = *nr; // number of rows 
-       l = *nc;
-       n = k*l;        
-       
-    m=0;
-    m=*g-1;
-    k=*nr;
-    m*=k; // m = g * nr now
-    for (i=0;i<m;i++){
-               temp[i%k]=temp[i%k]+pmatrix[i];
-    }
-    for (i=0;i<n;i++){
-               temp2[i%k]=temp2[i%k]+pmatrix[i];
-    }
-    for (i=0;i<*nr;i++){
-               temp2[i]=temp2[i]-temp[i];
-    }
-    for (i=0;i<=*nr-1;i++){
-               store[i]=temp[i]/a;
-               store[i+*nr]=temp2[i]/b;
-    }
-    
-    // That completes the mean calculations.
-    
-    for (i=0;i<m;i++){
-               var[i%k]=var[i%k]+pow((pmatrix[i]-store[i%k]),2);
-    }
-    for (i=m;i<n;i++){
-               var2[i%k]=var2[i%k]+pow((pmatrix[i]-store[(i%k)+*nr]),2);
-    }
-    
-    for (i=0;i<=*nr-1;i++){
-               store[i+2*k]=var[i]/(a-1);
-               store[i+3*k]=var2[i]/(b-1);
-    }
-    // That completes var calculations.
-}
-
-void start(double *Imatrix,int *g,int *nr,int *nc,double *initial,
-                  double storage[][9]){
-       int i, a = *nr;
-       a*=4;
-       
-       double store[a], tool[a], C1[*nr][3], C2[*nr][3];
-       double nrows,ncols,gvalue, xbardiff=0, denom=0;
-       
-       nrows = (double) *nr;
-       ncols = (double) *nc;
-       gvalue= (double) *g;
-       
-       meanvar(Imatrix,g,nr,nc,store);
-       
-       for(i=0;i<=a-1;i++){
-               tool[i]=store[i];
-       }
-       for (i=0; i<*nr;i++){
-               C1[i][0]=tool[i]; //mean group 1
-               storage[i][0]=C1[i][0];
-               C1[i][1]=tool[i+*nr+*nr]; // var group 1
-               storage[i][1]=C1[i][1];
-               C1[i][2]=C1[i][1]/(gvalue-1);
-               storage[i][2]=sqrt(C1[i][2]);
-               
-               C2[i][0]=tool[i+*nr]; // mean group 2
-               storage[i][4]=C2[i][0];    
-               C2[i][1]=tool[i+*nr+*nr+*nr]; // var group 2 
-               storage[i][5]=C2[i][1];        
-               C2[i][2]=C2[i][1]/(ncols-gvalue+1);
-               storage[i][6]=sqrt(C2[i][2]);   
-       }
-       for (i=0; i<*nr; i++){
-               xbardiff = C1[i][0]-C2[i][0];
-               denom = sqrt(C1[i][2]+C2[i][2]);
-               initial[i]=fabs(xbardiff/denom);
-       }                                                                                       
-}
-
-
-
-
index b7aa7e5fcb3bcc809c89ac3cc82c1ecae9d6272a..4744424d426d61243894d4779fa60dcd20781be3 100644 (file)
@@ -8,9 +8,8 @@
  */
 
 #include "metastatscommand.h"
-#include "metastats.h"
 #include "sharedutilities.h"
-#include "mothurmetastats.h"
+
 
 //**********************************************************************************************************************
 vector<string> MetaStatsCommand::setParameters(){      
@@ -232,21 +231,19 @@ int MetaStatsCommand::execute(){
                //only 1 combo
                if (numGroups == 2) { processors = 1; }
                else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
-               
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       if(processors != 1){
-                               int numPairs = namesOfGroupCombos.size();
-                               int numPairsPerProcessor = numPairs / processors;
+
+        if(processors != 1){
+            int numPairs = namesOfGroupCombos.size();
+            int numPairsPerProcessor = numPairs / processors;
                        
-                               for (int i = 0; i < processors; i++) {
-                                       int startPos = i * numPairsPerProcessor;
-                                       if(i == processors - 1){
-                                               numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
-                                       }
-                                       lines.push_back(linePair(startPos, numPairsPerProcessor));
-                               }
-                       }
-               #endif
+            for (int i = 0; i < processors; i++) {
+                int startPos = i * numPairsPerProcessor;
+                if(i == processors - 1){
+                    numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
+                }
+                lines.push_back(linePair(startPos, numPairsPerProcessor));
+            }
+        }
                
                //as long as you are not at the end of the file or done wih the lines you want
                while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
@@ -339,13 +336,13 @@ int MetaStatsCommand::execute(){
 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
        try {
                
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               
                                if(processors == 1){
                                        driver(0, namesOfGroupCombos.size(), thisLookUp);
                                }else{
                                        int process = 1;
                                        vector<int> processIDS;
-               
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                        //loop through and create all the processes you want
                                        while (process != processors) {
                                                int pid = fork();
@@ -371,11 +368,67 @@ int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
                                                int temp = processIDS[i];
                                                wait(&temp);
                                        }
-                               }
-               #else
-                               driver(0, namesOfGroupCombos.size(), thisLookUp);
-               #endif
+        #else
+                    
+                    //////////////////////////////////////////////////////////////////////////////////////////////////////
+                    //The Windows version uses threads that share memory, so be careful when passing variables through the metastatsData struct.
+                    //The fork() above clones the process, so memory is separate there, but that is not the case on Windows.
+                    //We take advantage of the shared memory to pass back the results vectors.
+                    //////////////////////////////////////////////////////////////////////////////////////////////////////
+                    
+                    vector<metastatsData*> pDataArray; 
+                    DWORD   dwThreadIdArray[processors-1];
+                    HANDLE  hThreadArray[processors-1]; 
+                    
+                    //Create processor worker threads.
+                    for( int i=1; i<processors; i++ ){
+                        
+                        //make copy of lookup so we don't get access violations
+                        vector<SharedRAbundVector*> newLookup;
+                        vector<string> designMapGroups;
+                        for (int k = 0; k < thisLookUp.size(); k++) {
+                            SharedRAbundVector* temp = new SharedRAbundVector();
+                            temp->setLabel(thisLookUp[k]->getLabel());
+                            temp->setGroup(thisLookUp[k]->getGroup());
+                            newLookup.push_back(temp);
+                            designMapGroups.push_back(designMap->getGroup(thisLookUp[k]->getGroup()));
+                        }
+                        
+                        //for each bin
+                        for (int k = 0; k < thisLookUp[0]->getNumBins(); k++) {
+                            if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+                            for (int j = 0; j < thisLookUp.size(); j++) { newLookup[j]->push_back(thisLookUp[j]->getAbundance(k), thisLookUp[j]->getGroup()); }
+                        }
+                        
+                        // Allocate memory for thread data.
+                        metastatsData* tempSum = new metastatsData(sharedfile, outputDir, m, lines[i].start, lines[i].num, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold);
+                        pDataArray.push_back(tempSum);
+                        processIDS.push_back(i);
+                        
+                        hThreadArray[i-1] = CreateThread(NULL, 0, MyMetastatsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
+                    }
+                    
+                    //do my part
+                                       driver(lines[0].start, lines[0].num, thisLookUp);
+                    
+                    //Wait until all threads have terminated.
+                    WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+                    
+                    //Close all thread handles and free memory allocations.
+                    for(int i=0; i < pDataArray.size(); i++){
+                        for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) {  delete pDataArray[i]->thisLookUp[j];  } 
+                        for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {  
+                            outputNames.push_back(pDataArray[i]->outputNames[j]);
+                            outputTypes["metastats"].push_back(pDataArray[i]->outputNames[j]);
+                        }
+                                                
+                        CloseHandle(hThreadArray[i]);
+                        delete pDataArray[i];
+                    }
+        #endif
 
+                               }
+               
                return 0;
                
        }
@@ -394,7 +447,7 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
                        //get set names
                        string setA = namesOfGroupCombos[c][0]; 
                        string setB = namesOfGroupCombos[c][1];
-               //cout << setA << '\t' << setB << endl;
+               
                        //get filename
                        string outputFileName = outputDir +  m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
                        outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
@@ -424,24 +477,31 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
                                        setACount++;
                                }
                        }
-                       
-                       //for (int i = 0; i < subset.size(); i++) { cout << designMap->getGroup(subset[i]->getGroup()) << endl; }
-                       //cout << setACount << endl;
-                       
+                                               
                        if ((setACount == 0) || (setBCount == 0))  { 
                                m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
                                outputNames.pop_back();
                        }else {
+                
+                ofstream outTemp;
+                string tempOut = outputDir + "data." + setA + "-" + setB + ".matrix";
+                m->openOutputFile(tempOut, outTemp);
+                for (int i = 0; i < subset.size(); i++) { outTemp << '\t' << subset[i]->getGroup(); }
+                outTemp << endl;
+                
+                
                                //fill data
                                for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
                                        //data[j] = new double[subset.size()];
                                        data2[j].resize(subset.size(), 0.0);
+                    outTemp << "OTU" << (j+1);
                                        for (int i = 0; i < subset.size(); i++) {
-                                               //data[j][i] = (subset[i]->getAbundance(j));
                                                data2[j][i] = (subset[i]->getAbundance(j));
+                        outTemp << '\t' << subset[i]->getAbundance(j);
                                        }
+                    outTemp << endl;
                                }
-                               
+                               outTemp.close();
                                m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
                                //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
                                
@@ -449,7 +509,6 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
                                MothurMetastats mothurMeta(threshold, iters);
                                mothurMeta.runMetastats(outputFileName , data2, setACount);
                                m->mothurOutEndLine();
-                               
                                m->mothurOutEndLine(); 
                        }
                        
index 88e32acc0426a15525b1278bd5fc6d9c4d6392fc..b4800b71d75bcda1bfa5a39db6f8191849d0b348 100644 (file)
@@ -13,6 +13,7 @@
 #include "command.hpp"
 #include "inputdata.h"
 #include "sharedrabundvector.h"
+#include "mothurmetastats.h"
 
 class MetaStatsCommand : public Command {
 
@@ -55,5 +56,106 @@ private:
        int driver(int, int, vector<SharedRAbundVector*>&);
 };
 
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct metastatsData {
+    vector<SharedRAbundVector*> thisLookUp;
+    vector< vector<string> > namesOfGroupCombos;
+    vector<string> designMapGroups;
+    vector<string> outputNames;
+       int start;
+       int num, iters;
+       float threshold;
+       MothurOut* m;
+       string sharedfile;
+    string outputDir;
+       
+       metastatsData(){}
+       metastatsData(string sf, string oDir, MothurOut* mout, int st, int en, vector< vector<string> > ns, vector<SharedRAbundVector*> lu, vector<string> dg, int i, float thr) {
+               sharedfile = sf;
+        outputDir = oDir;
+               m = mout;
+               start = st;
+               num = en;
+        namesOfGroupCombos = ns;
+        thisLookUp = lu;
+        designMapGroups = dg;
+        iters = i;
+        threshold = thr;
+       }
+};
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyMetastatsThreadFunction(LPVOID lpParam){ // Windows thread entry point: runs comparisons [start, start+num) described by the metastatsData in lpParam; returns 0 when done
+       metastatsData* pDataArray;
+       pDataArray = (metastatsData*)lpParam; // despite the name, this points at a single metastatsData, not an array
+       
+       try {
+               
+        //for each pair of sets assigned to this thread
+               for (int c = pDataArray->start; c < (pDataArray->start+pDataArray->num); c++) {
+                       
+                       //get set names
+                       string setA = pDataArray->namesOfGroupCombos[c][0]; 
+                       string setB = pDataArray->namesOfGroupCombos[c][1];
+            
+                       //build the output filename and record it in the struct so the caller can collect it later
+                       string outputFileName = pDataArray->outputDir +  pDataArray->m->getRootName(pDataArray->m->getSimpleName(pDataArray->sharedfile)) + pDataArray->thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
+                       pDataArray->outputNames.push_back(outputFileName); 
+                       
+                       vector< vector<double> > data2; data2.resize(pDataArray->thisLookUp[0]->getNumBins()); // one row per bin
+                       
+                       vector<SharedRAbundVector*> subset;
+                       int setACount = 0;
+                       int setBCount = 0;
+                       for (int i = 0; i < pDataArray->thisLookUp.size(); i++) {
+                               //is this group for a set we want to compare??
+                               //sorting the sets by putting setB at the back and setA in the front
+                               if (pDataArray->designMapGroups[i] == setB) {  
+                                       subset.push_back(pDataArray->thisLookUp[i]);
+                                       setBCount++;
+                               }else if (pDataArray->designMapGroups[i] == setA) {
+                                       subset.insert(subset.begin()+setACount, pDataArray->thisLookUp[i]); // goes after the setA members seen so far, ahead of every setB member
+                                       setACount++;
+                               }
+                       }
+            
+                       if ((setACount == 0) || (setBCount == 0))  { 
+                               pDataArray->m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); pDataArray->m->mothurOutEndLine(); 
+                               pDataArray->outputNames.pop_back(); // undo the filename recorded above, since this comparison is skipped
+                       }else {
+                               //fill data: data2[bin][sample] = abundance, with samples in subset order (setA first, then setB)
+                               for (int j = 0; j < pDataArray->thisLookUp[0]->getNumBins(); j++) {
+                                       data2[j].resize(subset.size(), 0.0);
+                                       for (int i = 0; i < subset.size(); i++) {
+                                               data2[j][i] = (subset[i]->getAbundance(j));
+                                       }
+                               }
+                               
+                               pDataArray->m->mothurOut("Comparing " + setA + " and " + setB + "..."); pDataArray->m->mothurOutEndLine(); 
+                               
+                               pDataArray->m->mothurOutEndLine();
+                               MothurMetastats mothurMeta(pDataArray->threshold, pDataArray->iters);
+                               mothurMeta.runMetastats(outputFileName, data2, setACount); // setACount is the column index where setB's samples begin
+                               pDataArray->m->mothurOutEndLine();
+                               pDataArray->m->mothurOutEndLine(); 
+                       }
+        }
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "MetaStatsCommand", "MyMetastatsThreadFunction");
+               exit(1); // NOTE(review): exit() ends the whole program, not just this worker thread
+       }
+} 
+#endif
+
+
+
 #endif
 
index c9914229c9d41c72da266a46d5f26516e0f60fe4..accdb2962c198988c457dc8f837b8817dc2799f0 100644 (file)
@@ -46,7 +46,7 @@ int main(int argc, char *argv[]){
 
                m->setFileName(logFileName);
                
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        system("clear");
                #else
                        system("CLS");
@@ -57,7 +57,7 @@ int main(int argc, char *argv[]){
                
                        //add / to name if needed
                        string lastChar = temp.substr(temp.length()-1);
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                if (lastChar != "/") { temp += "/"; }
                        #else
                                if (lastChar != "\\") { temp += "\\"; } 
@@ -88,7 +88,7 @@ int main(int argc, char *argv[]){
                
                if (outputHeader)  {
                        //version
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                #if defined (__APPLE__) || (__MACH__)
                                        m->mothurOutJustToLog("Mac version");
                                        m->mothurOutEndLine(); m->mothurOutEndLine();
index 50344e24bfea85e8b7c4b829b5136e3e63d34fc7..2c143e8667786c5740f0aa6324ea4062eab66b76 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -42,6 +42,7 @@
 #include <cmath>
 #include <math.h>
 #include <algorithm>
+#include <numeric>
 
 //misc
 #include <cerrno>
@@ -53,7 +54,7 @@
 #endif
 /***********************************************************************/
 
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
        #include <sys/wait.h>
        #include <sys/time.h>
        #include <sys/resource.h>
@@ -130,9 +131,9 @@ struct clusterNode {
 struct seqDist {
        int seq1;
        int seq2;
-       float dist;
+       double dist;
        seqDist() {}
-       seqDist(int s1, int s2, float d) : seq1(s1), seq2(s2), dist(d) {}
+       seqDist(int s1, int s2, double d) : seq1(s1), seq2(s2), dist(d) {}
        ~seqDist() {}
 };
 /************************************************************/
index ae822254c01ccdf7359ca319c44a2db5005f1fc4..56789736e5b0d8d9bbf273baa4a07c36efb86ec8 100644 (file)
@@ -26,202 +26,182 @@ MothurMetastats::MothurMetastats(double t, int n) {
 /***********************************************************/
 MothurMetastats::~MothurMetastats() {}
 /***********************************************************/
-//main metastats function
-int MothurMetastats::runMetastats(string outputFileName, vector< vector<double> >& data, int secondGroupingStart) {
-       try {
-               int bflag = 0;
-               row = data.size();               //numBins
+ //main metastats function
+int MothurMetastats::runMetastats(string outputFileName, vector< vector<double> >& data, int secGroupingStart) {
+    try {
+        row = data.size();              //numBins
                column = data[0].size(); //numGroups in subset
-               int size = row*column;
-               
-               //consistent with original, but this should never be true
-               if ((secondGroupingStart >= column) || (secondGroupingStart <= 0)) { m->mothurOut("[ERROR]: Check your g value."); m->mothurOutEndLine(); return 0; }
-               
-               //Initialize the matrices
-               vector<double> pmatrix; pmatrix.resize(size, 0.0);
-               vector<double> permuted; permuted.resize(size, 0.0);
-               vector< vector<double> > storage; storage.resize(row);
-               for (int i = 0; i < storage.size(); i++) { storage[i].resize(9, 0.0); }
-               
-               //Produces the sum of each column
-               vector<double> total; total.resize(column, 0.0);
-               vector<double> ratio; ratio.resize(column, 0.0);
-               double total1 = 0.0; double total2 = 0.0;
-               
-               //total[i] = total abundance for group[i]
+        secondGroupingStart = secGroupingStart; //g
+         
+        vector< vector<double> > Pmatrix; Pmatrix.resize(row);
+        for (int i = 0; i < row; i++) { Pmatrix[i].resize(column, 0.0);  } // the relative proportion matrix
+        vector< vector<double> > C1; C1.resize(row);
+        for (int i = 0; i < row; i++) { C1[i].resize(3, 0.0);  } // statistic profiles for class1 and class 2
+        vector< vector<double> > C2; C2.resize(row);            // mean[1], variance[2], standard error[3] 
+        for (int i = 0; i < row; i++) { C2[i].resize(3, 0.0);  } 
+        vector<double> T_statistics; T_statistics.resize(row, 1); // a place to store the true t-statistics 
+        vector<double> pvalues; pvalues.resize(row, 1); // place to store pvalues
+        vector<double> qvalues; qvalues.resize(row, 1); // stores qvalues
+       
+        //*************************************
+        //      convert to proportions
+        //      generate Pmatrix
+        //*************************************
+        vector<double> totals; totals.resize(column, 0); // sum of columns
+        //total[i] = total abundance for group[i]
                for (int i = 0; i < column; i++) {
                        for (int j = 0; j < row; j++) {
-                               total[i] += data[j][i];
+                               totals[i] += data[j][i];
                        }
-               }
-               
-               //total for first grouping
-               for (int i = 0; i < secondGroupingStart; i++) { total1 += total[i]; }
-               
-               //total for second grouping
-               for (int i = secondGroupingStart; i < column; i++) { total2 += total[i]; }
-               
-               //Creates the ratios by first finding the minimum of totals
-               double min = total[0];
-               for (int i = 0; i < total.size(); i++) {
-                        if (total[i] < min) { min = total[i]; }
-               }
-               
-               //sanity check
-               if (min <= 0.0) { m->mothurOut("[ERROR]: the sum of one of the columns <= 0."); m->mothurOutEndLine(); return 0; }
-               
-               //Ratio time...
-               for(int i = 0; i < ratio.size(); i++){  ratio[i] = total[i] / min; }
-               
-               //Change matrix into an array as received by R for compatibility - kept to be consistent with original
-               int count = 0;
-               for(int i = 0; i < column; i++){
-                       for(int j = 0; j < row; j++){
-                               pmatrix[count]=data[j][i];
-                               count++;
+        }
+        
+        for (int i = 0; i < column; i++) {
+                       for (int j = 0; j < row; j++) {
+                               Pmatrix[j][i] = data[j][i]/totals[i];
+               
                        }
-               }
-               
-               if(row == 1){
-                       for (int i =0; i < column; i++){ pmatrix[i] /= ratio[i]; }
-               }else {
-                       count = 0; int j=-1;
-                       
-                       for (int i=0; i < size; i++) {
-                               if (count % row == 0) { j++; }
-                               pmatrix[i] /= ratio[j];
-                               count++; 
-                       }   
-               }
-               
-               vector<double> permuted_ttests; permuted_ttests.resize(row, 0.0);
-               vector<double> pvalues;                 pvalues.resize(row, 0.0);
-               vector<double> tinitial;                tinitial.resize(row, 0.0);
-               
-               if (m->control_pressed) { return 1; }
-               
-               //Find the initial values for the matrix.
-               start(pmatrix, secondGroupingStart, tinitial, storage);
-               
-               if (m->control_pressed) { return 1; }
-               
-               // Start the calculations.
-               if ( (column == 2) || (secondGroupingStart < 8) || ((column-secondGroupingStart) < 8) ){ 
-                       
-                       vector<double> fish;    fish.resize(row, 0.0);
+        }
+        
+        //#********************************************************************************
+        //# ************************** STATISTICAL TESTING ********************************
+        //#********************************************************************************
+        
+        if (column == 2){  //# then we have a two sample comparison
+            //#************************************************************
+            //#  generate p values fisher's exact test
+            //#************************************************************
+            double total1, total2;
+                       //total for first grouping
+            for (int i = 0; i < secondGroupingStart; i++) { total1 += totals[i]; }
+            
+            //total for second grouping
+            for (int i = secondGroupingStart; i < column; i++) { total2 += totals[i]; }
+            
+            vector<double> fish;       fish.resize(row, 0.0);
                        vector<double> fish2;   fish2.resize(row, 0.0);
-                       
+            
                        for(int i = 0; i < row; i++){
                                
                                for(int j = 0; j < secondGroupingStart; j++)            { fish[i] += data[i][j];        }
                                for(int j = secondGroupingStart; j < column; j++)       { fish2[i] += data[i][j];       }
                                
-                               //vector<double> tempData; tempData.resize(4, 0.0);
                                double f11, f12, f21, f22;
                                f11 = fish[i];
                                f12 = fish2[i];
                                f21 = total1 - fish[i];
                                f22 = total2 - fish2[i];
                                
-                               double pre = 0.0;
-                               
                                MothurFisher fisher;
-                               pre = fisher.fexact(f11, f12, f21, f22);
-                               
+                               double pre = fisher.fexact(f11, f12, f21, f22);
+                               if (pre > 0.999999999)  { pre = 1.0; }
+                
                                if (m->control_pressed) { return 1; }
                                
-                               if (pre > 0.999999999)  { pre = 1.0; }
-                               storage[i][8] = pre;
                                pvalues[i] = pre;
                        }
-                       
-               }else {
-       
-                       testp(permuted_ttests, permuted, pmatrix, secondGroupingStart, tinitial, pvalues);
-                       
-                       if (m->control_pressed) { return 1; }
-                       
-                       // Checks to make sure the matrix isn't sparse.
-                       vector<double> sparse;          sparse.resize(row, 0.0);
-                       vector<double> sparse2;         sparse2.resize(row, 0.0);
-                       
-                       int c = 0;
-                       
+            
+            //#*************************************
+            //#  calculate q values from p values
+            //#*************************************
+            qvalues = calc_qvalues(pvalues);
+            
+        }else { //we have multiple subjects per population
+            
+            //#*************************************
+            //#  generate statistics mean, var, stderr    
+            //#*************************************
+            for(int i = 0; i < row; i++){ // for each taxa
+                //# find the mean of each group
+                double g1Total = 0.0; double g2Total = 0.0;
+                for (int j = 0; j < secondGroupingStart; j++)       {     g1Total += Pmatrix[i][j]; }
+                C1[i][0] = g1Total/(double)(secondGroupingStart);
+                for (int j = secondGroupingStart; j < column; j++)  {     g2Total += Pmatrix[i][j]; }
+                C2[i][0] = g2Total/(double)(column-secondGroupingStart);
+                
+                 //# find the variance of each group
+                double g1Var = 0.0; double g2Var = 0.0;
+                for (int j = 0; j < secondGroupingStart; j++)       {     g1Var += pow((Pmatrix[i][j]-C1[i][0]), 2);  }
+                C1[i][1] = g1Var/(double)(secondGroupingStart-1);
+                for (int j = secondGroupingStart; j < column; j++)  {     g2Var += pow((Pmatrix[i][j]-C2[i][0]), 2);  }
+                C2[i][1] = g2Var/(double)(column-secondGroupingStart-1);
+                
+                //# find the std error of each group -std err^2 (will change to std err at end)
+                C1[i][2] = C1[i][1]/(double)(secondGroupingStart);    
+                C2[i][2] = C2[i][1]/(double)(column-secondGroupingStart);
+            }
+            
+            //#*************************************
+            //#  two sample t-statistics
+            //#*************************************
+            for(int i = 0; i < row; i++){                  // # for each taxa
+                double xbar_diff = C1[i][0] - C2[i][0]; 
+                double denom = sqrt(C1[i][2] + C2[i][2]);
+                T_statistics[i] = xbar_diff/denom;  // calculate two sample t-statistic
+            }
+            
+            /*for (int i = 0; i < row; i++) {  
+                for (int j = 0; j < 3; j++) {
+                    cout << "C1[" << i+1 << "," << j+1 << "]=" << C1[i][j] << ";" << endl;
+                    cout << "C2[" << i+1 << "," << j+1 << "]=" << C2[i][j] << ";" << endl;
+                }
+                cout << "T_statistics[" << i+1 << "]=" << T_statistics[i] << ";" << endl;
+            }*/
+            //#*************************************
+            //# generate initial permuted p-values
+            //#*************************************
+            pvalues = permuted_pvalues(Pmatrix, T_statistics, data);
+            
+            //#*************************************
+            //#  generate p values for sparse data 
+            //#  using fisher's exact test
+            //#*************************************
+            double total1, total2;
+                       //total for first grouping
+            for (int i = 0; i < secondGroupingStart; i++) { total1 += totals[i]; }
+            
+            //total for second grouping
+            for (int i = secondGroupingStart; i < column; i++) { total2 += totals[i]; }
+            
+            vector<double> fish;       fish.resize(row, 0.0);
+                       vector<double> fish2;   fish2.resize(row, 0.0);
+            
                        for(int i = 0; i < row; i++){
                                
-                               for(int j = 0; j < secondGroupingStart; j++)    {       sparse[i] += data[i][j];        }
-                               if(sparse[i] < (double)secondGroupingStart)             {       c++;                                            }
+                               for(int j = 0; j < secondGroupingStart; j++)            { fish[i] += data[i][j];        }
+                               for(int j = secondGroupingStart; j < column; j++)       { fish2[i] += data[i][j];       }
+                               
+                if ((fish[1] < secondGroupingStart) && (fish2[i] < (column-secondGroupingStart))) {
+                    double f11, f12, f21, f22;
+                    f11 = fish[i];
+                    f12 = fish2[i];
+                    f21 = total1 - fish[i];
+                    f22 = total2 - fish2[i];
                                
-                               // ?<= for col
-                               for(int j = secondGroupingStart; j < column; j++)               {  sparse2[i] += data[i][j]; }
-                               if( (sparse2[i] < (double)(column-secondGroupingStart)))        { c++;                                           }
+                    MothurFisher fisher;
+                    double pre = fisher.fexact(f11, f12, f21, f22);
+                    if (pre > 0.999999999)     { pre = 1.0; }
+                
+                    if (m->control_pressed) { return 1; }
                                
-                               if (c == 2) {
-                                       c=0;
-                                       double f11,f12,f21,f22;
-                                       
-                                       f11=sparse[i];  sparse[i]=0;
-                                       f12=sparse2[i];  sparse2[i]=0;
-                                       f21 = total1 - f11;
-                                       f22 = total2 - f12;
-                                       
-                                       double pre = 0.0;
-                                       
-                                       MothurFisher fisher;
-                                       pre = fisher.fexact(f11, f12, f21, f22);
-                                       
-                                       if (m->control_pressed) { return 1; }
-                                       
-                                       if (pre > 0.999999999){
-                                               pre = 1.0;
-                                       }
-                                       
-                                       storage[i][8] = pre;
-                                       pvalues[i] = pre;
-                               }                               
+                    pvalues[i] = pre;
+                }
                        }
-                       
-                       bflag = 1;
-               }
 
-               // Calculates the mean of counts (not normalized)
-               vector< vector<double> > temp; temp.resize(row);
-               for (int i = 0; i < temp.size(); i++) { temp[i].resize(2, 0.0); }
-               
-               for (int j = 0; j < row; j++){
-                       if (m->control_pressed) { return 1; }
-                       
-                       for (int i = 0; i < secondGroupingStart; i++){ temp[j][0] += data[j][i]; }
-                       temp[j][0] /= (double)secondGroupingStart;
-                       
-                       for(int i = secondGroupingStart; i < column; i++){ temp[j][1] += data[j][i]; }
-                       temp[j][1] /= (double)(column-secondGroupingStart);
-               }
-               
-               for(int i = 0; i < row; i++){
-                       if (m->control_pressed) { return 1; }
-                       
-                       storage[i][3]=temp[i][0];
-                       storage[i][7]=temp[i][1];
-                       storage[i][8]=pvalues[i];
-               }
-               
-               vector<double> qvalues = calc_qvalues(pvalues);
-               
-               // BACKUP checks
-               cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
-               for (int i = 0; i < row; i++){
-                       
-                       if (m->control_pressed) { return 1; }
-                       
-                       if(qvalues[i] < threshold){
-                               m->mothurOut("Feature " + toString((i+1)) + " is significant, q = "); 
-                               cout << qvalues[i];
-                               m->mothurOutJustToLog(toString(pvalues[i])); m->mothurOutEndLine();
-                       }       
-               }
-               
-               // And now we write the files to a text file.
+            //#*************************************
+            //#  calculate q values from p values
+            //#*************************************
+            qvalues = calc_qvalues(pvalues);
+            
+            //#*************************************
+            //#  convert stderr^2 to std error
+            //#*************************************
+            for(int i = 0; i < row; i++){
+                C1[i][2] = sqrt(C1[i][2]);
+                C2[i][2] = sqrt(C2[i][2]);
+            }
+        }
+        
+        // And now we write the files to a text file.
                struct tm *local;
                time_t t; t = time(NULL);
                local = localtime(&t);
@@ -229,269 +209,160 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector<double>
                ofstream out;
                m->openOutputFile(outputFileName, out);
                out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
-                                                 
+        
                out << "Local time and date of test: " << asctime(local) << endl;
                out << "# rows = " << row << ", # col = " << column << ", g = " << secondGroupingStart << endl << endl;
-               if (bflag == 1){ out << numPermutations << " permutations" << endl << endl;     }
+               out << numPermutations << " permutations" << endl << endl;      
                
                //output column headings - not really sure... documentation labels 9 columns, there are 10 in the output file
                //storage 0 = meanGroup1 - line 529, 1 = varGroup1 - line 532, 2 = err rate1 - line 534, 3 = mean of counts group1?? - line 291, 4 = meanGroup2 - line 536, 5 = varGroup2 - line 539, 6 = err rate2 - line 541, 7 = mean of counts group2?? - line 292, 8 = pvalues - line 293
-               out << "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean_of_counts(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tmean_of_counts(group1)\tp-value\tq-value\n";
+               out << "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tp-value\tq-value\n";
                
                for(int i = 0; i < row; i++){
                        if (m->control_pressed) { out.close(); return 0; }
                        
-                       out << (i+1);
-                       for(int j = 0; j < 9; j++){ out << '\t' << storage[i][j]; }
-                       out << '\t' << qvalues[i];
-                       out << endl;
+            //if there are binlabels use them otherwise count.
+                       if (m->binLabelsInFile.size() == row) { out << m->binLabelsInFile[i] << '\t'; }
+            else { out << (i+1) << '\t'; }
+            
+            out << C1[i][0] << '\t' << C1[i][1] << '\t' << C1[i][2] << '\t' << C2[i][0] << '\t' << C2[i][1] << '\t' << C2[i][2] << '\t' << pvalues[i] << '\t' << qvalues[i] << endl;
                }  
                
                out << endl << endl;
                out.close();
                
-               
-               return 0;
-       
-       }catch(exception& e) {
-               m->errorOut(e, "MothurMetastats", "runMetastats");
-               exit(1);
-       }       
-}
-/***********************************************************/
-//Find the initial values for the matrix
-int MothurMetastats::start(vector<double>& Imatrix, int secondGroupingStart, vector<double>& initial, vector< vector<double> >& storage) {
-       try {
-               
-               int a = row; a*=4;
-               
-               double xbardiff = 0.0; double denom = 0.0;
-               vector<double> store;   store.resize(a, 0.0);
-               vector<double> tool;    tool.resize(a, 0.0);
-               vector< vector<double> > C1; C1.resize(row);
-               for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); }
-               vector< vector<double> > C2; C2.resize(row);
-               for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); }
-               
-               meanvar(Imatrix, secondGroupingStart, store);
-               
-               if (m->control_pressed) { return 0; }
-               
-               //copy store into tool
-               tool = store;
-               
-               for (int i = 0; i < row; i++){
-                       C1[i][0]=tool[i]; //mean group 1
-                       storage[i][0]=C1[i][0];
-                       C1[i][1]=tool[i+row+row]; // var group 1
-                       storage[i][1]=C1[i][1];
-                       C1[i][2]=C1[i][1]/(secondGroupingStart);
-                       storage[i][2]=sqrt(C1[i][2]);
-                       
-                       C2[i][0]=tool[i+row]; // mean group 2
-                       storage[i][4]=C2[i][0];    
-                       C2[i][1]=tool[i+row+row+row]; // var group 2 
-                       storage[i][5]=C2[i][1];        
-                       C2[i][2]=C2[i][1]/(column-secondGroupingStart);
-                       storage[i][6]=sqrt(C2[i][2]);   
-               }
-               
-               if (m->control_pressed) { return 0; }
-               
-               for (int i = 0; i < row; i++){
-                       xbardiff = C1[i][0]-C2[i][0];
-                       denom = sqrt(C1[i][2]+C2[i][2]);
-                       initial[i]=fabs(xbardiff/denom);
-               }       
 
-               return 0; 
-               
-       }catch(exception& e) {
-               m->errorOut(e, "MothurMetastats", "start");
-               exit(1);
-       }       
-}
-/***********************************************************/
-int MothurMetastats::meanvar(vector<double>& pmatrix, int secondGroupingStart, vector<double>& store) {
-       try {
-               vector<double> temp;    temp.resize(row, 0.0);
-               vector<double> temp2;   temp2.resize(row, 0.0);
-               vector<double> var;             var.resize(row, 0.0);
-               vector<double> var2;    var2.resize(row, 0.0);
-               
-               double a = secondGroupingStart;
-               double b = column - a;
-               int m = a * row;
-               int n = row * column;
-               
-               for (int i = 0; i < m; i++)             { temp[i%row] += pmatrix[i];    }
-               for (int i = 0; i < n; i++)             { temp2[i%row]+= pmatrix[i];    }
-               for (int i = 0; i < row; i++)   { temp2[i] -= temp[i];          }
-               for (int i = 0; i <= row-1;i++) {
-                       store[i] = temp[i]/a;
-                       store[i+row]=temp2[i]/b;
-               }
-               
-               //That completes the mean calculations.
-               
-               for (int i = 0; i < m; i++)             { var[i%row] += pow((pmatrix[i]-store[i%row]),2);               }
-               for (int i = m; i < n; i++)             { var2[i%row]+= pow((pmatrix[i]-store[(i%row)+row]),2); }
-               for (int i = 0; i <= row-1; i++){
-                       store[i+2*row]=var[i]/(a-1);
-                       store[i+3*row]=var2[i]/(b-1);
-               }
-               
-               // That completes var calculations.
-               
-               return 0;
-               
-       }catch(exception& e) {
-               m->errorOut(e, "MothurMetastats", "meanvar");
-               exit(1);
-       }       
-}
-/***********************************************************/
-int MothurMetastats::testp(vector<double>& permuted_ttests, vector<double>& permuted, vector<double>& Imatrix, int secondGroupingStart, vector<double>& Tinitial, vector<double>& ps) {
-       try {
-               
-               vector<double> Tvalues;         Tvalues.resize(row, 0.0);
-               vector<double> counter;         counter.resize(row, 0.0);
-               int a, b, n;
-               
-               a = numPermutations;
-               b = row;
-               n = a*b;
-               
-               for (int j = 1; j <= row; j++)  {       
-                       if (m->control_pressed) { return 0; }
-                       permute_matrix(Imatrix, permuted, secondGroupingStart, Tvalues, Tinitial, counter);     
-               }
-               
-               for(int j = 0; j < row; j++)    {       
-                       if (m->control_pressed) { return 0; }
-                       ps[j] = ((counter[j]+1)/(double)(a+1)); 
-               }
-               
-               return 0;
-               
-       }catch(exception& e) {
-               m->errorOut(e, "MothurMetastats", "testp");
-               exit(1);
-       }       
-}      
-/***********************************************************/
-int MothurMetastats::permute_matrix(vector<double>& Imatrix, vector<double>& permuted, int secondGroupingStart, vector<double>& trial_ts, vector<double>& Tinitial, vector<double>& counter1){
-       try {
-       
-               vector<int> y; y.resize(column, 0);
-               for (int i = 1; i <= column; i++){ y[i-1] = i; }
-               
-               permute_array(y); 
-               
-               int f = 0; int c = 0; int k = 0;
-               for (int i = 0; i < column; i++){
-                       
-                       if (m->control_pressed) { return 0; }
-                       
-                       f = y[i]; //column number
-                       c = 1;
-                       c *= (f-1);
-                       c *= row;
-                       if (f == 1){ c = 0; } // starting value position in the Imatrix
-                       
-                       for(int j = 1; j <= row; j++){
-                               permuted[k] = Imatrix[c];
-                               c++; k++;
-                       }
-               }
-               
-               calc_twosample_ts(permuted, secondGroupingStart, trial_ts, Tinitial, counter1);
-               
-               return 0;
-               
-       }catch(exception& e) {
-               m->errorOut(e, "MothurMetastats", "permute_matrix");
-               exit(1);
-       }       
+
+        return 0;
+        
+    }catch(exception& e) {
+        m->errorOut(e, "MothurMetastats", "runMetastats");
+        exit(1);
+    }  
 }
 /***********************************************************/
-int MothurMetastats::permute_array(vector<int>& array) {
+vector<double> MothurMetastats::permuted_pvalues(vector< vector<double> >& Imatrix, vector<double>& tstats, vector< vector<double> >& Fmatrix) {
        try {
-               static int seeded = 0;
-               
-               if (! seeded) {
-                       seeded = 1;
-                       srand(time(NULL));
-               }
-               
-               for (int i = 0; i < array.size(); i++) {
-                       if (m->control_pressed) { return 0; }
-                       
-                       int selection = rand() % (array.size() - i);
-                       int tmp = array[i + selection];
-                       array[i + selection] = array[i];
-                       array[i] = tmp;
-               }
-               
-               return 0;
-               
-       }catch(exception& e) {
-               m->errorOut(e, "MothurMetastats", "permute_array");
-               exit(1);
-       }       
+        // Returns one permutation p-value per taxon (row). permuted_ttests[p][t] is the |t| of taxon t under permutation p of Imatrix.
+        vector<double> ps;  ps.resize(row, 0.0); // p-values, indexed by taxon
+        vector< vector<double> > permuted_ttests; permuted_ttests.resize(numPermutations);
+        // each slot is assigned a complete vector of row values below, so the inner vectors need no pre-sizing
+        //# calculate null version of tstats using B permutations.
+        for (int i = 0; i < numPermutations; i++) {
+            permuted_ttests[i] = permute_and_calc_ts(Imatrix);
+        }
+        
+        //# calculate each pvalue using the null ts; when either group has fewer than 8 samples the null t's of the frequently observed taxa are pooled
+        if ((secondGroupingStart) < 8 || (column-secondGroupingStart) < 8){
+            vector< vector<double> > cleanedpermuted_ttests; cleanedpermuted_ttests.resize(numPermutations);  //# the array pooling just the frequently observed ts
+            //# then pool the t's together!
+            //# hfc starts at 1, so it ends at (number of pooled taxa + 1); it is used only in the denominator below, never as an index. NOTE(review): confirm the +1 is intended.
+            int hfc = 1;
+            for (int i = 0; i < row; i++) {                 // # for each taxa
+                double group1Total = 0.0; double group2Total = 0.0;
+                for(int j = 0; j < secondGroupingStart; j++)           { group1Total += Fmatrix[i][j]; }
+                for(int j = secondGroupingStart; j < column; j++)      { group2Total += Fmatrix[i][j]; }
+                
+                if (group1Total >= secondGroupingStart || group2Total >= (column-secondGroupingStart)){ 
+                    hfc++;
+                    for (int j = 0; j < numPermutations; j++) {   cleanedpermuted_ttests[j].push_back(permuted_ttests[j][i]); }
+                }
+            }
+              
+            //#now for each taxa
+            for (int i = 0; i < row; i++) { 
+                //number of pooled null t's greater than |tstats[i]|
+                int numGreater = 0;
+                for (int j = 0; j < numPermutations; j++) {
+                    for (int k = 0; k < (int)cleanedpermuted_ttests[j].size(); k++) { // bound by the pooled size: indexing up to hfc would read one element past the end
+                        if (cleanedpermuted_ttests[j][k] > fabs(tstats[i])) { numGreater++; }
+                    }
+                }
+                
+                ps[i] = (1/((double)numPermutations*hfc))*numGreater; // product formed in double so it cannot overflow int
+            }
+        }else{
+            for (int i = 0; i < row; i++) { 
+                //(number of this taxon's null t's greater than |tstats[i]|, plus 1) / (numPermutations + 1)
+                int numGreater = 1;
+                for (int j = 0; j < numPermutations; j++) { if (permuted_ttests[j][i] > fabs(tstats[i])) { numGreater++; }   }
+                ps[i] = (1/(double)(numPermutations+1))*numGreater;
+            }
+        }
+        
+        return ps;
+        
+    }catch(exception& e) {
+        m->errorOut(e, "MothurMetastats", "permuted_pvalues");
+        exit(1);
+    }  
 }
 /***********************************************************/
-int MothurMetastats::calc_twosample_ts(vector<double>& Pmatrix, int secondGroupingStart, vector<double>& Ts, vector<double>& Tinitial, vector<double>& counter) {
+vector<double> MothurMetastats::permute_and_calc_ts(vector< vector<double> >& Imatrix) {
        try {
-               int a = row * 4;
-               
-               vector< vector<double> > C1; C1.resize(row);
-               for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); }
-               vector< vector<double> > C2; C2.resize(row);
-               for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); }
-               vector<double> storage; storage.resize(a, 0.0);
-               vector<double> tool;    tool.resize(a, 0.0);
-               double xbardiff = 0.0; double denom = 0.0;
-               
-               meanvar(Pmatrix, secondGroupingStart, storage);
-               
-               for(int i = 0;i <= (a-1); i++) {        
-                       if (m->control_pressed) { return 0; }
-                       tool[i] = storage[i];   
-               }
-               
-               for (int i = 0; i < row; i++){
-                       if (m->control_pressed) { return 0; }
-                       C1[i][0]=tool[i];
-                       C1[i][1]=tool[i+row+row];
-                       C1[i][2]=C1[i][1]/(secondGroupingStart);
-                       
-                       C2[i][0]=tool[i+row];
-                       C2[i][1]=tool[i+row+row+row]; // var group 2 
-                       C2[i][2]=C2[i][1]/(column-secondGroupingStart);
-               }
-               
-               for (int i = 0; i < row; i++){
-                       if (m->control_pressed) { return 0; }
-                       xbardiff = C1[i][0]-C2[i][0];
-                       denom = sqrt(C1[i][2]+C2[i][2]);
-                       Ts[i]=fabs(xbardiff/denom);
-                       if (fabs(Ts[i])>(fabs(Tinitial[i])+.0000000000001)){ //13th place
-                               counter[i]++;
-                       }
-               }
-               
-               return 0;
-               
-       }catch(exception& e) {
-               m->errorOut(e, "MothurMetastats", "calc_twosample_ts");
-               exit(1);
-       }
+        vector< vector<double> > permutedMatrix = Imatrix; // work on a copy so the caller's matrix is left untouched
+        
+        //shuffle the values within each row (taxon) independently. NOTE(review): rows are not permuted with one shared column order - confirm that is intended.
+        for (int i = 0; i < permutedMatrix.size(); i++) {   random_shuffle(permutedMatrix[i].begin(), permutedMatrix[i].end());     }
+        
+        //calc ts: returns |t| per taxon for the shuffled matrix
+        vector< vector<double> > C1; C1.resize(row);
+        for (int i = 0; i < row; i++) { C1[i].resize(3, 0.0);  } // statistic profiles for group 1 (C1) and group 2 (C2)
+        vector< vector<double> > C2; C2.resize(row);            // [0] = mean, [1] = variance, [2] = squared standard error
+        for (int i = 0; i < row; i++) { C2[i].resize(3, 0.0);  } 
+        vector<double> Ts; Ts.resize(row, 0.0); // absolute t-statistic of each taxon for this permutation
+
+        //#*************************************
+        //#  generate statistics mean, var, stderr    
+        //#*************************************
+        for(int i = 0; i < row; i++){ // for each taxa
+            //# find the mean of each group
+            double g1Total = 0.0; double g2Total = 0.0;
+            for (int j = 0; j < secondGroupingStart; j++)       {     g1Total += permutedMatrix[i][j]; }
+            C1[i][0] = g1Total/(double)(secondGroupingStart);
+            for (int j = secondGroupingStart; j < column; j++)  {     g2Total += permutedMatrix[i][j]; }
+            C2[i][0] = g2Total/(double)(column-secondGroupingStart);
+            
+            //# find the variance of each group (sample variance, n-1 denominator)
+            double g1Var = 0.0; double g2Var = 0.0;
+            for (int j = 0; j < secondGroupingStart; j++)       {     g1Var += pow((permutedMatrix[i][j]-C1[i][0]), 2);  }
+            C1[i][1] = g1Var/(double)(secondGroupingStart-1);
+            for (int j = secondGroupingStart; j < column; j++)  {     g2Var += pow((permutedMatrix[i][j]-C2[i][0]), 2);  }
+            C2[i][1] = g2Var/(double)(column-secondGroupingStart-1);
+            
+            //# find the squared std error of each group (variance / group size); the square root is taken once, on the sum, below
+            C1[i][2] = C1[i][1]/(double)(secondGroupingStart);    
+            C2[i][2] = C2[i][1]/(double)(column-secondGroupingStart);
+        }
+        
+        //#*************************************
+        //#  two sample t-statistics
+        //#*************************************
+        for(int i = 0; i < row; i++){                  // # for each taxa
+            double xbar_diff = C1[i][0] - C2[i][0]; 
+            double denom = sqrt(C1[i][2] + C2[i][2]);
+            Ts[i] = fabs(xbar_diff/denom);  // fabs (not abs) so the floating-point overload is always the one used
+        }
+
+        return Ts;
+
+        
+    }catch(exception& e) {
+        m->errorOut(e, "MothurMetastats", "permute_and_calc_ts");
+        exit(1);
+    }  
 }
 /***********************************************************/
 vector<double> MothurMetastats::calc_qvalues(vector<double>& pValues) {
        try {
                
+       /* cout << "x <- c(" << pValues[0];
+        for (int l = 1; l < pValues.size(); l++){
+            cout << ", " << pValues[l];
+        }
+        cout << ")\n";*/
+        
                int numRows = pValues.size();
                vector<double> qvalues(numRows, 0.0);
 
index d80b68709b7bbd8417a26e47b3b6596d9a4fec77..4d6cf9166816f9f775e97eedc8a3231ec6daecd8 100644 (file)
@@ -23,9 +23,12 @@ class MothurMetastats {
        
        private:
                MothurOut* m;
-               int row, column, numPermutations;
+               int row, column, numPermutations, secondGroupingStart;
                double threshold;
-       
+        
+    vector<double> permuted_pvalues(vector< vector<double> >&, vector<double>&, vector< vector<double> >&);
+    vector<double> permute_and_calc_ts(vector< vector<double> >&);
+    
                int start(vector<double>&, int, vector<double>&, vector< vector<double> >&); //Find the initial values for the matrix
                int meanvar(vector<double>&, int, vector<double>&);
                int testp(vector<double>&, vector<double>&, vector<double>&, int, vector<double>&, vector<double>&);
index 20a7b5235029b6d7908ca0f23f119c3c38d095f1..98f5ce09608855690a9fc7442814841a064db379 100644 (file)
@@ -143,7 +143,7 @@ void MothurOut::setDefaultPath(string pathname)  {
        
                //add / to name if needed
                string lastChar = pathname.substr(pathname.length()-1);
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        if (lastChar != "/") { pathname += "/"; }
                #else
                        if (lastChar != "\\") { pathname += "\\"; }     
@@ -212,8 +212,8 @@ void MothurOut::mothurOut(string output) {
                        if (pid == 0) { //only one process should output to screen
                #endif
                
-               cout << output;
                out << output;
+        logger() << output;
                
                #ifdef USE_MPI
                        }
@@ -234,8 +234,8 @@ void MothurOut::mothurOutEndLine() {
                        if (pid == 0) { //only one process should output to screen
                #endif
                
-               cout << endl;
                out << endl;
+        logger() << endl;
                
                #ifdef USE_MPI
                        }
@@ -257,13 +257,15 @@ void MothurOut::mothurOut(string output, ofstream& outputFile) {
                if (pid == 0) { //only one process should output to screen
 #endif
                        
-                       cout << output;
+                       
                        out << output;
                        outputFile << output;
+            logger() << output;
                        
 #ifdef USE_MPI
                }
 #endif
+        
        }
        catch(exception& e) {
                errorOut(e, "MothurOut", "MothurOut");
@@ -280,9 +282,9 @@ void MothurOut::mothurOutEndLine(ofstream& outputFile) {
                if (pid == 0) { //only one process should output to screen
 #endif
                        
-                       cout << endl;
                        out << endl;
                        outputFile << endl;
+            logger() << endl;
                        
 #ifdef USE_MPI
                }
@@ -332,7 +334,7 @@ void MothurOut::errorOut(exception& e, string object, string function) {
 //
 // On failure, returns 0.0, 0.0
 int MothurOut::mem_usage(double& vm_usage, double& resident_set) {
-  #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+  #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
   
           vm_usage     = 0.0;
           resident_set = 0.0;
@@ -504,7 +506,7 @@ string MothurOut::getline(ifstream& fileHandle) {
 }
 /***********************************************************************/
 
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #ifdef USE_COMPRESSION
 inline bool endsWith(string s, const char * suffix){
   size_t suffixLength = strlen(suffix);
@@ -518,7 +520,7 @@ string MothurOut::getRootName(string longName){
        
                string rootName = longName;
 
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #ifdef USE_COMPRESSION
     if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) {
       int pos = rootName.find_last_of('.');
@@ -619,7 +621,7 @@ string MothurOut::hasPath(string longName){
 
 string MothurOut::getExtension(string longName){
        try {
-               string extension = longName;
+               string extension = "";
                
                if(longName.find_last_of('.') != longName.npos){
                        int pos = longName.find_last_of('.');
@@ -673,7 +675,7 @@ string MothurOut::getFullPathName(string fileName){
                                
                string cwd;
                //get current working directory 
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)   
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)  
                        
                        if (path.find("~") != -1) { //go to home directory
                                string homeDir;
@@ -726,7 +728,7 @@ string MothurOut::getFullPathName(string fileName){
                                        }else if (path[(pos-1)] == '/') { //you want the current working dir ./
                                                path = path.substr(0, pos);
                                        }else if (pos == 1) { break;  //you are at the end
-                                       }else { cout << "cannot resolve path for " <<  fileName << endl; return fileName; }
+                                       }else { mothurOut("cannot resolve path for " +  fileName + "\n"); return fileName; }
                                }
                        
                                for (int i = index; i >= 0; i--) {
@@ -772,7 +774,7 @@ string MothurOut::getFullPathName(string fileName){
                                        }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
                                                path = path.substr(0, pos);
                                        }else if (pos == 1) { break;  //you are at the end
-                                       }else { cout << "cannot resolve path for " <<  fileName << endl; return fileName; }
+                                       }else { mothurOut("cannot resolve path for " +  fileName + "\n"); return fileName; }
                                }
                        
                                for (int i = index; i >= 0; i--) {
@@ -796,7 +798,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){
        try {
                        //get full path name
                        string completeFileName = getFullPathName(fileName);
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #ifdef USE_COMPRESSION
       // check for gzipped or bzipped file
       if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
@@ -842,7 +844,7 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
 
                //get full path name
                string completeFileName = getFullPathName(fileName);
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #ifdef USE_COMPRESSION
   // check for gzipped or bzipped file
   if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
@@ -893,7 +895,7 @@ int MothurOut::renameFile(string oldName, string newName){
                int exist = openInputFile(newName, inTest, "");
                inTest.close();
                
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)           
+       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
                if (exist == 0) { //you could open it so you want to delete it
                        string command = "rm " + newName;
                        system(command.c_str());
@@ -920,7 +922,7 @@ int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){
        try { 
        
                string completeFileName = getFullPathName(fileName);
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #ifdef USE_COMPRESSION
     // check for gzipped file
     if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
@@ -996,7 +998,7 @@ string MothurOut::sortFile(string distFile, string outputDir){
 
                
                //if you can, use the unix sort since its been optimized for years
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        string command = "sort -n -k +3 " + distFile + " -o " + outfile;
                        system(command.c_str());
                #else //you are stuck with my best attempt...
@@ -1151,7 +1153,7 @@ vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& n
                                        while(isspace(d) && (d != in.eof()))            { d=in.get(); count++;}
                                }
                                positions.push_back(count-1);
-                               cout << count-1 << endl;
+                               //cout << count-1 << endl;
                        }
                        in.close();
                
@@ -1199,7 +1201,7 @@ vector<unsigned long long> MothurOut::divideFile(string filename, int& proc) {
                        fclose (pFile);
                }
                
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                
                //estimate file breaks
                unsigned long long chunkSize = 0;
@@ -1349,7 +1351,7 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
                }
                in.close();
                
-               return 0;
+               return nameMap.size();
                
        }
        catch(exception& e) {
@@ -1378,7 +1380,7 @@ int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap)
                }
                in.close();
                
-               return 0;
+               return nameMap.size();
                
        }
        catch(exception& e) {
index d2a36b351902f3bc25d7c2132fb2c20f15421b23..e1c8222ae1938e8b63422cc9ee6567d309b668f5 100644 (file)
 
 #include "mothur.h"
 
+/***********************************************/
+struct logger {
+    
+    logger() {}
+    ~logger() {}
+    
+    template< class T >
+    logger& operator <<( const T& o ) {
+        cout << o; return *this;
+    }
+    
+    logger& operator<<(ostream& (*m)(ostream&) ) {
+        cout << m; return *this;
+    }
+    
+}; 
 /***********************************************/
 
 class MothurOut {
@@ -122,7 +138,7 @@ class MothurOut {
                int getRandomIndex(int); //highest
 
                int control_pressed;
-               bool executing, runParse, jumble, gui;
+               bool executing, runParse, jumble, gui, mothurCalling;
                
                //current files - if you add a new type you must edit optionParser->getParameters, get.current command and mothurOut->printCurrentFiles/clearCurrentFiles.
                string getPhylipFile()          { return phylipfile;            }
@@ -203,6 +219,7 @@ class MothurOut {
                        gui = false;
                        printedHeaders = false;
                        commandInputsConvertError = false;
+            mothurCalling = false;
                        sharedHeaderMode = "";
                }
                ~MothurOut();
index 3cf38cdb9d752652cc70a865ce6b1b87e1da939b..f7cb65ec43c741a1d4ae5be194fd49fa75b2fc9e 100644 (file)
@@ -515,7 +515,7 @@ int Perseus::getChimera(vector<seqData> sequences,
                        for(int i=0;i<numRefSeqs;i++){
                                
                                if(restricted[i] == 0){
-                                       if(leftDiffs[i][l] < singleLeft[l] && sequences[i].frequency || (leftDiffs[i][l] == singleLeft[l] && sequences[i].frequency > sequences[bestLeft[l]].frequency)){
+                                       if(((leftDiffs[i][l] < singleLeft[l]) && sequences[i].frequency) || ((leftDiffs[i][l] == singleLeft[l]) && (sequences[i].frequency > sequences[bestLeft[l]].frequency))){
                                                singleLeft[l] = leftDiffs[i][l];
                                                bestLeft[l] = i;
                                        }
@@ -533,7 +533,7 @@ int Perseus::getChimera(vector<seqData> sequences,
                        for(int i=0;i<numRefSeqs;i++){
                                
                                if(restricted[i] == 0){
-                                       if(rightDiffs[i][l] < singleRight[l] && sequences[i].frequency || (rightDiffs[i][l] == singleRight[l] && sequences[i].frequency > sequences[bestRight[l]].frequency)){
+                                       if((rightDiffs[i][l] < singleRight[l] && sequences[i].frequency) || ((rightDiffs[i][l] == singleRight[l] && sequences[i].frequency > sequences[bestRight[l]].frequency))){
                                                singleRight[l] = rightDiffs[i][l];
                                                bestRight[l] = i;
                                        }
@@ -649,7 +649,7 @@ int Perseus::getTrimera(vector<seqData>& sequences,
                                        if(restricted[i] == 0){
                                                int delta = leftDiffs[i][y] - leftDiffs[i][x];
 
-                                               if(delta < minDelta[x][y] || delta == minDelta[x][y] && sequences[i].frequency > sequences[minDeltaSeq[x][y]].frequency){
+                                               if(delta < minDelta[x][y] || (delta == minDelta[x][y] && sequences[i].frequency > sequences[minDeltaSeq[x][y]].frequency)){
                                                        minDelta[x][y] = delta;
                                                        minDeltaSeq[x][y] = i;                                  
                                                }                               
index f5c8b40cd81d9d06a65a861c1e0e66355b275f58..78255d8f73d894c7cb4032d925b684843094bee7 100644 (file)
@@ -46,7 +46,7 @@ int correctDist::addSeq(string seqName, string seqSeq){
 /**************************************************************************************************/
 int correctDist::execute(string distanceFileName){
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
                processors = 1;
 #endif
@@ -146,7 +146,7 @@ vector<int> correctDist::fixSequence(string sequence){
 
 int correctDist::createProcess(string distanceFileName){
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                vector<int> processIDs;
                
index 226b0f63560b04348ea425290c98b48b1abc4de6..b1846499021624c64733cb44afcc344fdfecbb37 100755 (executable)
@@ -11,7 +11,7 @@
 #include <signal.h>\r
 #include <float.h>\r
 \r
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)\r
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)\r
 #include <sys/time.h>\r
 #include <sys/resource.h>\r
 #include <unistd.h>\r
@@ -139,7 +139,7 @@ bool myisatty(int fd)
        return isatty(fd) != 0;\r
 }\r
 \r
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)\r
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)\r
 #else\r
 //#ifdef BIT_VERSION\r
 //#include <io.h>\r
@@ -176,7 +176,7 @@ void LogStdioFileState(FILE *f)
        Log("fpos       %ld (retval %d)\n", (long) fpos, fgetpos_retval);\r
        //      Log("eof        %d\n", _eof(fd));\r
 #endif\r
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)\r
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)\r
 #else\r
 #ifdef BIT_VERSION\r
        __int64 pos64 = _ftelli64(f);\r
@@ -612,7 +612,7 @@ void Die(const char *Format, ...)
        fprintf(stderr, "\n---Fatal error---\n%s\n", szStr);\r
        Log("\n---Fatal error---\n%s\n", szStr);\r
        \r
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)\r
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)\r
 #else\r
        //if (IsDebuggerPresent())\r
        //      __debugbreak();\r
@@ -1205,7 +1205,7 @@ static void AddOpt(const OptInfo &Opt)
        g_Opts.insert(Opt);\r
 }\r
 \r
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)\r
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)\r
 #else\r
 #pragma warning(disable: 4505) // unreferenced local function\r
 #endif\r
index 1d936d458d2edb69e6d49f51d8a79171389ea544..e2a3712a410738b0396884accc41bdd555caa735 100644 (file)
@@ -156,7 +156,7 @@ NMDSCommand::NMDSCommand(string option)  {
                        m->mothurConvert(temp, epsilon); 
                        
                        if (mindim < 1) { m->mothurOut("mindim must be at least 1."); m->mothurOutEndLine(); abort = true; }
-                       if (maxdim < mindim) { m->mothurOut("maxdim must be greater than mindim."); m->mothurOutEndLine(); abort = true; }
+                       if (maxdim < mindim) { maxdim = mindim; }
                }
                
        }
diff --git a/nseqs.h b/nseqs.h
index e82684b645041a10095c681061d4b16fa6929a9f..c0f9549c87584bbf99e503388ff737c9f5343e8e 100644 (file)
--- a/nseqs.h
+++ b/nseqs.h
@@ -31,7 +31,7 @@ public:
                int numGroups = shared.size();
                data.clear(); data.resize(numGroups,0);
 
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //get bin values and set sharedByAll 
                        bool sharedByAll = true;
                        for (int j = 0; j < numGroups; j++) {
index 971e6b0ebbad92014347872ee92f4222c044db72..3e5b6c72f1d862ab88698360566a84e6a87953de 100644 (file)
@@ -19,7 +19,6 @@ class oneGapDist : public Dist {
 public:
        
        oneGapDist() {}
-       oneGapDist(const oneGapDist& ddb) {}
        
        void calcDist(Sequence A, Sequence B){
                
index dba3f38292ced957330631ece5572901ee3438ae..fdbc196e9f2f7ff539a9b61bf35a8120483ac66b 100644 (file)
@@ -19,7 +19,6 @@ class oneGapIgnoreTermGapDist : public Dist {
 public:
        
        oneGapIgnoreTermGapDist() {}
-       oneGapIgnoreTermGapDist(const oneGapIgnoreTermGapDist& ddb) {}
        
        void calcDist(Sequence A, Sequence B){
                
index 1e8c4451538683d4725ed60e535a5a86a8a0ea79..06a900d8137a031236d52261882c744bb169b440 100644 (file)
@@ -123,7 +123,7 @@ bool OptionParser::getNameFile(vector<string> files) {
                string namefile = m->getNameFile();
                bool match = false;
                
-               if (namefile != "") {
+               if ((namefile != "")&&(!m->mothurCalling)) {
                        string temp = m->getRootName(m->getSimpleName(namefile));
                        vector<string> rootName;
                        m->splitAtChar(temp, rootName, '.');
index c82510c3d8d0b531d13e4c847d16fe98aa72d611..b520ca69ceb898c9321cf8b7b6cf565588377079 100644 (file)
@@ -300,7 +300,7 @@ int OTUAssociationCommand::process(vector<SharedRAbundVector*>& lookup){
                        for (int k = 0; k < i; k++) {
                                
                                if (m->control_pressed) { out.close(); return 0; }
-                                                                                               
+
                                double coef = 0.0;
                                double sig = 0.0;
                                if (method == "spearman")               {   coef = linear.calcSpearman(xy[i], xy[k], sig);      }
@@ -314,6 +314,7 @@ int OTUAssociationCommand::process(vector<SharedRAbundVector*>& lookup){
                
                out.close();
                
+               
                return 0;
                
        }
index 114f545aa26eb326f078b6ab7e1b18e024ecdcb5..98b0fde32dbb4cf9bf86aad283fa1d6ad3a7ea44 100644 (file)
@@ -8,19 +8,6 @@
  */
 
 #include "pairwiseseqscommand.h"
-#include "sequence.hpp"
-
-#include "gotohoverlap.hpp"
-#include "needlemanoverlap.hpp"
-#include "blastalign.hpp"
-#include "noalign.hpp"
-
-#include "ignoregaps.h"
-#include "eachgapdist.h"
-#include "eachgapignore.h"
-#include "onegapdist.h"
-#include "onegapignore.h"
-
 
 //**********************************************************************************************************************
 vector<string> PairwiseSeqsCommand::setParameters(){   
@@ -247,25 +234,6 @@ PairwiseSeqsCommand::PairwiseSeqsCommand(string option)  {
                                 if (calc == "default")  {  calc = "onegap";  }
                        }
                        m->splitAtDash(calc, Estimators);
-                       
-                       ValidCalculators validCalculator;
-                       if (countends) {
-                               for (int i=0; i<Estimators.size(); i++) {
-                                       if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
-                                               if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
-                                               else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
-                                               else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
-                                       }
-                               }
-                       }else {
-                               for (int i=0; i<Estimators.size(); i++) {
-                                       if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
-                                               if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
-                                               else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
-                                               else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
-                                       }
-                               }
-                       }
                }
                
        }
@@ -280,17 +248,7 @@ int PairwiseSeqsCommand::execute(){
        try {
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
-               int longestBase = 2000; //will need to update this in driver if we find sequences with more bases.  hardcoded so we don't have the pre-read user fasta file.
-               
-               if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
-               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
-               else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
-               else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
-               else {
-                       m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
-                       m->mothurOutEndLine();
-                       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
-               }
+               longestBase = 2000; //will need to update this in driver if we find sequences with more bases.  hardcoded so we don't have the pre-read user fasta file.
                
                cutoff += 0.005;
                
@@ -357,11 +315,11 @@ int PairwiseSeqsCommand::execute(){
                                        
                                        driverMPI(start, end, outMPI, cutoff); 
                                        
-                                       if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);  m->mothurRemove(outputFile); delete distCalculator;  return 0; }
+                                       if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);  m->mothurRemove(outputFile); return 0; }
                                
                                        //wait on chidren
                                        for(int i = 1; i < processors; i++) { 
-                                               if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   m->mothurRemove(outputFile); delete distCalculator;  return 0; }
+                                               if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   m->mothurRemove(outputFile);  return 0; }
                                                
                                                char buf[5];
                                                MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
@@ -370,7 +328,7 @@ int PairwiseSeqsCommand::execute(){
                                        //do your part
                                        driverMPI(start, end, outMPI, cutoff); 
                                        
-                                       if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  m->mothurRemove(outputFile); delete distCalculator;  return 0; }
+                                       if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  m->mothurRemove(outputFile);  return 0; }
                                
                                        char buf[5];
                                        strcpy(buf, "done"); 
@@ -390,7 +348,7 @@ int PairwiseSeqsCommand::execute(){
                                        if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
                                        else { driverMPI(start, end, outputFile, mySize, output); }
                
-                                       if (m->control_pressed) {  outputTypes.clear();   m->mothurRemove(outputFile); delete distCalculator;  return 0; }
+                                       if (m->control_pressed) {  outputTypes.clear();   m->mothurRemove(outputFile);   return 0; }
                                        
                                        int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
                                        MPI_File outMPI;
@@ -405,7 +363,7 @@ int PairwiseSeqsCommand::execute(){
                                        for(int b = 1; b < processors; b++) { 
                                                unsigned long long fileSize;
                                                
-                                               if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  m->mothurRemove(outputFile);  delete distCalculator;  return 0; }
+                                               if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  m->mothurRemove(outputFile);   return 0; }
                                                
                                                MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
                                                
@@ -435,7 +393,7 @@ int PairwiseSeqsCommand::execute(){
                                        if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
                                        else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
                                        
-                                       if (m->control_pressed) { delete distCalculator;  return 0; }
+                                       if (m->control_pressed) {  return 0; }
                                
                                        //tell parent you are done.
                                        MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
@@ -444,7 +402,7 @@ int PairwiseSeqsCommand::execute(){
                        MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
        #else           
                                        
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        //if you don't need to fork anything
                        if(processors == 1){
                                if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
@@ -465,14 +423,14 @@ int PairwiseSeqsCommand::execute(){
                                
                                createProcesses(outputFile); 
                        }
-               #else
+               //#else
                        //ifstream inFASTA;
-                       if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
-                       else { driver(0, numSeqs, outputFile, "square");  }
-               #endif
+                       //if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
+                       //else { driver(0, numSeqs, outputFile, "square");  }
+               //#endif
                
        #endif
-                       if (m->control_pressed) { outputTypes.clear();  delete distCalculator; m->mothurRemove(outputFile); return 0; }
+                       if (m->control_pressed) { outputTypes.clear();   m->mothurRemove(outputFile); return 0; }
                        
                        #ifdef USE_MPI
                                MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
@@ -500,10 +458,8 @@ int PairwiseSeqsCommand::execute(){
                        
                        m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
                        
-                       if (m->control_pressed) { outputTypes.clear();  delete distCalculator; m->mothurRemove(outputFile); return 0; }
+                       if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); return 0; }
                }
-                       
-               delete distCalculator;
                
                //set phylip file as new current phylipfile
                string current = "";
@@ -535,9 +491,11 @@ int PairwiseSeqsCommand::execute(){
 /**************************************************************************************************/
 void PairwiseSeqsCommand::createProcesses(string filename) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 1;
+        int process = 1;
                processIDS.clear();
+        
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+               
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -567,13 +525,51 @@ void PairwiseSeqsCommand::createProcesses(string filename) {
                        int temp = processIDS[i];
                        wait(&temp);
                }
+#else     
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the distanceData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //that's why the distance calculator was moved inside of the driver to make separate copies.
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
                
-               //append and remove temp files
+               vector<pairwiseData*> pDataArray; //[processors-1];
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor-1 worker threads.
+               for( int i=0; i<processors-1; i++ ){
+                       string extension = toString(i) + ".temp";
+
+                       // Allocate memory for thread data.
+                       pairwiseData* tempDist = new pairwiseData((filename+extension), align, "square", Estimators[0], countends, output, alignDB, m, lines[i+1].start, lines[i+1].end, match, misMatch, gapOpen, gapExtend, longestBase, i);
+                       pDataArray.push_back(tempDist);
+                       processIDS.push_back(i);
+                       
+                       if (output != "square") { hThreadArray[i] = CreateThread(NULL, 0, MyPairwiseThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);  } 
+            else { hThreadArray[i] = CreateThread(NULL, 0, MyPairwiseSquareThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);  }
+               }
+               
+               //do your part
+               if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
+               else { driver(lines[0].start, lines[0].end, filename, "square"); }
+               
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+
+#endif
+        
+        //append and remove temp files
                for (int i=0;i<processIDS.size();i++) { 
                        m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filename);
                        m->mothurRemove((filename + toString(processIDS[i]) + ".temp"));
                }
-#endif
+        
        }
        catch(exception& e) {
                m->errorOut(e, "PairwiseSeqsCommand", "createProcesses");
@@ -587,6 +583,33 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, fl
        try {
 
                int startTime = time(NULL);
+        
+        Alignment* alignment;
+        if(align == "gotoh")                   {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
+               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
+               else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
+               else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
+               else {
+                       m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
+                       m->mothurOutEndLine();
+                       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
+               }
+        
+        ValidCalculators validCalculator;
+        Dist* distCalculator;
+        if (countends) {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")                 {       distCalculator = new ignoreGaps();      }
+                else if (Estimators[0] == "eachgap")   {       distCalculator = new eachGapDist();     }
+                else if (Estimators[0] == "onegap")            {       distCalculator = new oneGapDist();      }
+            }
+        }else {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")         {       distCalculator = new ignoreGaps();                                      }
+                else if (Estimators[0] == "eachgap"){  distCalculator = new eachGapIgnoreTermGapDist();        }
+                else if (Estimators[0] == "onegap")    {       distCalculator = new oneGapIgnoreTermGapDist();         }
+            }
+        }
                
                //column file
                ofstream outFile(dFileName.c_str(), ios::trunc);
@@ -606,7 +629,7 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, fl
                        
                        for(int j=0;j<i;j++){
                                
-                               if (m->control_pressed) { outFile.close(); return 0;  }
+                               if (m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0;  }
                                
                                if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
                                        alignment->resize(alignDB.get(i).getUnaligned().length()+1);
@@ -643,6 +666,8 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, fl
                m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
                
                outFile.close();
+        delete alignment;
+        delete distCalculator;
                
                return 1;
        }
@@ -657,7 +682,34 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, st
        try {
 
                int startTime = time(NULL);
+        
+        Alignment* alignment;
+        if(align == "gotoh")                   {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
+               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
+               else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
+               else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
+               else {
+                       m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
+                       m->mothurOutEndLine();
+                       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
+               }
                
+        ValidCalculators validCalculator;
+        Dist* distCalculator;
+        if (countends) {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")                 {       distCalculator = new ignoreGaps();      }
+                else if (Estimators[0] == "eachgap")   {       distCalculator = new eachGapDist();     }
+                else if (Estimators[0] == "onegap")            {       distCalculator = new oneGapDist();      }
+            }
+        }else {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")         {       distCalculator = new ignoreGaps();                                      }
+                else if (Estimators[0] == "eachgap"){  distCalculator = new eachGapIgnoreTermGapDist();        }
+                else if (Estimators[0] == "onegap")    {       distCalculator = new oneGapIgnoreTermGapDist();         }
+            }
+        }
+
                //column file
                ofstream outFile(dFileName.c_str(), ios::trunc);
                outFile.setf(ios::fixed, ios::showpoint);
@@ -675,7 +727,7 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, st
                        
                        for(int j=0;j<alignDB.getNumSeqs();j++){
                                
-                               if (m->control_pressed) { outFile.close(); return 0;  }
+                               if (m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0;  }
                                
                                if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
                                        alignment->resize(alignDB.get(i).getUnaligned().length()+1);
@@ -708,6 +760,8 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, st
                m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
                
                outFile.close();
+        delete alignment;
+        delete distCalculator;
                
                return 1;
        }
@@ -723,14 +777,41 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI,
        try {
                MPI_Status status;
                int startTime = time(NULL);
+        
+        Alignment* alignment;
+        if(align == "gotoh")                   {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
+               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
+               else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
+               else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
+               else {
+                       m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
+                       m->mothurOutEndLine();
+                       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
+               }
                
+        ValidCalculators validCalculator;
+        Dist* distCalculator;
+        if (countends) {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")                 {       distCalculator = new ignoreGaps();      }
+                else if (Estimators[0] == "eachgap")   {       distCalculator = new eachGapDist();     }
+                else if (Estimators[0] == "onegap")            {       distCalculator = new oneGapDist();      }
+            }
+        }else {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")         {       distCalculator = new ignoreGaps();                                      }
+                else if (Estimators[0] == "eachgap"){  distCalculator = new eachGapIgnoreTermGapDist();        }
+                else if (Estimators[0] == "onegap")    {       distCalculator = new oneGapIgnoreTermGapDist();         }
+            }
+        }
+
                string outputString = "";
                
                for(int i=startLine;i<endLine;i++){
        
                        for(int j=0;j<i;j++){
                                
-                               if (m->control_pressed) {  return 0;  }
+                               if (m->control_pressed) { delete alignment; delete distCalculator; return 0;  }
                                
                                if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
                                        alignment->resize(alignDB.get(i).getUnaligned().length()+1);
@@ -772,7 +853,8 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI,
                        delete buf;
                        
                }
-               
+               delete alignment;
+        delete distCalculator;
                return 1;
        }
        catch(exception& e) {
@@ -794,7 +876,33 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi
 
                MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
 
-               
+               Alignment* alignment;
+        if(align == "gotoh")                   {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
+               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
+               else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
+               else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
+               else {
+                       m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
+                       m->mothurOutEndLine();
+                       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
+               }
+        
+        ValidCalculators validCalculator;
+        Dist* distCalculator;
+        if (countends) {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")                 {       distCalculator = new ignoreGaps();      }
+                else if (Estimators[0] == "eachgap")   {       distCalculator = new eachGapDist();     }
+                else if (Estimators[0] == "onegap")            {       distCalculator = new oneGapDist();      }
+            }
+        }else {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")         {       distCalculator = new ignoreGaps();                                      }
+                else if (Estimators[0] == "eachgap"){  distCalculator = new eachGapIgnoreTermGapDist();        }
+                else if (Estimators[0] == "onegap")    {       distCalculator = new oneGapIgnoreTermGapDist();         }
+            }
+        }
+
                
                string outputString = "";
                size = 0;
@@ -811,7 +919,7 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi
                        
                        for(int j=0;j<i;j++){
                                
-                               if (m->control_pressed) {  return 0;  }
+                               if (m->control_pressed) { delete alignment; delete distCalculator; return 0;  }
                                
                                if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
                                        alignment->resize(alignDB.get(i).getUnaligned().length()+1);
@@ -848,6 +956,8 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi
                }
                
                MPI_File_close(&outMPI);
+        delete alignment;
+        delete distCalculator;
                
                return 1;
        }
@@ -870,8 +980,33 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi
 
                MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
                
+               Alignment* alignment;
+        if(align == "gotoh")                   {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
+               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
+               else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
+               else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
+               else {
+                       m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
+                       m->mothurOutEndLine();
+                       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
+               }
                
-               
+        ValidCalculators validCalculator;
+        Dist* distCalculator;
+        if (countends) {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")                 {       distCalculator = new ignoreGaps();      }
+                else if (Estimators[0] == "eachgap")   {       distCalculator = new eachGapDist();     }
+                else if (Estimators[0] == "onegap")            {       distCalculator = new oneGapDist();      }
+            }
+        }else {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")         {       distCalculator = new ignoreGaps();                                      }
+                else if (Estimators[0] == "eachgap"){  distCalculator = new eachGapIgnoreTermGapDist();        }
+                else if (Estimators[0] == "onegap")    {       distCalculator = new oneGapIgnoreTermGapDist();         }
+            }
+        }
+        
                string outputString = "";
                size = 0;
                
@@ -887,7 +1022,7 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi
                        
                        for(int j=0;j<alignDB.getNumSeqs();j++){
                                
-                               if (m->control_pressed) {  return 0;  }
+                               if (m->control_pressed) { delete alignment; delete distCalculator; return 0;  } //free both locally allocated helpers before aborting
                                
                                if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
                                        alignment->resize(alignDB.get(i).getUnaligned().length()+1);
@@ -925,6 +1060,7 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi
                
                MPI_File_close(&outMPI);
                
+        delete alignment; delete distCalculator; //distCalculator is a local allocation in this function, release it with the aligner
                return 1;
        }
        catch(exception& e) {
index a3a91f7bcd1f4ad2bbd52024d36153399ebc1414..e75f63c3fe80a4eaad7a2bccfae3ad266346b9e4 100644 (file)
 #include "validcalculator.h"
 #include "dist.h"
 #include "sequencedb.h"
+#include "sequence.hpp"
+
+#include "gotohoverlap.hpp"
+#include "needlemanoverlap.hpp"
+#include "blastalign.hpp"
+#include "noalign.hpp"
+
+#include "ignoregaps.h"
+#include "eachgapdist.h"
+#include "eachgapignore.h"
+#include "onegapdist.h"
+#include "onegapignore.h"
 
 class PairwiseSeqsCommand : public Command {
        
@@ -44,8 +56,6 @@ private:
        vector<int> processIDS;   //end line, processid
        vector<distlinePair> lines;
        
-       Alignment* alignment;
-       Dist* distCalculator;
        SequenceDB alignDB;
        
        void createProcesses(string);
@@ -60,12 +70,254 @@ private:
        
        string fastaFileName, align, calc, outputDir, output;
        float match, misMatch, gapOpen, gapExtend, cutoff;
-       int processors;
+       int processors, longestBase;
        vector<string> fastaFileNames, Estimators;
        vector<string> outputNames;
        
        bool abort, countends, compress;
 };
 
+/**************************************************************************************************/
+// Work description handed to one Windows worker thread.
+// The thread entry points receive a single LPVOID argument, so everything a
+// worker needs is bundled in this struct and passed by pointer.
+//
+// Members read by the thread functions in this file:
+//   outputFileName - file the worker truncates and writes to
+//   align          - aligner name ("gotoh", "needleman", "blast", "noalign")
+//   distcalcType   - distance calculator name ("nogaps", "eachgap", "onegap")
+//   countends      - chooses between the plain and the IgnoreTermGap calculators
+//   output         - "lt" or "column", read by MyPairwiseThreadFunction
+//   start, end     - sequence indices processed are start <= i < end
+//   cutoff         - column output only: pairs with dist > cutoff are not written
+//   match, misMatch, gapOpen, gapExtend, longestBase - aligner constructor arguments
+//   alignDB, m     - sequences to compare and the logger / abort flag holder
+// Members written by the thread functions: count (set to end).
+// square and threadID are stored but not read by the thread functions in this file.
+struct pairwiseData {
+    string outputFileName;
+    string align, square, distcalcType, output;
+    unsigned long long start;
+    unsigned long long end;
+    MothurOut* m;
+    float match, misMatch, gapOpen, gapExtend, cutoff;
+    int count, threadID, longestBase;
+    bool countends;
+    SequenceDB alignDB;
+
+    // Gives every scalar member a defined value so a default-constructed
+    // instance never exposes indeterminate data.
+    pairwiseData() : start(0), end(0), m(NULL), match(0), misMatch(0), gapOpen(0), gapExtend(0), cutoff(1.0f), count(0), threadID(0), longestBase(0), countends(false) {}
+
+    // Parameters map one-to-one onto the members; thr is stored as longestBase.
+    // cut is the distance cutoff. It is a defaulted trailing parameter so
+    // existing 16-argument calls keep compiling, and it closes the hole where
+    // cutoff was never assigned by this constructor.
+    // NOTE(review): 1.0 is assumed to match the command's own cutoff default -
+    // confirm, and pass the real cutoff from the code that spawns the threads.
+    pairwiseData(string ofn, string al, string sq, string di, bool co, string op, SequenceDB DB, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int thr, int tid, float cut = 1.0f) {
+        outputFileName = ofn;
+        m = mout;
+        start = st;
+        end = en;
+        match = ma;
+        misMatch = misMa;
+        gapOpen = gapO;
+        gapExtend = gapE;
+        cutoff = cut;
+        longestBase = thr;
+        align = al;
+        square = sq;
+        distcalcType = di;
+        countends = co;
+        alignDB = DB;
+        count = 0;
+        output = op;
+        threadID = tid;
+    }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+// Windows worker-thread entry point for the square distance matrix.
+// lpParam is cast to pairwiseData*. For each sequence i with start <= i < end
+// the thread writes one row to outputFileName: the name of sequence i (padded
+// with spaces to at least 10 characters), then the distance between i and
+// every sequence j in alignDB, tab separated. The worker whose start is 0
+// first writes the number of sequences on its own line.
+// Returns 0 on completion, when control_pressed is observed, or when
+// distcalcType selects no calculator. A caught exception is reported through
+// errorOut and ends the process with exit(1).
+// NOTE(review): the exit code does not distinguish those cases - confirm the
+// spawning code does not rely on it.
+static DWORD WINAPI MyPairwiseSquareThreadFunction(LPVOID lpParam){ 
+    pairwiseData* pDataArray;
+    pDataArray = (pairwiseData*)lpParam;
+    
+    try {
+        ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc);
+        outFile.setf(ios::fixed, ios::showpoint);
+        outFile << setprecision(4);
+        
+        pDataArray->count = pDataArray->end;
+        
+        int startTime = time(NULL);
+        
+        //choose the aligner; an unrecognized name falls back to needleman after a notice
+        Alignment* alignment;
+        if(pDataArray->align == "gotoh")            {   alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);   }
+        else if(pDataArray->align == "needleman")   {   alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);   }
+        else if(pDataArray->align == "blast")       {   alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch);   }
+        else if(pDataArray->align == "noalign")     {   alignment = new NoAlign();   }
+        else {
+            pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
+            pDataArray->m->mothurOutEndLine();
+            alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);
+        }
+        
+        //choose the distance calculator; the pointer stays NULL when distcalcType matches nothing
+        ValidCalculators validCalculator;
+        Dist* distCalculator = NULL;
+        if (pDataArray->countends) {
+            if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
+                if (pDataArray->distcalcType == "nogaps")       {   distCalculator = new ignoreGaps();   }
+                else if (pDataArray->distcalcType == "eachgap") {   distCalculator = new eachGapDist();  }
+                else if (pDataArray->distcalcType == "onegap")  {   distCalculator = new oneGapDist();   }
+            }
+        }else {
+            if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
+                if (pDataArray->distcalcType == "nogaps")       {   distCalculator = new ignoreGaps();                }
+                else if (pDataArray->distcalcType == "eachgap") {   distCalculator = new eachGapIgnoreTermGapDist();  }
+                else if (pDataArray->distcalcType == "onegap")  {   distCalculator = new oneGapIgnoreTermGapDist();   }
+            }
+        }
+        
+        //without a calculator the loop below would dereference a null pointer, so stop here
+        if (distCalculator == NULL) {
+            pDataArray->m->mothurOut(pDataArray->distcalcType + " is not a valid distance calculator."); pDataArray->m->mothurOutEndLine();
+            outFile.close(); delete alignment; return 0;
+        }
+        
+        if(pDataArray->start == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl; }
+        
+        for(int i=pDataArray->start;i<pDataArray->end;i++){
+            
+            string name = pDataArray->alignDB.get(i).getName();
+            //pad with spaces to make compatible
+            if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
+            
+            outFile << name << '\t';
+            
+            for(int j=0;j<pDataArray->alignDB.getNumSeqs();j++){
+                
+                if (pDataArray->m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0;  }
+                
+                //resize the aligner when either sequence is longer than its current row count
+                if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
+                    alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1);
+                }
+                
+                if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
+                    alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1);
+                }
+                
+                //local Sequence objects receive the pairwise alignment
+                Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned());
+                Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned());
+                
+                alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
+                seqI.setAligned(alignment->getSeqAAln());
+                seqJ.setAligned(alignment->getSeqBAln());
+                
+                distCalculator->calcDist(seqI, seqJ);
+                double dist = distCalculator->getDist();
+                
+                outFile << dist << '\t'; 
+            }
+            
+            outFile << endl; 
+            
+            //progress report every 100 rows
+            if(i % 100 == 0){
+                pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+            }
+            
+        }
+        pDataArray->m->mothurOut(toString(pDataArray->end-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+        
+        outFile.close();
+        delete alignment;
+        delete distCalculator;
+        
+    }
+    catch(exception& e) {
+        pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseSquareThreadFunction");
+        exit(1);
+    }
+    
+    //reached on normal completion only (the handler above exits the process);
+    //a DWORD function must not flow off its end without returning a value
+    return 0;
+} 
+
+/**************************************************************************************************/
+// Windows worker-thread entry point for the lower-triangle / column output.
+// lpParam is cast to pairwiseData*. For each sequence i with start <= i < end
+// and each j < i the distance between the pairwise-aligned sequences is
+// computed and written to outputFileName:
+//   output == "lt"     - one row per i: padded name, then every distance, tab separated;
+//                        the worker whose start is 0 first writes the sequence count
+//   output == "column" - one "nameI nameJ dist" line per pair with dist <= cutoff
+// Returns 0 on completion, when control_pressed is observed, or when
+// distcalcType selects no calculator. A caught exception is reported through
+// errorOut and ends the process with exit(1).
+// NOTE(review): cutoff is read from pairwiseData here - make sure the code that
+// fills in the struct assigns it before the thread starts.
+static DWORD WINAPI MyPairwiseThreadFunction(LPVOID lpParam){ 
+    pairwiseData* pDataArray;
+    pDataArray = (pairwiseData*)lpParam;
+    
+    try {
+        ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc);
+        outFile.setf(ios::fixed, ios::showpoint);
+        outFile << setprecision(4);
+        
+        pDataArray->count = pDataArray->end;
+        
+        int startTime = time(NULL);
+        
+        //choose the aligner; an unrecognized name falls back to needleman after a notice
+        Alignment* alignment;
+        if(pDataArray->align == "gotoh")            {   alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);   }
+        else if(pDataArray->align == "needleman")   {   alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);   }
+        else if(pDataArray->align == "blast")       {   alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch);   }
+        else if(pDataArray->align == "noalign")     {   alignment = new NoAlign();   }
+        else {
+            pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
+            pDataArray->m->mothurOutEndLine();
+            alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);
+        }
+        
+        //choose the distance calculator; the pointer stays NULL when distcalcType matches nothing
+        ValidCalculators validCalculator;
+        Dist* distCalculator = NULL;
+        if (pDataArray->countends) {
+            if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
+                if (pDataArray->distcalcType == "nogaps")       {   distCalculator = new ignoreGaps();   }
+                else if (pDataArray->distcalcType == "eachgap") {   distCalculator = new eachGapDist();  }
+                else if (pDataArray->distcalcType == "onegap")  {   distCalculator = new oneGapDist();   }
+            }
+        }else {
+            if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
+                if (pDataArray->distcalcType == "nogaps")       {   distCalculator = new ignoreGaps();                }
+                else if (pDataArray->distcalcType == "eachgap") {   distCalculator = new eachGapIgnoreTermGapDist();  }
+                else if (pDataArray->distcalcType == "onegap")  {   distCalculator = new oneGapIgnoreTermGapDist();   }
+            }
+        }
+        
+        //without a calculator the loop below would dereference a null pointer, so stop here
+        if (distCalculator == NULL) {
+            pDataArray->m->mothurOut(pDataArray->distcalcType + " is not a valid distance calculator."); pDataArray->m->mothurOutEndLine();
+            outFile.close(); delete alignment; return 0;
+        }
+        
+        if((pDataArray->output == "lt") && pDataArray->start == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl; }
+        
+        for(int i=pDataArray->start;i<pDataArray->end;i++){
+            
+            if(pDataArray->output == "lt")  {
+                string name = pDataArray->alignDB.get(i).getName();
+                if (name.length() < 10) { //pad with spaces to make compatible
+                    while (name.length() < 10) {  name += " ";  }
+                }
+                outFile << name << '\t';
+            }
+            
+            //only pairs with j < i are computed
+            for(int j=0;j<i;j++){
+                
+                if (pDataArray->m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0;  }
+                
+                //resize the aligner when either sequence is longer than its current row count
+                if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
+                    alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1);
+                }
+                
+                if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
+                    alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1);
+                }
+                
+                //local Sequence objects receive the pairwise alignment
+                Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned());
+                Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned());
+                
+                alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
+                seqI.setAligned(alignment->getSeqAAln());
+                seqJ.setAligned(alignment->getSeqBAln());
+                
+                distCalculator->calcDist(seqI, seqJ);
+                double dist = distCalculator->getDist();
+                
+                //the cutoff filters column output only; "lt" rows always get every value
+                if(dist <= pDataArray->cutoff){
+                    if (pDataArray->output == "column") { outFile << pDataArray->alignDB.get(i).getName() << ' ' << pDataArray->alignDB.get(j).getName() << ' ' << dist << endl; }
+                }
+                if (pDataArray->output == "lt") {  outFile << dist << '\t'; }
+            }
+            
+            if (pDataArray->output == "lt") { outFile << endl; }
+            
+            //progress report every 100 rows
+            if(i % 100 == 0){
+                pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+            }
+            
+        }
+        pDataArray->m->mothurOut(toString(pDataArray->end-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+        
+        outFile.close();
+        delete alignment;
+        delete distCalculator;
+        
+    }
+    catch(exception& e) {
+        pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseThreadFunction");
+        exit(1);
+    }
+    
+    //reached on normal completion only (the handler above exits the process);
+    //a DWORD function must not flow off its end without returning a value
+    return 0;
+} 
+
+#endif
+
+
 #endif
 
index 730873a4bd64e5409c5e3580bfd15338179a787c..e6d7ce6d1aa30da836bb88f2833bd666ae11bd85 100644 (file)
@@ -33,8 +33,10 @@ string ParseFastaQCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The fastq.info command reads a fastq file and creates a fasta and quality file.\n";
-               helpString += "The fastq.info command parameter is fastq, and it is required.\n";
+               helpString += "The fastq.info command parameters are fastq, fasta and qfile; fastq is required.\n";
                helpString += "The fastq.info command should be in the following format: fastq.info(fastaq=yourFastaQFile).\n";
+        helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
+        helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
                helpString += "Example fastq.info(fastaq=test.fastaq).\n";
                helpString += "Note: No spaces between parameter labels (i.e. fastq), '=' and yourFastQFile.\n";
                return helpString;
index 49a73505c3df0c77b4ff3fe0b6e71f038eb5f13c..d26bc270efcdd7fff7cb1292ddc2bfb786a48665 100644 (file)
@@ -54,7 +54,7 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) {
                        }
                }
                
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                if(processors == 1){
                        data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
                }else{
@@ -91,7 +91,7 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) {
 
 EstOutput Parsimony::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                vector<int> processIDS;
                
diff --git a/pcrseqscommand.h b/pcrseqscommand.h
new file mode 100644 (file)
index 0000000..03092bc
--- /dev/null
@@ -0,0 +1,385 @@
+#ifndef Mothur_pcrseqscommand_h
+#define Mothur_pcrseqscommand_h
+
+//
+//  pcrseqscommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 3/14/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+
+#include "command.hpp"
+#include "sequence.hpp"
+#include "trimoligos.h"
+#include "alignment.hpp"
+#include "needlemanoverlap.hpp"
+
+// Command object for mothur's "pcr.seqs" command (the name returned by
+// getCommandName()), registered under the "Sequence Processing" category.
+// This header only declares the interface; the member functions are defined
+// in the accompanying .cpp file.
+class PcrSeqsCommand : public Command {
+public:
+       PcrSeqsCommand(string);
+       PcrSeqsCommand();
+       ~PcrSeqsCommand(){}
+       
+       vector<string> setParameters();
+       string getCommandName()                 { return "pcr.seqs";    }
+       string getCommandCategory()             { return "Sequence Processing";         }
+       string getHelpString(); 
+       string getCitation() { return "http://www.mothur.org/wiki/Pcr.seqs"; }
+       string getDescription()         { return "pcr.seqs"; }
+    
+       int execute(); 
+       // Prints the text produced by getHelpString() through the MothurOut logger.
+       void help() { m->mothurOut(getHelpString()); }  
+       
+private:
+    
+    // A [start, end] pair of unsigned 64-bit values describing one unit of work.
+    // NOTE(review): presumably file offsets / sequence counts handed to each
+    // worker (compare pcrData::fstart / pcrData::fend) -- confirm in the .cpp.
+    struct linePair {
+        unsigned long long start;
+        unsigned long long end;
+        linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
+        // Zero-initialise so a default-constructed pair never carries
+        // indeterminate values (the original left both members unset).
+        linePair() : start(0), end(0) {}
+    };
+    
+    vector<linePair> lines;
+       bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
+    bool abort, keepprimer, keepdots;
+       string fastafile, oligosfile, taxfile, groupfile, namefile, ecolifile, outputDir, nomatch;
+       int start, end, pdiffs, processors, length;
+       
+    vector<string> revPrimer, outputNames;
+       vector<string> primers;
+    
+    int writeAccnos(set<string>);
+    int readName(set<string>&);
+    int readGroup(set<string>);
+    int readTax(set<string>);
+    bool readOligos();
+    bool readEcoli();
+       int driverPcr(string, string, string, set<string>&, linePair);  
+       int createProcesses(string, string, string, set<string>&);
+    // The Windows thread entry point MyPcrThreadFunction carries inlined copies
+    // of the four helpers below (see the commented-out calls in its body),
+    // because a free function cannot call these private members.
+    bool findForward(Sequence&, int&, int&);
+    bool findReverse(Sequence&, int&, int&);
+    bool isAligned(string, map<int, int>&);
+    bool compareDNASeq(string, string);
+    string reverseOligo(string);
+};
+
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+// Per-thread work packet for MyPcrThreadFunction. One instance is handed to
+// each worker through a void pointer; the worker reads its inputs from it and
+// writes its results (count, badSeqNames) back into it.
+struct pcrData {
+       string filename;                        // fasta file the worker reads
+    string goodFasta, badFasta;             // output files for kept / rejected sequences
+    string oligosfile, ecolifile;           // non-empty value selects the oligos / ecoli trimming mode
+    string nomatch;                         // "reject" => a sequence with no primer match is written to badFasta
+       unsigned long long fstart;              // stream position the worker seeks to before reading
+       unsigned long long fend;                // number of sequences the worker processes
+       int count;                              // set by the worker to the number of sequences it was given
+       int start, end;                         // alignment positions to trim to; -1 means "not set"
+       int length;                             // expected aligned length when ecolifile is used
+       MothurOut* m;                           // shared logger / control_pressed flag
+       vector<string> primers;                 // forward primers
+    vector<string> revPrimer;               // reverse primers
+    set<string> badSeqNames;                // names of rejected sequences, filled in by the worker
+    bool keepprimer, keepdots;
+       
+       
+       // Leaves every scalar in a defined state (the original left them all
+       // indeterminate); start/end use the same -1 "not set" sentinel the
+       // worker tests for.
+       pcrData()
+        : fstart(0), fend(0), count(0), start(-1), end(-1), length(0), m(0),
+          keepprimer(false), keepdots(false) {}
+       
+       // Initialiser list is written in member-declaration order; badSeqNames
+       // starts empty and count starts at 0.
+       pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, vector<string> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, unsigned long long fst, unsigned long long fen)
+        : filename(f), goodFasta(gf), badFasta(bfn), oligosfile(ol), ecolifile(ec), nomatch(nm),
+          fstart(fst), fend(fen), count(0), start(st), end(en), length(l), m(mout),
+          primers(pr), revPrimer(rpr), keepprimer(kp), keepdots(kd) {}
+};
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+// Returns true when rawChunk satisfies the (possibly degenerate) primer oligo.
+// This is the comparison the thread body used to inline twice; it lives here as
+// a free function because the thread entry point cannot call
+// PcrSeqsCommand::compareDNASeq. Exactly oligo.length() characters of rawChunk
+// are examined, so the caller must pass a chunk at least that long.
+static bool pcrThreadOligoMatches(const string& oligo, const string& rawChunk) {
+    const int olength = oligo.length();
+    for (int k = 0; k < olength; k++) {
+        const char o = oligo[k];
+        const char r = rawChunk[k];
+        if (o == r) { continue; }
+        
+        bool match = true;
+        switch (o) {
+            case 'A': case 'T': case 'G': case 'C':  match = false;                                break;  //plain base must match exactly
+            case 'N': case 'I':                      match = (r != 'N');                           break;
+            case 'R':                                match = (r == 'A' || r == 'G');               break;
+            case 'Y':                                match = (r == 'C' || r == 'T');               break;
+            case 'M':                                match = (r == 'C' || r == 'A');               break;
+            case 'K':                                match = (r == 'T' || r == 'G');               break;
+            case 'W':                                match = (r == 'T' || r == 'A');               break;
+            case 'S':                                match = (r == 'C' || r == 'G');               break;
+            case 'B':                                match = (r == 'C' || r == 'T' || r == 'G');   break;
+            case 'D':                                match = (r == 'A' || r == 'T' || r == 'G');   break;
+            case 'H':                                match = (r == 'A' || r == 'T' || r == 'C');   break;
+            case 'V':                                match = (r == 'A' || r == 'C' || r == 'G');   break;
+            default:                                                                               break;  //any other oligo character is accepted, as before
+        }
+        if (!match) { return false; }
+    }
+    return true;
+}
+
+// Windows thread entry point for pcr.seqs.
+// lpParam must point to a pcrData. The thread reads pDataArray->fend sequences
+// from pDataArray->filename starting at pDataArray->fstart, trims each one
+// (by primers when oligosfile is set, otherwise by start/end positions), and
+// writes it to goodFasta or, when rejected, to badFasta with "|<trashCode>"
+// appended to its name. Rejected names are also collected in badSeqNames.
+// Returns 0 on completion; on an exception it logs through errorOut and exits.
+static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ 
+    pcrData* pDataArray;
+    pDataArray = (pcrData*)lpParam;
+    
+    try {
+        ofstream goodFile;
+        pDataArray->m->openOutputFile(pDataArray->goodFasta, goodFile);
+        
+        ofstream badFile;
+        pDataArray->m->openOutputFile(pDataArray->badFasta, badFile);
+        
+        ifstream inFASTA;
+        pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
+        
+        //position the stream on this thread's first sequence
+        if ((pDataArray->fstart == 0) || (pDataArray->fstart == 1)) {
+            inFASTA.seekg(0);
+        }else { //this accounts for the difference in line endings. 
+            inFASTA.seekg(pDataArray->fstart-1); pDataArray->m->gobble(inFASTA); 
+        }
+        
+        set<int> lengths;  //distinct aligned lengths seen so far; more than one means the input is not aligned
+        pDataArray->count = pDataArray->fend;
+        for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
+            
+            if (pDataArray->m->control_pressed) {  break; }
+            
+            Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
+            
+            string trashCode = "";
+            if (currSeq.getName() != "") {
+                
+                bool goodSeq = true;
+                if (pDataArray->oligosfile != "") {
+                    //inlined PcrSeqsCommand::isAligned: any non-letter marks the sequence as aligned
+                    map<int, int> mapAligned;
+                    bool aligned = false;
+                    string seq = currSeq.getAligned(); 
+                    int countBases = 0;
+                    for (int k = 0; k < seq.length(); k++) {
+                        if (!isalpha(seq[k])) { aligned = true; }
+                        else { mapAligned[countBases] = k; countBases++; } //maps location in unaligned -> location in aligned.
+                    }                                                   //ie. the 3rd base may be at spot 10 in the alignment
+                                                                        //later when we trim we want to trim from spot 10.
+                    
+                    //process primers (inlined PcrSeqsCommand::findForward)
+                    if (pDataArray->primers.size() != 0) {
+                        int primerStart = 0; int primerEnd = 0;
+                        bool good = false;
+                        string rawSequence = currSeq.getUnaligned();
+                        
+                        for(int j=0;j<pDataArray->primers.size();j++){
+                            string oligo = pDataArray->primers[j];
+                            
+                            if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
+                            
+                            if(rawSequence.length() < oligo.length()) {  break;  }
+                            
+                            //search for primer, scanning left to right
+                            //NOTE(review): the bound excludes the last window (l == length - olength),
+                            //unlike the reverse scan below -- kept as-is, confirm against findForward.
+                            int olength = oligo.length();
+                            for (int l = 0; l < rawSequence.length()-olength; l++){
+                                if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
+                                string rawChunk = rawSequence.substr(l, olength);
+                                
+                                if (pcrThreadOligoMatches(oligo, rawChunk)) {
+                                    //record the match offset in the sequence (l), not the primer index (j):
+                                    //primerStart/primerEnd are used below as base positions.
+                                    primerStart = l;
+                                    primerEnd = primerStart + olength;
+                                    good = true; break;
+                                }
+                            }
+                            if (good) { break; }
+                        }      
+                        
+                        if (!good) { primerStart = 0; primerEnd = 0; }
+                        
+                        if(!good){     if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "f";     }
+                        else{
+                            //are you aligned
+                            if (aligned) { 
+                                if (!pDataArray->keepprimer)    {  
+                                    if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerEnd]);   }
+                                    else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd]));                                              }
+                                } 
+                                else                {  
+                                    if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
+                                    else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
+                                }
+                            }else { 
+                                if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
+                                else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
+                            }
+                        }
+                    }
+                    
+                    //process reverse primers (inlined PcrSeqsCommand::findReverse)
+                    //NOTE(review): mapAligned was built before the forward trim above; if that
+                    //trim shortened the sequence the map may no longer line up -- confirm.
+                    if (pDataArray->revPrimer.size() != 0) {
+                        int primerStart = 0; int primerEnd = 0;
+                        bool good = false;
+                        string rawSequence = currSeq.getUnaligned();
+                        
+                        for(int j=0;j<pDataArray->revPrimer.size();j++){
+                            string oligo = pDataArray->revPrimer[j];
+                            if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
+                            if(rawSequence.length() < oligo.length()) {  break;  }
+                            
+                            //search for primer, scanning right to left
+                            int olength = oligo.length();
+                            for (int l = rawSequence.length()-olength; l >= 0; l--){
+                                
+                                string rawChunk = rawSequence.substr(l, olength);
+                                
+                                if (pcrThreadOligoMatches(oligo, rawChunk)) {
+                                    //match offset (l), not the primer index (j)
+                                    primerStart = l;
+                                    primerEnd = primerStart + olength;
+                                    good = true; break;
+                                }
+                            }
+                            if (good) { break; }
+                        }      
+                        
+                        if (!good) { primerStart = 0; primerEnd = 0; }
+                        
+                        if(!good){     if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r";     }
+                        else{ 
+                            //are you aligned
+                            if (aligned) { 
+                                if (!pDataArray->keepprimer)    {  
+                                    if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
+                                    else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));   }
+                                } 
+                                else                {  
+                                    if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd]); }
+                                    else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd]));   }
+                                }
+                            }
+                            else { 
+                                if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
+                                else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
+                            }
+                        }
+                    }
+                }else if (pDataArray->ecolifile != "") {
+                    //make sure the seqs are aligned
+                    lengths.insert(currSeq.getAligned().length());
+                    if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
+                    else if (currSeq.getAligned().length() != pDataArray->length) {
+                        pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break; 
+                    }else {
+                        //start/end come from the work packet; bare start/end are not in scope in a free function
+                        if (pDataArray->keepdots)   { 
+                            currSeq.filterToPos(pDataArray->start); 
+                            currSeq.filterFromPos(pDataArray->end);
+                        }else {
+                            string seqString = currSeq.getAligned().substr(0, pDataArray->end);
+                            seqString = seqString.substr(pDataArray->start);
+                            currSeq.setAligned(seqString); 
+                        }
+                    }
+                }else{ //using start and end to trim
+                    //make sure the seqs are aligned
+                    lengths.insert(currSeq.getAligned().length());
+                    if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
+                    else {
+                        //trim the tail first so that start still indexes the original columns
+                        if (pDataArray->end != -1) {
+                            if (pDataArray->end > currSeq.getAligned().length()) {  pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; }
+                            else {
+                                if (pDataArray->keepdots)   { currSeq.filterFromPos(pDataArray->end); }
+                                else {
+                                    string seqString = currSeq.getAligned().substr(0, pDataArray->end);
+                                    currSeq.setAligned(seqString); 
+                                }
+                            }
+                        }
+                        if (pDataArray->start != -1) { 
+                            if (pDataArray->keepdots)   {  currSeq.filterToPos(pDataArray->start);  }
+                            else {
+                                string seqString = currSeq.getAligned().substr(pDataArray->start);
+                                currSeq.setAligned(seqString); 
+                            }
+                        }
+                        
+                    }
+                }
+                
+                if(goodSeq == 1)    {   currSeq.printSequence(goodFile);        }
+                else {  
+                    pDataArray->badSeqNames.insert(currSeq.getName()); 
+                    currSeq.setName(currSeq.getName() + '|' + trashCode);
+                    currSeq.printSequence(badFile); 
+                }
+            }
+            
+            //report progress
+            if((i+1) % 100 == 0){   pDataArray->m->mothurOut("Processing sequence: " + toString(i+1)); pDataArray->m->mothurOutEndLine();           }
+        }
+        //report progress
+        if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOut("Thread Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();              }
+        
+        goodFile.close();
+        inFASTA.close();
+        badFile.close();
+        
+        return 0;
+        
+    }
+    catch(exception& e) {
+        pDataArray->m->errorOut(e, "PcrSeqsCommand", "MyPcrThreadFunction");
+        exit(1);
+    }
+} 
+
+#endif
+
+/**************************************************************************************************/
+
+
+
+#endif
index 86dc539b8e1a6e1385d178ab0473bcc966a71705..d5ccaf948feaef51156f3a77346fe0908e4598ec 100644 (file)
@@ -20,7 +20,7 @@ EstOutput PhyloDiversity::getValues(Tree* t, vector<int> treeNodes, vector< vect
                //initialize Dscore
                for (int i=0; i<globaldata->Groups.size(); i++) {               DScore[globaldata->Groups[i]] = 0.0;    }
        
-               /********************************************************
+               ********************************************************
                //calculate a D value for each group 
                for(int v=0;v<treeNodes.size();v++){
                                
@@ -75,7 +75,7 @@ EstOutput PhyloDiversity::getValues(Tree* t, vector<int> treeNodes, vector< vect
                exit(1);
        }
 }
-/**************************************************************************************************/
+**************************************************************************************************/
 
 
 
index 2b15d11d29580d1455ffea983788ab0715fe194b..abf9591f4060482a158df9b008e0f357d6c6b069 100644 (file)
@@ -353,7 +353,7 @@ int PhyloDiversityCommand::execute(){
                                if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) {  numSampledList.insert(diversity[mGroups[j]].size()-1); }
                        }
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                if(processors == 1){
                                        driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
                                }else{
@@ -403,7 +403,7 @@ int PhyloDiversityCommand::execute(){
 //**********************************************************************************************************************
 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
        try {
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                
                vector<int> processIDS;
index dba1e3b56951f8ae1c484ace22187a9b65053d35..a9ef6cb0d85f44bde915606eb452db993ec40d8d 100644 (file)
@@ -128,8 +128,6 @@ PhyloTree::PhyloTree(string tfile){
                maxLevel = 0;
                calcTotals = true;
                string name, tax;
-               addSeqToTree("unknown", "unknown;");
-
                
                #ifdef USE_MPI
                        int pid, num, processors;
@@ -193,7 +191,16 @@ PhyloTree::PhyloTree(string tfile){
                #endif
        
                assignHeirarchyIDs(0);
-       
+        
+        
+        string unknownTax = "unknown;";
+        //added last taxon until you get desired level
+               for (int i = 1; i < maxLevel; i++) {
+                       unknownTax += "unclassfied;";
+               }
+        
+        addSeqToTree("unknown", unknownTax);
+        
                //create file for summary if needed
                setUp(tfile);
        }
index 1d6c391de69c83a86971873577a0df0b21efdbcd..2d2db08a40ea5e1aa8712902dcd419ddb1ad757a 100644 (file)
@@ -232,6 +232,7 @@ int PhylotypeCommand::execute(){
                                
                                ListVector list;
                                list.setLabel(level);
+                
                                //go through nodes and build listvector 
                                for (itCurrent = currentNodes.begin(); itCurrent != currentNodes.end(); itCurrent++) {
                        
@@ -244,18 +245,20 @@ int PhylotypeCommand::execute(){
                                        //make the names compatable with listvector
                                        string name = "";
                                        for (int i = 0; i < names.size(); i++) {  
-                                               if (namefile != "") {   
-                                                       map<string, string>::iterator itNames = namemap.find(names[i]);  //make sure this name is in namefile
-               
-                                                       if (itNames != namemap.end()) {  name += namemap[names[i]] + ",";   } //you found it in namefile
-                                                       else { m->mothurOut(names[i] + " is not in your namefile, please correct."); m->mothurOutEndLine(); exit(1);  }
-                                                       
-                                               }else{   name += names[i] + ",";        }
+                        
+                        if (names[i] != "unknown") {
+                            if (namefile != "") {      
+                                map<string, string>::iterator itNames = namemap.find(names[i]);  //make sure this name is in namefile
+                                
+                                if (itNames != namemap.end()) {  name += namemap[names[i]] + ",";   } //you found it in namefile
+                                else { m->mothurOut(names[i] + " is not in your namefile, please correct."); m->mothurOutEndLine(); exit(1);  }
+                                
+                            }else{   name += names[i] + ",";   }
+                        }
                                        }
                                        name = name.substr(0, name.length()-1);  //rip off extra ','
-                                       
                                        //add bin to list vector
-                                       list.push_back(name);
+                                       if (name != "") { list.push_back(name); } //caused by unknown
                                }       
                                
                                //print listvector
index af64c25ce290a808a5edc8295645bc16bf277856..b9f2434d19d4cc842bfac1dce4f6de5a0d1b742d 100644 (file)
@@ -74,7 +74,7 @@ int Pintail::doPrep() {
        #ifdef USE_MPI
                //do nothing
        #else
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        //find breakup of templatefile for quantiles
                        if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
                        else { 
@@ -529,7 +529,7 @@ Sequence* Pintail::findPairs(Sequence* q) {
 //**************************************************************************************************
 void Pintail::createProcessesQuan() {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                vector<int> processIDS;
                                
diff --git a/prcseqscommand.cpp b/prcseqscommand.cpp
new file mode 100644 (file)
index 0000000..d9c3776
--- /dev/null
@@ -0,0 +1,1066 @@
+//
+//  prcseqscommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 3/14/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "pcrseqscommand.h"
+
+//**********************************************************************************************************************
+vector<string> PcrSeqsCommand::setParameters(){        
+       try {
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
+               CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(poligos);
+               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptax);
+        CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(pecoli);
+               CommandParameter pstart("start", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pstart);
+               CommandParameter pend("end", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pend);
+               CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "",false,false); parameters.push_back(pnomatch);
+               CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+               CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepprimer);
+        CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pkeepdots);
+        CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+        
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PcrSeqsCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string PcrSeqsCommand::getHelpString(){        
+       try {
+               string helpString = "";
+               helpString += "The pcr.seqs command reads a fasta file ...\n";
+               
+               helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
+               helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PcrSeqsCommand", "getHelpString");
+               exit(1);
+       }
+}
+
+
+//**********************************************************************************************************************
+
+PcrSeqsCommand::PcrSeqsCommand(){      
+       try {
+               abort = true; calledHelp = true; 
+               setParameters();
+               vector<string> tempOutNames;
+               outputTypes["fasta"] = tempOutNames;
+               outputTypes["taxonomy"] = tempOutNames;
+               outputTypes["group"] = tempOutNames;
+               outputTypes["name"] = tempOutNames;
+        outputTypes["accnos"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+
+PcrSeqsCommand::PcrSeqsCommand(string option)  {
+       try {
+               
+               abort = false; calledHelp = false;   
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["fasta"] = tempOutNames;
+                       outputTypes["taxonomy"] = tempOutNames;
+                       outputTypes["group"] = tempOutNames;
+                       outputTypes["name"] = tempOutNames;
+            outputTypes["accnos"] = tempOutNames;
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("fasta");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("oligos");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
+                               }
+                
+                it = parameters.find("ecoli");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["ecoli"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("taxonomy");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
+                               }
+                               
+                               it = parameters.find("name");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
+                               }
+                
+                it = parameters.find("group");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
+                               
+                       }
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
+                       
+                       //check for required parameters
+                       fastafile = validParameter.validFile(parameters, "fasta", true);
+                       if (fastafile == "not found") {                                 
+                               fastafile = m->getFastaFile(); 
+                               if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       }else if (fastafile == "not open") { fastafile = ""; abort = true; }    
+                       else { m->setFastaFile(fastafile); }
+                       
+            
+                       //check for optional parameter and set defaults
+                       // ...at some point should added some additional type checking...
+                       string temp;
+                       temp = validParameter.validFile(parameters, "keepprimer", false);  if (temp == "not found")    {        temp = "f";     }
+                       keepprimer = m->isTrue(temp);   
+            
+            temp = validParameter.validFile(parameters, "keepdots", false);  if (temp == "not found")    {     temp = "t";     }
+                       keepdots = m->isTrue(temp);     
+            
+                       temp = validParameter.validFile(parameters, "oligos", true);
+                       if (temp == "not found"){       oligosfile = "";                }
+                       else if(temp == "not open"){    oligosfile = ""; abort = true;  } 
+                       else                                    {       oligosfile = temp; m->setOligosFile(oligosfile);                }
+                       
+            ecolifile = validParameter.validFile(parameters, "ecoli", true);
+                       if (ecolifile == "not found"){  ecolifile = "";         }
+                       else if(ecolifile == "not open"){       ecolifile = ""; abort = true;   } 
+                       
+            namefile = validParameter.validFile(parameters, "name", true);
+                       if (namefile == "not found"){   namefile = "";          }
+                       else if(namefile == "not open"){        namefile = ""; abort = true;    } 
+            else { m->setNameFile(namefile); }
+            
+            groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not found"){  groupfile = "";         }
+                       else if(groupfile == "not open"){       groupfile = ""; abort = true;   } 
+            else { m->setGroupFile(groupfile); }
+            
+            taxfile = validParameter.validFile(parameters, "taxonomy", true);
+                       if (taxfile == "not found"){    taxfile = "";           }
+                       else if(taxfile == "not open"){ taxfile = ""; abort = true;     } 
+            else { m->setTaxonomyFile(taxfile); }
+                       
+                       temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, pdiffs);
+                       
+                       temp = validParameter.validFile(parameters, "start", false);    if (temp == "not found") { temp = "-1"; }
+                       m->mothurConvert(temp, start);
+            
+            temp = validParameter.validFile(parameters, "end", false); if (temp == "not found") { temp = "-1"; }
+                       m->mothurConvert(temp, end);
+                       
+                       temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
+                       m->setProcessors(temp);
+                       m->mothurConvert(temp, processors); 
+                       
+            nomatch = validParameter.validFile(parameters, "nomatch", false);  if (nomatch == "not found") { nomatch = "reject"; }
+                       
+            if ((nomatch != "reject") && (nomatch != "keep")) { m->mothurOut("[ERROR]: " + nomatch + " is not a valid entry for nomatch. Choices are reject and keep.\n");  abort = true; }
+            
+            //didnt set anything
+                       if ((oligosfile == "") && (ecolifile == "") && (start == -1) && (end == -1)) {
+                m->mothurOut("[ERROR]: You did not set any options. Please provide an oligos or ecoli file, or set start or end.\n"); abort = true;
+            }
+            
+            if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end == -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; }
+            
+            if ((ecolifile != "") && (start != -1) && (end != -1)) {
+                m->mothurOut("[ERROR]: You provided an ecoli file , but set the start or end parameters. Unsure what you intend.  When you provide the ecoli file, mothur thinks you want to use the start and end of the sequence in the ecoli file.\n"); abort = true;
+            }
+
+            
+            if ((oligosfile != "") && (ecolifile != "")) {
+                 m->mothurOut("[ERROR]: You can not use an ecoli file at the same time as an oligos file.\n"); abort = true;
+            }
+                       
+                       //check to make sure you didn't forget the name file by mistake                 
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
+               }
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+
+int PcrSeqsCommand::execute(){
+       try{
+        //Runs pcr.seqs on fastafile. Returns 0 when only help was requested and 2 when abort is set without calledHelp; every later exit, including a control_pressed interruption, also returns 0.
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+               //NOTE(review): this local shadows the start setting parsed in the constructor; inside execute it is only the wall-clock start used for the timing message at the end
+        int start = time(NULL);
+        //output files are prefixed with outputDir, or with m->hasPath(fastafile) when no outputdir was given
+        string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
+               string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.fasta";
+               outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
+        //the scrap file is registered as an output further down, and only when it is not blank
+        string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.scrap.fasta";
+               
+               //readOligos/readEcoli are defined elsewhere; presumably they load the primers or the ecoli-derived positions - verify
+        length = 0;
+               if(oligosfile != ""){    readOligos();     }  if (m->control_pressed) {  return 0; }
+        if(ecolifile != "") {    readEcoli();      }  if (m->control_pressed) {  return 0; }
+        //fill lines with one linePair work unit per processor
+        vector<unsigned long long> positions; 
+        int numFastaSeqs = 0;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+        positions = m->divideFile(fastafile, processors);
+        for (int i = 0; i < (positions.size()-1); i++) {       lines.push_back(linePair(positions[i], positions[(i+1)]));      } //each pair of consecutive positions becomes one linePair
+#else
+        if (processors == 1) {
+            lines.push_back(linePair(0, 1000));
+        }else {
+            positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
+            if (positions.size() < processors) { processors = positions.size(); }
+            
+            //figure out how many sequences you have to process
+            int numSeqsPerProcessor = numFastaSeqs / processors;
+            for (int i = 0; i < processors; i++) {
+                int startIndex =  i * numSeqsPerProcessor;
+                if(i == (processors - 1)){     numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   } //the last worker also takes the remainder
+                lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); //in this branch the second linePair field is a sequence count, not a file position
+            }
+        }
+#endif
+        if (m->control_pressed) {  return 0; }
+        //badNames collects the names reported back by driverPcr/createProcesses
+        set<string> badNames;
+        if(processors == 1) {    numFastaSeqs = driverPcr(fastafile, trimSeqFile, badSeqFile, badNames, lines[0]);   }
+        else                {    numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames);       } 
+               
+               if (m->control_pressed) {  return 0; }          
+        
+        //don't write or keep if blank
+        if (badNames.size() != 0)   { writeAccnos(badNames);        }   
+        if (m->isBlank(badSeqFile)) { m->mothurRemove(badSeqFile);  }
+        else { outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile); }
+        //pass badNames to the name, group and taxonomy readers when those files were given; an interruption at any step removes every output file written so far
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
+        if (namefile != "")                    {               readName(badNames);             }   
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
+        if (groupfile != "")           {               readGroup(badNames);    }
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
+               if (taxfile != "")                      {               readTax(badNames);              }
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
+        //report the output files to the user
+        m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+               m->mothurOutEndLine();
+               m->mothurOutEndLine();
+               
+               //set fasta file as new current fastafile
+               string current = "";
+               itTypes = outputTypes.find("fasta");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
+               }
+               //same for the name file: the first registered output of that type becomes the current one
+               itTypes = outputTypes.find("name");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
+               }
+               //group file
+               itTypes = outputTypes.find("group");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
+               }
+               //accnos file
+               itTypes = outputTypes.find("accnos");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
+               }
+               //taxonomy file
+               itTypes = outputTypes.find("taxonomy");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
+               }
+        //elapsed time is measured from the local start taken at the top of execute
+               m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
+               m->mothurOutEndLine();
+
+               
+               return 0;       
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PcrSeqsCommand", "execute");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string badFileName, set<string>& badSeqNames) {
+       try {
+        //Runs driverPcr over the entries of lines with several workers, adds the workers' bad names to badSeqNames, appends their temp output to goodFileName and badFileName, and returns the summed sequence count.
+        vector<int> processIDS;   
+        int process = 1;
+               int num = 0;
+        //unix-like builds fork child processes; every other build uses the Windows thread code in the #else branch
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+        
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //remember the child's pid: its temp files are named after it and are merged in this order later
+                               process++;
+                       }else if (pid == 0){ //child: handle lines[process] and write to pid-named temp files
+                               num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", badSeqNames, lines[process]);
+                               
+                               //pass numSeqs and the bad names to the parent: first line is "count<TAB>number of bad names", then one name per line
+                               ofstream out;
+                               string tempFile = filename + toString(getpid()) + ".num.temp";
+                               m->openOutputFile(tempFile, out);
+                               out << num << '\t' << badSeqNames.size() << endl;
+                for (set<string>::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
+                    out << (*it) << endl;
+                }
+                               out.close();
+                               
+                               exit(0);
+                       }else { //fork failed: send SIGINT to the children already started, then quit
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
+                       }
+               }
+               //the parent processes lines[0] itself, writing directly to the final files
+        num = driverPcr(filename, goodFileName, badFileName, badSeqNames, lines[0]);
+        
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp); //NOTE(review): wait() reaps whichever child finishes next and stores its status in temp; the pid copied into temp does not select a child
+               }
+               //read each child's count and bad names back, then append its temp output to the final files and delete the temp files
+               for (int i = 0; i < processIDS.size(); i++) {
+                       ifstream in;
+                       string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
+                       m->openInputFile(tempFile, in);
+            int numBadNames = 0; string name = "";
+                       if (!in.eof()) { int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in); }
+            for (int j = 0; j < numBadNames; j++) {
+                in >> name; m->gobble(in);
+                badSeqNames.insert(name);
+            }
+                       in.close(); m->mothurRemove(tempFile);
+            
+            m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
+            m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
+            
+            m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
+            m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
+               }
+    #else
+        
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the pcrData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //Each thread records its bad names in its own pcrData; they are merged into badSeqNames after the threads finish.
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<pcrData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1]; //NOTE(review): processors looks like a runtime value, which would make this a variable-length array - confirm the target compiler accepts it
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=0; i<processors-1; i++ ){
+            
+            string extension = "";
+            if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); } //thread 0 writes straight to the final files; the others get numbered temp files
+            
+                       // Allocate memory for thread data.
+                       pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end);
+                       pDataArray.push_back(tempPcr);
+                       
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i] = CreateThread(NULL, 0, MyPcrThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+               }
+               
+        //do your part
+        num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"),badSeqNames, lines[processors-1]);
+        processIDS.push_back(processors-1);
+        
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       num += pDataArray[i]->count;
+            for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) {        badSeqNames.insert(*it);       }
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+        //append the numbered temp files (threads 1..processors-2, then this thread's own) to the final files and remove them
+        for (int i = 0; i < processIDS.size(); i++) {
+            m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
+            m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
+            
+            m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
+            m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
+               }
+        
+#endif 
+        
+        return num;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PcrSeqsCommand", "createProcesses");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, set<string>& badSeqNames, linePair filePos){
+       try {
+               ofstream goodFile;
+               m->openOutputFile(goodFasta, goodFile);
+        
+        ofstream badFile;
+               m->openOutputFile(badFasta, badFile);
+               
+               ifstream inFASTA;
+               m->openInputFile(filename, inFASTA);
+        
+               inFASTA.seekg(filePos.start);
+        
+               bool done = false;
+               int count = 0;
+        set<int> lengths;
+        
+               while (!done) {
+            
+                       if (m->control_pressed) {  break; }
+                       
+                       Sequence currSeq(inFASTA); m->gobble(inFASTA);
+            
+            string trashCode = "";
+                       if (currSeq.getName() != "") {
+                
+                bool goodSeq = true;
+                if (oligosfile != "") {
+                    map<int, int> mapAligned;
+                    bool aligned = isAligned(currSeq.getAligned(), mapAligned);
+                    
+                    //process primers
+                    if (primers.size() != 0) {
+                        int primerStart = 0; int primerEnd = 0;
+                        bool good = findForward(currSeq, primerStart, primerEnd);
+                        
+                        if(!good){     if (nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
+                        else{
+                            //are you aligned
+                            if (aligned) { 
+                                if (!keepprimer)    {  
+                                    if (keepdots)   { currSeq.filterToPos(mapAligned[primerEnd]);   }
+                                    else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd]));                                              }
+                                } 
+                                else                {  
+                                    if (keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
+                                    else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
+                                }
+                            }else { 
+                                if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
+                                else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
+                            }
+                        }
+                    }
+                    
+                    //process reverse primers
+                    if (revPrimer.size() != 0) {
+                        int primerStart = 0; int primerEnd = 0;
+                        bool good = findReverse(currSeq, primerStart, primerEnd);
+                        if(!good){     if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
+                        else{ 
+                            //are you aligned
+                            if (aligned) { 
+                                if (!keepprimer)    {  
+                                    if (keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
+                                    else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));   }
+                                } 
+                                else                {  
+                                    if (keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd]); }
+                                    else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd]));   }
+                                } 
+                            }
+                            else { 
+                                if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
+                                else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
+                            }
+                        }
+                    }
+                }else if (ecolifile != "") {
+                    //make sure the seqs are aligned
+                    lengths.insert(currSeq.getAligned().length());
+                    if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
+                    else if (currSeq.getAligned().length() != length) {
+                        m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break; 
+                    }else {
+                        if (keepdots)   { 
+                            currSeq.filterToPos(start); 
+                            currSeq.filterFromPos(end);
+                        }else {
+                            string seqString = currSeq.getAligned().substr(0, end);
+                            seqString = seqString.substr(start);
+                            currSeq.setAligned(seqString); 
+                        }
+                    }
+                }else{ //using start and end to trim
+                    //make sure the seqs are aligned
+                    lengths.insert(currSeq.getAligned().length());
+                    if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
+                    else {
+                        if (end != -1) {
+                            if (end > currSeq.getAligned().length()) {  m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; }
+                            else {
+                                if (keepdots)   { currSeq.filterFromPos(end); }
+                                else {
+                                    string seqString = currSeq.getAligned().substr(0, end);
+                                    currSeq.setAligned(seqString); 
+                                }
+                            }
+                        }
+                        if (start != -1) { 
+                            if (keepdots)   {  currSeq.filterToPos(start);  }
+                            else {
+                                string seqString = currSeq.getAligned().substr(start);
+                                currSeq.setAligned(seqString); 
+                            }
+                        }
+                    }
+                }
+                
+                //trimming removed all bases
+                if (currSeq.getUnaligned() == "") { goodSeq = false; }
+                
+                               if(goodSeq == 1)    {   currSeq.printSequence(goodFile);        }
+                               else {  
+                    badSeqNames.insert(currSeq.getName()); 
+                    currSeq.setName(currSeq.getName() + '|' + trashCode);
+                    currSeq.printSequence(badFile); 
+                }
+                count++;
+                       }
+                       
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            unsigned long long pos = inFASTA.tellg();
+            if ((pos == -1) || (pos >= filePos.end)) { break; }
+#else
+            if (inFASTA.eof()) { break; }
+#endif
+                       
+                       //report progress
+                       if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
+               }
+               //report progress
+               if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
+               
+        badFile.close();
+               goodFile.close();
+               inFASTA.close();
+               
+               return count;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PcrSeqsCommand", "driverPcr");
+               exit(1);
+       }
+}
+//********************************************************************/
+// Searches the unaligned bases of seq, left to right, for the first window that matches
+// any forward primer (as judged by compareDNASeq).
+//   seq         - sequence to search; only its unaligned form is read.
+//   primerStart - out: offset of the first matched base in the unaligned sequence.
+//   primerEnd   - out: offset one past the last matched base (primerStart + primer length).
+// Returns true on a match. Returns false, with both outputs reset to 0, when no primer
+// matches or when m->control_pressed is set during the scan.
+bool PcrSeqsCommand::findForward(Sequence& seq, int& primerStart, int& primerEnd){
+    try {
+        
+        string rawSequence = seq.getUnaligned();
+        int seqLength = rawSequence.length();
+        
+        for(int i=0;i<primers.size();i++){
+            string oligo = primers[i];
+            int olength = oligo.length();
+            
+            //this primer cannot fit in the sequence, but a shorter primer later in the list still might
+            if(seqLength < olength) {  continue;  }
+            
+            //search for primer; lastStart is inclusive so the window ending on the final base is tested too
+            int lastStart = seqLength - olength;
+            for (int j = 0; j <= lastStart; j++){
+                if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
+                string rawChunk = rawSequence.substr(j, olength);
+                if(compareDNASeq(oligo, rawChunk)) {
+                    primerStart = j;
+                    primerEnd = primerStart + olength;
+                    return true;
+                }
+            }
+        }
+        
+        //no primer matched anywhere
+        primerStart = 0; primerEnd = 0;
+        return false;
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PcrSeqsCommand", "findForward");
+        exit(1);
+    }
+}
+//******************************************************************/
+// Searches the unaligned bases of seq, right to left, for the last window that matches
+// any entry of revPrimer (as judged by compareDNASeq).
+//   seq         - sequence to search; only its unaligned form is read.
+//   primerStart - out: offset of the first matched base in the unaligned sequence.
+//   primerEnd   - out: offset one past the last matched base (primerStart + primer length).
+// Returns true on a match. Returns false, with both outputs reset to 0, when no primer
+// matches or when m->control_pressed is set during the scan.
+bool PcrSeqsCommand::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
+    try {
+        
+        string rawSequence = seq.getUnaligned();
+        int seqLength = rawSequence.length();
+        
+        for(int i=0;i<revPrimer.size();i++){
+            string oligo = revPrimer[i];
+            int olength = oligo.length();
+            
+            //this primer cannot fit in the sequence, but a shorter primer later in the list still might
+            if(seqLength < olength) {  continue;  }
+            
+            //search for primer, starting with the window that ends on the final base
+            for (int j = seqLength - olength; j >= 0; j--){
+                if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
+                string rawChunk = rawSequence.substr(j, olength);
+                
+                if(compareDNASeq(oligo, rawChunk)) {
+                    primerStart = j;
+                    primerEnd = primerStart + olength;
+                    return true;
+                }
+            }
+        }
+        
+        //no primer matched anywhere
+        primerStart = 0; primerEnd = 0;
+        return false;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PcrSeqsCommand", "findReverse");
+        exit(1);
+    }
+}
+//********************************************************************/
+// Reports whether seq contains any non-alphabetic character and, while scanning, records
+// for every alphabetic character its column in seq:
+//   aligned[k] = column of the k-th alphabetic character (k counted from 0).
+// Example: if the 3rd base sits at column 10, trimming at that base means trimming at column 10.
+// Returns true when at least one non-alphabetic character was seen.
+bool PcrSeqsCommand::isAligned(string seq, map<int, int>& aligned){
+    try {
+        bool sawNonBase = false;
+        int baseIndex = 0;
+        int column = 0;
+        
+        while (column < seq.length()) {
+            if (isalpha(seq[column])) {
+                //position in the ungapped sequence -> position in the given string
+                aligned[baseIndex] = column;
+                baseIndex++;
+            }
+            else { sawNonBase = true; }
+            column++;
+        }
+        
+        return sawNonBase;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PcrSeqsCommand", "isAligned");
+        exit(1);
+    }
+}
+//********************************************************************/
+// Builds the reverse complement of oligo: the characters are visited last to first and each
+// is replaced by its partner (A<->T, U->A, G<->C, R<->Y, M<->K, B<->V, D<->H; W and S map to
+// themselves). Any other character, including lowercase letters, becomes 'N'.
+string PcrSeqsCommand::reverseOligo(string oligo){
+    try {
+        string complement = "";
+        
+        for (string::reverse_iterator base = oligo.rbegin(); base != oligo.rend(); ++base) {
+            char partner;
+            switch (*base) {
+                case 'A': partner = 'T'; break;
+                case 'T': partner = 'A'; break;
+                case 'U': partner = 'A'; break;
+                
+                case 'G': partner = 'C'; break;
+                case 'C': partner = 'G'; break;
+                
+                case 'R': partner = 'Y'; break;
+                case 'Y': partner = 'R'; break;
+                
+                case 'M': partner = 'K'; break;
+                case 'K': partner = 'M'; break;
+                
+                case 'W': partner = 'W'; break;
+                case 'S': partner = 'S'; break;
+                
+                case 'B': partner = 'V'; break;
+                case 'V': partner = 'B'; break;
+                
+                case 'D': partner = 'H'; break;
+                case 'H': partner = 'D'; break;
+                
+                default:  partner = 'N'; break;
+            }
+            complement += partner;
+        }
+        
+        return complement;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PcrSeqsCommand", "reverseOligo");
+        exit(1);
+    }
+}
+
+//***************************************************************************************************************
+// Parses the oligos file named by oligosfile and fills the primer lists.
+// Each entry is read as a whitespace-separated "type oligo" pair; the type is compared
+// case-insensitively and the oligo is upper-cased with U converted to T.
+//   FORWARD        -> oligo appended to primers
+//   REVERSE        -> reverseOligo(oligo) appended to revPrimer
+//   BARCODE        -> the following token is read into group and not used further here
+//   LINKER, SPACER -> ignored
+//   '#'-prefixed   -> rest of the line skipped
+//   anything else  -> a message is printed and m->control_pressed is set
+// Returns false (and sets m->control_pressed) when neither list received an entry,
+// true otherwise.
+bool PcrSeqsCommand::readOligos(){
+       try {
+               ifstream inOligos;
+               m->openInputFile(oligosfile, inOligos);
+               
+               string type, oligo, group;
+               
+               while(!inOligos.eof()){
+            
+                       inOligos >> type; 
+            
+                       if(type[0] == '#'){ //ignore
+                               while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
+                               m->gobble(inOligos);
+                       }else{
+                               m->gobble(inOligos);
+                               //make type case insensitive
+                               for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
+                               
+                               inOligos >> oligo;
+                               
+                               //normalize the oligo: upper case, and U is treated as T
+                               for(int i=0;i<oligo.length();i++){
+                                       oligo[i] = toupper(oligo[i]);
+                                       if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
+                               }
+                               
+                               if(type == "FORWARD"){
+                                       // get rest of line in case there is a primer name
+                                       while (!inOligos.eof()) {       
+                        char c = inOligos.get(); 
+                        if (c == 10 || c == 13){       break;  } 
+                        else if (c == 32 || c == 9){;} //space or tab
+                                       } 
+                                       primers.push_back(oligo);
+                }else if(type == "REVERSE"){
+                    //stored as its reverse complement
+                    //NOTE(review): unlike FORWARD, the rest of a REVERSE line is not consumed here; a trailing
+                    //primer name would presumably be read as the next entry's type - confirm
+                    string oligoRC = reverseOligo(oligo);
+                    revPrimer.push_back(oligoRC);
+                    //cout << "oligo = " << oligo << " reverse = " << oligoRC << endl;
+                               }else if(type == "BARCODE"){
+                                       //read the group token so it is not mistaken for the next type; the value is not stored
+                                       inOligos >> group;
+                               }else if((type == "LINKER")||(type == "SPACER")) {;}
+                               else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                       }
+                       m->gobble(inOligos);
+               }       
+               inOligos.close();
+               
+               //at least one forward or reverse primer is required
+               if ((primers.size() == 0) && (revPrimer.size() == 0)) {
+                       m->mothurOut("[ERROR]: your oligos file does not contain valid primers or reverse primers.  Please correct."); m->mothurOutEndLine();
+            m->control_pressed = true;
+                       return false;
+               }
+        
+        return true;
+        
+    }catch(exception& e) {
+               m->errorOut(e, "PcrSeqsCommand", "readOligos");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+// Reads the first sequence from ecolifile and stores its aligned length, start position
+// and end position in the members length, start and end.
+// Returns true when a sequence was read; when the stream is already at end-of-file it
+// sets m->control_pressed and returns false.
+bool PcrSeqsCommand::readEcoli(){
+    try {
+        ifstream in;
+        m->openInputFile(ecolifile, in);
+        
+        bool haveSeq = !in.eof();
+        
+        if (haveSeq) {
+            //read seq
+            Sequence ecoli(in);
+            length = ecoli.getAligned().length();
+            start = ecoli.getStartPos();
+            end = ecoli.getEndPos();
+        }
+        else {
+            //nothing to read, flag the run as aborted
+            m->control_pressed = true;
+        }
+        
+        in.close();
+        return haveSeq;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PcrSeqsCommand", "readEcoli");
+        exit(1);
+    }
+    
+}
+//***************************************************************************************************************
+// Writes every name in badNames, one per line, to "<root of fastafile>bad.accnos" in
+// outputDir (or next to fastafile when outputDir is empty). The file is registered in
+// outputNames and outputTypes["accnos"] before it is written.
+// Writing stops early if m->control_pressed is set. Always returns 0.
+int PcrSeqsCommand::writeAccnos(set<string> badNames){
+    try {
+        string accnosDir = outputDir;
+        if (outputDir == "") {  accnosDir += m->hasPath(fastafile);  }
+        
+        string accnosFile = accnosDir + m->getRootName(m->getSimpleName(fastafile)) + "bad.accnos";
+        outputNames.push_back(accnosFile);
+        outputTypes["accnos"].push_back(accnosFile);
+        
+        ofstream out;
+        m->openOutputFile(accnosFile, out);
+        
+        set<string>::iterator current = badNames.begin();
+        while ((current != badNames.end()) && (!m->control_pressed)) {
+            out << (*current) << endl;
+            current++;
+        }
+        
+        out.close();
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PcrSeqsCommand", "writeAccnos");
+        exit(1);
+    }
+    
+}
+//******************************************************************/
+// Compares oligo against seq position by position over the length of oligo, treating
+// ambiguity symbols in oligo as sets of acceptable bases:
+//   A, T, G, C - must be identical to the sequence character
+//   N, I       - accept anything except a literal 'N' that differs from the oligo character
+//   R=A/G  Y=C/T  M=C/A  K=T/G  W=T/A  S=C/G  B=C/T/G  D=A/T/G  H=A/T/C  V=A/C/G
+// Any other oligo character that differs from the sequence is not treated as a mismatch.
+// Returns true when no position mismatches (including when oligo is empty).
+bool PcrSeqsCommand::compareDNASeq(string oligo, string seq){
+    try {
+        int numBases = oligo.length();
+        
+        for (int pos = 0; pos < numBases; pos++) {
+            char want = oligo[pos];
+            char have = seq[pos];
+            
+            if (want == have) { continue; }  //identical characters always agree
+            
+            bool mismatch = false;
+            switch (want) {
+                case 'A': case 'T': case 'G': case 'C':
+                    mismatch = true; break;
+                case 'N': case 'I':
+                    mismatch = (have == 'N'); break;
+                case 'R': mismatch = !(have == 'A' || have == 'G'); break;
+                case 'Y': mismatch = !(have == 'C' || have == 'T'); break;
+                case 'M': mismatch = !(have == 'C' || have == 'A'); break;
+                case 'K': mismatch = !(have == 'T' || have == 'G'); break;
+                case 'W': mismatch = !(have == 'T' || have == 'A'); break;
+                case 'S': mismatch = !(have == 'C' || have == 'G'); break;
+                case 'B': mismatch = !(have == 'C' || have == 'T' || have == 'G'); break;
+                case 'D': mismatch = !(have == 'A' || have == 'T' || have == 'G'); break;
+                case 'H': mismatch = !(have == 'A' || have == 'T' || have == 'C'); break;
+                case 'V': mismatch = !(have == 'A' || have == 'C' || have == 'G'); break;
+                default:  break;  //unrecognized oligo symbols are not counted as mismatches
+            }
+            
+            if (mismatch) { return false; }
+        }
+        
+        return true;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PcrSeqsCommand", "compareDNASeq");
+        exit(1);
+    }
+    
+}
+//***************************************************************************************************************
+// Filters namefile into "<root>pcr<ext>": a row is copied unchanged only when none of the
+// comma-separated names in its second column is in names. When any name of a row is in
+// names, the whole row is dropped and every name of that row is added to names.
+//   names - in/out set of rejected sequence names; grows as rows are dropped.
+// The output file is registered in outputTypes["name"] and outputNames. If
+// m->control_pressed is set while reading, the output file is removed.
+// Always returns 0.
+int PcrSeqsCommand::readName(set<string>& names){
+    try {
+        string thisOutputDir = outputDir;
+        if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
+        string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pcr" + m->getExtension(namefile);
+        
+        ofstream out;
+        m->openOutputFile(outputFileName, out);
+        
+        ifstream in;
+        m->openInputFile(namefile, in);
+        
+        string repName, memberList;
+        bool keptAny = false;
+        int numRemoved = 0;
+        
+        while(!in.eof()){
+            if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+            
+            in >> repName;      m->gobble(in);
+            in >> memberList;
+            
+            //keep the untouched column text for output; splitAtComma receives the working copy
+            string originalList = memberList;
+            vector<string> members;
+            m->splitAtComma(memberList, members);
+            
+            //a row is clean only if no member has been rejected
+            bool rowIsClean = true;
+            for (int i = 0; i < members.size(); i++) {
+                if (names.count(members[i]) != 0) { rowIsClean = false; break; }
+            }
+            
+            if (rowIsClean) {
+                out << repName << '\t' << originalList << endl;
+                keptAny = true;
+            }else { //we want to get rid of someone, so get rid of everyone
+                names.insert(members.begin(), members.end());
+                numRemoved += members.size();
+            }
+            m->gobble(in);
+        }
+        in.close();
+        out.close();
+        
+        if (!keptAny) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
+        outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
+        
+        m->mothurOut("Removed " + toString(numRemoved) + " sequences from your name file."); m->mothurOutEndLine();
+        
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PcrSeqsCommand", "readName");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Filters groupfile into "<root>pcr<ext>": each "name group" row is copied only when name
+// is not in names; rows whose name is in names are counted as removed.
+// The output file is registered in outputTypes["group"] and outputNames. If
+// m->control_pressed is set while reading, the output file is removed.
+// Always returns 0.
+int PcrSeqsCommand::readGroup(set<string> names){
+    try {
+        string thisOutputDir = outputDir;
+        if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
+        string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pcr" + m->getExtension(groupfile);
+        
+        ofstream out;
+        m->openOutputFile(outputFileName, out);
+        
+        ifstream in;
+        m->openInputFile(groupfile, in);
+        
+        string seqName, seqGroup;
+        bool keptAny = false;
+        int numRemoved = 0;
+        
+        while(!in.eof()){
+            if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+            
+            in >> seqName;      //first column
+            in >> seqGroup;     //second column
+            
+            if (names.count(seqName) != 0) {
+                //rejected sequence, leave it out of the new file
+                numRemoved++;
+            }else {
+                out << seqName << '\t' << seqGroup << endl;
+                keptAny = true;
+            }
+            
+            m->gobble(in);
+        }
+        in.close();
+        out.close();
+        
+        if (!keptAny) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
+        outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
+        
+        m->mothurOut("Removed " + toString(numRemoved) + " sequences from your group file."); m->mothurOutEndLine();
+        
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PcrSeqsCommand", "readGroup");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Filters taxfile into "<root>pcr<ext>": each "name taxonomy" row is copied only when name
+// is not in names; rows whose name is in names are counted as removed.
+// The output file is registered in outputTypes["taxonomy"] and outputNames. If
+// m->control_pressed is set while reading, the output file is removed.
+// Always returns 0.
+int PcrSeqsCommand::readTax(set<string> names){
+    try {
+        string thisOutputDir = outputDir;
+        if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
+        string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pcr" + m->getExtension(taxfile);
+        
+        ofstream out;
+        m->openOutputFile(outputFileName, out);
+        
+        ifstream in;
+        m->openInputFile(taxfile, in);
+        
+        string seqName, seqTax;
+        bool keptAny = false;
+        int numRemoved = 0;
+        
+        while(!in.eof()){
+            if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+            
+            in >> seqName;      //first column
+            in >> seqTax;       //second column
+            
+            if (names.count(seqName) != 0) {
+                //rejected sequence, leave it out of the new file
+                numRemoved++;
+            }else {
+                out << seqName << '\t' << seqTax << endl;
+                keptAny = true;
+            }
+            
+            m->gobble(in);
+        }
+        in.close();
+        out.close();
+        
+        if (!keptAny) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
+        outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
+        
+        m->mothurOut("Removed " + toString(numRemoved) + " sequences from your taxonomy file."); m->mothurOutEndLine();
+        
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PcrSeqsCommand", "readTax");
+        exit(1);
+    }
+}
+/**************************************************************************************/
+
+
index 582da493e4dc5cdf5267d65606181d1594f56a87..f2fbc80088b1adf2927ee78d5b1e02fbf9691cb1 100644 (file)
@@ -217,17 +217,19 @@ int PreClusterCommand::execute(){
                        m->mothurOutEndLine(); 
                        m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                        m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
-                       
+                       m->mothurCalling = true;
+            
                        Command* uniqueCommand = new DeconvoluteCommand(inputString);
                        uniqueCommand->execute();
                        
                        map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
                        
                        delete uniqueCommand;
-                       
+                       m->mothurCalling = false;
                        m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                        
                        m->renameFile(filenames["fasta"][0], newFastaFile);
+            m->renameFile(filenames["name"][0], newNamesFile);
                        
                        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run pre.cluster."); m->mothurOutEndLine(); 
                                
@@ -302,7 +304,7 @@ int PreClusterCommand::createProcessesGroups(SequenceParser* parser, string newF
                        lines.push_back(linePair(startIndex, endIndex));
                }
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)          
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -514,7 +516,7 @@ int PreClusterCommand::readFASTA(){
                m->openInputFile(fastafile, inFasta);
                
                //string firstCol, secondCol, nameString;
-               length = 0;
+               set<int> lengths;
                
                while (!inFasta.eof()) {
                        
@@ -540,17 +542,21 @@ int PreClusterCommand::readFASTA(){
                                        else{
                                                seqPNode tempNode(itSize->second, seq, names[seq.getName()]);
                                                alignSeqs.push_back(tempNode);
-                                               if (seq.getAligned().length() > length) {  length = seq.getAligned().length();  }
+                                               lengths.insert(seq.getAligned().length());
                                        }       
                                }else { //no names file, you are identical to yourself 
                                        seqPNode tempNode(1, seq, seq.getName());
                                        alignSeqs.push_back(tempNode);
-                                       if (seq.getAligned().length() > length) {  length = seq.getAligned().length();  }
+                                       lengths.insert(seq.getAligned().length());
                                }
                        }
                }
                inFasta.close();
                //inNames.close();
+        
+        if (lengths.size() > 1) { m->control_pressed = true; m->mothurOut("[ERROR]: your sequences are not all the same length. pre.cluster requires sequences to be aligned."); m->mothurOutEndLine(); }
+        else if (lengths.size() == 1) { length = *(lengths.begin()); }
+        
                return alignSeqs.size();
        }
        
@@ -562,7 +568,7 @@ int PreClusterCommand::readFASTA(){
 /**************************************************************************************************/
 int PreClusterCommand::loadSeqs(map<string, string>& thisName, vector<Sequence>& thisSeqs){
        try {
-               length = 0;
+               set<int> lengths;
                alignSeqs.clear();
                map<string, string>::iterator it;
                bool error = false;
@@ -585,15 +591,18 @@ int PreClusterCommand::loadSeqs(map<string, string>& thisName, vector<Sequence>&
                                        
                                        seqPNode tempNode(numReps, thisSeqs[i], it->second);
                                        alignSeqs.push_back(tempNode);
-                                       if (thisSeqs[i].getAligned().length() > length) {  length = thisSeqs[i].getAligned().length();  }
+                    lengths.insert(thisSeqs[i].getAligned().length());
                                }       
                        }else { //no names file, you are identical to yourself 
                                seqPNode tempNode(1, thisSeqs[i], thisSeqs[i].getName());
                                alignSeqs.push_back(tempNode);
-                               if (thisSeqs[i].getAligned().length() > length) {  length = thisSeqs[i].getAligned().length();  }
+                               lengths.insert(thisSeqs[i].getAligned().length());
                        }
                }
                
+        if (lengths.size() > 1) { error = true; m->mothurOut("[ERROR]: your sequences are not all the same length. pre.cluster requires sequences to be aligned."); m->mothurOutEndLine(); }
+        else if (lengths.size() == 1) { length = *(lengths.begin()); }
+        
                //sanity check
                if (error) { m->control_pressed = true; }
                
index 3712302bee105cec37e6834dbc2b017584c9e2d5..64efc7fc1d43aca03d17497e28bee2fdb61cc74a 100644 (file)
@@ -113,7 +113,7 @@ struct preClusterData {
 };
 
 /**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
 static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){ 
        preClusterData* pDataArray;
index 8183d51cc2e68466f186a354e315578d4175ad49..49034b8ecb2a2b937901c9cfda132c00bcd5c4c4 100644 (file)
@@ -36,6 +36,9 @@ public:
        void updateQScoreErrorMap(map<char, vector<int> >&, string, int, int, int);
        void updateForwardMap(vector<vector<int> >&, int, int, int);
        void updateReverseMap(vector<vector<int> >&, int, int, int);
+    // Replaces the stored sequence name (seqName) with n.
+    void setName(string n) { seqName = n; }
+    // Replaces the stored quality scores (qScores) with qs and sets seqLength to the new number of scores.
+    void setScores(vector<int> qs) { qScores = qs; seqLength = qScores.size(); }
+    
        
 private:
        
index ca105b7f82fad6be8cc45e7b9e7953da3f4471e4..6a9cb3180172b8aa06aed0039d252de8bdd8873d 100644 (file)
@@ -24,7 +24,7 @@ int Rarefact::getCurve(float percentFreq = 0.01, int nIters = 1000){
                if (percentFreq < 1.0) {  increment = numSeqs * percentFreq;  }
                else { increment = percentFreq;  }      
                
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                if(processors == 1){
                                        driver(rcd, increment, nIters); 
                                }else{
@@ -114,7 +114,7 @@ int Rarefact::driver(RarefactionCurveData* rcd, int increment, int nIters = 1000
 
 int Rarefact::createProcesses(vector<int>& procIters, RarefactionCurveData* rcd, int increment) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                
                vector<int> processIDS;
index b8c1c6f1a6420f8a139eb5c71b0320b87277035e..dabcef486ee7541c183fc594f10da9a9f6624561 100644 (file)
@@ -287,7 +287,7 @@ int RareFactCommand::execute(){
                                
                if (m->control_pressed) { return 0; }
                
-               map<string, string> nameMap;
+               map<int, string> file2Group; //index in outputNames[i] -> group
                for (int p = 0; p < inputFileNames.size(); p++) {
                        
                        string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p]));
@@ -296,7 +296,6 @@ int RareFactCommand::execute(){
                        
                        if (inputFileNames.size() > 1) {
                                m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
-                               nameMap[fileNameRoot] = groups[p];
                        }
                        int i;
                        ValidCalculators validCalculator;
@@ -352,6 +351,7 @@ int RareFactCommand::execute(){
                                                rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs")));
                                                outputNames.push_back(fileNameRoot+"r_nseqs"); outputTypes["r_nseqs"].push_back(fileNameRoot+"r_nseqs");
                                        }
+                    if (inputFileNames.size() > 1) { file2Group[outputNames.size()-1] = groups[p]; }
                                }
                        }
                        
@@ -450,7 +450,7 @@ int RareFactCommand::execute(){
                if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
 
                //create summary file containing all the groups data for each label - this function just combines the info from the files already created.
-               if ((sharedfile != "") && (groupMode)) {   outputNames = createGroupFile(outputNames, nameMap);  }
+               if ((sharedfile != "") && (groupMode)) {   outputNames = createGroupFile(outputNames, file2Group);  }
 
                if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
 
@@ -467,13 +467,14 @@ int RareFactCommand::execute(){
        }
 }
 //**********************************************************************************************************************
-vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map<string, string> nameMap) {
+vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map<int, string> file2Group) {
        try {
                
                vector<string> newFileNames;
                
                //find different types of files
-               map<string, vector<string> > typesFiles;
+               map<string, map<string, string> > typesFiles;
+        map<string, string> temp; 
                for (int i = 0; i < outputNames.size(); i++) {
                        string extension = m->getExtension(outputNames[i]);
                        
@@ -485,7 +486,8 @@ vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map
                        
                        newLine += "\tGroup" + labels.substr(labels.find_first_of('\t'));
                        
-                       typesFiles[extension].push_back(outputNames[i]);
+            temp[outputNames[i]] = file2Group[i];
+                       typesFiles[extension] = temp;
                        
                        string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
                        
@@ -499,37 +501,31 @@ vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map
                
                //for each type create a combo file
                map<int, int> lineToNumber; 
-               for (map<string, vector<string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
+               for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
                        
                        ofstream out;
                        string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
                        m->openOutputFileAppend(combineFileName, out);
                        newFileNames.push_back(combineFileName);
                        
-                       vector<string> thisTypesFiles = it->second;
+                       map<string, string> thisTypesFiles = it->second;
                
                        //open each type summary file
                        map<string, vector<string> > files; //maps file name to lines in file
                        int maxLines = 0;
                        int numColumns = 0;
-                       for (int i=0; i<thisTypesFiles.size(); i++) {
-                                                               
+                       for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
+                
+                string thisfilename = itFileNameGroup->first;
+                string group = itFileNameGroup->second;
+                
                                ifstream temp;
-                               m->openInputFile(thisTypesFiles[i], temp);
+                               m->openInputFile(thisfilename, temp);
                                
                                //read through first line - labels
                                m->getline(temp);       m->gobble(temp);
                                
                                vector<string> thisFilesLines;
-                               string fileNameRoot = m->getRootName(thisTypesFiles[i]);
-                               map<string, string>::iterator itName = nameMap.find(fileNameRoot);
-                               string group = "";
-                               if (itName != nameMap.end()) {
-                                       group = itName->second;
-                               }else {
-                                       group = "not found" + i;
-                                       m->mothurOut("[ERROR]: can't parse filename."); m->mothurOutEndLine();
-                               }
                                
                                thisFilesLines.push_back(group);
                                int count = 1;
@@ -549,13 +545,13 @@ vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map
                                        m->gobble(temp);
                                }
                                
-                               files[thisTypesFiles[i]] = thisFilesLines;
+                               files[thisfilename] = thisFilesLines;
                                
                                //save longest file for below
                                if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
                                
                                temp.close();
-                               m->mothurRemove(thisTypesFiles[i]);
+                               m->mothurRemove(thisfilename);
                        }
                        
                        
@@ -563,17 +559,19 @@ vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map
                        for (int k = 1; k < maxLines; k++) {
                                
                                //grab data for each group
-                               for (int i=0; i<thisTypesFiles.size(); i++) {
-                                       
+                               for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
+                    
+                                       string thisfilename = itFileNameGroup->first;
+                    
                                        map<int, int>::iterator itLine = lineToNumber.find(k);
                                        if (itLine != lineToNumber.end()) {
                                                string output = toString(itLine->second);
-                                               if (k < files[thisTypesFiles[i]].size()) {
-                                                       string line = files[thisTypesFiles[i]][k];
+                                               if (k < files[thisfilename].size()) {
+                                                       string line = files[thisfilename][k];
                                                        output = line.substr(0, line.find_first_of('\t'));
-                                                       output += '\t' + files[thisTypesFiles[i]][0] + '\t' + line.substr(line.find_first_of('\t'));
+                                                       output += '\t' + files[thisfilename][0] + '\t' + line.substr(line.find_first_of('\t'));
                                                }else{
-                                                       output += '\t' + files[thisTypesFiles[i]][0] + '\t';
+                                                       output += '\t' + files[thisfilename][0] + '\t';
                                                        for (int h = 0; h < numColumns; h++) {
                                                                output += "NA\t";
                                                        }
index 72d24151193076eea753f5b17d97e135da7d42d0..6aaa3de401e07f653abcd29cff4e98c868a94e4b 100644 (file)
@@ -51,7 +51,7 @@ private:
        string outputDir;
        
        vector<string> parseSharedFile(string);
-       vector<string> createGroupFile(vector<string>&, map<string, string>);
+       vector<string> createGroupFile(vector<string>&, map<int, string>);
 };
 
 #endif
index 36400c1324380cda8e5570c028ac48aa4beaa2c9..d91490edc47ec4e175d90b1dff556da66c8c79df 100644 (file)
@@ -27,5 +27,5 @@ void ReferenceDB::clearMemory()  {
 }
 /*******************************************************
 ReferenceDB::~ReferenceDB() { myInstance = NULL; }
-/*******************************************************/
+*******************************************************/
 
index 41067396c6f5333faeaf3346afd9b865ad351531..7ac910d4a74b9fc1bffeacb32f5893941f3eb60f 100644 (file)
@@ -8,7 +8,7 @@
  */
 
 #include "screenseqscommand.h"
-#include "sequence.hpp"
+
 
 //**********************************************************************************************************************
 vector<string> ScreenSeqsCommand::setParameters(){     
@@ -288,17 +288,27 @@ int ScreenSeqsCommand::execute(){
                        getSummary(positions); 
                } 
                else { 
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       positions = m->divideFile(fastafile, processors);
-                       for (int i = 0; i < (positions.size()-1); i++) {
-                               lines.push_back(new linePair(positions[i], positions[(i+1)]));
-                       }
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                positions = m->divideFile(fastafile, processors);
+                for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); }
                        #else 
-                               positions.push_back(0); positions.push_back(1000);
-                               lines.push_back(new linePair(0, 1000));
+                if(processors == 1){ lines.push_back(linePair(0, 1000));  }
+                else {
+                    int numFastaSeqs = 0;
+                    positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
+                    if (positions.size() < processors) { processors = positions.size(); }
+                
+                    //figure out how many sequences you have to process
+                    int numSeqsPerProcessor = numFastaSeqs / processors;
+                    for (int i = 0; i < processors; i++) {
+                        int startIndex =  i * numSeqsPerProcessor;
+                        if(i == (processors - 1)){     numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
+                        lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
+                    }
+                }
                        #endif
                }
-                               
+                                       
                string goodSeqFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "good" + m->getExtension(fastafile);
                string badAccnosFile =  outputDir + m->getRootName(m->getSimpleName(fastafile)) + "bad.accnos";
                
@@ -351,29 +361,16 @@ int ScreenSeqsCommand::execute(){
                                numSeqsPerProcessor = numFastaSeqs / processors;
                                int startIndex =  pid * numSeqsPerProcessor;
                                if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
-               //      cout << pid << '\t' << numSeqsPerProcessor << '\t' <<   startIndex << endl;
+
                                //align your part
                                driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIGood, outMPIBadAccnos, MPIPos, badSeqNames);
-               //cout << pid << " done" << endl;
+
                                if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood);  MPI_File_close(&outMPIBadAccnos);  return 0; }
 
                                for (int i = 1; i < processors; i++) {
-                               
                                        //get bad lists
                                        int badSize;
                                        MPI_Recv(&badSize, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
-                                       /*for (int j = 0; j < badSize; j++) {
-                                               int length;
-                                               MPI_Recv(&length, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);  //recv the length of the name
-                                               char* buf2 = new char[length];                                                                          //make space to recieve it
-                                               MPI_Recv(buf2, length, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);  //get name
-                                               
-                                               string tempBuf = buf2;
-                                               if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
-                                               delete buf2;
-                                               
-                                               badSeqNames.insert(tempBuf);
-                                       }*/
                                }
                        }else{ //you are a child process
                                MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
@@ -384,27 +381,15 @@ int ScreenSeqsCommand::execute(){
                                numSeqsPerProcessor = numFastaSeqs / processors;
                                int startIndex =  pid * numSeqsPerProcessor;
                                if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
-               //cout << pid << '\t' << numSeqsPerProcessor << '\t' <<         startIndex << endl;             
+
                                //align your part
                                driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIGood, outMPIBadAccnos, MPIPos, badSeqNames);
-//cout << pid << " done" << endl;
+
                                if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood);  MPI_File_close(&outMPIBadAccnos); return 0; }
                                
                                //send bad list 
                                int badSize = badSeqNames.size();
                                MPI_Send(&badSize, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
-                               
-                               /*
-                               set<string>::iterator it;
-                               for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
-                                       string name = *it;
-                                       int length = name.length();
-                                       char* buf2 = new char[length];
-                                       memcpy(buf2, name.c_str(), length);
-                                       
-                                       MPI_Send(&length, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
-                                       MPI_Send(buf2, length, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
-                               }*/
                        }
                        
                        //close files 
@@ -414,53 +399,10 @@ int ScreenSeqsCommand::execute(){
                        MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                                        
 #else
-                                               
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       if(processors == 1){
-                               numFastaSeqs = driver(lines[0], goodSeqFile, badAccnosFile, fastafile, badSeqNames);
-                               
-                               if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; }
-                               
-                       }else{
-                               processIDS.resize(0);
-                               
-                               numFastaSeqs = createProcesses(goodSeqFile, badAccnosFile, fastafile, badSeqNames); 
-                               
-                               rename((goodSeqFile + toString(processIDS[0]) + ".temp").c_str(), goodSeqFile.c_str());
-                               rename((badAccnosFile + toString(processIDS[0]) + ".temp").c_str(), badAccnosFile.c_str());
-                               
-                               //append alignment and report files
-                               for(int i=1;i<processors;i++){
-                                       m->appendFiles((goodSeqFile + toString(processIDS[i]) + ".temp"), goodSeqFile);
-                                       m->mothurRemove((goodSeqFile + toString(processIDS[i]) + ".temp"));
-                       
-                                       m->appendFiles((badAccnosFile + toString(processIDS[i]) + ".temp"), badAccnosFile);
-                                       m->mothurRemove((badAccnosFile + toString(processIDS[i]) + ".temp"));
-                               }
-                               
-                               if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; }
-                               
-                               //read badSeqs in because root process doesnt know what other "bad" seqs the children found
-                               ifstream inBad;
-                               int ableToOpen = m->openInputFile(badAccnosFile, inBad, "no error");
-                               
-                               if (ableToOpen == 0) {
-                                       badSeqNames.clear();
-                                       string tempName;
-                                       while (!inBad.eof()) {
-                                               inBad >> tempName; m->gobble(inBad);
-                                               badSeqNames.insert(tempName);
-                                       }
-                                       inBad.close();
-                               }
-                       }
-       #else
-                       numFastaSeqs = driver(lines[0], goodSeqFile, badAccnosFile, fastafile, badSeqNames);
-                       
-                       if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; }
-                       
-       #endif
-
+        if(processors == 1){ numFastaSeqs = driver(lines[0], goodSeqFile, badAccnosFile, fastafile, badSeqNames);      }       
+        else{ numFastaSeqs = createProcesses(goodSeqFile, badAccnosFile, fastafile, badSeqNames); }
+        
+        if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; }
 #endif         
 
                #ifdef USE_MPI
@@ -669,14 +611,25 @@ int ScreenSeqsCommand::getSummary(vector<unsigned long long>& positions){
                vector<int> ambigBases;
                vector<int> longHomoPolymer;
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               vector<unsigned long long> positions = m->divideFile(fastafile, processors);
-               
-               for (int i = 0; i < (positions.size()-1); i++) {
-                       lines.push_back(new linePair(positions[i], positions[(i+1)]));
-               }       
+        vector<unsigned long long> positions;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+               positions = m->divideFile(fastafile, processors);
+               for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); }   
 #else
-               lines.push_back(new linePair(0, 1000));
+               if(processors == 1){ lines.push_back(linePair(0, 1000));  }
+        else {
+            int numFastaSeqs = 0;
+            positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
+            if (positions.size() < processors) { processors = positions.size(); }
+            
+            //figure out how many sequences you have to process
+            int numSeqsPerProcessor = numFastaSeqs / processors;
+            for (int i = 0; i < processors; i++) {
+                int startIndex =  i * numSeqsPerProcessor;
+                if(i == (processors - 1)){     numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
+                lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
+            }
+        }
 #endif
                
 #ifdef USE_MPI
@@ -687,7 +640,7 @@ int ScreenSeqsCommand::getSummary(vector<unsigned long long>& positions){
                        driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
 #else
                int numSeqs = 0;
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        if(processors == 1){
                                numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
                        }else{
@@ -695,10 +648,10 @@ int ScreenSeqsCommand::getSummary(vector<unsigned long long>& positions){
                        }
                                
                        if (m->control_pressed) {  return 0; }
-               #else
-                       numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
-                       if (m->control_pressed) {  return 0; }
-               #endif
+               //#else
+               //      numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
+               //      if (m->control_pressed) {  return 0; }
+               //#endif
 #endif
                sort(startPosition.begin(), startPosition.end());
                sort(endPosition.begin(), endPosition.end());
@@ -753,13 +706,13 @@ int ScreenSeqsCommand::getSummary(vector<unsigned long long>& positions){
        }
 }
 /**************************************************************************************/
-int ScreenSeqsCommand::driverCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename, linePair* filePos) {  
+int ScreenSeqsCommand::driverCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename, linePair filePos) {   
        try {
                
                ifstream in;
                m->openInputFile(filename, in);
                                
-               in.seekg(filePos->start);
+               in.seekg(filePos.start);
 
                bool done = false;
                int count = 0;
@@ -792,9 +745,9 @@ int ScreenSeqsCommand::driverCreateSummary(vector<int>& startPosition, vector<in
                                count++;
                        }
                        //if((count) % 100 == 0){       m->mothurOut("Optimizing sequence: " + toString(count)); m->mothurOutEndLine();         }
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = in.tellg();
-                               if ((pos == -1) || (pos >= filePos->end)) { break; }
+                               if ((pos == -1) || (pos >= filePos.end)) { break; }
                        #else
                                if (in.eof()) { break; }
                        #endif
@@ -813,11 +766,13 @@ int ScreenSeqsCommand::driverCreateSummary(vector<int>& startPosition, vector<in
 /**************************************************************************************************/
 int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 1;
+        
+        int process = 1;
                int num = 0;
-               processIDS.clear();
-               
+               vector<int> processIDS;
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                               
                //loop through and create all the processes you want
                while (process != processors) {
                        int pid = fork();
@@ -878,8 +833,50 @@ int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition,
                        m->mothurRemove(tempFilename);
                }
                
-               return num;
+               
+#else 
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the seqSumData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //Taking advantage of shared memory to allow both threads to add info to vectors.
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<sumData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=0; i<processors-1; i++ ){
+            
+                       // Allocate memory for thread data.
+                       sumData* tempSum = new sumData(filename, m, lines[i].start, lines[i].end, namefile, nameMap);
+                       pDataArray.push_back(tempSum);
+                       
+                       //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i] = CreateThread(NULL, 0, MySumThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+               }
+               
+        //do your part
+               num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[processors-1]);
+         
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       num += pDataArray[i]->count;
+            for (int k = 0; k < pDataArray[i]->startPosition.size(); k++) {    startPosition.push_back(pDataArray[i]->startPosition[k]);       }
+                       for (int k = 0; k < pDataArray[i]->endPosition.size(); k++) {   endPosition.push_back(pDataArray[i]->endPosition[k]);       }
+            for (int k = 0; k < pDataArray[i]->seqLength.size(); k++) {        seqLength.push_back(pDataArray[i]->seqLength[k]);       }
+            for (int k = 0; k < pDataArray[i]->ambigBases.size(); k++) {       ambigBases.push_back(pDataArray[i]->ambigBases[k]);       }
+            for (int k = 0; k < pDataArray[i]->longHomoPolymer.size(); k++) {  longHomoPolymer.push_back(pDataArray[i]->longHomoPolymer[k]);       }
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+
 #endif         
+        return num;
        }
        catch(exception& e) {
                m->errorOut(e, "ScreenSeqsCommand", "createProcessesCreateSummary");
@@ -1128,7 +1125,7 @@ int ScreenSeqsCommand::screenQual(set<string> badSeqNames){
 }
 //**********************************************************************************************************************
 
-int ScreenSeqsCommand::driver(linePair* filePos, string goodFName, string badAccnosFName, string filename, set<string>& badSeqNames){
+int ScreenSeqsCommand::driver(linePair filePos, string goodFName, string badAccnosFName, string filename, set<string>& badSeqNames){
        try {
                ofstream goodFile;
                m->openOutputFile(goodFName, goodFile);
@@ -1139,7 +1136,7 @@ int ScreenSeqsCommand::driver(linePair* filePos, string goodFName, string badAcc
                ifstream inFASTA;
                m->openInputFile(filename, inFASTA);
 
-               inFASTA.seekg(filePos->start);
+               inFASTA.seekg(filePos.start);
 
                bool done = false;
                int count = 0;
@@ -1168,9 +1165,9 @@ int ScreenSeqsCommand::driver(linePair* filePos, string goodFName, string badAcc
                        count++;
                        }
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = inFASTA.tellg();
-                               if ((pos == -1) || (pos >= filePos->end)) { break; }
+                               if ((pos == -1) || (pos >= filePos.end)) { break; }
                        #else
                                if (inFASTA.eof()) { break; }
                        #endif
@@ -1275,10 +1272,13 @@ int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File&
 
 int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, string filename, set<string>& badSeqNames) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
+        
+        vector<int> processIDS;   
+        int process = 1;
                int num = 0;
-               
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                               
                //loop through and create all the processes you want
                while (process != processors) {
                        int pid = fork();
@@ -1304,8 +1304,10 @@ int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, st
                        }
                }
                
+        num = driver(lines[0], goodFileName, badAccnos, filename, badSeqNames);
+        
                //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
+               for (int i=0;i<processIDS.size();i++) { 
                        int temp = processIDS[i];
                        wait(&temp);
                }
@@ -1316,10 +1318,80 @@ int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, st
                        m->openInputFile(tempFile, in);
                        if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
                        in.close(); m->mothurRemove(tempFile);
+            
+            m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
+            m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
+                       
+            m->appendFiles((badAccnos + toString(processIDS[i]) + ".temp"), badAccnos);
+            m->mothurRemove((badAccnos + toString(processIDS[i]) + ".temp"));
                }
                
-               return num;
-#endif         
+        //read badSeqs in because root process doesnt know what other "bad" seqs the children found
+        ifstream inBad;
+        int ableToOpen = m->openInputFile(badAccnos, inBad, "no error");
+        
+        if (ableToOpen == 0) {
+            badSeqNames.clear();
+            string tempName;
+            while (!inBad.eof()) {
+                inBad >> tempName; m->gobble(inBad);
+                badSeqNames.insert(tempName);
+            }
+            inBad.close();
+        }
+#else
+        
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the sumScreenData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //Taking advantage of shared memory to allow both threads to add info to badSeqNames.
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<sumScreenData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=0; i<processors-1; i++ ){
+            
+            string extension = "";
+            if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
+            
+                       // Allocate memory for thread data.
+                       sumScreenData* tempSum = new sumScreenData(startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, filename, m, lines[i].start, lines[i].end,goodFileName+extension, badAccnos+extension);
+                       pDataArray.push_back(tempSum);
+                       
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i] = CreateThread(NULL, 0, MySumScreenThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+               }
+               
+        //do your part
+        num = driver(lines[processors-1], (goodFileName+toString(processors-1)+".temp"), (badAccnos+toString(processors-1)+".temp"), filename, badSeqNames);
+        processIDS.push_back(processors-1);
+        
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       num += pDataArray[i]->count;
+            for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) {        badSeqNames.insert(*it);       }
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+        
+        for (int i = 0; i < processIDS.size(); i++) {
+            m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
+            m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
+                       
+            m->appendFiles((badAccnos + toString(processIDS[i]) + ".temp"), badAccnos);
+            m->mothurRemove((badAccnos + toString(processIDS[i]) + ".temp"));
+               }
+
+#endif 
+        
+        return num;
+        
        }
        catch(exception& e) {
                m->errorOut(e, "ScreenSeqsCommand", "createProcesses");
index 49d992ac193c7aa42ad4492c9190bda40671fc0b..291d8e6d1acd4605f4827159f10aa74269e1a771 100644 (file)
@@ -11,6 +11,7 @@
  */
 #include "mothur.h"
 #include "command.hpp"
+#include "sequence.hpp"
 
 class ScreenSeqsCommand : public Command {
        
@@ -38,8 +39,7 @@ private:
                linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
        };
 
-       vector<int> processIDS;   //processid
-       vector<linePair*> lines;
+       vector<linePair> lines;
 
        int screenNameGroupFile(set<string>);
        int screenGroupFile(set<string>);
@@ -47,7 +47,7 @@ private:
        int screenQual(set<string>);
        int screenTaxonomy(set<string>);
        
-       int driver(linePair*, string, string, string, set<string>&);
+       int driver(linePair, string, string, string, set<string>&);
        int createProcesses(string, string, string, set<string>&);
        
        #ifdef USE_MPI
@@ -64,7 +64,203 @@ private:
        
        int getSummary(vector<unsigned long long>&);
        int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string);
-       int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, linePair*);       
+       int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, linePair);        
 };
 
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct sumData {
+       vector<int> startPosition;
+       vector<int> endPosition;
+       vector<int> seqLength; 
+       vector<int> ambigBases; 
+       vector<int> longHomoPolymer; 
+       string filename, namefile; 
+       unsigned long long start;
+       unsigned long long end;
+       int count;
+       MothurOut* m;
+       map<string, int> nameMap;
+       
+       
+       sumData(){}
+       sumData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string nf, map<string, int> nam) {
+               filename = f;
+        namefile = nf;
+               m = mout;
+               start = st;
+               end = en;
+               nameMap = nam;
+               count = 0;
+       }
+};
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct sumScreenData {
+    int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
+       unsigned long long start;
+       unsigned long long end;
+       int count;
+       MothurOut* m;
+       string goodFName, badAccnosFName, filename;
+    set<string> badSeqNames;
+       
+       
+       sumScreenData(){}
+       sumScreenData(int s, int e, int a, int h, int minl, int maxl, string f, MothurOut* mout, unsigned long long st, unsigned long long en, string gf, string bf) {
+               startPos = s;
+               endPos = e;
+               minLength = minl;
+        maxLength = maxl;
+               maxAmbig = a;
+               maxHomoP = h;
+               filename = f;
+        goodFName = gf;
+        badAccnosFName = bf;
+               m = mout;
+               start = st;
+               end = en;
+               count = 0;
+       }
+};
+
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MySumThreadFunction(LPVOID lpParam){ // thread entry point: reads one chunk of a fasta file and appends per-sequence summary values to the sumData it was given; returns 0 on completion, 1 if stopped early
+       sumData* pDataArray;
+       pDataArray = (sumData*)lpParam; // unchecked cast: lpParam must point at a sumData
+       // NOTE: despite its name, pDataArray refers to a single sumData, not an array.
+       try {
+               ifstream in;
+               pDataArray->m->openInputFile(pDataArray->filename, in);
+        
+               //position the stream: a chunk whose start is 0 or 1 reads from the very beginning of the file
+               if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
+                       in.seekg(0);
+               }else { //this accounts for the difference in line endings. 
+                       in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); // back up one byte, then gobble (presumably skips whitespace - confirm in MothurOut)
+               }
+               
+               pDataArray->count = pDataArray->end; // count is an int, end an unsigned long long: the value is narrowed here
+               for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
+                       
+                       if (pDataArray->m->control_pressed) { in.close();  pDataArray->count = 1; return 1; } // stop early once control_pressed is set: count is overwritten with 1 and the thread returns 1
+                       
+                       Sequence current(in); pDataArray->m->gobble(in); 
+                       
+                       if (current.getName() != "") { // a record with an empty name contributes nothing
+                               
+                               int num = 1; // how many times this sequence's values are recorded
+                               if (pDataArray->namefile != "") {
+                                       //make sure this sequence is in the namefile, else error 
+                                       map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
+                                       
+                                       if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; } // num stays 1, so this sequence is still recorded once below; any later iteration stops at the control_pressed check
+                                       else { num = it->second; }
+                               }
+                               
+                               //for each sequence this sequence represents
+                               for (int i = 0; i < num; i++) { // this i shadows the outer loop counter
+                                       pDataArray->startPosition.push_back(current.getStartPos());
+                                       pDataArray->endPosition.push_back(current.getEndPos());
+                                       pDataArray->seqLength.push_back(current.getNumBases());
+                                       pDataArray->ambigBases.push_back(current.getAmbigBases());
+                                       pDataArray->longHomoPolymer.push_back(current.getLongHomoPolymer());
+                               }
+            }
+               }
+               
+               in.close();
+               
+               return 0; // normal completion
+               
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumThreadFunction");
+               exit(1); // exit() ends the whole process, not just this thread
+       }
+} 
+
+/**************************************************************************************************/
+
+static DWORD WINAPI MySumScreenThreadFunction(LPVOID lpParam){ // thread entry point: screens one chunk of a fasta file, writing passing sequences to goodFName and rejected names to badAccnosFName; returns 0 on completion, 1 if stopped early
+       sumScreenData* pDataArray;
+       pDataArray = (sumScreenData*)lpParam; // unchecked cast: lpParam must point at a sumScreenData
+       // NOTE: despite its name, pDataArray refers to a single sumScreenData, not an array.
+       try {
+        
+        ofstream goodFile;
+               pDataArray->m->openOutputFile(pDataArray->goodFName, goodFile);
+               
+               ofstream badAccnosFile;
+               pDataArray->m->openOutputFile(pDataArray->badAccnosFName, badAccnosFile);
+               
+               ifstream in;
+               pDataArray->m->openInputFile(pDataArray->filename, in);
+        
+               //position the stream: a chunk whose start is 0 or 1 reads from the very beginning of the file
+               if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
+                       in.seekg(0);
+               }else { //this accounts for the difference in line endings. 
+                       in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); // back up one byte, then gobble (presumably skips whitespace - confirm in MothurOut)
+               }
+               
+               pDataArray->count = pDataArray->end; // count is an int, end an unsigned long long: the value is narrowed here
+               for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
+                       
+                       if (pDataArray->m->control_pressed) { in.close(); badAccnosFile.close(); goodFile.close(); pDataArray->count = 1; return 1; } // stop early once control_pressed is set: all three streams are closed, count is overwritten with 1 and the thread returns 1
+                       
+                       Sequence currSeq(in); pDataArray->m->gobble(in); 
+                       
+                       if (currSeq.getName() != "") { // a record with an empty name is neither written nor rejected
+                               bool goodSeq = 1;               //      innocent until proven guilty
+                               if(goodSeq == 1 && pDataArray->startPos != -1 && pDataArray->startPos < currSeq.getStartPos())                  {       goodSeq = 0;    } // rejected when getStartPos() is greater than startPos
+                               if(goodSeq == 1 && pDataArray->endPos != -1 && pDataArray->endPos > currSeq.getEndPos())                                {       goodSeq = 0;    } // rejected when getEndPos() is less than endPos
+                               if(goodSeq == 1 && pDataArray->maxAmbig != -1 && pDataArray->maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    } // rejected when getAmbigBases() exceeds maxAmbig
+                               if(goodSeq == 1 && pDataArray->maxHomoP != -1 && pDataArray->maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    } // rejected when getLongHomoPolymer() exceeds maxHomoP
+                               if(goodSeq == 1 && pDataArray->minLength != -1 && pDataArray->minLength > currSeq.getNumBases())                {       goodSeq = 0;    } // rejected when getNumBases() is below minLength
+                               if(goodSeq == 1 && pDataArray->maxLength != -1 && pDataArray->maxLength < currSeq.getNumBases())                {       goodSeq = 0;    } // rejected when getNumBases() exceeds maxLength
+                               // a threshold of -1 disables its test; passing sequences go to goodFile, rejected names to badAccnosFile and badSeqNames
+                               if(goodSeq == 1){
+                                       currSeq.printSequence(goodFile);        
+                               }
+                               else{
+                                       badAccnosFile << currSeq.getName() << endl;
+                                       pDataArray->badSeqNames.insert(currSeq.getName()); // kept in this worker's own set
+                               }
+    
+                       }               
+            //report progress
+                       if((i+1) % 100 == 0){   pDataArray->m->mothurOut("Processing sequence: " + toString(i+1)); pDataArray->m->mothurOutEndLine();           } // one message per 100 loop iterations
+               }
+               //report progress
+               if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOut("Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();             } // final message unless the loop just printed the same multiple of 100
+               
+
+               
+               in.close();
+        goodFile.close();
+        badAccnosFile.close();
+               
+               return 0; // normal completion
+               
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumScreenThreadFunction");
+               exit(1); // exit() ends the whole process, not just this thread
+       }
+} 
+
+#endif
+
+/**************************************************************************************************/
+
+
+
 #endif
index d8ebe50fb11859367256a28286dec9789cc70d36..0dc15f59ee98d0ee2d93659c4c3ad1f033f8a193 100644 (file)
@@ -298,7 +298,7 @@ int SeqErrorCommand::execute(){
                if(qualFileName == "")  {       qLines = lines; rLines = lines; } //fills with duds
                
                int numSeqs = 0;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                if(processors == 1){
                        numSeqs = driver(queryFileName, qualFileName, reportFileName, errorSummaryFileName, errorSeqFileName, errorChimeraFileName, lines[0], qLines[0], rLines[0]);
                }else{
@@ -366,7 +366,7 @@ int SeqErrorCommand::createProcesses(string filename, string qFileName, string r
                processIDS.clear();
                map<char, vector<int> >::iterator it;
                int num = 0;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -682,7 +682,8 @@ int SeqErrorCommand::driver(string filename, string qFileName, string rFileName,
                        if(numParentSeqs > 1 && ignoreChimeras == 1)    {       ignoreSeq = 1;  }
                        else                                                                                    {       ignoreSeq = 0;  }
                        
-                       Compare minCompare = getErrors(query, referenceSeqs[closestRefIndex]);
+                       Compare minCompare;
+            getErrors(query, referenceSeqs[closestRefIndex], minCompare);
                        
                        if(namesFileName != ""){
                                it = weights.find(query.getName());
@@ -739,7 +740,7 @@ int SeqErrorCommand::driver(string filename, string qFileName, string rFileName,
                        
                        index++;
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = queryFile.tellg();
                                if ((pos == -1) || (pos >= line.end)) { break; }
                        #else
@@ -839,7 +840,7 @@ void SeqErrorCommand::getReferences(){
 
 //***************************************************************************************************************
 
-Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){
+int SeqErrorCommand::getErrors(Sequence query, Sequence reference, Compare& errors){
        try {
                if(query.getAlignLength() != reference.getAlignLength()){
                        m->mothurOut("Warning: " + toString(query.getName()) + " and " + toString(reference.getName()) + " are different lengths\n");
@@ -850,7 +851,7 @@ Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){
                string r = reference.getAligned();
 
                int started = 0;
-               Compare errors;
+               //Compare errors;
 
                for(int i=0;i<alignLength;i++){
 //                     cout << r[i] << '\t' << q[i] << '\t';
@@ -929,7 +930,8 @@ Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){
                errors.queryName = query.getName();
                errors.refName = reference.getName();
                
-               return errors;
+               //return errors;
+        return 0;
        }
        catch(exception& e) {
                m->errorOut(e, "SeqErrorCommand", "getErrors");
@@ -1215,7 +1217,7 @@ void SeqErrorCommand::printQualityFR(vector<vector<int> > qualForwardMap, vector
 
 int SeqErrorCommand::setLines(string filename, string qfilename, string rfilename, vector<unsigned long long>& fastaFilePos, vector<unsigned long long>& qfileFilePos, vector<unsigned long long>& rfileFilePos) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                //set file positions for fasta file
                fastaFilePos = m->divideFile(filename, processors);
                
index cc904ec7f246aed91970b41cf52e39b7d32b896b..e7c97e152d718b8944b0e004b32398dd1f210e99 100644 (file)
  *
  */
 
-#include "mothur.h"
 #include "command.hpp"
 #include "sequence.hpp"
 #include "referencedb.h"
 
-struct Compare {
-       int AA, AT, AG, AC,     TA, TT, TG, TC, GA, GT, GG, GC, CA, CT, CG, CC, NA, NT, NG, NC, Ai, Ti, Gi, Ci, Ni, dA, dT, dG, dC;
-       string refName, queryName, sequence;
-       double errorRate;
-       int weight, matches, mismatches, total;
-       
-       Compare(){
-               AA=0; AT=0; AG=0; AC=0;
-               TA=0; TT=0; TG=0; TC=0;
-               GA=0; GT=0; GG=0; GC=0;
-               CA=0; CT=0; CG=0; CC=0;
-               NA=0; NT=0; NG=0; NC=0;
-               Ai=0; Ti=0; Gi=0; Ci=0; Ni=0;
-               dA=0; dT=0; dG=0; dC=0;
-               refName = "";
-               queryName = "";
-               weight = 1;
-               matches = 0;
-               mismatches = 0;
-               total = 0;
-               errorRate = 1.0000;
-               sequence = "";
-       }
-};
 
 class SeqErrorCommand : public Command {
 public:
        SeqErrorCommand(string);
        SeqErrorCommand();
-       ~SeqErrorCommand() {}
+       ~SeqErrorCommand(){}
        
        vector<string> setParameters();
        string getCommandName()                 { return "seq.error";                           }
@@ -65,8 +40,35 @@ private:
                unsigned long long start;
                unsigned long long end;
                linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
+        ~linePair(){}
        };
        
+    struct Compare { // result record filled in by getErrors() for one query/reference pair
+        int AA, AT, AG, AC,    TA, TT, TG, TC, GA, GT, GG, GC, CA, CT, CG, CC, NA, NT, NG, NC, Ai, Ti, Gi, Ci, Ni, dA, dT, dG, dC; // NOTE(review): presumably per-base-pair substitution counts plus insertion (?i) and deletion (d?) counts - confirm against getErrors
+        string refName, queryName, sequence; // getErrors sets refName/queryName from reference.getName()/query.getName()
+        double errorRate;
+        int weight, matches, mismatches, total;
+        // The constructor zeroes every counter; weight starts at 1 and errorRate at 1.0.
+        Compare(){
+            AA=0; AT=0; AG=0; AC=0;
+            TA=0; TT=0; TG=0; TC=0;
+            GA=0; GT=0; GG=0; GC=0;
+            CA=0; CT=0; CG=0; CC=0;
+            NA=0; NT=0; NG=0; NC=0;
+            Ai=0; Ti=0; Gi=0; Ci=0; Ni=0;
+            dA=0; dT=0; dG=0; dC=0;
+            refName = "";
+            queryName = "";
+            weight = 1; // the only counter-like member that does not start at 0
+            matches = 0;
+            mismatches = 0;
+            total = 0;
+            errorRate = 1.0000;
+            sequence = "";
+        }
+        ~Compare(){}; // empty destructor; the ';' after the body is a redundant empty declaration
+    };
+
        vector<int> processIDS;   //processid
        vector<linePair> lines;
        vector<linePair> qLines;
@@ -74,7 +76,7 @@ private:
 
        void getReferences();
        map<string,int> getWeights();
-       Compare getErrors(Sequence, Sequence);
+       int getErrors(Sequence, Sequence, Compare&);
        void printErrorHeader(ofstream&);
        void printErrorData(Compare, int, ofstream&, ofstream&);
        void printSubMatrix();
index 9ee80106b4346391c66d8fc75650b3f71e337062..8e7a4395fbf20009d135502fde3774b2592a9440 100644 (file)
@@ -1016,4 +1016,4 @@ int main(int argc, char *argv[]){
        return 0;
 }
 
-/**************************************************************************************************/
+**************************************************************************************************/
index e8f73ca10875f8321f624adfb62ee02f7baf2770..68d3bf583f9c7de2db85ebfa27a2fecdd7738922 100644 (file)
@@ -286,11 +286,12 @@ int SeqSummaryCommand::execute(){
                                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
 #else
                        vector<unsigned long long> positions; 
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                positions = m->divideFile(fastafile, processors);
                                for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(new linePair(positions[i], positions[(i+1)]));  }
                        #else
                                positions = m->setFilePosFasta(fastafile, numSeqs); 
+                if (positions.size() < processors) { processors = positions.size(); }
                
                                //figure out how many sequences you have to process
                                int numSeqsPerProcessor = numSeqs / processors;
@@ -433,7 +434,7 @@ int SeqSummaryCommand::driverCreateSummary(vector<int>& startPosition, vector<in
                                outSummary << current.getLongHomoPolymer() << '\t' << num << endl;
                        }
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = in.tellg();
                                if ((pos == -1) || (pos >= filePos->end)) { break; }
                        #else
@@ -529,7 +530,7 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector<int>& startPosition,
                int num = 0;
                processIDS.clear();
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -603,34 +604,42 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector<int>& startPosition,
                //////////////////////////////////////////////////////////////////////////////////////////////////////
                
                vector<seqSumData*> pDataArray; 
-               DWORD   dwThreadIdArray[processors];
-               HANDLE  hThreadArray[processors]; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
                
                //Create processor worker threads.
-               for( int i=0; i<processors; i++ ){
-                       
-                       //cout << i << '\t' << lines[i]->start << '\t' << lines[i]->end << endl;
+               for( int i=0; i<processors-1; i++ ){
+            
+            string extension = "";
+            if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
                        // Allocate memory for thread data.
-                       seqSumData* tempSum = new seqSumData(&startPosition, &endPosition, &seqLength, &ambigBases, &longHomoPolymer, filename, (sumFile + toString(i) + ".temp"), m, lines[i]->start, lines[i]->end, namefile, nameMap);
+                       seqSumData* tempSum = new seqSumData(filename, (sumFile+extension), m, lines[i]->start, lines[i]->end, namefile, nameMap);
                        pDataArray.push_back(tempSum);
-                       processIDS.push_back(i);
-                               
+                       
                        //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
                        //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
                        hThreadArray[i] = CreateThread(NULL, 0, MySeqSumThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
                }
-                       
+               
+        //do your part
+               num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, (sumFile+toString(processors-1)+".temp"), lines[processors-1]);
+        processIDS.push_back(processors-1);
+
                //Wait until all threads have terminated.
-               WaitForMultipleObjects(processors, hThreadArray, TRUE, INFINITE);
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
                
                //Close all thread handles and free memory allocations.
                for(int i=0; i < pDataArray.size(); i++){
                        num += pDataArray[i]->count;
+            for (int k = 0; k < pDataArray[i]->startPosition.size(); k++) {    startPosition.push_back(pDataArray[i]->startPosition[k]);       }
+                       for (int k = 0; k < pDataArray[i]->endPosition.size(); k++) {   endPosition.push_back(pDataArray[i]->endPosition[k]);       }
+            for (int k = 0; k < pDataArray[i]->seqLength.size(); k++) {        seqLength.push_back(pDataArray[i]->seqLength[k]);       }
+            for (int k = 0; k < pDataArray[i]->ambigBases.size(); k++) {       ambigBases.push_back(pDataArray[i]->ambigBases[k]);       }
+            for (int k = 0; k < pDataArray[i]->longHomoPolymer.size(); k++) {  longHomoPolymer.push_back(pDataArray[i]->longHomoPolymer[k]);       }
                        CloseHandle(hThreadArray[i]);
                        delete pDataArray[i];
                }
-               
-               //rename((sumFile + toString(processIDS[0]) + ".temp").c_str(), sumFile.c_str());
+    
                //append files
                for(int i=0;i<processIDS.size();i++){
                        m->appendFiles((sumFile + toString(processIDS[i]) + ".temp"), sumFile);
index d37d6f43b50b3b567d267ab82dd83db459f126df..d8837a374d070298ceb9d224aaf8c621680bf249 100644 (file)
@@ -62,11 +62,11 @@ private:
 // This is passed by void pointer so it can be any data type
 // that can be passed using a single void pointer (LPVOID).
 struct seqSumData {
-       vector<int>* startPosition;
-       vector<int>* endPosition;
-       vector<int>* seqLength; 
-       vector<int>* ambigBases; 
-       vector<int>* longHomoPolymer; 
+       vector<int> startPosition;
+       vector<int> endPosition;
+       vector<int> seqLength; 
+       vector<int> ambigBases; 
+       vector<int> longHomoPolymer; 
        string filename; 
        string sumFile; 
        unsigned long long start;
@@ -78,12 +78,7 @@ struct seqSumData {
        
        
        seqSumData(){}
-       seqSumData(vector<int>* s, vector<int>* e, vector<int>* l, vector<int>* a, vector<int>* h, string f, string sf, MothurOut* mout, unsigned long long st, unsigned long long en, string na, map<string, int> nam) {
-               startPosition = s;
-               endPosition = e;
-               seqLength = l;
-               ambigBases = a;
-               longHomoPolymer = h;
+       seqSumData(string f, string sf, MothurOut* mout, unsigned long long st, unsigned long long en, string na, map<string, int> nam) {
                filename = f;
                sumFile = sf;
                m = mout;
@@ -96,7 +91,7 @@ struct seqSumData {
 };
 
 /**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
 static DWORD WINAPI MySeqSumThreadFunction(LPVOID lpParam){ 
        seqSumData* pDataArray;
@@ -137,11 +132,11 @@ static DWORD WINAPI MySeqSumThreadFunction(LPVOID lpParam){
                                
                                //for each sequence this sequence represents
                                for (int i = 0; i < num; i++) {
-                                       pDataArray->startPosition->push_back(current.getStartPos());
-                                       pDataArray->endPosition->push_back(current.getEndPos());
-                                       pDataArray->seqLength->push_back(current.getNumBases());
-                                       pDataArray->ambigBases->push_back(current.getAmbigBases());
-                                       pDataArray->longHomoPolymer->push_back(current.getLongHomoPolymer());
+                                       pDataArray->startPosition.push_back(current.getStartPos());
+                                       pDataArray->endPosition.push_back(current.getEndPos());
+                                       pDataArray->seqLength.push_back(current.getNumBases());
+                                       pDataArray->ambigBases.push_back(current.getAmbigBases());
+                                       pDataArray->longHomoPolymer.push_back(current.getLongHomoPolymer());
                                }
                                
                                outSummary << current.getName() << '\t';
index 090ee14c1c838098aaaeb1a831ec01e3272ed512..d877f4b93f33d8123ff6a1a9f7f839dc680fcf61 100644 (file)
@@ -191,6 +191,59 @@ Sequence::Sequence(ifstream& fastaFile){
 }
 //********************************************************************************************************************
 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
+Sequence::Sequence(ifstream& fastaFile, string& extraInfo, bool getInfo){ //reads one fasta record; the rest of its header line is returned in extraInfo (getInfo is not read in this body)
+       try {
+               m = MothurOut::getInstance();
+               initialize();
+               fastaFile >> name;
+        extraInfo = "";
+               //the first character of the name token is dropped below (presumably the '>' marker - confirm against the file format)
+               if (name.length() != 0) { 
+            
+                       name = name.substr(1); 
+                       
+                       string sequence;
+            
+                       //read comments
+                       while ((name[0] == '#') && fastaFile) { 
+                               while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
+                               sequence = getCommentString(fastaFile);
+                               
+                               if (fastaFile) {  
+                                       fastaFile >> name;  
+                                       name = name.substr(1);  
+                               }else { 
+                                       name = "";
+                                       break;
+                               }
+                       }
+                       
+                       //read info after sequence name
+                       while (!fastaFile.eof())        {       
+                char c = 0;
+                if (!fastaFile.get(c) || c == 10 || c == 13){  break;       } //stop at end of line, or when no character could be read, so the end-of-file marker is never appended to extraInfo
+                extraInfo += c;
+            } 
+                       
+                       int numAmbig = 0;
+                       sequence = getSequenceString(fastaFile, numAmbig);
+                       
+                       setAligned(sequence);   
+                       //setUnaligned removes any gap characters for us                                                
+                       setUnaligned(sequence); 
+                       
+                       if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
+                       
+               }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Sequence", "Sequence");
+               exit(1);
+       }                                                       
+}
+//********************************************************************************************************************
+//this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
 Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
        try {
                m = MothurOut::getInstance();
@@ -571,7 +624,45 @@ void Sequence::padToPos(int start){
        startPos = start;
 
 }
+//********************************************************************************************************************
 
+int Sequence::filterToPos(int start){
+    //Masks the front of the alignment with '.': every position before index start-1, plus the non-letter characters that follow, up to the first letter.
+    if (start > aligned.length()) { start = aligned.length(); m->mothurOut("[ERROR]: start to large.\n"); }
+    //NOTE(review): start is an int compared against length(); if that is unsigned, a negative start is clamped here too - confirm
+       for(int j = 0; j < start-1; j++) {
+               aligned[j] = '.';
+       }
+       
+    //things like ......----------AT become ................AT
+    for(int j = start-1; j < aligned.length(); j++) {
+        if (isalpha(aligned[j])) { break; }
+        else { aligned[j] = '.'; }
+    }
+    setUnaligned(aligned);
+    //the edited alignment has been handed to setUnaligned; the return value is always 0
+    return 0;
+    
+}
+//********************************************************************************************************************
+
+int Sequence::filterFromPos(int end){
+    //Masks the tail of the alignment with '.': every position from index end onward, plus the non-letter characters just before it, back to the last letter. Always returns 0.
+    if (end > aligned.length()) { end = aligned.length(); m->mothurOut("[ERROR]: end to large.\n"); }
+    
+       for(int j = end; j < aligned.length(); j++) {
+               aligned[j] = '.';
+       }
+       
+    //things like AT----------...... become AT................ (walk backwards; the loop has to run while j >= 0, the old j < 0 test never ran)
+    for(int j = (int)aligned.length()-1; j >= 0; j--) {
+        if (isalpha(aligned[j])) { break; }
+        else { aligned[j] = '.'; }
+    }
+    
+    setUnaligned(aligned);
+    
+    return 0;
+}
 //********************************************************************************************************************
 
 int Sequence::getEndPos(){
@@ -591,7 +682,7 @@ int Sequence::getEndPos(){
 //********************************************************************************************************************
 
 void Sequence::padFromPos(int end){
-       
+       cout << end << '\t' << endPos << endl;
        for(int j = end; j < endPos; j++) {
                aligned[j] = '.';
        }
index 6a50cb0b8aa31e4a3a3b6c9c97e77416f53eeeb1..db4c4f32b9992a27f63e15908d881a77d5507980 100644 (file)
@@ -27,10 +27,8 @@ public:
        Sequence();
        Sequence(string, string);
        Sequence(ifstream&);
+    Sequence(ifstream&, string&, bool);
        Sequence(istringstream&);
-       Sequence(const Sequence& se) : name(se.name), unaligned(se.unaligned), aligned(se.aligned), pairwise(se.pairwise), numBases(se.numBases), startPos(se.startPos), endPos(se.endPos),
-                                                                       alignmentLength(se.alignmentLength), isAligned(se.isAligned), longHomoPolymer(se.longHomoPolymer), ambigBases(se.ambigBases) { m = MothurOut::getInstance(); }
-       
        //these constructors just set the unaligned string to save space
        Sequence(string, string, string);  
        Sequence(ifstream&, string);
@@ -55,6 +53,8 @@ public:
        int getEndPos();
        void padToPos(int);
        void padFromPos(int);
+    int filterToPos(int); //any character before the pos is changed to . and aligned and unaligned strings changed
+    int filterFromPos(int); //any character after the pos is changed to . and aligned and unaligned strings changed
        int getAlignLength();
        int getAmbigBases();
        void removeAmbigBases();
index 6c98c047af76d2a3c29ee599b1141c336240390b..fd94b246a43217cc9cbfa131bfe4cd672955e001 100644 (file)
@@ -316,6 +316,7 @@ int SequenceParser::getSeqs(string g, string filename, bool uchimeFormat=false){
                                }
                                
                        }else { 
+                //m->mothurOut("Group " + g +  " contains " + toString(seqForThisGroup.size()) + " unique seqs.\n");
                                for (int i = 0; i < seqForThisGroup.size(); i++) {
                                        
                                        if(m->control_pressed) { out.close(); m->mothurRemove(filename); return 1; }
index 67a1f59f8388dbfec55ae7b552f02640b5d6b93f..081a306d712a8ee10bb67cf1d72bbf0663d0ff07 100644 (file)
@@ -122,7 +122,7 @@ int SetDirectoryCommand::execute(){
                }else {
                        //add / to name if needed
                        string lastChar = output.substr(output.length()-1);
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                if (lastChar != "/") { output += "/"; }
                        #else
                                if (lastChar != "\\") { output += "\\"; }       
@@ -154,7 +154,7 @@ int SetDirectoryCommand::execute(){
                }else {
                        //add / to name if needed
                        string lastChar = input.substr(input.length()-1);
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                if (lastChar != "/") { input += "/"; }
                        #else
                                if (lastChar != "\\") { input += "\\"; }        
@@ -196,7 +196,7 @@ int SetDirectoryCommand::execute(){
                }else {
                        //add / to name if needed
                        string lastChar = tempdefault.substr(tempdefault.length()-1);
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                if (lastChar != "/") { tempdefault += "/"; }
                        #else
                                if (lastChar != "\\") { tempdefault += "\\"; }  
index 4965cfd75e06c9f8ba8157056922514581e3e10e..20caead668196fc75d10de1275956b7955bdd3b6 100644 (file)
@@ -16,7 +16,7 @@ vector<string> SffInfoCommand::setParameters(){
                CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(psff);
                CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
                CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "",false,false); parameters.push_back(psfftxt);
-               CommandParameter pflow("flow", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflow);
+               CommandParameter pflow("flow", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pflow);
                CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(ptrim);
                CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pfasta);
                CommandParameter pqfile("name", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqfile);
@@ -41,7 +41,7 @@ string SffInfoCommand::getHelpString(){
                helpString += "The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n";
                helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n";
                helpString += "The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n";
-               helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=False. \n";
+               helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=True. \n";
                helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n";
                helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";
                helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n";
@@ -256,7 +256,7 @@ SffInfoCommand::SffInfoCommand(string option)  {
                        temp = validParameter.validFile(parameters, "fasta", false);                            if (temp == "not found"){       temp = "T";                             }
                        fasta = m->isTrue(temp); 
                        
-                       temp = validParameter.validFile(parameters, "flow", false);                                     if (temp == "not found"){       temp = "F";                             }
+                       temp = validParameter.validFile(parameters, "flow", false);                                     if (temp == "not found"){       temp = "T";                             }
                        flow = m->isTrue(temp); 
                        
                        temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }
@@ -298,7 +298,6 @@ SffInfoCommand::SffInfoCommand(string option)  {
 //**********************************************************************************************************************
 int SffInfoCommand::execute(){
        try {
-               
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                for (int s = 0; s < filenames.size(); s++) {
@@ -362,6 +361,9 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){
 
                ofstream outSfftxt, outFasta, outQual, outFlow;
                string outFastaFileName, outQualFileName;
+        string rootName = outputDir + m->getRootName(m->getSimpleName(input));
+        if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; }
+        
                string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "sff.txt";
                string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "flow";
                if (trim) {
@@ -406,7 +408,9 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){
                        //read data
                        seqRead read; 
                        readSeqData(in, read, header.numFlowsPerRead, readheader.numBases);
-                               
+            bool okay = sanityCheck(readheader, read);
+            if (!okay) { break; }
+            
                        //if you have provided an accosfile and this seq is not in it, then dont print
                        if (seqNames.size() != 0) {   if (seqNames.count(readheader.name) == 0) { print = false; }  }
                        
@@ -609,7 +613,7 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, i
                                in.read(buffer, 2);
                                read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
                        }
-       
+            
                        //read flowIndex
                        read.flowIndex.resize(numBases);
                        for (int i = 0; i < numBases; i++) {  
@@ -741,11 +745,39 @@ int SffInfoCommand::printHeader(ofstream& out, Header& header) {
                exit(1);
        }
 }
-
+//**********************************************************************************************************************
+bool SffInfoCommand::sanityCheck(Header& header, seqRead& read) { //returns false when a clip position in the read header exceeds what was actually read
+       try {
+        bool okay = true;
+        string message = "[WARNING]: Your sff file may be corrupted! Sequence: " + header.name + "\n";
+        //compare both clip positions against the number of bases, then against the number of quality scores; each failure adds a line to the message
+        if (header.clipQualLeft > read.bases.length()) {
+            okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.bases.length()) + " bases.\n";
+        }
+        if (header.clipQualRight > read.bases.length()) {
+            okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.bases.length()) + " bases.\n";
+        }
+        if (header.clipQualLeft > read.qualScores.size()) {
+            okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";
+        }
+        if (header.clipQualRight > read.qualScores.size()) {
+            okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";
+        }
+        //the accumulated warning is only printed when at least one check failed
+        if (okay == false) {
+            m->mothurOut(message); m->mothurOutEndLine();
+        }
+        
+               return okay;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "sanityCheck");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {
        try {
-               
                out << "Flowgram: ";
                for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }
                
@@ -775,10 +807,9 @@ int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& hea
 //**********************************************************************************************************************
 int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {
        try {
-               
                string seq = read.bases;
                
-               if (trim) {
+        if (trim) {
                        if(header.clipQualRight < header.clipQualLeft){
                                seq = "NNNN";
                        }
index 903a589cd777b89b7e508e48d1173d550210f864..837435b3a6ca4c4c7f2e7e7579141b8fbece342b 100644 (file)
@@ -82,7 +82,7 @@ private:
        bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos;
        int mycount;
        set<string> seqNames;
-       
+    
        //extract sff file functions
        int extractSffInfo(string, string);
        int readCommonHeader(ifstream&, CommonHeader&);
@@ -98,7 +98,8 @@ private:
        int printQualSeqData(ofstream&, seqRead&, Header&);
        int readAccnosFile(string);
        int parseSffTxt();
-       
+       bool sanityCheck(Header&, seqRead&);
+    
        //parsesfftxt file functions
        int parseHeaderLineToInt(ifstream&);
        vector<unsigned short> parseHeaderLineToFloatVector(ifstream&, int);
index c8ba4fa2347735dc27640321c286384960ddb134..2a06380b6896ae6c908f38222abdcc566bcae077 100644 (file)
@@ -32,7 +32,7 @@ EstOutput SharedAce::getValues(vector<SharedRAbundVector*> shared) {
                S12 = number of shared OTUs in A and B
                This estimator was changed to reflect Caldwell's changes, eliminating the nrare / nrare - 1 */
 
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //store in temps to avoid multiple repetitive function calls
                        tempA = shared[0]->getAbundance(i);
                        tempB = shared[1]->getAbundance(i);
index 10dfbd544bedeafaa3a82c282174480e9b55d005..cbb9d3003ed84631e5251f9baf6738a165c83c7e 100644 (file)
@@ -21,7 +21,7 @@ EstOutput Anderberg::getValues(vector<SharedRAbundVector*> shared) {
 
                data.resize(1,0);
                
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //store in temps to avoid multiple repetitive function calls
                        tempA = shared[0]->getAbundance(i);
                        tempB = shared[1]->getAbundance(i);
index 0182e13bd34ea9164383b5f83c221ae705898677..3711ce77cbb312e5dc2ee2aba14e1856bb942d12 100644 (file)
@@ -24,7 +24,7 @@ EstOutput BrayCurtis::getValues(vector<SharedRAbundVector*> shared) {
                sumSharedAB = the sum of the minimum otus int all shared otus in AB.
                */
                
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //store in temps to avoid multiple repetitive function calls
                        tempA = shared[0]->getAbundance(i);
                        tempB = shared[1]->getAbundance(i);
index d8cf60bf0cfe67097bcdadc8025ffc5829af6354..8d47ad2fca1d307bd150a985224b1099e492626a 100644 (file)
@@ -29,7 +29,7 @@ EstOutput SharedChao1::getValues(vector<SharedRAbundVector*> shared){
                //create and initialize trees to 0.
                initialTree(numGroups);
                
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //get bin values and calc shared 
                        bool sharedByAll = true;
                        temp.clear();
index de48158d9eb7d26adecce2dab1162b32332fec5f..63f83e19a61fec97a6a64121da4f5ea973b5815a 100644 (file)
@@ -572,8 +572,8 @@ int SharedCommand::createMisMatchFile() {
                        
                        m->openOutputFile(outputMisMatchName, outMisMatch);
                        
-                       map<string, string> listNames;
-                       map<string, string>::iterator itList;
+                       set<string> listNames;
+                       set<string>::iterator itList;
                        
                        //go through list and if group returns "not found" output it
                        for (int i = 0; i < SharedList->getNumBins(); i++) {
@@ -581,26 +581,19 @@ int SharedCommand::createMisMatchFile() {
                        
                                string names = SharedList->get(i); 
                                
-                               while (names.find_first_of(',') != -1) { 
-                                       string name = names.substr(0,names.find_first_of(','));
-                                       names = names.substr(names.find_first_of(',')+1, names.length());
+                vector<string> binNames;
+                m->splitAtComma(names, binNames);
+                
+                               for (int j = 0; j < binNames.size(); j++) { 
+                                       string name = binNames[j];
                                        string group = groupMap->getGroup(name);
                                        
                                        if(group == "not found") {      outMisMatch << name << endl;  }
                                        
                                        itList = listNames.find(name);
                                        if (itList != listNames.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
-                                       else { listNames[name] = name; }
+                                       else { listNames.insert(name); }
                                }
-                       
-                               //get last name
-                               string group = groupMap->getGroup(names);
-                               if(group == "not found") {      outMisMatch << names << endl;  }        
-                               
-                               itList = listNames.find(names);
-                               if (itList != listNames.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
-                               else { listNames[names] = names; }
-
                        }
                        
                        outMisMatch.close();
@@ -621,9 +614,12 @@ int SharedCommand::createMisMatchFile() {
                                
                                string names = SharedList->get(i); 
                
-                               while (names.find_first_of(',') != -1) { 
-                                       string name = names.substr(0,names.find_first_of(','));
-                                       names = names.substr(names.find_first_of(',')+1, names.length());
+                               vector<string> binNames;
+                m->splitAtComma(names, binNames);
+                
+                               for (int j = 0; j < binNames.size(); j++) { 
+
+                                       string name = binNames[j];
                                        
                                        itList = namesInList.find(name);
                                        if (itList != namesInList.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
@@ -631,12 +627,6 @@ int SharedCommand::createMisMatchFile() {
                                        namesInList[name] = name;
                                        
                                }
-                               
-                               itList = namesInList.find(names);
-                               if (itList != namesInList.end()) {  m->mothurOut(names + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
-
-                               //get last name
-                               namesInList[names] = names;                             
                        }
                        
                        //get names of sequences in groupfile
@@ -674,13 +664,12 @@ int SharedCommand::ListGroupSameSeqs() {
                int error = 0; 
                
                vector<string> groupMapsSeqs = groupMap->getNamesSeqs();
-               
+               
                set<string> groupNamesSeqs;
                for(int i = 0; i < groupMapsSeqs.size(); i++) {
                        groupNamesSeqs.insert(groupMapsSeqs[i]);
                }
                
-               
                //go through list and if group returns "not found" output it
                for (int i = 0; i < SharedList->getNumBins(); i++) {
                        if (m->control_pressed) { return 0; } 
index ac3e94eaffaa8ac339935a97267c56b0d4e832db..ed21335eddc1ab2360d74b761ea9eff842c6791a 100644 (file)
@@ -21,7 +21,7 @@ EstOutput Jclass::getValues(vector<SharedRAbundVector*> shared) {
 
                data.resize(1,0);
                
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //store in temps to avoid multiple repetitive function calls
                        tempA = shared[0]->getAbundance(i);
                        tempB = shared[1]->getAbundance(i);
index 6aad5df26f2a6912b950df9e8d8211e730ef0fe1..5c91ddb65c654c5788c748596e87ddda94ae25f1 100644 (file)
@@ -21,7 +21,7 @@ EstOutput Kulczynski::getValues(vector<SharedRAbundVector*> shared) {
 
                data.resize(1,0);
                
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //store in temps to avoid multiple repetitive function calls
                        tempA = shared[0]->getAbundance(i);
                        tempB = shared[1]->getAbundance(i);
index de90252769a3925c96a463a62c63a1349c3ef710..8c8b7f7f4e181760b5bc9c7adb8117ab535b5c3d 100644 (file)
@@ -21,7 +21,7 @@ EstOutput KulczynskiCody::getValues(vector<SharedRAbundVector*> shared) {
 
                data.resize(1,0);
                
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //store in temps to avoid multiple repetitive function calls
                        tempA = shared[0]->getAbundance(i);
                        tempB = shared[1]->getAbundance(i);
index f3ed5ece3850f6528ecb56cc99a17765b7766ff7..52192756e00cbb5487e1baa9988c1ba0c709ed5a 100644 (file)
@@ -21,7 +21,7 @@ EstOutput Lennon::getValues(vector<SharedRAbundVector*> shared) {
 
                data.resize(1,0);
                
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //store in temps to avoid multiple repetitive function calls
                        tempA = shared[0]->getAbundance(i);
                        tempB = shared[1]->getAbundance(i);
index bd4923f32ddf4337ce6f8570fd57fcd199ccaf6f..16a759089f079e426d302745e34575f55640f30c 100644 (file)
@@ -20,14 +20,14 @@ EstOutput MorHorn::getValues(vector<SharedRAbundVector*> shared) {
                morhorn = 0.0; sumSharedA = 0.0; sumSharedB = 0.0; a = 0.0; b = 0.0; d = 0.0;
                
                //get the total values we need to calculate the theta denominator sums
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //store in temps to avoid multiple repetitive function calls
                        Atotal += shared[0]->getAbundance(i);
                        Btotal += shared[1]->getAbundance(i);
                }
                
                //calculate the denominator sums
-               for (int j = 0; j < shared[0]->size(); j++) {
+               for (int j = 0; j < shared[0]->getNumBins(); j++) {
                        //store in temps to avoid multiple repetitive function calls
                        tempA = shared[0]->getAbundance(j);
                        tempB = shared[1]->getAbundance(j);
index 004e535bdb37d4247ec9f9d8aa0e4ad96e4c4ccf..b49fa4a3cc5730b0a97b0eefe30f952a7671f932 100644 (file)
@@ -21,7 +21,7 @@ EstOutput Ochiai::getValues(vector<SharedRAbundVector*> shared) {
 
                data.resize(1,0);
                
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //store in temps to avoid multiple repetitive function calls
                        tempA = shared[0]->getAbundance(i);
                        tempB = shared[1]->getAbundance(i);
index bea2f8c83808b287428bc309f513b8b64d548366..8abf92023620096dd820dbf2f16fcfb1fe34464c 100644 (file)
@@ -348,7 +348,7 @@ SharedRAbundVector SharedRAbundFloatVector::getSharedRAbundVector(){
                exit(1);
        }               
 }
-/***********************************************************************/
+***********************************************************************/
 vector<SharedRAbundFloatVector*> SharedRAbundFloatVector::getSharedRAbundFloatVectors(){
        try {
                SharedUtil* util;
@@ -419,7 +419,7 @@ SharedSAbundVector SharedRAbundVector::getSharedSAbundVector(){
                exit(1);
        }
 }
-/***********************************************************************/
+***********************************************************************/
 
 SAbundVector SharedRAbundFloatVector::getSAbundVector() {
        try {
@@ -461,7 +461,7 @@ SharedOrderVector SharedRAbundFloatVector::getSharedOrderVector() {
                exit(1);
        }
 }
-/***********************************************************************/
+***********************************************************************/
 //this is not functional, not sure how to handle it yet, but I need the stub because it is a pure function
 OrderVector SharedRAbundFloatVector::getOrderVector(map<string,int>* nameMap = NULL) {
        try {
index 5ad4b2d7e70d2620c037b498a66b4d16cef62371..70b09603be0f4cd85081f9f1b530c5ec0b61040e 100644 (file)
@@ -55,7 +55,7 @@ SharedRAbundVector::SharedRAbundVector(string id, vector<individual> rav) : Data
 }
 
 
-/***********************************************************************/
+***********************************************************************/
 //reads a shared file
 SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
        try {
index 36cacd6935727ba1763b436e25f334ab2c1862bc..f9b5eddcd919acdd409c9b0792294034dbcb7391 100644 (file)
@@ -19,7 +19,7 @@ EstOutput SharedSobs::getValues(vector<SharedRAbundVector*> shared){
                double observed = 0;
 
                //loop through the species in each group
-               for (int k = 0; k < shared[0]->size(); k++) {
+               for (int k = 0; k < shared[0]->getNumBins(); k++) {
                        //if you have found a new species
                        if (shared[0]->getAbundance(k) != 0) { observed++; } 
                        else if ((shared[0]->getAbundance(k) == 0) && (shared[1]->getAbundance(k) != 0)) { observed++; }
index e2e169c58788dfcd7e9aa313b7105a446bef7867..fffed0290b1136da46e42d31c99542da19ec9a0a 100644 (file)
@@ -19,7 +19,7 @@ EstOutput SharedSobsCS::getValues(vector<SharedRAbundVector*> shared){
                double observed = 0;
                int numGroups = shared.size();
 
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //get bin values and set sharedByAll 
                        bool sharedByAll = true;
                        for (int j = 0; j < numGroups; j++) {
index 32728f575a52f94c9d7b8b287fd14a7147ad53e9..85609dad69311e08474a2115755d74761052ab13 100644 (file)
@@ -21,7 +21,7 @@ EstOutput SorClass::getValues(vector<SharedRAbundVector*> shared) {
 
                data.resize(1,0);
                
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //store in temps to avoid multiple repetitive function calls
                        tempA = shared[0]->getAbundance(i);
                        tempB = shared[1]->getAbundance(i);
index 7c42a18d662f9b8d976a39515a47c4e79d509e22..644adee677820f92a7c8cf87c41d65908a1e957b 100644 (file)
@@ -20,14 +20,14 @@ EstOutput ThetaN::getValues(vector<SharedRAbundVector*> shared) {
                numerator = 0.0; denominator = 0.0; thetaN = 0.0; sumSharedA = 0.0; sumSharedB = 0.0; a = 0.0; b = 0.0; d = 0.0;
                
                //get the total values we need to calculate the theta denominator sums
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //store in temps to avoid multiple repetitive function calls
                        Atotal += shared[0]->getAbundance(i);
                        Btotal += shared[1]->getAbundance(i);
                }
                
                //calculate the theta denominator sums
-               for (int j = 0; j < shared[0]->size(); j++) {
+               for (int j = 0; j < shared[0]->getNumBins(); j++) {
                        //store in temps to avoid multiple repetitive function calls
                        tempA = shared[0]->getAbundance(j);
                        tempB = shared[1]->getAbundance(j);
index 315a61f1310f8381e7fab74546f059d1823f11f7..6c0f6c7f91bf46bf314c7e16cfcf6997bed4f388 100644 (file)
@@ -29,14 +29,14 @@ EstOutput ThetaYC::getValues(vector<SharedRAbundVector*> shared) {
                double sumPsqQ = 0;
                
                //get the total values we need to calculate the theta denominator sums
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //store in temps to avoid multiple repetitive function calls
                        Atotal += (double)shared[0]->getAbundance(i);
                        Btotal += (double)shared[1]->getAbundance(i);
                }
                
                //calculate the theta denominator sums
-               for (int j = 0; j < shared[0]->size(); j++) {
+               for (int j = 0; j < shared[0]->getNumBins(); j++) {
                        //store in temps to avoid multiple repetitive function calls
                        pi = shared[0]->getAbundance(j) / Atotal;
                        qi = shared[1]->getAbundance(j) / Btotal;
index 1ad0b27390421d2bf43049d398c3375786077b93..32891c333ab5229aec50ed05ff5f03b6799d1396 100644 (file)
@@ -9,15 +9,6 @@
 
 #include "shhhercommand.h"
 
-#include "readcolumn.h"
-#include "readmatrix.hpp"
-#include "rabundvector.hpp"
-#include "sabundvector.hpp"
-#include "listvector.hpp"
-#include "cluster.hpp"
-#include "sparsematrix.hpp"
-#include <cfloat>
-
 //**********************************************************************************************************************
 vector<string> ShhherCommand::setParameters(){ 
        try {
@@ -76,18 +67,15 @@ ShhherCommand::ShhherCommand(){
 
 ShhherCommand::ShhherCommand(string option) {
        try {
-
+        
 #ifdef USE_MPI
                MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
                MPI_Comm_size(MPI_COMM_WORLD, &ncpus);
-
+        
                if(pid == 0){
 #endif
-               
-               
                abort = false; calledHelp = false;   
                
-               
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
                else if(option == "citation") { citation(); abort = true; calledHelp = true;}
@@ -168,6 +156,67 @@ ShhherCommand::ShhherCommand(string option) {
                                m->openOutputFile(compositeNamesFileName, temp);
                                temp.close();
                        }
+            
+            if(flowFilesFileName != "not found"){
+                string fName;
+                
+                ifstream flowFilesFile;
+                m->openInputFile(flowFilesFileName, flowFilesFile);
+                while(flowFilesFile){
+                    fName = m->getline(flowFilesFile);
+                    
+                    //test if file is valid
+                    ifstream in;
+                    int ableToOpen = m->openInputFile(fName, in, "noerror");
+                    in.close();        
+                    if (ableToOpen == 1) {
+                        if (inputDir != "") { //default path is set
+                            string tryPath = inputDir + fName;
+                            m->mothurOut("Unable to open " + fName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
+                            ifstream in2;
+                            ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                            in2.close();
+                            fName = tryPath;
+                        }
+                    }
+                    
+                    if (ableToOpen == 1) {
+                        if (m->getDefaultPath() != "") { //default path is set
+                            string tryPath = m->getDefaultPath() + m->getSimpleName(fName);
+                            m->mothurOut("Unable to open " + fName + ". Trying default " + tryPath); m->mothurOutEndLine();
+                            ifstream in2;
+                            ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                            in2.close();
+                            fName = tryPath;
+                        }
+                    }
+                    
+                    //if you can't open it its not in current working directory or inputDir, try mothur excutable location
+                    if (ableToOpen == 1) {
+                        string exepath = m->argv;
+                        string tempPath = exepath;
+                        for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
+                        exepath = exepath.substr(0, (tempPath.find_last_of('m')));
+                        
+                        string tryPath = m->getFullPathName(exepath) + m->getSimpleName(fName);
+                        m->mothurOut("Unable to open " + fName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
+                        ifstream in2;
+                        ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                        in2.close();
+                        fName = tryPath;
+                    }
+                    
+                    if (ableToOpen == 1) {  m->mothurOut("Unable to open " + fName + ". Disregarding. "); m->mothurOutEndLine();  }
+                    else { flowFileVector.push_back(fName); }
+                    m->gobble(flowFilesFile);
+                }
+                flowFilesFile.close();
+                if (flowFileVector.size() == 0) {  m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
+            }
+            else{
+                flowFileVector.push_back(flowFileName);
+            }
+
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
@@ -272,11 +321,9 @@ ShhherCommand::ShhherCommand(string option) {
                        }
                        
                }
-                       
 #ifdef USE_MPI
                }                               
 #endif
-                               
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "ShhherCommand");
@@ -291,7 +338,7 @@ int ShhherCommand::execute(){
                
                int tag = 1976;
                MPI_Status status; 
-
+                       
                if(pid == 0){
 
                        for(int i=1;i<ncpus;i++){
@@ -305,10 +352,10 @@ int ShhherCommand::execute(){
                        getSingleLookUp();      if (m->control_pressed) { return 0; }
                        getJointLookUp();       if (m->control_pressed) { return 0; }
                        
-                       vector<string> flowFileVector;
+            vector<string> flowFileVector;
                        if(flowFilesFileName != "not found"){
                                string fName;
-
+                
                                ifstream flowFilesFile;
                                m->openInputFile(flowFilesFileName, flowFilesFile);
                                while(flowFilesFile){
@@ -320,6 +367,7 @@ int ShhherCommand::execute(){
                        else{
                                flowFileVector.push_back(flowFileName);
                        }
+            
                        int numFiles = flowFileVector.size();
 
                        for(int i=1;i<ncpus;i++){
@@ -697,7 +745,36 @@ int ShhherCommand::execute(){
                exit(1);
        }
 }
-
+/**************************************************************************************************/
+string ShhherCommand::createNamesFile(){
+       try{
+               
+               vector<string> duplicateNames(numUniques, "");
+               for(int i=0;i<numSeqs;i++){
+                       duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
+               }
+               
+               string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
+               
+               ofstream nameFile;
+               m->openOutputFile(nameFileName, nameFile);
+               
+               for(int i=0;i<numUniques;i++){
+                       
+                       if (m->control_pressed) { break; }
+                       
+            //                 nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
+                       nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
+               }
+               
+               nameFile.close();
+               return  nameFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "createNamesFile");
+               exit(1);
+       }
+}
 /**************************************************************************************************/
 
 string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){
@@ -744,316 +821,1528 @@ string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){
                return fDistFileName;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "flowDistParentFork");
+               m->errorOut(e, "ShhherCommand", "flowDistMPI");
                exit(1);
        }
 }
+/**************************************************************************************************/
+void ShhherCommand::getOTUData(string listFileName){
+    try {
+        
+        ifstream listFile;
+        m->openInputFile(listFileName, listFile);
+        string label;
+        
+        listFile >> label >> numOTUs;
+        
+        otuData.assign(numSeqs, 0);
+        cumNumSeqs.assign(numOTUs, 0);
+        nSeqsPerOTU.assign(numOTUs, 0);
+        aaP.clear();aaP.resize(numOTUs);
+        
+        seqNumber.clear();
+        aaI.clear();
+        seqIndex.clear();
+        
+        string singleOTU = "";
+        
+        for(int i=0;i<numOTUs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            listFile >> singleOTU;
+            
+            istringstream otuString(singleOTU);
+            
+            while(otuString){
+                
+                string seqName = "";
+                
+                for(int j=0;j<singleOTU.length();j++){
+                    char letter = otuString.get();
+                    
+                    if(letter != ','){
+                        seqName += letter;
+                    }
+                    else{
+                        map<string,int>::iterator nmIt = nameMap.find(seqName);
+                        int index = nmIt->second;
+                        
+                        nameMap.erase(nmIt);
+                        
+                        otuData[index] = i;
+                        nSeqsPerOTU[i]++;
+                        aaP[i].push_back(index);
+                        seqName = "";
+                    }
+                }
+                
+                map<string,int>::iterator nmIt = nameMap.find(seqName);
+                
+                int index = nmIt->second;
+                nameMap.erase(nmIt);
+                
+                otuData[index] = i;
+                nSeqsPerOTU[i]++;
+                aaP[i].push_back(index);       
+                
+                otuString.get();
+            }
+            
+            sort(aaP[i].begin(), aaP[i].end());
+            for(int j=0;j<nSeqsPerOTU[i];j++){
+                seqNumber.push_back(aaP[i][j]);
+            }
+            for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
+                aaP[i].push_back(0);
+            }
+            
+            
+        }
+        
+        for(int i=1;i<numOTUs;i++){
+            cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
+        }
+        aaI = aaP;
+        seqIndex = seqNumber;
+        
+        listFile.close();      
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "getOTUData");
+        exit(1);       
+    }          
+}
 
-#else
-//**********************************************************************************************************************
-
-int ShhherCommand::execute(){
-       try {
-               if (abort == true) { return 0; }
-               
-               getSingleLookUp();      if (m->control_pressed) { return 0; }
-               getJointLookUp();       if (m->control_pressed) { return 0; }
-                               
-               
-               vector<string> flowFileVector;
-               if(flowFilesFileName != "not found"){
-                       string fName;
-                       
-                       ifstream flowFilesFile;
-                       m->openInputFile(flowFilesFileName, flowFilesFile);
-                       while(flowFilesFile){
-                               fName = m->getline(flowFilesFile);
-                               flowFileVector.push_back(fName);
-                               m->gobble(flowFilesFile);
-                       }
-               }
-               else{
-                       flowFileVector.push_back(flowFileName);
-               }
-               int numFiles = flowFileVector.size();
-               
-               
-               for(int i=0;i<numFiles;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       flowFileName = flowFileVector[i];
-
-                       m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
-                       m->mothurOut("Reading flowgrams...\n");
-                       getFlowData();
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       m->mothurOut("Identifying unique flowgrams...\n");
-                       getUniques();
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       m->mothurOut("Calculating distances between flowgrams...\n");
-                       string distFileName = createDistFile(processors);
-                       string namesFileName = createNamesFile();
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       m->mothurOut("\nClustering flowgrams...\n");
-                       string listFileName = cluster(distFileName, namesFileName);
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       getOTUData(listFileName);
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       m->mothurRemove(distFileName);
-                       m->mothurRemove(namesFileName);
-                       m->mothurRemove(listFileName);
-                       
-                       initPyroCluster();
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       double maxDelta = 0;
-                       int iter = 0;
-                       
-                       double begClock = clock();
-                       unsigned long long begTime = time(NULL);
+/**************************************************************************************************/
 
-                       m->mothurOut("\nDenoising flowgrams...\n");
-                       m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
-                       
-                       while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
-                               
-                               if (m->control_pressed) { break; }
-                               
-                               double cycClock = clock();
-                               unsigned long long cycTime = time(NULL);
-                               fill();
-                               
-                               if (m->control_pressed) { break; }
+void ShhherCommand::initPyroCluster(){                          
+    try{
+        if (numOTUs < processors) { processors = 1; }
+        
+        dist.assign(numSeqs * numOTUs, 0);
+        change.assign(numOTUs, 1);
+        centroids.assign(numOTUs, -1);
+        weight.assign(numOTUs, 0);
+        singleTau.assign(numSeqs, 1.0);
+        
+        nSeqsBreaks.assign(processors+1, 0);
+        nOTUsBreaks.assign(processors+1, 0);
+        
+        nSeqsBreaks[0] = 0;
+        for(int i=0;i<processors;i++){
+            nSeqsBreaks[i+1] = nSeqsBreaks[i] + (int)((double) numSeqs / (double) processors);
+            nOTUsBreaks[i+1] = nOTUsBreaks[i] + (int)((double) numOTUs / (double) processors);
+        }
+        nSeqsBreaks[processors] = numSeqs;
+        nOTUsBreaks[processors] = numOTUs;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "initPyroCluster");
+        exit(1);       
+    }
+}
 
-                               calcCentroids();
-                               
-                               if (m->control_pressed) { break; }
+/**************************************************************************************************/
 
-                               maxDelta = getNewWeights();  if (m->control_pressed) { break; }
-                               double nLL = getLikelihood(); if (m->control_pressed) { break; }
-                               checkCentroids();
-                               
-                               if (m->control_pressed) { break; }
-                               
-                               calcNewDistances();
-                               
-                               if (m->control_pressed) { break; }
-                               
-                               iter++;
-                               
-                               m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
+void ShhherCommand::fill(){
+    try {
+        int index = 0;
+        for(int i=0;i<numOTUs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            cumNumSeqs[i] = index;
+            for(int j=0;j<nSeqsPerOTU[i];j++){
+                seqNumber[index] = aaP[i][j];
+                seqIndex[index] = aaI[i][j];
+                
+                index++;
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "fill");
+        exit(1);       
+    }          
+}
 
-                       }       
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       m->mothurOut("\nFinalizing...\n");
-                       fill();
+/**************************************************************************************************/
+void ShhherCommand::getFlowData(){
+    try{
+        ifstream flowFile;
+        m->openInputFile(flowFileName, flowFile);
+        
+        string seqName;
+        seqNameVector.clear();
+        lengths.clear();
+        flowDataIntI.clear();
+        nameMap.clear();
+        
+        
+        int currentNumFlowCells;
+        
+        float intensity;
+        
+        flowFile >> numFlowCells;
+        int index = 0;//pcluster
+        while(!flowFile.eof()){
+            
+            if (m->control_pressed) { break; }
+            
+            flowFile >> seqName >> currentNumFlowCells;
+            lengths.push_back(currentNumFlowCells);
+            
+            seqNameVector.push_back(seqName);
+            nameMap[seqName] = index++;//pcluster
+            
+            for(int i=0;i<numFlowCells;i++){
+                flowFile >> intensity;
+                if(intensity > 9.99)   {       intensity = 9.99;       }
+                int intI = int(100 * intensity + 0.0001);
+                flowDataIntI.push_back(intI);
+            }
+            m->gobble(flowFile);
+        }
+        flowFile.close();
+        
+        numSeqs = seqNameVector.size();                
+        
+        for(int i=0;i<numSeqs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            int iNumFlowCells = i * numFlowCells;
+            for(int j=lengths[i];j<numFlowCells;j++){
+                flowDataIntI[iNumFlowCells + j] = 0;
+            }
+        }
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "getFlowData");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+void ShhherCommand::calcNewDistancesChildMPI(int startSeq, int stopSeq, vector<int>& otuIndex){
+       
+       try{
+               vector<double> newTau(numOTUs,0);
+               vector<double> norms(numSeqs, 0);
+               otuIndex.clear();
+               seqIndex.clear();
+               singleTau.clear();
+               
+               for(int i=startSeq;i<stopSeq;i++){
                        
                        if (m->control_pressed) { break; }
                        
-                       setOTUs();
+                       double offset = 1e8;
+                       int indexOffset = i * numOTUs;
                        
-                       if (m->control_pressed) { break; }
+                       for(int j=0;j<numOTUs;j++){
+                               
+                               if(weight[j] > MIN_WEIGHT && change[j] == 1){
+                                       dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
+                               }
+                               if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
+                                       offset = dist[indexOffset + j];
+                               }
+                       }
                        
-                       vector<int> otuCounts(numOTUs, 0);
-                       for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
+                       for(int j=0;j<numOTUs;j++){
+                               if(weight[j] > MIN_WEIGHT){
+                                       newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
+                                       norms[i] += newTau[j];
+                               }
+                               else{
+                                       newTau[j] = 0.0;
+                               }
+                       }
                        
-                       calcCentroidsDriver(0, numOTUs);        if (m->control_pressed) { break; }
-                       writeQualities(otuCounts);                      if (m->control_pressed) { break; }
-                       writeSequences(otuCounts);                      if (m->control_pressed) { break; }
-                       writeNames(otuCounts);                          if (m->control_pressed) { break; }
-                       writeClusters(otuCounts);                       if (m->control_pressed) { break; }
-                       writeGroups();                                          if (m->control_pressed) { break; }
+                       for(int j=0;j<numOTUs;j++){
+                
+                               newTau[j] /= norms[i];
+                               
+                               if(newTau[j] > MIN_TAU){
+                                       otuIndex.push_back(j);
+                                       seqIndex.push_back(i);
+                                       singleTau.push_back(newTau[j]);
+                               }
+                       }
                        
-                       m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
-               }
-               
-               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
-
-               
-               if(compositeFASTAFileName != ""){
-                       outputNames.push_back(compositeFASTAFileName);
-                       outputNames.push_back(compositeNamesFileName);
                }
-
-               m->mothurOutEndLine();
-               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
-               m->mothurOutEndLine();
-               
-               return 0;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "execute");
-               exit(1);
-       }
+               m->errorOut(e, "ShhherCommand", "calcNewDistancesChildMPI");
+               exit(1);        
+       }               
 }
-#endif
+
 /**************************************************************************************************/
 
-void ShhherCommand::getFlowData(){
-       try{
-               ifstream flowFile;
-               m->openInputFile(flowFileName, flowFile);
-               
-               string seqName;
-               seqNameVector.clear();
-               lengths.clear();
-               flowDataIntI.clear();
-               nameMap.clear();
-               
-               
-               int currentNumFlowCells;
+// For every sequence in [startSeq, stopSeq): refreshes its distance to each
+// sufficiently weighted OTU whose centroid changed, converts the distances to
+// normalised tau values, and records every tau above MIN_TAU in singleTau,
+// indexing it through aaP / aaI. nSeqsPerOTU is rebuilt from zero.
+void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
+
+    try{
+
+        int numStored = 0;                      // tau values kept so far
+        vector<double> tau(numOTUs, 0);
+        vector<double> norms(numSeqs, 0);
+        nSeqsPerOTU.assign(numOTUs, 0);
+
+        for(int seq = startSeq; seq < stopSeq; seq++){
+
+            if (m->control_pressed) { break; }
+
+            const int rowStart = seq * numOTUs; // first dist entry of this sequence
+            double smallest = 1e8;
+
+            // update distances to changed centroids and track the smallest one
+            for(int otu = 0; otu < numOTUs; otu++){
+                if(!(weight[otu] > MIN_WEIGHT)) { continue; }
+
+                if(change[otu] == 1){
+                    dist[rowStart + otu] = getDistToCentroid(centroids[otu], seq, lengths[seq]);
+                }
+                if(dist[rowStart + otu] < smallest){
+                    smallest = dist[rowStart + otu];
+                }
+            }
+
+            // unnormalised taus, with distances shifted by the smallest one
+            for(int otu = 0; otu < numOTUs; otu++){
+                tau[otu] = 0.0;
+                if(weight[otu] > MIN_WEIGHT){
+                    tau[otu] = exp(sigma * (-dist[rowStart + otu] + smallest)) * weight[otu];
+                    norms[seq] += tau[otu];
+                }
+            }
+
+            // normalise and keep whatever exceeds MIN_TAU
+            for(int otu = 0; otu < numOTUs; otu++){
+                tau[otu] /= norms[seq];
+                if(!(tau[otu] > MIN_TAU)) { continue; }
+
+                const int slot = numStored;
+                numStored++;
+
+                singleTau.resize(numStored, 0);
+                seqNumber.resize(numStored, 0);
+                seqIndex.resize(numStored, 0);
+
+                singleTau[slot] = tau[otu];
+
+                aaP[otu][nSeqsPerOTU[otu]] = slot;
+                aaI[otu][nSeqsPerOTU[otu]] = seq;
+                nSeqsPerOTU[otu]++;
+            }
+        }
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "calcNewDistancesParent");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+// Gives each sequence to the OTU holding its largest tau, then resets the
+// per-sequence singleTau / dist entries, rebuilds aaP, aaI and nSeqsPerOTU
+// for that hard assignment, and finishes by calling fill().
+void ShhherCommand::setOTUs(){
+
+    try {
+        // tau table laid out as [sequence * numOTUs + otu]
+        vector<double> tauTable(numOTUs * numSeqs, 0.0000);
+
+        for(int otu = 0; otu < numOTUs; otu++){
+
+            if (m->control_pressed) { break; }
+
+            for(int member = 0; member < nSeqsPerOTU[otu]; member++){
+                const int slot = cumNumSeqs[otu] + member;
+                tauTable[seqIndex[slot] * numOTUs + otu] = singleTau[seqNumber[slot]];
+            }
+        }
+
+        // first OTU with the strictly largest tau wins; -1 if none exceeds -1.0
+        for(int seq = 0; seq < numSeqs; seq++){
+            const int rowStart = seq * numOTUs;
+            double bestTau = -1.0000;
+            int bestOTU = -1;
+
+            for(int otu = 0; otu < numOTUs; otu++){
+                if(tauTable[rowStart + otu] > bestTau){
+                    bestTau = tauTable[rowStart + otu];
+                    bestOTU = otu;
+                }
+            }
+
+            otuData[seq] = bestOTU;
+        }
+
+        nSeqsPerOTU.assign(numOTUs, 0);
+
+        for(int seq = 0; seq < numSeqs; seq++){
+            const int otu = otuData[seq];
+
+            singleTau[seq] = 1.0000;
+            dist[seq] = 0.0000;
+
+            aaP[otu][nSeqsPerOTU[otu]] = seq;
+            aaI[otu][nSeqsPerOTU[otu]] = seq;
+
+            nSeqsPerOTU[otu]++;
+        }
+        fill();
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "setOTUs");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+// Groups sequences whose rounded flowgrams agree over their shared length.
+// Fills uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique and
+// mapUniqueToSeq, sets numUniques, and finally recomputes flowDataPrI from
+// flowDataIntI through getProbIntensity().
+void ShhherCommand::getUniques(){
+    try{
+
+        numUniques = 0;
+        uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
+        uniqueCount.assign(numSeqs, 0);         //      anWeights
+        uniqueLengths.assign(numSeqs, 0);
+        mapSeqToUnique.assign(numSeqs, -1);
+        mapUniqueToSeq.assign(numSeqs, -1);
+
+        // scratch buffer for the rounded flowgram of the sequence being examined;
+        // every cell is overwritten at the top of each iteration
+        vector<short> current(numFlowCells);
+
+        for(int i=0;i<numSeqs;i++){
+
+            if (m->control_pressed) { break; }
+
+            for(int j=0;j<numFlowCells;j++){
+                current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
+            }
+
+            // look for an existing unique that matches over the shorter of the two lengths
+            bool matched = false;
+            for(int j=0;j<numUniques && !matched;j++){
+                const int offset = j * numFlowCells;
+                const int shorterLength = (lengths[i] < uniqueLengths[j]) ? lengths[i] : uniqueLengths[j];
+
+                bool same = true;
+                for(int k=0;k<shorterLength;k++){
+                    if(current[k] != uniqueFlowgrams[offset + k]){
+                        same = false;
+                        break;
+                    }
+                }
+
+                if(same){
+                    mapSeqToUnique[i] = j;
+                    uniqueCount[j]++;
+                    if(lengths[i] > uniqueLengths[j])  {       uniqueLengths[j] = lengths[i];  }
+                    matched = true;
+                }
+            }
+
+            // no match: this sequence starts a new unique
+            if(!matched){
+                uniqueLengths[numUniques] = lengths[i];
+                uniqueCount[numUniques] = 1;
+                mapSeqToUnique[i] = numUniques;//anMap
+                mapUniqueToSeq[numUniques] = i;//anF
+
+                for(int k=0;k<numFlowCells;k++){
+                    uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
+                }
+
+                numUniques++;
+            }
+        }
+        uniqueLengths.resize(numUniques);
+
+        flowDataPrI.resize(numSeqs * numFlowCells, 0);
+        for(int i=0;i<flowDataPrI.size();i++)  {       if (m->control_pressed) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);          }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "getUniques");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+// Returns the average, over the first minLength flow positions, of
+// jointLookUp[intA * NUMBINS + intB] - prA - prB for sequences seqA and seqB.
+//
+// The original compared lengths[seqB] against the A length but then assigned
+// lengths[mapSeqToUnique[seqB]], so the result was not always the smaller of
+// the two lengths actually used. Both sides now use the same lookup.
+// NOTE(review): confirm whether lengths should be indexed by sequence or by
+// unique here; this chunk does not show the caller.
+float ShhherCommand::calcPairwiseDist(int seqA, int seqB){
+    try{
+        const int lengthA = lengths[mapSeqToUnique[seqA]];
+        const int lengthB = lengths[mapSeqToUnique[seqB]];
+        const int minLength = (lengthB < lengthA) ? lengthB : lengthA;
+
+        int ANumFlowCells = seqA * numFlowCells;
+        int BNumFlowCells = seqB * numFlowCells;
+
+        float dist = 0;
+
+        for(int i=0;i<minLength;i++){
+
+            if (m->control_pressed) { break; }
+
+            int flowAIntI = flowDataIntI[ANumFlowCells + i];
+            float flowAPrI = flowDataPrI[ANumFlowCells + i];
+
+            int flowBIntI = flowDataIntI[BNumFlowCells + i];
+            float flowBPrI = flowDataPrI[BNumFlowCells + i];
+            dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
+        }
+
+        dist /= (float) minLength;
+        return dist;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
+        exit(1);
+    }
+}
+
+//**********************************************************************************************************************/
+
+// Reads the column distance file and names file, runs CompleteLinkage
+// clustering up to cutoff, writes the resulting list to
+// "<flowFileName without extension>.shhh.list" and returns that file name.
+string ShhherCommand::cluster(string distFileName, string namesFileName){
+    try {
+
+        ReadMatrix* reader = new ReadColumnMatrix(distFileName);
+        reader->setCutoff(cutoff);
+
+        NameAssignment* nameAssignment = new NameAssignment(namesFileName);
+        nameAssignment->readMap();
+        reader->read(nameAssignment);
+
+        // take the results out of the reader before it is deleted
+        ListVector* list = reader->getListVector();
+        SparseMatrix* matrix = reader->getMatrix();
+
+        delete reader;
+        delete nameAssignment;
+
+        RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
+
+        Cluster* linkage = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest");
+        string tag = linkage->getTag();
+
+        // keep merging while the smallest remaining distance is within the cutoff
+        const double clusterCutoff = cutoff;
+        while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
+            if (m->control_pressed) { break; }
+            linkage->update(clusterCutoff);
+        }
+
+        list->setLabel(toString(cutoff));
+
+        string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
+        ofstream listFile;
+        m->openOutputFile(listFileName, listFile);
+        list->print(listFile);
+        listFile.close();
+
+        delete matrix;
+        delete linkage;
+        delete rabund;
+        delete list;
+
+        return listFileName;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "cluster");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+// For each OTU in [start, finish): picks, among the unique flowgrams of its
+// member sequences, the one with the smallest tau-weighted summed distance to
+// the members and stores it in centroids[]. change[] is set to 1 when the
+// centroid differs from the previous one. An OTU with no members, or with
+// summed tau not above MIN_COUNT, gets centroid -1.
+void ShhherCommand::calcCentroidsDriver(int start, int finish){
+
+    try{
+
+        for(int otu = start; otu < finish; otu++){
+
+            if (m->control_pressed) { break; }
+
+            const int numMembers = nSeqsPerOTU[otu];
+            change[otu] = 0; //FALSE
+
+            // total tau currently assigned to this OTU
+            double tauSum = 0;
+            for(int member = 0; member < numMembers; member++){
+                tauSum += singleTau[seqNumber[cumNumSeqs[otu] + member]];
+            }
+
+            if(numMembers > 0 && tauSum > MIN_COUNT){
+                vector<double> cost(numMembers);        // weighted distance per candidate
+                vector<int> candidate(numMembers);      // distinct unique-flowgram ids
+                int numCandidates = 0;
+
+                // collect the distinct uniques represented in this OTU
+                for(int member = 0; member < numMembers; member++){
+                    const int uniqueId = mapSeqToUnique[seqIndex[cumNumSeqs[otu] + member]];
+
+                    bool seen = false;
+                    for(int c = 0; c < numCandidates; c++){
+                        if(uniqueId == candidate[c]){
+                            seen = true;
+                            break;
+                        }
+                    }
+                    if(!seen){
+                        candidate[numCandidates] = uniqueId;
+                        cost[numCandidates] = 0.0000;
+                        numCandidates++;
+                    }
+                }
+
+                // accumulate each member's tau-weighted distance to every candidate
+                for(int member = 0; member < numMembers; member++){
+                    const int slot = cumNumSeqs[otu] + member;
+                    const int seq = seqIndex[slot];
+                    const double tauValue = singleTau[seqNumber[slot]];
+
+                    for(int c = 0; c < numCandidates; c++){
+                        double dist = getDistToCentroid(candidate[c], seq, lengths[seq]);
+                        cost[c] += dist * tauValue;
+                    }
+                }
+
+                // first candidate with the strictly smallest cost below 1e8
+                int best = 100000000;
+                double bestCost = 1e8;
+                for(int c = 0; c < numCandidates; c++){
+                    if(cost[c] < bestCost){
+                        best = c;
+                        bestCost = cost[c];
+                    }
+                }
+
+                if(centroids[otu] != candidate[best]){
+                    change[otu] = 1;
+                    centroids[otu] = candidate[best];
+                }
+            }
+            else if(centroids[otu] != -1){
+                change[otu] = 1;
+                centroids[otu] = -1;
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+// Mean singleLookUp value over the first `length` flow positions, pairing
+// unique flowgram `cent` (from uniqueFlowgrams) with sequence `flow`
+// (from flowDataIntI).
+double ShhherCommand::getDistToCentroid(int cent, int flow, int length){
+    try{
+
+        const int centStart = cent * numFlowCells;
+        const int flowStart = flow * numFlowCells;
+
+        double total = 0;
+
+        for(int pos = 0; pos < length; pos++){
+            total += singleLookUp[uniqueFlowgrams[centStart + pos] * NUMBINS + flowDataIntI[flowStart + pos]];
+        }
+
+        return total / (double)length;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "getDistToCentroid");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+// Recomputes weight[] for every OTU as the sum of its members' tau values and
+// returns the largest absolute change in any single weight.
+double ShhherCommand::getNewWeights(){
+    try{
+
+        double largestChange = 0;
+
+        for(int otu = 0; otu < numOTUs; otu++){
+
+            if (m->control_pressed) { break; }
+
+            const double previous = weight[otu];
+            weight[otu] = 0;
+
+            for(int member = 0; member < nSeqsPerOTU[otu]; member++){
+                weight[otu] += singleTau[seqNumber[cumNumSeqs[otu] + member]];
+            }
+
+            const double delta = fabs(weight[otu] - previous);
+            if(delta > largestChange){ largestChange = delta; }
+        }
+        return largestChange;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "getNewWeights");
+        exit(1);
+    }
+}
+ /**************************************************************************************************/
+// Returns sum over sequences of -log(P[seq]) minus numSeqs * log(sigma), where
+// P[seq] accumulates weight[otu] * exp(-dist * sigma) over the OTUs the
+// sequence belongs to. A P of exactly 0 is replaced by DBL_EPSILON before the
+// logarithm is taken.
+double ShhherCommand::getLikelihood(){
+
+    try{
+
+        vector<long double> P(numSeqs, 0);
+
+        for(int i=0;i<numOTUs;i++){
+
+            if (m->control_pressed) { break; }
+
+            for(int j=0;j<nSeqsPerOTU[i];j++){
+                int index = cumNumSeqs[i] + j;
+                int nI = seqIndex[index];
+                double singleDist = dist[seqNumber[index]];
+
+                P[nI] += weight[i] * exp(-singleDist * sigma);
+            }
+        }
+        double nLL = 0.00;
+        for(int i=0;i<numSeqs;i++){
+            if(P[i] == 0){     P[i] = DBL_EPSILON;     }
+
+            nLL += -log(P[i]);
+        }
+
+        nLL = nLL -(double)numSeqs * log(sigma);
+
+        return nLL;
+    }
+    catch(exception& e) {
+        // report under this function's own name (was "getNewWeights")
+        m->errorOut(e, "ShhherCommand", "getLikelihood");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+// Merges OTUs that share a centroid: the later OTU loses its centroid
+// (set to -1) and its weight is added to the earlier one. OTUs without a
+// centroid or with weight below MIN_WEIGHT are ignored.
+void ShhherCommand::checkCentroids(){
+    try{
+        // 1 = active, -1 = excluded up front, 0 = merged into an earlier OTU
+        vector<int> state(numOTUs, 1);
+
+        for(int otu = 0; otu < numOTUs; otu++){
+            if(centroids[otu] == -1 || weight[otu] < MIN_WEIGHT){
+                state[otu] = -1;
+            }
+        }
+
+        for(int keeper = 0; keeper < numOTUs; keeper++){
+
+            if (m->control_pressed) { break; }
+
+            if(state[keeper] != 1) { continue; }
+
+            for(int other = keeper + 1; other < numOTUs; other++){
+                if(state[other] != 1) { continue; }
+                if(centroids[other] != centroids[keeper]) { continue; }
+
+                state[other] = 0;
+                centroids[other] = -1;
+
+                weight[keeper] += weight[other];
+                weight[other] = 0.0;
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "checkCentroids");
+        exit(1);
+    }
+}
+ /**************************************************************************************************/
+
+
+// Writes "<root>shhh.qual": for every OTU with otuCounts[i] > 0, a '>' header
+// line followed by the quality scores computed for the OTU's centroid
+// flowgram, skipping the first four scores. The file name is appended to
+// outputNames.
+//
+// Scores are capped at 100. The per-OTU score buffer starts at 1024 entries
+// filled with the sentinel -1; it now grows on demand, and the output scan is
+// bounded by the buffer size, so neither the write nor the read can run past
+// the end of the vector.
+void ShhherCommand::writeQualities(vector<int> otuCounts){
+
+    try {
+        string thisOutputDir = outputDir;
+        if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
+        string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.qual";
+
+        ofstream qualityFile;
+        m->openOutputFile(qualityFileName, qualityFile);
+
+        qualityFile.setf(ios::fixed, ios::floatfield);
+        qualityFile.setf(ios::showpoint);
+        qualityFile << setprecision(6);
+
+        vector<vector<int> > qualities(numOTUs);
+        vector<double> pr(HOMOPS, 0);
+
+        for(int i=0;i<numOTUs;i++){
+
+            if (m->control_pressed) { break; }
+
+            int base = 0;       // next free slot in qualities[i]
+
+            if(nSeqsPerOTU[i] > 0){
+                qualities[i].assign(1024, -1);
+
+                for(int index=0;index<numFlowCells;index++){
+                    double count = 0.0000;
+
+                    pr.assign(HOMOPS, 0);
+
+                    // tau-weighted lookup sum for every homopolymer length at this flow
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        int lIndex = cumNumSeqs[i] + j;
+                        double tauValue = singleTau[seqNumber[lIndex]];
+                        int sequenceIndex = aaI[i][j];
+                        short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
+
+                        count += tauValue;
+
+                        for(int s=0;s<HOMOPS;s++){
+                            pr[s] += tauValue * singleLookUp[s * NUMBINS + intensity];
+                        }
+                    }
+
+                    // the centroid's call at this flow is the reference value
+                    const short maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
+                    const double maxPrValue = pr[maxPrIndex];
+
+                    if(count > MIN_COUNT){
+                        double U = 0.0000;
+                        double norm = 0.0000;
+
+                        for(int s=0;s<HOMOPS;s++){
+                            norm += exp(-(pr[s] - maxPrValue));
+                        }
+
+                        // one score per base of the called homopolymer
+                        for(int s=1;s<=maxPrIndex;s++){
+                            U += exp(-(pr[s-1]-maxPrValue))/norm;
+
+                            double temp = (U>0.00) ? log10(U) : -10.1;
+                            temp = floor(-10 * temp);
+
+                            int value = (int)floor(temp);
+                            if(value > 100){   value = 100;    }
+
+                            // grow instead of writing past the end of the buffer
+                            if(base >= (int)qualities[i].size()){
+                                qualities[i].resize(qualities[i].size() + 1024, -1);
+                            }
+                            qualities[i][base] = value;
+                            base++;
+                        }
+                    }
+                }
+            }
+
+
+            if(otuCounts[i] > 0){
+                qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
+
+                //need to get past the first four bases; stop at the sentinel or the end of the buffer
+                for(int j=4; j<(int)qualities[i].size() && qualities[i][j] != -1; j++){
+                    qualityFile << qualities[i][j] << ' ';
+                }
+                qualityFile << endl;
+            }
+        }
+        qualityFile.close();
+        outputNames.push_back(qualityFileName);
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "writeQualities");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+// Writes "<root>shhh.fasta": for every OTU with otuCounts[i] > 0, a header
+// named after the OTU's first member and the centroid flowgram translated to
+// bases via flowOrder, minus its first four bases. The file name is appended
+// to outputNames and, when a composite fasta file is configured, the file is
+// appended to it.
+void ShhherCommand::writeSequences(vector<int> otuCounts){
+    try {
+        string thisOutputDir = outputDir;
+        if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
+        string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.fasta";
+        ofstream fastaFile;
+        m->openOutputFile(fastaFileName, fastaFile);
+
+        for(int i=0;i<numOTUs;i++){
+
+            if (m->control_pressed) { break; }
+
+            if(otuCounts[i] > 0){
+                const int index = centroids[i];
+
+                fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
+
+                string newSeq = "";
+
+                // each flow contributes its called count of the base flowed at that position
+                for(int j=0;j<numFlowCells;j++){
+
+                    char base = flowOrder[j % 4];
+                    for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
+                        newSeq += base;
+                    }
+                }
+
+                // substr(4) throws if fewer than four bases were produced; the catch below reports it
+                fastaFile << newSeq.substr(4) << endl;
+            }
+        }
+        fastaFile.close();
+
+        outputNames.push_back(fastaFileName);
+
+        if(compositeFASTAFileName != ""){
+            m->appendFiles(fastaFileName, compositeFASTAFileName);
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "writeSequences");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+// Writes "<root>shhh.names": one line per OTU with otuCounts[i] > 0, holding
+// the first member's name, a tab, then the comma-separated names of all
+// members. The file name is appended to outputNames and, when a composite
+// names file is configured, the file is appended to it.
+void ShhherCommand::writeNames(vector<int> otuCounts){
+    try {
+        string outDir = outputDir;
+        if (outputDir == "") {  outDir += m->hasPath(flowFileName);  }
+
+        string nameFileName = outDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.names";
+        ofstream nameFile;
+        m->openOutputFile(nameFileName, nameFile);
+
+        for(int otu = 0; otu < numOTUs; otu++){
+
+            if (m->control_pressed) { break; }
+
+            if(otuCounts[otu] <= 0) { continue; }
+
+            // representative name, then the same name again as the first list entry
+            nameFile << seqNameVector[aaI[otu][0]] << '\t' << seqNameVector[aaI[otu][0]];
+
+            for(int member = 1; member < nSeqsPerOTU[otu]; member++){
+                nameFile << ',' << seqNameVector[aaI[otu][member]];
+            }
+
+            nameFile << endl;
+        }
+        nameFile.close();
+        outputNames.push_back(nameFileName);
+
+        if(compositeNamesFileName != ""){
+            m->appendFiles(nameFileName, compositeNamesFileName);
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "writeNames");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+// Writes "<root>shhh.groups": one line per sequence with its name, a tab and
+// the output file root as the group label. The file name is appended to
+// outputNames.
+void ShhherCommand::writeGroups(){
+    try {
+        string outDir = outputDir;
+        if (outputDir == "") {  outDir += m->hasPath(flowFileName);  }
+
+        string fileRoot = outDir + m->getRootName(m->getSimpleName(flowFileName));
+        string groupFileName = fileRoot + "shhh.groups";
+
+        ofstream groupFile;
+        m->openOutputFile(groupFileName, groupFile);
+
+        for(int seq = 0; seq < numSeqs; seq++){
+            if (m->control_pressed) { break; }
+            groupFile << seqNameVector[seq] << '\t' << fileRoot << endl;
+        }
+
+        groupFile.close();
+        outputNames.push_back(groupFileName);
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "writeGroups");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+// Writes "<root>shhh.counts": for every OTU with otuCounts[i] > 0, an "ideal"
+// line with the centroid flowgram translated to bases from flow 8 onward,
+// then one line per member with its name and its own flowgram translated to
+// bases (first four bases dropped), then a blank line. The file name is
+// appended to outputNames.
+void ShhherCommand::writeClusters(vector<int> otuCounts){
+    try {
+        string outDir = outputDir;
+        if (outputDir == "") {  outDir += m->hasPath(flowFileName);  }
+
+        string otuCountsFileName = outDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.counts";
+        ofstream otuCountsFile;
+        m->openOutputFile(otuCountsFileName, otuCountsFile);
+
+        string bases = flowOrder;
+
+        for(int otu = 0; otu < numOTUs; otu++){
+
+            if (m->control_pressed) { break; }
+
+            if(otuCounts[otu] <= 0) { continue; }
+
+            //output the translated version of the centroid sequence for the otu
+            const int centroid = centroids[otu];
+
+            otuCountsFile << "ideal\t";
+            for(int flow = 8; flow < numFlowCells; flow++){
+                const char base = bases[flow % 4];
+                const int repeats = uniqueFlowgrams[centroid * numFlowCells + flow];
+                for(int r = 0; r < repeats; r++){
+                    otuCountsFile << base;
+                }
+            }
+            otuCountsFile << endl;
+
+            // each member: name, then its flowgram rounded to homopolymer lengths
+            for(int member = 0; member < nSeqsPerOTU[otu]; member++){
+                const int sequence = aaI[otu][member];
+                otuCountsFile << seqNameVector[sequence] << '\t';
+
+                string newSeq = "";
+
+                for(int flow = 0; flow < lengths[sequence]; flow++){
+                    const char base = bases[flow % 4];
+                    const int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + flow] + 0.5);
+
+                    if(freq > 0){ newSeq.append(freq, base); }
+                }
+                otuCountsFile << newSeq.substr(4) << endl;
+            }
+            otuCountsFile << endl;
+        }
+        otuCountsFile.close();
+        outputNames.push_back(otuCountsFileName);
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "writeClusters");
+        exit(1);
+    }
+}
+
+#else
+//**********************************************************************************************************************
+
+// Entry point of the command for this build branch. Returns 0 immediately when
+// abort is set or when control_pressed is raised after either lookup table is
+// loaded. Otherwise caps processors at the number of flow files, runs driver()
+// over all files directly (single processor, or platforms not matched by the
+// #if below) or hands them to createProcesses(), adds the composite fasta and
+// names files to outputNames when a composite fasta name is set, prints the
+// output file names, and returns 0.
+int ShhherCommand::execute(){
+       try {
+               if (abort == true) { return 0; }
                
-               float intensity;
+               getSingleLookUp();      if (m->control_pressed) { return 0; }
+               getJointLookUp();       if (m->control_pressed) { return 0; }
                
-               flowFile >> numFlowCells;
-               int index = 0;//pcluster
-               while(!flowFile.eof()){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       flowFile >> seqName >> currentNumFlowCells;
-                       lengths.push_back(currentNumFlowCells);
+        int numFiles = flowFileVector.size();
+               
+        if (numFiles < processors) { processors = numFiles; }
+        
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+        if (processors == 1) { driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size()); }
+        else { createProcesses(flowFileVector); } //each processor processes one file
+#else
+        driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size());
+#endif
+        
+               if(compositeFASTAFileName != ""){
+                       outputNames.push_back(compositeFASTAFileName);
+                       outputNames.push_back(compositeNamesFileName);
+               }
 
-                       seqNameVector.push_back(seqName);
-                       nameMap[seqName] = index++;//pcluster
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "execute");
+               exit(1);
+       }
+}
+#endif
+/**************************************************************************************************/
 
-                       for(int i=0;i<numFlowCells;i++){
-                               flowFile >> intensity;
-                               if(intensity > 9.99)    {       intensity = 9.99;       }
-                               int intI = int(100 * intensity + 0.0001);
-                               flowDataIntI.push_back(intI);
-                       }
-                       m->gobble(flowFile);
+int ShhherCommand::createProcesses(vector<string> filenames){
+    try {
+        vector<int> processIDS;
+               int process = 1;
+               int num = 0;
+               
+               //sanity check
+               if (filenames.size() < processors) { processors = filenames.size(); }
+               
+               //divide the groups between the processors
+               vector<linePair> lines;
+               int numFilesPerProcessor = filenames.size() / processors;
+               for (int i = 0; i < processors; i++) {
+                       int startIndex =  i * numFilesPerProcessor;
+                       int endIndex = (i+1) * numFilesPerProcessor;
+                       if(i == (processors - 1)){      endIndex = filenames.size();    }
+                       lines.push_back(linePair(startIndex, endIndex));
                }
-               flowFile.close();
                
-               numSeqs = seqNameVector.size();         
+        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
                
-               for(int i=0;i<numSeqs;i++){
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
                        
-                       if (m->control_pressed) { break; }
-                       
-                       int iNumFlowCells = i * numFlowCells;
-                       for(int j=lengths[i];j<numFlowCells;j++){
-                               flowDataIntI[iNumFlowCells + j] = 0;
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               num = driver(filenames, compositeFASTAFileName + toString(getpid()) + ".temp", compositeNamesFileName  + toString(getpid()) + ".temp", lines[process].start, lines[process].end);
+                               exit(0);
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
                        }
                }
                
-       }
+               //do my part
+               driver(filenames, compositeFASTAFileName, compositeNamesFileName, lines[0].start, lines[0].end);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+        
+        #else
+        
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+        
+        /////////////////////// NOT WORKING, ACCESS VIOLATION ON READ OF FLOWGRAMS IN THREAD /////////////////
+        
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the shhhFlowsData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<shhhFlowsData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=0; i<processors-1; i++ ){
+                       // Allocate memory for thread data.
+                       string extension = "";
+                       if (i != 0) { extension = toString(i) + ".temp"; }
+                       
+            shhhFlowsData* tempFlow = new shhhFlowsData(filenames, (compositeFASTAFileName + extension), (compositeNamesFileName + extension), outputDir, flowOrder, jointLookUp, singleLookUp, m, lines[i].start, lines[i].end, cutoff, sigma, minDelta, maxIters, i);
+                       pDataArray.push_back(tempFlow);
+                       processIDS.push_back(i);
+            
+                       hThreadArray[i] = CreateThread(NULL, 0, ShhhFlowsThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+               }
+               
+               //using the main process as a worker saves time and memory
+               //do my part
+               driver(filenames, compositeFASTAFileName, compositeNamesFileName, lines[processors-1].start, lines[processors-1].end);
+               
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       for(int j=0; j < pDataArray[i]->outputNames.size(); j++){ outputNames.push_back(pDataArray[i]->outputNames[j]); }
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+               
+        #endif
+        
+        for (int i=0;i<processIDS.size();i++) { 
+            if (compositeFASTAFileName != "") {
+                m->appendFiles((compositeFASTAFileName + toString(processIDS[i]) + ".temp"), compositeFASTAFileName);
+                m->appendFiles((compositeNamesFileName + toString(processIDS[i]) + ".temp"), compositeNamesFileName);
+                m->mothurRemove((compositeFASTAFileName + toString(processIDS[i]) + ".temp"));
+                m->mothurRemove((compositeNamesFileName + toString(processIDS[i]) + ".temp"));
+            }
+        }
+        
+        return 0;
+        
+    }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getFlowData");
+               m->errorOut(e, "ShhherCommand", "createProcesses");
                exit(1);
        }
 }
-
 /**************************************************************************************************/
 
-void ShhherCommand::getSingleLookUp(){
+int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName, int start, int end){
+    try {
+        
+        for(int i=start;i<end;i++){
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       string flowFileName = filenames[i];
+            
+                       m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(filenames.size()) + ")\t<<<<<\n");
+                       m->mothurOut("Reading flowgrams...\n");
+                       
+            vector<string> seqNameVector;
+            vector<int> lengths;
+            vector<short> flowDataIntI;
+            vector<double> flowDataPrI;
+            map<string, int> nameMap;
+            vector<short> uniqueFlowgrams;
+            vector<int> uniqueCount;
+            vector<int> mapSeqToUnique;
+            vector<int> mapUniqueToSeq;
+            vector<int> uniqueLengths;
+            int numFlowCells;
+            
+            int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       m->mothurOut("Identifying unique flowgrams...\n");
+                       int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       m->mothurOut("Calculating distances between flowgrams...\n");
+            string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
+            unsigned long long begTime = time(NULL);
+            double begClock = clock();
+        
+            flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);    
+            
+            m->mothurOutEndLine();
+            m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
+
+            
+                       string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
+                       createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       m->mothurOut("\nClustering flowgrams...\n");
+            string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
+                       cluster(listFileName, distFileName, namesFileName);
+                       
+                       if (m->control_pressed) { break; }
+            
+            vector<int> otuData;
+            vector<int> cumNumSeqs;
+            vector<int> nSeqsPerOTU;
+            vector<vector<int> > aaP;  //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
+            vector<vector<int> > aaI;  //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
+            vector<int> seqNumber;             //tMaster->anP:         the sequence id number sorted by OTU
+            vector<int> seqIndex;              //tMaster->anI;         the index that corresponds to seqNumber
+
+                       
+                       int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       m->mothurRemove(distFileName);
+                       m->mothurRemove(namesFileName);
+                       m->mothurRemove(listFileName);
+                       
+            vector<double> dist;               //adDist - distance of sequences to centroids
+            vector<short> change;              //did the centroid sequence change? 0 = no; 1 = yes
+            vector<int> centroids;             //the representative flowgram for each cluster m
+            vector<double> weight;
+            vector<double> singleTau;  //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
+            vector<int> nSeqsBreaks;
+            vector<int> nOTUsBreaks;
+            
+                       dist.assign(numSeqs * numOTUs, 0);
+            change.assign(numOTUs, 1);
+            centroids.assign(numOTUs, -1);
+            weight.assign(numOTUs, 0);
+            singleTau.assign(numSeqs, 1.0);
+            
+            nSeqsBreaks.assign(2, 0);
+            nOTUsBreaks.assign(2, 0);
+            
+            nSeqsBreaks[0] = 0;
+            nSeqsBreaks[1] = numSeqs;
+            nOTUsBreaks[1] = numOTUs;
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       double maxDelta = 0;
+                       int iter = 0;
+                       
+                       begClock = clock();
+                       begTime = time(NULL);
+            
+                       m->mothurOut("\nDenoising flowgrams...\n");
+                       m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
+                       
+                       while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
+                               
+                               if (m->control_pressed) { break; }
+                               
+                               double cycClock = clock();
+                               unsigned long long cycTime = time(NULL);
+                               fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
+                               
+                               if (m->control_pressed) { break; }
+                
+                               calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
+                               
+                               if (m->control_pressed) { break; }
+                
+                               maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);  
+                
+                if (m->control_pressed) { break; }
+                
+                               double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight); 
+                
+                if (m->control_pressed) { break; }
+                
+                               checkCentroids(numOTUs, centroids, weight);
+                               
+                               if (m->control_pressed) { break; }
+                               
+                               calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU,  dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
+                               
+                               if (m->control_pressed) { break; }
+                               
+                               iter++;
+                               
+                               m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
+                
+                       }       
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       m->mothurOut("\nFinalizing...\n");
+                       fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       vector<int> otuCounts(numOTUs, 0);
+                       for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
+                       
+                       calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber); 
+            
+            if (m->control_pressed) { break; }
+            
+                       writeQualities(numOTUs, numFlowCells, flowFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->control_pressed) { break; }
+                       writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, flowFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);if (m->control_pressed) { break; }
+                       writeNames(thisCompositeNamesFileName, numOTUs, flowFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);                              if (m->control_pressed) { break; }
+                       writeClusters(flowFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);                       if (m->control_pressed) { break; }
+                       writeGroups(flowFileName, numSeqs, seqNameVector);                                              if (m->control_pressed) { break; }
+                       
+                       m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
+               }
+               
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+        
+        return 0;
+        
+    }catch(exception& e) {
+            m->errorOut(e, "ShhherCommand", "driver");
+            exit(1);
+    }
+}
+
+/**************************************************************************************************/
+int ShhherCommand::getFlowData(string filename, vector<string>& thisSeqNameVector, vector<int>& thisLengths, vector<short>& thisFlowDataIntI, map<string, int>& thisNameMap, int& numFlowCells){
        try{
-               //      these are the -log probabilities that a signal corresponds to a particular homopolymer length
-               singleLookUp.assign(HOMOPS * NUMBINS, 0);
+       
+               ifstream flowFile;
+       
+               m->openInputFile(filename, flowFile);
+               
+               string seqName;
+               int currentNumFlowCells;
+               float intensity;
+        thisSeqNameVector.clear();
+               thisLengths.clear();
+               thisFlowDataIntI.clear();
+               thisNameMap.clear();
+               
+               flowFile >> numFlowCells;
+               int index = 0;//pcluster
+               while(!flowFile.eof()){
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       flowFile >> seqName >> currentNumFlowCells;
+                       thisLengths.push_back(currentNumFlowCells);
+           
+                       thisSeqNameVector.push_back(seqName);
+                       thisNameMap[seqName] = index++;//pcluster
+
+                       for(int i=0;i<numFlowCells;i++){
+                               flowFile >> intensity;
+                               if(intensity > 9.99)    {       intensity = 9.99;       }
+                               int intI = int(100 * intensity + 0.0001);
+                               thisFlowDataIntI.push_back(intI);
+                       }
+                       m->gobble(flowFile);
+               }
+               flowFile.close();
                
-               int index = 0;
-               ifstream lookUpFile;
-               m->openInputFile(lookupFileName, lookUpFile);
+               int numSeqs = thisSeqNameVector.size();         
                
-               for(int i=0;i<HOMOPS;i++){
+               for(int i=0;i<numSeqs;i++){
                        
                        if (m->control_pressed) { break; }
                        
-                       float logFracFreq;
-                       lookUpFile >> logFracFreq;
-                       
-                       for(int j=0;j<NUMBINS;j++)      {
-                               lookUpFile >> singleLookUp[index];
-                               index++;
+                       int iNumFlowCells = i * numFlowCells;
+                       for(int j=thisLengths[i];j<numFlowCells;j++){
+                               thisFlowDataIntI[iNumFlowCells + j] = 0;
                        }
-               }       
-               lookUpFile.close();
+               }
+        
+        return numSeqs;
+               
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getSingleLookUp");
+               m->errorOut(e, "ShhherCommand", "getFlowData");
                exit(1);
        }
 }
-
 /**************************************************************************************************/
 
-void ShhherCommand::getJointLookUp(){
-       try{
-               
-               //      the most likely joint probability (-log) that two intenities have the same polymer length
-               jointLookUp.resize(NUMBINS * NUMBINS, 0);
+int ShhherCommand::flowDistParentFork(int numFlowCells, string distFileName, int stopSeq, vector<int>& mapUniqueToSeq, vector<int>& mapSeqToUnique, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
+       try{            
+        
+               ostringstream outStream;
+               outStream.setf(ios::fixed, ios::floatfield);
+               outStream.setf(ios::dec, ios::basefield);
+               outStream.setf(ios::showpoint);
+               outStream.precision(6);
                
-               for(int i=0;i<NUMBINS;i++){
+               int begTime = time(NULL);
+               double begClock = clock();
+        
+               for(int i=0;i<stopSeq;i++){
                        
                        if (m->control_pressed) { break; }
                        
-                       for(int j=0;j<NUMBINS;j++){             
-                               
-                               double minSum = 100000000;
-                               
-                               for(int k=0;k<HOMOPS;k++){
-                                       double sum = singleLookUp[k * NUMBINS + i] + singleLookUp[k * NUMBINS + j];
-                                       
-                                       if(sum < minSum)        {       minSum = sum;           }
-                               }       
-                               jointLookUp[i * NUMBINS + j] = minSum;
+                       for(int j=0;j<i;j++){
+                               float flowDistance = calcPairwiseDist(numFlowCells, mapUniqueToSeq[i], mapUniqueToSeq[j], mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
+                
+                               if(flowDistance < 1e-6){
+                                       outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
+                               }
+                               else if(flowDistance <= cutoff){
+                                       outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
+                               }
                        }
+                       if(i % 100 == 0){
+                               m->mothurOut(toString(i) + "\t" + toString(time(NULL) - begTime));
+                               m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
+                               m->mothurOutEndLine();
+                       }
+               }
+               
+               ofstream distFile(distFileName.c_str());
+               distFile << outStream.str();            
+               distFile.close();
+               
+               if (m->control_pressed) {}
+               else {
+                       m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime));
+                       m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
+                       m->mothurOutEndLine();
                }
+        
+        return 0;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getJointLookUp");
+               m->errorOut(e, "ShhherCommand", "flowDistParentFork");
                exit(1);
        }
 }
-
 /**************************************************************************************************/
 
-double ShhherCommand::getProbIntensity(int intIntensity){                          
+float ShhherCommand::calcPairwiseDist(int numFlowCells, int seqA, int seqB, vector<int>& mapSeqToUnique, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
        try{
-
-               double minNegLogProb = 100000000; 
-
+               int minLength = lengths[mapSeqToUnique[seqA]];
+               if(lengths[seqB] < minLength){  minLength = lengths[mapSeqToUnique[seqB]];      }
                
-               for(int i=0;i<HOMOPS;i++){//loop signal strength
+               int ANumFlowCells = seqA * numFlowCells;
+               int BNumFlowCells = seqB * numFlowCells;
+               
+               float dist = 0;
+               
+               for(int i=0;i<minLength;i++){
                        
                        if (m->control_pressed) { break; }
                        
-                       float negLogProb = singleLookUp[i * NUMBINS + intIntensity];
-                       if(negLogProb < minNegLogProb)  {       minNegLogProb = negLogProb; }
+                       int flowAIntI = flowDataIntI[ANumFlowCells + i];
+                       float flowAPrI = flowDataPrI[ANumFlowCells + i];
+                       
+                       int flowBIntI = flowDataIntI[BNumFlowCells + i];
+                       float flowBPrI = flowDataPrI[BNumFlowCells + i];
+                       dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
                }
                
-               return minNegLogProb;
+               dist /= (float) minLength;
+               return dist;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getProbIntensity");
+               m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
                exit(1);
        }
 }
 
 /**************************************************************************************************/
 
-void ShhherCommand::getUniques(){
+int ShhherCommand::getUniques(int numSeqs, int numFlowCells, vector<short>& uniqueFlowgrams, vector<int>& uniqueCount, vector<int>& uniqueLengths, vector<int>& mapSeqToUnique, vector<int>& mapUniqueToSeq, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
        try{
-               
-               
-               numUniques = 0;
+               int numUniques = 0;
                uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
                uniqueCount.assign(numSeqs, 0);                                                 //      anWeights
                uniqueLengths.assign(numSeqs, 0);
@@ -1072,7 +2361,7 @@ void ShhherCommand::getUniques(){
                        for(int j=0;j<numFlowCells;j++){
                                current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
                        }
-                                               
+            
                        for(int j=0;j<numUniques;j++){
                                int offset = j * numFlowCells;
                                bool toEnd = 1;
@@ -1080,7 +2369,7 @@ void ShhherCommand::getUniques(){
                                int shorterLength;
                                if(lengths[i] < uniqueLengths[j])       {       shorterLength = lengths[i];                     }
                                else                                                            {       shorterLength = uniqueLengths[j];       }
-
+                
                                for(int k=0;k<shorterLength;k++){
                                        if(current[k] != uniqueFlowgrams[offset + k]){
                                                toEnd = 0;
@@ -1118,220 +2407,16 @@ void ShhherCommand::getUniques(){
         
                flowDataPrI.resize(numSeqs * numFlowCells, 0);
                for(int i=0;i<flowDataPrI.size();i++)   {       if (m->control_pressed) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);          }
+        
+        return numUniques;
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "getUniques");
                exit(1);
        }
 }
-
-/**************************************************************************************************/
-
-float ShhherCommand::calcPairwiseDist(int seqA, int seqB){
-       try{
-               int minLength = lengths[mapSeqToUnique[seqA]];
-               if(lengths[seqB] < minLength){  minLength = lengths[mapSeqToUnique[seqB]];      }
-               
-               int ANumFlowCells = seqA * numFlowCells;
-               int BNumFlowCells = seqB * numFlowCells;
-               
-               float dist = 0;
-               
-               for(int i=0;i<minLength;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       int flowAIntI = flowDataIntI[ANumFlowCells + i];
-                       float flowAPrI = flowDataPrI[ANumFlowCells + i];
-                       
-                       int flowBIntI = flowDataIntI[BNumFlowCells + i];
-                       float flowBPrI = flowDataPrI[BNumFlowCells + i];
-                       dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
-               }
-               
-               dist /= (float) minLength;
-               return dist;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
-               exit(1);
-       }
-}
-
-/**************************************************************************************************/
-
-void ShhherCommand::flowDistParentFork(string distFileName, int startSeq, int stopSeq){
-       try{            
-
-               ostringstream outStream;
-               outStream.setf(ios::fixed, ios::floatfield);
-               outStream.setf(ios::dec, ios::basefield);
-               outStream.setf(ios::showpoint);
-               outStream.precision(6);
-               
-               int begTime = time(NULL);
-               double begClock = clock();
-
-               for(int i=startSeq;i<stopSeq;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       for(int j=0;j<i;j++){
-                               float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
-
-                               if(flowDistance < 1e-6){
-                                       outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
-                               }
-                               else if(flowDistance <= cutoff){
-                                       outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
-                               }
-                       }
-                       if(i % 100 == 0){
-                               m->mothurOut(toString(i) + "\t" + toString(time(NULL) - begTime));
-                               m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
-                               m->mothurOutEndLine();
-                       }
-               }
-               
-               ofstream distFile(distFileName.c_str());
-               distFile << outStream.str();            
-               distFile.close();
-               
-               if (m->control_pressed) {}
-               else {
-                       m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime));
-                       m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
-                       m->mothurOutEndLine();
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "flowDistParentFork");
-               exit(1);
-       }
-}
-
-/**************************************************************************************************/
-
-string ShhherCommand::createDistFile(int processors){
-       try{
-//////////////////////// until I figure out the shared memory issue //////////////////////             
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-#else
-               processors=1;
-#endif
-//////////////////////// until I figure out the shared memory issue //////////////////////             
-               
-               string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
-                               
-               unsigned long long begTime = time(NULL);
-               double begClock = clock();
-               
-               if (numSeqs < processors){      processors = 1; }
-               
-               if(processors == 1)     {       flowDistParentFork(fDistFileName, 0, numUniques);               }
-               
-               else{ //you have multiple processors
-                       
-                       vector<int> start(processors, 0);
-                       vector<int> end(processors, 0);
-                       
-                       int process = 1;
-                       vector<int> processIDs;
-                       
-                       for (int i = 0; i < processors; i++) {
-                               start[i] = int(sqrt(float(i)/float(processors)) * numUniques);
-                               end[i] = int(sqrt(float(i+1)/float(processors)) * numUniques);
-                       }
-                       
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               
-                       //loop through and create all the processes you want
-                       while (process != processors) {
-                               int pid = fork();
-                               
-                               if (pid > 0) {
-                                       processIDs.push_back(pid);  //create map from line number to pid so you can append files in correct order later
-                                       process++;
-                               }else if (pid == 0){
-                                       flowDistParentFork(fDistFileName + toString(getpid()) + ".temp", start[process], end[process]);
-                                       exit(0);
-                               }else { 
-                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
-                                       perror(" : ");
-                                       for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill (temp, SIGINT); }
-                                       exit(0);
-                               }
-                       }
-                       
-                       //parent does its part
-                       flowDistParentFork(fDistFileName, start[0], end[0]);
-                       
-                       //force parent to wait until all the processes are done
-                       for (int i=0;i<processIDs.size();i++) { 
-                               int temp = processIDs[i];
-                               wait(&temp);
-                       }
-#else
-                       //////////////////////////////////////////////////////////////////////////////////////////////////////
-                       //Windows version shared memory, so be careful when passing variables through the flowDistParentForkData struct. 
-                       //Above fork() will clone, so memory is separate, but that's not the case with windows, 
-                       //////////////////////////////////////////////////////////////////////////////////////////////////////
-                       
-                       vector<flowDistParentForkData*> pDataArray; 
-                       DWORD   dwThreadIdArray[processors-1];
-                       HANDLE  hThreadArray[processors-1]; 
-                       
-                       //Create processor worker threads.
-                       for(int i = 0; i < processors-1; i++){
-                               // Allocate memory for thread data.
-                               string extension = extension = toString(i) + ".temp"; 
-                               
-                               flowDistParentForkData* tempdist = new flowDistParentForkData((fDistFileName + extension), mapUniqueToSeq, mapSeqToUnique, lengths, flowDataIntI, flowDataPrI, jointLookUp, m, start[i+1], end[i+1], numFlowCells, cutoff, i);
-                               pDataArray.push_back(tempdist);
-                               processIDs.push_back(i);
-                               
-                               //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
-                               //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
-                               hThreadArray[i] = CreateThread(NULL, 0, MyflowDistParentForkThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
-                       }
-                       
-                       //parent does its part
-                       flowDistParentFork(fDistFileName, start[0], end[0]);
-                       
-                       //Wait until all threads have terminated.
-                       WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
-                       
-                       //Close all thread handles and free memory allocations.
-                       for(int i=0; i < pDataArray.size(); i++){
-                               CloseHandle(hThreadArray[i]);
-                               delete pDataArray[i];
-                       }
-                       
-#endif
-                       
-                       //append and remove temp files
-                       for (int i=0;i<processIDs.size();i++) { 
-                               m->appendFiles((fDistFileName + toString(processIDs[i]) + ".temp"), fDistFileName);
-                               m->mothurRemove((fDistFileName + toString(processIDs[i]) + ".temp"));
-                       }
-                       
-               }
-               
-               m->mothurOutEndLine();
-               m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
-               
-               return fDistFileName;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "createDistFile");
-               exit(1);
-       }
-       
-}
-
 /**************************************************************************************************/
-
-string ShhherCommand::createNamesFile(){
+int ShhherCommand::createNamesFile(int numSeqs, int numUniques, string filename, vector<string>& seqNameVector, vector<int>& mapSeqToUnique, vector<int>& mapUniqueToSeq){
        try{
                
                vector<string> duplicateNames(numUniques, "");
@@ -1339,31 +2424,29 @@ string ShhherCommand::createNamesFile(){
                        duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
                }
                
-               string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
-               
                ofstream nameFile;
-               m->openOutputFile(nameFileName, nameFile);
+               m->openOutputFile(filename, nameFile);
                
                for(int i=0;i<numUniques;i++){
                        
                        if (m->control_pressed) { break; }
                        
-//                     nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
+            //                 nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
                        nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
                }
                
                nameFile.close();
-               return  nameFileName;
+        
+               return 0;
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "createNamesFile");
                exit(1);
        }
 }
-
 //**********************************************************************************************************************
 
-string ShhherCommand::cluster(string distFileName, string namesFileName){
+int ShhherCommand::cluster(string filename, string distFileName, string namesFileName){
        try {
                
                ReadMatrix* read = new ReadColumnMatrix(distFileName);  
@@ -1378,7 +2461,7 @@ string ShhherCommand::cluster(string distFileName, string namesFileName){
                
                delete read; 
                delete clusterNameMap; 
-                               
+        
                RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
                
                Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
@@ -1394,33 +2477,39 @@ string ShhherCommand::cluster(string distFileName, string namesFileName){
                
                list->setLabel(toString(cutoff));
                
-               string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
                ofstream listFile;
-               m->openOutputFile(listFileName, listFile);
+               m->openOutputFile(filename, listFile);
                list->print(listFile);
                listFile.close();
                
                delete matrix;  delete cluster; delete rabund; delete list;
-       
-               return listFileName;
+        
+               return 0;
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "cluster");
                exit(1);        
        }               
 }
-
 /**************************************************************************************************/
 
-void ShhherCommand::getOTUData(string listFileName){
+int ShhherCommand::getOTUData(int numSeqs, string fileName,  vector<int>& otuData,
+                               vector<int>& cumNumSeqs,
+                               vector<int>& nSeqsPerOTU,
+                               vector<vector<int> >& aaP,      //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
+                               vector<vector<int> >& aaI,      //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
+                               vector<int>& seqNumber,         //tMaster->anP:         the sequence id number sorted by OTU
+                               vector<int>& seqIndex,
+                               map<string, int>& nameMap){
        try {
-
+        
                ifstream listFile;
-               m->openInputFile(listFileName, listFile);
+               m->openInputFile(fileName, listFile);
                string label;
+        int numOTUs;
                
                listFile >> label >> numOTUs;
-
+        
                otuData.assign(numSeqs, 0);
                cumNumSeqs.assign(numOTUs, 0);
                nSeqsPerOTU.assign(numOTUs, 0);
@@ -1435,11 +2524,11 @@ void ShhherCommand::getOTUData(string listFileName){
                for(int i=0;i<numOTUs;i++){
                        
                        if (m->control_pressed) { break; }
-
+            
                        listFile >> singleOTU;
                        
                        istringstream otuString(singleOTU);
-
+            
                        while(otuString){
                                
                                string seqName = "";
@@ -1464,10 +2553,10 @@ void ShhherCommand::getOTUData(string listFileName){
                                }
                                
                                map<string,int>::iterator nmIt = nameMap.find(seqName);
-
+                
                                int index = nmIt->second;
                                nameMap.erase(nmIt);
-
+                
                                otuData[index] = i;
                                nSeqsPerOTU[i]++;
                                aaP[i].push_back(index);        
@@ -1493,6 +2582,8 @@ void ShhherCommand::getOTUData(string listFileName){
                seqIndex = seqNumber;
                
                listFile.close();       
+        
+        return numOTUs;
                
        }
        catch(exception& e) {
@@ -1500,124 +2591,28 @@ void ShhherCommand::getOTUData(string listFileName){
                exit(1);        
        }               
 }
-
-/**************************************************************************************************/
-
-void ShhherCommand::initPyroCluster(){                          
-       try{
-               if (numOTUs < processors) { processors = 1; }
-
-               dist.assign(numSeqs * numOTUs, 0);
-               change.assign(numOTUs, 1);
-               centroids.assign(numOTUs, -1);
-               weight.assign(numOTUs, 0);
-               singleTau.assign(numSeqs, 1.0);
-               
-               nSeqsBreaks.assign(processors+1, 0);
-               nOTUsBreaks.assign(processors+1, 0);
-
-               nSeqsBreaks[0] = 0;
-               for(int i=0;i<processors;i++){
-                       nSeqsBreaks[i+1] = nSeqsBreaks[i] + (int)((double) numSeqs / (double) processors);
-                       nOTUsBreaks[i+1] = nOTUsBreaks[i] + (int)((double) numOTUs / (double) processors);
-               }
-               nSeqsBreaks[processors] = numSeqs;
-               nOTUsBreaks[processors] = numOTUs;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "initPyroCluster");
-               exit(1);        
-       }
-}
-
-/**************************************************************************************************/
-
-void ShhherCommand::fill(){
-       try {
-               int index = 0;
-               for(int i=0;i<numOTUs;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       cumNumSeqs[i] = index;
-                       for(int j=0;j<nSeqsPerOTU[i];j++){
-                               seqNumber[index] = aaP[i][j];
-                               seqIndex[index] = aaI[i][j];
-                               
-                               index++;
-                       }
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "fill");
-               exit(1);        
-       }               
-}
-
-/**************************************************************************************************/
-
-void ShhherCommand::calcCentroids(){                          
-       try{
-               
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               
-               if(processors == 1)     {
-                       calcCentroidsDriver(0, numOTUs);                
-               }
-               else{ //you have multiple processors
-                       if (numOTUs < processors){      processors = 1; }
-                       
-                       int process = 1;
-                       vector<int> processIDs;
-                       
-                       //loop through and create all the processes you want
-                       while (process != processors) {
-                               int pid = vfork();
-                               
-                               if (pid > 0) {
-                                       processIDs.push_back(pid);  //create map from line number to pid so you can append files in correct order later
-                                       process++;
-                               }else if (pid == 0){
-                                       calcCentroidsDriver(nOTUsBreaks[process], nOTUsBreaks[process+1]);
-                                       exit(0);
-                               }else { 
-                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
-                                       perror(" : ");
-                                       for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill (temp, SIGINT); }
-                                       exit(0);
-                               }
-                       }
-                       
-                       //parent does its part
-                       calcCentroidsDriver(nOTUsBreaks[0], nOTUsBreaks[1]);
-
-                       //force parent to wait until all the processes are done
-                       for (int i=0;i<processIDs.size();i++) { 
-                               int temp = processIDs[i];
-                               wait(&temp);
-                       }
-               }
-               
-#else
-               calcCentroidsDriver(0, numOTUs);
-#endif         
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
-               exit(1);        
-       }               
-}
-
 /**************************************************************************************************/
 
-void ShhherCommand::calcCentroidsDriver(int start, int finish){                          
+int ShhherCommand::calcCentroidsDriver(int numOTUs, 
+                                          vector<int>& cumNumSeqs,
+                                          vector<int>& nSeqsPerOTU,
+                                          vector<int>& seqIndex,
+                                          vector<short>& change,               //did the centroid sequence change? 0 = no; 1 = yes
+                                          vector<int>& centroids,              //the representative flowgram for each cluster m
+                                          vector<double>& singleTau,   //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
+                                          vector<int>& mapSeqToUnique,
+                                          vector<short>& uniqueFlowgrams,
+                                          vector<short>& flowDataIntI,
+                                          vector<int>& lengths,
+                                          int numFlowCells,
+                                          vector<int>& seqNumber){                          
        
        //this function gets the most likely homopolymer length at a flow position for a group of sequences
        //within an otu
        
        try{
                
-               for(int i=start;i<finish;i++){
+               for(int i=0;i<numOTUs;i++){
                        
                        if (m->control_pressed) { break; }
                        
@@ -1630,7 +2625,7 @@ void ShhherCommand::calcCentroidsDriver(int start, int finish){
                        for(int j=0;j<nSeqsPerOTU[i];j++){
                                count += singleTau[seqNumber[cumNumSeqs[i] + j]];
                        }
-
+            
                        if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
                                vector<double> adF(nSeqsPerOTU[i]);
                                vector<int> anL(nSeqsPerOTU[i]);
@@ -1660,7 +2655,7 @@ void ShhherCommand::calcCentroidsDriver(int start, int finish){
                                        double tauValue = singleTau[seqNumber[index]];
                                        
                                        for(int k=0;k<position;k++){
-                                               double dist = getDistToCentroid(anL[k], nI, lengths[nI]);
+                                               double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
                                                adF[k] += dist * tauValue;
                                        }
                                }
@@ -1682,23 +2677,25 @@ void ShhherCommand::calcCentroidsDriver(int start, int finish){
                                centroids[i] = -1;                      
                        }
                }
+        
+        return 0;
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
                exit(1);        
        }               
 }
-
 /**************************************************************************************************/
 
-double ShhherCommand::getDistToCentroid(int cent, int flow, int length){
+double ShhherCommand::getDistToCentroid(int cent, int flow, int length, vector<short>& uniqueFlowgrams,
+                                        vector<short>& flowDataIntI, int numFlowCells){
        try{
                
                int flowAValue = cent * numFlowCells;
                int flowBValue = flow * numFlowCells;
                
                double dist = 0;
-
+        
                for(int i=0;i<length;i++){
                        dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
                        flowAValue++;
@@ -1712,10 +2709,9 @@ double ShhherCommand::getDistToCentroid(int cent, int flow, int length){
                exit(1);        
        }               
 }
-
 /**************************************************************************************************/
 
-double ShhherCommand::getNewWeights(){
+double ShhherCommand::getNewWeights(int numOTUs, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU, vector<double>& singleTau, vector<int>& seqNumber, vector<double>& weight){
        try{
                
                double maxChange = 0;
@@ -1733,300 +2729,109 @@ double ShhherCommand::getNewWeights(){
                                weight[i] += tauValue;
                        }
                        
-                       difference = fabs(weight[i] - difference);
-                       if(difference > maxChange){     maxChange = difference; }
-               }
-               return maxChange;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getNewWeights");
-               exit(1);        
-       }               
-}
-
-/**************************************************************************************************/
-
-double ShhherCommand::getLikelihood(){
-       
-       try{
-               
-               vector<long double> P(numSeqs, 0);
-               int effNumOTUs = 0;
-               
-               for(int i=0;i<numOTUs;i++){
-                       if(weight[i] > MIN_WEIGHT){
-                               effNumOTUs++;
-                       }
-               }
-               
-               string hold;
-               for(int i=0;i<numOTUs;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       for(int j=0;j<nSeqsPerOTU[i];j++){
-                               int index = cumNumSeqs[i] + j;
-                               int nI = seqIndex[index];
-                               double singleDist = dist[seqNumber[index]];
-                               
-                               P[nI] += weight[i] * exp(-singleDist * sigma);
-                       }
-               }
-               double nLL = 0.00;
-               for(int i=0;i<numSeqs;i++){
-                       if(P[i] == 0){  P[i] = DBL_EPSILON;     }
-
-                       nLL += -log(P[i]);
-               }
-               
-               nLL = nLL -(double)numSeqs * log(sigma);
-
-               return nLL; 
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getNewWeights");
-               exit(1);        
-       }               
-}
-
-/**************************************************************************************************/
-
-void ShhherCommand::checkCentroids(){
-       try{
-               vector<int> unique(numOTUs, 1);
-               
-               for(int i=0;i<numOTUs;i++){
-                       if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
-                               unique[i] = -1;
-                       }
-               }
-               
-               for(int i=0;i<numOTUs;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       if(unique[i] == 1){
-                               for(int j=i+1;j<numOTUs;j++){
-                                       if(unique[j] == 1){
-                                               
-                                               if(centroids[j] == centroids[i]){
-                                                       unique[j] = 0;
-                                                       centroids[j] = -1;
-                                                       
-                                                       weight[i] += weight[j];
-                                                       weight[j] = 0.0;
-                                               }
-                                       }
-                               }
-                       }
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "checkCentroids");
-               exit(1);        
-       }               
-}
-
-/**************************************************************************************************/
-
-void ShhherCommand::calcNewDistances(){                          
-       try{
-               
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-
-               if(processors == 1)     {
-                       calcNewDistancesParent(0, numSeqs);             
-               }
-               else{ //you have multiple processors
-                       if (numSeqs < processors){      processors = 1; }
-                       
-                       vector<vector<int> > child_otuIndex(processors);
-                       vector<vector<int> > child_seqIndex(processors);
-                       vector<vector<double> > child_singleTau(processors);                    
-                       vector<int> totals(processors);
-                       
-                       int process = 1;
-                       vector<int> processIDs;
-
-                       //loop through and create all the processes you want
-                       while (process != processors) {
-                               int pid = vfork();
-                               
-                               if (pid > 0) {
-                                       processIDs.push_back(pid);  //create map from line number to pid so you can append files in correct order later
-                                       process++;
-                               }else if (pid == 0){
-                                       calcNewDistancesChild(nSeqsBreaks[process], nSeqsBreaks[process+1], child_otuIndex[process], child_seqIndex[process], child_singleTau[process]);
-                                       totals[process] = child_otuIndex[process].size();
-
-                                       exit(0);
-                               }else { 
-                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
-                                       perror(" : ");
-                                       for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill (temp, SIGINT); }
-                                       exit(0);
-                               }
-                       }
-                               
-                       //parent does its part
-                       calcNewDistancesParent(nSeqsBreaks[0], nSeqsBreaks[1]);
-                       int total = seqIndex.size();
-
-                       //force parent to wait until all the processes are done
-                       for (int i=0;i<processIDs.size();i++) { 
-                               int temp = processIDs[i];
-                               wait(&temp);
-                       }
-
-                       for(int i=1;i<processors;i++){
-                               int oldTotal = total;
-                               total += totals[i];
-
-                               singleTau.resize(total, 0);
-                               seqIndex.resize(total, 0);
-                               seqNumber.resize(total, 0);
-                               
-                               int childIndex = 0;
-                               
-                               for(int j=oldTotal;j<total;j++){
-                                       int otuI = child_otuIndex[i][childIndex];
-                                       int seqI = child_seqIndex[i][childIndex];
-
-                                       singleTau[j] = child_singleTau[i][childIndex];
-                                       aaP[otuI][nSeqsPerOTU[otuI]] = j;
-                                       aaI[otuI][nSeqsPerOTU[otuI]] = seqI;
-                                       nSeqsPerOTU[otuI]++;
-
-                                       childIndex++;
-                               }
-                       }
-               }
-#else
-               calcNewDistancesParent(0, numSeqs);             
-#endif         
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "calcNewDistances");
-               exit(1);        
-       }               
-}
-
-/**************************************************************************************************/
-#ifdef USE_MPI
-void ShhherCommand::calcNewDistancesChildMPI(int startSeq, int stopSeq, vector<int>& otuIndex){
-       
-       try{
-               vector<double> newTau(numOTUs,0);
-               vector<double> norms(numSeqs, 0);
-               otuIndex.clear();
-               seqIndex.clear();
-               singleTau.clear();
-               
-               for(int i=startSeq;i<stopSeq;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       double offset = 1e8;
-                       int indexOffset = i * numOTUs;
-                       
-                       for(int j=0;j<numOTUs;j++){
-                               
-                               if(weight[j] > MIN_WEIGHT && change[j] == 1){
-                                       dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
-                               }
-                               if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
-                                       offset = dist[indexOffset + j];
-                               }
-                       }
-                       
-                       for(int j=0;j<numOTUs;j++){
-                               if(weight[j] > MIN_WEIGHT){
-                                       newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
-                                       norms[i] += newTau[j];
-                               }
-                               else{
-                                       newTau[j] = 0.0;
-                               }
-                       }
-                       
-                       for(int j=0;j<numOTUs;j++){
-
-                               newTau[j] /= norms[i];
-                               
-                               if(newTau[j] > MIN_TAU){
-                                       otuIndex.push_back(j);
-                                       seqIndex.push_back(i);
-                                       singleTau.push_back(newTau[j]);
-                               }
-                       }
-                       
+                       difference = fabs(weight[i] - difference);
+                       if(difference > maxChange){     maxChange = difference; }
                }
+               return maxChange;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "calcNewDistancesChildMPI");
+               m->errorOut(e, "ShhherCommand", "getNewWeights");
                exit(1);        
        }               
 }
-#endif
+
 /**************************************************************************************************/
 
-void ShhherCommand::calcNewDistancesChild(int startSeq, int stopSeq, vector<int>& child_otuIndex, vector<int>& child_seqIndex, vector<double>& child_singleTau){
+double ShhherCommand::getLikelihood(int numSeqs, int numOTUs, vector<int>& nSeqsPerOTU, vector<int>& seqNumber, vector<int>& cumNumSeqs, vector<int>& seqIndex, vector<double>& dist, vector<double>& weight){
        
        try{
-               vector<double> newTau(numOTUs,0);
-               vector<double> norms(numSeqs, 0);
-               child_otuIndex.resize(0);
-               child_seqIndex.resize(0);
-               child_singleTau.resize(0);
                
-               for(int i=startSeq;i<stopSeq;i++){
+               vector<long double> P(numSeqs, 0);
+               int effNumOTUs = 0;
+               
+               for(int i=0;i<numOTUs;i++){
+                       if(weight[i] > MIN_WEIGHT){
+                               effNumOTUs++;
+                       }
+               }
+               
+               string hold;
+               for(int i=0;i<numOTUs;i++){
                        
                        if (m->control_pressed) { break; }
                        
-                       double offset = 1e8;
-                       int indexOffset = i * numOTUs;
-                       
-                       
-                       for(int j=0;j<numOTUs;j++){
-                               if(weight[j] > MIN_WEIGHT && change[j] == 1){
-                                       dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
-                               }
+                       for(int j=0;j<nSeqsPerOTU[i];j++){
+                               int index = cumNumSeqs[i] + j;
+                               int nI = seqIndex[index];
+                               double singleDist = dist[seqNumber[index]];
                                
-                               if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
-                                       offset = dist[indexOffset + j];
-                               }
+                               P[nI] += weight[i] * exp(-singleDist * sigma);
                        }
-                       
-                       for(int j=0;j<numOTUs;j++){
-                               if(weight[j] > MIN_WEIGHT){
-                                       newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
-                                       norms[i] += newTau[j];
-                               }
-                               else{
-                                       newTau[j] = 0.0;
-                               }
+               }
+               double nLL = 0.00;
+               for(int i=0;i<numSeqs;i++){
+                       if(P[i] == 0){  P[i] = DBL_EPSILON;     }
+            
+                       nLL += -log(P[i]);
+               }
+               
+               nLL = nLL -(double)numSeqs * log(sigma);
+        
+               return nLL; 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getNewWeights");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+int ShhherCommand::checkCentroids(int numOTUs, vector<int>& centroids, vector<double>& weight){
+       try{
+               vector<int> unique(numOTUs, 1);
+               
+               for(int i=0;i<numOTUs;i++){
+                       if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
+                               unique[i] = -1;
                        }
+               }
+               
+               for(int i=0;i<numOTUs;i++){
                        
-                       for(int j=0;j<numOTUs;j++){
-                               newTau[j] /= norms[i];
-                               
-                               if(newTau[j] > MIN_TAU){
-                                       child_otuIndex.push_back(j);
-                                       child_seqIndex.push_back(i);
-                                       child_singleTau.push_back(newTau[j]);
+                       if (m->control_pressed) { break; }
+                       
+                       if(unique[i] == 1){
+                               for(int j=i+1;j<numOTUs;j++){
+                                       if(unique[j] == 1){
+                                               
+                                               if(centroids[j] == centroids[i]){
+                                                       unique[j] = 0;
+                                                       centroids[j] = -1;
+                                                       
+                                                       weight[i] += weight[j];
+                                                       weight[j] = 0.0;
+                                               }
+                                       }
                                }
                        }
                }
+        
+        return 0;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "calcNewDistancesChild");
+               m->errorOut(e, "ShhherCommand", "checkCentroids");
                exit(1);        
        }               
 }
-
 /**************************************************************************************************/
 
-void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
+void ShhherCommand::calcNewDistances(int numSeqs, int numOTUs, vector<int>& nSeqsPerOTU, vector<double>& dist, 
+                                     vector<double>& weight, vector<short>& change, vector<int>& centroids,
+                                     vector<vector<int> >& aaP,        vector<double>& singleTau, vector<vector<int> >& aaI,   
+                                     vector<int>& seqNumber, vector<int>& seqIndex,
+                                     vector<short>& uniqueFlowgrams,
+                                     vector<short>& flowDataIntI, int numFlowCells, vector<int>& lengths){
        
        try{
                
@@ -2034,26 +2839,26 @@ void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
                vector<double> newTau(numOTUs,0);
                vector<double> norms(numSeqs, 0);
                nSeqsPerOTU.assign(numOTUs, 0);
-
-               for(int i=startSeq;i<stopSeq;i++){
+        
+               for(int i=0;i<numSeqs;i++){
                        
                        if (m->control_pressed) { break; }
                        
                        int indexOffset = i * numOTUs;
-
+            
                        double offset = 1e8;
                        
                        for(int j=0;j<numOTUs;j++){
-
+                
                                if(weight[j] > MIN_WEIGHT && change[j] == 1){
-                                       dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
+                                       dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells);
                                }
-       
+                
                                if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
                                        offset = dist[indexOffset + j];
                                }
                        }
-
+            
                        for(int j=0;j<numOTUs;j++){
                                if(weight[j] > MIN_WEIGHT){
                                        newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
@@ -2063,11 +2868,11 @@ void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
                                        newTau[j] = 0.0;
                                }
                        }
-
+            
                        for(int j=0;j<numOTUs;j++){
                                newTau[j] /= norms[i];
                        }
-       
+            
                        for(int j=0;j<numOTUs;j++){
                                if(newTau[j] > MIN_TAU){
                                        
@@ -2086,19 +2891,44 @@ void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
                                        nSeqsPerOTU[j]++;
                                }
                        }
-
+            
                }
-
+        
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "calcNewDistancesParent");
+               m->errorOut(e, "ShhherCommand", "calcNewDistances");
                exit(1);        
        }               
 }
+/**************************************************************************************************/
 
+int ShhherCommand::fill(int numOTUs, vector<int>& seqNumber, vector<int>& seqIndex, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU, vector<vector<int> >& aaP, vector<vector<int> >& aaI){
+       try {
+               int index = 0;
+               for(int i=0;i<numOTUs;i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       cumNumSeqs[i] = index;
+                       for(int j=0;j<nSeqsPerOTU[i];j++){
+                               seqNumber[index] = aaP[i][j];
+                               seqIndex[index] = aaI[i][j];
+                               
+                               index++;
+                       }
+               }
+        
+        return 0; 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "fill");
+               exit(1);        
+       }               
+}
 /**************************************************************************************************/
 
-void ShhherCommand::setOTUs(){
+void ShhherCommand::setOTUs(int numOTUs, int numSeqs, vector<int>& seqNumber, vector<int>& seqIndex, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU,
+                            vector<int>& otuData, vector<double>& singleTau, vector<double>& dist, vector<vector<int> >& aaP, vector<vector<int> >& aaI){
        
        try {
                vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
@@ -2141,26 +2971,28 @@ void ShhherCommand::setOTUs(){
                        
                        nSeqsPerOTU[index]++;
                }
-               fill(); 
+        
+               fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);  
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "calcNewDistances");
+               m->errorOut(e, "ShhherCommand", "setOTUs");
                exit(1);        
        }               
 }
-
 /**************************************************************************************************/
 
-void ShhherCommand::writeQualities(vector<int> otuCounts){
+void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string filename, vector<int> otuCounts, vector<int>& nSeqsPerOTU, vector<int>& seqNumber,
+                                   vector<double>& singleTau, vector<short>& flowDataIntI, vector<short>& uniqueFlowgrams, vector<int>& cumNumSeqs,
+                                   vector<int>& mapUniqueToSeq, vector<string>& seqNameVector, vector<int>& centroids, vector<vector<int> >& aaI){
        
        try {
                string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.qual";
-
+               if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
+               string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.qual";
+        
                ofstream qualityFile;
                m->openOutputFile(qualityFileName, qualityFile);
-
+        
                qualityFile.setf(ios::fixed, ios::floatfield);
                qualityFile.setf(ios::showpoint);
                qualityFile << setprecision(6);
@@ -2238,18 +3070,19 @@ void ShhherCommand::writeQualities(vector<int> otuCounts){
                        
                        if(otuCounts[i] > 0){
                                qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
-                               
+                       
                                int j=4;        //need to get past the first four bases
                                while(qualities[i][j] != -1){
-                                       qualityFile << qualities[i][j] << ' ';
-                                       j++;
+                    qualityFile << qualities[i][j] << ' ';
+                    if (j > qualities[i].size()) { break; }
+                    j++;
                                }
                                qualityFile << endl;
                        }
                }
                qualityFile.close();
                outputNames.push_back(qualityFileName);
-
+        
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "writeQualities");
@@ -2259,11 +3092,11 @@ void ShhherCommand::writeQualities(vector<int> otuCounts){
 
 /**************************************************************************************************/
 
-void ShhherCommand::writeSequences(vector<int> otuCounts){
+void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTUs, int numFlowCells, string filename, vector<int> otuCounts, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& centroids){
        try {
                string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.fasta";
+               if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
+               string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.fasta";
                ofstream fastaFile;
                m->openOutputFile(fastaFileName, fastaFile);
                
@@ -2292,11 +3125,11 @@ void ShhherCommand::writeSequences(vector<int> otuCounts){
                        }
                }
                fastaFile.close();
-
+        
                outputNames.push_back(fastaFileName);
-
-               if(compositeFASTAFileName != ""){
-                       m->appendFiles(fastaFileName, compositeFASTAFileName);
+        
+               if(thisCompositeFASTAFileName != ""){
+                       m->appendFiles(fastaFileName, thisCompositeFASTAFileName);
                }
        }
        catch(exception& e) {
@@ -2307,11 +3140,11 @@ void ShhherCommand::writeSequences(vector<int> otuCounts){
 
 /**************************************************************************************************/
 
-void ShhherCommand::writeNames(vector<int> otuCounts){
+void ShhherCommand::writeNames(string thisCompositeNamesFileName, int numOTUs, string filename, vector<int> otuCounts, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU){
        try {
                string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.names";
+               if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
+               string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.names";
                ofstream nameFile;
                m->openOutputFile(nameFileName, nameFile);
                
@@ -2333,8 +3166,8 @@ void ShhherCommand::writeNames(vector<int> otuCounts){
                outputNames.push_back(nameFileName);
                
                
-               if(compositeNamesFileName != ""){
-                       m->appendFiles(nameFileName, compositeNamesFileName);
+               if(thisCompositeNamesFileName != ""){
+                       m->appendFiles(nameFileName, thisCompositeNamesFileName);
                }               
        }
        catch(exception& e) {
@@ -2345,12 +3178,12 @@ void ShhherCommand::writeNames(vector<int> otuCounts){
 
 /**************************************************************************************************/
 
-void ShhherCommand::writeGroups(){
+void ShhherCommand::writeGroups(string filename, int numSeqs, vector<string>& seqNameVector){
        try {
                string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
-               string groupFileName = fileRoot + ".shhh.groups";
+               if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
+               string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(filename));
+               string groupFileName = fileRoot + "shhh.groups";
                ofstream groupFile;
                m->openOutputFile(groupFileName, groupFile);
                
@@ -2360,7 +3193,7 @@ void ShhherCommand::writeGroups(){
                }
                groupFile.close();
                outputNames.push_back(groupFileName);
-
+        
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "writeGroups");
@@ -2370,11 +3203,11 @@ void ShhherCommand::writeGroups(){
 
 /**************************************************************************************************/
 
-void ShhherCommand::writeClusters(vector<int> otuCounts){
+void ShhherCommand::writeClusters(string filename, int numOTUs, int numFlowCells, vector<int> otuCounts, vector<int>& centroids, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU, vector<int>& lengths, vector<short>& flowDataIntI){
        try {
                string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.counts";
+               if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
+               string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.counts";
                ofstream otuCountsFile;
                m->openOutputFile(otuCountsFileName, otuCountsFile);
                
@@ -2407,7 +3240,7 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
                                        for(int k=0;k<lengths[sequence];k++){
                                                char base = bases[k % 4];
                                                int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
-                                                       
+                        
                                                for(int s=0;s<freq;s++){
                                                        newSeq += base;
                                                        //otuCountsFile << base;
@@ -2420,7 +3253,7 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
                }
                otuCountsFile.close();
                outputNames.push_back(otuCountsFileName);
-
+        
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "writeClusters");
@@ -2428,4 +3261,92 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
        }               
 }
 
-//**********************************************************************************************************************
+/**************************************************************************************************/
+
+void ShhherCommand::getSingleLookUp(){
+       try{
+               //      these are the -log probabilities that a signal corresponds to a particular homopolymer length
+               singleLookUp.assign(HOMOPS * NUMBINS, 0);
+               
+               int index = 0;
+               ifstream lookUpFile;
+               m->openInputFile(lookupFileName, lookUpFile);
+               
+               for(int i=0;i<HOMOPS;i++){
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       float logFracFreq;
+                       lookUpFile >> logFracFreq;
+                       
+                       for(int j=0;j<NUMBINS;j++)      {
+                               lookUpFile >> singleLookUp[index];
+                               index++;
+                       }
+               }       
+               lookUpFile.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getSingleLookUp");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::getJointLookUp(){
+       try{
+               
+               //      the most likely joint probability (-log) that two intenities have the same polymer length
+               jointLookUp.resize(NUMBINS * NUMBINS, 0);
+               
+               for(int i=0;i<NUMBINS;i++){
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       for(int j=0;j<NUMBINS;j++){             
+                               
+                               double minSum = 100000000;
+                               
+                               for(int k=0;k<HOMOPS;k++){
+                                       double sum = singleLookUp[k * NUMBINS + i] + singleLookUp[k * NUMBINS + j];
+                                       
+                                       if(sum < minSum)        {       minSum = sum;           }
+                               }       
+                               jointLookUp[i * NUMBINS + j] = minSum;
+                       }
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getJointLookUp");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+double ShhherCommand::getProbIntensity(int intIntensity){                          
+       try{
+
+               double minNegLogProb = 100000000; 
+
+               
+               for(int i=0;i<HOMOPS;i++){//loop signal strength
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       float negLogProb = singleLookUp[i * NUMBINS + intIntensity];
+                       if(negLogProb < minNegLogProb)  {       minNegLogProb = negLogProb; }
+               }
+               
+               return minNegLogProb;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getProbIntensity");
+               exit(1);
+       }
+}
+
+
+
+
index b3a0071079bdbcbdb6a1069cf6bc1d2f53f8fad9..c9772af6f0e87e32d24518ee0419885770e11998 100644 (file)
 
 #include "mothur.h"
 #include "command.hpp"
+#include "readcolumn.h"
+#include "readmatrix.hpp"
+#include "rabundvector.hpp"
+#include "sabundvector.hpp"
+#include "listvector.hpp"
+#include "cluster.hpp"
+#include "sparsematrix.hpp"
+#include <cfloat>
 
 //**********************************************************************************************************************
 
@@ -42,20 +50,92 @@ public:
        void help() { m->mothurOut(getHelpString()); }          
 private:
        
+    // Plain pair of ints named start and end; the constructor stores its two arguments unchanged.
+    struct linePair {
+               int start, end;
+               linePair(int i, int j) { start = i; end = j; }
+       };
+    
        int abort;
-       
        string outputDir, flowFileName, flowFilesFileName, lookupFileName, compositeFASTAFileName, compositeNamesFileName;
 
        int processors, maxIters;
        float cutoff, sigma, minDelta;
        string flowOrder;
-       
-       vector<int> nSeqsBreaks;
-       vector<int> nOTUsBreaks;
+    
+    vector<string> outputNames;
        vector<double> singleLookUp;
        vector<double> jointLookUp;
+    vector<string> flowFileVector;
+       
+    int driver(vector<string>, string, string, int, int);
+    int createProcesses(vector<string>);
+    int getFlowData(string, vector<string>&, vector<int>&, vector<short>&, map<string, int>&, int&);
+    int getUniques(int, int, vector<short>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
+    int flowDistParentFork(int, string, int, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
+    float calcPairwiseDist(int, int, int, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
+    int createNamesFile(int, int, string, vector<string>&, vector<int>&, vector<int>&);
+    int cluster(string, string, string);
+    int getOTUData(int numSeqs, string,  vector<int>&, vector<int>&, vector<int>&, vector<vector<int> >&, vector<vector<int> >&, vector<int>&, vector<int>&,map<string, int>&);
+    int calcCentroidsDriver(int numOTUs, vector<int>&, vector<int>&, vector<int>&, vector<short>&, vector<int>&, vector<double>&, vector<int>&, vector<short>&, vector<short>&, vector<int>&, int, vector<int>&);
+    double getDistToCentroid(int, int, int, vector<short>&, vector<short>&, int);
+    double getNewWeights(int, vector<int>&, vector<int>&, vector<double>&, vector<int>&, vector<double>&);
+    
+    double getLikelihood(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<double>&);
+    int checkCentroids(int, vector<int>&, vector<double>&);
+    void calcNewDistances(int, int, vector<int>& , vector<double>&,vector<double>& , vector<short>& change, vector<int>&,vector<vector<int> >&,        vector<double>&, vector<vector<int> >&, vector<int>&, vector<int>&, vector<short>&, vector<short>&, int, vector<int>&);
+    int fill(int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<vector<int> >&, vector<vector<int> >&);
+    void setOTUs(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&,
+                 vector<int>&, vector<double>&, vector<double>&, vector<vector<int> >&, vector<vector<int> >&);
+    void writeQualities(int, int, string, vector<int>, vector<int>&, vector<int>&, vector<double>&, vector<short>&, vector<short>&, vector<int>&, vector<int>&, vector<string>&, vector<int>&, vector<vector<int> >&);
+    void writeSequences(string, int, int, string, vector<int>, vector<short>&, vector<string>&, vector<vector<int> >&, vector<int>&);
+    void writeNames(string, int, string, vector<int>, vector<string>&, vector<vector<int> >&, vector<int>&);
+    void writeGroups(string, int, vector<string>&);
+    void writeClusters(string, int, int, vector<int>, vector<int>&, vector<short>&, vector<string>&, vector<vector<int> >&, vector<int>&, vector<int>&, vector<short>&);
+    
+       void getSingleLookUp();
+       void getJointLookUp();
+    double getProbIntensity(int);
        
-       vector<string> seqNameVector;
+       
+#ifdef USE_MPI
+       string flowDistMPI(int, int);
+       void calcNewDistancesChildMPI(int, int, vector<int>&);
+
+       int pid, ncpus; 
+    
+     void getFlowData();
+     void getUniques();
+     
+     float calcPairwiseDist(int, int);
+     void flowDistParentFork(string, int, int);
+     
+     string createDistFile(int);
+     string createNamesFile();
+     string cluster(string, string);
+     
+     void getOTUData(string);
+     void initPyroCluster();
+     void fill();
+     void calcCentroids();
+     void calcCentroidsDriver(int, int);
+     double getDistToCentroid(int, int, int);
+     double getNewWeights();
+     double getLikelihood();
+     void checkCentroids();
+     void calcNewDistances();
+     void calcNewDistancesParent(int, int);
+     void calcNewDistancesChild(int, int, vector<int>&, vector<int>&, vector<double>&);
+     
+     
+     void setOTUs();
+     void writeQualities(vector<int>);
+     void writeSequences(vector<int>);
+     void writeNames(vector<int>);
+     void writeGroups();
+     void writeClusters(vector<int>);
+    
+    vector<string> seqNameVector;
        vector<int> lengths;
        vector<short> flowDataIntI;
        vector<double> flowDataPrI;
@@ -77,50 +157,10 @@ private:
        vector<int> mapSeqToUnique;
        vector<int> mapUniqueToSeq;
        vector<int> uniqueLengths;
+    int numSeqs, numUniques, numOTUs, numFlowCells;
+    vector<int> nSeqsBreaks;
+       vector<int> nOTUsBreaks;
 
-       vector<string> outputNames;
-
-       int numSeqs, numUniques, numOTUs, numFlowCells;
-       
-       void getSingleLookUp();
-       void getJointLookUp();
-       void getFlowData();
-       void getUniques();
-       double getProbIntensity(int);
-       float calcPairwiseDist(int, int);
-       void flowDistParentFork(string, int, int);
-       
-       string createDistFile(int);
-       string createNamesFile();
-       string cluster(string, string);
-       
-       void getOTUData(string);
-       void initPyroCluster();
-       void fill();
-       void calcCentroids();
-       void calcCentroidsDriver(int, int);
-       double getDistToCentroid(int, int, int);
-       double getNewWeights();
-       double getLikelihood();
-       void checkCentroids();
-       void calcNewDistances();
-       void calcNewDistancesParent(int, int);
-       void calcNewDistancesChild(int, int, vector<int>&, vector<int>&, vector<double>&);
-
-
-       void setOTUs();
-       void writeQualities(vector<int>);
-       void writeSequences(vector<int>);
-       void writeNames(vector<int>);
-       void writeGroups();
-       void writeClusters(vector<int>);
-
-       
-#ifdef USE_MPI
-       string flowDistMPI(int, int);
-       void calcNewDistancesChildMPI(int, int, vector<int>&);
-
-       int pid, ncpus; 
 #endif
        
 };
@@ -129,116 +169,1193 @@ private:
 //custom data structure for threads to use.
 // This is passed by void pointer so it can be any data type
 // that can be passed using a single void pointer (LPVOID).
-struct flowDistParentForkData {
-       string distFileName; 
-       vector<int> mapUniqueToSeq;
-       vector<int> mapSeqToUnique;
-       vector<int> lengths;
-       vector<short> flowDataIntI;
-       vector<double> flowDataPrI;
+struct shhhFlowsData {   // argument bundle that ShhhFlowsThreadFunction receives through its LPVOID parameter
+       int threadID, maxIters;
+       float cutoff, sigma, minDelta;
+       string flowOrder;
+       vector<double> singleLookUp;
        vector<double> jointLookUp;
+    vector<string> filenames;   // flow files; the thread function processes filenames[start] .. filenames[stop-1]
+    vector<string> outputNames;   // not set by either constructor, so it starts out empty
+    string thisCompositeFASTAFileName, thisCompositeNameFileName, outputDir;
+    int start, stop;   // half-open index range [start, stop) into filenames
        MothurOut* m;
-       int threadID, startSeq, stopSeq, numFlowCells;
-       float cutoff;
        
-       flowDistParentForkData(){}
-       flowDistParentForkData(string d, vector<int> mapU, vector<int> mapS, vector<int> l, vector<short> flowD, vector<double> flowDa, vector<double> j, MothurOut* mout, int st, int sp, int n, float cut, int tid) {
-               distFileName = d;
-               mapUniqueToSeq = mapU;
-               mapSeqToUnique = mapS;
-               lengths = l;
-               flowDataIntI = flowD;
-               flowDataPrI = flowDa;
-               jointLookUp = j;
+       shhhFlowsData(){}   // note: leaves the int, float and pointer members uninitialized
+       shhhFlowsData(vector<string> f, string cf, string cn, string ou, string flor, vector<double> jl, vector<double> sl, MothurOut* mout, int st, int sp, float cut, float si, float mD, int mx, int tid) {   // copies each argument into the matching member; the joint table (jl) comes before the single table (sl)
+               filenames = f;
+        thisCompositeFASTAFileName = cf;
+        thisCompositeNameFileName = cn;
+        outputDir = ou;
+        flowOrder = flor;
                m = mout;
-               startSeq = st;
-               stopSeq = sp;
-               numFlowCells = n;
+               start = st;
+               stop = sp;
                cutoff= cut;
+        sigma = si;
+        minDelta = mD;
+        maxIters = mx;
+        jointLookUp = jl;
+        singleLookUp = sl;
                threadID = tid;
        }
 };
 
 /**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
-static DWORD WINAPI MyflowDistParentForkThreadFunction(LPVOID lpParam){ 
-       flowDistParentForkData* pDataArray;
-       pDataArray = (flowDistParentForkData*)lpParam;
+static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ 
+       shhhFlowsData* pDataArray;
+       pDataArray = (shhhFlowsData*)lpParam;
        
        try {
-               ostringstream outStream;
-               outStream.setf(ios::fixed, ios::floatfield);
-               outStream.setf(ios::dec, ios::basefield);
-               outStream.setf(ios::showpoint);
-               outStream.precision(6);
-               
-               int begTime = time(NULL);
-               double begClock = clock();
-               string tempOut = "start and end = " + toString(pDataArray->startSeq) +'\t' + toString(pDataArray->stopSeq) + "-";
-               cout << tempOut << endl;
+        
+               for(int l=pDataArray->start;l<pDataArray->stop;l++){
+                       
+                       if (pDataArray->m->control_pressed) { break; }
+                       
+                       string flowFileName = pDataArray->filenames[l];
+            
+                       pDataArray->m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(l+1) + " of " + toString(pDataArray->filenames.size()) + ")\t<<<<<\n");
+                       pDataArray->m->mothurOut("Reading flowgrams...\n");
+                       
+            vector<string> seqNameVector;
+            vector<int> lengths;
+            vector<short> flowDataIntI;
+            vector<double> flowDataPrI;
+            map<string, int> nameMap;
+            vector<short> uniqueFlowgrams;
+            vector<int> uniqueCount;
+            vector<int> mapSeqToUnique;
+            vector<int> mapUniqueToSeq;
+            vector<int> uniqueLengths;
+            int numFlowCells;
+            
+            //int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
+            /*****************************************************************************************************/
+            
+            ifstream flowFile;
+           // cout << "herethread " << flowFileName << '\t' << &flowFile << endl;
+            pDataArray->m->openInputFile(flowFileName, flowFile);
+            
+           // cout << "herethread " << flowFileName << endl;
+            string seqName;
+            int currentNumFlowCells;
+            float intensity;
+                        
+            flowFile >> numFlowCells;
+            int index = 0;//pcluster
+            while(!flowFile.eof()){
+                
+                if (pDataArray->m->control_pressed) { flowFile.close(); return 0; }
+                
+                flowFile >> seqName >> currentNumFlowCells;
+                lengths.push_back(currentNumFlowCells);
+             //  cout << "herethread " << seqName << endl;  
+                seqNameVector.push_back(seqName);
+                nameMap[seqName] = index++;//pcluster
+                
+                for(int i=0;i<numFlowCells;i++){
+                    flowFile >> intensity;
+                    if(intensity > 9.99)       {       intensity = 9.99;       }
+                    int intI = int(100 * intensity + 0.0001);
+                    flowDataIntI.push_back(intI);
+                }
+                pDataArray->m->gobble(flowFile);
+            }
+            flowFile.close();
+            
+            int numSeqs = seqNameVector.size();                
+          //  cout << numSeqs << endl;   
+            for(int i=0;i<numSeqs;i++){
+                
+                if (pDataArray->m->control_pressed) { return 0; }
+                
+                int iNumFlowCells = i * numFlowCells;
+                for(int j=lengths[i];j<numFlowCells;j++){
+                    flowDataIntI[iNumFlowCells + j] = 0;
+                }
+            }
+          //  cout << "here" << endl; 
+            /*****************************************************************************************************/
        
-               for(int i=pDataArray->startSeq;i<pDataArray->stopSeq;i++){
+                       if (pDataArray->m->control_pressed) { return 0; }
+                       
+                       pDataArray->m->mothurOut("Identifying unique flowgrams...\n");
+                       //int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
+            /*****************************************************************************************************/
+            int numUniques = 0;
+            uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
+            uniqueCount.assign(numSeqs, 0);                                                    //      anWeights
+            uniqueLengths.assign(numSeqs, 0);
+            mapSeqToUnique.assign(numSeqs, -1);
+            mapUniqueToSeq.assign(numSeqs, -1);
+            
+            vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
+            
+            for(int i=0;i<numSeqs;i++){
+                
+                if (pDataArray->m->control_pressed) { return 0; }
+                
+                int index = 0;
+                
+                vector<short> current(numFlowCells);
+                for(int j=0;j<numFlowCells;j++){
+                    current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
+                }
+                
+                for(int j=0;j<numUniques;j++){
+                    int offset = j * numFlowCells;
+                    bool toEnd = 1;
+                    
+                    int shorterLength;
+                    if(lengths[i] < uniqueLengths[j])  {       shorterLength = lengths[i];                     }
+                    else                                                               {       shorterLength = uniqueLengths[j];       }
+                    
+                    for(int k=0;k<shorterLength;k++){
+                        if(current[k] != uniqueFlowgrams[offset + k]){
+                            toEnd = 0;
+                            break;
+                        }
+                    }
+                    
+                    if(toEnd){
+                        mapSeqToUnique[i] = j;
+                        uniqueCount[j]++;
+                        index = j;
+                        if(lengths[i] > uniqueLengths[j])      {       uniqueLengths[j] = lengths[i];  }
+                        break;
+                    }
+                    index++;
+                }
+                
+                if(index == numUniques){
+                    uniqueLengths[numUniques] = lengths[i];
+                    uniqueCount[numUniques] = 1;
+                    mapSeqToUnique[i] = numUniques;//anMap
+                    mapUniqueToSeq[numUniques] = i;//anF
+                    
+                    for(int k=0;k<numFlowCells;k++){
+                        uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
+                        uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
+                    }
+                    
+                    numUniques++;
+                }
+            }
+            uniqueFlowDataIntI.resize(numFlowCells * numUniques);
+            uniqueLengths.resize(numUniques);  
+            
+            flowDataPrI.resize(numSeqs * numFlowCells, 0);
+            for(int i=0;i<flowDataPrI.size();i++)      {       
+                if (pDataArray->m->control_pressed) { return 0; } 
+                //flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);  
+                
+                flowDataPrI[i] = 100000000; 
+            
+                for(int j=0;j<HOMOPS;j++){//loop signal strength
+                    
+                    if (pDataArray->m->control_pressed) { return 0; }
+                    
+                    float negLogProb = pDataArray->singleLookUp[j * NUMBINS + flowDataIntI[i]];
+                    if(negLogProb < flowDataPrI[i])    {       flowDataPrI[i] = negLogProb; }
+                }
+            }            
+            
+            /*****************************************************************************************************/
+                       
+                       if (pDataArray->m->control_pressed) { return 0; }
+                       
+                       pDataArray->m->mothurOut("Calculating distances between flowgrams...\n");
+            string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
+            unsigned long long begTime = time(NULL);
+            double begClock = clock();
+            
+            //flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);  
+            /*****************************************************************************************************/
+            ostringstream outStream;
+            outStream.setf(ios::fixed, ios::floatfield);
+            outStream.setf(ios::dec, ios::basefield);
+            outStream.setf(ios::showpoint);
+            outStream.precision(6);
+            
+            int thisbegTime = time(NULL);
+            double thisbegClock = clock();
+            
+            for(int i=0;i<numUniques;i++){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                for(int j=0;j<i;j++){
+                    //float flowDistance = calcPairwiseDist(numFlowCells, mapUniqueToSeq[i], mapUniqueToSeq[j], mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
+                    /*****************************************************************************************************/
+                    int seqA = mapUniqueToSeq[i]; int seqB = mapUniqueToSeq[j];
+                    int minLength = lengths[mapSeqToUnique[seqA]];
+                    if(lengths[seqB] < minLength){     minLength = lengths[mapSeqToUnique[seqB]];      }
+                    
+                    int ANumFlowCells = seqA * numFlowCells;
+                    int BNumFlowCells = seqB * numFlowCells;
+                    
+                    float flowDistance = 0;
+                    
+                    for(int k=0;k<minLength;k++){
+                        
+                        if (pDataArray->m->control_pressed) { break; }
+                        
+                        int flowAIntI = flowDataIntI[ANumFlowCells + k];
+                        float flowAPrI = flowDataPrI[ANumFlowCells + k];
+                        
+                        int flowBIntI = flowDataIntI[BNumFlowCells + k];
+                        float flowBPrI = flowDataPrI[BNumFlowCells + k];
+                        flowDistance += pDataArray->jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
+                    }
+                    
+                    flowDistance /= (float) minLength;
+                    /*****************************************************************************************************/
+
+                    if(flowDistance < 1e-6){
+                        outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
+                    }
+                    else if(flowDistance <= pDataArray->cutoff){
+                        outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
+                    }
+                }
+                if(i % 100 == 0){
+                    pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - thisbegTime));
+                    pDataArray->m->mothurOut("\t" + toString((clock()-thisbegClock)/CLOCKS_PER_SEC));
+                    pDataArray->m->mothurOutEndLine();
+                }
+            }
+            
+            ofstream distFile(distFileName.c_str());
+            distFile << outStream.str();               
+            distFile.close();
+            
+            if (pDataArray->m->control_pressed) {}
+            else {
+                pDataArray->m->mothurOut(toString(numUniques-1) + "\t" + toString(time(NULL) - thisbegTime));
+                pDataArray->m->mothurOut("\t" + toString((clock()-thisbegClock)/CLOCKS_PER_SEC));
+                pDataArray->m->mothurOutEndLine();
+            }
+            /*****************************************************************************************************/
+
+            pDataArray->m->mothurOutEndLine();
+            pDataArray->m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
+            
+                       string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
+                       //createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
+            /*****************************************************************************************************/
+            vector<string> duplicateNames(numUniques, "");
+            for(int i=0;i<numSeqs;i++){
+                duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
+            }
+            
+            ofstream nameFile;
+            pDataArray->m->openOutputFile(namesFileName, nameFile);
+            
+            for(int i=0;i<numUniques;i++){
+                if (pDataArray->m->control_pressed) { nameFile.close(); return 0; }
+                nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
+            }
+            nameFile.close();
+            /*****************************************************************************************************/
+
+                       if (pDataArray->m->control_pressed) { return 0; }
+                       
+                       pDataArray->m->mothurOut("\nClustering flowgrams...\n");
+            string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
+                       //cluster(listFileName, distFileName, namesFileName);
+            /*****************************************************************************************************/
+            ReadMatrix* read = new ReadColumnMatrix(distFileName);     
+            read->setCutoff(pDataArray->cutoff);
+            
+            NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
+            clusterNameMap->readMap();
+            read->read(clusterNameMap);
+            
+            ListVector* list = read->getListVector();
+            SparseMatrix* matrix = read->getMatrix();
+            
+            delete read; 
+            delete clusterNameMap; 
+            
+            RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
+            
+            Cluster* cluster = new CompleteLinkage(rabund, list, matrix, pDataArray->cutoff, "furthest"); 
+            string tag = cluster->getTag();
+            
+            double clusterCutoff = pDataArray->cutoff;
+            while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                cluster->update(clusterCutoff);
+            }
+            
+            list->setLabel(toString(pDataArray->cutoff));
+            
+            ofstream listFileOut;
+            pDataArray->m->openOutputFile(listFileName, listFileOut);
+            list->print(listFileOut);
+            listFileOut.close();
+            
+            delete matrix;     delete cluster; delete rabund; delete list;
+            /*****************************************************************************************************/
+
+                       if (pDataArray->m->control_pressed) { return 0; }
+            
+            vector<int> otuData;
+            vector<int> cumNumSeqs;
+            vector<int> nSeqsPerOTU;
+            vector<vector<int> > aaP;  //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
+            vector<vector<int> > aaI;  //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
+            vector<int> seqNumber;             //tMaster->anP:         the sequence id number sorted by OTU
+            vector<int> seqIndex;              //tMaster->anI;         the index that corresponds to seqNumber
+            
+                       
+                       //int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
+                       /*****************************************************************************************************/
+            ifstream listFile;
+            pDataArray->m->openInputFile(listFileName, listFile);
+            string label;
+            int numOTUs;
+            
+            listFile >> label >> numOTUs;
+            
+            otuData.assign(numSeqs, 0);
+            cumNumSeqs.assign(numOTUs, 0);
+            nSeqsPerOTU.assign(numOTUs, 0);
+            aaP.clear();aaP.resize(numOTUs);
+            
+            seqNumber.clear();
+            aaI.clear();
+            seqIndex.clear();
+            
+            string singleOTU = "";
+            
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                listFile >> singleOTU;
+                
+                istringstream otuString(singleOTU);
+                
+                while(otuString){
+                    
+                    string seqName = "";
+                    
+                    for(int j=0;j<singleOTU.length();j++){
+                        char letter = otuString.get();
+                        
+                        if(letter != ','){
+                            seqName += letter;
+                        }
+                        else{
+                            map<string,int>::iterator nmIt = nameMap.find(seqName);
+                            int index = nmIt->second;
+                            
+                            nameMap.erase(nmIt);
+                            
+                            otuData[index] = i;
+                            nSeqsPerOTU[i]++;
+                            aaP[i].push_back(index);
+                            seqName = "";
+                        }
+                    }
+                    
+                    map<string,int>::iterator nmIt = nameMap.find(seqName);
+                    
+                    int index = nmIt->second;
+                    nameMap.erase(nmIt);
+                    
+                    otuData[index] = i;
+                    nSeqsPerOTU[i]++;
+                    aaP[i].push_back(index);   
+                    
+                    otuString.get();
+                }
+                
+                sort(aaP[i].begin(), aaP[i].end());
+                for(int j=0;j<nSeqsPerOTU[i];j++){
+                    seqNumber.push_back(aaP[i][j]);
+                }
+                for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
+                    aaP[i].push_back(0);
+                }
+                
+                
+            }
+            
+            for(int i=1;i<numOTUs;i++){
+                cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
+            }
+            aaI = aaP;
+            seqIndex = seqNumber;
+            
+            listFile.close();      
+            /*****************************************************************************************************/
+
+                       if (pDataArray->m->control_pressed) { return 0; }
+                       
+                       pDataArray->m->mothurRemove(distFileName);
+                       pDataArray->m->mothurRemove(namesFileName);
+                       pDataArray->m->mothurRemove(listFileName);
+                       
+            vector<double> dist;               //adDist - distance of sequences to centroids
+            vector<short> change;              //did the centroid sequence change? 0 = no; 1 = yes
+            vector<int> centroids;             //the representative flowgram for each cluster m
+            vector<double> weight;
+            vector<double> singleTau;  //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
+            vector<int> nSeqsBreaks;
+            vector<int> nOTUsBreaks;
+            
+                       dist.assign(numSeqs * numOTUs, 0);
+            change.assign(numOTUs, 1);
+            centroids.assign(numOTUs, -1);
+            weight.assign(numOTUs, 0);
+            singleTau.assign(numSeqs, 1.0);
+            
+            nSeqsBreaks.assign(2, 0);
+            nOTUsBreaks.assign(2, 0);
+            
+            nSeqsBreaks[0] = 0;
+            nSeqsBreaks[1] = numSeqs;
+            nOTUsBreaks[1] = numOTUs;
                        
                        if (pDataArray->m->control_pressed) { break; }
-                       cout << "thread i = " << i << endl;
-                       for(int j=0;j<i;j++){
+                       
+                       double maxDelta = 0;
+                       int iter = 0;
+                       
+                       begClock = clock();
+                       begTime = time(NULL);
+            
+                       pDataArray->m->mothurOut("\nDenoising flowgrams...\n");
+                       pDataArray->m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
+                       
+                       while((pDataArray->maxIters == 0 && maxDelta > pDataArray->minDelta) || iter < MIN_ITER || (maxDelta > pDataArray->minDelta && iter < pDataArray->maxIters)){
+                               
+                               if (pDataArray->m->control_pressed) { break; }
                                
-                               cout << "thread j = " << j << endl;
-                               float flowDistance = 0.0;
-                               ////////////////// calcPairwiseDist ///////////////////
-                               //needed because this is a static global function that can't see the classes internal functions
+                               double cycClock = clock();
+                               unsigned long long cycTime = time(NULL);
+                               //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
+                /*****************************************************************************************************/
+                int indexFill = 0;
+                for(int i=0;i<numOTUs;i++){
+                    
+                    if (pDataArray->m->control_pressed) { return 0; }
+                    
+                    cumNumSeqs[i] = indexFill;
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        seqNumber[indexFill] = aaP[i][j];
+                        seqIndex[indexFill] = aaI[i][j];
+                        
+                        indexFill++;
+                    }
+                }
+                /*****************************************************************************************************/
+
                                
-                               int minLength = pDataArray->lengths[pDataArray->mapSeqToUnique[pDataArray->mapUniqueToSeq[i]]];
-                               if(pDataArray->lengths[pDataArray->mapUniqueToSeq[j]] < minLength){     minLength = pDataArray->lengths[pDataArray->mapSeqToUnique[pDataArray->mapUniqueToSeq[j]]];     }
+                               if (pDataArray->m->control_pressed) { break; }
+                
+                               //calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
+                /*****************************************************************************************************/
+                for(int i=0;i<numOTUs;i++){
+                    
+                    if (pDataArray->m->control_pressed) { break; }
+                    
+                    double count = 0;
+                    int position = 0;
+                    int minFlowGram = 100000000;
+                    double minFlowValue = 1e8;
+                    change[i] = 0; //FALSE
+                    
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        count += singleTau[seqNumber[cumNumSeqs[i] + j]];
+                    }
+                    
+                    if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
+                        vector<double> adF(nSeqsPerOTU[i]);
+                        vector<int> anL(nSeqsPerOTU[i]);
+                        
+                        for(int j=0;j<nSeqsPerOTU[i];j++){
+                            int index = cumNumSeqs[i] + j;
+                            int nI = seqIndex[index];
+                            int nIU = mapSeqToUnique[nI];
+                            
+                            int k;
+                            for(k=0;k<position;k++){
+                                if(nIU == anL[k]){
+                                    break;
+                                }
+                            }
+                            if(k == position){
+                                anL[position] = nIU;
+                                adF[position] = 0.0000;
+                                position++;
+                            }                                          
+                        }
+                        
+                        for(int j=0;j<nSeqsPerOTU[i];j++){
+                            int index = cumNumSeqs[i] + j;
+                            int nI = seqIndex[index];
+                            
+                            double tauValue = singleTau[seqNumber[index]];
+                            
+                            for(int k=0;k<position;k++){
+                               // double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
+                                /*****************************************************************************************************/
+                                int flowAValue = anL[k] * numFlowCells;
+                                int flowBValue = nI * numFlowCells;
+                                
+                                double dist = 0;
+                                
+                                for(int l=0;l<lengths[nI];l++){
+                                    dist += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
+                                    flowAValue++;
+                                    flowBValue++;
+                                }
+                                
+                                dist = dist / (double)lengths[nI];
+                                /*****************************************************************************************************/
+                                adF[k] += dist * tauValue;
+                            }
+                        }
+                        
+                        for(int j=0;j<position;j++){
+                            if(adF[j] < minFlowValue){
+                                minFlowGram = j;
+                                minFlowValue = adF[j];
+                            }
+                        }
+                        
+                        if(centroids[i] != anL[minFlowGram]){
+                            change[i] = 1;
+                            centroids[i] = anL[minFlowGram];
+                        }
+                    }
+                    else if(centroids[i] != -1){
+                        change[i] = 1;
+                        centroids[i] = -1;                     
+                    }
+                }
+                /*****************************************************************************************************/
+
+                               if (pDataArray->m->control_pressed) { break; }
+                
+                               //maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);  
+                /*****************************************************************************************************/
+                double maxChange = 0;
+                
+                for(int i=0;i<numOTUs;i++){
+                    
+                    if (pDataArray->m->control_pressed) { break; }
+                    
+                    double difference = weight[i];
+                    weight[i] = 0;
+                    
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        int index = cumNumSeqs[i] + j;
+                        double tauValue = singleTau[seqNumber[index]];
+                        weight[i] += tauValue;
+                    }
+                    
+                    difference = fabs(weight[i] - difference);
+                    if(difference > maxChange){        maxChange = difference; }
+                }
+                maxDelta = maxChange;
+                /*****************************************************************************************************/
+
+                if (pDataArray->m->control_pressed) { break; }
+                
+                               //double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight); 
+                /*****************************************************************************************************/
+                vector<long double> P(numSeqs, 0);
+                int effNumOTUs = 0;
+                
+                for(int i=0;i<numOTUs;i++){
+                    if(weight[i] > MIN_WEIGHT){
+                        effNumOTUs++;
+                    }
+                }
+                
+                string hold;
+                for(int i=0;i<numOTUs;i++){
+                    
+                    if (pDataArray->m->control_pressed) { break; }
+                    
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        int index = cumNumSeqs[i] + j;
+                        int nI = seqIndex[index];
+                        double singleDist = dist[seqNumber[index]];
+                        
+                        P[nI] += weight[i] * exp(-singleDist * pDataArray->sigma);
+                    }
+                }
+                double nLL = 0.00;
+                for(int i=0;i<numSeqs;i++){
+                    if(P[i] == 0){     P[i] = DBL_EPSILON;     }
+                    
+                    nLL += -log(P[i]);
+                }
+                
+                nLL = nLL -(double)numSeqs * log(pDataArray->sigma);
+                /*****************************************************************************************************/
+
+                if (pDataArray->m->control_pressed) { break; }
+                
+                               //checkCentroids(numOTUs, centroids, weight);
+                /*****************************************************************************************************/
+                vector<int> unique(numOTUs, 1);
+                
+                for(int i=0;i<numOTUs;i++){
+                    if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
+                        unique[i] = -1;
+                    }
+                }
+                
+                for(int i=0;i<numOTUs;i++){
+                    
+                    if (pDataArray->m->control_pressed) { break; }
+                    
+                    if(unique[i] == 1){
+                        for(int j=i+1;j<numOTUs;j++){
+                            if(unique[j] == 1){
+                                
+                                if(centroids[j] == centroids[i]){
+                                    unique[j] = 0;
+                                    centroids[j] = -1;
+                                    
+                                    weight[i] += weight[j];
+                                    weight[j] = 0.0;
+                                }
+                            }
+                        }
+                    }
+                }
+                /*****************************************************************************************************/
+
+                               if (pDataArray->m->control_pressed) { break; }
                                
-                               int ANumFlowCells = pDataArray->mapUniqueToSeq[i] * pDataArray->numFlowCells;
-                               int BNumFlowCells = pDataArray->mapUniqueToSeq[j] * pDataArray->numFlowCells;
+                               //calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU,  dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
+                /*****************************************************************************************************/
+                int total = 0;
+                vector<double> newTau(numOTUs,0);
+                vector<double> norms(numSeqs, 0);
+                nSeqsPerOTU.assign(numOTUs, 0);
+                
+                for(int i=0;i<numSeqs;i++){
+                    
+                    if (pDataArray->m->control_pressed) { break; }
+                    
+                    int indexOffset = i * numOTUs;
+                    
+                    double offset = 1e8;
+                    
+                    for(int j=0;j<numOTUs;j++){
+                        
+                        if(weight[j] > MIN_WEIGHT && change[j] == 1){
+                            //dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells);
+                            /*****************************************************************************************************/
+                            int flowAValue = centroids[j] * numFlowCells;
+                            int flowBValue = i * numFlowCells;
+                            
+                            double distTemp = 0;
+                            
+                            for(int l=0;l<lengths[i];l++){
+                                distTemp += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
+                                flowAValue++;
+                                flowBValue++;
+                            }
+                            
+                            dist[indexOffset + j] = distTemp / (double)lengths[i];
+                            /*****************************************************************************************************/
+
+                        }
+                        
+                        if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
+                            offset = dist[indexOffset + j];
+                        }
+                    }
+                    
+                    for(int j=0;j<numOTUs;j++){
+                        if(weight[j] > MIN_WEIGHT){
+                            newTau[j] = exp(pDataArray->sigma * (-dist[indexOffset + j] + offset)) * weight[j];
+                            norms[i] += newTau[j];
+                        }
+                        else{
+                            newTau[j] = 0.0;
+                        }
+                    }
+                    
+                    for(int j=0;j<numOTUs;j++){
+                        newTau[j] /= norms[i];
+                    }
+                    
+                    for(int j=0;j<numOTUs;j++){
+                        if(newTau[j] > MIN_TAU){
+                            
+                            int oldTotal = total;
+                            
+                            total++;
+                            
+                            singleTau.resize(total, 0);
+                            seqNumber.resize(total, 0);
+                            seqIndex.resize(total, 0);
+                            
+                            singleTau[oldTotal] = newTau[j];
+                            
+                            aaP[j][nSeqsPerOTU[j]] = oldTotal;
+                            aaI[j][nSeqsPerOTU[j]] = i;
+                            nSeqsPerOTU[j]++;
+                        }
+                    }
+                    
+                }
+
+                /*****************************************************************************************************/
+
+                               if (pDataArray->m->control_pressed) { break; }
                                
-                               for(int k=0;k<minLength;k++){
-                                       
-                                       if (pDataArray->m->control_pressed) { break; }
-                                       
-                                       int flowAIntI = pDataArray->flowDataIntI[ANumFlowCells + k];
-                                       float flowAPrI = pDataArray->flowDataPrI[ANumFlowCells + k];
-                                       
-                                       int flowBIntI = pDataArray->flowDataIntI[BNumFlowCells + k];
-                                       float flowBPrI = pDataArray->flowDataPrI[BNumFlowCells + k];
-                                       flowDistance += pDataArray->jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
-                               }
+                               iter++;
                                
-                               flowDistance /= (float) minLength;
-                               //cout << flowDistance << endl;
-                               ////////////////// end of calcPairwiseDist ///////////////////
-                                                               
-                               if(flowDistance < 1e-6){
-                                       outStream << pDataArray->mapUniqueToSeq[i] << '\t' << pDataArray->mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
-                               }
-                               else if(flowDistance <= pDataArray->cutoff){
-                                       outStream << pDataArray->mapUniqueToSeq[i] << '\t' << pDataArray->mapUniqueToSeq[j] << '\t' << flowDistance << endl;
-                               }
-                       }
-                       if(i % 100 == 0){
-                               pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - begTime));
-                               pDataArray->m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
-                               pDataArray->m->mothurOutEndLine();
-                       }
+                               pDataArray->m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
+                
+                       }       
+                       
+                       if (pDataArray->m->control_pressed) { break; }
+                       
+                       pDataArray->m->mothurOut("\nFinalizing...\n");
+                       //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
+            /*****************************************************************************************************/
+            int indexFill = 0;
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { return 0; }
+                
+                cumNumSeqs[i] = indexFill;
+                for(int j=0;j<nSeqsPerOTU[i];j++){
+                    seqNumber[indexFill] = aaP[i][j];
+                    seqIndex[indexFill] = aaI[i][j];
+                    
+                    indexFill++;
+                }
+            }
+            /*****************************************************************************************************/
+
+                       if (pDataArray->m->control_pressed) { break; }
+                       
+                       //setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
+            /*****************************************************************************************************/
+            vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
+            
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                for(int j=0;j<nSeqsPerOTU[i];j++){
+                    int index = cumNumSeqs[i] + j;
+                    double tauValue = singleTau[seqNumber[index]];
+                    int sIndex = seqIndex[index];
+                    bigTauMatrix[sIndex * numOTUs + i] = tauValue;                             
+                }
+            }
+            
+            for(int i=0;i<numSeqs;i++){
+                double maxTau = -1.0000;
+                int maxOTU = -1;
+                for(int j=0;j<numOTUs;j++){
+                    if(bigTauMatrix[i * numOTUs + j] > maxTau){
+                        maxTau = bigTauMatrix[i * numOTUs + j];
+                        maxOTU = j;
+                    }
+                }
+                
+                otuData[i] = maxOTU;
+            }
+            
+            nSeqsPerOTU.assign(numOTUs, 0);            
+            
+            for(int i=0;i<numSeqs;i++){
+                int index = otuData[i];
+                
+                singleTau[i] = 1.0000;
+                dist[i] = 0.0000;
+                
+                aaP[index][nSeqsPerOTU[index]] = i;
+                aaI[index][nSeqsPerOTU[index]] = i;
+                
+                nSeqsPerOTU[index]++;
+            }
+            
+            //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);   
+            /*****************************************************************************************************/
+            indexFill = 0;
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { return 0; }
+                
+                cumNumSeqs[i] = indexFill;
+                for(int j=0;j<nSeqsPerOTU[i];j++){
+                    seqNumber[indexFill] = aaP[i][j];
+                    seqIndex[indexFill] = aaI[i][j];
+                    
+                    indexFill++;
+                }
+            }
+            /*****************************************************************************************************/
+
+            /*****************************************************************************************************/
+
+                       if (pDataArray->m->control_pressed) { break; }
+                       
+                       vector<int> otuCounts(numOTUs, 0);
+                       for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
+                       
+                       //calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);       
+            /*****************************************************************************************************/
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                double count = 0;
+                int position = 0;
+                int minFlowGram = 100000000;
+                double minFlowValue = 1e8;
+                change[i] = 0; //FALSE
+                
+                for(int j=0;j<nSeqsPerOTU[i];j++){
+                    count += singleTau[seqNumber[cumNumSeqs[i] + j]];
+                }
+                
+                if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
+                    vector<double> adF(nSeqsPerOTU[i]);
+                    vector<int> anL(nSeqsPerOTU[i]);
+                    
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        int index = cumNumSeqs[i] + j;
+                        int nI = seqIndex[index];
+                        int nIU = mapSeqToUnique[nI];
+                        
+                        int k;
+                        for(k=0;k<position;k++){
+                            if(nIU == anL[k]){
+                                break;
+                            }
+                        }
+                        if(k == position){
+                            anL[position] = nIU;
+                            adF[position] = 0.0000;
+                            position++;
+                        }                                              
+                    }
+                    
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        int index = cumNumSeqs[i] + j;
+                        int nI = seqIndex[index];
+                        
+                        double tauValue = singleTau[seqNumber[index]];
+                        
+                        for(int k=0;k<position;k++){
+                            // double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
+                            /*****************************************************************************************************/
+                            int flowAValue = anL[k] * numFlowCells;
+                            int flowBValue = nI * numFlowCells;
+                            
+                            double dist = 0;
+                            
+                            for(int l=0;l<lengths[nI];l++){
+                                dist += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
+                                flowAValue++;
+                                flowBValue++;
+                            }
+                            
+                            dist = dist / (double)lengths[nI];
+                            /*****************************************************************************************************/
+                            adF[k] += dist * tauValue;
+                        }
+                    }
+                    
+                    for(int j=0;j<position;j++){
+                        if(adF[j] < minFlowValue){
+                            minFlowGram = j;
+                            minFlowValue = adF[j];
+                        }
+                    }
+                    
+                    if(centroids[i] != anL[minFlowGram]){
+                        change[i] = 1;
+                        centroids[i] = anL[minFlowGram];
+                    }
+                }
+                else if(centroids[i] != -1){
+                    change[i] = 1;
+                    centroids[i] = -1;                 
+                }
+            }
+
+            /*****************************************************************************************************/
+
+            if (pDataArray->m->control_pressed) { break; }
+            
+                       //writeQualities(numOTUs, numFlowCells, flowFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); 
+            if (pDataArray->m->control_pressed) { break; }
+            /*****************************************************************************************************/
+            string thisOutputDir = pDataArray->outputDir;
+            if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
+            string qualityFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.qual";
+            
+            ofstream qualityFile;
+            pDataArray->m->openOutputFile(qualityFileName, qualityFile);
+            
+            qualityFile.setf(ios::fixed, ios::floatfield);
+            qualityFile.setf(ios::showpoint);
+            qualityFile << setprecision(6);
+            
+            vector<vector<int> > qualities(numOTUs);
+            vector<double> pr(HOMOPS, 0);
+            
+            
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                int index = 0;
+                int base = 0;
+                
+                if(nSeqsPerOTU[i] > 0){
+                    qualities[i].assign(1024, -1);
+                    
+                    while(index < numFlowCells){
+                        double maxPrValue = 1e8;
+                        short maxPrIndex = -1;
+                        double count = 0.0000;
+                        
+                        pr.assign(HOMOPS, 0);
+                        
+                        for(int j=0;j<nSeqsPerOTU[i];j++){
+                            int lIndex = cumNumSeqs[i] + j;
+                            double tauValue = singleTau[seqNumber[lIndex]];
+                            int sequenceIndex = aaI[i][j];
+                            short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
+                            
+                            count += tauValue;
+                            
+                            for(int s=0;s<HOMOPS;s++){
+                                pr[s] += tauValue * pDataArray->singleLookUp[s * NUMBINS + intensity];
+                            }
+                        }
+                        
+                        maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
+                        maxPrValue = pr[maxPrIndex];
+                        
+                        if(count > MIN_COUNT){
+                            double U = 0.0000;
+                            double norm = 0.0000;
+                            
+                            for(int s=0;s<HOMOPS;s++){
+                                norm += exp(-(pr[s] - maxPrValue));
+                            }
+                            
+                            for(int s=1;s<=maxPrIndex;s++){
+                                int value = 0;
+                                double temp = 0.0000;
+                                
+                                U += exp(-(pr[s-1]-maxPrValue))/norm;
+                                
+                                if(U>0.00){
+                                    temp = log10(U);
+                                }
+                                else{
+                                    temp = -10.1;
+                                }
+                                temp = floor(-10 * temp);
+                                value = (int)floor(temp);
+                                if(value > 100){       value = 100;    }
+                                
+                                qualities[i][base] = (int)value;
+                                base++;
+                            }
+                        }
+                        
+                        index++;
+                    }
+                }
+                
+                
+                if(otuCounts[i] > 0){
+                    qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
+                    
+                    int j=4;   //need to get past the first four bases
+                    while(qualities[i][j] != -1){
+                        qualityFile << qualities[i][j] << ' ';
+                        j++;
+                    }
+                    qualityFile << endl;
+                }
+            }
+            qualityFile.close();
+            pDataArray->outputNames.push_back(qualityFileName);
+            /*****************************************************************************************************/
+
+           // writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, flowFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);
+            if (pDataArray->m->control_pressed) { break; }
+            /*****************************************************************************************************/
+            thisOutputDir = pDataArray->outputDir;
+            if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
+            string fastaFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.fasta";
+            ofstream fastaFile;
+            pDataArray->m->openOutputFile(fastaFileName, fastaFile);
+            
+            vector<string> names(numOTUs, "");
+            
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                int index = centroids[i];
+                
+                if(otuCounts[i] > 0){
+                    fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
+                    
+                    string newSeq = "";
+                    
+                    for(int j=0;j<numFlowCells;j++){
+                        
+                        char base = pDataArray->flowOrder[j % 4];
+                        for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
+                            newSeq += base;
+                        }
+                    }
+                    
+                    fastaFile << newSeq.substr(4) << endl;
+                }
+            }
+            fastaFile.close();
+            
+            pDataArray->outputNames.push_back(fastaFileName);
+            
+            if(pDataArray->thisCompositeFASTAFileName != ""){
+                pDataArray->m->appendFiles(fastaFileName, pDataArray->thisCompositeFASTAFileName);
+            }
+
+            /*****************************************************************************************************/
+
+            //writeNames(thisCompositeNamesFileName, numOTUs, flowFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);                               
+            if (pDataArray->m->control_pressed) { break; }
+            /*****************************************************************************************************/
+            thisOutputDir = pDataArray->outputDir;
+            if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
+            string nameFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.names";
+            ofstream nameFileOut;
+            pDataArray->m->openOutputFile(nameFileName, nameFileOut);
+            
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                if(otuCounts[i] > 0){
+                    nameFileOut << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
+                    
+                    for(int j=1;j<nSeqsPerOTU[i];j++){
+                        nameFileOut << ',' << seqNameVector[aaI[i][j]];
+                    }
+                    
+                    nameFileOut << endl;
+                }
+            }
+            nameFileOut.close();
+            pDataArray->outputNames.push_back(nameFileName);
+            
+            
+            if(pDataArray->thisCompositeNameFileName != ""){
+                pDataArray->m->appendFiles(nameFileName, pDataArray->thisCompositeNameFileName);
+            }          
+            /*****************************************************************************************************/
+
+            //writeClusters(flowFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);                        
+            if (pDataArray->m->control_pressed) { break; }
+            /*****************************************************************************************************/
+            thisOutputDir = pDataArray->outputDir;
+            if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
+            string otuCountsFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.counts";
+            ofstream otuCountsFile;
+            pDataArray->m->openOutputFile(otuCountsFileName, otuCountsFile);
+            
+            string bases = pDataArray->flowOrder;
+            
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) {
+                    break;
+                }
+                //output the translated version of the centroid sequence for the otu
+                if(otuCounts[i] > 0){
+                    int index = centroids[i];
+                    
+                    otuCountsFile << "ideal\t";
+                    for(int j=8;j<numFlowCells;j++){
+                        char base = bases[j % 4];
+                        for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
+                            otuCountsFile << base;
+                        }
+                    }
+                    otuCountsFile << endl;
+                    
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        int sequence = aaI[i][j];
+                        otuCountsFile << seqNameVector[sequence] << '\t';
+                        
+                        string newSeq = "";
+                        
+                        for(int k=0;k<lengths[sequence];k++){
+                            char base = bases[k % 4];
+                            int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
+                            
+                            for(int s=0;s<freq;s++){
+                                newSeq += base;
+                                //otuCountsFile << base;
+                            }
+                        }
+                        otuCountsFile << newSeq.substr(4) << endl;
+                    }
+                    otuCountsFile << endl;
+                }
+            }
+            otuCountsFile.close();
+            pDataArray->outputNames.push_back(otuCountsFileName);
+            /*****************************************************************************************************/
+
+            //writeGroups(flowFileName, numSeqs, seqNameVector);                                               
+            if (pDataArray->m->control_pressed) { break; }
+            /*****************************************************************************************************/
+            thisOutputDir = pDataArray->outputDir;
+            if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
+            string fileRoot = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName));
+            string groupFileName = fileRoot + "shhh.groups";
+            ofstream groupFile;
+            pDataArray->m->openOutputFile(groupFileName, groupFile);
+            
+            for(int i=0;i<numSeqs;i++){
+                if (pDataArray->m->control_pressed) { break; }
+                groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
+            }
+            groupFile.close();
+            pDataArray->outputNames.push_back(groupFileName);
+            /*****************************************************************************************************/
+
+            pDataArray->m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
                }
                
-               ofstream distFile(pDataArray->distFileName.c_str());
-               distFile << outStream.str();            
-               distFile.close();
-               
-               if (pDataArray->m->control_pressed) {}
-               else {
-                       pDataArray->m->mothurOut(toString(pDataArray->stopSeq-1) + "\t" + toString(time(NULL) - begTime));
-                       pDataArray->m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
-                       pDataArray->m->mothurOutEndLine();
-               }               
+        if (pDataArray->m->control_pressed) { for (int i = 0; i < pDataArray->outputNames.size(); i++) { pDataArray->m->mothurRemove(pDataArray->outputNames[i]); } return 0; }
+        
+        return 0;
                
        }
        catch(exception& e) {
-               pDataArray->m->errorOut(e, "ShhherCommand", "MyflowDistParentForkThreadFunction");
+               pDataArray->m->errorOut(e, "ShhherCommand", "ShhhFlowsThreadFunction");
                exit(1);
        }
 } 
index 72504196039c9f337f8051ecdf54a4af1eed5c90..5c6359ec58bb776ce9a050a132e8409e60cf197b 100644 (file)
@@ -364,7 +364,7 @@ vector<string> ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, st
                        lines.push_back(linePair(startIndex, endIndex));
                }
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)          
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -695,14 +695,15 @@ int ShhhSeqsCommand::deconvoluteResults(string fastaFile, string nameFile){
                string inputString = "fasta=" + fastaFile + ", name=" + nameFile;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
-               
+               m->mothurCalling = true;
+        
                Command* uniqueCommand = new DeconvoluteCommand(inputString);
                uniqueCommand->execute();
                
                map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
                
                delete uniqueCommand;
-               
+               m->mothurCalling = false;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                
                string newnameFile = filenames["name"][0];
index 1b0211afba43e5577abce79f89704114b3705b2f..7174ac70caf3f55e146d1cdf09e2b15e5b0cfaf4 100644 (file)
@@ -97,7 +97,7 @@ struct shhhseqsData {
 };
 
 /**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
 static DWORD WINAPI MyShhhSeqsThreadFunction(LPVOID lpParam){ 
        shhhseqsData* pDataArray;
diff --git a/sortseqscommand.cpp b/sortseqscommand.cpp
new file mode 100644 (file)
index 0000000..0236a50
--- /dev/null
@@ -0,0 +1,1090 @@
+//
+//  sortseqscommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 2/3/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "sortseqscommand.h"
+#include "sequence.hpp"
+#include "qualityscores.h"
+
+//**********************************************************************************************************************
+// Registers every parameter accepted by sort.seqs in the member list `parameters`
+// and returns the names of all registered parameters, in registration order.
+vector<string> SortSeqsCommand::setParameters(){
+       try {
+               //input file parameters
+               parameters.push_back(CommandParameter("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false));
+               parameters.push_back(CommandParameter("flow", "InputTypes", "", "", "none", "FNGLT", "none",false,false));
+               parameters.push_back(CommandParameter("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false));
+               parameters.push_back(CommandParameter("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false));
+               parameters.push_back(CommandParameter("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false));
+               parameters.push_back(CommandParameter("qfile", "InputTypes", "", "", "none", "FNGLT", "none",false,false));
+
+               //boolean switch, default "F"
+               parameters.push_back(CommandParameter("large", "Boolean", "", "F", "", "", "",false,false));
+
+               //optional file fixing the order, followed by the directory overrides
+               parameters.push_back(CommandParameter("accnos", "InputTypes", "", "", "none", "none", "none",false,false));
+               parameters.push_back(CommandParameter("inputdir", "String", "", "", "", "", "",false,false));
+               parameters.push_back(CommandParameter("outputdir", "String", "", "", "", "", "",false,false));
+
+               //collect the name of everything registered so far
+               vector<string> paramNames;
+               for (int idx = 0; idx < parameters.size(); idx++) {
+                       paramNames.push_back(parameters[idx].name);
+               }
+               return paramNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SortSeqsCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+// Returns the multi-line help text shown for the sort.seqs command.
+string SortSeqsCommand::getHelpString(){
+       try {
+               //adjacent string literals are joined by the compiler, so this is one string
+               const string helpText =
+                       "The sort.seqs command puts the sequences in the same order for the following file types: accnos fasta, name, group, taxonomy, flow or quality file.\n"
+                       "The sort.seqs command parameters are accnos, fasta, name, group, taxonomy, flow, qfile and large.\n"
+                       "The accnos file allows you to specify the order you want the files in.  If none is provided, mothur will use the order of the first file it reads.\n"
+                       "The large parameters is used to indicate your files are too large to fit in RAM.\n"
+                       "The sort.seqs command should be in the following format: sort.seqs(fasta=yourFasta).\n"
+                       "Example sort.seqs(fasta=amazon.fasta).\n"
+                       "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
+               return helpText;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SortSeqsCommand", "getHelpString");
+               exit(1);
+       }
+}
+
+
+//**********************************************************************************************************************
+// Default constructor: sets both abort and calledHelp, registers the parameters,
+// and creates an empty output list for every output type.
+SortSeqsCommand::SortSeqsCommand(){
+       try {
+               abort = true;
+               calledHelp = true;
+               setParameters();
+
+               //one empty entry per output type this command can produce
+               const char* typeLabels[] = { "fasta", "taxonomy", "name", "group", "qfile", "flow" };
+               const int numTypes = sizeof(typeLabels) / sizeof(typeLabels[0]);
+               vector<string> noFiles;
+               for (int t = 0; t < numTypes; t++) {
+                       outputTypes[typeLabels[t]] = noFiles;
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SortSeqsCommand", "SortSeqsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+// Builds the command from a user option string.
+// "help" and "citation" print their text and mark the command as aborted.
+// Otherwise the options are parsed and validated, the input file names are resolved
+// (prefixing inputdir when a file was given without a path) and stored in the members.
+// abort is set when a parameter is invalid, a file cannot be opened, or no input file is given.
+SortSeqsCommand::SortSeqsCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;
+
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+               else {
+                       vector<string> myArray = setParameters();
+
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) {
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["fasta"] = tempOutNames;
+                       outputTypes["taxonomy"] = tempOutNames;
+                       outputTypes["name"] = tempOutNames;
+                       outputTypes["group"] = tempOutNames;
+                       outputTypes["qfile"] = tempOutNames;
+                       outputTypes["flow"] = tempOutNames;
+
+                       //if the user changes the output directory command factory will send this info to us in the output parameter
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
+
+                       //if the user changes the input directory command factory will send this info to us in the output parameter
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               //every file parameter gets the same treatment: if the user supplied it
+                               //without a path, prefix inputdir; otherwise leave it alone.
+                               const char* fileParams[] = { "fasta", "name", "group", "taxonomy", "qfile", "accnos", "flow" };
+                               const int numFileParams = sizeof(fileParams) / sizeof(fileParams[0]);
+
+                               for (int i = 0; i < numFileParams; i++) {
+                                       it = parameters.find(fileParams[i]);
+                                       if (it != parameters.end()) {
+                                               string path = m->hasPath(it->second);
+                                               if (path == "") {       it->second = inputDir + it->second;             }
+                                       }
+                               }
+                       }
+
+
+                       //check for parameters
+                       //"not open" means the file was named but could not be opened -> abort
+                       //"not found" means the parameter was not given -> treat as empty
+                       accnosfile = validParameter.validFile(parameters, "accnos", true);
+                       if (accnosfile == "not open") { accnosfile = ""; abort = true; }
+                       else if (accnosfile == "not found") {  accnosfile = "";  }
+                       else { m->setAccnosFile(accnosfile); }
+
+                       fastafile = validParameter.validFile(parameters, "fasta", true);
+                       if (fastafile == "not open") { fastafile = ""; abort = true; }
+                       else if (fastafile == "not found") {  fastafile = "";  }
+                       else { m->setFastaFile(fastafile); }
+
+                       flowfile = validParameter.validFile(parameters, "flow", true);
+                       if (flowfile == "not open") { flowfile = ""; abort = true; }
+                       else if (flowfile == "not found") {  flowfile = "";  }
+                       else { m->setFlowFile(flowfile); }
+
+                       namefile = validParameter.validFile(parameters, "name", true);
+                       if (namefile == "not open") { namefile = ""; abort = true; }
+                       else if (namefile == "not found") {  namefile = "";  }
+                       else { m->setNameFile(namefile); }
+
+                       //group, taxonomy and qfile are cleared on "not open" as well, so no member
+                       //is ever left holding the sentinel text as if it were a file name
+                       groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not open") { groupfile = ""; abort = true; }
+                       else if (groupfile == "not found") {  groupfile = "";  }
+                       else { m->setGroupFile(groupfile); }
+
+                       taxfile = validParameter.validFile(parameters, "taxonomy", true);
+                       if (taxfile == "not open") { taxfile = ""; abort = true; }
+                       else if (taxfile == "not found") {  taxfile = "";  }
+                       else { m->setTaxonomyFile(taxfile); }
+
+                       qualfile = validParameter.validFile(parameters, "qfile", true);
+                       if (qualfile == "not open") { qualfile = ""; abort = true; }
+                       else if (qualfile == "not found") {  qualfile = "";  }
+                       else { m->setQualFile(qualfile); }
+
+                       string temp = validParameter.validFile(parameters, "large", false);             if (temp == "not found") { temp = "f"; }
+                       large = m->isTrue(temp);
+
+                       //accnos alone is not enough: at least one file to sort is required
+                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (taxfile == "") && (flowfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, flow or quality."); m->mothurOutEndLine(); abort = true; }
+
+                       //a fasta file was given without a name file: hand the fasta file to the parser's getNameFile
+                       if ((fastafile != "") && (namefile == "")) {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
+               }
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SortSeqsCommand", "SortSeqsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+// Runs the command: processes each supplied file in a fixed sequence (accnos, fasta,
+// flow, qfile, name, group, taxonomy), lists the files produced, and registers the
+// first output of each type as the current file of that type.
+// Returns 0 normally (also after a help request or an interrupt) and 2 when the
+// constructor flagged an abort that was not a help request.
+int SortSeqsCommand::execute(){
+       try {
+
+               if (abort) { return (calledHelp ? 0 : 2); }
+
+               //the sequence of these calls is fixed; each step runs only when its file was supplied
+               if (accnosfile != "")   { readAccnos(); }
+               if (fastafile != "")    { readFasta();  }
+               if (flowfile != "")     { readFlow();   }
+               if (qualfile != "")     { readQual();   }
+               if (namefile != "")     { readName();   }
+               if (groupfile != "")    { readGroup();  }
+               if (taxfile != "")      { readTax();    }
+
+               //interrupted: throw away whatever was written
+               if (m->control_pressed) {
+                       for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
+                       return 0;
+               }
+
+               //nothing produced, nothing to report
+               if (outputNames.size() == 0) { return 0; }
+
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {
+                       m->mothurOut(outputNames[i]);
+                       m->mothurOutEndLine();
+               }
+               m->mothurOutEndLine();
+
+               //make the first sorted file of each type the new current file of that type
+               itTypes = outputTypes.find("fasta");
+               if ((itTypes != outputTypes.end()) && ((itTypes->second).size() != 0)) {
+                       string sortedFile = (itTypes->second)[0];
+                       m->setFastaFile(sortedFile);
+               }
+
+               itTypes = outputTypes.find("name");
+               if ((itTypes != outputTypes.end()) && ((itTypes->second).size() != 0)) {
+                       string sortedFile = (itTypes->second)[0];
+                       m->setNameFile(sortedFile);
+               }
+
+               itTypes = outputTypes.find("group");
+               if ((itTypes != outputTypes.end()) && ((itTypes->second).size() != 0)) {
+                       string sortedFile = (itTypes->second)[0];
+                       m->setGroupFile(sortedFile);
+               }
+
+               itTypes = outputTypes.find("taxonomy");
+               if ((itTypes != outputTypes.end()) && ((itTypes->second).size() != 0)) {
+                       string sortedFile = (itTypes->second)[0];
+                       m->setTaxonomyFile(sortedFile);
+               }
+
+               itTypes = outputTypes.find("qfile");
+               if ((itTypes != outputTypes.end()) && ((itTypes->second).size() != 0)) {
+                       string sortedFile = (itTypes->second)[0];
+                       m->setQualFile(sortedFile);
+               }
+
+               itTypes = outputTypes.find("flow");
+               if ((itTypes != outputTypes.end()) && ((itTypes->second).size() != 0)) {
+                       string sortedFile = (itTypes->second)[0];
+                       m->setFlowFile(sortedFile);
+               }
+
+               return 0;
+       }
+
+       catch(exception& e) {
+               m->errorOut(e, "SortSeqsCommand", "execute");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+// Writes a sorted copy of fastafile to <root>sorted<ext>.
+//
+// If `names` is empty this is the first file read: the file is copied as is and the
+// position of every sequence name is recorded in `names`, which then defines the order.
+// Otherwise the sequences are written in the order stored in `names`; sequences whose
+// name is not in `names` yet are appended to the end of the order (with a message).
+// Names present in `names` but absent from this file are skipped in the output.
+//
+// With large=true the file is re-read once per batch of 1000 order positions so that
+// at most 1000 sequences are held in memory at a time.
+//
+// Always returns 0; on an interrupt (m->control_pressed) the output file is removed.
+int SortSeqsCommand::readFasta(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "sorted" + m->getExtension(fastafile);
+               outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+
+               ifstream in;
+               m->openInputFile(fastafile, in);
+               string name;
+
+        if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
+
+            if (large) { //if the file is too large to fit in memory we can still process it, but the io will be very time consuming.
+                //read through the file looking for 1000 seqs at a time. Once we find them output them and start looking for the next 1000.
+                //this way we only store 1000 seqs in memory at a time.
+
+                int numNames = names.size();
+                int numNamesInFile = 0;
+
+                //to make sure we dont miss any seqs, add any seqs that are not in names but in the file to the end of names
+                while(!in.eof()){
+                    if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+
+                    Sequence currSeq(in);
+                    name = currSeq.getName();
+
+                    if (name != "") {
+                        numNamesInFile++;
+                        map<string, int>::iterator it = names.find(name);
+                        if (it == names.end()) {
+                            names[name] = numNames; numNames++;
+                            m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
+                        }
+                    }
+                    m->gobble(in);
+                }
+                in.close();
+                out.close(); //the batches below append to the (now empty) output file
+
+                //every order position has to be visited: the sequences of this file may sit
+                //anywhere in the order when the file only holds a subset of the known names.
+                int numLeft = names.size();
+
+                int size = 1000; //assume that user can hold 1000 seqs in memory
+                if (numLeft < size) { size = numLeft; }
+                int times = 0;
+                int numWritten = 0; //distinct sequences written so far
+
+                vector<Sequence> seqs; seqs.resize(size);
+
+                //stop as soon as every record of the file has been written, later batches would be empty
+                while ((numLeft > 0) && (numWritten < numNamesInFile)) {
+
+                    //blank the buffer so a position missing from this file cannot re-print
+                    //the sequence that occupied the same slot in the previous batch
+                    for (int i = 0; i < seqs.size(); i++) { seqs[i].setName(""); }
+
+                    ifstream in2;
+                    m->openInputFile(fastafile, in2);
+
+                    if (m->control_pressed) { in2.close();  m->mothurRemove(outputFileName);  return 0; }
+
+                    int found = 0;
+                    int needToFind = size;
+                    if (numLeft < size) { needToFind = numLeft; }
+
+                    while(!in2.eof()){
+                        if (m->control_pressed) { in2.close();   m->mothurRemove(outputFileName);  return 0; }
+
+                        //stop reading if we already found the seqs we are looking for
+                        if (found >= needToFind) { break; }
+
+                        Sequence currSeq(in2);
+                        name = currSeq.getName();
+
+                        if (name != "") {
+                            map<string, int>::iterator it = names.find(name);
+                            if (it != names.end()) { //we found it, so put it in the vector in the right place.
+                                //is it in the set of seqs we are looking for this time around
+                                int thisSeqsPlace = it->second;
+                                thisSeqsPlace -= (times * size);
+                                if ((thisSeqsPlace < needToFind) && (thisSeqsPlace >= 0)) {
+                                    //count each slot once, so a repeated name cannot end the scan early
+                                    if (seqs[thisSeqsPlace].getName() == "") { found++; }
+                                    seqs[thisSeqsPlace] = currSeq;
+                                }
+                            }else { m->mothurOut("[ERROR]: in logic of readFasta function.\n"); m->control_pressed = true; }
+                        }
+                        m->gobble(in2);
+                    }
+                    in2.close();
+
+                    ofstream out2;
+                    m->openOutputFileAppend(outputFileName, out2);
+
+                    for (int i = 0; i < needToFind; i++) {
+                        if (seqs[i].getName() != "") { seqs[i].printSequence(out2); }
+                    }
+                    out2.close();
+
+                    times++;
+                    numLeft -= needToFind;
+                    numWritten += found;
+                }
+
+                m->mothurOut("Ordered " + toString(numNamesInFile) + " sequences from " + fastafile + ".\n");
+            }else {
+
+                vector<Sequence> seqs; seqs.resize(names.size());
+                for (int i = 0; i < seqs.size(); i++) { seqs[i].setName(""); } //this is so if some of the seqs are missing we dont print out garbage
+
+                while(!in.eof()){
+                    if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+
+                    Sequence currSeq(in);
+                    name = currSeq.getName();
+
+                    if (name != "") {
+                        map<string, int>::iterator it = names.find(name);
+                        if (it != names.end()) { //we found it, so put it in the vector in the right place.
+                            seqs[it->second] = currSeq;
+                        }else { //if we cant find it then add it to the end
+                            names[name] = seqs.size();
+                            seqs.push_back(currSeq);
+                            m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
+                        }
+                    }
+                    m->gobble(in);
+                }
+                in.close();
+
+                //slots whose name is still empty belong to names that are not in this file
+                int count = 0;
+                for (int i = 0; i < seqs.size(); i++) {
+                    if (seqs[i].getName() != "") {
+                        seqs[i].printSequence(out); count++;
+                    }
+                }
+                out.close();
+
+                m->mothurOut("Ordered " + toString(count) + " sequences from " + fastafile + ".\n");
+            }
+
+        }else { //read in file to fill names
+            int count = 0;
+
+            while(!in.eof()){
+                if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+
+                Sequence currSeq(in);
+                name = currSeq.getName();
+
+                if (name != "") {
+                    //record the position of this sequence; it defines the order for later files
+                    names[name] = count;
+                    count++;
+                    currSeq.printSequence(out);
+                }
+                m->gobble(in);
+            }
+            in.close();
+            out.close();
+
+            m->mothurOut("\nUsing " + fastafile + " to determine the order. It contains " + toString(count) + " sequences.\n");
+        }
+
+               return 0;
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SortSeqsCommand", "readFasta");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SortSeqsCommand::readFlow(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(flowfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowfile)) + "sorted" + m->getExtension(flowfile);
+               outputTypes["flow"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+        
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               ifstream in;
+               m->openInputFile(flowfile, in);
+        int numFlows;
+               string name;
+        
+        in >> numFlows; m->gobble(in);
+               
+        if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
+            
+            if (large) { //if the file is too large to fit in memory we can still process it, but the io will be very time consuming.
+                //read through the file looking for 1000 seqs at a time. Once we find them output them and start looking for the next 1000.
+                //this way we only store 1000 seqs in memory at a time.
+                
+                int numNames = names.size();
+                int numNamesInFile = 0;
+                
+                //to make sure we dont miss any seqs, add any seqs that are not in names but in the file to the end of names
+                while(!in.eof()){
+                    if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+                    
+                    in >> name;        
+                    string rest = m->getline(in);
+                    
+                    if (name != "") {
+                        numNamesInFile++;
+                        map<string, int>::iterator it = names.find(name);
+                        if (it == names.end()) { 
+                            names[name] = numNames; numNames++;
+                            m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
+                        }
+                    }
+                    m->gobble(in);
+                }
+                in.close();
+                out.close();
+                
+                int numLeft = names.size();
+                if (numNamesInFile < numLeft) { numLeft = numNamesInFile; }
+                
+                int size = 1000; //assume that user can hold 1000 seqs in memory
+                if (numLeft < size) { size = numLeft; }
+                int times = 0;
+                
+                vector<string> seqs; seqs.resize(size, "");
+                
+                while (numLeft > 0) {
+                    
+                    ifstream in2;
+                    m->openInputFile(flowfile, in2); in2 >> numFlows; m->gobble(in2);
+                    
+                    if (m->control_pressed) { in2.close();  m->mothurRemove(outputFileName);  return 0; }
+                    
+                    int found = 0;
+                    int needToFind = size;
+                    if (numLeft < size) { needToFind = numLeft; }
+                    
+                    while(!in2.eof()){
+                        if (m->control_pressed) { in2.close();   m->mothurRemove(outputFileName);  return 0; }
+                        
+                        //stop reading if we already found the seqs we are looking for
+                        if (found >= needToFind) { break; }
+                        
+                        in2 >> name;   
+                        string rest = m->getline(in2);
+                        
+                        if (name != "") {
+                            map<string, int>::iterator it = names.find(name);
+                            if (it != names.end()) { //we found it, so put it in the vector in the right place.
+                                //is it in the set of seqs we are looking for this time around
+                                int thisSeqsPlace = it->second;
+                                thisSeqsPlace -= (times * size);
+                                if ((thisSeqsPlace < size) && (thisSeqsPlace >= 0)) {
+                                    seqs[thisSeqsPlace] = (name +'\t' + rest); 
+                                    found++;
+                                }
+                            }else { m->mothurOut("[ERROR]: in logic of readFlow function.\n"); m->control_pressed = true; }
+                        }
+                        m->gobble(in2);
+                    }
+                    in2.close();       
+                    
+                    ofstream out2;
+                    m->openOutputFileAppend(outputFileName, out2);
+                    
+                    int output = seqs.size();
+                    if (numLeft < seqs.size()) { output = numLeft; }
+                    
+                    for (int i = 0; i < output; i++) {
+                        if (seqs[i] != "") {
+                            out2 << seqs[i] << endl;
+                        }
+                    }
+                    out2.close();
+                    
+                    times++;
+                    numLeft -= output;
+                }
+                
+                m->mothurOut("Ordered " + toString(numNamesInFile) + " flows from " + flowfile + ".\n");
+            }else {
+                
+                vector<string> seqs; seqs.resize(names.size(), "");
+                
+                while(!in.eof()){
+                    if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+                    
+                    in >> name;        
+                    string rest = m->getline(in);
+                    
+                    if (name != "") {
+                        map<string, int>::iterator it = names.find(name);
+                        if (it != names.end()) { //we found it, so put it in the vector in the right place.
+                            seqs[it->second] = (name + '\t' + rest);  
+                        }else { //if we cant find it then add it to the end
+                            names[name] = seqs.size();
+                            seqs.push_back((name + '\t' + rest));
+                            m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
+                        }
+                    }
+                    m->gobble(in);
+                }
+                in.close();    
+                
+                int count = 0;
+                for (int i = 0; i < seqs.size(); i++) {
+                    if (seqs[i] != "") {
+                        out << seqs[i] << endl;
+                        count++;
+                    }
+                }
+                out.close();
+                
+                m->mothurOut("Ordered " + toString(count) + " flows from " + flowfile + ".\n");
+            }
+            
+        }else { //read in file to fill names
+            int count = 0;
+            
+            while(!in.eof()){
+                if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+                
+                in >> name;    
+                string rest = m->getline(in);
+                
+                if (name != "") {
+                    //if this name is in the accnos file
+                    names[name] = count;
+                    count++;
+                    out << name << '\t' << rest << endl;
+                }
+                m->gobble(in);
+            }
+            in.close();        
+            out.close();
+            
+            m->mothurOut("\nUsing " + flowfile + " to determine the order. It contains " + toString(count) + " flows.\n");
+        }
+        
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SortSeqsCommand", "readFlow");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+int SortSeqsCommand::readQual(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "sorted" +  m->getExtension(qualfile);
+        outputTypes["qfile"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+        
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               ifstream in;
+               m->openInputFile(qualfile, in);
+               string name;
+               
+        if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
+            
+            if (large) { //if the file is too large to fit in memory we can still process it, but the io will be very time consuming.
+                //read through the file looking for 1000 seqs at a time. Once we find them output them and start looking for the next 1000.
+                //this way we only store 1000 seqs in memory at a time.
+                
+                int numNames = names.size();
+                int numNamesInFile = 0;
+                
+                //to make sure we dont miss any seqs, add any seqs that are not in names but in the file to the end of names
+                while(!in.eof()){
+                    if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+                    
+                    QualityScores currQual;
+                    currQual = QualityScores(in); 
+                    name = currQual.getName();
+                    
+                    if (name != "") {
+                        numNamesInFile++;
+                        map<string, int>::iterator it = names.find(name);
+                        if (it == names.end()) { 
+                            names[name] = numNames; numNames++;
+                            m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
+                        }
+                    }
+                    m->gobble(in);
+                }
+                in.close();
+                out.close();
+                
+                int numLeft = names.size();
+                if (numNamesInFile < numLeft) { numLeft = numNamesInFile; }
+                
+                int size = 1000; //assume that user can hold 1000 seqs in memory
+                if (numLeft < size) { size = numLeft; }
+                int times = 0;
+
+                
+                vector<QualityScores> seqs; seqs.resize(size);
+                for (int i = 0; i < seqs.size(); i++) { seqs[i].setName(""); } //this is so if some of the seqs are missing we dont print out garbage
+                
+                while (numLeft > 0) {
+                    
+                    ifstream in2;
+                    m->openInputFile(qualfile, in2);
+                    
+                    if (m->control_pressed) { in2.close();  m->mothurRemove(outputFileName);  return 0; }
+                    
+                    int found = 0;
+                    int needToFind = size;
+                    if (numLeft < size) { needToFind = numLeft; }
+                    
+                    while(!in2.eof()){
+                        if (m->control_pressed) { in2.close();   m->mothurRemove(outputFileName);  return 0; }
+                        
+                        //stop reading if we already found the seqs we are looking for
+                        if (found >= needToFind) { break; }
+                        
+                        QualityScores currQual;
+                        currQual = QualityScores(in2); 
+                        name = currQual.getName();
+                        
+                        if (name != "") {
+                            map<string, int>::iterator it = names.find(name);
+                            if (it != names.end()) { //we found it, so put it in the vector in the right place.
+                                //is it in the set of seqs we are looking for this time around
+                                int thisSeqsPlace = it->second;
+                                thisSeqsPlace -= (times * size);
+                                if ((thisSeqsPlace < size) && (thisSeqsPlace >= 0)) {
+                                    seqs[thisSeqsPlace] = currQual; 
+                                    found++;
+                                }
+                            }else { m->mothurOut("[ERROR]: in logic of readQual function.\n"); m->control_pressed = true; }
+                        }
+                        m->gobble(in2);
+                    }
+                    in2.close();       
+                    
+                    ofstream out2;
+                    m->openOutputFileAppend(outputFileName, out2);
+                    
+                    int output = seqs.size();
+                    if (numLeft < seqs.size()) { output = numLeft; }
+                    
+                    for (int i = 0; i < output; i++) {
+                        if (seqs[i].getName() != "") {
+                            seqs[i].printQScores(out2);
+                        }
+                    }
+                    out2.close();
+                    
+                    times++;
+                    numLeft -= output;
+                }
+                
+                 m->mothurOut("Ordered " + toString(numNamesInFile) + " sequences from " + qualfile + ".\n");
+                
+            }else {
+                
+                vector<QualityScores> seqs; seqs.resize(names.size());
+                for (int i = 0; i < seqs.size(); i++) { seqs[i].setName(""); } //this is so if some of the seqs are missing we dont print out garbage
+                
+                while(!in.eof()){
+                    if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+                    
+                    QualityScores currQual;
+                    currQual = QualityScores(in); 
+                    name = currQual.getName();
+                    
+                    if (name != "") {
+                        map<string, int>::iterator it = names.find(name);
+                        if (it != names.end()) { //we found it, so put it in the vector in the right place.
+                            seqs[it->second] = currQual;  
+                        }else { //if we cant find it then add it to the end
+                            names[name] = seqs.size();
+                            seqs.push_back(currQual);
+                            m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
+                        }
+                    }
+                    m->gobble(in);
+                }
+                in.close();    
+                
+                int count = 0;
+                for (int i = 0; i < seqs.size(); i++) {
+                    if (seqs[i].getName() != "") { seqs[i].printQScores(out); count++; }
+                }
+                out.close();
+                
+                m->mothurOut("Ordered " + toString(count) + " sequences from " + qualfile + ".\n");
+            }
+            
+        }else { //read in file to fill names
+            int count = 0;
+            
+            while(!in.eof()){
+                if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+                
+                QualityScores currQual;
+                currQual = QualityScores(in);  
+                               
+                m->gobble(in);
+                
+                if (currQual.getName() != "") {
+                    //if this name is in the accnos file
+                    names[currQual.getName()] = count;
+                    count++;
+                    currQual.printQScores(out);
+                }
+                m->gobble(in);
+            }
+            in.close();        
+            out.close();
+            
+            m->mothurOut("\nUsing " + qualfile + " to determine the order. It contains " + toString(count) + " sequences.\n");
+        }
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SortSeqsCommand", "readQual");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SortSeqsCommand::readName(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "sorted" + m->getExtension(namefile);
+        outputTypes["name"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+        
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+        
+               ifstream in;
+               m->openInputFile(namefile, in);
+               string name, firstCol, secondCol;
+               
+        if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
+        
+                vector<string> seqs; seqs.resize(names.size(), "");
+                
+                while(!in.eof()){
+                    if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+                    
+                    in >> firstCol;            m->gobble(in);          
+                    in >> secondCol;    m->gobble(in);
+                    
+                    if (firstCol != "") {
+                        map<string, int>::iterator it = names.find(firstCol);
+                        if (it != names.end()) { //we found it, so put it in the vector in the right place.
+                            seqs[it->second] = firstCol + '\t' + secondCol;  
+                        }else { //if we cant find it then add it to the end
+                            names[firstCol] = seqs.size();
+                            seqs.push_back((firstCol + '\t' + secondCol));
+                            m->mothurOut(firstCol + " was not in the contained the file which determined the order, adding it to the end.\n");
+                        }
+                    }
+                }
+                in.close();    
+                
+                int count = 0;
+                for (int i = 0; i < seqs.size(); i++) {
+                    if (seqs[i] != "") { out << seqs[i] << endl; count++; }
+                }
+                out.close();
+                
+                m->mothurOut("Ordered " + toString(count) + " sequences from " + namefile + ".\n");
+            
+        }else { //read in file to fill names
+            int count = 0;
+            
+            while(!in.eof()){
+                if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+                
+                in >> firstCol;                m->gobble(in);          
+                in >> secondCol;    m->gobble(in);
+                
+                if (firstCol != "") {
+                    //if this name is in the accnos file
+                    names[firstCol] = count;
+                    count++;
+                    out << firstCol << '\t' << secondCol << endl;
+                }
+                m->gobble(in);
+            }
+            in.close();        
+            out.close();
+            
+            m->mothurOut("\nUsing " + namefile + " to determine the order. It contains " + toString(count) + " representative sequences.\n");
+        }
+                               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SortSeqsCommand", "readName");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+int SortSeqsCommand::readGroup(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
+               outputTypes["group"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+        
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+        
+               ifstream in;
+               m->openInputFile(groupfile, in);
+               string name, group;
+               
+               if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
+            
+            vector<string> seqs; seqs.resize(names.size(), "");
+            
+            while(!in.eof()){
+                if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+                
+                in >> name;            m->gobble(in);          
+                in >> group;    m->gobble(in);
+                
+                if (name != "") {
+                    map<string, int>::iterator it = names.find(name);
+                    if (it != names.end()) { //we found it, so put it in the vector in the right place.
+                        seqs[it->second] = name + '\t' + group;  
+                    }else { //if we cant find it then add it to the end
+                        names[name] = seqs.size();
+                        seqs.push_back((name + '\t' + group));
+                        m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
+                    }
+                }
+            }
+            in.close();        
+            
+            int count = 0;
+            for (int i = 0; i < seqs.size(); i++) {
+                if (seqs[i] != "") { out << seqs[i] << endl; count++; }
+            }
+            out.close();
+            
+            m->mothurOut("Ordered " + toString(count) + " sequences from " + groupfile + ".\n");
+            
+        }else { //read in file to fill names
+            int count = 0;
+            
+            while(!in.eof()){
+                if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+                
+                in >> name;            m->gobble(in);          
+                in >> group;    m->gobble(in);
+                
+                if (name != "") {
+                    //if this name is in the accnos file
+                    names[name] = count;
+                    count++;
+                    out << name << '\t' << group << endl;
+                }
+                m->gobble(in);
+            }
+            in.close();        
+            out.close();
+            
+            m->mothurOut("\nUsing " + groupfile + " to determine the order. It contains " + toString(count) + " sequences.\n");
+        }
+        
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SortSeqsCommand", "readGroup");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SortSeqsCommand::readTax(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
+        outputTypes["taxonomy"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+        
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+        
+               ifstream in;
+               m->openInputFile(taxfile, in);
+               string name, tax;
+               
+               if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have
+            
+            vector<string> seqs; seqs.resize(names.size(), "");
+            
+            while(!in.eof()){
+                if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+                
+                in >> name;            m->gobble(in);          
+                in >> tax;    m->gobble(in);
+                
+                if (name != "") {
+                    map<string, int>::iterator it = names.find(name);
+                    if (it != names.end()) { //we found it, so put it in the vector in the right place.
+                        seqs[it->second] = name + '\t' + tax;  
+                    }else { //if we cant find it then add it to the end
+                        names[name] = seqs.size();
+                        seqs.push_back((name + '\t' + tax));
+                        m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n");
+                    }
+                }
+            }
+            in.close();        
+            
+            int count = 0;
+            for (int i = 0; i < seqs.size(); i++) {
+                if (seqs[i] != "") { out << seqs[i] << endl; count++; }
+            }
+            out.close();
+            
+            m->mothurOut("Ordered " + toString(count) + " sequences from " + taxfile + ".\n");
+            
+        }else { //read in file to fill names
+            int count = 0;
+            
+            while(!in.eof()){
+                if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+                
+                in >> name;            m->gobble(in);          
+                in >> tax;    m->gobble(in);
+                
+                if (name != "") {
+                    //if this name is in the accnos file
+                    names[name] = count;
+                    count++;
+                    out << name << '\t' << tax << endl;
+                }
+                m->gobble(in);
+            }
+            in.close();        
+            out.close();
+            
+            m->mothurOut("\nUsing " + taxfile + " to determine the order. It contains " + toString(count) + " sequences.\n");
+        }
+        
+               return 0;
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SortSeqsCommand", "readTax");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SortSeqsCommand::readAccnos(){
+       try {
+               
+               ifstream in;
+               m->openInputFile(accnosfile, in);
+               string name;
+        int count = 0;
+               
+               while(!in.eof()){
+            
+            if (m->control_pressed) { break; }
+            
+                       in >> name; m->gobble(in);
+            
+            if (name != "") {
+                names[name] = count;
+                count++;
+            }
+               }
+               in.close();             
+        
+        m->mothurOut("\nUsing " + accnosfile + " to determine the order. It contains " + toString(count) + " representative sequences.\n");
+        
+        return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SortSeqsCommand", "readAccnos");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+
+
+
+
diff --git a/sortseqscommand.h b/sortseqscommand.h
new file mode 100644 (file)
index 0000000..0685d3b
--- /dev/null
@@ -0,0 +1,54 @@
+#ifndef Mothur_sortseqscommand_h
+#define Mothur_sortseqscommand_h
+
+
+//
+//  sortseqscommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 2/3/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+
+
+#include "command.hpp"
+
+class SortSeqsCommand : public Command {
+       
+public:
+       
+    SortSeqsCommand(string);   
+    SortSeqsCommand();
+    ~SortSeqsCommand(){}
+       
+    vector<string> setParameters();
+    string getCommandName()                    { return "sort.seqs";                           }
+    string getCommandCategory()                { return "Sequence Processing";         }
+    string getHelpString();    
+    string getCitation() { return "http://www.mothur.org/wiki/Sort.seqs"; }
+    string getDescription()            { return "puts sequences from a fasta, name, group, quality, flow or taxonomy file in the same order"; }
+    
+    int execute(); 
+    void help() { m->mothurOut(getHelpString()); }     
+       
+       
+private:
+    map<string, int> names;
+    string accnosfile, fastafile, namefile, groupfile, taxfile, qualfile, flowfile, outputDir;
+    bool abort, large;
+    vector<string> outputNames;
+    
+    int readFasta();
+    int readFlow();
+    int readName();
+    int readGroup();
+    int readAccnos();
+    int readTax();
+    int readQual();
+    
+};
+
+#endif
+
+
diff --git a/subsample.cpp b/subsample.cpp
new file mode 100644 (file)
index 0000000..e6dd845
--- /dev/null
@@ -0,0 +1,132 @@
+//
+//  subsample.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 4/2/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "subsample.h"
+
+//**********************************************************************************************************************
+vector<string> SubSample::getSample(vector<SharedRAbundVector*>& thislookup, int size) {
+       try {
+               
+               //save mothurOut's binLabels to restore for next label
+               vector<string> saveBinLabels = m->currentBinLabels;
+               
+               int numBins = thislookup[0]->getNumBins();
+               for (int i = 0; i < thislookup.size(); i++) {           
+                       int thisSize = thislookup[i]->getNumSeqs();
+                       
+                       if (thisSize != size) {
+                               
+                               string thisgroup = thislookup[i]->getGroup();
+                               
+                               OrderVector order;
+                               for(int p=0;p<numBins;p++){
+                                       for(int j=0;j<thislookup[i]->getAbundance(p);j++){
+                                               order.push_back(p);
+                                       }
+                               }
+                               random_shuffle(order.begin(), order.end());
+                               
+                               SharedRAbundVector* temp = new SharedRAbundVector(numBins);
+                               temp->setLabel(thislookup[i]->getLabel());
+                               temp->setGroup(thislookup[i]->getGroup());
+                               
+                               delete thislookup[i];
+                               thislookup[i] = temp;
+                               
+                               
+                               for (int j = 0; j < size; j++) {
+                                       
+                                       if (m->control_pressed) {  return m->currentBinLabels; }
+                                       
+                                       int bin = order.get(j);
+                                       
+                                       int abund = thislookup[i]->getAbundance(bin);
+                                       thislookup[i]->set(bin, (abund+1), thisgroup);
+                               }       
+                       }
+               }
+               
+               //subsampling may have created some otus with no sequences in them
+               eliminateZeroOTUS(thislookup);
+               
+               if (m->control_pressed) { return m->currentBinLabels; }
+               
+               //save mothurOut's binLabels to restore for next label
+        vector<string> subsampleBinLabels = m->currentBinLabels;
+               m->currentBinLabels = saveBinLabels;
+               
+               return subsampleBinLabels;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SubSample", "getSample");
+               exit(1);
+       }
+}      
+//**********************************************************************************************************************
+int SubSample::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
+       try {
+               
+               vector<SharedRAbundVector*> newLookup;
+               for (int i = 0; i < thislookup.size(); i++) {
+                       SharedRAbundVector* temp = new SharedRAbundVector();
+                       temp->setLabel(thislookup[i]->getLabel());
+                       temp->setGroup(thislookup[i]->getGroup());
+                       newLookup.push_back(temp);
+               }
+               
+               //for each bin
+               vector<string> newBinLabels;
+               string snumBins = toString(thislookup[0]->getNumBins());
+               for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
+                       if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+                       
+                       //look at each sharedRabund and make sure they are not all zero
+                       bool allZero = true;
+                       for (int j = 0; j < thislookup.size(); j++) {
+                               if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
+                       }
+                       
+                       //if they are not all zero add this bin
+                       if (!allZero) {
+                               for (int j = 0; j < thislookup.size(); j++) {
+                                       newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
+                               }
+                               //if there is a bin label use it otherwise make one
+                               string binLabel = "Otu";
+                               string sbinNumber = toString(i+1);
+                               if (sbinNumber.length() < snumBins.length()) { 
+                                       int diff = snumBins.length() - sbinNumber.length();
+                                       for (int h = 0; h < diff; h++) { binLabel += "0"; }
+                               }
+                               binLabel += sbinNumber; 
+                               if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                               
+                               newBinLabels.push_back(binLabel);
+                       }
+               }
+               
+               for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
+               thislookup.clear();
+               
+               thislookup = newLookup;
+               m->currentBinLabels = newBinLabels;
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SubSample", "eliminateZeroOTUS");
+               exit(1);
+       }
+}
+
+
+//**********************************************************************************************************************
+
+
diff --git a/subsample.h b/subsample.h
new file mode 100644 (file)
index 0000000..09c7dcd
--- /dev/null
@@ -0,0 +1,34 @@
+#ifndef Mothur_subsample_h
+#define Mothur_subsample_h
+
+//
+//  subsample.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 4/2/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "mothurout.h"
+#include "sharedrabundvector.h"
+
+//Subsampling overwrites the sharedRabunds. If you need to reuse the originals, deep copy them before calling getSample.
+
+class SubSample {
+       
+    public:
+    
+        SubSample() { m = MothurOut::getInstance(); }
+        ~SubSample() {}
+    
+        vector<string> getSample(vector<SharedRAbundVector*>&, int); //returns the bin labels for the subsample, mothurOuts binlabels are preserved so you can run this multiple times. Overwrites original vector passed in, if you need to preserve it deep copy first.
+    
+    
+    private:
+    
+        MothurOut* m;
+        int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
+
+};
+
+#endif
index c352feb099a83879bae9e1aca0f35d5bfe56e691..717b1d3231c20368a2d23e9b86d52a6707f03b12 100644 (file)
@@ -10,6 +10,7 @@
 #include "subsamplecommand.h"
 #include "sharedutilities.h"
 #include "deconvolutecommand.h"
+#include "subsample.h"
 
 //**********************************************************************************************************************
 vector<string> SubSampleCommand::setParameters(){      
@@ -531,14 +532,16 @@ int SubSampleCommand::getSubSampleFasta() {
                        string inputString = "fasta=" + outputFileName;
                        m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                        m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
-                       
+                       m->mothurCalling = true;
+            
                        Command* uniqueCommand = new DeconvoluteCommand(inputString);
                        uniqueCommand->execute();
                        
                        map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
                        
                        delete uniqueCommand;
-                       
+                       m->mothurCalling = false;
+            
                        outputTypes["name"].push_back(filenames["name"][0]);  outputNames.push_back(filenames["name"][0]);
                        m->mothurRemove(outputFileName);
                        outputFileName = filenames["fasta"][0];
@@ -799,68 +802,28 @@ int SubSampleCommand::processShared(vector<SharedRAbundVector*>& thislookup) {
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
                string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + thislookup[0]->getLabel() + ".subsample" + m->getExtension(sharedfile);
-               
-               
-               ofstream out;
+        
+        SubSample sample;
+        vector<string> subsampledLabels = sample.getSample(thislookup, size);
+        
+        if (m->control_pressed) {  return 0; }
+        
+        ofstream out;
                m->openOutputFile(outputFileName, out);
                outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
                
-               int numBins = thislookup[0]->getNumBins();
-               for (int i = 0; i < thislookup.size(); i++) {           
-                       int thisSize = thislookup[i]->getNumSeqs();
-                       
-                       if (thisSize != size) {
-                               
-                               string thisgroup = thislookup[i]->getGroup();
-                               
-                               OrderVector* order = new OrderVector();
-                               for(int p=0;p<numBins;p++){
-                                       for(int j=0;j<thislookup[i]->getAbundance(p);j++){
-                                               order->push_back(p);
-                                       }
-                               }
-                               random_shuffle(order->begin(), order->end());
-                               
-                               SharedRAbundVector* temp = new SharedRAbundVector(numBins);
-                               temp->setLabel(thislookup[i]->getLabel());
-                               temp->setGroup(thislookup[i]->getGroup());
-                               
-                               delete thislookup[i];
-                               thislookup[i] = temp;
-                               
-                               
-                               for (int j = 0; j < size; j++) {
-                                       
-                                       if (m->control_pressed) { delete order; out.close(); return 0; }
-                                       
-                                       //get random number to sample from order between 0 and thisSize-1.
-                                       //don't need this because of the random shuffle above
-                                       //int myrand = int((float)(thisSize) * (float)(rand()) / ((float)RAND_MAX+1.0));
-                                       
-                                       int bin = order->get(j);
-                                       
-                                       int abund = thislookup[i]->getAbundance(bin);
-                                       thislookup[i]->set(bin, (abund+1), thisgroup);
-                               }       
-                               delete order;
-                       }
-               }
-               
-               //subsampling may have created some otus with no sequences in them
-               eliminateZeroOTUS(thislookup);
-               
-               if (m->control_pressed) { out.close(); return 0; }
-               
+        m->currentBinLabels = subsampledLabels;
+        
                thislookup[0]->printHeaders(out);
                
                for (int i = 0; i < thislookup.size(); i++) {
                        out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
                        thislookup[i]->print(out);
                }
-               
                out.close();
-               
-               //save mothurOut's binLabels to restore for next label
+        
+        
+        //save mothurOut's binLabels to restore for next label
                m->currentBinLabels = saveBinLabels;
                
                return 0;
@@ -1521,64 +1484,6 @@ int SubSampleCommand::processSabund(SAbundVector*& sabund, ofstream& out) {
        }
 }                      
 //**********************************************************************************************************************
-int SubSampleCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
-       try {
-               
-               vector<SharedRAbundVector*> newLookup;
-               for (int i = 0; i < thislookup.size(); i++) {
-                       SharedRAbundVector* temp = new SharedRAbundVector();
-                       temp->setLabel(thislookup[i]->getLabel());
-                       temp->setGroup(thislookup[i]->getGroup());
-                       newLookup.push_back(temp);
-               }
-               
-               //for each bin
-               vector<string> newBinLabels;
-               string snumBins = toString(thislookup[0]->getNumBins());
-               for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
-                       if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
-                       
-                       //look at each sharedRabund and make sure they are not all zero
-                       bool allZero = true;
-                       for (int j = 0; j < thislookup.size(); j++) {
-                               if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
-                       }
-                       
-                       //if they are not all zero add this bin
-                       if (!allZero) {
-                               for (int j = 0; j < thislookup.size(); j++) {
-                                       newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
-                               }
-                               //if there is a bin label use it otherwise make one
-                               string binLabel = "Otu";
-                               string sbinNumber = toString(i+1);
-                               if (sbinNumber.length() < snumBins.length()) { 
-                                       int diff = snumBins.length() - sbinNumber.length();
-                                       for (int h = 0; h < diff; h++) { binLabel += "0"; }
-                               }
-                               binLabel += sbinNumber; 
-                               if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
-                               
-                               newBinLabels.push_back(binLabel);
-                       }
-               }
-               
-               for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
-               thislookup.clear();
-               
-               thislookup = newLookup;
-               m->currentBinLabels = newBinLabels;
-               
-               return 0;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SubSampleCommand", "eliminateZeroOTUS");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
 
 
 
index 4be357059793f3c06d574ac227b4cc8049383479..7235a7b68ec569412bb97d1b98f076ff3b439bdb 100644 (file)
@@ -45,7 +45,6 @@ private:
        vector<string> names;
        map<string, vector<string> > nameMap;
        
-       int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
        int getSubSampleShared();
        int getSubSampleList();
        int getSubSampleRabund();
index ca1ff42a11c6bcbd689e0e49e2cdfb36604e0807..3796801a73c852bd61b6785d453d1cce825a0e66 100644 (file)
 #include "mothur.h"
 #include "database.hpp"
 #include "suffixtree.hpp"
-//class SuffixTree;
 
 class SuffixDB : public Database {
        
 public:
        SuffixDB(int);
        SuffixDB();
-       SuffixDB(const SuffixDB& sdb) : count(sdb.count), Database(sdb) {
-               for (int i = 0; i < sdb.suffixForest.size(); i++) {
-                       SuffixTree temp(sdb.suffixForest[i]);
-                       suffixForest.push_back(temp);
-               }
-       }
        ~SuffixDB();
        
        void generateDB() {}; //adding sequences generates the db
index 1e078a63a759a99b4b47ed123526da752e2404bd..6a22c4d4359b8ee6120c3e683b2d14318fd0c05c 100644 (file)
@@ -25,7 +25,6 @@ class SuffixNode {
        
 public:
        SuffixNode(int, int, int);
-       SuffixNode(const SuffixNode& sn) : parentNode(sn.parentNode), startCharPosition(sn.startCharPosition), endCharPosition(sn.endCharPosition) {m = MothurOut::getInstance();}
        virtual ~SuffixNode() {}
        virtual void print(string, int) = 0;
        virtual void setChildren(char, int);
@@ -63,7 +62,6 @@ class SuffixBranch : public SuffixNode {
        
 public:
        SuffixBranch(int, int, int);
-       SuffixBranch(const SuffixBranch& sb) : suffixNode(sb.suffixNode), childNodes(sb.childNodes), SuffixNode(sb.parentNode, sb.startCharPosition, sb.endCharPosition) {}
        ~SuffixBranch() {}
        void print(string, int);                //      need a special method for printing the node because there are children
        void eraseChild(char);                  //      need a special method for erasing the children
index fd18109513bea9f59590e16a1c705128f61ca43e..9cd835185c31a61bd024590f5a3fc7396d31610f 100644 (file)
@@ -33,25 +33,6 @@ inline bool compareParents(SuffixNode* left, SuffixNode* right){//   this is neces
        return (left->getParentNode() < right->getParentNode());        //      nodes in order of their parent
 }
 
-//********************************************************************************************************************
-SuffixTree::SuffixTree(const SuffixTree& st) : root(st.root), activeEndPosition(st.activeEndPosition), activeStartPosition(st.activeStartPosition), activeNode(st.activeNode),
-                                                                                               nodeCounter(st.nodeCounter), seqName(st.seqName), sequence(st.sequence) { 
-       try {
-               m = MothurOut::getInstance(); 
-               
-               for (int i = 0; i < st.nodeVector.size(); i++) {
-                       SuffixNode* temp = new SuffixBranch(*((SuffixBranch*)st.nodeVector[i]));
-                       nodeVector.push_back(temp);
-               }
-               
-               
-       }catch(exception& e) {
-               m->errorOut(e, "SuffixTree", "SuffixTree");
-               exit(1);
-       }
-}
 //********************************************************************************************************************
 
 SuffixTree::SuffixTree(){ m = MothurOut::getInstance(); }
index 492db54772ddcc1b2e2d60ea46c1700529453e22..d2b69e42325777d508a23000b0bc0924e0cc34f9 100644 (file)
@@ -36,8 +36,6 @@ class SuffixTree {
 public:
        SuffixTree();
        ~SuffixTree();
-//     SuffixTree(string, string);
-       SuffixTree(const SuffixTree&);
 
        void loadSequence(Sequence);
        string getSeqName();
index 2a40e0862ba0b0444db035246cdcb95409fe4923..5c175106ecb2584d07ca15a06c71f27284a3571c 100644 (file)
@@ -162,7 +162,7 @@ int SummaryQualCommand::execute(){
                if (namefile != "") { nameMap = m->readNames(namefile); }
                
                vector<unsigned long long> positions; 
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                positions = m->divideFile(qualfile, processors);
                for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(linePair(positions[i], positions[(i+1)]));      }
 #else  
@@ -170,6 +170,7 @@ int SummaryQualCommand::execute(){
                        lines.push_back(linePair(0, 1000)); 
                }else {
                        positions = m->setFilePosFasta(qualfile, numSeqs); 
+            if (positions.size() < processors) { processors = positions.size(); }
                        
                        //figure out how many sequences you have to process
                        int numSeqsPerProcessor = numSeqs / processors;
@@ -267,7 +268,7 @@ int SummaryQualCommand::driverCreateSummary(vector<int>& position, vector<int>&
                                count += num;
                        }
                        
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        unsigned long long pos = in.tellg();
                        if ((pos == -1) || (pos >= filePos.end)) { break; }
 #else
@@ -291,7 +292,7 @@ int SummaryQualCommand::createProcessesCreateSummary(vector<int>& position, vect
                int numSeqs = 0;
                processIDS.clear();
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -373,7 +374,7 @@ int SummaryQualCommand::createProcessesCreateSummary(vector<int>& position, vect
                //////////////////////////////////////////////////////////////////////////////////////////////////////
                //Windows version shared memory, so be careful when passing variables through the seqSumQualData struct. 
                //Above fork() will clone, so memory is separate, but that's not the case with windows, 
-               //Taking advantage of shared memory to allow both threads to add info to vectors.
+               //Taking advantage of shared memory to pass results vectors.
                //////////////////////////////////////////////////////////////////////////////////////////////////////
                
                vector<seqSumQualData*> pDataArray; 
@@ -384,12 +385,10 @@ int SummaryQualCommand::createProcessesCreateSummary(vector<int>& position, vect
                for( int i=0; i<processors; i++ ){
                        
                        // Allocate memory for thread data.
-                       seqSumQualData* tempSum = new seqSumQualData(&position, &averageQ, &scores, filename, m, lines[i].start, lines[i].end, namefile, nameMap);
+                       seqSumQualData* tempSum = new seqSumQualData(filename, m, lines[i].start, lines[i].end, namefile, nameMap);
                        pDataArray.push_back(tempSum);
                        processIDS.push_back(i);
-                       
-                       //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
-                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+        
                        hThreadArray[i] = CreateThread(NULL, 0, MySeqSumQualThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
                }
                
@@ -399,6 +398,18 @@ int SummaryQualCommand::createProcessesCreateSummary(vector<int>& position, vect
                //Close all thread handles and free memory allocations.
                for(int i=0; i < pDataArray.size(); i++){
                        numSeqs += pDataArray[i]->count;
+            int tempNum = pDataArray[i]->position.size();
+            if (position.size() < tempNum) { position.resize(tempNum, 0); }
+                       if (averageQ.size() < tempNum) { averageQ.resize(tempNum, 0); }
+                       if (scores.size() < tempNum) { 
+                               scores.resize(tempNum); 
+                               for (int i = 0; i < scores.size(); i++) { scores[i].resize(41, 0); }
+                       }
+            
+            for (int k = 0; k < tempNum; k++)                  {                position[k]    +=  pDataArray[i]->position[k];         }               
+                       for (int k = 0; k < tempNum; k++)                       {                averageQ[k]    +=  pDataArray[i]->averageQ[k];         }               
+                       for (int k = 0; k < tempNum; k++)                       {       for (int j = 0; j < 41; j++) {  scores[k][j] += pDataArray[i]->scores[k][j];   }        }
+
                        CloseHandle(hThreadArray[i]);
                        delete pDataArray[i];
                }
index 1ec3cf1891893227d07d256cecd15f742ee9d127..bbd103c1d14f7b40badd9654152aec0fd2b47a10 100644 (file)
@@ -58,9 +58,9 @@ private:
 // This is passed by void pointer so it can be any data type
 // that can be passed using a single void pointer (LPVOID).
 struct seqSumQualData {
-       vector<int>* position;
-       vector<int>* averageQ;
-       vector< vector<int> >* scores; 
+       vector<int> position;
+       vector<int> averageQ;
+       vector< vector<int> > scores; 
        string filename, namefile; 
        unsigned long long start;
        unsigned long long end;
@@ -69,10 +69,7 @@ struct seqSumQualData {
        map<string, int> nameMap;
        
        ~seqSumQualData(){}
-       seqSumQualData(vector<int>* p, vector<int>* a, vector< vector<int> >* s, string f, MothurOut* mout, unsigned long long st, unsigned long long en, string n, map<string, int> nam) {
-               position = p;
-               averageQ = a;
-               scores = s;
+       seqSumQualData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string n, map<string, int> nam) {
                filename = f;
                m = mout;
                start = st;
@@ -84,7 +81,7 @@ struct seqSumQualData {
 };
 
 /**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
 static DWORD WINAPI MySeqSumQualThreadFunction(LPVOID lpParam){ 
        seqSumQualData* pDataArray;
@@ -122,20 +119,20 @@ static DWORD WINAPI MySeqSumQualThreadFunction(LPVOID lpParam){
                                vector<int> thisScores = current.getQualityScores();
                                
                                //resize to num of positions setting number of seqs with that size to 1
-                               if (pDataArray->position->size() < thisScores.size()) { pDataArray->position->resize(thisScores.size(), 0); }
-                               if (pDataArray->averageQ->size() < thisScores.size()) { pDataArray->averageQ->resize(thisScores.size(), 0); }
-                               if (pDataArray->scores->size() < thisScores.size()) { 
-                                       pDataArray->scores->resize(thisScores.size()); 
-                                       for (int i = 0; i < pDataArray->scores->size(); i++) { pDataArray->scores->at(i).resize(41, 0); }
+                               if (pDataArray->position.size() < thisScores.size()) { pDataArray->position.resize(thisScores.size(), 0); }
+                               if (pDataArray->averageQ.size() < thisScores.size()) { pDataArray->averageQ.resize(thisScores.size(), 0); }
+                               if (pDataArray->scores.size() < thisScores.size()) { 
+                                       pDataArray->scores.resize(thisScores.size()); 
+                                       for (int i = 0; i < pDataArray->scores.size(); i++) { pDataArray->scores.at(i).resize(41, 0); }
                                }
                                
                                //increase counts of number of seqs with this position
                                //average is really the total, we will average in execute
                                for (int i = 0; i < thisScores.size(); i++) { 
-                                       pDataArray->position->at(i) += num; 
-                                       pDataArray->averageQ->at(i) += (thisScores[i] * num); //weighting for namesfile
+                                       pDataArray->position.at(i) += num; 
+                                       pDataArray->averageQ.at(i) += (thisScores[i] * num); //weighting for namesfile
                                        if (thisScores[i] > 40) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " has a quality scores of " + toString(thisScores[i]) + ", expecting values to be less than 40."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
-                                       else { pDataArray->scores->at(i)[thisScores[i]] += num; }  
+                                       else { pDataArray->scores.at(i)[thisScores[i]] += num; }  
                                }
                                
                                count += num;
index 7116e46e0f9e96045fc3c60a717ae41819949118..8c4ea0d6cd5b4cf26e15ec3460da8e5fd74e5fb6 100644 (file)
@@ -8,47 +8,6 @@
  */
 
 #include "summarysharedcommand.h"
-#include "sharedsobscollectsummary.h"
-#include "sharedchao1.h"
-#include "sharedace.h"
-#include "sharednseqs.h"
-#include "sharedjabund.h"
-#include "sharedsorabund.h"
-#include "sharedjclass.h"
-#include "sharedsorclass.h"
-#include "sharedjest.h"
-#include "sharedsorest.h"
-#include "sharedthetayc.h"
-#include "sharedthetan.h"
-#include "sharedkstest.h"
-#include "whittaker.h"
-#include "sharedochiai.h"
-#include "sharedanderbergs.h"
-#include "sharedkulczynski.h"
-#include "sharedkulczynskicody.h"
-#include "sharedlennon.h"
-#include "sharedmorisitahorn.h"
-#include "sharedbraycurtis.h"
-#include "sharedjackknife.h"
-#include "whittaker.h"
-#include "odum.h"
-#include "canberra.h"
-#include "structeuclidean.h"
-#include "structchord.h"
-#include "hellinger.h"
-#include "manhattan.h"
-#include "structpearson.h"
-#include "soergel.h"
-#include "spearman.h"
-#include "structkulczynski.h"
-#include "structchi2.h"
-#include "speciesprofile.h"
-#include "hamming.h"
-#include "gower.h"
-#include "memchi2.h"
-#include "memchord.h"
-#include "memeuclidean.h"
-#include "mempearson.h"
 
 //**********************************************************************************************************************
 vector<string> SummarySharedCommand::setParameters(){  
@@ -508,152 +467,209 @@ int SummarySharedCommand::execute(){
 /***********************************************************/
 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
        try {
-                       vector< vector<seqDist> > calcDists;  //vector containing vectors that contains the summary results for each group compare
-                       calcDists.resize(sumCalculators.size()); //one for each calc, this will be used to make .dist files
-                               
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                               if(processors == 1){
-                                       driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
-                                       m->appendFiles((sumFileName + ".temp"), sumFileName);
-                                       m->mothurRemove((sumFileName + ".temp"));
-                                       if (mult) {
-                                               m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
-                                               m->mothurRemove((sumAllFileName + ".temp"));
-                                       }
-                               }else{
-                                       int process = 1;
-                                       vector<int> processIDS;
-               
-                                       //loop through and create all the processes you want
-                                       while (process != processors) {
-                                               int pid = fork();
-                                               
-                                               if (pid > 0) {
-                                                       processIDS.push_back(pid); 
-                                                       process++;
-                                               }else if (pid == 0){
-                                                       driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
-                                                       
-                                                       //only do this if you want a distance file
-                                                       if (createPhylip) {
-                                                               string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
-                                                               ofstream outtemp;
-                                                               m->openOutputFile(tempdistFileName, outtemp);
-                                                               
-                                                               for (int i = 0; i < calcDists.size(); i++) {
-                                                                       outtemp << calcDists[i].size() << endl;
-                                                                       
-                                                                       for (int j = 0; j < calcDists[i].size(); j++) {
-                                                                               outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
-                                                                       }
-                                                               }
-                                                               outtemp.close();
-                                                       }
-                                                       
-                                                       exit(0);
-                                               }else { 
-                                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
-                                                       for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
-                                                       exit(0);
-                                               }
-                                       }
-                                       
-                                       //parent do your part
-                                       driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
-                                       m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
-                                       m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
-                                       if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
-                                               
-                                       //force parent to wait until all the processes are done
-                                       for (int i = 0; i < processIDS.size(); i++) {
-                                               int temp = processIDS[i];
-                                               wait(&temp);
-                                       }
-                                       
-                                       for (int i = 0; i < processIDS.size(); i++) {
-                                               m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
-                                               m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
-                                               if (mult) {     m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp"));  }
-                                               
-                                               if (createPhylip) {
-                                                       string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
-                                                       ifstream intemp;
-                                                       m->openInputFile(tempdistFileName, intemp);
-                                                       
-                                                       for (int k = 0; k < calcDists.size(); k++) {
-                                                               int size = 0;
-                                                               intemp >> size; m->gobble(intemp);
-                                                                       
-                                                               for (int j = 0; j < size; j++) {
-                                                                       int seq1 = 0;
-                                                                       int seq2 = 0;
-                                                                       float dist = 1.0;
-                                                                       
-                                                                       intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
-                                                                       
-                                                                       seqDist tempDist(seq1, seq2, dist);
-                                                                       calcDists[k].push_back(tempDist);
-                                                               }
-                                                       }
-                                                       intemp.close();
-                                                       m->mothurRemove(tempdistFileName);
-                                               }
-                                       }
+        vector< vector<seqDist> > calcDists;  //vector containing vectors that contains the summary results for each group compare
+        calcDists.resize(sumCalculators.size()); //one for each calc, this will be used to make .dist files
+        
+        
+        if(processors == 1){
+            driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
+            m->appendFiles((sumFileName + ".temp"), sumFileName);
+            m->mothurRemove((sumFileName + ".temp"));
+            if (mult) {
+                m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
+                m->mothurRemove((sumAllFileName + ".temp"));
+            }
+        }else{
+            
+            int process = 1;
+            vector<int> processIDS;
+            
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            //loop through and create all the processes you want
+            while (process != processors) {
+                int pid = fork();
+                
+                if (pid > 0) {
+                    processIDS.push_back(pid); 
+                    process++;
+                }else if (pid == 0){
+                    driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
+                    
+                    //only do this if you want a distance file
+                    if (createPhylip) {
+                        string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
+                        ofstream outtemp;
+                        m->openOutputFile(tempdistFileName, outtemp);
+                        
+                        for (int i = 0; i < calcDists.size(); i++) {
+                            outtemp << calcDists[i].size() << endl;
+                            
+                            for (int j = 0; j < calcDists[i].size(); j++) {
+                                outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
+                            }
+                        }
+                        outtemp.close();
+                    }
+                    
+                    exit(0);
+                }else { 
+                    m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                    for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                    exit(0);
+                }
+            }
+            
+            //parent do your part
+            driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
+            m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
+            m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
+            if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
+            
+            //force parent to wait until all the processes are done
+            for (int i = 0; i < processIDS.size(); i++) {
+                int temp = processIDS[i];
+                wait(&temp);
+            }
+            
+            for (int i = 0; i < processIDS.size(); i++) {
+                m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
+                m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
+                if (mult) {    m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp"));  }
+                
+                if (createPhylip) {
+                    string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
+                    ifstream intemp;
+                    m->openInputFile(tempdistFileName, intemp);
+                    
+                    for (int k = 0; k < calcDists.size(); k++) {
+                        int size = 0;
+                        intemp >> size; m->gobble(intemp);
+                        
+                        for (int j = 0; j < size; j++) {
+                            int seq1 = 0;
+                            int seq2 = 0;
+                            float dist = 1.0;
+                            
+                            intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
+                            
+                            seqDist tempDist(seq1, seq2, dist);
+                            calcDists[k].push_back(tempDist);
+                        }
+                    }
+                    intemp.close();
+                    m->mothurRemove(tempdistFileName);
+                }
+            }
+#else
+            //////////////////////////////////////////////////////////////////////////////////////////////////////
+            //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
+            //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+            //Taking advantage of shared memory to pass results vectors.
+            //////////////////////////////////////////////////////////////////////////////////////////////////////
 
-                               }
-                       #else
-                               driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp"), calcDists);
-                               m->appendFiles((sumFileName + ".temp"), sumFileName);
-                               m->mothurRemove((sumFileName + ".temp"));
-                               if (mult) {
-                                       m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
-                                       m->mothurRemove((sumAllFileName + ".temp"));
-                               }
-                       #endif
-                       
-                       if (createPhylip) {
-                               for (int i = 0; i < calcDists.size(); i++) {
-                                       if (m->control_pressed) { break; }
+            vector<summarySharedData*> pDataArray; 
+            DWORD   dwThreadIdArray[processors-1];
+            HANDLE  hThreadArray[processors-1]; 
+            
+            //Create processor worker threads.
+            for( int i=1; i<processors; i++ ){
+                
+                //make copy of lookup so we don't get access violations
+                vector<SharedRAbundVector*> newLookup;
+                for (int k = 0; k < thisLookup.size(); k++) {
+                    SharedRAbundVector* temp = new SharedRAbundVector();
+                    temp->setLabel(thisLookup[k]->getLabel());
+                    temp->setGroup(thisLookup[k]->getGroup());
+                    newLookup.push_back(temp);
+                }
+                
+                //for each bin
+                for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
+                    if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+                    for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
+                }
+
+                // Allocate memory for thread data.
+                summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
+                pDataArray.push_back(tempSum);
+                processIDS.push_back(i);
+                
+                hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
+            }
+            
+            //parent do your part
+            driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);   
+            m->appendFiles((sumFileName + "0.temp"), sumFileName);
+            m->mothurRemove((sumFileName + "0.temp"));
+            if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
+            
+            //Wait until all threads have terminated.
+            WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+            
+            //Close all thread handles and free memory allocations.
+            for(int i=0; i < pDataArray.size(); i++){
+                m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
+                m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
+                
+                for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
+                
+                if (createPhylip) {
+                    for (int k = 0; k < calcDists.size(); k++) {
+                        int size = pDataArray[i]->calcDists[k].size();
+                        for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
+                    }
+                }
+              
+                CloseHandle(hThreadArray[i]);
+                delete pDataArray[i];
+            }
+
+#endif
+        }
+              
+        if (createPhylip) {
+            for (int i = 0; i < calcDists.size(); i++) {
+                if (m->control_pressed) { break; }
                                
-                                       string distFileName = outputDir + m->getRootName(m->getSimpleName(sumFileName)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
-                                       outputNames.push_back(distFileName);
-                                       ofstream outDist;
-                                       m->openOutputFile(distFileName, outDist);
-                                       outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
-                                       
-                                       //initialize matrix
-                                       vector< vector<float> > matrix; //square matrix to represent the distance
-                                       matrix.resize(thisLookup.size());
-                                       for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
-                                       
-                                       
-                                       for (int j = 0; j < calcDists[i].size(); j++) {
-                                               int row = calcDists[i][j].seq1;
-                                               int column = calcDists[i][j].seq2;
-                                               float dist = calcDists[i][j].dist;
-                                               
-                                               matrix[row][column] = dist;
-                                               matrix[column][row] = dist;
-                                       }
+                string distFileName = outputDir + m->getRootName(m->getSimpleName(sumFileName)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
+                outputNames.push_back(distFileName);
+                ofstream outDist;
+                m->openOutputFile(distFileName, outDist);
+                outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
+                
+                //initialize matrix
+                vector< vector<float> > matrix; //square matrix to represent the distance
+                matrix.resize(thisLookup.size());
+                for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
+                
+                
+                for (int j = 0; j < calcDists[i].size(); j++) {
+                    int row = calcDists[i][j].seq1;
+                    int column = calcDists[i][j].seq2;
+                    float dist = calcDists[i][j].dist;
+                    
+                    matrix[row][column] = dist;
+                    matrix[column][row] = dist;
+                }
+                
+                //output to file
+                outDist << thisLookup.size() << endl;
+                for (int r=0; r<thisLookup.size(); r++) { 
+                    //output name
+                    string name = thisLookup[r]->getGroup();
+                    if (name.length() < 10) { //pad with spaces to make compatible
+                        while (name.length() < 10) {  name += " ";  }
+                    }
+                    outDist << name << '\t';
                                        
-                                       //output to file
-                                       outDist << thisLookup.size() << endl;
-                                       for (int r=0; r<thisLookup.size(); r++) { 
-                                               //output name
-                                               string name = thisLookup[r]->getGroup();
-                                               if (name.length() < 10) { //pad with spaces to make compatible
-                                                       while (name.length() < 10) {  name += " ";  }
-                                               }
-                                               outDist << name << '\t';
-                                       
-                                               //output distances
-                                               for (int l = 0; l < r; l++) {   outDist  << matrix[r][l] << '\t';  }
-                                               outDist << endl;
-                                       }
-                                       
-                                       outDist.close();
-                               }
-                       }
+                    //output distances
+                    for (int l = 0; l < r; l++) {      outDist  << matrix[r][l] << '\t';  }
+                    outDist << endl;
+                }
+                
+                outDist.close();
+            }
+        }
                return 0;
        }
        catch(exception& e) {
index f366d0f62432e0bc34c475b190f946a04ba26335..fbfea7bd98fac33bb8570934b6bcd9e9d99ceaba 100644 (file)
 #include "inputdata.h"
 #include "calculator.h"
 #include "validcalculator.h"
+#include "sharedsobscollectsummary.h"
+#include "sharedchao1.h"
+#include "sharedace.h"
+#include "sharednseqs.h"
+#include "sharedjabund.h"
+#include "sharedsorabund.h"
+#include "sharedjclass.h"
+#include "sharedsorclass.h"
+#include "sharedjest.h"
+#include "sharedsorest.h"
+#include "sharedthetayc.h"
+#include "sharedthetan.h"
+#include "sharedkstest.h"
+#include "whittaker.h"
+#include "sharedochiai.h"
+#include "sharedanderbergs.h"
+#include "sharedkulczynski.h"
+#include "sharedkulczynskicody.h"
+#include "sharedlennon.h"
+#include "sharedmorisitahorn.h"
+#include "sharedbraycurtis.h"
+#include "sharedjackknife.h"
+#include "whittaker.h"
+#include "odum.h"
+#include "canberra.h"
+#include "structeuclidean.h"
+#include "structchord.h"
+#include "hellinger.h"
+#include "manhattan.h"
+#include "structpearson.h"
+#include "soergel.h"
+#include "spearman.h"
+#include "structkulczynski.h"
+#include "structchi2.h"
+#include "speciesprofile.h"
+#include "hamming.h"
+#include "gower.h"
+#include "memchi2.h"
+#include "memchord.h"
+#include "memeuclidean.h"
+#include "mempearson.h"
 
 class SummarySharedCommand : public Command {
 
@@ -55,4 +96,184 @@ private:
 
 };
 
+/**************************************************************************************************/
+//custom data structure for threads to use.
+//main process handling the calcs that can do more than 2 groups
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct summarySharedData {
+    vector<SharedRAbundVector*> thisLookup;
+    vector< vector<seqDist> > calcDists;
+    vector<string>  Estimators;
+       unsigned long long start;
+       unsigned long long end;
+       MothurOut* m;
+       string sumFile;
+       
+       summarySharedData(){}
+       summarySharedData(string sf, MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
+               sumFile = sf;
+               m = mout;
+               start = st;
+               end = en;
+        Estimators = est;
+        thisLookup = lu;
+       }
+};
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MySummarySharedThreadFunction(LPVOID lpParam){ 
+       summarySharedData* pDataArray;
+       pDataArray = (summarySharedData*)lpParam;
+       
+       try {
+        
+        vector<Calculator*> sumCalculators;
+        ValidCalculators validCalculator;
+        for (int i=0; i<pDataArray->Estimators.size(); i++) {
+            if (validCalculator.isValidCalculator("sharedsummary", pDataArray->Estimators[i]) == true) { 
+                if (pDataArray->Estimators[i] == "sharedsobs") { 
+                    sumCalculators.push_back(new SharedSobsCS());
+                }else if (pDataArray->Estimators[i] == "sharedchao") { 
+                    sumCalculators.push_back(new SharedChao1());
+                }else if (pDataArray->Estimators[i] == "sharedace") { 
+                    sumCalculators.push_back(new SharedAce());
+                }else if (pDataArray->Estimators[i] == "jabund") {     
+                    sumCalculators.push_back(new JAbund());
+                }else if (pDataArray->Estimators[i] == "sorabund") { 
+                    sumCalculators.push_back(new SorAbund());
+                }else if (pDataArray->Estimators[i] == "jclass") { 
+                    sumCalculators.push_back(new Jclass());
+                }else if (pDataArray->Estimators[i] == "sorclass") { 
+                    sumCalculators.push_back(new SorClass());
+                }else if (pDataArray->Estimators[i] == "jest") { 
+                    sumCalculators.push_back(new Jest());
+                }else if (pDataArray->Estimators[i] == "sorest") { 
+                    sumCalculators.push_back(new SorEst());
+                }else if (pDataArray->Estimators[i] == "thetayc") { 
+                    sumCalculators.push_back(new ThetaYC());
+                }else if (pDataArray->Estimators[i] == "thetan") { 
+                    sumCalculators.push_back(new ThetaN());
+                }else if (pDataArray->Estimators[i] == "kstest") { 
+                    sumCalculators.push_back(new KSTest());
+                }else if (pDataArray->Estimators[i] == "sharednseqs") { 
+                    sumCalculators.push_back(new SharedNSeqs());
+                }else if (pDataArray->Estimators[i] == "ochiai") { 
+                    sumCalculators.push_back(new Ochiai());
+                }else if (pDataArray->Estimators[i] == "anderberg") { 
+                    sumCalculators.push_back(new Anderberg());
+                }else if (pDataArray->Estimators[i] == "kulczynski") { 
+                    sumCalculators.push_back(new Kulczynski());
+                }else if (pDataArray->Estimators[i] == "kulczynskicody") { 
+                    sumCalculators.push_back(new KulczynskiCody());
+                }else if (pDataArray->Estimators[i] == "lennon") { 
+                    sumCalculators.push_back(new Lennon());
+                }else if (pDataArray->Estimators[i] == "morisitahorn") { 
+                    sumCalculators.push_back(new MorHorn());
+                }else if (pDataArray->Estimators[i] == "braycurtis") { 
+                    sumCalculators.push_back(new BrayCurtis());
+                }else if (pDataArray->Estimators[i] == "whittaker") { 
+                    sumCalculators.push_back(new Whittaker());
+                }else if (pDataArray->Estimators[i] == "odum") { 
+                    sumCalculators.push_back(new Odum());
+                }else if (pDataArray->Estimators[i] == "canberra") { 
+                    sumCalculators.push_back(new Canberra());
+                }else if (pDataArray->Estimators[i] == "structeuclidean") { 
+                    sumCalculators.push_back(new StructEuclidean());
+                }else if (pDataArray->Estimators[i] == "structchord") { 
+                    sumCalculators.push_back(new StructChord());
+                }else if (pDataArray->Estimators[i] == "hellinger") { 
+                    sumCalculators.push_back(new Hellinger());
+                }else if (pDataArray->Estimators[i] == "manhattan") { 
+                    sumCalculators.push_back(new Manhattan());
+                }else if (pDataArray->Estimators[i] == "structpearson") { 
+                    sumCalculators.push_back(new StructPearson());
+                }else if (pDataArray->Estimators[i] == "soergel") { 
+                    sumCalculators.push_back(new Soergel());
+                }else if (pDataArray->Estimators[i] == "spearman") { 
+                    sumCalculators.push_back(new Spearman());
+                }else if (pDataArray->Estimators[i] == "structkulczynski") { 
+                    sumCalculators.push_back(new StructKulczynski());
+                }else if (pDataArray->Estimators[i] == "speciesprofile") { 
+                    sumCalculators.push_back(new SpeciesProfile());
+                }else if (pDataArray->Estimators[i] == "hamming") { 
+                    sumCalculators.push_back(new Hamming());
+                }else if (pDataArray->Estimators[i] == "structchi2") { 
+                    sumCalculators.push_back(new StructChi2());
+                }else if (pDataArray->Estimators[i] == "gower") { 
+                    sumCalculators.push_back(new Gower());
+                }else if (pDataArray->Estimators[i] == "memchi2") { 
+                    sumCalculators.push_back(new MemChi2());
+                }else if (pDataArray->Estimators[i] == "memchord") { 
+                    sumCalculators.push_back(new MemChord());
+                }else if (pDataArray->Estimators[i] == "memeuclidean") { 
+                    sumCalculators.push_back(new MemEuclidean());
+                }else if (pDataArray->Estimators[i] == "mempearson") { 
+                    sumCalculators.push_back(new MemPearson());
+                }
+            }
+        }
+        
+        pDataArray->calcDists.resize(sumCalculators.size());
+        
+               ofstream outputFileHandle;
+               pDataArray->m->openOutputFile(pDataArray->sumFile, outputFileHandle);
+               
+               vector<SharedRAbundVector*> subset;
+               for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
+            
+                       for (int l = 0; l < k; l++) {
+                               
+                               outputFileHandle << pDataArray->thisLookup[0]->getLabel() << '\t';
+                               
+                               subset.clear(); //clear out old pair of sharedrabunds
+                               //add new pair of sharedrabunds
+                               subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]); 
+                               
+                               //sort groups to be alphanumeric
+                               if (pDataArray->thisLookup[k]->getGroup() > pDataArray->thisLookup[l]->getGroup()) {
+                                       outputFileHandle << (pDataArray->thisLookup[l]->getGroup() +'\t' + pDataArray->thisLookup[k]->getGroup()) << '\t'; //print out groups
+                               }else{
+                                       outputFileHandle << (pDataArray->thisLookup[k]->getGroup() +'\t' + pDataArray->thisLookup[l]->getGroup()) << '\t'; //print out groups
+                               }
+                               
+                               for(int i=0;i<sumCalculators.size();i++) {
+                                       
+                                       //if this calc needs all groups to calculate the pair load all groups
+                                       if (sumCalculators[i]->getNeedsAll()) { 
+                                               //load subset with rest of lookup for those calcs that need everyone to calc for a pair
+                                               for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
+                                                       if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
+                                               }
+                                       }
+                                       
+                                       vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
+                                       
+                                       if (pDataArray->m->control_pressed) { for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; } outputFileHandle.close(); return 1; }
+                                       
+                                       outputFileHandle << '\t';
+                                       sumCalculators[i]->print(outputFileHandle);
+                                       
+                                       seqDist temp(l, k, tempdata[0]);
+                                       pDataArray->calcDists[i].push_back(temp);
+                               }
+                               outputFileHandle << endl;
+                       }
+               }
+               
+               outputFileHandle.close();
+        for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "SummarySharedCommand", "MySummarySharedThreadFunction");
+               exit(1);
+       }
+} 
+#endif
+
+
 #endif
index b99373510ba4d6e9d3e0d323021178227ede92e7..d9d71aea2dd1859d6e198302325fb51d2fdf9367 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -706,7 +706,7 @@ void Tree::randomLabels(string groupA, string groupB) {
                exit(1);
        }
 }
-/**************************************************************************************************/
+**************************************************************************************************/
 void Tree::randomBlengths()  {
        try {
                for(int i=numNodes-1;i>=0;i--){
diff --git a/trialSwap2.cpp b/trialSwap2.cpp
new file mode 100644 (file)
index 0000000..c580436
--- /dev/null
@@ -0,0 +1,1024 @@
+#include "trialswap2.h"
+
+
+//The sum_of_squares, havel_hakimi and calc_c_score algorithms have been adapted from I. Miklos and J. Podani. 2004. Randomization of presence-absence matrices: comments and new algorithms. Ecology 85:86-92.
+
+
+/**************************************************************************************************
+int TrialSwap2::intrand(int n){
+    try {
+        double z;
+        
+        z = (double)random() * (double)n / (double)RAND_MAX;
+        if(z>=n)
+            z=n-1;
+        if(z<0)
+            z=0;
+        return((int)floor(z));
+    }
+       catch(exception& e) {
+               m->errorOut(e, "TrialSwap2", "intrand");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+/* completely random matrix, all column and row totals are variable, matrix size is the same
+ *
+ *
+ */
+/**************************************************************************************************/
+/*
+ * Replaces co_matrix with a matrix of the same dimensions in which every cell
+ * is drawn independently: 1 when rand()/RAND_MAX exceeds 0.5, otherwise 0.
+ * Row generation stops early when m->control_pressed is set; the rows built
+ * so far are still assigned to co_matrix. Always returns 0.
+ */
+int TrialSwap2::sim1(vector<vector<int> > &co_matrix){
+    try {
+        const int numRows = co_matrix.size();
+        const int numCols = co_matrix[0].size();
+
+        vector<vector<int> > randomized;
+
+        for (int r = 0; r < numRows && !m->control_pressed; r++) {
+            vector<int> cells(numCols);
+            for (int c = 0; c < numCols; c++) {
+                const double draw = rand() / double(RAND_MAX);
+                cells[c] = (draw > 0.5) ? 1 : 0;
+            }
+            randomized.push_back(cells);
+        }
+
+        co_matrix = randomized;
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "sim1");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ *row sums fixed, columns equiprobable 
+ */
+/*
+ * Shuffles the entries of every row of co_matrix in place with
+ * random_shuffle, so each row keeps its total while positions are permuted.
+ * Stops at the first row for which m->control_pressed is set.
+ */
+void TrialSwap2::sim2(vector<vector<int> > &co_matrix)
+{
+    try {
+        for (vector<vector<int> >::iterator row = co_matrix.begin(); row != co_matrix.end(); ++row) {
+            if (m->control_pressed) { return; }
+            random_shuffle(row->begin(), row->end());
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "sim2");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ * Rebuilds co_matrix row by row. Row i receives rowtotal[i] ones, each placed
+ * in a column chosen with equal probability (1/ncols); draws that land on an
+ * already filled column are repeated.
+ *
+ * rowtotal   - requested number of ones per row (assumed to hold at least
+ *              co_matrix.size() entries).
+ * co_matrix  - supplies the dimensions and receives the result.
+ *
+ * Returns 0. An empty matrix is left untouched. A row total larger than the
+ * number of columns is capped at ncols, because such a row could never be
+ * completed. If m->control_pressed is set the function returns before
+ * co_matrix is modified.
+ */
+int TrialSwap2::sim2plus(vector<int> rowtotal, vector<vector<int> > &co_matrix)
+{
+    try {
+        if (co_matrix.empty() || co_matrix[0].empty()) { return 0; }
+
+        const int nrows = co_matrix.size();
+        const int ncols = co_matrix[0].size();
+        const double cellprob = 1.0/ncols;
+
+        //cumulative upper bound of each column's probability interval
+        vector<double> cellprobvec(ncols, 0.0);
+        double start = 0.0;
+        for(int i=0; i<ncols; i++)
+        {
+            if (m->control_pressed) { return 0; }
+            start += cellprob;
+            cellprobvec[i] = start;
+        }
+
+        vector<vector<int> > tmpmatrix;
+        tmpmatrix.reserve(nrows);
+
+        for(int i=0; i<nrows; i++)
+        {
+            vector<int> tmprow(ncols, 0);
+            const int target = min(rowtotal[i], ncols);
+            int filled = 0; //number of ones already placed in this row
+
+            while (filled < target)
+            {
+                if (m->control_pressed) { return 0; }
+                const double randNum = rand() / double(RAND_MAX);
+
+                //first column whose cumulative bound is >= randNum, i.e. the
+                //interval (previous bound, bound] that contains the draw
+                const int j = int(lower_bound(cellprobvec.begin(), cellprobvec.end(), randNum) - cellprobvec.begin());
+
+                //j == ncols only when rounding leaves the last bound below the draw
+                if (j < ncols && tmprow[j] != 1)
+                {
+                    tmprow[j] = 1;
+                    filled++;
+                }
+            }
+            tmpmatrix.push_back(tmprow);
+        }
+
+        co_matrix = tmpmatrix;
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "sim2plus");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ * same as sim2 but using initmatrix which is the initial co-occurrence matrix before transposition
+ * may have to be changed depending on what matrix 'seed' is used. One way to use is to transpose
+ * every null matrix before using an index and use the random matrix as a seed for the next null.
+ */
+/**************************************************************************************************/
+/*
+ * Permutes the entries inside each row of initmatrix in place using
+ * random_shuffle. Processing stops at the first row for which
+ * m->control_pressed is set.
+ */
+void TrialSwap2::sim3(vector<vector<int> > &initmatrix)
+{
+    try {
+        for (vector<vector<int> >::iterator row = initmatrix.begin(); row != initmatrix.end(); ++row) {
+            if (m->control_pressed) { return; }
+            random_shuffle(row->begin(), row->end());
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "sim3");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ *
+ *
+ *
+ */
+/**************************************************************************************************/
+/*
+ * Builds a matrix with rowtotal.size() rows and columntotal.size() columns.
+ * Row i receives rowtotal[i] ones; each one is placed in a column drawn with
+ * probability columntotal[j] / sum(columntotal). Draws that hit an already
+ * filled column are repeated.
+ *
+ * Returns 0 and stores the result in co_matrix. If m->control_pressed is set
+ * the function returns early without touching co_matrix.
+ *
+ * Safety: a row total is capped at the number of columns with a non-zero
+ * total, and an all-zero columntotal yields all-zero rows; either case would
+ * otherwise never terminate.
+ */
+int TrialSwap2::sim4(vector<int> columntotal, vector<int> rowtotal, vector<vector<int> > &co_matrix)
+{
+    try {
+        const int ncols = columntotal.size();
+        const int nrows = rowtotal.size();
+
+        const double colSum = accumulate( columntotal.begin(), columntotal.end(), 0 );
+
+        //range[j] is the cumulative probability up to and including column j
+        vector<double> range(ncols, 0.0);
+        int selectable = 0; //columns that can actually be drawn
+        double start = 0.0;
+        for(int i=0;i<ncols;i++)
+        {
+            if (m->control_pressed) { return 0; }
+            if (columntotal[i] > 0) { selectable++; }
+            if (colSum > 0) { start += columntotal[i]/colSum; }
+            range[i] = start;
+        }
+
+        vector<vector<int> > tmpmatrix;
+        tmpmatrix.reserve(nrows);
+
+        for(int i=0;i<nrows;i++)
+        {
+            if (m->control_pressed) { return 0; }
+
+            vector<int> tmprow(ncols, 0);
+            const int target = min(rowtotal[i], selectable);
+            int filled = 0;
+
+            while (filled < target)
+            {
+                if (m->control_pressed) { return 0; }
+
+                const double randNum = rand() / double(RAND_MAX);
+                const int j = int(lower_bound(range.begin(), range.end(), randNum) - range.begin());
+                if (j >= ncols) { continue; } //rounding left the last bound below the draw
+
+                //skip columns whose probability interval is empty
+                const double lower = (j > 0) ? range[j-1] : 0.0;
+                if (range[j] > lower && tmprow[j] != 1)
+                {
+                    tmprow[j] = 1;
+                    filled++;
+                }
+            }
+            tmpmatrix.push_back(tmprow);
+        }
+
+        co_matrix = tmpmatrix;
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "sim4");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ * inverse of sim4, MUST BE TRANSPOSED BEFORE CO-OCCURRENCE ANALYSIS
+ *
+ *
+ */
+/**************************************************************************************************/
+/*
+ * Same procedure as sim4, applied to the initial (untransposed) matrix:
+ * builds initrowtotal.size() rows of initcolumntotal.size() cells, giving row
+ * i initrowtotal[i] ones placed in columns drawn with probability
+ * initcolumntotal[j] / sum(initcolumntotal).
+ *
+ * Returns 0 and stores the result in initmatrix. If m->control_pressed is set
+ * the function returns early without touching initmatrix.
+ *
+ * Safety: a row total is capped at the number of columns with a non-zero
+ * total, and an all-zero initcolumntotal yields all-zero rows; either case
+ * would otherwise never terminate.
+ */
+int TrialSwap2::sim5(vector<int> initcolumntotal,vector<int> initrowtotal, vector<vector<int> > &initmatrix)
+{
+    try {
+        const int ncols = initcolumntotal.size();
+        const int nrows = initrowtotal.size();
+
+        const double colSum = accumulate( initcolumntotal.begin(), initcolumntotal.end(), 0 );
+
+        //range[j] is the cumulative probability up to and including column j
+        vector<double> range(ncols, 0.0);
+        int selectable = 0; //columns that can actually be drawn
+        double start = 0.0;
+        for(int i=0;i<ncols;i++)
+        {
+            if (m->control_pressed) { return 0; }
+            if (initcolumntotal[i] > 0) { selectable++; }
+            if (colSum > 0) { start += initcolumntotal[i]/colSum; }
+            range[i] = start;
+        }
+
+        vector<vector<int> > tmpmatrix;
+        tmpmatrix.reserve(nrows);
+
+        for(int i=0;i<nrows;i++)
+        {
+            if (m->control_pressed) { return 0; }
+
+            vector<int> tmprow(ncols, 0);
+            const int target = min(initrowtotal[i], selectable);
+            int filled = 0;
+
+            while (filled < target)
+            {
+                if (m->control_pressed) { return 0; }
+
+                const double randNum = rand() / double(RAND_MAX);
+                const int j = int(lower_bound(range.begin(), range.end(), randNum) - range.begin());
+                if (j >= ncols) { continue; } //rounding left the last bound below the draw
+
+                //skip columns whose probability interval is empty
+                const double lower = (j > 0) ? range[j-1] : 0.0;
+                if (range[j] > lower && tmprow[j] != 1)
+                {
+                    tmprow[j] = 1;
+                    filled++;
+                }
+            }
+            tmpmatrix.push_back(tmprow);
+        }
+
+        initmatrix = tmpmatrix;
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "sim5");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ *
+ *
+ *
+ */
+/**************************************************************************************************/
+/*
+ * Builds co_matrix.size() rows of columntotal.size() cells. For every row a
+ * random row total is drawn as int((rand()/RAND_MAX) * 10), redrawn until it
+ * does not exceed the number of columns; that many ones are then placed in
+ * columns drawn with probability columntotal[j] / sum(columntotal).
+ * NOTE(review): the factor 10 is hard-coded in the original algorithm and is
+ * kept as is - confirm it is the intended upper bound for row totals.
+ *
+ * Returns 0 and stores the result in co_matrix. If m->control_pressed is set
+ * the function returns early without touching co_matrix.
+ *
+ * Safety: the drawn row total is capped at the number of columns with a
+ * non-zero total, and an all-zero columntotal yields all-zero rows; either
+ * case would otherwise never terminate.
+ */
+int TrialSwap2::sim6(vector<int> columntotal, vector<vector<int> > &co_matrix)
+{
+    try {
+        const int ncols = columntotal.size();
+        const int nrows = co_matrix.size();
+
+        const int colSum = accumulate( columntotal.begin(), columntotal.end(), 0 );
+
+        //range[j] is the cumulative probability up to and including column j
+        vector<double> range(ncols, 0.0);
+        int selectable = 0; //columns that can actually be drawn
+        double start = 0.0;
+        for(int i=0;i<ncols;i++)
+        {
+            if (m->control_pressed) { return 0; }
+            if (columntotal[i] > 0) { selectable++; }
+            if (colSum > 0) { start += columntotal[i]/double (colSum); }
+            range[i] = start;
+        }
+
+        vector<vector<int> > tmpmatrix;
+        tmpmatrix.reserve(nrows);
+
+        for(int i=0;i<nrows;i++)
+        {
+            if (m->control_pressed) { return 0; }
+
+            //random row total in [0,10], redrawn until it fits in the row
+            int tmprowtotal = (rand() / double (RAND_MAX)) * 10;
+            while ( tmprowtotal > ncols) {
+                if (m->control_pressed) { return 0; }
+                tmprowtotal = (rand() / double (RAND_MAX)) * 10;
+            }
+
+            vector<int> tmprow(ncols, 0);
+            const int target = min(tmprowtotal, selectable);
+            int filled = 0;
+
+            while (filled < target)
+            {
+                if (m->control_pressed) { return 0; }
+
+                const double randNum = rand() / double(RAND_MAX);
+                const int j = int(lower_bound(range.begin(), range.end(), randNum) - range.begin());
+                if (j >= ncols) { continue; } //rounding left the last bound below the draw
+
+                //skip columns whose probability interval is empty
+                const double lower = (j > 0) ? range[j-1] : 0.0;
+                if (range[j] > lower && tmprow[j] != 1)
+                {
+                    tmprow[j] = 1;
+                    filled++;
+                }
+            }
+
+            tmpmatrix.push_back(tmprow);
+        }
+
+        co_matrix = tmpmatrix;
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "sim6");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ * MUST BE TRANSPOSED BEFORE CO-OCCURRENCE ANALYSIS
+ *
+ *
+ */
+/**************************************************************************************************/
+/*
+ * Builds a matrix with the dimensions of co_matrix containing
+ * sum(initrowtotal) ones. Every cell of row i has probability
+ * initrowtotal[i] / (sum(initrowtotal) * ncols); the probabilities are laid
+ * out as one cumulative sequence over the whole matrix (row-major), a random
+ * number in [0,1] selects the cell whose interval contains it, and draws that
+ * hit an already filled cell are repeated.
+ *
+ * initrowtotal - per-row totals (assumed to hold at least co_matrix.size()
+ *                entries).
+ * co_matrix    - supplies the dimensions and receives the result.
+ *
+ * Returns 0. An empty matrix is left untouched. If m->control_pressed is set
+ * the function returns early without touching co_matrix.
+ *
+ * Fixes relative to the earlier implementation:
+ *  - the lower bound of the first cell of a row is the last cell of the
+ *    previous row (it used to read probmatrix[i][-1], out of bounds);
+ *  - exactly sum(initrowtotal) ones are placed (a goto used to bypass the
+ *    loop condition and could place more);
+ *  - the number of ones is capped at the number of cells with non-zero
+ *    probability so the loop always terminates.
+ */
+int TrialSwap2::sim7(vector<int> initrowtotal, vector<vector<int> > &co_matrix)
+{
+    try {
+        if (co_matrix.empty() || co_matrix[0].empty()) { return 0; }
+
+        const int nrows = co_matrix.size();
+        const int ncols = co_matrix[0].size();
+        const int ncells = nrows * ncols;
+
+        const int rowsum = accumulate( initrowtotal.begin(), initrowtotal.end(), 0 );
+        const double nc = double(rowsum) * ncols;
+
+        //cumulative probabilities over the whole matrix; start is never reset
+        //so the bounds run from 0 to 1 across all cells
+        vector<double> cumprob(ncells, 0.0);
+        int selectable = 0; //cells with non-zero probability
+        double start = 0.0;
+        for(int i=0;i<nrows;i++)
+        {
+            if (m->control_pressed) { return 0; }
+            const double cellprob = (nc > 0) ? initrowtotal[i]/nc : 0.0;
+            if (cellprob > 0) { selectable += ncols; }
+            for(int j=0;j<ncols;j++)
+            {
+                start += cellprob;
+                cumprob[ncols * i + j] = start;
+            }
+        }
+
+        vector<vector<int> > tmpmatrix(nrows, vector<int>(ncols, 0));
+
+        const int target = min(rowsum, selectable);
+        int k = 0; //ones placed so far
+        while(k < target)
+        {
+            if (m->control_pressed) { return 0; }
+
+            const double randNum = rand() / double(RAND_MAX);
+
+            //first cell whose cumulative bound is >= randNum
+            const int cell = int(lower_bound(cumprob.begin(), cumprob.end(), randNum) - cumprob.begin());
+            if (cell >= ncells) { continue; } //rounding left the last bound below the draw
+
+            //skip cells whose probability interval is empty
+            const double lower = (cell > 0) ? cumprob[cell-1] : 0.0;
+            if (cumprob[cell] <= lower) { continue; }
+
+            int& slot = tmpmatrix[cell / ncols][cell % ncols];
+            if (slot != 1)
+            {
+                slot = 1;
+                k++;
+            }
+        }
+
+        co_matrix = tmpmatrix;
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "sim7");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ *
+ *
+ *
+ */
+/**************************************************************************************************/
+/*
+ * Fills co_matrix (rowtotal.size() x columntotal.size(), which must already
+ * have at least those dimensions) with sum(rowtotal) ones. Cell (i,j) is
+ * drawn with probability rowtotal[i] * columntotal[j] / sum(rowtotal)^2; the
+ * probabilities form one cumulative sequence over the matrix (row-major) and
+ * a random number in [0,1] selects the cell whose interval contains it.
+ * Draws that hit an already chosen cell are repeated.
+ * NOTE(review): the probabilities only sum to 1 when sum(columntotal) equals
+ * sum(rowtotal) - confirm callers guarantee this.
+ *
+ * Returns 0. If m->control_pressed is set the function returns early without
+ * touching co_matrix.
+ *
+ * Fixes relative to the earlier implementation:
+ *  - cumulative bounds and the "already chosen" marks are kept in separate
+ *    containers; overwriting a bound with a sentinel made the following cell
+ *    unreachable and let zero-probability cells redirect draws to the next
+ *    cell;
+ *  - heap storage replaces the variable-length stack array;
+ *  - the number of ones is capped at the number of cells with non-zero
+ *    probability so the loop always terminates.
+ */
+int TrialSwap2::sim8(vector<int> columntotal, vector<int> rowtotal, vector<vector<int> > &co_matrix)
+{
+    try {
+        const int ncols = columntotal.size();
+        const int nrows = rowtotal.size();
+        const int ncells = nrows * ncols;
+
+        const double rowSum = accumulate( rowtotal.begin(), rowtotal.end(), 0 );
+
+        if (m->control_pressed) { return 0; }
+
+        //cumulative probability bound of every cell, row-major
+        vector<double> cumprob(ncells, 0.0);
+        int selectable = 0; //cells with non-zero probability
+        double start = 0.0;
+        for (int i=0;i<nrows;i++) {
+            if (m->control_pressed) { return 0; }
+            for (int j=0;j<ncols;j++) {
+                const double prob = (rowSum > 0) ? (double(rowtotal[i]) * columntotal[j])/(rowSum*rowSum) : 0.0;
+                if (prob > 0.0) { selectable++; }
+                start += prob;
+                cumprob[ncols * i + j] = start;
+            }
+        }
+
+        const int grandtotal = min(int(rowSum), selectable);
+        vector<char> chosen(ncells, 0);
+        int total = 0;
+
+        //generate a random number between 0 and 1 and locate its cell
+        while (total < grandtotal)  {
+            if (m->control_pressed) { return 0; }
+            const double randnum = rand() / double(RAND_MAX);
+
+            const int cell = int(lower_bound(cumprob.begin(), cumprob.end(), randnum) - cumprob.begin());
+            if (cell >= ncells) { continue; } //rounding left the last bound below the draw
+
+            //skip cells whose probability interval is empty
+            const double lower = (cell > 0) ? cumprob[cell-1] : 0.0;
+            if (cumprob[cell] <= lower) { continue; }
+
+            if (!chosen[cell]) {
+                chosen[cell] = 1;
+                total++;
+            }
+        }
+
+        for(int i=0;i<nrows;i++) {
+            if (m->control_pressed) { return 0; }
+            for(int j=0;j<ncols;j++) {
+                co_matrix[i][j] = chosen[ncols * i + j] ? 1 : 0;
+            }
+        }
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "sim8");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ * Computes the C-score of a presence/absence matrix: for every pair of rows
+ * (i,j) with i<j the product (rowtotal[i]-S)*(rowtotal[j]-S) is summed, where
+ * S is the number of columns in which both rows hold a 1, and the sum is
+ * divided by the number of row pairs, nrows*(nrows-1)/2.
+ *
+ * co_matrix - the matrix (assumed to hold at least rowtotal.size() rows).
+ * rowtotal  - number of ones in each row.
+ *
+ * Returns the C-score; 0 when there are fewer than two rows (no pairs) or
+ * when m->control_pressed is set.
+ */
+double TrialSwap2::calc_c_score (vector<vector<int> > &co_matrix,vector<int>  rowtotal)
+{
+    try {
+        const int nrows = rowtotal.size();
+        if (nrows < 2 || co_matrix.empty()) { return 0.0; } //no row pairs to score
+
+        const int ncols = co_matrix[0].size();
+        double cscore = 0.0;
+
+        for(int i=0;i<nrows-1;i++)
+        {
+            for(int j=i+1;j<nrows;j++)
+            {
+                if (m->control_pressed) { return 0; }
+
+                //count the columns in which both rows are present
+                int shared = 0;
+                for(int k=0;k<ncols;k++)
+                {
+                    if((co_matrix[i][k]==1)&&(co_matrix[j][k]==1)) { shared++; }
+                }
+
+                //rowtotal[i] = A, rowtotal[j] = B, shared = J: (A-J)*(B-J)
+                cscore += double(rowtotal[i]-shared) * double(rowtotal[j]-shared);
+            }
+        }
+
+        //pair count computed in floating point to avoid int overflow
+        const double pairs = 0.5 * nrows * (nrows - 1.0);
+        return cscore/pairs;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "calc_c_score");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ * Counts the row pairs (i,j), i<j, that never co-occur, i.e. for which no
+ * column holds a 1 in both rows.
+ *
+ * co_matrix - the matrix (assumed to hold at least rowtotal.size() rows).
+ * rowtotal  - only its size is used, as the number of rows to compare.
+ *
+ * Returns the number of such pairs; 0 for an empty matrix or when
+ * m->control_pressed is set.
+ */
+int TrialSwap2::calc_checker (vector<vector<int> > &co_matrix, vector<int>  rowtotal)
+{
+    try {
+        if (co_matrix.empty()) { return 0; }
+
+        const int nrows = rowtotal.size();
+        const int ncols = co_matrix[0].size();
+        int cunits=0;
+
+        for(int i=0;i<nrows-1;i++)
+        {
+            for(int j=i+1;j<nrows;j++)
+            {
+                if (m->control_pressed) { return 0; }
+
+                //a single shared column is enough to rule the pair out
+                bool cooccur = false;
+                for(int k=0;k<ncols;k++)
+                {
+                    if((co_matrix[i][k]==1)&&(co_matrix[j][k]==1)) { cooccur = true; break; }
+                }
+
+                if (!cooccur) { cunits+=1; }
+            }
+        }
+
+        return cunits;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "calc_checker");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ * Returns the ratio colVar / rowVar, where
+ *   colVar = mean squared deviation of the column totals from their mean and
+ *   rowVar = sum over rows of p*(1-p) with p = rowtotal / number of columns.
+ * Returns 0 when m->control_pressed is set.
+ */
+double TrialSwap2::calc_vratio (vector<int> rowtotal, vector<int> columntotal)
+{
+    try {
+        const int numRows = rowtotal.size();
+        const int numCols = columntotal.size();
+
+        const int colTotalSum = accumulate(columntotal.begin(), columntotal.end(), 0 );
+        const double meanColTotal = (double) colTotalSum / (double) numCols;
+
+        //per-row term p*(1-p), p being the fraction of columns occupied
+        double rowVar = 0.0;
+        for (int r = 0; r < numRows; r++) {
+            if (m->control_pressed) { return 0; }
+            const double occupancy = (double) rowtotal[r]/(double) numCols;
+            rowVar += occupancy * (1.0-occupancy);
+        }
+
+        //squared deviations of the column totals from their mean
+        double squaredDev = 0.0;
+        for (int c = 0; c < numCols; c++) {
+            if (m->control_pressed) { return 0; }
+            squaredDev += pow(((double) columntotal[c]-meanColTotal),2);
+        }
+
+        const double colVar = (1.0/(double)numCols) * squaredDev;
+
+        return colVar/rowVar;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "calc_vratio");
+        exit(1);
+    }
+
+}
+/**************************************************************************************************/
+/*
+ * Counts the distinct rows of initmatrix. A row is counted when no later row
+ * is identical to it, so every distinct pattern is counted exactly once (at
+ * its last occurrence).
+ *
+ * Returns the number of distinct rows, or 0 when m->control_pressed is set.
+ */
+int TrialSwap2::calc_combo (vector<vector<int> > &initmatrix)
+{
+    try {
+        const int initrows = initmatrix.size();
+        int unique = 0;
+
+        for(int i=0;i<initrows;i++)
+        {
+            bool repeatedLater = false;
+
+            //j stops before initrows: there is no row at index initrows
+            for(int j=i+1;j<initrows;j++)
+            {
+                if (m->control_pressed) { return 0; }
+                if (initmatrix[i] == initmatrix[j])
+                {
+                    repeatedLater = true;
+                    break;
+                }
+            }
+
+            //the last occurrence of a pattern matches no following row, so the pattern is counted there
+            if (!repeatedLater) { unique++; }
+        }
+        return unique;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "calc_combo");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ * Attempts one checkerboard swap: picks two distinct rows i,j and two
+ * distinct columns k,l through m->getRandomIndex and, if the 2x2 submatrix
+ * they define is a checkerboard (10/01 or 01/10), flips its four cells. Row
+ * and column totals are unchanged by such a flip.
+ *
+ * Returns 0 in every case. A matrix with fewer than two rows or two columns
+ * is left untouched because no pair of distinct indices exists (the index
+ * search would otherwise never finish).
+ */
+int TrialSwap2::swap_checkerboards (vector<vector<int> > &co_matrix)
+{
+    try {
+        if (co_matrix.size() < 2 || co_matrix[0].size() < 2) { return 0; }
+
+        const int ncols = co_matrix[0].size();
+        const int nrows = co_matrix.size();
+
+        //two distinct rows
+        const int i = m->getRandomIndex(nrows-1);
+        int j = m->getRandomIndex(nrows-1);
+        while (j == i) {
+            if (m->control_pressed) { return 0; }
+            j = m->getRandomIndex(nrows-1);
+        }
+
+        //two distinct columns
+        const int k = m->getRandomIndex(ncols-1);
+        int l = m->getRandomIndex(ncols-1);
+        while (l == k) {
+            if (m->control_pressed) { return 0; }
+            l = m->getRandomIndex(ncols-1);
+        }
+
+        const bool mainDiagonal = (co_matrix[i][k]*co_matrix[j][l]==1 && co_matrix[i][l]+co_matrix[j][k]==0);
+        const bool antiDiagonal = (co_matrix[i][k]+co_matrix[j][l]==0 && co_matrix[i][l]*co_matrix[j][k]==1);
+
+        if (mainDiagonal || antiDiagonal) //checkerboard found: flip all four cells
+        {
+            co_matrix[i][k]=1-co_matrix[i][k];
+            co_matrix[i][l]=1-co_matrix[i][l];
+            co_matrix[j][k]=1-co_matrix[j][k];
+            co_matrix[j][l]=1-co_matrix[j][l];
+        }
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "swap_checkerboards");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ * Returns the fraction of entries in scorevec that are greater than or equal
+ * to initialscore. Returns 0 when m->control_pressed is set.
+ */
+double TrialSwap2::calc_pvalue_greaterthan (vector<double> scorevec, double initialscore)
+{
+    try {
+        const int runs = scorevec.size();
+        int atLeast = 0; //scores >= initialscore
+
+        for (int idx = 0; idx < runs; idx++) {
+            if (m->control_pressed) { return 0; }
+            if (!(scorevec[idx] < initialscore) && scorevec[idx] >= initialscore) { atLeast++; }
+        }
+
+        return (double)atLeast/(double)runs;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "calc_pvalue_greaterthan");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ * Returns the fraction of entries in scorevec that are less than or equal to
+ * initialscore. Returns 0 when m->control_pressed is set.
+ */
+double TrialSwap2::calc_pvalue_lessthan (vector<double> scorevec, double initialscore)
+{
+    try {
+        const int runs = scorevec.size();
+        int atMost = 0; //scores <= initialscore
+
+        for (int idx = 0; idx < runs; idx++) {
+            if (m->control_pressed) { return 0; }
+            if (scorevec[idx] <= initialscore) { atMost++; }
+        }
+
+        return (double)atMost/(double)runs;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "calc_pvalue_lessthan");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ * Computes t = (nullMean - initialscore) / (sampleSD / sqrt(runs)), where
+ * sampleSD = sqrt( sum((scorevec[i]-nullMean)^2) / runs ) over the first
+ * `runs` entries of scorevec (scorevec is assumed to hold at least `runs`
+ * values). The intermediate values are reported through m->mothurOut.
+ *
+ * Returns t; 0 when runs is not positive or m->control_pressed is set.
+ *
+ * The variance is divided by `runs` in floating point; the earlier
+ * expression (1/runs) was an integer division that evaluated to 0 for any
+ * runs > 1 and forced the standard deviation to 0.
+ */
+double TrialSwap2::t_test (double initialscore, int runs, double nullMean, vector<double> scorevec)
+{
+    try {
+        if (runs <= 0) { return 0; }
+
+        double sum = 0;
+        for(int i=0;i<runs;i++)
+        {
+            if (m->control_pressed) { return 0; }
+            const double deviation = scorevec[i] - nullMean;
+            sum += deviation * deviation;
+        }
+
+        m->mothurOut("nullMean: " + toString(nullMean)); m->mothurOutEndLine();
+
+        m->mothurOut("sum: " + toString(sum));  m->mothurOutEndLine();
+
+        const double sampleSD = sqrt( sum / (double)runs );
+
+        m->mothurOut("samplSD: " + toString(sampleSD));  m->mothurOutEndLine();
+
+        const double t = (nullMean - initialscore) / (sampleSD / sqrt((double)runs));
+
+        return t;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "t_test");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ * Writes the header "matrix:" followed by the first nrows x ncols cells of
+ * matrix through m->mothurOut, one matrix row per output line with no
+ * separator between cells. Returns 0; stops early when m->control_pressed is
+ * set.
+ */
+int TrialSwap2::print_matrix(vector<vector<int> > &matrix, int nrows, int ncols)
+{
+    try {
+        m->mothurOut("matrix:");  m->mothurOutEndLine();
+
+        for (int r = 0; r < nrows; r++) {
+            if (m->control_pressed) { return 0; }
+
+            const vector<int>& cells = matrix[r];
+            for (int c = 0; c < ncols; c++) {
+                m->mothurOut(toString(cells[c]));
+            }
+            m->mothurOutEndLine();
+        }
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "print_matrix");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ * Stores the transpose of initmatrix in co_matrix: co_matrix[i][j] becomes
+ * initmatrix[j][i]. The width of initmatrix is taken from its first row (all
+ * rows are assumed to have that length).
+ *
+ * Returns 0. An empty initmatrix produces an empty co_matrix. The result is
+ * built in a temporary and swapped in at the end, so passing the same matrix
+ * for both arguments is safe; if m->control_pressed is set the function
+ * returns early and co_matrix is left unchanged.
+ */
+int TrialSwap2::transpose_matrix (vector<vector<int> > &initmatrix, vector<vector<int> > &co_matrix)//, int nrows, int nocols)
+{
+    try {
+        vector<vector<int> > transposed;
+
+        if (!initmatrix.empty())
+        {
+            const int sourceRows = initmatrix.size();
+            const int sourceCols = initmatrix[0].size();
+
+            transposed.reserve(sourceCols);
+            for (int i=0;i<sourceCols;i++)
+            {
+                if (m->control_pressed) { return 0; }
+
+                //column i of the source becomes row i of the result
+                vector<int> tmprow(sourceRows);
+                for (int j=0;j<sourceRows;j++)
+                {
+                    tmprow[j] = initmatrix[j][i];
+                }
+                transposed.push_back(tmprow);
+            }
+        }
+
+        co_matrix.swap(transposed);
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "transpose_matrix");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*
+ * Recomputes the number of cells equal to 1 in every row and every column of
+ * co_matrix and stores the counts in rowtotal and columntotal (both are
+ * replaced). Returns 0; when m->control_pressed is set the function returns
+ * early and neither output vector is modified.
+ */
+int TrialSwap2::update_row_col_totals(vector<vector<int> > &co_matrix, vector<int> &rowtotal, vector<int> &columntotal)
+{
+    try {
+        const int numRows = co_matrix.size();
+        const int numCols = co_matrix[0].size();
+
+        vector<int> colCounts(numCols, 0);
+        vector<int> rowCounts(numRows, 0);
+
+        for (int r = 0; r < numRows; r++) {
+            if (m->control_pressed) { return 0; }
+
+            for (int c = 0; c < numCols; c++) {
+                if (co_matrix[r][c] != 1) { continue; }
+                rowCounts[r]++;
+                colCounts[c]++;
+            }
+        }
+
+        columntotal = colCounts;
+        rowtotal = rowCounts;
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrialSwap2", "update_row_col_totals");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+
+
+
+
+
diff --git a/trialswap2.h b/trialswap2.h
new file mode 100644 (file)
index 0000000..6e68e95
--- /dev/null
@@ -0,0 +1,55 @@
+#ifndef TRIALSWAP2
+#define TRIALSWAP2
+
+/*
+ *  trialswap2.h
+ *  Mothur
+ *
+ *  Created by Kathryn Iverson on June 27, 2011.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothurout.h"
+
+
+class TrialSwap2 {
+    
+public:
+       TrialSwap2(){  m = MothurOut::getInstance(); };
+    ~TrialSwap2(){};
+    
+    double calc_pvalue_lessthan (vector<double>, double);
+    double calc_pvalue_greaterthan (vector<double>, double);
+    int swap_checkerboards (vector<vector<int> > &);
+    int calc_combo (vector<vector<int> > &);
+    double calc_vratio (vector<int>, vector<int>);
+    int calc_checker (vector<vector<int> > &,vector<int>);
+    double calc_c_score (vector<vector<int> > &,vector<int>);
+    
+    int sim1 (vector<vector<int> > &);
+    void sim2(vector<vector<int> >&);
+    int sim2plus(vector<int>, vector<vector<int> > &);
+    void sim3(vector<vector<int> > &);
+    int sim4(vector<int>, vector<int>, vector<vector<int> > &);
+    int sim5(vector<int>, vector<int>, vector<vector<int> > &);
+    int sim6(vector<int>, vector<vector<int> > &);
+    int sim7(vector<int>, vector<vector<int> > &);
+    int sim8(vector<int>, vector<int>, vector<vector<int> > &);
+    int transpose_matrix (vector<vector<int> > &, vector<vector<int> > &);
+    int update_row_col_totals(vector<vector<int> > &, vector<int>&, vector<int>&);
+
+    
+private:
+    MothurOut* m;
+    
+    double t_test (double, int, double, vector<double>);
+    int print_matrix(vector<vector<int> > &, int, int);
+    
+    
+
+};
+
+#endif
+
+
index 6a26fac949a335999be1cf7c1355f9c5c4f7d48c..86de668b4f57596bf11efb80807b18f932136551 100644 (file)
@@ -21,7 +21,9 @@ vector<string> TrimFlowsCommand::setParameters(){
                CommandParameter pminflows("minflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pminflows);
                CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
                CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
-               CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
+        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
+               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
+        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter psignal("signal", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(psignal);
                CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pnoise);
@@ -178,10 +180,17 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found"){       temp = "0";             }
                        m->mothurConvert(temp, pdiffs);
                        
-                       temp = validParameter.validFile(parameters, "tdiffs", false);
-                       if (temp == "not found"){ int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
+            temp = validParameter.validFile(parameters, "ldiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, ldiffs);
+            
+            temp = validParameter.validFile(parameters, "sdiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, sdiffs);
+                       
+                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
                        m->mothurConvert(temp, tdiffs);
-                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
+                       
+                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
+
                        
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
@@ -228,7 +237,7 @@ int TrimFlowsCommand::execute(){
                }
                
                vector<unsigned long long> flowFilePos;
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                flowFilePos = getFlowFileBreaks();
                for (int i = 0; i < (flowFilePos.size()-1); i++) {
                        lines.push_back(new linePair(flowFilePos[i], flowFilePos[(i+1)]));
@@ -297,7 +306,7 @@ int TrimFlowsCommand::execute(){
                                                        m->mothurRemove(barcodePrimerComboFileNames[i][j]);
                                                }
                                                else{
-                                                       output << barcodePrimerComboFileNames[i][j] << endl;
+                                                       output << m->getFullPathName(barcodePrimerComboFileNames[i][j]) << endl;
                                                        outputNames.push_back(barcodePrimerComboFileNames[i][j]);
                                                        outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]);
                                                }
@@ -311,7 +320,7 @@ int TrimFlowsCommand::execute(){
                        flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files";
                        m->openOutputFile(flowFilesFileName, output);
                        
-                       output << trimFlowFileName << endl;
+                       output << m->getFullPathName(trimFlowFileName) << endl;
                        
                        output.close();
                }
@@ -380,7 +389,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                int count = 0;
                bool moreSeqs = 1;
                
-               TrimOligos trimOligos(pdiffs, bdiffs, primers, barcodes, revPrimer);
+               TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
                
                while(moreSeqs) {
                        //cout << "driver " << count << endl;
@@ -405,12 +414,26 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                        int primerIndex = 0;
                        int barcodeIndex = 0;
                        
+            if(numLinkers != 0){
+                success = trimOligos.stripLinker(currSeq);
+                if(success > ldiffs)           {       trashCode += 'k';       }
+                else{ currentSeqDiffs += success;  }
+                
+            }
+            
                        if(barcodes.size() != 0){
                                success = trimOligos.stripBarcode(currSeq, barcodeIndex);
                                if(success > bdiffs)            {       trashCode += 'b';       }
                                else{ currentSeqDiffs += success;  }
                        }
                        
+            if(numSpacers != 0){
+                success = trimOligos.stripSpacer(currSeq);
+                if(success > sdiffs)           {       trashCode += 's';       }
+                else{ currentSeqDiffs += success;  }
+                
+            }
+            
                        if(numFPrimers != 0){
                                success = trimOligos.stripForward(currSeq, primerIndex);
                                if(success > pdiffs)            {       trashCode += 'f';       }
@@ -448,7 +471,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                        //report progress
                        if((count) % 10000 == 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
 
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        unsigned long long pos = flowFile.tellg();
 
                        if ((pos == -1) || (pos >= line->end)) { break; }
@@ -522,9 +545,8 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
 
                                }
                                else if(type == "REVERSE"){
-                                       Sequence oligoRC("reverse", oligo);
-                                       oligoRC.reverseComplement();
-                                       revPrimer.push_back(oligoRC.getUnaligned());
+                                       string oligoRC = reverseOligo(oligo);
+                                       revPrimer.push_back(oligoRC);
                                }
                                else if(type == "BARCODE"){
                                        oligosFile >> group;
@@ -535,6 +557,10 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
 
                                        barcodes[oligo]=indexBarcode; indexBarcode++;
                                        barcodeNameVector.push_back(group);
+                               }else if(type == "LINKER"){
+                                       linker.push_back(oligo);
+                               }else if(type == "SPACER"){
+                                       spacer.push_back(oligo);
                                }
                                else{
                                        m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  
@@ -600,6 +626,8 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                
                numFPrimers = primers.size();
                numRPrimers = revPrimer.size();
+        numLinkers = linker.size();
+        numSpacers = spacer.size();
                
        }
        catch(exception& e) {
@@ -607,6 +635,47 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                exit(1);
        }
 }
+//********************************************************************/
+string TrimFlowsCommand::reverseOligo(string oligo){
+       try {
+        string reverse = "";
+        
+        for(int i=oligo.length()-1;i>=0;i--){
+            
+            if(oligo[i] == 'A')                {       reverse += 'T'; }
+            else if(oligo[i] == 'T'){  reverse += 'A'; }
+            else if(oligo[i] == 'U'){  reverse += 'A'; }
+            
+            else if(oligo[i] == 'G'){  reverse += 'C'; }
+            else if(oligo[i] == 'C'){  reverse += 'G'; }
+            
+            else if(oligo[i] == 'R'){  reverse += 'Y'; }
+            else if(oligo[i] == 'Y'){  reverse += 'R'; }
+            
+            else if(oligo[i] == 'M'){  reverse += 'K'; }
+            else if(oligo[i] == 'K'){  reverse += 'M'; }
+            
+            else if(oligo[i] == 'W'){  reverse += 'W'; }
+            else if(oligo[i] == 'S'){  reverse += 'S'; }
+            
+            else if(oligo[i] == 'B'){  reverse += 'V'; }
+            else if(oligo[i] == 'V'){  reverse += 'B'; }
+            
+            else if(oligo[i] == 'D'){  reverse += 'H'; }
+            else if(oligo[i] == 'H'){  reverse += 'D'; }
+            
+            else                                               {       reverse += 'N'; }
+        }
+        
+        
+        return reverse;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "TrimFlowsCommand", "reverseOligo");
+               exit(1);
+       }
+}
+
 /**************************************************************************************************/
 vector<unsigned long long> TrimFlowsCommand::getFlowFileBreaks() {
 
@@ -689,7 +758,7 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim
                processIDS.clear();
                int exitCommand = 1;
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                
                //loop through and create all the processes you want
index 8656fd0b5b314084c6fd08cf614288aca2bbc763..27bafd530cad4a66ef22e41696eef7e2328bde97 100644 (file)
@@ -49,7 +49,8 @@ private:
        vector<unsigned long long> getFlowFileBreaks();
        int createProcessesCreateTrim(string, string, string, string, vector<vector<string> >); 
        int driverCreateTrim(string, string, string, string, vector<vector<string> >, linePair*);
-
+    string reverseOligo(string);
+    
        vector<string> outputNames;
        set<string> filesToRemove;
        
@@ -71,6 +72,8 @@ private:
        map<string, int> barcodes;
        map<string, int> primers;
        vector<string> revPrimer;
+    vector<string> linker;
+    vector<string> spacer;
 
        vector<string> primerNameVector;        //needed here?
        vector<string> barcodeNameVector;       //needed here?
@@ -131,7 +134,7 @@ struct trimFlowData {
 };
 
 /**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
 static DWORD WINAPI MyTrimFlowThreadFunction(LPVOID lpParam){ 
        trimFlowData* pDataArray;
index f0b5a80880240e78674d6d97198ba7d47c8793ab..8c523ce68d8a042bb702900d468def2aadad157b 100644 (file)
 #include "needlemanoverlap.hpp"
 
 
+/********************************************************************/
+//strip, pdiffs, bdiffs, primers, barcodes, revPrimers
+TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
+       try {
+               m = MothurOut::getInstance();
+               
+               pdiffs = p;
+               bdiffs = b;
+        ldiffs = l;
+        sdiffs = s;
+               
+               barcodes = br;
+               primers = pr;
+               revPrimer = r;
+        linker = lk;
+        spacer = sp;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "TrimOligos");
+               exit(1);
+       }
+}
 /********************************************************************/
 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
@@ -69,9 +91,9 @@ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
                        
                        Alignment* alignment;
                        if (barcodes.size() > 0) {
-                               map<string,int>::iterator it=barcodes.begin();
+                               map<string,int>::iterator it
                                
-                               for(it;it!=barcodes.end();it++){
+                               for(it=barcodes.begin();it!=barcodes.end();it++){
                                        if(it->first.length() > maxLength){
                                                maxLength = it->first.length();
                                        }
@@ -132,7 +154,7 @@ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
                        else{                                                                                                   //use the best match
                                group = minGroup;
                                seq.setUnaligned(rawSequence.substr(minPos));
-                               
+    
                                if(qual.getName() != ""){
                                        qual.trimQScores(minPos, -1);
                                }
@@ -296,9 +318,9 @@ int TrimOligos::stripForward(Sequence& seq, int& group){
                        
                        Alignment* alignment;
                        if (primers.size() > 0) {
-                               map<string,int>::iterator it=primers.begin();
+                               map<string,int>::iterator it
                                
-                               for(it;it!=primers.end();it++){
+                               for(it=primers.begin();it!=primers.end();it++){
                                        if(it->first.length() > maxLength){
                                                maxLength = it->first.length();
                                        }
@@ -375,7 +397,7 @@ int TrimOligos::stripForward(Sequence& seq, int& group){
        }
 }
 //*******************************************************************/
-int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group){
+int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
        try {
                string rawSequence = seq.getUnaligned();
                int success = pdiffs + 1;       //guilty until proven innocent
@@ -390,9 +412,9 @@ int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group){
                        
                        if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
                                group = it->second;
-                               seq.setUnaligned(rawSequence.substr(oligo.length()));
+                               if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
                                if(qual.getName() != ""){
-                                       qual.trimQScores(oligo.length(), -1);
+                                       if (!keepForward) {  qual.trimQScores(oligo.length(), -1); }
                                }
                                success = 0;
                                break;
@@ -408,9 +430,9 @@ int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group){
                        
                        Alignment* alignment;
                        if (primers.size() > 0) {
-                               map<string,int>::iterator it=primers.begin();
+                               map<string,int>::iterator it
                                
-                               for(it;it!=primers.end();it++){
+                               for(it=primers.begin();it!=primers.end();it++){
                                        if(it->first.length() > maxLength){
                                                maxLength = it->first.length();
                                        }
@@ -470,9 +492,9 @@ int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group){
                        else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
                        else{                                                                                                   //use the best match
                                group = minGroup;
-                               seq.setUnaligned(rawSequence.substr(minPos));
+                               if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
                                if(qual.getName() != ""){
-                                       qual.trimQScores(minPos, -1);
+                                       if (!keepForward) { qual.trimQScores(minPos, -1); }
                                }
                                success = minDiff;
                        }
@@ -550,6 +572,437 @@ bool TrimOligos::stripReverse(Sequence& seq){
                exit(1);
        }
 }
+/*******************************************************************
+ * stripLinker (sequence + quality scores)
+ *
+ * Looks for one of the oligos in linker at the start of seq's unaligned
+ * sequence and removes it.
+ *   pass 1: the leading oligo.length() bases are tested against each linker
+ *           with compareDNASeq; on a match that prefix is removed from seq
+ *           and, when qual has a non-empty name, from qual via trimQScores.
+ *   pass 2: only when ldiffs != 0 and pass 1 found nothing. Each linker is
+ *           aligned (NeedlemanOverlap) to the first oligo.length()+ldiffs
+ *           bases, differences are counted with countDiffs, and the linker
+ *           with the fewest differences is used if it is within ldiffs and
+ *           is not tied with another linker.
+ *
+ * NOTE(review): success and the return type are bool, so ldiffs + 1,
+ * ldiffs + 10, ldiffs + 100 and minDiff all collapse to true (1). A caller
+ * that compares the result against ldiffs can only ever see 0 or 1.
+ * stripForward in this file returns int; the return type here probably
+ * should too, which also means changing the declaration. Confirm.
+ *******************************************************************/
+bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
+       try {
+               string rawSequence = seq.getUnaligned();
+               bool success = ldiffs + 1;      //guilty until proven innocent (stored in a bool, so this is simply true)
+               
+               for(int i=0;i<linker.size();i++){
+                       string oligo = linker[i];
+                       
+                       if(rawSequence.length() < oligo.length()){      //read is shorter than this linker; the break also skips any remaining linkers
+                               success = ldiffs + 10;
+                               break;
+                       }
+                       
+                       if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
+                               seq.setUnaligned(rawSequence.substr(oligo.length()));
+                               if(qual.getName() != ""){
+                                       qual.trimQScores(oligo.length(), -1);
+                               }
+                               success = 0;
+                               break;
+                       }
+               }
+        
+        //if you found the linker or if you don't want to allow for diffs
+               if ((ldiffs == 0) || (success == 0)) { return success;  }
+               
+               else { //try aligning and see if you can find it
+                       
+                       int maxLength = 0;
+                       
+                       Alignment* alignment;
+                       if (linker.size() > 0) {
+                               for(int i = 0; i < linker.size(); i++){
+                                       if(linker[i].length() > maxLength){
+                                               maxLength = linker[i].length();
+                                       }
+                               }
+                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+ldiffs+1));  //last argument: longest linker + allowed diffs + 1
+                               
+                       }else{ alignment = NULL; } //no linkers: the loop below never runs, so the NULL is never dereferenced
+                       
+                       //can you find the linker
+                       int minDiff = 1e6;
+                       int minCount = 1;
+                       int minPos = 0;
+                       
+                       for(int i = 0; i < linker.size(); i++){
+                               string oligo = linker[i];
+                               //                              int length = oligo.length();
+                               
+                               if(rawSequence.length() < maxLength){   //read is shorter than the longest linker; let's just assume that the linkers are the same length
+                                       success = ldiffs + 10;
+                                       break;
+                               }
+                               
+                               //use needleman to align the first linker.length()+ldiffs bases of the sequence to each linker
+                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
+                               oligo = alignment->getSeqAAln();
+                               string temp = alignment->getSeqBAln();
+                               
+                               int alnLength = oligo.length();
+                               //cut both aligned strings after the last non-gap column of the aligned linker
+                               for(int i=oligo.length()-1;i>=0;i--){
+                                       if(oligo[i] != '-'){    alnLength = i+1;        break;  }
+                               }
+                               oligo = oligo.substr(0,alnLength);
+                               temp = temp.substr(0,alnLength);
+                               
+                               int numDiff = countDiffs(oligo, temp);
+                               
+                               if(numDiff < minDiff){
+                                       minDiff = numDiff;
+                                       minCount = 1;
+                                       minPos = 0;     //recounted below: non-gap characters of the aligned read within alnLength
+                                       for(int i=0;i<alnLength;i++){
+                                               if(temp[i] != '-'){
+                                                       minPos++;
+                                               }
+                                       }
+                               }
+                               else if(numDiff == minDiff){    //tie with the current best
+                                       minCount++;
+                               }
+                               
+                       }
+                       
+                       if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
+                       else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple linkers
+                       else{                                                                                                   //use the best match
+                               seq.setUnaligned(rawSequence.substr(minPos));
+                               
+                               if(qual.getName() != ""){
+                                       qual.trimQScores(minPos, -1);
+                               }
+                               success = minDiff;
+                       }
+                       
+                       if (alignment != NULL) {  delete alignment;  }
+                       
+               }
+
+       
+               return success;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "stripLinker");
+               exit(1);
+       }
+}
+/*******************************************************************
+ * stripLinker (sequence only)
+ *
+ * Looks for one of the oligos in linker at the start of seq's unaligned
+ * sequence and removes it.
+ *   pass 1: the leading oligo.length() bases are tested against each linker
+ *           with compareDNASeq; on a match that prefix is removed from seq.
+ *   pass 2: only when ldiffs != 0 and pass 1 found nothing. Each linker is
+ *           aligned (NeedlemanOverlap) to the first oligo.length()+ldiffs
+ *           bases, differences are counted with countDiffs, and the linker
+ *           with the fewest differences is used if it is within ldiffs and
+ *           is not tied with another linker.
+ *
+ * NOTE(review): success and the return type are bool, so ldiffs + 1,
+ * ldiffs + 10, ldiffs + 100 and minDiff all collapse to true (1).
+ * TrimFlowsCommand::driverCreateTrim tests a one-argument stripLinker
+ * result with "success > ldiffs", which cannot be true once ldiffs >= 1.
+ * stripForward in this file returns int; the return type here probably
+ * should too, which also means changing the declaration. Confirm.
+ *******************************************************************/
+bool TrimOligos::stripLinker(Sequence& seq){
+       try {
+               
+               string rawSequence = seq.getUnaligned();
+               bool success = ldiffs +1;       //guilty until proven innocent (stored in a bool, so this is simply true)
+               
+               for(int i=0;i<linker.size();i++){
+                       string oligo = linker[i];
+                       
+                       if(rawSequence.length() < oligo.length()){      //read is shorter than this linker; the break also skips any remaining linkers
+                               success = ldiffs +10;
+                               break;
+                       }
+                       
+                       if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
+                               seq.setUnaligned(rawSequence.substr(oligo.length()));
+                               success = 0;
+                               break;
+                       }
+               }       
+               
+        //if you found the linker or if you don't want to allow for diffs
+               if ((ldiffs == 0) || (success == 0)) { return success;  }
+               
+               else { //try aligning and see if you can find it
+                       
+                       int maxLength = 0;
+                       
+                       Alignment* alignment;
+                       if (linker.size() > 0) {
+                               for(int i = 0; i < linker.size(); i++){
+                                       if(linker[i].length() > maxLength){
+                                               maxLength = linker[i].length();
+                                       }
+                               }
+                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+ldiffs+1));  //last argument: longest linker + allowed diffs + 1
+                               
+                       }else{ alignment = NULL; } //no linkers: the loop below never runs, so the NULL is never dereferenced
+                       
+                       //can you find the linker
+                       int minDiff = 1e6;
+                       int minCount = 1;
+                       int minPos = 0;
+                       
+                       for(int i = 0; i < linker.size(); i++){
+                               string oligo = linker[i];
+                               //                              int length = oligo.length();
+                               
+                               if(rawSequence.length() < maxLength){   //read is shorter than the longest linker; let's just assume that the linkers are the same length
+                                       success = ldiffs + 10;
+                                       break;
+                               }
+                               
+                               //use needleman to align the first linker.length()+ldiffs bases of the sequence to each linker
+                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
+                               oligo = alignment->getSeqAAln();
+                               string temp = alignment->getSeqBAln();
+                               
+                               int alnLength = oligo.length();
+                               //cut both aligned strings after the last non-gap column of the aligned linker
+                               for(int i=oligo.length()-1;i>=0;i--){
+                                       if(oligo[i] != '-'){    alnLength = i+1;        break;  }
+                               }
+                               oligo = oligo.substr(0,alnLength);
+                               temp = temp.substr(0,alnLength);
+                               
+                               int numDiff = countDiffs(oligo, temp);
+                               
+                               if(numDiff < minDiff){
+                                       minDiff = numDiff;
+                                       minCount = 1;
+                                       minPos = 0;     //recounted below: non-gap characters of the aligned read within alnLength
+                                       for(int i=0;i<alnLength;i++){
+                                               if(temp[i] != '-'){
+                                                       minPos++;
+                                               }
+                                       }
+                               }
+                               else if(numDiff == minDiff){    //tie with the current best
+                                       minCount++;
+                               }
+                               
+                       }
+                       
+                       if(minDiff > ldiffs)    {       success = minDiff;              }       //no good matches
+                       else if(minCount > 1)   {       success = ldiffs + 100; }       //can't tell the difference between multiple linkers
+                       else{                                                                                                   //use the best match
+                               seq.setUnaligned(rawSequence.substr(minPos));
+                               success = minDiff;
+                       }
+                       
+                       if (alignment != NULL) {  delete alignment;  }
+                       
+               }
+
+               return success;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "stripLinker");
+               exit(1);
+       }
+}
+
+//******************************************************************/
+bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){       //remove a leading spacer oligo from seq and trim qual by the same number of positions
+       try {   //returns the bool conversion of success: false only when success ends as 0, true for every other outcome
+               string rawSequence = seq.getUnaligned();
+               bool success = sdiffs+1;        //guilty until proven innocent
+               //NOTE(review): success is a bool, so every nonzero value stored in it (sdiffs+1, sdiffs+10, sdiffs+100, minDiff) collapses to true; a caller that compares the result with sdiffs only ever sees 0 or 1 - confirm an int diff count was not intended
+               for(int i=0;i<spacer.size();i++){       //first pass: test each spacer against the start of the read with compareDNASeq
+                       string oligo = spacer[i];
+                       
+                       if(rawSequence.length() < oligo.length()){      //read is shorter than this spacer: give up on the first pass
+                               success = sdiffs+10;
+                               break;
+                       }
+                       
+                       if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ //the first oligo.length() bases match this spacer
+                               seq.setUnaligned(rawSequence.substr(oligo.length()));   //keep only what follows the spacer
+                               if(qual.getName() != ""){       //qual has a non-empty name, so trim it as well
+                                       qual.trimQScores(oligo.length(), -1);   //presumably drops the first oligo.length() scores - confirm against QualityScores::trimQScores
+                               }
+                               success = 0;
+                               break;
+                       }
+               }
+        
+        //if you found the spacer or if you don't want to allow for diffs
+               if ((sdiffs == 0) || (success == 0)) { return success;  }
+               
+               else { //try aligning and see if you can find it
+                       
+                       int maxLength = 0;      //length of the longest spacer
+                       
+                       Alignment* alignment;
+                       if (spacer.size() > 0) {
+                               for(int i = 0; i < spacer.size(); i++){
+                                       if(spacer[i].length() > maxLength){
+                                               maxLength = spacer[i].length();
+                                       }
+                               }
+                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+sdiffs+1));        //NOTE(review): the meaning of these arguments is defined by NeedlemanOverlap's constructor, which is not visible here
+                               
+                       }else{ alignment = NULL; }      //no spacers: the alignment loop below does not execute, so the NULL pointer is never dereferenced
+                       
+                       //can you find the spacer
+                       int minDiff = 1e6;      //fewest differences seen so far, starts at a large sentinel
+                       int minCount = 1;       //number of spacers that share the current minDiff
+                       int minPos = 0; //number of non-gap read characters inside the best alignment
+                       
+                       for(int i = 0; i < spacer.size(); i++){
+                               string oligo = spacer[i];
+                               //                              int length = oligo.length();
+                               
+                               if(rawSequence.length() < maxLength){   //let's just assume that the spacers are the same length
+                                       success = sdiffs + 10;
+                                       break;
+                               }
+                               
+                               //use needleman to align first spacer.length()+sdiffs of sequence to each spacer
+                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
+                               oligo = alignment->getSeqAAln();        //presumably the gapped form of the first align() argument (the spacer) - confirm
+                               string temp = alignment->getSeqBAln();  //presumably the gapped form of the second align() argument (the read prefix) - confirm
+                               
+                               int alnLength = oligo.length();
+                               
+                               for(int i=oligo.length()-1;i>=0;i--){   //scan backwards for the last non-gap column of oligo
+                                       if(oligo[i] != '-'){    alnLength = i+1;        break;  }
+                               }
+                               oligo = oligo.substr(0,alnLength);      //cut both aligned strings at that column
+                               temp = temp.substr(0,alnLength);
+                               
+                               int numDiff = countDiffs(oligo, temp);
+                               
+                               if(numDiff < minDiff){  //new best spacer: remember its diff count and recount minPos
+                                       minDiff = numDiff;
+                                       minCount = 1;
+                                       minPos = 0;
+                                       for(int i=0;i<alnLength;i++){
+                                               if(temp[i] != '-'){
+                                                       minPos++;
+                                               }
+                                       }
+                               }
+                               else if(numDiff == minDiff){    //tie with the current best
+                                       minCount++;
+                               }
+                               
+                       }
+                       
+                       if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
+                       else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple spacers
+                       else{                                                                                                   //use the best match
+                               seq.setUnaligned(rawSequence.substr(minPos));
+                               
+                               if(qual.getName() != ""){       //qual has a non-empty name, so trim it as well
+                                       qual.trimQScores(minPos, -1);
+                               }
+                               success = minDiff;
+                       }
+                       
+                       if (alignment != NULL) {  delete alignment;  }
+                       
+               }
+        
+
+               return success;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "stripSpacer");
+               exit(1);
+       }
+}
+//******************************************************************/
+bool TrimOligos::stripSpacer(Sequence& seq){    //remove a leading spacer oligo from seq (overload without quality scores)
+       try {   //returns the bool conversion of success: false only when success ends as 0, true for every other outcome
+               
+               string rawSequence = seq.getUnaligned();
+               bool success = sdiffs+1;        //guilty until proven innocent
+               //NOTE(review): success is a bool, so every nonzero value stored in it (sdiffs+1, sdiffs+10, sdiffs+100, minDiff) collapses to true; a caller that compares the result with sdiffs only ever sees 0 or 1 - confirm an int diff count was not intended
+               for(int i=0;i<spacer.size();i++){       //first pass: test each spacer against the start of the read with compareDNASeq
+                       string oligo = spacer[i];
+                       
+                       if(rawSequence.length() < oligo.length()){      //read is shorter than this spacer: give up on the first pass
+                               success = sdiffs+10;
+                               break;
+                       }
+                       
+                       if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){ //the first oligo.length() bases match this spacer
+                               seq.setUnaligned(rawSequence.substr(oligo.length()));   //keep only what follows the spacer
+                               success = 0;
+                               break;
+                       }
+               }       
+               
+        //if you found the spacer or if you don't want to allow for diffs
+               if ((sdiffs == 0) || (success == 0)) { return success;  }
+               
+               else { //try aligning and see if you can find it
+                       
+                       int maxLength = 0;      //length of the longest spacer
+                       
+                       Alignment* alignment;
+                       if (spacer.size() > 0) {
+                               for(int i = 0; i < spacer.size(); i++){
+                                       if(spacer[i].length() > maxLength){
+                                               maxLength = spacer[i].length();
+                                       }
+                               }
+                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+sdiffs+1));        //NOTE(review): the meaning of these arguments is defined by NeedlemanOverlap's constructor, which is not visible here
+                               
+                       }else{ alignment = NULL; }      //no spacers: the alignment loop below does not execute, so the NULL pointer is never dereferenced
+                       
+                       //can you find the spacer
+                       int minDiff = 1e6;      //fewest differences seen so far, starts at a large sentinel
+                       int minCount = 1;       //number of spacers that share the current minDiff
+                       int minPos = 0; //number of non-gap read characters inside the best alignment
+                       
+                       for(int i = 0; i < spacer.size(); i++){
+                               string oligo = spacer[i];
+                               //                              int length = oligo.length();
+                               
+                               if(rawSequence.length() < maxLength){   //let's just assume that the spacers are the same length
+                                       success = sdiffs + 10;
+                                       break;
+                               }
+                               
+                               //use needleman to align first spacer.length()+sdiffs of sequence to each spacer
+                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
+                               oligo = alignment->getSeqAAln();        //presumably the gapped form of the first align() argument (the spacer) - confirm
+                               string temp = alignment->getSeqBAln();  //presumably the gapped form of the second align() argument (the read prefix) - confirm
+                               
+                               int alnLength = oligo.length();
+                               
+                               for(int i=oligo.length()-1;i>=0;i--){   //scan backwards for the last non-gap column of oligo
+                                       if(oligo[i] != '-'){    alnLength = i+1;        break;  }
+                               }
+                               oligo = oligo.substr(0,alnLength);      //cut both aligned strings at that column
+                               temp = temp.substr(0,alnLength);
+                               
+                               int numDiff = countDiffs(oligo, temp);
+                               
+                               if(numDiff < minDiff){  //new best spacer: remember its diff count and recount minPos
+                                       minDiff = numDiff;
+                                       minCount = 1;
+                                       minPos = 0;
+                                       for(int i=0;i<alnLength;i++){
+                                               if(temp[i] != '-'){
+                                                       minPos++;
+                                               }
+                                       }
+                               }
+                               else if(numDiff == minDiff){    //tie with the current best
+                                       minCount++;
+                               }
+                               
+                       }
+                       
+                       if(minDiff > sdiffs)    {       success = minDiff;              }       //no good matches
+                       else if(minCount > 1)   {       success = sdiffs + 100; }       //can't tell the difference between multiple spacers
+                       else{                                                                                                   //use the best match
+                               seq.setUnaligned(rawSequence.substr(minPos));
+                               success = minDiff;
+                       }
+                       
+                       if (alignment != NULL) {  delete alignment;  }
+                       
+               }
+
+               return success;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "stripSpacer");
+               exit(1);
+       }
+}
 
 //******************************************************************/
 bool TrimOligos::compareDNASeq(string oligo, string seq){
index 8830dff57af8985c4f0e0bd4464f0860a46e8b81..e3ea7d55537e323f3869a6157ff3aed4bbd99eea 100644 (file)
 class TrimOligos {
        
        public:
-               TrimOligos(int,int, map<string, int>, map<string, int>, vector<string>); //pdiffs, bdiffs, primers, barcodes, revPrimers
+        TrimOligos(int,int, map<string, int>, map<string, int>, vector<string>); //pdiffs, bdiffs, primers, barcodes, revPrimers
+               TrimOligos(int,int, int, int, map<string, int>, map<string, int>, vector<string>, vector<string>, vector<string>); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimers, linker, spacer
                ~TrimOligos();
        
                int stripBarcode(Sequence&, int&);      
                int stripBarcode(Sequence&, QualityScores&, int&);
        
                int stripForward(Sequence&, int&);
-               int stripForward(Sequence&, QualityScores&, int&);
+               int stripForward(Sequence&, QualityScores&, int&, bool);
        
                bool stripReverse(Sequence&);
                bool stripReverse(Sequence&, QualityScores&);
+    
+        bool stripLinker(Sequence&);
+        bool stripLinker(Sequence&, QualityScores&);
+    
+        bool stripSpacer(Sequence&);
+        bool stripSpacer(Sequence&, QualityScores&);
                                
        
        private:
-               int pdiffs, bdiffs;
+               int pdiffs, bdiffs, ldiffs, sdiffs;
        
                map<string, int> barcodes;
                map<string, int> primers;
                vector<string> revPrimer;
+        vector<string> linker;
+        vector<string> spacer;
        
                MothurOut* m;
        
index dd3427b9a9733285e5e793f78cabe5bdc3ed1cf3..f00743c546d22879f930bf25c741cdbcdd3e7fe5 100644 (file)
@@ -25,9 +25,12 @@ vector<string> TrimSeqsCommand::setParameters(){
                CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
                CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
                CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
-               CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
+        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
+               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
+        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
+               CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
                CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
                CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
                CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
@@ -64,9 +67,11 @@ string TrimSeqsCommand::getHelpString(){
                helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
                helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
                helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
-               helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
+               helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
                helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
                helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+        helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
+               helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
                helpString += "The qfile parameter allows you to provide a quality file.\n";
                helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
                helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
@@ -75,6 +80,7 @@ string TrimSeqsCommand::getHelpString(){
                helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
                helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
                helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
+               helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
                helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
                helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
                helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
@@ -229,11 +235,17 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
                        m->mothurConvert(temp, pdiffs);
+            
+            temp = validParameter.validFile(parameters, "ldiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, ldiffs);
+            
+            temp = validParameter.validFile(parameters, "sdiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, sdiffs);
                        
-                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
+                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
                        m->mothurConvert(temp, tdiffs);
                        
-                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
+                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
                        
                        temp = validParameter.validFile(parameters, "qfile", true);     
                        if (temp == "not found")        {       qFileName = "";         }
@@ -274,6 +286,9 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
                        allFiles = m->isTrue(temp);
+            
+            temp = validParameter.validFile(parameters, "keepforward", false);         if (temp == "not found") { temp = "F"; }
+                       keepforward = m->isTrue(temp);
                        
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
@@ -314,6 +329,8 @@ int TrimSeqsCommand::execute(){
                
                numFPrimers = 0;  //this needs to be initialized
                numRPrimers = 0;
+        numSpacers = 0;
+        numLinkers = 0;
                createGroup = false;
                vector<vector<string> > fastaFileNames;
                vector<vector<string> > qualFileNames;
@@ -356,27 +373,16 @@ int TrimSeqsCommand::execute(){
                                outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
                        }
                }
+       
+        //fills lines and qlines
+               setLines(fastaFile, qFileName);
                
-               vector<unsigned long long> fastaFilePos;
-               vector<unsigned long long> qFilePos;
+        if(processors == 1){
+            driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
+        }else{
+            createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
+        }      
                
-               setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
-               
-               for (int i = 0; i < (fastaFilePos.size()-1); i++) {
-                       lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
-                       if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
-               }       
-               if(qFileName == "")     {       qLines = lines; } //files with duds
-               
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                               if(processors == 1){
-                                       driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
-                               }else{
-                                       createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
-                               }       
-               #else
-                               driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
-               #endif
                
                if (m->control_pressed) {  return 0; }                  
        
@@ -488,7 +494,7 @@ int TrimSeqsCommand::execute(){
                
 /**************************************************************************************/
 
-int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {   
+int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {     
                
        try {
                
@@ -535,17 +541,17 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                
                ifstream inFASTA;
                m->openInputFile(filename, inFASTA);
-               inFASTA.seekg(line->start);
+               inFASTA.seekg(line.start);
                
                ifstream qFile;
                if(qFileName != "")     {
                        m->openInputFile(qFileName, qFile);
-                       qFile.seekg(qline->start);  
+                       qFile.seekg(qline.start);  
                }
                
                int count = 0;
                bool moreSeqs = 1;
-               TrimOligos trimOligos(pdiffs, bdiffs, primers, barcodes, revPrimer);
+               TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
        
                while (moreSeqs) {
                                
@@ -578,14 +584,28 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                int barcodeIndex = 0;
                                int primerIndex = 0;
                                
+                if(numLinkers != 0){
+                                       success = trimOligos.stripLinker(currSeq, currQual);
+                                       if(success > ldiffs)            {       trashCode += 'k';       }
+                                       else{ currentSeqsDiffs += success;  }
+
+                               }
+                
                                if(barcodes.size() != 0){
                                        success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
                                        if(success > bdiffs)            {       trashCode += 'b';       }
                                        else{ currentSeqsDiffs += success;  }
                                }
                                
+                if(numSpacers != 0){
+                                       success = trimOligos.stripSpacer(currSeq, currQual);
+                                       if(success > sdiffs)            {       trashCode += 's';       }
+                                       else{ currentSeqsDiffs += success;  }
+
+                               }
+                
                                if(numFPrimers != 0){
-                                       success = trimOligos.stripForward(currSeq, currQual, primerIndex);
+                                       success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
                                        if(success > pdiffs)            {       trashCode += 'f';       }
                                        else{ currentSeqsDiffs += success;  }
                                }
@@ -728,9 +748,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                count++;
                        }
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                unsigned long long pos = inFASTA.tellg();
-                               if ((pos == -1) || (pos >= line->end)) { break; }
+                               if ((pos == -1) || (pos >= line.end)) { break; }
                        
                        #else
                                if (inFASTA.eof()) { break; }
@@ -763,12 +783,13 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
 
 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 1;
+        
+        int process = 1;
                int exitCommand = 1;
                processIDS.clear();
                
-               //loop through and create all the processes you want
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                               //loop through and create all the processes you want
                while (process != processors) {
                        int pid = fork();
                        
@@ -859,8 +880,105 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                        int temp = processIDS[i];
                        wait(&temp);
                }
-               
-               //append files
+#else
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the trimData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<trimData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=0; i<processors-1; i++){
+                       
+            string extension = "";
+                       if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
+            vector<vector<string> > tempFASTAFileNames = fastaFileNames;
+            vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
+            vector<vector<string> > tempNameFileNames = nameFileNames;
+            
+            if(allFiles){
+                ofstream temp;
+                
+                for(int i=0;i<tempFASTAFileNames.size();i++){
+                    for(int j=0;j<tempFASTAFileNames[i].size();j++){
+                        if (tempFASTAFileNames[i][j] != "") {
+                            tempFASTAFileNames[i][j] += extension;
+                            m->openOutputFile(tempFASTAFileNames[i][j], temp);                 temp.close();
+                            
+                            if(qFileName != ""){
+                                tempPrimerQualFileNames[i][j] += extension;
+                                m->openOutputFile(tempPrimerQualFileNames[i][j], temp);                temp.close();
+                            }
+                            if(nameFile != ""){
+                                tempNameFileNames[i][j] += extension;
+                                m->openOutputFile(tempNameFileNames[i][j], temp);              temp.close();
+                            }
+                        }
+                    }
+                }
+            }
+
+            
+                       trimData* tempTrim = new trimData(filename,
+                                              qFileName, nameFile,
+                                              (trimFASTAFileName+extension),
+                                              (scrapFASTAFileName+extension),
+                                              (trimQualFileName+extension),
+                                              (scrapQualFileName+extension),
+                                              (trimNameFileName+extension),
+                                              (scrapNameFileName+extension),
+                                              (groupFile+extension),
+                                              tempFASTAFileNames,
+                                              tempPrimerQualFileNames,
+                                              tempNameFileNames,
+                                              lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
+                                              pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, 
+                                             primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
+                                              qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
+                                             minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
+                       pDataArray.push_back(tempTrim);
+            
+                       hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+               }
+        
+        //parent do my part
+               ofstream temp;
+               m->openOutputFile(trimFASTAFileName, temp);             temp.close();
+               m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
+               if(qFileName != ""){
+                       m->openOutputFile(trimQualFileName, temp);              temp.close();
+                       m->openOutputFile(scrapQualFileName, temp);             temp.close();
+               }
+               if (nameFile != "") {
+                       m->openOutputFile(trimNameFileName, temp);              temp.close();
+                       m->openOutputFile(scrapNameFileName, temp);             temp.close();
+               }
+        
+               driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
+        processIDS.push_back(processors-1);
+
+        
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
+                map<string, int>::iterator it2 = groupCounts.find(it->first);
+                if (it2 == groupCounts.end()) {        groupCounts[it->first] = it->second; }
+                else { groupCounts[it->first] += it->second; }
+            }
+            CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+        
+#endif         
+        
+        
+        //append files
                for(int i=0;i<processIDS.size();i++){
                        
                        m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
@@ -911,6 +1029,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                }
                        }
                        
+            #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        if(createGroup){
                                ifstream in;
                                string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
@@ -923,7 +1042,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                if (tempNum != 0) {
                                        while (!in.eof()) { 
                                                in >> group >> tempNum; m->gobble(in);
-                               
+                        
                                                map<string, int>::iterator it = groupCounts.find(group);
                                                if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
                                                else { groupCounts[it->first] += tempNum; }
@@ -931,11 +1050,10 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                }
                                in.close(); m->mothurRemove(tempFile);
                        }
-                       
+            #endif
                }
-       
-               return exitCommand;
-#endif         
+
+        return exitCommand;
        }
        catch(exception& e) {
                m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
@@ -945,14 +1063,16 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
 
 /**************************************************************************************************/
 
-int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long long>& fastaFilePos, vector<unsigned long long>& qfileFilePos) {
+int TrimSeqsCommand::setLines(string filename, string qfilename) {
        try {
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+        
+        vector<unsigned long long> fastaFilePos;
+               vector<unsigned long long> qfileFilePos;
+               
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                //set file positions for fasta file
                fastaFilePos = m->divideFile(filename, processors);
                
-               if (qfilename == "") { return processors; }
-               
                //get name of first sequence in each chunk
                map<string, int> firstSeqNames;
                for (int i = 0; i < (fastaFilePos.size()-1); i++) {
@@ -965,66 +1085,103 @@ int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned
                
                        in.close();
                }
-                               
-               //seach for filePos of each first name in the qfile and save in qfileFilePos
-               ifstream inQual;
-               m->openInputFile(qfilename, inQual);
-               
-               string input;
-               while(!inQual.eof()){   
-                       input = m->getline(inQual);
-
-                       if (input.length() != 0) {
-                               if(input[0] == '>'){ //this is a sequence name line
-                                       istringstream nameStream(input);
-                                       
-                                       string sname = "";  nameStream >> sname;
-                                       sname = sname.substr(1);
-                                       
-                                       map<string, int>::iterator it = firstSeqNames.find(sname);
-                                       
-                                       if(it != firstSeqNames.end()) { //this is the start of a new chunk
-                                               unsigned long long pos = inQual.tellg(); 
-                                               qfileFilePos.push_back(pos - input.length() - 1);       
-                                               firstSeqNames.erase(it);
-                                       }
-                               }
-                       }
-                       
-                       if (firstSeqNames.size() == 0) { break; }
-               }
-               inQual.close();
                
-               
-               if (firstSeqNames.size() != 0) { 
-                       for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
-                               m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
-                       }
-                       qFileName = "";
-                       return processors;
-               }
-
-               //get last file position of qfile
-               FILE * pFile;
-               unsigned long long size;
-               
-               //get num bytes in file
-               pFile = fopen (qfilename.c_str(),"rb");
-               if (pFile==NULL) perror ("Error opening file");
-               else{
-                       fseek (pFile, 0, SEEK_END);
-                       size=ftell (pFile);
-                       fclose (pFile);
-               }
-               
-               qfileFilePos.push_back(size);
+               if(qfilename != "")     {
+            //seach for filePos of each first name in the qfile and save in qfileFilePos
+            ifstream inQual;
+            m->openInputFile(qfilename, inQual);
+            
+            string input;
+            while(!inQual.eof()){      
+                input = m->getline(inQual);
+                
+                if (input.length() != 0) {
+                    if(input[0] == '>'){ //this is a sequence name line
+                        istringstream nameStream(input);
+                        
+                        string sname = "";  nameStream >> sname;
+                        sname = sname.substr(1);
+                        
+                        map<string, int>::iterator it = firstSeqNames.find(sname);
+                        
+                        if(it != firstSeqNames.end()) { //this is the start of a new chunk
+                            unsigned long long pos = inQual.tellg(); 
+                            qfileFilePos.push_back(pos - input.length() - 1);  
+                            firstSeqNames.erase(it);
+                        }
+                    }
+                }
+                
+                if (firstSeqNames.size() == 0) { break; }
+            }
+            inQual.close();
+            
+            
+            if (firstSeqNames.size() != 0) { 
+                for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
+                    m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
+                }
+                qFileName = "";
+                return processors;
+            }
+            
+            //get last file position of qfile
+            FILE * pFile;
+            unsigned long long size;
+            
+            //get num bytes in file
+            pFile = fopen (qfilename.c_str(),"rb");
+            if (pFile==NULL) perror ("Error opening file");
+            else{
+                fseek (pFile, 0, SEEK_END);
+                size=ftell (pFile);
+                fclose (pFile);
+            }
+            
+            qfileFilePos.push_back(size);
+        }
+        
+        for (int i = 0; i < (fastaFilePos.size()-1); i++) {
+                       lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
+                       if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
+               }       
+               if(qfilename == "")     {       qLines = lines; } //files with duds
                
                return processors;
                
                #else
-               
-                       fastaFilePos.push_back(0); qfileFilePos.push_back(0);
-                       fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
+            
+        if (processors == 1) { //save time
+                       //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
+                       //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
+            lines.push_back(linePair(0, 1000));
+            if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
+        }else{
+            int numFastaSeqs = 0;
+            fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
+            if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
+        
+            if (qfilename != "") { 
+                int numQualSeqs = 0;
+                qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
+                
+                if (numFastaSeqs != numQualSeqs) {
+                    m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
+                }
+            }
+        
+            //figure out how many sequences you have to process
+            int numSeqsPerProcessor = numFastaSeqs / processors;
+            for (int i = 0; i < processors; i++) {
+                int startIndex =  i * numSeqsPerProcessor;
+                if(i == (processors - 1)){     numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
+                lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
+                cout << fastaFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
+                if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
+            }
+        
+            if(qfilename == "")        {       qLines = lines; } //files with duds
+        }
                        return 1;
                
                #endif
@@ -1088,9 +1245,10 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                        primerNameVector.push_back(group);
                                }
                                else if(type == "REVERSE"){
-                                       Sequence oligoRC("reverse", oligo);
-                                       oligoRC.reverseComplement();
-                                       revPrimer.push_back(oligoRC.getUnaligned());
+                                       //Sequence oligoRC("reverse", oligo);
+                                       //oligoRC.reverseComplement();
+                    string oligoRC = reverseOligo(oligo);
+                                       revPrimer.push_back(oligoRC);
                                }
                                else if(type == "BARCODE"){
                                        inOligos >> group;
@@ -1101,6 +1259,10 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                                
                                        barcodes[oligo]=indexBarcode; indexBarcode++;
                                        barcodeNameVector.push_back(group);
+                               }else if(type == "LINKER"){
+                                       linker.push_back(oligo);
+                               }else if(type == "SPACER"){
+                                       spacer.push_back(oligo);
                                }
                                else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
                        }
@@ -1192,6 +1354,8 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                }
                numFPrimers = primers.size();
                numRPrimers = revPrimer.size();
+        numLinkers = linker.size();
+        numSpacers = spacer.size();
                
                bool allBlank = true;
                for (int i = 0; i < barcodeNameVector.size(); i++) {
@@ -1307,6 +1471,46 @@ bool TrimSeqsCommand::cullHomoP(Sequence& seq){
        }
        
 }
+//********************************************************************/
+// Builds the reverse complement of an oligo.
+// The input is read from its last character to its first and each character is
+// replaced by its complement: A<->T (U is treated like T and becomes A), G<->C,
+// R<->Y, M<->K, B<->V, D<->H, while W and S map to themselves.
+// Only the upper-case codes listed above are recognized; every other character
+// is emitted as 'N'.
+string TrimSeqsCommand::reverseOligo(string oligo){
+    try {
+        string revComp = "";
+
+        for (int pos = oligo.length() - 1; pos >= 0; pos--) {
+
+            char complement;
+
+            switch (oligo[pos]) {
+                case 'A':   complement = 'T';   break;
+                case 'T':
+                case 'U':   complement = 'A';   break;
+
+                case 'G':   complement = 'C';   break;
+                case 'C':   complement = 'G';   break;
+
+                case 'R':   complement = 'Y';   break;
+                case 'Y':   complement = 'R';   break;
+
+                case 'M':   complement = 'K';   break;
+                case 'K':   complement = 'M';   break;
+
+                case 'W':   complement = 'W';   break;
+                case 'S':   complement = 'S';   break;
+
+                case 'B':   complement = 'V';   break;
+                case 'V':   complement = 'B';   break;
+
+                case 'D':   complement = 'H';   break;
+                case 'H':   complement = 'D';   break;
+
+                default:    complement = 'N';   break;  //anything unrecognized
+            }
+
+            revComp += complement;
+        }
+
+        return revComp;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
+        exit(1);
+    }
+}
 
 //***************************************************************************************************************
 
index 137cb735719fba42f0d103af842de80c9930cc3e..9ba64c4b62b7c7e87f213472b730a91bd0b37305 100644 (file)
@@ -15,6 +15,8 @@
 #include "sequence.hpp"
 #include "qualityscores.h"
 #include "groupmap.h"
+#include "trimoligos.h"
+
 
 class TrimSeqsCommand : public Command {
 public:
@@ -35,25 +37,27 @@ public:
 private:
        
        GroupMap* groupMap;
-       
-       struct linePair {
-               unsigned long long start;
-               unsigned long long end;
-               linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
-       };
-       
+    
+    struct linePair {
+        unsigned long long start;
+        unsigned long long end;
+        linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
+        linePair() {}
+    };
+
        bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
        bool keepFirstTrim(Sequence&, QualityScores&);
        bool removeLastTrim(Sequence&, QualityScores&);
        bool cullLength(Sequence&);
        bool cullHomoP(Sequence&);
        bool cullAmbigs(Sequence&);
+    string reverseOligo(string);
 
        bool abort, createGroup;
        string fastaFile, oligoFile, qFileName, groupfile, nameFile, outputDir;
        
-       bool flip, allFiles, qtrim;
-       int numFPrimers, numRPrimers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, comboStarts;
+       bool flip, allFiles, qtrim, keepforward;
+       int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
        int qWindowSize, qWindowStep, keepFirst, removeLast;
        double qRollAverage, qThreshold, qWindowAverage, qAverage;
        vector<string> revPrimer, outputNames;
@@ -61,6 +65,8 @@ private:
        map<string, int> barcodes;
        vector<string> groupVector;
        map<string, int> primers;
+    vector<string>  linker;
+    vector<string>  spacer;
        map<string, int> combos;
        map<string, int> groupToIndex;
        vector<string> primerNameVector;        //needed here?
@@ -69,12 +75,418 @@ private:
        map<string, string> nameMap;
 
        vector<int> processIDS;   //processid
-       vector<linePair*> lines;
-       vector<linePair*> qLines;
+       vector<linePair> lines;
+       vector<linePair> qLines;
        
-       int driverCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair*, linePair*);  
+       int driverCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair, linePair);    
        int createProcessesCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >);
-       int setLines(string, string, vector<unsigned long long>&, vector<unsigned long long>&);
+       int setLines(string, string);
+};
+
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct trimData {
+    // NOTE(review): start/end are never assigned by either constructor; MyTrimThreadFunction
+    // works from lineStart/lineEnd/qlineStart below - confirm whether these two are still needed.
+    unsigned long long start, end;
+    MothurOut* m;   //used by the thread function for file helpers and the control_pressed flag
+
+    //input files (fasta, quality, names) and the per-thread output files
+    string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, groupFileName, nameFile;
+    vector<vector<string> > fastaFileNames;
+    vector<vector<string> > qualFileNames;
+    vector<vector<string> > nameFileNames;
+
+    //lineStart / qlineStart are the positions the thread function seeks to in the fasta / quality file.
+    //lineEnd is used by the thread function as the NUMBER of sequences to process, not as a file position.
+    unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
+
+    //trimming and culling options copied from the command
+    bool flip, allFiles, qtrim, keepforward, createGroup;
+    int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
+    int qWindowSize, qWindowStep, keepFirst, removeLast, count;
+    double qRollAverage, qThreshold, qWindowAverage, qAverage;
+
+    //oligos
+    vector<string> revPrimer;
+    map<string, int> barcodes;
+    map<string, int> primers;
+    vector<string>  linker;
+    vector<string>  spacer;
+    map<string, int> combos;
+    vector<string> primerNameVector;
+    vector<string> barcodeNameVector;
+
+    //per-thread group tallies; createProcessesCreateTrim adds these into its own groupCounts after the threads finish
+    map<string, int> groupCounts;
+    map<string, string> nameMap;
+
+    //leaves every member default-initialized (the built-in types are NOT zeroed)
+    trimData(){}
+
+    //Copies every argument into the matching member. The num* counters are derived from the sizes
+    //of the oligo containers rather than being passed in, and count starts at 0.
+    //WindowAverage is taken as a double: qWindowAverage is a double both here and in TrimSeqsCommand,
+    //so an int parameter silently dropped the fractional part of the threshold for threaded runs.
+    trimData(string fn, string qn, string nf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
+                      int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa,
+                      vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
+                      int WindowStep, int WindowSize, double WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
+                      int minL, int maxA, int maxH, int maxL, bool fli, map<string, string> nm) {
+        filename = fn;
+        qFileName = qn;
+        nameFile = nf;
+        trimFileName = tn;
+        scrapFileName = sn;
+        trimQFileName = tqn;
+        scrapQFileName = sqn;
+        trimNFileName = tnn;
+        scrapNFileName = snn;
+        groupFileName = gn;
+        fastaFileNames = ffn;
+        qualFileNames = qfn;
+        nameFileNames = nfn;
+        lineStart = lstart;
+        lineEnd = lend;
+        qlineStart = qstart;
+        qlineEnd = qend;
+        m = mout;
+
+        pdiffs = pd;
+        bdiffs = bd;
+        ldiffs = ld;
+        sdiffs = sd;
+        tdiffs = td;
+        barcodes = bar;
+        primers = pri;      numFPrimers = primers.size();
+        revPrimer = revP;   numRPrimers = revPrimer.size();
+        linker = li;        numLinkers = linker.size();
+        spacer = spa;       numSpacers = spacer.size();
+        primerNameVector = priNameVector;
+        barcodeNameVector = barNameVector;
+
+        createGroup = cGroup;
+        allFiles = aFiles;
+        keepforward = keepF;
+        keepFirst = keepfi;
+        removeLast = removeL;
+        qWindowStep = WindowStep;
+        qWindowSize = WindowSize;
+        qWindowAverage = WindowAverage;
+        qtrim = trim;
+        qThreshold = Threshold;
+        qAverage = Average;
+        qRollAverage = RollAverage;
+        minLength = minL;
+        maxAmbig = maxA;
+        maxHomoP = maxH;
+        maxLength = maxL;
+        flip = fli;
+        nameMap = nm;
+        count = 0;
+    }
 };
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ 
+       trimData* pDataArray;
+       pDataArray = (trimData*)lpParam;
+       
+       try {
+        ofstream trimFASTAFile;
+               pDataArray->m->openOutputFile(pDataArray->trimFileName, trimFASTAFile);
+               
+               ofstream scrapFASTAFile;
+               pDataArray->m->openOutputFile(pDataArray->scrapFileName, scrapFASTAFile);
+               
+               ofstream trimQualFile;
+               ofstream scrapQualFile;
+               if(pDataArray->qFileName != ""){
+                       pDataArray->m->openOutputFile(pDataArray->trimQFileName, trimQualFile);
+                       pDataArray->m->openOutputFile(pDataArray->scrapQFileName, scrapQualFile);
+               }
+               
+               ofstream trimNameFile;
+               ofstream scrapNameFile;
+               if(pDataArray->nameFile != ""){
+                       pDataArray->m->openOutputFile(pDataArray->trimNFileName, trimNameFile);
+                       pDataArray->m->openOutputFile(pDataArray->scrapNFileName, scrapNameFile);
+               }
+               
+               
+               ofstream outGroupsFile;
+               if (pDataArray->createGroup){   pDataArray->m->openOutputFile(pDataArray->groupFileName, outGroupsFile);   }
+               if(pDataArray->allFiles){
+                       for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file
+                               for (int j = 0; j < pDataArray->fastaFileNames[i].size(); j++) { //clears old file
+                                       if (pDataArray->fastaFileNames[i][j] != "") {
+                                               ofstream temp;
+                                               pDataArray->m->openOutputFile(pDataArray->fastaFileNames[i][j], temp);                  temp.close();
+                                               if(pDataArray->qFileName != ""){
+                                                       pDataArray->m->openOutputFile(pDataArray->qualFileNames[i][j], temp);                   temp.close();
+                                               }
+                                               
+                                               if(pDataArray->nameFile != ""){
+                                                       pDataArray->m->openOutputFile(pDataArray->nameFileNames[i][j], temp);                   temp.close();
+                                               }
+                                       }
+                               }
+                       }
+               }
+               
+               ifstream inFASTA;
+               pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
+               if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) {
+                       inFASTA.seekg(0);
+               }else { //this accounts for the difference in line endings. 
+                       inFASTA.seekg(pDataArray->lineStart-1); pDataArray->m->gobble(inFASTA); 
+               }
+               
+               ifstream qFile;
+               if(pDataArray->qFileName != "") {
+                       pDataArray->m->openInputFile(pDataArray->qFileName, qFile);
+                       if ((pDataArray->qlineStart == 0) || (pDataArray->qlineStart == 1)) {
+                qFile.seekg(0);
+            }else { //this accounts for the difference in line endings. 
+                qFile.seekg(pDataArray->qlineStart-1); pDataArray->m->gobble(qFile); 
+            } 
+               }
+               
+               
+               TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);
+        
+               pDataArray->count = pDataArray->lineEnd;
+               for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
+                                  
+                       if (pDataArray->m->control_pressed) { 
+                               inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
+                               if (pDataArray->createGroup) {   outGroupsFile.close();   }
+                               if(pDataArray->qFileName != ""){ qFile.close(); }
+                               return 0;
+                       }
+                       
+                       int success = 1;
+                       string trashCode = "";
+                       int currentSeqsDiffs = 0;
+            
+                       Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
+                       
+                       QualityScores currQual;
+                       if(pDataArray->qFileName != ""){
+                               currQual = QualityScores(qFile);  pDataArray->m->gobble(qFile);
+                       }
+                       
+                       string origSeq = currSeq.getUnaligned();
+                       if (origSeq != "") {
+                               
+                               int barcodeIndex = 0;
+                               int primerIndex = 0;
+                               
+                if(pDataArray->numLinkers != 0){
+                                       success = trimOligos.stripLinker(currSeq, currQual);
+                                       if(success > pDataArray->ldiffs)                {       trashCode += 'k';       }
+                                       else{ currentSeqsDiffs += success;  }
+                               }
+                
+                               if(pDataArray->barcodes.size() != 0){
+                                       success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
+                                       if(success > pDataArray->bdiffs)                {       trashCode += 'b';       }
+                                       else{ currentSeqsDiffs += success;  }
+                               }
+                               
+                if(pDataArray->numSpacers != 0){
+                                       success = trimOligos.stripSpacer(currSeq, currQual);
+                                       if(success > pDataArray->sdiffs)                {       trashCode += 's';       }
+                                       else{ currentSeqsDiffs += success;  }
+
+                               }
+                
+                               if(pDataArray->numFPrimers != 0){
+                                       success = trimOligos.stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward);
+                                       if(success > pDataArray->pdiffs)                {       trashCode += 'f';       }
+                                       else{ currentSeqsDiffs += success;  }
+                               }
+                               
+                               if (currentSeqsDiffs > pDataArray->tdiffs)      {       trashCode += 't';   }
+                               
+                               if(pDataArray->numRPrimers != 0){
+                                       success = trimOligos.stripReverse(currSeq, currQual);
+                                       if(!success)                            {       trashCode += 'r';       }
+                               }
+                
+                               if(pDataArray->keepFirst != 0){
+                                       //success = keepFirstTrim(currSeq, currQual);
+                    success = 1;
+                    if(currQual.getName() != ""){
+                        currQual.trimQScores(-1, pDataArray->keepFirst);
+                    }
+                    currSeq.trim(pDataArray->keepFirst);
+                               }
+                               
+                               if(pDataArray->removeLast != 0){
+                                       //success = removeLastTrim(currSeq, currQual);
+                    success = 0;
+                    int length = currSeq.getNumBases() - pDataArray->removeLast;
+                    
+                    if(length > 0){
+                        if(currQual.getName() != ""){
+                            currQual.trimQScores(-1, length);
+                        }
+                        currSeq.trim(length);
+                        success = 1;
+                    }
+                    else{ success = 0; }
+                    
+                                       if(!success)                            {       trashCode += 'l';       }
+                               }
+                
+                               
+                               if(pDataArray->qFileName != ""){
+                                       int origLength = currSeq.getNumBases();
+                                       
+                                       if(pDataArray->qThreshold != 0)                 {       success = currQual.stripQualThreshold(currSeq, pDataArray->qThreshold);                 }
+                                       else if(pDataArray->qAverage != 0)              {       success = currQual.cullQualAverage(currSeq, pDataArray->qAverage);                              }
+                                       else if(pDataArray->qRollAverage != 0)  {       success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage);  }
+                                       else if(pDataArray->qWindowAverage != 0){       success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage);       }
+                                       else                                            {       success = 1;                            }
+                                       
+                                       //you don't want to trim, if it fails above then scrap it
+                                       if ((!pDataArray->qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
+                                       
+                                       if(!success)                            {       trashCode += 'q';       }
+                               }                               
+                
+                               if(pDataArray->minLength > 0 || pDataArray->maxLength > 0){
+                                       //success = cullLength(currSeq);
+                    int length = currSeq.getNumBases();
+                    success = 0;       //guilty until proven innocent
+                    if(length >= pDataArray->minLength && pDataArray->maxLength == 0)                  {       success = 1;    }
+                    else if(length >= pDataArray->minLength && length <= pDataArray->maxLength)        {       success = 1;    }
+                    else                                                                                               {       success = 0;    }
+                    
+                                       if(!success)                            {       trashCode += 'l';       }
+                               }
+                               if(pDataArray->maxHomoP > 0){
+                                       //success = cullHomoP(currSeq);
+                    int longHomoP = currSeq.getLongHomoPolymer();
+                    success = 0;       //guilty until proven innocent
+                    if(longHomoP <= pDataArray->maxHomoP){     success = 1;    }
+                    else                                       {       success = 0;    }
+                    
+                                       if(!success)                            {       trashCode += 'h';       }
+                               }
+                               if(pDataArray->maxAmbig != -1){
+                                       //success = cullAmbigs(currSeq);
+                    int numNs = currSeq.getAmbigBases();
+                    success = 0;       //guilty until proven innocent
+                    if(numNs <= pDataArray->maxAmbig)  {       success = 1;    }
+                    else                                       {       success = 0;    }
+                                       if(!success)                            {       trashCode += 'n';       }
+                               }
+                               
+                               if(pDataArray->flip){           // should go last                       
+                                       currSeq.reverseComplement();
+                                       if(pDataArray->qFileName != ""){
+                                               currQual.flipQScores(); 
+                                       }
+                               }
+                               
+                               if(trashCode.length() == 0){
+                                       currSeq.setAligned(currSeq.getUnaligned());
+                                       currSeq.printSequence(trimFASTAFile);
+                                       
+                                       if(pDataArray->qFileName != ""){
+                                               currQual.printQScores(trimQualFile);
+                                       }
+                                       
+                                       if(pDataArray->nameFile != ""){
+                                               map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
+                                               if (itName != pDataArray->nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
+                                               else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
+                                       }
+                                       
+                                       if (pDataArray->createGroup) {
+                                               if(pDataArray->barcodes.size() != 0){
+                                                       string thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
+                                                       if (pDataArray->primers.size() != 0) { 
+                                                               if (pDataArray->primerNameVector[primerIndex] != "") { 
+                                                                       if(thisGroup != "") {
+                                                                               thisGroup += "." + pDataArray->primerNameVector[primerIndex]; 
+                                                                       }else {
+                                                                               thisGroup = pDataArray->primerNameVector[primerIndex]; 
+                                                                       }
+                                                               } 
+                                                       }
+                                                       
+                                                       outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
+                                                       
+                                                       if (pDataArray->nameFile != "") {
+                                                               map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
+                                                               if (itName != pDataArray->nameMap.end()) { 
+                                                                       vector<string> thisSeqsNames; 
+                                                                       pDataArray->m->splitAtChar(itName->second, thisSeqsNames, ',');
+                                                                       for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
+                                                                               outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
+                                                                       }
+                                                               }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }                                                   
+                                                       }
+                                                       
+                                                       map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
+                                                       if (it == pDataArray->groupCounts.end()) {      pDataArray->groupCounts[thisGroup] = 1; }
+                                                       else { pDataArray->groupCounts[it->first]++; }
+                            
+                                               }
+                                       }
+                                       
+                                       if(pDataArray->allFiles){
+                                               ofstream output;
+                                               pDataArray->m->openOutputFileAppend(pDataArray->fastaFileNames[barcodeIndex][primerIndex], output);
+                                               currSeq.printSequence(output);
+                                               output.close();
+                                               
+                                               if(pDataArray->qFileName != ""){
+                                                       pDataArray->m->openOutputFileAppend(pDataArray->qualFileNames[barcodeIndex][primerIndex], output);
+                                                       currQual.printQScores(output);
+                                                       output.close();                                                 
+                                               }
+                                               
+                                               if(pDataArray->nameFile != ""){
+                                                       map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
+                                                       if (itName != pDataArray->nameMap.end()) { 
+                                                               pDataArray->m->openOutputFileAppend(pDataArray->nameFileNames[barcodeIndex][primerIndex], output);
+                                                               output << itName->first << '\t' << itName->second << endl; 
+                                                               output.close();
+                                                       }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
+                                               }
+                                       }
+                               }
+                               else{
+                                       if(pDataArray->nameFile != ""){ //needs to be before the currSeq name is changed
+                                               map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
+                                               if (itName != pDataArray->nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
+                                               else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
+                                       }
+                                       currSeq.setName(currSeq.getName() + '|' + trashCode);
+                                       currSeq.setUnaligned(origSeq);
+                                       currSeq.setAligned(origSeq);
+                                       currSeq.printSequence(scrapFASTAFile);
+                                       if(pDataArray->qFileName != ""){
+                                               currQual.printQScores(scrapQualFile);
+                                       }
+                               }
+                               
+                       }
+                       
+                       //report progress
+                       if((i) % 1000 == 0){    pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine();               }
+                       
+               }
+               //report progress
+               if((pDataArray->count) % 1000 != 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
+               
+               
+               inFASTA.close();
+               trimFASTAFile.close();
+               scrapFASTAFile.close();
+               if (pDataArray->createGroup) {   outGroupsFile.close();   }
+               if(pDataArray->qFileName != "") {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
+               if(pDataArray->nameFile != "")  {       scrapNameFile.close(); trimNameFile.close();    }
+               
+        return 0;
+            
+        }
+        catch(exception& e) {
+            pDataArray->m->errorOut(e, "TrimSeqsCommand", "MyTrimThreadFunction");
+            exit(1);
+        }
+    } 
+#endif
+    
+
+/**************************************************************************************************/
 
 #endif
index f9dc450444b242555710f91dc3567f96e69e6d3d..b3a54c929ace3221fe3ba736a3b5fad7f87818f9 100644 (file)
@@ -342,7 +342,7 @@ int UnifracWeightedCommand::execute() {
                                
                                lines.clear();
                                
-                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                        if(processors != 1){
                                                int numPairs = namesOfGroupCombos.size();
                                                int numPairsPerProcessor = numPairs / processors;
@@ -361,7 +361,7 @@ int UnifracWeightedCommand::execute() {
                                //get scores for random trees
                                for (int j = 0; j < iters; j++) {
                                
-                                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                                if(processors == 1){
                                                        driver(T[i],  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
                                                }else{
@@ -459,7 +459,7 @@ int UnifracWeightedCommand::execute() {
 
 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                vector<int> processIDS;
                
index 431323f066665bd07edce8da80c32dadb1600cf2..d4fd32731336913a5ab767736cf52ab1ffc2cd0a 100644 (file)
@@ -48,7 +48,7 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) {
                        }
                }
 
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        if(processors == 1){
                                data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
                        }else{
@@ -83,7 +83,7 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) {
 
 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                vector<int> processIDS;
                
@@ -291,7 +291,7 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st
                        }
                }
 
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        if(processors == 1){
                                data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true);
                        }else{
@@ -326,7 +326,7 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st
 
 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                vector<int> processIDS;
                
index 8aa166610a3b7d3a38bfd882db6206d6982af4de..f0ca81e99748534f3eff5b28d401e04ec8b116b6 100644 (file)
--- a/uvest.cpp
+++ b/uvest.cpp
@@ -29,7 +29,7 @@ EstOutput UVEst::getUVest(vector<SharedRAbundVector*> shared) {
                sumSharedA1 = the sum of all shared otus in A where B = 1
                sumSharedB1 = the sum of all shared otus in B where A = 1 */
                
-               for (int i = 0; i < shared[0]->size(); i++) {
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
                        //store in temps to avoid multiple repetitive function calls
                        tempA = shared[0]->getAbundance(i);
                        tempB = shared[1]->getAbundance(i);
index d8a4b857ad19d8057ed4eb11a592aa11e6787721..7a31da4d55d398f47d022de7b80396bb44f6c6b4 100644 (file)
@@ -34,7 +34,7 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) {
                        }
                }
                
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        if(processors == 1){
                                data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
                        }else{
@@ -69,7 +69,7 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) {
 
 EstOutput Weighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                vector<int> processIDS;