]> git.donarmstrong.com Git - mothur.git/commitdiff
added Jensen-Shannon calc. working on get.communitytype command. fixed bug in get...
authorSarah Westcott <mothur.westcott@gmail.com>
Thu, 9 Jan 2014 14:44:50 +0000 (09:44 -0500)
committerSarah Westcott <mothur.westcott@gmail.com>
Thu, 9 Jan 2014 14:44:50 +0000 (09:44 -0500)
32 files changed:
Mothur.xcodeproj/project.pbxproj
collectsharedcommand.cpp
communitytype.cpp [new file with mode: 0644]
communitytype.h [new file with mode: 0644]
flowdata.cpp
getmetacommunitycommand.cpp
getmetacommunitycommand.h
getoturepcommand.cpp
kmeans.cpp [new file with mode: 0644]
kmeans.h [new file with mode: 0644]
linearalgebra.cpp
linearalgebra.h
matrixoutputcommand.cpp
matrixoutputcommand.h
metastatscommand.cpp
pam.cpp [new file with mode: 0644]
pam.h [new file with mode: 0644]
qFinderDMM.cpp
qFinderDMM.h
qualityscores.cpp
qualityscores.h
seqsummarycommand.cpp
shannonrange.cpp [new file with mode: 0644]
shannonrange.h [new file with mode: 0644]
sharedjsd.cpp [new file with mode: 0644]
sharedjsd.h [new file with mode: 0644]
summarysharedcommand.cpp
summarysharedcommand.h
trimflowscommand.cpp
trimseqscommand.cpp
trimseqscommand.h
validcalculator.cpp

index f3b6a09ea7e690a55a31fcbe6b9fc457fa59ca2a..26456a3ee2c230477db7c945e2b9586dc45aea8d 100644 (file)
@@ -18,6 +18,7 @@
                A70056EB156AB6E500924A2D /* removeotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056EA156AB6E500924A2D /* removeotulabelscommand.cpp */; };
                A70332B712D3A13400761E33 /* makefile in Sources */ = {isa = PBXBuildFile; fileRef = A70332B512D3A13400761E33 /* makefile */; };
                A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7128B1C16B7002600723BE4 /* getdistscommand.cpp */; };
                A70056EB156AB6E500924A2D /* removeotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056EA156AB6E500924A2D /* removeotulabelscommand.cpp */; };
                A70332B712D3A13400761E33 /* makefile in Sources */ = {isa = PBXBuildFile; fileRef = A70332B512D3A13400761E33 /* makefile */; };
                A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7128B1C16B7002600723BE4 /* getdistscommand.cpp */; };
+               A7132EB3184E792700AAA402 /* communitytype.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7132EB2184E792700AAA402 /* communitytype.cpp */; };
                A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; };
                A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; };
                A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7190B201768E0DF00A9AFA6 /* lefsecommand.cpp */; };
                A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; };
                A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; };
                A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7190B201768E0DF00A9AFA6 /* lefsecommand.cpp */; };
@@ -29,6 +30,7 @@
                A721AB71161C572A009860A1 /* kmernode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6D161C572A009860A1 /* kmernode.cpp */; };
                A721AB72161C572A009860A1 /* kmertree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6F161C572A009860A1 /* kmertree.cpp */; };
                A721AB77161C573B009860A1 /* taxonomynode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB73161C573B009860A1 /* taxonomynode.cpp */; };
                A721AB71161C572A009860A1 /* kmernode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6D161C572A009860A1 /* kmernode.cpp */; };
                A721AB72161C572A009860A1 /* kmertree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6F161C572A009860A1 /* kmertree.cpp */; };
                A721AB77161C573B009860A1 /* taxonomynode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB73161C573B009860A1 /* taxonomynode.cpp */; };
+               A7222D731856277C0055A993 /* sharedjsd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7222D721856277C0055A993 /* sharedjsd.cpp */; };
                A724D2B7153C8628000A826F /* makebiomcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A724D2B6153C8628000A826F /* makebiomcommand.cpp */; };
                A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; };
                A7386C251619E52300651424 /* abstractdecisiontree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C241619E52200651424 /* abstractdecisiontree.cpp */; };
                A724D2B7153C8628000A826F /* makebiomcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A724D2B6153C8628000A826F /* makebiomcommand.cpp */; };
                A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; };
                A7386C251619E52300651424 /* abstractdecisiontree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C241619E52200651424 /* abstractdecisiontree.cpp */; };
                A79EEF8616971D4A0006DEC1 /* filtersharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79EEF8516971D4A0006DEC1 /* filtersharedcommand.cpp */; };
                A7A0671A1562946F0095C8C5 /* listotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */; };
                A7A0671F1562AC3E0095C8C5 /* makecontigscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */; };
                A79EEF8616971D4A0006DEC1 /* filtersharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79EEF8516971D4A0006DEC1 /* filtersharedcommand.cpp */; };
                A7A0671A1562946F0095C8C5 /* listotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */; };
                A7A0671F1562AC3E0095C8C5 /* makecontigscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */; };
+               A7A09B1018773C0E00FAA081 /* shannonrange.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A09B0F18773C0E00FAA081 /* shannonrange.cpp */; };
                A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */; };
                A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */; };
                A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; };
                A7B0231516B8244C006BA09E /* removedistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0231416B8244B006BA09E /* removedistscommand.cpp */; };
                A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */; };
                A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */; };
                A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; };
                A7B0231516B8244C006BA09E /* removedistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0231416B8244B006BA09E /* removedistscommand.cpp */; };
+               A7B093C018579F0400843CD1 /* pam.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B093BF18579F0400843CD1 /* pam.cpp */; };
                A7BF221414587886000AD524 /* myPerseus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF221214587886000AD524 /* myPerseus.cpp */; };
                A7BF2232145879B2000AD524 /* chimeraperseuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */; };
                A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; };
                A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */; };
                A7C7DAB915DA758B0059B0CF /* sffmultiplecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */; };
                A7CFA4311755401800D9ED4D /* renameseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */; };
                A7BF221414587886000AD524 /* myPerseus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF221214587886000AD524 /* myPerseus.cpp */; };
                A7BF2232145879B2000AD524 /* chimeraperseuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */; };
                A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; };
                A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */; };
                A7C7DAB915DA758B0059B0CF /* sffmultiplecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */; };
                A7CFA4311755401800D9ED4D /* renameseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */; };
+               A7D395C4184FA3A200A350D7 /* kmeans.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D395C3184FA3A200A350D7 /* kmeans.cpp */; };
                A7D755DA1535F679009BF21A /* treereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D755D91535F679009BF21A /* treereader.cpp */; };
                A7D9378A17B146B5001E90B0 /* wilcox.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D9378917B146B5001E90B0 /* wilcox.cpp */; };
                A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */; };
                A7D755DA1535F679009BF21A /* treereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D755D91535F679009BF21A /* treereader.cpp */; };
                A7D9378A17B146B5001E90B0 /* wilcox.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D9378917B146B5001E90B0 /* wilcox.cpp */; };
                A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */; };
                A70332B512D3A13400761E33 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = "<group>"; };
                A7128B1A16B7001200723BE4 /* getdistscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getdistscommand.h; sourceTree = "<group>"; };
                A7128B1C16B7002600723BE4 /* getdistscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getdistscommand.cpp; sourceTree = "<group>"; };
                A70332B512D3A13400761E33 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = "<group>"; };
                A7128B1A16B7001200723BE4 /* getdistscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getdistscommand.h; sourceTree = "<group>"; };
                A7128B1C16B7002600723BE4 /* getdistscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getdistscommand.cpp; sourceTree = "<group>"; };
+               A7132EAE184E76EB00AAA402 /* communitytype.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = communitytype.h; sourceTree = "<group>"; };
+               A7132EB2184E792700AAA402 /* communitytype.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = communitytype.cpp; sourceTree = "<group>"; };
                A713EBAA12DC7613000092AC /* readphylipvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylipvector.h; sourceTree = "<group>"; };
                A713EBAB12DC7613000092AC /* readphylipvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylipvector.cpp; sourceTree = "<group>"; };
                A713EBEB12DC7C5E000092AC /* nmdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = nmdscommand.h; sourceTree = "<group>"; };
                A713EBAA12DC7613000092AC /* readphylipvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylipvector.h; sourceTree = "<group>"; };
                A713EBAB12DC7613000092AC /* readphylipvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylipvector.cpp; sourceTree = "<group>"; };
                A713EBEB12DC7C5E000092AC /* nmdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = nmdscommand.h; sourceTree = "<group>"; };
                A721AB70161C572A009860A1 /* kmertree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kmertree.h; sourceTree = "<group>"; };
                A721AB73161C573B009860A1 /* taxonomynode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = taxonomynode.cpp; sourceTree = "<group>"; };
                A721AB74161C573B009860A1 /* taxonomynode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = taxonomynode.h; sourceTree = "<group>"; };
                A721AB70161C572A009860A1 /* kmertree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kmertree.h; sourceTree = "<group>"; };
                A721AB73161C573B009860A1 /* taxonomynode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = taxonomynode.cpp; sourceTree = "<group>"; };
                A721AB74161C573B009860A1 /* taxonomynode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = taxonomynode.h; sourceTree = "<group>"; };
+               A7222D711856276C0055A993 /* sharedjsd.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sharedjsd.h; sourceTree = "<group>"; };
+               A7222D721856277C0055A993 /* sharedjsd.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedjsd.cpp; sourceTree = "<group>"; };
                A724D2B4153C8600000A826F /* makebiomcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makebiomcommand.h; sourceTree = "<group>"; };
                A724D2B6153C8628000A826F /* makebiomcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makebiomcommand.cpp; sourceTree = "<group>"; };
                A727864212E9E28C00F86ABA /* removerarecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removerarecommand.h; sourceTree = "<group>"; };
                A724D2B4153C8600000A826F /* makebiomcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makebiomcommand.h; sourceTree = "<group>"; };
                A724D2B6153C8628000A826F /* makebiomcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makebiomcommand.cpp; sourceTree = "<group>"; };
                A727864212E9E28C00F86ABA /* removerarecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removerarecommand.h; sourceTree = "<group>"; };
                A7A0671C156294810095C8C5 /* listotulabelscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = listotulabelscommand.h; sourceTree = "<group>"; };
                A7A0671D1562AC230095C8C5 /* makecontigscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makecontigscommand.h; sourceTree = "<group>"; };
                A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makecontigscommand.cpp; sourceTree = "<group>"; };
                A7A0671C156294810095C8C5 /* listotulabelscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = listotulabelscommand.h; sourceTree = "<group>"; };
                A7A0671D1562AC230095C8C5 /* makecontigscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makecontigscommand.h; sourceTree = "<group>"; };
                A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makecontigscommand.cpp; sourceTree = "<group>"; };
+               A7A09B0E18773BF700FAA081 /* shannonrange.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = shannonrange.h; sourceTree = "<group>"; };
+               A7A09B0F18773C0E00FAA081 /* shannonrange.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = shannonrange.cpp; sourceTree = "<group>"; };
                A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sortseqscommand.cpp; sourceTree = "<group>"; };
                A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sortseqscommand.h; sourceTree = "<group>"; };
                A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = otuassociationcommand.cpp; sourceTree = "<group>"; };
                A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sortseqscommand.cpp; sourceTree = "<group>"; };
                A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sortseqscommand.h; sourceTree = "<group>"; };
                A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = otuassociationcommand.cpp; sourceTree = "<group>"; };
                A7AACFBA132FE008003D6C4D /* currentfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = currentfile.h; sourceTree = "<group>"; };
                A7B0231416B8244B006BA09E /* removedistscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removedistscommand.cpp; sourceTree = "<group>"; };
                A7B0231716B8245D006BA09E /* removedistscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removedistscommand.h; sourceTree = "<group>"; };
                A7AACFBA132FE008003D6C4D /* currentfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = currentfile.h; sourceTree = "<group>"; };
                A7B0231416B8244B006BA09E /* removedistscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removedistscommand.cpp; sourceTree = "<group>"; };
                A7B0231716B8245D006BA09E /* removedistscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removedistscommand.h; sourceTree = "<group>"; };
+               A7B093BE18579EF600843CD1 /* pam.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = pam.h; sourceTree = "<group>"; };
+               A7B093BF18579F0400843CD1 /* pam.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pam.cpp; sourceTree = "<group>"; };
                A7BF221214587886000AD524 /* myPerseus.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = myPerseus.cpp; sourceTree = "<group>"; };
                A7BF221314587886000AD524 /* myPerseus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myPerseus.h; sourceTree = "<group>"; };
                A7BF2230145879B2000AD524 /* chimeraperseuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraperseuscommand.h; sourceTree = "<group>"; };
                A7BF221214587886000AD524 /* myPerseus.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = myPerseus.cpp; sourceTree = "<group>"; };
                A7BF221314587886000AD524 /* myPerseus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myPerseus.h; sourceTree = "<group>"; };
                A7BF2230145879B2000AD524 /* chimeraperseuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraperseuscommand.h; sourceTree = "<group>"; };
                A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sffmultiplecommand.cpp; sourceTree = "<group>"; };
                A7CFA42F1755400500D9ED4D /* renameseqscommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = renameseqscommand.h; sourceTree = "<group>"; };
                A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = renameseqscommand.cpp; sourceTree = "<group>"; };
                A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sffmultiplecommand.cpp; sourceTree = "<group>"; };
                A7CFA42F1755400500D9ED4D /* renameseqscommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = renameseqscommand.h; sourceTree = "<group>"; };
                A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = renameseqscommand.cpp; sourceTree = "<group>"; };
+               A7D395C2184FA39300A350D7 /* kmeans.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = kmeans.h; sourceTree = "<group>"; };
+               A7D395C3184FA3A200A350D7 /* kmeans.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = kmeans.cpp; sourceTree = "<group>"; };
                A7D755D71535F665009BF21A /* treereader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treereader.h; sourceTree = "<group>"; };
                A7D755D91535F679009BF21A /* treereader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treereader.cpp; sourceTree = "<group>"; };
                A7D9378917B146B5001E90B0 /* wilcox.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = wilcox.cpp; sourceTree = "<group>"; };
                A7D755D71535F665009BF21A /* treereader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treereader.h; sourceTree = "<group>"; };
                A7D755D91535F679009BF21A /* treereader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treereader.cpp; sourceTree = "<group>"; };
                A7D9378917B146B5001E90B0 /* wilcox.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = wilcox.cpp; sourceTree = "<group>"; };
                                A7E9B6B112D37EC400DA6239 /* commandoptionparser.cpp */,
                                A7E9B6B212D37EC400DA6239 /* commandoptionparser.hpp */,
                                A7DAAFA3133A254E003956EB /* commandparameter.h */,
                                A7E9B6B112D37EC400DA6239 /* commandoptionparser.cpp */,
                                A7E9B6B212D37EC400DA6239 /* commandoptionparser.hpp */,
                                A7DAAFA3133A254E003956EB /* commandparameter.h */,
+                               A7D395C1184FA34300A350D7 /* communitytype */,
                                A7E9B6B412D37EC400DA6239 /* completelinkage.cpp */,
                                A7E9BA4212D3960D00DA6239 /* containers */,
                                A7E9B6B612D37EC400DA6239 /* consensus.h */,
                                A7E9B6B412D37EC400DA6239 /* completelinkage.cpp */,
                                A7E9BA4212D3960D00DA6239 /* containers */,
                                A7E9B6B612D37EC400DA6239 /* consensus.h */,
                                A7E9B77C12D37EC400DA6239 /* overlap.hpp */,
                                A7E9B79B12D37EC400DA6239 /* progress.cpp */,
                                A7E9B79C12D37EC400DA6239 /* progress.hpp */,
                                A7E9B77C12D37EC400DA6239 /* overlap.hpp */,
                                A7E9B79B12D37EC400DA6239 /* progress.cpp */,
                                A7E9B79C12D37EC400DA6239 /* progress.hpp */,
-                               A7548FAF171440ED00B1F05A /* qFinderDMM.h */,
-                               A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */,
                                A77B7187173D4041002163C2 /* randomnumber.h */,
                                A77B7186173D4041002163C2 /* randomnumber.cpp */,
                                A7E9B7A512D37EC400DA6239 /* rarecalc.cpp */,
                                A77B7187173D4041002163C2 /* randomnumber.h */,
                                A77B7186173D4041002163C2 /* randomnumber.cpp */,
                                A7E9B7A512D37EC400DA6239 /* rarecalc.cpp */,
                        name = fortran;
                        sourceTree = "<group>";
                };
                        name = fortran;
                        sourceTree = "<group>";
                };
+               A7D395C1184FA34300A350D7 /* communitytype */ = {
+                       isa = PBXGroup;
+                       children = (
+                               A7132EAE184E76EB00AAA402 /* communitytype.h */,
+                               A7132EB2184E792700AAA402 /* communitytype.cpp */,
+                               A7D395C2184FA39300A350D7 /* kmeans.h */,
+                               A7D395C3184FA3A200A350D7 /* kmeans.cpp */,
+                               A7B093BE18579EF600843CD1 /* pam.h */,
+                               A7B093BF18579F0400843CD1 /* pam.cpp */,
+                               A7548FAF171440ED00B1F05A /* qFinderDMM.h */,
+                               A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */,
+                       );
+                       name = communitytype;
+                       sourceTree = "<group>";
+               };
                A7E9BA3812D3956100DA6239 /* commands */ = {
                        isa = PBXGroup;
                        children = (
                A7E9BA3812D3956100DA6239 /* commands */ = {
                        isa = PBXGroup;
                        children = (
                                A7E9B7E612D37EC400DA6239 /* shannon.h */,
                                A7E9B7E712D37EC400DA6239 /* shannoneven.cpp */,
                                A7E9B7E812D37EC400DA6239 /* shannoneven.h */,
                                A7E9B7E612D37EC400DA6239 /* shannon.h */,
                                A7E9B7E712D37EC400DA6239 /* shannoneven.cpp */,
                                A7E9B7E812D37EC400DA6239 /* shannoneven.h */,
+                               A7A09B0E18773BF700FAA081 /* shannonrange.h */,
+                               A7A09B0F18773C0E00FAA081 /* shannonrange.cpp */,
                                A7E9B7E912D37EC400DA6239 /* sharedace.cpp */,
                                A7E9B7EA12D37EC400DA6239 /* sharedace.h */,
                                A7E9B7EC12D37EC400DA6239 /* sharedanderbergs.cpp */,
                                A7E9B7E912D37EC400DA6239 /* sharedace.cpp */,
                                A7E9B7EA12D37EC400DA6239 /* sharedace.h */,
                                A7E9B7EC12D37EC400DA6239 /* sharedanderbergs.cpp */,
                                A7E9B7F912D37EC400DA6239 /* sharedjclass.h */,
                                A7E9B7FA12D37EC400DA6239 /* sharedjest.cpp */,
                                A7E9B7FB12D37EC400DA6239 /* sharedjest.h */,
                                A7E9B7F912D37EC400DA6239 /* sharedjclass.h */,
                                A7E9B7FA12D37EC400DA6239 /* sharedjest.cpp */,
                                A7E9B7FB12D37EC400DA6239 /* sharedjest.h */,
+                               A7222D711856276C0055A993 /* sharedjsd.h */,
+                               A7222D721856277C0055A993 /* sharedjsd.cpp */,
                                A7E9B7FC12D37EC400DA6239 /* sharedkstest.cpp */,
                                A7E9B7FD12D37EC400DA6239 /* sharedkstest.h */,
                                A7E9B7FE12D37EC400DA6239 /* sharedkulczynski.cpp */,
                                A7E9B7FC12D37EC400DA6239 /* sharedkstest.cpp */,
                                A7E9B7FD12D37EC400DA6239 /* sharedkstest.h */,
                                A7E9B7FE12D37EC400DA6239 /* sharedkulczynski.cpp */,
                                A7D9378A17B146B5001E90B0 /* wilcox.cpp in Sources */,
                                A7F24FC317EA36600021DC9A /* classifyrfsharedcommand.cpp in Sources */,
                                A747EC71181EA0F900345732 /* sracommand.cpp in Sources */,
                                A7D9378A17B146B5001E90B0 /* wilcox.cpp in Sources */,
                                A7F24FC317EA36600021DC9A /* classifyrfsharedcommand.cpp in Sources */,
                                A747EC71181EA0F900345732 /* sracommand.cpp in Sources */,
+                               A7132EB3184E792700AAA402 /* communitytype.cpp in Sources */,
+                               A7D395C4184FA3A200A350D7 /* kmeans.cpp in Sources */,
+                               A7222D731856277C0055A993 /* sharedjsd.cpp in Sources */,
+                               A7B093C018579F0400843CD1 /* pam.cpp in Sources */,
+                               A7A09B1018773C0E00FAA081 /* shannonrange.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index 40ee3d255cc03c3a005bc79d77eeee78fb25d510..56e60d88bc88200a1b21d82d70b4a9c1f22ebdef 100644 (file)
@@ -49,6 +49,7 @@
 #include "memchord.h"
 #include "memeuclidean.h"
 #include "mempearson.h"
 #include "memchord.h"
 #include "memeuclidean.h"
 #include "mempearson.h"
+#include "sharedjsd.h"
 
 
 //**********************************************************************************************************************
 
 
 //**********************************************************************************************************************
@@ -57,7 +58,7 @@ vector<string> CollectSharedCommand::setParameters(){
                CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pshared);
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
                CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq);
                CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pshared);
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
                CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq);
-               CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
+               CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
                CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
                CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
                CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
@@ -141,6 +142,7 @@ string CollectSharedCommand::getOutputPattern(string type) {
         else if (type == "memchord")            {  pattern =  "[filename],memchord";        }
         else if (type == "memeuclidean")        {  pattern =  "[filename],memeuclidean";    }
         else if (type == "mempearson")          {  pattern =  "[filename],mempearson";      }
         else if (type == "memchord")            {  pattern =  "[filename],memchord";        }
         else if (type == "memeuclidean")        {  pattern =  "[filename],memeuclidean";    }
         else if (type == "mempearson")          {  pattern =  "[filename],mempearson";      }
+        else if (type == "jsd")                 {  pattern =  "[filename],jsd";             }
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
         return pattern;
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
         return pattern;
@@ -196,6 +198,7 @@ CollectSharedCommand::CollectSharedCommand(){
                outputTypes["memchord"] = tempOutNames;
                outputTypes["memeuclidean"] = tempOutNames;
                outputTypes["mempearson"] = tempOutNames;
                outputTypes["memchord"] = tempOutNames;
                outputTypes["memeuclidean"] = tempOutNames;
                outputTypes["mempearson"] = tempOutNames;
+        outputTypes["jsd"] = tempOutNames;
                
        }
        catch(exception& e) {
                
        }
        catch(exception& e) {
@@ -268,6 +271,7 @@ CollectSharedCommand::CollectSharedCommand(string option)  {
                        outputTypes["memchord"] = tempOutNames;
                        outputTypes["memeuclidean"] = tempOutNames;
                        outputTypes["mempearson"] = tempOutNames;
                        outputTypes["memchord"] = tempOutNames;
                        outputTypes["memeuclidean"] = tempOutNames;
                        outputTypes["mempearson"] = tempOutNames;
+            outputTypes["jsd"] = tempOutNames;
                        
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
@@ -460,6 +464,9 @@ CollectSharedCommand::CollectSharedCommand(string option)  {
                                                }else if (Estimators[i] == "mempearson") { 
                                                        cDisplays.push_back(new CollectDisplay(new MemPearson(), new SharedOneColumnFile(getOutputFileName("mempearson", variables))));
                                                        outputNames.push_back(getOutputFileName("mempearson", variables)); outputTypes["mempearson"].push_back(getOutputFileName("mempearson", variables));
                                                }else if (Estimators[i] == "mempearson") { 
                                                        cDisplays.push_back(new CollectDisplay(new MemPearson(), new SharedOneColumnFile(getOutputFileName("mempearson", variables))));
                                                        outputNames.push_back(getOutputFileName("mempearson", variables)); outputTypes["mempearson"].push_back(getOutputFileName("mempearson", variables));
+                                               }else if (Estimators[i] == "jsd") {
+                                                       cDisplays.push_back(new CollectDisplay(new JSD(), new SharedOneColumnFile(getOutputFileName("jsd", variables))));
+                                                       outputNames.push_back(getOutputFileName("jsd", variables)); outputTypes["jsd"].push_back(getOutputFileName("jsd", variables));
                                                }
                                                
                                        }
                                                }
                                                
                                        }
diff --git a/communitytype.cpp b/communitytype.cpp
new file mode 100644 (file)
index 0000000..f88932c
--- /dev/null
@@ -0,0 +1,512 @@
+//
+//  communitytype.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 12/3/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "communitytype.h"
+
+/**************************************************************************************************/
+
+//can we get these psi/psi1 calculations into their own math class?
+//psi calcualtions swiped from gsl library...
+
+static const double psi_cs[23] = {
+    -.038057080835217922,
+    .491415393029387130,
+    -.056815747821244730,
+    .008357821225914313,
+    -.001333232857994342,
+    .000220313287069308,
+    -.000037040238178456,
+    .000006283793654854,
+    -.000001071263908506,
+    .000000183128394654,
+    -.000000031353509361,
+    .000000005372808776,
+    -.000000000921168141,
+    .000000000157981265,
+    -.000000000027098646,
+    .000000000004648722,
+    -.000000000000797527,
+    .000000000000136827,
+    -.000000000000023475,
+    .000000000000004027,
+    -.000000000000000691,
+    .000000000000000118,
+    -.000000000000000020
+};
+
+static double apsi_cs[16] = {
+    -.0204749044678185,
+    -.0101801271534859,
+    .0000559718725387,
+    -.0000012917176570,
+    .0000000572858606,
+    -.0000000038213539,
+    .0000000003397434,
+    -.0000000000374838,
+    .0000000000048990,
+    -.0000000000007344,
+    .0000000000001233,
+    -.0000000000000228,
+    .0000000000000045,
+    -.0000000000000009,
+    .0000000000000002,
+    -.0000000000000000
+};
+
+/**************************************************************************************************/
+/* coefficients for Maclaurin summation in hzeta()
+ * B_{2j}/(2j)!
+ */
+static double hzeta_c[15] = {
+    1.00000000000000000000000000000,
+    0.083333333333333333333333333333,
+    -0.00138888888888888888888888888889,
+    0.000033068783068783068783068783069,
+    -8.2671957671957671957671957672e-07,
+    2.0876756987868098979210090321e-08,
+    -5.2841901386874931848476822022e-10,
+    1.3382536530684678832826980975e-11,
+    -3.3896802963225828668301953912e-13,
+    8.5860620562778445641359054504e-15,
+    -2.1748686985580618730415164239e-16,
+    5.5090028283602295152026526089e-18,
+    -1.3954464685812523340707686264e-19,
+    3.5347070396294674716932299778e-21,
+    -8.9535174270375468504026113181e-23
+};
+
+/**************************************************************************************************/
+void CommunityTypeFinder::printSilData(ofstream& out, double chi, vector<double> sils){
+    try {
+        out << setprecision (6) << numPartitions << '\t'  << chi << '\t';
+        for (int i = 0; i < sils.size(); i++) {
+            out << sils[i] << '\t';
+        }
+        out << endl;
+        
+        return;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "printSilData");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+void CommunityTypeFinder::printSilData(ostream& out, double chi, vector<double> sils){
+    try {
+        out << setprecision (6) << numPartitions << '\t'  << chi << '\t';
+        m->mothurOutJustToLog(toString(numPartitions) + '\t' + toString(chi) + '\t');
+        for (int i = 0; i < sils.size(); i++) {
+            out << sils[i] << '\t';
+            m->mothurOutJustToLog(toString(sils[i]) + '\t');
+        }
+        out << endl;
+        m->mothurOutJustToLog("\n");
+        
+        return;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "printSilData");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+
+void CommunityTypeFinder::printZMatrix(string fileName, vector<string> sampleName){
+    try {
+        ofstream printMatrix;
+        m->openOutputFile(fileName, printMatrix); //(fileName.c_str());
+        printMatrix.setf(ios::fixed, ios::floatfield);
+        printMatrix.setf(ios::showpoint);
+        
+        for(int i=0;i<numPartitions;i++){   printMatrix << "\tPartition_" << i+1;   }   printMatrix << endl;
+        
+        for(int i=0;i<numSamples;i++){
+            printMatrix << sampleName[i];
+            for(int j=0;j<numPartitions;j++){
+                printMatrix << setprecision(4) << '\t' << zMatrix[j][i];
+            }
+            printMatrix << endl;
+        }
+        printMatrix.close();
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CommunityTypeFinder", "printZMatrix");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void CommunityTypeFinder::printRelAbund(string fileName, vector<string> otuNames){
+    try {
+        ofstream printRA;
+        m->openOutputFile(fileName, printRA); //(fileName.c_str());
+        printRA.setf(ios::fixed, ios::floatfield);
+        printRA.setf(ios::showpoint);
+        
+        vector<double> totals(numPartitions, 0.0000);
+        for(int i=0;i<numPartitions;i++){
+            for(int j=0;j<numOTUs;j++){
+                totals[i] += exp(lambdaMatrix[i][j]);
+            }
+        }
+        
+        printRA << "Taxon";
+        for(int i=0;i<numPartitions;i++){
+            printRA << "\tPartition_" << i+1 << '_' << setprecision(4) << totals[i];
+            printRA << "\tPartition_" << i+1 <<"_LCI" << "\tPartition_" << i+1 << "_UCI";
+        }
+        printRA << endl;
+        
+        for(int i=0;i<numOTUs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            printRA << otuNames[i];
+            for(int j=0;j<numPartitions;j++){
+                
+                if(error[j][i] >= 0.0000){
+                    double std = sqrt(error[j][i]);
+                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
+                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i] - 2.0 * std) / totals[j];
+                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i] + 2.0 * std) / totals[j];
+                }
+                else{
+                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
+                    printRA << '\t' << "NA";
+                    printRA << '\t' << "NA";
+                }
+            }
+            printRA << endl;
+        }
+        
+        printRA.close();
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CommunityTypeFinder", "printRelAbund");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+vector<vector<double> > CommunityTypeFinder::getHessian(){
+    try {
+        vector<double> alpha(numOTUs, 0.0000);
+        double alphaSum = 0.0000;
+        
+        vector<double> pi = zMatrix[currentPartition];
+        vector<double> psi_ajk(numOTUs, 0.0000);
+        vector<double> psi_cjk(numOTUs, 0.0000);
+        vector<double> psi1_ajk(numOTUs, 0.0000);
+        vector<double> psi1_cjk(numOTUs, 0.0000);
+        
+        for(int j=0;j<numOTUs;j++){
+            
+            if (m->control_pressed) {  break; }
+            
+            alpha[j] = exp(lambdaMatrix[currentPartition][j]);
+            alphaSum += alpha[j];
+            
+            for(int i=0;i<numSamples;i++){
+                double X = (double) countMatrix[i][j];
+                
+                psi_ajk[j] += pi[i] * psi(alpha[j]);
+                psi1_ajk[j] += pi[i] * psi1(alpha[j]);
+                
+                psi_cjk[j] += pi[i] * psi(alpha[j] + X);
+                psi1_cjk[j] += pi[i] * psi1(alpha[j] + X);
+            }
+        }
+        
+        
+        double psi_Ck = 0.0000;
+        double psi1_Ck = 0.0000;
+        
+        double weight = 0.0000;
+        
+        for(int i=0;i<numSamples;i++){
+            if (m->control_pressed) {  break; }
+            weight += pi[i];
+            double sum = 0.0000;
+            for(int j=0;j<numOTUs;j++){     sum += alpha[j] + countMatrix[i][j];    }
+            
+            psi_Ck += pi[i] * psi(sum);
+            psi1_Ck += pi[i] * psi1(sum);
+        }
+        
+        double psi_Ak = weight * psi(alphaSum);
+        double psi1_Ak = weight * psi1(alphaSum);
+        
+        vector<vector<double> > hessian(numOTUs);
+        for(int i=0;i<numOTUs;i++){ hessian[i].assign(numOTUs, 0.0000); }
+        
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) {  break; }
+            double term1 = -alpha[i] * (- psi_ajk[i] + psi_Ak + psi_cjk[i] - psi_Ck);
+            double term2 = -alpha[i] * alpha[i] * (-psi1_ajk[i] + psi1_Ak + psi1_cjk[i] - psi1_Ck);
+            double term3 = 0.1 * alpha[i];
+            
+            hessian[i][i] = term1 + term2 + term3;
+            
+            for(int j=0;j<i;j++){
+                hessian[i][j] = - alpha[i] * alpha[j] * (psi1_Ak - psi1_Ck);
+                hessian[j][i] = hessian[i][j];
+            }
+        }
+        
+        return hessian;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "getHessian");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+
+double CommunityTypeFinder::psi1(double xx){
+    try {
+        
+        /* Euler-Maclaurin summation formula
+         * [Moshier, p. 400, with several typo corrections]
+         */
+        
+        double s = 2.0000;
+        const int jmax = 12;
+        const int kmax = 10;
+        int j, k;
+        const double pmax  = pow(kmax + xx, -s);
+        double scp = s;
+        double pcp = pmax / (kmax + xx);
+        double value = pmax*((kmax+xx)/(s-1.0) + 0.5);
+        
+        for(k=0; k<kmax; k++) {
+            if (m->control_pressed) {  return 0; }
+            value += pow(k + xx, -s);
+        }
+        
+        for(j=0; j<=jmax; j++) {
+            if (m->control_pressed) {  return 0; }
+            double delta = hzeta_c[j+1] * scp * pcp;
+            value += delta;
+            
+            if(fabs(delta/value) < 0.5*EPSILON) break;
+            
+            scp *= (s+2*j+1)*(s+2*j+2);
+            pcp /= (kmax + xx)*(kmax + xx);
+        }
+        
+        return value;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "psi1");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+double CommunityTypeFinder::psi(double xx){
+    try {
+        double psiX = 0.0000;
+        
+        if(xx < 1.0000){
+            
+            double t1 = 1.0 / xx;
+            psiX = cheb_eval(psi_cs, 22, 2.0*xx-1.0);
+            psiX = -t1 + psiX;
+            
+        }
+        else if(xx < 2.0000){
+            
+            const double v = xx - 1.0;
+            psiX = cheb_eval(psi_cs, 22, 2.0*v-1.0);
+            
+        }
+        else{
+            const double t = 8.0/(xx*xx)-1.0;
+            psiX = cheb_eval(apsi_cs, 15, t);
+            psiX += log(xx) - 0.5/xx;
+        }
+        
+        return psiX;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "psi");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+
+double CommunityTypeFinder::cheb_eval(const double seriesData[], int order, double xx){
+    try {
+        double d = 0.0000;
+        double dd = 0.0000;
+        
+        double x2 = xx * 2.0000;
+        
+        for(int j=order;j>=1;j--){
+            if (m->control_pressed) {  return 0; }
+            double temp = d;
+            d = x2 * d - dd + seriesData[j];
+            dd = temp;
+        }
+        
+        d = xx * d - dd + 0.5 * seriesData[0];
+        return d;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "cheb_eval");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+
+int CommunityTypeFinder::findkMeans(){
+    try {
+        error.resize(numPartitions); for (int i = 0; i < numPartitions; i++) { error[i].resize(numOTUs, 0.0); }
+        vector<vector<double> > relativeAbundance(numSamples);
+        vector<vector<double> > alphaMatrix;
+        
+        alphaMatrix.resize(numPartitions);
+        lambdaMatrix.resize(numPartitions);
+        for(int i=0;i<numPartitions;i++){
+            alphaMatrix[i].assign(numOTUs, 0);
+            lambdaMatrix[i].assign(numOTUs, 0);
+        }
+        
+        //get relative abundance
+        for(int i=0;i<numSamples;i++){
+            if (m->control_pressed) {  return 0; }
+            int groupTotal = 0;
+            
+            relativeAbundance[i].assign(numOTUs, 0.0);
+            
+            for(int j=0;j<numOTUs;j++){
+                groupTotal += countMatrix[i][j];
+            }
+            for(int j=0;j<numOTUs;j++){
+                relativeAbundance[i][j] = countMatrix[i][j] / (double)groupTotal;
+            }
+        }
+        
+        //randomly assign samples into partitions
+        zMatrix.resize(numPartitions);
+        for(int i=0;i<numPartitions;i++){
+            zMatrix[i].assign(numSamples, 0);
+        }
+        
+        for(int i=0;i<numSamples;i++){
+            zMatrix[rand()%numPartitions][i] = 1;
+        }
+        
+        double maxChange = 1;
+        int maxIters = 1000;
+        int iteration = 0;
+        
+        weights.assign(numPartitions, 0);
+        
+        while(maxChange > 1e-6 && iteration < maxIters){
+            
+            if (m->control_pressed) {  return 0; }
+            //calcualte average relative abundance
+            maxChange = 0.0000;
+            for(int i=0;i<numPartitions;i++){
+                
+                double normChange = 0.0;
+                
+                weights[i] = 0;
+                
+                for(int j=0;j<numSamples;j++){
+                    weights[i] += (double)zMatrix[i][j];
+                }
+                
+                vector<double> averageRelativeAbundance(numOTUs, 0);
+                for(int j=0;j<numOTUs;j++){
+                    for(int k=0;k<numSamples;k++){
+                        averageRelativeAbundance[j] += zMatrix[i][k] * relativeAbundance[k][j];
+                    }
+                }
+                
+                for(int j=0;j<numOTUs;j++){
+                    averageRelativeAbundance[j] /= weights[i];
+                    double difference = averageRelativeAbundance[j] - alphaMatrix[i][j];
+                    normChange += difference * difference;
+                    alphaMatrix[i][j] = averageRelativeAbundance[j];
+                }
+                
+                normChange = sqrt(normChange);
+                
+                if(normChange > maxChange){ maxChange = normChange; }
+            }
+            
+            
+            //calcualte distance between each sample in partition and the average relative abundance
+            for(int i=0;i<numSamples;i++){
+                if (m->control_pressed) {  return 0; }
+                
+                double normalizationFactor = 0;
+                vector<double> totalDistToPartition(numPartitions, 0);
+                
+                for(int j=0;j<numPartitions;j++){
+                    for(int k=0;k<numOTUs;k++){
+                        double difference = alphaMatrix[j][k] - relativeAbundance[i][k];
+                        totalDistToPartition[j] += difference * difference;
+                    }
+                    totalDistToPartition[j] = sqrt(totalDistToPartition[j]);
+                    normalizationFactor += exp(-50.0 * totalDistToPartition[j]);
+                }
+                
+                
+                for(int j=0;j<numPartitions;j++){
+                    zMatrix[j][i] = exp(-50.0 * totalDistToPartition[j]) / normalizationFactor;
+                }
+                
+            }
+            
+            iteration++;
+            //        cout << "K means: " << iteration << '\t' << maxChange << endl;
+            
+        }
+        
+        //    cout << "Iter:-1";
+        for(int i=0;i<numPartitions;i++){
+            weights[i] = 0.0000;
+            
+            for(int j=0;j<numSamples;j++){
+                weights[i] += zMatrix[i][j];
+            }
+            //        printf("\tw_%d=%.3f", i, weights[i]);
+        }
+        //    cout << endl;
+        
+        
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) {  return 0; }
+            for(int j=0;j<numPartitions;j++){
+                if(alphaMatrix[j][i] > 0){
+                    lambdaMatrix[j][i] = log(alphaMatrix[j][i]);
+                }
+                else{
+                    lambdaMatrix[j][i] = -10.0;
+                }
+            }
+        }
+        
+        return 0;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "kMeans");
+        exit(1);
+    }
+}
+
+
+/**************************************************************************************************/
+
diff --git a/communitytype.h b/communitytype.h
new file mode 100644 (file)
index 0000000..59291f5
--- /dev/null
@@ -0,0 +1,75 @@
+//
+//  communitytype.h
+//  Mothur
+//
+//  Created by SarahsWork on 12/3/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_communitytype_h
+#define Mothur_communitytype_h
+
+#define EPSILON numeric_limits<double>::epsilon()
+
+
+#include "mothurout.h"
+#include "linearalgebra.h"
+/**************************************************************************************************/
+
+class CommunityTypeFinder {
+    
+public:
+       CommunityTypeFinder(){  m = MothurOut::getInstance();  }
+       virtual ~CommunityTypeFinder(){};
+    
+    virtual void printZMatrix(string, vector<string>);
+    virtual void printRelAbund(string, vector<string>);
+    virtual void printFitData(ofstream&) {}
+    virtual void printFitData(ostream&, double) {}
+    virtual void printSilData(ofstream&, double, vector<double>);
+    virtual void printSilData(ostream&, double, vector<double>);
+    
+    virtual double getNLL()     {    return currNLL;        }
+    virtual double getAIC()     {    return aic;            }
+    virtual double getBIC()     {    return bic;            }
+    virtual double getLogDet()  {    return logDeterminant; }
+    virtual double getLaplace() {    return laplace;        }
+    
+    virtual double calcCHIndex(vector< vector< double> >) {return 0;}  //Calinski-Harabasz
+    virtual vector<double> calcSilhouettes(vector< vector< double> >) {  vector<double> s; return s; } //if none provided by child class
+
+
+protected:
+    
+    int findkMeans();
+    vector<vector<double> > getHessian();
+    double psi1(double);
+    double psi(double);
+    double cheb_eval(const double[], int, double);
+
+    
+       MothurOut* m;
+    vector<vector<double> > zMatrix;
+    vector<vector<double> > lambdaMatrix;
+    vector<vector<double> > error;
+    vector<vector<int> > countMatrix;
+    vector<double> weights;
+
+
+
+    int numPartitions;
+    int numSamples;
+    int numOTUs;
+    int currentPartition;
+    
+    double currNLL, aic, bic, logDeterminant, laplace;
+     
+       
+};
+
+/**************************************************************************************************/
+
+
+
+
+#endif
index 7d61f8c265e060f2dc0002c94f69edfd054f89ae..2891281c9de412a27b2d27229683c8d3bb7cf41b 100644 (file)
@@ -40,12 +40,19 @@ FlowData::FlowData(int numFlows, float signal, float noise, int maxHomoP, string
 //**********************************************************************************************************************
 
 bool FlowData::getNext(ifstream& flowFile){
 //**********************************************************************************************************************
 
 bool FlowData::getNext(ifstream& flowFile){
-       
        try {
        try {
+        
         seqName = getSequenceName(flowFile);
         seqName = getSequenceName(flowFile);
-               flowFile >> endFlow;    
+        if (m->debug) {  m->mothurOut("[DEBUG]: flow = " + seqName + " "); }
+               flowFile >> endFlow;
+        if (m->debug) {  m->mothurOut(toString(endFlow) + " "); }
         if (!m->control_pressed) {
         if (!m->control_pressed) {
-            for(int i=0;i<numFlows;i++)        {       flowFile >> flowData[i];        }
+            if (m->debug) {  m->mothurOut(" "); }
+            for(int i=0;i<numFlows;i++)        {
+                flowFile >> flowData[i];
+                if (m->debug) {  m->mothurOut(toString(flowData[i]) + " "); }
+            }
+            if (m->debug) {  m->mothurOut("\n"); }
             updateEndFlow(); 
             translateFlow();
             m->gobble(flowFile);
             updateEndFlow(); 
             translateFlow();
             m->gobble(flowFile);
index 08cd35a9470406ba0d4827b7e3b10a94f7de94ad..b3160749e9031072ceafdea29b92a33a2909cff6 100644 (file)
@@ -7,7 +7,10 @@
 //
 
 #include "getmetacommunitycommand.h"
 //
 
 #include "getmetacommunitycommand.h"
-
+#include "communitytype.h"
+#include "kmeans.h"
+#include "validcalculator.h"
+#include "subsample.h"
 
 //**********************************************************************************************************************
 vector<string> GetMetaCommunityCommand::setParameters(){
 
 //**********************************************************************************************************************
 vector<string> GetMetaCommunityCommand::setParameters(){
@@ -15,13 +18,17 @@ vector<string> GetMetaCommunityCommand::setParameters(){
         CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
         CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
         CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
         CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+        CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "jsd", "", "", "","",false,false,true); parameters.push_back(pcalc);
+        CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
+        CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
         CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions);
         CommandParameter pmaxpartitions("maxpartitions", "Number", "", "100", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions);
         CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap);
         CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
         CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions);
         CommandParameter pmaxpartitions("maxpartitions", "Number", "", "100", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions);
         CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap);
         CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
-               
+               CommandParameter pmethod("method", "Multiple", "dmm-kmeans-pam", "dmm", "", "", "","",false,false,true); parameters.push_back(pmethod);
+        
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
@@ -35,9 +42,13 @@ vector<string> GetMetaCommunityCommand::setParameters(){
 string GetMetaCommunityCommand::getHelpString(){
        try {
                string helpString = "";
 string GetMetaCommunityCommand::getHelpString(){
        try {
                string helpString = "";
-               helpString += "The get.communitytype command parameters are shared, label, groups, minpartitions, maxpartitions, optimizegap and processors. The shared file is required. \n";
+               helpString += "The get.communitytype command parameters are shared, method, label, groups, minpartitions, maxpartitions, optimizegap and processors. The shared file is required. \n";
         helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n";
                helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed.  Group names are separated by dashes.\n";
         helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n";
                helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed.  Group names are separated by dashes.\n";
+        helpString += "The method parameter allows to select the method you would like to use.  Options are dmm, kmeans and pam. Default=dmm.\n";
+        helpString += "The calc parameter allows to select the calculator you would like to use to calculate the distance matrix used by the pam method. By default the jsd calculator is used.\n";
+        helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample while calculating the distance matirx for the pam method.\n";
+        helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group while calculating the distance matrix for the pam method.\n";
                helpString += "The minpartitions parameter is used to .... Default=5.\n";
         helpString += "The maxpartitions parameter is used to .... Default=10.\n";
         helpString += "The optimizegap parameter is used to .... Default=3.\n";
                helpString += "The minpartitions parameter is used to .... Default=5.\n";
         helpString += "The maxpartitions parameter is used to .... Default=10.\n";
         helpString += "The optimizegap parameter is used to .... Default=3.\n";
@@ -171,6 +182,37 @@ GetMetaCommunityCommand::GetMetaCommunityCommand(string option)  {
                                if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
                                else { allLines = 1;  }
                        }
                                if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
                                else { allLines = 1;  }
                        }
+            
+            method = validParameter.validFile(parameters, "method", false);
+                       if (method == "not found") { method = "dmm"; }
+                       
+                       if ((method == "dmm") || (method == "kmeans") || (method == "pam")) { }
+                       else { m->mothurOut("[ERROR]: " + method + " is not a valid method.  Valid algorithms are dmm, kmeans and pam."); m->mothurOutEndLine(); abort = true; }
+            
+            calc = validParameter.validFile(parameters, "calc", false);
+                       if (calc == "not found") { calc = "jsd";  }
+                       else {
+                if (calc == "default")  {  calc = "jsd";  }
+                       }
+                       m->splitAtDash(calc, Estimators);
+                       if (m->inUsersGroups("citation", Estimators)) {
+                               ValidCalculators validCalc; validCalc.printCitations(Estimators);
+                               //remove citation from list of calcs
+                               for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
+                       }
+            if (Estimators.size() != 1) { abort = true; m->mothurOut("[ERROR]: only one calculator is allowed.\n"); }
+            
+            temp = validParameter.validFile(parameters, "iters", false);                       if (temp == "not found") { temp = "1000"; }
+                       m->mothurConvert(temp, iters);
+            
+            temp = validParameter.validFile(parameters, "subsample", false);           if (temp == "not found") { temp = "F"; }
+                       if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
+            else {
+                if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later
+                else { subsample = false; }
+            }
+            
+            if (subsample == false) { iters = 0; }
                }
                
        }
                }
                
        }
@@ -194,6 +236,35 @@ int GetMetaCommunityCommand::execute(){
         set<string> processedLabels;
         set<string> userLabels = labels;
         
         set<string> processedLabels;
         set<string> userLabels = labels;
         
+        if (subsample) {
+            if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
+                subsampleSize = lookup[0]->getNumSeqs();
+                for (int i = 1; i < lookup.size(); i++) {
+                    int thisSize = lookup[i]->getNumSeqs();
+                    
+                    if (thisSize < subsampleSize) {    subsampleSize = thisSize;       }
+                }
+            }else {
+                m->clearGroups();
+                Groups.clear();
+                vector<SharedRAbundVector*> temp;
+                for (int i = 0; i < lookup.size(); i++) {
+                    if (lookup[i]->getNumSeqs() < subsampleSize) {
+                        m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
+                        delete lookup[i];
+                    }else {
+                        Groups.push_back(lookup[i]->getGroup());
+                        temp.push_back(lookup[i]);
+                    }
+                }
+                lookup = temp;
+                m->setGroups(Groups);
+            }
+            
+            if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true;  return 0; }
+        }
+
+        
         //as long as you are not at the end of the file or done wih the lines you want
         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
             
         //as long as you are not at the end of the file or done wih the lines you want
         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
             
@@ -363,7 +434,12 @@ int GetMetaCommunityCommand::createProcesses(vector<SharedRAbundVector*>& thislo
                }
                
                //do my part
                }
                
                //do my part
-        m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
+        if (method == "dmm") {  m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");  }
+        else {
+            m->mothurOut("K\tCH\t");
+            for (int i = 0; i < thislookup.size(); i++) {  m->mothurOut(thislookup[i]->getGroup() + '\t'); }
+            m->mothurOut("\n");
+        }
                minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0);
                
                //force parent to wait until all the processes are done
                minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0);
                
                //force parent to wait until all the processes are done
@@ -437,56 +513,6 @@ int GetMetaCommunityCommand::createProcesses(vector<SharedRAbundVector*>& thislo
         }
         
 #else
         }
         
 #else
-        /*
-        vector<metaCommunityData*> pDataArray;
-               DWORD   dwThreadIdArray[processors-1];
-               HANDLE  hThreadArray[processors-1];
-        
-               //Create processor worker threads.
-               for( int i=1; i<processors; i++ ){
-            //copy lookup
-            //make copy of lookup so we don't get access violations
-            vector<SharedRAbundVector*> newLookup;
-            for (int k = 0; k < thislookup.size(); k++) {
-                SharedRAbundVector* temp = new SharedRAbundVector();
-                temp->setLabel(thislookup[k]->getLabel());
-                temp->setGroup(thislookup[k]->getGroup());
-                newLookup.push_back(temp);
-            }
-            
-            //for each bin
-            for (int k = 0; k < thislookup[0]->getNumBins(); k++) {
-                if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
-                for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(k), thislookup[j]->getGroup()); }
-            }
-
-            processIDS.push_back(i);
-            
-                       // Allocate memory for thread data.
-                       metaCommunityData* tempMeta = new metaCommunityData(newLookup, m, dividedPartitions[i], outputFileName + toString(i), rels[i], matrix[i], minpartitions, maxpartitions, optimizegap);
-                       pDataArray.push_back(tempMeta);
-                       
-                       hThreadArray[i] = CreateThread(NULL, 0, MyMetaCommunityThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
-               }
-               
-        //do my part
-               minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0]);
-        
-               //Wait until all threads have terminated.
-               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
-               
-               //Close all thread handles and free memory allocations.
-               for(int i=0; i < pDataArray.size(); i++){
-            if (pDataArray[i]->minPartition < minPartition) { minPartition = pDataArray[i]->minPartition; }
-            for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {
-                outputNames.push_back(pDataArray[i]->outputNames[j]);
-            }
-            m->appendFilesWithoutHeaders(outputFileName + toString(processIDS[i]), outputFileName);
-            m->mothurRemove(outputFileName + toString(processIDS[i]));
-                       CloseHandle(hThreadArray[i]);
-                       delete pDataArray[i];
-               } */
-        //do my part
         m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
                minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0);
 #endif
         m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
                minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0);
 #endif
@@ -501,7 +527,8 @@ int GetMetaCommunityCommand::createProcesses(vector<SharedRAbundVector*>& thislo
         
         //run generate Summary function for smallest minPartition
         variables["[tag]"] = toString(minPartition);
         
         //run generate Summary function for smallest minPartition
         variables["[tag]"] = toString(minPartition);
-        generateSummaryFile(minPartition, variables);
+        vector<double> piValues = generateDesignFile(minPartition, variables);
+        if (method == "dmm") {  generateSummaryFile(minPartition, variables, piValues); } //pam doesn't make a relabund file
         
         return 0;
 
         
         return 0;
 
@@ -516,12 +543,25 @@ int GetMetaCommunityCommand::processDriver(vector<SharedRAbundVector*>& thislook
        try {
         
         double minLaplace = 1e10;
        try {
         
         double minLaplace = 1e10;
-        int minPartition = 0;
+        int minPartition = 1;
+        vector<double> minSilhouettes; minSilhouettes.resize(thislookup.size(), 0);
+        
+               ofstream fitData, silData;
+        if (method == "dmm") {
+            m->openOutputFile(outputFileName, fitData);
+            fitData.setf(ios::fixed, ios::floatfield);
+            fitData.setf(ios::showpoint);
+            fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
+        }else if((method == "pam") || (method == "kmeans")) { //because ch is looking of maximal value
+            minLaplace = 0;
+            m->openOutputFile(outputFileName, silData);
+            silData.setf(ios::fixed, ios::floatfield);
+            silData.setf(ios::showpoint);
+            silData << "K\tCH\t";
+            for (int i = 0; i < thislookup.size(); i++) { silData << thislookup[i]->getGroup() << '\t';  }
+            silData << endl;
+        } 
         
         
-               ofstream fitData;
-               m->openOutputFile(outputFileName, fitData);
-        fitData.setf(ios::fixed, ios::floatfield);
-        fitData.setf(ios::showpoint);
         cout.setf(ios::fixed, ios::floatfield);
         cout.setf(ios::showpoint);
 
         cout.setf(ios::fixed, ios::floatfield);
         cout.setf(ios::showpoint);
 
@@ -529,7 +569,18 @@ int GetMetaCommunityCommand::processDriver(vector<SharedRAbundVector*>& thislook
         vector<string> thisGroups;
         for (int i = 0; i < thislookup.size(); i++) { sharedMatrix.push_back(thislookup[i]->getAbundances()); thisGroups.push_back(thislookup[i]->getGroup()); }
         
         vector<string> thisGroups;
         for (int i = 0; i < thislookup.size(); i++) { sharedMatrix.push_back(thislookup[i]->getAbundances()); thisGroups.push_back(thislookup[i]->getGroup()); }
         
-        fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
+        vector< vector<double> > dists; //do we want to output this matrix??
+        if ((method == "pam") || (method == "kmeans")) {  dists = generateDistanceMatrix(thislookup);  }
+        
+        if (m->debug) {
+            m->mothurOut("[DEBUG]: dists = \n");
+            for (int i = 0; i < dists.size(); i++) {
+                if (m->control_pressed) { break; }
+                m->mothurOut("[DEBUG]: i = " + toString(i) + '\t');
+                for (int j = 0; j < i; j++) { m->mothurOut(toString(dists[i][j]) +"\t"); }
+                m->mothurOut("\n");
+            }
+        }
         
         for(int i=0;i<parts.size();i++){
             
         
         for(int i=0;i<parts.size();i++){
             
@@ -551,33 +602,51 @@ int GetMetaCommunityCommand::processDriver(vector<SharedRAbundVector*>& thislook
                 }
             }
             
                 }
             }
             
-            qFinderDMM findQ(sharedMatrix, numPartitions);
-            
-            double laplace = findQ.getLaplace();
-            m->mothurOut(toString(numPartitions) + '\t');
-            cout << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
-            m->mothurOutJustToLog(toString(findQ.getNLL()) + '\t' + toString(findQ.getLogDet()) + '\t');
-            cout << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace;
-            m->mothurOutJustToLog(toString(findQ.getBIC()) + '\t' + toString(findQ.getAIC()) + '\t' + toString(laplace));
-            
-            fitData << numPartitions << '\t';
-            fitData << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
-            fitData << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace << endl;
-            
-            if(laplace < minLaplace){
-                minPartition = numPartitions;
-                minLaplace = laplace;
-                m->mothurOut("***");
+            CommunityTypeFinder* finder = NULL;
+            if (method == "dmm")            {   finder = new qFinderDMM(sharedMatrix, numPartitions);   }
+            else if (method == "kmeans")    {   finder = new KMeans(sharedMatrix, numPartitions);       }
+            else if (method == "pam")       {   finder = new Pam(sharedMatrix, dists, numPartitions);                 }
+            else {
+                if (i == 0) {  m->mothurOut(method + " is not a valid method option. I will run the command using dmm.\n"); }
+                finder = new qFinderDMM(sharedMatrix, numPartitions);
             }
             }
-            m->mothurOutEndLine();
             
             
+            double chi; vector<double> silhouettes;
+            if (method == "dmm") {
+                double laplace = finder->getLaplace();
+                if(laplace < minLaplace){
+                    minPartition = numPartitions;
+                    minLaplace = laplace;
+                }
+            }else {
+                chi = finder->calcCHIndex(dists);
+                silhouettes = finder->calcSilhouettes(dists);
+                if (chi > minLaplace) { //save partition with maximum ch index score
+                    minPartition = numPartitions;
+                    minLaplace = chi;
+                    minSilhouettes = silhouettes;
+                }
+            }
             string relabund = relabunds[i];
             string relabund = relabunds[i];
-            outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
             string matrixName = matrix[i];
             outputNames.push_back(matrixName); outputTypes["matrix"].push_back(matrixName);
             
             string matrixName = matrix[i];
             outputNames.push_back(matrixName); outputTypes["matrix"].push_back(matrixName);
             
-            findQ.printZMatrix(matrixName, thisGroups);
-            findQ.printRelAbund(relabund, m->currentSharedBinLabels);
+            finder->printZMatrix(matrixName, thisGroups);
+            
+            if (method == "dmm") {
+                finder->printFitData(cout, minLaplace);
+                finder->printFitData(fitData);
+                finder->printRelAbund(relabund, m->currentSharedBinLabels);
+                outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
+            }else if ((method == "pam") || (method == "kmeans")) { //print silouettes and ch values
+                finder->printSilData(cout, chi, silhouettes);
+                finder->printSilData(silData, chi, silhouettes);
+                if (method == "kmeans") {
+                    finder->printRelAbund(relabund, m->currentSharedBinLabels);
+                    outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
+                }
+            }
+            delete finder;
             
             if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){
                 string tempDoneFile = m->getRootName(m->getSimpleName(sharedfile)) + toString(processID) + ".done.temp";
             
             if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){
                 string tempDoneFile = m->getRootName(m->getSimpleName(sharedfile)) + toString(processID) + ".done.temp";
@@ -588,9 +657,7 @@ int GetMetaCommunityCommand::processDriver(vector<SharedRAbundVector*>& thislook
                 break;
             }
         }
                 break;
             }
         }
-        fitData.close();
-        
-        //minPartition = 4;
+        if (method == "dmm") { fitData.close(); }
         
         if (m->control_pressed) { return 0; }
 
         
         if (m->control_pressed) { return 0; }
 
@@ -672,7 +739,7 @@ vector<double> GetMetaCommunityCommand::generateDesignFile(int numPartitions, ma
 inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference;   }
 
 /**************************************************************************************************/
 inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference;   }
 
 /**************************************************************************************************/
-int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,string> v){
+int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,string> v, vector<double> piValues){
     try {
         vector<summaryData> summary;
         
     try {
         vector<summaryData> summary;
         
@@ -683,9 +750,6 @@ int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,s
         string name, header;
         double mean, lci, uci;
         
         string name, header;
         double mean, lci, uci;
         
-        
-        vector<double> piValues = generateDesignFile(numPartitions, v);
-        
         ifstream referenceFile;
         map<string, string> variables;
         variables["[filename]"] = v["[filename]"];
         ifstream referenceFile;
         map<string, string> variables;
         variables["[filename]"] = v["[filename]"];
@@ -808,4 +872,236 @@ int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,s
     
 }
 //**********************************************************************************************************************
     
 }
 //**********************************************************************************************************************
+vector<vector<double> > GetMetaCommunityCommand::generateDistanceMatrix(vector<SharedRAbundVector*>& thisLookup){
+    try {
+        vector<vector<double> > results;
+        
+        Calculator* matrixCalculator;
+        ValidCalculators validCalculator;
+        int i = 0;
+        
+        if (validCalculator.isValidCalculator("matrix", Estimators[i]) == true) {
+            if (Estimators[i] == "sharedsobs") {
+                matrixCalculator = new SharedSobsCS();
+            }else if (Estimators[i] == "sharedchao") {
+                matrixCalculator = new SharedChao1();
+            }else if (Estimators[i] == "sharedace") {
+                matrixCalculator = new SharedAce();
+            }else if (Estimators[i] == "jabund") {
+                matrixCalculator = new JAbund();
+            }else if (Estimators[i] == "sorabund") {
+                matrixCalculator = new SorAbund();
+            }else if (Estimators[i] == "jclass") {
+                matrixCalculator = new Jclass();
+            }else if (Estimators[i] == "sorclass") {
+                matrixCalculator = new SorClass();
+            }else if (Estimators[i] == "jest") {
+                matrixCalculator = new Jest();
+            }else if (Estimators[i] == "sorest") {
+                matrixCalculator = new SorEst();
+            }else if (Estimators[i] == "thetayc") {
+                matrixCalculator = new ThetaYC();
+            }else if (Estimators[i] == "thetan") {
+                matrixCalculator = new ThetaN();
+            }else if (Estimators[i] == "kstest") {
+                matrixCalculator = new KSTest();
+            }else if (Estimators[i] == "sharednseqs") {
+                matrixCalculator = new SharedNSeqs();
+            }else if (Estimators[i] == "ochiai") {
+                matrixCalculator = new Ochiai();
+            }else if (Estimators[i] == "anderberg") {
+                matrixCalculator = new Anderberg();
+            }else if (Estimators[i] == "kulczynski") {
+                matrixCalculator = new Kulczynski();
+            }else if (Estimators[i] == "kulczynskicody") {
+                matrixCalculator = new KulczynskiCody();
+            }else if (Estimators[i] == "lennon") {
+                matrixCalculator = new Lennon();
+            }else if (Estimators[i] == "morisitahorn") {
+                matrixCalculator = new MorHorn();
+            }else if (Estimators[i] == "braycurtis") {
+                matrixCalculator = new BrayCurtis();
+            }else if (Estimators[i] == "whittaker") {
+                matrixCalculator = new Whittaker();
+            }else if (Estimators[i] == "odum") {
+                matrixCalculator = new Odum();
+            }else if (Estimators[i] == "canberra") {
+                matrixCalculator = new Canberra();
+            }else if (Estimators[i] == "structeuclidean") {
+                matrixCalculator = new StructEuclidean();
+            }else if (Estimators[i] == "structchord") {
+                matrixCalculator = new StructChord();
+            }else if (Estimators[i] == "hellinger") {
+                matrixCalculator = new Hellinger();
+            }else if (Estimators[i] == "manhattan") {
+                matrixCalculator = new Manhattan();
+            }else if (Estimators[i] == "structpearson") {
+                matrixCalculator = new StructPearson();
+            }else if (Estimators[i] == "soergel") {
+                matrixCalculator = new Soergel();
+            }else if (Estimators[i] == "spearman") {
+                matrixCalculator = new Spearman();
+            }else if (Estimators[i] == "structkulczynski") {
+                matrixCalculator = new StructKulczynski();
+            }else if (Estimators[i] == "speciesprofile") {
+                matrixCalculator = new SpeciesProfile();
+            }else if (Estimators[i] == "hamming") {
+                matrixCalculator = new Hamming();
+            }else if (Estimators[i] == "structchi2") {
+                matrixCalculator = new StructChi2();
+            }else if (Estimators[i] == "gower") {
+                matrixCalculator = new Gower();
+            }else if (Estimators[i] == "memchi2") {
+                matrixCalculator = new MemChi2();
+            }else if (Estimators[i] == "memchord") {
+                matrixCalculator = new MemChord();
+            }else if (Estimators[i] == "memeuclidean") {
+                matrixCalculator = new MemEuclidean();
+            }else if (Estimators[i] == "mempearson") {
+                matrixCalculator = new MemPearson();
+            }else if (Estimators[i] == "jsd") {
+                matrixCalculator = new JSD();
+            }else {
+                m->mothurOut("[ERROR]: " + Estimators[i] + " is not a valid calculator, please correct.\n"); m->control_pressed = true; return results;
+            }
+        }
+        
+        //calc distances
+        vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, then each groupCombos dists. this will be used to make .dist files
+        vector< vector<seqDist> > calcDists; calcDists.resize(1);
+        
+        for (int thisIter = 0; thisIter < iters+1; thisIter++) {
+            vector<SharedRAbundVector*> thisItersLookup = thisLookup;
+            
+            if (subsample && (thisIter != 0)) {
+                SubSample sample;
+                vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
+                
+                //make copy of lookup so we don't get access violations
+                vector<SharedRAbundVector*> newLookup;
+                for (int k = 0; k < thisItersLookup.size(); k++) {
+                    SharedRAbundVector* temp = new SharedRAbundVector();
+                    temp->setLabel(thisItersLookup[k]->getLabel());
+                    temp->setGroup(thisItersLookup[k]->getGroup());
+                    newLookup.push_back(temp);
+                }
+                
+                //for each bin
+                for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+                    if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return results; }
+                    for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+                }
+                
+                tempLabels = sample.getSample(newLookup, subsampleSize);
+                thisItersLookup = newLookup;
+            }
+            
+           
+            driver(thisItersLookup, calcDists, matrixCalculator);
+                     
+            if (subsample && (thisIter != 0)) {
+                if((thisIter) % 100 == 0){     m->mothurOutJustToScreen(toString(thisIter)+"\n");              }
+                calcDistsTotals.push_back(calcDists);
+                for (int i = 0; i < calcDists.size(); i++) {
+                    for (int j = 0; j < calcDists[i].size(); j++) {
+                        if (m->debug) {  m->mothurOut("[DEBUG]: Results: iter = " + toString(thisIter) + ", " + thisLookup[calcDists[i][j].seq1]->getGroup() + " - " + thisLookup[calcDists[i][j].seq2]->getGroup() + " distance = " + toString(calcDists[i][j].dist) + ".\n");  }
+                    }
+                }
+                //clean up memory
+                for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
+                thisItersLookup.clear();
+            }else { //print results for whole dataset
+                for (int i = 0; i < calcDists.size(); i++) {
+                    if (m->control_pressed) { break; }
+                    
+                    //initialize matrix
+                    results.resize(thisLookup.size());
+                    for (int k = 0; k < thisLookup.size(); k++) {  results[k].resize(thisLookup.size(), 0.0); }
+                    
+                    for (int j = 0; j < calcDists[i].size(); j++) {
+                        int row = calcDists[i][j].seq1;
+                        int column = calcDists[i][j].seq2;
+                        double dist = calcDists[i][j].dist;
+                        
+                        results[row][column] = dist;
+                        results[column][row] = dist;
+                    }
+                }
+            }
+            for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
+               }
+               
+        if (iters != 0) {
+            //we need to find the average distance and standard deviation for each groups distance
+            vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals, "average");
+            
+            //print results
+            for (int i = 0; i < calcDists.size(); i++) {
+                results.resize(thisLookup.size());
+                for (int k = 0; k < thisLookup.size(); k++) {  results[k].resize(thisLookup.size(), 0.0); }
+                
+                for (int j = 0; j < calcAverages[i].size(); j++) {
+                    int row = calcAverages[i][j].seq1;
+                    int column = calcAverages[i][j].seq2;
+                    float dist = calcAverages[i][j].dist;
+                    
+                    results[row][column] = dist;
+                    results[column][row] = dist;
+                }
+            }
+        }
+
+        
+        return results;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GetMetaCommunityCommand", "generateDistanceMatrix");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+int GetMetaCommunityCommand::driver(vector<SharedRAbundVector*> thisLookup, vector< vector<seqDist> >& calcDists, Calculator* matrixCalculator) {
+       try {
+               vector<SharedRAbundVector*> subset;
+        
+               for (int k = 0; k < thisLookup.size(); k++) { // pass cdd each set of groups to compare
+                       
+                       for (int l = 0; l < k; l++) {
+                               
+                               if (k != l) { //we dont need to similiarity of a groups to itself
+                                       subset.clear(); //clear out old pair of sharedrabunds
+                                       //add new pair of sharedrabunds
+                                       subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
+                                       
+                                       
+                    
+                    //if this calc needs all groups to calculate the pair load all groups
+                    if (matrixCalculator->getNeedsAll()) {
+                        //load subset with rest of lookup for those calcs that need everyone to calc for a pair
+                        for (int w = 0; w < thisLookup.size(); w++) {
+                            if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
+                        }
+                    }
+                    
+                    vector<double> tempdata = matrixCalculator->getValues(subset); //saves the calculator outputs
+                    
+                    if (m->control_pressed) { return 1; }
+                    
+                    seqDist temp(l, k, tempdata[0]);
+                    //cout << l << '\t' << k << '\t' <<  tempdata[0] << endl;
+                    calcDists[0].push_back(temp);
+                }
+                               
+                       }
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MatrixOutputCommand", "driver");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 
 
index 7697430cca6164b695ed92526dd190926ec1eb5e..9c17e4d2eb844e6a2eb1c7b6bfee0bf96910ded4 100644 (file)
 #include "command.hpp"
 #include "inputdata.h"
 #include "qFinderDMM.h"
 #include "command.hpp"
 #include "inputdata.h"
 #include "qFinderDMM.h"
+#include "pam.h"
+#include "sharedsobscollectsummary.h"
+#include "sharedchao1.h"
+#include "sharedace.h"
+#include "sharednseqs.h"
+#include "sharedjabund.h"
+#include "sharedsorabund.h"
+#include "sharedjclass.h"
+#include "sharedsorclass.h"
+#include "sharedjest.h"
+#include "sharedsorest.h"
+#include "sharedthetayc.h"
+#include "sharedthetan.h"
+#include "sharedkstest.h"
+#include "whittaker.h"
+#include "sharedochiai.h"
+#include "sharedanderbergs.h"
+#include "sharedkulczynski.h"
+#include "sharedkulczynskicody.h"
+#include "sharedlennon.h"
+#include "sharedmorisitahorn.h"
+#include "sharedbraycurtis.h"
+#include "sharedjackknife.h"
+#include "whittaker.h"
+#include "odum.h"
+#include "canberra.h"
+#include "structeuclidean.h"
+#include "structchord.h"
+#include "hellinger.h"
+#include "manhattan.h"
+#include "structpearson.h"
+#include "soergel.h"
+#include "spearman.h"
+#include "structkulczynski.h"
+#include "structchi2.h"
+#include "speciesprofile.h"
+#include "hamming.h"
+#include "gower.h"
+#include "memchi2.h"
+#include "memchord.h"
+#include "memeuclidean.h"
+#include "mempearson.h"
+#include "sharedjsd.h"
 
 /**************************************************************************************************/
 
 
 /**************************************************************************************************/
 
@@ -40,18 +83,20 @@ private:
                unsigned long long end;
                linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
        };
                unsigned long long end;
                linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
        };
-    bool abort, allLines;
+    bool abort, allLines, subsample;
     string outputDir;
     vector<string> outputNames;
     string outputDir;
     vector<string> outputNames;
-    string sharedfile;
-    int minpartitions, maxpartitions, optimizegap, processors;
-    vector<string> Groups;
+    string sharedfile, method, calc;
+    int minpartitions, maxpartitions, optimizegap, processors, iters, subsampleSize;
+    vector<string> Groups, Estimators;
     set<string> labels;
     
     set<string> labels;
     
+    vector<vector<double> > generateDistanceMatrix(vector<SharedRAbundVector*>& lookup);
+    int driver(vector<SharedRAbundVector*> thisLookup, vector< vector<seqDist> >& calcDists, Calculator*);
     int processDriver(vector<SharedRAbundVector*>&, vector<int>&, string, vector<string>, vector<string>, vector<string>, int);
     int createProcesses(vector<SharedRAbundVector*>&);
     vector<double> generateDesignFile(int, map<string,string>);
     int processDriver(vector<SharedRAbundVector*>&, vector<int>&, string, vector<string>, vector<string>, vector<string>, int);
     int createProcesses(vector<SharedRAbundVector*>&);
     vector<double> generateDesignFile(int, map<string,string>);
-    int generateSummaryFile(int, map<string,string>);
+    int generateSummaryFile(int, map<string,string>, vector<double>);
 
 };
 
 
 };
 
@@ -63,111 +108,5 @@ struct summaryData {
     vector<double> partMean, partLCI, partUCI;
     
 };
     vector<double> partMean, partLCI, partUCI;
     
 };
-/**************************************************************************************************/
-
-struct metaCommunityData {
-    vector<SharedRAbundVector*> thislookup;
-       MothurOut* m;
-       string outputFileName;
-    vector<string> relabunds, matrix, outputNames;
-    int minpartitions, maxpartitions, optimizegap;
-    vector<int> parts;
-    int minPartition;
-       
-       metaCommunityData(){}
-       metaCommunityData(vector<SharedRAbundVector*> lu, MothurOut* mout, vector<int> dp, string fit, vector<string> rels, vector<string> mat, int minp, int maxp, int opg) {
-               m = mout;
-        thislookup = lu;
-        parts = dp;
-        outputFileName = fit;
-        relabunds = rels;
-        matrix = mat;
-        minpartitions = minp;
-        maxpartitions = maxp;
-        optimizegap = opg;
-        minPartition = 0;
-       }
-};
-/**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-#else
-static DWORD WINAPI MyMetaCommunityThreadFunction(LPVOID lpParam){
-       metaCommunityData* pDataArray;
-       pDataArray = (metaCommunityData*)lpParam;
-       
-       try {
-        
-        double minLaplace = 1e10;
-        
-               ofstream fitData;
-               pDataArray->m->openOutputFile(pDataArray->outputFileName, fitData);
-        fitData.setf(ios::fixed, ios::floatfield);
-        fitData.setf(ios::showpoint);
-        cout.setf(ios::fixed, ios::floatfield);
-        cout.setf(ios::showpoint);
-        
-        vector< vector<int> > sharedMatrix;
-        for (int i = 0; i < pDataArray->thislookup.size(); i++) { sharedMatrix.push_back(pDataArray->thislookup[i]->getAbundances()); }
-        
-        pDataArray->m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
-        fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
-        
-        for(int i=0;i<pDataArray->parts.size();i++){
-            
-            int numPartitions = pDataArray->parts[i];
-            
-            if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: running partition " + toString(numPartitions) + "\n"); }
-            
-            if (pDataArray->m->control_pressed) { break; }
-            
-            qFinderDMM* findQ = new qFinderDMM(sharedMatrix, numPartitions);
-            
-            if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: done finding Q " + toString(numPartitions) + "\n"); }
-            
-            double laplace = findQ->getLaplace();
-            pDataArray->m->mothurOut(toString(numPartitions) + '\t');
-            cout << setprecision (2) << findQ->getNLL() << '\t' << findQ->getLogDet() << '\t';
-            pDataArray->m->mothurOutJustToLog(toString(findQ->getNLL()) + '\t' + toString(findQ->getLogDet()) + '\t');
-            cout << findQ->getBIC() << '\t' << findQ->getAIC() << '\t' << laplace;
-            pDataArray->m->mothurOutJustToLog(toString(findQ->getBIC()) + '\t' + toString(findQ->getAIC()) + '\t' + toString(laplace));
-            
-            fitData << numPartitions << '\t';
-            fitData << setprecision (2) << findQ->getNLL() << '\t' << findQ->getLogDet() << '\t';
-            fitData << findQ->getBIC() << '\t' << findQ->getAIC() << '\t' << laplace << endl;
-            
-            if(laplace < minLaplace){
-                pDataArray->minPartition = numPartitions;
-                minLaplace = laplace;
-                pDataArray->m->mothurOut("***");
-            }
-            pDataArray->m->mothurOutEndLine();
-            
-            pDataArray->outputNames.push_back(pDataArray->relabunds[i]);
-            pDataArray->outputNames.push_back(pDataArray->matrix[i]);
-            
-            findQ->printZMatrix(pDataArray->matrix[i], pDataArray->m->getGroups());
-            findQ->printRelAbund(pDataArray->relabunds[i], pDataArray->m->currentSharedBinLabels);
-            
-            if(pDataArray->optimizegap != -1 && (numPartitions - pDataArray->minPartition) >= pDataArray->optimizegap && numPartitions >= pDataArray->minpartitions){ break; }
-            
-            delete findQ;
-        }
-        fitData.close();
-        
-        //minPartition = 4;
-        
-        if (pDataArray->m->control_pressed) { return 0; }
-        
-        return 0;
-               
-       }
-       catch(exception& e) {
-               pDataArray->m->errorOut(e, "GetMetaCommunityCommand", "MyMetaCommunityThreadFunction");
-               exit(1);
-       }
-}
-#endif
-
-
 
 #endif
 
 #endif
index 61092372b18ae196a652d0b0f66c6b04d22d0266..c64549334ded9f4d32140ad64cbe898c5c41a023 100644 (file)
@@ -760,6 +760,8 @@ string GetOTURepCommand::findRepAbund(vector<string> names, string group) {
        try{
         vector<string> reps;
         string rep = "notFound";
        try{
         vector<string> reps;
         string rep = "notFound";
+    
+        if (m->debug) { m->mothurOut("[DEBUG]: group=" + group + " names.size() = " + toString(names.size()) + " " + names[0] + "\n"); }
         
         if ((names.size() == 1)) {
             return names[0];
         
         if ((names.size() == 1)) {
             return names[0];
@@ -773,7 +775,7 @@ string GetOTURepCommand::findRepAbund(vector<string> names, string group) {
                 if (countfile != "") {  //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
                     int numRep = 0;
                     if (group != "") {  numRep = ct.getGroupCount(names[i], group);  }
                 if (countfile != "") {  //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
                     int numRep = 0;
                     if (group != "") {  numRep = ct.getGroupCount(names[i], group);  }
-                    else { numRep = ct.getGroupCount(names[i]);  }
+                    else { numRep = ct.getNumSeqs(names[i]);  }
                     if (numRep > maxAbund) {
                         reps.clear();
                         reps.push_back(names[i]);
                     if (numRep > maxAbund) {
                         reps.clear();
                         reps.push_back(names[i]);
@@ -834,7 +836,7 @@ string GetOTURepCommand::findRep(vector<string> names, string group) {
                         if (countfile != "") {  //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
                             int numRep = 0;
                             if (group != "") {  numRep = ct.getGroupCount(names[i], group);  }
                         if (countfile != "") {  //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
                             int numRep = 0;
                             if (group != "") {  numRep = ct.getGroupCount(names[i], group);  }
-                            else { numRep = ct.getGroupCount(names[i]);  }
+                            else { numRep = ct.getNumSeqs(names[i]);  }
                             for (int j = 1; j < numRep; j++) { //don't add yourself again
                                 seqIndex.push_back(nameToIndex[names[i]]);
                             }
                             for (int j = 1; j < numRep; j++) { //don't add yourself again
                                 seqIndex.push_back(nameToIndex[names[i]]);
                             }
diff --git a/kmeans.cpp b/kmeans.cpp
new file mode 100644 (file)
index 0000000..dab5c7d
--- /dev/null
@@ -0,0 +1,237 @@
+//
+//  kmeans.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 12/4/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "kmeans.h"
+
+/**************************************************************************************************/
+
+KMeans::KMeans(vector<vector<int> > cm, int p) : CommunityTypeFinder() {
+    try {
+        countMatrix = cm;
+        numSamples = (int)countMatrix.size();
+        numOTUs = (int)countMatrix[0].size();
+        numPartitions = p;
+        
+        findkMeans();
+    }
+       catch(exception& e) {
+               m->errorOut(e, "KMeans", "KMeans");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+//The silhouette width S(i)of individual data points i is calculated using the following formula:
+/*
+ s(i) = b(i) - a(i)
+ -----------
+ max(b(i),a(i))
+ where a(i) is the average dissimilarity (or distance) of sample i to all other samples in the same cluster, while b(i) is the average dissimilarity (or distance) to all objects in the closest other cluster.
+ The formula implies -1 =< S(i) =< 1 . A sample which is much closer to its own cluster than to any other cluster has a high S(i) value, while S(i) close to 0 implies that the given sample lies somewhere between two clusters. Large negative S(i) values indicate that the sample was assigned to the wrong cluster.
+ */
+
+vector<double> KMeans::calcSilhouettes(vector<vector<double> > dists) {
+    try {
+        vector<double> silhouettes; silhouettes.resize(numSamples, 0.0);
+        if (numPartitions < 2) { return silhouettes; }
+        
+        map<int, int> clusterMap; //map sample to partition
+        for (int j = 0; j < numSamples; j++) {
+            double maxValue = 0.0;
+            for (int i = 0; i < numPartitions; i++) {
+                if (m->control_pressed) { return silhouettes; }
+                if (zMatrix[i][j] > maxValue) { //for kmeans zmatrix contains values for each sample in each partition. partition with highest value for that sample is the partition where the sample should be
+                    clusterMap[j] = i;
+                    maxValue = zMatrix[i][j];
+                }
+            }
+        }
+        
+        vector<int> nextClosestPartition;
+        findSecondClosest(nextClosestPartition, dists, clusterMap);
+        
+        if (m->control_pressed) { return silhouettes; }
+        
+        vector<double> a, b; a.resize(numSamples, 0.0); b.resize(numSamples, 0.0);
+        
+        //calc a - all a[i] are the same in the same partition
+        for (int k = 0; k < numPartitions; k++) {
+            if (m->control_pressed) { break; }
+            
+            int count = 0;
+            double totalA = 0.0;
+            for (int i = 0; i < numSamples; i++) {
+                for (int j = 0; j < numSamples; j++) {
+                    if (m->control_pressed) { break; }
+                    if ((clusterMap[i] == k) && (clusterMap[j] == k)){ //are both samples in the partition, if so add there distance
+                        totalA += dists[j][i]; //distance from this sample to the other samples in the partition
+                        count++;
+                    }
+                }
+            }
+            totalA /= (double) count;
+            
+            //set a[i] to average for cluster
+            for (int i = 0; i < numSamples; i++) {
+                if (clusterMap[i] == k) { a[i] = totalA; }
+            }
+        }
+        
+        //calc b
+        for (int i = 0; i < numSamples; i++) {
+            if (m->control_pressed) { break; }
+            
+            int thisPartition = nextClosestPartition[i];
+            int count = 0;
+            double totalB = 0.0;
+            for (int j = 0; j < numSamples; j++) {
+                if (clusterMap[j] == thisPartition) { //this sample is in this partition
+                    totalB += dists[i][j];
+                    count++;
+                }
+            }
+            b[i] = totalB / (double) count;
+        }
+        
+        //calc silhouettes
+        for (int i = 0; i < numSamples; i++) {
+            if (m->control_pressed) { break; }
+            
+            double denom = a[i];
+            if (b[i] > denom) { denom = b[i]; } //max(a[i],b[i])
+            
+            silhouettes[i] = (b[i] - a[i]) / denom;
+            
+            //cout << "silhouettes " << i << '\t' << silhouettes[i] << endl;
+        }
+        
+        return silhouettes;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "KMeans", "calcSilhouettes");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+int KMeans::findSecondClosest(vector<int>& nextClosestPartition, vector<vector<double> >& dists, map<int, int> clusterMap) {
+    try {
+        vector<double> minScores; minScores.resize(numSamples, 1e6);
+        nextClosestPartition.resize(numSamples, 0);
+        
+        
+        for (int i = 0; i < numSamples; i++) {
+            for (int j = 0; j < numPartitions; j++) {
+                if (m->control_pressed) { break; }
+                
+                //is this the one we are assigned to - ie the "best" cluster. We want second best.
+                //if numPartitions = 2, then special case??
+                if (clusterMap[i] != j) {
+                    double score = 1e6;
+                    if (numPartitions == 2) {
+                        score = 0.0;  //choose other option, there are only 2.
+                    }else{   score = calcScore(i, j, dists, clusterMap); }
+                
+                    if (m->debug) { m->mothurOut("[DEBUG]: sample = " + toString(i) + " partition = " + toString(j) + " score = " + toString(score) + "\n"); }
+                    
+                    //is this better than our last find
+                    if (score < minScores[i]) {
+                        minScores[i] = score;
+                        nextClosestPartition[i] = j;
+                    }
+                }else {} //best case, ignore
+            }
+        }
+        
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "KMeans", "findSecondClosest");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+double KMeans::calcScore(int sample, int partition, vector<vector<double> >& dists, map<int, int> clusterMap) {
+    try {
+        //square the distances and then for each pair of clusters, calculate the sum of the squraed distances between the clusters
+        //then with the sum of hte squared dsitances take the square root and divide by the number of distances in the sum
+        
+        double sum = 0.0; int count = 0;
+        for (int i = 0; i < numSamples; i++) {
+            if (m->control_pressed) { break; }
+            if (clusterMap[i] == partition) { //samples in this cluster
+                sum += (dists[sample][i] * dists[sample][i]);
+                count++;
+            }
+        }
+        
+        sum = sqrt(sum);
+        sum /= (double) count;
+                
+        return sum;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "KMeans", "calcScore");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*To assess the optimal number of clusters our dataset was most robustly partitioned into, we used the Calinski-Harabasz (CH) Index that has shown good performance in recovering the number of clusters. It is defined as:
+ CHk=Bk/(k−1)/Wk/(n−k)
+ where Bk is the between-cluster sum of squares (i.e. the squared distances between all points i and j, for which i and j are not in the same cluster) and Wk is the within-clusters sum of squares (i.e. the squared distances between all points i and j, for which i and j are in the same cluster). This measure implements the idea that the clustering is more robust when between-cluster distances are substantially larger than within-cluster distances. Consequently, we chose the number of clusters k such that CHk was maximal.*/
+double KMeans::calcCHIndex(vector< vector< double> > dists){
+    try {
+        double CH = 0.0;
+        
+        if (numPartitions < 2) { return CH; }
+        
+        map<int, int> clusterMap; //map sample to partition
+        for (int j = 0; j < numSamples; j++) {
+            double maxValue = 0.0;
+            for (int i = 0; i < numPartitions; i++) {
+                if (m->control_pressed) { return 0.0; }
+                if (zMatrix[i][j] > maxValue) { //for kmeans zmatrix contains values for each sample in each partition. partition with highest value for that sample is the partition where the sample should be
+                    clusterMap[j] = i;
+                    maxValue = zMatrix[i][j];
+                }
+            }
+        }
+        
+        double sumBetweenCluster = 0.0;
+        double sumWithinClusters = 0.0;
+        
+        for (int i = 0; i < numSamples; i++) { //lt
+            for (int j = 0; j < i; j++) {
+                if (m->control_pressed) { return 0.0; }
+                int partitionI = clusterMap[i];
+                int partitionJ = clusterMap[j];
+                
+                if (partitionI == partitionJ) { //they are from the same cluster so this distance is added to Wk
+                    sumWithinClusters += (dists[i][j] * dists[i][j]);
+                }else { //they are NOT from the same cluster so this distance is added to Bk
+                    sumBetweenCluster += (dists[i][j] * dists[i][j]);
+                }
+            }
+        }
+        //cout << numPartitions << '\t' << sumWithinClusters << '\t' << sumBetweenCluster << '\t' <<  (numSamples - numPartitions) << endl;
+        
+        CH = (sumBetweenCluster / (double)(numPartitions - 1)) / (sumWithinClusters / (double) (numSamples - numPartitions));
+        
+        return CH;
+    }
+    catch(exception& e){
+        m->errorOut(e, "KMeans", "calcCHIndex");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+
+
+
+
diff --git a/kmeans.h b/kmeans.h
new file mode 100644 (file)
index 0000000..97dfb68
--- /dev/null
+++ b/kmeans.h
@@ -0,0 +1,32 @@
+//
+//  kmeans.h
+//  Mothur
+//
+//  Created by SarahsWork on 12/4/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_kmeans_h
+#define Mothur_kmeans_h
+
+#include "communitytype.h"
+
+/**************************************************************************************************/
+
+class KMeans : public CommunityTypeFinder {
+    
+public:
+    KMeans(vector<vector<int> >, int);
+    vector<double> calcSilhouettes(vector< vector< double> >);
+    double calcCHIndex(vector< vector< double> >);
+    
+private:
+
+    int findSecondClosest(vector<int>&, vector<vector<double> >&, map<int, int>);
+    double calcScore(int sample, int partition, vector<vector<double> >&, map<int, int>);
+
+};
+
+/**************************************************************************************************/
+
+#endif
index b208aab042b6be5c0c096531651b9067976d8f45..bacf2c1c07bb420430d07416b6ca7354c650a79b 100644 (file)
@@ -114,7 +114,7 @@ double LinearAlgebra::gammp(const double a, const double x) {
 }
 /*********************************************************************************************************************************/
 //Numerical Recipes pg. 223
 }
 /*********************************************************************************************************************************/
 //Numerical Recipes pg. 223
-double LinearAlgebra::gammq(const double a, const double x) {
+/*double LinearAlgebra::gammq(const double a, const double x) {
     try {
         double gamser,gammcf,gln;
         
     try {
         double gamser,gammcf,gln;
         
@@ -130,11 +130,11 @@ double LinearAlgebra::gammq(const double a, const double x) {
         return 0;
     }
        catch(exception& e) {
         return 0;
     }
        catch(exception& e) {
-               m->errorOut(e, "LinearAlgebra", "gammp");
+               m->errorOut(e, "LinearAlgebra", "gammq");
                exit(1);
        }
 }
                exit(1);
        }
 }
-/*********************************************************************************************************************************/
+*********************************************************************************************************************************/
 //Numerical Recipes pg. 224
 double LinearAlgebra::gcf(double& gammcf, const double a, const double x, double& gln){
     try {
 //Numerical Recipes pg. 224
 double LinearAlgebra::gcf(double& gammcf, const double a, const double x, double& gln){
     try {
@@ -161,7 +161,7 @@ double LinearAlgebra::gcf(double& gammcf, const double a, const double x, double
             h *= del;
             if (fabs(del-1.0) <= EPS) break;
         }
             h *= del;
             if (fabs(del-1.0) <= EPS) break;
         }
-        if (i > ITMAX)  { m->mothurOut("[ERROR]: a too large, ITMAX too small in gcf\n"); m->control_pressed = true; }
+        if (i > ITMAX)  { m->mothurOut("[ERROR]: " + toString(a) + " too large, ITMAX=100 too small in gcf\n"); m->control_pressed = true; }
         gammcf=exp(-x+a*log(x)-gln)*h;
         
         return 0.0;
         gammcf=exp(-x+a*log(x)-gln)*h;
         
         return 0.0;
@@ -1302,7 +1302,7 @@ double LinearAlgebra::calcKruskalWallis(vector<spearmanRank>& values, double& pV
         double lastTerm = 3 * (values.size()+1);
         
         H = firstTerm * middleTerm - lastTerm;
         double lastTerm = 3 * (values.size()+1);
         
         H = firstTerm * middleTerm - lastTerm;
-        
+       
         //adjust for ties
         if (TIES.size() != 0) {
             double sum = 0.0;
         //adjust for ties
         if (TIES.size() != 0) {
             double sum = 0.0;
@@ -1311,6 +1311,8 @@ double LinearAlgebra::calcKruskalWallis(vector<spearmanRank>& values, double& pV
             H /= result;
         }
         
             H /= result;
         }
         
+        if (isnan(H) || isinf(H)) { H = 0; }
+        
         //Numerical Recipes pg221
         pValue = 1.0 - (gammp(((treatments.size()-1)/(double)2.0), H/2.0));
         
         //Numerical Recipes pg221
         pValue = 1.0 - (gammp(((treatments.size()-1)/(double)2.0), H/2.0));
         
index 7453f4ebd8b86ed20f530c2e1ba7989808361fb2..e4c8c1588b60c3868de7f4e06353a43112ea3037 100644 (file)
@@ -57,7 +57,7 @@ private:
     double betacf(const double, const double, const double);
     double betai(const double, const double, const double);
     double gammln(const double);
     double betacf(const double, const double, const double);
     double betai(const double, const double, const double);
     double gammln(const double);
-    double gammq(const double, const double);
+    //double gammq(const double, const double);
     double gser(double&, const double, const double, double&);
     double gcf(double&, const double, const double, double&);
     double erfcc(double);
     double gser(double&, const double, const double, double&);
     double gcf(double&, const double, const double, double&);
     double erfcc(double);
index a53a3dcb92837f112771f44ef39219d9e36a277a..8a7656cee9cda0086fd2aaedd1afd5fb27896952 100644 (file)
@@ -17,7 +17,7 @@ vector<string> MatrixOutputCommand::setParameters(){
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
         CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
                CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
         CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
                CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
-               CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc);
+               CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc);
                CommandParameter poutput("output", "Multiple", "lt-square-column", "lt", "", "", "","",false,false); parameters.push_back(poutput);
         CommandParameter pmode("mode", "Multiple", "average-median", "average", "", "", "","",false,false); parameters.push_back(pmode);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
                CommandParameter poutput("output", "Multiple", "lt-square-column", "lt", "", "", "","",false,false); parameters.push_back(poutput);
         CommandParameter pmode("mode", "Multiple", "average-median", "average", "", "", "","",false,false); parameters.push_back(pmode);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
@@ -281,6 +281,9 @@ MatrixOutputCommand::MatrixOutputCommand(string option)  {
                                                        matrixCalculators.push_back(new MemEuclidean());
                                                }else if (Estimators[i] == "mempearson") { 
                                                        matrixCalculators.push_back(new MemPearson());
                                                        matrixCalculators.push_back(new MemEuclidean());
                                                }else if (Estimators[i] == "mempearson") { 
                                                        matrixCalculators.push_back(new MemPearson());
+                        }else if (Estimators[i] == "jsd") {
+                                matrixCalculators.push_back(new JSD());
+
                                                }
                                        }
                                }
                                                }
                                        }
                                }
@@ -791,7 +794,7 @@ int MatrixOutputCommand::driver(vector<SharedRAbundVector*> thisLookup, int star
                                                vector<double> tempdata = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
                                                
                                                if (m->control_pressed) { return 1; }
                                                vector<double> tempdata = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
                                                
                                                if (m->control_pressed) { return 1; }
-                                               
+        
                                                seqDist temp(l, k, tempdata[0]);
                                                calcDists[i].push_back(temp);
                                        }
                                                seqDist temp(l, k, tempdata[0]);
                                                calcDists[i].push_back(temp);
                                        }
index 90f120602e70dc04600b3048bab41bde21b21edc..5ddfb2c1c788e2d09f0f7ffcf4658a04f9e79c47 100644 (file)
@@ -54,6 +54,7 @@
 #include "memchord.h"
 #include "memeuclidean.h"
 #include "mempearson.h"
 #include "memchord.h"
 #include "memeuclidean.h"
 #include "mempearson.h"
+#include "sharedjsd.h"
 
 
 // aka. dist.shared()
 
 
 // aka. dist.shared()
@@ -224,7 +225,10 @@ static DWORD WINAPI MyDistSharedThreadFunction(LPVOID lpParam){
                     matrixCalculators.push_back(new MemEuclidean());
                 }else if (pDataArray->Estimators[i] == "mempearson") { 
                     matrixCalculators.push_back(new MemPearson());
                     matrixCalculators.push_back(new MemEuclidean());
                 }else if (pDataArray->Estimators[i] == "mempearson") { 
                     matrixCalculators.push_back(new MemPearson());
+                }else if (pDataArray->Estimators[i] == "jsd") {
+                    matrixCalculators.push_back(new JSD());
                 }
                 }
+
             }
         }
         
             }
         }
         
index 33b559fc2dc358017c90fd44fe77ec0be61acb65..de9e373194b30bc97a2afbeceb49b5303aa66de3 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "metastatscommand.h"
 #include "sharedutilities.h"
 
 #include "metastatscommand.h"
 #include "sharedutilities.h"
+#include "sharedrabundfloatvector.h"
 
 
 //**********************************************************************************************************************
 
 
 //**********************************************************************************************************************
@@ -213,7 +214,7 @@ MetaStatsCommand::MetaStatsCommand(string option) {
 int MetaStatsCommand::execute(){
        try {
        
 int MetaStatsCommand::execute(){
        try {
        
-               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        if (abort == true) { if (calledHelp) { return 0; }  return 2;  }
         
         //just used to convert files to test metastats online
         /****************************************************/
         
         //just used to convert files to test metastats online
         /****************************************************/
@@ -221,7 +222,6 @@ int MetaStatsCommand::execute(){
         convertSharedToInput = false;
         if (convertInputToShared) { convertToShared(sharedfile); return 0; }
         /****************************************************/
         convertSharedToInput = false;
         if (convertInputToShared) { convertToShared(sharedfile); return 0; }
         /****************************************************/
-        
                
                designMap = new GroupMap(designfile);
                designMap->readDesignMap();
                
                designMap = new GroupMap(designfile);
                designMap->readDesignMap();
@@ -574,9 +574,11 @@ int MetaStatsCommand::convertToShared(string filename) {
         string header = m->getline(in); m->gobble(in);
         
         vector<string> groups = m->splitWhiteSpace(header);
         string header = m->getline(in); m->gobble(in);
         
         vector<string> groups = m->splitWhiteSpace(header);
-        vector<SharedRAbundVector*> newLookup;
+        vector<SharedRAbundFloatVector*> newLookup;
+        cout << groups.size() << endl;
         for (int i = 0; i < groups.size(); i++) {
         for (int i = 0; i < groups.size(); i++) {
-            SharedRAbundVector* temp = new SharedRAbundVector();
+            cout << "creating group " << groups[i] << endl;
+            SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
             temp->setLabel("0.03");
             temp->setGroup(groups[i]);
             newLookup.push_back(temp);
             temp->setLabel("0.03");
             temp->setGroup(groups[i]);
             newLookup.push_back(temp);
@@ -589,9 +591,9 @@ int MetaStatsCommand::convertToShared(string filename) {
             string otuname;
             in >> otuname; m->gobble(in);
             otuCount++;
             string otuname;
             in >> otuname; m->gobble(in);
             otuCount++;
-            
+            cout << otuname << endl;
             for (int i = 0; i < groups.size(); i++) {
             for (int i = 0; i < groups.size(); i++) {
-                int temp;
+                double temp;
                 in >> temp; m->gobble(in);
                 newLookup[i]->push_back(temp, groups[i]);
             }
                 in >> temp; m->gobble(in);
                 newLookup[i]->push_back(temp, groups[i]);
             }
@@ -654,6 +656,8 @@ int MetaStatsCommand::convertToInput(vector<SharedRAbundVector*>& subset, string
         }
         out.close();
         
         }
         out.close();
         
+        cout << thisfilename+".matrix" << endl;
+        
         return 0;
     }
        catch(exception& e) {
         return 0;
     }
        catch(exception& e) {
diff --git a/pam.cpp b/pam.cpp
new file mode 100644 (file)
index 0000000..2c70f58
--- /dev/null
+++ b/pam.cpp
@@ -0,0 +1,373 @@
+//
+//  pam.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 12/10/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "pam.h"
+#define DBL_EPSILON 1e-9
+
+/**************************************************************************************************/
+Pam::Pam(vector<vector<int> > c, vector<vector<double> > d, int p) : CommunityTypeFinder() {
+    try {
+        countMatrix = c;
+        numSamples = (int)d.size();
+        numOTUs = (int)c[0].size();
+        numPartitions = p;
+        dists = d;
+        
+        largestDist = 0;
+        for (int i = 0; i < dists.size(); i++) {
+            for (int j = i; j < dists.size(); j++) {
+                if (m->control_pressed) { break; }
+                if (dists[i][j] > largestDist) { largestDist = dists[i][j]; } 
+            }
+        }
+        
+        buildPhase(); //choosing the medoids
+        swapPhase(); //optimize clusters
+    }
+       catch(exception& e) {
+               m->errorOut(e, "Pam", "Pam");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+//sets Dp[0] does not set Dp[1]. chooses intial medoids.
+int Pam::buildPhase() {
+    try {
+        
+        if (m->debug) { m->mothurOut("[DEBUG]: building medoids\n"); }
+        
+        vector<double> gains; gains.resize(numSamples);
+        
+        largestDist *= 1.1 + 1; //make this distance larger than any distance in the matrix
+        Dp.resize(numSamples);
+        for (int i = 0; i < numSamples; i++) { Dp[i].push_back(largestDist); Dp[i].push_back(largestDist); } //2 smallest dists for this sample in this partition
+        
+        zMatrix.resize(numPartitions);
+        for(int i=0;i<numPartitions;i++){
+            zMatrix[i].assign(numSamples, 0);
+        }
+    
+        for (int k = 0; k < numPartitions; k++) {
+            
+            int medoid = -1;
+            double totalGain = 0.0;
+            double clusterGain = 0.0;
+            
+            for (int i = 0; i < numSamples; i++) {  //does this need to be square?? can we do lt?
+                if (m->control_pressed) { break; }
+        
+                if (medoids.count(i) == 0) { //is this sample is NOT a medoid?
+                    gains[i] = 0.0;
+                
+                    for (int j = 0; j < numSamples; j++) {
+                        //cout << i << '\t' << j << '\t' <<   Dp[i][0] << '\t' << dists[i][j] << '\t' << totalGain << endl;
+                        totalGain = Dp[i][0] - dists[i][j];
+                        if (totalGain > 0.0) { gains[i] += totalGain; }
+                    }
+                    if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +  " totalGain = " + toString(totalGain) + "\n"); }
+                    
+                    if (clusterGain <= gains[i]) {
+                        clusterGain = gains[i];
+                        medoid = i;
+                    }
+                }
+            }
+            
+            //save medoid value
+            medoids.insert(medoid);
+            
+            if (m->debug) { m->mothurOut("[DEBUG]: new medoid " + toString(medoid) + "\n"); }
+            
+            //update dp values
+            for (int i = 0; i < numSamples; i++) {
+                if (Dp[i][0] > dists[i][medoid]) { Dp[i][0] = dists[i][medoid]; }
+            }
+        }
+        if (m->debug) { m->mothurOut("[DEBUG]: done building medoids\n"); }
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "Pam", "buildPhase");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+//goal to swap medoids with non-medoids to see if we can reduce the overall cost
+int Pam::swapPhase() {
+    try {
+        if (m->debug) { m->mothurOut("[DEBUG]: swapping  medoids\n"); }
+        //calculate cost of initial choice - average distance of samples to their closest medoid
+        double sky = 0.0;
+        double dzsky = 1.0;
+        for (int i = 0; i < numSamples; i++) { sky += Dp[i][0]; }  sky /= (double) numSamples;
+        
+        bool done = false;
+        int hbest, nbest; hbest = -1; nbest = -1;
+        while (!done) {
+            if (m->control_pressed) { break; }
+            
+            updateDp();
+            
+            dzsky = 1;
+            
+            for (int h = 0; h < numSamples; h++) {
+                if (m->control_pressed) { break; }
+                if (medoids.count(h) == 0) { //this is NOT a medoid
+                    for (int i = 0; i < numSamples; i++) {
+                        if (medoids.count(i) != 0) { //this is a medoid
+                        
+                            double dz = 0.0; //Tih sum of distances between objects and closest medoid caused by swapping i and h. Basically the change in cost. If this < 0 its a "good" swap. When all Tih are > 0, then we stop the algo, because we have the optimal medoids.
+                            for (int j = 0; j < numSamples; j++) {
+                                if (m->control_pressed) { break; }
+                                if (dists[i][j] == Dp[j][0]) {
+                                    double small = 0.0;
+                                    if (Dp[j][1] > dists[h][j]) {   small = dists[h][j];    }
+                                    else                        {   small = Dp[j][1];       }
+                                    dz += (small - Dp[j][0]);
+                                }else if (dists[h][j] < Dp[j][0]) {
+                                    dz += (dists[h][j] - Dp[j][0]);
+                                }
+                            }
+                            if (dzsky > dz) {
+                                dzsky = dz;
+                                hbest = h; 
+                                nbest = i;
+                            }
+                        }//end if medoid
+                    }//end for i
+                }//end if NOT medoid
+            }//end if h
+            
+            if (dzsky < -16 *DBL_EPSILON * fabs(sky)) {
+                medoids.insert(hbest);
+                medoids.erase(nbest);
+                if (m->debug) { m->mothurOut("[DEBUG]: swapping " + toString(hbest) + " " + toString(nbest) + "\n"); }
+                sky += dzsky;
+            }else { done = true; } //stop algo.
+        }
+        
+        
+        //fill zmatrix
+        int count = 0;
+        vector<int> tempMedoids;
+        for (set<int>::iterator it = medoids.begin(); it != medoids.end(); it++) {
+            medoid2Partition[*it] = count;
+            zMatrix[count][*it] = 1; count++; //set medoid in this partition.
+            tempMedoids.push_back(*it);
+        }
+        
+        //which partition do you belong to?
+        laplace = 0;
+        for (int i = 0; i < numSamples; i++) {
+            int partition = 0;
+            double dist = dists[i][tempMedoids[0]]; //assign to first medoid
+            for (int j = 1; j < tempMedoids.size(); j++) {
+                if (dists[i][tempMedoids[j]] < dist) { //is this medoid closer?
+                    dist = dists[i][tempMedoids[j]];
+                    partition = j;
+                }
+            }
+            zMatrix[partition][i] = 1;
+            laplace += dist;
+        }
+        laplace /= (double) numSamples;
+        
+        if (m->debug) {
+            for(int i=0;i<numPartitions;i++){
+                m->mothurOut("[DEBUG]: partition 1: "); 
+                for (int j = 0; j < numSamples; j++) {
+                     m->mothurOut(toString(zMatrix[i][j]) + " ");
+                }
+                m->mothurOut("\n"); 
+            }
+            m->mothurOut("[DEBUG]: medoids : ");
+            for (set<int>::iterator it = medoids.begin(); it != medoids.end(); it++) { m->mothurOut(toString(*it) + " ");
+            }
+            m->mothurOut("\n");
+            
+            m->mothurOut("[DEBUG]: laplace : " + toString(laplace));  m->mothurOut("\n");
+        }
+        
+        if (m->debug) { m->mothurOut("[DEBUG]: done swapping  medoids\n"); }
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "Pam", "swapPhase");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+int Pam::updateDp() {
+    try {
+        for (int j = 0; j < numSamples; j++) {
+            if (m->control_pressed) { break; }
+            
+            //initialize dp and ep
+            Dp[j][0] = largestDist; Dp[j][1] = largestDist;
+        
+            for (int i = 0; i < numSamples; i++) {
+                if (medoids.count(i) != 0) { //is this a medoid? 
+                    if (Dp[j][0] > dists[j][i]) {
+                        Dp[j][0] = Dp[j][1];
+                        Dp[j][0] = dists[j][i];
+                    }else if (Dp[j][1] > dists[j][i]) {
+                        Dp[j][1] = dists[j][i];
+                    }
+                }
+            }
+        }
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "Pam", "updateDp");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+//The silhouette width S(i)of individual data points i is calculated using the following formula:
+/*
+ s(i) = b(i) - a(i)
+ -----------
+ max(b(i),a(i))
+ where a(i) is the average dissimilarity (or distance) of sample i to all other samples in the same cluster, while b(i) is the average dissimilarity (or distance) to all objects in the closest other cluster.
+ The formula implies -1 =< S(i) =< 1 . A sample which is much closer to its own cluster than to any other cluster has a high S(i) value, while S(i) close to 0 implies that the given sample lies somewhere between two clusters. Large negative S(i) values indicate that the sample was assigned to the wrong cluster.
+ */
+
+vector<double> Pam::calcSilhouettes(vector<vector<double> > dists) {
+    try {
+        vector<double> silhouettes; silhouettes.resize(numSamples, 0.0);
+        if (numPartitions < 2) { return silhouettes; }
+        
+        vector<int> whoDp;
+        for (int i = 0; i < numSamples; i++) { // assumes at least 2 partitions
+            vector<seqDist> tempMeds;
+            for (set<int>::iterator it = medoids.begin(); it != medoids.end(); it++) {
+                if (m->control_pressed) { break; }
+                seqDist temp(*it, *it, dists[i][*it]); //medoid, medoid, distance from medoid to sample
+                tempMeds.push_back(temp);
+            }
+            sort(tempMeds.begin(), tempMeds.end(), compareSequenceDistance); //sort by distance
+            whoDp.push_back(tempMeds[1].seq1); //second closest medoid
+        }
+        
+        
+        vector<double> a, b; a.resize(numSamples, 0.0); b.resize(numSamples, 0.0);
+        
+        //calc a - all a[i] are the same in the same partition
+        for (int k = 0; k < numPartitions; k++) {
+            if (m->control_pressed) { break; }
+            
+            int count = 0;
+            double totalA = 0.0;
+            for (int i = 0; i < numSamples; i++) {
+                for (int j = 0; j < numSamples; j++) {
+                    if (m->control_pressed) { break; }
+                    if ((zMatrix[k][i] != 0) && (zMatrix[k][j] != 0)){ //are both samples in the partition, if so add there distance
+                        totalA += dists[j][i]; //distance from this sample to the other samples in the partition
+                        count++;
+                    }
+                }
+            }
+            totalA /= (double) count;
+            
+            //set a[i] to average for cluster
+            for (int i = 0; i < numSamples; i++) {
+                if (zMatrix[k][i] != 0) { a[i] = totalA; }
+            }
+        }
+        
+        //calc b
+        for (int i = 0; i < numSamples; i++) {
+            if (m->control_pressed) { break; }
+            
+            int nextClosestMedoid = whoDp[i];
+            int thisPartition = medoid2Partition[nextClosestMedoid];
+            int count = 0;
+            double totalB = 0.0;
+            for (int j = 0; j < numSamples; j++) {
+                if (zMatrix[thisPartition][j] != 0) { //this sample is in this partition
+                    totalB += dists[i][j];
+                    count++;
+                }
+            }
+            b[i] = totalB / (double) count;
+        }
+        
+        //calc silhouettes
+        for (int i = 0; i < numSamples; i++) {
+            if (m->control_pressed) { break; }
+            
+            double denom = a[i];
+            if (b[i] > denom) { denom = b[i]; } //max(a[i],b[i])
+            
+            silhouettes[i] = (b[i] - a[i]) / denom;
+            
+            //cout << "silhouettes " << i << '\t' << silhouettes[i] << endl;
+        }
+               
+        return silhouettes;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "Pam", "calcSilhouettes");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*To assess the optimal number of clusters our dataset was most robustly partitioned into, we used the Calinski-Harabasz (CH) Index that has shown good performance in recovering the number of clusters. It is defined as:
+ CHk=Bk/(k−1)/Wk/(n−k)
+ where Bk is the between-cluster sum of squares (i.e. the squared distances between all points i and j, for which i and j are not in the same cluster) and Wk is the within-clusters sum of squares (i.e. the squared distances between all points i and j, for which i and j are in the same cluster). This measure implements the idea that the clustering is more robust when between-cluster distances are substantially larger than within-cluster distances. Consequently, we chose the number of clusters k such that CHk was maximal.*/
+double Pam::calcCHIndex(vector< vector<double> > dists){ //countMatrix = [numSamples][numOtus]
+    try {
+        double CH = 0.0;
+        
+        if (numPartitions < 2) { return CH; }
+        
+        map<int, int> clusterMap; //map sample to partition
+        for (int i = 0; i < numPartitions; i++) {
+            for (int j = 0; j < numSamples; j++) {
+                if (m->control_pressed) { return 0.0; }
+                if (zMatrix[i][j] != 0) { clusterMap[j] = i; }
+            }
+        }
+        
+        double sumBetweenCluster = 0.0;
+        double sumWithinClusters = 0.0;
+        
+        for (int i = 0; i < numSamples; i++) { //lt
+            for (int j = 0; j < i; j++) {
+                if (m->control_pressed) { return 0.0; }
+                int partitionI = clusterMap[i];
+                int partitionJ = clusterMap[j];
+                
+                if (partitionI == partitionJ) { //they are from the same cluster so this distance is added to Wk
+                    sumWithinClusters += (dists[i][j] * dists[i][j]);
+                }else { //they are NOT from the same cluster so this distance is added to Bk
+                    sumBetweenCluster += (dists[i][j] * dists[i][j]);
+                }
+            }
+        }
+        //cout << numPartitions << '\t' << sumWithinClusters << '\t' << sumBetweenCluster << '\t' <<  (numSamples - numPartitions) << endl;
+        
+        CH = (sumBetweenCluster / (double)(numPartitions - 1)) / (sumWithinClusters / (double) (numSamples - numPartitions));
+        
+        return CH;
+    }
+    catch(exception& e){
+        m->errorOut(e, "Pam", "calcCHIndex");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+
+
diff --git a/pam.h b/pam.h
new file mode 100644 (file)
index 0000000..9930176
--- /dev/null
+++ b/pam.h
@@ -0,0 +1,40 @@
+//
+//  pam.h
+//  Mothur
+//
+//  Created by SarahsWork on 12/10/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_pam_h
+#define Mothur_pam_h
+
+#include "communitytype.h"
+
+/**************************************************************************************************/
+
+class Pam : public CommunityTypeFinder {
+    
+public:
+    Pam(vector<vector<int> >, vector<vector<double> >, int);
+    vector<double> calcSilhouettes(vector< vector< double> >);
+    double calcCHIndex(vector< vector< double> >);
+    
+private:
+    set<int> medoids;
+    map<int, int> medoid2Partition;
+    double largestDist;
+    vector<vector<double> > dists;
+    vector<vector< double> > Dp; // [numSamples][2] - It contains Dp and Ep. Dp is in [numSamples][0] and Ep is in [numSamples][1]. Dp is the distance between p and the closest sample in S and Ep is the distance between p and the second closest object in S. Both are used in the build and swap phases.
+    
+    int buildPhase();
+    int swapPhase();
+    int updateDp();
+    
+    
+    
+/**************************************************************************************************/
+};
+
+
+#endif
index c89644bae43649c7db10511b913be8ee9a3b7504..7ea5611ab0045d46fe11454ad0a522e565127003 100644 (file)
@@ -7,26 +7,27 @@
 //
 
 #include "qFinderDMM.h"
 //
 
 #include "qFinderDMM.h"
-#include "linearalgebra.h"
 
 
-#define EPSILON numeric_limits<double>::epsilon()
+
 
 /**************************************************************************************************/
 
 
 /**************************************************************************************************/
 
-qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p): countMatrix(cm), numPartitions(p){
+qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p) : CommunityTypeFinder() {
     try {
     try {
-        m = MothurOut::getInstance();
+        //cout << "here" << endl;
+        numPartitions = p;
+        countMatrix = cm;
         numSamples = (int)countMatrix.size();
         numOTUs = (int)countMatrix[0].size();
         
         numSamples = (int)countMatrix.size();
         numOTUs = (int)countMatrix[0].size();
         
-        
-        kMeans();
-        //    cout << "done kMeans" << endl;
+        //cout << "before kmeans" <<endl;
+        findkMeans();
+            //cout << "done kMeans" << endl;
         
         optimizeLambda();
         
         
         
         optimizeLambda();
         
         
-        //    cout << "done optimizeLambda" << endl;
+            //cout << "done optimizeLambda" << endl;
         
         double change = 1.0000;
         currNLL = 0.0000;
         
         double change = 1.0000;
         currNLL = 0.0000;
@@ -35,19 +36,19 @@ qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p): countMatrix(cm), numPart
         
         while(change > 1.0e-6 && iter < 100){
             
         
         while(change > 1.0e-6 && iter < 100){
             
-            //        cout << "Calc_Z: ";
+                    //cout << "Calc_Z: ";
             calculatePiK();
             
             optimizeLambda();
             
             calculatePiK();
             
             optimizeLambda();
             
-            //        printf("Iter:%d\t",iter);
+                    //printf("Iter:%d\t",iter);
             
             for(int i=0;i<numPartitions;i++){
                 weights[i] = 0.0000;
                 for(int j=0;j<numSamples;j++){
                     weights[i] += zMatrix[i][j];
                 }
             
             for(int i=0;i<numPartitions;i++){
                 weights[i] = 0.0000;
                 for(int j=0;j<numSamples;j++){
                     weights[i] += zMatrix[i][j];
                 }
-                //            printf("w_%d=%.3f\t",i,weights[i]);
+                           // printf("w_%d=%.3f\t",i,weights[i]);
                 
             }
             
                 
             }
             
@@ -57,7 +58,7 @@ qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p): countMatrix(cm), numPart
             
             currNLL = nLL;
             
             
             currNLL = nLL;
             
-            //        printf("NLL=%.5f\tDelta=%.4e\n",currNLL, change);
+                   // printf("NLL=%.5f\tDelta=%.4e\n",currNLL, change);
             
             iter++;
         }
             
             iter++;
         }
@@ -95,84 +96,35 @@ qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p): countMatrix(cm), numPart
                exit(1);
        }
 }
                exit(1);
        }
 }
-
 /**************************************************************************************************/
 /**************************************************************************************************/
-
-void qFinderDMM::printZMatrix(string fileName, vector<string> sampleName){
+void qFinderDMM::printFitData(ofstream& out){
     try {
     try {
-        ofstream printMatrix;
-        m->openOutputFile(fileName, printMatrix); //(fileName.c_str());
-        printMatrix.setf(ios::fixed, ios::floatfield);
-        printMatrix.setf(ios::showpoint);
-        
-        for(int i=0;i<numPartitions;i++){   printMatrix << "\tPartition_" << i+1;   }   printMatrix << endl;
-        
-        for(int i=0;i<numSamples;i++){
-            printMatrix << sampleName[i];
-            for(int j=0;j<numPartitions;j++){
-                printMatrix << setprecision(4) << '\t' << zMatrix[j][i];
-            }
-            printMatrix << endl;
-        }
-        printMatrix.close();
+        out << setprecision (2) << numPartitions << '\t'  << getNLL() << '\t' << getLogDet() << '\t' <<  getBIC() << '\t' << getAIC() << '\t' << laplace << endl;
+        return;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "printFitData");
+        exit(1);
     }
     }
-       catch(exception& e) {
-               m->errorOut(e, "qFinderDMM", "printZMatrix");
-               exit(1);
-       }
 }
 }
-
 /**************************************************************************************************/
 /**************************************************************************************************/
-
-void qFinderDMM::printRelAbund(string fileName, vector<string> otuNames){
+void qFinderDMM::printFitData(ostream& out, double minLaplace){
     try {
     try {
-        ofstream printRA;
-        m->openOutputFile(fileName, printRA); //(fileName.c_str());
-        printRA.setf(ios::fixed, ios::floatfield);
-        printRA.setf(ios::showpoint);
-        
-        vector<double> totals(numPartitions, 0.0000);
-        for(int i=0;i<numPartitions;i++){
-            for(int j=0;j<numOTUs;j++){
-                totals[i] += exp(lambdaMatrix[i][j]);
-            }
+        if(laplace < minLaplace){
+            out << setprecision (2) << numPartitions << '\t'  << getNLL() << '\t' << getLogDet() << '\t' <<  getBIC() << '\t' << getAIC() << '\t' << laplace << "***" << endl;
+        }else {
+            out << setprecision (2) << numPartitions << '\t'  << getNLL() << '\t' << getLogDet() << '\t' <<  getBIC() << '\t' << getAIC() << '\t' << laplace << endl;
         }
         
         }
         
-        printRA << "Taxon";
-        for(int i=0;i<numPartitions;i++){
-            printRA << "\tPartition_" << i+1 << '_' << setprecision(4) << totals[i];
-            printRA << "\tPartition_" << i+1 <<"_LCI" << "\tPartition_" << i+1 << "_UCI";
-        }
-        printRA << endl;
-        
-        for(int i=0;i<numOTUs;i++){
-            
-            if (m->control_pressed) { break; }
-            
-            printRA << otuNames[i];
-            for(int j=0;j<numPartitions;j++){
-                
-                if(error[j][i] >= 0.0000){
-                    double std = sqrt(error[j][i]);
-                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
-                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i] - 2.0 * std) / totals[j];
-                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i] + 2.0 * std) / totals[j];
-                }
-                else{
-                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
-                    printRA << '\t' << "NA";
-                    printRA << '\t' << "NA";
-                }
-            }
-            printRA << endl;
-        }
-        
-        printRA.close();
+        m->mothurOutJustToLog(toString(numPartitions) + '\t' + toString(getNLL()) + '\t' + toString(getLogDet()) + '\t');
+        m->mothurOutJustToLog(toString(getBIC()) + '\t' + toString(getAIC()) + '\t' + toString(laplace));
+
+        return;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "printFitData");
+        exit(1);
     }
     }
-       catch(exception& e) {
-               m->errorOut(e, "qFinderDMM", "printRelAbund");
-               exit(1);
-       }
 }
 
 /**************************************************************************************************/
 }
 
 /**************************************************************************************************/
@@ -193,7 +145,7 @@ interp_quad (double f0, double fp0, double f1, double zl, double zh)
     
     double zmin = zl, fmin = fl;
     
     
     double zmin = zl, fmin = fl;
     
-    if (fh < fmin) { zmin = zh; fmin = fh; } 
+    if (fh < fmin) { zmin = zh; fmin = fh; }
     
     if (c > 0)  /* positive curvature required for a minimum */
     {
     
     if (c > 0)  /* positive curvature required for a minimum */
     {
@@ -216,7 +168,7 @@ interp_quad (double f0, double fp0, double f1, double zl, double zh)
  *
  * c(x) = f0 + fp0 * z + eta * z^2 + xi * z^3
  *
  *
  * c(x) = f0 + fp0 * z + eta * z^2 + xi * z^3
  *
- * where eta=3*(f1-f0)-2*fp0-fp1, xi=fp0+fp1-2*(f1-f0). 
+ * where eta=3*(f1-f0)-2*fp0-fp1, xi=fp0+fp1-2*(f1-f0).
  */
 
 double cubic (double c0, double c1, double c2, double c3, double z){
  */
 
 double cubic (double c0, double c1, double c2, double c3, double z){
@@ -231,7 +183,7 @@ void check_extremum (double c0, double c1, double c2, double c3, double z,
     
     double y = cubic (c0, c1, c2, c3, z);
     
     
     double y = cubic (c0, c1, c2, c3, z);
     
-    if (y < *fmin)  
+    if (y < *fmin)
     {
         *zmin = z;  /* accepted new point*/
         *fmin = y;
     {
         *zmin = z;  /* accepted new point*/
         *fmin = y;
@@ -240,7 +192,7 @@ void check_extremum (double c0, double c1, double c2, double c3, double z,
 
 /**************************************************************************************************/
 
 
 /**************************************************************************************************/
 
-int gsl_poly_solve_quadratic (double a, double b, double c, 
+int gsl_poly_solve_quadratic (double a, double b, double c,
                               double *x0, double *x1)
 {
     
                               double *x0, double *x1)
 {
     
@@ -274,12 +226,12 @@ int gsl_poly_solve_quadratic (double a, double b, double c,
             double r1 = temp / a ;
             double r2 = c / temp ;
             
             double r1 = temp / a ;
             double r2 = c / temp ;
             
-            if (r1 < r2) 
+            if (r1 < r2)
             {
                 *x0 = r1 ;
                 *x1 = r2 ;
             {
                 *x0 = r1 ;
                 *x1 = r2 ;
-            } 
-            else 
+            }
+            else
             {
                 *x0 = r2 ;
                 *x1 = r1 ;
             {
                 *x0 = r2 ;
                 *x1 = r1 ;
@@ -287,7 +239,7 @@ int gsl_poly_solve_quadratic (double a, double b, double c,
         }
         return 2;
     }
         }
         return 2;
     }
-    else if (disc == 0) 
+    else if (disc == 0)
     {
         *x0 = -0.5 * b / a ;
         *x1 = -0.5 * b / a ;
     {
         *x0 = -0.5 * b / a ;
         *x1 = -0.5 * b / a ;
@@ -297,7 +249,7 @@ int gsl_poly_solve_quadratic (double a, double b, double c,
     {
         return 0;
     }
     {
         return 0;
     }
-   
+    
 }
 
 /**************************************************************************************************/
 }
 
 /**************************************************************************************************/
@@ -317,14 +269,14 @@ double interp_cubic (double f0, double fp0, double f1, double fp1, double zl, do
         
         if (n == 2)  /* found 2 roots */
         {
         
         if (n == 2)  /* found 2 roots */
         {
-            if (z0 > zl && z0 < zh) 
+            if (z0 > zl && z0 < zh)
                 check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin);
                 check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin);
-            if (z1 > zl && z1 < zh) 
+            if (z1 > zl && z1 < zh)
                 check_extremum (c0, c1, c2, c3, z1, &zmin, &fmin);
         }
         else if (n == 1)  /* found 1 root */
         {
                 check_extremum (c0, c1, c2, c3, z1, &zmin, &fmin);
         }
         else if (n == 1)  /* found 1 root */
         {
-            if (z0 > zl && z0 < zh) 
+            if (z0 > zl && z0 < zh)
                 check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin);
         }
     }
                 check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin);
         }
     }
@@ -355,7 +307,7 @@ double interpolate (double a, double fa, double fpa,
     else{
         z = interp_quad(fa, fpa * (b - a), fb, zmin, zmax);
     }
     else{
         z = interp_quad(fa, fpa * (b - a), fb, zmin, zmax);
     }
-
+    
     
     alpha = a + z * (b - a);
     
     
     alpha = a + z * (b - a);
     
@@ -635,178 +587,6 @@ int qFinderDMM::bfgs2_Solver(vector<double>& x){
     }
 }
 
     }
 }
 
-/**************************************************************************************************/
-
-//can we get these psi/psi1 calculations into their own math class?
-//psi calcualtions swiped from gsl library...
-
-static const double psi_cs[23] = {
-    -.038057080835217922,
-    .491415393029387130, 
-    -.056815747821244730,
-    .008357821225914313,
-    -.001333232857994342,
-    .000220313287069308,
-    -.000037040238178456,
-    .000006283793654854,
-    -.000001071263908506,
-    .000000183128394654,
-    -.000000031353509361,
-    .000000005372808776,
-    -.000000000921168141,
-    .000000000157981265,
-    -.000000000027098646,
-    .000000000004648722,
-    -.000000000000797527,
-    .000000000000136827,
-    -.000000000000023475,
-    .000000000000004027,
-    -.000000000000000691,
-    .000000000000000118,
-    -.000000000000000020
-};
-
-static double apsi_cs[16] = {    
-    -.0204749044678185,
-    -.0101801271534859,
-    .0000559718725387,
-    -.0000012917176570,
-    .0000000572858606,
-    -.0000000038213539,
-    .0000000003397434,
-    -.0000000000374838,
-    .0000000000048990,
-    -.0000000000007344,
-    .0000000000001233,
-    -.0000000000000228,
-    .0000000000000045,
-    -.0000000000000009,
-    .0000000000000002,
-    -.0000000000000000 
-};    
-
-/**************************************************************************************************/
-
-double qFinderDMM::cheb_eval(const double seriesData[], int order, double xx){
-    try {
-        double d = 0.0000;
-        double dd = 0.0000;
-        
-        double x2 = xx * 2.0000;
-        
-        for(int j=order;j>=1;j--){
-            if (m->control_pressed) {  return 0; }
-            double temp = d;
-            d = x2 * d - dd + seriesData[j];
-            dd = temp;
-        }
-        
-        d = xx * d - dd + 0.5 * seriesData[0];
-        return d;
-    }
-    catch(exception& e){
-        m->errorOut(e, "qFinderDMM", "cheb_eval");
-        exit(1);
-    }
-}
-
-/**************************************************************************************************/
-
-double qFinderDMM::psi(double xx){
-    try {
-        double psiX = 0.0000;
-        
-        if(xx < 1.0000){
-            
-            double t1 = 1.0 / xx;
-            psiX = cheb_eval(psi_cs, 22, 2.0*xx-1.0);
-            psiX = -t1 + psiX;
-            
-        }
-        else if(xx < 2.0000){
-            
-            const double v = xx - 1.0;
-            psiX = cheb_eval(psi_cs, 22, 2.0*v-1.0);
-            
-        }
-        else{
-            const double t = 8.0/(xx*xx)-1.0;
-            psiX = cheb_eval(apsi_cs, 15, t);
-            psiX += log(xx) - 0.5/xx;
-        }
-        
-        return psiX;
-    }
-    catch(exception& e){
-        m->errorOut(e, "qFinderDMM", "psi");
-        exit(1);
-    }
-}
-
-/**************************************************************************************************/
-
-/* coefficients for Maclaurin summation in hzeta()
- * B_{2j}/(2j)!
- */
-static double hzeta_c[15] = {
-    1.00000000000000000000000000000,
-    0.083333333333333333333333333333,
-    -0.00138888888888888888888888888889,
-    0.000033068783068783068783068783069,
-    -8.2671957671957671957671957672e-07,
-    2.0876756987868098979210090321e-08,
-    -5.2841901386874931848476822022e-10,
-    1.3382536530684678832826980975e-11,
-    -3.3896802963225828668301953912e-13,
-    8.5860620562778445641359054504e-15,
-    -2.1748686985580618730415164239e-16,
-    5.5090028283602295152026526089e-18,
-    -1.3954464685812523340707686264e-19,
-    3.5347070396294674716932299778e-21,
-    -8.9535174270375468504026113181e-23
-};
-
-/**************************************************************************************************/
-
-double qFinderDMM::psi1(double xx){
-    try {
-        
-        /* Euler-Maclaurin summation formula
-         * [Moshier, p. 400, with several typo corrections]
-         */
-        
-        double s = 2.0000;
-        const int jmax = 12;
-        const int kmax = 10;
-        int j, k;
-        const double pmax  = pow(kmax + xx, -s);
-        double scp = s;
-        double pcp = pmax / (kmax + xx);
-        double value = pmax*((kmax+xx)/(s-1.0) + 0.5);
-        
-        for(k=0; k<kmax; k++) {
-            if (m->control_pressed) {  return 0; }
-            value += pow(k + xx, -s);
-        }
-        
-        for(j=0; j<=jmax; j++) {
-            if (m->control_pressed) {  return 0; }
-            double delta = hzeta_c[j+1] * scp * pcp;
-            value += delta;
-            
-            if(fabs(delta/value) < 0.5*EPSILON) break;
-            
-            scp *= (s+2*j+1)*(s+2*j+2);
-            pcp /= (kmax + xx)*(kmax + xx);
-        }
-        
-        return value;
-    }
-    catch(exception& e){
-        m->errorOut(e, "qFinderDMM", "psi1");
-        exit(1);
-    }
-}
 
 /**************************************************************************************************/
 
 
 /**************************************************************************************************/
 
@@ -961,145 +741,6 @@ double qFinderDMM::getNegativeLogEvidence(vector<double>& lambda, int group){
 
 /**************************************************************************************************/
 
 
 /**************************************************************************************************/
 
-void qFinderDMM::kMeans(){
-    try {
-        
-        vector<vector<double> > relativeAbundance(numSamples);
-        vector<vector<double> > alphaMatrix;
-        
-        alphaMatrix.resize(numPartitions);
-        lambdaMatrix.resize(numPartitions);
-        for(int i=0;i<numPartitions;i++){
-            alphaMatrix[i].assign(numOTUs, 0);
-            lambdaMatrix[i].assign(numOTUs, 0);
-        }
-        
-        //get relative abundance
-        for(int i=0;i<numSamples;i++){
-            if (m->control_pressed) {  return; }
-            int groupTotal = 0;
-            
-            relativeAbundance[i].assign(numOTUs, 0.0);
-            
-            for(int j=0;j<numOTUs;j++){
-                groupTotal += countMatrix[i][j];
-            }
-            for(int j=0;j<numOTUs;j++){
-                relativeAbundance[i][j] = countMatrix[i][j] / (double)groupTotal;
-            }
-        }
-        
-        //randomly assign samples into partitions
-        zMatrix.resize(numPartitions);
-        for(int i=0;i<numPartitions;i++){
-            zMatrix[i].assign(numSamples, 0);
-        }
-        
-        for(int i=0;i<numSamples;i++){
-            zMatrix[rand()%numPartitions][i] = 1;
-        }
-        
-        double maxChange = 1;
-        int maxIters = 1000;
-        int iteration = 0;
-        
-        weights.assign(numPartitions, 0);
-        
-        while(maxChange > 1e-6 && iteration < maxIters){
-            
-            if (m->control_pressed) {  return; }
-            //calcualte average relative abundance
-            maxChange = 0.0000;
-            for(int i=0;i<numPartitions;i++){
-                
-                double normChange = 0.0;
-                
-                weights[i] = 0;
-                
-                for(int j=0;j<numSamples;j++){
-                    weights[i] += (double)zMatrix[i][j];
-                }
-                
-                vector<double> averageRelativeAbundance(numOTUs, 0);
-                for(int j=0;j<numOTUs;j++){
-                    for(int k=0;k<numSamples;k++){
-                        averageRelativeAbundance[j] += zMatrix[i][k] * relativeAbundance[k][j];
-                    }
-                }
-                
-                for(int j=0;j<numOTUs;j++){
-                    averageRelativeAbundance[j] /= weights[i];
-                    double difference = averageRelativeAbundance[j] - alphaMatrix[i][j];
-                    normChange += difference * difference;
-                    alphaMatrix[i][j] = averageRelativeAbundance[j];
-                }
-                
-                normChange = sqrt(normChange);
-                
-                if(normChange > maxChange){ maxChange = normChange; }
-            }
-            
-            
-            //calcualte distance between each sample in partition adn the average relative abundance
-            for(int i=0;i<numSamples;i++){
-                if (m->control_pressed) {  return; }
-                
-                double normalizationFactor = 0;
-                vector<double> totalDistToPartition(numPartitions, 0);
-                
-                for(int j=0;j<numPartitions;j++){
-                    for(int k=0;k<numOTUs;k++){
-                        double difference = alphaMatrix[j][k] - relativeAbundance[i][k];
-                        totalDistToPartition[j] += difference * difference;
-                    }
-                    totalDistToPartition[j] = sqrt(totalDistToPartition[j]);
-                    normalizationFactor += exp(-50.0 * totalDistToPartition[j]);
-                }
-                
-                
-                for(int j=0;j<numPartitions;j++){
-                    zMatrix[j][i] = exp(-50.0 * totalDistToPartition[j]) / normalizationFactor;
-                }
-                
-            }
-            
-            iteration++;
-            //        cout << "K means: " << iteration << '\t' << maxChange << endl;
-            
-        }
-        
-        //    cout << "Iter:-1";
-        for(int i=0;i<numPartitions;i++){
-            weights[i] = 0.0000;
-            
-            for(int j=0;j<numSamples;j++){
-                weights[i] += zMatrix[i][j];
-            }
-            //        printf("\tw_%d=%.3f", i, weights[i]);
-        }
-        //    cout << endl;
-        
-        
-        for(int i=0;i<numOTUs;i++){
-            if (m->control_pressed) {  return; }
-            for(int j=0;j<numPartitions;j++){
-                if(alphaMatrix[j][i] > 0){
-                    lambdaMatrix[j][i] = log(alphaMatrix[j][i]);
-                }
-                else{
-                    lambdaMatrix[j][i] = -10.0;
-                }
-            }
-        }
-    }
-    catch(exception& e){
-        m->errorOut(e, "qFinderDMM", "kMeans");
-        exit(1);
-    }
-}
-
-/**************************************************************************************************/
-
 void qFinderDMM::optimizeLambda(){    
     try {
         for(currentPartition=0;currentPartition<numPartitions;currentPartition++){
 void qFinderDMM::optimizeLambda(){    
     try {
         for(currentPartition=0;currentPartition<numPartitions;currentPartition++){
@@ -1112,7 +753,6 @@ void qFinderDMM::optimizeLambda(){
         exit(1);
     }
 }
         exit(1);
     }
 }
-
 /**************************************************************************************************/
 
 void qFinderDMM::calculatePiK(){
 /**************************************************************************************************/
 
 void qFinderDMM::calculatePiK(){
@@ -1247,80 +887,4 @@ double qFinderDMM::getNegativeLogLikelihood(){
 
 
 }
 
 
 }
-
-/**************************************************************************************************/
-
-vector<vector<double> > qFinderDMM::getHessian(){
-    try {
-        vector<double> alpha(numOTUs, 0.0000);
-        double alphaSum = 0.0000;
-        
-        vector<double> pi = zMatrix[currentPartition];
-        vector<double> psi_ajk(numOTUs, 0.0000);
-        vector<double> psi_cjk(numOTUs, 0.0000);
-        vector<double> psi1_ajk(numOTUs, 0.0000);
-        vector<double> psi1_cjk(numOTUs, 0.0000);
-        
-        for(int j=0;j<numOTUs;j++){
-            
-            if (m->control_pressed) {  break; }
-            
-            alpha[j] = exp(lambdaMatrix[currentPartition][j]);
-            alphaSum += alpha[j];
-            
-            for(int i=0;i<numSamples;i++){
-                double X = (double) countMatrix[i][j];
-                
-                psi_ajk[j] += pi[i] * psi(alpha[j]);
-                psi1_ajk[j] += pi[i] * psi1(alpha[j]);
-                
-                psi_cjk[j] += pi[i] * psi(alpha[j] + X);
-                psi1_cjk[j] += pi[i] * psi1(alpha[j] + X);
-            }
-        }
-        
-        
-        double psi_Ck = 0.0000;
-        double psi1_Ck = 0.0000;
-        
-        double weight = 0.0000;
-        
-        for(int i=0;i<numSamples;i++){
-            if (m->control_pressed) {  break; }
-            weight += pi[i];
-            double sum = 0.0000;
-            for(int j=0;j<numOTUs;j++){     sum += alpha[j] + countMatrix[i][j];    }
-            
-            psi_Ck += pi[i] * psi(sum);
-            psi1_Ck += pi[i] * psi1(sum);
-        }
-        
-        double psi_Ak = weight * psi(alphaSum);
-        double psi1_Ak = weight * psi1(alphaSum);
-        
-        vector<vector<double> > hessian(numOTUs);
-        for(int i=0;i<numOTUs;i++){ hessian[i].assign(numOTUs, 0.0000); }
-        
-        for(int i=0;i<numOTUs;i++){
-            if (m->control_pressed) {  break; }
-            double term1 = -alpha[i] * (- psi_ajk[i] + psi_Ak + psi_cjk[i] - psi_Ck);
-            double term2 = -alpha[i] * alpha[i] * (-psi1_ajk[i] + psi1_Ak + psi1_cjk[i] - psi1_Ck);
-            double term3 = 0.1 * alpha[i];
-            
-            hessian[i][i] = term1 + term2 + term3;
-            
-            for(int j=0;j<i;j++){   
-                hessian[i][j] = - alpha[i] * alpha[j] * (psi1_Ak - psi1_Ck);
-                hessian[j][i] = hessian[i][j];
-            }
-        }
-        
-        return hessian;
-    }
-    catch(exception& e){
-        m->errorOut(e, "qFinderDMM", "getHessian");
-        exit(1);
-    }
-}
-
 /**************************************************************************************************/
 /**************************************************************************************************/
index d94e6cce9f11b89de87d9f756ae490c69aabda72..df7b44305ef38fc417360bebd729357fb6724e2d 100644 (file)
@@ -9,27 +9,19 @@
 #ifndef pds_dmm_qFinderDMM_h
 #define pds_dmm_qFinderDMM_h
 
 #ifndef pds_dmm_qFinderDMM_h
 #define pds_dmm_qFinderDMM_h
 
-/**************************************************************************************************/
-
-#include "mothurout.h"
+#include "communitytype.h"
 
 /**************************************************************************************************/
 
 
 /**************************************************************************************************/
 
-class qFinderDMM {
+class qFinderDMM : public CommunityTypeFinder {
   
 public:
     qFinderDMM(vector<vector<int> >, int);
   
 public:
     qFinderDMM(vector<vector<int> >, int);
-    double getNLL()     {    return currNLL;        }  
-    double getAIC()     {    return aic;            }
-    double getBIC()     {    return bic;            }
-    double getLogDet()  {    return logDeterminant; }
-    double getLaplace() {    return laplace;        }
-    void printZMatrix(string, vector<string>);
-    void printRelAbund(string, vector<string>);
-
+    void printFitData(ofstream&);
+    void printFitData(ostream&, double);
+    
 private:
 private:
-    MothurOut* m;
-    void kMeans();
+   
     void optimizeLambda();
     void calculatePiK();
 
     void optimizeLambda();
     void calculatePiK();
 
@@ -37,31 +29,14 @@ private:
     void negativeLogDerivEvidenceLambdaPi(vector<double>&, vector<double>&);
     double getNegativeLogEvidence(vector<double>&, int);
     double getNegativeLogLikelihood();
     void negativeLogDerivEvidenceLambdaPi(vector<double>&, vector<double>&);
     double getNegativeLogEvidence(vector<double>&, int);
     double getNegativeLogLikelihood();
-    vector<vector<double> > getHessian();
+    
     
     int lineMinimizeFletcher(vector<double>&, vector<double>&, double, double, double, double&, double&, vector<double>&, vector<double>&);
     int bfgs2_Solver(vector<double>&);//, double, double);
     
     int lineMinimizeFletcher(vector<double>&, vector<double>&, double, double, double, double&, double&, vector<double>&, vector<double>&);
     int bfgs2_Solver(vector<double>&);//, double, double);
-    double cheb_eval(const double[], int, double);
-    double psi(double);
-    double psi1(double);
-
-    vector<vector<int> > countMatrix;
-    vector<vector<double> > zMatrix;
-    vector<vector<double> > lambdaMatrix;
-    vector<double> weights;
-    vector<vector<double> > error;
-    
-    int numPartitions;
-    int numSamples;
-    int numOTUs;
-    int currentPartition;
-    
-    double currNLL;
-    double aic;
-    double bic;
-    double logDeterminant;
-    double laplace;
     
     
+   
+
+        
 };
 
 /**************************************************************************************************/
 };
 
 /**************************************************************************************************/
index 0c55650b33149148c80374a0b8c03d41a7a4ea53..3a1687bf9e174d77ddded9eaf331d6f1a78da734 100644 (file)
@@ -130,7 +130,7 @@ string QualityScores::getName(){
 void QualityScores::printQScores(ofstream& qFile){
        try {
                
 void QualityScores::printQScores(ofstream& qFile){
        try {
                
-               double aveQScore = calculateAverage();
+               double aveQScore = calculateAverage(false);
                
                qFile << '>' << seqName << '\t' << aveQScore << endl;
                
                
                qFile << '>' << seqName << '\t' << aveQScore << endl;
                
@@ -228,7 +228,7 @@ bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
 
 /**************************************************************************************************/
 
 
 /**************************************************************************************************/
 
-bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
+bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold, bool logTransform){
        try {
                string rawSequence = sequence.getUnaligned();
                int seqLength = sequence.getNumBases();
        try {
                string rawSequence = sequence.getUnaligned();
                int seqLength = sequence.getNumBases();
@@ -240,12 +240,22 @@ bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshol
                
                int end = -1;
                double rollingSum = 0.0000;
                
                int end = -1;
                double rollingSum = 0.0000;
+        double value = 0.0;
                
                for(int i=0;i<seqLength;i++){
                
                for(int i=0;i<seqLength;i++){
-
-                       rollingSum += (double)qScores[i];
+            
+            if (logTransform)   {
+                rollingSum += (double)pow(10.0, qScores[i]);
+                value = log10(rollingSum / (double)(i+1));
+                
+            } //Sum 10^Q
+            else                {
+                rollingSum += (double)qScores[i];
+                value = rollingSum / (double)(i+1);
+            }
+                       
                        
                        
-                       if(rollingSum / (double)(i+1) < qThreshold){
+                       if(value < qThreshold){
                                end = i;
                                break;
                        }
                                end = i;
                                break;
                        }
@@ -269,7 +279,7 @@ bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshol
 
 /**************************************************************************************************/
 
 
 /**************************************************************************************************/
 
-bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
+bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold, bool logTransform){
        try {
                string rawSequence = sequence.getUnaligned();
                int seqLength = sequence.getNumBases();
        try {
                string rawSequence = sequence.getUnaligned();
                int seqLength = sequence.getNumBases();
@@ -288,9 +298,12 @@ bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int
                        double windowSum = 0.0000;
 
                        for(int i=start;i<end;i++){
                        double windowSum = 0.0000;
 
                        for(int i=start;i<end;i++){
-                               windowSum += qScores[i];
+                if (logTransform)   {  windowSum += pow(10.0, qScores[i]);  }
+                else                {  windowSum += qScores[i];             }
                        }
                        }
-                       double windowAverage = windowSum / (double)(end-start);
+                       double windowAverage = 0.0;
+            if (logTransform)   { windowAverage = log10(windowSum / (double)(end-start)); }
+            else                { windowAverage = windowSum / (double)(end-start);      }
                                
                        if(windowAverage < qThreshold){
                                end = end - stepSize;
                                
                        if(windowAverage < qThreshold){
                                end = end - stepSize;
@@ -323,21 +336,25 @@ bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int
 
 /**************************************************************************************************/
 
 
 /**************************************************************************************************/
 
-double QualityScores::calculateAverage(){
+double QualityScores::calculateAverage(bool logTransform){
        
        double aveQScore = 0.0000;
        
        for(int i=0;i<seqLength;i++){
        
        double aveQScore = 0.0000;
        
        for(int i=0;i<seqLength;i++){
-               aveQScore += (double) qScores[i];
+        if (logTransform)   {  aveQScore += pow(10.0, qScores[i]);  }
+        else                {  aveQScore += qScores[i];             }
        }
        aveQScore /= (double) seqLength;
        }
        aveQScore /= (double) seqLength;
+    
+    if (logTransform)   {  aveQScore = log10(aveQScore /(double) seqLength);  }
+    else                {  aveQScore /= (double) seqLength;                 }
        
        return aveQScore;
 }
 
 /**************************************************************************************************/
 
        
        return aveQScore;
 }
 
 /**************************************************************************************************/
 
-bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
+bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage, bool logTransform){
        try {
                string rawSequence = sequence.getUnaligned();
                bool success = 0;       //guilty until proven innocent
        try {
                string rawSequence = sequence.getUnaligned();
                bool success = 0;       //guilty until proven innocent
@@ -347,7 +364,7 @@ bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
                        m->mothurOutEndLine();  
                } 
                        
                        m->mothurOutEndLine();  
                } 
                        
-               double aveQScore = calculateAverage();
+               double aveQScore = calculateAverage(logTransform);
                
                if(aveQScore >= qAverage)       {       success = 1;    }
                else                                            {       success = 0;    }
                
                if(aveQScore >= qAverage)       {       success = 1;    }
                else                                            {       success = 0;    }
index 500d3e98db79b934effee830b65f54afb0023af3..a802636fd364071ab15aa84c148ebb38fead14b4 100644 (file)
@@ -30,9 +30,9 @@ public:
        void trimQScores(int, int);
        void flipQScores();
        bool stripQualThreshold(Sequence&, double);
        void trimQScores(int, int);
        void flipQScores();
        bool stripQualThreshold(Sequence&, double);
-       bool stripQualRollingAverage(Sequence&, double);
-       bool stripQualWindowAverage(Sequence&, int, int, double);
-       bool cullQualAverage(Sequence&, double);
+       bool stripQualRollingAverage(Sequence&, double, bool);
+       bool stripQualWindowAverage(Sequence&, int, int, double, bool);
+       bool cullQualAverage(Sequence&, double, bool);
        void updateQScoreErrorMap(map<char, vector<int> >&, string, int, int, int);
        void updateForwardMap(vector<vector<int> >&, int, int, int);
        void updateReverseMap(vector<vector<int> >&, int, int, int);
        void updateQScoreErrorMap(map<char, vector<int> >&, string, int, int, int);
        void updateForwardMap(vector<vector<int> >&, int, int, int);
        void updateReverseMap(vector<vector<int> >&, int, int, int);
@@ -42,7 +42,7 @@ public:
        
 private:
        
        
 private:
        
-       double calculateAverage();
+       double calculateAverage(bool);
        MothurOut* m;
        vector<int> qScores;
        
        MothurOut* m;
        vector<int> qScores;
        
index e941b3a9a2ff500dd07e221712757a8b899526d2..70aa55d38502237388c520aeae892daf4891e73c 100644 (file)
@@ -449,7 +449,8 @@ int SeqSummaryCommand::driverCreateSummary(vector<int>& startPosition, vector<in
 
                bool done = false;
                int count = 0;
 
                bool done = false;
                int count = 0;
-       
+        
+        
                while (!done) {
                                
                        if (m->control_pressed) { in.close(); outSummary.close(); return 1; }
                while (!done) {
                                
                        if (m->control_pressed) { in.close(); outSummary.close(); return 1; }
@@ -479,7 +480,6 @@ int SeqSummaryCommand::driverCreateSummary(vector<int>& startPosition, vector<in
                                        ambigBases.push_back(current.getAmbigBases());
                                        longHomoPolymer.push_back(current.getLongHomoPolymer());
                                }
                                        ambigBases.push_back(current.getAmbigBases());
                                        longHomoPolymer.push_back(current.getLongHomoPolymer());
                                }
-                               
                                count++;
                                outSummary << current.getName() << '\t';
                                outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
                                count++;
                                outSummary << current.getName() << '\t';
                                outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
diff --git a/shannonrange.cpp b/shannonrange.cpp
new file mode 100644 (file)
index 0000000..f26f08a
--- /dev/null
@@ -0,0 +1,35 @@
+//
+//  shannonrange.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 1/3/14.
+//  Copyright (c) 2014 Schloss Lab. All rights reserved.
+//
+
+#include "shannonrange.h"
+
+/***********************************************************************/
+
+EstOutput RangeShannon::getValues(vector<SharedRAbundVector*> shared) {
+       try {
+        data.resize(3,0);
+        
+        double commSize = 1e20;
+        
+        SAbundVector sabund1 = shared[0]->getSAbundVector();
+        SAbundVector sabund2 = shared[1]->getSAbundVector();
+        
+        double sampleSize = 0;
+        for (int i = 0; i < sabund1.getNumBins(); i++) {  sampleSize += (sabund1.get(i) * sabund2.get(i));  }
+        int aux = ceil(pow((sampleSize+1), 0.33333));
+               
+               if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
+               
+               return data;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RangeShannon", "getValues");
+               exit(1);
+       }
+}
+/***********************************************************************/
diff --git a/shannonrange.h b/shannonrange.h
new file mode 100644 (file)
index 0000000..34b73f4
--- /dev/null
@@ -0,0 +1,29 @@
+//
+//  shannonrange.h
+//  Mothur
+//
+//  Created by SarahsWork on 1/3/14.
+//  Copyright (c) 2014 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_shannonrange_h
+#define Mothur_shannonrange_h
+
+#include "calculator.h"
+
+/***********************************************************************/
+
+class RangeShannon : public Calculator  {
+       
+public:
+       RangeShannon() : Calculator("rangeshannon", 3, false) {};
+       EstOutput getValues(SAbundVector*) {return data;};
+    EstOutput getValues(vector<SharedRAbundVector*>);
+       string getCitation() { return "http://www.mothur.org/wiki/rangeshannon"; }
+};
+
+/***********************************************************************/
+
+
+
+#endif
diff --git a/sharedjsd.cpp b/sharedjsd.cpp
new file mode 100644 (file)
index 0000000..46cb977
--- /dev/null
@@ -0,0 +1,48 @@
+//
+//  sharedjsd.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 12/9/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "sharedjsd.h"
+
+/***********************************************************************/
+//KLD <- function(x,y) sum(x *log(x/y))
+//JSD<- function(x,y) sqrt(0.5 * KLD(x, (x+y)/2) + 0.5 * KLD(y, (x+y)/2))
+EstOutput JSD::getValues(vector<SharedRAbundVector*> shared) {
+       try {
+        
+               data.resize(1,0);
+        
+        double KLD1 = 0.0;
+        double KLD2 = 0.0;
+
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
+                       //store in temps to avoid multiple repetitive function calls
+                       double tempA = shared[0]->getAbundance(i);
+                       double tempB = shared[1]->getAbundance(i);
+            
+            if (tempA == 0) { tempA = 0.000001; }
+            if (tempB == 0) { tempB = 0.000001; }
+            
+            double denom = (tempA+tempB)/(double)2.0;
+           
+            if (tempA != 0) {  KLD1 += tempA * log(tempA/denom); } //KLD(x,m)
+            if (tempB != 0) {  KLD2 += tempB * log(tempB/denom); } //KLD(y,m)
+        }
+        
+               data[0] = sqrt((0.5*KLD1) + (0.5*KLD2));
+               
+               if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
+               
+               return data;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "JSD", "getValues");
+               exit(1);
+       }
+}
+
+/***********************************************************************/
diff --git a/sharedjsd.h b/sharedjsd.h
new file mode 100644 (file)
index 0000000..cec0faa
--- /dev/null
@@ -0,0 +1,30 @@
+//
+//  sharedjsd.h
+//  Mothur
+//
+//  Created by SarahsWork on 12/9/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_sharedjsd_h
+#define Mothur_sharedjsd_h
+
+#include "calculator.h"
+
+/***********************************************************************/
+//Jensen-Shannon divergence (JSD)
+class JSD : public Calculator  {
+       
+public:
+       JSD() :  Calculator("jsd", 1, false) {};
+       EstOutput getValues(SAbundVector*) {return data;};
+       EstOutput getValues(vector<SharedRAbundVector*>);
+       string getCitation() { return "http://www.mothur.org/wiki/JSD"; }
+private:
+       
+};
+
+/***********************************************************************/
+
+
+#endif
index 2c9399bda1dd363d04c1fed89ff9b12ea8c3aace..ef5e365fdfa8c356c3951c1dae74ae02bf41b978 100644 (file)
@@ -17,7 +17,7 @@ vector<string> SummarySharedCommand::setParameters(){
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
         CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
                CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
         CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
                CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
-               CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
+               CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
         CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
                CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
         CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
         CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
                CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
         CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
@@ -294,6 +294,8 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                                                        sumCalculators.push_back(new MemEuclidean());
                                                }else if (Estimators[i] == "mempearson") { 
                                                        sumCalculators.push_back(new MemPearson());
                                                        sumCalculators.push_back(new MemEuclidean());
                                                }else if (Estimators[i] == "mempearson") { 
                                                        sumCalculators.push_back(new MemPearson());
+                                               }else if (Estimators[i] == "jsd") {
+                                                       sumCalculators.push_back(new JSD());
                                                }
                                        }
                                }
                                                }
                                        }
                                }
index 8ba9b9fcf3e40b745cf75ddbaccd2f6df8cde652..1b3f16469bfc64345631f489e4c6651895041de0 100644 (file)
@@ -56,6 +56,7 @@
 #include "memchord.h"
 #include "memeuclidean.h"
 #include "mempearson.h"
 #include "memchord.h"
 #include "memeuclidean.h"
 #include "mempearson.h"
+#include "sharedjsd.h"
 
 class SummarySharedCommand : public Command {
 
 
 class SummarySharedCommand : public Command {
 
@@ -216,6 +217,8 @@ static DWORD WINAPI MySummarySharedThreadFunction(LPVOID lpParam){
                     sumCalculators.push_back(new MemEuclidean());
                 }else if (pDataArray->Estimators[i] == "mempearson") { 
                     sumCalculators.push_back(new MemPearson());
                     sumCalculators.push_back(new MemEuclidean());
                 }else if (pDataArray->Estimators[i] == "mempearson") { 
                     sumCalculators.push_back(new MemPearson());
+                }else if (pDataArray->Estimators[i] == "jsd") {
+                    sumCalculators.push_back(new JSD());
                 }
             }
         }
                 }
             }
         }
index fd55edf25ede0b4de807bef17dadb861b44c03a8..33349decab52e90be19c928b44b9b07c8f6c710a 100644 (file)
@@ -395,7 +395,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                
                if(line->start == 0){
                        flowFile >> numFlows; m->gobble(flowFile);
                
                if(line->start == 0){
                        flowFile >> numFlows; m->gobble(flowFile);
-                       scrapFlowFile << maxFlows << endl;
+                       scrapFlowFile << numFlows << endl;
                        trimFlowFile << maxFlows << endl;
                        if(allFiles){
                                for(int i=0;i<thisBarcodePrimerComboFileNames.size();i++){
                        trimFlowFile << maxFlows << endl;
                        if(allFiles){
                                for(int i=0;i<thisBarcodePrimerComboFileNames.size();i++){
@@ -599,6 +599,7 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                                else if(type == "REVERSE"){
                                        string oligoRC = reverseOligo(oligo);
                                        revPrimer.push_back(oligoRC);
                                else if(type == "REVERSE"){
                                        string oligoRC = reverseOligo(oligo);
                                        revPrimer.push_back(oligoRC);
+                    if (m->debug) { m->mothurOut("[DEBUG]: reverse oligos = " + oligoRC + ".\n"); }
                                }
                                else if(type == "BARCODE"){
                                        oligosFile >> group;
                                }
                                else if(type == "BARCODE"){
                                        oligosFile >> group;
index 0d55d7c4a0b043f811d2d6b9f61ef159952ea1f9..951ce65d400bb36afc60df92e56e148c14ee271c 100644 (file)
@@ -34,6 +34,7 @@ vector<string> TrimSeqsCommand::setParameters(){
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
                CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
                CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
                CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
                CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
+        CommandParameter plogtransform("logtransform", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plogtransform);
                CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
                CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
                CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
                CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
                CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
                CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
@@ -61,7 +62,7 @@ string TrimSeqsCommand::getHelpString(){
                string helpString = "";
                helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
                helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
                string helpString = "";
                helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
                helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
-               helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
+               helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast, logtransform and allfiles.\n";
                helpString += "The fasta parameter is required.\n";
                helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
         helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
                helpString += "The fasta parameter is required.\n";
                helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
         helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
@@ -84,6 +85,7 @@ string TrimSeqsCommand::getHelpString(){
                helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
                helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
                helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
                helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
                helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
                helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
+        helpString += "The logtransform parameter allows you to indicate you want the averages for the qwindowaverage, rollaverage and qaverage to be calculated using a logtransform. Default=F.\n";
                helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
                helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
                helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
                helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
                helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
                helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
@@ -329,6 +331,9 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
             temp = validParameter.validFile(parameters, "keepforward", false);         if (temp == "not found") { temp = "F"; }
                        keepforward = m->isTrue(temp);
             
             temp = validParameter.validFile(parameters, "keepforward", false);         if (temp == "not found") { temp = "F"; }
                        keepforward = m->isTrue(temp);
             
+            temp = validParameter.validFile(parameters, "logtransform", false);                if (temp == "not found") { temp = "F"; }
+                       logtransform = m->isTrue(temp);
+            
             temp = validParameter.validFile(parameters, "checkorient", false);         if (temp == "not found") { temp = "F"; }
                        reorient = m->isTrue(temp);
                        
             temp = validParameter.validFile(parameters, "checkorient", false);         if (temp == "not found") { temp = "F"; }
                        reorient = m->isTrue(temp);
                        
@@ -444,8 +449,6 @@ int TrimSeqsCommand::execute(){
                        }
                }
         
                        }
                }
         
-        if (!pairedOligos) { if (reorient) { m->mothurOut("[WARNING]: You cannot use reorient without paired barcodes or primers, skipping."); m->mothurOutEndLine(); reorient = false; } }
-        
         if (m->control_pressed) { return 0; }
             
         //fills lines and qlines
         if (m->control_pressed) { return 0; }
             
         //fills lines and qlines
@@ -695,7 +698,21 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                 rpairedBarcodes[it->first] = tempPair;
                  //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
             }
                 rpairedBarcodes[it->first] = tempPair;
                  //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
             }
-            rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
+            int index = rpairedBarcodes.size();
+            for (map<string, int>::iterator it = barcodes.begin(); it != barcodes.end(); it++) {
+                oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+                rpairedBarcodes[index] = tempPair; index++;
+                //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
+            }
+            
+            index = rpairedPrimers.size();
+            for (map<string, int>::iterator it = primers.begin(); it != primers.end(); it++) {
+                oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+                rpairedPrimers[index] = tempPair; index++;
+                //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
+            }
+
+            rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size(); 
         }
         
                while (moreSeqs) {
         }
         
                while (moreSeqs) {
@@ -819,9 +836,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                        int origLength = currSeq.getNumBases();
                                        
                                        if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
                                        int origLength = currSeq.getNumBases();
                                        
                                        if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
-                                       else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
-                                       else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
-                                       else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
+                                       else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage, logtransform);                            }
+                                       else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage, logtransform);        }
+                                       else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage, logtransform);     }
                                        else                                            {       success = 1;                            }
                                        
                                        //you don't want to trim, if it fails above then scrap it
                                        else                                            {       success = 1;                            }
                                        
                                        //you don't want to trim, if it fails above then scrap it
@@ -1174,7 +1191,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                               lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
                                               lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
-                                              qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
+                                              qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, logtransform, 
                                              minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
                        pDataArray.push_back(tempTrim);
             
                                              minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
                        pDataArray.push_back(tempTrim);
             
@@ -1646,7 +1663,7 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
         if (hasPairedBarcodes || hasPrimer) {
             pairedOligos = true;
             if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true;  m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine();  return 0; }
         if (hasPairedBarcodes || hasPrimer) {
             pairedOligos = true;
             if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true;  m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine();  return 0; }
-        }else if (reorient) { m->mothurOut("[Warning]: cannot use checkorient without paired barcodes or primers, ignoring.\n"); m->mothurOutEndLine(); reorient = false; }
+        }
         
                if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
         
         
                if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
         
index 53f04d37965d55c189447c7cbd1c702ac93dc5ac..14eed17e236a333583d461d61482a1d541c6a9cf 100644 (file)
@@ -55,7 +55,7 @@ private:
        bool abort, createGroup;
        string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
        
        bool abort, createGroup;
        string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
        
-       bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient;
+       bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient, logtransform;
        int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
        int qWindowSize, qWindowStep, keepFirst, removeLast;
        double qRollAverage, qThreshold, qWindowAverage, qAverage;
        int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
        int qWindowSize, qWindowStep, keepFirst, removeLast;
        double qRollAverage, qThreshold, qWindowAverage, qAverage;
@@ -98,7 +98,7 @@ struct trimData {
     vector<vector<string> > qualFileNames;
     vector<vector<string> > nameFileNames;
     unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
     vector<vector<string> > qualFileNames;
     vector<vector<string> > nameFileNames;
     unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
-    bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient;
+    bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient, logtransform;
        int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
        int qWindowSize, qWindowStep, keepFirst, removeLast, count;
        double qRollAverage, qThreshold, qWindowAverage, qAverage;
        int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
        int qWindowSize, qWindowStep, keepFirst, removeLast, count;
        double qRollAverage, qThreshold, qWindowAverage, qAverage;
@@ -121,7 +121,7 @@ struct trimData {
        trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
                       int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, map<int, oligosPair> pbr, map<int, oligosPair> ppr, bool po,
                       vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
        trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
                       int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, map<int, oligosPair> pbr, map<int, oligosPair> ppr, bool po,
                       vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
-                      int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
+                      int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage, bool lt,
                       int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map<string, string> nm, map<string, int> ncount) {
         filename = fn;
         qFileName = qn;
                       int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map<string, string> nm, map<string, int> ncount) {
         filename = fn;
         qFileName = qn;
@@ -174,6 +174,7 @@ struct trimData {
         qThreshold = Threshold;
         qAverage = Average;
         qRollAverage = RollAverage;
         qThreshold = Threshold;
         qAverage = Average;
         qRollAverage = RollAverage;
+        logtransform = lt;
         minLength = minL;
         maxAmbig = maxA;
         maxHomoP = maxH;
         minLength = minL;
         maxAmbig = maxA;
         maxHomoP = maxH;
@@ -276,6 +277,19 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                 oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
                 rpairedBarcodes[it->first] = tempPair;
             }
                 oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
                 rpairedBarcodes[it->first] = tempPair;
             }
+            
+            int index = rpairedBarcodes.size();
+            for (map<string, int>::iterator it = pDataArray->barcodes.begin(); it != pDataArray->barcodes.end(); it++) {
+                oligosPair tempPair("", trimOligos->reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+                rpairedBarcodes[index] = tempPair; index++;
+            }
+            
+            index = rpairedPrimers.size();
+            for (map<string, int>::iterator it = pDataArray->primers.begin(); it != pDataArray->primers.end(); it++) {
+                oligosPair tempPair("", trimOligos->reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+                rpairedPrimers[index] = tempPair; index++;
+            }
+
             rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
         }
         
             rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
         }
         
@@ -416,9 +430,9 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                        int origLength = currSeq.getNumBases();
                                        
                                        if(pDataArray->qThreshold != 0)                 {       success = currQual.stripQualThreshold(currSeq, pDataArray->qThreshold);                 }
                                        int origLength = currSeq.getNumBases();
                                        
                                        if(pDataArray->qThreshold != 0)                 {       success = currQual.stripQualThreshold(currSeq, pDataArray->qThreshold);                 }
-                                       else if(pDataArray->qAverage != 0)              {       success = currQual.cullQualAverage(currSeq, pDataArray->qAverage);                              }
-                                       else if(pDataArray->qRollAverage != 0)  {       success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage);  }
-                                       else if(pDataArray->qWindowAverage != 0){       success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage);       }
+                                       else if(pDataArray->qAverage != 0)              {       success = currQual.cullQualAverage(currSeq, pDataArray->qAverage, pDataArray->logtransform);                            }
+                                       else if(pDataArray->qRollAverage != 0)  {       success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage, pDataArray->logtransform);        }
+                                       else if(pDataArray->qWindowAverage != 0){       success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage, pDataArray->logtransform);     }
                                        else                                            {       success = 1;                            }
                                        
                                        //you don't want to trim, if it fails above then scrap it
                                        else                                            {       success = 1;                            }
                                        
                                        //you don't want to trim, if it fails above then scrap it
index 7df7c4e28d65cb2f7853500728075e8edc161723..4718e2ff89f96a28013a19dc1ca2f51ae93d924e 100644 (file)
@@ -76,6 +76,7 @@
 #include "mempearson.h"
 #include "sharedsobs.h"
 #include "sharednseqs.h"
 #include "mempearson.h"
 #include "sharedsobs.h"
 #include "sharednseqs.h"
+#include "sharedjsd.h"
 
 
 /********************************************************************/
 
 
 /********************************************************************/
@@ -206,6 +207,7 @@ void ValidCalculators::printCitations(vector<string> Estimators) {
                                }else if (Estimators[i] == "mempearson") { Calculator* temp = new MemPearson(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
                                }else if (Estimators[i] == "sharedobserved") { Calculator* temp = new SharedSobs(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
                                }else if (Estimators[i] == "kulczynski") { Calculator* temp = new Kulczynski(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
                                }else if (Estimators[i] == "mempearson") { Calculator* temp = new MemPearson(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
                                }else if (Estimators[i] == "sharedobserved") { Calculator* temp = new SharedSobs(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
                                }else if (Estimators[i] == "kulczynski") { Calculator* temp = new Kulczynski(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
+                }else if (Estimators[i] == "jsd") { Calculator* temp = new JSD(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
                                }else { m->mothurOut("[ERROR]: Missing else if for " + Estimators[i] + " in printCitations."); m->mothurOutEndLine(); }
                        }else { m->mothurOut(Estimators[i] + " is not a valid calculator, no citation will be given."); m->mothurOutEndLine(); }
                }
                                }else { m->mothurOut("[ERROR]: Missing else if for " + Estimators[i] + " in printCitations."); m->mothurOutEndLine(); }
                        }else { m->mothurOut(Estimators[i] + " is not a valid calculator, no citation will be given."); m->mothurOutEndLine(); }
                }
@@ -456,6 +458,7 @@ void ValidCalculators::initialShared() {
                shared["memchord"]                              = "memchord";
                shared["memeuclidean"]                  = "memeuclidean";
                shared["mempearson"]                    = "mempearson";
                shared["memchord"]                              = "memchord";
                shared["memeuclidean"]                  = "memeuclidean";
                shared["mempearson"]                    = "mempearson";
+        shared["jsd"]                          = "jsd";
                shared["default"]                   = "default";
        }
        catch(exception& e) {
                shared["default"]                   = "default";
        }
        catch(exception& e) {
@@ -569,6 +572,7 @@ void ValidCalculators::initialSharedSummary() {
                sharedsummary["memchord"]                               = "memchord";
                sharedsummary["memeuclidean"]                   = "memeuclidean";
                sharedsummary["mempearson"]                             = "mempearson";
                sharedsummary["memchord"]                               = "memchord";
                sharedsummary["memeuclidean"]                   = "memeuclidean";
                sharedsummary["mempearson"]                             = "mempearson";
+        sharedsummary["jsd"]                           = "jsd";
                sharedsummary["default"]                                = "default";
        }
        catch(exception& e) {
                sharedsummary["default"]                                = "default";
        }
        catch(exception& e) {
@@ -733,6 +737,7 @@ void ValidCalculators::initialMatrix() {
                matrix["memchord"]                              = "memchord";
                matrix["memeuclidean"]                  = "memeuclidean";
                matrix["mempearson"]                            = "mempearson";
                matrix["memchord"]                              = "memchord";
                matrix["memeuclidean"]                  = "memeuclidean";
                matrix["mempearson"]                            = "mempearson";
+        matrix["jsd"]                          = "jsd";
                
        }
        catch(exception& e) {
                
        }
        catch(exception& e) {