added Jensen-Shannon calc. working on get.communitytype command. fixed bug in get...

author Sarah Westcott <mothur.westcott@gmail.com>

Thu, 9 Jan 2014 14:44:50 +0000 (09:44 -0500)

committer Sarah Westcott <mothur.westcott@gmail.com>

Thu, 9 Jan 2014 14:44:50 +0000 (09:44 -0500)
author Sarah Westcott <mothur.westcott@gmail.com>
Thu, 9 Jan 2014 14:44:50 +0000 (09:44 -0500)
committer Sarah Westcott <mothur.westcott@gmail.com>
Thu, 9 Jan 2014 14:44:50 +0000 (09:44 -0500)
diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj

index f3b6a09ea7e690a55a31fcbe6b9fc457fa59ca2a..26456a3ee2c230477db7c945e2b9586dc45aea8d 100644 (file)
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -18,6 +18,7 @@
                 A70056EB156AB6E500924A2D /* removeotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056EA156AB6E500924A2D /* removeotulabelscommand.cpp */; };
                 A70332B712D3A13400761E33 /* makefile in Sources */ = {isa = PBXBuildFile; fileRef = A70332B512D3A13400761E33 /* makefile */; };
                 A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7128B1C16B7002600723BE4 /* getdistscommand.cpp */; };
+               A7132EB3184E792700AAA402 /* communitytype.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7132EB2184E792700AAA402 /* communitytype.cpp */; };
                 A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; };
                 A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; };
                 A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7190B201768E0DF00A9AFA6 /* lefsecommand.cpp */; };
@@ -29,6 +30,7 @@
                 A721AB71161C572A009860A1 /* kmernode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6D161C572A009860A1 /* kmernode.cpp */; };
                 A721AB72161C572A009860A1 /* kmertree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6F161C572A009860A1 /* kmertree.cpp */; };
                 A721AB77161C573B009860A1 /* taxonomynode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB73161C573B009860A1 /* taxonomynode.cpp */; };
+               A7222D731856277C0055A993 /* sharedjsd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7222D721856277C0055A993 /* sharedjsd.cpp */; };
                 A724D2B7153C8628000A826F /* makebiomcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A724D2B6153C8628000A826F /* makebiomcommand.cpp */; };
                 A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; };
                 A7386C251619E52300651424 /* abstractdecisiontree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C241619E52200651424 /* abstractdecisiontree.cpp */; };
@@ -70,16 +72,19 @@
                 A79EEF8616971D4A0006DEC1 /* filtersharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79EEF8516971D4A0006DEC1 /* filtersharedcommand.cpp */; };
                 A7A0671A1562946F0095C8C5 /* listotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */; };
                 A7A0671F1562AC3E0095C8C5 /* makecontigscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */; };
+               A7A09B1018773C0E00FAA081 /* shannonrange.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A09B0F18773C0E00FAA081 /* shannonrange.cpp */; };
                 A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */; };
                 A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */; };
                 A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; };
                 A7B0231516B8244C006BA09E /* removedistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0231416B8244B006BA09E /* removedistscommand.cpp */; };
+               A7B093C018579F0400843CD1 /* pam.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B093BF18579F0400843CD1 /* pam.cpp */; };
                 A7BF221414587886000AD524 /* myPerseus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF221214587886000AD524 /* myPerseus.cpp */; };
                 A7BF2232145879B2000AD524 /* chimeraperseuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */; };
                 A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; };
                 A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */; };
                 A7C7DAB915DA758B0059B0CF /* sffmultiplecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */; };
                 A7CFA4311755401800D9ED4D /* renameseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */; };
+               A7D395C4184FA3A200A350D7 /* kmeans.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D395C3184FA3A200A350D7 /* kmeans.cpp */; };
                 A7D755DA1535F679009BF21A /* treereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D755D91535F679009BF21A /* treereader.cpp */; };
                 A7D9378A17B146B5001E90B0 /* wilcox.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D9378917B146B5001E90B0 /* wilcox.cpp */; };
                 A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */; };
@@ -418,6 +423,8 @@
                 A70332B512D3A13400761E33 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = "<group>"; };
                 A7128B1A16B7001200723BE4 /* getdistscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getdistscommand.h; sourceTree = "<group>"; };
                 A7128B1C16B7002600723BE4 /* getdistscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getdistscommand.cpp; sourceTree = "<group>"; };
+               A7132EAE184E76EB00AAA402 /* communitytype.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = communitytype.h; sourceTree = "<group>"; };
+               A7132EB2184E792700AAA402 /* communitytype.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = communitytype.cpp; sourceTree = "<group>"; };
                 A713EBAA12DC7613000092AC /* readphylipvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylipvector.h; sourceTree = "<group>"; };
                 A713EBAB12DC7613000092AC /* readphylipvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylipvector.cpp; sourceTree = "<group>"; };
                 A713EBEB12DC7C5E000092AC /* nmdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = nmdscommand.h; sourceTree = "<group>"; };
@@ -440,6 +447,8 @@
                 A721AB70161C572A009860A1 /* kmertree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kmertree.h; sourceTree = "<group>"; };
                 A721AB73161C573B009860A1 /* taxonomynode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = taxonomynode.cpp; sourceTree = "<group>"; };
                 A721AB74161C573B009860A1 /* taxonomynode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = taxonomynode.h; sourceTree = "<group>"; };
+               A7222D711856276C0055A993 /* sharedjsd.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sharedjsd.h; sourceTree = "<group>"; };
+               A7222D721856277C0055A993 /* sharedjsd.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedjsd.cpp; sourceTree = "<group>"; };
                 A724D2B4153C8600000A826F /* makebiomcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makebiomcommand.h; sourceTree = "<group>"; };
                 A724D2B6153C8628000A826F /* makebiomcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makebiomcommand.cpp; sourceTree = "<group>"; };
                 A727864212E9E28C00F86ABA /* removerarecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removerarecommand.h; sourceTree = "<group>"; };
@@ -523,6 +532,8 @@
                 A7A0671C156294810095C8C5 /* listotulabelscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = listotulabelscommand.h; sourceTree = "<group>"; };
                 A7A0671D1562AC230095C8C5 /* makecontigscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makecontigscommand.h; sourceTree = "<group>"; };
                 A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makecontigscommand.cpp; sourceTree = "<group>"; };
+               A7A09B0E18773BF700FAA081 /* shannonrange.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = shannonrange.h; sourceTree = "<group>"; };
+               A7A09B0F18773C0E00FAA081 /* shannonrange.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = shannonrange.cpp; sourceTree = "<group>"; };
                 A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sortseqscommand.cpp; sourceTree = "<group>"; };
                 A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sortseqscommand.h; sourceTree = "<group>"; };
                 A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = otuassociationcommand.cpp; sourceTree = "<group>"; };
@@ -533,6 +544,8 @@
                 A7AACFBA132FE008003D6C4D /* currentfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = currentfile.h; sourceTree = "<group>"; };
                 A7B0231416B8244B006BA09E /* removedistscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removedistscommand.cpp; sourceTree = "<group>"; };
                 A7B0231716B8245D006BA09E /* removedistscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removedistscommand.h; sourceTree = "<group>"; };
+               A7B093BE18579EF600843CD1 /* pam.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = pam.h; sourceTree = "<group>"; };
+               A7B093BF18579F0400843CD1 /* pam.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pam.cpp; sourceTree = "<group>"; };
                 A7BF221214587886000AD524 /* myPerseus.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = myPerseus.cpp; sourceTree = "<group>"; };
                 A7BF221314587886000AD524 /* myPerseus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myPerseus.h; sourceTree = "<group>"; };
                 A7BF2230145879B2000AD524 /* chimeraperseuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraperseuscommand.h; sourceTree = "<group>"; };
@@ -545,6 +558,8 @@
                 A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sffmultiplecommand.cpp; sourceTree = "<group>"; };
                 A7CFA42F1755400500D9ED4D /* renameseqscommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = renameseqscommand.h; sourceTree = "<group>"; };
                 A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = renameseqscommand.cpp; sourceTree = "<group>"; };
+               A7D395C2184FA39300A350D7 /* kmeans.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = kmeans.h; sourceTree = "<group>"; };
+               A7D395C3184FA3A200A350D7 /* kmeans.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = kmeans.cpp; sourceTree = "<group>"; };
                 A7D755D71535F665009BF21A /* treereader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treereader.h; sourceTree = "<group>"; };
                 A7D755D91535F679009BF21A /* treereader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treereader.cpp; sourceTree = "<group>"; };
                 A7D9378917B146B5001E90B0 /* wilcox.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = wilcox.cpp; sourceTree = "<group>"; };
@@ -1178,6 +1193,7 @@
                                 A7E9B6B112D37EC400DA6239 /* commandoptionparser.cpp */,
                                 A7E9B6B212D37EC400DA6239 /* commandoptionparser.hpp */,
                                 A7DAAFA3133A254E003956EB /* commandparameter.h */,
+                               A7D395C1184FA34300A350D7 /* communitytype */,
                                 A7E9B6B412D37EC400DA6239 /* completelinkage.cpp */,
                                 A7E9BA4212D3960D00DA6239 /* containers */,
                                 A7E9B6B612D37EC400DA6239 /* consensus.h */,
@@ -1229,8 +1245,6 @@
                                 A7E9B77C12D37EC400DA6239 /* overlap.hpp */,
                                 A7E9B79B12D37EC400DA6239 /* progress.cpp */,
                                 A7E9B79C12D37EC400DA6239 /* progress.hpp */,
-                               A7548FAF171440ED00B1F05A /* qFinderDMM.h */,
-                               A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */,
                                 A77B7187173D4041002163C2 /* randomnumber.h */,
                                 A77B7186173D4041002163C2 /* randomnumber.cpp */,
                                 A7E9B7A512D37EC400DA6239 /* rarecalc.cpp */,
@@ -1318,6 +1332,21 @@
                         name = fortran;
                         sourceTree = "<group>";
                 };
+               A7D395C1184FA34300A350D7 /* communitytype */ = {
+                       isa = PBXGroup;
+                       children = (
+                               A7132EAE184E76EB00AAA402 /* communitytype.h */,
+                               A7132EB2184E792700AAA402 /* communitytype.cpp */,
+                               A7D395C2184FA39300A350D7 /* kmeans.h */,
+                               A7D395C3184FA3A200A350D7 /* kmeans.cpp */,
+                               A7B093BE18579EF600843CD1 /* pam.h */,
+                               A7B093BF18579F0400843CD1 /* pam.cpp */,
+                               A7548FAF171440ED00B1F05A /* qFinderDMM.h */,
+                               A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */,
+                       );
+                       name = communitytype;
+                       sourceTree = "<group>";
+               };
                 A7E9BA3812D3956100DA6239 /* commands */ = {
                         isa = PBXGroup;
                         children = (
@@ -1683,6 +1712,8 @@
                                 A7E9B7E612D37EC400DA6239 /* shannon.h */,
                                 A7E9B7E712D37EC400DA6239 /* shannoneven.cpp */,
                                 A7E9B7E812D37EC400DA6239 /* shannoneven.h */,
+                               A7A09B0E18773BF700FAA081 /* shannonrange.h */,
+                               A7A09B0F18773C0E00FAA081 /* shannonrange.cpp */,
                                 A7E9B7E912D37EC400DA6239 /* sharedace.cpp */,
                                 A7E9B7EA12D37EC400DA6239 /* sharedace.h */,
                                 A7E9B7EC12D37EC400DA6239 /* sharedanderbergs.cpp */,
@@ -1699,6 +1730,8 @@
                                 A7E9B7F912D37EC400DA6239 /* sharedjclass.h */,
                                 A7E9B7FA12D37EC400DA6239 /* sharedjest.cpp */,
                                 A7E9B7FB12D37EC400DA6239 /* sharedjest.h */,
+                               A7222D711856276C0055A993 /* sharedjsd.h */,
+                               A7222D721856277C0055A993 /* sharedjsd.cpp */,
                                 A7E9B7FC12D37EC400DA6239 /* sharedkstest.cpp */,
                                 A7E9B7FD12D37EC400DA6239 /* sharedkstest.h */,
                                 A7E9B7FE12D37EC400DA6239 /* sharedkulczynski.cpp */,
@@ -2400,6 +2433,11 @@
                                 A7D9378A17B146B5001E90B0 /* wilcox.cpp in Sources */,
                                 A7F24FC317EA36600021DC9A /* classifyrfsharedcommand.cpp in Sources */,
                                 A747EC71181EA0F900345732 /* sracommand.cpp in Sources */,
+                               A7132EB3184E792700AAA402 /* communitytype.cpp in Sources */,
+                               A7D395C4184FA3A200A350D7 /* kmeans.cpp in Sources */,
+                               A7222D731856277C0055A993 /* sharedjsd.cpp in Sources */,
+                               A7B093C018579F0400843CD1 /* pam.cpp in Sources */,
+                               A7A09B1018773C0E00FAA081 /* shannonrange.cpp in Sources */,
                         );
                         runOnlyForDeploymentPostprocessing = 0;
                 };
diff --git a/collectsharedcommand.cpp b/collectsharedcommand.cpp

index 40ee3d255cc03c3a005bc79d77eeee78fb25d510..56e60d88bc88200a1b21d82d70b4a9c1f22ebdef 100644 (file)
--- a/collectsharedcommand.cpp
+++ b/collectsharedcommand.cpp
@@ -49,6 +49,7 @@
  #include "memchord.h"
  #include "memeuclidean.h"
  #include "mempearson.h"
+#include "sharedjsd.h"
  
  
  //**********************************************************************************************************************
@@ -57,7 +58,7 @@ vector<string> CollectSharedCommand::setParameters(){
                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pshared);
                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq);
-               CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
+               CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
                 CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
@@ -141,6 +142,7 @@ string CollectSharedCommand::getOutputPattern(string type) {
          else if (type == "memchord")            {  pattern =  "[filename],memchord";        }
          else if (type == "memeuclidean")        {  pattern =  "[filename],memeuclidean";    }
          else if (type == "mempearson")          {  pattern =  "[filename],mempearson";      }
+        else if (type == "jsd")                 {  pattern =  "[filename],jsd";             }
          else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
          
          return pattern;
@@ -196,6 +198,7 @@ CollectSharedCommand::CollectSharedCommand(){
                 outputTypes["memchord"] = tempOutNames;
                 outputTypes["memeuclidean"] = tempOutNames;
                 outputTypes["mempearson"] = tempOutNames;
+        outputTypes["jsd"] = tempOutNames;
                 
         }
         catch(exception& e) {
@@ -268,6 +271,7 @@ CollectSharedCommand::CollectSharedCommand(string option)  {
                         outputTypes["memchord"] = tempOutNames;
                         outputTypes["memeuclidean"] = tempOutNames;
                         outputTypes["mempearson"] = tempOutNames;
+            outputTypes["jsd"] = tempOutNames;
                         
                         
                         //if the user changes the input directory command factory will send this info to us in the output parameter 
@@ -460,6 +464,9 @@ CollectSharedCommand::CollectSharedCommand(string option)  {
                                                 }else if (Estimators[i] == "mempearson") { 
                                                         cDisplays.push_back(new CollectDisplay(new MemPearson(), new SharedOneColumnFile(getOutputFileName("mempearson", variables))));
                                                         outputNames.push_back(getOutputFileName("mempearson", variables)); outputTypes["mempearson"].push_back(getOutputFileName("mempearson", variables));
+                                               }else if (Estimators[i] == "jsd") {
+                                                       cDisplays.push_back(new CollectDisplay(new JSD(), new SharedOneColumnFile(getOutputFileName("jsd", variables))));
+                                                       outputNames.push_back(getOutputFileName("jsd", variables)); outputTypes["jsd"].push_back(getOutputFileName("jsd", variables));
                                                 }
                                                 
                                         }
diff --git a/communitytype.cpp b/communitytype.cpp

new file mode 100644 (file)

index 0000000..f88932c
--- /dev/null
+++ b/communitytype.cpp
@@ -0,0 +1,512 @@
+//
+//  communitytype.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 12/3/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "communitytype.h"
+
+/**************************************************************************************************/
+
+//can we get these psi/psi1 calculations into their own math class?
+//psi calcualtions swiped from gsl library...
+
+static const double psi_cs[23] = {
+    -.038057080835217922,
+    .491415393029387130,
+    -.056815747821244730,
+    .008357821225914313,
+    -.001333232857994342,
+    .000220313287069308,
+    -.000037040238178456,
+    .000006283793654854,
+    -.000001071263908506,
+    .000000183128394654,
+    -.000000031353509361,
+    .000000005372808776,
+    -.000000000921168141,
+    .000000000157981265,
+    -.000000000027098646,
+    .000000000004648722,
+    -.000000000000797527,
+    .000000000000136827,
+    -.000000000000023475,
+    .000000000000004027,
+    -.000000000000000691,
+    .000000000000000118,
+    -.000000000000000020
+};
+
+static double apsi_cs[16] = {
+    -.0204749044678185,
+    -.0101801271534859,
+    .0000559718725387,
+    -.0000012917176570,
+    .0000000572858606,
+    -.0000000038213539,
+    .0000000003397434,
+    -.0000000000374838,
+    .0000000000048990,
+    -.0000000000007344,
+    .0000000000001233,
+    -.0000000000000228,
+    .0000000000000045,
+    -.0000000000000009,
+    .0000000000000002,
+    -.0000000000000000
+};
+
+/**************************************************************************************************/
+/* coefficients for Maclaurin summation in hzeta()
+ * B_{2j}/(2j)!
+ */
+static double hzeta_c[15] = {
+    1.00000000000000000000000000000,
+    0.083333333333333333333333333333,
+    -0.00138888888888888888888888888889,
+    0.000033068783068783068783068783069,
+    -8.2671957671957671957671957672e-07,
+    2.0876756987868098979210090321e-08,
+    -5.2841901386874931848476822022e-10,
+    1.3382536530684678832826980975e-11,
+    -3.3896802963225828668301953912e-13,
+    8.5860620562778445641359054504e-15,
+    -2.1748686985580618730415164239e-16,
+    5.5090028283602295152026526089e-18,
+    -1.3954464685812523340707686264e-19,
+    3.5347070396294674716932299778e-21,
+    -8.9535174270375468504026113181e-23
+};
+
+/**************************************************************************************************/
+void CommunityTypeFinder::printSilData(ofstream& out, double chi, vector<double> sils){
+    try {
+        out << setprecision (6) << numPartitions << '\t'  << chi << '\t';
+        for (int i = 0; i < sils.size(); i++) {
+            out << sils[i] << '\t';
+        }
+        out << endl;
+        
+        return;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "printSilData");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+void CommunityTypeFinder::printSilData(ostream& out, double chi, vector<double> sils){
+    try {
+        out << setprecision (6) << numPartitions << '\t'  << chi << '\t';
+        m->mothurOutJustToLog(toString(numPartitions) + '\t' + toString(chi) + '\t');
+        for (int i = 0; i < sils.size(); i++) {
+            out << sils[i] << '\t';
+            m->mothurOutJustToLog(toString(sils[i]) + '\t');
+        }
+        out << endl;
+        m->mothurOutJustToLog("\n");
+        
+        return;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "printSilData");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+
+void CommunityTypeFinder::printZMatrix(string fileName, vector<string> sampleName){
+    try {
+        ofstream printMatrix;
+        m->openOutputFile(fileName, printMatrix); //(fileName.c_str());
+        printMatrix.setf(ios::fixed, ios::floatfield);
+        printMatrix.setf(ios::showpoint);
+        
+        for(int i=0;i<numPartitions;i++){   printMatrix << "\tPartition_" << i+1;   }   printMatrix << endl;
+        
+        for(int i=0;i<numSamples;i++){
+            printMatrix << sampleName[i];
+            for(int j=0;j<numPartitions;j++){
+                printMatrix << setprecision(4) << '\t' << zMatrix[j][i];
+            }
+            printMatrix << endl;
+        }
+        printMatrix.close();
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CommunityTypeFinder", "printZMatrix");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void CommunityTypeFinder::printRelAbund(string fileName, vector<string> otuNames){
+    try {
+        ofstream printRA;
+        m->openOutputFile(fileName, printRA); //(fileName.c_str());
+        printRA.setf(ios::fixed, ios::floatfield);
+        printRA.setf(ios::showpoint);
+        
+        vector<double> totals(numPartitions, 0.0000);
+        for(int i=0;i<numPartitions;i++){
+            for(int j=0;j<numOTUs;j++){
+                totals[i] += exp(lambdaMatrix[i][j]);
+            }
+        }
+        
+        printRA << "Taxon";
+        for(int i=0;i<numPartitions;i++){
+            printRA << "\tPartition_" << i+1 << '_' << setprecision(4) << totals[i];
+            printRA << "\tPartition_" << i+1 <<"_LCI" << "\tPartition_" << i+1 << "_UCI";
+        }
+        printRA << endl;
+        
+        for(int i=0;i<numOTUs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            printRA << otuNames[i];
+            for(int j=0;j<numPartitions;j++){
+                
+                if(error[j][i] >= 0.0000){
+                    double std = sqrt(error[j][i]);
+                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
+                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i] - 2.0 * std) / totals[j];
+                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i] + 2.0 * std) / totals[j];
+                }
+                else{
+                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
+                    printRA << '\t' << "NA";
+                    printRA << '\t' << "NA";
+                }
+            }
+            printRA << endl;
+        }
+        
+        printRA.close();
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CommunityTypeFinder", "printRelAbund");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+vector<vector<double> > CommunityTypeFinder::getHessian(){
+    try {
+        vector<double> alpha(numOTUs, 0.0000);
+        double alphaSum = 0.0000;
+        
+        vector<double> pi = zMatrix[currentPartition];
+        vector<double> psi_ajk(numOTUs, 0.0000);
+        vector<double> psi_cjk(numOTUs, 0.0000);
+        vector<double> psi1_ajk(numOTUs, 0.0000);
+        vector<double> psi1_cjk(numOTUs, 0.0000);
+        
+        for(int j=0;j<numOTUs;j++){
+            
+            if (m->control_pressed) {  break; }
+            
+            alpha[j] = exp(lambdaMatrix[currentPartition][j]);
+            alphaSum += alpha[j];
+            
+            for(int i=0;i<numSamples;i++){
+                double X = (double) countMatrix[i][j];
+                
+                psi_ajk[j] += pi[i] * psi(alpha[j]);
+                psi1_ajk[j] += pi[i] * psi1(alpha[j]);
+                
+                psi_cjk[j] += pi[i] * psi(alpha[j] + X);
+                psi1_cjk[j] += pi[i] * psi1(alpha[j] + X);
+            }
+        }
+        
+        
+        double psi_Ck = 0.0000;
+        double psi1_Ck = 0.0000;
+        
+        double weight = 0.0000;
+        
+        for(int i=0;i<numSamples;i++){
+            if (m->control_pressed) {  break; }
+            weight += pi[i];
+            double sum = 0.0000;
+            for(int j=0;j<numOTUs;j++){     sum += alpha[j] + countMatrix[i][j];    }
+            
+            psi_Ck += pi[i] * psi(sum);
+            psi1_Ck += pi[i] * psi1(sum);
+        }
+        
+        double psi_Ak = weight * psi(alphaSum);
+        double psi1_Ak = weight * psi1(alphaSum);
+        
+        vector<vector<double> > hessian(numOTUs);
+        for(int i=0;i<numOTUs;i++){ hessian[i].assign(numOTUs, 0.0000); }
+        
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) {  break; }
+            double term1 = -alpha[i] * (- psi_ajk[i] + psi_Ak + psi_cjk[i] - psi_Ck);
+            double term2 = -alpha[i] * alpha[i] * (-psi1_ajk[i] + psi1_Ak + psi1_cjk[i] - psi1_Ck);
+            double term3 = 0.1 * alpha[i];
+            
+            hessian[i][i] = term1 + term2 + term3;
+            
+            for(int j=0;j<i;j++){
+                hessian[i][j] = - alpha[i] * alpha[j] * (psi1_Ak - psi1_Ck);
+                hessian[j][i] = hessian[i][j];
+            }
+        }
+        
+        return hessian;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "getHessian");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+
+double CommunityTypeFinder::psi1(double xx){
+    try {
+        
+        /* Euler-Maclaurin summation formula
+         * [Moshier, p. 400, with several typo corrections]
+         */
+        
+        double s = 2.0000;
+        const int jmax = 12;
+        const int kmax = 10;
+        int j, k;
+        const double pmax  = pow(kmax + xx, -s);
+        double scp = s;
+        double pcp = pmax / (kmax + xx);
+        double value = pmax*((kmax+xx)/(s-1.0) + 0.5);
+        
+        for(k=0; k<kmax; k++) {
+            if (m->control_pressed) {  return 0; }
+            value += pow(k + xx, -s);
+        }
+        
+        for(j=0; j<=jmax; j++) {
+            if (m->control_pressed) {  return 0; }
+            double delta = hzeta_c[j+1] * scp * pcp;
+            value += delta;
+            
+            if(fabs(delta/value) < 0.5*EPSILON) break;
+            
+            scp *= (s+2*j+1)*(s+2*j+2);
+            pcp /= (kmax + xx)*(kmax + xx);
+        }
+        
+        return value;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "psi1");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+double CommunityTypeFinder::psi(double xx){
+    try {
+        double psiX = 0.0000;
+        
+        if(xx < 1.0000){
+            
+            double t1 = 1.0 / xx;
+            psiX = cheb_eval(psi_cs, 22, 2.0*xx-1.0);
+            psiX = -t1 + psiX;
+            
+        }
+        else if(xx < 2.0000){
+            
+            const double v = xx - 1.0;
+            psiX = cheb_eval(psi_cs, 22, 2.0*v-1.0);
+            
+        }
+        else{
+            const double t = 8.0/(xx*xx)-1.0;
+            psiX = cheb_eval(apsi_cs, 15, t);
+            psiX += log(xx) - 0.5/xx;
+        }
+        
+        return psiX;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "psi");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+
+double CommunityTypeFinder::cheb_eval(const double seriesData[], int order, double xx){
+    try {
+        double d = 0.0000;
+        double dd = 0.0000;
+        
+        double x2 = xx * 2.0000;
+        
+        for(int j=order;j>=1;j--){
+            if (m->control_pressed) {  return 0; }
+            double temp = d;
+            d = x2 * d - dd + seriesData[j];
+            dd = temp;
+        }
+        
+        d = xx * d - dd + 0.5 * seriesData[0];
+        return d;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "cheb_eval");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+
+int CommunityTypeFinder::findkMeans(){
+    try {
+        error.resize(numPartitions); for (int i = 0; i < numPartitions; i++) { error[i].resize(numOTUs, 0.0); }
+        vector<vector<double> > relativeAbundance(numSamples);
+        vector<vector<double> > alphaMatrix;
+        
+        alphaMatrix.resize(numPartitions);
+        lambdaMatrix.resize(numPartitions);
+        for(int i=0;i<numPartitions;i++){
+            alphaMatrix[i].assign(numOTUs, 0);
+            lambdaMatrix[i].assign(numOTUs, 0);
+        }
+        
+        //get relative abundance
+        for(int i=0;i<numSamples;i++){
+            if (m->control_pressed) {  return 0; }
+            int groupTotal = 0;
+            
+            relativeAbundance[i].assign(numOTUs, 0.0);
+            
+            for(int j=0;j<numOTUs;j++){
+                groupTotal += countMatrix[i][j];
+            }
+            for(int j=0;j<numOTUs;j++){
+                relativeAbundance[i][j] = countMatrix[i][j] / (double)groupTotal;
+            }
+        }
+        
+        //randomly assign samples into partitions
+        zMatrix.resize(numPartitions);
+        for(int i=0;i<numPartitions;i++){
+            zMatrix[i].assign(numSamples, 0);
+        }
+        
+        for(int i=0;i<numSamples;i++){
+            zMatrix[rand()%numPartitions][i] = 1;
+        }
+        
+        double maxChange = 1;
+        int maxIters = 1000;
+        int iteration = 0;
+        
+        weights.assign(numPartitions, 0);
+        
+        while(maxChange > 1e-6 && iteration < maxIters){
+            
+            if (m->control_pressed) {  return 0; }
+            //calcualte average relative abundance
+            maxChange = 0.0000;
+            for(int i=0;i<numPartitions;i++){
+                
+                double normChange = 0.0;
+                
+                weights[i] = 0;
+                
+                for(int j=0;j<numSamples;j++){
+                    weights[i] += (double)zMatrix[i][j];
+                }
+                
+                vector<double> averageRelativeAbundance(numOTUs, 0);
+                for(int j=0;j<numOTUs;j++){
+                    for(int k=0;k<numSamples;k++){
+                        averageRelativeAbundance[j] += zMatrix[i][k] * relativeAbundance[k][j];
+                    }
+                }
+                
+                for(int j=0;j<numOTUs;j++){
+                    averageRelativeAbundance[j] /= weights[i];
+                    double difference = averageRelativeAbundance[j] - alphaMatrix[i][j];
+                    normChange += difference * difference;
+                    alphaMatrix[i][j] = averageRelativeAbundance[j];
+                }
+                
+                normChange = sqrt(normChange);
+                
+                if(normChange > maxChange){ maxChange = normChange; }
+            }
+            
+            
+            //calcualte distance between each sample in partition and the average relative abundance
+            for(int i=0;i<numSamples;i++){
+                if (m->control_pressed) {  return 0; }
+                
+                double normalizationFactor = 0;
+                vector<double> totalDistToPartition(numPartitions, 0);
+                
+                for(int j=0;j<numPartitions;j++){
+                    for(int k=0;k<numOTUs;k++){
+                        double difference = alphaMatrix[j][k] - relativeAbundance[i][k];
+                        totalDistToPartition[j] += difference * difference;
+                    }
+                    totalDistToPartition[j] = sqrt(totalDistToPartition[j]);
+                    normalizationFactor += exp(-50.0 * totalDistToPartition[j]);
+                }
+                
+                
+                for(int j=0;j<numPartitions;j++){
+                    zMatrix[j][i] = exp(-50.0 * totalDistToPartition[j]) / normalizationFactor;
+                }
+                
+            }
+            
+            iteration++;
+            //        cout << "K means: " << iteration << '\t' << maxChange << endl;
+            
+        }
+        
+        //    cout << "Iter:-1";
+        for(int i=0;i<numPartitions;i++){
+            weights[i] = 0.0000;
+            
+            for(int j=0;j<numSamples;j++){
+                weights[i] += zMatrix[i][j];
+            }
+            //        printf("\tw_%d=%.3f", i, weights[i]);
+        }
+        //    cout << endl;
+        
+        
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) {  return 0; }
+            for(int j=0;j<numPartitions;j++){
+                if(alphaMatrix[j][i] > 0){
+                    lambdaMatrix[j][i] = log(alphaMatrix[j][i]);
+                }
+                else{
+                    lambdaMatrix[j][i] = -10.0;
+                }
+            }
+        }
+        
+        return 0;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "kMeans");
+        exit(1);
+    }
+}
+
+
+/**************************************************************************************************/
+
diff --git a/communitytype.h b/communitytype.h

new file mode 100644 (file)

index 0000000..59291f5
--- /dev/null
+++ b/communitytype.h
@@ -0,0 +1,75 @@
+//
+//  communitytype.h
+//  Mothur
+//
+//  Created by SarahsWork on 12/3/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_communitytype_h
+#define Mothur_communitytype_h
+
+#define EPSILON numeric_limits<double>::epsilon()
+
+
+#include "mothurout.h"
+#include "linearalgebra.h"
+/**************************************************************************************************/
+
+class CommunityTypeFinder {
+    
+public:
+       CommunityTypeFinder(){  m = MothurOut::getInstance();  }
+       virtual ~CommunityTypeFinder(){};
+    
+    virtual void printZMatrix(string, vector<string>);
+    virtual void printRelAbund(string, vector<string>);
+    virtual void printFitData(ofstream&) {}
+    virtual void printFitData(ostream&, double) {}
+    virtual void printSilData(ofstream&, double, vector<double>);
+    virtual void printSilData(ostream&, double, vector<double>);
+    
+    virtual double getNLL()     {    return currNLL;        }
+    virtual double getAIC()     {    return aic;            }
+    virtual double getBIC()     {    return bic;            }
+    virtual double getLogDet()  {    return logDeterminant; }
+    virtual double getLaplace() {    return laplace;        }
+    
+    virtual double calcCHIndex(vector< vector< double> >) {return 0;}  //Calinski-Harabasz
+    virtual vector<double> calcSilhouettes(vector< vector< double> >) {  vector<double> s; return s; } //if none provided by child class
+
+
+protected:
+    
+    int findkMeans();
+    vector<vector<double> > getHessian();
+    double psi1(double);
+    double psi(double);
+    double cheb_eval(const double[], int, double);
+
+    
+       MothurOut* m;
+    vector<vector<double> > zMatrix;
+    vector<vector<double> > lambdaMatrix;
+    vector<vector<double> > error;
+    vector<vector<int> > countMatrix;
+    vector<double> weights;
+
+
+
+    int numPartitions;
+    int numSamples;
+    int numOTUs;
+    int currentPartition;
+    
+    double currNLL, aic, bic, logDeterminant, laplace;
+     
+       
+};
+
+/**************************************************************************************************/
+
+
+
+
+#endif
diff --git a/flowdata.cpp b/flowdata.cpp

index 7d61f8c265e060f2dc0002c94f69edfd054f89ae..2891281c9de412a27b2d27229683c8d3bb7cf41b 100644 (file)
--- a/flowdata.cpp
+++ b/flowdata.cpp
@@ -40,12 +40,19 @@ FlowData::FlowData(int numFlows, float signal, float noise, int maxHomoP, string
  //**********************************************************************************************************************
  
  bool FlowData::getNext(ifstream& flowFile){
-       
         try {
+        
          seqName = getSequenceName(flowFile);
-               flowFile >> endFlow;    
+        if (m->debug) {  m->mothurOut("[DEBUG]: flow = " + seqName + " "); }
+               flowFile >> endFlow;
+        if (m->debug) {  m->mothurOut(toString(endFlow) + " "); }
          if (!m->control_pressed) {
-            for(int i=0;i<numFlows;i++)        {       flowFile >> flowData[i];        }
+            if (m->debug) {  m->mothurOut(" "); }
+            for(int i=0;i<numFlows;i++)        {
+                flowFile >> flowData[i];
+                if (m->debug) {  m->mothurOut(toString(flowData[i]) + " "); }
+            }
+            if (m->debug) {  m->mothurOut("\n"); }
              updateEndFlow(); 
              translateFlow();
              m->gobble(flowFile);
diff --git a/getmetacommunitycommand.cpp b/getmetacommunitycommand.cpp

index 08cd35a9470406ba0d4827b7e3b10a94f7de94ad..b3160749e9031072ceafdea29b92a33a2909cff6 100644 (file)
--- a/getmetacommunitycommand.cpp
+++ b/getmetacommunitycommand.cpp
@@ -7,7 +7,10 @@
  //
  
  #include "getmetacommunitycommand.h"
-
+#include "communitytype.h"
+#include "kmeans.h"
+#include "validcalculator.h"
+#include "subsample.h"
  
  //**********************************************************************************************************************
  vector<string> GetMetaCommunityCommand::setParameters(){
@@ -15,13 +18,17 @@ vector<string> GetMetaCommunityCommand::setParameters(){
          CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
          CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+        CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "jsd", "", "", "","",false,false,true); parameters.push_back(pcalc);
+        CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
+        CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
          CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions);
          CommandParameter pmaxpartitions("maxpartitions", "Number", "", "100", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions);
          CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap);
          CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
-               
+               CommandParameter pmethod("method", "Multiple", "dmm-kmeans-pam", "dmm", "", "", "","",false,false,true); parameters.push_back(pmethod);
+        
                 vector<string> myArray;
                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                 return myArray;
@@ -35,9 +42,13 @@ vector<string> GetMetaCommunityCommand::setParameters(){
  string GetMetaCommunityCommand::getHelpString(){
         try {
                 string helpString = "";
-               helpString += "The get.communitytype command parameters are shared, label, groups, minpartitions, maxpartitions, optimizegap and processors. The shared file is required. \n";
+               helpString += "The get.communitytype command parameters are shared, method, label, groups, minpartitions, maxpartitions, optimizegap and processors. The shared file is required. \n";
          helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n";
                 helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed.  Group names are separated by dashes.\n";
+        helpString += "The method parameter allows to select the method you would like to use.  Options are dmm, kmeans and pam. Default=dmm.\n";
+        helpString += "The calc parameter allows to select the calculator you would like to use to calculate the distance matrix used by the pam method. By default the jsd calculator is used.\n";
+        helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample while calculating the distance matirx for the pam method.\n";
+        helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group while calculating the distance matrix for the pam method.\n";
                 helpString += "The minpartitions parameter is used to .... Default=5.\n";
          helpString += "The maxpartitions parameter is used to .... Default=10.\n";
          helpString += "The optimizegap parameter is used to .... Default=3.\n";
@@ -171,6 +182,37 @@ GetMetaCommunityCommand::GetMetaCommunityCommand(string option)  {
                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
                                 else { allLines = 1;  }
                         }
+            
+            method = validParameter.validFile(parameters, "method", false);
+                       if (method == "not found") { method = "dmm"; }
+                       
+                       if ((method == "dmm") || (method == "kmeans") || (method == "pam")) { }
+                       else { m->mothurOut("[ERROR]: " + method + " is not a valid method.  Valid algorithms are dmm, kmeans and pam."); m->mothurOutEndLine(); abort = true; }
+            
+            calc = validParameter.validFile(parameters, "calc", false);
+                       if (calc == "not found") { calc = "jsd";  }
+                       else {
+                if (calc == "default")  {  calc = "jsd";  }
+                       }
+                       m->splitAtDash(calc, Estimators);
+                       if (m->inUsersGroups("citation", Estimators)) {
+                               ValidCalculators validCalc; validCalc.printCitations(Estimators);
+                               //remove citation from list of calcs
+                               for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
+                       }
+            if (Estimators.size() != 1) { abort = true; m->mothurOut("[ERROR]: only one calculator is allowed.\n"); }
+            
+            temp = validParameter.validFile(parameters, "iters", false);                       if (temp == "not found") { temp = "1000"; }
+                       m->mothurConvert(temp, iters);
+            
+            temp = validParameter.validFile(parameters, "subsample", false);           if (temp == "not found") { temp = "F"; }
+                       if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
+            else {
+                if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later
+                else { subsample = false; }
+            }
+            
+            if (subsample == false) { iters = 0; }
                 }
                 
         }
@@ -194,6 +236,35 @@ int GetMetaCommunityCommand::execute(){
          set<string> processedLabels;
          set<string> userLabels = labels;
          
+        if (subsample) {
+            if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
+                subsampleSize = lookup[0]->getNumSeqs();
+                for (int i = 1; i < lookup.size(); i++) {
+                    int thisSize = lookup[i]->getNumSeqs();
+                    
+                    if (thisSize < subsampleSize) {    subsampleSize = thisSize;       }
+                }
+            }else {
+                m->clearGroups();
+                Groups.clear();
+                vector<SharedRAbundVector*> temp;
+                for (int i = 0; i < lookup.size(); i++) {
+                    if (lookup[i]->getNumSeqs() < subsampleSize) {
+                        m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
+                        delete lookup[i];
+                    }else {
+                        Groups.push_back(lookup[i]->getGroup());
+                        temp.push_back(lookup[i]);
+                    }
+                }
+                lookup = temp;
+                m->setGroups(Groups);
+            }
+            
+            if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true;  return 0; }
+        }
+
+        
          //as long as you are not at the end of the file or done wih the lines you want
          while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
              
@@ -363,7 +434,12 @@ int GetMetaCommunityCommand::createProcesses(vector<SharedRAbundVector*>& thislo
                 }
                 
                 //do my part
-        m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
+        if (method == "dmm") {  m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");  }
+        else {
+            m->mothurOut("K\tCH\t");
+            for (int i = 0; i < thislookup.size(); i++) {  m->mothurOut(thislookup[i]->getGroup() + '\t'); }
+            m->mothurOut("\n");
+        }
                 minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0);
                 
                 //force parent to wait until all the processes are done
@@ -437,56 +513,6 @@ int GetMetaCommunityCommand::createProcesses(vector<SharedRAbundVector*>& thislo
          }
          
  #else
-        /*
-        vector<metaCommunityData*> pDataArray;
-               DWORD   dwThreadIdArray[processors-1];
-               HANDLE  hThreadArray[processors-1];
-        
-               //Create processor worker threads.
-               for( int i=1; i<processors; i++ ){
-            //copy lookup
-            //make copy of lookup so we don't get access violations
-            vector<SharedRAbundVector*> newLookup;
-            for (int k = 0; k < thislookup.size(); k++) {
-                SharedRAbundVector* temp = new SharedRAbundVector();
-                temp->setLabel(thislookup[k]->getLabel());
-                temp->setGroup(thislookup[k]->getGroup());
-                newLookup.push_back(temp);
-            }
-            
-            //for each bin
-            for (int k = 0; k < thislookup[0]->getNumBins(); k++) {
-                if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
-                for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(k), thislookup[j]->getGroup()); }
-            }
-
-            processIDS.push_back(i);
-            
-                       // Allocate memory for thread data.
-                       metaCommunityData* tempMeta = new metaCommunityData(newLookup, m, dividedPartitions[i], outputFileName + toString(i), rels[i], matrix[i], minpartitions, maxpartitions, optimizegap);
-                       pDataArray.push_back(tempMeta);
-                       
-                       hThreadArray[i] = CreateThread(NULL, 0, MyMetaCommunityThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
-               }
-               
-        //do my part
-               minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0]);
-        
-               //Wait until all threads have terminated.
-               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
-               
-               //Close all thread handles and free memory allocations.
-               for(int i=0; i < pDataArray.size(); i++){
-            if (pDataArray[i]->minPartition < minPartition) { minPartition = pDataArray[i]->minPartition; }
-            for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {
-                outputNames.push_back(pDataArray[i]->outputNames[j]);
-            }
-            m->appendFilesWithoutHeaders(outputFileName + toString(processIDS[i]), outputFileName);
-            m->mothurRemove(outputFileName + toString(processIDS[i]));
-                       CloseHandle(hThreadArray[i]);
-                       delete pDataArray[i];
-               } */
-        //do my part
          m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
                 minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0);
  #endif
@@ -501,7 +527,8 @@ int GetMetaCommunityCommand::createProcesses(vector<SharedRAbundVector*>& thislo
          
          //run generate Summary function for smallest minPartition
          variables["[tag]"] = toString(minPartition);
-        generateSummaryFile(minPartition, variables);
+        vector<double> piValues = generateDesignFile(minPartition, variables);
+        if (method == "dmm") {  generateSummaryFile(minPartition, variables, piValues); } //pam doesn't make a relabund file
          
          return 0;
  
@@ -516,12 +543,25 @@ int GetMetaCommunityCommand::processDriver(vector<SharedRAbundVector*>& thislook
         try {
          
          double minLaplace = 1e10;
-        int minPartition = 0;
+        int minPartition = 1;
+        vector<double> minSilhouettes; minSilhouettes.resize(thislookup.size(), 0);
+        
+               ofstream fitData, silData;
+        if (method == "dmm") {
+            m->openOutputFile(outputFileName, fitData);
+            fitData.setf(ios::fixed, ios::floatfield);
+            fitData.setf(ios::showpoint);
+            fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
+        }else if((method == "pam") || (method == "kmeans")) { //because ch is looking of maximal value
+            minLaplace = 0;
+            m->openOutputFile(outputFileName, silData);
+            silData.setf(ios::fixed, ios::floatfield);
+            silData.setf(ios::showpoint);
+            silData << "K\tCH\t";
+            for (int i = 0; i < thislookup.size(); i++) { silData << thislookup[i]->getGroup() << '\t';  }
+            silData << endl;
+        } 
          
-               ofstream fitData;
-               m->openOutputFile(outputFileName, fitData);
-        fitData.setf(ios::fixed, ios::floatfield);
-        fitData.setf(ios::showpoint);
          cout.setf(ios::fixed, ios::floatfield);
          cout.setf(ios::showpoint);
  
@@ -529,7 +569,18 @@ int GetMetaCommunityCommand::processDriver(vector<SharedRAbundVector*>& thislook
          vector<string> thisGroups;
          for (int i = 0; i < thislookup.size(); i++) { sharedMatrix.push_back(thislookup[i]->getAbundances()); thisGroups.push_back(thislookup[i]->getGroup()); }
          
-        fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
+        vector< vector<double> > dists; //do we want to output this matrix??
+        if ((method == "pam") || (method == "kmeans")) {  dists = generateDistanceMatrix(thislookup);  }
+        
+        if (m->debug) {
+            m->mothurOut("[DEBUG]: dists = \n");
+            for (int i = 0; i < dists.size(); i++) {
+                if (m->control_pressed) { break; }
+                m->mothurOut("[DEBUG]: i = " + toString(i) + '\t');
+                for (int j = 0; j < i; j++) { m->mothurOut(toString(dists[i][j]) +"\t"); }
+                m->mothurOut("\n");
+            }
+        }
          
          for(int i=0;i<parts.size();i++){
              
@@ -551,33 +602,51 @@ int GetMetaCommunityCommand::processDriver(vector<SharedRAbundVector*>& thislook
                  }
              }
              
-            qFinderDMM findQ(sharedMatrix, numPartitions);
-            
-            double laplace = findQ.getLaplace();
-            m->mothurOut(toString(numPartitions) + '\t');
-            cout << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
-            m->mothurOutJustToLog(toString(findQ.getNLL()) + '\t' + toString(findQ.getLogDet()) + '\t');
-            cout << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace;
-            m->mothurOutJustToLog(toString(findQ.getBIC()) + '\t' + toString(findQ.getAIC()) + '\t' + toString(laplace));
-            
-            fitData << numPartitions << '\t';
-            fitData << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
-            fitData << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace << endl;
-            
-            if(laplace < minLaplace){
-                minPartition = numPartitions;
-                minLaplace = laplace;
-                m->mothurOut("***");
+            CommunityTypeFinder* finder = NULL;
+            if (method == "dmm")            {   finder = new qFinderDMM(sharedMatrix, numPartitions);   }
+            else if (method == "kmeans")    {   finder = new KMeans(sharedMatrix, numPartitions);       }
+            else if (method == "pam")       {   finder = new Pam(sharedMatrix, dists, numPartitions);                 }
+            else {
+                if (i == 0) {  m->mothurOut(method + " is not a valid method option. I will run the command using dmm.\n"); }
+                finder = new qFinderDMM(sharedMatrix, numPartitions);
              }
-            m->mothurOutEndLine();
              
+            double chi; vector<double> silhouettes;
+            if (method == "dmm") {
+                double laplace = finder->getLaplace();
+                if(laplace < minLaplace){
+                    minPartition = numPartitions;
+                    minLaplace = laplace;
+                }
+            }else {
+                chi = finder->calcCHIndex(dists);
+                silhouettes = finder->calcSilhouettes(dists);
+                if (chi > minLaplace) { //save partition with maximum ch index score
+                    minPartition = numPartitions;
+                    minLaplace = chi;
+                    minSilhouettes = silhouettes;
+                }
+            }
              string relabund = relabunds[i];
-            outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
              string matrixName = matrix[i];
              outputNames.push_back(matrixName); outputTypes["matrix"].push_back(matrixName);
              
-            findQ.printZMatrix(matrixName, thisGroups);
-            findQ.printRelAbund(relabund, m->currentSharedBinLabels);
+            finder->printZMatrix(matrixName, thisGroups);
+            
+            if (method == "dmm") {
+                finder->printFitData(cout, minLaplace);
+                finder->printFitData(fitData);
+                finder->printRelAbund(relabund, m->currentSharedBinLabels);
+                outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
+            }else if ((method == "pam") || (method == "kmeans")) { //print silouettes and ch values
+                finder->printSilData(cout, chi, silhouettes);
+                finder->printSilData(silData, chi, silhouettes);
+                if (method == "kmeans") {
+                    finder->printRelAbund(relabund, m->currentSharedBinLabels);
+                    outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
+                }
+            }
+            delete finder;
              
              if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){
                  string tempDoneFile = m->getRootName(m->getSimpleName(sharedfile)) + toString(processID) + ".done.temp";
@@ -588,9 +657,7 @@ int GetMetaCommunityCommand::processDriver(vector<SharedRAbundVector*>& thislook
                  break;
              }
          }
-        fitData.close();
-        
-        //minPartition = 4;
+        if (method == "dmm") { fitData.close(); }
          
          if (m->control_pressed) { return 0; }
  
@@ -672,7 +739,7 @@ vector<double> GetMetaCommunityCommand::generateDesignFile(int numPartitions, ma
  inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference;   }
  
  /**************************************************************************************************/
-int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,string> v){
+int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,string> v, vector<double> piValues){
      try {
          vector<summaryData> summary;
          
@@ -683,9 +750,6 @@ int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,s
          string name, header;
          double mean, lci, uci;
          
-        
-        vector<double> piValues = generateDesignFile(numPartitions, v);
-        
          ifstream referenceFile;
          map<string, string> variables;
          variables["[filename]"] = v["[filename]"];
@@ -808,4 +872,236 @@ int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,s
      
  }
  //**********************************************************************************************************************
+vector<vector<double> > GetMetaCommunityCommand::generateDistanceMatrix(vector<SharedRAbundVector*>& thisLookup){
+    try {
+        vector<vector<double> > results;
+        
+        Calculator* matrixCalculator;
+        ValidCalculators validCalculator;
+        int i = 0;
+        
+        if (validCalculator.isValidCalculator("matrix", Estimators[i]) == true) {
+            if (Estimators[i] == "sharedsobs") {
+                matrixCalculator = new SharedSobsCS();
+            }else if (Estimators[i] == "sharedchao") {
+                matrixCalculator = new SharedChao1();
+            }else if (Estimators[i] == "sharedace") {
+                matrixCalculator = new SharedAce();
+            }else if (Estimators[i] == "jabund") {
+                matrixCalculator = new JAbund();
+            }else if (Estimators[i] == "sorabund") {
+                matrixCalculator = new SorAbund();
+            }else if (Estimators[i] == "jclass") {
+                matrixCalculator = new Jclass();
+            }else if (Estimators[i] == "sorclass") {
+                matrixCalculator = new SorClass();
+            }else if (Estimators[i] == "jest") {
+                matrixCalculator = new Jest();
+            }else if (Estimators[i] == "sorest") {
+                matrixCalculator = new SorEst();
+            }else if (Estimators[i] == "thetayc") {
+                matrixCalculator = new ThetaYC();
+            }else if (Estimators[i] == "thetan") {
+                matrixCalculator = new ThetaN();
+            }else if (Estimators[i] == "kstest") {
+                matrixCalculator = new KSTest();
+            }else if (Estimators[i] == "sharednseqs") {
+                matrixCalculator = new SharedNSeqs();
+            }else if (Estimators[i] == "ochiai") {
+                matrixCalculator = new Ochiai();
+            }else if (Estimators[i] == "anderberg") {
+                matrixCalculator = new Anderberg();
+            }else if (Estimators[i] == "kulczynski") {
+                matrixCalculator = new Kulczynski();
+            }else if (Estimators[i] == "kulczynskicody") {
+                matrixCalculator = new KulczynskiCody();
+            }else if (Estimators[i] == "lennon") {
+                matrixCalculator = new Lennon();
+            }else if (Estimators[i] == "morisitahorn") {
+                matrixCalculator = new MorHorn();
+            }else if (Estimators[i] == "braycurtis") {
+                matrixCalculator = new BrayCurtis();
+            }else if (Estimators[i] == "whittaker") {
+                matrixCalculator = new Whittaker();
+            }else if (Estimators[i] == "odum") {
+                matrixCalculator = new Odum();
+            }else if (Estimators[i] == "canberra") {
+                matrixCalculator = new Canberra();
+            }else if (Estimators[i] == "structeuclidean") {
+                matrixCalculator = new StructEuclidean();
+            }else if (Estimators[i] == "structchord") {
+                matrixCalculator = new StructChord();
+            }else if (Estimators[i] == "hellinger") {
+                matrixCalculator = new Hellinger();
+            }else if (Estimators[i] == "manhattan") {
+                matrixCalculator = new Manhattan();
+            }else if (Estimators[i] == "structpearson") {
+                matrixCalculator = new StructPearson();
+            }else if (Estimators[i] == "soergel") {
+                matrixCalculator = new Soergel();
+            }else if (Estimators[i] == "spearman") {
+                matrixCalculator = new Spearman();
+            }else if (Estimators[i] == "structkulczynski") {
+                matrixCalculator = new StructKulczynski();
+            }else if (Estimators[i] == "speciesprofile") {
+                matrixCalculator = new SpeciesProfile();
+            }else if (Estimators[i] == "hamming") {
+                matrixCalculator = new Hamming();
+            }else if (Estimators[i] == "structchi2") {
+                matrixCalculator = new StructChi2();
+            }else if (Estimators[i] == "gower") {
+                matrixCalculator = new Gower();
+            }else if (Estimators[i] == "memchi2") {
+                matrixCalculator = new MemChi2();
+            }else if (Estimators[i] == "memchord") {
+                matrixCalculator = new MemChord();
+            }else if (Estimators[i] == "memeuclidean") {
+                matrixCalculator = new MemEuclidean();
+            }else if (Estimators[i] == "mempearson") {
+                matrixCalculator = new MemPearson();
+            }else if (Estimators[i] == "jsd") {
+                matrixCalculator = new JSD();
+            }else {
+                m->mothurOut("[ERROR]: " + Estimators[i] + " is not a valid calculator, please correct.\n"); m->control_pressed = true; return results;
+            }
+        }
+        
+        //calc distances
+        vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, then each groupCombos dists. this will be used to make .dist files
+        vector< vector<seqDist> > calcDists; calcDists.resize(1);
+        
+        for (int thisIter = 0; thisIter < iters+1; thisIter++) {
+ 
+            vector<SharedRAbundVector*> thisItersLookup = thisLookup;
+            
+            if (subsample && (thisIter != 0)) {
+                SubSample sample;
+                vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
+                
+                //make copy of lookup so we don't get access violations
+                vector<SharedRAbundVector*> newLookup;
+                for (int k = 0; k < thisItersLookup.size(); k++) {
+                    SharedRAbundVector* temp = new SharedRAbundVector();
+                    temp->setLabel(thisItersLookup[k]->getLabel());
+                    temp->setGroup(thisItersLookup[k]->getGroup());
+                    newLookup.push_back(temp);
+                }
+                
+                //for each bin
+                for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+                    if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return results; }
+                    for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+                }
+                
+                tempLabels = sample.getSample(newLookup, subsampleSize);
+                thisItersLookup = newLookup;
+            }
+            
+           
+            driver(thisItersLookup, calcDists, matrixCalculator);
+                     
+            if (subsample && (thisIter != 0)) {
+                if((thisIter) % 100 == 0){     m->mothurOutJustToScreen(toString(thisIter)+"\n");              }
+                calcDistsTotals.push_back(calcDists);
+                for (int i = 0; i < calcDists.size(); i++) {
+                    for (int j = 0; j < calcDists[i].size(); j++) {
+                        if (m->debug) {  m->mothurOut("[DEBUG]: Results: iter = " + toString(thisIter) + ", " + thisLookup[calcDists[i][j].seq1]->getGroup() + " - " + thisLookup[calcDists[i][j].seq2]->getGroup() + " distance = " + toString(calcDists[i][j].dist) + ".\n");  }
+                    }
+                }
+                //clean up memory
+                for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
+                thisItersLookup.clear();
+            }else { //print results for whole dataset
+                for (int i = 0; i < calcDists.size(); i++) {
+                    if (m->control_pressed) { break; }
+                    
+                    //initialize matrix
+                    results.resize(thisLookup.size());
+                    for (int k = 0; k < thisLookup.size(); k++) {  results[k].resize(thisLookup.size(), 0.0); }
+                    
+                    for (int j = 0; j < calcDists[i].size(); j++) {
+                        int row = calcDists[i][j].seq1;
+                        int column = calcDists[i][j].seq2;
+                        double dist = calcDists[i][j].dist;
+                        
+                        results[row][column] = dist;
+                        results[column][row] = dist;
+                    }
+                }
+            }
+            for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
+               }
+               
+        if (iters != 0) {
+            //we need to find the average distance and standard deviation for each groups distance
+            vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals, "average");
+            
+            //print results
+            for (int i = 0; i < calcDists.size(); i++) {
+                results.resize(thisLookup.size());
+                for (int k = 0; k < thisLookup.size(); k++) {  results[k].resize(thisLookup.size(), 0.0); }
+                
+                for (int j = 0; j < calcAverages[i].size(); j++) {
+                    int row = calcAverages[i][j].seq1;
+                    int column = calcAverages[i][j].seq2;
+                    float dist = calcAverages[i][j].dist;
+                    
+                    results[row][column] = dist;
+                    results[column][row] = dist;
+                }
+            }
+        }
+
+        
+        return results;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GetMetaCommunityCommand", "generateDistanceMatrix");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+int GetMetaCommunityCommand::driver(vector<SharedRAbundVector*> thisLookup, vector< vector<seqDist> >& calcDists, Calculator* matrixCalculator) {
+       try {
+               vector<SharedRAbundVector*> subset;
+        
+               for (int k = 0; k < thisLookup.size(); k++) { // pass cdd each set of groups to compare
+                       
+                       for (int l = 0; l < k; l++) {
+                               
+                               if (k != l) { //we dont need to similiarity of a groups to itself
+                                       subset.clear(); //clear out old pair of sharedrabunds
+                                       //add new pair of sharedrabunds
+                                       subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
+                                       
+                                       
+                    
+                    //if this calc needs all groups to calculate the pair load all groups
+                    if (matrixCalculator->getNeedsAll()) {
+                        //load subset with rest of lookup for those calcs that need everyone to calc for a pair
+                        for (int w = 0; w < thisLookup.size(); w++) {
+                            if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
+                        }
+                    }
+                    
+                    vector<double> tempdata = matrixCalculator->getValues(subset); //saves the calculator outputs
+                    
+                    if (m->control_pressed) { return 1; }
+                    
+                    seqDist temp(l, k, tempdata[0]);
+                    //cout << l << '\t' << k << '\t' <<  tempdata[0] << endl;
+                    calcDists[0].push_back(temp);
+                }
+                               
+                       }
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MatrixOutputCommand", "driver");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
  
diff --git a/getmetacommunitycommand.h b/getmetacommunitycommand.h

index 7697430cca6164b695ed92526dd190926ec1eb5e..9c17e4d2eb844e6a2eb1c7b6bfee0bf96910ded4 100644 (file)
--- a/getmetacommunitycommand.h
+++ b/getmetacommunitycommand.h
@@ -12,6 +12,49 @@
  #include "command.hpp"
  #include "inputdata.h"
  #include "qFinderDMM.h"
+#include "pam.h"
+#include "sharedsobscollectsummary.h"
+#include "sharedchao1.h"
+#include "sharedace.h"
+#include "sharednseqs.h"
+#include "sharedjabund.h"
+#include "sharedsorabund.h"
+#include "sharedjclass.h"
+#include "sharedsorclass.h"
+#include "sharedjest.h"
+#include "sharedsorest.h"
+#include "sharedthetayc.h"
+#include "sharedthetan.h"
+#include "sharedkstest.h"
+#include "whittaker.h"
+#include "sharedochiai.h"
+#include "sharedanderbergs.h"
+#include "sharedkulczynski.h"
+#include "sharedkulczynskicody.h"
+#include "sharedlennon.h"
+#include "sharedmorisitahorn.h"
+#include "sharedbraycurtis.h"
+#include "sharedjackknife.h"
+#include "whittaker.h"
+#include "odum.h"
+#include "canberra.h"
+#include "structeuclidean.h"
+#include "structchord.h"
+#include "hellinger.h"
+#include "manhattan.h"
+#include "structpearson.h"
+#include "soergel.h"
+#include "spearman.h"
+#include "structkulczynski.h"
+#include "structchi2.h"
+#include "speciesprofile.h"
+#include "hamming.h"
+#include "gower.h"
+#include "memchi2.h"
+#include "memchord.h"
+#include "memeuclidean.h"
+#include "mempearson.h"
+#include "sharedjsd.h"
  
  /**************************************************************************************************/
  
@@ -40,18 +83,20 @@ private:
                 unsigned long long end;
                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
         };
-    bool abort, allLines;
+    bool abort, allLines, subsample;
      string outputDir;
      vector<string> outputNames;
-    string sharedfile;
-    int minpartitions, maxpartitions, optimizegap, processors;
-    vector<string> Groups;
+    string sharedfile, method, calc;
+    int minpartitions, maxpartitions, optimizegap, processors, iters, subsampleSize;
+    vector<string> Groups, Estimators;
      set<string> labels;
      
+    vector<vector<double> > generateDistanceMatrix(vector<SharedRAbundVector*>& lookup);
+    int driver(vector<SharedRAbundVector*> thisLookup, vector< vector<seqDist> >& calcDists, Calculator*);
      int processDriver(vector<SharedRAbundVector*>&, vector<int>&, string, vector<string>, vector<string>, vector<string>, int);
      int createProcesses(vector<SharedRAbundVector*>&);
      vector<double> generateDesignFile(int, map<string,string>);
-    int generateSummaryFile(int, map<string,string>);
+    int generateSummaryFile(int, map<string,string>, vector<double>);
  
  };
  
@@ -63,111 +108,5 @@ struct summaryData {
      vector<double> partMean, partLCI, partUCI;
      
  };
-/**************************************************************************************************/
-
-struct metaCommunityData {
-    vector<SharedRAbundVector*> thislookup;
-       MothurOut* m;
-       string outputFileName;
-    vector<string> relabunds, matrix, outputNames;
-    int minpartitions, maxpartitions, optimizegap;
-    vector<int> parts;
-    int minPartition;
-       
-       metaCommunityData(){}
-       metaCommunityData(vector<SharedRAbundVector*> lu, MothurOut* mout, vector<int> dp, string fit, vector<string> rels, vector<string> mat, int minp, int maxp, int opg) {
-               m = mout;
-        thislookup = lu;
-        parts = dp;
-        outputFileName = fit;
-        relabunds = rels;
-        matrix = mat;
-        minpartitions = minp;
-        maxpartitions = maxp;
-        optimizegap = opg;
-        minPartition = 0;
-       }
-};
-/**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-#else
-static DWORD WINAPI MyMetaCommunityThreadFunction(LPVOID lpParam){
-       metaCommunityData* pDataArray;
-       pDataArray = (metaCommunityData*)lpParam;
-       
-       try {
-        
-        double minLaplace = 1e10;
-        
-               ofstream fitData;
-               pDataArray->m->openOutputFile(pDataArray->outputFileName, fitData);
-        fitData.setf(ios::fixed, ios::floatfield);
-        fitData.setf(ios::showpoint);
-        cout.setf(ios::fixed, ios::floatfield);
-        cout.setf(ios::showpoint);
-        
-        vector< vector<int> > sharedMatrix;
-        for (int i = 0; i < pDataArray->thislookup.size(); i++) { sharedMatrix.push_back(pDataArray->thislookup[i]->getAbundances()); }
-        
-        pDataArray->m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
-        fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
-        
-        for(int i=0;i<pDataArray->parts.size();i++){
-            
-            int numPartitions = pDataArray->parts[i];
-            
-            if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: running partition " + toString(numPartitions) + "\n"); }
-            
-            if (pDataArray->m->control_pressed) { break; }
-            
-            qFinderDMM* findQ = new qFinderDMM(sharedMatrix, numPartitions);
-            
-            if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: done finding Q " + toString(numPartitions) + "\n"); }
-            
-            double laplace = findQ->getLaplace();
-            pDataArray->m->mothurOut(toString(numPartitions) + '\t');
-            cout << setprecision (2) << findQ->getNLL() << '\t' << findQ->getLogDet() << '\t';
-            pDataArray->m->mothurOutJustToLog(toString(findQ->getNLL()) + '\t' + toString(findQ->getLogDet()) + '\t');
-            cout << findQ->getBIC() << '\t' << findQ->getAIC() << '\t' << laplace;
-            pDataArray->m->mothurOutJustToLog(toString(findQ->getBIC()) + '\t' + toString(findQ->getAIC()) + '\t' + toString(laplace));
-            
-            fitData << numPartitions << '\t';
-            fitData << setprecision (2) << findQ->getNLL() << '\t' << findQ->getLogDet() << '\t';
-            fitData << findQ->getBIC() << '\t' << findQ->getAIC() << '\t' << laplace << endl;
-            
-            if(laplace < minLaplace){
-                pDataArray->minPartition = numPartitions;
-                minLaplace = laplace;
-                pDataArray->m->mothurOut("***");
-            }
-            pDataArray->m->mothurOutEndLine();
-            
-            pDataArray->outputNames.push_back(pDataArray->relabunds[i]);
-            pDataArray->outputNames.push_back(pDataArray->matrix[i]);
-            
-            findQ->printZMatrix(pDataArray->matrix[i], pDataArray->m->getGroups());
-            findQ->printRelAbund(pDataArray->relabunds[i], pDataArray->m->currentSharedBinLabels);
-            
-            if(pDataArray->optimizegap != -1 && (numPartitions - pDataArray->minPartition) >= pDataArray->optimizegap && numPartitions >= pDataArray->minpartitions){ break; }
-            
-            delete findQ;
-        }
-        fitData.close();
-        
-        //minPartition = 4;
-        
-        if (pDataArray->m->control_pressed) { return 0; }
-        
-        return 0;
-               
-       }
-       catch(exception& e) {
-               pDataArray->m->errorOut(e, "GetMetaCommunityCommand", "MyMetaCommunityThreadFunction");
-               exit(1);
-       }
-}
-#endif
-
-
  
  #endif
diff --git a/getoturepcommand.cpp b/getoturepcommand.cpp

index 61092372b18ae196a652d0b0f66c6b04d22d0266..c64549334ded9f4d32140ad64cbe898c5c41a023 100644 (file)
--- a/getoturepcommand.cpp
+++ b/getoturepcommand.cpp
@@ -760,6 +760,8 @@ string GetOTURepCommand::findRepAbund(vector<string> names, string group) {
         try{
          vector<string> reps;
          string rep = "notFound";
+    
+        if (m->debug) { m->mothurOut("[DEBUG]: group=" + group + " names.size() = " + toString(names.size()) + " " + names[0] + "\n"); }
          
          if ((names.size() == 1)) {
              return names[0];
@@ -773,7 +775,7 @@ string GetOTURepCommand::findRepAbund(vector<string> names, string group) {
                  if (countfile != "") {  //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
                      int numRep = 0;
                      if (group != "") {  numRep = ct.getGroupCount(names[i], group);  }
-                    else { numRep = ct.getGroupCount(names[i]);  }
+                    else { numRep = ct.getNumSeqs(names[i]);  }
                      if (numRep > maxAbund) {
                          reps.clear();
                          reps.push_back(names[i]);
@@ -834,7 +836,7 @@ string GetOTURepCommand::findRep(vector<string> names, string group) {
                          if (countfile != "") {  //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
                              int numRep = 0;
                              if (group != "") {  numRep = ct.getGroupCount(names[i], group);  }
-                            else { numRep = ct.getGroupCount(names[i]);  }
+                            else { numRep = ct.getNumSeqs(names[i]);  }
                              for (int j = 1; j < numRep; j++) { //don't add yourself again
                                  seqIndex.push_back(nameToIndex[names[i]]);
                              }
diff --git a/kmeans.cpp b/kmeans.cpp

new file mode 100644 (file)

index 0000000..dab5c7d
--- /dev/null
+++ b/kmeans.cpp
@@ -0,0 +1,237 @@
+//
+//  kmeans.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 12/4/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "kmeans.h"
+
+/**************************************************************************************************/
+
+KMeans::KMeans(vector<vector<int> > cm, int p) : CommunityTypeFinder() {
+    try {
+        countMatrix = cm;
+        numSamples = (int)countMatrix.size();
+        numOTUs = (int)countMatrix[0].size();
+        numPartitions = p;
+        
+        findkMeans();
+    }
+       catch(exception& e) {
+               m->errorOut(e, "KMeans", "KMeans");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+//The silhouette width S(i)of individual data points i is calculated using the following formula:
+/*
+ s(i) = b(i) - a(i)
+ -----------
+ max(b(i),a(i))
+ where a(i) is the average dissimilarity (or distance) of sample i to all other samples in the same cluster, while b(i) is the average dissimilarity (or distance) to all objects in the closest other cluster.
+ 
+ The formula implies -1 =< S(i) =< 1 . A sample which is much closer to its own cluster than to any other cluster has a high S(i) value, while S(i) close to 0 implies that the given sample lies somewhere between two clusters. Large negative S(i) values indicate that the sample was assigned to the wrong cluster.
+ */
+
+vector<double> KMeans::calcSilhouettes(vector<vector<double> > dists) {
+    try {
+        vector<double> silhouettes; silhouettes.resize(numSamples, 0.0);
+        if (numPartitions < 2) { return silhouettes; }
+        
+        map<int, int> clusterMap; //map sample to partition
+        for (int j = 0; j < numSamples; j++) {
+            double maxValue = 0.0;
+            for (int i = 0; i < numPartitions; i++) {
+                if (m->control_pressed) { return silhouettes; }
+                if (zMatrix[i][j] > maxValue) { //for kmeans zmatrix contains values for each sample in each partition. partition with highest value for that sample is the partition where the sample should be
+                    clusterMap[j] = i;
+                    maxValue = zMatrix[i][j];
+                }
+            }
+        }
+        
+        vector<int> nextClosestPartition;
+        findSecondClosest(nextClosestPartition, dists, clusterMap);
+        
+        if (m->control_pressed) { return silhouettes; }
+        
+        vector<double> a, b; a.resize(numSamples, 0.0); b.resize(numSamples, 0.0);
+        
+        //calc a - all a[i] are the same in the same partition
+        for (int k = 0; k < numPartitions; k++) {
+            if (m->control_pressed) { break; }
+            
+            int count = 0;
+            double totalA = 0.0;
+            for (int i = 0; i < numSamples; i++) {
+                for (int j = 0; j < numSamples; j++) {
+                    if (m->control_pressed) { break; }
+                    if ((clusterMap[i] == k) && (clusterMap[j] == k)){ //are both samples in the partition, if so add there distance
+                        totalA += dists[j][i]; //distance from this sample to the other samples in the partition
+                        count++;
+                    }
+                }
+            }
+            totalA /= (double) count;
+            
+            //set a[i] to average for cluster
+            for (int i = 0; i < numSamples; i++) {
+                if (clusterMap[i] == k) { a[i] = totalA; }
+            }
+        }
+        
+        //calc b
+        for (int i = 0; i < numSamples; i++) {
+            if (m->control_pressed) { break; }
+            
+            int thisPartition = nextClosestPartition[i];
+            int count = 0;
+            double totalB = 0.0;
+            for (int j = 0; j < numSamples; j++) {
+                if (clusterMap[j] == thisPartition) { //this sample is in this partition
+                    totalB += dists[i][j];
+                    count++;
+                }
+            }
+            b[i] = totalB / (double) count;
+        }
+        
+        //calc silhouettes
+        for (int i = 0; i < numSamples; i++) {
+            if (m->control_pressed) { break; }
+            
+            double denom = a[i];
+            if (b[i] > denom) { denom = b[i]; } //max(a[i],b[i])
+            
+            silhouettes[i] = (b[i] - a[i]) / denom;
+            
+            //cout << "silhouettes " << i << '\t' << silhouettes[i] << endl;
+        }
+        
+        return silhouettes;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "KMeans", "calcSilhouettes");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+int KMeans::findSecondClosest(vector<int>& nextClosestPartition, vector<vector<double> >& dists, map<int, int> clusterMap) {
+    try {
+        vector<double> minScores; minScores.resize(numSamples, 1e6);
+        nextClosestPartition.resize(numSamples, 0);
+        
+        
+        for (int i = 0; i < numSamples; i++) {
+            for (int j = 0; j < numPartitions; j++) {
+                if (m->control_pressed) { break; }
+                
+                //is this the one we are assigned to - ie the "best" cluster. We want second best.
+                //if numPartitions = 2, then special case??
+                if (clusterMap[i] != j) {
+                    double score = 1e6;
+                    if (numPartitions == 2) {
+                        score = 0.0;  //choose other option, there are only 2.
+                    }else{   score = calcScore(i, j, dists, clusterMap); }
+                
+                    if (m->debug) { m->mothurOut("[DEBUG]: sample = " + toString(i) + " partition = " + toString(j) + " score = " + toString(score) + "\n"); }
+                    
+                    //is this better than our last find
+                    if (score < minScores[i]) {
+                        minScores[i] = score;
+                        nextClosestPartition[i] = j;
+                    }
+                }else {} //best case, ignore
+            }
+        }
+        
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "KMeans", "findSecondClosest");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+double KMeans::calcScore(int sample, int partition, vector<vector<double> >& dists, map<int, int> clusterMap) {
+    try {
+        //square the distances and then for each pair of clusters, calculate the sum of the squraed distances between the clusters
+        //then with the sum of hte squared dsitances take the square root and divide by the number of distances in the sum
+        
+        double sum = 0.0; int count = 0;
+        for (int i = 0; i < numSamples; i++) {
+            if (m->control_pressed) { break; }
+            if (clusterMap[i] == partition) { //samples in this cluster
+                sum += (dists[sample][i] * dists[sample][i]);
+                count++;
+            }
+        }
+        
+        sum = sqrt(sum);
+        sum /= (double) count;
+                
+        return sum;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "KMeans", "calcScore");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*To assess the optimal number of clusters our dataset was most robustly partitioned into, we used the Calinski-Harabasz (CH) Index that has shown good performance in recovering the number of clusters. It is defined as:
+ 
+ CHk=Bk/(k−1)/Wk/(n−k)
+ 
+ where Bk is the between-cluster sum of squares (i.e. the squared distances between all points i and j, for which i and j are not in the same cluster) and Wk is the within-clusters sum of squares (i.e. the squared distances between all points i and j, for which i and j are in the same cluster). This measure implements the idea that the clustering is more robust when between-cluster distances are substantially larger than within-cluster distances. Consequently, we chose the number of clusters k such that CHk was maximal.*/
+double KMeans::calcCHIndex(vector< vector< double> > dists){
+    try {
+        double CH = 0.0;
+        
+        if (numPartitions < 2) { return CH; }
+        
+        map<int, int> clusterMap; //map sample to partition
+        for (int j = 0; j < numSamples; j++) {
+            double maxValue = 0.0;
+            for (int i = 0; i < numPartitions; i++) {
+                if (m->control_pressed) { return 0.0; }
+                if (zMatrix[i][j] > maxValue) { //for kmeans zmatrix contains values for each sample in each partition. partition with highest value for that sample is the partition where the sample should be
+                    clusterMap[j] = i;
+                    maxValue = zMatrix[i][j];
+                }
+            }
+        }
+        
+        double sumBetweenCluster = 0.0;
+        double sumWithinClusters = 0.0;
+        
+        for (int i = 0; i < numSamples; i++) { //lt
+            for (int j = 0; j < i; j++) {
+                if (m->control_pressed) { return 0.0; }
+                int partitionI = clusterMap[i];
+                int partitionJ = clusterMap[j];
+                
+                if (partitionI == partitionJ) { //they are from the same cluster so this distance is added to Wk
+                    sumWithinClusters += (dists[i][j] * dists[i][j]);
+                }else { //they are NOT from the same cluster so this distance is added to Bk
+                    sumBetweenCluster += (dists[i][j] * dists[i][j]);
+                }
+            }
+        }
+        //cout << numPartitions << '\t' << sumWithinClusters << '\t' << sumBetweenCluster << '\t' <<  (numSamples - numPartitions) << endl;
+        
+        CH = (sumBetweenCluster / (double)(numPartitions - 1)) / (sumWithinClusters / (double) (numSamples - numPartitions));
+        
+        return CH;
+    }
+    catch(exception& e){
+        m->errorOut(e, "KMeans", "calcCHIndex");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+
+
+
+
diff --git a/kmeans.h b/kmeans.h

new file mode 100644 (file)

index 0000000..97dfb68
--- /dev/null
+++ b/kmeans.h
@@ -0,0 +1,32 @@
+//
+//  kmeans.h
+//  Mothur
+//
+//  Created by SarahsWork on 12/4/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_kmeans_h
+#define Mothur_kmeans_h
+
+#include "communitytype.h"
+
+/**************************************************************************************************/
+
+class KMeans : public CommunityTypeFinder {
+    
+public:
+    KMeans(vector<vector<int> >, int);
+    vector<double> calcSilhouettes(vector< vector< double> >);
+    double calcCHIndex(vector< vector< double> >);
+    
+private:
+
+    int findSecondClosest(vector<int>&, vector<vector<double> >&, map<int, int>);
+    double calcScore(int sample, int partition, vector<vector<double> >&, map<int, int>);
+
+};
+
+/**************************************************************************************************/
+
+#endif
diff --git a/linearalgebra.cpp b/linearalgebra.cpp

index b208aab042b6be5c0c096531651b9067976d8f45..bacf2c1c07bb420430d07416b6ca7354c650a79b 100644 (file)
--- a/linearalgebra.cpp
+++ b/linearalgebra.cpp
@@ -114,7 +114,7 @@ double LinearAlgebra::gammp(const double a, const double x) {
  }
  /*********************************************************************************************************************************/
  //Numerical Recipes pg. 223
-double LinearAlgebra::gammq(const double a, const double x) {
+/*double LinearAlgebra::gammq(const double a, const double x) {
      try {
          double gamser,gammcf,gln;
          
@@ -130,11 +130,11 @@ double LinearAlgebra::gammq(const double a, const double x) {
          return 0;
      }
         catch(exception& e) {
-               m->errorOut(e, "LinearAlgebra", "gammp");
+               m->errorOut(e, "LinearAlgebra", "gammq");
                 exit(1);
         }
  }
-/*********************************************************************************************************************************/
+*********************************************************************************************************************************/
  //Numerical Recipes pg. 224
  double LinearAlgebra::gcf(double& gammcf, const double a, const double x, double& gln){
      try {
@@ -161,7 +161,7 @@ double LinearAlgebra::gcf(double& gammcf, const double a, const double x, double
              h *= del;
              if (fabs(del-1.0) <= EPS) break;
          }
-        if (i > ITMAX)  { m->mothurOut("[ERROR]: a too large, ITMAX too small in gcf\n"); m->control_pressed = true; }
+        if (i > ITMAX)  { m->mothurOut("[ERROR]: " + toString(a) + " too large, ITMAX=100 too small in gcf\n"); m->control_pressed = true; }
          gammcf=exp(-x+a*log(x)-gln)*h;
          
          return 0.0;
@@ -1302,7 +1302,7 @@ double LinearAlgebra::calcKruskalWallis(vector<spearmanRank>& values, double& pV
          double lastTerm = 3 * (values.size()+1);
          
          H = firstTerm * middleTerm - lastTerm;
-        
+       
          //adjust for ties
          if (TIES.size() != 0) {
              double sum = 0.0;
@@ -1311,6 +1311,8 @@ double LinearAlgebra::calcKruskalWallis(vector<spearmanRank>& values, double& pV
              H /= result;
          }
          
+        if (isnan(H) || isinf(H)) { H = 0; }
+        
          //Numerical Recipes pg221
          pValue = 1.0 - (gammp(((treatments.size()-1)/(double)2.0), H/2.0));
          
diff --git a/linearalgebra.h b/linearalgebra.h

index 7453f4ebd8b86ed20f530c2e1ba7989808361fb2..e4c8c1588b60c3868de7f4e06353a43112ea3037 100644 (file)
--- a/linearalgebra.h
+++ b/linearalgebra.h
@@ -57,7 +57,7 @@ private:
      double betacf(const double, const double, const double);
      double betai(const double, const double, const double);
      double gammln(const double);
-    double gammq(const double, const double);
+    //double gammq(const double, const double);
      double gser(double&, const double, const double, double&);
      double gcf(double&, const double, const double, double&);
      double erfcc(double);
diff --git a/matrixoutputcommand.cpp b/matrixoutputcommand.cpp

index a53a3dcb92837f112771f44ef39219d9e36a277a..8a7656cee9cda0086fd2aaedd1afd5fb27896952 100644 (file)
--- a/matrixoutputcommand.cpp
+++ b/matrixoutputcommand.cpp
@@ -17,7 +17,7 @@ vector<string> MatrixOutputCommand::setParameters(){
                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
          CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
-               CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc);
+               CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc);
                 CommandParameter poutput("output", "Multiple", "lt-square-column", "lt", "", "", "","",false,false); parameters.push_back(poutput);
          CommandParameter pmode("mode", "Multiple", "average-median", "average", "", "", "","",false,false); parameters.push_back(pmode);
                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
@@ -281,6 +281,9 @@ MatrixOutputCommand::MatrixOutputCommand(string option)  {
                                                         matrixCalculators.push_back(new MemEuclidean());
                                                 }else if (Estimators[i] == "mempearson") { 
                                                         matrixCalculators.push_back(new MemPearson());
+                        }else if (Estimators[i] == "jsd") {
+                                matrixCalculators.push_back(new JSD());
+
                                                 }
                                         }
                                 }
@@ -791,7 +794,7 @@ int MatrixOutputCommand::driver(vector<SharedRAbundVector*> thisLookup, int star
                                                 vector<double> tempdata = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
                                                 
                                                 if (m->control_pressed) { return 1; }
-                                               
+        
                                                 seqDist temp(l, k, tempdata[0]);
                                                 calcDists[i].push_back(temp);
                                         }
diff --git a/matrixoutputcommand.h b/matrixoutputcommand.h

index 90f120602e70dc04600b3048bab41bde21b21edc..5ddfb2c1c788e2d09f0f7ffcf4658a04f9e79c47 100644 (file)
--- a/matrixoutputcommand.h
+++ b/matrixoutputcommand.h
@@ -54,6 +54,7 @@
  #include "memchord.h"
  #include "memeuclidean.h"
  #include "mempearson.h"
+#include "sharedjsd.h"
  
  
  // aka. dist.shared()
@@ -224,7 +225,10 @@ static DWORD WINAPI MyDistSharedThreadFunction(LPVOID lpParam){
                      matrixCalculators.push_back(new MemEuclidean());
                  }else if (pDataArray->Estimators[i] == "mempearson") { 
                      matrixCalculators.push_back(new MemPearson());
+                }else if (pDataArray->Estimators[i] == "jsd") {
+                    matrixCalculators.push_back(new JSD());
                  }
+
              }
          }
          
diff --git a/metastatscommand.cpp b/metastatscommand.cpp

index 33b559fc2dc358017c90fd44fe77ec0be61acb65..de9e373194b30bc97a2afbeceb49b5303aa66de3 100644 (file)
--- a/metastatscommand.cpp
+++ b/metastatscommand.cpp
@@ -9,6 +9,7 @@
  
  #include "metastatscommand.h"
  #include "sharedutilities.h"
+#include "sharedrabundfloatvector.h"
  
  
  //**********************************************************************************************************************
@@ -213,7 +214,7 @@ MetaStatsCommand::MetaStatsCommand(string option) {
  int MetaStatsCommand::execute(){
         try {
         
-               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        if (abort == true) { if (calledHelp) { return 0; }  return 2;  }
          
          //just used to convert files to test metastats online
          /****************************************************/
@@ -221,7 +222,6 @@ int MetaStatsCommand::execute(){
          convertSharedToInput = false;
          if (convertInputToShared) { convertToShared(sharedfile); return 0; }
          /****************************************************/
-        
                 
                 designMap = new GroupMap(designfile);
                 designMap->readDesignMap();
@@ -574,9 +574,11 @@ int MetaStatsCommand::convertToShared(string filename) {
          string header = m->getline(in); m->gobble(in);
          
          vector<string> groups = m->splitWhiteSpace(header);
-        vector<SharedRAbundVector*> newLookup;
+        vector<SharedRAbundFloatVector*> newLookup;
+        cout << groups.size() << endl;
          for (int i = 0; i < groups.size(); i++) {
-            SharedRAbundVector* temp = new SharedRAbundVector();
+            cout << "creating group " << groups[i] << endl;
+            SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
              temp->setLabel("0.03");
              temp->setGroup(groups[i]);
              newLookup.push_back(temp);
@@ -589,9 +591,9 @@ int MetaStatsCommand::convertToShared(string filename) {
              string otuname;
              in >> otuname; m->gobble(in);
              otuCount++;
-            
+            cout << otuname << endl;
              for (int i = 0; i < groups.size(); i++) {
-                int temp;
+                double temp;
                  in >> temp; m->gobble(in);
                  newLookup[i]->push_back(temp, groups[i]);
              }
@@ -654,6 +656,8 @@ int MetaStatsCommand::convertToInput(vector<SharedRAbundVector*>& subset, string
          }
          out.close();
          
+        cout << thisfilename+".matrix" << endl;
+        
          return 0;
      }
         catch(exception& e) {
diff --git a/pam.cpp b/pam.cpp

new file mode 100644 (file)

index 0000000..2c70f58
--- /dev/null
+++ b/pam.cpp
@@ -0,0 +1,373 @@
+//
+//  pam.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 12/10/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "pam.h"
+#define DBL_EPSILON 1e-9
+
+/**************************************************************************************************/
+Pam::Pam(vector<vector<int> > c, vector<vector<double> > d, int p) : CommunityTypeFinder() {
+    try {
+        countMatrix = c;
+        numSamples = (int)d.size();
+        numOTUs = (int)c[0].size();
+        numPartitions = p;
+        dists = d;
+        
+        largestDist = 0;
+        for (int i = 0; i < dists.size(); i++) {
+            for (int j = i; j < dists.size(); j++) {
+                if (m->control_pressed) { break; }
+                if (dists[i][j] > largestDist) { largestDist = dists[i][j]; } 
+            }
+        }
+        
+        buildPhase(); //choosing the medoids
+        swapPhase(); //optimize clusters
+    }
+       catch(exception& e) {
+               m->errorOut(e, "Pam", "Pam");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+//sets Dp[0] does not set Dp[1]. chooses intial medoids.
+int Pam::buildPhase() {
+    try {
+        
+        if (m->debug) { m->mothurOut("[DEBUG]: building medoids\n"); }
+        
+        vector<double> gains; gains.resize(numSamples);
+        
+        largestDist *= 1.1 + 1; //make this distance larger than any distance in the matrix
+        Dp.resize(numSamples);
+        for (int i = 0; i < numSamples; i++) { Dp[i].push_back(largestDist); Dp[i].push_back(largestDist); } //2 smallest dists for this sample in this partition
+        
+        zMatrix.resize(numPartitions);
+        for(int i=0;i<numPartitions;i++){
+            zMatrix[i].assign(numSamples, 0);
+        }
+    
+        for (int k = 0; k < numPartitions; k++) {
+            
+            int medoid = -1;
+            double totalGain = 0.0;
+            double clusterGain = 0.0;
+            
+            for (int i = 0; i < numSamples; i++) {  //does this need to be square?? can we do lt?
+                if (m->control_pressed) { break; }
+        
+                if (medoids.count(i) == 0) { //is this sample is NOT a medoid?
+                    gains[i] = 0.0;
+                
+                    for (int j = 0; j < numSamples; j++) {
+                        //cout << i << '\t' << j << '\t' <<   Dp[i][0] << '\t' << dists[i][j] << '\t' << totalGain << endl;
+                        totalGain = Dp[i][0] - dists[i][j];
+                        if (totalGain > 0.0) { gains[i] += totalGain; }
+                    }
+                    if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +  " totalGain = " + toString(totalGain) + "\n"); }
+                    
+                    if (clusterGain <= gains[i]) {
+                        clusterGain = gains[i];
+                        medoid = i;
+                    }
+                }
+            }
+            
+            //save medoid value
+            medoids.insert(medoid);
+            
+            if (m->debug) { m->mothurOut("[DEBUG]: new medoid " + toString(medoid) + "\n"); }
+            
+            //update dp values
+            for (int i = 0; i < numSamples; i++) {
+                if (Dp[i][0] > dists[i][medoid]) { Dp[i][0] = dists[i][medoid]; }
+            }
+        }
+        if (m->debug) { m->mothurOut("[DEBUG]: done building medoids\n"); }
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "Pam", "buildPhase");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+//goal to swap medoids with non-medoids to see if we can reduce the overall cost
+int Pam::swapPhase() {
+    try {
+        if (m->debug) { m->mothurOut("[DEBUG]: swapping  medoids\n"); }
+        //calculate cost of initial choice - average distance of samples to their closest medoid
+        double sky = 0.0;
+        double dzsky = 1.0;
+        for (int i = 0; i < numSamples; i++) { sky += Dp[i][0]; }  sky /= (double) numSamples;
+        
+        bool done = false;
+        int hbest, nbest; hbest = -1; nbest = -1;
+        while (!done) {
+            if (m->control_pressed) { break; }
+            
+            updateDp();
+            
+            dzsky = 1;
+            
+            for (int h = 0; h < numSamples; h++) {
+                if (m->control_pressed) { break; }
+                if (medoids.count(h) == 0) { //this is NOT a medoid
+                    for (int i = 0; i < numSamples; i++) {
+                        if (medoids.count(i) != 0) { //this is a medoid
+                        
+                            double dz = 0.0; //Tih sum of distances between objects and closest medoid caused by swapping i and h. Basically the change in cost. If this < 0 its a "good" swap. When all Tih are > 0, then we stop the algo, because we have the optimal medoids.
+                            for (int j = 0; j < numSamples; j++) {
+                                if (m->control_pressed) { break; }
+                                if (dists[i][j] == Dp[j][0]) {
+                                    double small = 0.0;
+                                    if (Dp[j][1] > dists[h][j]) {   small = dists[h][j];    }
+                                    else                        {   small = Dp[j][1];       }
+                                    dz += (small - Dp[j][0]);
+                                }else if (dists[h][j] < Dp[j][0]) {
+                                    dz += (dists[h][j] - Dp[j][0]);
+                                }
+                            }
+                            if (dzsky > dz) {
+                                dzsky = dz;
+                                hbest = h; 
+                                nbest = i;
+                            }
+                        }//end if medoid
+                    }//end for i
+                }//end if NOT medoid
+            }//end if h
+            
+            if (dzsky < -16 *DBL_EPSILON * fabs(sky)) {
+                medoids.insert(hbest);
+                medoids.erase(nbest);
+                if (m->debug) { m->mothurOut("[DEBUG]: swapping " + toString(hbest) + " " + toString(nbest) + "\n"); }
+                sky += dzsky;
+            }else { done = true; } //stop algo.
+        }
+        
+        
+        //fill zmatrix
+        int count = 0;
+        vector<int> tempMedoids;
+        for (set<int>::iterator it = medoids.begin(); it != medoids.end(); it++) {
+            medoid2Partition[*it] = count;
+            zMatrix[count][*it] = 1; count++; //set medoid in this partition.
+            tempMedoids.push_back(*it);
+        }
+        
+        //which partition do you belong to?
+        laplace = 0;
+        for (int i = 0; i < numSamples; i++) {
+            int partition = 0;
+            double dist = dists[i][tempMedoids[0]]; //assign to first medoid
+            for (int j = 1; j < tempMedoids.size(); j++) {
+                if (dists[i][tempMedoids[j]] < dist) { //is this medoid closer?
+                    dist = dists[i][tempMedoids[j]];
+                    partition = j;
+                }
+            }
+            zMatrix[partition][i] = 1;
+            laplace += dist;
+        }
+        laplace /= (double) numSamples;
+        
+        if (m->debug) {
+            for(int i=0;i<numPartitions;i++){
+                m->mothurOut("[DEBUG]: partition 1: "); 
+                for (int j = 0; j < numSamples; j++) {
+                     m->mothurOut(toString(zMatrix[i][j]) + " ");
+                }
+                m->mothurOut("\n"); 
+            }
+            m->mothurOut("[DEBUG]: medoids : ");
+            for (set<int>::iterator it = medoids.begin(); it != medoids.end(); it++) { m->mothurOut(toString(*it) + " ");
+            }
+            m->mothurOut("\n");
+            
+            m->mothurOut("[DEBUG]: laplace : " + toString(laplace));  m->mothurOut("\n");
+        }
+        
+        if (m->debug) { m->mothurOut("[DEBUG]: done swapping  medoids\n"); }
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "Pam", "swapPhase");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+int Pam::updateDp() {
+    try {
+        for (int j = 0; j < numSamples; j++) {
+            if (m->control_pressed) { break; }
+            
+            //initialize dp and ep
+            Dp[j][0] = largestDist; Dp[j][1] = largestDist;
+        
+            for (int i = 0; i < numSamples; i++) {
+                if (medoids.count(i) != 0) { //is this a medoid? 
+                    if (Dp[j][0] > dists[j][i]) {
+                        Dp[j][0] = Dp[j][1];
+                        Dp[j][0] = dists[j][i];
+                    }else if (Dp[j][1] > dists[j][i]) {
+                        Dp[j][1] = dists[j][i];
+                    }
+                }
+            }
+        }
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "Pam", "updateDp");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+//The silhouette width S(i)of individual data points i is calculated using the following formula:
+/*
+ s(i) = b(i) - a(i)
+ -----------
+ max(b(i),a(i))
+ where a(i) is the average dissimilarity (or distance) of sample i to all other samples in the same cluster, while b(i) is the average dissimilarity (or distance) to all objects in the closest other cluster.
+ 
+ The formula implies -1 =< S(i) =< 1 . A sample which is much closer to its own cluster than to any other cluster has a high S(i) value, while S(i) close to 0 implies that the given sample lies somewhere between two clusters. Large negative S(i) values indicate that the sample was assigned to the wrong cluster.
+ */
+
+vector<double> Pam::calcSilhouettes(vector<vector<double> > dists) {
+    try {
+        vector<double> silhouettes; silhouettes.resize(numSamples, 0.0);
+        if (numPartitions < 2) { return silhouettes; }
+        
+        vector<int> whoDp;
+        for (int i = 0; i < numSamples; i++) { // assumes at least 2 partitions
+            vector<seqDist> tempMeds;
+            for (set<int>::iterator it = medoids.begin(); it != medoids.end(); it++) {
+                if (m->control_pressed) { break; }
+                seqDist temp(*it, *it, dists[i][*it]); //medoid, medoid, distance from medoid to sample
+                tempMeds.push_back(temp);
+            }
+            sort(tempMeds.begin(), tempMeds.end(), compareSequenceDistance); //sort by distance
+            whoDp.push_back(tempMeds[1].seq1); //second closest medoid
+        }
+        
+        
+        vector<double> a, b; a.resize(numSamples, 0.0); b.resize(numSamples, 0.0);
+        
+        //calc a - all a[i] are the same in the same partition
+        for (int k = 0; k < numPartitions; k++) {
+            if (m->control_pressed) { break; }
+            
+            int count = 0;
+            double totalA = 0.0;
+            for (int i = 0; i < numSamples; i++) {
+                for (int j = 0; j < numSamples; j++) {
+                    if (m->control_pressed) { break; }
+                    if ((zMatrix[k][i] != 0) && (zMatrix[k][j] != 0)){ //are both samples in the partition, if so add there distance
+                        totalA += dists[j][i]; //distance from this sample to the other samples in the partition
+                        count++;
+                    }
+                }
+            }
+            totalA /= (double) count;
+            
+            //set a[i] to average for cluster
+            for (int i = 0; i < numSamples; i++) {
+                if (zMatrix[k][i] != 0) { a[i] = totalA; }
+            }
+        }
+        
+        //calc b
+        for (int i = 0; i < numSamples; i++) {
+            if (m->control_pressed) { break; }
+            
+            int nextClosestMedoid = whoDp[i];
+            int thisPartition = medoid2Partition[nextClosestMedoid];
+            int count = 0;
+            double totalB = 0.0;
+            for (int j = 0; j < numSamples; j++) {
+                if (zMatrix[thisPartition][j] != 0) { //this sample is in this partition
+                    totalB += dists[i][j];
+                    count++;
+                }
+            }
+            b[i] = totalB / (double) count;
+        }
+        
+        //calc silhouettes
+        for (int i = 0; i < numSamples; i++) {
+            if (m->control_pressed) { break; }
+            
+            double denom = a[i];
+            if (b[i] > denom) { denom = b[i]; } //max(a[i],b[i])
+            
+            silhouettes[i] = (b[i] - a[i]) / denom;
+            
+            //cout << "silhouettes " << i << '\t' << silhouettes[i] << endl;
+        }
+               
+        return silhouettes;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "Pam", "calcSilhouettes");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+/*To assess the optimal number of clusters our dataset was most robustly partitioned into, we used the Calinski-Harabasz (CH) Index that has shown good performance in recovering the number of clusters. It is defined as:
+ 
+ CHk=Bk/(k−1)/Wk/(n−k)
+ 
+ where Bk is the between-cluster sum of squares (i.e. the squared distances between all points i and j, for which i and j are not in the same cluster) and Wk is the within-clusters sum of squares (i.e. the squared distances between all points i and j, for which i and j are in the same cluster). This measure implements the idea that the clustering is more robust when between-cluster distances are substantially larger than within-cluster distances. Consequently, we chose the number of clusters k such that CHk was maximal.*/
+double Pam::calcCHIndex(vector< vector<double> > dists){ //countMatrix = [numSamples][numOtus]
+    try {
+        double CH = 0.0;
+        
+        if (numPartitions < 2) { return CH; }
+        
+        map<int, int> clusterMap; //map sample to partition
+        for (int i = 0; i < numPartitions; i++) {
+            for (int j = 0; j < numSamples; j++) {
+                if (m->control_pressed) { return 0.0; }
+                if (zMatrix[i][j] != 0) { clusterMap[j] = i; }
+            }
+        }
+        
+        double sumBetweenCluster = 0.0;
+        double sumWithinClusters = 0.0;
+        
+        for (int i = 0; i < numSamples; i++) { //lt
+            for (int j = 0; j < i; j++) {
+                if (m->control_pressed) { return 0.0; }
+                int partitionI = clusterMap[i];
+                int partitionJ = clusterMap[j];
+                
+                if (partitionI == partitionJ) { //they are from the same cluster so this distance is added to Wk
+                    sumWithinClusters += (dists[i][j] * dists[i][j]);
+                }else { //they are NOT from the same cluster so this distance is added to Bk
+                    sumBetweenCluster += (dists[i][j] * dists[i][j]);
+                }
+            }
+        }
+        //cout << numPartitions << '\t' << sumWithinClusters << '\t' << sumBetweenCluster << '\t' <<  (numSamples - numPartitions) << endl;
+        
+        CH = (sumBetweenCluster / (double)(numPartitions - 1)) / (sumWithinClusters / (double) (numSamples - numPartitions));
+        
+        return CH;
+    }
+    catch(exception& e){
+        m->errorOut(e, "Pam", "calcCHIndex");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+
+
diff --git a/pam.h b/pam.h

new file mode 100644 (file)

index 0000000..9930176
--- /dev/null
+++ b/pam.h
@@ -0,0 +1,40 @@
+//
+//  pam.h
+//  Mothur
+//
+//  Created by SarahsWork on 12/10/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_pam_h
+#define Mothur_pam_h
+
+#include "communitytype.h"
+
+/**************************************************************************************************/
+
+class Pam : public CommunityTypeFinder {
+    
+public:
+    Pam(vector<vector<int> >, vector<vector<double> >, int);
+    vector<double> calcSilhouettes(vector< vector< double> >);
+    double calcCHIndex(vector< vector< double> >);
+    
+private:
+    set<int> medoids;
+    map<int, int> medoid2Partition;
+    double largestDist;
+    vector<vector<double> > dists;
+    vector<vector< double> > Dp; // [numSamples][2] - It contains Dp and Ep. Dp is in [numSamples][0] and Ep is in [numSamples][1]. Dp is the distance between p and the closest sample in S and Ep is the distance between p and the second closest object in S. Both are used in the build and swap phases.
+    
+    int buildPhase();
+    int swapPhase();
+    int updateDp();
+    
+    
+    
+/**************************************************************************************************/
+};
+
+
+#endif
diff --git a/qFinderDMM.cpp b/qFinderDMM.cpp

index c89644bae43649c7db10511b913be8ee9a3b7504..7ea5611ab0045d46fe11454ad0a522e565127003 100644 (file)
--- a/qFinderDMM.cpp
+++ b/qFinderDMM.cpp
@@ -7,26 +7,27 @@
  //
  
  #include "qFinderDMM.h"
-#include "linearalgebra.h"
  
-#define EPSILON numeric_limits<double>::epsilon()
+
  
  /**************************************************************************************************/
  
-qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p): countMatrix(cm), numPartitions(p){
+qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p) : CommunityTypeFinder() {
      try {
-        m = MothurOut::getInstance();
+        //cout << "here" << endl;
+        numPartitions = p;
+        countMatrix = cm;
          numSamples = (int)countMatrix.size();
          numOTUs = (int)countMatrix[0].size();
          
-        
-        kMeans();
-        //    cout << "done kMeans" << endl;
+        //cout << "before kmeans" <<endl;
+        findkMeans();
+            //cout << "done kMeans" << endl;
          
          optimizeLambda();
          
          
-        //    cout << "done optimizeLambda" << endl;
+            //cout << "done optimizeLambda" << endl;
          
          double change = 1.0000;
          currNLL = 0.0000;
@@ -35,19 +36,19 @@ qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p): countMatrix(cm), numPart
          
          while(change > 1.0e-6 && iter < 100){
              
-            //        cout << "Calc_Z: ";
+                    //cout << "Calc_Z: ";
              calculatePiK();
              
              optimizeLambda();
              
-            //        printf("Iter:%d\t",iter);
+                    //printf("Iter:%d\t",iter);
              
              for(int i=0;i<numPartitions;i++){
                  weights[i] = 0.0000;
                  for(int j=0;j<numSamples;j++){
                      weights[i] += zMatrix[i][j];
                  }
-                //            printf("w_%d=%.3f\t",i,weights[i]);
+                           // printf("w_%d=%.3f\t",i,weights[i]);
                  
              }
              
@@ -57,7 +58,7 @@ qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p): countMatrix(cm), numPart
              
              currNLL = nLL;
              
-            //        printf("NLL=%.5f\tDelta=%.4e\n",currNLL, change);
+                   // printf("NLL=%.5f\tDelta=%.4e\n",currNLL, change);
              
              iter++;
          }
@@ -95,84 +96,35 @@ qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p): countMatrix(cm), numPart
                 exit(1);
         }
  }
-
  /**************************************************************************************************/
-
-void qFinderDMM::printZMatrix(string fileName, vector<string> sampleName){
+void qFinderDMM::printFitData(ofstream& out){
      try {
-        ofstream printMatrix;
-        m->openOutputFile(fileName, printMatrix); //(fileName.c_str());
-        printMatrix.setf(ios::fixed, ios::floatfield);
-        printMatrix.setf(ios::showpoint);
-        
-        for(int i=0;i<numPartitions;i++){   printMatrix << "\tPartition_" << i+1;   }   printMatrix << endl;
-        
-        for(int i=0;i<numSamples;i++){
-            printMatrix << sampleName[i];
-            for(int j=0;j<numPartitions;j++){
-                printMatrix << setprecision(4) << '\t' << zMatrix[j][i];
-            }
-            printMatrix << endl;
-        }
-        printMatrix.close();
+        out << setprecision (2) << numPartitions << '\t'  << getNLL() << '\t' << getLogDet() << '\t' <<  getBIC() << '\t' << getAIC() << '\t' << laplace << endl;
+        return;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "printFitData");
+        exit(1);
      }
-       catch(exception& e) {
-               m->errorOut(e, "qFinderDMM", "printZMatrix");
-               exit(1);
-       }
  }
-
  /**************************************************************************************************/
-
-void qFinderDMM::printRelAbund(string fileName, vector<string> otuNames){
+void qFinderDMM::printFitData(ostream& out, double minLaplace){
      try {
-        ofstream printRA;
-        m->openOutputFile(fileName, printRA); //(fileName.c_str());
-        printRA.setf(ios::fixed, ios::floatfield);
-        printRA.setf(ios::showpoint);
-        
-        vector<double> totals(numPartitions, 0.0000);
-        for(int i=0;i<numPartitions;i++){
-            for(int j=0;j<numOTUs;j++){
-                totals[i] += exp(lambdaMatrix[i][j]);
-            }
+        if(laplace < minLaplace){
+            out << setprecision (2) << numPartitions << '\t'  << getNLL() << '\t' << getLogDet() << '\t' <<  getBIC() << '\t' << getAIC() << '\t' << laplace << "***" << endl;
+        }else {
+            out << setprecision (2) << numPartitions << '\t'  << getNLL() << '\t' << getLogDet() << '\t' <<  getBIC() << '\t' << getAIC() << '\t' << laplace << endl;
          }
          
-        printRA << "Taxon";
-        for(int i=0;i<numPartitions;i++){
-            printRA << "\tPartition_" << i+1 << '_' << setprecision(4) << totals[i];
-            printRA << "\tPartition_" << i+1 <<"_LCI" << "\tPartition_" << i+1 << "_UCI";
-        }
-        printRA << endl;
-        
-        for(int i=0;i<numOTUs;i++){
-            
-            if (m->control_pressed) { break; }
-            
-            printRA << otuNames[i];
-            for(int j=0;j<numPartitions;j++){
-                
-                if(error[j][i] >= 0.0000){
-                    double std = sqrt(error[j][i]);
-                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
-                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i] - 2.0 * std) / totals[j];
-                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i] + 2.0 * std) / totals[j];
-                }
-                else{
-                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
-                    printRA << '\t' << "NA";
-                    printRA << '\t' << "NA";
-                }
-            }
-            printRA << endl;
-        }
-        
-        printRA.close();
+        m->mothurOutJustToLog(toString(numPartitions) + '\t' + toString(getNLL()) + '\t' + toString(getLogDet()) + '\t');
+        m->mothurOutJustToLog(toString(getBIC()) + '\t' + toString(getAIC()) + '\t' + toString(laplace));
+
+        return;
+    }
+    catch(exception& e){
+        m->errorOut(e, "CommunityTypeFinder", "printFitData");
+        exit(1);
      }
-       catch(exception& e) {
-               m->errorOut(e, "qFinderDMM", "printRelAbund");
-               exit(1);
-       }
  }
  
  /**************************************************************************************************/
@@ -193,7 +145,7 @@ interp_quad (double f0, double fp0, double f1, double zl, double zh)
      
      double zmin = zl, fmin = fl;
      
-    if (fh < fmin) { zmin = zh; fmin = fh; } 
+    if (fh < fmin) { zmin = zh; fmin = fh; }
      
      if (c > 0)  /* positive curvature required for a minimum */
      {
@@ -216,7 +168,7 @@ interp_quad (double f0, double fp0, double f1, double zl, double zh)
   *
   * c(x) = f0 + fp0 * z + eta * z^2 + xi * z^3
   *
- * where eta=3*(f1-f0)-2*fp0-fp1, xi=fp0+fp1-2*(f1-f0). 
+ * where eta=3*(f1-f0)-2*fp0-fp1, xi=fp0+fp1-2*(f1-f0).
   */
  
  double cubic (double c0, double c1, double c2, double c3, double z){
@@ -231,7 +183,7 @@ void check_extremum (double c0, double c1, double c2, double c3, double z,
      
      double y = cubic (c0, c1, c2, c3, z);
      
-    if (y < *fmin)  
+    if (y < *fmin)
      {
          *zmin = z;  /* accepted new point*/
          *fmin = y;
@@ -240,7 +192,7 @@ void check_extremum (double c0, double c1, double c2, double c3, double z,
  
  /**************************************************************************************************/
  
-int gsl_poly_solve_quadratic (double a, double b, double c, 
+int gsl_poly_solve_quadratic (double a, double b, double c,
                                double *x0, double *x1)
  {
      
@@ -274,12 +226,12 @@ int gsl_poly_solve_quadratic (double a, double b, double c,
              double r1 = temp / a ;
              double r2 = c / temp ;
              
-            if (r1 < r2) 
+            if (r1 < r2)
              {
                  *x0 = r1 ;
                  *x1 = r2 ;
-            } 
-            else 
+            }
+            else
              {
                  *x0 = r2 ;
                  *x1 = r1 ;
@@ -287,7 +239,7 @@ int gsl_poly_solve_quadratic (double a, double b, double c,
          }
          return 2;
      }
-    else if (disc == 0) 
+    else if (disc == 0)
      {
          *x0 = -0.5 * b / a ;
          *x1 = -0.5 * b / a ;
@@ -297,7 +249,7 @@ int gsl_poly_solve_quadratic (double a, double b, double c,
      {
          return 0;
      }
-   
+    
  }
  
  /**************************************************************************************************/
@@ -317,14 +269,14 @@ double interp_cubic (double f0, double fp0, double f1, double fp1, double zl, do
          
          if (n == 2)  /* found 2 roots */
          {
-            if (z0 > zl && z0 < zh) 
+            if (z0 > zl && z0 < zh)
                  check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin);
-            if (z1 > zl && z1 < zh) 
+            if (z1 > zl && z1 < zh)
                  check_extremum (c0, c1, c2, c3, z1, &zmin, &fmin);
          }
          else if (n == 1)  /* found 1 root */
          {
-            if (z0 > zl && z0 < zh) 
+            if (z0 > zl && z0 < zh)
                  check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin);
          }
      }
@@ -355,7 +307,7 @@ double interpolate (double a, double fa, double fpa,
      else{
          z = interp_quad(fa, fpa * (b - a), fb, zmin, zmax);
      }
-
+    
      
      alpha = a + z * (b - a);
      
@@ -635,178 +587,6 @@ int qFinderDMM::bfgs2_Solver(vector<double>& x){
      }
  }
  
-/**************************************************************************************************/
-
-//can we get these psi/psi1 calculations into their own math class?
-//psi calcualtions swiped from gsl library...
-
-static const double psi_cs[23] = {
-    -.038057080835217922,
-    .491415393029387130, 
-    -.056815747821244730,
-    .008357821225914313,
-    -.001333232857994342,
-    .000220313287069308,
-    -.000037040238178456,
-    .000006283793654854,
-    -.000001071263908506,
-    .000000183128394654,
-    -.000000031353509361,
-    .000000005372808776,
-    -.000000000921168141,
-    .000000000157981265,
-    -.000000000027098646,
-    .000000000004648722,
-    -.000000000000797527,
-    .000000000000136827,
-    -.000000000000023475,
-    .000000000000004027,
-    -.000000000000000691,
-    .000000000000000118,
-    -.000000000000000020
-};
-
-static double apsi_cs[16] = {    
-    -.0204749044678185,
-    -.0101801271534859,
-    .0000559718725387,
-    -.0000012917176570,
-    .0000000572858606,
-    -.0000000038213539,
-    .0000000003397434,
-    -.0000000000374838,
-    .0000000000048990,
-    -.0000000000007344,
-    .0000000000001233,
-    -.0000000000000228,
-    .0000000000000045,
-    -.0000000000000009,
-    .0000000000000002,
-    -.0000000000000000 
-};    
-
-/**************************************************************************************************/
-
-double qFinderDMM::cheb_eval(const double seriesData[], int order, double xx){
-    try {
-        double d = 0.0000;
-        double dd = 0.0000;
-        
-        double x2 = xx * 2.0000;
-        
-        for(int j=order;j>=1;j--){
-            if (m->control_pressed) {  return 0; }
-            double temp = d;
-            d = x2 * d - dd + seriesData[j];
-            dd = temp;
-        }
-        
-        d = xx * d - dd + 0.5 * seriesData[0];
-        return d;
-    }
-    catch(exception& e){
-        m->errorOut(e, "qFinderDMM", "cheb_eval");
-        exit(1);
-    }
-}
-
-/**************************************************************************************************/
-
-double qFinderDMM::psi(double xx){
-    try {
-        double psiX = 0.0000;
-        
-        if(xx < 1.0000){
-            
-            double t1 = 1.0 / xx;
-            psiX = cheb_eval(psi_cs, 22, 2.0*xx-1.0);
-            psiX = -t1 + psiX;
-            
-        }
-        else if(xx < 2.0000){
-            
-            const double v = xx - 1.0;
-            psiX = cheb_eval(psi_cs, 22, 2.0*v-1.0);
-            
-        }
-        else{
-            const double t = 8.0/(xx*xx)-1.0;
-            psiX = cheb_eval(apsi_cs, 15, t);
-            psiX += log(xx) - 0.5/xx;
-        }
-        
-        return psiX;
-    }
-    catch(exception& e){
-        m->errorOut(e, "qFinderDMM", "psi");
-        exit(1);
-    }
-}
-
-/**************************************************************************************************/
-
-/* coefficients for Maclaurin summation in hzeta()
- * B_{2j}/(2j)!
- */
-static double hzeta_c[15] = {
-    1.00000000000000000000000000000,
-    0.083333333333333333333333333333,
-    -0.00138888888888888888888888888889,
-    0.000033068783068783068783068783069,
-    -8.2671957671957671957671957672e-07,
-    2.0876756987868098979210090321e-08,
-    -5.2841901386874931848476822022e-10,
-    1.3382536530684678832826980975e-11,
-    -3.3896802963225828668301953912e-13,
-    8.5860620562778445641359054504e-15,
-    -2.1748686985580618730415164239e-16,
-    5.5090028283602295152026526089e-18,
-    -1.3954464685812523340707686264e-19,
-    3.5347070396294674716932299778e-21,
-    -8.9535174270375468504026113181e-23
-};
-
-/**************************************************************************************************/
-
-double qFinderDMM::psi1(double xx){
-    try {
-        
-        /* Euler-Maclaurin summation formula
-         * [Moshier, p. 400, with several typo corrections]
-         */
-        
-        double s = 2.0000;
-        const int jmax = 12;
-        const int kmax = 10;
-        int j, k;
-        const double pmax  = pow(kmax + xx, -s);
-        double scp = s;
-        double pcp = pmax / (kmax + xx);
-        double value = pmax*((kmax+xx)/(s-1.0) + 0.5);
-        
-        for(k=0; k<kmax; k++) {
-            if (m->control_pressed) {  return 0; }
-            value += pow(k + xx, -s);
-        }
-        
-        for(j=0; j<=jmax; j++) {
-            if (m->control_pressed) {  return 0; }
-            double delta = hzeta_c[j+1] * scp * pcp;
-            value += delta;
-            
-            if(fabs(delta/value) < 0.5*EPSILON) break;
-            
-            scp *= (s+2*j+1)*(s+2*j+2);
-            pcp /= (kmax + xx)*(kmax + xx);
-        }
-        
-        return value;
-    }
-    catch(exception& e){
-        m->errorOut(e, "qFinderDMM", "psi1");
-        exit(1);
-    }
-}
  
  /**************************************************************************************************/
  
@@ -961,145 +741,6 @@ double qFinderDMM::getNegativeLogEvidence(vector<double>& lambda, int group){
  
  /**************************************************************************************************/
  
-void qFinderDMM::kMeans(){
-    try {
-        
-        vector<vector<double> > relativeAbundance(numSamples);
-        vector<vector<double> > alphaMatrix;
-        
-        alphaMatrix.resize(numPartitions);
-        lambdaMatrix.resize(numPartitions);
-        for(int i=0;i<numPartitions;i++){
-            alphaMatrix[i].assign(numOTUs, 0);
-            lambdaMatrix[i].assign(numOTUs, 0);
-        }
-        
-        //get relative abundance
-        for(int i=0;i<numSamples;i++){
-            if (m->control_pressed) {  return; }
-            int groupTotal = 0;
-            
-            relativeAbundance[i].assign(numOTUs, 0.0);
-            
-            for(int j=0;j<numOTUs;j++){
-                groupTotal += countMatrix[i][j];
-            }
-            for(int j=0;j<numOTUs;j++){
-                relativeAbundance[i][j] = countMatrix[i][j] / (double)groupTotal;
-            }
-        }
-        
-        //randomly assign samples into partitions
-        zMatrix.resize(numPartitions);
-        for(int i=0;i<numPartitions;i++){
-            zMatrix[i].assign(numSamples, 0);
-        }
-        
-        for(int i=0;i<numSamples;i++){
-            zMatrix[rand()%numPartitions][i] = 1;
-        }
-        
-        double maxChange = 1;
-        int maxIters = 1000;
-        int iteration = 0;
-        
-        weights.assign(numPartitions, 0);
-        
-        while(maxChange > 1e-6 && iteration < maxIters){
-            
-            if (m->control_pressed) {  return; }
-            //calcualte average relative abundance
-            maxChange = 0.0000;
-            for(int i=0;i<numPartitions;i++){
-                
-                double normChange = 0.0;
-                
-                weights[i] = 0;
-                
-                for(int j=0;j<numSamples;j++){
-                    weights[i] += (double)zMatrix[i][j];
-                }
-                
-                vector<double> averageRelativeAbundance(numOTUs, 0);
-                for(int j=0;j<numOTUs;j++){
-                    for(int k=0;k<numSamples;k++){
-                        averageRelativeAbundance[j] += zMatrix[i][k] * relativeAbundance[k][j];
-                    }
-                }
-                
-                for(int j=0;j<numOTUs;j++){
-                    averageRelativeAbundance[j] /= weights[i];
-                    double difference = averageRelativeAbundance[j] - alphaMatrix[i][j];
-                    normChange += difference * difference;
-                    alphaMatrix[i][j] = averageRelativeAbundance[j];
-                }
-                
-                normChange = sqrt(normChange);
-                
-                if(normChange > maxChange){ maxChange = normChange; }
-            }
-            
-            
-            //calcualte distance between each sample in partition adn the average relative abundance
-            for(int i=0;i<numSamples;i++){
-                if (m->control_pressed) {  return; }
-                
-                double normalizationFactor = 0;
-                vector<double> totalDistToPartition(numPartitions, 0);
-                
-                for(int j=0;j<numPartitions;j++){
-                    for(int k=0;k<numOTUs;k++){
-                        double difference = alphaMatrix[j][k] - relativeAbundance[i][k];
-                        totalDistToPartition[j] += difference * difference;
-                    }
-                    totalDistToPartition[j] = sqrt(totalDistToPartition[j]);
-                    normalizationFactor += exp(-50.0 * totalDistToPartition[j]);
-                }
-                
-                
-                for(int j=0;j<numPartitions;j++){
-                    zMatrix[j][i] = exp(-50.0 * totalDistToPartition[j]) / normalizationFactor;
-                }
-                
-            }
-            
-            iteration++;
-            //        cout << "K means: " << iteration << '\t' << maxChange << endl;
-            
-        }
-        
-        //    cout << "Iter:-1";
-        for(int i=0;i<numPartitions;i++){
-            weights[i] = 0.0000;
-            
-            for(int j=0;j<numSamples;j++){
-                weights[i] += zMatrix[i][j];
-            }
-            //        printf("\tw_%d=%.3f", i, weights[i]);
-        }
-        //    cout << endl;
-        
-        
-        for(int i=0;i<numOTUs;i++){
-            if (m->control_pressed) {  return; }
-            for(int j=0;j<numPartitions;j++){
-                if(alphaMatrix[j][i] > 0){
-                    lambdaMatrix[j][i] = log(alphaMatrix[j][i]);
-                }
-                else{
-                    lambdaMatrix[j][i] = -10.0;
-                }
-            }
-        }
-    }
-    catch(exception& e){
-        m->errorOut(e, "qFinderDMM", "kMeans");
-        exit(1);
-    }
-}
-
-/**************************************************************************************************/
-
  void qFinderDMM::optimizeLambda(){    
      try {
          for(currentPartition=0;currentPartition<numPartitions;currentPartition++){
@@ -1112,7 +753,6 @@ void qFinderDMM::optimizeLambda(){
          exit(1);
      }
  }
-
  /**************************************************************************************************/
  
  void qFinderDMM::calculatePiK(){
@@ -1247,80 +887,4 @@ double qFinderDMM::getNegativeLogLikelihood(){
  
  
  }
-
-/**************************************************************************************************/
-
-vector<vector<double> > qFinderDMM::getHessian(){
-    try {
-        vector<double> alpha(numOTUs, 0.0000);
-        double alphaSum = 0.0000;
-        
-        vector<double> pi = zMatrix[currentPartition];
-        vector<double> psi_ajk(numOTUs, 0.0000);
-        vector<double> psi_cjk(numOTUs, 0.0000);
-        vector<double> psi1_ajk(numOTUs, 0.0000);
-        vector<double> psi1_cjk(numOTUs, 0.0000);
-        
-        for(int j=0;j<numOTUs;j++){
-            
-            if (m->control_pressed) {  break; }
-            
-            alpha[j] = exp(lambdaMatrix[currentPartition][j]);
-            alphaSum += alpha[j];
-            
-            for(int i=0;i<numSamples;i++){
-                double X = (double) countMatrix[i][j];
-                
-                psi_ajk[j] += pi[i] * psi(alpha[j]);
-                psi1_ajk[j] += pi[i] * psi1(alpha[j]);
-                
-                psi_cjk[j] += pi[i] * psi(alpha[j] + X);
-                psi1_cjk[j] += pi[i] * psi1(alpha[j] + X);
-            }
-        }
-        
-        
-        double psi_Ck = 0.0000;
-        double psi1_Ck = 0.0000;
-        
-        double weight = 0.0000;
-        
-        for(int i=0;i<numSamples;i++){
-            if (m->control_pressed) {  break; }
-            weight += pi[i];
-            double sum = 0.0000;
-            for(int j=0;j<numOTUs;j++){     sum += alpha[j] + countMatrix[i][j];    }
-            
-            psi_Ck += pi[i] * psi(sum);
-            psi1_Ck += pi[i] * psi1(sum);
-        }
-        
-        double psi_Ak = weight * psi(alphaSum);
-        double psi1_Ak = weight * psi1(alphaSum);
-        
-        vector<vector<double> > hessian(numOTUs);
-        for(int i=0;i<numOTUs;i++){ hessian[i].assign(numOTUs, 0.0000); }
-        
-        for(int i=0;i<numOTUs;i++){
-            if (m->control_pressed) {  break; }
-            double term1 = -alpha[i] * (- psi_ajk[i] + psi_Ak + psi_cjk[i] - psi_Ck);
-            double term2 = -alpha[i] * alpha[i] * (-psi1_ajk[i] + psi1_Ak + psi1_cjk[i] - psi1_Ck);
-            double term3 = 0.1 * alpha[i];
-            
-            hessian[i][i] = term1 + term2 + term3;
-            
-            for(int j=0;j<i;j++){   
-                hessian[i][j] = - alpha[i] * alpha[j] * (psi1_Ak - psi1_Ck);
-                hessian[j][i] = hessian[i][j];
-            }
-        }
-        
-        return hessian;
-    }
-    catch(exception& e){
-        m->errorOut(e, "qFinderDMM", "getHessian");
-        exit(1);
-    }
-}
-
  /**************************************************************************************************/
diff --git a/qFinderDMM.h b/qFinderDMM.h

index d94e6cce9f11b89de87d9f756ae490c69aabda72..df7b44305ef38fc417360bebd729357fb6724e2d 100644 (file)
--- a/qFinderDMM.h
+++ b/qFinderDMM.h
@@ -9,27 +9,19 @@
  #ifndef pds_dmm_qFinderDMM_h
  #define pds_dmm_qFinderDMM_h
  
-/**************************************************************************************************/
-
-#include "mothurout.h"
+#include "communitytype.h"
  
  /**************************************************************************************************/
  
-class qFinderDMM {
+class qFinderDMM : public CommunityTypeFinder {
    
  public:
      qFinderDMM(vector<vector<int> >, int);
-    double getNLL()     {    return currNLL;        }  
-    double getAIC()     {    return aic;            }
-    double getBIC()     {    return bic;            }
-    double getLogDet()  {    return logDeterminant; }
-    double getLaplace() {    return laplace;        }
-    void printZMatrix(string, vector<string>);
-    void printRelAbund(string, vector<string>);
-
+    void printFitData(ofstream&);
+    void printFitData(ostream&, double);
+    
  private:
-    MothurOut* m;
-    void kMeans();
+   
      void optimizeLambda();
      void calculatePiK();
  
@@ -37,31 +29,14 @@ private:
      void negativeLogDerivEvidenceLambdaPi(vector<double>&, vector<double>&);
      double getNegativeLogEvidence(vector<double>&, int);
      double getNegativeLogLikelihood();
-    vector<vector<double> > getHessian();
+    
      
      int lineMinimizeFletcher(vector<double>&, vector<double>&, double, double, double, double&, double&, vector<double>&, vector<double>&);
      int bfgs2_Solver(vector<double>&);//, double, double);
-    double cheb_eval(const double[], int, double);
-    double psi(double);
-    double psi1(double);
-
-    vector<vector<int> > countMatrix;
-    vector<vector<double> > zMatrix;
-    vector<vector<double> > lambdaMatrix;
-    vector<double> weights;
-    vector<vector<double> > error;
-    
-    int numPartitions;
-    int numSamples;
-    int numOTUs;
-    int currentPartition;
-    
-    double currNLL;
-    double aic;
-    double bic;
-    double logDeterminant;
-    double laplace;
      
+   
+
+        
  };
  
  /**************************************************************************************************/
diff --git a/qualityscores.cpp b/qualityscores.cpp

index 0c55650b33149148c80374a0b8c03d41a7a4ea53..3a1687bf9e174d77ddded9eaf331d6f1a78da734 100644 (file)
--- a/qualityscores.cpp
+++ b/qualityscores.cpp
@@ -130,7 +130,7 @@ string QualityScores::getName(){
  void QualityScores::printQScores(ofstream& qFile){
         try {
                 
-               double aveQScore = calculateAverage();
+               double aveQScore = calculateAverage(false);
                 
                 qFile << '>' << seqName << '\t' << aveQScore << endl;
                 
@@ -228,7 +228,7 @@ bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
  
  /**************************************************************************************************/
  
-bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
+bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold, bool logTransform){
         try {
                 string rawSequence = sequence.getUnaligned();
                 int seqLength = sequence.getNumBases();
@@ -240,12 +240,22 @@ bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshol
                 
                 int end = -1;
                 double rollingSum = 0.0000;
+        double value = 0.0;
                 
                 for(int i=0;i<seqLength;i++){
-
-                       rollingSum += (double)qScores[i];
+            
+            if (logTransform)   {
+                rollingSum += (double)pow(10.0, qScores[i]);
+                value = log10(rollingSum / (double)(i+1));
+                
+            } //Sum 10^Q
+            else                {
+                rollingSum += (double)qScores[i];
+                value = rollingSum / (double)(i+1);
+            }
+                       
                         
-                       if(rollingSum / (double)(i+1) < qThreshold){
+                       if(value < qThreshold){
                                 end = i;
                                 break;
                         }
@@ -269,7 +279,7 @@ bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshol
  
  /**************************************************************************************************/
  
-bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
+bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold, bool logTransform){
         try {
                 string rawSequence = sequence.getUnaligned();
                 int seqLength = sequence.getNumBases();
@@ -288,9 +298,12 @@ bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int
                         double windowSum = 0.0000;
  
                         for(int i=start;i<end;i++){
-                               windowSum += qScores[i];
+                if (logTransform)   {  windowSum += pow(10.0, qScores[i]);  }
+                else                {  windowSum += qScores[i];             }
                         }
-                       double windowAverage = windowSum / (double)(end-start);
+                       double windowAverage = 0.0;
+            if (logTransform)   { windowAverage = log10(windowSum / (double)(end-start)); }
+            else                { windowAverage = windowSum / (double)(end-start);      }
                                 
                         if(windowAverage < qThreshold){
                                 end = end - stepSize;
@@ -323,21 +336,25 @@ bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int
  
  /**************************************************************************************************/
  
-double QualityScores::calculateAverage(){
+double QualityScores::calculateAverage(bool logTransform){
         
         double aveQScore = 0.0000;
         
         for(int i=0;i<seqLength;i++){
-               aveQScore += (double) qScores[i];
+        if (logTransform)   {  aveQScore += pow(10.0, qScores[i]);  }
+        else                {  aveQScore += qScores[i];             }
         }
         aveQScore /= (double) seqLength;
+    
+    if (logTransform)   {  aveQScore = log10(aveQScore /(double) seqLength);  }
+    else                {  aveQScore /= (double) seqLength;                 }
         
         return aveQScore;
  }
  
  /**************************************************************************************************/
  
-bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
+bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage, bool logTransform){
         try {
                 string rawSequence = sequence.getUnaligned();
                 bool success = 0;       //guilty until proven innocent
@@ -347,7 +364,7 @@ bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
                         m->mothurOutEndLine();  
                 } 
                         
-               double aveQScore = calculateAverage();
+               double aveQScore = calculateAverage(logTransform);
                 
                 if(aveQScore >= qAverage)       {       success = 1;    }
                 else                                            {       success = 0;    }
diff --git a/qualityscores.h b/qualityscores.h

index 500d3e98db79b934effee830b65f54afb0023af3..a802636fd364071ab15aa84c148ebb38fead14b4 100644 (file)
--- a/qualityscores.h
+++ b/qualityscores.h
@@ -30,9 +30,9 @@ public:
         void trimQScores(int, int);
         void flipQScores();
         bool stripQualThreshold(Sequence&, double);
-       bool stripQualRollingAverage(Sequence&, double);
-       bool stripQualWindowAverage(Sequence&, int, int, double);
-       bool cullQualAverage(Sequence&, double);
+       bool stripQualRollingAverage(Sequence&, double, bool);
+       bool stripQualWindowAverage(Sequence&, int, int, double, bool);
+       bool cullQualAverage(Sequence&, double, bool);
         void updateQScoreErrorMap(map<char, vector<int> >&, string, int, int, int);
         void updateForwardMap(vector<vector<int> >&, int, int, int);
         void updateReverseMap(vector<vector<int> >&, int, int, int);
@@ -42,7 +42,7 @@ public:
         
  private:
         
-       double calculateAverage();
+       double calculateAverage(bool);
         MothurOut* m;
         vector<int> qScores;
         
diff --git a/seqsummarycommand.cpp b/seqsummarycommand.cpp

index e941b3a9a2ff500dd07e221712757a8b899526d2..70aa55d38502237388c520aeae892daf4891e73c 100644 (file)
--- a/seqsummarycommand.cpp
+++ b/seqsummarycommand.cpp
@@ -449,7 +449,8 @@ int SeqSummaryCommand::driverCreateSummary(vector<int>& startPosition, vector<in
  
                 bool done = false;
                 int count = 0;
-       
+        
+        
                 while (!done) {
                                 
                         if (m->control_pressed) { in.close(); outSummary.close(); return 1; }
@@ -479,7 +480,6 @@ int SeqSummaryCommand::driverCreateSummary(vector<int>& startPosition, vector<in
                                         ambigBases.push_back(current.getAmbigBases());
                                         longHomoPolymer.push_back(current.getLongHomoPolymer());
                                 }
-                               
                                 count++;
                                 outSummary << current.getName() << '\t';
                                 outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
diff --git a/shannonrange.cpp b/shannonrange.cpp

new file mode 100644 (file)

index 0000000..f26f08a
--- /dev/null
+++ b/shannonrange.cpp
@@ -0,0 +1,35 @@
+//
+//  shannonrange.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 1/3/14.
+//  Copyright (c) 2014 Schloss Lab. All rights reserved.
+//
+
+#include "shannonrange.h"
+
+/***********************************************************************/
+
+EstOutput RangeShannon::getValues(vector<SharedRAbundVector*> shared) {
+       try {
+        data.resize(3,0);
+        
+        double commSize = 1e20;
+        
+        SAbundVector sabund1 = shared[0]->getSAbundVector();
+        SAbundVector sabund2 = shared[1]->getSAbundVector();
+        
+        double sampleSize = 0;
+        for (int i = 0; i < sabund1.getNumBins(); i++) {  sampleSize += (sabund1.get(i) * sabund2.get(i));  }
+        int aux = ceil(pow((sampleSize+1), 0.33333));
+               
+               if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
+               
+               return data;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RangeShannon", "getValues");
+               exit(1);
+       }
+}
+/***********************************************************************/
diff --git a/shannonrange.h b/shannonrange.h

new file mode 100644 (file)

index 0000000..34b73f4
--- /dev/null
+++ b/shannonrange.h
@@ -0,0 +1,29 @@
+//
+//  shannonrange.h
+//  Mothur
+//
+//  Created by SarahsWork on 1/3/14.
+//  Copyright (c) 2014 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_shannonrange_h
+#define Mothur_shannonrange_h
+
+#include "calculator.h"
+
+/***********************************************************************/
+
+class RangeShannon : public Calculator  {
+       
+public:
+       RangeShannon() : Calculator("rangeshannon", 3, false) {};
+       EstOutput getValues(SAbundVector*) {return data;};
+    EstOutput getValues(vector<SharedRAbundVector*>);
+       string getCitation() { return "http://www.mothur.org/wiki/rangeshannon"; }
+};
+
+/***********************************************************************/
+
+
+
+#endif
diff --git a/sharedjsd.cpp b/sharedjsd.cpp

new file mode 100644 (file)

index 0000000..46cb977
--- /dev/null
+++ b/sharedjsd.cpp
@@ -0,0 +1,48 @@
+//
+//  sharedjsd.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 12/9/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "sharedjsd.h"
+
+/***********************************************************************/
+//KLD <- function(x,y) sum(x *log(x/y))
+//JSD<- function(x,y) sqrt(0.5 * KLD(x, (x+y)/2) + 0.5 * KLD(y, (x+y)/2))
+EstOutput JSD::getValues(vector<SharedRAbundVector*> shared) {
+       try {
+        
+               data.resize(1,0);
+        
+        double KLD1 = 0.0;
+        double KLD2 = 0.0;
+
+               for (int i = 0; i < shared[0]->getNumBins(); i++) {
+                       //store in temps to avoid multiple repetitive function calls
+                       double tempA = shared[0]->getAbundance(i);
+                       double tempB = shared[1]->getAbundance(i);
+            
+            if (tempA == 0) { tempA = 0.000001; }
+            if (tempB == 0) { tempB = 0.000001; }
+            
+            double denom = (tempA+tempB)/(double)2.0;
+           
+            if (tempA != 0) {  KLD1 += tempA * log(tempA/denom); } //KLD(x,m)
+            if (tempB != 0) {  KLD2 += tempB * log(tempB/denom); } //KLD(y,m)
+        }
+        
+               data[0] = sqrt((0.5*KLD1) + (0.5*KLD2));
+               
+               if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
+               
+               return data;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "JSD", "getValues");
+               exit(1);
+       }
+}
+
+/***********************************************************************/
diff --git a/sharedjsd.h b/sharedjsd.h

new file mode 100644 (file)

index 0000000..cec0faa
--- /dev/null
+++ b/sharedjsd.h
@@ -0,0 +1,30 @@
+//
+//  sharedjsd.h
+//  Mothur
+//
+//  Created by SarahsWork on 12/9/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_sharedjsd_h
+#define Mothur_sharedjsd_h
+
+#include "calculator.h"
+
+/***********************************************************************/
+//Jensen-Shannon divergence (JSD)
+class JSD : public Calculator  {
+       
+public:
+       JSD() :  Calculator("jsd", 1, false) {};
+       EstOutput getValues(SAbundVector*) {return data;};
+       EstOutput getValues(vector<SharedRAbundVector*>);
+       string getCitation() { return "http://www.mothur.org/wiki/JSD"; }
+private:
+       
+};
+
+/***********************************************************************/
+
+
+#endif
diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp

index 2c9399bda1dd363d04c1fed89ff9b12ea8c3aace..ef5e365fdfa8c356c3951c1dae74ae02bf41b978 100644 (file)
--- a/summarysharedcommand.cpp
+++ b/summarysharedcommand.cpp
@@ -17,7 +17,7 @@ vector<string> SummarySharedCommand::setParameters(){
                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
          CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
                 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
-               CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
+               CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
          CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
                 CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
          CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
@@ -294,6 +294,8 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                                                         sumCalculators.push_back(new MemEuclidean());
                                                 }else if (Estimators[i] == "mempearson") { 
                                                         sumCalculators.push_back(new MemPearson());
+                                               }else if (Estimators[i] == "jsd") {
+                                                       sumCalculators.push_back(new JSD());
                                                 }
                                         }
                                 }
diff --git a/summarysharedcommand.h b/summarysharedcommand.h

index 8ba9b9fcf3e40b745cf75ddbaccd2f6df8cde652..1b3f16469bfc64345631f489e4c6651895041de0 100644 (file)
--- a/summarysharedcommand.h
+++ b/summarysharedcommand.h
@@ -56,6 +56,7 @@
  #include "memchord.h"
  #include "memeuclidean.h"
  #include "mempearson.h"
+#include "sharedjsd.h"
  
  class SummarySharedCommand : public Command {
  
@@ -216,6 +217,8 @@ static DWORD WINAPI MySummarySharedThreadFunction(LPVOID lpParam){
                      sumCalculators.push_back(new MemEuclidean());
                  }else if (pDataArray->Estimators[i] == "mempearson") { 
                      sumCalculators.push_back(new MemPearson());
+                }else if (pDataArray->Estimators[i] == "jsd") {
+                    sumCalculators.push_back(new JSD());
                  }
              }
          }
diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp

index fd55edf25ede0b4de807bef17dadb861b44c03a8..33349decab52e90be19c928b44b9b07c8f6c710a 100644 (file)
--- a/trimflowscommand.cpp
+++ b/trimflowscommand.cpp
@@ -395,7 +395,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                 
                 if(line->start == 0){
                         flowFile >> numFlows; m->gobble(flowFile);
-                       scrapFlowFile << maxFlows << endl;
+                       scrapFlowFile << numFlows << endl;
                         trimFlowFile << maxFlows << endl;
                         if(allFiles){
                                 for(int i=0;i<thisBarcodePrimerComboFileNames.size();i++){
@@ -599,6 +599,7 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                                 else if(type == "REVERSE"){
                                         string oligoRC = reverseOligo(oligo);
                                         revPrimer.push_back(oligoRC);
+                    if (m->debug) { m->mothurOut("[DEBUG]: reverse oligos = " + oligoRC + ".\n"); }
                                 }
                                 else if(type == "BARCODE"){
                                         oligosFile >> group;
diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp

index 0d55d7c4a0b043f811d2d6b9f61ef159952ea1f9..951ce65d400bb36afc60df92e56e148c14ee271c 100644 (file)
--- a/trimseqscommand.cpp
+++ b/trimseqscommand.cpp
@@ -34,6 +34,7 @@ vector<string> TrimSeqsCommand::setParameters(){
                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
+        CommandParameter plogtransform("logtransform", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plogtransform);
                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
@@ -61,7 +62,7 @@ string TrimSeqsCommand::getHelpString(){
                 string helpString = "";
                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
-               helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
+               helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast, logtransform and allfiles.\n";
                 helpString += "The fasta parameter is required.\n";
                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
          helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
@@ -84,6 +85,7 @@ string TrimSeqsCommand::getHelpString(){
                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
+        helpString += "The logtransform parameter allows you to indicate you want the averages for the qwindowaverage, rollaverage and qaverage to be calculated using a logtransform. Default=F.\n";
                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
@@ -329,6 +331,9 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
              temp = validParameter.validFile(parameters, "keepforward", false);         if (temp == "not found") { temp = "F"; }
                         keepforward = m->isTrue(temp);
              
+            temp = validParameter.validFile(parameters, "logtransform", false);                if (temp == "not found") { temp = "F"; }
+                       logtransform = m->isTrue(temp);
+            
              temp = validParameter.validFile(parameters, "checkorient", false);         if (temp == "not found") { temp = "F"; }
                         reorient = m->isTrue(temp);
                         
@@ -444,8 +449,6 @@ int TrimSeqsCommand::execute(){
                         }
                 }
          
-        if (!pairedOligos) { if (reorient) { m->mothurOut("[WARNING]: You cannot use reorient without paired barcodes or primers, skipping."); m->mothurOutEndLine(); reorient = false; } }
-        
          if (m->control_pressed) { return 0; }
              
          //fills lines and qlines
@@ -695,7 +698,21 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                  rpairedBarcodes[it->first] = tempPair;
                   //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
              }
-            rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
+            int index = rpairedBarcodes.size();
+            for (map<string, int>::iterator it = barcodes.begin(); it != barcodes.end(); it++) {
+                oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+                rpairedBarcodes[index] = tempPair; index++;
+                //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
+            }
+            
+            index = rpairedPrimers.size();
+            for (map<string, int>::iterator it = primers.begin(); it != primers.end(); it++) {
+                oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+                rpairedPrimers[index] = tempPair; index++;
+                //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
+            }
+
+            rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size(); 
          }
          
                 while (moreSeqs) {
@@ -819,9 +836,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                         int origLength = currSeq.getNumBases();
                                         
                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
-                                       else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
-                                       else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
-                                       else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
+                                       else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage, logtransform);                            }
+                                       else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage, logtransform);        }
+                                       else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage, logtransform);     }
                                         else                                            {       success = 1;                            }
                                         
                                         //you don't want to trim, if it fails above then scrap it
@@ -1174,7 +1191,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                                lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
                                                pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
                                               primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
-                                              qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
+                                              qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, logtransform, 
                                               minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
                         pDataArray.push_back(tempTrim);
              
@@ -1646,7 +1663,7 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
          if (hasPairedBarcodes || hasPrimer) {
              pairedOligos = true;
              if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true;  m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine();  return 0; }
-        }else if (reorient) { m->mothurOut("[Warning]: cannot use checkorient without paired barcodes or primers, ignoring.\n"); m->mothurOutEndLine(); reorient = false; }
+        }
          
                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
          
diff --git a/trimseqscommand.h b/trimseqscommand.h

index 53f04d37965d55c189447c7cbd1c702ac93dc5ac..14eed17e236a333583d461d61482a1d541c6a9cf 100644 (file)
--- a/trimseqscommand.h
+++ b/trimseqscommand.h
@@ -55,7 +55,7 @@ private:
         bool abort, createGroup;
         string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
         
-       bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient;
+       bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient, logtransform;
         int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
         int qWindowSize, qWindowStep, keepFirst, removeLast;
         double qRollAverage, qThreshold, qWindowAverage, qAverage;
@@ -98,7 +98,7 @@ struct trimData {
      vector<vector<string> > qualFileNames;
      vector<vector<string> > nameFileNames;
      unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
-    bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient;
+    bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient, logtransform;
         int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
         int qWindowSize, qWindowStep, keepFirst, removeLast, count;
         double qRollAverage, qThreshold, qWindowAverage, qAverage;
@@ -121,7 +121,7 @@ struct trimData {
         trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
                        int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, map<int, oligosPair> pbr, map<int, oligosPair> ppr, bool po,
                        vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
-                      int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
+                      int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage, bool lt,
                        int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map<string, string> nm, map<string, int> ncount) {
          filename = fn;
          qFileName = qn;
@@ -174,6 +174,7 @@ struct trimData {
          qThreshold = Threshold;
          qAverage = Average;
          qRollAverage = RollAverage;
+        logtransform = lt;
          minLength = minL;
          maxAmbig = maxA;
          maxHomoP = maxH;
@@ -276,6 +277,19 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                  oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
                  rpairedBarcodes[it->first] = tempPair;
              }
+            
+            int index = rpairedBarcodes.size();
+            for (map<string, int>::iterator it = pDataArray->barcodes.begin(); it != pDataArray->barcodes.end(); it++) {
+                oligosPair tempPair("", trimOligos->reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+                rpairedBarcodes[index] = tempPair; index++;
+            }
+            
+            index = rpairedPrimers.size();
+            for (map<string, int>::iterator it = pDataArray->primers.begin(); it != pDataArray->primers.end(); it++) {
+                oligosPair tempPair("", trimOligos->reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+                rpairedPrimers[index] = tempPair; index++;
+            }
+
              rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
          }
          
@@ -416,9 +430,9 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                         int origLength = currSeq.getNumBases();
                                         
                                         if(pDataArray->qThreshold != 0)                 {       success = currQual.stripQualThreshold(currSeq, pDataArray->qThreshold);                 }
-                                       else if(pDataArray->qAverage != 0)              {       success = currQual.cullQualAverage(currSeq, pDataArray->qAverage);                              }
-                                       else if(pDataArray->qRollAverage != 0)  {       success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage);  }
-                                       else if(pDataArray->qWindowAverage != 0){       success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage);       }
+                                       else if(pDataArray->qAverage != 0)              {       success = currQual.cullQualAverage(currSeq, pDataArray->qAverage, pDataArray->logtransform);                            }
+                                       else if(pDataArray->qRollAverage != 0)  {       success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage, pDataArray->logtransform);        }
+                                       else if(pDataArray->qWindowAverage != 0){       success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage, pDataArray->logtransform);     }
                                         else                                            {       success = 1;                            }
                                         
                                         //you don't want to trim, if it fails above then scrap it
diff --git a/validcalculator.cpp b/validcalculator.cpp

index 7df7c4e28d65cb2f7853500728075e8edc161723..4718e2ff89f96a28013a19dc1ca2f51ae93d924e 100644 (file)
--- a/validcalculator.cpp
+++ b/validcalculator.cpp
@@ -76,6 +76,7 @@
  #include "mempearson.h"
  #include "sharedsobs.h"
  #include "sharednseqs.h"
+#include "sharedjsd.h"
  
  
  /********************************************************************/
@@ -206,6 +207,7 @@ void ValidCalculators::printCitations(vector<string> Estimators) {
                                 }else if (Estimators[i] == "mempearson") { Calculator* temp = new MemPearson(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
                                 }else if (Estimators[i] == "sharedobserved") { Calculator* temp = new SharedSobs(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
                                 }else if (Estimators[i] == "kulczynski") { Calculator* temp = new Kulczynski(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
+                }else if (Estimators[i] == "jsd") { Calculator* temp = new JSD(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
                                 }else { m->mothurOut("[ERROR]: Missing else if for " + Estimators[i] + " in printCitations."); m->mothurOutEndLine(); }
                         }else { m->mothurOut(Estimators[i] + " is not a valid calculator, no citation will be given."); m->mothurOutEndLine(); }
                 }
@@ -456,6 +458,7 @@ void ValidCalculators::initialShared() {
                 shared["memchord"]                              = "memchord";
                 shared["memeuclidean"]                  = "memeuclidean";
                 shared["mempearson"]                    = "mempearson";
+        shared["jsd"]                          = "jsd";
                 shared["default"]                   = "default";
         }
         catch(exception& e) {
@@ -569,6 +572,7 @@ void ValidCalculators::initialSharedSummary() {
                 sharedsummary["memchord"]                               = "memchord";
                 sharedsummary["memeuclidean"]                   = "memeuclidean";
                 sharedsummary["mempearson"]                             = "mempearson";
+        sharedsummary["jsd"]                           = "jsd";
                 sharedsummary["default"]                                = "default";
         }
         catch(exception& e) {
@@ -733,6 +737,7 @@ void ValidCalculators::initialMatrix() {
                 matrix["memchord"]                              = "memchord";
                 matrix["memeuclidean"]                  = "memeuclidean";
                 matrix["mempearson"]                            = "mempearson";
+        matrix["jsd"]                          = "jsd";
                 
         }
         catch(exception& e) {
author	Sarah Westcott <mothur.westcott@gmail.com>
	Thu, 9 Jan 2014 14:44:50 +0000 (09:44 -0500)
committer	Sarah Westcott <mothur.westcott@gmail.com>
	Thu, 9 Jan 2014 14:44:50 +0000 (09:44 -0500)
Mothur.xcodeproj/project.pbxproj		patch \| blob \| history
collectsharedcommand.cpp		patch \| blob \| history
communitytype.cpp	[new file with mode: 0644]	patch \| blob
communitytype.h	[new file with mode: 0644]	patch \| blob
flowdata.cpp		patch \| blob \| history
getmetacommunitycommand.cpp		patch \| blob \| history
getmetacommunitycommand.h		patch \| blob \| history
getoturepcommand.cpp		patch \| blob \| history
kmeans.cpp	[new file with mode: 0644]	patch \| blob
kmeans.h	[new file with mode: 0644]	patch \| blob
linearalgebra.cpp		patch \| blob \| history
linearalgebra.h		patch \| blob \| history
matrixoutputcommand.cpp		patch \| blob \| history
matrixoutputcommand.h		patch \| blob \| history
metastatscommand.cpp		patch \| blob \| history
pam.cpp	[new file with mode: 0644]	patch \| blob
pam.h	[new file with mode: 0644]	patch \| blob
qFinderDMM.cpp		patch \| blob \| history
qFinderDMM.h		patch \| blob \| history
qualityscores.cpp		patch \| blob \| history
qualityscores.h		patch \| blob \| history
seqsummarycommand.cpp		patch \| blob \| history
shannonrange.cpp	[new file with mode: 0644]	patch \| blob
shannonrange.h	[new file with mode: 0644]	patch \| blob
sharedjsd.cpp	[new file with mode: 0644]	patch \| blob
sharedjsd.h	[new file with mode: 0644]	patch \| blob
summarysharedcommand.cpp		patch \| blob \| history
summarysharedcommand.h		patch \| blob \| history
trimflowscommand.cpp		patch \| blob \| history
trimseqscommand.cpp		patch \| blob \| history
trimseqscommand.h		patch \| blob \| history
validcalculator.cpp		patch \| blob \| history