A70056EB156AB6E500924A2D /* removeotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056EA156AB6E500924A2D /* removeotulabelscommand.cpp */; };
A70332B712D3A13400761E33 /* makefile in Sources */ = {isa = PBXBuildFile; fileRef = A70332B512D3A13400761E33 /* makefile */; };
A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7128B1C16B7002600723BE4 /* getdistscommand.cpp */; };
+ A7132EB3184E792700AAA402 /* communitytype.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7132EB2184E792700AAA402 /* communitytype.cpp */; };
A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; };
A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; };
A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7190B201768E0DF00A9AFA6 /* lefsecommand.cpp */; };
A721AB71161C572A009860A1 /* kmernode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6D161C572A009860A1 /* kmernode.cpp */; };
A721AB72161C572A009860A1 /* kmertree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6F161C572A009860A1 /* kmertree.cpp */; };
A721AB77161C573B009860A1 /* taxonomynode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB73161C573B009860A1 /* taxonomynode.cpp */; };
+ A7222D731856277C0055A993 /* sharedjsd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7222D721856277C0055A993 /* sharedjsd.cpp */; };
A724D2B7153C8628000A826F /* makebiomcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A724D2B6153C8628000A826F /* makebiomcommand.cpp */; };
A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; };
A7386C251619E52300651424 /* abstractdecisiontree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C241619E52200651424 /* abstractdecisiontree.cpp */; };
A79EEF8616971D4A0006DEC1 /* filtersharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79EEF8516971D4A0006DEC1 /* filtersharedcommand.cpp */; };
A7A0671A1562946F0095C8C5 /* listotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */; };
A7A0671F1562AC3E0095C8C5 /* makecontigscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */; };
+ A7A09B1018773C0E00FAA081 /* shannonrange.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A09B0F18773C0E00FAA081 /* shannonrange.cpp */; };
A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */; };
A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */; };
A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; };
A7B0231516B8244C006BA09E /* removedistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0231416B8244B006BA09E /* removedistscommand.cpp */; };
+ A7B093C018579F0400843CD1 /* pam.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B093BF18579F0400843CD1 /* pam.cpp */; };
A7BF221414587886000AD524 /* myPerseus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF221214587886000AD524 /* myPerseus.cpp */; };
A7BF2232145879B2000AD524 /* chimeraperseuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */; };
A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; };
A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */; };
A7C7DAB915DA758B0059B0CF /* sffmultiplecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */; };
A7CFA4311755401800D9ED4D /* renameseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */; };
+ A7D395C4184FA3A200A350D7 /* kmeans.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D395C3184FA3A200A350D7 /* kmeans.cpp */; };
A7D755DA1535F679009BF21A /* treereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D755D91535F679009BF21A /* treereader.cpp */; };
A7D9378A17B146B5001E90B0 /* wilcox.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D9378917B146B5001E90B0 /* wilcox.cpp */; };
A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */; };
A70332B512D3A13400761E33 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = "<group>"; };
A7128B1A16B7001200723BE4 /* getdistscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getdistscommand.h; sourceTree = "<group>"; };
A7128B1C16B7002600723BE4 /* getdistscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getdistscommand.cpp; sourceTree = "<group>"; };
+ A7132EAE184E76EB00AAA402 /* communitytype.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = communitytype.h; sourceTree = "<group>"; };
+ A7132EB2184E792700AAA402 /* communitytype.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = communitytype.cpp; sourceTree = "<group>"; };
A713EBAA12DC7613000092AC /* readphylipvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylipvector.h; sourceTree = "<group>"; };
A713EBAB12DC7613000092AC /* readphylipvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylipvector.cpp; sourceTree = "<group>"; };
A713EBEB12DC7C5E000092AC /* nmdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = nmdscommand.h; sourceTree = "<group>"; };
A721AB70161C572A009860A1 /* kmertree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kmertree.h; sourceTree = "<group>"; };
A721AB73161C573B009860A1 /* taxonomynode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = taxonomynode.cpp; sourceTree = "<group>"; };
A721AB74161C573B009860A1 /* taxonomynode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = taxonomynode.h; sourceTree = "<group>"; };
+ A7222D711856276C0055A993 /* sharedjsd.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sharedjsd.h; sourceTree = "<group>"; };
+ A7222D721856277C0055A993 /* sharedjsd.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedjsd.cpp; sourceTree = "<group>"; };
A724D2B4153C8600000A826F /* makebiomcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makebiomcommand.h; sourceTree = "<group>"; };
A724D2B6153C8628000A826F /* makebiomcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makebiomcommand.cpp; sourceTree = "<group>"; };
A727864212E9E28C00F86ABA /* removerarecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removerarecommand.h; sourceTree = "<group>"; };
A7A0671C156294810095C8C5 /* listotulabelscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = listotulabelscommand.h; sourceTree = "<group>"; };
A7A0671D1562AC230095C8C5 /* makecontigscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makecontigscommand.h; sourceTree = "<group>"; };
A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makecontigscommand.cpp; sourceTree = "<group>"; };
+ A7A09B0E18773BF700FAA081 /* shannonrange.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = shannonrange.h; sourceTree = "<group>"; };
+ A7A09B0F18773C0E00FAA081 /* shannonrange.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = shannonrange.cpp; sourceTree = "<group>"; };
A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sortseqscommand.cpp; sourceTree = "<group>"; };
A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sortseqscommand.h; sourceTree = "<group>"; };
A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = otuassociationcommand.cpp; sourceTree = "<group>"; };
A7AACFBA132FE008003D6C4D /* currentfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = currentfile.h; sourceTree = "<group>"; };
A7B0231416B8244B006BA09E /* removedistscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removedistscommand.cpp; sourceTree = "<group>"; };
A7B0231716B8245D006BA09E /* removedistscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removedistscommand.h; sourceTree = "<group>"; };
+ A7B093BE18579EF600843CD1 /* pam.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = pam.h; sourceTree = "<group>"; };
+ A7B093BF18579F0400843CD1 /* pam.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pam.cpp; sourceTree = "<group>"; };
A7BF221214587886000AD524 /* myPerseus.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = myPerseus.cpp; sourceTree = "<group>"; };
A7BF221314587886000AD524 /* myPerseus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myPerseus.h; sourceTree = "<group>"; };
A7BF2230145879B2000AD524 /* chimeraperseuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraperseuscommand.h; sourceTree = "<group>"; };
A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sffmultiplecommand.cpp; sourceTree = "<group>"; };
A7CFA42F1755400500D9ED4D /* renameseqscommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = renameseqscommand.h; sourceTree = "<group>"; };
A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = renameseqscommand.cpp; sourceTree = "<group>"; };
+ A7D395C2184FA39300A350D7 /* kmeans.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = kmeans.h; sourceTree = "<group>"; };
+ A7D395C3184FA3A200A350D7 /* kmeans.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = kmeans.cpp; sourceTree = "<group>"; };
A7D755D71535F665009BF21A /* treereader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treereader.h; sourceTree = "<group>"; };
A7D755D91535F679009BF21A /* treereader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treereader.cpp; sourceTree = "<group>"; };
A7D9378917B146B5001E90B0 /* wilcox.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = wilcox.cpp; sourceTree = "<group>"; };
A7E9B6B112D37EC400DA6239 /* commandoptionparser.cpp */,
A7E9B6B212D37EC400DA6239 /* commandoptionparser.hpp */,
A7DAAFA3133A254E003956EB /* commandparameter.h */,
+ A7D395C1184FA34300A350D7 /* communitytype */,
A7E9B6B412D37EC400DA6239 /* completelinkage.cpp */,
A7E9BA4212D3960D00DA6239 /* containers */,
A7E9B6B612D37EC400DA6239 /* consensus.h */,
A7E9B77C12D37EC400DA6239 /* overlap.hpp */,
A7E9B79B12D37EC400DA6239 /* progress.cpp */,
A7E9B79C12D37EC400DA6239 /* progress.hpp */,
- A7548FAF171440ED00B1F05A /* qFinderDMM.h */,
- A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */,
A77B7187173D4041002163C2 /* randomnumber.h */,
A77B7186173D4041002163C2 /* randomnumber.cpp */,
A7E9B7A512D37EC400DA6239 /* rarecalc.cpp */,
name = fortran;
sourceTree = "<group>";
};
+ A7D395C1184FA34300A350D7 /* communitytype */ = {
+ isa = PBXGroup;
+ children = (
+ A7132EAE184E76EB00AAA402 /* communitytype.h */,
+ A7132EB2184E792700AAA402 /* communitytype.cpp */,
+ A7D395C2184FA39300A350D7 /* kmeans.h */,
+ A7D395C3184FA3A200A350D7 /* kmeans.cpp */,
+ A7B093BE18579EF600843CD1 /* pam.h */,
+ A7B093BF18579F0400843CD1 /* pam.cpp */,
+ A7548FAF171440ED00B1F05A /* qFinderDMM.h */,
+ A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */,
+ );
+ name = communitytype;
+ sourceTree = "<group>";
+ };
A7E9BA3812D3956100DA6239 /* commands */ = {
isa = PBXGroup;
children = (
A7E9B7E612D37EC400DA6239 /* shannon.h */,
A7E9B7E712D37EC400DA6239 /* shannoneven.cpp */,
A7E9B7E812D37EC400DA6239 /* shannoneven.h */,
+ A7A09B0E18773BF700FAA081 /* shannonrange.h */,
+ A7A09B0F18773C0E00FAA081 /* shannonrange.cpp */,
A7E9B7E912D37EC400DA6239 /* sharedace.cpp */,
A7E9B7EA12D37EC400DA6239 /* sharedace.h */,
A7E9B7EC12D37EC400DA6239 /* sharedanderbergs.cpp */,
A7E9B7F912D37EC400DA6239 /* sharedjclass.h */,
A7E9B7FA12D37EC400DA6239 /* sharedjest.cpp */,
A7E9B7FB12D37EC400DA6239 /* sharedjest.h */,
+ A7222D711856276C0055A993 /* sharedjsd.h */,
+ A7222D721856277C0055A993 /* sharedjsd.cpp */,
A7E9B7FC12D37EC400DA6239 /* sharedkstest.cpp */,
A7E9B7FD12D37EC400DA6239 /* sharedkstest.h */,
A7E9B7FE12D37EC400DA6239 /* sharedkulczynski.cpp */,
A7D9378A17B146B5001E90B0 /* wilcox.cpp in Sources */,
A7F24FC317EA36600021DC9A /* classifyrfsharedcommand.cpp in Sources */,
A747EC71181EA0F900345732 /* sracommand.cpp in Sources */,
+ A7132EB3184E792700AAA402 /* communitytype.cpp in Sources */,
+ A7D395C4184FA3A200A350D7 /* kmeans.cpp in Sources */,
+ A7222D731856277C0055A993 /* sharedjsd.cpp in Sources */,
+ A7B093C018579F0400843CD1 /* pam.cpp in Sources */,
+ A7A09B1018773C0E00FAA081 /* shannonrange.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
#include "memchord.h"
#include "memeuclidean.h"
#include "mempearson.h"
+#include "sharedjsd.h"
//**********************************************************************************************************************
CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pshared);
CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq);
- CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
+ CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
else if (type == "memchord") { pattern = "[filename],memchord"; }
else if (type == "memeuclidean") { pattern = "[filename],memeuclidean"; }
else if (type == "mempearson") { pattern = "[filename],mempearson"; }
+ else if (type == "jsd") { pattern = "[filename],jsd"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
return pattern;
outputTypes["memchord"] = tempOutNames;
outputTypes["memeuclidean"] = tempOutNames;
outputTypes["mempearson"] = tempOutNames;
+ outputTypes["jsd"] = tempOutNames;
}
catch(exception& e) {
outputTypes["memchord"] = tempOutNames;
outputTypes["memeuclidean"] = tempOutNames;
outputTypes["mempearson"] = tempOutNames;
+ outputTypes["jsd"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
}else if (Estimators[i] == "mempearson") {
cDisplays.push_back(new CollectDisplay(new MemPearson(), new SharedOneColumnFile(getOutputFileName("mempearson", variables))));
outputNames.push_back(getOutputFileName("mempearson", variables)); outputTypes["mempearson"].push_back(getOutputFileName("mempearson", variables));
+ }else if (Estimators[i] == "jsd") {
+ cDisplays.push_back(new CollectDisplay(new JSD(), new SharedOneColumnFile(getOutputFileName("jsd", variables))));
+ outputNames.push_back(getOutputFileName("jsd", variables)); outputTypes["jsd"].push_back(getOutputFileName("jsd", variables));
}
}
--- /dev/null
+//
+// communitytype.cpp
+// Mothur
+//
+// Created by SarahsWork on 12/3/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "communitytype.h"
+
+/**************************************************************************************************/
+
+//can we get these psi/psi1 calculations into their own math class?
+//psi calcualtions swiped from gsl library...
+
+static const double psi_cs[23] = {
+ -.038057080835217922,
+ .491415393029387130,
+ -.056815747821244730,
+ .008357821225914313,
+ -.001333232857994342,
+ .000220313287069308,
+ -.000037040238178456,
+ .000006283793654854,
+ -.000001071263908506,
+ .000000183128394654,
+ -.000000031353509361,
+ .000000005372808776,
+ -.000000000921168141,
+ .000000000157981265,
+ -.000000000027098646,
+ .000000000004648722,
+ -.000000000000797527,
+ .000000000000136827,
+ -.000000000000023475,
+ .000000000000004027,
+ -.000000000000000691,
+ .000000000000000118,
+ -.000000000000000020
+};
+
+static double apsi_cs[16] = {
+ -.0204749044678185,
+ -.0101801271534859,
+ .0000559718725387,
+ -.0000012917176570,
+ .0000000572858606,
+ -.0000000038213539,
+ .0000000003397434,
+ -.0000000000374838,
+ .0000000000048990,
+ -.0000000000007344,
+ .0000000000001233,
+ -.0000000000000228,
+ .0000000000000045,
+ -.0000000000000009,
+ .0000000000000002,
+ -.0000000000000000
+};
+
+/**************************************************************************************************/
+/* coefficients for Maclaurin summation in hzeta()
+ * B_{2j}/(2j)!
+ */
+static double hzeta_c[15] = {
+ 1.00000000000000000000000000000,
+ 0.083333333333333333333333333333,
+ -0.00138888888888888888888888888889,
+ 0.000033068783068783068783068783069,
+ -8.2671957671957671957671957672e-07,
+ 2.0876756987868098979210090321e-08,
+ -5.2841901386874931848476822022e-10,
+ 1.3382536530684678832826980975e-11,
+ -3.3896802963225828668301953912e-13,
+ 8.5860620562778445641359054504e-15,
+ -2.1748686985580618730415164239e-16,
+ 5.5090028283602295152026526089e-18,
+ -1.3954464685812523340707686264e-19,
+ 3.5347070396294674716932299778e-21,
+ -8.9535174270375468504026113181e-23
+};
+
+/**************************************************************************************************/
+void CommunityTypeFinder::printSilData(ofstream& out, double chi, vector<double> sils){
+ try {
+ out << setprecision (6) << numPartitions << '\t' << chi << '\t';
+ for (int i = 0; i < sils.size(); i++) {
+ out << sils[i] << '\t';
+ }
+ out << endl;
+
+ return;
+ }
+ catch(exception& e){
+ m->errorOut(e, "CommunityTypeFinder", "printSilData");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+void CommunityTypeFinder::printSilData(ostream& out, double chi, vector<double> sils){
+ try {
+ out << setprecision (6) << numPartitions << '\t' << chi << '\t';
+ m->mothurOutJustToLog(toString(numPartitions) + '\t' + toString(chi) + '\t');
+ for (int i = 0; i < sils.size(); i++) {
+ out << sils[i] << '\t';
+ m->mothurOutJustToLog(toString(sils[i]) + '\t');
+ }
+ out << endl;
+ m->mothurOutJustToLog("\n");
+
+ return;
+ }
+ catch(exception& e){
+ m->errorOut(e, "CommunityTypeFinder", "printSilData");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+void CommunityTypeFinder::printZMatrix(string fileName, vector<string> sampleName){
+ try {
+ ofstream printMatrix;
+ m->openOutputFile(fileName, printMatrix); //(fileName.c_str());
+ printMatrix.setf(ios::fixed, ios::floatfield);
+ printMatrix.setf(ios::showpoint);
+
+ for(int i=0;i<numPartitions;i++){ printMatrix << "\tPartition_" << i+1; } printMatrix << endl;
+
+ for(int i=0;i<numSamples;i++){
+ printMatrix << sampleName[i];
+ for(int j=0;j<numPartitions;j++){
+ printMatrix << setprecision(4) << '\t' << zMatrix[j][i];
+ }
+ printMatrix << endl;
+ }
+ printMatrix.close();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CommunityTypeFinder", "printZMatrix");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void CommunityTypeFinder::printRelAbund(string fileName, vector<string> otuNames){
+ try {
+ ofstream printRA;
+ m->openOutputFile(fileName, printRA); //(fileName.c_str());
+ printRA.setf(ios::fixed, ios::floatfield);
+ printRA.setf(ios::showpoint);
+
+ vector<double> totals(numPartitions, 0.0000);
+ for(int i=0;i<numPartitions;i++){
+ for(int j=0;j<numOTUs;j++){
+ totals[i] += exp(lambdaMatrix[i][j]);
+ }
+ }
+
+ printRA << "Taxon";
+ for(int i=0;i<numPartitions;i++){
+ printRA << "\tPartition_" << i+1 << '_' << setprecision(4) << totals[i];
+ printRA << "\tPartition_" << i+1 <<"_LCI" << "\tPartition_" << i+1 << "_UCI";
+ }
+ printRA << endl;
+
+ for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
+ printRA << otuNames[i];
+ for(int j=0;j<numPartitions;j++){
+
+ if(error[j][i] >= 0.0000){
+ double std = sqrt(error[j][i]);
+ printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
+ printRA << '\t' << 100 * exp(lambdaMatrix[j][i] - 2.0 * std) / totals[j];
+ printRA << '\t' << 100 * exp(lambdaMatrix[j][i] + 2.0 * std) / totals[j];
+ }
+ else{
+ printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
+ printRA << '\t' << "NA";
+ printRA << '\t' << "NA";
+ }
+ }
+ printRA << endl;
+ }
+
+ printRA.close();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CommunityTypeFinder", "printRelAbund");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+vector<vector<double> > CommunityTypeFinder::getHessian(){
+ try {
+ vector<double> alpha(numOTUs, 0.0000);
+ double alphaSum = 0.0000;
+
+ vector<double> pi = zMatrix[currentPartition];
+ vector<double> psi_ajk(numOTUs, 0.0000);
+ vector<double> psi_cjk(numOTUs, 0.0000);
+ vector<double> psi1_ajk(numOTUs, 0.0000);
+ vector<double> psi1_cjk(numOTUs, 0.0000);
+
+ for(int j=0;j<numOTUs;j++){
+
+ if (m->control_pressed) { break; }
+
+ alpha[j] = exp(lambdaMatrix[currentPartition][j]);
+ alphaSum += alpha[j];
+
+ for(int i=0;i<numSamples;i++){
+ double X = (double) countMatrix[i][j];
+
+ psi_ajk[j] += pi[i] * psi(alpha[j]);
+ psi1_ajk[j] += pi[i] * psi1(alpha[j]);
+
+ psi_cjk[j] += pi[i] * psi(alpha[j] + X);
+ psi1_cjk[j] += pi[i] * psi1(alpha[j] + X);
+ }
+ }
+
+
+ double psi_Ck = 0.0000;
+ double psi1_Ck = 0.0000;
+
+ double weight = 0.0000;
+
+ for(int i=0;i<numSamples;i++){
+ if (m->control_pressed) { break; }
+ weight += pi[i];
+ double sum = 0.0000;
+ for(int j=0;j<numOTUs;j++){ sum += alpha[j] + countMatrix[i][j]; }
+
+ psi_Ck += pi[i] * psi(sum);
+ psi1_Ck += pi[i] * psi1(sum);
+ }
+
+ double psi_Ak = weight * psi(alphaSum);
+ double psi1_Ak = weight * psi1(alphaSum);
+
+ vector<vector<double> > hessian(numOTUs);
+ for(int i=0;i<numOTUs;i++){ hessian[i].assign(numOTUs, 0.0000); }
+
+ for(int i=0;i<numOTUs;i++){
+ if (m->control_pressed) { break; }
+ double term1 = -alpha[i] * (- psi_ajk[i] + psi_Ak + psi_cjk[i] - psi_Ck);
+ double term2 = -alpha[i] * alpha[i] * (-psi1_ajk[i] + psi1_Ak + psi1_cjk[i] - psi1_Ck);
+ double term3 = 0.1 * alpha[i];
+
+ hessian[i][i] = term1 + term2 + term3;
+
+ for(int j=0;j<i;j++){
+ hessian[i][j] = - alpha[i] * alpha[j] * (psi1_Ak - psi1_Ck);
+ hessian[j][i] = hessian[i][j];
+ }
+ }
+
+ return hessian;
+ }
+ catch(exception& e){
+ m->errorOut(e, "CommunityTypeFinder", "getHessian");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+double CommunityTypeFinder::psi1(double xx){
+ try {
+
+ /* Euler-Maclaurin summation formula
+ * [Moshier, p. 400, with several typo corrections]
+ */
+
+ double s = 2.0000;
+ const int jmax = 12;
+ const int kmax = 10;
+ int j, k;
+ const double pmax = pow(kmax + xx, -s);
+ double scp = s;
+ double pcp = pmax / (kmax + xx);
+ double value = pmax*((kmax+xx)/(s-1.0) + 0.5);
+
+ for(k=0; k<kmax; k++) {
+ if (m->control_pressed) { return 0; }
+ value += pow(k + xx, -s);
+ }
+
+ for(j=0; j<=jmax; j++) {
+ if (m->control_pressed) { return 0; }
+ double delta = hzeta_c[j+1] * scp * pcp;
+ value += delta;
+
+ if(fabs(delta/value) < 0.5*EPSILON) break;
+
+ scp *= (s+2*j+1)*(s+2*j+2);
+ pcp /= (kmax + xx)*(kmax + xx);
+ }
+
+ return value;
+ }
+ catch(exception& e){
+ m->errorOut(e, "CommunityTypeFinder", "psi1");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+double CommunityTypeFinder::psi(double xx){
+ try {
+ double psiX = 0.0000;
+
+ if(xx < 1.0000){
+
+ double t1 = 1.0 / xx;
+ psiX = cheb_eval(psi_cs, 22, 2.0*xx-1.0);
+ psiX = -t1 + psiX;
+
+ }
+ else if(xx < 2.0000){
+
+ const double v = xx - 1.0;
+ psiX = cheb_eval(psi_cs, 22, 2.0*v-1.0);
+
+ }
+ else{
+ const double t = 8.0/(xx*xx)-1.0;
+ psiX = cheb_eval(apsi_cs, 15, t);
+ psiX += log(xx) - 0.5/xx;
+ }
+
+ return psiX;
+ }
+ catch(exception& e){
+ m->errorOut(e, "CommunityTypeFinder", "psi");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+double CommunityTypeFinder::cheb_eval(const double seriesData[], int order, double xx){
+ try {
+ double d = 0.0000;
+ double dd = 0.0000;
+
+ double x2 = xx * 2.0000;
+
+ for(int j=order;j>=1;j--){
+ if (m->control_pressed) { return 0; }
+ double temp = d;
+ d = x2 * d - dd + seriesData[j];
+ dd = temp;
+ }
+
+ d = xx * d - dd + 0.5 * seriesData[0];
+ return d;
+ }
+ catch(exception& e){
+ m->errorOut(e, "CommunityTypeFinder", "cheb_eval");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+int CommunityTypeFinder::findkMeans(){
+ try {
+ error.resize(numPartitions); for (int i = 0; i < numPartitions; i++) { error[i].resize(numOTUs, 0.0); }
+ vector<vector<double> > relativeAbundance(numSamples);
+ vector<vector<double> > alphaMatrix;
+
+ alphaMatrix.resize(numPartitions);
+ lambdaMatrix.resize(numPartitions);
+ for(int i=0;i<numPartitions;i++){
+ alphaMatrix[i].assign(numOTUs, 0);
+ lambdaMatrix[i].assign(numOTUs, 0);
+ }
+
+ //get relative abundance
+ for(int i=0;i<numSamples;i++){
+ if (m->control_pressed) { return 0; }
+ int groupTotal = 0;
+
+ relativeAbundance[i].assign(numOTUs, 0.0);
+
+ for(int j=0;j<numOTUs;j++){
+ groupTotal += countMatrix[i][j];
+ }
+ for(int j=0;j<numOTUs;j++){
+ relativeAbundance[i][j] = countMatrix[i][j] / (double)groupTotal;
+ }
+ }
+
+ //randomly assign samples into partitions
+ zMatrix.resize(numPartitions);
+ for(int i=0;i<numPartitions;i++){
+ zMatrix[i].assign(numSamples, 0);
+ }
+
+ for(int i=0;i<numSamples;i++){
+ zMatrix[rand()%numPartitions][i] = 1;
+ }
+
+ double maxChange = 1;
+ int maxIters = 1000;
+ int iteration = 0;
+
+ weights.assign(numPartitions, 0);
+
+ while(maxChange > 1e-6 && iteration < maxIters){
+
+ if (m->control_pressed) { return 0; }
+ //calcualte average relative abundance
+ maxChange = 0.0000;
+ for(int i=0;i<numPartitions;i++){
+
+ double normChange = 0.0;
+
+ weights[i] = 0;
+
+ for(int j=0;j<numSamples;j++){
+ weights[i] += (double)zMatrix[i][j];
+ }
+
+ vector<double> averageRelativeAbundance(numOTUs, 0);
+ for(int j=0;j<numOTUs;j++){
+ for(int k=0;k<numSamples;k++){
+ averageRelativeAbundance[j] += zMatrix[i][k] * relativeAbundance[k][j];
+ }
+ }
+
+ for(int j=0;j<numOTUs;j++){
+ averageRelativeAbundance[j] /= weights[i];
+ double difference = averageRelativeAbundance[j] - alphaMatrix[i][j];
+ normChange += difference * difference;
+ alphaMatrix[i][j] = averageRelativeAbundance[j];
+ }
+
+ normChange = sqrt(normChange);
+
+ if(normChange > maxChange){ maxChange = normChange; }
+ }
+
+
+ //calcualte distance between each sample in partition and the average relative abundance
+ for(int i=0;i<numSamples;i++){
+ if (m->control_pressed) { return 0; }
+
+ double normalizationFactor = 0;
+ vector<double> totalDistToPartition(numPartitions, 0);
+
+ for(int j=0;j<numPartitions;j++){
+ for(int k=0;k<numOTUs;k++){
+ double difference = alphaMatrix[j][k] - relativeAbundance[i][k];
+ totalDistToPartition[j] += difference * difference;
+ }
+ totalDistToPartition[j] = sqrt(totalDistToPartition[j]);
+ normalizationFactor += exp(-50.0 * totalDistToPartition[j]);
+ }
+
+
+ for(int j=0;j<numPartitions;j++){
+ zMatrix[j][i] = exp(-50.0 * totalDistToPartition[j]) / normalizationFactor;
+ }
+
+ }
+
+ iteration++;
+ // cout << "K means: " << iteration << '\t' << maxChange << endl;
+
+ }
+
+ // cout << "Iter:-1";
+ for(int i=0;i<numPartitions;i++){
+ weights[i] = 0.0000;
+
+ for(int j=0;j<numSamples;j++){
+ weights[i] += zMatrix[i][j];
+ }
+ // printf("\tw_%d=%.3f", i, weights[i]);
+ }
+ // cout << endl;
+
+
+ for(int i=0;i<numOTUs;i++){
+ if (m->control_pressed) { return 0; }
+ for(int j=0;j<numPartitions;j++){
+ if(alphaMatrix[j][i] > 0){
+ lambdaMatrix[j][i] = log(alphaMatrix[j][i]);
+ }
+ else{
+ lambdaMatrix[j][i] = -10.0;
+ }
+ }
+ }
+
+ return 0;
+ }
+ catch(exception& e){
+ m->errorOut(e, "CommunityTypeFinder", "kMeans");
+ exit(1);
+ }
+}
+
+
+/**************************************************************************************************/
+
--- /dev/null
+//
+// communitytype.h
+// Mothur
+//
+// Created by SarahsWork on 12/3/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_communitytype_h
+#define Mothur_communitytype_h
+
+#define EPSILON numeric_limits<double>::epsilon()
+
+
+#include "mothurout.h"
+#include "linearalgebra.h"
+/**************************************************************************************************/
+
+class CommunityTypeFinder {
+
+public:
+ CommunityTypeFinder(){ m = MothurOut::getInstance(); }
+ virtual ~CommunityTypeFinder(){};
+
+ virtual void printZMatrix(string, vector<string>);
+ virtual void printRelAbund(string, vector<string>);
+ virtual void printFitData(ofstream&) {}
+ virtual void printFitData(ostream&, double) {}
+ virtual void printSilData(ofstream&, double, vector<double>);
+ virtual void printSilData(ostream&, double, vector<double>);
+
+ virtual double getNLL() { return currNLL; }
+ virtual double getAIC() { return aic; }
+ virtual double getBIC() { return bic; }
+ virtual double getLogDet() { return logDeterminant; }
+ virtual double getLaplace() { return laplace; }
+
+ virtual double calcCHIndex(vector< vector< double> >) {return 0;} //Calinski-Harabasz
+ virtual vector<double> calcSilhouettes(vector< vector< double> >) { vector<double> s; return s; } //if none provided by child class
+
+
+protected:
+
+ int findkMeans();
+ vector<vector<double> > getHessian();
+ double psi1(double);
+ double psi(double);
+ double cheb_eval(const double[], int, double);
+
+
+ MothurOut* m;
+ vector<vector<double> > zMatrix;
+ vector<vector<double> > lambdaMatrix;
+ vector<vector<double> > error;
+ vector<vector<int> > countMatrix;
+ vector<double> weights;
+
+
+
+ int numPartitions;
+ int numSamples;
+ int numOTUs;
+ int currentPartition;
+
+ double currNLL, aic, bic, logDeterminant, laplace;
+
+
+};
+
+/**************************************************************************************************/
+
+
+
+
+#endif
//**********************************************************************************************************************
bool FlowData::getNext(ifstream& flowFile){
-
try {
+
seqName = getSequenceName(flowFile);
- flowFile >> endFlow;
+ if (m->debug) { m->mothurOut("[DEBUG]: flow = " + seqName + " "); }
+ flowFile >> endFlow;
+ if (m->debug) { m->mothurOut(toString(endFlow) + " "); }
if (!m->control_pressed) {
- for(int i=0;i<numFlows;i++) { flowFile >> flowData[i]; }
+ if (m->debug) { m->mothurOut(" "); }
+ for(int i=0;i<numFlows;i++) {
+ flowFile >> flowData[i];
+ if (m->debug) { m->mothurOut(toString(flowData[i]) + " "); }
+ }
+ if (m->debug) { m->mothurOut("\n"); }
updateEndFlow();
translateFlow();
m->gobble(flowFile);
//
#include "getmetacommunitycommand.h"
-
+#include "communitytype.h"
+#include "kmeans.h"
+#include "validcalculator.h"
+#include "subsample.h"
//**********************************************************************************************************************
vector<string> GetMetaCommunityCommand::setParameters(){
CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+ CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "jsd", "", "", "","",false,false,true); parameters.push_back(pcalc);
+ CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
+ CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions);
CommandParameter pmaxpartitions("maxpartitions", "Number", "", "100", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions);
CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
-
+ CommandParameter pmethod("method", "Multiple", "dmm-kmeans-pam", "dmm", "", "", "","",false,false,true); parameters.push_back(pmethod);
+
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
return myArray;
string GetMetaCommunityCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The get.communitytype command parameters are shared, label, groups, minpartitions, maxpartitions, optimizegap and processors. The shared file is required. \n";
+ helpString += "The get.communitytype command parameters are shared, method, label, groups, minpartitions, maxpartitions, optimizegap and processors. The shared file is required. \n";
helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n";
helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed. Group names are separated by dashes.\n";
+ helpString += "The method parameter allows to select the method you would like to use. Options are dmm, kmeans and pam. Default=dmm.\n";
+ helpString += "The calc parameter allows to select the calculator you would like to use to calculate the distance matrix used by the pam method. By default the jsd calculator is used.\n";
+ helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample while calculating the distance matirx for the pam method.\n";
+ helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group while calculating the distance matrix for the pam method.\n";
helpString += "The minpartitions parameter is used to .... Default=5.\n";
helpString += "The maxpartitions parameter is used to .... Default=10.\n";
helpString += "The optimizegap parameter is used to .... Default=3.\n";
if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
else { allLines = 1; }
}
+
+ method = validParameter.validFile(parameters, "method", false);
+ if (method == "not found") { method = "dmm"; }
+
+ if ((method == "dmm") || (method == "kmeans") || (method == "pam")) { }
+ else { m->mothurOut("[ERROR]: " + method + " is not a valid method. Valid algorithms are dmm, kmeans and pam."); m->mothurOutEndLine(); abort = true; }
+
+ calc = validParameter.validFile(parameters, "calc", false);
+ if (calc == "not found") { calc = "jsd"; }
+ else {
+ if (calc == "default") { calc = "jsd"; }
+ }
+ m->splitAtDash(calc, Estimators);
+ if (m->inUsersGroups("citation", Estimators)) {
+ ValidCalculators validCalc; validCalc.printCitations(Estimators);
+ //remove citation from list of calcs
+ for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
+ }
+ if (Estimators.size() != 1) { abort = true; m->mothurOut("[ERROR]: only one calculator is allowed.\n"); }
+
+ temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
+ m->mothurConvert(temp, iters);
+
+ temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
+ if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
+ else {
+ if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
+ else { subsample = false; }
+ }
+
+ if (subsample == false) { iters = 0; }
}
}
set<string> processedLabels;
set<string> userLabels = labels;
+ if (subsample) {
+ if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
+ subsampleSize = lookup[0]->getNumSeqs();
+ for (int i = 1; i < lookup.size(); i++) {
+ int thisSize = lookup[i]->getNumSeqs();
+
+ if (thisSize < subsampleSize) { subsampleSize = thisSize; }
+ }
+ }else {
+ m->clearGroups();
+ Groups.clear();
+ vector<SharedRAbundVector*> temp;
+ for (int i = 0; i < lookup.size(); i++) {
+ if (lookup[i]->getNumSeqs() < subsampleSize) {
+ m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
+ delete lookup[i];
+ }else {
+ Groups.push_back(lookup[i]->getGroup());
+ temp.push_back(lookup[i]);
+ }
+ }
+ lookup = temp;
+ m->setGroups(Groups);
+ }
+
+ if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
+ }
+
+
//as long as you are not at the end of the file or done wih the lines you want
while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
}
//do my part
- m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
+ if (method == "dmm") { m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n"); }
+ else {
+ m->mothurOut("K\tCH\t");
+ for (int i = 0; i < thislookup.size(); i++) { m->mothurOut(thislookup[i]->getGroup() + '\t'); }
+ m->mothurOut("\n");
+ }
minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0);
//force parent to wait until all the processes are done
}
#else
- /*
- vector<metaCommunityData*> pDataArray;
- DWORD dwThreadIdArray[processors-1];
- HANDLE hThreadArray[processors-1];
-
- //Create processor worker threads.
- for( int i=1; i<processors; i++ ){
- //copy lookup
- //make copy of lookup so we don't get access violations
- vector<SharedRAbundVector*> newLookup;
- for (int k = 0; k < thislookup.size(); k++) {
- SharedRAbundVector* temp = new SharedRAbundVector();
- temp->setLabel(thislookup[k]->getLabel());
- temp->setGroup(thislookup[k]->getGroup());
- newLookup.push_back(temp);
- }
-
- //for each bin
- for (int k = 0; k < thislookup[0]->getNumBins(); k++) {
- if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
- for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(k), thislookup[j]->getGroup()); }
- }
-
- processIDS.push_back(i);
-
- // Allocate memory for thread data.
- metaCommunityData* tempMeta = new metaCommunityData(newLookup, m, dividedPartitions[i], outputFileName + toString(i), rels[i], matrix[i], minpartitions, maxpartitions, optimizegap);
- pDataArray.push_back(tempMeta);
-
- hThreadArray[i] = CreateThread(NULL, 0, MyMetaCommunityThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
- }
-
- //do my part
- minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0]);
-
- //Wait until all threads have terminated.
- WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
-
- //Close all thread handles and free memory allocations.
- for(int i=0; i < pDataArray.size(); i++){
- if (pDataArray[i]->minPartition < minPartition) { minPartition = pDataArray[i]->minPartition; }
- for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {
- outputNames.push_back(pDataArray[i]->outputNames[j]);
- }
- m->appendFilesWithoutHeaders(outputFileName + toString(processIDS[i]), outputFileName);
- m->mothurRemove(outputFileName + toString(processIDS[i]));
- CloseHandle(hThreadArray[i]);
- delete pDataArray[i];
- } */
- //do my part
m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0);
#endif
//run generate Summary function for smallest minPartition
variables["[tag]"] = toString(minPartition);
- generateSummaryFile(minPartition, variables);
+ vector<double> piValues = generateDesignFile(minPartition, variables);
+ if (method == "dmm") { generateSummaryFile(minPartition, variables, piValues); } //pam doesn't make a relabund file
return 0;
try {
double minLaplace = 1e10;
- int minPartition = 0;
+ int minPartition = 1;
+ vector<double> minSilhouettes; minSilhouettes.resize(thislookup.size(), 0);
+
+ ofstream fitData, silData;
+ if (method == "dmm") {
+ m->openOutputFile(outputFileName, fitData);
+ fitData.setf(ios::fixed, ios::floatfield);
+ fitData.setf(ios::showpoint);
+ fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
+ }else if((method == "pam") || (method == "kmeans")) { //because ch is looking of maximal value
+ minLaplace = 0;
+ m->openOutputFile(outputFileName, silData);
+ silData.setf(ios::fixed, ios::floatfield);
+ silData.setf(ios::showpoint);
+ silData << "K\tCH\t";
+ for (int i = 0; i < thislookup.size(); i++) { silData << thislookup[i]->getGroup() << '\t'; }
+ silData << endl;
+ }
- ofstream fitData;
- m->openOutputFile(outputFileName, fitData);
- fitData.setf(ios::fixed, ios::floatfield);
- fitData.setf(ios::showpoint);
cout.setf(ios::fixed, ios::floatfield);
cout.setf(ios::showpoint);
vector<string> thisGroups;
for (int i = 0; i < thislookup.size(); i++) { sharedMatrix.push_back(thislookup[i]->getAbundances()); thisGroups.push_back(thislookup[i]->getGroup()); }
- fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
+ vector< vector<double> > dists; //do we want to output this matrix??
+ if ((method == "pam") || (method == "kmeans")) { dists = generateDistanceMatrix(thislookup); }
+
+ if (m->debug) {
+ m->mothurOut("[DEBUG]: dists = \n");
+ for (int i = 0; i < dists.size(); i++) {
+ if (m->control_pressed) { break; }
+ m->mothurOut("[DEBUG]: i = " + toString(i) + '\t');
+ for (int j = 0; j < i; j++) { m->mothurOut(toString(dists[i][j]) +"\t"); }
+ m->mothurOut("\n");
+ }
+ }
for(int i=0;i<parts.size();i++){
}
}
- qFinderDMM findQ(sharedMatrix, numPartitions);
-
- double laplace = findQ.getLaplace();
- m->mothurOut(toString(numPartitions) + '\t');
- cout << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
- m->mothurOutJustToLog(toString(findQ.getNLL()) + '\t' + toString(findQ.getLogDet()) + '\t');
- cout << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace;
- m->mothurOutJustToLog(toString(findQ.getBIC()) + '\t' + toString(findQ.getAIC()) + '\t' + toString(laplace));
-
- fitData << numPartitions << '\t';
- fitData << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
- fitData << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace << endl;
-
- if(laplace < minLaplace){
- minPartition = numPartitions;
- minLaplace = laplace;
- m->mothurOut("***");
+ CommunityTypeFinder* finder = NULL;
+ if (method == "dmm") { finder = new qFinderDMM(sharedMatrix, numPartitions); }
+ else if (method == "kmeans") { finder = new KMeans(sharedMatrix, numPartitions); }
+ else if (method == "pam") { finder = new Pam(sharedMatrix, dists, numPartitions); }
+ else {
+ if (i == 0) { m->mothurOut(method + " is not a valid method option. I will run the command using dmm.\n"); }
+ finder = new qFinderDMM(sharedMatrix, numPartitions);
}
- m->mothurOutEndLine();
+ double chi; vector<double> silhouettes;
+ if (method == "dmm") {
+ double laplace = finder->getLaplace();
+ if(laplace < minLaplace){
+ minPartition = numPartitions;
+ minLaplace = laplace;
+ }
+ }else {
+ chi = finder->calcCHIndex(dists);
+ silhouettes = finder->calcSilhouettes(dists);
+ if (chi > minLaplace) { //save partition with maximum ch index score
+ minPartition = numPartitions;
+ minLaplace = chi;
+ minSilhouettes = silhouettes;
+ }
+ }
string relabund = relabunds[i];
- outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
string matrixName = matrix[i];
outputNames.push_back(matrixName); outputTypes["matrix"].push_back(matrixName);
- findQ.printZMatrix(matrixName, thisGroups);
- findQ.printRelAbund(relabund, m->currentSharedBinLabels);
+ finder->printZMatrix(matrixName, thisGroups);
+
+ if (method == "dmm") {
+ finder->printFitData(cout, minLaplace);
+ finder->printFitData(fitData);
+ finder->printRelAbund(relabund, m->currentSharedBinLabels);
+ outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
+ }else if ((method == "pam") || (method == "kmeans")) { //print silouettes and ch values
+ finder->printSilData(cout, chi, silhouettes);
+ finder->printSilData(silData, chi, silhouettes);
+ if (method == "kmeans") {
+ finder->printRelAbund(relabund, m->currentSharedBinLabels);
+ outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
+ }
+ }
+ delete finder;
if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){
string tempDoneFile = m->getRootName(m->getSimpleName(sharedfile)) + toString(processID) + ".done.temp";
break;
}
}
- fitData.close();
-
- //minPartition = 4;
+ if (method == "dmm") { fitData.close(); }
if (m->control_pressed) { return 0; }
inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference; }
/**************************************************************************************************/
-int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,string> v){
+int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,string> v, vector<double> piValues){
try {
vector<summaryData> summary;
string name, header;
double mean, lci, uci;
-
- vector<double> piValues = generateDesignFile(numPartitions, v);
-
ifstream referenceFile;
map<string, string> variables;
variables["[filename]"] = v["[filename]"];
}
//**********************************************************************************************************************
+vector<vector<double> > GetMetaCommunityCommand::generateDistanceMatrix(vector<SharedRAbundVector*>& thisLookup){
+ try {
+ vector<vector<double> > results;
+
+ Calculator* matrixCalculator;
+ ValidCalculators validCalculator;
+ int i = 0;
+
+ if (validCalculator.isValidCalculator("matrix", Estimators[i]) == true) {
+ if (Estimators[i] == "sharedsobs") {
+ matrixCalculator = new SharedSobsCS();
+ }else if (Estimators[i] == "sharedchao") {
+ matrixCalculator = new SharedChao1();
+ }else if (Estimators[i] == "sharedace") {
+ matrixCalculator = new SharedAce();
+ }else if (Estimators[i] == "jabund") {
+ matrixCalculator = new JAbund();
+ }else if (Estimators[i] == "sorabund") {
+ matrixCalculator = new SorAbund();
+ }else if (Estimators[i] == "jclass") {
+ matrixCalculator = new Jclass();
+ }else if (Estimators[i] == "sorclass") {
+ matrixCalculator = new SorClass();
+ }else if (Estimators[i] == "jest") {
+ matrixCalculator = new Jest();
+ }else if (Estimators[i] == "sorest") {
+ matrixCalculator = new SorEst();
+ }else if (Estimators[i] == "thetayc") {
+ matrixCalculator = new ThetaYC();
+ }else if (Estimators[i] == "thetan") {
+ matrixCalculator = new ThetaN();
+ }else if (Estimators[i] == "kstest") {
+ matrixCalculator = new KSTest();
+ }else if (Estimators[i] == "sharednseqs") {
+ matrixCalculator = new SharedNSeqs();
+ }else if (Estimators[i] == "ochiai") {
+ matrixCalculator = new Ochiai();
+ }else if (Estimators[i] == "anderberg") {
+ matrixCalculator = new Anderberg();
+ }else if (Estimators[i] == "kulczynski") {
+ matrixCalculator = new Kulczynski();
+ }else if (Estimators[i] == "kulczynskicody") {
+ matrixCalculator = new KulczynskiCody();
+ }else if (Estimators[i] == "lennon") {
+ matrixCalculator = new Lennon();
+ }else if (Estimators[i] == "morisitahorn") {
+ matrixCalculator = new MorHorn();
+ }else if (Estimators[i] == "braycurtis") {
+ matrixCalculator = new BrayCurtis();
+ }else if (Estimators[i] == "whittaker") {
+ matrixCalculator = new Whittaker();
+ }else if (Estimators[i] == "odum") {
+ matrixCalculator = new Odum();
+ }else if (Estimators[i] == "canberra") {
+ matrixCalculator = new Canberra();
+ }else if (Estimators[i] == "structeuclidean") {
+ matrixCalculator = new StructEuclidean();
+ }else if (Estimators[i] == "structchord") {
+ matrixCalculator = new StructChord();
+ }else if (Estimators[i] == "hellinger") {
+ matrixCalculator = new Hellinger();
+ }else if (Estimators[i] == "manhattan") {
+ matrixCalculator = new Manhattan();
+ }else if (Estimators[i] == "structpearson") {
+ matrixCalculator = new StructPearson();
+ }else if (Estimators[i] == "soergel") {
+ matrixCalculator = new Soergel();
+ }else if (Estimators[i] == "spearman") {
+ matrixCalculator = new Spearman();
+ }else if (Estimators[i] == "structkulczynski") {
+ matrixCalculator = new StructKulczynski();
+ }else if (Estimators[i] == "speciesprofile") {
+ matrixCalculator = new SpeciesProfile();
+ }else if (Estimators[i] == "hamming") {
+ matrixCalculator = new Hamming();
+ }else if (Estimators[i] == "structchi2") {
+ matrixCalculator = new StructChi2();
+ }else if (Estimators[i] == "gower") {
+ matrixCalculator = new Gower();
+ }else if (Estimators[i] == "memchi2") {
+ matrixCalculator = new MemChi2();
+ }else if (Estimators[i] == "memchord") {
+ matrixCalculator = new MemChord();
+ }else if (Estimators[i] == "memeuclidean") {
+ matrixCalculator = new MemEuclidean();
+ }else if (Estimators[i] == "mempearson") {
+ matrixCalculator = new MemPearson();
+ }else if (Estimators[i] == "jsd") {
+ matrixCalculator = new JSD();
+ }else {
+ m->mothurOut("[ERROR]: " + Estimators[i] + " is not a valid calculator, please correct.\n"); m->control_pressed = true; return results;
+ }
+ }
+
+ //calc distances
+ vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, then each groupCombos dists. this will be used to make .dist files
+ vector< vector<seqDist> > calcDists; calcDists.resize(1);
+
+ for (int thisIter = 0; thisIter < iters+1; thisIter++) {
+
+ vector<SharedRAbundVector*> thisItersLookup = thisLookup;
+
+ if (subsample && (thisIter != 0)) {
+ SubSample sample;
+ vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
+
+ //make copy of lookup so we don't get access violations
+ vector<SharedRAbundVector*> newLookup;
+ for (int k = 0; k < thisItersLookup.size(); k++) {
+ SharedRAbundVector* temp = new SharedRAbundVector();
+ temp->setLabel(thisItersLookup[k]->getLabel());
+ temp->setGroup(thisItersLookup[k]->getGroup());
+ newLookup.push_back(temp);
+ }
+
+ //for each bin
+ for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+ if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return results; }
+ for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+ }
+
+ tempLabels = sample.getSample(newLookup, subsampleSize);
+ thisItersLookup = newLookup;
+ }
+
+
+ driver(thisItersLookup, calcDists, matrixCalculator);
+
+ if (subsample && (thisIter != 0)) {
+ if((thisIter) % 100 == 0){ m->mothurOutJustToScreen(toString(thisIter)+"\n"); }
+ calcDistsTotals.push_back(calcDists);
+ for (int i = 0; i < calcDists.size(); i++) {
+ for (int j = 0; j < calcDists[i].size(); j++) {
+ if (m->debug) { m->mothurOut("[DEBUG]: Results: iter = " + toString(thisIter) + ", " + thisLookup[calcDists[i][j].seq1]->getGroup() + " - " + thisLookup[calcDists[i][j].seq2]->getGroup() + " distance = " + toString(calcDists[i][j].dist) + ".\n"); }
+ }
+ }
+ //clean up memory
+ for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
+ thisItersLookup.clear();
+ }else { //print results for whole dataset
+ for (int i = 0; i < calcDists.size(); i++) {
+ if (m->control_pressed) { break; }
+
+ //initialize matrix
+ results.resize(thisLookup.size());
+ for (int k = 0; k < thisLookup.size(); k++) { results[k].resize(thisLookup.size(), 0.0); }
+
+ for (int j = 0; j < calcDists[i].size(); j++) {
+ int row = calcDists[i][j].seq1;
+ int column = calcDists[i][j].seq2;
+ double dist = calcDists[i][j].dist;
+
+ results[row][column] = dist;
+ results[column][row] = dist;
+ }
+ }
+ }
+ for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
+ }
+
+ if (iters != 0) {
+ //we need to find the average distance and standard deviation for each groups distance
+ vector< vector<seqDist> > calcAverages = m->getAverages(calcDistsTotals, "average");
+
+ //print results
+ for (int i = 0; i < calcDists.size(); i++) {
+ results.resize(thisLookup.size());
+ for (int k = 0; k < thisLookup.size(); k++) { results[k].resize(thisLookup.size(), 0.0); }
+
+ for (int j = 0; j < calcAverages[i].size(); j++) {
+ int row = calcAverages[i][j].seq1;
+ int column = calcAverages[i][j].seq2;
+ float dist = calcAverages[i][j].dist;
+
+ results[row][column] = dist;
+ results[column][row] = dist;
+ }
+ }
+ }
+
+
+ return results;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetMetaCommunityCommand", "generateDistanceMatrix");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+int GetMetaCommunityCommand::driver(vector<SharedRAbundVector*> thisLookup, vector< vector<seqDist> >& calcDists, Calculator* matrixCalculator) {
+ try {
+ vector<SharedRAbundVector*> subset;
+
+ for (int k = 0; k < thisLookup.size(); k++) { // pass cdd each set of groups to compare
+
+ for (int l = 0; l < k; l++) {
+
+ if (k != l) { //we dont need to similiarity of a groups to itself
+ subset.clear(); //clear out old pair of sharedrabunds
+ //add new pair of sharedrabunds
+ subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
+
+
+
+ //if this calc needs all groups to calculate the pair load all groups
+ if (matrixCalculator->getNeedsAll()) {
+ //load subset with rest of lookup for those calcs that need everyone to calc for a pair
+ for (int w = 0; w < thisLookup.size(); w++) {
+ if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
+ }
+ }
+
+ vector<double> tempdata = matrixCalculator->getValues(subset); //saves the calculator outputs
+
+ if (m->control_pressed) { return 1; }
+
+ seqDist temp(l, k, tempdata[0]);
+ //cout << l << '\t' << k << '\t' << tempdata[0] << endl;
+ calcDists[0].push_back(temp);
+ }
+
+ }
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MatrixOutputCommand", "driver");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
#include "command.hpp"
#include "inputdata.h"
#include "qFinderDMM.h"
+#include "pam.h"
+#include "sharedsobscollectsummary.h"
+#include "sharedchao1.h"
+#include "sharedace.h"
+#include "sharednseqs.h"
+#include "sharedjabund.h"
+#include "sharedsorabund.h"
+#include "sharedjclass.h"
+#include "sharedsorclass.h"
+#include "sharedjest.h"
+#include "sharedsorest.h"
+#include "sharedthetayc.h"
+#include "sharedthetan.h"
+#include "sharedkstest.h"
+#include "whittaker.h"
+#include "sharedochiai.h"
+#include "sharedanderbergs.h"
+#include "sharedkulczynski.h"
+#include "sharedkulczynskicody.h"
+#include "sharedlennon.h"
+#include "sharedmorisitahorn.h"
+#include "sharedbraycurtis.h"
+#include "sharedjackknife.h"
+#include "whittaker.h"
+#include "odum.h"
+#include "canberra.h"
+#include "structeuclidean.h"
+#include "structchord.h"
+#include "hellinger.h"
+#include "manhattan.h"
+#include "structpearson.h"
+#include "soergel.h"
+#include "spearman.h"
+#include "structkulczynski.h"
+#include "structchi2.h"
+#include "speciesprofile.h"
+#include "hamming.h"
+#include "gower.h"
+#include "memchi2.h"
+#include "memchord.h"
+#include "memeuclidean.h"
+#include "mempearson.h"
+#include "sharedjsd.h"
/**************************************************************************************************/
unsigned long long end;
linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
};
- bool abort, allLines;
+ bool abort, allLines, subsample;
string outputDir;
vector<string> outputNames;
- string sharedfile;
- int minpartitions, maxpartitions, optimizegap, processors;
- vector<string> Groups;
+ string sharedfile, method, calc;
+ int minpartitions, maxpartitions, optimizegap, processors, iters, subsampleSize;
+ vector<string> Groups, Estimators;
set<string> labels;
+ vector<vector<double> > generateDistanceMatrix(vector<SharedRAbundVector*>& lookup);
+ int driver(vector<SharedRAbundVector*> thisLookup, vector< vector<seqDist> >& calcDists, Calculator*);
int processDriver(vector<SharedRAbundVector*>&, vector<int>&, string, vector<string>, vector<string>, vector<string>, int);
int createProcesses(vector<SharedRAbundVector*>&);
vector<double> generateDesignFile(int, map<string,string>);
- int generateSummaryFile(int, map<string,string>);
+ int generateSummaryFile(int, map<string,string>, vector<double>);
};
vector<double> partMean, partLCI, partUCI;
};
-/**************************************************************************************************/
-
-struct metaCommunityData {
- vector<SharedRAbundVector*> thislookup;
- MothurOut* m;
- string outputFileName;
- vector<string> relabunds, matrix, outputNames;
- int minpartitions, maxpartitions, optimizegap;
- vector<int> parts;
- int minPartition;
-
- metaCommunityData(){}
- metaCommunityData(vector<SharedRAbundVector*> lu, MothurOut* mout, vector<int> dp, string fit, vector<string> rels, vector<string> mat, int minp, int maxp, int opg) {
- m = mout;
- thislookup = lu;
- parts = dp;
- outputFileName = fit;
- relabunds = rels;
- matrix = mat;
- minpartitions = minp;
- maxpartitions = maxp;
- optimizegap = opg;
- minPartition = 0;
- }
-};
-/**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-#else
-static DWORD WINAPI MyMetaCommunityThreadFunction(LPVOID lpParam){
- metaCommunityData* pDataArray;
- pDataArray = (metaCommunityData*)lpParam;
-
- try {
-
- double minLaplace = 1e10;
-
- ofstream fitData;
- pDataArray->m->openOutputFile(pDataArray->outputFileName, fitData);
- fitData.setf(ios::fixed, ios::floatfield);
- fitData.setf(ios::showpoint);
- cout.setf(ios::fixed, ios::floatfield);
- cout.setf(ios::showpoint);
-
- vector< vector<int> > sharedMatrix;
- for (int i = 0; i < pDataArray->thislookup.size(); i++) { sharedMatrix.push_back(pDataArray->thislookup[i]->getAbundances()); }
-
- pDataArray->m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
- fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
-
- for(int i=0;i<pDataArray->parts.size();i++){
-
- int numPartitions = pDataArray->parts[i];
-
- if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: running partition " + toString(numPartitions) + "\n"); }
-
- if (pDataArray->m->control_pressed) { break; }
-
- qFinderDMM* findQ = new qFinderDMM(sharedMatrix, numPartitions);
-
- if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: done finding Q " + toString(numPartitions) + "\n"); }
-
- double laplace = findQ->getLaplace();
- pDataArray->m->mothurOut(toString(numPartitions) + '\t');
- cout << setprecision (2) << findQ->getNLL() << '\t' << findQ->getLogDet() << '\t';
- pDataArray->m->mothurOutJustToLog(toString(findQ->getNLL()) + '\t' + toString(findQ->getLogDet()) + '\t');
- cout << findQ->getBIC() << '\t' << findQ->getAIC() << '\t' << laplace;
- pDataArray->m->mothurOutJustToLog(toString(findQ->getBIC()) + '\t' + toString(findQ->getAIC()) + '\t' + toString(laplace));
-
- fitData << numPartitions << '\t';
- fitData << setprecision (2) << findQ->getNLL() << '\t' << findQ->getLogDet() << '\t';
- fitData << findQ->getBIC() << '\t' << findQ->getAIC() << '\t' << laplace << endl;
-
- if(laplace < minLaplace){
- pDataArray->minPartition = numPartitions;
- minLaplace = laplace;
- pDataArray->m->mothurOut("***");
- }
- pDataArray->m->mothurOutEndLine();
-
- pDataArray->outputNames.push_back(pDataArray->relabunds[i]);
- pDataArray->outputNames.push_back(pDataArray->matrix[i]);
-
- findQ->printZMatrix(pDataArray->matrix[i], pDataArray->m->getGroups());
- findQ->printRelAbund(pDataArray->relabunds[i], pDataArray->m->currentSharedBinLabels);
-
- if(pDataArray->optimizegap != -1 && (numPartitions - pDataArray->minPartition) >= pDataArray->optimizegap && numPartitions >= pDataArray->minpartitions){ break; }
-
- delete findQ;
- }
- fitData.close();
-
- //minPartition = 4;
-
- if (pDataArray->m->control_pressed) { return 0; }
-
- return 0;
-
- }
- catch(exception& e) {
- pDataArray->m->errorOut(e, "GetMetaCommunityCommand", "MyMetaCommunityThreadFunction");
- exit(1);
- }
-}
-#endif
-
-
#endif
try{
vector<string> reps;
string rep = "notFound";
+
+ if (m->debug) { m->mothurOut("[DEBUG]: group=" + group + " names.size() = " + toString(names.size()) + " " + names[0] + "\n"); }
if ((names.size() == 1)) {
return names[0];
if (countfile != "") { //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
int numRep = 0;
if (group != "") { numRep = ct.getGroupCount(names[i], group); }
- else { numRep = ct.getGroupCount(names[i]); }
+ else { numRep = ct.getNumSeqs(names[i]); }
if (numRep > maxAbund) {
reps.clear();
reps.push_back(names[i]);
if (countfile != "") { //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
int numRep = 0;
if (group != "") { numRep = ct.getGroupCount(names[i], group); }
- else { numRep = ct.getGroupCount(names[i]); }
+ else { numRep = ct.getNumSeqs(names[i]); }
for (int j = 1; j < numRep; j++) { //don't add yourself again
seqIndex.push_back(nameToIndex[names[i]]);
}
--- /dev/null
+//
+// kmeans.cpp
+// Mothur
+//
+// Created by SarahsWork on 12/4/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "kmeans.h"
+
+/**************************************************************************************************/
+
+KMeans::KMeans(vector<vector<int> > cm, int p) : CommunityTypeFinder() {
+ try {
+ countMatrix = cm;
+ numSamples = (int)countMatrix.size();
+ numOTUs = (int)countMatrix[0].size();
+ numPartitions = p;
+
+ findkMeans();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "KMeans", "KMeans");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+//The silhouette width S(i)of individual data points i is calculated using the following formula:
+/*
+ s(i) = b(i) - a(i)
+ -----------
+ max(b(i),a(i))
+ where a(i) is the average dissimilarity (or distance) of sample i to all other samples in the same cluster, while b(i) is the average dissimilarity (or distance) to all objects in the closest other cluster.
+
+ The formula implies -1 =< S(i) =< 1 . A sample which is much closer to its own cluster than to any other cluster has a high S(i) value, while S(i) close to 0 implies that the given sample lies somewhere between two clusters. Large negative S(i) values indicate that the sample was assigned to the wrong cluster.
+ */
+
+vector<double> KMeans::calcSilhouettes(vector<vector<double> > dists) {
+ try {
+ vector<double> silhouettes; silhouettes.resize(numSamples, 0.0);
+ if (numPartitions < 2) { return silhouettes; }
+
+ map<int, int> clusterMap; //map sample to partition
+ for (int j = 0; j < numSamples; j++) {
+ double maxValue = 0.0;
+ for (int i = 0; i < numPartitions; i++) {
+ if (m->control_pressed) { return silhouettes; }
+ if (zMatrix[i][j] > maxValue) { //for kmeans zmatrix contains values for each sample in each partition. partition with highest value for that sample is the partition where the sample should be
+ clusterMap[j] = i;
+ maxValue = zMatrix[i][j];
+ }
+ }
+ }
+
+ vector<int> nextClosestPartition;
+ findSecondClosest(nextClosestPartition, dists, clusterMap);
+
+ if (m->control_pressed) { return silhouettes; }
+
+ vector<double> a, b; a.resize(numSamples, 0.0); b.resize(numSamples, 0.0);
+
+ //calc a - all a[i] are the same in the same partition
+ for (int k = 0; k < numPartitions; k++) {
+ if (m->control_pressed) { break; }
+
+ int count = 0;
+ double totalA = 0.0;
+ for (int i = 0; i < numSamples; i++) {
+ for (int j = 0; j < numSamples; j++) {
+ if (m->control_pressed) { break; }
+ if ((clusterMap[i] == k) && (clusterMap[j] == k)){ //are both samples in the partition, if so add there distance
+ totalA += dists[j][i]; //distance from this sample to the other samples in the partition
+ count++;
+ }
+ }
+ }
+ totalA /= (double) count;
+
+ //set a[i] to average for cluster
+ for (int i = 0; i < numSamples; i++) {
+ if (clusterMap[i] == k) { a[i] = totalA; }
+ }
+ }
+
+ //calc b
+ for (int i = 0; i < numSamples; i++) {
+ if (m->control_pressed) { break; }
+
+ int thisPartition = nextClosestPartition[i];
+ int count = 0;
+ double totalB = 0.0;
+ for (int j = 0; j < numSamples; j++) {
+ if (clusterMap[j] == thisPartition) { //this sample is in this partition
+ totalB += dists[i][j];
+ count++;
+ }
+ }
+ b[i] = totalB / (double) count;
+ }
+
+ //calc silhouettes
+ for (int i = 0; i < numSamples; i++) {
+ if (m->control_pressed) { break; }
+
+ double denom = a[i];
+ if (b[i] > denom) { denom = b[i]; } //max(a[i],b[i])
+
+ silhouettes[i] = (b[i] - a[i]) / denom;
+
+ //cout << "silhouettes " << i << '\t' << silhouettes[i] << endl;
+ }
+
+ return silhouettes;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "KMeans", "calcSilhouettes");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+int KMeans::findSecondClosest(vector<int>& nextClosestPartition, vector<vector<double> >& dists, map<int, int> clusterMap) {
+ try {
+ vector<double> minScores; minScores.resize(numSamples, 1e6);
+ nextClosestPartition.resize(numSamples, 0);
+
+
+ for (int i = 0; i < numSamples; i++) {
+ for (int j = 0; j < numPartitions; j++) {
+ if (m->control_pressed) { break; }
+
+ //is this the one we are assigned to - ie the "best" cluster. We want second best.
+ //if numPartitions = 2, then special case??
+ if (clusterMap[i] != j) {
+ double score = 1e6;
+ if (numPartitions == 2) {
+ score = 0.0; //choose other option, there are only 2.
+ }else{ score = calcScore(i, j, dists, clusterMap); }
+
+ if (m->debug) { m->mothurOut("[DEBUG]: sample = " + toString(i) + " partition = " + toString(j) + " score = " + toString(score) + "\n"); }
+
+ //is this better than our last find
+ if (score < minScores[i]) {
+ minScores[i] = score;
+ nextClosestPartition[i] = j;
+ }
+ }else {} //best case, ignore
+ }
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "KMeans", "findSecondClosest");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+double KMeans::calcScore(int sample, int partition, vector<vector<double> >& dists, map<int, int> clusterMap) {
+ try {
+ //square the distances and then for each pair of clusters, calculate the sum of the squraed distances between the clusters
+ //then with the sum of hte squared dsitances take the square root and divide by the number of distances in the sum
+
+ double sum = 0.0; int count = 0;
+ for (int i = 0; i < numSamples; i++) {
+ if (m->control_pressed) { break; }
+ if (clusterMap[i] == partition) { //samples in this cluster
+ sum += (dists[sample][i] * dists[sample][i]);
+ count++;
+ }
+ }
+
+ sum = sqrt(sum);
+ sum /= (double) count;
+
+ return sum;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "KMeans", "calcScore");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+/*To assess the optimal number of clusters our dataset was most robustly partitioned into, we used the Calinski-Harabasz (CH) Index that has shown good performance in recovering the number of clusters. It is defined as:
+
+ CHk=Bk/(k−1)/Wk/(n−k)
+
+ where Bk is the between-cluster sum of squares (i.e. the squared distances between all points i and j, for which i and j are not in the same cluster) and Wk is the within-clusters sum of squares (i.e. the squared distances between all points i and j, for which i and j are in the same cluster). This measure implements the idea that the clustering is more robust when between-cluster distances are substantially larger than within-cluster distances. Consequently, we chose the number of clusters k such that CHk was maximal.*/
+double KMeans::calcCHIndex(vector< vector< double> > dists){
+ try {
+ double CH = 0.0;
+
+ if (numPartitions < 2) { return CH; }
+
+ map<int, int> clusterMap; //map sample to partition
+ for (int j = 0; j < numSamples; j++) {
+ double maxValue = 0.0;
+ for (int i = 0; i < numPartitions; i++) {
+ if (m->control_pressed) { return 0.0; }
+ if (zMatrix[i][j] > maxValue) { //for kmeans zmatrix contains values for each sample in each partition. partition with highest value for that sample is the partition where the sample should be
+ clusterMap[j] = i;
+ maxValue = zMatrix[i][j];
+ }
+ }
+ }
+
+ double sumBetweenCluster = 0.0;
+ double sumWithinClusters = 0.0;
+
+ for (int i = 0; i < numSamples; i++) { //lt
+ for (int j = 0; j < i; j++) {
+ if (m->control_pressed) { return 0.0; }
+ int partitionI = clusterMap[i];
+ int partitionJ = clusterMap[j];
+
+ if (partitionI == partitionJ) { //they are from the same cluster so this distance is added to Wk
+ sumWithinClusters += (dists[i][j] * dists[i][j]);
+ }else { //they are NOT from the same cluster so this distance is added to Bk
+ sumBetweenCluster += (dists[i][j] * dists[i][j]);
+ }
+ }
+ }
+ //cout << numPartitions << '\t' << sumWithinClusters << '\t' << sumBetweenCluster << '\t' << (numSamples - numPartitions) << endl;
+
+ CH = (sumBetweenCluster / (double)(numPartitions - 1)) / (sumWithinClusters / (double) (numSamples - numPartitions));
+
+ return CH;
+ }
+ catch(exception& e){
+ m->errorOut(e, "KMeans", "calcCHIndex");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+
+
+
--- /dev/null
+//
+// kmeans.h
+// Mothur
+//
+// Created by SarahsWork on 12/4/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_kmeans_h
+#define Mothur_kmeans_h
+
+#include "communitytype.h"
+
+/**************************************************************************************************/
+
+class KMeans : public CommunityTypeFinder {
+
+public:
+ KMeans(vector<vector<int> >, int);
+ vector<double> calcSilhouettes(vector< vector< double> >);
+ double calcCHIndex(vector< vector< double> >);
+
+private:
+
+ int findSecondClosest(vector<int>&, vector<vector<double> >&, map<int, int>);
+ double calcScore(int sample, int partition, vector<vector<double> >&, map<int, int>);
+
+};
+
+/**************************************************************************************************/
+
+#endif
}
/*********************************************************************************************************************************/
//Numerical Recipes pg. 223
-double LinearAlgebra::gammq(const double a, const double x) {
+/*double LinearAlgebra::gammq(const double a, const double x) {
try {
double gamser,gammcf,gln;
return 0;
}
catch(exception& e) {
- m->errorOut(e, "LinearAlgebra", "gammp");
+ m->errorOut(e, "LinearAlgebra", "gammq");
exit(1);
}
}
-/*********************************************************************************************************************************/
+*********************************************************************************************************************************/
//Numerical Recipes pg. 224
double LinearAlgebra::gcf(double& gammcf, const double a, const double x, double& gln){
try {
h *= del;
if (fabs(del-1.0) <= EPS) break;
}
- if (i > ITMAX) { m->mothurOut("[ERROR]: a too large, ITMAX too small in gcf\n"); m->control_pressed = true; }
+ if (i > ITMAX) { m->mothurOut("[ERROR]: " + toString(a) + " too large, ITMAX=100 too small in gcf\n"); m->control_pressed = true; }
gammcf=exp(-x+a*log(x)-gln)*h;
return 0.0;
double lastTerm = 3 * (values.size()+1);
H = firstTerm * middleTerm - lastTerm;
-
+
//adjust for ties
if (TIES.size() != 0) {
double sum = 0.0;
H /= result;
}
+ if (isnan(H) || isinf(H)) { H = 0; }
+
//Numerical Recipes pg221
pValue = 1.0 - (gammp(((treatments.size()-1)/(double)2.0), H/2.0));
double betacf(const double, const double, const double);
double betai(const double, const double, const double);
double gammln(const double);
- double gammq(const double, const double);
+ //double gammq(const double, const double);
double gser(double&, const double, const double, double&);
double gcf(double&, const double, const double, double&);
double erfcc(double);
CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
- CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc);
+ CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc);
CommandParameter poutput("output", "Multiple", "lt-square-column", "lt", "", "", "","",false,false); parameters.push_back(poutput);
CommandParameter pmode("mode", "Multiple", "average-median", "average", "", "", "","",false,false); parameters.push_back(pmode);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
matrixCalculators.push_back(new MemEuclidean());
}else if (Estimators[i] == "mempearson") {
matrixCalculators.push_back(new MemPearson());
+ }else if (Estimators[i] == "jsd") {
+ matrixCalculators.push_back(new JSD());
+
}
}
}
vector<double> tempdata = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
if (m->control_pressed) { return 1; }
-
+
seqDist temp(l, k, tempdata[0]);
calcDists[i].push_back(temp);
}
#include "memchord.h"
#include "memeuclidean.h"
#include "mempearson.h"
+#include "sharedjsd.h"
// aka. dist.shared()
matrixCalculators.push_back(new MemEuclidean());
}else if (pDataArray->Estimators[i] == "mempearson") {
matrixCalculators.push_back(new MemPearson());
+ }else if (pDataArray->Estimators[i] == "jsd") {
+ matrixCalculators.push_back(new JSD());
}
+
}
}
#include "metastatscommand.h"
#include "sharedutilities.h"
+#include "sharedrabundfloatvector.h"
//**********************************************************************************************************************
int MetaStatsCommand::execute(){
try {
- if (abort == true) { if (calledHelp) { return 0; } return 2; }
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
//just used to convert files to test metastats online
/****************************************************/
convertSharedToInput = false;
if (convertInputToShared) { convertToShared(sharedfile); return 0; }
/****************************************************/
-
designMap = new GroupMap(designfile);
designMap->readDesignMap();
string header = m->getline(in); m->gobble(in);
vector<string> groups = m->splitWhiteSpace(header);
- vector<SharedRAbundVector*> newLookup;
+ vector<SharedRAbundFloatVector*> newLookup;
+ cout << groups.size() << endl;
for (int i = 0; i < groups.size(); i++) {
- SharedRAbundVector* temp = new SharedRAbundVector();
+ cout << "creating group " << groups[i] << endl;
+ SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
temp->setLabel("0.03");
temp->setGroup(groups[i]);
newLookup.push_back(temp);
string otuname;
in >> otuname; m->gobble(in);
otuCount++;
-
+ cout << otuname << endl;
for (int i = 0; i < groups.size(); i++) {
- int temp;
+ double temp;
in >> temp; m->gobble(in);
newLookup[i]->push_back(temp, groups[i]);
}
}
out.close();
+ cout << thisfilename+".matrix" << endl;
+
return 0;
}
catch(exception& e) {
--- /dev/null
+//
+// pam.cpp
+// Mothur
+//
+// Created by SarahsWork on 12/10/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "pam.h"
+#define DBL_EPSILON 1e-9
+
+/**************************************************************************************************/
+Pam::Pam(vector<vector<int> > c, vector<vector<double> > d, int p) : CommunityTypeFinder() {
+ try {
+ countMatrix = c;
+ numSamples = (int)d.size();
+ numOTUs = (int)c[0].size();
+ numPartitions = p;
+ dists = d;
+
+ largestDist = 0;
+ for (int i = 0; i < dists.size(); i++) {
+ for (int j = i; j < dists.size(); j++) {
+ if (m->control_pressed) { break; }
+ if (dists[i][j] > largestDist) { largestDist = dists[i][j]; }
+ }
+ }
+
+ buildPhase(); //choosing the medoids
+ swapPhase(); //optimize clusters
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Pam", "Pam");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+//sets Dp[0] does not set Dp[1]. chooses intial medoids.
+int Pam::buildPhase() {
+ try {
+
+ if (m->debug) { m->mothurOut("[DEBUG]: building medoids\n"); }
+
+ vector<double> gains; gains.resize(numSamples);
+
+ largestDist *= 1.1 + 1; //make this distance larger than any distance in the matrix
+ Dp.resize(numSamples);
+ for (int i = 0; i < numSamples; i++) { Dp[i].push_back(largestDist); Dp[i].push_back(largestDist); } //2 smallest dists for this sample in this partition
+
+ zMatrix.resize(numPartitions);
+ for(int i=0;i<numPartitions;i++){
+ zMatrix[i].assign(numSamples, 0);
+ }
+
+ for (int k = 0; k < numPartitions; k++) {
+
+ int medoid = -1;
+ double totalGain = 0.0;
+ double clusterGain = 0.0;
+
+ for (int i = 0; i < numSamples; i++) { //does this need to be square?? can we do lt?
+ if (m->control_pressed) { break; }
+
+ if (medoids.count(i) == 0) { //is this sample is NOT a medoid?
+ gains[i] = 0.0;
+
+ for (int j = 0; j < numSamples; j++) {
+ //cout << i << '\t' << j << '\t' << Dp[i][0] << '\t' << dists[i][j] << '\t' << totalGain << endl;
+ totalGain = Dp[i][0] - dists[i][j];
+ if (totalGain > 0.0) { gains[i] += totalGain; }
+ }
+ if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) + " totalGain = " + toString(totalGain) + "\n"); }
+
+ if (clusterGain <= gains[i]) {
+ clusterGain = gains[i];
+ medoid = i;
+ }
+ }
+ }
+
+ //save medoid value
+ medoids.insert(medoid);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: new medoid " + toString(medoid) + "\n"); }
+
+ //update dp values
+ for (int i = 0; i < numSamples; i++) {
+ if (Dp[i][0] > dists[i][medoid]) { Dp[i][0] = dists[i][medoid]; }
+ }
+ }
+ if (m->debug) { m->mothurOut("[DEBUG]: done building medoids\n"); }
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Pam", "buildPhase");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+//goal to swap medoids with non-medoids to see if we can reduce the overall cost
+int Pam::swapPhase() {
+ try {
+ if (m->debug) { m->mothurOut("[DEBUG]: swapping medoids\n"); }
+ //calculate cost of initial choice - average distance of samples to their closest medoid
+ double sky = 0.0;
+ double dzsky = 1.0;
+ for (int i = 0; i < numSamples; i++) { sky += Dp[i][0]; } sky /= (double) numSamples;
+
+ bool done = false;
+ int hbest, nbest; hbest = -1; nbest = -1;
+ while (!done) {
+ if (m->control_pressed) { break; }
+
+ updateDp();
+
+ dzsky = 1;
+
+ for (int h = 0; h < numSamples; h++) {
+ if (m->control_pressed) { break; }
+ if (medoids.count(h) == 0) { //this is NOT a medoid
+ for (int i = 0; i < numSamples; i++) {
+ if (medoids.count(i) != 0) { //this is a medoid
+
+ double dz = 0.0; //Tih sum of distances between objects and closest medoid caused by swapping i and h. Basically the change in cost. If this < 0 its a "good" swap. When all Tih are > 0, then we stop the algo, because we have the optimal medoids.
+ for (int j = 0; j < numSamples; j++) {
+ if (m->control_pressed) { break; }
+ if (dists[i][j] == Dp[j][0]) {
+ double small = 0.0;
+ if (Dp[j][1] > dists[h][j]) { small = dists[h][j]; }
+ else { small = Dp[j][1]; }
+ dz += (small - Dp[j][0]);
+ }else if (dists[h][j] < Dp[j][0]) {
+ dz += (dists[h][j] - Dp[j][0]);
+ }
+ }
+ if (dzsky > dz) {
+ dzsky = dz;
+ hbest = h;
+ nbest = i;
+ }
+ }//end if medoid
+ }//end for i
+ }//end if NOT medoid
+ }//end if h
+
+ if (dzsky < -16 *DBL_EPSILON * fabs(sky)) {
+ medoids.insert(hbest);
+ medoids.erase(nbest);
+ if (m->debug) { m->mothurOut("[DEBUG]: swapping " + toString(hbest) + " " + toString(nbest) + "\n"); }
+ sky += dzsky;
+ }else { done = true; } //stop algo.
+ }
+
+
+ //fill zmatrix
+ int count = 0;
+ vector<int> tempMedoids;
+ for (set<int>::iterator it = medoids.begin(); it != medoids.end(); it++) {
+ medoid2Partition[*it] = count;
+ zMatrix[count][*it] = 1; count++; //set medoid in this partition.
+ tempMedoids.push_back(*it);
+ }
+
+ //which partition do you belong to?
+ laplace = 0;
+ for (int i = 0; i < numSamples; i++) {
+ int partition = 0;
+ double dist = dists[i][tempMedoids[0]]; //assign to first medoid
+ for (int j = 1; j < tempMedoids.size(); j++) {
+ if (dists[i][tempMedoids[j]] < dist) { //is this medoid closer?
+ dist = dists[i][tempMedoids[j]];
+ partition = j;
+ }
+ }
+ zMatrix[partition][i] = 1;
+ laplace += dist;
+ }
+ laplace /= (double) numSamples;
+
+ if (m->debug) {
+ for(int i=0;i<numPartitions;i++){
+ m->mothurOut("[DEBUG]: partition 1: ");
+ for (int j = 0; j < numSamples; j++) {
+ m->mothurOut(toString(zMatrix[i][j]) + " ");
+ }
+ m->mothurOut("\n");
+ }
+ m->mothurOut("[DEBUG]: medoids : ");
+ for (set<int>::iterator it = medoids.begin(); it != medoids.end(); it++) { m->mothurOut(toString(*it) + " ");
+ }
+ m->mothurOut("\n");
+
+ m->mothurOut("[DEBUG]: laplace : " + toString(laplace)); m->mothurOut("\n");
+ }
+
+ if (m->debug) { m->mothurOut("[DEBUG]: done swapping medoids\n"); }
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Pam", "swapPhase");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+int Pam::updateDp() {
+ try {
+ for (int j = 0; j < numSamples; j++) {
+ if (m->control_pressed) { break; }
+
+ //initialize dp and ep
+ Dp[j][0] = largestDist; Dp[j][1] = largestDist;
+
+ for (int i = 0; i < numSamples; i++) {
+ if (medoids.count(i) != 0) { //is this a medoid?
+ if (Dp[j][0] > dists[j][i]) {
+ Dp[j][0] = Dp[j][1];
+ Dp[j][0] = dists[j][i];
+ }else if (Dp[j][1] > dists[j][i]) {
+ Dp[j][1] = dists[j][i];
+ }
+ }
+ }
+ }
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Pam", "updateDp");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+//The silhouette width S(i)of individual data points i is calculated using the following formula:
+/*
+ s(i) = b(i) - a(i)
+ -----------
+ max(b(i),a(i))
+ where a(i) is the average dissimilarity (or distance) of sample i to all other samples in the same cluster, while b(i) is the average dissimilarity (or distance) to all objects in the closest other cluster.
+
+ The formula implies -1 =< S(i) =< 1 . A sample which is much closer to its own cluster than to any other cluster has a high S(i) value, while S(i) close to 0 implies that the given sample lies somewhere between two clusters. Large negative S(i) values indicate that the sample was assigned to the wrong cluster.
+ */
+
+vector<double> Pam::calcSilhouettes(vector<vector<double> > dists) {
+ try {
+ vector<double> silhouettes; silhouettes.resize(numSamples, 0.0);
+ if (numPartitions < 2) { return silhouettes; }
+
+ vector<int> whoDp;
+ for (int i = 0; i < numSamples; i++) { // assumes at least 2 partitions
+ vector<seqDist> tempMeds;
+ for (set<int>::iterator it = medoids.begin(); it != medoids.end(); it++) {
+ if (m->control_pressed) { break; }
+ seqDist temp(*it, *it, dists[i][*it]); //medoid, medoid, distance from medoid to sample
+ tempMeds.push_back(temp);
+ }
+ sort(tempMeds.begin(), tempMeds.end(), compareSequenceDistance); //sort by distance
+ whoDp.push_back(tempMeds[1].seq1); //second closest medoid
+ }
+
+
+ vector<double> a, b; a.resize(numSamples, 0.0); b.resize(numSamples, 0.0);
+
+ //calc a - all a[i] are the same in the same partition
+ for (int k = 0; k < numPartitions; k++) {
+ if (m->control_pressed) { break; }
+
+ int count = 0;
+ double totalA = 0.0;
+ for (int i = 0; i < numSamples; i++) {
+ for (int j = 0; j < numSamples; j++) {
+ if (m->control_pressed) { break; }
+ if ((zMatrix[k][i] != 0) && (zMatrix[k][j] != 0)){ //are both samples in the partition, if so add there distance
+ totalA += dists[j][i]; //distance from this sample to the other samples in the partition
+ count++;
+ }
+ }
+ }
+ totalA /= (double) count;
+
+ //set a[i] to average for cluster
+ for (int i = 0; i < numSamples; i++) {
+ if (zMatrix[k][i] != 0) { a[i] = totalA; }
+ }
+ }
+
+ //calc b
+ for (int i = 0; i < numSamples; i++) {
+ if (m->control_pressed) { break; }
+
+ int nextClosestMedoid = whoDp[i];
+ int thisPartition = medoid2Partition[nextClosestMedoid];
+ int count = 0;
+ double totalB = 0.0;
+ for (int j = 0; j < numSamples; j++) {
+ if (zMatrix[thisPartition][j] != 0) { //this sample is in this partition
+ totalB += dists[i][j];
+ count++;
+ }
+ }
+ b[i] = totalB / (double) count;
+ }
+
+ //calc silhouettes
+ for (int i = 0; i < numSamples; i++) {
+ if (m->control_pressed) { break; }
+
+ double denom = a[i];
+ if (b[i] > denom) { denom = b[i]; } //max(a[i],b[i])
+
+ silhouettes[i] = (b[i] - a[i]) / denom;
+
+ //cout << "silhouettes " << i << '\t' << silhouettes[i] << endl;
+ }
+
+ return silhouettes;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Pam", "calcSilhouettes");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+/*To assess the optimal number of clusters our dataset was most robustly partitioned into, we used the Calinski-Harabasz (CH) Index that has shown good performance in recovering the number of clusters. It is defined as:
+
+ CHk=Bk/(k−1)/Wk/(n−k)
+
+ where Bk is the between-cluster sum of squares (i.e. the squared distances between all points i and j, for which i and j are not in the same cluster) and Wk is the within-clusters sum of squares (i.e. the squared distances between all points i and j, for which i and j are in the same cluster). This measure implements the idea that the clustering is more robust when between-cluster distances are substantially larger than within-cluster distances. Consequently, we chose the number of clusters k such that CHk was maximal.*/
+double Pam::calcCHIndex(vector< vector<double> > dists){ //countMatrix = [numSamples][numOtus]
+ try {
+ double CH = 0.0;
+
+ if (numPartitions < 2) { return CH; }
+
+ map<int, int> clusterMap; //map sample to partition
+ for (int i = 0; i < numPartitions; i++) {
+ for (int j = 0; j < numSamples; j++) {
+ if (m->control_pressed) { return 0.0; }
+ if (zMatrix[i][j] != 0) { clusterMap[j] = i; }
+ }
+ }
+
+ double sumBetweenCluster = 0.0;
+ double sumWithinClusters = 0.0;
+
+ for (int i = 0; i < numSamples; i++) { //lt
+ for (int j = 0; j < i; j++) {
+ if (m->control_pressed) { return 0.0; }
+ int partitionI = clusterMap[i];
+ int partitionJ = clusterMap[j];
+
+ if (partitionI == partitionJ) { //they are from the same cluster so this distance is added to Wk
+ sumWithinClusters += (dists[i][j] * dists[i][j]);
+ }else { //they are NOT from the same cluster so this distance is added to Bk
+ sumBetweenCluster += (dists[i][j] * dists[i][j]);
+ }
+ }
+ }
+ //cout << numPartitions << '\t' << sumWithinClusters << '\t' << sumBetweenCluster << '\t' << (numSamples - numPartitions) << endl;
+
+ CH = (sumBetweenCluster / (double)(numPartitions - 1)) / (sumWithinClusters / (double) (numSamples - numPartitions));
+
+ return CH;
+ }
+ catch(exception& e){
+ m->errorOut(e, "Pam", "calcCHIndex");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+
+
--- /dev/null
+//
+// pam.h
+// Mothur
+//
+// Created by SarahsWork on 12/10/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_pam_h
+#define Mothur_pam_h
+
+#include "communitytype.h"
+
+/**************************************************************************************************/
+
+class Pam : public CommunityTypeFinder {
+
+public:
+ Pam(vector<vector<int> >, vector<vector<double> >, int);
+ vector<double> calcSilhouettes(vector< vector< double> >);
+ double calcCHIndex(vector< vector< double> >);
+
+private:
+ set<int> medoids;
+ map<int, int> medoid2Partition;
+ double largestDist;
+ vector<vector<double> > dists;
+ vector<vector< double> > Dp; // [numSamples][2] - It contains Dp and Ep. Dp is in [numSamples][0] and Ep is in [numSamples][1]. Dp is the distance between p and the closest sample in S and Ep is the distance between p and the second closest object in S. Both are used in the build and swap phases.
+
+ int buildPhase();
+ int swapPhase();
+ int updateDp();
+
+
+
+/**************************************************************************************************/
+};
+
+
+#endif
//
#include "qFinderDMM.h"
-#include "linearalgebra.h"
-#define EPSILON numeric_limits<double>::epsilon()
+
/**************************************************************************************************/
-qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p): countMatrix(cm), numPartitions(p){
+qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p) : CommunityTypeFinder() {
try {
- m = MothurOut::getInstance();
+ //cout << "here" << endl;
+ numPartitions = p;
+ countMatrix = cm;
numSamples = (int)countMatrix.size();
numOTUs = (int)countMatrix[0].size();
-
- kMeans();
- // cout << "done kMeans" << endl;
+ //cout << "before kmeans" <<endl;
+ findkMeans();
+ //cout << "done kMeans" << endl;
optimizeLambda();
- // cout << "done optimizeLambda" << endl;
+ //cout << "done optimizeLambda" << endl;
double change = 1.0000;
currNLL = 0.0000;
while(change > 1.0e-6 && iter < 100){
- // cout << "Calc_Z: ";
+ //cout << "Calc_Z: ";
calculatePiK();
optimizeLambda();
- // printf("Iter:%d\t",iter);
+ //printf("Iter:%d\t",iter);
for(int i=0;i<numPartitions;i++){
weights[i] = 0.0000;
for(int j=0;j<numSamples;j++){
weights[i] += zMatrix[i][j];
}
- // printf("w_%d=%.3f\t",i,weights[i]);
+ // printf("w_%d=%.3f\t",i,weights[i]);
}
currNLL = nLL;
- // printf("NLL=%.5f\tDelta=%.4e\n",currNLL, change);
+ // printf("NLL=%.5f\tDelta=%.4e\n",currNLL, change);
iter++;
}
exit(1);
}
}
-
/**************************************************************************************************/
-
-void qFinderDMM::printZMatrix(string fileName, vector<string> sampleName){
+void qFinderDMM::printFitData(ofstream& out){
try {
- ofstream printMatrix;
- m->openOutputFile(fileName, printMatrix); //(fileName.c_str());
- printMatrix.setf(ios::fixed, ios::floatfield);
- printMatrix.setf(ios::showpoint);
-
- for(int i=0;i<numPartitions;i++){ printMatrix << "\tPartition_" << i+1; } printMatrix << endl;
-
- for(int i=0;i<numSamples;i++){
- printMatrix << sampleName[i];
- for(int j=0;j<numPartitions;j++){
- printMatrix << setprecision(4) << '\t' << zMatrix[j][i];
- }
- printMatrix << endl;
- }
- printMatrix.close();
+ out << setprecision (2) << numPartitions << '\t' << getNLL() << '\t' << getLogDet() << '\t' << getBIC() << '\t' << getAIC() << '\t' << laplace << endl;
+ return;
+ }
+ catch(exception& e){
+ m->errorOut(e, "CommunityTypeFinder", "printFitData");
+ exit(1);
}
- catch(exception& e) {
- m->errorOut(e, "qFinderDMM", "printZMatrix");
- exit(1);
- }
}
-
/**************************************************************************************************/
-
-void qFinderDMM::printRelAbund(string fileName, vector<string> otuNames){
+void qFinderDMM::printFitData(ostream& out, double minLaplace){
try {
- ofstream printRA;
- m->openOutputFile(fileName, printRA); //(fileName.c_str());
- printRA.setf(ios::fixed, ios::floatfield);
- printRA.setf(ios::showpoint);
-
- vector<double> totals(numPartitions, 0.0000);
- for(int i=0;i<numPartitions;i++){
- for(int j=0;j<numOTUs;j++){
- totals[i] += exp(lambdaMatrix[i][j]);
- }
+ if(laplace < minLaplace){
+ out << setprecision (2) << numPartitions << '\t' << getNLL() << '\t' << getLogDet() << '\t' << getBIC() << '\t' << getAIC() << '\t' << laplace << "***" << endl;
+ }else {
+ out << setprecision (2) << numPartitions << '\t' << getNLL() << '\t' << getLogDet() << '\t' << getBIC() << '\t' << getAIC() << '\t' << laplace << endl;
}
- printRA << "Taxon";
- for(int i=0;i<numPartitions;i++){
- printRA << "\tPartition_" << i+1 << '_' << setprecision(4) << totals[i];
- printRA << "\tPartition_" << i+1 <<"_LCI" << "\tPartition_" << i+1 << "_UCI";
- }
- printRA << endl;
-
- for(int i=0;i<numOTUs;i++){
-
- if (m->control_pressed) { break; }
-
- printRA << otuNames[i];
- for(int j=0;j<numPartitions;j++){
-
- if(error[j][i] >= 0.0000){
- double std = sqrt(error[j][i]);
- printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
- printRA << '\t' << 100 * exp(lambdaMatrix[j][i] - 2.0 * std) / totals[j];
- printRA << '\t' << 100 * exp(lambdaMatrix[j][i] + 2.0 * std) / totals[j];
- }
- else{
- printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
- printRA << '\t' << "NA";
- printRA << '\t' << "NA";
- }
- }
- printRA << endl;
- }
-
- printRA.close();
+ m->mothurOutJustToLog(toString(numPartitions) + '\t' + toString(getNLL()) + '\t' + toString(getLogDet()) + '\t');
+ m->mothurOutJustToLog(toString(getBIC()) + '\t' + toString(getAIC()) + '\t' + toString(laplace));
+
+ return;
+ }
+ catch(exception& e){
+ m->errorOut(e, "CommunityTypeFinder", "printFitData");
+ exit(1);
}
- catch(exception& e) {
- m->errorOut(e, "qFinderDMM", "printRelAbund");
- exit(1);
- }
}
/**************************************************************************************************/
double zmin = zl, fmin = fl;
- if (fh < fmin) { zmin = zh; fmin = fh; }
+ if (fh < fmin) { zmin = zh; fmin = fh; }
if (c > 0) /* positive curvature required for a minimum */
{
*
* c(x) = f0 + fp0 * z + eta * z^2 + xi * z^3
*
- * where eta=3*(f1-f0)-2*fp0-fp1, xi=fp0+fp1-2*(f1-f0).
+ * where eta=3*(f1-f0)-2*fp0-fp1, xi=fp0+fp1-2*(f1-f0).
*/
double cubic (double c0, double c1, double c2, double c3, double z){
double y = cubic (c0, c1, c2, c3, z);
- if (y < *fmin)
+ if (y < *fmin)
{
*zmin = z; /* accepted new point*/
*fmin = y;
/**************************************************************************************************/
-int gsl_poly_solve_quadratic (double a, double b, double c,
+int gsl_poly_solve_quadratic (double a, double b, double c,
double *x0, double *x1)
{
double r1 = temp / a ;
double r2 = c / temp ;
- if (r1 < r2)
+ if (r1 < r2)
{
*x0 = r1 ;
*x1 = r2 ;
- }
- else
+ }
+ else
{
*x0 = r2 ;
*x1 = r1 ;
}
return 2;
}
- else if (disc == 0)
+ else if (disc == 0)
{
*x0 = -0.5 * b / a ;
*x1 = -0.5 * b / a ;
{
return 0;
}
-
+
}
/**************************************************************************************************/
if (n == 2) /* found 2 roots */
{
- if (z0 > zl && z0 < zh)
+ if (z0 > zl && z0 < zh)
check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin);
- if (z1 > zl && z1 < zh)
+ if (z1 > zl && z1 < zh)
check_extremum (c0, c1, c2, c3, z1, &zmin, &fmin);
}
else if (n == 1) /* found 1 root */
{
- if (z0 > zl && z0 < zh)
+ if (z0 > zl && z0 < zh)
check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin);
}
}
else{
z = interp_quad(fa, fpa * (b - a), fb, zmin, zmax);
}
-
+
alpha = a + z * (b - a);
}
}
-/**************************************************************************************************/
-
-//can we get these psi/psi1 calculations into their own math class?
-//psi calcualtions swiped from gsl library...
-
-static const double psi_cs[23] = {
- -.038057080835217922,
- .491415393029387130,
- -.056815747821244730,
- .008357821225914313,
- -.001333232857994342,
- .000220313287069308,
- -.000037040238178456,
- .000006283793654854,
- -.000001071263908506,
- .000000183128394654,
- -.000000031353509361,
- .000000005372808776,
- -.000000000921168141,
- .000000000157981265,
- -.000000000027098646,
- .000000000004648722,
- -.000000000000797527,
- .000000000000136827,
- -.000000000000023475,
- .000000000000004027,
- -.000000000000000691,
- .000000000000000118,
- -.000000000000000020
-};
-
-static double apsi_cs[16] = {
- -.0204749044678185,
- -.0101801271534859,
- .0000559718725387,
- -.0000012917176570,
- .0000000572858606,
- -.0000000038213539,
- .0000000003397434,
- -.0000000000374838,
- .0000000000048990,
- -.0000000000007344,
- .0000000000001233,
- -.0000000000000228,
- .0000000000000045,
- -.0000000000000009,
- .0000000000000002,
- -.0000000000000000
-};
-
-/**************************************************************************************************/
-
-double qFinderDMM::cheb_eval(const double seriesData[], int order, double xx){
- try {
- double d = 0.0000;
- double dd = 0.0000;
-
- double x2 = xx * 2.0000;
-
- for(int j=order;j>=1;j--){
- if (m->control_pressed) { return 0; }
- double temp = d;
- d = x2 * d - dd + seriesData[j];
- dd = temp;
- }
-
- d = xx * d - dd + 0.5 * seriesData[0];
- return d;
- }
- catch(exception& e){
- m->errorOut(e, "qFinderDMM", "cheb_eval");
- exit(1);
- }
-}
-
-/**************************************************************************************************/
-
-double qFinderDMM::psi(double xx){
- try {
- double psiX = 0.0000;
-
- if(xx < 1.0000){
-
- double t1 = 1.0 / xx;
- psiX = cheb_eval(psi_cs, 22, 2.0*xx-1.0);
- psiX = -t1 + psiX;
-
- }
- else if(xx < 2.0000){
-
- const double v = xx - 1.0;
- psiX = cheb_eval(psi_cs, 22, 2.0*v-1.0);
-
- }
- else{
- const double t = 8.0/(xx*xx)-1.0;
- psiX = cheb_eval(apsi_cs, 15, t);
- psiX += log(xx) - 0.5/xx;
- }
-
- return psiX;
- }
- catch(exception& e){
- m->errorOut(e, "qFinderDMM", "psi");
- exit(1);
- }
-}
-
-/**************************************************************************************************/
-
-/* coefficients for Maclaurin summation in hzeta()
- * B_{2j}/(2j)!
- */
-static double hzeta_c[15] = {
- 1.00000000000000000000000000000,
- 0.083333333333333333333333333333,
- -0.00138888888888888888888888888889,
- 0.000033068783068783068783068783069,
- -8.2671957671957671957671957672e-07,
- 2.0876756987868098979210090321e-08,
- -5.2841901386874931848476822022e-10,
- 1.3382536530684678832826980975e-11,
- -3.3896802963225828668301953912e-13,
- 8.5860620562778445641359054504e-15,
- -2.1748686985580618730415164239e-16,
- 5.5090028283602295152026526089e-18,
- -1.3954464685812523340707686264e-19,
- 3.5347070396294674716932299778e-21,
- -8.9535174270375468504026113181e-23
-};
-
-/**************************************************************************************************/
-
-double qFinderDMM::psi1(double xx){
- try {
-
- /* Euler-Maclaurin summation formula
- * [Moshier, p. 400, with several typo corrections]
- */
-
- double s = 2.0000;
- const int jmax = 12;
- const int kmax = 10;
- int j, k;
- const double pmax = pow(kmax + xx, -s);
- double scp = s;
- double pcp = pmax / (kmax + xx);
- double value = pmax*((kmax+xx)/(s-1.0) + 0.5);
-
- for(k=0; k<kmax; k++) {
- if (m->control_pressed) { return 0; }
- value += pow(k + xx, -s);
- }
-
- for(j=0; j<=jmax; j++) {
- if (m->control_pressed) { return 0; }
- double delta = hzeta_c[j+1] * scp * pcp;
- value += delta;
-
- if(fabs(delta/value) < 0.5*EPSILON) break;
-
- scp *= (s+2*j+1)*(s+2*j+2);
- pcp /= (kmax + xx)*(kmax + xx);
- }
-
- return value;
- }
- catch(exception& e){
- m->errorOut(e, "qFinderDMM", "psi1");
- exit(1);
- }
-}
/**************************************************************************************************/
/**************************************************************************************************/
-void qFinderDMM::kMeans(){
- try {
-
- vector<vector<double> > relativeAbundance(numSamples);
- vector<vector<double> > alphaMatrix;
-
- alphaMatrix.resize(numPartitions);
- lambdaMatrix.resize(numPartitions);
- for(int i=0;i<numPartitions;i++){
- alphaMatrix[i].assign(numOTUs, 0);
- lambdaMatrix[i].assign(numOTUs, 0);
- }
-
- //get relative abundance
- for(int i=0;i<numSamples;i++){
- if (m->control_pressed) { return; }
- int groupTotal = 0;
-
- relativeAbundance[i].assign(numOTUs, 0.0);
-
- for(int j=0;j<numOTUs;j++){
- groupTotal += countMatrix[i][j];
- }
- for(int j=0;j<numOTUs;j++){
- relativeAbundance[i][j] = countMatrix[i][j] / (double)groupTotal;
- }
- }
-
- //randomly assign samples into partitions
- zMatrix.resize(numPartitions);
- for(int i=0;i<numPartitions;i++){
- zMatrix[i].assign(numSamples, 0);
- }
-
- for(int i=0;i<numSamples;i++){
- zMatrix[rand()%numPartitions][i] = 1;
- }
-
- double maxChange = 1;
- int maxIters = 1000;
- int iteration = 0;
-
- weights.assign(numPartitions, 0);
-
- while(maxChange > 1e-6 && iteration < maxIters){
-
- if (m->control_pressed) { return; }
- //calcualte average relative abundance
- maxChange = 0.0000;
- for(int i=0;i<numPartitions;i++){
-
- double normChange = 0.0;
-
- weights[i] = 0;
-
- for(int j=0;j<numSamples;j++){
- weights[i] += (double)zMatrix[i][j];
- }
-
- vector<double> averageRelativeAbundance(numOTUs, 0);
- for(int j=0;j<numOTUs;j++){
- for(int k=0;k<numSamples;k++){
- averageRelativeAbundance[j] += zMatrix[i][k] * relativeAbundance[k][j];
- }
- }
-
- for(int j=0;j<numOTUs;j++){
- averageRelativeAbundance[j] /= weights[i];
- double difference = averageRelativeAbundance[j] - alphaMatrix[i][j];
- normChange += difference * difference;
- alphaMatrix[i][j] = averageRelativeAbundance[j];
- }
-
- normChange = sqrt(normChange);
-
- if(normChange > maxChange){ maxChange = normChange; }
- }
-
-
- //calcualte distance between each sample in partition adn the average relative abundance
- for(int i=0;i<numSamples;i++){
- if (m->control_pressed) { return; }
-
- double normalizationFactor = 0;
- vector<double> totalDistToPartition(numPartitions, 0);
-
- for(int j=0;j<numPartitions;j++){
- for(int k=0;k<numOTUs;k++){
- double difference = alphaMatrix[j][k] - relativeAbundance[i][k];
- totalDistToPartition[j] += difference * difference;
- }
- totalDistToPartition[j] = sqrt(totalDistToPartition[j]);
- normalizationFactor += exp(-50.0 * totalDistToPartition[j]);
- }
-
-
- for(int j=0;j<numPartitions;j++){
- zMatrix[j][i] = exp(-50.0 * totalDistToPartition[j]) / normalizationFactor;
- }
-
- }
-
- iteration++;
- // cout << "K means: " << iteration << '\t' << maxChange << endl;
-
- }
-
- // cout << "Iter:-1";
- for(int i=0;i<numPartitions;i++){
- weights[i] = 0.0000;
-
- for(int j=0;j<numSamples;j++){
- weights[i] += zMatrix[i][j];
- }
- // printf("\tw_%d=%.3f", i, weights[i]);
- }
- // cout << endl;
-
-
- for(int i=0;i<numOTUs;i++){
- if (m->control_pressed) { return; }
- for(int j=0;j<numPartitions;j++){
- if(alphaMatrix[j][i] > 0){
- lambdaMatrix[j][i] = log(alphaMatrix[j][i]);
- }
- else{
- lambdaMatrix[j][i] = -10.0;
- }
- }
- }
- }
- catch(exception& e){
- m->errorOut(e, "qFinderDMM", "kMeans");
- exit(1);
- }
-}
-
-/**************************************************************************************************/
-
void qFinderDMM::optimizeLambda(){
try {
for(currentPartition=0;currentPartition<numPartitions;currentPartition++){
exit(1);
}
}
-
/**************************************************************************************************/
void qFinderDMM::calculatePiK(){
}
-
-/**************************************************************************************************/
-
-vector<vector<double> > qFinderDMM::getHessian(){
- try {
- vector<double> alpha(numOTUs, 0.0000);
- double alphaSum = 0.0000;
-
- vector<double> pi = zMatrix[currentPartition];
- vector<double> psi_ajk(numOTUs, 0.0000);
- vector<double> psi_cjk(numOTUs, 0.0000);
- vector<double> psi1_ajk(numOTUs, 0.0000);
- vector<double> psi1_cjk(numOTUs, 0.0000);
-
- for(int j=0;j<numOTUs;j++){
-
- if (m->control_pressed) { break; }
-
- alpha[j] = exp(lambdaMatrix[currentPartition][j]);
- alphaSum += alpha[j];
-
- for(int i=0;i<numSamples;i++){
- double X = (double) countMatrix[i][j];
-
- psi_ajk[j] += pi[i] * psi(alpha[j]);
- psi1_ajk[j] += pi[i] * psi1(alpha[j]);
-
- psi_cjk[j] += pi[i] * psi(alpha[j] + X);
- psi1_cjk[j] += pi[i] * psi1(alpha[j] + X);
- }
- }
-
-
- double psi_Ck = 0.0000;
- double psi1_Ck = 0.0000;
-
- double weight = 0.0000;
-
- for(int i=0;i<numSamples;i++){
- if (m->control_pressed) { break; }
- weight += pi[i];
- double sum = 0.0000;
- for(int j=0;j<numOTUs;j++){ sum += alpha[j] + countMatrix[i][j]; }
-
- psi_Ck += pi[i] * psi(sum);
- psi1_Ck += pi[i] * psi1(sum);
- }
-
- double psi_Ak = weight * psi(alphaSum);
- double psi1_Ak = weight * psi1(alphaSum);
-
- vector<vector<double> > hessian(numOTUs);
- for(int i=0;i<numOTUs;i++){ hessian[i].assign(numOTUs, 0.0000); }
-
- for(int i=0;i<numOTUs;i++){
- if (m->control_pressed) { break; }
- double term1 = -alpha[i] * (- psi_ajk[i] + psi_Ak + psi_cjk[i] - psi_Ck);
- double term2 = -alpha[i] * alpha[i] * (-psi1_ajk[i] + psi1_Ak + psi1_cjk[i] - psi1_Ck);
- double term3 = 0.1 * alpha[i];
-
- hessian[i][i] = term1 + term2 + term3;
-
- for(int j=0;j<i;j++){
- hessian[i][j] = - alpha[i] * alpha[j] * (psi1_Ak - psi1_Ck);
- hessian[j][i] = hessian[i][j];
- }
- }
-
- return hessian;
- }
- catch(exception& e){
- m->errorOut(e, "qFinderDMM", "getHessian");
- exit(1);
- }
-}
-
/**************************************************************************************************/
#ifndef pds_dmm_qFinderDMM_h
#define pds_dmm_qFinderDMM_h
-/**************************************************************************************************/
-
-#include "mothurout.h"
+#include "communitytype.h"
/**************************************************************************************************/
-class qFinderDMM {
+class qFinderDMM : public CommunityTypeFinder {
public:
qFinderDMM(vector<vector<int> >, int);
- double getNLL() { return currNLL; }
- double getAIC() { return aic; }
- double getBIC() { return bic; }
- double getLogDet() { return logDeterminant; }
- double getLaplace() { return laplace; }
- void printZMatrix(string, vector<string>);
- void printRelAbund(string, vector<string>);
-
+ void printFitData(ofstream&);
+ void printFitData(ostream&, double);
+
private:
- MothurOut* m;
- void kMeans();
+
void optimizeLambda();
void calculatePiK();
void negativeLogDerivEvidenceLambdaPi(vector<double>&, vector<double>&);
double getNegativeLogEvidence(vector<double>&, int);
double getNegativeLogLikelihood();
- vector<vector<double> > getHessian();
+
int lineMinimizeFletcher(vector<double>&, vector<double>&, double, double, double, double&, double&, vector<double>&, vector<double>&);
int bfgs2_Solver(vector<double>&);//, double, double);
- double cheb_eval(const double[], int, double);
- double psi(double);
- double psi1(double);
-
- vector<vector<int> > countMatrix;
- vector<vector<double> > zMatrix;
- vector<vector<double> > lambdaMatrix;
- vector<double> weights;
- vector<vector<double> > error;
-
- int numPartitions;
- int numSamples;
- int numOTUs;
- int currentPartition;
-
- double currNLL;
- double aic;
- double bic;
- double logDeterminant;
- double laplace;
+
+
+
};
/**************************************************************************************************/
void QualityScores::printQScores(ofstream& qFile){
try {
- double aveQScore = calculateAverage();
+ double aveQScore = calculateAverage(false);
qFile << '>' << seqName << '\t' << aveQScore << endl;
/**************************************************************************************************/
-bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
+bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold, bool logTransform){
try {
string rawSequence = sequence.getUnaligned();
int seqLength = sequence.getNumBases();
int end = -1;
double rollingSum = 0.0000;
+ double value = 0.0;
for(int i=0;i<seqLength;i++){
-
- rollingSum += (double)qScores[i];
+
+ if (logTransform) {
+ rollingSum += (double)pow(10.0, qScores[i]);
+ value = log10(rollingSum / (double)(i+1));
+
+ } //Sum 10^Q
+ else {
+ rollingSum += (double)qScores[i];
+ value = rollingSum / (double)(i+1);
+ }
+
- if(rollingSum / (double)(i+1) < qThreshold){
+ if(value < qThreshold){
end = i;
break;
}
/**************************************************************************************************/
-bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
+bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold, bool logTransform){
try {
string rawSequence = sequence.getUnaligned();
int seqLength = sequence.getNumBases();
double windowSum = 0.0000;
for(int i=start;i<end;i++){
- windowSum += qScores[i];
+ if (logTransform) { windowSum += pow(10.0, qScores[i]); }
+ else { windowSum += qScores[i]; }
}
- double windowAverage = windowSum / (double)(end-start);
+ double windowAverage = 0.0;
+ if (logTransform) { windowAverage = log10(windowSum / (double)(end-start)); }
+ else { windowAverage = windowSum / (double)(end-start); }
if(windowAverage < qThreshold){
end = end - stepSize;
/**************************************************************************************************/
-double QualityScores::calculateAverage(){
+double QualityScores::calculateAverage(bool logTransform){
double aveQScore = 0.0000;
for(int i=0;i<seqLength;i++){
- aveQScore += (double) qScores[i];
+ if (logTransform) { aveQScore += pow(10.0, qScores[i]); }
+ else { aveQScore += qScores[i]; }
}
aveQScore /= (double) seqLength;
+
+ if (logTransform) { aveQScore = log10(aveQScore /(double) seqLength); }
+ else { aveQScore /= (double) seqLength; }
return aveQScore;
}
/**************************************************************************************************/
-bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
+bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage, bool logTransform){
try {
string rawSequence = sequence.getUnaligned();
bool success = 0; //guilty until proven innocent
m->mothurOutEndLine();
}
- double aveQScore = calculateAverage();
+ double aveQScore = calculateAverage(logTransform);
if(aveQScore >= qAverage) { success = 1; }
else { success = 0; }
void trimQScores(int, int);
void flipQScores();
bool stripQualThreshold(Sequence&, double);
- bool stripQualRollingAverage(Sequence&, double);
- bool stripQualWindowAverage(Sequence&, int, int, double);
- bool cullQualAverage(Sequence&, double);
+ bool stripQualRollingAverage(Sequence&, double, bool);
+ bool stripQualWindowAverage(Sequence&, int, int, double, bool);
+ bool cullQualAverage(Sequence&, double, bool);
void updateQScoreErrorMap(map<char, vector<int> >&, string, int, int, int);
void updateForwardMap(vector<vector<int> >&, int, int, int);
void updateReverseMap(vector<vector<int> >&, int, int, int);
private:
- double calculateAverage();
+ double calculateAverage(bool);
MothurOut* m;
vector<int> qScores;
bool done = false;
int count = 0;
-
+
+
while (!done) {
if (m->control_pressed) { in.close(); outSummary.close(); return 1; }
ambigBases.push_back(current.getAmbigBases());
longHomoPolymer.push_back(current.getLongHomoPolymer());
}
-
count++;
outSummary << current.getName() << '\t';
outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
--- /dev/null
+//
+// shannonrange.cpp
+// Mothur
+//
+// Created by SarahsWork on 1/3/14.
+// Copyright (c) 2014 Schloss Lab. All rights reserved.
+//
+
+#include "shannonrange.h"
+
+/***********************************************************************/
+
+EstOutput RangeShannon::getValues(vector<SharedRAbundVector*> shared) {
+ try {
+ data.resize(3,0);
+
+ double commSize = 1e20;
+
+ SAbundVector sabund1 = shared[0]->getSAbundVector();
+ SAbundVector sabund2 = shared[1]->getSAbundVector();
+
+ double sampleSize = 0;
+ for (int i = 0; i < sabund1.getNumBins(); i++) { sampleSize += (sabund1.get(i) * sabund2.get(i)); }
+ int aux = ceil(pow((sampleSize+1), 0.33333));
+
+ if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
+
+ return data;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RangeShannon", "getValues");
+ exit(1);
+ }
+}
+/***********************************************************************/
--- /dev/null
+//
+// shannonrange.h
+// Mothur
+//
+// Created by SarahsWork on 1/3/14.
+// Copyright (c) 2014 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_shannonrange_h
+#define Mothur_shannonrange_h
+
+#include "calculator.h"
+
+/***********************************************************************/
+
+class RangeShannon : public Calculator {
+
+public:
+ RangeShannon() : Calculator("rangeshannon", 3, false) {};
+ EstOutput getValues(SAbundVector*) {return data;};
+ EstOutput getValues(vector<SharedRAbundVector*>);
+ string getCitation() { return "http://www.mothur.org/wiki/rangeshannon"; }
+};
+
+/***********************************************************************/
+
+
+
+#endif
--- /dev/null
+//
+// sharedjsd.cpp
+// Mothur
+//
+// Created by SarahsWork on 12/9/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "sharedjsd.h"
+
+/***********************************************************************/
+//KLD <- function(x,y) sum(x *log(x/y))
+//JSD<- function(x,y) sqrt(0.5 * KLD(x, (x+y)/2) + 0.5 * KLD(y, (x+y)/2))
+EstOutput JSD::getValues(vector<SharedRAbundVector*> shared) {
+ try {
+
+ data.resize(1,0);
+
+ double KLD1 = 0.0;
+ double KLD2 = 0.0;
+
+ for (int i = 0; i < shared[0]->getNumBins(); i++) {
+ //store in temps to avoid multiple repetitive function calls
+ double tempA = shared[0]->getAbundance(i);
+ double tempB = shared[1]->getAbundance(i);
+
+ if (tempA == 0) { tempA = 0.000001; }
+ if (tempB == 0) { tempB = 0.000001; }
+
+ double denom = (tempA+tempB)/(double)2.0;
+
+ if (tempA != 0) { KLD1 += tempA * log(tempA/denom); } //KLD(x,m)
+ if (tempB != 0) { KLD2 += tempB * log(tempB/denom); } //KLD(y,m)
+ }
+
+ data[0] = sqrt((0.5*KLD1) + (0.5*KLD2));
+
+ if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
+
+ return data;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "JSD", "getValues");
+ exit(1);
+ }
+}
+
+/***********************************************************************/
--- /dev/null
+//
+// sharedjsd.h
+// Mothur
+//
+// Created by SarahsWork on 12/9/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_sharedjsd_h
+#define Mothur_sharedjsd_h
+
+#include "calculator.h"
+
+/***********************************************************************/
+//Jensen-Shannon divergence (JSD)
+class JSD : public Calculator {
+
+public:
+ JSD() : Calculator("jsd", 1, false) {};
+ EstOutput getValues(SAbundVector*) {return data;};
+ EstOutput getValues(vector<SharedRAbundVector*>);
+ string getCitation() { return "http://www.mothur.org/wiki/JSD"; }
+private:
+
+};
+
+/***********************************************************************/
+
+
+#endif
CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
- CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
+ CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
sumCalculators.push_back(new MemEuclidean());
}else if (Estimators[i] == "mempearson") {
sumCalculators.push_back(new MemPearson());
+ }else if (Estimators[i] == "jsd") {
+ sumCalculators.push_back(new JSD());
}
}
}
#include "memchord.h"
#include "memeuclidean.h"
#include "mempearson.h"
+#include "sharedjsd.h"
class SummarySharedCommand : public Command {
sumCalculators.push_back(new MemEuclidean());
}else if (pDataArray->Estimators[i] == "mempearson") {
sumCalculators.push_back(new MemPearson());
+ }else if (pDataArray->Estimators[i] == "jsd") {
+ sumCalculators.push_back(new JSD());
}
}
}
if(line->start == 0){
flowFile >> numFlows; m->gobble(flowFile);
- scrapFlowFile << maxFlows << endl;
+ scrapFlowFile << numFlows << endl;
trimFlowFile << maxFlows << endl;
if(allFiles){
for(int i=0;i<thisBarcodePrimerComboFileNames.size();i++){
else if(type == "REVERSE"){
string oligoRC = reverseOligo(oligo);
revPrimer.push_back(oligoRC);
+ if (m->debug) { m->mothurOut("[DEBUG]: reverse oligos = " + oligoRC + ".\n"); }
}
else if(type == "BARCODE"){
oligosFile >> group;
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
+ CommandParameter plogtransform("logtransform", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plogtransform);
CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
string helpString = "";
helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
- helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
+ helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast, logtransform and allfiles.\n";
helpString += "The fasta parameter is required.\n";
helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
+ helpString += "The logtransform parameter allows you to indicate you want the averages for the qwindowaverage, rollaverage and qaverage to be calculated using a logtransform. Default=F.\n";
helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; }
keepforward = m->isTrue(temp);
+ temp = validParameter.validFile(parameters, "logtransform", false); if (temp == "not found") { temp = "F"; }
+ logtransform = m->isTrue(temp);
+
temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }
reorient = m->isTrue(temp);
}
}
- if (!pairedOligos) { if (reorient) { m->mothurOut("[WARNING]: You cannot use reorient without paired barcodes or primers, skipping."); m->mothurOutEndLine(); reorient = false; } }
-
if (m->control_pressed) { return 0; }
//fills lines and qlines
rpairedBarcodes[it->first] = tempPair;
//cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
}
- rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
+ int index = rpairedBarcodes.size();
+ for (map<string, int>::iterator it = barcodes.begin(); it != barcodes.end(); it++) {
+ oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+ rpairedBarcodes[index] = tempPair; index++;
+ //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
+ }
+
+ index = rpairedPrimers.size();
+ for (map<string, int>::iterator it = primers.begin(); it != primers.end(); it++) {
+ oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+ rpairedPrimers[index] = tempPair; index++;
+ //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
+ }
+
+ rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
}
while (moreSeqs) {
int origLength = currSeq.getNumBases();
if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
- else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
- else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
- else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
+ else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage, logtransform); }
+ else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage, logtransform); }
+ else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage, logtransform); }
else { success = 1; }
//you don't want to trim, if it fails above then scrap it
lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
- qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
+ qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, logtransform,
minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
pDataArray.push_back(tempTrim);
if (hasPairedBarcodes || hasPrimer) {
pairedOligos = true;
if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
- }else if (reorient) { m->mothurOut("[Warning]: cannot use checkorient without paired barcodes or primers, ignoring.\n"); m->mothurOutEndLine(); reorient = false; }
+ }
if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
bool abort, createGroup;
string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
- bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient;
+ bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient, logtransform;
int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
int qWindowSize, qWindowStep, keepFirst, removeLast;
double qRollAverage, qThreshold, qWindowAverage, qAverage;
vector<vector<string> > qualFileNames;
vector<vector<string> > nameFileNames;
unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
- bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient;
+ bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient, logtransform;
int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
int qWindowSize, qWindowStep, keepFirst, removeLast, count;
double qRollAverage, qThreshold, qWindowAverage, qAverage;
trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend, MothurOut* mout,
int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, map<int, oligosPair> pbr, map<int, oligosPair> ppr, bool po,
vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
- int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
+ int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage, bool lt,
int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map<string, string> nm, map<string, int> ncount) {
filename = fn;
qFileName = qn;
qThreshold = Threshold;
qAverage = Average;
qRollAverage = RollAverage;
+ logtransform = lt;
minLength = minL;
maxAmbig = maxA;
maxHomoP = maxH;
oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
rpairedBarcodes[it->first] = tempPair;
}
+
+ int index = rpairedBarcodes.size();
+ for (map<string, int>::iterator it = pDataArray->barcodes.begin(); it != pDataArray->barcodes.end(); it++) {
+ oligosPair tempPair("", trimOligos->reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+ rpairedBarcodes[index] = tempPair; index++;
+ }
+
+ index = rpairedPrimers.size();
+ for (map<string, int>::iterator it = pDataArray->primers.begin(); it != pDataArray->primers.end(); it++) {
+ oligosPair tempPair("", trimOligos->reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+ rpairedPrimers[index] = tempPair; index++;
+ }
+
rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
}
int origLength = currSeq.getNumBases();
if(pDataArray->qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, pDataArray->qThreshold); }
- else if(pDataArray->qAverage != 0) { success = currQual.cullQualAverage(currSeq, pDataArray->qAverage); }
- else if(pDataArray->qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage); }
- else if(pDataArray->qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage); }
+ else if(pDataArray->qAverage != 0) { success = currQual.cullQualAverage(currSeq, pDataArray->qAverage, pDataArray->logtransform); }
+ else if(pDataArray->qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage, pDataArray->logtransform); }
+ else if(pDataArray->qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage, pDataArray->logtransform); }
else { success = 1; }
//you don't want to trim, if it fails above then scrap it
#include "mempearson.h"
#include "sharedsobs.h"
#include "sharednseqs.h"
+#include "sharedjsd.h"
/********************************************************************/
}else if (Estimators[i] == "mempearson") { Calculator* temp = new MemPearson(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
}else if (Estimators[i] == "sharedobserved") { Calculator* temp = new SharedSobs(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
}else if (Estimators[i] == "kulczynski") { Calculator* temp = new Kulczynski(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
+ }else if (Estimators[i] == "jsd") { Calculator* temp = new JSD(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
}else { m->mothurOut("[ERROR]: Missing else if for " + Estimators[i] + " in printCitations."); m->mothurOutEndLine(); }
}else { m->mothurOut(Estimators[i] + " is not a valid calculator, no citation will be given."); m->mothurOutEndLine(); }
}
shared["memchord"] = "memchord";
shared["memeuclidean"] = "memeuclidean";
shared["mempearson"] = "mempearson";
+ shared["jsd"] = "jsd";
shared["default"] = "default";
}
catch(exception& e) {
sharedsummary["memchord"] = "memchord";
sharedsummary["memeuclidean"] = "memeuclidean";
sharedsummary["mempearson"] = "mempearson";
+ sharedsummary["jsd"] = "jsd";
sharedsummary["default"] = "default";
}
catch(exception& e) {
matrix["memchord"] = "memchord";
matrix["memeuclidean"] = "memeuclidean";
matrix["mempearson"] = "mempearson";
+ matrix["jsd"] = "jsd";
}
catch(exception& e) {