+ if (large) { splitDistanceLarge(); }
+ else { splitDistanceRAM(); }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SplitMatrix", "splitDistance");
+ exit(1);
+ }
+}
+
+/***********************************************************************/
+//Builds a taxonomy tree from taxFile, assigns every sequence that sits under a node at level == cutoff
+//(and shares that node with at least one other sequence) to a numbered group, then hands the grouping
+//to splitDistanceFileByTax (method == "classify") or createDistanceFilesFromTax (any other method).
+//Returns 0; on an exception the error is reported through m->errorOut and the program exits.
+int SplitMatrix::splitClassify(){
+    try {
+        //drop any fractional part so the cutoff can be compared with node levels
+        cutoff = int(cutoff);
+
+        //sequence name -> index of the group it was assigned to
+        map<string, int> seqGroup;
+        int numGroups = 0;
+
+        //build tree from users taxonomy file
+        PhyloTree* phylo = new PhyloTree();
+
+        map<string, string> temp;
+        m->readTax(taxFile, temp);
+
+        //each entry is erased right after it is added to the tree, so temp shrinks as the tree grows
+        for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end();) {
+            phylo->addSeqToTree(itTemp->first, itTemp->second);
+            temp.erase(itTemp++);
+        }
+
+        phylo->assignHeirarchyIDs(0);
+
+        //make sure the cutoff is not greater than maxlevel
+        if (cutoff > phylo->getMaxLevel()) {
+            m->mothurOut("splitcutoff is greater than the longest taxonomy, using " + toString(phylo->getMaxLevel())); m->mothurOutEndLine();
+            cutoff = phylo->getMaxLevel();
+        }
+
+        //for each node in tree
+        for (int i = 0; i < phylo->getNumNodes(); i++) {
+
+            //bind by const reference: the node is only read, so there is no need to copy its accession list
+            const TaxNode& taxon = phylo->get(i);
+
+            //only nodes exactly at the cutoff level define groups
+            if (taxon.level != cutoff) { continue; }
+
+            //if this taxon just has one seq its a singleton, and singletons get no group
+            if (taxon.accessions.size() <= 1) { continue; }
+
+            for (size_t j = 0; j < taxon.accessions.size(); j++) {
+                seqGroup[taxon.accessions[j]] = numGroups;
+            }
+            numGroups++;
+        }
+
+        //the tree is released before the splitting work starts
+        delete phylo;
+
+        if (method == "classify") {
+            splitDistanceFileByTax(seqGroup, numGroups);
+        }else {
+            createDistanceFilesFromTax(seqGroup, numGroups);
+        }
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SplitMatrix", "splitClassify");
+        exit(1);
+    }
+}
+/***********************************************************************/
+//Splits fastafile into one temporary fasta file per group, runs a DistanceCommand on each of them,
+//and passes the names of the resulting distance files to splitNames.
+//  seqGroup  - sequence name -> group index; sequences that are not in the map are not written to any group file.
+//  numGroups - number of groups; temp files are named fastafile + "." + <group index> + ".temp".
+//Returns 0. Exits the program if a sequence listed in seqGroup is missing from fastafile, or if an exception is caught.
+int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numGroups){
+    try {
+        //grouped sequences not yet seen in the fasta file; whatever is left after parsing is missing
+        map<string, int> copyGroups = seqGroup;
+        map<string, int>::iterator it;
+
+        for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
+            m->mothurRemove((fastafile + "." + toString(i) + ".temp"));
+        }
+
+        ifstream in;
+        m->openInputFile(fastafile, in);
+
+        //parse fastafile
+        ofstream outFile;
+        while (!in.eof()) {
+            Sequence query(in); m->gobble(in);
+
+            string seqName = query.getName();
+            if (seqName == "") { continue; }
+
+            it = seqGroup.find(seqName);
+            if (it == seqGroup.end()) { continue; } //singleton, belongs to no group
+
+            //the group file is opened in append mode and closed again for every sequence,
+            //so only one output file is open at any time
+            m->openOutputFileAppend((fastafile + "." + toString(it->second) + ".temp"), outFile);
+            query.printSequence(outFile);
+            outFile.close();
+
+            copyGroups.erase(seqName);
+        }
+        in.close();
+
+        //report every grouped sequence that is not in the fasta file, then stop
+        if (!copyGroups.empty()) {
+            for (it = copyGroups.begin(); it != copyGroups.end(); it++) {
+                m->mothurOut("ERROR: " + it->first + " is missing from your fastafile. This could happen if your taxonomy file is not unique and your fastafile is, or it could indicate and error."); m->mothurOutEndLine();
+            }
+            exit(1);
+        }
+
+        //process each distance file
+        for (int i = 0; i < numGroups; i++) {
+
+            string tempFasta = fastafile + "." + toString(i) + ".temp";
+
+            string options = "fasta=" + tempFasta + ", processors=" + toString(processors);
+            if (classic) { options += ", output=lt"; }
+            else { options += ", cutoff=" + toString(distCutoff); }
+            if (outputDir != "") { options += ", outputdir=" + outputDir; }
+
+            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+
+            Command* command = new DistanceCommand(options);
+
+            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+
+            command->execute();
+            delete command;
+
+            m->mothurRemove(tempFasta);
+
+            //remove old names files just in case
+            //when neither file was supplied there is nothing to remove; an unguarded else would target ".<i>.temp"
+            if (namefile != "") { m->mothurRemove((namefile + "." + toString(i) + ".temp")); }
+            else if (countfile != "") { m->mothurRemove((countfile + "." + toString(i) + ".temp")); }
+        }
+
+        //default the output directory to the fasta file's path; only done when there is at least one group,
+        //so outputDir is left untouched when numGroups is 0
+        if ((numGroups > 0) && (outputDir == "")) { outputDir = m->hasPath(fastafile); }
+
+        //rebuild the names of the distance files expected for each group's temp fasta file
+        vector<string> tempDistFiles;
+        for (int i = 0; i < numGroups; i++) {
+            string distRoot = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp")));
+            if (classic) { tempDistFiles.push_back(distRoot + "phylip.dist"); }
+            else { tempDistFiles.push_back(distRoot + "dist"); }
+        }
+
+        splitNames(seqGroup, numGroups, tempDistFiles);
+
+        //on user interrupt remove the files recorded in dists and forget them
+        if (m->control_pressed) {
+            for (size_t i = 0; i < dists.size(); i++) {
+                m->mothurRemove((dists[i].begin()->first));
+                m->mothurRemove((dists[i].begin()->second));
+            }
+            dists.clear();
+        }
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SplitMatrix", "createDistanceFilesFromTax");
+        exit(1);
+    }
+}
+/***********************************************************************/
+int SplitMatrix::splitDistanceFileByTax(map<string, int>& seqGroup, int numGroups){
+ try {
+ map<string, int>::iterator it;
+ map<string, int>::iterator it2;
+
+ ofstream outFile;
+ ifstream dFile;
+ m->openInputFile(distFile, dFile);
+
+
+ for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
+ m->mothurRemove((distFile + "." + toString(i) + ".temp"));
+ }