#include "splitmatrix.h"
#include "phylotree.h"
#include "distancecommand.h"
+#include "seqsummarycommand.h"
/***********************************************************************/
}
/***********************************************************************/
-SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, string t, int p, string output){
+SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float cu, string t, int p, string output){
m = MothurOut::getInstance();
fastafile = ffile;
namefile = name;
taxFile = tax;
- cutoff = c;
+ cutoff = c; //taxonomy level cutoff
+ distCutoff = cu; //distance cutoff passed to the distance command when the fasta method builds the distance matrices
method = t;
processors = p;
outputDir = output;
if (large) { splitDistanceLarge(); }
else { splitDistanceRAM(); }
+
+ return 0;
}
catch(exception& e) {
int SplitMatrix::splitClassify(){
try {
cutoff = int(cutoff);
-
+
map<string, int> seqGroup;
map<string, int>::iterator it;
map<string, int>::iterator it2;
PhyloTree* phylo = new PhyloTree();
ifstream in;
- openInputFile(taxFile, in);
+ m->openInputFile(taxFile, in);
//read in users taxonomy file and add sequences to tree
string seqname, tax;
while(!in.eof()){
- in >> seqname >> tax; gobble(in);
+ in >> seqname >> tax; m->gobble(in);
phylo->addSeqToTree(seqname, tax);
}
in.close();
}
ifstream in;
- openInputFile(fastafile, in);
+ m->openInputFile(fastafile, in);
//parse fastafile
ofstream outFile;
while (!in.eof()) {
- Sequence query(in); gobble(in);
+ Sequence query(in); m->gobble(in);
if (query.getName() != "") {
it = seqGroup.find(query.getName());
if (namefile == "") { names.insert(query.getName()); }
if (it != seqGroup.end()) { //not singleton
- openOutputFileAppend((fastafile + "." + toString(it->second) + ".temp"), outFile);
+ m->openOutputFileAppend((fastafile + "." + toString(it->second) + ".temp"), outFile);
query.printSequence(outFile);
outFile.close();
//process each distance file
for (int i = 0; i < numGroups; i++) {
- string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(cutoff);
+ string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(distCutoff);
Command* command = new DistanceCommand(options);
+
command->execute();
delete command;
//remove old names files just in case
remove((namefile + "." + toString(i) + ".temp").c_str());
}
-
+
singleton = namefile + ".extra.temp";
ofstream remainingNames;
- openOutputFile(singleton, remainingNames);
+ m->openOutputFile(singleton, remainingNames);
bool wroteExtra = false;
ifstream bigNameFile;
- openInputFile(namefile, bigNameFile);
+ m->openInputFile(namefile, bigNameFile);
string name, nameList;
while(!bigNameFile.eof()){
- bigNameFile >> name >> nameList; gobble(bigNameFile);
+ bigNameFile >> name >> nameList; m->gobble(bigNameFile);
//did this sequence get assigned a group
it = seqGroup.find(name);
if (it != seqGroup.end()) {
- openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile);
+ m->openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile);
outFile << name << '\t' << nameList << endl;
outFile.close();
}else{
for(int i=0;i<numGroups;i++){
string tempNameFile = namefile + "." + toString(i) + ".temp";
- string tempDistFile = outputDir + getRootName(getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist";
+ if (outputDir == "") { outputDir = m->hasPath(fastafile); }
+ string tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist";
//if there are valid distances
ifstream fileHandle;
fileHandle.open(tempDistFile.c_str());
if(fileHandle) {
- gobble(fileHandle);
+ m->gobble(fileHandle);
if (!fileHandle.eof()) { //check for blank file - this could occur if all dists in group are above cutoff
map<string, string> temp;
temp[tempDistFile] = tempNameFile;
dists.push_back(temp);
}else {
ifstream in;
- openInputFile(tempNameFile, in);
+ m->openInputFile(tempNameFile, in);
while(!in.eof()) {
- in >> name >> nameList; gobble(in);
+ in >> name >> nameList; m->gobble(in);
wroteExtra = true;
remainingNames << name << '\t' << nameList << endl;
}
map<string, int>::iterator it2;
ifstream dFile;
- openInputFile(distFile, dFile);
+ m->openInputFile(distFile, dFile);
ofstream outFile;
for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
if (m->control_pressed) { dFile.close(); for (int i = 0; i < numGroups; i++) { remove((distFile + "." + toString(i) + ".temp").c_str()); } }
- dFile >> seqA >> seqB >> dist; gobble(dFile);
+ dFile >> seqA >> seqB >> dist; m->gobble(dFile);
//if both sequences are in the same group then they are within the cutoff
it = seqGroup.find(seqA);
if ((it != seqGroup.end()) && (it2 != seqGroup.end())) { //they are both not singletons
if (it->second == it2->second) { //they are from the same group so add the distance
if (numOutputs[it->second] > 30) {
- openOutputFileAppend((distFile + "." + toString(it->second) + ".temp"), outFile);
+ m->openOutputFileAppend((distFile + "." + toString(it->second) + ".temp"), outFile);
outFile << outputs[it->second] << seqA << '\t' << seqB << '\t' << dist << endl;
outFile.close();
outputs[it->second] = "";
//write out any remaining buffers
if (numOutputs[i] > 0) {
- openOutputFileAppend((distFile + "." + toString(i) + ".temp"), outFile);
+ m->openOutputFileAppend((distFile + "." + toString(i) + ".temp"), outFile);
outFile << outputs[i];
outFile.close();
outputs[i] = "";
}
ifstream bigNameFile;
- openInputFile(namefile, bigNameFile);
+ m->openInputFile(namefile, bigNameFile);
singleton = namefile + ".extra.temp";
ofstream remainingNames;
- openOutputFile(singleton, remainingNames);
+ m->openOutputFile(singleton, remainingNames);
bool wroteExtra = false;
string name, nameList;
while(!bigNameFile.eof()){
- bigNameFile >> name >> nameList; gobble(bigNameFile);
+ bigNameFile >> name >> nameList; m->gobble(bigNameFile);
//did this sequence get assigned a group
it = seqGroup.find(name);
if (it != seqGroup.end()) {
- openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile);
+ m->openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile);
outFile << name << '\t' << nameList << endl;
outFile.close();
}else{
dists.push_back(temp);
}else{
ifstream in;
- openInputFile(tempNameFile, in);
+ m->openInputFile(tempNameFile, in);
while(!in.eof()) {
- in >> name >> nameList; gobble(in);
+ in >> name >> nameList; m->gobble(in);
wroteExtra = true;
remainingNames << name << '\t' << nameList << endl;
}
ofstream outFile;
ifstream dFile;
- openInputFile(distFile, dFile);
+ m->openInputFile(distFile, dFile);
while(dFile){
string seqA, seqB;
}
}
}
- gobble(dFile);
+ m->gobble(dFile);
}
dFile.close();
while(bigNameFile){
bigNameFile >> name >> nameList;
nameMap[name] = nameList;
- gobble(bigNameFile);
+ m->gobble(bigNameFile);
}
bigNameFile.close();
int numGroups = 0;
ifstream dFile;
- openInputFile(distFile, dFile);
+ m->openInputFile(distFile, dFile);
while(dFile){
string seqA, seqB;
}
}
}
- gobble(dFile);
+ m->gobble(dFile);
}
dFile.close();