#include "splitmatrix.h"
#include "phylotree.h"
#include "distancecommand.h"
+#include "seqsummarycommand.h"
/***********************************************************************/
}
/***********************************************************************/
// Constructor for the variant that is given a fasta file, a names file and a taxonomy file.
// It only records its arguments in member variables; no files are opened here.
// The `+` signature adds two parameters to the `-` one: `cu` (a distance cutoff) and
// `output` (an output directory).
-SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, string t, int p){
+SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float cu, string t, int p, string output){
m = MothurOut::getInstance(); //MothurOut handle the other methods use for file helpers (openInputFile, gobble, mothurRemove) and control_pressed
fastafile = ffile;
namefile = name;
taxFile = tax;
- cutoff = c;
+ cutoff = c; //taxonomy level cutoff; splitClassify truncates it to an integer before use
+ distCutoff = cu; //distance cutoff, forwarded as "cutoff=" to DistanceCommand when distance matrices are built from the split fasta files
method = t; //split method selector; accepted values are not visible in this chunk - TODO confirm against caller
processors = p; //forwarded as "processors=" to DistanceCommand
+ outputDir = output; //forwarded as "outputdir=" to DistanceCommand when non-empty; if empty it is later set from m->hasPath(fastafile)
}
/***********************************************************************/
if (large) { splitDistanceLarge(); }
else { splitDistanceRAM(); }
+
+ return 0;
}
catch(exception& e) {
int SplitMatrix::splitClassify(){
try {
cutoff = int(cutoff);
-
+
map<string, int> seqGroup;
map<string, int>::iterator it;
map<string, int>::iterator it2;
PhyloTree* phylo = new PhyloTree();
ifstream in;
- openInputFile(taxFile, in);
+ m->openInputFile(taxFile, in);
//read in users taxonomy file and add sequences to tree
string seqname, tax;
while(!in.eof()){
- in >> seqname >> tax; gobble(in);
+ in >> seqname >> tax; m->gobble(in);
phylo->addSeqToTree(seqname, tax);
}
in.close();
set<string> names;
for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
- remove((fastafile + "." + toString(i) + ".temp").c_str());
+ m->mothurRemove((fastafile + "." + toString(i) + ".temp"));
}
ifstream in;
- openInputFile(fastafile, in);
+ m->openInputFile(fastafile, in);
//parse fastafile
ofstream outFile;
while (!in.eof()) {
- Sequence query(in); gobble(in);
+ Sequence query(in); m->gobble(in);
if (query.getName() != "") {
it = seqGroup.find(query.getName());
if (namefile == "") { names.insert(query.getName()); }
if (it != seqGroup.end()) { //not singleton
- openOutputFileAppend((fastafile + "." + toString(it->second) + ".temp"), outFile);
+ m->openOutputFileAppend((fastafile + "." + toString(it->second) + ".temp"), outFile);
query.printSequence(outFile);
outFile.close();
- copyGroups.erase(it);
+ copyGroups.erase(query.getName());
}
}
}
//process each distance file
for (int i = 0; i < numGroups; i++) {
- string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors);
+ string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(distCutoff);
+ if (outputDir != "") { options += ", outputdir=" + outputDir; }
Command* command = new DistanceCommand(options);
+
command->execute();
delete command;
- remove((fastafile + "." + toString(i) + ".temp").c_str());
+ m->mothurRemove((fastafile + "." + toString(i) + ".temp"));
//remove old names files just in case
- remove((namefile + "." + toString(i) + ".temp").c_str());
+ m->mothurRemove((namefile + "." + toString(i) + ".temp"));
}
-
+
singleton = namefile + ".extra.temp";
ofstream remainingNames;
- openOutputFile(singleton, remainingNames);
+ m->openOutputFile(singleton, remainingNames);
bool wroteExtra = false;
ifstream bigNameFile;
- openInputFile(namefile, bigNameFile);
+ m->openInputFile(namefile, bigNameFile);
string name, nameList;
while(!bigNameFile.eof()){
- bigNameFile >> name >> nameList; gobble(bigNameFile);
+ bigNameFile >> name >> nameList; m->gobble(bigNameFile);
//did this sequence get assigned a group
it = seqGroup.find(name);
if (it != seqGroup.end()) {
- openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile);
+ m->openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile);
outFile << name << '\t' << nameList << endl;
outFile.close();
}else{
}
bigNameFile.close();
- remainingNames.close();
- if (!wroteExtra) {
- remove(singleton.c_str());
- singleton = "none";
- }
-
for(int i=0;i<numGroups;i++){
string tempNameFile = namefile + "." + toString(i) + ".temp";
- string tempDistFile = getRootName(getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist";
+ if (outputDir == "") { outputDir = m->hasPath(fastafile); }
+ string tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist";
//if there are valid distances
ifstream fileHandle;
fileHandle.open(tempDistFile.c_str());
if(fileHandle) {
- gobble(fileHandle);
- if (!fileHandle.eof()) { //check for blank file
+ m->gobble(fileHandle);
+ if (!fileHandle.eof()) { //check for blank file - this could occur if all dists in group are above cutoff
map<string, string> temp;
temp[tempDistFile] = tempNameFile;
dists.push_back(temp);
+ }else {
+ ifstream in;
+ m->openInputFile(tempNameFile, in);
+
+ while(!in.eof()) {
+ in >> name >> nameList; m->gobble(in);
+ wroteExtra = true;
+ remainingNames << name << '\t' << nameList << endl;
+ }
+ in.close();
+ m->mothurRemove(tempNameFile);
}
}
fileHandle.close();
}
- if (m->control_pressed) { for (int i = 0; i < dists.size(); i++) { remove((dists[i].begin()->first).c_str()); remove((dists[i].begin()->second).c_str()); } dists.clear(); }
+ remainingNames.close();
+ if (!wroteExtra) {
+ m->mothurRemove(singleton);
+ singleton = "none";
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < dists.size(); i++) { m->mothurRemove((dists[i].begin()->first)); m->mothurRemove((dists[i].begin()->second)); } dists.clear(); }
return 0;
}
map<string, int>::iterator it2;
ifstream dFile;
- openInputFile(distFile, dFile);
+ m->openInputFile(distFile, dFile);
ofstream outFile;
for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
- remove((distFile + "." + toString(i) + ".temp").c_str());
+ m->mothurRemove((distFile + "." + toString(i) + ".temp"));
}
//for buffering the io to improve speed
string seqA, seqB;
float dist;
- if (m->control_pressed) { dFile.close(); for (int i = 0; i < numGroups; i++) { remove((distFile + "." + toString(i) + ".temp").c_str()); } }
+ if (m->control_pressed) { dFile.close(); for (int i = 0; i < numGroups; i++) { m->mothurRemove((distFile + "." + toString(i) + ".temp")); } }
- dFile >> seqA >> seqB >> dist; gobble(dFile);
+ dFile >> seqA >> seqB >> dist; m->gobble(dFile);
//if both sequences are in the same group then they are within the cutoff
it = seqGroup.find(seqA);
if ((it != seqGroup.end()) && (it2 != seqGroup.end())) { //they are both not singletons
if (it->second == it2->second) { //they are from the same group so add the distance
if (numOutputs[it->second] > 30) {
- openOutputFileAppend((distFile + "." + toString(it->second) + ".temp"), outFile);
+ m->openOutputFileAppend((distFile + "." + toString(it->second) + ".temp"), outFile);
outFile << outputs[it->second] << seqA << '\t' << seqB << '\t' << dist << endl;
outFile.close();
outputs[it->second] = "";
dFile.close();
for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
- remove((namefile + "." + toString(i) + ".temp").c_str());
+ m->mothurRemove((namefile + "." + toString(i) + ".temp"));
//write out any remaining buffers
if (numOutputs[i] > 0) {
- openOutputFileAppend((distFile + "." + toString(i) + ".temp"), outFile);
+ m->openOutputFileAppend((distFile + "." + toString(i) + ".temp"), outFile);
outFile << outputs[i];
outFile.close();
outputs[i] = "";
}
ifstream bigNameFile;
- openInputFile(namefile, bigNameFile);
+ m->openInputFile(namefile, bigNameFile);
singleton = namefile + ".extra.temp";
ofstream remainingNames;
- openOutputFile(singleton, remainingNames);
+ m->openOutputFile(singleton, remainingNames);
bool wroteExtra = false;
string name, nameList;
while(!bigNameFile.eof()){
- bigNameFile >> name >> nameList; gobble(bigNameFile);
+ bigNameFile >> name >> nameList; m->gobble(bigNameFile);
//did this sequence get assigned a group
it = seqGroup.find(name);
if (it != seqGroup.end()) {
- openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile);
+ m->openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile);
outFile << name << '\t' << nameList << endl;
outFile.close();
}else{
dists.push_back(temp);
}else{
ifstream in;
- openInputFile(tempNameFile, in);
+ m->openInputFile(tempNameFile, in);
while(!in.eof()) {
- in >> name >> nameList; gobble(in);
+ in >> name >> nameList; m->gobble(in);
wroteExtra = true;
remainingNames << name << '\t' << nameList << endl;
}
in.close();
- remove(tempNameFile.c_str());
+ m->mothurRemove(tempNameFile);
}
}
remainingNames.close();
if (!wroteExtra) {
- remove(singleton.c_str());
+ m->mothurRemove(singleton);
singleton = "none";
}
if (m->control_pressed) {
for (int i = 0; i < dists.size(); i++) {
- remove((dists[i].begin()->first).c_str());
- remove((dists[i].begin()->second).c_str());
+ m->mothurRemove((dists[i].begin()->first));
+ m->mothurRemove((dists[i].begin()->second));
}
dists.clear();
}
ofstream outFile;
ifstream dFile;
- openInputFile(distFile, dFile);
+ m->openInputFile(distFile, dFile);
while(dFile){
string seqA, seqB;
dFile >> seqA >> seqB >> dist;
- if (m->control_pressed) { dFile.close(); for(int i=0;i<numGroups;i++){ if(groups[i].size() > 0){ remove((distFile + "." + toString(i) + ".temp").c_str()); } } return 0; }
+ if (m->control_pressed) { dFile.close(); for(int i=0;i<numGroups;i++){ if(groups[i].size() > 0){ m->mothurRemove((distFile + "." + toString(i) + ".temp")); } } return 0; }
if(dist < cutoff){
//cout << "in cutoff: " << dist << endl;
delete memblock;
fileB.close();
- remove(fileName2.c_str());
+ m->mothurRemove(fileName2);
//write out the merged memory
if (numOutputs[groupID] > 60) {
delete memblock;
fileB.close();
- remove(fileName2.c_str());
+ m->mothurRemove(fileName2);
//write out the merged memory
if (numOutputs[groupID] > 60) {
}
}
}
- gobble(dFile);
+ m->gobble(dFile);
}
dFile.close();
while(bigNameFile){
bigNameFile >> name >> nameList;
nameMap[name] = nameList;
- gobble(bigNameFile);
+ m->gobble(bigNameFile);
}
bigNameFile.close();
if (m->control_pressed) {
for (int i = 0; i < dists.size(); i++) {
- remove((dists[i].begin()->first).c_str());
- remove((dists[i].begin()->second).c_str());
+ m->mothurRemove((dists[i].begin()->first));
+ m->mothurRemove((dists[i].begin()->second));
}
dists.clear();
}
int numGroups = 0;
ifstream dFile;
- openInputFile(distFile, dFile);
+ m->openInputFile(distFile, dFile);
while(dFile){
string seqA, seqB;
dFile >> seqA >> seqB >> dist;
- if (m->control_pressed) { dFile.close(); for(int i=0;i<numGroups;i++){ if(groups[i].size() > 0){ remove((distFile + "." + toString(i) + ".temp").c_str()); } } return 0; }
+ if (m->control_pressed) { dFile.close(); for(int i=0;i<numGroups;i++){ if(groups[i].size() > 0){ m->mothurRemove((distFile + "." + toString(i) + ".temp")); } } return 0; }
if(dist < cutoff){
//cout << "in cutoff: " << dist << endl;
}
}
}
- gobble(dFile);
+ m->gobble(dFile);
}
dFile.close();