//***************************************************************************************************************
//this is a vertical soft filter
-void Chimera::createFilter(vector<Sequence*> seqs) {
+string Chimera::createFilter(vector<Sequence*> seqs, float t) {
try {
filterString = "";
- int threshold = int (0.5 * seqs.size());
+ int threshold = int (t * seqs.size());
//cout << "threshhold = " << threshold << endl;
vector<int> gaps; gaps.resize(seqs[0]->getAligned().length(), 0);
vector<int> t; t.resize(seqs[0]->getAligned().length(), 0);
vector<int> g; g.resize(seqs[0]->getAligned().length(), 0);
vector<int> c; c.resize(seqs[0]->getAligned().length(), 0);
-
+
filterString = (string(seqs[0]->getAligned().length(), '1'));
//for each sequence
for (int i = 0; i < seqs.size(); i++) {
+ if (m->control_pressed) { return filterString; }
+
string seqAligned = seqs[i]->getAligned();
+ if (seqAligned.length() != filterString.length()) { m->mothurOut(seqs[i]->getName() + " is not the same length as the template sequences. Aborting!\n"); exit(1); }
+
for (int j = 0; j < seqAligned.length(); j++) {
//if this spot is a gap
if ((seqAligned[j] == '-') || (seqAligned[j] == '.')) { gaps[j]++; }
}
}
- //zero out spot where all sequences have blanks
//zero out spot where all sequences have blanks
int numColRemoved = 0;
for(int i = 0;i < seqs[0]->getAligned().length(); i++){
+
+ if (m->control_pressed) { return filterString; }
+
if(gaps[i] == seqs.size()) { filterString[i] = '0'; numColRemoved++; }
else if (((a[i] < threshold) && (t[i] < threshold) && (g[i] < threshold) && (c[i] < threshold))) { filterString[i] = '0'; numColRemoved++; }
//cout << "a = " << a[i] << " t = " << t[i] << " g = " << g[i] << " c = " << c[i] << endl;
}
-
-//cout << "filter = " << filterString << endl;
- mothurOut("Filter removed " + toString(numColRemoved) + " columns."); mothurOutEndLine();
+ if (threshold != 0.0) { m->mothurOut("Filter removed " + toString(numColRemoved) + " columns."); m->mothurOutEndLine(); }
+
+ return filterString;
}
catch(exception& e) {
- errorOut(e, "Chimera", "createFilter");
+ m->errorOut(e, "Chimera", "createFilter");
exit(1);
}
}
//***************************************************************************************************************
-void Chimera::runFilter(Sequence* seq) {
+map<int, int> Chimera::runFilter(Sequence* seq) {
try {
-
+ map<int, int> maskMap;
string seqAligned = seq->getAligned();
string newAligned = "";
+ int count = 0;
for (int j = 0; j < seqAligned.length(); j++) {
//if this spot is a gap
- if (filterString[j] == '1') { newAligned += seqAligned[j]; }
+ if (filterString[j] == '1') {
+ newAligned += seqAligned[j];
+ maskMap[count] = j;
+ count++;
+ }
}
seq->setAligned(newAligned);
+
+ return maskMap;
}
catch(exception& e) {
- errorOut(e, "Chimera", "runFilter");
+ m->errorOut(e, "Chimera", "runFilter");
exit(1);
}
}
vector<Sequence*> Chimera::readSeqs(string file) {
try {
- mothurOut("Reading sequences... "); cout.flush();
- ifstream in;
- openInputFile(file, in);
vector<Sequence*> container;
int count = 0;
- int length = 0;
+ length = 0;
unaligned = false;
+
+ m->mothurOut("Reading sequences from " + file + "..."); cout.flush();
+
+ #ifdef USE_MPI
+ int pid, processors;
+ vector<unsigned long int> positions;
+ int numSeqs;
+ int tag = 2001;
+
+ MPI_Status status;
+ MPI_File inMPI;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
+
+ //char* inFileName = new char[file.length()];
+ //memcpy(inFileName, file.c_str(), file.length());
+
+ char inFileName[1024];
+ strcpy(inFileName, file.c_str());
+
+ MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+ //delete inFileName;
+
+ if (pid == 0) {
+ positions = m->setFilePosFasta(file, numSeqs); //fills MPIPos, returns numSeqs
+
+ //send file positions to all processes
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
+ }else{
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ positions.resize(numSeqs+1);
+ MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+ }
+
+ //read file
+ for(int i=0;i<numSeqs;i++){
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); return container; }
+
+ //read next sequence
+ int seqlength = positions[i+1] - positions[i];
+ char* buf4 = new char[seqlength];
+
+ MPI_File_read_at(inMPI, positions[i], buf4, seqlength, MPI_CHAR, &status);
+
+ string tempBuf = buf4;
+ if (tempBuf.length() > seqlength) { tempBuf = tempBuf.substr(0, seqlength); }
+ delete buf4;
+
+ istringstream iss (tempBuf,istringstream::in);
+
+ Sequence* current = new Sequence(iss);
+ if (current->getName() != "") {
+ if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length
+ else if (length != current->getAligned().length()) { unaligned = true; }
+
+ container.push_back(current);
+ }
+ }
+
+ MPI_File_close(&inMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+ #else
+
+ ifstream in;
+ m->openInputFile(file, in);
//read in seqs and store in vector
while(!in.eof()){
- Sequence* current = new Sequence(in); gobble(in);
+ if (m->control_pressed) { return container; }
- if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length
- else if (length != current->getAligned().length()) { //seqs are unaligned
- unaligned = true;
- }
+ Sequence* current = new Sequence(in); m->gobble(in);
+ if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length
+ else if (length != current->getAligned().length()) { unaligned = true; }
+
if (current->getName() != "") { container.push_back(current); }
}
-
in.close();
- mothurOut("Done."); mothurOutEndLine();
+ #endif
+
+ m->mothurOut("Done."); m->mothurOutEndLine();
+
+ filterString = (string(container[0]->getAligned().length(), '1'));
return container;
}
catch(exception& e) {
- errorOut(e, "Chimera", "readSeqs");
+ m->errorOut(e, "Chimera", "readSeqs");
exit(1);
}
}
}else if (filename == "") { //do nothing
seqMask = "";
}else{
- ifstream infile;
- openInputFile(filename, infile);
+
+ #ifdef USE_MPI
+ MPI_File inMPI;
+ MPI_Offset size;
+ MPI_Status status;
- while (!infile.eof()) {
- Sequence temp(infile);
- seqMask = temp.getAligned();
-
- gobble(infile);
- }
+ //char* inFileName = new char[filename.length()];
+ //memcpy(inFileName, filename.c_str(), filename.length());
- infile.close();
- }
- }
- catch(exception& e) {
- errorOut(e, "Chimera", "setMask");
- exit(1);
- }
-}
-//***************************************************************************************************************
-
-vector< vector<float> > Chimera::readQuantiles() {
- try {
-
- ifstream in;
- openInputFile(quanfile, in);
-
- vector< vector<float> > quan;
- vector <float> temp; temp.resize(6, 0);
-
- //to fill 0
- quan.push_back(temp);
+ char inFileName[1024];
+ strcpy(inFileName, filename.c_str());
- int num; float ten, twentyfive, fifty, seventyfive, ninetyfive, ninetynine;
-
- while(!in.eof()){
+ MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+ MPI_File_get_size(inMPI, &size);
+
+ //delete inFileName;
- in >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine;
+ char* buffer = new char[size];
+ MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status);
- temp.clear();
+ string tempBuf = buffer;
+ if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size); }
+ istringstream iss (tempBuf,istringstream::in);
+
+ delete buffer;
- temp.push_back(ten);
- temp.push_back(twentyfive);
- temp.push_back(fifty);
- temp.push_back(seventyfive);
- temp.push_back(ninetyfive);
- temp.push_back(ninetynine);
+ if (!iss.eof()) {
+ Sequence temp(iss);
+ seqMask = temp.getAligned();
+ }else {
+ m->mothurOut("Problem with mask."); m->mothurOutEndLine();
+ seqMask = "";
+ }
- quan.push_back(temp);
+ MPI_File_close(&inMPI);
+ #else
+
+ ifstream infile;
+ m->openInputFile(filename, infile);
+
+ if (!infile.eof()) {
+ Sequence temp(infile);
+ seqMask = temp.getAligned();
+ }else {
+ m->mothurOut("Problem with mask."); m->mothurOutEndLine();
+ seqMask = "";
+ }
+ infile.close();
+ #endif
- gobble(in);
}
-
- in.close();
- return quan;
-
}
catch(exception& e) {
- errorOut(e, "Chimera", "readQuantiles");
+ m->errorOut(e, "Chimera", "setMask");
exit(1);
}
}
}
}
- if(spot == -1) { mothurOut("Error: Could not find sequence."); mothurOutEndLine(); return NULL; }
+ if(spot == -1) { m->mothurOut("Error: Could not find sequence."); m->mothurOutEndLine(); return NULL; }
temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
return temp;
}
catch(exception& e) {
- errorOut(e, "Chimera", "getSequence");
+ m->errorOut(e, "Chimera", "getSequence");
exit(1);
}
}