-/*
- * mallard.cpp
- * Mothur
- *
- * Created by Sarah Westcott on 8/11/09.
- * Copyright 2009 __MyCompanyName__. All rights reserved.
- *
- */
-
-#include "mallard.h"
-
-//***************************************************************************************************************
-
-Mallard::Mallard(string filename) { fastafile = filename; }
-//***************************************************************************************************************
-
-Mallard::~Mallard() {
- try {
- for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; }
- }
- catch(exception& e) {
- errorOut(e, "Mallard", "~Mallard");
- exit(1);
- }
-}
-//***************************************************************************************************************
-void Mallard::print(ostream& out) {
- try {
-
- for (int i = 0; i < querySeqs.size(); i++) {
-
- out << querySeqs[i]->getName() << "\thighest de value = " << highestDE[i] << "\tpenalty score = " << marked[i] << endl;
- cout << querySeqs[i]->getName() << "\tpenalty score = " << marked[i] << endl;
-
- }
- }
- catch(exception& e) {
- errorOut(e, "Mallard", "print");
- exit(1);
- }
-}
-
-//***************************************************************************************************************
-void Mallard::getChimeras() {
- try {
-
- //read in query sequences and subject sequences
- mothurOut("Reading sequences and template file... "); cout.flush();
- querySeqs = readSeqs(fastafile);
- mothurOut("Done."); mothurOutEndLine();
-
- int numSeqs = querySeqs.size();
-
- windowSizes.resize(numSeqs, window);
- quantilesMembers.resize(100); //one for every percent mismatch
- highestDE.resize(numSeqs, 0.0); //contains the highest de value for each seq
-
- //break up file if needed
- int linesPerProcess = numSeqs / processors ;
-
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- //find breakup of sequences for all times we will Parallelize
- if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); }
- else {
- //fill line pairs
- for (int i = 0; i < (processors-1); i++) {
- lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
- }
- //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
- int i = processors - 1;
- lines.push_back(new linePair((i*linesPerProcess), numSeqs));
- }
-
- #else
- lines.push_back(new linePair(0, numSeqs));
- #endif
-
- decalc = new DeCalculator();
-
- //if the user does enter a mask then you want to keep all the spots in the alignment
- if (seqMask.length() == 0) { decalc->setAlignmentLength(querySeqs[0]->getAligned().length()); }
- else { decalc->setAlignmentLength(seqMask.length()); }
-
- decalc->setMask(seqMask);
-
- //find P
- mothurOut("Getting conservation... "); cout.flush();
- probabilityProfile = decalc->calcFreq(querySeqs, fastafile);
-
- //make P into Q
- for (int i = 0; i < probabilityProfile.size(); i++) { probabilityProfile[i] = 1 - probabilityProfile[i]; }
- mothurOut("Done."); mothurOutEndLine();
-
- //mask sequences if the user wants to
- if (seqMask != "") {
- //mask querys
- for (int i = 0; i < querySeqs.size(); i++) {
- decalc->runMask(querySeqs[i]);
- }
- }
-
- mothurOut("Calculating DE values..."); cout.flush();
- if (processors == 1) {
- quantilesMembers = decalc->getQuantiles(querySeqs, windowSizes, window, probabilityProfile, increment, 0, querySeqs.size(), highestDE);
- }else { createProcessesQuan(); }
- mothurOut("Done."); mothurOutEndLine();
-
- mothurOut("Ranking outliers..."); cout.flush();
- marked = decalc->returnObviousOutliers(quantilesMembers, querySeqs.size());
- mothurOut("Done."); mothurOutEndLine();
-
-
- //free memory
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; }
- delete decalc;
- }
- catch(exception& e) {
- errorOut(e, "Mallard", "getChimeras");
- exit(1);
- }
-}
-/**************************************************************************************************/
-
-void Mallard::createProcessesQuan() {
- try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- int process = 0;
- vector<int> processIDS;
-
- //loop through and create all the processes you want
- while (process != processors) {
- int pid = fork();
-
- if (pid > 0) {
- processIDS.push_back(pid);
- process++;
- }else if (pid == 0){
-
- quantilesMembers = decalc->getQuantiles(querySeqs, windowSizes, window, probabilityProfile, increment, lines[process]->start, lines[process]->end, highestDE);
-
- //write out data to file so parent can read it
- ofstream out;
- string s = toString(getpid()) + ".temp";
- openOutputFile(s, out);
-
-
- //output observed distances
- for (int i = 0; i < quantilesMembers.size(); i++) {
- out << quantilesMembers[i].size() << '\t';
- for (int j = 0; j < quantilesMembers[i].size(); j++) {
- out << quantilesMembers[i][j].score << '\t' << quantilesMembers[i][j].member1 << '\t' << quantilesMembers[i][j].member2 << '\t';
- }
- out << endl;
- }
-
- out << highestDE.size() << endl;
- for (int i = 0; i < highestDE.size(); i++) {
- out << highestDE[i] << '\t';
- }
- out << endl;
-
- out.close();
-
- exit(0);
- }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
- }
-
- //force parent to wait until all the processes are done
- for (int i=0;i<processors;i++) {
- int temp = processIDS[i];
- wait(&temp);
- }
-
- //get data created by processes
- for (int i=0;i<processors;i++) {
- ifstream in;
- string s = toString(processIDS[i]) + ".temp";
- openInputFile(s, in);
-
- vector< vector<quanMember> > quan;
- quan.resize(100);
-
- //get quantiles
- for (int m = 0; m < quan.size(); m++) {
- int num;
- in >> num;
-
- gobble(in);
-
- vector<quanMember> q; float w; int b, n;
- for (int j = 0; j < num; j++) {
- in >> w >> b >> n;
- //cout << w << '\t' << b << '\t' n << endl;
- quanMember newMember(w, b, n);
- q.push_back(newMember);
- }
-//cout << "here" << endl;
- quan[m] = q;
-//cout << "now here" << endl;
- gobble(in);
- }
-
-
- //save quan in quantiles
- for (int j = 0; j < quan.size(); j++) {
- //put all values of q[i] into quan[i]
- for (int l = 0; l < quan[j].size(); l++) { quantilesMembers[j].push_back(quan[j][l]); }
- //quantilesMembers[j].insert(quantilesMembers[j].begin(), quan[j].begin(), quan[j].end());
- }
-
- int num;
- in >> num; gobble(in);
-
- int count = lines[process]->start;
- for (int s = 0; s < num; s++) {
- float high;
- in >> high;
-
- highestDE[count] = high;
- count++;
- }
-
- in.close();
- remove(s.c_str());
- }
-
-#else
- quantilesMembers = decalc->getQuantiles(querySeqs, windowSizes, window, probabilityProfile, increment, 0, querySeqs.size(), highestDE);
-#endif
- }
- catch(exception& e) {
- errorOut(e, "Mallard", "createProcessesQuan");
- exit(1);
- }
-}
-//***************************************************************************************************************
-
-