]> git.donarmstrong.com Git - mothur.git/blobdiff - chimeraslayer.cpp
test
[mothur.git] / chimeraslayer.cpp
index 293ee807d5b875728716fddde17cd6c032f318e1..5d3d6706ede49b38c5385d29c1d127b1488c4000 100644 (file)
@@ -26,9 +26,30 @@ ChimeraSlayer::~ChimeraSlayer() {
 //***************************************************************************************************************
 void ChimeraSlayer::print(ostream& out) {
        try {
-               
                mothurOutEndLine();
+               mothurOut("Only reporting sequence supported by 90% of bootstrapped results.");
+               mothurOutEndLine();
+               
+               for (int i = 0; i < querySeqs.size(); i++) {
                
+                       if (chimeraFlags[i] == "yes") { 
+//                             cout << i << endl;
+                               if ((chimeraResults[i][0].bsa >= 90.0) || (chimeraResults[i][0].bsb >= 90.0)) {
+                                       mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
+                                       out << querySeqs[i]->getName() << "\tyes" << endl;
+                               }else {
+                                       out << querySeqs[i]->getName() << "\tyes" << endl;
+                                       //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
+                               }
+
+                               printBlock(chimeraResults[i][0], out, i);
+                               
+                               out << endl;
+                       }else{
+                               out << querySeqs[i]->getName() << "\tno" << endl;
+                               //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
+                       }
+               }
                                
        }
        catch(exception& e) {
@@ -38,7 +59,7 @@ void ChimeraSlayer::print(ostream& out) {
 }
 
 //***************************************************************************************************************
-void ChimeraSlayer::getChimeras() {
+int ChimeraSlayer::getChimeras() {
        try {
                
                //read in query sequences and subject sequences
@@ -49,6 +70,12 @@ void ChimeraSlayer::getChimeras() {
                
                int numSeqs = querySeqs.size();
                
+               if (unaligned) { mothurOut("Your sequences need to be aligned when you use the chimeraslayer method."); mothurOutEndLine(); return 1;  }
+               
+               chimeraResults.resize(numSeqs);
+               chimeraFlags.resize(numSeqs, "no");
+               spotMap.resize(numSeqs);
+               
                //break up file if needed
                int linesPerProcess = numSeqs / processors ;
                
@@ -70,40 +97,56 @@ void ChimeraSlayer::getChimeras() {
                
                if (seqMask != "") {    decalc = new DeCalculator();    } //to use below
                
+               //initialize spotMap
+               for (int j = 0; j < numSeqs; j++) {
+                       for (int i = 0; i < querySeqs[0]->getAligned().length(); i++) {
+                               spotMap[j][i] = i;
+                       }
+               }
+               
                //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
                maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim);
-               slayer = new Slayer(window, increment, minSim, divR);
+               slayer = new Slayer(window, increment, minSim, divR, iters);
                
                for (int i = 0; i < querySeqs.size(); i++) {
                
                        string chimeraFlag = maligner->getResults(querySeqs[i]);
-                       float percentIdentical = maligner->getPercentID();
+                       //float percentIdentical = maligner->getPercentID();
                        vector<results> Results = maligner->getOutput();
                        
-                       cout << querySeqs[4]->getName() << '\t' << chimeraFlag << '\t' << percentIdentical << endl;
+                       cout << "Processing sequence: " << i+1 << endl;
                        
-                       for (int j = 0; j < Results.size(); j++) {
-                               cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
-                               
-                       }
+                       //for (int j = 0; j < Results.size(); j++) {
+                               //cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
+                       //}
                        
-                       if (chimeraFlag == "yes") {
+                       //if (chimeraFlag == "yes") {
                        
                                //get sequence that were given from maligner results
                                vector<SeqDist> seqs;
+                               map<string, float> removeDups;
+                               map<string, float>::iterator itDup;
                                for (int j = 0; j < Results.size(); j++) {
-                                       Sequence* seq = getSequence(Results[j].parent); //makes copy so you can filter and mask and not effect template
-                                       
-                                       //seq = NULL if error occurred in getSequence
-                                       if (seq == NULL) {  break;      }
-                                       else {  
-                                               SeqDist member;
-                                               member.seq = seq;
-                                               member.dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
-                                               seqs.push_back(member); 
+                                       float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
+                                       //only add if you are not a duplicate
+                                       itDup = removeDups.find(Results[j].parent);
+                                       if (itDup == removeDups.end()) { //this is not duplicate
+                                               removeDups[Results[j].parent] = dist;
+                                       }else if (dist > itDup->second) { //is this a stronger number for this parent
+                                               removeDups[Results[j].parent] = dist;
                                        }
                                }
-                       
+                               
+                               for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
+                                       Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
+                                       
+                                       SeqDist member;
+                                       member.seq = seq;
+                                       member.dist = itDup->second;
+                                       
+                                       seqs.push_back(member);
+                               }
+                               
                                //limit number of parents to explore - default 5
                                if (Results.size() > parents) {
                                        //sort by distance
@@ -116,10 +159,28 @@ void ChimeraSlayer::getChimeras() {
                                                seqs.pop_back();        
                                        }
                                }
-                               
+               
                                //put seqs into vector to send to slayer
                                vector<Sequence*> seqsForSlayer;
                                for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
+//ofstream out;
+//string name = querySeqs[i]->getName();
+//cout << name << endl;
+//name = name.substr(name.find_first_of("|")+1);
+//cout << name << endl;
+//name = name.substr(name.find_first_of("|")+1);
+//cout << name << endl;
+//name = name.substr(0, name.find_last_of("|"));
+//cout << name << endl;
+//string filename = name + ".seqsforslayer";
+//openOutputFile(filename, out);       
+//for (int u = 0; u < seqsForSlayer.size(); u++) { seqsForSlayer[u]->printSequence(out);       }
+//out.close();
+//filename = name + ".fasta";
+//openOutputFile(filename, out);       
+//q->printSequence(out);
+//out.close();
+
                        
                                //mask then send to slayer...
                                if (seqMask != "") {
@@ -133,14 +194,19 @@ void ChimeraSlayer::getChimeras() {
                                                decalc->runMask(seqsForSlayer[k]);
                                        }
                                        
+                                       for (int i = 0; i < numSeqs; i++) {
+                                               spotMap[i] = decalc->getMaskMap();
+                                       }
+
                                }
                                
                                //send to slayer
-                               slayer->getResults(querySeqs[i], seqsForSlayer);
-                               
+                               chimeraFlags[i] = slayer->getResults(querySeqs[i], seqsForSlayer);
+                               chimeraResults[i] = slayer->getOutput();
+                       
                                //free memory
                                for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
-                       }
+                       //}
                        
                }       
                //free memory
@@ -149,8 +215,8 @@ void ChimeraSlayer::getChimeras() {
                if (seqMask != "") {
                        delete decalc; 
                }
-
-                       
+               
+               return 0;
        }
        catch(exception& e) {
                errorOut(e, "ChimeraSlayer", "getChimeras");
@@ -183,6 +249,34 @@ Sequence* ChimeraSlayer::getSequence(string name) {
        }
 }
 //***************************************************************************************************************
+void ChimeraSlayer::printBlock(data_struct data, ostream& out, int i){
+       try {
+               
+               out << "parentA: " << data.parentA.getName() << "  parentB: " << data.parentB.getName()  << endl;
+               out << "Left Window: " << spotMap[i][data.winLStart] << " " << spotMap[i][data.winLEnd] << endl;
+               out << "Right Window: " << spotMap[i][data.winRStart] << " " << spotMap[i][data.winREnd] << endl;
+               
+               out << "Divergence of Query to Leftside ParentA and Rightside ParentB: " << data.divr_qla_qrb << '\t' << "Bootstrap: " << data.bsa << endl;
+               out << "Divergence of Query to Rightside ParentA and Leftside ParentB: " << data.divr_qlb_qra << '\t' << "Bootstrap: " << data.bsb << endl;
+               
+               out << "Similarity of parents: " << data.ab << endl;
+               out << "Similarity of query to parentA: " << data.qa << endl;
+               out << "Similarity of query to parentB: " << data.qb << endl;
+               
+               //out << "Per_id(QL,A): " << data.qla << endl;
+               //out << "Per_id(QL,B): " << data.qlb << endl;
+               //out << "Per_id(QR,A): " << data.qra << endl;
+               //out << "Per_id(QR,B): " << data.qrb << endl;
 
+               
+               out << "DeltaL: " << (data.qla - data.qlb) << endl;
+               out << "DeltaR: " << (data.qra - data.qrb) << endl;
 
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraSlayer", "printBlock");
+               exit(1);
+       }
+}
+//***************************************************************************************************************