5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
12 //***************************************************************************************************************
13 ChimeraSlayer::ChimeraSlayer(string filename, string temp) { fastafile = filename; templateFile = temp; }
14 //***************************************************************************************************************
16 ChimeraSlayer::~ChimeraSlayer() {
18 for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; }
19 for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }
22 errorOut(e, "ChimeraSlayer", "~ChimeraSlayer");
26 //***************************************************************************************************************
27 void ChimeraSlayer::print(ostream& out) {
35 errorOut(e, "ChimeraSlayer", "print");
40 //***************************************************************************************************************
41 void ChimeraSlayer::getChimeras() {
44 //read in query sequences and subject sequences
45 mothurOut("Reading sequences and template file... "); cout.flush();
46 querySeqs = readSeqs(fastafile);
47 templateSeqs = readSeqs(templateFile);
48 mothurOut("Done."); mothurOutEndLine();
50 int numSeqs = querySeqs.size();
52 //break up file if needed
53 int linesPerProcess = numSeqs / processors ;
55 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
56 //find breakup of sequences for all times we will Parallelize
57 if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); }
60 for (int i = 0; i < (processors-1); i++) {
61 lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
63 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
64 int i = processors - 1;
65 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
68 lines.push_back(new linePair(0, numSeqs));
71 if (seqMask != "") { decalc = new DeCalculator(); } //to use below
73 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
74 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim);
75 slayer = new Slayer(window, increment, minSim, divR);
77 for (int i = 0; i < querySeqs.size(); i++) {
79 string chimeraFlag = maligner->getResults(querySeqs[i]);
80 float percentIdentical = maligner->getPercentID();
81 vector<results> Results = maligner->getOutput();
83 cout << querySeqs[4]->getName() << '\t' << chimeraFlag << '\t' << percentIdentical << endl;
85 for (int j = 0; j < Results.size(); j++) {
86 cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
90 if (chimeraFlag == "yes") {
92 //get sequence that were given from maligner results
94 for (int j = 0; j < Results.size(); j++) {
95 Sequence* seq = getSequence(Results[j].parent); //makes copy so you can filter and mask and not effect template
97 //seq = NULL if error occurred in getSequence
98 if (seq == NULL) { break; }
102 member.dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
103 seqs.push_back(member);
107 //limit number of parents to explore - default 5
108 if (Results.size() > parents) {
110 sort(seqs.begin(), seqs.end(), compareSeqDist);
111 //prioritize larger more similiar sequence fragments
112 reverse(seqs.begin(), seqs.end());
114 for (int k = seqs.size()-1; k > (parents-1); k--) {
120 //put seqs into vector to send to slayer
121 vector<Sequence*> seqsForSlayer;
122 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
124 //mask then send to slayer...
126 decalc->setMask(seqMask);
129 decalc->runMask(querySeqs[i]);
132 for (int k = 0; k < seqsForSlayer.size(); k++) {
133 decalc->runMask(seqsForSlayer[k]);
139 slayer->getResults(querySeqs[i], seqsForSlayer);
142 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
147 for (int i = 0; i < lines.size(); i++) { delete lines[i]; }
155 catch(exception& e) {
156 errorOut(e, "ChimeraSlayer", "getChimeras");
160 //***************************************************************************************************************
161 Sequence* ChimeraSlayer::getSequence(string name) {
165 //look through templateSeqs til you find it
167 for (int i = 0; i < templateSeqs.size(); i++) {
168 if (name == templateSeqs[i]->getName()) {
174 if(spot == -1) { mothurOut("Error: Could not find sequence in chimeraSlayer."); mothurOutEndLine(); return NULL; }
176 temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
180 catch(exception& e) {
181 errorOut(e, "ChimeraSlayer", "getSequence");
185 //***************************************************************************************************************