From: westcott Date: Mon, 14 Feb 2011 15:13:23 +0000 (+0000) Subject: fixed catchall PATH variable bug added trimera to chimera.slayer X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=7b80945ef716dd72d00563a5a4d692394f7f84c3 fixed catchall PATH variable bug added trimera to chimera.slayer --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 6d6393f..49131e5 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -1615,7 +1615,7 @@ attributes = { ORGANIZATIONNAME = "Schloss Lab"; }; - buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */; + buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "mothur" */; compatibilityVersion = "Xcode 3.1"; developmentRegion = English; hasScannedForEncodings = 1; @@ -2036,7 +2036,7 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; - 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */ = { + 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "mothur" */ = { isa = XCConfigurationList; buildConfigurations = ( 1DEB928A08733DD80010E9CD /* Debug */, diff --git a/catchallcommand.cpp b/catchallcommand.cpp index a462bc7..b0ab7c8 100644 --- a/catchallcommand.cpp +++ b/catchallcommand.cpp @@ -176,22 +176,24 @@ int CatchAllCommand::execute() { if (abort == true) { if (calledHelp) { return 0; } return 2; } - //prepare full output directory - outputDir = m->getFullPathName(outputDir); - //get location of catchall GlobalData* globaldata = GlobalData::getInstance(); path = globaldata->argv; - path = path.substr(0, (path.find_last_of('m'))); + path = path.substr(0, (path.find_last_of("othur")-5)); path = m->getFullPathName(path); - + string catchAllCommandExe = ""; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) catchAllCommandExe += "mono " + path + "CatchAllcmdL.exe "; + if (outputDir == "") { outputDir = "./"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file whereever the exe is and not the cwd. #else catchAllCommandExe += "\"" + path + "CatchAllcmdW.exe\"" + " "; + if (outputDir == "") { outputDir = ".\\"; } //force full pathname to be created for catchall, this is necessary because if catchall is in the path it will look for input file whereever the exe is and not the cwd. #endif + //prepare full output directory + outputDir = m->getFullPathName(outputDir); + vector inputFileNames; if (sharedfile != "") { inputFileNames = parseSharedFile(sharedfile); globaldata->setFormat("sabund"); } else { inputFileNames.push_back(sabundfile); } @@ -237,7 +239,8 @@ int CatchAllCommand::execute() { //wrap entire string in "" catchAllCommand = "\"" + catchAllCommand + "\""; #endif - //run catchall + + //run catchall system(catchAllCommand.c_str()); remove(filename.c_str()); diff --git a/chimera.h b/chimera.h index 8eec3cd..e37d9f9 100644 --- a/chimera.h +++ b/chimera.h @@ -13,7 +13,52 @@ #include "mothur.h" #include "sequence.hpp" - +/***********************************************************************/ +struct data_struct { + float divr_qla_qrb; + float divr_qlb_qra; + float qla_qrb; + float qlb_qra; + float qla; + float qrb; + float ab; + float qa; + float qb; + float lab; + float rab; + float qra; + float qlb; + int winLStart; + int winLEnd; + int winRStart; + int winREnd; + Sequence querySeq; + Sequence parentA; + Sequence parentB; + float bsa; + float bsb; + float bsMax; + float chimeraMax; + +}; +/***********************************************************************/ +struct data_results { + vector results; + string flag; + map spotMap; + Sequence trimQuery; + + data_results(vector d, string f, map s, Sequence t) : results(d), flag(f), spotMap(s), trimQuery(t) {} + data_results() {} +}; +/***********************************************************************/ +//sorts lowest to highest first by bsMax, then if tie by chimeraMax +inline bool compareDataStruct(data_struct left, data_struct right){ + if (left.bsMax < right.bsMax) { return true; } + else if (left.bsMax == right.bsMax) { + return (left.chimeraMax < right.chimeraMax); + }else { return false; } +} /***********************************************************************/ struct Preference { string name; @@ -102,7 +147,9 @@ class Chimera { virtual int getChimeras(Sequence*){ return 0; } virtual int getChimeras(){ return 0; } virtual Sequence* print(ostream&, ostream&){ return NULL; } + virtual Sequence* print(ostream&, ostream&, data_results, data_results) { return NULL; } virtual int print(ostream&, ostream&, string){ return 0; } + virtual data_results getResults() { data_results results; return results; } #ifdef USE_MPI virtual Sequence* print(MPI_File&, MPI_File&){ return 0; } diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index 38f7abe..eae6acf 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -462,8 +462,241 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) { exit(1); } } +//*************************************************************************************************************** +Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) { + try { + Sequence* trim = NULL; + + if (trimChimera) { + string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned(); + trim = new Sequence(leftPiece.trimQuery.getName(), aligned); + } + + if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) { + + string chimeraFlag = "no"; + if (leftPiece.flag == "yes") { + + if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR) + || + (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } + } + + if (rightPiece.flag == "yes") { + if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR) + || + (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } + } + + bool rightChimeric = false; + bool leftChimeric = false; + + if (chimeraFlag == "yes") { + //which peice is chimeric or are both + if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } } + if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } } + + if (rightChimeric || leftChimeric) { + m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine(); + outAcc << querySeq->getName() << endl; + + if (trimChimera) { + string newAligned = trim->getAligned(); + + //right side is fine so keep that + if ((leftChimeric) && (!rightChimeric)) { + for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } + }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that + for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else { //both sides are chimeric, keep longest piece + + int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLStart] - leftPiece.spotMap[leftPiece.results[0].winLEnd]; + int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winRStart] - leftPiece.spotMap[leftPiece.results[0].winREnd]; + + int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4 + int length = lengthLeftLeft; + if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; } + + int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLStart] - rightPiece.spotMap[rightPiece.results[0].winLEnd]; + int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winRStart] - rightPiece.spotMap[rightPiece.results[0].winREnd]; + + if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; } + if (lengthRightRight > length) { longest = 4; } + + if (longest == 1) { //leftleft + for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else if (longest == 2) { //leftright + //get rid of leftleft + for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; } + //get rid of right + for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else if (longest == 3) { //rightleft + //get rid of left + for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } + //get rid of rightright + for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else { //rightright + //get rid of left + for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } + //get rid of rightleft + for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; } + } + } + + trim->setAligned(newAligned); + } + + } + } + + printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out); + out << endl; + }else { out << querySeq->getName() << "\tno" << endl; } + + return trim; + + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "print"); + exit(1); + } +} + #ifdef USE_MPI //*************************************************************************************************************** +Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) { + try { + MPI_Status status; + bool results = false; + string outAccString = ""; + string outputString = ""; + + Sequence* trim = NULL; + + if (trimChimera) { + string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned(); + trim = new Sequence(leftPiece.trimQuery.getName(), aligned); + } + + + if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) { + + string chimeraFlag = "no"; + if (leftPiece.flag == "yes") { + + if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR) + || + (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } + } + + if (rightPiece.flag == "yes") { + if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR) + || + (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } + } + + bool rightChimeric = false; + bool leftChimeric = false; + + if (chimeraFlag == "yes") { + //which peice is chimeric or are both + if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } } + if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } } + + if (rightChimeric || leftChimeric) { + cout << querySeq->getName() << "\tyes" << endl; + outAccString += querySeq->getName() + "\n"; + results = true; + + //write to accnos file + int length = outAccString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outAccString.c_str(), length); + + MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status); + delete buf2; + + if (trimChimera) { + string newAligned = trim->getAligned(); + + //right side is fine so keep that + if ((leftChimeric) && (!rightChimeric)) { + for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } + }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that + for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else { //both sides are chimeric, keep longest piece + + int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLStart] - leftPiece.spotMap[leftPiece.results[0].winLEnd]; + int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winRStart] - leftPiece.spotMap[leftPiece.results[0].winREnd]; + + int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4 + int length = lengthLeftLeft; + if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; } + + int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLStart] - rightPiece.spotMap[rightPiece.results[0].winLEnd]; + int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winRStart] - rightPiece.spotMap[rightPiece.results[0].winREnd]; + + if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; } + if (lengthRightRight > length) { longest = 4; } + + if (longest == 1) { //leftleft + for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else if (longest == 2) { //leftright + //get rid of leftleft + for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; } + //get rid of right + for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else if (longest == 3) { //rightleft + //get rid of left + for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } + //get rid of rightright + for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else { //rightright + //get rid of left + for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } + //get rid of rightleft + for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; } + } + } + + trim->setAligned(newAligned); + } + + } + } + + outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag); + outputString += "\n"; + + //write to output file + int length = outputString.length(); + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); + + MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); + delete buf; + + }else { + outputString += querySeq->getName() + "\tno\n"; + + //write to output file + int length = outputString.length(); + char* buf = new char[length]; + memcpy(buf, outputString.c_str(), length); + + MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); + delete buf; + } + + + return trim; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "print"); + exit(1); + } +} +//*************************************************************************************************************** Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { try { MPI_Status status; @@ -491,7 +724,7 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { int length = outAccString.length(); char* buf2 = new char[length]; memcpy(buf2, outAccString.c_str(), length); - + MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status); delete buf2; @@ -512,28 +745,27 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { outputString = getBlock(chimeraResults[0], chimeraFlag); outputString += "\n"; - //cout << outputString << endl; + //write to output file int length = outputString.length(); char* buf = new char[length]; memcpy(buf, outputString.c_str(), length); - + MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); delete buf; - + }else { outputString += querySeq->getName() + "\tno\n"; - //cout << outputString << endl; + //write to output file int length = outputString.length(); char* buf = new char[length]; memcpy(buf, outputString.c_str(), length); - + MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); delete buf; } - return trim; } catch(exception& e) { @@ -546,12 +778,14 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { //*************************************************************************************************************** int ChimeraSlayer::getChimeras(Sequence* query) { try { - if (trimChimera) { trimQuery = new Sequence(query->getName(), query->getAligned()); } + if (trimChimera) { trimQuery = new Sequence(query->getName(), query->getAligned()); printResults.trimQuery = *trimQuery; } chimeraFlags = "no"; + printResults.flag = "no"; //filter query spotMap = runFilter(query); + printResults.spotMap = spotMap; querySeq = query; @@ -574,11 +808,11 @@ int ChimeraSlayer::getChimeras(Sequence* query) { } if (m->control_pressed) { return 0; } - + string chimeraFlag = maligner.getResults(query, decalc); if (m->control_pressed) { return 0; } vector Results = maligner.getOutput(); - + //found in testing realigning only made things worse if (realign) { ChimeraReAligner realigner(thisTemplate, match, misMatch); @@ -607,9 +841,7 @@ int ChimeraSlayer::getChimeras(Sequence* query) { } for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) { - //Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template itSeq = parentNameSeq.find(itDup->first); -//cout << itDup->first << itSeq->second << endl; Sequence* seq = new Sequence(itDup->first, itSeq->second); SeqDist member; @@ -634,6 +866,7 @@ int ChimeraSlayer::getChimeras(Sequence* query) { //put seqs into vector to send to slayer vector seqsForSlayer; + for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); } //mask then send to slayer... @@ -652,7 +885,7 @@ int ChimeraSlayer::getChimeras(Sequence* query) { } if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; } - + //send to slayer chimeraFlags = slayer.getResults(query, seqsForSlayer); if (m->control_pressed) { return 0; } @@ -660,11 +893,12 @@ int ChimeraSlayer::getChimeras(Sequence* query) { //free memory for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } + + printResults.spotMap = spotMap; + printResults.flag = chimeraFlags; + printResults.results = chimeraResults; } - //delete maligner; - //delete slayer; - return 0; } catch(exception& e) { @@ -675,32 +909,66 @@ int ChimeraSlayer::getChimeras(Sequence* query) { //*************************************************************************************************************** void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){ try { - //out << ":)\n"; - out << querySeq->getName() << '\t'; out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t'; - //out << "Left Window: " << spotMap[data.winLStart] << " " << spotMap[data.winLEnd] << endl; - //out << "Right Window: " << spotMap[data.winRStart] << " " << spotMap[data.winREnd] << endl; - + out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t'; out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t'; out << flag << '\t' << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t'; - //out << "Similarity of parents: " << data.ab << endl; - //out << "Similarity of query to parentA: " << data.qa << endl; - //out << "Similarity of query to parentB: " << data.qb << endl; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "printBlock"); + exit(1); + } +} +//*************************************************************************************************************** +void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){ + try { + + if ((leftChimeric) && (!rightChimeric)) { //print left + out << querySeq->getName() << '\t'; + out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t'; + + out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t'; + out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t'; + out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t'; - //out << "Per_id(QL,A): " << data.qla << endl; - //out << "Per_id(QL,B): " << data.qlb << endl; - //out << "Per_id(QR,A): " << data.qra << endl; - //out << "Per_id(QR,B): " << data.qrb << endl; - + }else if ((!leftChimeric) && (rightChimeric)) { //print right + out << querySeq->getName() << '\t'; + out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t'; + + out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t'; + out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t'; + + out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t'; + + }else { //print both results + if (leftdata.flag == "yes") { + out << querySeq->getName() + "_LEFT" << '\t'; + out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t'; + + out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t'; + out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t'; + + out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t'; + } + + if (rightdata.flag == "yes") { + if (leftdata.flag == "yes") { out << endl; } + + out << querySeq->getName() + "_RIGHT"<< '\t'; + out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t'; + + out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t'; + out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t'; + + out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t'; - //out << "DeltaL: " << (data.qla - data.qlb) << endl; - //out << "DeltaR: " << (data.qra - data.qrb) << endl; - + } + } } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "printBlock"); @@ -708,6 +976,62 @@ void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){ } } //*************************************************************************************************************** +string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){ + try { + + string out = ""; + + if ((leftChimeric) && (!rightChimeric)) { //get left + out += querySeq->getName() + "\t"; + out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t"; + + out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t"; + out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t"; + + out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t"; + + }else if ((!leftChimeric) && (rightChimeric)) { //print right + out += querySeq->getName() + "\t"; + out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t"; + + out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t"; + out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t"; + + out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t"; + + }else { //print both results + + if (leftdata.flag == "yes") { + out += querySeq->getName() + "_LEFT\t"; + out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t"; + + out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t"; + out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t"; + + out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t"; + } + + if (rightdata.flag == "yes") { + if (leftdata.flag == "yes") { out += "\n"; } + out += querySeq->getName() + "_RIGHT\t"; + out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t"; + + out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t"; + out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t"; + + out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t"; + } + } + + return out; + + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayer", "getBlock"); + exit(1); + } +} +//*************************************************************************************************************** string ChimeraSlayer::getBlock(data_struct data, string flag){ try { diff --git a/chimeraslayer.h b/chimeraslayer.h index 87439a3..3e76297 100644 --- a/chimeraslayer.h +++ b/chimeraslayer.h @@ -19,7 +19,6 @@ //This class was modeled after the chimeraSlayer written by the Broad Institute /***********************************************************************/ - class ChimeraSlayer : public Chimera { public: @@ -30,11 +29,14 @@ class ChimeraSlayer : public Chimera { int getChimeras(Sequence*); Sequence* print(ostream&, ostream&); + Sequence* print(ostream&, ostream&, data_results, data_results); void printHeader(ostream&); int doPrep(); + data_results getResults() { return printResults; } #ifdef USE_MPI Sequence* print(MPI_File&, MPI_File&); + Sequence* print(MPI_File&, MPI_File&, data_results, data_results); #endif private: @@ -47,13 +49,16 @@ class ChimeraSlayer : public Chimera { map > nameMapRank; //sequence name to rank so you can construct a template of the abundant sequences if the user uses itself as template vector chimeraResults; + data_results printResults; string chimeraFlags, searchMethod, fastafile, includeAbunds; bool realign, trimChimera; int window, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters, increment; float divR; void printBlock(data_struct, string, ostream&); + void printBlock(data_results, data_results, bool, bool, string, ostream&); string getBlock(data_struct, string); + string getBlock(data_results, data_results, bool, bool, string); int readNameFile(string); vector getTemplate(Sequence*); diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index 38c097f..ea2a656 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -14,7 +14,7 @@ //********************************************************************************************************************** vector ChimeraSlayerCommand::getValidParameters(){ try { - string AlignArray[] = {"fasta", "processors","trim", "name","window", "include","template","numwanted", "ksize", "match","mismatch", + string AlignArray[] = {"fasta", "processors","trim","trimera", "name","window", "include","template","numwanted", "ksize", "match","mismatch", "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir", "search","realign" }; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); return myArray; @@ -71,7 +71,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { else { //valid paramters for this command - string Array[] = {"fasta", "processors","name", "include","trim", "window", "template","numwanted", "ksize", "match","mismatch", + string Array[] = {"fasta", "processors","name", "include","trim", "trimera","window", "template","numwanted", "ksize", "match","mismatch", "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir", "search","realign" }; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); @@ -273,8 +273,8 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found") { temp = "f"; } trim = m->isTrue(temp); - //temp = validParameter.validFile(parameters, "trimera", false); if (temp == "not found") { temp = "f"; } - //trimera = m->isTrue(temp); + temp = validParameter.validFile(parameters, "trimera", false); if (temp == "not found") { temp = "f"; } + trimera = m->isTrue(temp); search = validParameter.validFile(parameters, "search", false); if (search == "not found") { search = "distance"; } @@ -312,7 +312,7 @@ void ChimeraSlayerCommand::help(){ m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n"); #endif m->mothurOut("The trim parameter allows you to output a new fasta file containing your sequences with the chimeric ones trimmed to include only their longest piece, default=F. \n"); - //m->mothurOut("The trimera parameter allows you to check both peices of a chimeric sequence for chimeras, thus looking for trimeras and quadmeras. default=F. \n"); + m->mothurOut("The trimera parameter allows you to check both peices of non-chimeric sequence for chimeras, thus looking for trimeras and quadmeras. default=F. \n"); m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default=50. \n"); m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=5.\n"); m->mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with, default=15.\n"); @@ -608,6 +608,7 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f if (m->control_pressed) { out.close(); out2.close(); if (trim) { out3.close(); } inFASTA.close(); return 1; } Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA); + string candidateAligned = candidateSeq->getAligned(); if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file @@ -618,14 +619,63 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f chimera->getChimeras(candidateSeq); if (m->control_pressed) { delete candidateSeq; return 1; } - - //print results - Sequence* trimmed = chimera->print(out, out2); - - if (trim) { trimmed->printSequence(out3); delete trimmed; } //do you want to check both pieces for chimeras - //if (trimera) {} + if (trimera) { + + //if you are not chimeric, then check each half + data_results wholeResults = chimera->getResults(); + + //determine if we need to split + bool isChimeric = false; + + if (wholeResults.flag == "yes") { + string chimeraFlag = "no"; + if( (wholeResults.results[0].bsa >= minBS && wholeResults.results[0].divr_qla_qrb >= divR) + || + (wholeResults.results[0].bsb >= minBS && wholeResults.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } + + + if (chimeraFlag == "yes") { + if ((wholeResults.results[0].bsa >= minBS) || (wholeResults.results[0].bsb >= minBS)) { isChimeric = true; } + } + } + + if (!isChimeric) { + + //split sequence in half by bases + string leftQuery, rightQuery; + Sequence tempSeq(candidateSeq->getName(), candidateAligned); + divideInHalf(tempSeq, leftQuery, rightQuery); + + //run chimeraSlayer on each piece + Sequence* left = new Sequence(candidateSeq->getName(), leftQuery); + Sequence* right = new Sequence(candidateSeq->getName(), rightQuery); + + //find chimeras + chimera->getChimeras(left); + data_results leftResults = chimera->getResults(); + + chimera->getChimeras(right); + data_results rightResults = chimera->getResults(); + + //if either piece is chimeric then report + Sequence* trimmed = chimera->print(out, out2, leftResults, rightResults); + if (trim) { trimmed->printSequence(out3); delete trimmed; } + + delete left; delete right; + + }else { //already chimeric + //print results + Sequence* trimmed = chimera->print(out, out2); + if (trim) { trimmed->printSequence(out3); delete trimmed; } + } + }else { + //print results + Sequence* trimmed = chimera->print(out, out2); + if (trim) { trimmed->printSequence(out3); delete trimmed; } + } + } count++; } @@ -681,6 +731,7 @@ int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fil delete buf4; Sequence* candidateSeq = new Sequence(iss); m->gobble(iss); + string candidateAligned = candidateSeq->getAligned(); if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file @@ -692,26 +743,97 @@ int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fil chimera->getChimeras(candidateSeq); if (m->control_pressed) { delete candidateSeq; return 1; } - - //print results - Sequence* trimmed = chimera->print(outMPI, outAccMPI); - if (trim) { - string outputString = ">" + trimmed->getName() + "\n" + trimmed->getAligned() + "\n"; - delete trimmed; + //do you want to check both pieces for chimeras + if (trimera) { + + //if you are not chimeric, then check each half + data_results wholeResults = chimera->getResults(); + + //determine if we need to split + bool isChimeric = false; - //write to accnos file - int length = outputString.length(); - char* buf2 = new char[length]; - memcpy(buf2, outputString.c_str(), length); + if (wholeResults.flag == "yes") { + string chimeraFlag = "no"; + if( (wholeResults.results[0].bsa >= minBS && wholeResults.results[0].divr_qla_qrb >= divR) + || + (wholeResults.results[0].bsb >= minBS && wholeResults.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } + + + if (chimeraFlag == "yes") { + if ((wholeResults.results[0].bsa >= minBS) || (wholeResults.results[0].bsb >= minBS)) { isChimeric = true; } + } + } + + if (!isChimeric) { + //split sequence in half by bases + string leftQuery, rightQuery; + Sequence tempSeq(candidateSeq->getName(), candidateAligned); + divideInHalf(tempSeq, leftQuery, rightQuery); + + //run chimeraSlayer on each piece + Sequence* left = new Sequence(candidateSeq->getName(), leftQuery); + Sequence* right = new Sequence(candidateSeq->getName(), rightQuery); + + //find chimeras + chimera->getChimeras(left); + data_results leftResults = chimera->getResults(); + + chimera->getChimeras(right); + data_results rightResults = chimera->getResults(); + + //if either piece is chimeric then report + Sequence* trimmed = chimera->print(outMPI, outAccMPI, leftResults, rightResults); + if (trim) { + string outputString = ">" + trimmed->getName() + "\n" + trimmed->getAligned() + "\n"; + delete trimmed; + + //write to accnos file + int length = outputString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outputString.c_str(), length); + + MPI_File_write_shared(outFastaMPI, buf2, length, MPI_CHAR, &status); + delete buf2; + } + + delete left; delete right; + + }else { //already chimeric + //print results + Sequence* trimmed = chimera->print(outMPI, outAccMPI); + + if (trim) { + string outputString = ">" + trimmed->getName() + "\n" + trimmed->getAligned() + "\n"; + delete trimmed; + + //write to accnos file + int length = outputString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outputString.c_str(), length); + + MPI_File_write_shared(outFastaMPI, buf2, length, MPI_CHAR, &status); + delete buf2; + } + } + }else { + //print results + Sequence* trimmed = chimera->print(outMPI, outAccMPI); - MPI_File_write_shared(outFastaMPI, buf2, length, MPI_CHAR, &status); - delete buf2; + if (trim) { + string outputString = ">" + trimmed->getName() + "\n" + trimmed->getAligned() + "\n"; + delete trimmed; + + //write to accnos file + int length = outputString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outputString.c_str(), length); + + MPI_File_write_shared(outFastaMPI, buf2, length, MPI_CHAR, &status); + delete buf2; + } } - //do you want to check both pieces for chimeras - //if (trimera) {} - } } delete candidateSeq; @@ -790,4 +912,42 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename /**************************************************************************************************/ +int ChimeraSlayerCommand::divideInHalf(Sequence querySeq, string& leftQuery, string& rightQuery) { + try { + + string queryUnAligned = querySeq.getUnaligned(); + int numBases = int(queryUnAligned.length() * 0.5); + + string queryAligned = querySeq.getAligned(); + leftQuery = querySeq.getAligned(); + rightQuery = querySeq.getAligned(); + + int baseCount = 0; + int leftSpot = 0; + for (int i = 0; i < queryAligned.length(); i++) { + //if you are a base + if (isalpha(queryAligned[i])) { + baseCount++; + } + + //if you have half + if (baseCount >= numBases) { leftSpot = i; break; } //first half + } + + //blank out right side + for (int i = leftSpot; i < leftQuery.length(); i++) { leftQuery[i] = '.'; } + + //blank out left side + for (int i = 0; i < leftSpot; i++) { rightQuery[i] = '.'; } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayerCommand", "divideInHalf"); + exit(1); + } +} + +/**************************************************************************************************/ diff --git a/chimeraslayercommand.h b/chimeraslayercommand.h index ff19337..d201ccb 100644 --- a/chimeraslayercommand.h +++ b/chimeraslayercommand.h @@ -43,6 +43,7 @@ private: int driver(linePair*, string, string, string, string); int createProcesses(string, string, string, string); + int divideInHalf(Sequence, string&, string&); #ifdef USE_MPI int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector&); diff --git a/engine.cpp b/engine.cpp index dd61de6..5419717 100644 --- a/engine.cpp +++ b/engine.cpp @@ -25,19 +25,12 @@ Engine::Engine(){ exit(1); } } - /***********************************************************************/ - -InteractEngine::InteractEngine(string path){ - - globaldata = GlobalData::getInstance(); - - string temppath = path.substr(0, (path.find_last_of('m'))); - - //this will happen if you set the path variable to contain mothur's exe location - if (temppath == "") { - +string Engine::findMothursPath(){ + try { + string envPath = getenv("PATH"); + string mothurPath = ""; //delimiting path char char delim; @@ -52,7 +45,6 @@ InteractEngine::InteractEngine(string path){ mout->splitAtChar(envPath, dirs, delim); //get path related to mothur - string mothurPath = ""; for (int i = 0; i < dirs.size(); i++) { //to lower so we can find it string tempLower = ""; @@ -62,15 +54,53 @@ InteractEngine::InteractEngine(string path){ if (tempLower.find("mothur") != -1) { mothurPath = dirs[i]; break; } } - //add mothur so it looks like what argv would look like - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - mothurPath += "/mothur"; - #else - mothurPath += "\\mothur"; - #endif + if (mothurPath != "") { + //add mothur so it looks like what argv would look like + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + mothurPath += "/mothur"; + #else + mothurPath += "\\mothur"; + #endif + }else { + //okay mothur is not in the path, so the folder mothur is in must be in the path + //lets find out which one + + //get path related to mothur + for (int i = 0; i < dirs.size(); i++) { + + //is this mothurs path? + ifstream in; + string tempIn = dirs[i]; + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + tempIn += "/mothur"; + #else + tempIn += "\\mothur"; + #endif + mout->openInputFile(tempIn, in, ""); + + //if this file exists + if (in) { in.close(); mothurPath = tempIn; break; } + } + } + + return mothurPath; - path = mothurPath; } + catch(exception& e) { + mout->errorOut(e, "Engine", "findMothursPath"); + exit(1); + } +} +/***********************************************************************/ + +InteractEngine::InteractEngine(string path){ + + globaldata = GlobalData::getInstance(); + + string temppath = path.substr(0, (path.find_last_of("othur")-5)); + + //this will happen if you set the path variable to contain mothur's exe location + if (temppath == "") { path = findMothursPath(); } globaldata->argv = path; } @@ -224,46 +254,11 @@ BatchEngine::BatchEngine(string path, string batchFileName){ openedBatch = mout->openInputFile(batchFileName, inputBatchFile); - string temppath = path.substr(0, (path.find_last_of('m'))); + string temppath = path.substr(0, (path.find_last_of("othur")-5)); //this will happen if you set the path variable to contain mothur's exe location - if (temppath == "") { + if (temppath == "") { path = findMothursPath(); } - string envPath = getenv("PATH"); - - //delimiting path char - char delim; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - delim = ':'; - #else - delim = ';'; - #endif - - //break apart path variable by ':' - vector dirs; - mout->splitAtChar(envPath, dirs, delim); - - //get path related to mothur - string mothurPath = ""; - for (int i = 0; i < dirs.size(); i++) { - //to lower so we can find it - string tempLower = ""; - for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); } - - //is this mothurs path? - if (tempLower.find("mothur") != -1) { mothurPath = dirs[i]; break; } - } - - //add mothur so it looks like what argv would look like - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - mothurPath += "/mothur"; - #else - mothurPath += "\\mothur"; - #endif - - path = mothurPath; - } - globaldata->argv = path; } @@ -416,46 +411,11 @@ ScriptEngine::ScriptEngine(string path, string commandString){ //remove quotes listOfCommands = commandString.substr(1, (commandString.length()-1)); - string temppath = path.substr(0, (path.find_last_of('m'))); - + string temppath = path.substr(0, (path.find_last_of("othur")-5)); + //this will happen if you set the path variable to contain mothur's exe location - if (temppath == "") { + if (temppath == "") { path = findMothursPath(); } - string envPath = getenv("PATH"); - - //delimiting path char - char delim; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - delim = ':'; - #else - delim = ';'; - #endif - - //break apart path variable by ':' - vector dirs; - mout->splitAtChar(envPath, dirs, delim); - - //get path related to mothur - string mothurPath = ""; - for (int i = 0; i < dirs.size(); i++) { - //to lower so we can find it - string tempLower = ""; - for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); } - - //is this mothurs path? - if (tempLower.find("mothur") != -1) { mothurPath = dirs[i]; break; } - } - - //add mothur so it looks like what argv would look like - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - mothurPath += "/mothur"; - #else - mothurPath += "\\mothur"; - #endif - - path = mothurPath; - } - globaldata->argv = path; } diff --git a/engine.hpp b/engine.hpp index 0e7721a..d5846ba 100644 --- a/engine.hpp +++ b/engine.hpp @@ -36,6 +36,7 @@ protected: vector options; CommandFactory* cFactory; MothurOut* mout; + string findMothursPath(); }; diff --git a/maligner.cpp b/maligner.cpp index 6deeb80..30e9936 100644 --- a/maligner.cpp +++ b/maligner.cpp @@ -624,7 +624,7 @@ vector Maligner::getKmerSeqs(Sequence* q, int num) { return refResults; } catch(exception& e) { - m->errorOut(e, "Maligner", "getBlastSeqs"); + m->errorOut(e, "Maligner", "getKmerSeqs"); exit(1); } } diff --git a/slayer.cpp b/slayer.cpp index 5d5a48b..72da257 100644 --- a/slayer.cpp +++ b/slayer.cpp @@ -27,7 +27,7 @@ string Slayer::getResults(Sequence* query, vector refSeqs) { Sequence* q = new Sequence(query->getName(), query->getAligned()); Sequence* leftParent = new Sequence(refSeqs[i]->getName(), refSeqs[i]->getAligned()); Sequence* rightParent = new Sequence(refSeqs[j]->getName(), refSeqs[j]->getAligned()); - + map spots; //map from spot in original sequence to spot in filtered sequence for query and both parents vector divs = runBellerophon(q, leftParent, rightParent, spots); @@ -37,10 +37,10 @@ string Slayer::getResults(Sequence* query, vector refSeqs) { delete rightParent; return "no"; } - + vector selectedDivs; for (int k = 0; k < divs.size(); k++) { - + vector snpsLeft = getSNPS(divs[k].parentA.getAligned(), divs[k].querySeq.getAligned(), divs[k].parentB.getAligned(), divs[k].winLStart, divs[k].winLEnd); vector snpsRight = getSNPS(divs[k].parentA.getAligned(), divs[k].querySeq.getAligned(), divs[k].parentB.getAligned(), divs[k].winRStart, divs[k].winREnd); @@ -105,7 +105,6 @@ string Slayer::getResults(Sequence* query, vector refSeqs) { } } - // compute bootstrap support if (all.size() > 0) { //sort them @@ -143,13 +142,13 @@ vector Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence* string parentA = pA->getAligned(); string parentB = pB->getAligned(); int length = query.length(); -//cout << q->getName() << endl << q->getAligned() << endl << endl; -//cout << pA->getName() << endl << pA->getAligned() << endl << endl; -//cout << pB->getName() << endl << pB->getAligned() << endl << endl; -//cout << " length = " << length << endl; -//cout << q->getName() << endl; -//cout << pA->getName() << '\t'; -//cout << pB->getName() << endl; +/*cout << q->getName() << endl << q->getAligned() << endl << endl; +cout << pA->getName() << endl << pA->getAligned() << endl << endl; +cout << pB->getName() << endl << pB->getAligned() << endl << endl; +cout << " length = " << length << endl; +cout << q->getName() << endl; +cout << pA->getName() << '\t'; +cout << pB->getName() << endl;*/ //check window size if (length < (2*windowSize+windowStep)) { diff --git a/slayer.h b/slayer.h index 33ccc3f..875e4a9 100644 --- a/slayer.h +++ b/slayer.h @@ -11,46 +11,11 @@ #include "sequence.hpp" +#include "chimera.h" /***********************************************************************/ //This class was modeled after the chimeraSlayer written by the Broad Institute /***********************************************************************/ -struct data_struct { //this is crazy big...but follow original. - float divr_qla_qrb; - float divr_qlb_qra; - float qla_qrb; - float qlb_qra; - float qla; - float qrb; - float ab; - float qa; - float qb; - float lab; - float rab; - float qra; - float qlb; - int winLStart; - int winLEnd; - int winRStart; - int winREnd; - Sequence querySeq; - Sequence parentA; - Sequence parentB; - float bsa; - float bsb; - float bsMax; - float chimeraMax; - -}; -/***********************************************************************/ -//sorts lowest to highest first by bsMax, then if tie by chimeraMax -inline bool compareDataStruct(data_struct left, data_struct right){ - if (left.bsMax < right.bsMax) { return true; } - else if (left.bsMax == right.bsMax) { - return (left.chimeraMax < right.chimeraMax); - }else { return false; } -} -/***********************************************************************/ struct snps { char queryChar; char parentAChar;