+
+/**************************************************************************************************/
+/* Windows thread entry point (DWORD WINAPI, LPVOID argument) that runs ChimeraSlayer on each
+ * fasta file listed in pDataArray->fileToPriority.
+ *
+ * lpParam is cast to slayerData*. That struct supplies the settings and the helper object m,
+ * and this function writes end, numNoParents and count back into it.
+ *
+ * Per file: opens <root><group>slayer.chimera, slayer.accnos and (when trim is set)
+ * slayer.fasta, feeds every sequence of the input fasta to a ChimeraSlayer instance, optionally
+ * post-processes the accnos file when dups is set, then hands the per-group files to
+ * appendFiles together with the combined paths and removes them along with the input fasta.
+ *
+ * Returns 0 on normal completion and on the exits taken before the sequence loop starts;
+ * returns 1 when control_pressed is seen inside the sequence loop. A caught exception is
+ * passed to m->errorOut and followed by exit(1).
+ */
+static DWORD WINAPI MySlayerGroupThreadFunction(LPVOID lpParam){
+ slayerData* pDataArray;
+ pDataArray = (slayerData*)lpParam; // the thread argument is the per-thread slayerData
+
+ try {
+ ofstream outCountList; // opened only when hasCount && dups; receives "name<TAB>group" lines below
+ if (pDataArray->hasCount && pDataArray->dups) { pDataArray->m->openOutputFile(pDataArray->countlist, outCountList); }
+
+ int totalSeqs = 0; // sum of numSeqs over all files handled by this call
+ pDataArray->end = 0; // incremented once per file after its sequence loop finishes
+
+ for (map<string, map<string, int> >::iterator itFile = pDataArray->fileToPriority.begin(); itFile != pDataArray->fileToPriority.end(); itFile++) {
+ // leave before starting the next file if control_pressed is set
+ if (pDataArray->m->control_pressed) { return 0; }
+ // per-file setup: timer, input name, priority map and the three per-group output names
+ int start = time(NULL); // used for the elapsed-time message at the end of this iteration
+ string thisFastaName = itFile->first;
+ map<string, int> thisPriority = itFile->second; // copy of this file's priority map, passed to ChimeraSlayer
+ string thisoutputFileName = pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisFastaName)) + pDataArray->fileGroup[thisFastaName] + "slayer.chimera";
+ string thisaccnosFileName = pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisFastaName)) + pDataArray->fileGroup[thisFastaName] + "slayer.accnos";
+ string thistrimFastaFileName = pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisFastaName)) + pDataArray->fileGroup[thisFastaName] + "slayer.fasta";
+
+ pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Checking sequences from group: " + pDataArray->fileGroup[thisFastaName] + "."); pDataArray->m->mothurOutEndLine();
+
+ //int numSeqs = driver(lines[0], thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName, thisPriority);
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // open the per-group outputs (out3 only when trim is set) and the input fasta
+ ofstream out;
+ pDataArray->m->openOutputFile(thisoutputFileName, out);
+
+ ofstream out2;
+ pDataArray->m->openOutputFile(thisaccnosFileName, out2);
+
+ ofstream out3;
+ if (pDataArray->trim) { pDataArray->m->openOutputFile(thistrimFastaFileName, out3); }
+
+ ifstream inFASTA;
+ pDataArray->m->openInputFile(thisFastaName, inFASTA);
+ // one ChimeraSlayer instance is reused for every sequence in this file; deleted on every exit path below
+ Chimera* chimera;
+ chimera = new ChimeraSlayer(thisFastaName, pDataArray->templatefile, pDataArray->trim, thisPriority, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId);
+ chimera->printHeader(out);
+
+ int numSeqs = 0; // named sequences read from this file (see the increment below)
+
+ if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 0; }
+ // if getUnaligned() is true, report it and return without processing any sequences (remaining files are not visited)
+ if (chimera->getUnaligned()) {
+ pDataArray->m->mothurOut("Your template sequences are different lengths, please correct."); pDataArray->m->mothurOutEndLine();
+ out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close();
+ delete chimera;
+ return 0;
+ }
+ int templateSeqsLength = chimera->getLength(); // candidates whose aligned length differs from this are skipped
+
+ bool done = false; // never set to true; the loop ends via break on eof or via return
+ while (!done) {
+
+ if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 1; }
+ // read the next sequence, then call gobble on the stream (presumably skips trailing whitespace - confirm)
+ Sequence* candidateSeq = new Sequence(inFASTA); pDataArray->m->gobble(inFASTA);
+ string candidateAligned = candidateSeq->getAligned(); // kept for the half-split in the trimera branch
+
+ if (candidateSeq->getName() != "") { //in case there is a commented sequence at the end of a file
+ if (candidateSeq->getAligned().length() != templateSeqsLength) {
+ pDataArray->m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); pDataArray->m->mothurOutEndLine();
+ }else{
+ //find chimeras
+ chimera->getChimeras(candidateSeq);
+
+ if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete candidateSeq; delete chimera; return 1; }
+
+ //if you are not chimeric, then check each half
+ data_results wholeResults = chimera->getResults();
+
+ //determine if we need to split
+ bool isChimeric = false;
+ // a "yes" flag only counts when results[0] meets both the minBS and divR thresholds on the a side or on the b side
+ if (wholeResults.flag == "yes") {
+ string chimeraFlag = "no";
+ if( (wholeResults.results[0].bsa >= pDataArray->minBS && wholeResults.results[0].divr_qla_qrb >= pDataArray->divR)
+ ||
+ (wholeResults.results[0].bsb >= pDataArray->minBS && wholeResults.results[0].divr_qlb_qra >= pDataArray->divR) ) { chimeraFlag = "yes"; }
+
+ // NOTE(review): the minBS re-check below looks already implied by the condition above - confirm
+ if (chimeraFlag == "yes") {
+ if ((wholeResults.results[0].bsa >= pDataArray->minBS) || (wholeResults.results[0].bsb >= pDataArray->minBS)) { isChimeric = true; }
+ }
+ }
+ // when trimera is set, a sequence not called chimeric as a whole is re-checked as two halves
+ if ((!isChimeric) && pDataArray->trimera) {
+
+ //split sequence in half by bases
+ string leftQuery, rightQuery;
+ Sequence tempSeq(candidateSeq->getName(), candidateAligned);
+ //divideInHalf(tempSeq, leftQuery, rightQuery);
+ string queryUnAligned = tempSeq.getUnaligned();
+ int numBases = int(queryUnAligned.length() * 0.5); // half of queryUnAligned's length, truncated
+
+ string queryAligned = tempSeq.getAligned();
+ leftQuery = tempSeq.getAligned();
+ rightQuery = tempSeq.getAligned();
+ // walk the aligned string counting alphabetic characters; leftSpot is the index where numBases is reached
+ int baseCount = 0;
+ int leftSpot = 0;
+ for (int i = 0; i < queryAligned.length(); i++) {
+ //if you are a base
+ if (isalpha(queryAligned[i])) {
+ baseCount++;
+ }
+
+ //if you have half
+ if (baseCount >= numBases) { leftSpot = i; break; } //first half
+ }
+
+ //blank out right side (index leftSpot itself is blanked in leftQuery)
+ for (int i = leftSpot; i < leftQuery.length(); i++) { leftQuery[i] = '.'; }
+
+ //blank out left side (indices before leftSpot; leftSpot itself is kept in rightQuery)
+ for (int i = 0; i < leftSpot; i++) { rightQuery[i] = '.'; }
+
+ //run chimeraSlayer on each piece
+ Sequence* left = new Sequence(candidateSeq->getName(), leftQuery);
+ Sequence* right = new Sequence(candidateSeq->getName(), rightQuery);
+
+ //find chimeras
+ chimera->getChimeras(left);
+ data_results leftResults = chimera->getResults();
+
+ chimera->getChimeras(right);
+ data_results rightResults = chimera->getResults();
+
+ //if either piece is chimeric then report
+ Sequence trimmed = chimera->print(out, out2, leftResults, rightResults);
+ if (pDataArray->trim) { trimmed.printSequence(out3); }
+
+ delete left; delete right;
+
+ }else { //already chimeric, or trimera is not set
+ //print results
+ Sequence trimmed = chimera->print(out, out2);
+ if (pDataArray->trim) { trimmed.printSequence(out3); }
+ }
+
+
+ }
+ numSeqs++; // also counts sequences skipped above for having the wrong length
+ }
+
+ delete candidateSeq;
+
+ if (inFASTA.eof()) { break; }
+
+ //report progress
+ if((numSeqs) % 100 == 0){ pDataArray->m->mothurOutJustToScreen("Processing sequence: " + toString(numSeqs)+"\n"); pDataArray->m->mothurOutEndLine(); }
+ }
+ //report the final count unless it is a multiple of 100
+ if((numSeqs) % 100 != 0){ pDataArray->m->mothurOutJustToScreen("Processing sequence: " + toString(numSeqs)+"\n"); }
+
+ pDataArray->numNoParents = chimera->getNumNoParents(); // overwritten for each file, so the last file's value is what remains
+ if (pDataArray->numNoParents == numSeqs) { pDataArray->m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors.\n"); }
+ // close everything before the files are reopened, appended and removed below
+ out.close();
+ out2.close();
+ if (pDataArray->trim) { out3.close(); }
+ inFASTA.close();
+ delete chimera;
+ pDataArray->end++;
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
+ //This table will zero out group counts for seqs determined to be chimeric by that group.
+ if (pDataArray->dups) {
+ if (!pDataArray->m->isBlank(thisaccnosFileName)) {
+ ifstream in;
+ pDataArray->m->openInputFile(thisaccnosFileName, in);
+ string name;
+ if (pDataArray->hasCount) { // write one "name<TAB>group" line to outCountList per accnos entry
+ while (!in.eof()) {
+ in >> name; pDataArray->m->gobble(in);
+ outCountList << name << '\t' << pDataArray->fileGroup[thisFastaName] << endl;
+ }
+ in.close();
+ }else { // no count file: rewrite the accnos file via a ".temp" copy, replacing each name with the comma-split entries from this group's name map
+ map<string, map<string, string> >::iterator itGroupNameMap = pDataArray->group2NameMap.find(pDataArray->fileGroup[thisFastaName]);
+ if (itGroupNameMap != pDataArray->group2NameMap.end()) {
+ map<string, string> thisnamemap = itGroupNameMap->second;
+ map<string, string>::iterator itN;
+ ofstream out; // shadows the outer 'out', which was closed above
+ pDataArray->m->openOutputFile(thisaccnosFileName+".temp", out);
+ while (!in.eof()) {
+ in >> name; pDataArray->m->gobble(in);
+ //pDataArray->m->mothurOut("here = " + name + '\t');
+ itN = thisnamemap.find(name);
+ if (itN != thisnamemap.end()) {
+ vector<string> tempNames; pDataArray->m->splitAtComma(itN->second, tempNames);
+ for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
+ //pDataArray->m->mothurOut(itN->second + '\n');
+ // a missing name is logged and control_pressed is set, but the loop keeps reading
+ }else { pDataArray->m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); pDataArray->m->control_pressed = true; }
+ }
+ out.close();
+ in.close();
+ pDataArray->m->renameFile(thisaccnosFileName+".temp", thisaccnosFileName);
+ }else { pDataArray->m->mothurOut("[ERROR]: parsing cannot find " + pDataArray->fileGroup[thisFastaName] + ".\n"); pDataArray->m->control_pressed = true; }
+ }
+
+ }
+ }
+
+
+ //append each per-group file via appendFiles with its combined path, then remove the per-group file; the input fasta is removed too
+ pDataArray->m->appendFiles(thisoutputFileName, pDataArray->outputFName); pDataArray->m->mothurRemove(thisoutputFileName);
+ pDataArray->m->appendFiles(thisaccnosFileName, pDataArray->accnos); pDataArray->m->mothurRemove(thisaccnosFileName);
+ if (pDataArray->trim) { pDataArray->m->appendFiles(thistrimFastaFileName, pDataArray->fasta); pDataArray->m->mothurRemove(thistrimFastaFileName); }
+ pDataArray->m->mothurRemove(thisFastaName);
+
+ totalSeqs += numSeqs;
+
+ pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + pDataArray->fileGroup[thisFastaName] + "."); pDataArray->m->mothurOutEndLine();
+ }
+
+ pDataArray->count = totalSeqs; // only reached when every file was processed without an early return
+ if (pDataArray->hasCount && pDataArray->dups) { outCountList.close(); }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "ChimeraSlayerCommand", "MySlayerGroupThreadFunction");
+ exit(1); // exit() ends the whole process, not just this thread
+ }
+}
+