From 9aa36ad8297141ef9fcab04fea10e96d2fed26fe Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Wed, 28 Mar 2012 14:20:50 -0400 Subject: [PATCH] added keep dots to pcr.seqs. fixed pre.cluster name file name when group option is used. --- pcrseqscommand.h | 53 +++++++++++++++++++++++++++++++---------- prcseqscommand.cpp | 55 +++++++++++++++++++++++++++++++++++-------- preclustercommand.cpp | 1 + sequence.cpp | 10 ++++++++ 4 files changed, 97 insertions(+), 22 deletions(-) diff --git a/pcrseqscommand.h b/pcrseqscommand.h index 07ca8d5..03092bc 100644 --- a/pcrseqscommand.h +++ b/pcrseqscommand.h @@ -43,7 +43,7 @@ private: vector lines; bool getOligos(vector >&, vector >&, vector >&); - bool abort, keepprimer; + bool abort, keepprimer, keepdots; string fastafile, oligosfile, taxfile, groupfile, namefile, ecolifile, outputDir, nomatch; int start, end, pdiffs, processors, length; @@ -79,11 +79,11 @@ struct pcrData { vector primers; vector revPrimer; set badSeqNames; - bool keepprimer; + bool keepprimer, keepdots; pcrData(){} - pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, vector pr, vector rpr, string nm, bool kp, int st, int en, int l, unsigned long long fst, unsigned long long fen) { + pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, vector pr, vector rpr, string nm, bool kp, bool kd, int st, int en, int l, unsigned long long fst, unsigned long long fen) { filename = f; goodFasta = gf; badFasta = bfn; @@ -94,6 +94,7 @@ struct pcrData { revPrimer = rpr; nomatch = nm; keepprimer = kp; + keepdots = kd; start = st; end = en; length = l; @@ -216,8 +217,14 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ else{ //are you aligned if (aligned) { - if (!pDataArray->keepprimer) { currSeq.padToPos(mapAligned[primerEnd]); } - else { currSeq.padToPos(mapAligned[primerStart]); } + if (!pDataArray->keepprimer) { + if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerEnd]); } + else { 
currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd])); } + } + else { + if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerStart]); } + else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); } + } }else { if (!pDataArray->keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } @@ -286,9 +293,14 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ else{ //are you aligned if (aligned) { - if (!pDataArray->keepprimer) { currSeq.padFromPos(mapAligned[primerStart]); } - else { currSeq.padFromPos(mapAligned[primerEnd]); } - } + if (!pDataArray->keepprimer) { + if (pDataArray->keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); } + else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); } + } + else { + if (pDataArray->keepdots) { currSeq.filterFromPos(mapAligned[primerEnd]); } + else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd])); } + } } else { if (!pDataArray->keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart)); } else { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd)); } @@ -302,21 +314,38 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ else if (currSeq.getAligned().length() != pDataArray->length) { pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. 
When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break; }else { - currSeq.padToPos(pDataArray->start); - currSeq.padFromPos(pDataArray->end); + if (pDataArray->keepdots) { + currSeq.filterToPos(pDataArray->start); + currSeq.filterFromPos(pDataArray->end); + }else { + string seqString = currSeq.getAligned().substr(0, pDataArray->end); + seqString = seqString.substr(pDataArray->start); + currSeq.setAligned(seqString); + } } }else{ //using start and end to trim //make sure the seqs are aligned lengths.insert(currSeq.getAligned().length()); if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; } else { - if (pDataArray->start != -1) { currSeq.padToPos(pDataArray->start); } if (pDataArray->end != -1) { if (pDataArray->end > currSeq.getAligned().length()) { pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; } else { - currSeq.padFromPos(pDataArray->end); + if (pDataArray->keepdots) { currSeq.filterFromPos(pDataArray->end); } + else { + string seqString = currSeq.getAligned().substr(0, pDataArray->end); + currSeq.setAligned(seqString); + } + } + } + if (pDataArray->start != -1) { + if (pDataArray->keepdots) { currSeq.filterToPos(pDataArray->start); } + else { + string seqString = currSeq.getAligned().substr(pDataArray->start); + currSeq.setAligned(seqString); } } + } } diff --git a/prcseqscommand.cpp b/prcseqscommand.cpp index d950b48..afedc74 100644 --- a/prcseqscommand.cpp +++ b/prcseqscommand.cpp @@ -23,6 +23,7 @@ vector PcrSeqsCommand::setParameters(){ CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", 
"",false,false); parameters.push_back(pkeepprimer); + CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pkeepdots); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -177,6 +178,9 @@ PcrSeqsCommand::PcrSeqsCommand(string option) { temp = validParameter.validFile(parameters, "keepprimer", false); if (temp == "not found") { temp = "f"; } keepprimer = m->isTrue(temp); + temp = validParameter.validFile(parameters, "keepdots", false); if (temp == "not found") { temp = "t"; } + keepdots = m->isTrue(temp); + temp = validParameter.validFile(parameters, "oligos", true); if (temp == "not found"){ oligosfile = ""; } else if(temp == "not open"){ oligosfile = ""; abort = true; } @@ -297,7 +301,10 @@ int PcrSeqsCommand::execute(){ if (m->control_pressed) { return 0; } - writeAccnos(badNames); + //don't write or keep if blank + if (badNames.size() != 0) { writeAccnos(badNames); } + if (m->isBlank(badSeqFile)) { m->mothurRemove(badSeqFile); } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if (namefile != "") { readName(badNames); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -434,7 +441,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); } // Allocate memory for thread data. 
- pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, start, end, length, lines[i].start, lines[i].end); + pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end); pDataArray.push_back(tempPcr); //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier @@ -516,8 +523,14 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta else{ //are you aligned if (aligned) { - if (!keepprimer) { currSeq.filterToPos(mapAligned[primerEnd]); } - else { currSeq.filterToPos(mapAligned[primerStart]); } + if (!keepprimer) { + if (keepdots) { currSeq.filterToPos(mapAligned[primerEnd]); } + else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd])); } + } + else { + if (keepdots) { currSeq.filterToPos(mapAligned[primerStart]); } + else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); } + } }else { if (!keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } @@ -533,8 +546,14 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta else{ //are you aligned if (aligned) { - if (!keepprimer) { currSeq.filterFromPos(mapAligned[primerStart]); } - else { currSeq.filterFromPos(mapAligned[primerEnd]); } + if (!keepprimer) { + if (keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); } + else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); } + } + else { + if (keepdots) { currSeq.filterFromPos(mapAligned[primerEnd]); } + else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd])); } + } } else { if 
(!keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart)); } @@ -549,19 +568,35 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta else if (currSeq.getAligned().length() != length) { m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break; }else { - currSeq.filterToPos(start); - currSeq.filterFromPos(end); + if (keepdots) { + currSeq.filterToPos(start); + currSeq.filterFromPos(end); + }else { + string seqString = currSeq.getAligned().substr(0, end); + seqString = seqString.substr(start); + currSeq.setAligned(seqString); + } } }else{ //using start and end to trim //make sure the seqs are aligned lengths.insert(currSeq.getAligned().length()); if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; } else { - if (start != -1) { currSeq.filterToPos(start); } if (end != -1) { if (end > currSeq.getAligned().length()) { m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; } else { - currSeq.filterFromPos(end); + if (keepdots) { currSeq.filterFromPos(end); } + else { + string seqString = currSeq.getAligned().substr(0, end); + currSeq.setAligned(seqString); + } + } + } + if (start != -1) { + if (keepdots) { currSeq.filterToPos(start); } + else { + string seqString = currSeq.getAligned().substr(start); + currSeq.setAligned(seqString); } } } diff --git a/preclustercommand.cpp b/preclustercommand.cpp index 23d7386..f2fbc80 100644 --- a/preclustercommand.cpp +++ b/preclustercommand.cpp @@ -229,6 +229,7 @@ int PreClusterCommand::execute(){ m->mothurOut("/******************************************/"); m->mothurOutEndLine(); m->renameFile(filenames["fasta"][0], newFastaFile); + m->renameFile(filenames["name"][0], 
newNamesFile); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run pre.cluster."); m->mothurOutEndLine(); diff --git a/sequence.cpp b/sequence.cpp index cfd8ec2..9cdbfb9 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -581,6 +581,11 @@ int Sequence::filterToPos(int start){ aligned[j] = '.'; } + //things like ......----------AT become ................AT + for(int j = (start > 0 ? start-1 : 0); j < aligned.length(); j++) { + if (isalpha(aligned[j])) { break; } + else { aligned[j] = '.'; } + } setUnaligned(aligned); return 0; @@ -596,6 +601,11 @@ int Sequence::filterFromPos(int end){ aligned[j] = '.'; } + for(int j = aligned.length()-1; j >= 0; j--) { //things like AT----------...... become AT................ + if (isalpha(aligned[j])) { break; } + else { aligned[j] = '.'; } + } + setUnaligned(aligned); return 0; -- 2.39.2