X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=pairwiseseqscommand.cpp;h=c9e5ecfdfda727427793e98aaed0270cdcd6e046;hp=1fa96e3e9d4c6cf8ce9a4e99af30d01c5562a009;hb=1a20e24ee786195ab0e1cccd4f5aede7a88f3f4e;hpb=49d2b7459c5027557564b21e9487dadafbbbdc96 diff --git a/pairwiseseqscommand.cpp b/pairwiseseqscommand.cpp index 1fa96e3..c9e5ecf 100644 --- a/pairwiseseqscommand.cpp +++ b/pairwiseseqscommand.cpp @@ -12,20 +12,20 @@ //********************************************************************************************************************** vector PairwiseSeqsCommand::setParameters(){ try { - CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter palign("align", "Multiple", "needleman-gotoh-blast-noalign", "needleman", "", "", "",false,false); parameters.push_back(palign); - CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch); - CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch); - CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen); - CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(poutput); - CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc); - CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends); - CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress); - CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pcutoff); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","phylip-column",false,true,true); parameters.push_back(pfasta); + CommandParameter palign("align", "Multiple", "needleman-gotoh-blast-noalign", "needleman", "", "", "","",false,false); parameters.push_back(palign); + CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch); + CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch); + CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen); + CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter poutput("output", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false,true); parameters.push_back(poutput); + CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "","",false,false); parameters.push_back(pcalc); + CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pcountends); + CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcompress); + CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "","",false,false,true); parameters.push_back(pcutoff); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -65,25 +65,20 @@ string PairwiseSeqsCommand::getHelpString(){ } } //********************************************************************************************************************** -string PairwiseSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ - try { - string outputFileName = ""; - map >::iterator it; +string PairwiseSeqsCommand::getOutputPattern(string type) { + try { + string pattern = ""; - //is this a type this command creates - it = outputTypes.find(type); - if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } - else { - if (type == "phylip") { outputFileName = "dist"; } - else if (type == "column") { outputFileName = "dist"; } - else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } - } - return outputFileName; - } - catch(exception& e) { - m->errorOut(e, "PairwiseSeqsCommand", "getOutputFileNameTag"); - exit(1); - } + if (type == "phylip") { pattern = "[filename],[outputtag],dist"; } + else if (type == "column") { pattern = "[filename],dist"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "PairwiseSeqsCommand", "getOutputPattern"); + exit(1); + } } //********************************************************************************************************************** PairwiseSeqsCommand::PairwiseSeqsCommand(){ @@ -249,6 +244,7 @@ PairwiseSeqsCommand::PairwiseSeqsCommand(string option) { align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; } output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "column"; } + if (output=="phylip") { output = "lt"; } if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; } calc = validParameter.validFile(parameters, "calc", false); @@ -290,16 +286,20 @@ int PairwiseSeqsCommand::execute(){ int numSeqs = alignDB.getNumSeqs(); int startTime = time(NULL); string outputFile = ""; - + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])); if (output == "lt") { //does the user want lower triangle phylip formatted file - outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "phylip." + getOutputFileNameTag("phylip"); + variables["[outputtag]"] = "phylip"; + outputFile = getOutputFileName("phylip", variables); m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); }else if (output == "column") { //user wants column format - outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("column"); + outputFile = getOutputFileName("column", variables); outputTypes["column"].push_back(outputFile); m->mothurRemove(outputFile); }else { //assume square - outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "square." + getOutputFileNameTag("phylip"); + variables["[outputtag]"] = "square"; + outputFile = getOutputFileName("phylip", variables); m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); } @@ -498,7 +498,7 @@ int PairwiseSeqsCommand::execute(){ } m->mothurOutEndLine(); - m->mothurOut("Output File Name: "); m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); @@ -564,7 +564,7 @@ void PairwiseSeqsCommand::createProcesses(string filename) { string extension = toString(i) + ".temp"; // Allocate memory for thread data. - pairwiseData* tempDist = new pairwiseData((filename+extension), align, "square", Estimators[0], countends, output, alignDB, m, lines[i+1].start, lines[i+1].end, match, misMatch, gapOpen, gapExtend, longestBase, i); + pairwiseData* tempDist = new pairwiseData((filename+extension), align, "square", Estimators[0], countends, output, alignDB, m, lines[i+1].start, lines[i+1].end, match, misMatch, gapOpen, gapExtend, longestBase, cutoff, i); pDataArray.push_back(tempDist); processIDS.push_back(i); @@ -581,6 +581,9 @@ void PairwiseSeqsCommand::createProcesses(string filename) { //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ + if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) { + m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; + } CloseHandle(hThreadArray[i]); delete pDataArray[i]; } @@ -669,10 +672,14 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, fl seqI.setAligned(alignment->getSeqAAln()); seqJ.setAligned(alignment->getSeqBAln()); - + //cout << seqI.getName() << '\t' << seqJ.getName() << endl; + //cout << alignment->getSeqAAln() << endl << alignment->getSeqBAln() << endl; + distCalculator->calcDist(seqI, seqJ); double dist = distCalculator->getDist(); - + + //cout << "dist = " << dist << endl; + if(dist <= cutoff){ if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; } }