X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=pairwiseseqscommand.cpp;h=c9e5ecfdfda727427793e98aaed0270cdcd6e046;hp=ed11dfbdd56e0ff480bfd281b944dcaa1fab34eb;hb=1a20e24ee786195ab0e1cccd4f5aede7a88f3f4e;hpb=a78fa674631a7d8a8d4e5043384ee244ed65cc09 diff --git a/pairwiseseqscommand.cpp b/pairwiseseqscommand.cpp index ed11dfb..c9e5ecf 100644 --- a/pairwiseseqscommand.cpp +++ b/pairwiseseqscommand.cpp @@ -12,20 +12,20 @@ //********************************************************************************************************************** vector PairwiseSeqsCommand::setParameters(){ try { - CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter palign("align", "Multiple", "needleman-gotoh-blast-noalign", "needleman", "", "", "",false,false); parameters.push_back(palign); - CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch); - CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch); - CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen); - CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(poutput); - CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc); - CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends); - CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress); - CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pcutoff); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","phylip-column",false,true,true); parameters.push_back(pfasta); + CommandParameter palign("align", "Multiple", "needleman-gotoh-blast-noalign", "needleman", "", "", "","",false,false); parameters.push_back(palign); + CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch); + CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch); + CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen); + CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter poutput("output", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false,true); parameters.push_back(poutput); + CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "","",false,false); parameters.push_back(pcalc); + CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pcountends); + CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcompress); + CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "","",false,false,true); parameters.push_back(pcutoff); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -64,7 +64,22 @@ string PairwiseSeqsCommand::getHelpString(){ exit(1); } } - +//********************************************************************************************************************** +string PairwiseSeqsCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "phylip") { pattern = "[filename],[outputtag],dist"; } + else if (type == "column") { pattern = "[filename],dist"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "PairwiseSeqsCommand", "getOutputPattern"); + exit(1); + } +} //********************************************************************************************************************** PairwiseSeqsCommand::PairwiseSeqsCommand(){ try { @@ -203,12 +218,15 @@ PairwiseSeqsCommand::PairwiseSeqsCommand(string option) { temp = validParameter.validFile(parameters, "mismatch", false); if (temp == "not found"){ temp = "-1.0"; } m->mothurConvert(temp, misMatch); + if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; } temp = validParameter.validFile(parameters, "gapopen", false); if (temp == "not found"){ temp = "-2.0"; } m->mothurConvert(temp, gapOpen); + if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; } temp = validParameter.validFile(parameters, "gapextend", false); if (temp == "not found"){ temp = "-1.0"; } m->mothurConvert(temp, gapExtend); + if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; } temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); @@ -226,6 +244,7 @@ PairwiseSeqsCommand::PairwiseSeqsCommand(string option) { align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; } output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "column"; } + if (output=="phylip") { output = "lt"; } if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; } calc = validParameter.validFile(parameters, "calc", false); @@ -267,16 +286,20 @@ int PairwiseSeqsCommand::execute(){ int numSeqs = alignDB.getNumSeqs(); int startTime = time(NULL); string outputFile = ""; - + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])); if (output == "lt") { //does the user want lower triangle phylip formatted file - outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "phylip.dist"; + variables["[outputtag]"] = "phylip"; + outputFile = getOutputFileName("phylip", variables); m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); }else if (output == "column") { //user wants column format - outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "dist"; + outputFile = getOutputFileName("column", variables); outputTypes["column"].push_back(outputFile); m->mothurRemove(outputFile); }else { //assume square - outputFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "square.dist"; + variables["[outputtag]"] = "square"; + outputFile = getOutputFileName("phylip", variables); m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); } @@ -393,7 +416,7 @@ int PairwiseSeqsCommand::execute(){ if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); } else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); } - if (m->control_pressed) { delete distCalculator; return 0; } + if (m->control_pressed) { return 0; } //tell parent you are done. MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD); @@ -402,7 +425,7 @@ int PairwiseSeqsCommand::execute(){ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else - //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //if you don't need to fork anything if(processors == 1){ if (output != "square") { driver(0, numSeqs, outputFile, cutoff); } @@ -475,7 +498,7 @@ int PairwiseSeqsCommand::execute(){ } m->mothurOutEndLine(); - m->mothurOut("Output File Name: "); m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); @@ -494,7 +517,7 @@ void PairwiseSeqsCommand::createProcesses(string filename) { int process = 1; processIDS.clear(); -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want @@ -541,7 +564,7 @@ void PairwiseSeqsCommand::createProcesses(string filename) { string extension = toString(i) + ".temp"; // Allocate memory for thread data. - pairwiseData* tempDist = new pairwiseData((filename+extension), align, "square", Estimators[0], countends, output, alignDB, m, lines[i+1].start, lines[i+1].end, match, misMatch, gapOpen, gapExtend, longestBase, i); + pairwiseData* tempDist = new pairwiseData((filename+extension), align, "square", Estimators[0], countends, output, alignDB, m, lines[i+1].start, lines[i+1].end, match, misMatch, gapOpen, gapExtend, longestBase, cutoff, i); pDataArray.push_back(tempDist); processIDS.push_back(i); @@ -558,6 +581,9 @@ void PairwiseSeqsCommand::createProcesses(string filename) { //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ + if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) { + m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; + } CloseHandle(hThreadArray[i]); delete pDataArray[i]; } @@ -646,10 +672,14 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, fl seqI.setAligned(alignment->getSeqAAln()); seqJ.setAligned(alignment->getSeqBAln()); - + //cout << seqI.getName() << '\t' << seqJ.getName() << endl; + //cout << alignment->getSeqAAln() << endl << alignment->getSeqBAln() << endl; + distCalculator->calcDist(seqI, seqJ); double dist = distCalculator->getDist(); - + + //cout << "dist = " << dist << endl; + if(dist <= cutoff){ if (output == "column") { outFile << alignDB.get(i).getName() << ' ' << alignDB.get(j).getName() << ' ' << dist << endl; } } @@ -991,6 +1021,22 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } + ValidCalculators validCalculator; + Dist* distCalculator; + if (countends) { + if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { + if (Estimators[0] == "nogaps") { distCalculator = new ignoreGaps(); } + else if (Estimators[0] == "eachgap") { distCalculator = new eachGapDist(); } + else if (Estimators[0] == "onegap") { distCalculator = new oneGapDist(); } + } + }else { + if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { + if (Estimators[0] == "nogaps") { distCalculator = new ignoreGaps(); } + else if (Estimators[0] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); } + else if (Estimators[0] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); } + } + } + string outputString = ""; size = 0;