X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=shhhseqscommand.cpp;h=82d956189a6f025fd57dc7e901e585456f59f8ab;hp=625b93922c15a72096b2a9bb1550185f14dc579e;hb=050a3ff02473a3d4c0980964e1a9ebe52e55d6b8;hpb=e0dc0bcef2a0f7e1f63abb531dbb1ad533da98ca diff --git a/shhhseqscommand.cpp b/shhhseqscommand.cpp index 625b939..82d9561 100644 --- a/shhhseqscommand.cpp +++ b/shhhseqscommand.cpp @@ -14,13 +14,13 @@ //********************************************************************************************************************** vector ShhhSeqsCommand::setParameters(){ try { - CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname); - CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); - CommandParameter psigma("sigma", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(psigma); + CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta-map",false,true,true); parameters.push_back(pfasta); + CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none","name",false,true,true); parameters.push_back(pname); + CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pgroup); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + CommandParameter psigma("sigma", "Number", "", "0.01", "", "", "","",false,false); parameters.push_back(psigma); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -37,7 +37,7 @@ string ShhhSeqsCommand::getHelpString(){ string helpString = ""; helpString += "The shhh.seqs command reads a fasta and name file and ....\n"; helpString += "The shhh.seqs command parameters are fasta, name, group, sigma and processors.\n"; - helpString += "The fasta parameter allows you to enter the fasta file containing your potentially sequences, and is required, unless you have a valid current fasta file. \n"; + helpString += "The fasta parameter allows you to enter the fasta file containing your sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The name parameter allows you to provide a name file associated with your fasta file. It is required. \n"; helpString += "The group parameter allows you to provide a group file. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; @@ -54,6 +54,24 @@ string ShhhSeqsCommand::getHelpString(){ exit(1); } } +//********************************************************************************************************************** +string ShhhSeqsCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "fasta") { pattern = "[filename],shhh_seqs.fasta"; } + else if (type == "name") { pattern = "[filename],shhh_seqs.names"; } + else if (type == "map") { pattern = "[filename],shhh_seqs.map"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "ShhhSeqsCommand", "getOutputPattern"); + exit(1); + } +} + //********************************************************************************************************************** ShhhSeqsCommand::ShhhSeqsCommand(){ @@ -160,11 +178,17 @@ ShhhSeqsCommand::ShhhSeqsCommand(string option) { else { m->setGroupFile(groupfile); } string temp = validParameter.validFile(parameters, "sigma", false); if(temp == "not found"){ temp = "0.01"; } - convert(temp, sigma); - + m->mothurConvert(temp, sigma); + sigma = 1/sigma; + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); + m->mothurConvert(temp, processors); + + if (namefile == "") { + vector files; files.push_back(fastafile); + parser.getNameFile(files); + } } } catch(exception& e) { @@ -178,10 +202,13 @@ int ShhhSeqsCommand::execute() { if (abort == true) { if (calledHelp) { return 0; } return 2; } - if (outputDir == "") { outputDir = m->hasPath(fastafile); }//if user entered a file with a path then preserve it - string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "shhh.fasta"; - string nameFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "shhh.names"; - string mapFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "shhh.map"; + if (outputDir == "") { outputDir = m->hasPath(fastafile); }//if user entered a file with a path then preserve it + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile)); + string outputFileName = getOutputFileName("fasta",variables); + string nameFileName = getOutputFileName("name",variables); + string mapFileName = getOutputFileName("map",variables); if (groupfile != "") { //Parse sequences by group @@ -196,13 +223,16 @@ int ShhhSeqsCommand::execute() { m->openOutputFile(nameFileName, out1); out1.close(); mapFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "shhh."; - if(processors == 1) { driverGroups(parser, outputFileName, nameFileName, mapFileName, 0, groups.size(), groups); } - else { createProcessesGroups(parser, outputFileName, nameFileName, mapFileName, groups); } + vector mapFileNames; + if(processors == 1) { mapFileNames = driverGroups(parser, outputFileName, nameFileName, mapFileName, 0, groups.size(), groups); } + else { mapFileNames = createProcessesGroups(parser, outputFileName, nameFileName, mapFileName, groups); } - if (m->control_pressed) { return 0; } + if (m->control_pressed) { return 0; } - //deconvolute results by running unique.seqs + for (int j = 0; j < mapFileNames.size(); j++) { outputNames.push_back(mapFileNames[j]); outputTypes["map"].push_back(mapFileNames[j]); } + //deconvolute results by running unique.seqs + deconvoluteResults(outputFileName, nameFileName); if (m->control_pressed) { return 0; } @@ -227,13 +257,13 @@ int ShhhSeqsCommand::execute() { if (m->control_pressed) { m->mothurRemove(distFileName); return 0; } driver(noise, sequences, uniqueNames, redundantNames, seqFreq, distFileName, outputFileName, nameFileName, mapFileName); + outputNames.push_back(mapFileName); outputTypes["map"].push_back(mapFileName); } if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(nameFileName); outputTypes["name"].push_back(nameFileName); - outputNames.push_back(mapFileName); outputTypes["map"].push_back(mapFileName); m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); @@ -335,11 +365,12 @@ int ShhhSeqsCommand::loadData(correctDist* correct, seqNoise& noise, vector groups) { +vector ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFName, string newNName, string newMName, vector groups) { try { vector processIDS; int process = 1; + vector mapfileNames; //sanity check if (groups.size() < processors) { processors = groups.size(); } @@ -354,7 +385,7 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa lines.push_back(linePair(startIndex, endIndex)); } -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { @@ -364,7 +395,18 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMName, lines[process].start, lines[process].end, groups); + mapfileNames = driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMName, lines[process].start, lines[process].end, groups); + + //pass filenames to parent + ofstream out; + string tempFile = newMName + toString(getpid()) + ".temp"; + m->openOutputFile(tempFile, out); + out << mapfileNames.size() << endl; + for (int i = 0; i < mapfileNames.size(); i++) { + out << mapfileNames[i] << endl; + } + out.close(); + exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); @@ -374,7 +416,7 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa } //do my part - driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups); + mapfileNames = driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups); //force parent to wait until all the processes are done for (int i=0;iopenInputFile(tempFile, in); + if (!in.eof()) { + int tempNum = 0; in >> tempNum; m->gobble(in); + for (int j = 0; j < tempNum; j++) { + string filename; + in >> filename; m->gobble(in); + mapfileNames.push_back(filename); + } + } + in.close(); m->mothurRemove(tempFile); + + } #else ////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -397,7 +455,7 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa for( int i=1; icount != (pDataArray[i]->end-pDataArray[i]->start)) { + m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; + } + for (int j = 0; j < pDataArray[i]->mapfileNames.size(); j++) { + mapfileNames.push_back(pDataArray[i]->mapfileNames[j]); + } CloseHandle(hThreadArray[i]); delete pDataArray[i]; } @@ -431,7 +495,7 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa m->mothurRemove((newNName + toString(processIDS[i]) + ".temp")); } - return 0; + return mapfileNames; } catch(exception& e) { @@ -440,14 +504,16 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa } } /**************************************************************************************************/ -int ShhhSeqsCommand::driverGroups(SequenceParser& parser, string newFFile, string newNFile, string newMFile, int start, int end, vector groups){ +vector ShhhSeqsCommand::driverGroups(SequenceParser& parser, string newFFile, string newNFile, string newMFile, int start, int end, vector groups){ try { + vector mapFileNames; + for (int i = start; i < end; i++) { start = time(NULL); - if (m->control_pressed) { return 0; } + if (m->control_pressed) { return mapFileNames; } m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[i] + ":"); m->mothurOutEndLine(); @@ -465,26 +531,27 @@ int ShhhSeqsCommand::driverGroups(SequenceParser& parser, string newFFile, strin //load this groups info in order loadData(correct, noise, sequences, uniqueNames, redundantNames, seqFreq, thisNameMap, thisSeqs); - if (m->control_pressed) { return 0; } + if (m->control_pressed) { return mapFileNames; } //calc distances for cluster string distFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + groups[i] + ".shhh.dist"; correct->execute(distFileName); delete correct; - if (m->control_pressed) { m->mothurRemove(distFileName); return 0; } + if (m->control_pressed) { m->mothurRemove(distFileName); return mapFileNames; } driver(noise, sequences, uniqueNames, redundantNames, seqFreq, distFileName, newFFile+groups[i], newNFile+groups[i], newMFile+groups[i]+".map"); - if (m->control_pressed) { return 0; } + if (m->control_pressed) { return mapFileNames; } m->appendFiles(newFFile+groups[i], newFFile); m->mothurRemove(newFFile+groups[i]); m->appendFiles(newNFile+groups[i], newNFile); m->mothurRemove(newNFile+groups[i]); + mapFileNames.push_back(newMFile+groups[i]+".map"); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process group " + groups[i] + "."); m->mothurOutEndLine(); } - return 0; + return mapFileNames; } catch(exception& e) { m->errorOut(e, "ShhhSeqsCommand", "driverGroups"); @@ -652,21 +719,22 @@ int ShhhSeqsCommand::deconvoluteResults(string fastaFile, string nameFile){ string inputString = "fasta=" + fastaFile + ", name=" + nameFile; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); - + m->mothurCalling = true; + Command* uniqueCommand = new DeconvoluteCommand(inputString); uniqueCommand->execute(); map > filenames = uniqueCommand->getOutputFiles(); delete uniqueCommand; - + m->mothurCalling = false; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); string newnameFile = filenames["name"][0]; string newfastaFile = filenames["fasta"][0]; m->mothurRemove(fastaFile); rename(newfastaFile.c_str(), fastaFile.c_str()); - m->mothurRemove(nameFile); rename(newnameFile.c_str(), nameFile.c_str()); + if (nameFile != newnameFile) { m->mothurRemove(nameFile); rename(newnameFile.c_str(), nameFile.c_str()); } return 0; }