From: Sarah Westcott Date: Tue, 25 Mar 2014 12:30:35 +0000 (-0400) Subject: added mothurgetpid function. fixed bug with align.seqs related to g++ 4.8 change... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=60928795782d8f8648ec373d6a96b53032a77429 added mothurgetpid function. fixed bug with align.seqs related to g++ 4.8 change. fixed bug in rarefaction.single with shared command if shared file not created by mothur labels not in right order in *.groups.rarefaction. summary.shared sub ample bug, not subampleing. added rjsd and jsd to tree.shared. working on sra command. chimera.slayer blast error bug on linux related to getpid(). --- diff --git a/aligncommand.cpp b/aligncommand.cpp index f9c0436..6b0ef5a 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -543,6 +543,7 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam //moved this into driver to avoid deep copies in windows paralellized version Alignment* alignment; int longestBase = templateDB->getLongestBase(); + if (m->debug) { m->mothurOut("[DEBUG]: template longest base = " + toString(templateDB->getLongestBase()) + " \n"); } if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); } else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } @@ -565,11 +566,12 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam int numBasesNeeded = origNumBases * threshold; if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file - if (candidateSeq->getUnaligned().length() > alignment->getnRows()) { - alignment->resize(candidateSeq->getUnaligned().length()+1); + if (candidateSeq->getUnaligned().length()+1 > alignment->getnRows()) { + if (m->debug) { m->mothurOut("[DEBUG]: " + candidateSeq->getName() + " 
" + toString(candidateSeq->getUnaligned().length()) + " " + toString(alignment->getnRows()) + " \n"); } + alignment->resize(candidateSeq->getUnaligned().length()+2); } Sequence temp = templateDB->findClosestSequence(candidateSeq); - Sequence* templateSeq = &temp; + Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned()); float searchScore = templateDB->getSearchScore(); @@ -593,19 +595,26 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam //get reverse compliment copy = new Sequence(candidateSeq->getName(), originalUnaligned); copy->reverseComplement(); + + if (m->debug) { m->mothurOut("[DEBUG]: flipping " + candidateSeq->getName() + " \n"); } //rerun alignment Sequence temp2 = templateDB->findClosestSequence(copy); - Sequence* templateSeq2 = &temp2; + Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned()); + + if (m->debug) { m->mothurOut("[DEBUG]: closest template " + temp2.getName() + " \n"); } searchScore = templateDB->getSearchScore(); nast2 = new Nast(alignment, copy, templateSeq2); + + if (m->debug) { m->mothurOut("[DEBUG]: completed Nast2 " + candidateSeq->getName() + " flipped numBases = " + toString(copy->getNumBases()) + " old numbases = " + toString(candidateSeq->getNumBases()) +" \n"); } //check if any better if (copy->getNumBases() > candidateSeq->getNumBases()) { candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better - templateSeq = templateSeq2; + delete templateSeq; + templateSeq = templateSeq2; delete nast; nast = nast2; needToDeleteCopy = true; @@ -613,8 +622,10 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam }else{ wasBetter = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence."; delete nast2; + delete templateSeq2; delete copy; } + if (m->debug) { m->mothurOut("[DEBUG]: done.\n"); } } //create accnos file with names @@ -630,6 +641,7 @@ int 
AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam report.print(); delete nast; + delete templateSeq; if (needToDeleteCopy) { delete copy; } count++; @@ -734,7 +746,7 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align } Sequence temp = templateDB->findClosestSequence(candidateSeq); - Sequence* templateSeq = &temp; + Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned()); float searchScore = templateDB->getSearchScore(); @@ -759,7 +771,7 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align //rerun alignment Sequence temp2 = templateDB->findClosestSequence(copy); - Sequence* templateSeq2 = &temp2; + Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned()); searchScore = templateDB->getSearchScore(); @@ -768,7 +780,8 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align //check if any better if (copy->getNumBases() > candidateSeq->getNumBases()) { candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better - templateSeq = templateSeq2; + delete templateSeq; + templateSeq = templateSeq2; delete nast; nast = nast2; needToDeleteCopy = true; @@ -776,6 +789,7 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align }else{ wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence."; delete nast2; + delete templateSeq2; delete copy; } } @@ -821,6 +835,7 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align delete buf3; delete nast; + delete templateSeq; if (needToDeleteCopy) { delete copy; } } delete candidateSeq; @@ -856,11 +871,11 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(lines[process], 
alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename); + num = driver(lines[process], alignFileName + toString(m->mothurGetpid(process)) + ".temp", reportFileName + toString(m->mothurGetpid(process)) + ".temp", accnosFName + m->mothurGetpid(process) + ".temp", filename); //pass numSeqs to parent ofstream out; - string tempFile = alignFileName + toString(getpid()) + ".num.temp"; + string tempFile = alignFileName + toString(m->mothurGetpid(process)) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); diff --git a/aligncommand.h b/aligncommand.h index 4e1b8d3..f806c8d 100644 --- a/aligncommand.h +++ b/aligncommand.h @@ -177,7 +177,7 @@ static DWORD WINAPI MyAlignThreadFunction(LPVOID lpParam){ } Sequence temp = templateDB->findClosestSequence(candidateSeq); - Sequence* templateSeq = &temp; + Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned()); float searchScore = templateDB->getSearchScore(); @@ -204,7 +204,7 @@ static DWORD WINAPI MyAlignThreadFunction(LPVOID lpParam){ //rerun alignment Sequence temp2 = templateDB->findClosestSequence(copy); - Sequence* templateSeq2 = &temp2; + Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned()); searchScore = templateDB->getSearchScore(); @@ -213,7 +213,8 @@ static DWORD WINAPI MyAlignThreadFunction(LPVOID lpParam){ //check if any better if (copy->getNumBases() > candidateSeq->getNumBases()) { candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better - templateSeq = templateSeq2; + delete templateSeq; + templateSeq = templateSeq2; delete nast; nast = nast2; needToDeleteCopy = true; @@ -221,6 +222,7 @@ static DWORD WINAPI MyAlignThreadFunction(LPVOID lpParam){ }else{ wasBetter = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence."; delete nast2; + delete templateSeq2; delete copy; } 
} @@ -238,6 +240,7 @@ static DWORD WINAPI MyAlignThreadFunction(LPVOID lpParam){ report.print(); delete nast; + delete templateSeq; if (needToDeleteCopy) { delete copy; } pDataArray->count++; diff --git a/bellerophon.cpp b/bellerophon.cpp index 833cfb9..013c4dd 100644 --- a/bellerophon.cpp +++ b/bellerophon.cpp @@ -370,7 +370,7 @@ int Bellerophon::createProcesses(vector mid) { process++; }else if (pid == 0){ exitCommand = driverChimeras(mid, lines[process]); - string tempOut = outputDir + toString(getpid()) + ".temp"; + string tempOut = outputDir + toString(m->mothurGetpid(process)) + ".temp"; writePrefs(tempOut, lines[process]); exit(0); }else { diff --git a/blastdb.cpp b/blastdb.cpp index 2eced7e..a77880e 100644 --- a/blastdb.cpp +++ b/blastdb.cpp @@ -23,14 +23,10 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { int randNumber = rand(); //int randNumber = 12345; - string pid = ""; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) - pid += getpid(); -#else - pid += toString(threadID); -#endif - + string pid = m->mothurGetpid(threadID); + if (m->debug) { m->mothurOut("[DEBUG]: tag = " + tag + "\t pid = " + pid + "\n"); } + dbFileName = tag + pid + toString(randNumber) + ".template.unaligned.fasta"; queryFileName = tag + pid + toString(randNumber) + ".candidate.unaligned.fasta"; blastFileName = tag + pid + toString(randNumber) + ".blast"; @@ -122,13 +118,7 @@ BlastDB::BlastDB(string b, int tid) : Database() { } int randNumber = rand(); - string pid = ""; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) - pid += getpid(); -#else - pid += toString(threadID); -#endif - + string pid = m->mothurGetpid(threadID); dbFileName = pid + toString(randNumber) + ".template.unaligned.fasta"; queryFileName = pid + toString(randNumber) + ".candidate.unaligned.fasta"; blastFileName = pid + toString(randNumber) + ".blast"; diff --git a/chimeraccodecommand.cpp 
b/chimeraccodecommand.cpp index d890db4..925be97 100644 --- a/chimeraccodecommand.cpp +++ b/chimeraccodecommand.cpp @@ -639,11 +639,11 @@ int ChimeraCcodeCommand::createProcesses(string outputFileName, string filename, processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp"); + num = driver(lines[process], outputFileName + toString(m->mothurGetpid(process)) + ".temp", filename, accnos + toString(m->mothurGetpid(process)) + ".temp"); //pass numSeqs to parent ofstream out; - string tempFile = outputFileName + toString(getpid()) + ".num.temp"; + string tempFile = outputFileName + toString(m->mothurGetpid(process)) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); diff --git a/chimeracheckcommand.cpp b/chimeracheckcommand.cpp index 64ed9fa..988c711 100644 --- a/chimeracheckcommand.cpp +++ b/chimeracheckcommand.cpp @@ -628,11 +628,11 @@ int ChimeraCheckCommand::createProcesses(string outputFileName, string filename) processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename); + num = driver(lines[process], outputFileName + toString(m->mothurGetpid(process)) + ".temp", filename); //pass numSeqs to parent ofstream out; - string tempFile = outputFileName + toString(getpid()) + ".num.temp"; + string tempFile = outputFileName + toString(m->mothurGetpid(process)) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); diff --git a/chimeraperseuscommand.cpp b/chimeraperseuscommand.cpp index b8d70c1..353b124 100644 --- a/chimeraperseuscommand.cpp +++ b/chimeraperseuscommand.cpp @@ -1098,11 +1098,11 @@ int 
ChimeraPerseusCommand::createProcessesGroups(string outputFName, string accn processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups); + num = driverGroups(outputFName + toString(m->mothurGetpid(process)) + ".temp", accnos + toString(m->mothurGetpid(process)) + ".temp", accnos + ".byCount." + toString(m->mothurGetpid(process)) + ".temp", lines[process].start, lines[process].end, groups); //pass numSeqs to parent ofstream out; - string tempFile = outputFName + toString(getpid()) + ".num.temp"; + string tempFile = outputFName + toString(m->mothurGetpid(process)) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); diff --git a/chimerapintailcommand.cpp b/chimerapintailcommand.cpp index fca9f17..8057099 100644 --- a/chimerapintailcommand.cpp +++ b/chimerapintailcommand.cpp @@ -726,11 +726,11 @@ int ChimeraPintailCommand::createProcesses(string outputFileName, string filenam processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp"); + num = driver(lines[process], outputFileName + toString(m->mothurGetpid(process)) + ".temp", filename, accnos + toString(m->mothurGetpid(process)) + ".temp"); //pass numSeqs to parent ofstream out; - string tempFile = outputFileName + toString(getpid()) + ".num.temp"; + string tempFile = outputFileName + toString(m->mothurGetpid(process)) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index e7dc92e..9a2b249 100644 --- 
a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -1583,11 +1583,11 @@ int ChimeraSlayerCommand::createProcessesGroups(string outputFName, string accno processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp", breakUp[process], fileGroup, accnos + toString(getpid()) + ".byCount"); + num = driverGroups(outputFName + toString(m->mothurGetpid(process)) + ".temp", accnos + m->mothurGetpid(process) + ".temp", fasta + toString(m->mothurGetpid(process)) + ".temp", breakUp[process], fileGroup, accnos + toString(m->mothurGetpid(process)) + ".byCount"); //pass numSeqs to parent ofstream out; - string tempFile = outputFName + toString(getpid()) + ".num.temp"; + string tempFile = outputFName + toString(m->mothurGetpid(process)) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); @@ -1714,6 +1714,8 @@ int ChimeraSlayerCommand::createProcessesGroups(string outputFName, string accno int ChimeraSlayerCommand::driver(linePair filePos, string outputFName, string filename, string accnos, string fasta, map& priority){ try { + if (m->debug) { m->mothurOut("[DEBUG]: filename = " + filename + "\n"); } + Chimera* chimera; if (templatefile != "self") { //you want to run slayer with a reference template chimera = new ChimeraSlayer(filename, templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand()); @@ -1998,6 +2000,8 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename int process = 0; int num = 0; processIDS.clear(); + + if (m->debug) { m->mothurOut("[DEBUG]: filename = " + filename + "\n"); } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || 
(__unix__) || (__unix) //loop through and create all the processes you want @@ -2008,11 +2012,11 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp", thisPriority); + num = driver(lines[process], outputFileName + toString(m->mothurGetpid(process)) + ".temp", filename, accnos + toString(m->mothurGetpid(process)) + ".temp", fasta + toString(m->mothurGetpid(process)) + ".temp", thisPriority); //pass numSeqs to parent ofstream out; - string tempFile = outputFileName + toString(getpid()) + ".num.temp"; + string tempFile = outputFileName + toString(m->mothurGetpid(process)) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp index 7a48cf7..61a91f4 100644 --- a/chimerauchimecommand.cpp +++ b/chimerauchimecommand.cpp @@ -1639,11 +1639,11 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras); + num = driver(outputFileName + toString(m->mothurGetpid(process)) + ".temp", files[process], accnos + toString(m->mothurGetpid(process)) + ".temp", alns + toString(m->mothurGetpid(process)) + ".temp", numChimeras); //pass numSeqs to parent ofstream out; - string tempFile = outputFileName + toString(getpid()) + ".num.temp"; + string tempFile = outputFileName + toString(m->mothurGetpid(process)) + ".num.temp"; 
m->openOutputFile(tempFile, out); out << num << endl; out << numChimeras << endl; @@ -1823,11 +1823,11 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups); + num = driverGroups(outputFName + toString(m->mothurGetpid(process)) + ".temp", filename + toString(m->mothurGetpid(process)) + ".temp", accnos + toString(m->mothurGetpid(process)) + ".temp", alns + toString(m->mothurGetpid(process)) + ".temp", accnos + ".byCount." + toString(m->mothurGetpid(process)) + ".temp", lines[process].start, lines[process].end, groups); //pass numSeqs to parent ofstream out; - string tempFile = outputFName + toString(getpid()) + ".num.temp"; + string tempFile = outputFName + toString(m->mothurGetpid(process)) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); diff --git a/chopseqscommand.cpp b/chopseqscommand.cpp index 96d7ae6..8c82992 100644 --- a/chopseqscommand.cpp +++ b/chopseqscommand.cpp @@ -378,11 +378,11 @@ bool ChopSeqsCommand::createProcesses(vector lines, string filename, s processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - wroteAccnos = driver(lines[process], filename, outFasta + toString(getpid()) + ".temp", outAccnos + toString(getpid()) + ".temp"); + wroteAccnos = driver(lines[process], filename, outFasta + m->mothurGetpid(process) + ".temp", outAccnos + m->mothurGetpid(process) + ".temp"); //pass numSeqs to parent ofstream out; - string tempFile = fastafile + toString(getpid()) + ".bool.temp"; + 
string tempFile = fastafile + m->mothurGetpid(process) + ".bool.temp"; m->openOutputFile(tempFile, out); out << wroteAccnos << endl; out.close(); diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index 30bfd9e..d494068 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -980,11 +980,11 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", filename); + num = driver(lines[process], taxFileName + m->mothurGetpid(process) + ".temp", tempTaxFile + m->mothurGetpid(process) + ".temp", accnos + m->mothurGetpid(process) + ".temp", filename); //pass numSeqs to parent ofstream out; - string tempFile = filename + toString(getpid()) + ".num.temp"; + string tempFile = filename + m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index 874eb6d..0dae893 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -981,7 +981,7 @@ vector ClusterSplitCommand::createProcesses(vector< map vector listFileNames = cluster(dividedNames[process], labels); //write out names to file - string filename = toString(getpid()) + ".temp"; + string filename = m->mothurGetpid(process) + ".temp"; ofstream out; m->openOutputFile(filename, out); out << tag << endl; @@ -990,7 +990,7 @@ vector ClusterSplitCommand::createProcesses(vector< map //print out labels ofstream outLabels; - filename = toString(getpid()) + ".temp.labels"; + filename = m->mothurGetpid(process) + ".temp.labels"; m->openOutputFile(filename, outLabels); outLabels << cutoff << endl; diff --git a/consensusseqscommand.h b/consensusseqscommand.h index 
a69ddbf..165d420 100644 --- a/consensusseqscommand.h +++ b/consensusseqscommand.h @@ -44,7 +44,8 @@ private: map fastaMap; map nameMap; map nameFileMap; - int cutoff, seqLength; + int seqLength; + float cutoff; int readFasta(); int readNames(); diff --git a/countseqscommand.cpp b/countseqscommand.cpp index 301aff7..88c1409 100644 --- a/countseqscommand.cpp +++ b/countseqscommand.cpp @@ -433,10 +433,10 @@ int CountSeqsCommand::createProcesses(GroupMap*& groupMap, string outputFileName processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - string filename = toString(getpid()) + ".temp"; + string filename = m->mothurGetpid(process) + ".temp"; numSeqs = driver(lines[process].start, lines[process].end, filename, groupMap); - string tempFile = toString(getpid()) + ".num.temp"; + string tempFile = m->mothurGetpid(process) + ".num.temp"; ofstream outTemp; m->openOutputFile(tempFile, outTemp); @@ -451,7 +451,7 @@ int CountSeqsCommand::createProcesses(GroupMap*& groupMap, string outputFileName } } - string filename = toString(getpid()) + ".temp"; + string filename = m->mothurGetpid(process) + ".temp"; numSeqs = driver(lines[processors-1].start, lines[processors-1].end, filename, groupMap); //force parent to wait until all the processes are done diff --git a/createdatabasecommand.cpp b/createdatabasecommand.cpp index a353166..bc17323 100644 --- a/createdatabasecommand.cpp +++ b/createdatabasecommand.cpp @@ -16,7 +16,7 @@ vector CreateDatabaseCommand::setParameters(){ CommandParameter pname("repname", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); - 
CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pcontaxonomy); + CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pconstaxonomy); CommandParameter plist("list", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(plist); CommandParameter pshared("shared", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(pshared); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); @@ -37,11 +37,11 @@ string CreateDatabaseCommand::getHelpString(){ try { string helpString = ""; helpString += "The create.database command reads a list file or a shared file, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, or count file and creates a database file.\n"; - helpString += "The create.database command parameters are repfasta, list, shared, repname, contaxonomy, group, count and label. List, repfasta, repnames or count, and contaxonomy are required.\n"; + helpString += "The create.database command parameters are repfasta, list, shared, repname, constaxonomy, group, count and label. List, repfasta, repnames or count, and constaxonomy are required.\n"; helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n"; helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n"; helpString += "The count file is the count file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, count=yourCountFile). If it includes group info, mothur will give you the abundance breakdown by group. 
\n"; - helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile, name=yourNameFile).\n"; + helpString += "The constaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile, name=yourNameFile).\n"; helpString += "The group file is optional and will just give you the abundance breakdown by group.\n"; helpString += "The label parameter allows you to specify a label to be used from your listfile.\n"; helpString += "NOTE: Make SURE the repfasta, repnames and contaxonomy are for the same label as the listfile.\n"; @@ -133,12 +133,12 @@ CreateDatabaseCommand::CreateDatabaseCommand(string option) { if (path == "") { parameters["repname"] = inputDir + it->second; } } - it = parameters.find("contaxonomy"); + it = parameters.find("constaxonomy"); //user has given a template file if(it != parameters.end()){ path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. 
- if (path == "") { parameters["contaxonomy"] = inputDir + it->second; } + if (path == "") { parameters["constaxonomy"] = inputDir + it->second; } } it = parameters.find("repfasta"); @@ -208,9 +208,9 @@ CreateDatabaseCommand::CreateDatabaseCommand(string option) { if (sharedfile != "") { if (outputDir == "") { outputDir = m->hasPath(sharedfile); } } else { if (outputDir == "") { outputDir = m->hasPath(listfile); } } - contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true); + contaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true); if (contaxonomyfile == "not found") { //if there is a current list file, use it - contaxonomyfile = ""; m->mothurOut("The contaxonomy parameter is required, aborting."); m->mothurOutEndLine(); abort = true; + contaxonomyfile = ""; m->mothurOut("The constaxonomy parameter is required, aborting."); m->mothurOutEndLine(); abort = true; } else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; } @@ -581,7 +581,7 @@ vector CreateDatabaseCommand::readFasta(vector& seqs){ int binNumber = 0; string temp = ""; for (int i = 0; i < info[0].size(); i++) { if (isspace(info[0][i])) {;}else{temp +=info[0][i]; } } - m->mothurConvert(temp, binNumber); + m->mothurConvert(m->getSimpleLabel(temp), binNumber); set::iterator it = sanity.find(binNumber); if (it != sanity.end()) { m->mothurOut("[ERROR]: your repfasta file is not the right format. The create database command is designed to be used with the output from get.oturep. 
When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n"); m->control_pressed = true; break; diff --git a/deconvolutecommand.cpp b/deconvolutecommand.cpp index 98109ef..ec80973 100644 --- a/deconvolutecommand.cpp +++ b/deconvolutecommand.cpp @@ -286,7 +286,7 @@ int DeconvoluteCommand::execute() { m->gobble(in); - if(count % 1000 == 0) { m->mothurOut(toString(count) + "\t" + toString(sequenceStrings.size())); m->mothurOutEndLine(); } + if(count % 1000 == 0) { m->mothurOutJustToScreen(toString(count) + "\t" + toString(sequenceStrings.size()) + "\n"); } } if(count % 1000 != 0) { m->mothurOut(toString(count) + "\t" + toString(sequenceStrings.size())); m->mothurOutEndLine(); } diff --git a/distancecommand.cpp b/distancecommand.cpp index 9243df3..8be3ad4 100644 --- a/distancecommand.cpp +++ b/distancecommand.cpp @@ -540,8 +540,8 @@ void DistanceCommand::createProcesses(string filename) { processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - if (output != "square") { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); } - else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); } + if (output != "square") { driver(lines[process].start, lines[process].end, filename + m->mothurGetpid(process) + ".temp", cutoff); } + else { driver(lines[process].start, lines[process].end, filename + m->mothurGetpid(process) + ".temp", "square"); } exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes. 
Error code: " + toString(pid)); m->mothurOutEndLine(); diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index 4ac3381..031ac0e 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -659,12 +659,12 @@ int FilterSeqsCommand::createProcessesRunFilter(string F, string filename, strin processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - string filteredFasta = filename + toString(getpid()) + ".temp"; + string filteredFasta = filename + m->mothurGetpid(process) + ".temp"; num = driverRunFilter(F, filteredFasta, filename, lines[process]); //pass numSeqs to parent ofstream out; - string tempFile = filename + toString(getpid()) + ".num.temp"; + string tempFile = filename + m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); @@ -1095,7 +1095,7 @@ int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename) num = driverCreateFilter(F, filename, lines[process]); //write out filter counts to file - filename += toString(getpid()) + "filterValues.temp"; + filename += m->mothurGetpid(process) + "filterValues.temp"; ofstream out; m->openOutputFile(filename, out); diff --git a/getmetacommunitycommand.cpp b/getmetacommunitycommand.cpp index 047f176..288e5ca 100644 --- a/getmetacommunitycommand.cpp +++ b/getmetacommunitycommand.cpp @@ -415,11 +415,11 @@ int GetMetaCommunityCommand::createProcesses(vector& thislo process++; }else if (pid == 0){ outputNames.clear(); - num = processDriver(thislookup, dividedPartitions[process], (outputFileName + toString(getpid())), rels[process], matrix[process], doneFlags, process); + num = processDriver(thislookup, dividedPartitions[process], (outputFileName + m->mothurGetpid(process)), rels[process], matrix[process], doneFlags, process); //pass numSeqs to parent ofstream out; - string tempFile = toString(getpid()) + ".outputNames.temp"; + string tempFile = 
m->mothurGetpid(process) + ".outputNames.temp"; m->openOutputFile(tempFile, out); out << num << endl; out << outputNames.size() << endl; diff --git a/indicatorcommand.cpp b/indicatorcommand.cpp index ab6e670..36640e7 100644 --- a/indicatorcommand.cpp +++ b/indicatorcommand.cpp @@ -1201,7 +1201,7 @@ vector IndicatorCommand::getPValues(vector< vectormothurGetpid(process) + ".pvalues.temp"; m->openOutputFile(tempFile, out); //pass values @@ -1379,7 +1379,7 @@ vector IndicatorCommand::getPValues(vector< vector > //pass pvalues to parent ofstream out; - string tempFile = toString(getpid()) + ".pvalues.temp"; + string tempFile = m->mothurGetpid(process) + ".pvalues.temp"; m->openOutputFile(tempFile, out); //pass values diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp index 8796ab2..72e8ea0 100644 --- a/makecontigscommand.cpp +++ b/makecontigscommand.cpp @@ -704,7 +704,7 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o for(int i=0;imothurGetpid(process) + ".temp"; m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close(); } } @@ -712,14 +712,14 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o } num = driver(files[process], - outputFasta + toString(getpid()) + ".temp", - outputScrapFasta + toString(getpid()) + ".temp", - outputMisMatches + toString(getpid()) + ".temp", + outputFasta + m->mothurGetpid(process) + ".temp", + outputScrapFasta + m->mothurGetpid(process) + ".temp", + outputMisMatches + m->mothurGetpid(process) + ".temp", tempFASTAFileNames, process, group); //pass groupCounts to parent ofstream out; - string tempFile = toString(getpid()) + ".num.temp"; + string tempFile = m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; if (createFileGroup || createOligosGroup) { @@ -1143,7 +1143,7 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string num++; //report progress - if((num) % 1000 == 0){ m->mothurOut(toString(num)); 
m->mothurOutEndLine(); } + if((num) % 1000 == 0){ m->mothurOutJustToScreen(toString(num)); m->mothurOutEndLine(); } } //report progress @@ -1289,7 +1289,7 @@ vector< vector > MakeContigsCommand::readFastqFiles(unsigned long int& c count++; //report progress - if((count) % 10000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } + if((count) % 10000 == 0){ m->mothurOutJustToScreen(toString(count)); m->mothurOutEndLine(); } //} } } diff --git a/matrixoutputcommand.cpp b/matrixoutputcommand.cpp index 2e6943f..67e54d0 100644 --- a/matrixoutputcommand.cpp +++ b/matrixoutputcommand.cpp @@ -432,7 +432,7 @@ int MatrixOutputCommand::execute(){ string current = ""; itTypes = outputTypes.find("phylip"); if (itTypes != outputTypes.end()) { - if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); } + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; if (!subsample) { m->setPhylipFile(current); } } } m->mothurOutEndLine(); @@ -540,7 +540,7 @@ int MatrixOutputCommand::process(vector thisLookup){ driver(thisItersLookup, lines[process].start, lines[process].end, calcDists); - string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(getpid()) + ".dist"; + string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + m->mothurGetpid(process) + ".dist"; ofstream outtemp; m->openOutputFile(tempdistFileName, outtemp); @@ -739,6 +739,8 @@ int MatrixOutputCommand::process(vector thisLookup){ variables["[calc]"] = matrixCalculators[i]->getName(); string distFileName = getOutputFileName("phylip",variables); outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName); + //set current phylip file to average distance matrix + m->setPhylipFile(distFileName); ofstream outAve; m->openOutputFile(distFileName, outAve); outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint); diff --git a/mergesfffilecommand.cpp b/mergesfffilecommand.cpp index 
f417dde..a7a9c49 100644 --- a/mergesfffilecommand.cpp +++ b/mergesfffilecommand.cpp @@ -220,7 +220,7 @@ int MergeSfffilesCommand::execute(){ if (outputDir == "") { thisOutputDir += m->hasPath(outputFile); } variables["[filename]"] = thisOutputDir + m->getSimpleName(outputFile); outputFile = getOutputFileName("sff",variables); - m->openOutputFile(outputFile, out); + m->openOutputFileBinary(outputFile, out); outputNames.push_back(outputFile); outputTypes["sff"].push_back(outputFile); outputFileHeader = outputFile + ".headers"; numTotalReads = 0; diff --git a/mothurout.cpp b/mothurout.cpp index 0f24cb2..313deef 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -1364,8 +1364,8 @@ int MothurOut::appendFiles(string temp, string filename) { ifstream input; //open output file in append mode - openOutputFileAppend(filename, output); - int ableToOpen = openInputFile(temp, input, "no error"); + openOutputFileBinaryAppend(filename, output); + int ableToOpen = openInputFileBinary(temp, input, "no error"); //int ableToOpen = openInputFile(temp, input); int numLines = 0; @@ -2755,6 +2755,33 @@ string MothurOut::getSimpleLabel(string label){ exit(1); } } +/***********************************************************************/ +string MothurOut::mothurGetpid(int threadID){ + try { + + string pid = ""; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + + pid += toString(getpid()); if(debug) { mothurOut("[DEBUG]: " + pid + "\n"); } + //remove any weird chars + string pid1 = ""; + for (int i = 0; i < pid.length(); i++) { + if(pid[i]>47 && pid[i]<58) { //is a digit + pid1 += pid[i]; + } + } + pid = pid1; +#else + pid += toString(threadID); +#endif + return pid; + } + catch(exception& e) { + errorOut(e, "MothurOut", "mothurGetpid"); + exit(1); + } +} + /***********************************************************************/ bool MothurOut::isLabelEquivalent(string label1, string label2){ diff --git a/mothurout.h b/mothurout.h 
index 1747b14..d5b7e5f 100644 --- a/mothurout.h +++ b/mothurout.h @@ -146,6 +146,7 @@ class MothurOut { bool isLabelEquivalent(string, string); string getSimpleLabel(string); string findEdianness(); + string mothurGetpid(int); //string manipulation diff --git a/myseqdist.cpp b/myseqdist.cpp index 2853895..b43af1c 100644 --- a/myseqdist.cpp +++ b/myseqdist.cpp @@ -159,7 +159,7 @@ int correctDist::createProcess(string distanceFileName){ process++; } else if(pid == 0){ - driver(start[process], end[process], distanceFileName + toString(getpid()) + ".temp"); + driver(start[process], end[process], distanceFileName + m->mothurGetpid(process) + ".temp"); exit(0); } else{ diff --git a/pairwiseseqscommand.cpp b/pairwiseseqscommand.cpp index f874e9a..357724d 100644 --- a/pairwiseseqscommand.cpp +++ b/pairwiseseqscommand.cpp @@ -528,8 +528,8 @@ void PairwiseSeqsCommand::createProcesses(string filename) { processIDS.push_back(pid); process++; }else if (pid == 0){ - if (output != "square") { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); } - else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); } + if (output != "square") { driver(lines[process].start, lines[process].end, filename + m->mothurGetpid(process) + ".temp", cutoff); } + else { driver(lines[process].start, lines[process].end, filename + m->mothurGetpid(process) + ".temp", "square"); } exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); diff --git a/parsimony.cpp b/parsimony.cpp index 1b08e8a..9dc8cf7 100644 --- a/parsimony.cpp +++ b/parsimony.cpp @@ -102,7 +102,7 @@ EstOutput Parsimony::createProcesses(Tree* t, vector< vector > namesOfGr //pass numSeqs to parent ofstream out; - string tempFile = outputDir + toString(getpid()) + ".pars.results.temp"; + string tempFile = outputDir + m->mothurGetpid(process) + ".pars.results.temp"; m->openOutputFile(tempFile, 
out); out << myresults.size() << endl; for (int i = 0; i < myresults.size(); i++) { out << myresults[i] << '\t'; } out << endl; diff --git a/phylodiversitycommand.cpp b/phylodiversitycommand.cpp index 339649c..b84ee25 100644 --- a/phylodiversitycommand.cpp +++ b/phylodiversitycommand.cpp @@ -420,7 +420,7 @@ int PhyloDiversityCommand::createProcesses(vector& procIters, Tree* t, map< }else if (pid == 0){ driver(t, div, sumDiv, procIters[process], increment, randomLeaf, numSampledList, outCollect, outSum, false); - string outTemp = outputDir + toString(getpid()) + ".sumDiv.temp"; + string outTemp = outputDir + m->mothurGetpid(process) + ".sumDiv.temp"; ofstream out; m->openOutputFile(outTemp, out); diff --git a/pintail.cpp b/pintail.cpp index b9f2434..9f46cf9 100644 --- a/pintail.cpp +++ b/pintail.cpp @@ -546,7 +546,7 @@ void Pintail::createProcessesQuan() { //write out data to file so parent can read it ofstream out; - string s = toString(getpid()) + ".temp"; + string s = m->mothurGetpid(process) + ".temp"; m->openOutputFile(s, out); //output observed distances diff --git a/prcseqscommand.cpp b/prcseqscommand.cpp index 6a4df63..c4416b3 100644 --- a/prcseqscommand.cpp +++ b/prcseqscommand.cpp @@ -442,12 +442,12 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - string locationsFile = toString(getpid()) + ".temp"; - num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", locationsFile, badSeqNames, lines[process], pstart, adjustNeeded); + string locationsFile = m->mothurGetpid(process) + ".temp"; + num = driverPcr(filename, goodFileName + m->mothurGetpid(process) + ".temp", badFileName + m->mothurGetpid(process) + ".temp", locationsFile, badSeqNames, lines[process], pstart, adjustNeeded); //pass numSeqs to parent ofstream out; - string 
tempFile = filename + toString(getpid()) + ".num.temp"; + string tempFile = filename + m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << pstart << '\t' << adjustNeeded << endl; out << num << '\t' << badSeqNames.size() << endl; @@ -464,7 +464,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string } } - string locationsFile = toString(getpid()) + ".temp"; + string locationsFile = m->mothurGetpid(process) + ".temp"; num = driverPcr(filename, goodFileName, badFileName, locationsFile, badSeqNames, lines[0], pstart, adjustNeeded); //force parent to wait until all the processes are done diff --git a/preclustercommand.cpp b/preclustercommand.cpp index 05c8b7d..9acfa65 100644 --- a/preclustercommand.cpp +++ b/preclustercommand.cpp @@ -390,9 +390,9 @@ int PreClusterCommand::createProcessesGroups(string newFName, string newNName, s process++; }else if (pid == 0){ outputNames.clear(); - num = driverGroups(newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMFile, lines[process].start, lines[process].end, groups); + num = driverGroups(newFName + m->mothurGetpid(process) + ".temp", newNName + m->mothurGetpid(process) + ".temp", newMFile, lines[process].start, lines[process].end, groups); - string tempFile = toString(getpid()) + ".outputNames.temp"; + string tempFile = m->mothurGetpid(process) + ".outputNames.temp"; ofstream outTemp; m->openOutputFile(tempFile, outTemp); diff --git a/primerdesigncommand.cpp b/primerdesigncommand.cpp index bd68e2c..584aa9d 100644 --- a/primerdesigncommand.cpp +++ b/primerdesigncommand.cpp @@ -567,11 +567,11 @@ set PrimerDesignCommand::createProcesses(string newSummaryFile, vectormothurRemove(newSummaryFile + toString(getpid()) + ".temp"); + m->mothurRemove(newSummaryFile + m->mothurGetpid(process) + ".temp"); - otusToRemove = driver(newSummaryFile + toString(getpid()) + ".temp", minTms, maxTms, primers, conSeqs, lines[process].start, lines[process].end, 
numBinsProcessed, binIndex); + otusToRemove = driver(newSummaryFile + m->mothurGetpid(process) + ".temp", minTms, maxTms, primers, conSeqs, lines[process].start, lines[process].end, numBinsProcessed, binIndex); - string tempFile = toString(getpid()) + ".otus2Remove.temp"; + string tempFile = m->mothurGetpid(process) + ".otus2Remove.temp"; ofstream outTemp; m->openOutputFile(tempFile, outTemp); @@ -827,7 +827,7 @@ vector PrimerDesignCommand::createProcessesConSeqs(map& n }else if (pid == 0){ counts = driverGetCounts(nameMap, fastaCount, otuCounts, lines[process].start, lines[process].end); - string tempFile = toString(getpid()) + ".cons_counts.temp"; + string tempFile = m->mothurGetpid(process) + ".cons_counts.temp"; ofstream outTemp; m->openOutputFile(tempFile, outTemp); diff --git a/rarefact.cpp b/rarefact.cpp index 0454d9e..039cde9 100644 --- a/rarefact.cpp +++ b/rarefact.cpp @@ -133,7 +133,7 @@ int Rarefact::createProcesses(vector& procIters, RarefactionCurveData* rcd, //pass numSeqs to parent for(int i=0;imothurGetpid(process) + toString(i) + ".rarefact.temp"; displays[i]->outputTempFiles(tempFile); } exit(0); diff --git a/rarefactcommand.cpp b/rarefactcommand.cpp index 8146285..1775944 100644 --- a/rarefactcommand.cpp +++ b/rarefactcommand.cpp @@ -570,6 +570,7 @@ vector RareFactCommand::createGroupFile(vector& outputNames, map typesFiles[extension] = temp; } if (!(m->inUsersGroups(file2Group[i], groupNames))) { groupNames.push_back(file2Group[i]); } + } //for each type create a combo file @@ -648,9 +649,8 @@ vector RareFactCommand::createGroupFile(vector& outputNames, map for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk //grab data for each group - for (map > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) { - - string group = itFileNameGroup->first; + for (int n = 0; n < groupNames.size(); n++) { + string group = groupNames[n]; map > >::iterator itLine = 
files[group].find(*itNumSampled); if (itLine != files[group].end()) { diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index bf702e1..4671b79 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -1443,7 +1443,7 @@ int ScreenSeqsCommand::createProcessesContigsSummary(vector& oLength, vecto //pass numSeqs to parent ofstream out; - string tempFile = contigsreport + toString(getpid()) + ".num.temp"; + string tempFile = contigsreport + m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; @@ -1713,7 +1713,7 @@ int ScreenSeqsCommand::createProcessesAlignSummary(vector& sims, vectormothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; @@ -1996,7 +1996,7 @@ int ScreenSeqsCommand::createProcessesCreateSummary(vector& startPosition, //pass numSeqs to parent ofstream out; - string tempFile = fastafile + toString(getpid()) + ".num.temp"; + string tempFile = fastafile + m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; @@ -2532,11 +2532,11 @@ int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, st processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(lines[process], goodFileName + toString(getpid()) + ".temp", badAccnos + toString(getpid()) + ".temp", filename, badSeqNames); + num = driver(lines[process], goodFileName + m->mothurGetpid(process) + ".temp", badAccnos + m->mothurGetpid(process) + ".temp", filename, badSeqNames); //pass numSeqs to parent ofstream out; - string tempFile = filename + toString(getpid()) + ".num.temp"; + string tempFile = filename + m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); diff --git a/seqerrorcommand.cpp b/seqerrorcommand.cpp index 41ddf35..d196db8 100644 --- a/seqerrorcommand.cpp +++ b/seqerrorcommand.cpp @@ -469,11 
+469,11 @@ int SeqErrorCommand::createProcesses(string filename, string qFileName, string r process++; }else if (pid == 0){ - num = driver(filename, qFileName, rFileName, summaryFileName + toString(getpid()) + ".temp", errorOutputFileName+ toString(getpid()) + ".temp", chimeraOutputFileName + toString(getpid()) + ".temp", lines[process], qLines[process], rLines[process]); + num = driver(filename, qFileName, rFileName, summaryFileName + m->mothurGetpid(process) + ".temp", errorOutputFileName+ m->mothurGetpid(process) + ".temp", chimeraOutputFileName + m->mothurGetpid(process) + ".temp", lines[process], qLines[process], rLines[process]); //pass groupCounts to parent ofstream out; - string tempFile = filename + toString(getpid()) + ".info.temp"; + string tempFile = filename + m->mothurGetpid(process) + ".info.temp"; m->openOutputFile(tempFile, out); //output totalBases and totalMatches diff --git a/seqsummarycommand.cpp b/seqsummarycommand.cpp index 70aa55d..3f75659 100644 --- a/seqsummarycommand.cpp +++ b/seqsummarycommand.cpp @@ -590,11 +590,11 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector& startPosition, processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile + toString(getpid()) + ".temp", lines[process]); + num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile + m->mothurGetpid(process) + ".temp", lines[process]); //pass numSeqs to parent ofstream out; - string tempFile = fastafile + toString(getpid()) + ".num.temp"; + string tempFile = fastafile + m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; diff --git a/sffmultiplecommand.cpp b/sffmultiplecommand.cpp index f75662b..c1dd258 100644 --- a/sffmultiplecommand.cpp +++ b/sffmultiplecommand.cpp @@ 
-753,11 +753,11 @@ int SffMultipleCommand::createProcesses(vector sffFiles, vector processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(sffFiles, oligosFiles, lines[process].start, lines[process].end, fasta + toString(getpid()) + ".temp", name + toString(getpid()) + ".temp", group + toString(getpid()) + ".temp"); + num = driver(sffFiles, oligosFiles, lines[process].start, lines[process].end, fasta + m->mothurGetpid(process) + ".temp", name + m->mothurGetpid(process) + ".temp", group + m->mothurGetpid(process) + ".temp"); //pass numSeqs to parent ofstream out; - string tempFile = toString(getpid()) + ".num.temp"; + string tempFile = m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << '\t' << outputNames.size() << endl; for (int i = 0; i < outputNames.size(); i++) { out << outputNames[i] << endl; } diff --git a/shhhercommand.cpp b/shhhercommand.cpp index 7fa99f1..21dd089 100644 --- a/shhhercommand.cpp +++ b/shhhercommand.cpp @@ -2076,11 +2076,11 @@ int ShhherCommand::createProcesses(vector filenames){ processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(dividedFiles[process], compositeFASTAFileName + toString(getpid()) + ".temp", compositeNamesFileName + toString(getpid()) + ".temp"); + num = driver(dividedFiles[process], compositeFASTAFileName + m->mothurGetpid(process) + ".temp", compositeNamesFileName + m->mothurGetpid(process) + ".temp"); //pass numSeqs to parent ofstream out; - string tempFile = compositeFASTAFileName + toString(getpid()) + ".num.temp"; + string tempFile = compositeFASTAFileName + m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); diff --git a/shhhseqscommand.cpp b/shhhseqscommand.cpp index 0cc6eb4..f998322 100644 --- a/shhhseqscommand.cpp +++ 
b/shhhseqscommand.cpp @@ -398,11 +398,11 @@ vector ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, st processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - mapfileNames = driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMName, lines[process].start, lines[process].end, groups); + mapfileNames = driverGroups(parser, newFName + m->mothurGetpid(process) + ".temp", newNName + m->mothurGetpid(process) + ".temp", newMName, lines[process].start, lines[process].end, groups); //pass filenames to parent ofstream out; - string tempFile = newMName + toString(getpid()) + ".temp"; + string tempFile = newMName + m->mothurGetpid(process) + ".temp"; m->openOutputFile(tempFile, out); out << mapfileNames.size() << endl; for (int i = 0; i < mapfileNames.size(); i++) { diff --git a/sparcccommand.cpp b/sparcccommand.cpp index 4c9049a..f4d08df 100644 --- a/sparcccommand.cpp +++ b/sparcccommand.cpp @@ -437,7 +437,7 @@ vector > SparccCommand::createProcesses(vector >& sh //pass pvalues to parent ofstream out; - string tempFile = toString(getpid()) + ".pvalues.temp"; + string tempFile = m->mothurGetpid(process) + ".pvalues.temp"; m->openOutputFile(tempFile, out); //pass values diff --git a/sracommand.cpp b/sracommand.cpp index 2c4c2b1..aa35365 100644 --- a/sracommand.cpp +++ b/sracommand.cpp @@ -18,8 +18,14 @@ vector SRACommand::setParameters(){ CommandParameter poligos("oligos", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(poligos); CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile); CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq); + CommandParameter pcontact("contact", "InputTypes", "", "", "none", 
"none", "none","xml",false,true,true); parameters.push_back(pcontact); //choose only one multiple options - CommandParameter pplatform("platform", "Multiple", "454-???-???", "454", "", "", "","",false,false); parameters.push_back(pplatform); + CommandParameter pplatform("platform", "Multiple", "_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT", "_LS454", "", "", "","",false,false); parameters.push_back(pplatform); + CommandParameter pinstrument("instrument", "Multiple", "454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified", "454_GS", "", "", "","",false,false); parameters.push_back(pinstrument); + CommandParameter plibstrategy("libstrategy", "String", "AMPLICON", "", "", "", "","",false,false); parameters.push_back(plibstrategy); + CommandParameter plibsource("libsource", "String", "METAGENOMIC", "", "", "", "","",false,false); parameters.push_back(plibsource); + CommandParameter plibselection("libselection", "String", "PCR", "", "", "", "","",false,false); parameters.push_back(plibselection); + CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs); CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs); @@ -44,20 +50,26 @@ string SRACommand::getHelpString(){ try { string helpString = ""; helpString += "The sra command creates the necessary files for a NCBI submission. 
The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n"; - helpString += "The sra command parameters are: sff, fastq, file, oligos, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, group.\n"; + helpString += "The sra command parameters are: sff, fastq, file, oligos, contact, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, group, platform, libstrategy, libsource, libselection and instrument.\n"; helpString += "The sff parameter is used to provide the original sff file.\n"; helpString += "The fastq parameter is used to provide the original fastq file.\n"; + helpString += "The contact parameter is used to provide your contact file.\n"; helpString += "The oligos parameter is used to provide an oligos file to parse your sff or fastq file by.\n"; helpString += "The group parameter is used to provide the group file to parse your sff or fastq file by.\n"; - helpString += "The file parameter is used to provide a file containing a list of individual fastq or sff files.\n"; + helpString += "The file parameter is used to provide a file containing a list of individual fastq or sff files or paired fastq files with a group assignment. File lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file. The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile.\n"; helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n"; helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"; helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. 
The default is 0.\n"; helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; - - helpString += "The new command should be in the following format: \n"; - helpString += "new(...)\n"; + helpString += "The platform parameter is used to specify platfrom you are using choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n"; + helpString += "The instrument parameter is used to specify instrument. Choices are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified. Default=454_GS. This is a controlled vocabulary section in the XML file that will be generated. \n"; + helpString += "The libstrategy parameter is used to specify library strategy. Default=AMPLICON. Choices are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n"; + helpString += "The libsource parameter is used to specify library source. Default=METAGENOMIC. Choices are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n"; + helpString += "The libselection parameter is used to specify library selection. Default=PCR. Choices are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified. 
This is a controlled vocabulary section in the XML file that will be generated. \n"; + + helpString += "The sra should be in the following format: \n"; + helpString += "sra(...)\n"; return helpString; } catch(exception& e) { @@ -97,6 +109,7 @@ SRACommand::SRACommand(){ SRACommand::SRACommand(string option) { try { abort = false; calledHelp = false; + libLayout = "single"; //controlled vocab //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } @@ -164,6 +177,14 @@ SRACommand::SRACommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["oligos"] = inputDir + it->second; } } + + it = parameters.find("contact"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["contact"] = inputDir + it->second; } + } } //check for parameters @@ -189,6 +210,9 @@ SRACommand::SRACommand(string option) { else if(oligosfile == "not open") { abort = true; } else { m->setOligosFile(oligosfile); } + contactfile = validParameter.validFile(parameters, "contact", true); + if (contactfile == "not found") { contactfile = ""; m->mothurOut("[ERROR]: You must provide a contact file before you can use the sra command."); m->mothurOutEndLine(); abort = true; } + else if(contactfile == "not open") { abort = true; } file = validParameter.validFile(parameters, "file", true); if (file == "not open") { file = ""; abort = true; } @@ -216,13 +240,36 @@ SRACommand::SRACommand(string option) { } } - //use only one Mutliple type - platform = validParameter.validFile(parameters, "platform", false); - if (platform == "not found") { platform = "454"; } - - if ((platform == "454") || (platform == "????") || (platform == "????") || (platform == "????")) { } - else { m->mothurOut("Not a valid platform option. 
Valid platform options are 454, ...."); m->mothurOutEndLine(); abort = true; } + //use only one Mutliple type _LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT + platform = validParameter.validFile(parameters, "platform", false); if (platform == "not found") { platform = "_LS454"; } + if (!checkCasesPlatforms(platform)) { abort = true; } //error message in function + + if (!abort) { //don't check instrument model is platform is bad + //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified + instrumentModel = validParameter.validFile(parameters, "instrument", false); if (instrumentModel == "not found") { instrumentModel = "454_GS"; } + if (!checkCasesInstrumentModels(instrumentModel)) { abort = true; } //error message in function + } + //turn _ to spaces mothur's work around + for (int i = 0; i < instrumentModel.length(); i++) { if (instrumentModel[i] == '_') { instrumentModel[i] = ' '; } } + + libStrategy = validParameter.validFile(parameters, "libstrategy", false); if (libStrategy == "not found") { libStrategy = "AMPLICON"; } + if (!checkCasesLibStrategy(libStrategy)) { abort = true; } //error message in function + + //turn _ to spaces mothur's work around + for (int i = 0; i < libStrategy.length(); i++) { if (libStrategy[i] == '_') { libStrategy[i] = ' '; } } + + libSource = validParameter.validFile(parameters, "libsource", false); if (libSource == "not found") { libSource = "METAGENOMIC"; } + if (!checkCasesLibSource(libSource)) { abort = true; } //error message in function + + //turn _ to spaces mothur's work around + for (int i = 0; i < libSource.length(); i++) { if (libSource[i] == '_') { libSource[i] = ' '; } } + + libSelection = validParameter.validFile(parameters, "libselection", false); if (libSelection == "not found") { libSelection = "PCR"; } + if 
(!checkCasesLibSelection(libSelection)) { abort = true; } //error message in function + //turn _ to spaces mothur's work around + for (int i = 0; i < libSelection.length(); i++) { if (libSelection[i] == '_') { libSelection[i] = ' '; } } + string temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found"){ temp = "0"; } m->mothurConvert(temp, bdiffs); @@ -255,16 +302,195 @@ int SRACommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } + readContactFile(); + if (oligosfile != "") { readOligos(); Groups.push_back("scrap"); } + if (groupfile != "") { GroupMap groupmap(groupfile); groupmap.readMap(); Groups = groupmap.getNamesOfGroups(); Groups.push_back("scrap"); } + + if (m->control_pressed) { return 0; } + //parse files - vector filesBySample; + map > filesBySample; isSFF = false; if (file != "") { readFile(filesBySample); } else if (sfffile != "") { parseSffFile(filesBySample); } else if (fastqfile != "") { parseFastqFile(filesBySample); } + //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files. + checkGroups(filesBySample); + //create xml file + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(inputfile); } + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(inputfile)); + string outputFileName = getOutputFileName("xml", variables); + outputNames.push_back(outputFileName); outputTypes["xml"].push_back(outputFileName); + ofstream out; + m->openOutputFile(outputFileName, out); + + //contacts portion + //////////////////////////////////////////////////////// + out << "\n"; + out << "\t\n"; + out << "\t\t New Submission. 
Generated by mothur version " + m->getVersion() + " \n"; + out << "\t\t\n"; + out << "\t\t\n"; + out << "\t\t" + centerName + "\n"; + out << "\t\t email=\"" + email + "\">\n"; + out << "\t\t\t\n"; + out << "\t\t\t\t" + firstName + "\n"; + out << "\t\t\t\t" + firstName + "\n"; + out << "\t\t\t\n"; + out << "\t\t\n"; + out << "\t\t\n"; + out << "\t\n"; + //////////////////////////////////////////////////////// + + //bioproject + //////////////////////////////////////////////////////// + out << "\t\n"; + out << "\t\t\n"; + out << "\t\t\t\n"; + out << "\t\t\t\t\n"; + out << "\t\t\t\t\t\n"; + out << "\t\t\t\t\t\t\n"; + ///////////////////////out << "\t\t\t\t\t\t" + ProjectID + " \n"; + out << "\t\t\t\t\t\t\n"; + out << "\t\t\t\t\t\t\n"; + ////////////////////out << "\t\t\t\t\t\t\t" + title + " \n"; + out << "\t\t\t\t\t\t\t

" + description + "

\n"; + out << "\t\t\t\t\t\t\t\n"; + /////////////////////////out << "\t\t\t\t\t\t\t\t" + website + "\n"; + out << "\t\t\t\t\t\t\t\n"; + out << "\t\t\t\t\t\t\t\n"; + //////////////////////out << "\t\t\t\t\t\t\t\t" + medicalRelevance + "\n"; + out << "\t\t\t\t\t\t\t\n"; + out << "\t\t\t\t\t\t
\n"; + out << "\t\t\t\t\t\t\n"; + /////////////////////////out << "\t\t\t\t\t\t\t\n"; // + out << "\t\t\t\t\t\t\t\t\n"; + ////////////////////out << "\t\t\t\t\t\t\t\t\t" + scientificName + " \n"; + out << "\t\t\t\t\t\t\t\t\n"; + out << "\t\t\t\t\t\t\t\t\n"; + ////////////////////out << "\t\t\t\t\t\t\t\t\t" + dataType + " \n"; + out << "\t\t\t\t\t\t\t\t\n"; + out << "\t\t\t\t\t\t\t\n"; + out << "\t\t\t\t\t\t\n"; + out << "\t\t\t\t\t
\n"; + out << "\t\t\t\t
\n"; + out << "\t\t\t
\n"; + out << "\t\t\t\n"; + ////////////////////////////out << "\t\t\t\t" + ProjectID + " \n"; + out << "\t\t\t\n"; + out << "\t\t
\n"; + out << "\t
\n"; + //////////////////////////////////////////////////////// + + //bioSample + //////////////////////////////////////////////////////// + for (int i = 0; i < Groups.size(); i++) { + + vector thisGroupsFiles = filesBySample[Groups[i]]; + string barcodeForThisSample = Group2Barcode[Groups[i]]; + + for (int j = 0; j < thisGroupsFiles.size(); j++) { + if (m->control_pressed) { break; } + out << "\t\n"; + out << "\t\t\n"; + out << "\t\t\t\n"; + out << "\t\t\t\t\n"; + out << "\t\t\t\t\t\n"; + out << "\t\t\t\t\t\t\n"; + out << "\t\t\t\t\t\t" + Groups[i] + " \n"; + out << "\t\t\t\t\t\t\n"; + out << "\t\t\t\t\t\t\n"; + ////////////////////out << "\t\t\t\t\t\t\t" + title + " \n"; + out << "\t\t\t\t\t\t\n"; + out << "\t\t\t\t\t\t\n"; + ////////////////////out << "\t\t\t\t\t\t\t" + scientificName + " \n"; + out << "\t\t\t\t\t\t\n"; + out << "\t\t\t\t\t\t\n"; + ///////////////////////out << "\t\t\t\t\t\t\t" + BioProject + " \n"; + out << "\t\t\t\t\t\t\n"; + out << "\t\t\t\t\t\tMIMARKS.specimenn"; + out << "\t\t\t\t\t\tn"; + //add biosample required attributes + /////////////////////////////////////////////////////////////////////// + + out << "\t\t\t\t\t\tn"; + out << "\t\t\t\t\t\n"; + out << "\t\t\t\t\n"; + out << "\t\t\t\n"; + + //libID + out << "\t\t\t\n"; + string libId = thisGroupsFiles[j] + barcodeForThisSample; + if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames + vector pieces = m->splitWhiteSpace(thisGroupsFiles[j]); + libId = pieces[0] + barcodeForThisSample; + } + out << "\t\t\t\t" + libId + " \n"; + out << "\t\t\t\n"; + + out << "\t\t\n"; + out << "\t\n"; + } + } + + for (int i = 0; i < Groups.size(); i++) { + + vector thisGroupsFiles = filesBySample[Groups[i]]; + string barcodeForThisSample = Group2Barcode[Groups[i]]; + + for (int j = 0; j < thisGroupsFiles.size(); j++) { + if (m->control_pressed) { break; } + out << "\t\n"; + out << "\t\t\n"; + if (libLayout == "paired") { //adjust the libID because the 
thisGroupsFiles[j] contains two filenames + vector pieces = m->splitWhiteSpace(thisGroupsFiles[j]); + out << "\t\t\t\n"; + ////////////////////out << "\t\t\t\tfastq \n"; //since its paired we know its fastq, is the dataType the fileType??? + out << "\t\t\t\n"; + out << "\t\t\t\n"; + ////////////////////out << "\t\t\t\tfastq \n"; //since its paired we know its fastq, is the dataType the fileType??? + out << "\t\t\t\n"; + }else { //single + out << "\t\t\t\n"; + string dataType = "fastq"; + if (isSFF) { dataType = "sff"; } + ////////////////////out << "\t\t\t\t" + dataType + " \n"; //is the dataType the fileType??? + out << "\t\t\t\n"; + } + //attributes + out << "\t\t\t" + instrumentModel + "\n"; + out << "\t\t\t" + libStrategy + "\n"; + out << "\t\t\t" + libSource + "\n"; + out << "\t\t\t" + libSelection + "\n"; + out << "\t\t\t" + libLayout + "\n"; + + //////////////////bioSample info + ///////////////////bioProject info + + //libID + out << "\t\t\t\n"; + string libId = thisGroupsFiles[j] + barcodeForThisSample; + if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames + vector pieces = m->splitWhiteSpace(thisGroupsFiles[j]); + libId = pieces[0] + barcodeForThisSample; + } + out << "\t\t\t\t" + libId + " \n"; + out << "\t\t\t\n"; + out << "\t\t\n"; + out << "\t\n"; + } + } + + //////////////////////////////////////////////////////// + out << "
\n"; + out.close(); + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //output files created by command m->mothurOutEndLine(); @@ -280,29 +506,201 @@ int SRACommand::execute(){ } } //********************************************************************************************************************** -int SRACommand::readFile(vector& files){ +int SRACommand::readContactFile(){ try { - files.clear(); + lastName = ""; firstName = ""; submissionName = ""; email = ""; centerName = ""; centerType = ""; description = ""; ifstream in; - m->openInputFile(file, in); + m->openInputFile(contactfile, in); while(!in.eof()) { if (m->control_pressed) { break; } - string filename; - in >> filename; m->gobble(in); - files.push_back(filename); + string key, value; + in >> key; m->gobble(in); + value = m->getline(in); m->gobble(in); + + for (int i = 0; i < key.length(); i++) { key[i] = toupper(key[i]); } + + if (key == "USERNAME") { submissionName = value; } + else if (key == "LAST") { lastName = value; } + else if (key == "FIRST") { firstName = value; } + else if (key == "EMAIL") { email = value; } + else if (key == "CENTER") { centerName = value; } + else if (key == "TYPE") { + centerType = value; + for (int i = 0; i < centerType.length(); i++) { centerType[i] = tolower(centerType[i]); } + if ((centerType == "consortium") || (centerType == "center") || (centerType == "institute") || (centerType == "lab")) {} + else { m->mothurOut("[ERROR]: " + centerType + " is not a center type option. Valid center type options are consortium, center, institute and lab. 
This is a controlled vocabulary section in the XML file that will be generated."); m->mothurOutEndLine(); m->control_pressed = true; } + }else if (key == "DESCRIPTION") { description = value; } } in.close(); - if (!m->control_pressed) { - if (files.size() > 0) { - int pos = files[0].find(".sff"); - if (pos != string::npos) { isSFF = true; } //these files are sff files + if (lastName == "") { m->mothurOut("[ERROR]: missing last name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; } + if (firstName == "") { m->mothurOut("[ERROR]: missing first name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; } + if (submissionName == "") { m->mothurOut("[ERROR]: missing submission name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; } + if (email == "") { m->mothurOut("[ERROR]: missing email from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; } + if (centerName == "") { m->mothurOut("[ERROR]: missing center name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; } + if (centerType == "") { m->mothurOut("[ERROR]: missing center type from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; } + if (description == "") { m->mothurOut("[ERROR]: missing description from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SRACommand", "readContactFile"); + exit(1); + } +} + +//********************************************************************************************************************** +// going to have to rework this to allow for other options -- +/* + file option 1 + + sfffile1 oligosfile1 + sfffile2 oligosfile2 + ... + + file option 2 + + fastqfile1 oligosfile1 + fastqfile2 oligosfile2 + ... + + file option 3 + + fastqfile fastqfile group + fastqfile fastqfile group + fastqfile fastqfile group + ... 
+ +*/ + +int SRACommand::readFile(map >& files){ + try { + vector theseFiles; + inputfile = file; + files.clear(); + + ifstream in; + m->openInputFile(file, in); + + while(!in.eof()) { + + if (m->control_pressed) { return 0; } + + string line = m->getline(in); m->gobble(in); + vector pieces = m->splitWhiteSpace(line); + + string group = ""; + string thisFileName1, thisFileName2; thisFileName1 = ""; thisFileName2 = ""; + if (pieces.size() == 2) { + thisFileName1 = pieces[0]; + thisFileName2 = pieces[1]; + }else if (pieces.size() == 3) { + thisFileName1 = pieces[1]; + thisFileName2 = pieces[2]; + string group = pieces[0]; + libLayout = "paired"; + }else { + m->mothurOut("[ERROR]: file lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file. The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile. \n"); m->control_pressed = true; + } + + if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2 + ".\n"); } + + //check to make sure both are able to be opened + ifstream in2; + int openForward = m->openInputFile(thisFileName1, in2, "noerror"); + + //if you can't open it, try default location + if (openForward == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1); + m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine(); + ifstream in3; + openForward = m->openInputFile(tryPath, in3, "noerror"); + in3.close(); + thisFileName1 = tryPath; + } + } + + //if you can't open it, try output location + if (openForward == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName1); + m->mothurOut("Unable to open " + thisFileName1 + ". 
Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in4; + openForward = m->openInputFile(tryPath, in4, "noerror"); + thisFileName1 = tryPath; + in4.close(); + } + } + + if (openForward == 1) { //can't find it + m->mothurOut("[WARNING]: can't find " + thisFileName1 + ", ignoring.\n"); + }else{ in2.close(); } + + ifstream in3; + int openReverse = m->openInputFile(thisFileName2, in3, "noerror"); + + //if you can't open it, try default location + if (openReverse == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName2); + m->mothurOut("Unable to open " + thisFileName2 + ". Trying default " + tryPath); m->mothurOutEndLine(); + ifstream in3; + openReverse = m->openInputFile(tryPath, in3, "noerror"); + in3.close(); + thisFileName2 = tryPath; + } + } + + //if you can't open it, try output location + if (openReverse == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName2); + m->mothurOut("Unable to open " + thisFileName2 + ". 
Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in4; + openReverse = m->openInputFile(tryPath, in4, "noerror"); + thisFileName2 = tryPath; + in4.close(); + } + } + + if (openReverse == 1) { //can't find it + m->mothurOut("[WARNING]: can't find " + thisFileName2 + ", ignoring pair.\n"); + }else{ in3.close(); } + + + + if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos + //process pair + int pos = theseFiles[0].find(".sff"); + if (pos != string::npos) {//these files are sff files + isSFF = true; + sfffile = thisFileName1; oligosfile = thisFileName2; + readOligos(); + parseSffFile(files); + }else{ + isSFF = false; + fastqfile = thisFileName1; oligosfile = thisFileName2; + readOligos(); + parseFastqFile(files); + } + + }else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read + map >::iterator it = files.find(group); + if (it == files.end()) { + vector temp; temp.push_back(thisFileName1 + " " + thisFileName2); files[group] = temp; + }else { + files[group].push_back(thisFileName1 + " " + thisFileName2); + } } } + in.close(); + + inputfile = file; return 0; } @@ -312,8 +710,12 @@ int SRACommand::readFile(vector& files){ } } //********************************************************************************************************************** -int SRACommand::parseSffFile(vector& files){ +int SRACommand::parseSffFile(map >& files){ try { + vector theseFiles; + inputfile = sfffile; + libLayout = "single"; //controlled vocab + isSFF = true; //run sffinfo to parse sff file into individual sampled sff files string commandString = "sff=" + sfffile; @@ -337,13 +739,15 @@ int SRACommand::parseSffFile(vector& files){ map > filenames = sffinfoCommand->getOutputFiles(); map >::iterator it = filenames.find("sff"); - if (it != filenames.end()) { files = it->second; } + if (it != filenames.end()) { theseFiles = it->second; } else { m->control_pressed = 
true; } // error in sffinfo delete sffinfoCommand; m->mothurCalling = false; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); + mapGroupToFile(files, theseFiles); + return 0; } catch(exception& e) { @@ -353,8 +757,11 @@ int SRACommand::parseSffFile(vector& files){ } //********************************************************************************************************************** -int SRACommand::parseFastqFile(vector& files){ +int SRACommand::parseFastqFile(map >& files){ try { + vector theseFiles; + inputfile = fastqfile; + libLayout = "single"; //controlled vocab //run sffinfo to parse sff file into individual sampled sff files string commandString = "fastq=" + fastqfile; @@ -378,13 +785,15 @@ int SRACommand::parseFastqFile(vector& files){ map > filenames = fastqinfoCommand->getOutputFiles(); map >::iterator it = filenames.find("fastq"); - if (it != filenames.end()) { files = it->second; } + if (it != filenames.end()) { theseFiles = it->second; } else { m->control_pressed = true; } // error in sffinfo delete fastqinfoCommand; m->mothurCalling = false; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); + mapGroupToFile(files, theseFiles); + return 0; } catch(exception& e) { @@ -392,6 +801,540 @@ int SRACommand::parseFastqFile(vector& files){ exit(1); } } +//*************************************************************************************************************** +//maps group to file +int SRACommand::mapGroupToFile(map >& files, vector theseFiles){ + try { + + for (int i = 0; i < Groups.size(); i++) { + + set matches; + for (int j = 0; j < theseFiles.size(); j++) { + int pos = theseFiles[j].find(Groups[i]); + if (pos != string::npos) { //you have a potential match, make sure you dont have a case of partial name + if (theseFiles[j][pos+Groups[i].length()] == '.') { //final.soil.sff vs final.soil2.sff both would match soil. 
+ matches.insert(i); + } + } + } + + if(matches.size() == 1) { + map >::iterator it = files.find(Groups[i]); + if (it == files.end()) { + vector temp; temp.push_back(theseFiles[*matches.begin()]); files[Groups[i]] = temp; + }else { + files[Groups[i]].push_back(theseFiles[*matches.begin()]); + } + } + } + return 0; + } + catch(exception& e) { + m->errorOut(e, "SRACommand", "checkGroups"); + exit(1); + } +} + +//*************************************************************************************************************** +//checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files. +int SRACommand::checkGroups(map >& files){ + try { + vector newGroups; + for (int i = 0; i < Groups.size(); i++) { + + map >::iterator it = files.find(Groups[i]); + //no files for this group, remove it + if (it == files.end()) { } + else { newGroups.push_back(Groups[i]); } + } + + Groups = newGroups; + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SRACommand", "checkGroups"); + exit(1); + } +} +//*************************************************************************************************************** +int SRACommand::readOligos(){ + try { + ifstream inOligos; + m->openInputFile(oligosfile, inOligos); + + string type, oligo, roligo, group; + bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false; + + int indexPrimer = 0; + int indexBarcode = 0; + int indexPairedPrimer = 0; + int indexPairedBarcode = 0; + set uniquePrimers; + set uniqueBarcodes; + + while(!inOligos.eof()){ + + inOligos >> type; + + if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); } + + if(type[0] == '#'){ + while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there + m->gobble(inOligos); + } + else{ + m->gobble(inOligos); + //make type case insensitive + for(int i=0;i> oligo; + + if (m->debug) { m->mothurOut("[DEBUG]: reading - 
" + oligo + ".\n"); } + + for(int i=0;i::iterator itPrime = primers.find(oligo); + if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } + + if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } } + + primers[oligo] = indexPrimer; indexPrimer++; + primerNameVector.push_back(group); + } + else if (type == "PRIMER"){ + m->gobble(inOligos); + + inOligos >> roligo; + + for(int i=0;idebug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); } + + //check for repeat barcodes + string tempPair = oligo+roligo; + if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); } + else { uniquePrimers.insert(tempPair); } + + if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } } + + pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++; + primerNameVector.push_back(group); + hasPrimer = true; + } + else if(type == "REVERSE"){ + //Sequence oligoRC("reverse", oligo); + //oligoRC.reverseComplement(); + string oligoRC = reverseOligo(oligo); + revPrimer.push_back(oligoRC); + } + else if(type == "BARCODE"){ + inOligos >> group; + + //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs + //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info + + string temp = ""; + while (!inOligos.eof()) { + char c = inOligos.get(); + if (c == 10 || c == 13 || c == -1){ break; } + else if (c == 32 || c == 9){;} //space or tab + else { temp += c; } + } + + //then this is illumina data with 4 columns + if (temp != "") { + 
hasPairedBarcodes = true; + string reverseBarcode = group; //reverseOligo(group); //reverse barcode + group = temp; + + for(int i=0;idebug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); } + //check for repeat barcodes + string tempPair = oligo+reverseBarcode; + if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); } + else { uniqueBarcodes.insert(tempPair); } + + pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++; + barcodeNameVector.push_back(group); + }else { + //check for repeat barcodes + map::iterator itBar = barcodes.find(oligo); + if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } + + barcodes[oligo]=indexBarcode; indexBarcode++; + barcodeNameVector.push_back(group); + } + }else if(type == "LINKER"){ + linker.push_back(oligo); + }else if(type == "SPACER"){ + spacer.push_back(oligo); + } + else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. 
Ignoring " + oligo + "."); m->mothurOutEndLine(); } + } + m->gobble(inOligos); + } + inOligos.close(); + + if (hasPairedBarcodes || hasPrimer) { + pairedOligos = true; + if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; } + } + + + //add in potential combos + if(barcodeNameVector.size() == 0){ + barcodeNameVector.push_back(""); + } + + if(primerNameVector.size() == 0){ + primerNameVector.push_back(""); + } + + set uniqueNames; //used to cleanup outputFileNames + if (pairedOligos) { + for(map::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){ + for(map::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){ + + string primerName = primerNameVector[itPrimer->first]; + string barcodeName = barcodeNameVector[itBar->first]; + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else { + string comboGroupName = ""; + string fastqFileName = ""; + + if(primerName == ""){ + comboGroupName = barcodeNameVector[itBar->first]; + } + else{ + if(barcodeName == ""){ + comboGroupName = primerNameVector[itPrimer->first]; + } + else{ + comboGroupName = barcodeNameVector[itBar->first] + "." 
+ primerNameVector[itPrimer->first]; + } + } + uniqueNames.insert(comboGroupName); + Group2Barcode[comboGroupName] = (itBar->second).forward+"."+(itBar->second).reverse; + } + } + } + }else { + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ + + string primerName = primerNameVector[itPrimer->second]; + string barcodeName = barcodeNameVector[itBar->second]; + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else { + string comboGroupName = ""; + string fastqFileName = ""; + + if(primerName == ""){ + comboGroupName = barcodeNameVector[itBar->second]; + } + else{ + if(barcodeName == ""){ + comboGroupName = primerNameVector[itPrimer->second]; + } + else{ + comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; + } + } + uniqueNames.insert(comboGroupName); + Group2Barcode[comboGroupName] = itBar->first; + } + } + } + } + + + if (m->debug) { int count = 0; for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } } + + for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); } + + return true; + + } + catch(exception& e) { + m->errorOut(e, "SRACommand", "readOligos"); + exit(1); + } +} +//********************************************************************/ +string SRACommand::reverseOligo(string oligo){ + try { + string reverse = ""; + + for(int i=oligo.length()-1;i>=0;i--){ + + if(oligo[i] == 'A') { reverse += 'T'; } + else if(oligo[i] == 'T'){ reverse += 'A'; } + else if(oligo[i] == 'U'){ reverse += 'A'; } + + else if(oligo[i] == 'G'){ reverse += 'C'; } + else if(oligo[i] == 'C'){ reverse += 'G'; } + + else if(oligo[i] == 'R'){ reverse += 'Y'; } + else if(oligo[i] == 'Y'){ reverse += 'R'; } + + else if(oligo[i] == 'M'){ reverse += 'K'; } 
+ else if(oligo[i] == 'K'){ reverse += 'M'; } + + else if(oligo[i] == 'W'){ reverse += 'W'; } + else if(oligo[i] == 'S'){ reverse += 'S'; } + + else if(oligo[i] == 'B'){ reverse += 'V'; } + else if(oligo[i] == 'V'){ reverse += 'B'; } + + else if(oligo[i] == 'D'){ reverse += 'H'; } + else if(oligo[i] == 'H'){ reverse += 'D'; } + + else { reverse += 'N'; } + } + + + return reverse; + } + catch(exception& e) { + m->errorOut(e, "SRACommand", "reverseOligo"); + exit(1); + } +} +//********************************************************************/ +//_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT +bool SRACommand::checkCasesPlatforms(string& platform){ + try { + string original = platform; + bool isOkay = true; + + //remove users possible case errors + for (int i = 0; i < platform.size(); i++) { platform[i] = toupper(platform[i]); } + + //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT + + if ((platform == "_LS454") || (platform == "ILLUMINA") || (platform == "ION_TORRENT") || (platform == "PACBIO_SMRT") || (platform == "454")) { } + else { isOkay = false; } + + if (isOkay) { + if (platform == "454") { platform = "_LS454"; } + }else { + m->mothurOut("[ERROR]: " + original + " is not a valid platform option. 
Valid platform options are _LS454, ILLUMINA-ION, TORRENT or PACBIO_SMRT."); m->mothurOutEndLine(); abort = true; + } + + return isOkay; + } + catch(exception& e) { + m->errorOut(e, "SRACommand", "checkCasesPlatforms"); + exit(1); + } +} +//********************************************************************/ +//454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified +bool SRACommand::checkCasesInstrumentModels(string& instrumentModel){ + try { + string original = instrumentModel; + bool isOkay = true; + + //remove users possible case errors + for (int i = 0; i < instrumentModel.size(); i++) { instrumentModel[i] = toupper(instrumentModel[i]); } + + //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT + if (platform == "_LS454") { //instrument model options are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-unspecified + if ((instrumentModel == "454_GS") || (instrumentModel == "454_GS_20") || (instrumentModel == "454_GS_FLX") || (instrumentModel == "454_GS_FLX_TITANIUM") || (instrumentModel == "454_GS_JUNIOR") || (instrumentModel == "UNSPECIFIED")) { } + else { isOkay = false; } + if (isOkay) { + if (instrumentModel == "454_GS_FLX_TITANIUM") { instrumentModel = "454_GS_FLX_Titanium"; } + if (instrumentModel == "454_GS_JUNIOR") { instrumentModel = "454_GS_Junior"; } + if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; } + }else { + m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. 
Valid instrument options are 454_GS, 454_GS_20, 454_GS_FLX, 454_GS_FLX_Titanium, 454_GS_Junior or unspecified."); m->mothurOutEndLine(); abort = true; + } + + }else if (platform == "ILLUMINA") { //instrument model options are Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-unspecified + if ((instrumentModel == "ILLUMINA_GENOME_ANALYZER") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX") || (instrumentModel == "ILLUMINA_HISEQ_2000") || (instrumentModel == "ILLUMINA_HISEQ_1000") || (instrumentModel == "ILLUMINA_MISEQ") || (instrumentModel == "UNSPECIFIED")) { } + else { isOkay = false; } + + if (isOkay) { + if (instrumentModel == "ILLUMINA_GENOME_ANALYZER") { instrumentModel = "Illumina_Genome_Analyzer"; } + if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II") { instrumentModel = "Illumina_Genome_Analyzer_II"; } + if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX") { instrumentModel = "Illumina_Genome_Analyzer_IIx"; } + if (instrumentModel == "ILLUMINA_HISEQ_2000") { instrumentModel = "Illumina_HiSeq_2000"; } + if (instrumentModel == "ILLUMINA_HISEQ_1000") { instrumentModel = "Illumina_HiSeq_1000"; } + if (instrumentModel == "ILLUMINA_MISEQ") { instrumentModel = "Illumina_MiSeq"; } + if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; } + }else { + m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. 
Valid instrument options are Illumina_Genome_Analyzer, Illumina_Genome_Analyzer_II, Illumina_Genome_Analyzer_IIx, Illumina_HiSeq_2000, Illumina_HiSeq_1000, Illumina_MiSeq or unspecified."); m->mothurOutEndLine(); abort = true; + } + + }else if (platform == "ION_TORRENT") { //instrument model options are Ion_Torrent_PGM-unspecified + if ((instrumentModel == "ION_TORRENT_PGM") || (instrumentModel == "UNSPECIFIED")) { } + else { isOkay = false; } + + if (isOkay) { + if (instrumentModel == "ION_TORRENT_PGM") { instrumentModel = "Ion_Torrent_PGM"; } + if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; } + }else { + m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are Ion_Torrent_PGM or unspecified."); m->mothurOutEndLine(); abort = true; + } + }else if (platform == "PACBIO_SMRT") { //instrument model options are PacBio_RS-unspecified + if ((instrumentModel == "PACBIO_RS") || (instrumentModel == "UNSPECIFIED")) { } + else { isOkay = false; } + + if (isOkay) { + if (instrumentModel == "PACBIO_RS") { instrumentModel = "PacBio_RS"; } + if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; } + }else { + m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. 
Valid instrument options are PacBio_RS or unspecified."); m->mothurOutEndLine(); abort = true; + } + } + return isOkay; + } + catch(exception& e) { + m->errorOut(e, "SRACommand", "checkCasesInstrumentModels"); + exit(1); + } +} //********************************************************************************************************************** +//AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER +bool SRACommand::checkCasesLibStrategy(string& libStrategy){ + try { + string original = libStrategy; + bool isOkay = true; + + //remove users possible case errors + for (int i = 0; i < libStrategy.size(); i++) { libStrategy[i] = toupper(libStrategy[i]); } + + if ((libStrategy == "AMPLICON") || (libStrategy == "WGA") || (libStrategy == "WGS") || (libStrategy == "WGX") || (libStrategy == "RNA-SEQ") || (libStrategy == "MIRNA-SEQ") || (libStrategy == "WCS") || (libStrategy == "CLONE") || (libStrategy == "POOLCLONE") || (libStrategy == "CLONEEND") || (libStrategy == "FINISHING") || (libStrategy == "CHIP-SEQ") || (libStrategy == "MNASE-SEQ") || (libStrategy == "DNASE-HYPERSENSITIVITY") || (libStrategy == "BISULFITE-SEQ") || (libStrategy == "TN-SEQ") || (libStrategy == "EST") || (libStrategy == "FL-CDNA") || (libStrategy == "CTS") || (libStrategy == "MRE-SEQ")|| (libStrategy == "MEDIP-SEQ") || (libStrategy == "MBD-SEQ") || (libStrategy == "OTHER")) { } + else { isOkay = false; } + + if (isOkay) { + if (libStrategy == "RNA-SEQ") { libStrategy = "RNA-Seq"; } + if (libStrategy == "MIRNA-SEQ") { libStrategy = "miRNA-Seq"; } + if (libStrategy == "CHIP-SEQ") { libStrategy = "ChIP-Seq"; } + if (libStrategy == "MNASE-SEQ") { libStrategy = "MNase-Seq"; } + if (libStrategy == "DNASE-HYPERSENSITIVITY") { libStrategy = "DNase-Hypersensitivity"; } + if (libStrategy == "BISULFITE-SEQ") { libStrategy = "Bisulfite-Seq"; } + if (libStrategy == "TN-SEQ") { 
libStrategy = "Tn-Seq"; } + if (libStrategy == "FL-CDNA") { libStrategy = "FL-cDNA"; } + if (libStrategy == "MRE-SEQ") { libStrategy = "MRE-Seq"; } + if (libStrategy == "MEDIP-SEQ") { libStrategy = "MeDIP-Seq"; } + }else { + m->mothurOut("[ERROR]: " + original + " is not a valid libstrategy option. Valid libstrategy options are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq or OTHER."); m->mothurOutEndLine(); abort = true; + } + + return isOkay; + } + catch(exception& e) { + m->errorOut(e, "SRACommand", "checkCasesLibStrategy"); + exit(1); + } +} +//********************************************************************************************************************** +//METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER +bool SRACommand::checkCasesLibSource(string& libSource){ + try { + string original = libSource; + bool isOkay = true; + + //remove users possible case errors + for (int i = 0; i < libSource.size(); i++) { libSource[i] = toupper(libSource[i]); } + + if ((libSource == "METAGENOMIC") || (libSource == "GENOMIC") || (libSource == "TRANSCRIPTOMIC") || (libSource == "METATRANSCRIPTOMIC") || (libSource == "SYNTHETIC") || (libSource == "VIRAL_RNA") || (libSource == "OTHER")) { } + else { isOkay = false; } + + if (isOkay) { + + }else { + m->mothurOut("[ERROR]: " + original + " is not a valid libsource option. 
Valid libsource options are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA or OTHER."); m->mothurOutEndLine(); abort = true; + } + + return isOkay; + } + catch(exception& e) { + m->errorOut(e, "SRACommand", "checkCasesLibSource"); + exit(1); + } +} +//********************************************************************************************************************** +//PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified +bool SRACommand::checkCasesLibSelection(string& libSelection){ + try { + string original = libSelection; + bool isOkay = true; + + //remove users possible case errors + for (int i = 0; i < libSelection.size(); i++) { libSelection[i] = toupper(libSelection[i]); } + + if ((libSelection == "PCR") || (libSelection == "RANDOM") || (libSelection == "RANDOM_PCR") || (libSelection == "RT-PCR") || (libSelection == "HMPR") || (libSelection == "MF") || (libSelection == "CF-S") || (libSelection == "CF-H") || (libSelection == "CF-T") || (libSelection == "CF-M") || (libSelection == "MDA") || (libSelection == "MSLL") || (libSelection == "CDNA") || (libSelection == "CHIP") || (libSelection == "MNASE") || (libSelection == "DNASE") || (libSelection == "HYBRID_SELECTION") || (libSelection == "REDUCED_REPRESENTATION") || (libSelection == "RESTRICTION_DIGEST") || (libSelection == "5-METHYLCYTIDINE_ANTIBODY") || (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN") || (libSelection == "CAGE") || (libSelection == "RACE") || (libSelection == "SIZE_FRACTIONATION") || (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD") || (libSelection == "OTHER") || (libSelection == "UNSPECIFIED")) { } + else { isOkay = false; } + + if (isOkay) { + if (libSelection == "CDNA") { libSelection = "cDNA"; } + if (libSelection == 
"CHIP") { libSelection = "ChIP"; } + if (libSelection == "MNASE") { libSelection = "MNase"; } + if (libSelection == "DNASE") { libSelection = "DNAse"; } + if (libSelection == "HYBRID_SELECTION") { libSelection = "Hybrid_Selection"; } + if (libSelection == "REDUCED_REPRESENTATION") { libSelection = "Reduced_Representation"; } + if (libSelection == "RESTRICTION_DIGEST") { libSelection = "Restriction_Digest"; } + if (libSelection == "5-METHYLCYTIDINE_ANTIBODY") { libSelection = "5-methylcytidine_antibody"; } + if (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN") { libSelection = "MBD2_protein_methyl-CpG_binding_domain"; } + if (libSelection == "SIZE_FRACTIONATION") { libSelection = "size_fractionation"; } + if (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD") { libSelection = "Padlock_probes_capture_method"; } + if (libSelection == "OTHER") { libSelection = "other"; } + if (libSelection == "UNSPECIFIED") { libSelection = "unspecified"; } + + }else { + m->mothurOut("[ERROR]: " + original + " is not a valid libselection option. 
Valid libselection options are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other or unspecified."); m->mothurOutEndLine(); abort = true; + } + + return isOkay; + } + catch(exception& e) { + m->errorOut(e, "SRACommand", "checkCasesLibSelection"); + exit(1); + } +} +//********************************************************************************************************************** diff --git a/sracommand.h b/sracommand.h index 3f7f156..5cc3714 100644 --- a/sracommand.h +++ b/sracommand.h @@ -10,7 +10,7 @@ #define Mothur_sracommand_h #include "command.hpp" - +#include "trimoligos.h" /**************************************************************************************************/ @@ -34,14 +34,35 @@ public: void help() { m->mothurOut(getHelpString()); } private: - bool abort, isSFF; + bool abort, isSFF, pairedOligos; int tdiffs, bdiffs, pdiffs, sdiffs, ldiffs; - string sfffile, fastqfile, platform, outputDir, groupfile, file, oligosfile; - vector outputNames; - - int readFile(vector&); - int parseSffFile(vector&); - int parseFastqFile(vector&); + string sfffile, fastqfile, outputDir, groupfile, file, oligosfile, contactfile, inputfile; + string libStrategy, libSource, libSelection, libLayout, platform, instrumentModel, fileType; + string submissionName, lastName, firstName, email, centerName, centerType, description; + vector outputNames, Groups, revPrimer; + vector primerNameVector; + vector barcodeNameVector; + map Group2Barcode; + map pairedBarcodes; + map pairedPrimers; + map barcodes; + map primers; + vector linker; + vector spacer; + + bool checkCasesInstrumentModels(string&); + bool checkCasesPlatforms(string&); + bool checkCasesLibStrategy(string&); + bool checkCasesLibSource(string&); + bool checkCasesLibSelection(string&); + int 
readFile(map >&); + int readContactFile(); + int readOligos(); + int parseSffFile(map >&); + int parseFastqFile(map >&); + int checkGroups(map >&); + int mapGroupToFile(map >&, vector); + string reverseOligo(string oligo); }; diff --git a/summaryqualcommand.cpp b/summaryqualcommand.cpp index 56a6fcb..3dc6582 100644 --- a/summaryqualcommand.cpp +++ b/summaryqualcommand.cpp @@ -347,7 +347,7 @@ int SummaryQualCommand::createProcessesCreateSummary(vector& position, vect //pass numSeqs to parent ofstream out; - string tempFile = qualfile + toString(getpid()) + ".num.temp"; + string tempFile = qualfile + m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << numSeqs << endl; diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index 59c12a0..fd8f775 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -611,7 +611,7 @@ int SummarySharedCommand::process(vector thisLookup, string if(processors == 1){ - driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists); + driver(thisItersLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists); m->appendFiles((sumFileName + ".temp"), sumFileName); m->mothurRemove((sumFileName + ".temp")); if (mult) { @@ -632,11 +632,11 @@ int SummarySharedCommand::process(vector thisLookup, string processIDS.push_back(pid); process++; }else if (pid == 0){ - driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists); + driver(thisItersLookup, lines[process].start, lines[process].end, sumFileName + m->mothurGetpid(process) + ".temp", sumAllFileName + m->mothurGetpid(process) + ".temp", calcDists); //only do this if you want a distance file if (createPhylip) { - string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist"; + string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + 
m->mothurGetpid(process) + ".dist"; ofstream outtemp; m->openOutputFile(tempdistFileName, outtemp); @@ -659,10 +659,10 @@ int SummarySharedCommand::process(vector thisLookup, string } //parent do your part - driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists); - m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName); - m->mothurRemove((sumFileName + toString(getpid()) + ".temp")); - if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); } + driver(thisItersLookup, lines[0].start, lines[0].end, sumFileName + m->mothurGetpid(process) + ".temp", sumAllFileName + m->mothurGetpid(process) + ".temp", calcDists); + m->appendFiles((sumFileName + m->mothurGetpid(process) + ".temp"), sumFileName); + m->mothurRemove((sumFileName + m->mothurGetpid(process) + ".temp")); + if (mult) { m->appendFiles((sumAllFileName + m->mothurGetpid(process) + ".temp"), sumAllFileName); } //force parent to wait until all the processes are done for (int i = 0; i < processIDS.size(); i++) { @@ -724,9 +724,9 @@ int SummarySharedCommand::process(vector thisLookup, string //for each bin - for (int k = 0; k < thisLookup[0]->getNumBins(); k++) { + for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } - for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); } + for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); } } // Allocate memory for thread data. 
@@ -738,7 +738,7 @@ int SummarySharedCommand::process(vector thisLookup, string } //parent do your part - driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists); + driver(thisItersLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists); m->appendFiles((sumFileName + "0.temp"), sumFileName); m->mothurRemove((sumFileName + "0.temp")); if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); } @@ -754,7 +754,7 @@ int SummarySharedCommand::process(vector thisLookup, string m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName); m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp")); - for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; } + for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; } if (createPhylip) { for (int k = 0; k < calcDists.size(); k++) { diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp index 753b5cb..2ea653b 100644 --- a/treegroupscommand.cpp +++ b/treegroupscommand.cpp @@ -25,7 +25,7 @@ vector TreeGroupCommand::setParameters(){ CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); - CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", 
"jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc); + CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); //CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput); @@ -397,7 +397,12 @@ int TreeGroupCommand::execute(){ treeCalculators.push_back(new MemEuclidean()); }else if (Estimators[i] == "mempearson") { treeCalculators.push_back(new MemPearson()); - } + }else if (Estimators[i] == "jsd") { + treeCalculators.push_back(new JSD()); + }else if (Estimators[i] == "rjsd") { + treeCalculators.push_back(new RJSD()); + } + } } @@ -804,7 +809,7 @@ int TreeGroupCommand::process(vector thisLookup) { driver(thisItersLookup, lines[process].start, lines[process].end, calcDists); - string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(getpid()) + ".dist"; + string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + m->mothurGetpid(process) + ".dist"; ofstream outtemp; m->openOutputFile(tempdistFileName, outtemp); diff --git a/treegroupscommand.h b/treegroupscommand.h index 36c852b..3c26eb1 100644 --- a/treegroupscommand.h +++ b/treegroupscommand.h @@ -60,6 +60,8 @@ #include "memchord.h" #include "memeuclidean.h" #include "mempearson.h" +#include "sharedrjsd.h" +#include "sharedjsd.h" diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp index 33349de..37beccd 
100644 --- a/trimflowscommand.cpp +++ b/trimflowscommand.cpp @@ -837,7 +837,7 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim for(int i=0;imothurGetpid(process) + ".temp"; ofstream temp; m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); temp.close(); @@ -846,9 +846,9 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim } } driverCreateTrim(flowFileName, - (trimFlowFileName + toString(getpid()) + ".temp"), - (scrapFlowFileName + toString(getpid()) + ".temp"), - (fastaFileName + toString(getpid()) + ".temp"), + (trimFlowFileName + m->mothurGetpid(process) + ".temp"), + (scrapFlowFileName + m->mothurGetpid(process) + ".temp"), + (fastaFileName + m->mothurGetpid(process) + ".temp"), tempBarcodePrimerComboFileNames, lines[process]); exit(0); diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 951ce65..7f2a852 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -1047,15 +1047,15 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName for(int i=0;imothurGetpid(process) + ".temp"; m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close(); if(qFileName != ""){ - tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp"; + tempPrimerQualFileNames[i][j] += m->mothurGetpid(process) + ".temp"; m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close(); } if(nameFile != ""){ - tempNameFileNames[i][j] += toString(getpid()) + ".temp"; + tempNameFileNames[i][j] += m->mothurGetpid(process) + ".temp"; m->openOutputFile(tempNameFileNames[i][j], temp); temp.close(); } } @@ -1065,27 +1065,27 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName driverCreateTrim(filename, qFileName, - (trimFASTAFileName + toString(getpid()) + ".temp"), - (scrapFASTAFileName + toString(getpid()) + ".temp"), - (trimQualFileName + toString(getpid()) + ".temp"), - (scrapQualFileName + toString(getpid()) + ".temp"), - 
(trimNameFileName + toString(getpid()) + ".temp"), - (scrapNameFileName + toString(getpid()) + ".temp"), - (trimCountFileName + toString(getpid()) + ".temp"), - (scrapCountFileName + toString(getpid()) + ".temp"), - (groupFile + toString(getpid()) + ".temp"), + (trimFASTAFileName + m->mothurGetpid(process) + ".temp"), + (scrapFASTAFileName + m->mothurGetpid(process) + ".temp"), + (trimQualFileName + m->mothurGetpid(process) + ".temp"), + (scrapQualFileName + m->mothurGetpid(process) + ".temp"), + (trimNameFileName + m->mothurGetpid(process) + ".temp"), + (scrapNameFileName + m->mothurGetpid(process) + ".temp"), + (trimCountFileName + m->mothurGetpid(process) + ".temp"), + (scrapCountFileName + m->mothurGetpid(process) + ".temp"), + (groupFile + m->mothurGetpid(process) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[process], qLines[process]); - if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); } + if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + m->mothurGetpid(process) + '\n'); } //pass groupCounts to parent if(createGroup){ ofstream out; - string tempFile = filename + toString(getpid()) + ".num.temp"; + string tempFile = filename + m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << groupCounts.size() << endl; diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index 1c146ac..fb36577 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -778,7 +778,7 @@ int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector > na //pass numSeqs to parent ofstream out; - string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp"; + string tempFile = outputDir + m->mothurGetpid(process) + ".weightedcommand.results.temp"; m->openOutputFile(tempFile, out); for (int i = 
lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl; out.close(); diff --git a/unweighted.cpp b/unweighted.cpp index 8fbd9d2..30c54b6 100644 --- a/unweighted.cpp +++ b/unweighted.cpp @@ -100,7 +100,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfG //pass numSeqs to parent ofstream out; - string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp"; + string tempFile = outputDir + m->mothurGetpid(process) + ".unweighted.results.temp"; m->openOutputFile(tempFile, out); out << myresults.size() << endl; for (int i = 0; i < myresults.size(); i++) { out << myresults[i] << '\t'; } out << endl; @@ -360,7 +360,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfG //pass numSeqs to parent ofstream out; - string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp"; + string tempFile = outputDir + m->mothurGetpid(process) + ".unweighted.results.temp"; m->openOutputFile(tempFile, out); out << myresults.size() << endl; for (int i = 0; i < myresults.size(); i++) { out << myresults[i] << '\t'; } out << endl; diff --git a/validcalculator.cpp b/validcalculator.cpp index f5f6562..b907541 100644 --- a/validcalculator.cpp +++ b/validcalculator.cpp @@ -675,6 +675,8 @@ void ValidCalculators::initialTreeGroups() { treegroup["gower"] = "gower"; treegroup["memchi2"] = "memchi2"; treegroup["memchord"] = "memchord"; + treegroup["jsd"] = "jsd"; + treegroup["rjsd"] = "rjsd"; treegroup["memeuclidean"] = "memeuclidean"; treegroup["mempearson"] = "mempearson"; diff --git a/weighted.cpp b/weighted.cpp index 49bf6bf..91a2144 100644 --- a/weighted.cpp +++ b/weighted.cpp @@ -79,7 +79,7 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro //pass numSeqs to parent ofstream out; - string tempFile = outputDir + toString(getpid()) + ".weighted.results.temp"; + string tempFile = outputDir + 
m->mothurGetpid(process) + ".weighted.results.temp"; m->openOutputFile(tempFile, out); out << Myresults.size() << endl;