From: westcott Date: Tue, 4 Jan 2011 15:40:55 +0000 (+0000) Subject: removed parse.sff command and made its functionality part of sffinfo command. fixed... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=3b137af694d322b7162e97275c070c41b42597a3 removed parse.sff command and made its functionality part of sffinfo command. fixed issues with corr.axes method=kendall. --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 72be773..edb26d3 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -29,8 +29,6 @@ 7E962A41121F76B1007464B5 /* invsimpson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = invsimpson.cpp; sourceTree = ""; }; 7E99CA1312C8B0970041246B /* shhhercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = shhhercommand.h; sourceTree = ""; }; 7E99CA1412C8B0970041246B /* shhhercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = shhhercommand.cpp; sourceTree = ""; }; - 7E99D77C12CBAD780041246B /* untitled.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = untitled.h; path = ../untitled.h; sourceTree = SOURCE_ROOT; }; - 7E99D77D12CBAD780041246B /* untitled.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = untitled.cpp; path = ../untitled.cpp; sourceTree = SOURCE_ROOT; }; 7EA299BA11E384940022D8D3 /* sensspeccommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sensspeccommand.h; sourceTree = ""; }; 7EA299BB11E384940022D8D3 /* sensspeccommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sensspeccommand.cpp; sourceTree = ""; }; 7EC61A0911BEA6AF00F668D9 /* weightedlinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = weightedlinkage.cpp; sourceTree = ""; }; @@ -559,8 +557,6 @@ A7DF0AE0121EBB14004A03EA /* getopt_long.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getopt_long.h; sourceTree = ""; }; A7DF0AE1121EBB14004A03EA /* prng.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = prng.cpp; sourceTree = ""; }; A7DF0AE2121EBB14004A03EA /* prng.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = prng.h; sourceTree = ""; }; - A7E8338B115BBDAA00739EC4 /* parsesffcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parsesffcommand.cpp; sourceTree = ""; }; - A7E8338C115BBDAA00739EC4 /* parsesffcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsesffcommand.h; sourceTree = ""; }; A7F0C06412B7D64F0048BC64 /* odum.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = odum.h; sourceTree = ""; }; A7F0C06512B7D64F0048BC64 /* odum.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = odum.cpp; sourceTree = ""; }; A7F0C08A12B7EAE80048BC64 /* canberra.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = canberra.h; sourceTree = ""; }; @@ -581,8 +577,6 @@ 08FB7794FE84155DC02AAC07 /* mothur */ = { isa = PBXGroup; children = ( - 7E99D77C12CBAD780041246B /* untitled.h */, - 7E99D77D12CBAD780041246B /* untitled.cpp */, A768D95D1248FEAA008AB1D0 /* mothur */, A7639F8D1175DF35008F5578 /* makefile */, A7DA1FF0113FECD400BF472F /* alignment.cpp */, @@ -962,8 +956,6 @@ A724C2B71254961E006BB1C7 /* parsefastaqcommand.cpp */, A7DA20BD113FECD400BF472F /* parselistscommand.h */, A7DA20BC113FECD400BF472F /* parselistscommand.cpp */, - A7E8338C115BBDAA00739EC4 /* parsesffcommand.h */, - A7E8338B115BBDAA00739EC4 /* parsesffcommand.cpp */, A7DA20C1113FECD400BF472F /* parsimonycommand.h */, A7DA20C0113FECD400BF472F /* parsimonycommand.cpp */, A7DA20C3113FECD400BF472F /* pcacommand.h */, diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index ee449cc..78c9f9e 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -44,7 +44,7 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num } } //*************************************************************************************************************** -ChimeraSlayer::ChimeraSlayer(string file, string temp, string name, string mode, int k, int ms, int mms, int win, float div, +ChimeraSlayer::ChimeraSlayer(string file, string temp, string name, string mode, string abunds, int k, int ms, int mms, int win, float div, int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() { try { fastafile = file; templateSeqs = readSeqs(fastafile); @@ -64,6 +64,7 @@ ChimeraSlayer::ChimeraSlayer(string file, string temp, string name, string mode, increment = inc; numWanted = numw; realign = r; + includeAbunds = abunds; //read name file and create nameMapRank readNameFile(name); @@ -283,10 +284,26 @@ vector ChimeraSlayer::getTemplate(Sequence* q) { //create list of names we want to put into the template set namesToAdd; for (itRank = nameMapRank.begin(); itRank != nameMapRank.end(); itRank++) { - if ((itRank->second).size() > thisRank) { - //you are more abundant than me - for (int i = 0; i < (itRank->second).size(); i++) { - namesToAdd.insert((itRank->second)[i]); + if (itRank->first != thisName) { + if (includeAbunds == "greaterequal") { + if ((itRank->second).size() >= thisRank) { + //you are more abundant than me or equal to my abundance + for (int i = 0; i < (itRank->second).size(); i++) { + namesToAdd.insert((itRank->second)[i]); + } + } + }else if (includeAbunds == "greater") { + if ((itRank->second).size() > thisRank) { + //you are more abundant than me + for (int i = 0; i < (itRank->second).size(); i++) { + namesToAdd.insert((itRank->second)[i]); + } + } + }else if (includeAbunds == "all") { + //add everyone + for (int i = 0; i < (itRank->second).size(); i++) { + namesToAdd.insert((itRank->second)[i]); + } } } } diff --git a/chimeraslayer.h b/chimeraslayer.h index b6cae49..fbe880f 100644 --- a/chimeraslayer.h +++ b/chimeraslayer.h @@ -26,7 +26,7 @@ class ChimeraSlayer : public Chimera { public: ChimeraSlayer(string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool); - ChimeraSlayer(string, string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool); + ChimeraSlayer(string, string, string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool); ~ChimeraSlayer(); @@ -48,7 +48,7 @@ class ChimeraSlayer : public Chimera { map > nameMapRank; //sequence name to rank so you can construct a template of the abundant sequences if the user uses itself as template vector chimeraResults; - string chimeraFlags, searchMethod, fastafile; + string chimeraFlags, searchMethod, fastafile, includeAbunds; bool realign; int window, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters, increment; float divR; diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index 394172c..b94d445 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -14,7 +14,7 @@ //********************************************************************************************************************** vector ChimeraSlayerCommand::getValidParameters(){ try { - string AlignArray[] = {"fasta", "processors", "name","window", "template","numwanted", "ksize", "match","mismatch", + string AlignArray[] = {"fasta", "processors", "name","window", "include","template","numwanted", "ksize", "match","mismatch", "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir", "search","realign" }; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); return myArray; @@ -69,7 +69,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { else { //valid paramters for this command - string Array[] = {"fasta", "processors","name", "window", "template","numwanted", "ksize", "match","mismatch", + string Array[] = {"fasta", "processors","name", "include","window", "template","numwanted", "ksize", "match","mismatch", "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir", "search","realign" }; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); @@ -231,6 +231,9 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; } convert(temp, processors); + includeAbunds = validParameter.validFile(parameters, "include", false); if (includeAbunds == "not found") { includeAbunds = "greater"; } + if ((includeAbunds != "greater") && (includeAbunds != "greaterequal") && (includeAbunds != "all")) { includeAbunds = "greater"; m->mothurOut("Invalid include setting. options are greater, greaterequal or all. using greater."); m->mothurOutEndLine(); } + temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found") { temp = "7"; } convert(temp, ksize); @@ -346,7 +349,7 @@ int ChimeraSlayerCommand::execute(){ chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); }else { if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one - chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFileNames[s], search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); + chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFileNames[s], search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); }else { m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine(); @@ -368,7 +371,7 @@ int ChimeraSlayerCommand::execute(){ string nameFile = filenames["name"][0]; fastaFileNames[s] = filenames["fasta"][0]; - chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); + chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFile, search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); } } diff --git a/chimeraslayercommand.h b/chimeraslayercommand.h index 9ff0c8f..9567fca 100644 --- a/chimeraslayercommand.h +++ b/chimeraslayercommand.h @@ -49,7 +49,7 @@ private: #endif bool abort, realign; - string fastafile, templatefile, outputDir, search, namefile; + string fastafile, templatefile, outputDir, search, namefile, includeAbunds; int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength; float divR; Chimera* chimera; diff --git a/commandfactory.cpp b/commandfactory.cpp index 7d47491..5a7b237 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -65,7 +65,6 @@ #include "otuhierarchycommand.h" #include "setdircommand.h" #include "parselistscommand.h" -#include "parsesffcommand.h" #include "chimeraccodecommand.h" #include "chimeracheckcommand.h" #include "chimeraslayercommand.h" @@ -186,7 +185,6 @@ CommandFactory::CommandFactory(){ commands["set.dir"] = "set.dir"; commands["merge.files"] = "merge.files"; commands["parse.list"] = "parse.list"; - commands["parse.sff"] = "parse.sff"; commands["set.logfile"] = "set.logfile"; commands["phylo.diversity"] = "phylo.diversity"; commands["make.group"] = "make.group"; @@ -340,7 +338,6 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "set.dir") { command = new SetDirectoryCommand(optionString); } else if(commandName == "set.logfile") { command = new SetLogFileCommand(optionString); } else if(commandName == "parse.list") { command = new ParseListCommand(optionString); } - else if(commandName == "parse.sff") { command = new ParseSFFCommand(optionString); } else if(commandName == "phylo.diversity") { command = new PhyloDiversityCommand(optionString); } else if(commandName == "make.group") { command = new MakeGroupCommand(optionString); } else if(commandName == "chop.seqs") { command = new ChopSeqsCommand(optionString); } @@ -465,7 +462,6 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "set.dir") { pipecommand = new SetDirectoryCommand(optionString); } else if(commandName == "set.logfile") { pipecommand = new SetLogFileCommand(optionString); } else if(commandName == "parse.list") { pipecommand = new ParseListCommand(optionString); } - else if(commandName == "parse.sff") { pipecommand = new ParseSFFCommand(optionString); } else if(commandName == "phylo.diversity") { pipecommand = new PhyloDiversityCommand(optionString); } else if(commandName == "make.group") { pipecommand = new MakeGroupCommand(optionString); } else if(commandName == "chop.seqs") { pipecommand = new ChopSeqsCommand(optionString); } @@ -577,7 +573,6 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "set.dir") { shellcommand = new SetDirectoryCommand(); } else if(commandName == "set.logfile") { shellcommand = new SetLogFileCommand(); } else if(commandName == "parse.list") { shellcommand = new ParseListCommand(); } - else if(commandName == "parse.sff") { shellcommand = new ParseSFFCommand(); } else if(commandName == "phylo.diversity") { shellcommand = new PhyloDiversityCommand(); } else if(commandName == "make.group") { shellcommand = new MakeGroupCommand(); } else if(commandName == "chop.seqs") { shellcommand = new ChopSeqsCommand(); } diff --git a/corraxescommand.cpp b/corraxescommand.cpp index ccdd740..429ef14 100644 --- a/corraxescommand.cpp +++ b/corraxescommand.cpp @@ -185,7 +185,17 @@ CorrAxesCommand::CorrAxesCommand(string option) { void CorrAxesCommand::help(){ try { - + m->mothurOut("The corr.axes command reads a shared or relabund file as well as a pcoa file and calculates the correlation coefficient.\n"); + m->mothurOut("The corr.axes command parameters are shared, relabund, axes, metadata, groups, method, numaxes and label. The shared or relabund and axes parameters are required. If shared is given the relative abundance is calculated.\n"); + m->mothurOut("The metadata parameter.....\n"); + m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n"); + m->mothurOut("The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n"); + m->mothurOut("The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n"); + m->mothurOut("The numaxes parameter allows you to select the number of axes you would like to use. Default=3.\n"); + m->mothurOut("The corr.axes command should be in the following format: corr.axes(axes=yourPcoaFile, shared=yourSharedFile, method=yourMethod).\n"); + m->mothurOut("Example corr.axes(axes=genus.pool.thetayc.genus.lt.pcoa, shared=genus.pool.shared, method=kendall).\n"); + m->mothurOut("The corr.axes command outputs a .corr.axes file.\n"); + m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n"); } catch(exception& e) { m->errorOut(e, "CorrAxesCommand", "help"); @@ -254,7 +264,7 @@ int CorrAxesCommand::execute(){ // calc the r values // /************************************************************************************/ - string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + "corr.axes"; + string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + method + ".corr.axes"; outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName); ofstream out; m->openOutputFile(outputFileName, out); @@ -478,21 +488,20 @@ int CorrAxesCommand::calcKendall(map >& axes, ofstream& ou //sort each axis for (int i = 0; i < numaxes; i++) { sort(scores[i].begin(), scores[i].end(), compareSpearman); } - //find ranks of xi in each axis - map > rankAxes; + //convert scores to ranks of xi in each axis for (int i = 0; i < numaxes; i++) { - vector ties; + vector ties; int rankTotal = 0; for (int j = 0; j < scores[i].size(); j++) { rankTotal += j; - ties.push_back(scores[i][j]); + ties.push_back(&(scores[i][j])); if (j != scores.size()-1) { // you are not the last so you can look ahead if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); - rankAxes[ties[k].name].push_back(thisrank); + (*ties[k]).score = thisrank; } ties.clear(); rankTotal = 0; @@ -500,17 +509,15 @@ int CorrAxesCommand::calcKendall(map >& axes, ofstream& ou }else { // you are the last one for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); - rankAxes[ties[k].name].push_back(thisrank); + (*ties[k]).score = thisrank; } } } } - - //for each otu for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) { - + out << i+1 << '\t'; //find the ranks of this otu - Y @@ -519,7 +526,7 @@ int CorrAxesCommand::calcKendall(map >& axes, ofstream& ou spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i)); otuScores.push_back(member); } - + sort(otuScores.begin(), otuScores.end(), compareSpearman); map rankOtus; @@ -546,37 +553,36 @@ int CorrAxesCommand::calcKendall(map >& axes, ofstream& ou } } - //calc kendall coeffient for each axis for this otu + //calc spearman ranks for each axis for this otu for (int j = 0; j < numaxes; j++) { + + int P = 0; + //assemble otus ranks in same order as axis ranks + vector otus; + for (int l = 0; l < scores[j].size(); l++) { + spearmanRank member(scores[j][l].name, rankOtus[scores[j][l].name]); + otus.push_back(member); + } - int numConcordant = 0; - int numDiscordant = 0; - - for (int f = 0; f < j; f++) { + for (int l = 0; l < scores[j].size(); l++) { - for (int k = 0; k < lookupFloat.size(); k++) { - - float xi = rankAxes[lookupFloat[k]->getGroup()][j]; - float yi = rankOtus[lookupFloat[k]->getGroup()]; - - for (int h = 0; h < k; h++) { - - float xj = rankAxes[lookupFloat[k]->getGroup()][f]; - float yj = rankOtus[lookupFloat[h]->getGroup()]; - - if ( ((xi > xj) && (yi < yj)) || ((xi < xj) && (yi > yj)) ){ numDiscordant++; } - if ( ((xi > xj) && (yi > yj)) || ((xi < xj) && (yi < yj)) ){ numConcordant++; } - } + int numWithHigherRank = 0; + float thisrank = otus[l].score; + + for (int u = l; u < scores[j].size(); u++) { + if (otus[u].score > thisrank) { numWithHigherRank++; } } + + P += numWithHigherRank; } int n = lookupFloat.size(); - double p = (numConcordant - numDiscordant) / (float) (0.5 * n * (n - 1)); + + double p = ( (4 * P) / (float) (n * (n - 1)) ) - 1.0; out << p << '\t'; } - out << endl; } @@ -840,6 +846,8 @@ map > CorrAxesCommand::readAxes(){ }else { done = true; } } + if (numaxes > count) { m->mothurOut("You requested " + toString(numaxes) + " axes, but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); numaxes = count; } + while (!in.eof()) { if (m->control_pressed) { in.close(); return axes; } diff --git a/getseqscommand.cpp b/getseqscommand.cpp index d40dcc6..a16accb 100644 --- a/getseqscommand.cpp +++ b/getseqscommand.cpp @@ -779,6 +779,30 @@ int GetSeqsCommand::compareAccnos(){ set namesDups; set namesAccnos = names; + map nameCount; + + if (namefile != "") { + ifstream inName; + m->openInputFile(namefile, inName); + + + while(!inName.eof()){ + + if (m->control_pressed) { inName.close(); return 0; } + + string thisname, repnames; + + inName >> thisname; m->gobble(inName); //read from first column + inName >> repnames; //read from second column + + int num = m->getNumNames(repnames); + nameCount[thisname] = num; + + m->gobble(inName); + } + inName.close(); + } + while(!in.eof()){ in >> name; @@ -797,21 +821,27 @@ int GetSeqsCommand::compareAccnos(){ m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine(); for (set::iterator it = namesDups.begin(); it != namesDups.end(); it++) { - out << (*it) << endl; + out << (*it); + if (namefile != "") { out << '\t' << nameCount[(*it)]; } + out << endl; } out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl; m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine(); for (set::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) { - out << (*it) << endl; + out << (*it); + if (namefile != "") { out << '\t' << nameCount[(*it)]; } + out << endl; } out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl; m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine(); for (set::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) { - out << (*it) << endl; + out << (*it); + if (namefile != "") { out << '\t' << nameCount[(*it)]; } + out << endl; } out.close(); diff --git a/makefile b/makefile index 5eb8daa..266c0c3 100644 --- a/makefile +++ b/makefile @@ -58,7 +58,7 @@ ifeq ($(strip $(USEREADLINE)),yes) -lncurses endif -USEMPI ?= yes +USEMPI ?= no ifeq ($(strip $(USEMPI)),yes) CXX = mpic++ diff --git a/mothur b/mothur index a425fb1..2619f5b 100755 Binary files a/mothur and b/mothur differ diff --git a/parsesffcommand.cpp b/parsesffcommand.cpp deleted file mode 100644 index e2f9d61..0000000 --- a/parsesffcommand.cpp +++ /dev/null @@ -1,614 +0,0 @@ -/* - * parsesffcommand.cpp - * Mothur - * - * Created by Pat Schloss on 2/6/10. - * Copyright 2010 Patrick D. Schloss. All rights reserved. - * - */ - -#include "parsesffcommand.h" -#include "sequence.hpp" - -//********************************************************************************************************************** -vector ParseSFFCommand::getValidParameters(){ - try { - string Array[] = {"sff", "oligos", "minlength", "outputdir", "inputdir"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); - return myArray; - } - catch(exception& e) { - m->errorOut(e, "ParseSFFCommand", "getValidParameters"); - exit(1); - } -} -//********************************************************************************************************************** -ParseSFFCommand::ParseSFFCommand(){ - try { - abort = true; - //initialize outputTypes - vector tempOutNames; - outputTypes["flow"] = tempOutNames; - } - catch(exception& e) { - m->errorOut(e, "ParseSFFCommand", "ParseSFFCommand"); - exit(1); - } -} -//********************************************************************************************************************** -vector ParseSFFCommand::getRequiredParameters(){ - try { - string Array[] = {"sff"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); - return myArray; - } - catch(exception& e) { - m->errorOut(e, "ParseSFFCommand", "getRequiredParameters"); - exit(1); - } -} -//********************************************************************************************************************** -vector ParseSFFCommand::getRequiredFiles(){ - try { - vector myArray; - return myArray; - } - catch(exception& e) { - m->errorOut(e, "ParseSFFCommand", "getRequiredFiles"); - exit(1); - } -} -//********************************************************************************************************************** - -ParseSFFCommand::ParseSFFCommand(string option){ - try { - abort = false; - - if(option == "help") { - help(); - abort = true; - } - else { - //valid paramters for this command - string Array[] = {"sff", "oligos", "minlength", "outputdir", "inputdir"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); - - OptionParser parser(option); - map parameters = parser.getParameters(); - - ValidParameters validParameter; - map::iterator it; - - //check to make sure all parameters are valid for command - for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { - if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } - } - - //initialize outputTypes - vector tempOutNames; - outputTypes["flow"] = tempOutNames; - - //if the user changes the input directory command factory will send this info to us in the output parameter - string inputDir = validParameter.validFile(parameters, "inputdir", false); - if (inputDir == "not found"){ inputDir = ""; } - else { - string path; - it = parameters.find("sff"); - //user has given a template file - if(it != parameters.end()){ - path = m->hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["sff"] = inputDir + it->second; } - } - - it = parameters.find("oligos"); - //user has given an oligos file - if(it != parameters.end()){ - path = m->hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["oligos"] = inputDir + it->second; } - } - } - - - //check for required parameters - sffFile = validParameter.validFile(parameters, "sff", true); - if (sffFile == "not found"){ - m->mothurOut("sff is a required parameter for the parse.sff command."); - m->mothurOutEndLine(); - abort = true; - } - else if (sffFile == "not open") { abort = true; } - - //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); - if (outputDir == "not found"){ - outputDir = ""; - outputDir += m->hasPath(sffFile); //if user entered a file with a path then preserve it - } - - //check for optional parameter and set defaults - // ...at some point should added some additional type checking... - oligoFile = validParameter.validFile(parameters, "oligos", true); - if (oligoFile == "not found") { oligoFile = ""; } - else if(oligoFile == "not open"){ abort = true; } - - string temp = validParameter.validFile(parameters, "minlength", false); - if (temp == "not found") { temp = "0"; } - convert(temp, minLength); - } - } - catch(exception& e) { - m->errorOut(e, "ParseSFFCommand", "ParseSFFCommand"); - exit(1); - } -} - -//********************************************************************************************************************** - -ParseSFFCommand::~ParseSFFCommand() { /* do nothing */ } - -//********************************************************************************************************************** - -int ParseSFFCommand::execute(){ - try { - if (abort == true) { return 0; } - - ifstream inSFF; - m->openInputFile(sffFile, inSFF); - - cout.setf(ios::fixed, ios::floatfield); - cout.setf(ios::showpoint); - cout << setprecision(2); - - vector flowFileNames; - if(oligoFile != ""){ - getOligos(flowFileNames); - } - else{ - flowFileNames.push_back(new ofstream((outputDir + m->getRootName(m->getSimpleName(sffFile)) + "flow").c_str(), ios::ate)); - outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(sffFile)) + "flow")); outputTypes["flow"].push_back((outputDir + m->getRootName(m->getSimpleName(sffFile)) + "flow")); - } - - for(int i=0;isetf(ios::fixed, ios::floatfield); - flowFileNames[i]->setf(ios::showpoint); - *flowFileNames[i] << setprecision(2); - } - - if (m->control_pressed) { outputTypes.clear(); for(int i=0;iclose(); } return 0; } - -// ofstream fastaFile; -// m->openOutputFile(m->getRootName(sffFile) + "fasta", fastaFile); - -// ofstream qualFile; -// m->openOutputFile(m->getRootName(sffFile) + "qual", qualFile); - - string commonHeader = m->getline(inSFF); - string magicNumber = m->getline(inSFF); - string version = m->getline(inSFF); - string indexOffset = m->getline(inSFF); - string indexLength = m->getline(inSFF); - int numReads = parseHeaderLineToInt(inSFF); - string headerLength = m->getline(inSFF); - string keyLength = m->getline(inSFF); - int numFlows = parseHeaderLineToInt(inSFF); - string flowgramCode = m->getline(inSFF); - string flowChars = m->getline(inSFF); - string keySequence = m->getline(inSFF); - m->gobble(inSFF); - - string seqName; - bool good = 0; - - for(int i=0;icontrol_pressed) { outputTypes.clear(); for(int i=0;iclose(); } return 0; } - - inSFF >> seqName; - seqName = seqName.substr(1); - m->gobble(inSFF); - - string runPrefix = parseHeaderLineToString(inSFF); - string regionNumber = parseHeaderLineToString(inSFF); - string xyLocation = parseHeaderLineToString(inSFF); - m->gobble(inSFF); - - string runName = parseHeaderLineToString(inSFF); - string analysisName = parseHeaderLineToString(inSFF); - string fullPath = parseHeaderLineToString(inSFF); - m->gobble(inSFF); - - string readHeaderLen = parseHeaderLineToString(inSFF); - string nameLength = parseHeaderLineToString(inSFF); - int numBases = parseHeaderLineToInt(inSFF); - string clipQualLeft = parseHeaderLineToString(inSFF); - int clipQualRight = parseHeaderLineToInt(inSFF); - string clipAdapLeft = parseHeaderLineToString(inSFF); - string clipAdapRight = parseHeaderLineToString(inSFF); - m->gobble(inSFF); - - vector flowVector = parseHeaderLineToFloatVector(inSFF, numFlows); - vector flowIndices = parseHeaderLineToIntVector(inSFF, numBases); - string bases = parseHeaderLineToString(inSFF); - string qualityScores = parseHeaderLineToString(inSFF); - m->gobble(inSFF); - - - - int flowLength = flowIndices[clipQualRight-1]; - - screenFlow(flowVector, flowLength); - string sequence = flow2seq(flowVector, flowLength); - - int group = 0; - - if(minLength != 0 || numFPrimers != 0 || numBarcodes != 0 || numRPrimers != 0){ - good = screenSeq(sequence, group); - } - - if(good){ - *flowFileNames[group] << seqName << ' ' << flowLength; - for(int i=0;imothurOutEndLine(); - for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } - m->mothurOutEndLine(); - -// fastaFile.close(); -// qualFile.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "ParseSFFCommand", "execute"); - exit(1); - } -} - -//********************************************************************************************************************** - -void ParseSFFCommand::help(){ - try { - m->mothurOut("The parse.sff command..."); - m->mothurOutEndLine(); - } - catch(exception& e) { - m->errorOut(e, "ParseSFFCommand", "help"); - exit(1); - } -} - -//********************************************************************************************************************** - -void ParseSFFCommand::getOligos(vector& outSFFFlowVec){ - try { - - ifstream inOligos; - m->openInputFile(oligoFile, inOligos); - - string type, oligo, group; - - int index = 0; - - while(!inOligos.eof()){ - inOligos >> type; - - if(type[0] == '#'){ m->getline(inOligos); } // get rest of line if there's any crap there - else{ - inOligos >> oligo; - - for(int i=0;i> group; - barcodes[oligo]=index++; - groupVector.push_back(group); - - outSFFFlowVec.push_back(new ofstream((outputDir + m->getRootName(m->getSimpleName(sffFile)) + group + ".flow").c_str(), ios::ate)); - outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(sffFile)) + group + "flow")); - outputTypes["flow"].push_back((outputDir + m->getRootName(m->getSimpleName(sffFile)) + group + "flow")); - } - } - m->gobble(inOligos); - } - - inOligos.close(); - - numFPrimers = forPrimer.size(); - numRPrimers = revPrimer.size(); - numBarcodes = barcodes.size(); - } - catch(exception& e) { - m->errorOut(e, "ParseSFFCommand", "getOligos"); - exit(1); - } - -} - -//********************************************************************************************************************** - -int ParseSFFCommand::parseHeaderLineToInt(ifstream& file){ - - int number; - - while (!file.eof()) { - - char c = file.get(); - if (c == ':'){ - file >> number; - break; - } - - } - m->gobble(file); - return number; -} - -//********************************************************************************************************************** - -string ParseSFFCommand::parseHeaderLineToString(ifstream& file){ - - string text; - - while (!file.eof()) { - char c = file.get(); - - if (c == ':'){ - m->gobble(file); - text = m->getline(file); - break; - } - } - m->gobble(file); - - return text; -} - -//********************************************************************************************************************** - -vector ParseSFFCommand::parseHeaderLineToFloatVector(ifstream& file, int length){ - - vector floatVector(length); - - while (!file.eof()) { - char c = file.get(); - if (c == ':'){ - for(int i=0;i> floatVector[i]; - } - break; - } - } - m->gobble(file); - return floatVector; -} - -//********************************************************************************************************************** - -vector ParseSFFCommand::parseHeaderLineToIntVector(ifstream& file, int length){ - - vector intVector(length); - - while (!file.eof()) { - char c = file.get(); - if (c == ':'){ - for(int i=0;i> intVector[i]; - } - break; - } - } - m->gobble(file); - return intVector; -} - -//********************************************************************************************************************** - - -void ParseSFFCommand::screenFlow(vector flowgram, int& length){ - try{ - - int newLength = 0; - - while(newLength * 4 < length){ - - int signal = 0; - int noise = 0; - for(int i=0;i<4;i++){ - float flow = flowgram[i + 4 * newLength]; - - if(flow > 0.50){ - signal++; - if(flow <= 0.69){ // not sure why, but if i make it <0.70 it doesn't work... - noise++; - } - } - } - if(noise > 0 || signal == 0){ - break; - } - newLength++; - } - length = newLength * 4; - } - - catch(exception& e) { - m->errorOut(e, "ParseSFFCommand", "screenFlow"); - exit(1); - } -} - -//********************************************************************************************************************** - -string ParseSFFCommand::flow2seq(vector flowgram, int length){ - - string flow = "TACG"; - string sequence = ""; - for(int i=8;i::iterator it=barcodes.begin();it!=barcodes.end();it++){ - if(compareDNASeq(it->first, sequence.substr(0,(it->first).length()))){ - barcode = 1; - barcodeLength = (it->first).size(); - group = it->second; - break; - } - else{ - barcode = 0; - } - } - - int fPrimer = 1; - for(int i=0;ierrorOut(e, "TrimSeqsCommand", "compareDNASeq"); - exit(1); - } -} - -//********************************************************************************************************************** - -//string ParseSFFCommand::stripSeqQual(string qScores, int start, int end){ -// -// -// return qScores.substr(start-1, end-start+1); -// -//} - -//********************************************************************************************************************** - -//string ParseSFFCommand::stripQualQual(string qScores, int start, int end){ -// -// start--; -// -// int startCount = 0; -// int startIndex = 0; -// -// while(startCount < start && startIndex < qScores.length()){ -// if(isspace(qScores[startIndex])){ -// startCount++; -// } -// startIndex++; -// } -// -// int endCount = startCount; -// int endIndex = startIndex; -// -// while(endCount < end && endIndex < qScores.length()){ -// if(isspace(qScores[endIndex])){ -// endCount++; -// } -// endIndex++; -// } -// -// return qScores.substr(startIndex, endIndex-startIndex-1);//, endCount-startCount); -// -//} - -//********************************************************************************************************************** - - diff --git a/parsesffcommand.h b/parsesffcommand.h deleted file mode 100644 index 9dfa083..0000000 --- a/parsesffcommand.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef PARSESFFCOMMAND_H -#define PARSESFFCOMMAND_H - -/* - * parsesffcommand.h - * Mothur - * - * Created by Pat Schloss on 2/6/10. - * Copyright 2010 Patrick D. Schloss. All rights reserved. - * - */ - - -#include "command.hpp" - -class ParseSFFCommand : public Command { -public: - ParseSFFCommand(string); - ParseSFFCommand(); - ~ParseSFFCommand(); - vector getRequiredParameters(); - vector getValidParameters(); - vector getRequiredFiles(); - map > getOutputFiles() { return outputTypes; } - int execute(); - void help(); - -private: - - int parseHeaderLineToInt(ifstream&); - vector parseHeaderLineToFloatVector(ifstream&, int); - vector parseHeaderLineToIntVector(ifstream&, int); - string parseHeaderLineToString(ifstream&); - void screenFlow(vector, int&); - string flow2seq(vector, int); - bool screenSeq(string&, int&); - bool compareDNASeq(string, string); - void getOligos(vector&); - - - string sffFile; - string oligoFile; - - int minLength; - int numFPrimers, numRPrimers, numBarcodes; - vector forPrimer, revPrimer; - map barcodes; - vector groupVector; - vector outputNames; - map > outputTypes; - -// string stripSeqQual(string, int, int); -// string stripQualQual(string, int, int); - - string outputDir; - bool abort; -}; - -#endif - - diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp index 3562dcc..c9bc469 100644 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -41,7 +41,7 @@ SffInfoCommand::SffInfoCommand(){ //********************************************************************************************************************** vector SffInfoCommand::getRequiredParameters(){ try { - string Array[] = {"sff"}; + string Array[] = {"sff", "sfftxt", "or"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); return myArray; } @@ -99,7 +99,7 @@ SffInfoCommand::SffInfoCommand(string option) { string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } sffFilename = validParameter.validFile(parameters, "sff", false); - if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true; } + if (sffFilename == "not found") { sffFilename = ""; } else { m->splitAtDash(sffFilename, filenames); @@ -221,8 +221,27 @@ SffInfoCommand::SffInfoCommand(string option) { temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; } trim = m->isTrue(temp); - temp = validParameter.validFile(parameters, "sfftxt", false); if (temp == "not found"){ temp = "F"; } - sfftxt = m->isTrue(temp); + temp = validParameter.validFile(parameters, "sfftxt", false); + if (temp == "not found") { temp = "F"; sfftxt = false; sfftxtFilename = ""; } + else if (m->isTrue(temp)) { sfftxt = true; sfftxtFilename = ""; } + else { + //you are a filename + if (inputDir != "") { + map::iterator it = parameters.find("sfftxt"); + //user has given a template file + if(it != parameters.end()){ + string path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["sfftxt"] = inputDir + it->second; } + } + } + + sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true); + if (sfftxtFilename == "not found") { sfftxtFilename = ""; } + else if (sfftxtFilename == "not open") { sfftxtFilename = ""; } + } + + if ((sfftxtFilename == "") && (filenames.size() == 0)) { m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; } } } catch(exception& e) { @@ -234,13 +253,14 @@ SffInfoCommand::SffInfoCommand(string option) { void SffInfoCommand::help(){ try { - m->mothurOut("The sffinfo command reads a sff file and extracts the sequence data.\n"); + m->mothurOut("The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file..\n"); m->mothurOut("The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n"); m->mothurOut("The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n"); m->mothurOut("The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n"); m->mothurOut("The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n"); m->mothurOut("The flow parameter allows you to indicate if you would like a flowgram file generated. Default=False. \n"); m->mothurOut("The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n"); + m->mothurOut("If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n"); m->mothurOut("The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. Default=True. \n"); m->mothurOut("The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n"); m->mothurOut("Example sffinfo(sff=mySffFile.sff, trim=F).\n"); @@ -277,6 +297,8 @@ int SffInfoCommand::execute(){ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + "."); } + if (sfftxtFilename != "") { parseSffTxt(); } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } //report output filenames @@ -823,4 +845,233 @@ int SffInfoCommand::readAccnosFile(string filename) { exit(1); } } -//**********************************************************************************************************************/ +//********************************************************************************************************************** +int SffInfoCommand::parseSffTxt() { + try { + + ifstream inSFF; + m->openInputFile(sfftxtFilename, inSFF); + + if (outputDir == "") { outputDir += m->hasPath(sfftxtFilename); } + + //output file names + ofstream outFasta, outQual, outFlow; + string outFastaFileName, outQualFileName; + string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "flow"; + if (trim) { + outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "fasta"; + outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "qual"; + }else{ + outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.fasta"; + outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.qual"; + } + + if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); } + if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName); } + if (flow) { m->openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName); } + + //read common header + string commonHeader = m->getline(inSFF); + string magicNumber = m->getline(inSFF); + string version = m->getline(inSFF); + string indexOffset = m->getline(inSFF); + string indexLength = m->getline(inSFF); + int numReads = parseHeaderLineToInt(inSFF); + string headerLength = m->getline(inSFF); + string keyLength = m->getline(inSFF); + int numFlows = parseHeaderLineToInt(inSFF); + string flowgramCode = m->getline(inSFF); + string flowChars = m->getline(inSFF); + string keySequence = m->getline(inSFF); + m->gobble(inSFF); + + string seqName; + + if (flow) { outFlow << numFlows << endl; } + + for(int i=0;imothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; } + + Header header; + + //parse read header + inSFF >> seqName; + seqName = seqName.substr(1); + m->gobble(inSFF); + header.name = seqName; + + string runPrefix = parseHeaderLineToString(inSFF); header.timestamp = runPrefix; + string regionNumber = parseHeaderLineToString(inSFF); header.region = regionNumber; + string xyLocation = parseHeaderLineToString(inSFF); header.xy = xyLocation; + m->gobble(inSFF); + + string runName = parseHeaderLineToString(inSFF); + string analysisName = parseHeaderLineToString(inSFF); + string fullPath = parseHeaderLineToString(inSFF); + m->gobble(inSFF); + + string readHeaderLen = parseHeaderLineToString(inSFF); convert(readHeaderLen, header.headerLength); + string nameLength = parseHeaderLineToString(inSFF); convert(nameLength, header.nameLength); + int numBases = parseHeaderLineToInt(inSFF); header.numBases = numBases; + string clipQualLeft = parseHeaderLineToString(inSFF); convert(clipQualLeft, header.clipQualLeft); + int clipQualRight = parseHeaderLineToInt(inSFF); header.clipQualRight = clipQualRight; + string clipAdapLeft = parseHeaderLineToString(inSFF); convert(clipAdapLeft, header.clipAdapterLeft); + string clipAdapRight = parseHeaderLineToString(inSFF); convert(clipAdapRight, header.clipAdapterRight); + m->gobble(inSFF); + + seqRead read; + + //parse read + vector flowVector = parseHeaderLineToFloatVector(inSFF, numFlows); read.flowgram = flowVector; + vector flowIndices = parseHeaderLineToIntVector(inSFF, numBases); + + //adjust for print + vector flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]); + for (int j = 1; j < flowIndices.size(); j++) { flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]); } + read.flowIndex = flowIndicesAdjusted; + + string bases = parseHeaderLineToString(inSFF); read.bases = bases; + vector qualityScores = parseHeaderLineToIntVector(inSFF, numBases); read.qualScores = qualityScores; + m->gobble(inSFF); + + //if you have provided an accosfile and this seq is not in it, then dont print + bool print = true; + if (seqNames.size() != 0) { if (seqNames.count(header.name) == 0) { print = false; } } + + //print + if (print) { + if (fasta) { printFastaSeqData(outFasta, read, header); } + if (qual) { printQualSeqData(outQual, read, header); } + if (flow) { printFlowSeqData(outFlow, read, header); } + } + + //report progress + if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); } + + if (m->control_pressed) { break; } + } + + //report progress + if (!m->control_pressed) { if((numReads) % 10000 != 0){ m->mothurOut(toString(numReads)); m->mothurOutEndLine(); } } + + inSFF.close(); + + if (fasta) { outFasta.close(); } + if (qual) { outQual.close(); } + if (flow) { outFlow.close(); } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "parseSffTxt"); + exit(1); + } +} +//********************************************************************************************************************** + +int SffInfoCommand::parseHeaderLineToInt(ifstream& file){ + try { + int number; + + while (!file.eof()) { + + char c = file.get(); + if (c == ':'){ + file >> number; + break; + } + + } + m->gobble(file); + return number; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt"); + exit(1); + } + +} + +//********************************************************************************************************************** + +string SffInfoCommand::parseHeaderLineToString(ifstream& file){ + try { + string text; + + while (!file.eof()) { + char c = file.get(); + + if (c == ':'){ + //m->gobble(file); + //text = m->getline(file); + file >> text; + break; + } + } + m->gobble(file); + + return text; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString"); + exit(1); + } +} + +//********************************************************************************************************************** + +vector SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){ + try { + vector floatVector(length); + + while (!file.eof()) { + char c = file.get(); + if (c == ':'){ + float temp; + for(int i=0;i> temp; + floatVector[i] = temp * 100; + } + break; + } + } + m->gobble(file); + return floatVector; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector"); + exit(1); + } +} + +//********************************************************************************************************************** + +vector SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){ + try { + vector intVector(length); + + while (!file.eof()) { + char c = file.get(); + if (c == ':'){ + for(int i=0;i> intVector[i]; + } + break; + } + } + m->gobble(file); + return intVector; + } + catch(exception& e) { + m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector"); + exit(1); + } +} + +//********************************************************************************************************************** + + + + diff --git a/sffinfocommand.h b/sffinfocommand.h index d8eb853..d98edbd 100644 --- a/sffinfocommand.h +++ b/sffinfocommand.h @@ -73,12 +73,13 @@ public: void help(); private: - string sffFilename, outputDir, accnosName; + string sffFilename, sfftxtFilename, outputDir, accnosName; vector filenames, outputNames, accnosFileNames; bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos; set seqNames; map > outputTypes; + //extract sff file functions int extractSffInfo(string, string); int readCommonHeader(ifstream&, CommonHeader&); int readHeader(ifstream&, Header&); @@ -92,7 +93,13 @@ private: int printFastaSeqData(ofstream&, seqRead&, Header&); int printQualSeqData(ofstream&, seqRead&, Header&); int readAccnosFile(string); - + int parseSffTxt(); + + //parsesfftxt file functions + int parseHeaderLineToInt(ifstream&); + vector parseHeaderLineToFloatVector(ifstream&, int); + vector parseHeaderLineToIntVector(ifstream&, int); + string parseHeaderLineToString(ifstream&); }; /**********************************************************/