7E962A41121F76B1007464B5 /* invsimpson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = invsimpson.cpp; sourceTree = "<group>"; };
7E99CA1312C8B0970041246B /* shhhercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = shhhercommand.h; sourceTree = "<group>"; };
7E99CA1412C8B0970041246B /* shhhercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = shhhercommand.cpp; sourceTree = "<group>"; };
- 7E99D77C12CBAD780041246B /* untitled.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = untitled.h; path = ../untitled.h; sourceTree = SOURCE_ROOT; };
- 7E99D77D12CBAD780041246B /* untitled.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = untitled.cpp; path = ../untitled.cpp; sourceTree = SOURCE_ROOT; };
7EA299BA11E384940022D8D3 /* sensspeccommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sensspeccommand.h; sourceTree = "<group>"; };
7EA299BB11E384940022D8D3 /* sensspeccommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sensspeccommand.cpp; sourceTree = "<group>"; };
7EC61A0911BEA6AF00F668D9 /* weightedlinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = weightedlinkage.cpp; sourceTree = "<group>"; };
A7DF0AE0121EBB14004A03EA /* getopt_long.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getopt_long.h; sourceTree = "<group>"; };
A7DF0AE1121EBB14004A03EA /* prng.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = prng.cpp; sourceTree = "<group>"; };
A7DF0AE2121EBB14004A03EA /* prng.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = prng.h; sourceTree = "<group>"; };
- A7E8338B115BBDAA00739EC4 /* parsesffcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parsesffcommand.cpp; sourceTree = "<group>"; };
- A7E8338C115BBDAA00739EC4 /* parsesffcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsesffcommand.h; sourceTree = "<group>"; };
A7F0C06412B7D64F0048BC64 /* odum.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = odum.h; sourceTree = "<group>"; };
A7F0C06512B7D64F0048BC64 /* odum.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = odum.cpp; sourceTree = "<group>"; };
A7F0C08A12B7EAE80048BC64 /* canberra.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = canberra.h; sourceTree = "<group>"; };
08FB7794FE84155DC02AAC07 /* mothur */ = {
isa = PBXGroup;
children = (
- 7E99D77C12CBAD780041246B /* untitled.h */,
- 7E99D77D12CBAD780041246B /* untitled.cpp */,
A768D95D1248FEAA008AB1D0 /* mothur */,
A7639F8D1175DF35008F5578 /* makefile */,
A7DA1FF0113FECD400BF472F /* alignment.cpp */,
A724C2B71254961E006BB1C7 /* parsefastaqcommand.cpp */,
A7DA20BD113FECD400BF472F /* parselistscommand.h */,
A7DA20BC113FECD400BF472F /* parselistscommand.cpp */,
- A7E8338C115BBDAA00739EC4 /* parsesffcommand.h */,
- A7E8338B115BBDAA00739EC4 /* parsesffcommand.cpp */,
A7DA20C1113FECD400BF472F /* parsimonycommand.h */,
A7DA20C0113FECD400BF472F /* parsimonycommand.cpp */,
A7DA20C3113FECD400BF472F /* pcacommand.h */,
}
}
//***************************************************************************************************************
-ChimeraSlayer::ChimeraSlayer(string file, string temp, string name, string mode, int k, int ms, int mms, int win, float div,
+ChimeraSlayer::ChimeraSlayer(string file, string temp, string name, string mode, string abunds, int k, int ms, int mms, int win, float div,
int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
try {
fastafile = file; templateSeqs = readSeqs(fastafile);
increment = inc;
numWanted = numw;
realign = r;
+ includeAbunds = abunds;
//read name file and create nameMapRank
readNameFile(name);
//create list of names we want to put into the template
set<string> namesToAdd;
for (itRank = nameMapRank.begin(); itRank != nameMapRank.end(); itRank++) {
- if ((itRank->second).size() > thisRank) {
- //you are more abundant than me
- for (int i = 0; i < (itRank->second).size(); i++) {
- namesToAdd.insert((itRank->second)[i]);
+ if (itRank->first != thisName) {
+ if (includeAbunds == "greaterequal") {
+ if ((itRank->second).size() >= thisRank) {
+ //you are more abundant than me or equal to my abundance
+ for (int i = 0; i < (itRank->second).size(); i++) {
+ namesToAdd.insert((itRank->second)[i]);
+ }
+ }
+ }else if (includeAbunds == "greater") {
+ if ((itRank->second).size() > thisRank) {
+ //you are more abundant than me
+ for (int i = 0; i < (itRank->second).size(); i++) {
+ namesToAdd.insert((itRank->second)[i]);
+ }
+ }
+ }else if (includeAbunds == "all") {
+ //add everyone
+ for (int i = 0; i < (itRank->second).size(); i++) {
+ namesToAdd.insert((itRank->second)[i]);
+ }
}
}
}
public:
ChimeraSlayer(string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool);
- ChimeraSlayer(string, string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool);
+ ChimeraSlayer(string, string, string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool);
~ChimeraSlayer();
map<string, vector<string> > nameMapRank; //sequence name to rank so you can construct a template of the abundant sequences if the user uses itself as template
vector<data_struct> chimeraResults;
- string chimeraFlags, searchMethod, fastafile;
+ string chimeraFlags, searchMethod, fastafile, includeAbunds;
bool realign;
int window, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters, increment;
float divR;
//**********************************************************************************************************************
vector<string> ChimeraSlayerCommand::getValidParameters(){
try {
- string AlignArray[] = {"fasta", "processors", "name","window", "template","numwanted", "ksize", "match","mismatch",
+ string AlignArray[] = {"fasta", "processors", "name","window", "include","template","numwanted", "ksize", "match","mismatch",
"divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir", "search","realign" };
vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
return myArray;
else {
//valid paramters for this command
- string Array[] = {"fasta", "processors","name", "window", "template","numwanted", "ksize", "match","mismatch",
+ string Array[] = {"fasta", "processors","name", "include","window", "template","numwanted", "ksize", "match","mismatch",
"divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir", "search","realign" };
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; }
convert(temp, processors);
+ includeAbunds = validParameter.validFile(parameters, "include", false); if (includeAbunds == "not found") { includeAbunds = "greater"; }
+ if ((includeAbunds != "greater") && (includeAbunds != "greaterequal") && (includeAbunds != "all")) { includeAbunds = "greater"; m->mothurOut("Invalid include setting. options are greater, greaterequal or all. using greater."); m->mothurOutEndLine(); }
+
temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found") { temp = "7"; }
convert(temp, ksize);
chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);
}else {
if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
- chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFileNames[s], search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);
+ chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFileNames[s], search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);
}else {
m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
string nameFile = filenames["name"][0];
fastaFileNames[s] = filenames["fasta"][0];
- chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);
+ chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFile, search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);
}
}
#endif
bool abort, realign;
- string fastafile, templatefile, outputDir, search, namefile;
+ string fastafile, templatefile, outputDir, search, namefile, includeAbunds;
int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
float divR;
Chimera* chimera;
#include "otuhierarchycommand.h"
#include "setdircommand.h"
#include "parselistscommand.h"
-#include "parsesffcommand.h"
#include "chimeraccodecommand.h"
#include "chimeracheckcommand.h"
#include "chimeraslayercommand.h"
commands["set.dir"] = "set.dir";
commands["merge.files"] = "merge.files";
commands["parse.list"] = "parse.list";
- commands["parse.sff"] = "parse.sff";
commands["set.logfile"] = "set.logfile";
commands["phylo.diversity"] = "phylo.diversity";
commands["make.group"] = "make.group";
else if(commandName == "set.dir") { command = new SetDirectoryCommand(optionString); }
else if(commandName == "set.logfile") { command = new SetLogFileCommand(optionString); }
else if(commandName == "parse.list") { command = new ParseListCommand(optionString); }
- else if(commandName == "parse.sff") { command = new ParseSFFCommand(optionString); }
else if(commandName == "phylo.diversity") { command = new PhyloDiversityCommand(optionString); }
else if(commandName == "make.group") { command = new MakeGroupCommand(optionString); }
else if(commandName == "chop.seqs") { command = new ChopSeqsCommand(optionString); }
else if(commandName == "set.dir") { pipecommand = new SetDirectoryCommand(optionString); }
else if(commandName == "set.logfile") { pipecommand = new SetLogFileCommand(optionString); }
else if(commandName == "parse.list") { pipecommand = new ParseListCommand(optionString); }
- else if(commandName == "parse.sff") { pipecommand = new ParseSFFCommand(optionString); }
else if(commandName == "phylo.diversity") { pipecommand = new PhyloDiversityCommand(optionString); }
else if(commandName == "make.group") { pipecommand = new MakeGroupCommand(optionString); }
else if(commandName == "chop.seqs") { pipecommand = new ChopSeqsCommand(optionString); }
else if(commandName == "set.dir") { shellcommand = new SetDirectoryCommand(); }
else if(commandName == "set.logfile") { shellcommand = new SetLogFileCommand(); }
else if(commandName == "parse.list") { shellcommand = new ParseListCommand(); }
- else if(commandName == "parse.sff") { shellcommand = new ParseSFFCommand(); }
else if(commandName == "phylo.diversity") { shellcommand = new PhyloDiversityCommand(); }
else if(commandName == "make.group") { shellcommand = new MakeGroupCommand(); }
else if(commandName == "chop.seqs") { shellcommand = new ChopSeqsCommand(); }
void CorrAxesCommand::help(){
try {
-
+ m->mothurOut("The corr.axes command reads a shared or relabund file as well as a pcoa file and calculates the correlation coefficient.\n");
+ m->mothurOut("The corr.axes command parameters are shared, relabund, axes, metadata, groups, method, numaxes and label. The shared or relabund and axes parameters are required. If shared is given the relative abundance is calculated.\n");
+ m->mothurOut("The metadata parameter.....\n");
+ m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n");
+ m->mothurOut("The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n");
+ m->mothurOut("The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n");
+ m->mothurOut("The numaxes parameter allows you to select the number of axes you would like to use. Default=3.\n");
+ m->mothurOut("The corr.axes command should be in the following format: corr.axes(axes=yourPcoaFile, shared=yourSharedFile, method=yourMethod).\n");
+ m->mothurOut("Example corr.axes(axes=genus.pool.thetayc.genus.lt.pcoa, shared=genus.pool.shared, method=kendall).\n");
+ m->mothurOut("The corr.axes command outputs a .corr.axes file.\n");
+ m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
}
catch(exception& e) {
m->errorOut(e, "CorrAxesCommand", "help");
// calc the r values //
/************************************************************************************/
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + "corr.axes";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + method + ".corr.axes";
outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName);
ofstream out;
m->openOutputFile(outputFileName, out);
//sort each axis
for (int i = 0; i < numaxes; i++) { sort(scores[i].begin(), scores[i].end(), compareSpearman); }
- //find ranks of xi in each axis
- map<string, vector<float> > rankAxes;
+ //convert scores to ranks of xi in each axis
for (int i = 0; i < numaxes; i++) {
- vector<spearmanRank> ties;
+ vector<spearmanRank*> ties;
int rankTotal = 0;
for (int j = 0; j < scores[i].size(); j++) {
rankTotal += j;
- ties.push_back(scores[i][j]);
+ ties.push_back(&(scores[i][j]));
if (j != scores.size()-1) { // you are not the last so you can look ahead
if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
for (int k = 0; k < ties.size(); k++) {
float thisrank = rankTotal / (float) ties.size();
- rankAxes[ties[k].name].push_back(thisrank);
+ (*ties[k]).score = thisrank;
}
ties.clear();
rankTotal = 0;
}else { // you are the last one
for (int k = 0; k < ties.size(); k++) {
float thisrank = rankTotal / (float) ties.size();
- rankAxes[ties[k].name].push_back(thisrank);
+ (*ties[k]).score = thisrank;
}
}
}
}
-
-
//for each otu
for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
-
+
out << i+1 << '\t';
//find the ranks of this otu - Y
spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
otuScores.push_back(member);
}
-
+
sort(otuScores.begin(), otuScores.end(), compareSpearman);
map<string, float> rankOtus;
}
}
- //calc kendall coeffient for each axis for this otu
+ //calc spearman ranks for each axis for this otu
for (int j = 0; j < numaxes; j++) {
+
+ int P = 0;
+ //assemble otus ranks in same order as axis ranks
+ vector<spearmanRank> otus;
+ for (int l = 0; l < scores[j].size(); l++) {
+ spearmanRank member(scores[j][l].name, rankOtus[scores[j][l].name]);
+ otus.push_back(member);
+ }
- int numConcordant = 0;
- int numDiscordant = 0;
-
- for (int f = 0; f < j; f++) {
+ for (int l = 0; l < scores[j].size(); l++) {
- for (int k = 0; k < lookupFloat.size(); k++) {
-
- float xi = rankAxes[lookupFloat[k]->getGroup()][j];
- float yi = rankOtus[lookupFloat[k]->getGroup()];
-
- for (int h = 0; h < k; h++) {
-
- float xj = rankAxes[lookupFloat[k]->getGroup()][f];
- float yj = rankOtus[lookupFloat[h]->getGroup()];
-
- if ( ((xi > xj) && (yi < yj)) || ((xi < xj) && (yi > yj)) ){ numDiscordant++; }
- if ( ((xi > xj) && (yi > yj)) || ((xi < xj) && (yi < yj)) ){ numConcordant++; }
- }
+ int numWithHigherRank = 0;
+ float thisrank = otus[l].score;
+
+ for (int u = l; u < scores[j].size(); u++) {
+ if (otus[u].score > thisrank) { numWithHigherRank++; }
}
+
+ P += numWithHigherRank;
}
int n = lookupFloat.size();
- double p = (numConcordant - numDiscordant) / (float) (0.5 * n * (n - 1));
+
+ double p = ( (4 * P) / (float) (n * (n - 1)) ) - 1.0;
out << p << '\t';
}
-
out << endl;
}
}else { done = true; }
}
+ if (numaxes > count) { m->mothurOut("You requested " + toString(numaxes) + " axes, but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); numaxes = count; }
+
while (!in.eof()) {
if (m->control_pressed) { in.close(); return axes; }
set<string> namesDups;
set<string> namesAccnos = names;
+ map<string, int> nameCount;
+
+ if (namefile != "") {
+ ifstream inName;
+ m->openInputFile(namefile, inName);
+
+
+ while(!inName.eof()){
+
+ if (m->control_pressed) { inName.close(); return 0; }
+
+ string thisname, repnames;
+
+ inName >> thisname; m->gobble(inName); //read from first column
+ inName >> repnames; //read from second column
+
+ int num = m->getNumNames(repnames);
+ nameCount[thisname] = num;
+
+ m->gobble(inName);
+ }
+ inName.close();
+ }
+
while(!in.eof()){
in >> name;
m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine();
for (set<string>::iterator it = namesDups.begin(); it != namesDups.end(); it++) {
- out << (*it) << endl;
+ out << (*it);
+ if (namefile != "") { out << '\t' << nameCount[(*it)]; }
+ out << endl;
}
out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl;
m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine();
for (set<string>::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) {
- out << (*it) << endl;
+ out << (*it);
+ if (namefile != "") { out << '\t' << nameCount[(*it)]; }
+ out << endl;
}
out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl;
m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine();
for (set<string>::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) {
- out << (*it) << endl;
+ out << (*it);
+ if (namefile != "") { out << '\t' << nameCount[(*it)]; }
+ out << endl;
}
out.close();
-lncurses
endif
-USEMPI ?= yes
+USEMPI ?= no
ifeq ($(strip $(USEMPI)),yes)
CXX = mpic++
+++ /dev/null
-/*
- * parsesffcommand.cpp
- * Mothur
- *
- * Created by Pat Schloss on 2/6/10.
- * Copyright 2010 Patrick D. Schloss. All rights reserved.
- *
- */
-
-#include "parsesffcommand.h"
-#include "sequence.hpp"
-
-//**********************************************************************************************************************
-vector<string> ParseSFFCommand::getValidParameters(){
- try {
- string Array[] = {"sff", "oligos", "minlength", "outputdir", "inputdir"};
- vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
- return myArray;
- }
- catch(exception& e) {
- m->errorOut(e, "ParseSFFCommand", "getValidParameters");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-ParseSFFCommand::ParseSFFCommand(){
- try {
- abort = true;
- //initialize outputTypes
- vector<string> tempOutNames;
- outputTypes["flow"] = tempOutNames;
- }
- catch(exception& e) {
- m->errorOut(e, "ParseSFFCommand", "ParseSFFCommand");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-vector<string> ParseSFFCommand::getRequiredParameters(){
- try {
- string Array[] = {"sff"};
- vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
- return myArray;
- }
- catch(exception& e) {
- m->errorOut(e, "ParseSFFCommand", "getRequiredParameters");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-vector<string> ParseSFFCommand::getRequiredFiles(){
- try {
- vector<string> myArray;
- return myArray;
- }
- catch(exception& e) {
- m->errorOut(e, "ParseSFFCommand", "getRequiredFiles");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-
-ParseSFFCommand::ParseSFFCommand(string option){
- try {
- abort = false;
-
- if(option == "help") {
- help();
- abort = true;
- }
- else {
- //valid paramters for this command
- string Array[] = {"sff", "oligos", "minlength", "outputdir", "inputdir"};
- vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-
- OptionParser parser(option);
- map<string,string> parameters = parser.getParameters();
-
- ValidParameters validParameter;
- map<string,string>::iterator it;
-
- //check to make sure all parameters are valid for command
- for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
- if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
- }
-
- //initialize outputTypes
- vector<string> tempOutNames;
- outputTypes["flow"] = tempOutNames;
-
- //if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false);
- if (inputDir == "not found"){ inputDir = ""; }
- else {
- string path;
- it = parameters.find("sff");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["sff"] = inputDir + it->second; }
- }
-
- it = parameters.find("oligos");
- //user has given an oligos file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["oligos"] = inputDir + it->second; }
- }
- }
-
-
- //check for required parameters
- sffFile = validParameter.validFile(parameters, "sff", true);
- if (sffFile == "not found"){
- m->mothurOut("sff is a required parameter for the parse.sff command.");
- m->mothurOutEndLine();
- abort = true;
- }
- else if (sffFile == "not open") { abort = true; }
-
- //if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false);
- if (outputDir == "not found"){
- outputDir = "";
- outputDir += m->hasPath(sffFile); //if user entered a file with a path then preserve it
- }
-
- //check for optional parameter and set defaults
- // ...at some point should added some additional type checking...
- oligoFile = validParameter.validFile(parameters, "oligos", true);
- if (oligoFile == "not found") { oligoFile = ""; }
- else if(oligoFile == "not open"){ abort = true; }
-
- string temp = validParameter.validFile(parameters, "minlength", false);
- if (temp == "not found") { temp = "0"; }
- convert(temp, minLength);
- }
- }
- catch(exception& e) {
- m->errorOut(e, "ParseSFFCommand", "ParseSFFCommand");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-
-ParseSFFCommand::~ParseSFFCommand() { /* do nothing */ }
-
-//**********************************************************************************************************************
-
-int ParseSFFCommand::execute(){
- try {
- if (abort == true) { return 0; }
-
- ifstream inSFF;
- m->openInputFile(sffFile, inSFF);
-
- cout.setf(ios::fixed, ios::floatfield);
- cout.setf(ios::showpoint);
- cout << setprecision(2);
-
- vector<ofstream*> flowFileNames;
- if(oligoFile != ""){
- getOligos(flowFileNames);
- }
- else{
- flowFileNames.push_back(new ofstream((outputDir + m->getRootName(m->getSimpleName(sffFile)) + "flow").c_str(), ios::ate));
- outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(sffFile)) + "flow")); outputTypes["flow"].push_back((outputDir + m->getRootName(m->getSimpleName(sffFile)) + "flow"));
- }
-
- for(int i=0;i<flowFileNames.size();i++){
- flowFileNames[i]->setf(ios::fixed, ios::floatfield);
- flowFileNames[i]->setf(ios::showpoint);
- *flowFileNames[i] << setprecision(2);
- }
-
- if (m->control_pressed) { outputTypes.clear(); for(int i=0;i<flowFileNames.size();i++){ flowFileNames[i]->close(); } return 0; }
-
-// ofstream fastaFile;
-// m->openOutputFile(m->getRootName(sffFile) + "fasta", fastaFile);
-
-// ofstream qualFile;
-// m->openOutputFile(m->getRootName(sffFile) + "qual", qualFile);
-
- string commonHeader = m->getline(inSFF);
- string magicNumber = m->getline(inSFF);
- string version = m->getline(inSFF);
- string indexOffset = m->getline(inSFF);
- string indexLength = m->getline(inSFF);
- int numReads = parseHeaderLineToInt(inSFF);
- string headerLength = m->getline(inSFF);
- string keyLength = m->getline(inSFF);
- int numFlows = parseHeaderLineToInt(inSFF);
- string flowgramCode = m->getline(inSFF);
- string flowChars = m->getline(inSFF);
- string keySequence = m->getline(inSFF);
- m->gobble(inSFF);
-
- string seqName;
- bool good = 0;
-
- for(int i=0;i<numReads;i++){
-
- if (m->control_pressed) { outputTypes.clear(); for(int i=0;i<flowFileNames.size();i++){ flowFileNames[i]->close(); } return 0; }
-
- inSFF >> seqName;
- seqName = seqName.substr(1);
- m->gobble(inSFF);
-
- string runPrefix = parseHeaderLineToString(inSFF);
- string regionNumber = parseHeaderLineToString(inSFF);
- string xyLocation = parseHeaderLineToString(inSFF);
- m->gobble(inSFF);
-
- string runName = parseHeaderLineToString(inSFF);
- string analysisName = parseHeaderLineToString(inSFF);
- string fullPath = parseHeaderLineToString(inSFF);
- m->gobble(inSFF);
-
- string readHeaderLen = parseHeaderLineToString(inSFF);
- string nameLength = parseHeaderLineToString(inSFF);
- int numBases = parseHeaderLineToInt(inSFF);
- string clipQualLeft = parseHeaderLineToString(inSFF);
- int clipQualRight = parseHeaderLineToInt(inSFF);
- string clipAdapLeft = parseHeaderLineToString(inSFF);
- string clipAdapRight = parseHeaderLineToString(inSFF);
- m->gobble(inSFF);
-
- vector<float> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);
- vector<int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases);
- string bases = parseHeaderLineToString(inSFF);
- string qualityScores = parseHeaderLineToString(inSFF);
- m->gobble(inSFF);
-
-
-
- int flowLength = flowIndices[clipQualRight-1];
-
- screenFlow(flowVector, flowLength);
- string sequence = flow2seq(flowVector, flowLength);
-
- int group = 0;
-
- if(minLength != 0 || numFPrimers != 0 || numBarcodes != 0 || numRPrimers != 0){
- good = screenSeq(sequence, group);
- }
-
- if(good){
- *flowFileNames[group] << seqName << ' ' << flowLength;
- for(int i=0;i<numFlows;i++){
- *flowFileNames[group] << ' ' << flowVector[i];
- }
- *flowFileNames[group] << endl;
- }
-
-// string fastaHeader = '>' + seqName + "\tregion=" + regionNumber + " xy=" + xyLocation;
-// fastaFile << fastaHeader << endl;
-// fastaFile << stripSeqQual(bases, clipQualLeft, clipQualRight) << endl;
-//
-// qualFile << fastaHeader << endl;
-// qualFile << stripQualQual(qualityScores, clipQualLeft, clipQualRight) << endl;
-
- }
- for(int i=0;i<flowFileNames.size();i++){
- flowFileNames[i]->close();
- }
-
- m->mothurOutEndLine();
- m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
- m->mothurOutEndLine();
-
-// fastaFile.close();
-// qualFile.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "ParseSFFCommand", "execute");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-
-void ParseSFFCommand::help(){
- try {
- m->mothurOut("The parse.sff command...");
- m->mothurOutEndLine();
- }
- catch(exception& e) {
- m->errorOut(e, "ParseSFFCommand", "help");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-
-void ParseSFFCommand::getOligos(vector<ofstream*>& outSFFFlowVec){
- try {
-
- ifstream inOligos;
- m->openInputFile(oligoFile, inOligos);
-
- string type, oligo, group;
-
- int index = 0;
-
- while(!inOligos.eof()){
- inOligos >> type;
-
- if(type[0] == '#'){ m->getline(inOligos); } // get rest of line if there's any crap there
- else{
- inOligos >> oligo;
-
- for(int i=0;i<oligo.length();i++){
- oligo[i] = toupper(oligo[i]);
- if(oligo[i] == 'U') { oligo[i] = 'T'; }
- }
- if(type == "forward"){
- forPrimer.push_back(oligo);
- }
- else if(type == "reverse"){
- Sequence oligoRC("reverse", oligo);
- oligoRC.reverseComplement();
- revPrimer.push_back(oligoRC.getUnaligned());
- }
- else if(type == "barcode"){
- inOligos >> group;
- barcodes[oligo]=index++;
- groupVector.push_back(group);
-
- outSFFFlowVec.push_back(new ofstream((outputDir + m->getRootName(m->getSimpleName(sffFile)) + group + ".flow").c_str(), ios::ate));
- outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(sffFile)) + group + "flow"));
- outputTypes["flow"].push_back((outputDir + m->getRootName(m->getSimpleName(sffFile)) + group + "flow"));
- }
- }
- m->gobble(inOligos);
- }
-
- inOligos.close();
-
- numFPrimers = forPrimer.size();
- numRPrimers = revPrimer.size();
- numBarcodes = barcodes.size();
- }
- catch(exception& e) {
- m->errorOut(e, "ParseSFFCommand", "getOligos");
- exit(1);
- }
-
-}
-
-//**********************************************************************************************************************
-
-int ParseSFFCommand::parseHeaderLineToInt(ifstream& file){
-
- int number;
-
- while (!file.eof()) {
-
- char c = file.get();
- if (c == ':'){
- file >> number;
- break;
- }
-
- }
- m->gobble(file);
- return number;
-}
-
-//**********************************************************************************************************************
-
-string ParseSFFCommand::parseHeaderLineToString(ifstream& file){
-
- string text;
-
- while (!file.eof()) {
- char c = file.get();
-
- if (c == ':'){
- m->gobble(file);
- text = m->getline(file);
- break;
- }
- }
- m->gobble(file);
-
- return text;
-}
-
-//**********************************************************************************************************************
-
-vector<float> ParseSFFCommand::parseHeaderLineToFloatVector(ifstream& file, int length){
-
- vector<float> floatVector(length);
-
- while (!file.eof()) {
- char c = file.get();
- if (c == ':'){
- for(int i=0;i<length;i++){
- file >> floatVector[i];
- }
- break;
- }
- }
- m->gobble(file);
- return floatVector;
-}
-
-//**********************************************************************************************************************
-
-vector<int> ParseSFFCommand::parseHeaderLineToIntVector(ifstream& file, int length){
-
- vector<int> intVector(length);
-
- while (!file.eof()) {
- char c = file.get();
- if (c == ':'){
- for(int i=0;i<length;i++){
- file >> intVector[i];
- }
- break;
- }
- }
- m->gobble(file);
- return intVector;
-}
-
-//**********************************************************************************************************************
-
-
-void ParseSFFCommand::screenFlow(vector<float> flowgram, int& length){
- try{
-
- int newLength = 0;
-
- while(newLength * 4 < length){
-
- int signal = 0;
- int noise = 0;
- for(int i=0;i<4;i++){
- float flow = flowgram[i + 4 * newLength];
-
- if(flow > 0.50){
- signal++;
- if(flow <= 0.69){ // not sure why, but if i make it <0.70 it doesn't work...
- noise++;
- }
- }
- }
- if(noise > 0 || signal == 0){
- break;
- }
- newLength++;
- }
- length = newLength * 4;
- }
-
- catch(exception& e) {
- m->errorOut(e, "ParseSFFCommand", "screenFlow");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-
-string ParseSFFCommand::flow2seq(vector<float> flowgram, int length){
-
- string flow = "TACG";
- string sequence = "";
- for(int i=8;i<length;i++){
- int signal = int(flowgram[i] + 0.5);
- char base = flow[ i % 4 ];
- for(int j=0;j<signal;j++){
- sequence += base;
- }
- }
- return sequence;
-}
-
-//**********************************************************************************************************************
-
-bool ParseSFFCommand::screenSeq(string& sequence, int& group){
-
- int length = 1;
- group = -1;
-
- if(sequence.length() < minLength){ length = 0; }
-
- int barcode = 1;
- int barcodeLength = 0;
-
- for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
- if(compareDNASeq(it->first, sequence.substr(0,(it->first).length()))){
- barcode = 1;
- barcodeLength = (it->first).size();
- group = it->second;
- break;
- }
- else{
- barcode = 0;
- }
- }
-
- int fPrimer = 1;
- for(int i=0;i<numFPrimers;i++){
- if(compareDNASeq(forPrimer[i], sequence.substr(barcodeLength,forPrimer[i].length()))){
- fPrimer = 1;
- break;
- }
- else{
- fPrimer = 0;
- }
- }
-
- int rPrimer = 1;
- for(int i=0;i<numRPrimers;i++){
- if(compareDNASeq(revPrimer[i], sequence.substr(sequence.length()-revPrimer[i].length(),revPrimer[i].length()))){
- rPrimer = 1;
- break;
- }
- else{
- rPrimer = 0;
- }
- }
-
- return fPrimer * rPrimer * length * barcode;
-
-}
-
-//**********************************************************************************************************************
-
-bool ParseSFFCommand::compareDNASeq(string oligo, string seq){
- try {
- bool success = 1;
- int length = oligo.length();
-
- for(int i=0;i<length;i++){
-
- if(oligo[i] != seq[i]){
- if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
- else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
- else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
- else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
- else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
- else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
- else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
-
- if(success == 0) { break; }
- }
- else{
- success = 1;
- }
- }
-
- return success;
- }
- catch(exception& e) {
- m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-
-//string ParseSFFCommand::stripSeqQual(string qScores, int start, int end){
-//
-//
-// return qScores.substr(start-1, end-start+1);
-//
-//}
-
-//**********************************************************************************************************************
-
-//string ParseSFFCommand::stripQualQual(string qScores, int start, int end){
-//
-// start--;
-//
-// int startCount = 0;
-// int startIndex = 0;
-//
-// while(startCount < start && startIndex < qScores.length()){
-// if(isspace(qScores[startIndex])){
-// startCount++;
-// }
-// startIndex++;
-// }
-//
-// int endCount = startCount;
-// int endIndex = startIndex;
-//
-// while(endCount < end && endIndex < qScores.length()){
-// if(isspace(qScores[endIndex])){
-// endCount++;
-// }
-// endIndex++;
-// }
-//
-// return qScores.substr(startIndex, endIndex-startIndex-1);//, endCount-startCount);
-//
-//}
-
-//**********************************************************************************************************************
-
-
+++ /dev/null
-#ifndef PARSESFFCOMMAND_H
-#define PARSESFFCOMMAND_H
-
-/*
- * parsesffcommand.h
- * Mothur
- *
- * Created by Pat Schloss on 2/6/10.
- * Copyright 2010 Patrick D. Schloss. All rights reserved.
- *
- */
-
-
-#include "command.hpp"
-
-class ParseSFFCommand : public Command {
-public:
- ParseSFFCommand(string);
- ParseSFFCommand();
- ~ParseSFFCommand();
- vector<string> getRequiredParameters();
- vector<string> getValidParameters();
- vector<string> getRequiredFiles();
- map<string, vector<string> > getOutputFiles() { return outputTypes; }
- int execute();
- void help();
-
-private:
-
- int parseHeaderLineToInt(ifstream&);
- vector<float> parseHeaderLineToFloatVector(ifstream&, int);
- vector<int> parseHeaderLineToIntVector(ifstream&, int);
- string parseHeaderLineToString(ifstream&);
- void screenFlow(vector<float>, int&);
- string flow2seq(vector<float>, int);
- bool screenSeq(string&, int&);
- bool compareDNASeq(string, string);
- void getOligos(vector<ofstream*>&);
-
-
- string sffFile;
- string oligoFile;
-
- int minLength;
- int numFPrimers, numRPrimers, numBarcodes;
- vector<string> forPrimer, revPrimer;
- map<string, int> barcodes;
- vector<string> groupVector;
- vector<string> outputNames;
- map<string, vector<string> > outputTypes;
-
-// string stripSeqQual(string, int, int);
-// string stripQualQual(string, int, int);
-
- string outputDir;
- bool abort;
-};
-
-#endif
-
-
//**********************************************************************************************************************
vector<string> SffInfoCommand::getRequiredParameters(){
try {
- string Array[] = {"sff"};
+ string Array[] = {"sff", "sfftxt", "or"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
return myArray;
}
string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; }
sffFilename = validParameter.validFile(parameters, "sff", false);
- if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true; }
+ if (sffFilename == "not found") { sffFilename = ""; }
else {
m->splitAtDash(sffFilename, filenames);
temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; }
trim = m->isTrue(temp);
- temp = validParameter.validFile(parameters, "sfftxt", false); if (temp == "not found"){ temp = "F"; }
- sfftxt = m->isTrue(temp);
+ temp = validParameter.validFile(parameters, "sfftxt", false);
+ if (temp == "not found") { temp = "F"; sfftxt = false; sfftxtFilename = ""; }
+ else if (m->isTrue(temp)) { sfftxt = true; sfftxtFilename = ""; }
+ else {
+ //you are a filename
+ if (inputDir != "") {
+ map<string,string>::iterator it = parameters.find("sfftxt");
+ //user has given a template file
+ if(it != parameters.end()){
+ string path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["sfftxt"] = inputDir + it->second; }
+ }
+ }
+
+ sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);
+ if (sfftxtFilename == "not found") { sfftxtFilename = ""; }
+ else if (sfftxtFilename == "not open") { sfftxtFilename = ""; }
+ }
+
+ if ((sfftxtFilename == "") && (filenames.size() == 0)) { m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; }
}
}
catch(exception& e) {
void SffInfoCommand::help(){
try {
- m->mothurOut("The sffinfo command reads a sff file and extracts the sequence data.\n");
+ m->mothurOut("The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file..\n");
m->mothurOut("The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n");
m->mothurOut("The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n");
m->mothurOut("The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n");
m->mothurOut("The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n");
m->mothurOut("The flow parameter allows you to indicate if you would like a flowgram file generated. Default=False. \n");
m->mothurOut("The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n");
+ m->mothurOut("If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n");
m->mothurOut("The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. Default=True. \n");
m->mothurOut("The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n");
m->mothurOut("Example sffinfo(sff=mySffFile.sff, trim=F).\n");
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");
}
+ if (sfftxtFilename != "") { parseSffTxt(); }
+
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
//report output filenames
exit(1);
}
}
-//**********************************************************************************************************************/
+//**********************************************************************************************************************
+int SffInfoCommand::parseSffTxt() {
+ try {
+
+ ifstream inSFF;
+ m->openInputFile(sfftxtFilename, inSFF);
+
+ if (outputDir == "") { outputDir += m->hasPath(sfftxtFilename); }
+
+ //output file names
+ ofstream outFasta, outQual, outFlow;
+ string outFastaFileName, outQualFileName;
+ string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "flow";
+ if (trim) {
+ outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "fasta";
+ outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "qual";
+ }else{
+ outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.fasta";
+ outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.qual";
+ }
+
+ if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
+ if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName); }
+ if (flow) { m->openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName); }
+
+ //read common header
+ string commonHeader = m->getline(inSFF);
+ string magicNumber = m->getline(inSFF);
+ string version = m->getline(inSFF);
+ string indexOffset = m->getline(inSFF);
+ string indexLength = m->getline(inSFF);
+ int numReads = parseHeaderLineToInt(inSFF);
+ string headerLength = m->getline(inSFF);
+ string keyLength = m->getline(inSFF);
+ int numFlows = parseHeaderLineToInt(inSFF);
+ string flowgramCode = m->getline(inSFF);
+ string flowChars = m->getline(inSFF);
+ string keySequence = m->getline(inSFF);
+ m->gobble(inSFF);
+
+ string seqName;
+
+ if (flow) { outFlow << numFlows << endl; }
+
+ for(int i=0;i<numReads;i++){
+
+ //sanity check
+ if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }
+
+ Header header;
+
+ //parse read header
+ inSFF >> seqName;
+ seqName = seqName.substr(1);
+ m->gobble(inSFF);
+ header.name = seqName;
+
+ string runPrefix = parseHeaderLineToString(inSFF); header.timestamp = runPrefix;
+ string regionNumber = parseHeaderLineToString(inSFF); header.region = regionNumber;
+ string xyLocation = parseHeaderLineToString(inSFF); header.xy = xyLocation;
+ m->gobble(inSFF);
+
+ string runName = parseHeaderLineToString(inSFF);
+ string analysisName = parseHeaderLineToString(inSFF);
+ string fullPath = parseHeaderLineToString(inSFF);
+ m->gobble(inSFF);
+
+ string readHeaderLen = parseHeaderLineToString(inSFF); convert(readHeaderLen, header.headerLength);
+ string nameLength = parseHeaderLineToString(inSFF); convert(nameLength, header.nameLength);
+ int numBases = parseHeaderLineToInt(inSFF); header.numBases = numBases;
+ string clipQualLeft = parseHeaderLineToString(inSFF); convert(clipQualLeft, header.clipQualLeft);
+ int clipQualRight = parseHeaderLineToInt(inSFF); header.clipQualRight = clipQualRight;
+ string clipAdapLeft = parseHeaderLineToString(inSFF); convert(clipAdapLeft, header.clipAdapterLeft);
+ string clipAdapRight = parseHeaderLineToString(inSFF); convert(clipAdapRight, header.clipAdapterRight);
+ m->gobble(inSFF);
+
+ seqRead read;
+
+ //parse read
+ vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows); read.flowgram = flowVector;
+ vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases);
+
+ //adjust for print
+ vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);
+ for (int j = 1; j < flowIndices.size(); j++) { flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]); }
+ read.flowIndex = flowIndicesAdjusted;
+
+ string bases = parseHeaderLineToString(inSFF); read.bases = bases;
+ vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases); read.qualScores = qualityScores;
+ m->gobble(inSFF);
+
+ //if you have provided an accosfile and this seq is not in it, then dont print
+ bool print = true;
+ if (seqNames.size() != 0) { if (seqNames.count(header.name) == 0) { print = false; } }
+
+ //print
+ if (print) {
+ if (fasta) { printFastaSeqData(outFasta, read, header); }
+ if (qual) { printQualSeqData(outQual, read, header); }
+ if (flow) { printFlowSeqData(outFlow, read, header); }
+ }
+
+ //report progress
+ if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); }
+
+ if (m->control_pressed) { break; }
+ }
+
+ //report progress
+ if (!m->control_pressed) { if((numReads) % 10000 != 0){ m->mothurOut(toString(numReads)); m->mothurOutEndLine(); } }
+
+ inSFF.close();
+
+ if (fasta) { outFasta.close(); }
+ if (qual) { outQual.close(); }
+ if (flow) { outFlow.close(); }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SffInfoCommand", "parseSffTxt");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int SffInfoCommand::parseHeaderLineToInt(ifstream& file){
+ try {
+ int number;
+
+ while (!file.eof()) {
+
+ char c = file.get();
+ if (c == ':'){
+ file >> number;
+ break;
+ }
+
+ }
+ m->gobble(file);
+ return number;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");
+ exit(1);
+ }
+
+}
+
+//**********************************************************************************************************************
+
+string SffInfoCommand::parseHeaderLineToString(ifstream& file){
+ try {
+ string text;
+
+ while (!file.eof()) {
+ char c = file.get();
+
+ if (c == ':'){
+ //m->gobble(file);
+ //text = m->getline(file);
+ file >> text;
+ break;
+ }
+ }
+ m->gobble(file);
+
+ return text;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){
+ try {
+ vector<unsigned short> floatVector(length);
+
+ while (!file.eof()) {
+ char c = file.get();
+ if (c == ':'){
+ float temp;
+ for(int i=0;i<length;i++){
+ file >> temp;
+ floatVector[i] = temp * 100;
+ }
+ break;
+ }
+ }
+ m->gobble(file);
+ return floatVector;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){
+ try {
+ vector<unsigned int> intVector(length);
+
+ while (!file.eof()) {
+ char c = file.get();
+ if (c == ':'){
+ for(int i=0;i<length;i++){
+ file >> intVector[i];
+ }
+ break;
+ }
+ }
+ m->gobble(file);
+ return intVector;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+
+
+
void help();
private:
- string sffFilename, outputDir, accnosName;
+ string sffFilename, sfftxtFilename, outputDir, accnosName;
vector<string> filenames, outputNames, accnosFileNames;
bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos;
set<string> seqNames;
map<string, vector<string> > outputTypes;
+ //extract sff file functions
int extractSffInfo(string, string);
int readCommonHeader(ifstream&, CommonHeader&);
int readHeader(ifstream&, Header&);
int printFastaSeqData(ofstream&, seqRead&, Header&);
int printQualSeqData(ofstream&, seqRead&, Header&);
int readAccnosFile(string);
-
+ int parseSffTxt();
+
+ //parsesfftxt file functions
+ int parseHeaderLineToInt(ifstream&);
+ vector<unsigned short> parseHeaderLineToFloatVector(ifstream&, int);
+ vector<unsigned int> parseHeaderLineToIntVector(ifstream&, int);
+ string parseHeaderLineToString(ifstream&);
};
/**********************************************************/