#include "makelefsecommand.h"
#include "lefsecommand.h"
#include "kruskalwalliscommand.h"
+#include "sracommand.h"
/*******************************************************/
commands["make.lefse"] = "make.lefse";
commands["lefse"] = "lefse";
commands["kruskal.wallis"] = "kruskal.wallis";
+ commands["sra"] = "sra";
}
else if(commandName == "make.contigs") { command = new MakeContigsCommand(optionString); }
else if(commandName == "load.logfile") { command = new LoadLogfileCommand(optionString); }
else if(commandName == "sff.multiple") { command = new SffMultipleCommand(optionString); }
- else if(commandName == "classify.rf") { command = new ClassifyRFSharedCommand(optionString); }
+ else if(commandName == "classify.rf") { command = new ClassifyRFSharedCommand(optionString); }
else if(commandName == "filter.shared") { command = new FilterSharedCommand(optionString); }
else if(commandName == "primer.design") { command = new PrimerDesignCommand(optionString); }
else if(commandName == "get.dists") { command = new GetDistsCommand(optionString); }
else if(commandName == "make.lefse") { command = new MakeLefseCommand(optionString); }
else if(commandName == "lefse") { command = new LefseCommand(optionString); }
else if(commandName == "kruskal.wallis") { command = new KruskalWallisCommand(optionString); }
+ else if(commandName == "sra") { command = new SRACommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
else if(commandName == "make.lefse") { pipecommand = new MakeLefseCommand(optionString); }
else if(commandName == "lefse") { pipecommand = new LefseCommand(optionString); }
else if(commandName == "kruskal.wallis") { pipecommand = new KruskalWallisCommand(optionString); }
+ else if(commandName == "sra") { pipecommand = new SRACommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
else if(commandName == "make.lefse") { shellcommand = new MakeLefseCommand(); }
else if(commandName == "lefse") { shellcommand = new LefseCommand(); }
else if(commandName == "kruskal.wallis") { shellcommand = new KruskalWallisCommand(); }
+ else if(commandName == "sra") { shellcommand = new SRACommand(); }
else { shellcommand = new NoCommand(); }
return shellcommand;
else { uniquePrimers.insert(tempPair); }
if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
-
primers[indexPrimer]=newPrimer; indexPrimer++;
primerNameVector.push_back(group);
}else if(type == "BARCODE"){
vector<string> ParseFastaQCommand::setParameters(){
try {
CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq);
+ CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);
+ CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);
+ CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
+ CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
+ CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
+ CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
+ CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
CommandParameter pqual("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqual);
CommandParameter ppacbio("pacbio", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppacbio);
try {
string helpString = "";
helpString += "The fastq.info command reads a fastq file and creates a fasta and quality file.\n";
- helpString += "The fastq.info command parameters are fastq, fasta, qfile and format; fastq is required.\n";
+ helpString += "The fastq.info command parameters are fastq, fasta, qfile, oligos, group and format; fastq is required.\n";
helpString += "The fastq.info command should be in the following format: fastq.info(fastaq=yourFastaQFile).\n";
+ helpString += "The oligos parameter allows you to provide an oligos file to split your fastq file into separate fastq files by barcode and primers. \n";
+ helpString += "The group parameter allows you to provide a group file to split your fastq file into separate fastq files by group. \n";
+ helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the reads. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
+ helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
+ helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+ helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
+ helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n";
helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
string pattern = "";
if (type == "fasta") { pattern = "[filename],fasta"; }
- else if (type == "qfile") { pattern = "[filename],qual"; }
+ else if (type == "qfile") { pattern = "[filename],qual"; }
+ else if (type == "fastq") { pattern = "[filename],[group],fastq"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
return pattern;
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["qfile"] = tempOutNames;
+ outputTypes["fastq"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
//**********************************************************************************************************************
ParseFastaQCommand::ParseFastaQCommand(string option){
try {
- abort = false; calledHelp = false;
+ abort = false; calledHelp = false;
+ split = 1;
if(option == "help") { help(); abort = true; calledHelp = true; }
else if(option == "citation") { citation(); abort = true; calledHelp = true;}
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["qfile"] = tempOutNames;
+ outputTypes["fastq"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["fastq"] = inputDir + it->second; }
}
+
+ it = parameters.find("oligos");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["oligos"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("group");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["group"] = inputDir + it->second; }
+ }
}
//check for required parameters
fastaQFile = validParameter.validFile(parameters, "fastq", true);
if (fastaQFile == "not found") { m->mothurOut("fastq is a required parameter for the fastq.info command."); m->mothurOutEndLine(); abort = true; }
- else if (fastaQFile == "not open") { fastaQFile = ""; abort = true; }
+ else if (fastaQFile == "not open") { fastaQFile = ""; abort = true; }
+
+ oligosfile = validParameter.validFile(parameters, "oligos", true);
+ if (oligosfile == "not found") { oligosfile = ""; }
+ else if (oligosfile == "not open") { oligosfile = ""; abort = true; }
+ else { m->setOligosFile(oligosfile); split = 2; }
+
+ groupfile = validParameter.validFile(parameters, "group", true);
+ if (groupfile == "not found") { groupfile = ""; }
+ else if (groupfile == "not open") { groupfile = ""; abort = true; }
+ else { m->setGroupFile(groupfile); split = 2; }
+
+ if ((groupfile != "") && (oligosfile != "")) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group."); m->mothurOutEndLine(); abort = true; }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastaQFile); }
temp = validParameter.validFile(parameters, "pacbio", false); if(temp == "not found"){ temp = "F"; }
pacbio = m->isTrue(temp);
+ temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, bdiffs);
+
+ temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, pdiffs);
+
+ temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, ldiffs);
+
+ temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, sdiffs);
+
+ temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
+ m->mothurConvert(temp, tdiffs);
+
+ if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
+
format = validParameter.validFile(parameters, "format", false); if (format == "not found"){ format = "sanger"; }
if (fasta) { m->openOutputFile(fastaFile, outFasta); outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile); }
if (qual) { m->openOutputFile(qualFile, outQual); outputNames.push_back(qualFile); outputTypes["qfile"].push_back(qualFile); }
+
+ TrimOligos* trimOligos = NULL;
+ int numBarcodes, numPrimers; numBarcodes = 0; numPrimers = 0;
+ if (oligosfile != "") {
+ readOligos(oligosfile);
+ numPrimers = primers.size(); numBarcodes = barcodes.size();
+ //find group read belongs to
+ if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); numPrimers = pairedPrimers.size(); }
+ else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); }
+
+ }
+ else if (groupfile != "") { readGroup(groupfile); }
ifstream in;
m->openInputFile(fastaQFile, in);
convertTable.push_back(temp);
}
+
+ int count = 0;
while (!in.eof()) {
if (m->control_pressed) { break; }
-
- //read sequence name
- string name = m->getline(in); m->gobble(in);
- if (name == "") { m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
- else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
- else {
- name = name.substr(1);
- m->checkName(name);
- }
-
- //read sequence
- string sequence = m->getline(in); m->gobble(in);
- if (sequence == "") { m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; break; }
-
- //read sequence name
- string name2 = m->getline(in); m->gobble(in);
- if (name2 == "") { m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
- else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
- else {
- name2 = name2.substr(1);
- m->checkName(name2);
- }
-
- //read quality scores
- string quality = m->getline(in); m->gobble(in);
- if (quality == "") { m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; break; }
-
- //sanity check sequence length and number of quality scores match
- if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } }
- if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-
- vector<int> qualScores;
- if (qual) {
- qualScores = convertQual(quality);
- outQual << ">" << name << endl;
- for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
- outQual << endl;
- }
- if (m->control_pressed) { break; }
+ bool ignore;
+ fastqRead2 thisRead = readFastq(in, ignore);
- if (pacbio) {
- if (!qual) { qualScores = convertQual(quality); } //get scores if we didn't already
- for (int i = 0; i < qualScores.size(); i++) {
- if (qualScores[i] == 0){ sequence[i] = 'N'; }
+ if (!ignore) {
+ vector<int> qualScores;
+ if (qual) {
+ qualScores = convertQual(thisRead.quality);
+ outQual << ">" << thisRead.seq.getName() << endl;
+ for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
+ outQual << endl;
}
- }
-
- //print sequence info to files
- if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
-
+
+ if (m->control_pressed) { break; }
+
+ if (pacbio) {
+ if (!qual) { qualScores = convertQual(thisRead.quality); } //convert if not done
+ string sequence = thisRead.seq.getAligned();
+ for (int i = 0; i < qualScores.size(); i++) {
+ if (qualScores[i] == 0){ sequence[i] = 'N'; }
+ }
+ thisRead.seq.setAligned(sequence);
+ }
+
+ //print sequence info to files
+ if (fasta) { thisRead.seq.printSequence(outFasta); }
+
+ if (split > 1) {
+ int barcodeIndex, primerIndex, trashCodeLength;
+ if (oligosfile != "") { trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, trimOligos, numBarcodes, numPrimers); }
+ else if (groupfile != "") { trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, "groupMode"); }
+ else { m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }
+
+ if(trashCodeLength == 0){
+ ofstream out;
+ m->openOutputFileAppend(fastqFileNames[barcodeIndex][primerIndex], out);
+ out << thisRead.wholeRead;
+ out.close();
+ }else{
+ ofstream out;
+ m->openOutputFileAppend(noMatchFile, out);
+ out << thisRead.wholeRead;
+ out.close();
+ }
+ }
+ //report progress
+ if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); }
+ if(count > 100000){ break; }
+ count++;
+ }
}
in.close();
if (fasta) { outFasta.close(); }
if (qual) { outQual.close(); }
+
+ //report progress
+ if (!m->control_pressed) { if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } }
+
+ if (split > 1) {
+
+ if (groupfile != "") { delete groupMap; }
+ else if (oligosfile != "") { delete trimOligos; }
+
+ map<string, string>::iterator it;
+ set<string> namesToRemove;
+ for(int i=0;i<fastqFileNames.size();i++){
+ for(int j=0;j<fastqFileNames[0].size();j++){
+ if (fastqFileNames[i][j] != "") {
+ if (namesToRemove.count(fastqFileNames[i][j]) == 0) {
+ if(m->isBlank(fastqFileNames[i][j])){
+ m->mothurRemove(fastqFileNames[i][j]);
+ namesToRemove.insert(fastqFileNames[i][j]);
+ }
+ }
+ }
+ }
+ }
+
+ //remove names for outputFileNames, just cleans up the output
+ for(int i = 0; i < outputNames.size(); i++) {
+ if (namesToRemove.count(outputNames[i]) != 0) {
+ outputNames.erase(outputNames.begin()+i);
+ i--;
+ }
+ }
+ if(m->isBlank(noMatchFile)){ m->mothurRemove(noMatchFile); }
+ else { outputNames.push_back(noMatchFile); outputTypes["fastq"].push_back(noMatchFile); }
+ }
if (m->control_pressed) { outputTypes.clear(); outputNames.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
exit(1);
}
}
+//**********************************************************************************************************************
+fastqRead2 ParseFastaQCommand::readFastq(ifstream& in, bool& ignore){
+ try {
+ ignore = false;
+ string wholeRead = "";
+
+ //read sequence name
+ string line = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += line + "\n"; }
+ vector<string> pieces = m->splitWhiteSpace(line);
+ string name = ""; if (pieces.size() != 0) { name = pieces[0]; }
+ if (name == "") { m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true; }
+ else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
+ else { name = name.substr(1); }
+
+ //read sequence
+ string sequence = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += sequence + "\n"; }
+ if (sequence == "") { m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
+
+ //read sequence name
+ line = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += line + "\n"; }
+ pieces = m->splitWhiteSpace(line);
+ string name2 = ""; if (pieces.size() != 0) { name2 = pieces[0]; }
+ if (name2 == "") { m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
+ else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
+ else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
+
+
+ //read quality scores
+ string quality = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += quality + "\n"; }
+ if (quality == "") { m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
+
+ //sanity check sequence length and number of quality scores match
+ if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
+ if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
+
+ m->checkName(name);
+ Sequence seq(name, sequence);
+ fastqRead2 read(seq, quality, wholeRead);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: " + read.seq.getName() + " " + read.seq.getAligned() + " " + quality + "\n"); }
+
+ return read;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ParseFastaQCommand", "readFastq");
+ exit(1);
+ }
+}
+
//**********************************************************************************************************************
vector<int> ParseFastaQCommand::convertQual(string qual) {
try {
}
}
//**********************************************************************************************************************
+int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, TrimOligos*& trimOligos, int numBarcodes, int numPrimers) {
+ try {
+ int success = 1;
+ string trashCode = "";
+ int currentSeqsDiffs = 0;
+
+ Sequence currSeq(thisRead.seq.getName(), thisRead.seq.getAligned());
+ QualityScores currQual; currQual.setScores(convertQual(thisRead.quality));
+
+ if(linker.size() != 0){
+ success = trimOligos->stripLinker(currSeq, currQual);
+ if(success > ldiffs) { trashCode += 'k'; }
+ else{ currentSeqsDiffs += success; }
+
+ }
+
+ if(numBarcodes != 0){
+ success = trimOligos->stripBarcode(currSeq, currQual, barcode);
+ if(success > bdiffs) { trashCode += 'b'; }
+ else{ currentSeqsDiffs += success; }
+ }
+
+ if(spacer.size() != 0){
+ success = trimOligos->stripSpacer(currSeq, currQual);
+ if(success > sdiffs) { trashCode += 's'; }
+ else{ currentSeqsDiffs += success; }
+
+ }
+
+ if(numPrimers != 0){
+ success = trimOligos->stripForward(currSeq, currQual, primer, true);
+ if(success > pdiffs) { trashCode += 'f'; }
+ else{ currentSeqsDiffs += success; }
+ }
+
+ if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
+
+ if(revPrimer.size() != 0){
+ success = trimOligos->stripReverse(currSeq, currQual);
+ if(!success) { trashCode += 'r'; }
+ }
+
+
+ return trashCode.length();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ParseFastaQCommand", "findGroup");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, string groupMode) {
+ try {
+ string trashCode = "";
+ primer = 0;
+
+ string group = groupMap->getGroup(thisRead.seq.getName());
+ if (group == "not found") { trashCode += "g"; } //scrap for group
+ else { //find file group
+ map<string, int>::iterator it = barcodes.find(group);
+ if (it != barcodes.end()) {
+ barcode = it->second;
+ }else { trashCode += "g"; }
+ }
+
+ return trashCode.length();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ParseFastaQCommand", "findGroup");
+ exit(1);
+ }
+}
+//***************************************************************************************************************
+
+bool ParseFastaQCommand::readOligos(string oligoFile){
+ try {
+ ifstream inOligos;
+ m->openInputFile(oligoFile, inOligos);
+
+ string type, oligo, roligo, group;
+ bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false;
+
+ int indexPrimer = 0;
+ int indexBarcode = 0;
+ int indexPairedPrimer = 0;
+ int indexPairedBarcode = 0;
+ set<string> uniquePrimers;
+ set<string> uniqueBarcodes;
+
+ while(!inOligos.eof()){
+
+ inOligos >> type;
+
+ if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
+
+ if(type[0] == '#'){
+ while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ m->gobble(inOligos);
+ }
+ else{
+ m->gobble(inOligos);
+ //make type case insensitive
+ for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
+
+ inOligos >> oligo;
+
+ if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
+
+ for(int i=0;i<oligo.length();i++){
+ oligo[i] = toupper(oligo[i]);
+ if(oligo[i] == 'U') { oligo[i] = 'T'; }
+ }
+
+ if(type == "FORWARD"){
+ group = "";
+
+ // get rest of line in case there is a primer name
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13 || c == -1){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ else { group += c; }
+ }
+
+ //check for repeat barcodes
+ map<string, int>::iterator itPrime = primers.find(oligo);
+ if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
+
+ if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
+
+ primers[oligo]=indexPrimer; indexPrimer++;
+ primerNameVector.push_back(group);
+ }
+ else if (type == "PRIMER"){
+ m->gobble(inOligos);
+
+ inOligos >> roligo;
+
+ for(int i=0;i<roligo.length();i++){
+ roligo[i] = toupper(roligo[i]);
+ if(roligo[i] == 'U') { roligo[i] = 'T'; }
+ }
+ roligo = reverseOligo(roligo);
+
+ group = "";
+
+ // get rest of line in case there is a primer name
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13 || c == -1){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ else { group += c; }
+ }
+
+ oligosPair newPrimer(oligo, roligo);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
+
+ //check for repeat barcodes
+ string tempPair = oligo+roligo;
+ if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
+ else { uniquePrimers.insert(tempPair); }
+
+ if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
+
+ pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
+ primerNameVector.push_back(group);
+ hasPrimer = true;
+ }
+ else if(type == "REVERSE"){
+ //Sequence oligoRC("reverse", oligo);
+ //oligoRC.reverseComplement();
+ string oligoRC = reverseOligo(oligo);
+ revPrimer.push_back(oligoRC);
+ }
+ else if(type == "BARCODE"){
+ inOligos >> group;
+
+ //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
+ //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
+
+ string temp = "";
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13 || c == -1){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ else { temp += c; }
+ }
+
+ //then this is illumina data with 4 columns
+ if (temp != "") {
+ hasPairedBarcodes = true;
+ string reverseBarcode = group; //reverseOligo(group); //reverse barcode
+ group = temp;
+
+ for(int i=0;i<reverseBarcode.length();i++){
+ reverseBarcode[i] = toupper(reverseBarcode[i]);
+ if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
+ }
+
+ reverseBarcode = reverseOligo(reverseBarcode);
+ oligosPair newPair(oligo, reverseBarcode);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
+ //check for repeat barcodes
+ string tempPair = oligo+reverseBarcode;
+ if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
+ else { uniqueBarcodes.insert(tempPair); }
+
+ pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
+ barcodeNameVector.push_back(group);
+ }else {
+ //check for repeat barcodes
+ map<string, int>::iterator itBar = barcodes.find(oligo);
+ if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
+
+ barcodes[oligo]=indexBarcode; indexBarcode++;
+ barcodeNameVector.push_back(group);
+ }
+ }else if(type == "LINKER"){
+ linker.push_back(oligo);
+ }else if(type == "SPACER"){
+ spacer.push_back(oligo);
+ }
+ else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
+ }
+ m->gobble(inOligos);
+ }
+ inOligos.close();
+
+ if (hasPairedBarcodes || hasPrimer) {
+ pairedOligos = true;
+ if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
+ }
+
+ //add in potential combos
+ if(barcodeNameVector.size() == 0){
+ barcodes[""] = 0;
+ barcodeNameVector.push_back("");
+ }
+
+ if(primerNameVector.size() == 0){
+ primers[""] = 0;
+ primerNameVector.push_back("");
+ }
+
+ fastqFileNames.resize(barcodeNameVector.size());
+ for(int i=0;i<fastqFileNames.size();i++){
+ fastqFileNames[i].assign(primerNameVector.size(), "");
+ }
+
+
+ set<string> uniqueNames; //used to cleanup outputFileNames
+ if (pairedOligos) {
+ for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
+ for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
+
+ string primerName = primerNameVector[itPrimer->first];
+ string barcodeName = barcodeNameVector[itBar->first];
+
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else {
+ string comboGroupName = "";
+ string fastqFileName = "";
+
+ if(primerName == ""){
+ comboGroupName = barcodeNameVector[itBar->first];
+ }
+ else{
+ if(barcodeName == ""){
+ comboGroupName = primerNameVector[itPrimer->first];
+ }
+ else{
+ comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
+ }
+ }
+
+
+ ofstream temp;
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+ variables["[group]"] = comboGroupName;
+ fastqFileName = getOutputFileName("fastq", variables);
+ if (uniqueNames.count(fastqFileName) == 0) {
+ outputNames.push_back(fastqFileName);
+ outputTypes["fastq"].push_back(fastqFileName);
+ uniqueNames.insert(fastqFileName);
+ }
+
+ fastqFileNames[itBar->first][itPrimer->first] = fastqFileName;
+ m->openOutputFile(fastqFileName, temp); temp.close();
+
+ }
+ }
+ }
+ }else {
+ for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+ for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+
+ string primerName = primerNameVector[itPrimer->second];
+ string barcodeName = barcodeNameVector[itBar->second];
+
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else {
+ string comboGroupName = "";
+ string fastqFileName = "";
+
+ if(primerName == ""){
+ comboGroupName = barcodeNameVector[itBar->second];
+ }
+ else{
+ if(barcodeName == ""){
+ comboGroupName = primerNameVector[itPrimer->second];
+ }
+ else{
+ comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+ }
+ }
+
+
+ ofstream temp;
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+ variables["[group]"] = comboGroupName;
+ fastqFileName = getOutputFileName("fastq", variables);
+ if (uniqueNames.count(fastqFileName) == 0) {
+ outputNames.push_back(fastqFileName);
+ outputTypes["fastq"].push_back(fastqFileName);
+ uniqueNames.insert(fastqFileName);
+ }
+
+ fastqFileNames[itBar->second][itPrimer->second] = fastqFileName;
+ m->openOutputFile(fastqFileName, temp); temp.close();
+
+ }
+ }
+ }
+ }
+
+ ofstream temp;
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+ variables["[group]"] = "scrap";
+ noMatchFile = getOutputFileName("fastq", variables);
+ m->openOutputFile(noMatchFile, temp); temp.close();
+
+ return true;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ParseFastaQCommand", "getOligos");
+ exit(1);
+ }
+}
+//***************************************************************************************************************
+bool ParseFastaQCommand::readGroup(string groupfile){
+ try {
+ fastqFileNames.clear();
+
+ groupMap = new GroupMap();
+ groupMap->readMap(groupfile);
+
+ //like barcodeNameVector - no primer names
+ vector<string> groups = groupMap->getNamesOfGroups();
+
+ fastqFileNames.resize(groups.size());
+ for (int i = 0; i < fastqFileNames.size(); i++) {
+ for (int j = 0; j < 1; j++) {
+
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+ variables["[group]"] = groups[i];
+ string thisFilename = getOutputFileName("fastq",variables);
+ outputNames.push_back(thisFilename);
+ outputTypes["fastq"].push_back(thisFilename);
+
+ ofstream temp;
+ m->openOutputFileBinary(thisFilename, temp); temp.close();
+ fastqFileNames[i].push_back(thisFilename);
+ barcodes[groups[i]] = i;
+ }
+ }
+
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+ variables["[group]"] = "scrap";
+ noMatchFile = getOutputFileName("fastq",variables);
+ m->mothurRemove(noMatchFile);
+
+ return true;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ParseFastaQCommand", "readGroup");
+ exit(1);
+ }
+}
+//********************************************************************/
+string ParseFastaQCommand::reverseOligo(string oligo){
+ try {
+ string reverse = "";
+
+ for(int i=oligo.length()-1;i>=0;i--){
+
+ if(oligo[i] == 'A') { reverse += 'T'; }
+ else if(oligo[i] == 'T'){ reverse += 'A'; }
+ else if(oligo[i] == 'U'){ reverse += 'A'; }
+
+ else if(oligo[i] == 'G'){ reverse += 'C'; }
+ else if(oligo[i] == 'C'){ reverse += 'G'; }
+
+ else if(oligo[i] == 'R'){ reverse += 'Y'; }
+ else if(oligo[i] == 'Y'){ reverse += 'R'; }
+
+ else if(oligo[i] == 'M'){ reverse += 'K'; }
+ else if(oligo[i] == 'K'){ reverse += 'M'; }
+
+ else if(oligo[i] == 'W'){ reverse += 'W'; }
+ else if(oligo[i] == 'S'){ reverse += 'S'; }
+
+ else if(oligo[i] == 'B'){ reverse += 'V'; }
+ else if(oligo[i] == 'V'){ reverse += 'B'; }
+
+ else if(oligo[i] == 'D'){ reverse += 'H'; }
+ else if(oligo[i] == 'H'){ reverse += 'D'; }
+
+ else { reverse += 'N'; }
+ }
+
+
+ return reverse;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ParseFastaQCommand", "reverseOligo");
+ exit(1);
+ }
+}
+
+
+//**********************************************************************************************************************
#include "command.hpp"
+#include "trimoligos.h"
+#include "sequence.hpp"
+#include "groupmap.h"
+
+struct fastqRead2 {
+ string quality;
+ Sequence seq;
+ string wholeRead;
+
+ fastqRead2() { };
+ fastqRead2(Sequence s, string q, string w) : seq(s), quality(q), wholeRead(w){};
+ ~fastqRead2() {};
+};
+
class ParseFastaQCommand : public Command {
private:
vector<string> outputNames;
- string outputDir, fastaQFile, format;
- bool abort, fasta, qual, pacbio;
+ string outputDir, fastaQFile, format, oligosfile, groupfile;
+ bool abort, fasta, qual, pacbio, pairedOligos;
+ int pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, split;
+ GroupMap* groupMap;
+
+ //oligos file data structures
+ vector<string> linker, spacer, primerNameVector, barcodeNameVector, revPrimer;
+ map<string, int> barcodes;
+ map<string, int> primers;
+ map<int, oligosPair> pairedBarcodes;
+ map<int, oligosPair> pairedPrimers;
+ vector<vector<string> > fastqFileNames;
+ string noMatchFile;
vector<int> convertQual(string);
vector<char> convertTable;
+ bool readOligos(string oligosFile);
+ bool readGroup(string oligosFile);
+ string reverseOligo(string oligo);
+ fastqRead2 readFastq(ifstream&, bool&);
+ int findGroup(fastqRead2, int&, int&, TrimOligos*&, int, int);
+ int findGroup(fastqRead2, int&, int&, string);
+
};
#endif
vector<string> SffInfoCommand::setParameters(){ \r
try { \r
CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff);\r
- CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(poligos);\r
+ CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);\r
+ CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);\r
CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);\r
CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);\r
CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow);\r
try {\r
string helpString = "";\r
helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";\r
- helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n";\r
+ helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n";\r
helpString += "The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n";\r
helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n";\r
helpString += "The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n";\r
helpString += "The oligos parameter allows you to provide an oligos file to split your sff file into separate sff files by barcode. \n";\r
+ helpString += "The group parameter allows you to provide a group file to split your sff file into separate sff files by group. \n";\r
helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";\r
helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";\r
helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";\r
SffInfoCommand::SffInfoCommand(string option) {\r
try {\r
abort = false; calledHelp = false; \r
- hasAccnos = false; hasOligos = false;\r
+ hasAccnos = false; hasOligos = false; hasGroup = false;\r
split = 1;\r
\r
//allow user to run help\r
bool ignore = false;\r
if (oligosFileNames[i] == "current") { \r
oligosFileNames[i] = m->getOligosFile(); \r
- if (oligosFileNames[i] != "") { m->mothurOut("Using " + oligosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }\r
+ if (oligosFileNames[i] != "") { m->mothurOut("Using " + oligosFileNames[i] + " as input file for the oligos parameter where you had given current."); m->mothurOutEndLine(); }\r
else { \r
m->mothurOut("You have no current oligosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
//erase from file list\r
//make sure there is at least one valid file left\r
if (oligosFileNames.size() == 0) { m->mothurOut("no valid oligos files."); m->mothurOutEndLine(); abort = true; }\r
}\r
+ \r
+ groupfile = validParameter.validFile(parameters, "group", false);\r
+ if (groupfile == "not found") { groupfile = ""; }\r
+ else {\r
+ hasGroup = true;\r
+ m->splitAtDash(groupfile, groupFileNames);\r
+ \r
+ //go through files and make sure they are good, if not, then disregard them\r
+ for (int i = 0; i < groupFileNames.size(); i++) {\r
+ bool ignore = false;\r
+ if (groupFileNames[i] == "current") {\r
+ groupFileNames[i] = m->getGroupFile();\r
+ if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }\r
+ else {\r
+ m->mothurOut("You have no current group file, ignoring current."); m->mothurOutEndLine(); ignore=true;\r
+ //erase from file list\r
+ groupFileNames.erase(groupFileNames.begin()+i);\r
+ i--;\r
+ }\r
+ }\r
+ \r
+ if (!ignore) {\r
+ \r
+ if (inputDir != "") {\r
+ string path = m->hasPath(groupFileNames[i]);\r
+ //if the user has not given a path then, add inputdir. else leave path alone.\r
+ if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }\r
+ }\r
+ \r
+ ifstream in;\r
+ int ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");\r
+ \r
+ //if you can't open it, try default location\r
+ if (ableToOpen == 1) {\r
+ if (m->getDefaultPath() != "") { //default path is set\r
+ string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);\r
+ m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
+ ifstream in2;\r
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+ in2.close();\r
+ groupFileNames[i] = tryPath;\r
+ }\r
+ }\r
+ //if you can't open it, try default location\r
+ if (ableToOpen == 1) {\r
+ if (m->getOutputDir() != "") { //default path is set\r
+ string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);\r
+ m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
+ ifstream in2;\r
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+ in2.close();\r
+ groupFileNames[i] = tryPath;\r
+ }\r
+ }\r
+ in.close();\r
+ \r
+ if (ableToOpen == 1) {\r
+ m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
+ //erase from file list\r
+ groupFileNames.erase(groupFileNames.begin()+i);\r
+ i--;\r
+ }\r
+ }\r
+ }\r
+ \r
+ //make sure there is at least one valid file left\r
+ if (groupFileNames.size() == 0) { m->mothurOut("no valid group files."); m->mothurOutEndLine(); abort = true; }\r
+ }\r
\r
- if (hasOligos) {\r
+ if (hasGroup) {\r
+ split = 2;\r
+ if (groupFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
+ }\r
+ \r
+ if (hasOligos) {\r
split = 2;\r
- if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a oligos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
+ if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide an oligos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
}\r
\r
+ if (hasGroup && hasOligos) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group."); m->mothurOutEndLine(); abort = true;}\r
+ \r
if (hasAccnos) {\r
if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
}\r
\r
string oligos = "";\r
if (hasOligos) { oligos = oligosFileNames[s]; }\r
- \r
+ if (hasGroup) { oligos = groupFileNames[s]; }\r
+ \r
int numReads = extractSffInfo(filenames[s], accnos, oligos);\r
\r
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");\r
\r
if (accnos != "") { readAccnosFile(accnos); }\r
else { seqNames.clear(); }\r
- \r
- if (oligos != "") { readOligos(oligos); split = 2; }\r
-\r
+ \r
+ if (hasOligos) { readOligos(oligos); split = 2; }\r
+ if (hasGroup) { readGroup(oligos); split = 2; }\r
+ \r
ofstream outSfftxt, outFasta, outQual, outFlow;\r
string outFastaFileName, outQualFileName;\r
string rootName = outputDir + m->getRootName(m->getSimpleName(input));\r
//print common header\r
if (sfftxt) { printCommonHeader(outSfftxt, header); }\r
if (flow) { outFlow << header.numFlowsPerRead << endl; }\r
- \r
+ \r
//read through the sff file\r
while (!in.eof()) {\r
\r
}\r
\r
count++;\r
- \r
+ \r
//report progress\r
if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); }\r
\r
//create new common headers for each file with the correct number of reads\r
adjustCommonHeader(header);\r
\r
- //close files and delete ofstreams\r
- for(int i=0;i<filehandles.size();i++){\r
- for(int j=0;j<filehandles[0].size();j++){\r
- (filehandles[i][j].begin()->second)->close(); delete (filehandles[i][j].begin()->second);\r
- (filehandlesHeaders[i][j].begin()->second)->close(); delete (filehandlesHeaders[i][j].begin()->second);\r
- }\r
- }\r
+ if (hasGroup) { delete groupMap; }\r
\r
//cout << "here" << endl;\r
map<string, string>::iterator it;\r
for(int i=0;i<filehandles.size();i++){\r
for(int j=0;j<filehandles[0].size();j++){\r
//cout << i << '\t' << '\t' << j << '\t' << filehandles[i][j] << endl;\r
- if (filehandles[i][j].begin()->first != "") {\r
- if (namesToRemove.count(filehandles[i][j].begin()->first) == 0) {\r
- if(m->isBlank(filehandles[i][j].begin()->first)){\r
+ if (filehandles[i][j] != "") {\r
+ if (namesToRemove.count(filehandles[i][j]) == 0) {\r
+ if(m->isBlank(filehandles[i][j])){\r
//cout << i << '\t' << '\t' << j << '\t' << filehandles[i][j] << " is blank removing" << endl;\r
- m->mothurRemove(filehandles[i][j].begin()->first);\r
- m->mothurRemove(filehandlesHeaders[i][j].begin()->first);\r
- namesToRemove.insert(filehandles[i][j].begin()->first);\r
+ m->mothurRemove(filehandles[i][j]);\r
+ m->mothurRemove(filehandlesHeaders[i][j]);\r
+ namesToRemove.insert(filehandles[i][j]);\r
}\r
}\r
}\r
//append new header to reads\r
for (int i = 0; i < filehandles.size(); i++) {\r
for (int j = 0; j < filehandles[i].size(); j++) {\r
- m->appendBinaryFiles(filehandles[i][j].begin()->first, filehandlesHeaders[i][j].begin()->first);\r
- m->renameFile(filehandlesHeaders[i][j].begin()->first, filehandles[i][j].begin()->first);\r
- m->mothurRemove(filehandlesHeaders[i][j].begin()->first);\r
+ m->appendBinaryFiles(filehandles[i][j], filehandlesHeaders[i][j]);\r
+ m->renameFile(filehandlesHeaders[i][j], filehandles[i][j]);\r
+ m->mothurRemove(filehandlesHeaders[i][j]);\r
//cout << i << '\t' << '\t' << j << '\t' << filehandles[i][j] << " done appending headers and removing " << filehandlesHeaders[i][j] << endl;\r
- if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j].begin()->first); }\r
+ if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j]); }\r
}\r
}\r
//cout << "here3" << endl;\r
in.read(mybuffer,4);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
in.read(mybuffer,4);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
thisbuffer[7] = offset & 0xFF;\r
for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(thisbuffer, 8);\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(thisbuffer, 8);\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(thisbuffer, 8);\r
thisbuffer2[3] = offset & 0xFF;\r
for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(thisbuffer2, 4);\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(thisbuffer2, 4);\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(thisbuffer2, 4);\r
thisbuffer[2] = (numSplitReads[i][j] >> 16) & 0xFF;\r
thisbuffer[3] = (numSplitReads[i][j] >> 24) & 0xFF;\r
}\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(thisbuffer, 4);\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(thisbuffer, 4);\r
+ out.close();\r
delete[] thisbuffer;\r
}\r
}\r
in.read(mybuffer,2);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
in.read(mybuffer,2);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
in.read(mybuffer,2);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
in.read(mybuffer,1);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
in.read(mybuffer,header.numFlowsPerRead);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
in.read(mybuffer,header.keyLength);\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, in.gcount());\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, in.gcount());\r
mybuffer = new char[spot-spotInFile];\r
for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
- (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, spot-spotInFile);\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+ out.write(mybuffer, spot-spotInFile);\r
+ out.close();\r
}\r
}\r
outNoMatchHeader.write(mybuffer, spot-spotInFile);\r
\r
if (split > 1) { \r
\r
- int barcodeIndex, primerIndex;\r
- int trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex);\r
- \r
+ int barcodeIndex, primerIndex, trashCodeLength;\r
+ \r
+ if (hasOligos) { trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex); }\r
+ else if (hasGroup) { trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, "groupMode"); }\r
+ else { m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }\r
+\r
char * mybuffer;\r
mybuffer = new char [spot-startSpotInFile];\r
\r
\r
\r
if(trashCodeLength == 0){\r
- (*(filehandles[barcodeIndex][primerIndex].begin()->second)).write(mybuffer, in2.gcount());\r
+ ofstream out;\r
+ m->openOutputFileBinaryAppend(filehandles[barcodeIndex][primerIndex], out);\r
+ out.write(mybuffer, in2.gcount());\r
+ out.close();\r
numSplitReads[barcodeIndex][primerIndex]++;\r
}\r
else{\r
m->errorOut(e, "SffInfoCommand", "findGroup");\r
exit(1);\r
}\r
-} \r
+}\r
+//**********************************************************************************************************************\r
+int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer, string groupMode) {\r
+ try {\r
+ string trashCode = "";\r
+ primer = 0;\r
+ \r
+ string group = groupMap->getGroup(header.name);\r
+ if (group == "not found") { trashCode += "g"; } //scrap for group\r
+ else { //find file group\r
+ map<string, int>::iterator it = barcodes.find(group);\r
+ if (it != barcodes.end()) {\r
+ barcode = it->second;\r
+ }else { trashCode += "g"; }\r
+ }\r
+ \r
+ return trashCode.length();\r
+ }\r
+ catch(exception& e) {\r
+ m->errorOut(e, "SffInfoCommand", "findGroup");\r
+ exit(1);\r
+ }\r
+}\r
//**********************************************************************************************************************\r
int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {\r
try {\r
\r
while(!inOligos.eof()){\r
\r
- inOligos >> type; \r
+ inOligos >> type;\r
\r
if(type[0] == '#'){\r
while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there\r
group = "";\r
\r
// get rest of line in case there is a primer name\r
- while (!inOligos.eof()) { \r
- char c = inOligos.get(); \r
+ while (!inOligos.eof()) {\r
+ char c = inOligos.get();\r
if (c == 10 || c == 13 || c == -1){ break; }\r
else if (c == 32 || c == 9){;} //space or tab\r
else { group += c; }\r
- } \r
+ }\r
\r
//check for repeat barcodes\r
map<string, int>::iterator itPrime = primers.find(oligo);\r
if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }\r
\r
- primers[oligo]=indexPrimer; indexPrimer++; \r
+ primers[oligo]=indexPrimer; indexPrimer++;\r
primerNameVector.push_back(group);\r
+ \r
}else if(type == "REVERSE"){\r
//Sequence oligoRC("reverse", oligo);\r
//oligoRC.reverseComplement();\r
}\r
else if(type == "BARCODE"){\r
inOligos >> group;\r
+ \r
\r
//check for repeat barcodes\r
map<string, int>::iterator itBar = barcodes.find(oligo);\r
else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }\r
}\r
m->gobble(inOligos);\r
- } \r
+ }\r
inOligos.close();\r
- \r
+ \r
if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1; }\r
- \r
+ \r
//add in potential combos\r
if(barcodeNameVector.size() == 0){\r
barcodes[""] = 0;\r
- barcodeNameVector.push_back(""); \r
+ barcodeNameVector.push_back("");\r
}\r
\r
if(primerNameVector.size() == 0){\r
primers[""] = 0;\r
- primerNameVector.push_back(""); \r
+ primerNameVector.push_back("");\r
}\r
\r
filehandles.resize(barcodeNameVector.size());\r
- for (int i = 0; i < filehandles.size(); i++) {\r
- for (int j = 0; j < primerNameVector.size(); j++) {\r
- ofstream* temp;\r
- map<string, ofstream*> myMap; myMap[""] = temp;\r
- filehandles[i].push_back(myMap);\r
- }\r
- }\r
- \r
+ for(int i=0;i<filehandles.size();i++){\r
+ filehandles[i].assign(primerNameVector.size(), "");\r
+ }\r
+ \r
if(split > 1){\r
set<string> uniqueNames; //used to cleanup outputFileNames\r
for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
}\r
}\r
\r
- ofstream* temp = new ofstream;\r
- map<string, string> variables; \r
+ ofstream temp;\r
+ map<string, string> variables;\r
variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
variables["[group]"] = comboGroupName;\r
string thisFilename = getOutputFileName("sff",variables);\r
uniqueNames.insert(thisFilename);\r
}\r
\r
- map<string, ofstream*> myMap; myMap[thisFilename] = temp;\r
- m->openOutputFileBinary(thisFilename, *(temp));\r
- filehandles[itBar->second][itPrimer->second] = myMap;\r
- map<string, ofstream*>::iterator itOfstream = filehandles[itBar->second][itPrimer->second].find("");\r
- if (itOfstream != filehandles[itBar->second][itPrimer->second].end()) { filehandles[itBar->second][itPrimer->second].erase(itOfstream); } //remove blank entry so we dont mess with .begin() above. code above assumes only 1 file name in the map\r
+ filehandles[itBar->second][itPrimer->second] = thisFilename;\r
+ temp.open(thisFilename.c_str(), ios::binary); temp.close();\r
}\r
}\r
}\r
numFPrimers = primers.size();\r
numLinkers = linker.size();\r
numSpacers = spacer.size();\r
- map<string, string> variables; \r
+ map<string, string> variables;\r
variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
variables["[group]"] = "scrap";\r
noMatchFile = getOutputFileName("sff",variables);\r
m->mothurRemove(noMatchFile);\r
numNoMatch = 0;\r
\r
+ \r
bool allBlank = true;\r
for (int i = 0; i < barcodeNameVector.size(); i++) {\r
if (barcodeNameVector[i] != "") {\r
\r
filehandlesHeaders.resize(filehandles.size());\r
numSplitReads.resize(filehandles.size());\r
- for (int i = 0; i < filehandles.size(); i++) { \r
- numSplitReads[i].resize(filehandles[i].size(), 0); \r
+ for (int i = 0; i < filehandles.size(); i++) {\r
+ numSplitReads[i].resize(filehandles[i].size(), 0);\r
for (int j = 0; j < filehandles[i].size(); j++) {\r
- ofstream* temp = new ofstream;\r
- map<string, ofstream* > myMap;\r
- string thisHeader = (filehandles[i][j].begin())->first+"headers";\r
- myMap[thisHeader] = temp;\r
- m->openOutputFileBinary(thisHeader, *(temp));\r
- filehandlesHeaders[i].push_back(myMap);\r
+ filehandlesHeaders[i].push_back(filehandles[i][j]+"headers");\r
}\r
}\r
\r
exit(1);\r
}\r
}\r
+//***************************************************************************************************************\r
+\r
+bool SffInfoCommand::readGroup(string oligoFile){\r
+ try {\r
+ filehandles.clear();\r
+ numSplitReads.clear();\r
+ filehandlesHeaders.clear();\r
+ barcodes.clear();\r
+ \r
+ groupMap = new GroupMap();\r
+ groupMap->readMap(oligoFile);\r
+ \r
+ //like barcodeNameVector - no primer names\r
+ vector<string> groups = groupMap->getNamesOfGroups();\r
+ \r
+ filehandles.resize(groups.size());\r
+ for (int i = 0; i < filehandles.size(); i++) {\r
+ for (int j = 0; j < 1; j++) {\r
+ \r
+ map<string, string> variables;\r
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+ variables["[group]"] = groups[i];\r
+ string thisFilename = getOutputFileName("sff",variables);\r
+ outputNames.push_back(thisFilename);\r
+ outputTypes["sff"].push_back(thisFilename);\r
+ \r
+ ofstream temp;\r
+ m->openOutputFileBinary(thisFilename, temp); temp.close();\r
+ filehandles[i].push_back(thisFilename);\r
+ barcodes[groups[i]] = i;\r
+ }\r
+ }\r
+ \r
+ map<string, string> variables;\r
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+ variables["[group]"] = "scrap";\r
+ noMatchFile = getOutputFileName("sff",variables);\r
+ m->mothurRemove(noMatchFile);\r
+ numNoMatch = 0;\r
+ \r
+ \r
+ filehandlesHeaders.resize(groups.size());\r
+ numSplitReads.resize(filehandles.size());\r
+ for (int i = 0; i < filehandles.size(); i++) {\r
+ numSplitReads[i].resize(filehandles[i].size(), 0);\r
+ for (int j = 0; j < filehandles[i].size(); j++) {\r
+ ofstream temp ;\r
+ string thisHeader = filehandles[i][j]+"headers";\r
+ m->openOutputFileBinary(thisHeader, temp); temp.close();\r
+ filehandlesHeaders[i].push_back(thisHeader);\r
+ }\r
+ }\r
+ \r
+ return true;\r
+ \r
+ }\r
+ catch(exception& e) {\r
+ m->errorOut(e, "SffInfoCommand", "readGroup");\r
+ exit(1);\r
+ }\r
+}\r
+\r
//********************************************************************/\r
string SffInfoCommand::reverseOligo(string oligo){\r
try {\r
*/
#include "command.hpp"
-
+#include "groupmap.h"
/**********************************************************/
struct CommonHeader {
void help() { m->mothurOut(getHelpString()); }
private:
- string sffFilename, sfftxtFilename, outputDir, accnosName, currentFileName, oligosfile, noMatchFile;
- vector<string> filenames, outputNames, accnosFileNames, oligosFileNames;
- bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos, hasOligos;
+ string sffFilename, sfftxtFilename, outputDir, accnosName, currentFileName, oligosfile, noMatchFile, groupfile;
+ vector<string> filenames, outputNames, accnosFileNames, oligosFileNames, groupFileNames;
+ bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos, hasOligos, hasGroup;
int mycount, split, numFPrimers, numLinkers, numSpacers, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, numNoMatch;
set<string> seqNames;
map<string, int> barcodes;
map<string, int> primers;
+ GroupMap* groupMap;
vector<string> linker, spacer, primerNameVector, barcodeNameVector, revPrimer;
vector<vector<int> > numSplitReads;
- vector<vector<map<string, ofstream*> > > filehandles;
- vector<vector<map<string, ofstream*> > > filehandlesHeaders;
+ vector<vector<string> > filehandles;
+ vector<vector<string> > filehandlesHeaders;
//extract sff file functions
int extractSffInfo(string, string, string);
bool readSeqData(ifstream&, seqRead&, int, Header&);
int decodeName(string&, string&, string&, string);
bool readOligos(string oligosFile);
+ bool readGroup(string oligosFile);
int printCommonHeader(ofstream&, CommonHeader&);
int printHeader(ofstream&, Header&);
bool sanityCheck(Header&, seqRead&);
int adjustCommonHeader(CommonHeader);
int findGroup(Header header, seqRead read, int& barcode, int& primer);
+ int findGroup(Header header, seqRead read, int& barcode, int& primer, string);
string reverseOligo(string oligo);
//parsesfftxt file functions
//
#include "sracommand.h"
+#include "sffinfocommand.h"
+#include "parsefastaqcommand.h"
//**********************************************************************************************************************
vector<string> SRACommand::setParameters(){
try {
- CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","sra",false,false); parameters.push_back(psff);
- CommandParameter pfastqfile("fastqfile", "InputTypes", "", "", "none", "none", "none","sra",false,false); parameters.push_back(pfastqfile);
+ CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false); parameters.push_back(psff);
+ CommandParameter pgroup("group", "InputTypes", "", "", "groupOligos", "none", "none","sra",false,false); parameters.push_back(pgroup);
+ CommandParameter poligos("oligos", "InputTypes", "", "", "groupOligos", "none", "none","sra",false,false); parameters.push_back(poligos);
+ CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false); parameters.push_back(pfile);
+ CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false); parameters.push_back(pfastq);
//choose only one multiple options
CommandParameter pplatform("platform", "Multiple", "454-???-???", "454", "", "", "","",false,false); parameters.push_back(pplatform);
+ CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
+ CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
+ CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
+ CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
+ CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
+
//every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
helpString += "The sra command creates a sequence read archive from sff or fastq files.\n";
helpString += "The sra command parameters are: sff, fastqfiles, oligos, platform....\n";
helpString += "The sffiles parameter is used to provide a file containing a \n";
+ helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
+ helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
+ helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+ helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
+ helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
+
helpString += "The new command should be in the following format: \n";
helpString += "new(...)\n";
return helpString;
else {
string path;
- it = parameters.find("sfffiles");
+ it = parameters.find("sff");
//user has given a template file
if(it != parameters.end()){
path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["sfffiles"] = inputDir + it->second; }
+ if (path == "") { parameters["sff"] = inputDir + it->second; }
}
- it = parameters.find("fastqfiles");
+ it = parameters.find("fastq");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["fastq"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("file");
//user has given a template file
if(it != parameters.end()){
path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["fastqfiles"] = inputDir + it->second; }
+ if (path == "") { parameters["file"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("group");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["group"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("oligos");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["oligos"] = inputDir + it->second; }
}
}
//check for parameters
- fastqfiles = validParameter.validFile(parameters, "fastqfiles", true);
- if (fastqfiles == "not open") { fastqfiles = ""; abort = true; }
- else if (fastqfiles == "not found") { fastqfiles = ""; }
+ fastqfile = validParameter.validFile(parameters, "fastq", true);
+ if (fastqfile == "not open") { fastqfile = ""; abort = true; }
+ else if (fastqfile == "not found") { fastqfile = ""; }
- sfffiles = validParameter.validFile(parameters, "sfffiles", true);
- if (sfffiles == "not open") { sfffiles = ""; abort = true; }
- else if (sfffiles == "not found") { sfffiles = ""; }
+ sfffile = validParameter.validFile(parameters, "sff", true);
+ if (sfffile == "not open") { sfffile = ""; abort = true; }
+ else if (sfffile == "not found") { sfffile = ""; }
+
+ file = validParameter.validFile(parameters, "file", true);
+ if (file == "not open") { file = ""; abort = true; }
+ else if (file == "not found") { file = ""; }
+
+ groupfile = validParameter.validFile(parameters, "group", true);
+ if (groupfile == "not open") { groupfile = ""; abort = true; }
+ else if (groupfile == "not found") { groupfile = ""; }
+ else { m->setGroupFile(groupfile); }
+
+ oligosfile = validParameter.validFile(parameters, "oligos", true);
+ if (oligosfile == "not found") { oligosfile = ""; }
+ else if(oligosfile == "not open") { abort = true; }
+ else { m->setOligosFile(oligosfile); }
+
+
+ file = validParameter.validFile(parameters, "file", true);
+ if (file == "not open") { file = ""; abort = true; }
+ else if (file == "not found") { file = ""; }
- if ((fastqfiles == "") && (sfffiles == "")) {
- m->mothurOut("No valid current files. You must provide a sfffiles or fastqfiles file before you can use the sra command."); m->mothurOutEndLine(); abort = true;
+ if ((fastqfile == "") && (sfffile == "") && (sfffile == "")) {
+ m->mothurOut("[ERROR]: You must provide a file, sff file or fastq file before you can use the sra command."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (oligosfile != "")) {
+ m->mothurOut("[ERROR]: You may not use a group file and an oligos file, only one."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((fastqfile != "") || (sfffile != "")) {
+ if ((groupfile == "") && (oligosfile == "")) {
+ oligosfile = m->getOligosFile();
+ if (oligosfile != "") { m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter."); m->mothurOutEndLine(); }
+ else {
+ groupfile = m->getGroupFile();
+ if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("[ERROR]: You must provide groupfile or oligos file if splitting a fastq or sff file."); m->mothurOutEndLine(); abort = true;
+ }
+ }
+ }
}
//use only one Mutliple type
if ((platform == "454") || (platform == "????") || (platform == "????") || (platform == "????")) { }
else { m->mothurOut("Not a valid platform option. Valid platform options are 454, ...."); m->mothurOutEndLine(); abort = true; }
+
+ string temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found"){ temp = "0"; }
+ m->mothurConvert(temp, bdiffs);
+
+ temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found"){ temp = "0"; }
+ m->mothurConvert(temp, pdiffs);
+
+ temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, ldiffs);
+
+ temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, sdiffs);
+
+ temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
+ m->mothurConvert(temp, tdiffs);
+
+ if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
+
+
}
}
}
//**********************************************************************************************************************
-
int SRACommand::execute(){
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
-
+ //parse files
+ vector<string> filesBySample;
+
+ if (file != "") { readFile(filesBySample); }
+ else if (sfffile != "") { parseSffFile(filesBySample); }
+ else if (fastqfile != "") { parseFastqFile(filesBySample); }
+
+
+
//output files created by command
m->mothurOutEndLine();
}
}
//**********************************************************************************************************************
+int SRACommand::readFile(vector<string>& files){
+ try {
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SRACommand", "readFile");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int SRACommand::parseSffFile(vector<string>& files){
+ try {
+ //run sffinfo to parse sff file into individual sampled sff files
+ string commandString = "sff=" + sfffile;
+ if (groupfile != "") { commandString += ", group=" + groupfile; }
+ else if (oligosfile != "") {
+ commandString += ", oligos=" + oligosfile;
+ //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
+ if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
+ if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
+ if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
+ if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
+ if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
+ }
+ m->mothurOutEndLine();
+ m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+ m->mothurOut("Running command: sffinfo(" + commandString + ")"); m->mothurOutEndLine();
+ m->mothurCalling = true;
+
+ Command* sffinfoCommand = new SffInfoCommand(commandString);
+ sffinfoCommand->execute();
+
+ map<string, vector<string> > filenames = sffinfoCommand->getOutputFiles();
+ map<string, vector<string> >::iterator it = filenames.find("sff");
+ if (it != filenames.end()) { files = it->second; }
+ else { m->control_pressed = true; } // error in sffinfo
+
+ delete sffinfoCommand;
+ m->mothurCalling = false;
+ m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SRACommand", "readFile");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+int SRACommand::parseFastqFile(vector<string>& files){
+ try {
+
+ //run sffinfo to parse sff file into individual sampled sff files
+ string commandString = "fastq=" + fastqfile;
+ if (groupfile != "") { commandString += ", group=" + groupfile; }
+ else if (oligosfile != "") {
+ commandString += ", oligos=" + oligosfile;
+ //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
+ if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
+ if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
+ if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
+ if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
+ if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
+ }
+ m->mothurOutEndLine();
+ m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+ m->mothurOut("Running command: fastq.info(" + commandString + ")"); m->mothurOutEndLine();
+ m->mothurCalling = true;
+
+ Command* fastqinfoCommand = new ParseFastaQCommand(commandString);
+ fastqinfoCommand->execute();
+
+ map<string, vector<string> > filenames = fastqinfoCommand->getOutputFiles();
+ map<string, vector<string> >::iterator it = filenames.find("fastq");
+ if (it != filenames.end()) { files = it->second; }
+ else { m->control_pressed = true; } // error in sffinfo
+
+ delete fastqinfoCommand;
+ m->mothurCalling = false;
+ m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SRACommand", "readFile");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
private:
bool abort;
- string sfffiles, fastqfiles, platform, outputDir;
+ int tdiffs, bdiffs, pdiffs, sdiffs, ldiffs;
+ string sfffile, fastqfile, platform, outputDir, groupfile, file, oligosfile;
vector<string> outputNames;
+
+ int readFile(vector<string>&);
+ int parseSffFile(vector<string>&);
+ int parseFastqFile(vector<string>&);
+
};
/**************************************************************************************************/