//moved this into driver to avoid deep copies in windows paralellized version
Alignment* alignment;
int longestBase = templateDB->getLongestBase();
+ if (m->debug) { m->mothurOut("[DEBUG]: template longest base = " + toString(templateDB->getLongestBase()) + " \n"); }
if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); }
else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); }
else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); }
int numBasesNeeded = origNumBases * threshold;
if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
- if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
- alignment->resize(candidateSeq->getUnaligned().length()+1);
+ if (candidateSeq->getUnaligned().length()+1 > alignment->getnRows()) {
+ if (m->debug) { m->mothurOut("[DEBUG]: " + candidateSeq->getName() + " " + toString(candidateSeq->getUnaligned().length()) + " " + toString(alignment->getnRows()) + " \n"); }
+ alignment->resize(candidateSeq->getUnaligned().length()+2);
}
Sequence temp = templateDB->findClosestSequence(candidateSeq);
- Sequence* templateSeq = &temp;
+ Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned());
float searchScore = templateDB->getSearchScore();
//get reverse compliment
copy = new Sequence(candidateSeq->getName(), originalUnaligned);
copy->reverseComplement();
+
+ if (m->debug) { m->mothurOut("[DEBUG]: flipping " + candidateSeq->getName() + " \n"); }
//rerun alignment
Sequence temp2 = templateDB->findClosestSequence(copy);
- Sequence* templateSeq2 = &temp2;
+ Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned());
+
+ if (m->debug) { m->mothurOut("[DEBUG]: closest template " + temp2.getName() + " \n"); }
searchScore = templateDB->getSearchScore();
nast2 = new Nast(alignment, copy, templateSeq2);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: completed Nast2 " + candidateSeq->getName() + " flipped numBases = " + toString(copy->getNumBases()) + " old numbases = " + toString(candidateSeq->getNumBases()) +" \n"); }
//check if any better
if (copy->getNumBases() > candidateSeq->getNumBases()) {
candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better
- templateSeq = templateSeq2;
+ delete templateSeq;
+ templateSeq = templateSeq2;
delete nast;
nast = nast2;
needToDeleteCopy = true;
}else{
wasBetter = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence.";
delete nast2;
+ delete templateSeq2;
delete copy;
}
+ if (m->debug) { m->mothurOut("[DEBUG]: done.\n"); }
}
//create accnos file with names
report.print();
delete nast;
+ delete templateSeq;
if (needToDeleteCopy) { delete copy; }
count++;
}
Sequence temp = templateDB->findClosestSequence(candidateSeq);
- Sequence* templateSeq = &temp;
+ Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned());
float searchScore = templateDB->getSearchScore();
//rerun alignment
Sequence temp2 = templateDB->findClosestSequence(copy);
- Sequence* templateSeq2 = &temp2;
+ Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned());
searchScore = templateDB->getSearchScore();
//check if any better
if (copy->getNumBases() > candidateSeq->getNumBases()) {
candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better
- templateSeq = templateSeq2;
+ delete templateSeq;
+ templateSeq = templateSeq2;
delete nast;
nast = nast2;
needToDeleteCopy = true;
}else{
wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence.";
delete nast2;
+ delete templateSeq2;
delete copy;
}
}
delete buf3;
delete nast;
+ delete templateSeq;
if (needToDeleteCopy) { delete copy; }
}
delete candidateSeq;
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename);
+ num = driver(lines[process], alignFileName + toString(m->mothurGetpid(process)) + ".temp", reportFileName + toString(m->mothurGetpid(process)) + ".temp", accnosFName + m->mothurGetpid(process) + ".temp", filename);
//pass numSeqs to parent
ofstream out;
- string tempFile = alignFileName + toString(getpid()) + ".num.temp";
+ string tempFile = alignFileName + toString(m->mothurGetpid(process)) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out.close();
}
Sequence temp = templateDB->findClosestSequence(candidateSeq);
- Sequence* templateSeq = &temp;
+ Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned());
float searchScore = templateDB->getSearchScore();
//rerun alignment
Sequence temp2 = templateDB->findClosestSequence(copy);
- Sequence* templateSeq2 = &temp2;
+ Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned());
searchScore = templateDB->getSearchScore();
//check if any better
if (copy->getNumBases() > candidateSeq->getNumBases()) {
candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better
- templateSeq = templateSeq2;
+ delete templateSeq;
+ templateSeq = templateSeq2;
delete nast;
nast = nast2;
needToDeleteCopy = true;
}else{
wasBetter = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence.";
delete nast2;
+ delete templateSeq2;
delete copy;
}
}
report.print();
delete nast;
+ delete templateSeq;
if (needToDeleteCopy) { delete copy; }
pDataArray->count++;
process++;
}else if (pid == 0){
exitCommand = driverChimeras(mid, lines[process]);
- string tempOut = outputDir + toString(getpid()) + ".temp";
+ string tempOut = outputDir + toString(m->mothurGetpid(process)) + ".temp";
writePrefs(tempOut, lines[process]);
exit(0);
}else {
int randNumber = rand();
//int randNumber = 12345;
- string pid = "";
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- pid += getpid();
-#else
- pid += toString(threadID);
-#endif
-
+ string pid = m->mothurGetpid(threadID);
+ if (m->debug) { m->mothurOut("[DEBUG]: tag = " + tag + "\t pid = " + pid + "\n"); }
+
dbFileName = tag + pid + toString(randNumber) + ".template.unaligned.fasta";
queryFileName = tag + pid + toString(randNumber) + ".candidate.unaligned.fasta";
blastFileName = tag + pid + toString(randNumber) + ".blast";
}
int randNumber = rand();
- string pid = "";
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- pid += getpid();
-#else
- pid += toString(threadID);
-#endif
-
+ string pid = m->mothurGetpid(threadID);
dbFileName = pid + toString(randNumber) + ".template.unaligned.fasta";
queryFileName = pid + toString(randNumber) + ".candidate.unaligned.fasta";
blastFileName = pid + toString(randNumber) + ".blast";
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+ num = driver(lines[process], outputFileName + toString(m->mothurGetpid(process)) + ".temp", filename, accnos + toString(m->mothurGetpid(process)) + ".temp");
//pass numSeqs to parent
ofstream out;
- string tempFile = outputFileName + toString(getpid()) + ".num.temp";
+ string tempFile = outputFileName + toString(m->mothurGetpid(process)) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out.close();
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename);
+ num = driver(lines[process], outputFileName + toString(m->mothurGetpid(process)) + ".temp", filename);
//pass numSeqs to parent
ofstream out;
- string tempFile = outputFileName + toString(getpid()) + ".num.temp";
+ string tempFile = outputFileName + toString(m->mothurGetpid(process)) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out.close();
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
+ num = driverGroups(outputFName + toString(m->mothurGetpid(process)) + ".temp", accnos + toString(m->mothurGetpid(process)) + ".temp", accnos + ".byCount." + toString(m->mothurGetpid(process)) + ".temp", lines[process].start, lines[process].end, groups);
//pass numSeqs to parent
ofstream out;
- string tempFile = outputFName + toString(getpid()) + ".num.temp";
+ string tempFile = outputFName + toString(m->mothurGetpid(process)) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out.close();
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+ num = driver(lines[process], outputFileName + toString(m->mothurGetpid(process)) + ".temp", filename, accnos + toString(m->mothurGetpid(process)) + ".temp");
//pass numSeqs to parent
ofstream out;
- string tempFile = outputFileName + toString(getpid()) + ".num.temp";
+ string tempFile = outputFileName + toString(m->mothurGetpid(process)) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out.close();
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp", breakUp[process], fileGroup, accnos + toString(getpid()) + ".byCount");
+ num = driverGroups(outputFName + toString(m->mothurGetpid(process)) + ".temp", accnos + m->mothurGetpid(process) + ".temp", fasta + toString(m->mothurGetpid(process)) + ".temp", breakUp[process], fileGroup, accnos + toString(m->mothurGetpid(process)) + ".byCount");
//pass numSeqs to parent
ofstream out;
- string tempFile = outputFName + toString(getpid()) + ".num.temp";
+ string tempFile = outputFName + toString(m->mothurGetpid(process)) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out.close();
int ChimeraSlayerCommand::driver(linePair filePos, string outputFName, string filename, string accnos, string fasta, map<string, int>& priority){
try {
+ if (m->debug) { m->mothurOut("[DEBUG]: filename = " + filename + "\n"); }
+
Chimera* chimera;
if (templatefile != "self") { //you want to run slayer with a reference template
chimera = new ChimeraSlayer(filename, templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());
int process = 0;
int num = 0;
processIDS.clear();
+
+ if (m->debug) { m->mothurOut("[DEBUG]: filename = " + filename + "\n"); }
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
//loop through and create all the processes you want
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp", thisPriority);
+ num = driver(lines[process], outputFileName + toString(m->mothurGetpid(process)) + ".temp", filename, accnos + toString(m->mothurGetpid(process)) + ".temp", fasta + toString(m->mothurGetpid(process)) + ".temp", thisPriority);
//pass numSeqs to parent
ofstream out;
- string tempFile = outputFileName + toString(getpid()) + ".num.temp";
+ string tempFile = outputFileName + toString(m->mothurGetpid(process)) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out.close();
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
+ num = driver(outputFileName + toString(m->mothurGetpid(process)) + ".temp", files[process], accnos + toString(m->mothurGetpid(process)) + ".temp", alns + toString(m->mothurGetpid(process)) + ".temp", numChimeras);
//pass numSeqs to parent
ofstream out;
- string tempFile = outputFileName + toString(getpid()) + ".num.temp";
+ string tempFile = outputFileName + toString(m->mothurGetpid(process)) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out << numChimeras << endl;
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
+ num = driverGroups(outputFName + toString(m->mothurGetpid(process)) + ".temp", filename + toString(m->mothurGetpid(process)) + ".temp", accnos + toString(m->mothurGetpid(process)) + ".temp", alns + toString(m->mothurGetpid(process)) + ".temp", accnos + ".byCount." + toString(m->mothurGetpid(process)) + ".temp", lines[process].start, lines[process].end, groups);
//pass numSeqs to parent
ofstream out;
- string tempFile = outputFName + toString(getpid()) + ".num.temp";
+ string tempFile = outputFName + toString(m->mothurGetpid(process)) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out.close();
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- wroteAccnos = driver(lines[process], filename, outFasta + toString(getpid()) + ".temp", outAccnos + toString(getpid()) + ".temp");
+ wroteAccnos = driver(lines[process], filename, outFasta + m->mothurGetpid(process) + ".temp", outAccnos + m->mothurGetpid(process) + ".temp");
//pass numSeqs to parent
ofstream out;
- string tempFile = fastafile + toString(getpid()) + ".bool.temp";
+ string tempFile = fastafile + m->mothurGetpid(process) + ".bool.temp";
m->openOutputFile(tempFile, out);
out << wroteAccnos << endl;
out.close();
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", filename);
+ num = driver(lines[process], taxFileName + m->mothurGetpid(process) + ".temp", tempTaxFile + m->mothurGetpid(process) + ".temp", accnos + m->mothurGetpid(process) + ".temp", filename);
//pass numSeqs to parent
ofstream out;
- string tempFile = filename + toString(getpid()) + ".num.temp";
+ string tempFile = filename + m->mothurGetpid(process) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out.close();
vector<string> listFileNames = cluster(dividedNames[process], labels);
//write out names to file
- string filename = toString(getpid()) + ".temp";
+ string filename = m->mothurGetpid(process) + ".temp";
ofstream out;
m->openOutputFile(filename, out);
out << tag << endl;
//print out labels
ofstream outLabels;
- filename = toString(getpid()) + ".temp.labels";
+ filename = m->mothurGetpid(process) + ".temp.labels";
m->openOutputFile(filename, outLabels);
outLabels << cutoff << endl;
map<string, string> fastaMap;
map<string, string> nameMap;
map<string, int> nameFileMap;
- int cutoff, seqLength;
+ int seqLength;
+ float cutoff;
int readFasta();
int readNames();
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- string filename = toString(getpid()) + ".temp";
+ string filename = m->mothurGetpid(process) + ".temp";
numSeqs = driver(lines[process].start, lines[process].end, filename, groupMap);
- string tempFile = toString(getpid()) + ".num.temp";
+ string tempFile = m->mothurGetpid(process) + ".num.temp";
ofstream outTemp;
m->openOutputFile(tempFile, outTemp);
}
}
- string filename = toString(getpid()) + ".temp";
+ string filename = m->mothurGetpid(process) + ".temp";
numSeqs = driver(lines[processors-1].start, lines[processors-1].end, filename, groupMap);
//force parent to wait until all the processes are done
CommandParameter pname("repname", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname);
CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount);
CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
- CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pcontaxonomy);
+ CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pconstaxonomy);
CommandParameter plist("list", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(plist);
CommandParameter pshared("shared", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(pshared);
CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
try {
string helpString = "";
helpString += "The create.database command reads a list file or a shared file, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, or count file and creates a database file.\n";
- helpString += "The create.database command parameters are repfasta, list, shared, repname, contaxonomy, group, count and label. List, repfasta, repnames or count, and contaxonomy are required.\n";
+ helpString += "The create.database command parameters are repfasta, list, shared, repname, constaxonomy, group, count and label. List, repfasta, repnames or count, and constaxonomy are required.\n";
helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
helpString += "The count file is the count file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, count=yourCountFile). If it includes group info, mothur will give you the abundance breakdown by group. \n";
- helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile, name=yourNameFile).\n";
+ helpString += "The constaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile, name=yourNameFile).\n";
helpString += "The group file is optional and will just give you the abundance breakdown by group.\n";
helpString += "The label parameter allows you to specify a label to be used from your listfile.\n";
helpString += "NOTE: Make SURE the repfasta, repnames and contaxonomy are for the same label as the listfile.\n";
if (path == "") { parameters["repname"] = inputDir + it->second; }
}
- it = parameters.find("contaxonomy");
+ it = parameters.find("constaxonomy");
//user has given a template file
if(it != parameters.end()){
path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["contaxonomy"] = inputDir + it->second; }
+ if (path == "") { parameters["constaxonomy"] = inputDir + it->second; }
}
it = parameters.find("repfasta");
if (sharedfile != "") { if (outputDir == "") { outputDir = m->hasPath(sharedfile); } }
else { if (outputDir == "") { outputDir = m->hasPath(listfile); } }
- contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true);
+ contaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true);
if (contaxonomyfile == "not found") { //if there is a current list file, use it
- contaxonomyfile = ""; m->mothurOut("The contaxonomy parameter is required, aborting."); m->mothurOutEndLine(); abort = true;
+ contaxonomyfile = ""; m->mothurOut("The constaxonomy parameter is required, aborting."); m->mothurOutEndLine(); abort = true;
}
else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
int binNumber = 0;
string temp = "";
for (int i = 0; i < info[0].size(); i++) { if (isspace(info[0][i])) {;}else{temp +=info[0][i]; } }
- m->mothurConvert(temp, binNumber);
+ m->mothurConvert(m->getSimpleLabel(temp), binNumber);
set<int>::iterator it = sanity.find(binNumber);
if (it != sanity.end()) {
m->mothurOut("[ERROR]: your repfasta file is not the right format. The create database command is designed to be used with the output from get.oturep. When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n"); m->control_pressed = true; break;
m->gobble(in);
- if(count % 1000 == 0) { m->mothurOut(toString(count) + "\t" + toString(sequenceStrings.size())); m->mothurOutEndLine(); }
+ if(count % 1000 == 0) { m->mothurOutJustToScreen(toString(count) + "\t" + toString(sequenceStrings.size()) + "\n"); }
}
if(count % 1000 != 0) { m->mothurOut(toString(count) + "\t" + toString(sequenceStrings.size())); m->mothurOutEndLine(); }
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- if (output != "square") { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); }
- else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); }
+ if (output != "square") { driver(lines[process].start, lines[process].end, filename + m->mothurGetpid(process) + ".temp", cutoff); }
+ else { driver(lines[process].start, lines[process].end, filename + m->mothurGetpid(process) + ".temp", "square"); }
exit(0);
}else {
m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine();
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- string filteredFasta = filename + toString(getpid()) + ".temp";
+ string filteredFasta = filename + m->mothurGetpid(process) + ".temp";
num = driverRunFilter(F, filteredFasta, filename, lines[process]);
//pass numSeqs to parent
ofstream out;
- string tempFile = filename + toString(getpid()) + ".num.temp";
+ string tempFile = filename + m->mothurGetpid(process) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out.close();
num = driverCreateFilter(F, filename, lines[process]);
//write out filter counts to file
- filename += toString(getpid()) + "filterValues.temp";
+ filename += m->mothurGetpid(process) + "filterValues.temp";
ofstream out;
m->openOutputFile(filename, out);
process++;
}else if (pid == 0){
outputNames.clear();
- num = processDriver(thislookup, dividedPartitions[process], (outputFileName + toString(getpid())), rels[process], matrix[process], doneFlags, process);
+ num = processDriver(thislookup, dividedPartitions[process], (outputFileName + m->mothurGetpid(process)), rels[process], matrix[process], doneFlags, process);
//pass numSeqs to parent
ofstream out;
- string tempFile = toString(getpid()) + ".outputNames.temp";
+ string tempFile = m->mothurGetpid(process) + ".outputNames.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out << outputNames.size() << endl;
//pass pvalues to parent
ofstream out;
- string tempFile = toString(getpid()) + ".pvalues.temp";
+ string tempFile = m->mothurGetpid(process) + ".pvalues.temp";
m->openOutputFile(tempFile, out);
//pass values
//pass pvalues to parent
ofstream out;
- string tempFile = toString(getpid()) + ".pvalues.temp";
+ string tempFile = m->mothurGetpid(process) + ".pvalues.temp";
m->openOutputFile(tempFile, out);
//pass values
for(int i=0;i<tempFASTAFileNames.size();i++){
for(int j=0;j<tempFASTAFileNames[i].size();j++){
if (tempFASTAFileNames[i][j] != "") {
- tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
+ tempFASTAFileNames[i][j] += m->mothurGetpid(process) + ".temp";
m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
}
}
}
num = driver(files[process],
- outputFasta + toString(getpid()) + ".temp",
- outputScrapFasta + toString(getpid()) + ".temp",
- outputMisMatches + toString(getpid()) + ".temp",
+ outputFasta + m->mothurGetpid(process) + ".temp",
+ outputScrapFasta + m->mothurGetpid(process) + ".temp",
+ outputMisMatches + m->mothurGetpid(process) + ".temp",
tempFASTAFileNames, process, group);
//pass groupCounts to parent
ofstream out;
- string tempFile = toString(getpid()) + ".num.temp";
+ string tempFile = m->mothurGetpid(process) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
if (createFileGroup || createOligosGroup) {
num++;
//report progress
- if((num) % 1000 == 0){ m->mothurOut(toString(num)); m->mothurOutEndLine(); }
+ if((num) % 1000 == 0){ m->mothurOutJustToScreen(toString(num)); m->mothurOutEndLine(); }
}
//report progress
count++;
//report progress
- if((count) % 10000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
+ if((count) % 10000 == 0){ m->mothurOutJustToScreen(toString(count)); m->mothurOutEndLine(); }
//}
}
}
string current = "";
itTypes = outputTypes.find("phylip");
if (itTypes != outputTypes.end()) {
- if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; if (!subsample) { m->setPhylipFile(current); } }
}
m->mothurOutEndLine();
driver(thisItersLookup, lines[process].start, lines[process].end, calcDists);
- string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(getpid()) + ".dist";
+ string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + m->mothurGetpid(process) + ".dist";
ofstream outtemp;
m->openOutputFile(tempdistFileName, outtemp);
variables["[calc]"] = matrixCalculators[i]->getName();
string distFileName = getOutputFileName("phylip",variables);
outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
+ //set current phylip file to average distance matrix
+ m->setPhylipFile(distFileName);
ofstream outAve;
m->openOutputFile(distFileName, outAve);
outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
if (outputDir == "") { thisOutputDir += m->hasPath(outputFile); }
variables["[filename]"] = thisOutputDir + m->getSimpleName(outputFile);
outputFile = getOutputFileName("sff",variables);
- m->openOutputFile(outputFile, out);
+ m->openOutputFileBinary(outputFile, out);
outputNames.push_back(outputFile); outputTypes["sff"].push_back(outputFile);
outputFileHeader = outputFile + ".headers";
numTotalReads = 0;
ifstream input;
//open output file in append mode
- openOutputFileAppend(filename, output);
- int ableToOpen = openInputFile(temp, input, "no error");
+ openOutputFileBinaryAppend(filename, output);
+ int ableToOpen = openInputFileBinary(temp, input, "no error");
//int ableToOpen = openInputFile(temp, input);
int numLines = 0;
exit(1);
}
}
+/***********************************************************************/
+// Builds the identifier string used to tag per-worker temporary files.
+// On the Unix-like platforms matched by the preprocessor test below, the
+// result is the decimal digits of toString(getpid()); on every other
+// platform it is toString(threadID).
+// When the debug flag is set, the unfiltered pid string is logged first.
+string MothurOut::mothurGetpid(int threadID){
+	try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
+		string rawPid = toString(getpid());
+		if (debug) { mothurOut("[DEBUG]: " + rawPid + "\n"); }
+
+		//keep only the decimal digits, dropping any other character
+		string digitsOnly = "";
+		for (string::iterator it = rawPid.begin(); it != rawPid.end(); ++it) {
+			if ((*it >= '0') && (*it <= '9')) { digitsOnly += *it; }
+		}
+		return digitsOnly;
+#else
+		//no process id is used on this branch; fall back to the caller's id
+		return toString(threadID);
+#endif
+	}
+	catch(exception& e) {
+		errorOut(e, "MothurOut", "mothurGetpid");
+		exit(1);
+	}
+}
+
/***********************************************************************/
bool MothurOut::isLabelEquivalent(string label1, string label2){
bool isLabelEquivalent(string, string);
string getSimpleLabel(string);
string findEdianness();
+ string mothurGetpid(int);
//string manipulation
process++;
}
else if(pid == 0){
- driver(start[process], end[process], distanceFileName + toString(getpid()) + ".temp");
+ driver(start[process], end[process], distanceFileName + m->mothurGetpid(process) + ".temp");
exit(0);
}
else{
processIDS.push_back(pid);
process++;
}else if (pid == 0){
- if (output != "square") { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); }
- else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); }
+ if (output != "square") { driver(lines[process].start, lines[process].end, filename + m->mothurGetpid(process) + ".temp", cutoff); }
+ else { driver(lines[process].start, lines[process].end, filename + m->mothurGetpid(process) + ".temp", "square"); }
exit(0);
}else {
m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
//pass numSeqs to parent
ofstream out;
- string tempFile = outputDir + toString(getpid()) + ".pars.results.temp";
+ string tempFile = outputDir + m->mothurGetpid(process) + ".pars.results.temp";
m->openOutputFile(tempFile, out);
out << myresults.size() << endl;
for (int i = 0; i < myresults.size(); i++) { out << myresults[i] << '\t'; } out << endl;
}else if (pid == 0){
driver(t, div, sumDiv, procIters[process], increment, randomLeaf, numSampledList, outCollect, outSum, false);
- string outTemp = outputDir + toString(getpid()) + ".sumDiv.temp";
+ string outTemp = outputDir + m->mothurGetpid(process) + ".sumDiv.temp";
ofstream out;
m->openOutputFile(outTemp, out);
//write out data to file so parent can read it
ofstream out;
- string s = toString(getpid()) + ".temp";
+ string s = m->mothurGetpid(process) + ".temp";
m->openOutputFile(s, out);
//output observed distances
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- string locationsFile = toString(getpid()) + ".temp";
- num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", locationsFile, badSeqNames, lines[process], pstart, adjustNeeded);
+ string locationsFile = m->mothurGetpid(process) + ".temp";
+ num = driverPcr(filename, goodFileName + m->mothurGetpid(process) + ".temp", badFileName + m->mothurGetpid(process) + ".temp", locationsFile, badSeqNames, lines[process], pstart, adjustNeeded);
//pass numSeqs to parent
ofstream out;
- string tempFile = filename + toString(getpid()) + ".num.temp";
+ string tempFile = filename + m->mothurGetpid(process) + ".num.temp";
m->openOutputFile(tempFile, out);
out << pstart << '\t' << adjustNeeded << endl;
out << num << '\t' << badSeqNames.size() << endl;
}
}
- string locationsFile = toString(getpid()) + ".temp";
+ string locationsFile = m->mothurGetpid(process) + ".temp";
num = driverPcr(filename, goodFileName, badFileName, locationsFile, badSeqNames, lines[0], pstart, adjustNeeded);
//force parent to wait until all the processes are done
process++;
}else if (pid == 0){
outputNames.clear();
- num = driverGroups(newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMFile, lines[process].start, lines[process].end, groups);
+ num = driverGroups(newFName + m->mothurGetpid(process) + ".temp", newNName + m->mothurGetpid(process) + ".temp", newMFile, lines[process].start, lines[process].end, groups);
- string tempFile = toString(getpid()) + ".outputNames.temp";
+ string tempFile = m->mothurGetpid(process) + ".outputNames.temp";
ofstream outTemp;
m->openOutputFile(tempFile, outTemp);
process++;
}else if (pid == 0){
//clear old file because we append in driver
- m->mothurRemove(newSummaryFile + toString(getpid()) + ".temp");
+ m->mothurRemove(newSummaryFile + m->mothurGetpid(process) + ".temp");
- otusToRemove = driver(newSummaryFile + toString(getpid()) + ".temp", minTms, maxTms, primers, conSeqs, lines[process].start, lines[process].end, numBinsProcessed, binIndex);
+ otusToRemove = driver(newSummaryFile + m->mothurGetpid(process) + ".temp", minTms, maxTms, primers, conSeqs, lines[process].start, lines[process].end, numBinsProcessed, binIndex);
- string tempFile = toString(getpid()) + ".otus2Remove.temp";
+ string tempFile = m->mothurGetpid(process) + ".otus2Remove.temp";
ofstream outTemp;
m->openOutputFile(tempFile, outTemp);
}else if (pid == 0){
counts = driverGetCounts(nameMap, fastaCount, otuCounts, lines[process].start, lines[process].end);
- string tempFile = toString(getpid()) + ".cons_counts.temp";
+ string tempFile = m->mothurGetpid(process) + ".cons_counts.temp";
ofstream outTemp;
m->openOutputFile(tempFile, outTemp);
//pass numSeqs to parent
for(int i=0;i<displays.size();i++){
- string tempFile = toString(getpid()) + toString(i) + ".rarefact.temp";
+ string tempFile = m->mothurGetpid(process) + toString(i) + ".rarefact.temp";
displays[i]->outputTempFiles(tempFile);
}
exit(0);
typesFiles[extension] = temp;
}
if (!(m->inUsersGroups(file2Group[i], groupNames))) { groupNames.push_back(file2Group[i]); }
+
}
//for each type create a combo file
for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk
//grab data for each group
- for (map<string, map<int, vector< vector<string> > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) {
-
- string group = itFileNameGroup->first;
+ for (int n = 0; n < groupNames.size(); n++) {
+ string group = groupNames[n];
map<int, vector< vector<string> > >::iterator itLine = files[group].find(*itNumSampled);
if (itLine != files[group].end()) {
//pass numSeqs to parent
ofstream out;
- string tempFile = contigsreport + toString(getpid()) + ".num.temp";
+ string tempFile = contigsreport + m->mothurGetpid(process) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
//pass numSeqs to parent
ofstream out;
- string tempFile = alignreport + toString(getpid()) + ".num.temp";
+ string tempFile = alignreport + m->mothurGetpid(process) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
//pass numSeqs to parent
ofstream out;
- string tempFile = fastafile + toString(getpid()) + ".num.temp";
+ string tempFile = fastafile + m->mothurGetpid(process) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driver(lines[process], goodFileName + toString(getpid()) + ".temp", badAccnos + toString(getpid()) + ".temp", filename, badSeqNames);
+ num = driver(lines[process], goodFileName + m->mothurGetpid(process) + ".temp", badAccnos + m->mothurGetpid(process) + ".temp", filename, badSeqNames);
//pass numSeqs to parent
ofstream out;
- string tempFile = filename + toString(getpid()) + ".num.temp";
+ string tempFile = filename + m->mothurGetpid(process) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out.close();
process++;
}else if (pid == 0){
- num = driver(filename, qFileName, rFileName, summaryFileName + toString(getpid()) + ".temp", errorOutputFileName+ toString(getpid()) + ".temp", chimeraOutputFileName + toString(getpid()) + ".temp", lines[process], qLines[process], rLines[process]);
+ num = driver(filename, qFileName, rFileName, summaryFileName + m->mothurGetpid(process) + ".temp", errorOutputFileName+ m->mothurGetpid(process) + ".temp", chimeraOutputFileName + m->mothurGetpid(process) + ".temp", lines[process], qLines[process], rLines[process]);
//pass groupCounts to parent
ofstream out;
- string tempFile = filename + toString(getpid()) + ".info.temp";
+ string tempFile = filename + m->mothurGetpid(process) + ".info.temp";
m->openOutputFile(tempFile, out);
//output totalBases and totalMatches
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile + toString(getpid()) + ".temp", lines[process]);
+ num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile + m->mothurGetpid(process) + ".temp", lines[process]);
//pass numSeqs to parent
ofstream out;
- string tempFile = fastafile + toString(getpid()) + ".num.temp";
+ string tempFile = fastafile + m->mothurGetpid(process) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driver(sffFiles, oligosFiles, lines[process].start, lines[process].end, fasta + toString(getpid()) + ".temp", name + toString(getpid()) + ".temp", group + toString(getpid()) + ".temp");
+ num = driver(sffFiles, oligosFiles, lines[process].start, lines[process].end, fasta + m->mothurGetpid(process) + ".temp", name + m->mothurGetpid(process) + ".temp", group + m->mothurGetpid(process) + ".temp");
//pass numSeqs to parent
ofstream out;
- string tempFile = toString(getpid()) + ".num.temp";
+ string tempFile = m->mothurGetpid(process) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << '\t' << outputNames.size() << endl;
for (int i = 0; i < outputNames.size(); i++) { out << outputNames[i] << endl; }
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driver(dividedFiles[process], compositeFASTAFileName + toString(getpid()) + ".temp", compositeNamesFileName + toString(getpid()) + ".temp");
+ num = driver(dividedFiles[process], compositeFASTAFileName + m->mothurGetpid(process) + ".temp", compositeNamesFileName + m->mothurGetpid(process) + ".temp");
//pass numSeqs to parent
ofstream out;
- string tempFile = compositeFASTAFileName + toString(getpid()) + ".num.temp";
+ string tempFile = compositeFASTAFileName + m->mothurGetpid(process) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out.close();
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- mapfileNames = driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMName, lines[process].start, lines[process].end, groups);
+ mapfileNames = driverGroups(parser, newFName + m->mothurGetpid(process) + ".temp", newNName + m->mothurGetpid(process) + ".temp", newMName, lines[process].start, lines[process].end, groups);
//pass filenames to parent
ofstream out;
- string tempFile = newMName + toString(getpid()) + ".temp";
+ string tempFile = newMName + m->mothurGetpid(process) + ".temp";
m->openOutputFile(tempFile, out);
out << mapfileNames.size() << endl;
for (int i = 0; i < mapfileNames.size(); i++) {
//pass pvalues to parent
ofstream out;
- string tempFile = toString(getpid()) + ".pvalues.temp";
+ string tempFile = m->mothurGetpid(process) + ".pvalues.temp";
m->openOutputFile(tempFile, out);
//pass values
CommandParameter poligos("oligos", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(poligos);
CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile);
CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq);
+ CommandParameter pcontact("contact", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pcontact);
//choose only one multiple options
- CommandParameter pplatform("platform", "Multiple", "454-???-???", "454", "", "", "","",false,false); parameters.push_back(pplatform);
+ CommandParameter pplatform("platform", "Multiple", "_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT", "_LS454", "", "", "","",false,false); parameters.push_back(pplatform);
+ CommandParameter pinstrument("instrument", "Multiple", "454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified", "454_GS", "", "", "","",false,false); parameters.push_back(pinstrument);
+ CommandParameter plibstrategy("libstrategy", "String", "AMPLICON", "", "", "", "","",false,false); parameters.push_back(plibstrategy);
+ CommandParameter plibsource("libsource", "String", "METAGENOMIC", "", "", "", "","",false,false); parameters.push_back(plibsource);
+ CommandParameter plibselection("libselection", "String", "PCR", "", "", "", "","",false,false); parameters.push_back(plibselection);
+
CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
try {
string helpString = "";
helpString += "The sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n";
- helpString += "The sra command parameters are: sff, fastq, file, oligos, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, group.\n";
+ helpString += "The sra command parameters are: sff, fastq, file, oligos, contact, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, group, platform, libstrategy, libsource, libselection and instrument.\n";
helpString += "The sff parameter is used to provide the original sff file.\n";
helpString += "The fastq parameter is used to provide the original fastq file.\n";
+ helpString += "The contact parameter is used to provide your contact file.\n";
helpString += "The oligos parameter is used to provide an oligos file to parse your sff or fastq file by.\n";
helpString += "The group parameter is used to provide the group file to parse your sff or fastq file by.\n";
- helpString += "The file parameter is used to provide a file containing a list of individual fastq or sff files.\n";
+ helpString += "The file parameter is used to provide a file containing a list of individual fastq or sff files or paired fastq files with a group assignment. File lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file. The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile.\n";
helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
-
- helpString += "The new command should be in the following format: \n";
- helpString += "new(...)\n";
+ //controlled vocabulary parameters - the choices listed here mirror the option lists given to the CommandParameter declarations
+ helpString += "The platform parameter is used to specify the platform you are using. Choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n";
+ helpString += "The instrument parameter is used to specify instrument. Choices are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified. Default=454_GS. This is a controlled vocabulary section in the XML file that will be generated. \n";
+ helpString += "The libstrategy parameter is used to specify library strategy. Default=AMPLICON. Choices are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
+ helpString += "The libsource parameter is used to specify library source. Default=METAGENOMIC. Choices are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
+ helpString += "The libselection parameter is used to specify library selection. Default=PCR. Choices are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified. This is a controlled vocabulary section in the XML file that will be generated. \n";
+
+ helpString += "The sra command should be in the following format: \n";
+ helpString += "sra(...)\n";
return helpString;
}
catch(exception& e) {
SRACommand::SRACommand(string option) {
try {
abort = false; calledHelp = false;
+ libLayout = "single"; //controlled vocab
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["oligos"] = inputDir + it->second; }
}
+
+ it = parameters.find("contact");
+ //user has given a contact file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["contact"] = inputDir + it->second; }
+ }
}
//check for parameters
else if(oligosfile == "not open") { abort = true; }
else { m->setOligosFile(oligosfile); }
+ contactfile = validParameter.validFile(parameters, "contact", true);
+ if (contactfile == "not found") { contactfile = ""; m->mothurOut("[ERROR]: You must provide a contact file before you can use the sra command."); m->mothurOutEndLine(); abort = true; }
+ else if(contactfile == "not open") { abort = true; }
file = validParameter.validFile(parameters, "file", true);
if (file == "not open") { file = ""; abort = true; }
}
}
- //use only one Mutliple type
- platform = validParameter.validFile(parameters, "platform", false);
- if (platform == "not found") { platform = "454"; }
-
- if ((platform == "454") || (platform == "????") || (platform == "????") || (platform == "????")) { }
- else { m->mothurOut("Not a valid platform option. Valid platform options are 454, ...."); m->mothurOutEndLine(); abort = true; }
+ //use only one Multiple type _LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
+ platform = validParameter.validFile(parameters, "platform", false); if (platform == "not found") { platform = "_LS454"; }
+ if (!checkCasesPlatforms(platform)) { abort = true; } //error message in function
+
+ if (!abort) { //don't check instrument model is platform is bad
+ //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
+ instrumentModel = validParameter.validFile(parameters, "instrument", false); if (instrumentModel == "not found") { instrumentModel = "454_GS"; }
+ if (!checkCasesInstrumentModels(instrumentModel)) { abort = true; } //error message in function
+ }
+ //turn _ to spaces mothur's work around
+ for (int i = 0; i < instrumentModel.length(); i++) { if (instrumentModel[i] == '_') { instrumentModel[i] = ' '; } }
+
+ libStrategy = validParameter.validFile(parameters, "libstrategy", false); if (libStrategy == "not found") { libStrategy = "AMPLICON"; }
+ if (!checkCasesLibStrategy(libStrategy)) { abort = true; } //error message in function
+
+ //turn _ to spaces mothur's work around
+ for (int i = 0; i < libStrategy.length(); i++) { if (libStrategy[i] == '_') { libStrategy[i] = ' '; } }
+
+ libSource = validParameter.validFile(parameters, "libsource", false); if (libSource == "not found") { libSource = "METAGENOMIC"; }
+ if (!checkCasesLibSource(libSource)) { abort = true; } //error message in function
+
+ //turn _ to spaces mothur's work around
+ for (int i = 0; i < libSource.length(); i++) { if (libSource[i] == '_') { libSource[i] = ' '; } }
+
+ libSelection = validParameter.validFile(parameters, "libselection", false); if (libSelection == "not found") { libSelection = "PCR"; }
+ if (!checkCasesLibSelection(libSelection)) { abort = true; } //error message in function
+ //turn _ to spaces mothur's work around
+ for (int i = 0; i < libSelection.length(); i++) { if (libSelection[i] == '_') { libSelection[i] = ' '; } }
+
string temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found"){ temp = "0"; }
m->mothurConvert(temp, bdiffs);
if (abort == true) { if (calledHelp) { return 0; } return 2; }
+ readContactFile();
+ if (oligosfile != "") { readOligos(); Groups.push_back("scrap"); }
+ if (groupfile != "") { GroupMap groupmap(groupfile); groupmap.readMap(); Groups = groupmap.getNamesOfGroups(); Groups.push_back("scrap"); }
+
+ if (m->control_pressed) { return 0; }
+
//parse files
- vector<string> filesBySample;
+ map<string, vector<string> > filesBySample;
isSFF = false;
if (file != "") { readFile(filesBySample); }
else if (sfffile != "") { parseSffFile(filesBySample); }
else if (fastqfile != "") { parseFastqFile(filesBySample); }
+ //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
+ checkGroups(filesBySample);
+
//create xml file
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(inputfile); }
+ map<string, string> variables;
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(inputfile));
+ string outputFileName = getOutputFileName("xml", variables);
+ outputNames.push_back(outputFileName); outputTypes["xml"].push_back(outputFileName);
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+ //contacts portion
+ //writes the <Description> header of the submission from the values loaded by readContactFile()
+ //NOTE(review): the values are written verbatim - no XML escaping is applied to them here
+ ////////////////////////////////////////////////////////
+ out << "<Submission>\n";
+ out << "\t<Description>\n";
+ out << "\t\t<Comment> New Submission. Generated by mothur version " + m->getVersion() + " </Comment> \n";
+ out << "\t\t<Submitter user_name=\"" + submissionName + "\"/>\n";
+ out << "\t\t<Organization type=\"" + centerType + "\">\n";
+ out << "\t\t<Name>" + centerName + "</Name>\n";
+ //email is an attribute of the Contact element, so it must sit inside the opening tag
+ out << "\t\t<Contact email=\"" + email + "\">\n";
+ out << "\t\t\t<Name>\n";
+ out << "\t\t\t\t<First>" + firstName + "</First>\n";
+ //use the last name read from the contact file (the first name was being written twice)
+ out << "\t\t\t\t<Last>" + lastName + "</Last>\n";
+ out << "\t\t\t</Name>\n";
+ out << "\t\t</Contact>\n";
+ out << "\t\t</Organization>\n";
+ out << "\t</Description>\n";
+ ////////////////////////////////////////////////////////
+
+ //bioproject
+ ////////////////////////////////////////////////////////
+ out << "\t<Action>\n";
+ out << "\t\t<AddData target_db=\"BioProject\">\n";
+ out << "\t\t\t<Data content_type=\"XML\">\n";
+ out << "\t\t\t\t<XmlContent>\n";
+ out << "\t\t\t\t\t<Project schema_version=\"2.0\">\n";
+ out << "\t\t\t\t\t\t<ProjectID>\n";
+ ///////////////////////out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + ProjectID + " </SPUID> \n";
+ out << "\t\t\t\t\t\t</ProjectID>\n";
+ out << "\t\t\t\t\t\t<Descriptor>\n";
+ ////////////////////out << "\t\t\t\t\t\t\t<Title>" + title + " </Title> \n";
+ out << "\t\t\t\t\t\t\t<Description><p>" + description + "</p></Description> \n";
+ out << "\t\t\t\t\t\t\t<ExternalLink label=\"Website name\">\n";
+ /////////////////////////out << "\t\t\t\t\t\t\t\t<URL>" + website + "</URL>\n";
+ out << "\t\t\t\t\t\t\t</ExternalLink>\n";
+ out << "\t\t\t\t\t\t\t<Relevance>\n";
+ //////////////////////out << "\t\t\t\t\t\t\t\t<Medical>" + medicalRelevance + "</Medical>\n";
+ out << "\t\t\t\t\t\t\t</Relevance>\n";
+ out << "\t\t\t\t\t\t</Descriptor>\n";
+ out << "\t\t\t\t\t\t<ProjectType>\n";
+ /////////////////////////out << "\t\t\t\t\t\t\t<ProjectTypeSubmission sample_scope=\"eMultiisolate\">\n"; //<!-- controlled vocabulary? -->
+ out << "\t\t\t\t\t\t\t\t<Organism>\n";
+ ////////////////////out << "\t\t\t\t\t\t\t\t\t<OrganismName>" + scientificName + " </OrganismName> \n";
+ out << "\t\t\t\t\t\t\t\t</Organism>\n";
+ out << "\t\t\t\t\t\t\t\t<IntendedDataTypeSet>\n";
+ ////////////////////out << "\t\t\t\t\t\t\t\t\t<DataType>" + dataType + " </DataType> \n"; <!-- controlled vocabulary? -->
+ out << "\t\t\t\t\t\t\t\t</IntendedDataTypeSet>\n";
+ out << "\t\t\t\t\t\t\t</ProjectTypeSubmission>\n";
+ out << "\t\t\t\t\t\t</ProjectType>\n";
+ out << "\t\t\t\t\t</Project>\n";
+ out << "\t\t\t\t</XmlContent>\n";
+ out << "\t\t\t</Data>\n";
+ out << "\t\t\t<Identifier>\n";
+ ////////////////////////////out << "\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + ProjectID + " </SPUID>\n";
+ out << "\t\t\t</Identifier>\n";
+ out << "\t\t</AddData>\n";
+ out << "\t</Action>\n";
+ ////////////////////////////////////////////////////////
+
+ //bioSample
+ ////////////////////////////////////////////////////////
+ //one BioSample <Action> is written for every file of every group
+ for (int i = 0; i < Groups.size(); i++) {
+
+ vector<string> thisGroupsFiles = filesBySample[Groups[i]];
+ string barcodeForThisSample = Group2Barcode[Groups[i]];
+
+ for (int j = 0; j < thisGroupsFiles.size(); j++) {
+ if (m->control_pressed) { break; }
+ out << "\t<Action>\n";
+ out << "\t\t<AddData target_db=\"BioSample\">\n";
+ out << "\t\t\t<Data content_type=\"XML\">\n";
+ out << "\t\t\t\t<XmlContent>\n";
+ out << "\t\t\t\t\t<BioSample schema_version=\"2.0\">\n";
+ out << "\t\t\t\t\t\t<SampleId>\n";
+ out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + Groups[i] + " </SPUID> \n";
+ out << "\t\t\t\t\t\t</SampleId>\n";
+ out << "\t\t\t\t\t\t<Descriptor>\n";
+ ////////////////////out << "\t\t\t\t\t\t\t<Title>" + title + " </Title> \n";
+ out << "\t\t\t\t\t\t</Descriptor>\n";
+ out << "\t\t\t\t\t\t<Organism>\n";
+ ////////////////////out << "\t\t\t\t\t\t\t<OrganismName>" + scientificName + " </OrganismName> \n";
+ out << "\t\t\t\t\t\t</Organism>\n";
+ out << "\t\t\t\t\t\t<BioProject>\n";
+ ///////////////////////out << "\t\t\t\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + BioProject + " </SPUID> \n";
+ out << "\t\t\t\t\t\t</BioProject>\n";
+ //these three lines must end with a newline escape - a bare 'n' was being written into the XML
+ out << "\t\t\t\t\t\t<Package>MIMARKS.specimen</Package>\n";
+ out << "\t\t\t\t\t\t<Attributes>\n";
+ //add biosample required attributes
+ ///////////////////////////////////////////////////////////////////////
+
+ out << "\t\t\t\t\t\t</Attributes>\n";
+ out << "\t\t\t\t\t</BioSample>\n";
+ out << "\t\t\t\t</XmlContent>\n";
+ out << "\t\t\t</Data>\n";
+
+ //libID
+ out << "\t\t\t<Identifier>\n";
+ string libId = thisGroupsFiles[j] + barcodeForThisSample;
+ if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
+ vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
+ libId = pieces[0] + barcodeForThisSample;
+ }
+ out << "\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + libId + " </SPUID>\n";
+ out << "\t\t\t</Identifier>\n";
+
+ out << "\t\t</AddData>\n";
+ out << "\t</Action>\n";
+ }
+ }
+
+ for (int i = 0; i < Groups.size(); i++) {
+
+ vector<string> thisGroupsFiles = filesBySample[Groups[i]];
+ string barcodeForThisSample = Group2Barcode[Groups[i]];
+
+ for (int j = 0; j < thisGroupsFiles.size(); j++) {
+ if (m->control_pressed) { break; }
+ out << "\t<Action>\n";
+ out << "\t\t<AddFiles target_db=\"SRA\">\n";
+ if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
+ vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
+ out << "\t\t\t<File file_path=\"" + pieces[0] + "\">\n";
+ ////////////////////out << "\t\t\t\t<DataType>fastq</DataType> \n"; //since its paired we know its fastq, is the dataType the fileType???
+ out << "\t\t\t</File>\n";
+ out << "\t\t\t<File file_path=\"" + pieces[1] + "\">\n";
+ ////////////////////out << "\t\t\t\t<DataType>fastq</DataType> \n"; //since its paired we know its fastq, is the dataType the fileType???
+ out << "\t\t\t</File>\n";
+ }else { //single
+ out << "\t\t\t<File file_path=\"" + thisGroupsFiles[j] + "\">\n";
+ string dataType = "fastq";
+ if (isSFF) { dataType = "sff"; }
+ ////////////////////out << "\t\t\t\t<DataType>" + dataType + " </DataType> \n"; //is the dataType the fileType???
+ out << "\t\t\t</File>\n";
+ }
+ //attributes
+ out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
+ out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
+ out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
+ out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
+ out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
+
+ //////////////////bioSample info
+ ///////////////////bioProject info
+
+ //libID
+ out << "\t\t\t<Identifier>\n";
+ string libId = thisGroupsFiles[j] + barcodeForThisSample;
+ if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
+ vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
+ libId = pieces[0] + barcodeForThisSample;
+ }
+ out << "\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + libId + " </SPUID>\n";
+ out << "\t\t\t</Identifier>\n";
+ out << "\t\t</AddFiles>\n";
+ out << "\t</Action>\n";
+ }
+ }
+
+ ////////////////////////////////////////////////////////
+ out << "</Submission>\n";
+ out.close();
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
//output files created by command
m->mothurOutEndLine();
}
}
//**********************************************************************************************************************
-int SRACommand::readFile(vector<string>& files){
+// Reads the contact file named by contactfile and fills in the submitter fields.
+// Each entry is a key followed by the rest of the line as its value. Keys are matched
+// case-insensitively: USERNAME, LAST, FIRST, EMAIL, CENTER, TYPE and DESCRIPTION.
+// TYPE is lower-cased and must be one of consortium, center, institute or lab.
+// Any invalid TYPE or missing field reports an [ERROR] and sets m->control_pressed.
+// Always returns 0; callers detect failure through m->control_pressed.
+int SRACommand::readContactFile(){
try {
- files.clear();
+ lastName = ""; firstName = ""; submissionName = ""; email = ""; centerName = ""; centerType = ""; description = "";
ifstream in;
- m->openInputFile(file, in);
+ m->openInputFile(contactfile, in);
while(!in.eof()) {
if (m->control_pressed) { break; }
- string filename;
- in >> filename; m->gobble(in);
- files.push_back(filename);
+ string key, value;
+ in >> key; m->gobble(in);
+ value = m->getline(in); m->gobble(in);
+
+ //cast through unsigned char - passing a negative char to toupper/tolower is undefined behaviour
+ for (int i = 0; i < key.length(); i++) { key[i] = toupper(static_cast<unsigned char>(key[i])); }
+
+ if (key == "USERNAME") { submissionName = value; }
+ else if (key == "LAST") { lastName = value; }
+ else if (key == "FIRST") { firstName = value; }
+ else if (key == "EMAIL") { email = value; }
+ else if (key == "CENTER") { centerName = value; }
+ else if (key == "TYPE") {
+ centerType = value;
+ for (int i = 0; i < centerType.length(); i++) { centerType[i] = tolower(static_cast<unsigned char>(centerType[i])); }
+ if ((centerType == "consortium") || (centerType == "center") || (centerType == "institute") || (centerType == "lab")) {}
+ else { m->mothurOut("[ERROR]: " + centerType + " is not a center type option. Valid center type options are consortium, center, institute and lab. This is a controlled vocabulary section in the XML file that will be generated."); m->mothurOutEndLine(); m->control_pressed = true; }
+ }else if (key == "DESCRIPTION") { description = value; }
}
in.close();
- if (!m->control_pressed) {
- if (files.size() > 0) {
- int pos = files[0].find(".sff");
- if (pos != string::npos) { isSFF = true; } //these files are sff files
+ //every field is required - report each one that is missing rather than stopping at the first
+ if (lastName == "") { m->mothurOut("[ERROR]: missing last name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
+ if (firstName == "") { m->mothurOut("[ERROR]: missing first name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
+ if (submissionName == "") { m->mothurOut("[ERROR]: missing submission name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
+ if (email == "") { m->mothurOut("[ERROR]: missing email from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
+ if (centerName == "") { m->mothurOut("[ERROR]: missing center name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
+ if (centerType == "") { m->mothurOut("[ERROR]: missing center type from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
+ if (description == "") { m->mothurOut("[ERROR]: missing description from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SRACommand", "readContactFile");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+// going to have to rework this to allow for other options --
+/*
+ file option 1
+
+ sfffile1 oligosfile1
+ sfffile2 oligosfile2
+ ...
+
+ file option 2
+
+ fastqfile1 oligosfile1
+ fastqfile2 oligosfile2
+ ...
+
+ file option 3
+
+ group forwardFastqFile reverseFastqFile
+ group forwardFastqFile reverseFastqFile
+ group forwardFastqFile reverseFastqFile
+ ...
+
+*/
+
+// Reads the file named by the file parameter and fills files (group name -> list of file names).
+// Each line has 2 or 3 whitespace separated columns:
+//   2 columns: an sff or fastq file followed by its oligos file. The pair is handed to
+//              parseSffFile() or parseFastqFile(), chosen by whether the first name contains ".sff".
+//   3 columns: group name, forward fastq, reverse fastq. libLayout becomes "paired" and the two
+//              file names are stored under the group as one space separated string.
+// Any other column count reports an [ERROR] and sets m->control_pressed.
+// A file that cannot be opened as given is retried in the default path and then in the output
+// directory; if it still cannot be found a [WARNING] is printed and the line is ignored.
+// Returns 0.
+int SRACommand::readFile(map<string, vector<string> >& files){
+ try {
+ inputfile = file;
+ files.clear();
+
+ ifstream in;
+ m->openInputFile(file, in);
+
+ while(!in.eof()) {
+
+ if (m->control_pressed) { return 0; }
+
+ string line = m->getline(in); m->gobble(in);
+ vector<string> pieces = m->splitWhiteSpace(line);
+
+ string group = "";
+ string thisFileName1, thisFileName2; thisFileName1 = ""; thisFileName2 = "";
+ if (pieces.size() == 2) {
+ thisFileName1 = pieces[0];
+ thisFileName2 = pieces[1];
+ }else if (pieces.size() == 3) {
+ thisFileName1 = pieces[1];
+ thisFileName2 = pieces[2];
+ //assign to the group declared above - redeclaring it here left the outer one empty, so every pair was stored under ""
+ group = pieces[0];
+ libLayout = "paired";
+ }else {
+ m->mothurOut("[ERROR]: file lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file. The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile. \n"); m->control_pressed = true;
+ }
+
+ if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2 + ".\n"); }
+
+ //check to make sure both are able to be opened
+ ifstream in2;
+ int openForward = m->openInputFile(thisFileName1, in2, "noerror");
+
+ //if you can't open it, try default location
+ if (openForward == 1) {
+ if (m->getDefaultPath() != "") { //default path is set
+ string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1);
+ m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine();
+ ifstream in3;
+ openForward = m->openInputFile(tryPath, in3, "noerror");
+ in3.close();
+ thisFileName1 = tryPath;
+ }
+ }
+
+ //if you can't open it, try output location
+ if (openForward == 1) {
+ if (m->getOutputDir() != "") { //output directory is set
+ string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName1);
+ m->mothurOut("Unable to open " + thisFileName1 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+ ifstream in4;
+ openForward = m->openInputFile(tryPath, in4, "noerror");
+ thisFileName1 = tryPath;
+ in4.close();
+ }
+ }
+
+ if (openForward == 1) { //can't find it
+ m->mothurOut("[WARNING]: can't find " + thisFileName1 + ", ignoring.\n");
+ }else{ in2.close(); }
+
+ ifstream in3;
+ int openReverse = m->openInputFile(thisFileName2, in3, "noerror");
+
+ //if you can't open it, try default location
+ if (openReverse == 1) {
+ if (m->getDefaultPath() != "") { //default path is set
+ string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName2);
+ m->mothurOut("Unable to open " + thisFileName2 + ". Trying default " + tryPath); m->mothurOutEndLine();
+ ifstream in3;
+ openReverse = m->openInputFile(tryPath, in3, "noerror");
+ in3.close();
+ thisFileName2 = tryPath;
+ }
+ }
+
+ //if you can't open it, try output location
+ if (openReverse == 1) {
+ if (m->getOutputDir() != "") { //output directory is set
+ string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName2);
+ m->mothurOut("Unable to open " + thisFileName2 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+ ifstream in4;
+ openReverse = m->openInputFile(tryPath, in4, "noerror");
+ thisFileName2 = tryPath;
+ in4.close();
+ }
+ }
+
+ if (openReverse == 1) { //can't find it
+ m->mothurOut("[WARNING]: can't find " + thisFileName2 + ", ignoring pair.\n");
+ }else{ in3.close(); }
+
+ if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos
+ //process pair
+ //decide the file type from the first column's name - the old check indexed a vector that was always empty
+ if (thisFileName1.find(".sff") != string::npos) {//these files are sff files
+ isSFF = true;
+ sfffile = thisFileName1; oligosfile = thisFileName2;
+ readOligos();
+ parseSffFile(files);
+ }else{
+ isSFF = false;
+ fastqfile = thisFileName1; oligosfile = thisFileName2;
+ readOligos();
+ parseFastqFile(files);
+ }
+
+ }else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read
+ map<string, vector<string> >::iterator it = files.find(group);
+ if (it == files.end()) {
+ vector<string> temp; temp.push_back(thisFileName1 + " " + thisFileName2); files[group] = temp;
+ }else {
+ files[group].push_back(thisFileName1 + " " + thisFileName2);
+ }
}
}
+ in.close();
+
+ //parseSffFile / parseFastqFile overwrite inputfile, so point it back at the file parameter
+ inputfile = file;
return 0;
}
}
}
//**********************************************************************************************************************
-int SRACommand::parseSffFile(vector<string>& files){
+int SRACommand::parseSffFile(map<string, vector<string> >& files){
try {
+ vector<string> theseFiles;
+ inputfile = sfffile;
+ libLayout = "single"; //controlled vocab
+
isSFF = true;
//run sffinfo to parse sff file into individual sampled sff files
string commandString = "sff=" + sfffile;
map<string, vector<string> > filenames = sffinfoCommand->getOutputFiles();
map<string, vector<string> >::iterator it = filenames.find("sff");
- if (it != filenames.end()) { files = it->second; }
+ if (it != filenames.end()) { theseFiles = it->second; }
else { m->control_pressed = true; } // error in sffinfo
delete sffinfoCommand;
m->mothurCalling = false;
m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+ mapGroupToFile(files, theseFiles);
+
return 0;
}
catch(exception& e) {
}
//**********************************************************************************************************************
-int SRACommand::parseFastqFile(vector<string>& files){
+int SRACommand::parseFastqFile(map<string, vector<string> >& files){
try {
+ vector<string> theseFiles;
+ inputfile = fastqfile;
+ libLayout = "single"; //controlled vocab
//run sffinfo to parse sff file into individual sampled sff files
string commandString = "fastq=" + fastqfile;
map<string, vector<string> > filenames = fastqinfoCommand->getOutputFiles();
map<string, vector<string> >::iterator it = filenames.find("fastq");
- if (it != filenames.end()) { files = it->second; }
+ if (it != filenames.end()) { theseFiles = it->second; }
else { m->control_pressed = true; } // error in sffinfo
delete fastqinfoCommand;
m->mothurCalling = false;
m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+ mapGroupToFile(files, theseFiles);
+
return 0;
}
catch(exception& e) {
exit(1);
}
}
+//***************************************************************************************************************
+//maps each group to the parsed file that belongs to it.
+//a file belongs to a group when the group name occurs in the file name immediately followed by a '.',
+//ie. group soil matches final.soil.sff but not final.soil2.sff. Only the first occurrence of the group name in a file name is examined.
+//a group is added to files only when exactly one entry of theseFiles matches it; groups with no match or with several matches are left out.
+//always returns 0.
+int SRACommand::mapGroupToFile(map<string, vector<string> >& files, vector<string> theseFiles){
+    try {
+        for (int i = 0; i < Groups.size(); i++) {
+
+            //indexes into theseFiles of every file name that matches Groups[i]
+            set<int> matches;
+            for (int j = 0; j < theseFiles.size(); j++) {
+                string::size_type pos = theseFiles[j].find(Groups[i]);
+                if (pos != string::npos) { //you have a potential match, make sure you dont have a case of partial name
+                    string::size_type next = pos + Groups[i].length();
+                    if ((next < theseFiles[j].length()) && (theseFiles[j][next] == '.')) { //final.soil.sff vs final.soil2.sff both would match soil.
+                        matches.insert(j); //store the file's index (not the group's), it is used to look the file up below
+                    }
+                }
+            }
+
+            if (matches.size() == 1) {
+                //operator[] creates the group's entry the first time a file is assigned to it
+                files[Groups[i]].push_back(theseFiles[*matches.begin()]);
+            }
+        }
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "mapGroupToFile");
+        exit(1);
+    }
+}
+
+//***************************************************************************************************************
+//checks groups against the files returned from parse - keeps only the groups that have an entry in files, preserving their order.
+//always returns 0.
+int SRACommand::checkGroups(map<string, vector<string> >& files){
+    try {
+        vector<string> groupsWithFiles;
+
+        for (int g = 0; g < Groups.size(); g++) {
+            //a group without an entry in files is dropped
+            if (files.count(Groups[g]) != 0) { groupsWithFiles.push_back(Groups[g]); }
+        }
+
+        Groups = groupsWithFiles;
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "checkGroups");
+        exit(1);
+    }
+}
+//***************************************************************************************************************
+//reads oligosfile line by line and fills the oligo containers of this command:
+// FORWARD lines fill primers and primerNameVector, PRIMER lines fill pairedPrimers and primerNameVector,
+// REVERSE lines fill revPrimer, BARCODE lines fill barcodes or pairedBarcodes and barcodeNameVector,
+// LINKER and SPACER lines fill linker and spacer. Lines whose first token starts with '#' are skipped.
+//the type and the oligos are upper cased and U is replaced with T before they are stored.
+//afterwards every barcode / primer combination is turned into a group name that is stored in Group2Barcode and appended to Groups.
+//NOTE(review): returns true (1) on the normal path, but 0 after setting m->control_pressed when paired and non paired oligos are mixed.
+int SRACommand::readOligos(){
+ try {
+ ifstream inOligos;
+ m->openInputFile(oligosfile, inOligos);
+
+ string type, oligo, roligo, group;
+ bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false;
+
+ //next free index for each container, used to tie an oligo to its entry in the name vectors
+ int indexPrimer = 0;
+ int indexBarcode = 0;
+ int indexPairedPrimer = 0;
+ int indexPairedBarcode = 0;
+ //forward+reverse strings already seen, used to report repeated pairs
+ set<string> uniquePrimers;
+ set<string> uniqueBarcodes;
+
+ while(!inOligos.eof()){
+
+ inOligos >> type;
+
+ if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
+
+ if(type[0] == '#'){
+ while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ m->gobble(inOligos);
+ }
+ else{
+ m->gobble(inOligos);
+ //make type case insensitive
+ for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
+
+ inOligos >> oligo;
+
+ if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
+
+ //upper case the oligo and treat U as T
+ for(int i=0;i<oligo.length();i++){
+ oligo[i] = toupper(oligo[i]);
+ if(oligo[i] == 'U') { oligo[i] = 'T'; }
+ }
+
+ if(type == "FORWARD"){
+ group = "";
+
+ // get rest of line in case there is a primer name
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13 || c == -1){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ else { group += c; }
+ }
+
+ //check for repeat primers - a repeat is only reported, the entry below still replaces the earlier index
+ map<string, int>::iterator itPrime = primers.find(oligo);
+ if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
+
+ if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
+
+ primers[oligo] = indexPrimer; indexPrimer++;
+ primerNameVector.push_back(group);
+ }
+ else if (type == "PRIMER"){
+ m->gobble(inOligos);
+
+ //second column is the reverse primer
+ inOligos >> roligo;
+
+ for(int i=0;i<roligo.length();i++){
+ roligo[i] = toupper(roligo[i]);
+ if(roligo[i] == 'U') { roligo[i] = 'T'; }
+ }
+ roligo = reverseOligo(roligo);
+
+ group = "";
+
+ // get rest of line in case there is a primer name
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13 || c == -1){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ else { group += c; }
+ }
+
+ oligosPair newPrimer(oligo, roligo);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
+
+ //check for repeat primer pairs - a repeat is only reported, the pair is still stored below
+ string tempPair = oligo+roligo;
+ if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
+ else { uniquePrimers.insert(tempPair); }
+
+ if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
+
+ pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
+ primerNameVector.push_back(group);
+ hasPrimer = true;
+ }
+ else if(type == "REVERSE"){
+ //Sequence oligoRC("reverse", oligo);
+ //oligoRC.reverseComplement();
+ string oligoRC = reverseOligo(oligo);
+ revPrimer.push_back(oligoRC);
+ }
+ else if(type == "BARCODE"){
+ inOligos >> group;
+
+ //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
+ //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
+
+ //anything left on the line after the third column
+ string temp = "";
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13 || c == -1){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ else { temp += c; }
+ }
+
+ //then this is illumina data with 4 columns
+ if (temp != "") {
+ hasPairedBarcodes = true;
+ string reverseBarcode = group; //reverseOligo(group); //reverse barcode
+ group = temp;
+
+ for(int i=0;i<reverseBarcode.length();i++){
+ reverseBarcode[i] = toupper(reverseBarcode[i]);
+ if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
+ }
+
+ reverseBarcode = reverseOligo(reverseBarcode);
+ oligosPair newPair(oligo, reverseBarcode);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
+ //check for repeat barcodes
+ //NOTE(review): the message says disregarding, but the pair is still stored below
+ string tempPair = oligo+reverseBarcode;
+ if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
+ else { uniqueBarcodes.insert(tempPair); }
+
+ pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
+ barcodeNameVector.push_back(group);
+ }else {
+ //check for repeat barcodes
+ map<string, int>::iterator itBar = barcodes.find(oligo);
+ if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
+
+ barcodes[oligo]=indexBarcode; indexBarcode++;
+ barcodeNameVector.push_back(group);
+ }
+ }else if(type == "LINKER"){
+ linker.push_back(oligo);
+ }else if(type == "SPACER"){
+ spacer.push_back(oligo);
+ }
+ else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
+ }
+ m->gobble(inOligos);
+ }
+ inOligos.close();
+
+ //paired oligos can not be combined with any of the non paired containers
+ if (hasPairedBarcodes || hasPrimer) {
+ pairedOligos = true;
+ if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
+ }
+
+
+ //add in potential combos
+ if(barcodeNameVector.size() == 0){
+ barcodeNameVector.push_back("");
+ }
+
+ if(primerNameVector.size() == 0){
+ primerNameVector.push_back("");
+ }
+
+ set<string> uniqueNames; //used to cleanup outputFileNames
+ if (pairedOligos) {
+ //one group name per paired barcode / paired primer combination
+ for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
+ for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
+
+ string primerName = primerNameVector[itPrimer->first];
+ string barcodeName = barcodeNameVector[itBar->first];
+
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else {
+ string comboGroupName = "";
+ string fastqFileName = "";
+
+ //group name is the barcode name, the primer name, or barcodeName.primerName when both are set
+ if(primerName == ""){
+ comboGroupName = barcodeNameVector[itBar->first];
+ }
+ else{
+ if(barcodeName == ""){
+ comboGroupName = primerNameVector[itPrimer->first];
+ }
+ else{
+ comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
+ }
+ }
+ uniqueNames.insert(comboGroupName);
+ Group2Barcode[comboGroupName] = (itBar->second).forward+"."+(itBar->second).reverse;
+ }
+ }
+ }
+ }else {
+ //one group name per barcode / primer combination
+ for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+ for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+
+ string primerName = primerNameVector[itPrimer->second];
+ string barcodeName = barcodeNameVector[itBar->second];
+
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else {
+ string comboGroupName = "";
+ string fastqFileName = "";
+
+ if(primerName == ""){
+ comboGroupName = barcodeNameVector[itBar->second];
+ }
+ else{
+ if(barcodeName == ""){
+ comboGroupName = primerNameVector[itPrimer->second];
+ }
+ else{
+ comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+ }
+ }
+ uniqueNames.insert(comboGroupName);
+ Group2Barcode[comboGroupName] = itBar->first;
+ }
+ }
+ }
+ }
+
+
+ if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
+
+ //append the group names found in this oligos file to Groups
+ for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); }
+
+ return true;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SRACommand", "readOligos");
+ exit(1);
+ }
+}
+//********************************************************************/
+//returns the reverse complement of oligo. IUPAC ambiguity codes are complemented as well,
+//any character without an entry below (including lower case letters) becomes N.
+string SRACommand::reverseOligo(string oligo){
+    try {
+        string complement = "";
+
+        //walk the oligo from its last base to its first, appending the complement of each base
+        for (string::reverse_iterator base = oligo.rbegin(); base != oligo.rend(); ++base) {
+            switch (*base) {
+                case 'A': complement += 'T'; break;
+                case 'T':
+                case 'U': complement += 'A'; break;
+
+                case 'G': complement += 'C'; break;
+                case 'C': complement += 'G'; break;
+
+                case 'R': complement += 'Y'; break;
+                case 'Y': complement += 'R'; break;
+
+                case 'M': complement += 'K'; break;
+                case 'K': complement += 'M'; break;
+
+                //W and S are their own complements
+                case 'W': complement += 'W'; break;
+                case 'S': complement += 'S'; break;
+
+                case 'B': complement += 'V'; break;
+                case 'V': complement += 'B'; break;
+
+                case 'D': complement += 'H'; break;
+                case 'H': complement += 'D'; break;
+
+                default:  complement += 'N'; break;
+            }
+        }
+
+        return complement;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "reverseOligo");
+        exit(1);
+    }
+}
+//********************************************************************/
+//_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
+//checks platform against the options above, ignoring case. platform is converted to upper case in place,
+//and 454 is accepted as an alias that is rewritten to _LS454.
+//returns true when platform is valid, otherwise prints an error, sets abort and returns false.
+bool SRACommand::checkCasesPlatforms(string& platform){
+    try {
+        string original = platform; //kept for the error message, platform itself is modified below
+
+        //remove users possible case errors
+        for (int i = 0; i < platform.size(); i++) { platform[i] = toupper(platform[i]); }
+
+        if (platform == "454") { platform = "_LS454"; }
+
+        bool isOkay = ((platform == "_LS454") || (platform == "ILLUMINA") || (platform == "ION_TORRENT") || (platform == "PACBIO_SMRT"));
+
+        if (!isOkay) {
+            m->mothurOut("[ERROR]: " + original + " is not a valid platform option. Valid platform options are _LS454, ILLUMINA, ION_TORRENT or PACBIO_SMRT."); m->mothurOutEndLine(); abort = true;
+        }
+
+        return isOkay;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "checkCasesPlatforms");
+        exit(1);
+    }
+}
+//********************************************************************/
+//454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
+//checks instrumentModel against the models allowed for the platform member, ignoring case.
+//instrumentModel is converted to upper case in place and, when it is valid, replaced by its spelling from the list above.
+//returns true when the model is valid or when platform is not one of the four platforms handled here,
+//otherwise prints an error, sets abort and returns false.
+bool SRACommand::checkCasesInstrumentModels(string& instrumentModel){
+    try {
+        const string original = instrumentModel;
+
+        //remove users possible case errors
+        for (int i = 0; i < instrumentModel.size(); i++) { instrumentModel[i] = toupper(instrumentModel[i]); }
+
+        //spellings allowed for the current platform, and the end of the error message that lists them
+        vector<string> allowedModels;
+        string validOptionsText = "";
+
+        //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
+        if (platform == "_LS454") {
+            allowedModels.push_back("454_GS");
+            allowedModels.push_back("454_GS_20");
+            allowedModels.push_back("454_GS_FLX");
+            allowedModels.push_back("454_GS_FLX_Titanium");
+            allowedModels.push_back("454_GS_Junior");
+            allowedModels.push_back("unspecified");
+            validOptionsText = " platform. Valid instrument options are 454_GS, 454_GS_20, 454_GS_FLX, 454_GS_FLX_Titanium, 454_GS_Junior or unspecified.";
+        }else if (platform == "ILLUMINA") {
+            allowedModels.push_back("Illumina_Genome_Analyzer");
+            allowedModels.push_back("Illumina_Genome_Analyzer_II");
+            allowedModels.push_back("Illumina_Genome_Analyzer_IIx");
+            allowedModels.push_back("Illumina_HiSeq_2000");
+            allowedModels.push_back("Illumina_HiSeq_1000");
+            allowedModels.push_back("Illumina_MiSeq");
+            allowedModels.push_back("unspecified");
+            validOptionsText = " platform. Valid instrument options are Illumina_Genome_Analyzer, Illumina_Genome_Analyzer_II, Illumina_Genome_Analyzer_IIx, Illumina_HiSeq_2000, Illumina_HiSeq_1000, Illumina_MiSeq or unspecified.";
+        }else if (platform == "ION_TORRENT") {
+            allowedModels.push_back("Ion_Torrent_PGM");
+            allowedModels.push_back("unspecified");
+            validOptionsText = " platform. Valid instrument options are Ion_Torrent_PGM or unspecified.";
+        }else if (platform == "PACBIO_SMRT") {
+            allowedModels.push_back("PacBio_RS");
+            allowedModels.push_back("unspecified");
+            validOptionsText = " platform. Valid instrument options are PacBio_RS or unspecified.";
+        }else {
+            //no list to check against for any other platform value
+            return true;
+        }
+
+        //compare the upper cased input with the upper cased form of each allowed spelling
+        for (int k = 0; k < allowedModels.size(); k++) {
+            string upperModel = allowedModels[k];
+            for (int c = 0; c < upperModel.size(); c++) { upperModel[c] = toupper(upperModel[c]); }
+
+            if (upperModel == instrumentModel) { instrumentModel = allowedModels[k]; return true; }
+        }
+
+        m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + validOptionsText); m->mothurOutEndLine(); abort = true;
+        return false;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "checkCasesInstrumentModels");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
+//AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER
+//checks libStrategy against the options above, ignoring case.
+//libStrategy is converted to upper case in place and, when it is valid, replaced by its spelling from the list above.
+//returns true when libStrategy is valid, otherwise prints an error, sets abort and returns false.
+bool SRACommand::checkCasesLibStrategy(string& libStrategy){
+    try {
+        const string original = libStrategy; //kept for the error message, libStrategy itself is modified below
+
+        //remove users possible case errors
+        for (int i = 0; i < libStrategy.size(); i++) { libStrategy[i] = toupper(libStrategy[i]); }
+
+        //every option in the spelling it is reported with, so each accepted value (MBD-Seq included) gets its case restored
+        const char* validStrategies[] = { "AMPLICON", "WGA", "WGS", "WGX", "RNA-Seq", "miRNA-Seq", "WCS", "CLONE", "POOLCLONE", "CLONEEND", "FINISHING", "ChIP-Seq", "MNase-Seq", "DNase-Hypersensitivity", "Bisulfite-Seq", "Tn-Seq", "EST", "FL-cDNA", "CTS", "MRE-Seq", "MeDIP-Seq", "MBD-Seq", "OTHER" };
+        const int numStrategies = sizeof(validStrategies) / sizeof(validStrategies[0]);
+
+        for (int i = 0; i < numStrategies; i++) {
+            string upperStrategy = validStrategies[i];
+            for (int j = 0; j < upperStrategy.size(); j++) { upperStrategy[j] = toupper(upperStrategy[j]); }
+
+            if (upperStrategy == libStrategy) { libStrategy = validStrategies[i]; return true; }
+        }
+
+        m->mothurOut("[ERROR]: " + original + " is not a valid libstrategy option. Valid libstrategy options are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq or OTHER."); m->mothurOutEndLine(); abort = true;
+
+        return false;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "checkCasesLibStrategy");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+//METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER
+//checks libSource against the options above, ignoring case. libSource is converted to upper case in place.
+//returns true when libSource is valid, otherwise prints an error, sets abort and returns false.
+bool SRACommand::checkCasesLibSource(string& libSource){
+    try {
+        string original = libSource; //kept for the error message, libSource itself is modified below
+
+        //remove users possible case errors
+        for (int i = 0; i < libSource.size(); i++) { libSource[i] = toupper(libSource[i]); }
+
+        //every option is all upper case, so no spelling needs to be restored afterwards
+        bool isOkay = ((libSource == "METAGENOMIC") || (libSource == "GENOMIC") || (libSource == "TRANSCRIPTOMIC") || (libSource == "METATRANSCRIPTOMIC") || (libSource == "SYNTHETIC") || (libSource == "VIRAL_RNA") || (libSource == "OTHER"));
+
+        if (!isOkay) {
+            m->mothurOut("[ERROR]: " + original + " is not a valid libsource option. Valid libsource options are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA or OTHER."); m->mothurOutEndLine(); abort = true;
+        }
+
+        return isOkay;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "checkCasesLibSource");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+//PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified
+//checks libSelection against the options above, ignoring case.
+//libSelection is converted to upper case in place and, when it is valid, replaced by its spelling from the list above.
+//returns true when libSelection is valid, otherwise prints an error, sets abort and returns false.
+bool SRACommand::checkCasesLibSelection(string& libSelection){
+    try {
+        const string original = libSelection;
+
+        //remove users possible case errors
+        for (int i = 0; i < libSelection.size(); i++) { libSelection[i] = toupper(libSelection[i]); }
+
+        //every option in the spelling it should end up with
+        const char* validSelections[] = { "PCR", "RANDOM", "RANDOM_PCR", "RT-PCR", "HMPR", "MF", "CF-S", "CF-H", "CF-T", "CF-M", "MDA", "MSLL", "cDNA", "ChIP", "MNase", "DNAse", "Hybrid_Selection", "Reduced_Representation", "Restriction_Digest", "5-methylcytidine_antibody", "MBD2_protein_methyl-CpG_binding_domain", "CAGE", "RACE", "size_fractionation", "Padlock_probes_capture_method", "other", "unspecified" };
+        const int numSelections = sizeof(validSelections) / sizeof(validSelections[0]);
+
+        //compare the upper cased input with the upper cased form of each option
+        for (int k = 0; k < numSelections; k++) {
+            string upperSelection = validSelections[k];
+            for (int c = 0; c < upperSelection.size(); c++) { upperSelection[c] = toupper(upperSelection[c]); }
+
+            if (upperSelection == libSelection) { libSelection = validSelections[k]; return true; }
+        }
+
+        m->mothurOut("[ERROR]: " + original + " is not a valid libselection option. Valid libselection options are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other or unspecified."); m->mothurOutEndLine(); abort = true;
+
+        return false;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "checkCasesLibSelection");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
#define Mothur_sracommand_h
#include "command.hpp"
-
+#include "trimoligos.h"
/**************************************************************************************************/
void help() { m->mothurOut(getHelpString()); }
private:
- bool abort, isSFF;
+ bool abort, isSFF, pairedOligos;
int tdiffs, bdiffs, pdiffs, sdiffs, ldiffs;
- string sfffile, fastqfile, platform, outputDir, groupfile, file, oligosfile;
- vector<string> outputNames;
-
- int readFile(vector<string>&);
- int parseSffFile(vector<string>&);
- int parseFastqFile(vector<string>&);
+ string sfffile, fastqfile, outputDir, groupfile, file, oligosfile, contactfile, inputfile;
+ string libStrategy, libSource, libSelection, libLayout, platform, instrumentModel, fileType;
+ string submissionName, lastName, firstName, email, centerName, centerType, description;
+ vector<string> outputNames, Groups, revPrimer;
+ vector<string> primerNameVector;
+ vector<string> barcodeNameVector;
+ map<string, string> Group2Barcode;
+ map<int, oligosPair> pairedBarcodes;
+ map<int, oligosPair> pairedPrimers;
+ map<string, int> barcodes;
+ map<string, int> primers;
+ vector<string> linker;
+ vector<string> spacer;
+
+ bool checkCasesInstrumentModels(string&);
+ bool checkCasesPlatforms(string&);
+ bool checkCasesLibStrategy(string&);
+ bool checkCasesLibSource(string&);
+ bool checkCasesLibSelection(string&);
+ int readFile(map<string, vector<string> >&);
+ int readContactFile();
+ int readOligos();
+ int parseSffFile(map<string, vector<string> >&);
+ int parseFastqFile(map<string, vector<string> >&);
+ int checkGroups(map<string, vector<string> >&);
+ int mapGroupToFile(map<string, vector<string> >&, vector<string>);
+ string reverseOligo(string oligo);
};
//pass numSeqs to parent
ofstream out;
- string tempFile = qualfile + toString(getpid()) + ".num.temp";
+ string tempFile = qualfile + m->mothurGetpid(process) + ".num.temp";
m->openOutputFile(tempFile, out);
out << numSeqs << endl;
if(processors == 1){
- driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
+ driver(thisItersLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
m->appendFiles((sumFileName + ".temp"), sumFileName);
m->mothurRemove((sumFileName + ".temp"));
if (mult) {
processIDS.push_back(pid);
process++;
}else if (pid == 0){
- driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
+ driver(thisItersLookup, lines[process].start, lines[process].end, sumFileName + m->mothurGetpid(process) + ".temp", sumAllFileName + m->mothurGetpid(process) + ".temp", calcDists);
//only do this if you want a distance file
if (createPhylip) {
- string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
+ string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + m->mothurGetpid(process) + ".dist";
ofstream outtemp;
m->openOutputFile(tempdistFileName, outtemp);
}
//parent do your part
- driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);
- m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
- m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
- if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
+ driver(thisItersLookup, lines[0].start, lines[0].end, sumFileName + m->mothurGetpid(process) + ".temp", sumAllFileName + m->mothurGetpid(process) + ".temp", calcDists);
+ m->appendFiles((sumFileName + m->mothurGetpid(process) + ".temp"), sumFileName);
+ m->mothurRemove((sumFileName + m->mothurGetpid(process) + ".temp"));
+ if (mult) { m->appendFiles((sumAllFileName + m->mothurGetpid(process) + ".temp"), sumAllFileName); }
//force parent to wait until all the processes are done
for (int i = 0; i < processIDS.size(); i++) {
//for each bin
- for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
+ for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
- for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
+ for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
}
// Allocate memory for thread data.
}
//parent do your part
- driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);
+ driver(thisItersLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);
m->appendFiles((sumFileName + "0.temp"), sumFileName);
m->mothurRemove((sumFileName + "0.temp"));
if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
- for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }
+ for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }
if (createPhylip) {
for (int k = 0; k < calcDists.size(); k++) {
CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
- CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc);
+ CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
//CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
treeCalculators.push_back(new MemEuclidean());
}else if (Estimators[i] == "mempearson") {
treeCalculators.push_back(new MemPearson());
- }
+ }else if (Estimators[i] == "jsd") {
+ treeCalculators.push_back(new JSD());
+ }else if (Estimators[i] == "rjsd") {
+ treeCalculators.push_back(new RJSD());
+ }
+
}
}
driver(thisItersLookup, lines[process].start, lines[process].end, calcDists);
- string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(getpid()) + ".dist";
+ string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + m->mothurGetpid(process) + ".dist";
ofstream outtemp;
m->openOutputFile(tempdistFileName, outtemp);
#include "memchord.h"
#include "memeuclidean.h"
#include "mempearson.h"
+#include "sharedrjsd.h"
+#include "sharedjsd.h"
for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
if (tempBarcodePrimerComboFileNames[i][j] != "") {
- tempBarcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp";
+ tempBarcodePrimerComboFileNames[i][j] += m->mothurGetpid(process) + ".temp";
ofstream temp;
m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
temp.close();
}
}
driverCreateTrim(flowFileName,
- (trimFlowFileName + toString(getpid()) + ".temp"),
- (scrapFlowFileName + toString(getpid()) + ".temp"),
- (fastaFileName + toString(getpid()) + ".temp"),
+ (trimFlowFileName + m->mothurGetpid(process) + ".temp"),
+ (scrapFlowFileName + m->mothurGetpid(process) + ".temp"),
+ (fastaFileName + m->mothurGetpid(process) + ".temp"),
tempBarcodePrimerComboFileNames, lines[process]);
exit(0);
for(int i=0;i<tempFASTAFileNames.size();i++){
for(int j=0;j<tempFASTAFileNames[i].size();j++){
if (tempFASTAFileNames[i][j] != "") {
- tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
+ tempFASTAFileNames[i][j] += m->mothurGetpid(process) + ".temp";
m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
if(qFileName != ""){
- tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
+ tempPrimerQualFileNames[i][j] += m->mothurGetpid(process) + ".temp";
m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
}
if(nameFile != ""){
- tempNameFileNames[i][j] += toString(getpid()) + ".temp";
+ tempNameFileNames[i][j] += m->mothurGetpid(process) + ".temp";
m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
}
}
driverCreateTrim(filename,
qFileName,
- (trimFASTAFileName + toString(getpid()) + ".temp"),
- (scrapFASTAFileName + toString(getpid()) + ".temp"),
- (trimQualFileName + toString(getpid()) + ".temp"),
- (scrapQualFileName + toString(getpid()) + ".temp"),
- (trimNameFileName + toString(getpid()) + ".temp"),
- (scrapNameFileName + toString(getpid()) + ".temp"),
- (trimCountFileName + toString(getpid()) + ".temp"),
- (scrapCountFileName + toString(getpid()) + ".temp"),
- (groupFile + toString(getpid()) + ".temp"),
+ (trimFASTAFileName + m->mothurGetpid(process) + ".temp"),
+ (scrapFASTAFileName + m->mothurGetpid(process) + ".temp"),
+ (trimQualFileName + m->mothurGetpid(process) + ".temp"),
+ (scrapQualFileName + m->mothurGetpid(process) + ".temp"),
+ (trimNameFileName + m->mothurGetpid(process) + ".temp"),
+ (scrapNameFileName + m->mothurGetpid(process) + ".temp"),
+ (trimCountFileName + m->mothurGetpid(process) + ".temp"),
+ (scrapCountFileName + m->mothurGetpid(process) + ".temp"),
+ (groupFile + m->mothurGetpid(process) + ".temp"),
tempFASTAFileNames,
tempPrimerQualFileNames,
tempNameFileNames,
lines[process],
qLines[process]);
- if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
+ if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + m->mothurGetpid(process) + '\n'); }
//pass groupCounts to parent
if(createGroup){
ofstream out;
- string tempFile = filename + toString(getpid()) + ".num.temp";
+ string tempFile = filename + m->mothurGetpid(process) + ".num.temp";
m->openOutputFile(tempFile, out);
out << groupCounts.size() << endl;
//pass numSeqs to parent
ofstream out;
- string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
+ string tempFile = outputDir + m->mothurGetpid(process) + ".weightedcommand.results.temp";
m->openOutputFile(tempFile, out);
for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
out.close();
//pass numSeqs to parent
ofstream out;
- string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
+ string tempFile = outputDir + m->mothurGetpid(process) + ".unweighted.results.temp";
m->openOutputFile(tempFile, out);
out << myresults.size() << endl;
for (int i = 0; i < myresults.size(); i++) { out << myresults[i] << '\t'; } out << endl;
//pass numSeqs to parent
ofstream out;
- string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
+ string tempFile = outputDir + m->mothurGetpid(process) + ".unweighted.results.temp";
m->openOutputFile(tempFile, out);
out << myresults.size() << endl;
for (int i = 0; i < myresults.size(); i++) { out << myresults[i] << '\t'; } out << endl;
treegroup["gower"] = "gower";
treegroup["memchi2"] = "memchi2";
treegroup["memchord"] = "memchord";
+ treegroup["jsd"] = "jsd";
+ treegroup["rjsd"] = "rjsd";
treegroup["memeuclidean"] = "memeuclidean";
treegroup["mempearson"] = "mempearson";
//pass numSeqs to parent
ofstream out;
- string tempFile = outputDir + toString(getpid()) + ".weighted.results.temp";
+ string tempFile = outputDir + m->mothurGetpid(process) + ".weighted.results.temp";
m->openOutputFile(tempFile, out);
out << Myresults.size() << endl;