for(int i=0;i<iters;i++){
map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
double ssWithinRand = calcSSWithin(randomizedGroup);
- if(ssWithinRand < ssWithinOrig){ counter++; }
+ if(ssWithinRand <= ssWithinOrig){ counter++; }
}
double pValue = (double)counter / (double) iters;
helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
- helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
+ helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n";
helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
if (type == "chimera") { pattern = "[filename],uchime.chimeras"; }
else if (type == "accnos") { pattern = "[filename],uchime.accnos"; }
- else if (type == "alns") { pattern = "[filename],uchime.alns"; }
+ else if (type == "alns") { pattern = "[filename],uchime.alns"; }
+ else if (type == "count") { pattern = "[filename],uchime.pick.count_table"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
return pattern;
outputTypes["chimera"] = tempOutNames;
outputTypes["accnos"] = tempOutNames;
outputTypes["alns"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
outputTypes["chimera"] = tempOutNames;
outputTypes["accnos"] = tempOutNames;
outputTypes["alns"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
string accnosFileName = getOutputFileName("accnos", variables);
string alnsFileName = getOutputFileName("alns", variables);
string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
+ string newCountFile = "";
//you provided a groupfile
string groupFile = "";
else if (hasCount) {
CountTable ct;
if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
+ newCountFile = getOutputFileName("count", variables);
}
if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
int totalSeqs = 0;
- if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); }
- else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]); }
+ if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
+
+ if (hasCount && dups) {
+ CountTable c; c.readTable(nameFile);
+ if (!m->isBlank(newCountFile)) {
+ ifstream in2;
+ m->openInputFile(newCountFile, in2);
+
+ string name, group;
+ while (!in2.eof()) {
+ in2 >> name >> group; m->gobble(in2);
+ c.setAbund(name, group, 0);
+ }
+ in2.close();
+ }
+ m->mothurRemove(newCountFile);
+ c.printTable(newCountFile);
+ }
+
+ }else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]); }
if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
- if (hasCount) { delete cparser; }
- else { delete sparser; }
+
if (!dups) {
int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
- }
+ }else {
+
+ if (hasCount) {
+ set<string> doNotRemove;
+ CountTable c; c.readTable(newCountFile);
+ vector<string> namesInTable = c.getNamesOfSeqs();
+ for (int i = 0; i < namesInTable.size(); i++) {
+ int temp = c.getNumSeqs(namesInTable[i]);
+ if (temp == 0) { c.remove(namesInTable[i]); }
+ else { doNotRemove.insert((namesInTable[i])); }
+ }
+ //remove names we want to keep from accnos file.
+ set<string> accnosNames = m->readAccnos(accnosFileName);
+ ofstream out2;
+ m->openOutputFile(accnosFileName, out2);
+ for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
+ if (doNotRemove.count(*it) == 0) { out2 << (*it) << endl; }
+ }
+ out2.close();
+ c.printTable(newCountFile);
+ }
+ }
+
+ if (hasCount) { delete cparser; }
+ else { delete sparser; }
if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
}
}
//**********************************************************************************************************************
-int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
+int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
try {
int totalSeqs = 0;
int numChimeras = 0;
-
+
+ ofstream outCountList;
+ if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
+
for (int i = start; i < end; i++) {
- int start = time(NULL); if (m->control_pressed) { return 0; }
+ int start = time(NULL); if (m->control_pressed) { outCountList.close(); m->mothurRemove(countlist); return 0; }
int error;
if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
- int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
+ int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras);
totalSeqs += numSeqs;
if (m->control_pressed) { return 0; }
if (!m->debug) { m->mothurRemove(filename); }
else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
+ //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
+ //This table will zero out group counts for seqs determined to be chimeric by that group.
+ if (dups) {
+ if (!m->isBlank(accnos+groups[i])) {
+ ifstream in;
+ m->openInputFile(accnos+groups[i], in);
+ string name;
+ if (hasCount) {
+ while (!in.eof()) {
+ in >> name; m->gobble(in);
+ outCountList << name << '\t' << groups[i] << endl;
+ }
+ in.close();
+ }else {
+ map<string, string> thisnamemap = sparser->getNameMap(groups[i]);
+ map<string, string>::iterator itN;
+ ofstream out;
+ m->openOutputFile(accnos+groups[i]+".temp", out);
+ while (!in.eof()) {
+ in >> name; m->gobble(in);
+ itN = thisnamemap.find(name);
+ if (itN != thisnamemap.end()) {
+ vector<string> tempNames; m->splitAtComma(itN->second, tempNames);
+ for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
+
+ }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
+ }
+ out.close();
+ in.close();
+ m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
+ }
+
+ }
+ }
+
//append files
m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
- }
- return totalSeqs;
+ }
+
+ if (hasCount && dups) { outCountList.close(); }
+
+ return totalSeqs;
}
catch(exception& e) {
// Allocate memory for thread data.
string extension = toString(i) + ".temp";
- uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i);
- tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
+ uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, "", dummy, m, 0, 0, i);
+ tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
pDataArray.push_back(tempUchime);
}
/**************************************************************************************************/
-int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
+int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
try {
processIDS.clear();
int process = 1;
int num = 0;
+
+ CountTable newCount;
+ if (hasCount && dups) { newCount.readTable(nameFile); }
//sanity check
if (groups.size() < processors) { processors = groups.size(); }
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
+ num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
//pass numSeqs to parent
ofstream out;
}
//do my part
- num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
+ num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
//force parent to wait until all the processes are done
for (int i=0;i<processIDS.size();i++) {
int temp = processIDS[i];
wait(&temp);
}
-
+
for (int i = 0; i < processIDS.size(); i++) {
ifstream in;
string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
m->openInputFile(tempFile, in);
if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
in.close(); m->mothurRemove(tempFile);
- }
-
+ }
+
#else
//////////////////////////////////////////////////////////////////////////////////////////////////////
//Windows version shared memory, so be careful when passing variables through the uchimeData struct.
// Allocate memory for thread data.
string extension = toString(i) + ".temp";
- uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i);
- tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
+ uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end, i);
+ tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
pDataArray.push_back(tempUchime);
//using the main process as a worker saves time and memory
- num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
+ num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
//Wait until all threads have terminated.
WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
CloseHandle(hThreadArray[i]);
delete pDataArray[i];
}
+
+
#endif
-
-
+
+ //read my own
+ if (hasCount && dups) {
+ if (!m->isBlank(accnos + ".byCount")) {
+ ifstream in2;
+ m->openInputFile(accnos + ".byCount", in2);
+
+ string name, group;
+ while (!in2.eof()) {
+ in2 >> name >> group; m->gobble(in2);
+ newCount.setAbund(name, group, 0);
+ }
+ in2.close();
+ }
+ m->mothurRemove(accnos + ".byCount");
+ }
+
//append output files
for(int i=0;i<processIDS.size();i++){
m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
}
+
+ if (hasCount && dups) {
+ if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
+ ifstream in2;
+ m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
+
+ string name, group;
+ while (!in2.eof()) {
+ in2 >> name >> group; m->gobble(in2);
+ newCount.setAbund(name, group, 0);
+ }
+ in2.close();
+ }
+ m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
+ }
+
}
-
+
+ //print new *.pick.count_table
+ if (hasCount && dups) { newCount.printTable(newCountFile); }
+
return num;
}
int readFasta(string, map<string, string>&);
int printFile(vector<seqPriorityNode>&, string);
int deconvoluteResults(map<string, string>&, string, string, string);
- int driverGroups(string, string, string, string, int, int, vector<string>);
- int createProcessesGroups(string, string, string, string, vector<string>, string, string, string);
+ int driverGroups(string, string, string, string, string, int, int, vector<string>);
+ int createProcessesGroups(string, string, string, string, string, vector<string>, string, string, string);
int prepFile(string filename, string);
string namefile;
string groupfile;
string outputFName;
- string accnos, alns, filename, templatefile, uchimeLocation;
+ string accnos, alns, filename, templatefile, uchimeLocation, countlist;
MothurOut* m;
int start;
int end;
int threadID, count, numChimeras;
vector<string> groups;
- bool useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount;
+ bool dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount;
string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand;
uchimeData(){}
- uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac, string al, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
+ uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac, string al, string nc, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
fastafile = f;
namefile = n;
groupfile = g;
count = 0;
numChimeras = 0;
uchimeLocation = uloc;
+ countlist = nc;
}
- void setBooleans(bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) {
+ void setBooleans(bool dps, bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) {
useAbskew = Abskew;
chimealns = calns;
useMinH = MinH;
ucl = uc;
useQueryfract = Queryfract;
hasCount = hc;
+ dups = dps;
}
void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac, string stra) {
int totalSeqs = 0;
int numChimeras = 0;
+
+ ofstream outCountList;
+ if (pDataArray->hasCount && pDataArray->dups) { pDataArray->m->openOutputFile(pDataArray->countlist, outCountList); }
+
for (int i = pDataArray->start; i < pDataArray->end; i++) {
int start = time(NULL); if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
ofstream out;
pDataArray->m->openOutputFile(accnos, out);
+
int num = 0;
numChimeras = 0;
+ map<string, string> thisnamemap;
+ map<string, string>::iterator itN;
+ if (pDataArray->dups && !pDataArray->hasCount) { thisnamemap = parser->getNameMap(pDataArray->groups[i]); }
+
while(!in.eof()) {
if (pDataArray->m->control_pressed) { break; }
for (int j = 0; j < 15; j++) { in >> chimeraFlag; }
pDataArray->m->gobble(in);
- if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
+ if (chimeraFlag == "Y") {
+ if (pDataArray->dups) {
+ if (!pDataArray->hasCount) { //output redundant names for each group
+ itN = thisnamemap.find(name);
+ if (itN != thisnamemap.end()) {
+ vector<string> tempNames; pDataArray->m->splitAtComma(itN->second, tempNames);
+ for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
+ }else { pDataArray->m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); pDataArray->m->control_pressed = true; }
+
+ }else {
+ out << name << endl;
+ outCountList << name << '\t' << pDataArray->groups[i] << endl;
+ }
+ }else{ out << name << endl; }
+ numChimeras++;
+ }
num++;
}
in.close();
}
+ if (pDataArray->hasCount && pDataArray->dups) { outCountList.close(); }
pDataArray->count = totalSeqs;
if (pDataArray->hasCount) { delete cparser; } { delete parser; }
return totalSeqs;
for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; }
out << endl;
- for (map<string, int>::iterator itNames = indexNameMap.begin(); itNames != indexNameMap.end(); itNames++) {
+ map<int, string> reverse; //use this to preserve order
+ for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; }
+
+ for (int i = 0; i < totals.size(); i++) {
+ map<int, string>::iterator itR = reverse.find(i);
+
+ if (itR != reverse.end()) { //will equal end if seqs were removed because remove just removes from indexNameMap
+ out << itR->second << '\t' << totals[i] << '\t';
+ if (hasGroups) {
+ for (int j = 0; j < groups.size(); j++) {
+ out << counts[i][j] << '\t';
+ }
+ }
+ out << endl;
+ }
+ }
+ /*for (map<string, int>::iterator itNames = indexNameMap.begin(); itNames != indexNameMap.end(); itNames++) {
out << itNames->first << '\t' << totals[itNames->second] << '\t';
if (hasGroups) {
}
}
out << endl;
- }
+ }*/
out.close();
return 0;
}
if (hasGroups) {
map<string, int>::iterator it = indexGroupMap.find(groupName);
if (it == indexGroupMap.end()) {
- m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+ m->mothurOut("[ERROR]: group " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
}else {
return totalGroups[it->second];
}
if (hasGroups) {
map<string, int>::iterator it = indexGroupMap.find(groupName);
if (it == indexGroupMap.end()) {
- m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+ m->mothurOut("[ERROR]: group " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
}else {
map<string, int>::iterator it2 = indexNameMap.find(seqName);
if (it2 == indexNameMap.end()) {
- m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+ m->mothurOut("[ERROR]: seq " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
}else {
return counts[it2->second][it->second];
}
}
//**********************************************************************************************************************
-
+//TATGCT
+//1 0 0 0 0 1
+//then the second positive flow is for a T, but you saw a T between the last and previous flow adn it wasn't positive, so something is missing
+//Becomes TNT
void FlowData::translateFlow(){
-
try{
- sequence = "";
- for(int i=0;i<endFlow;i++){
+ sequence = "";
+ set<char> charInMiddle;
+ int oldspot = -1;
+ bool updateOld = false;
+
+ for(int i=0;i<endFlow;i++){
int intensity = (int)(flowData[i] + 0.5);
char base = baseFlow[i % baseFlow.length()];
+
+ if (intensity == 0) { //are we in the middle
+ if (oldspot != -1) { charInMiddle.insert(base); }
+ }else if (intensity >= 1) {
+ if (oldspot == -1) { updateOld = true; }
+ else { //check for bases inbetween two 1's
+ if (charInMiddle.count(base) != 0) { //we want to covert to an N
+ sequence = sequence.substr(0, oldspot+1);
+ sequence += 'N';
+ }
+ updateOld = true;
+ charInMiddle.clear();
+ }
+ }
for(int j=0;j<intensity;j++){
sequence += base;
}
+
+ if (updateOld) { oldspot = sequence.length()-1; updateOld = false; }
}
if(sequence.size() > 4){
else{
sequence = "NNNN";
}
- }
+ }
catch(exception& e) {
m->errorOut(e, "FlowData", "translateFlow");
exit(1);
//**********************************************************************************************************************
vector<string> GetSharedOTUCommand::setParameters(){
try {
- CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false); parameters.push_back(pfasta);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none","sharedseq",false,true,true); parameters.push_back(pgroup);
- CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","sharedseq",false,true,true); parameters.push_back(plist);
+ CommandParameter pfasta("fasta", "InputTypes", "", "", "sharedFasta", "none", "none","fasta",false,false); parameters.push_back(pfasta);
+ CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "groupList","",false,false,true); parameters.push_back(pgroup);
+ CommandParameter plist("list", "InputTypes", "", "", "sharedList", "sharedList", "groupList","sharedseq",false,false,true); parameters.push_back(plist);
+ CommandParameter pshared("shared", "InputTypes", "", "", "sharedList-sharedFasta", "sharedList", "none","sharedseq",false,false,true); parameters.push_back(pshared);
CommandParameter poutput("output", "Multiple", "accnos-default", "default", "", "", "","",false,false); parameters.push_back(poutput);
CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
- CommandParameter punique("unique", "String", "", "", "", "", "","",false,false,true); parameters.push_back(punique);
- CommandParameter pshared("shared", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pshared);
+ CommandParameter puniquegroups("uniquegroups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(puniquegroups);
+ CommandParameter psharedgroups("sharedgroups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(psharedgroups);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
string GetSharedOTUCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The get.sharedseqs command parameters are list, group, label, unique, shared, output and fasta. The list and group parameters are required, unless you have valid current files.\n";
+ helpString += "The get.sharedseqs command parameters are list, group, shared, label, uniquegroups, sharedgroups, output and fasta. The list and group or shared parameters are required, unless you have valid current files.\n";
helpString += "The label parameter allows you to select what distance levels you would like output files for, and are separated by dashes.\n";
- helpString += "The unique and shared parameters allow you to select groups you would like to know the shared info for, and are separated by dashes.\n";
- helpString += "If you enter your groups under the unique parameter mothur will return the otus that contain ONLY sequences from those groups.\n";
- helpString += "If you enter your groups under the shared parameter mothur will return the otus that contain sequences from those groups and may also contain sequences from other groups.\n";
- helpString += "If you do not enter any groups then the get.sharedseqs command will return sequences that are unique to all groups in your group file.\n";
- helpString += "The fasta parameter allows you to input a fasta file and outputs a fasta file for each distance level containing only the sequences that are in OTUs shared by the groups specified.\n";
+ helpString += "The uniquegroups and sharedgroups parameters allow you to select groups you would like to know the shared info for, and are separated by dashes.\n";
+ helpString += "If you enter your groups under the uniquegroups parameter mothur will return the otus that contain ONLY sequences from those groups.\n";
+ helpString += "If you enter your groups under the sharedgroups parameter mothur will return the otus that contain sequences from those groups and may also contain sequences from other groups.\n";
+ helpString += "If you do not enter any groups then the get.sharedseqs command will return sequences that are unique to all groups in your group or shared file.\n";
+ helpString += "The fasta parameter allows you to input a fasta file and outputs a fasta file for each distance level containing only the sequences that are in OTUs shared by the groups specified. It can only be used with a list and group file not the shared file input.\n";
helpString += "The output parameter allows you to output the list of names without the group and bin number added. \n";
helpString += "With this option you can use the names file as an input in get.seqs and remove.seqs commands. To do this enter output=accnos. \n";
helpString += "The get.sharedseqs command outputs a .names file for each distance level containing a list of sequences in the OTUs shared by the groups specified.\n";
- helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(list=yourListFile, group=yourGroupFile, label=yourLabels, unique=yourGroups, fasta=yourFastafile, output=yourOutput).\n";
- helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group= amazon.groups, unique=forest-pasture, fasta=amazon.fasta, output=accnos).\n";
+ helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(list=yourListFile, group=yourGroupFile, label=yourLabels, uniquegroups=yourGroups, fasta=yourFastafile, output=yourOutput).\n";
+ helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group=amazon.groups, uniquegroups=forest-pasture, fasta=amazon.fasta, output=accnos).\n";
helpString += "The output to the screen is the distance and the number of otus at that distance for the groups you specified.\n";
helpString += "The default value for label is all labels in your inputfile. The default for groups is all groups in your file.\n";
helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["list"] = inputDir + it->second; }
}
+
+ it = parameters.find("shared");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["shared"] = inputDir + it->second; }
+ }
it = parameters.find("group");
//user has given a template file
//check for required parameters
listfile = validParameter.validFile(parameters, "list", true);
if (listfile == "not open") { abort = true; }
- else if (listfile == "not found") {
- listfile = m->getListFile();
- if (listfile != "") { format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
- else {
- m->mothurOut("No valid current list file. You must provide a list file."); m->mothurOutEndLine();
- abort = true;
- }
- }else { format = "list"; m->setListFile(listfile); }
+ else if (listfile == "not found") { listfile = ""; }
+ else { format = "list"; m->setListFile(listfile); }
groupfile = validParameter.validFile(parameters, "group", true);
if (groupfile == "not open") { abort = true; }
- else if (groupfile == "not found") {
- groupfile = m->getGroupFile();
- if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
- else {
- m->mothurOut("No valid current group file. You must provide a group file."); m->mothurOutEndLine();
- abort = true;
+ else if (groupfile == "not found") { groupfile = ""; }
+ else { m->setGroupFile(groupfile); }
+
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not open") { abort = true; }
+ else if (sharedfile == "not found") { sharedfile = ""; }
+ else { m->setSharedFile(sharedfile); }
+
+ fastafile = validParameter.validFile(parameters, "fasta", true);
+ if (fastafile == "not open") { abort = true; }
+ else if (fastafile == "not found") { fastafile = ""; }
+ else { m->setFastaFile(fastafile); }
+
+
+ if ((sharedfile == "") && (listfile == "")) { //look for currents
+ //is there are current file available for either of these?
+ //give priority to shared, then list
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else {
+ listfile = m->getListFile();
+ if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("No valid current files. You must provide a shared or list file."); m->mothurOutEndLine();
+ abort = true;
+ }
}
- }else { m->setGroupFile(groupfile); }
-
- if ((listfile == "") || (groupfile == "")) { m->mothurOut("The list and group parameters are required."); m->mothurOutEndLine(); abort = true; }
+ }else if ((sharedfile != "") && (listfile != "")) {
+ m->mothurOut("You may enter ONLY ONE of the following: shared or list."); m->mothurOutEndLine(); abort = true;
+ }
+ if (listfile != "") {
+ if (groupfile == ""){
+ groupfile = m->getGroupFile();
+ if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You need to provide a group file if you are going to use the list format."); m->mothurOutEndLine(); abort = true;
+ }
+ }
+ }
+
+ if ((sharedfile != "") && (fastafile != "")) { m->mothurOut("You cannot use the fasta file with the shared file."); m->mothurOutEndLine(); abort = true; }
+
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
label = validParameter.validFile(parameters, "label", false);
if (output == "not found") { output = ""; }
else if (output == "default") { output = ""; }
- groups = validParameter.validFile(parameters, "unique", false);
+ groups = validParameter.validFile(parameters, "uniquegroups", false);
if (groups == "not found") { groups = ""; }
else {
userGroups = "unique." + groups;
m->splitAtDash(groups, Groups);
- m->setGroups(Groups);
-
}
- groups = validParameter.validFile(parameters, "shared", false);
+ groups = validParameter.validFile(parameters, "sharedgroups", false);
if (groups == "not found") { groups = ""; }
else {
userGroups = groups;
m->splitAtDash(groups, Groups);
- m->setGroups(Groups);
unique = false;
}
- fastafile = validParameter.validFile(parameters, "fasta", true);
- if (fastafile == "not open") { abort = true; }
- else if (fastafile == "not found") { fastafile = ""; }
- else { m->setFastaFile(fastafile); }
- }
+ }
}
catch(exception& e) {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- groupMap = new GroupMap(groupfile);
- int error = groupMap->readMap();
- if (error == 1) { delete groupMap; return 0; }
-
- if (m->control_pressed) { delete groupMap; return 0; }
-
- if (Groups.size() == 0) {
- Groups = groupMap->getNamesOfGroups();
-
- //make string for outputfile name
- userGroups = "unique.";
- for(int i = 0; i < Groups.size(); i++) { userGroups += Groups[i] + "-"; }
- userGroups = userGroups.substr(0, userGroups.length()-1);
- }else{
- //sanity check for group names
- SharedUtil util;
- vector<string> namesOfGroups = groupMap->getNamesOfGroups();
- util.setGroups(Groups, namesOfGroups);
- groupMap->setNamesOfGroups(namesOfGroups);
- }
-
- //put groups in map to find easier
- for(int i = 0; i < Groups.size(); i++) {
- groupFinder[Groups[i]] = Groups[i];
- }
-
- if (fastafile != "") {
- ifstream inFasta;
- m->openInputFile(fastafile, inFasta);
-
- while(!inFasta.eof()) {
- if (m->control_pressed) { outputTypes.clear(); inFasta.close(); delete groupMap; return 0; }
-
- Sequence seq(inFasta); m->gobble(inFasta);
- if (seq.getName() != "") { seqs.push_back(seq); }
- }
- inFasta.close();
- }
-
- ListVector* lastlist = NULL;
- string lastLabel = "";
-
- //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
- set<string> processedLabels;
- set<string> userLabels = labels;
-
- ifstream in;
- m->openInputFile(listfile, in);
-
- //as long as you are not at the end of the file or done wih the lines you want
- while((!in.eof()) && ((allLines == 1) || (userLabels.size() != 0))) {
-
- if (m->control_pressed) {
- if (lastlist != NULL) { delete lastlist; }
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- delete groupMap; return 0;
- }
-
- list = new ListVector(in);
-
- if(allLines == 1 || labels.count(list->getLabel()) == 1){
- m->mothurOut(list->getLabel());
- process(list);
-
- processedLabels.insert(list->getLabel());
- userLabels.erase(list->getLabel());
- }
-
- if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
- string saveLabel = list->getLabel();
-
- m->mothurOut(lastlist->getLabel());
- process(lastlist);
-
- processedLabels.insert(lastlist->getLabel());
- userLabels.erase(lastlist->getLabel());
-
- //restore real lastlabel to save below
- list->setLabel(saveLabel);
- }
+ if ( sharedfile != "") { runShared(); }
+ else {
+ m->setGroups(Groups);
+ groupMap = new GroupMap(groupfile);
+ int error = groupMap->readMap();
+ if (error == 1) { delete groupMap; return 0; }
+
+ if (m->control_pressed) { delete groupMap; return 0; }
+
+ if (Groups.size() == 0) {
+ Groups = groupMap->getNamesOfGroups();
+
+ //make string for outputfile name
+ userGroups = "unique.";
+ for(int i = 0; i < Groups.size(); i++) { userGroups += Groups[i] + "-"; }
+ userGroups = userGroups.substr(0, userGroups.length()-1);
+ }else{
+ //sanity check for group names
+ SharedUtil util;
+ vector<string> namesOfGroups = groupMap->getNamesOfGroups();
+ util.setGroups(Groups, namesOfGroups);
+ groupMap->setNamesOfGroups(namesOfGroups);
+ }
+
+ //put groups in map to find easier
+ for(int i = 0; i < Groups.size(); i++) {
+ groupFinder[Groups[i]] = Groups[i];
+ }
+
+ if (fastafile != "") {
+ ifstream inFasta;
+ m->openInputFile(fastafile, inFasta);
+
+ while(!inFasta.eof()) {
+ if (m->control_pressed) { outputTypes.clear(); inFasta.close(); delete groupMap; return 0; }
+
+ Sequence seq(inFasta); m->gobble(inFasta);
+ if (seq.getName() != "") { seqs.push_back(seq); }
+ }
+ inFasta.close();
+ }
+
+ ListVector* lastlist = NULL;
+ string lastLabel = "";
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ ifstream in;
+ m->openInputFile(listfile, in);
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((!in.eof()) && ((allLines == 1) || (userLabels.size() != 0))) {
+
+ if (m->control_pressed) {
+ if (lastlist != NULL) { delete lastlist; }
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
+ delete groupMap; return 0;
+ }
+
+ list = new ListVector(in);
+
+ if(allLines == 1 || labels.count(list->getLabel()) == 1){
+ m->mothurOut(list->getLabel());
+ process(list);
+
+ processedLabels.insert(list->getLabel());
+ userLabels.erase(list->getLabel());
+ }
+
+ if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = list->getLabel();
+
+ m->mothurOut(lastlist->getLabel());
+ process(lastlist);
+
+ processedLabels.insert(lastlist->getLabel());
+ userLabels.erase(lastlist->getLabel());
+
+ //restore real lastlabel to save below
+ list->setLabel(saveLabel);
+ }
- lastLabel = list->getLabel();
-
- if (lastlist != NULL) { delete lastlist; }
- lastlist = list;
- }
-
- in.close();
-
- //output error messages about any remaining user labels
- set<string>::iterator it;
- bool needToRun = false;
- for (it = userLabels.begin(); it != userLabels.end(); it++) {
- m->mothurOut("Your file does not include the label " + *it);
- if (processedLabels.count(lastLabel) != 1) {
- m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
- needToRun = true;
- }else {
- m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
- }
- }
-
- //run last label if you need to
- if (needToRun == true) {
- m->mothurOut(lastlist->getLabel());
- process(lastlist);
-
- processedLabels.insert(lastlist->getLabel());
- userLabels.erase(lastlist->getLabel());
- }
-
+ lastLabel = list->getLabel();
+
+ if (lastlist != NULL) { delete lastlist; }
+ lastlist = list;
+ }
+
+ in.close();
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ m->mothurOut(lastlist->getLabel());
+ process(lastlist);
+
+ processedLabels.insert(lastlist->getLabel());
+ userLabels.erase(lastlist->getLabel());
+ }
+
- //reset groups parameter
- m->clearGroups();
-
- if (lastlist != NULL) { delete lastlist; }
-
- if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete groupMap; return 0; }
-
+ //reset groups parameter
+ m->clearGroups();
+
+ if (lastlist != NULL) { delete lastlist; }
+
+ if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete groupMap; return 0; }
+ }
//set fasta file as new current fastafile
string current = "";
itTypes = outputTypes.find("fasta");
exit(1);
}
}
+/***********************************************************/
+int GetSharedOTUCommand::runShared() {
+ try {
+ InputData input(sharedfile, "sharedfile");
+ vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+ string lastLabel = lookup[0]->getLabel();
+
+ if (Groups.size() == 0) {
+ Groups = m->getGroups();
+
+ //make string for outputfile name
+ userGroups = "unique.";
+ for(int i = 0; i < Groups.size(); i++) { userGroups += Groups[i] + "-"; }
+ userGroups = userGroups.substr(0, userGroups.length()-1);
+ }else {
+ //sanity check for group names
+ SharedUtil util;
+ vector<string> allGroups = m->getAllGroups();
+ util.setGroups(Groups, allGroups);
+ }
+
+ //put groups in map to find easier
+ for(int i = 0; i < Groups.size(); i++) {
+ groupFinder[Groups[i]] = Groups[i];
+ }
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+ if (m->control_pressed) {
+ outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0;
+ }
+
+
+ if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+ m->mothurOut(lookup[0]->getLabel());
+ process(lookup);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+ }
+
+ if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = lookup[0]->getLabel();
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+
+ m->mothurOut(lookup[0]->getLabel());
+ process(lookup);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+
+ //restore real lastlabel to save below
+ lookup[0]->setLabel(saveLabel);
+ }
+
+ lastLabel = lookup[0]->getLabel();
+
+ //get next line to process
+ //prevent memory leak
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup = input.getSharedRAbundVectors();
+ }
+
+ if (m->control_pressed) {
+ outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0;
+ }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+
+ m->mothurOut(lookup[0]->getLabel());
+ process(lookup);
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ }
+
+ //reset groups parameter
+ m->clearGroups();
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetSharedOTUCommand", "runShared");
+ exit(1);
+ }
+}
+/***********************************************************/
+int GetSharedOTUCommand::process(vector<SharedRAbundVector*>& lookup) {
+ try {
+
+ string outputFileNames;
+ if (outputDir == "") { outputDir += m->hasPath(sharedfile); }
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+ variables["[distance]"] = lookup[0]->getLabel();
+ variables["[group]"] = userGroups;
+ if (output != "accnos") { outputFileNames = getOutputFileName("sharedseqs", variables); }
+ else { outputFileNames = getOutputFileName("accnos", variables); }
+
+ ofstream outNames;
+ m->openOutputFile(outputFileNames, outNames);
+
+ bool wroteSomething = false;
+ int num = 0;
+
+ //go through each bin, find out if shared
+ for (int i = 0; i < lookup[0]->getNumBins(); i++) {
+ if (m->control_pressed) { outNames.close(); m->mothurRemove(outputFileNames); return 0; }
+
+ bool uniqueOTU = true;
+ map<string, int> atLeastOne;
+ for (int f = 0; f < Groups.size(); f++) { atLeastOne[Groups[f]] = 0; }
+
+ set<string> namesOfGroupsInThisBin;
+
+ for(int j = 0; j < lookup.size(); j++) {
+ string seqGroup = lookup[j]->getGroup();
+ string name = m->currentBinLabels[i];
+
+ if (lookup[j]->getAbundance(i) != 0) {
+ if (output != "accnos") {
+ namesOfGroupsInThisBin.insert(name + "|" + seqGroup + "|" + toString(lookup[j]->getAbundance(i)));
+ }else { namesOfGroupsInThisBin.insert(name); }
+
+ //is this seq in one of the groups we care about
+ it = groupFinder.find(seqGroup);
+ if (it == groupFinder.end()) { uniqueOTU = false; } //you have sequences from a group you don't want
+ else { atLeastOne[seqGroup]++; }
+ }
+ }
+
+ //make sure you have at least one seq from each group you want
+ bool sharedByAll = true;
+ map<string, int>::iterator it2;
+ for (it2 = atLeastOne.begin(); it2 != atLeastOne.end(); it2++) {
+ if (it2->second == 0) { sharedByAll = false; }
+ }
+
+ //if the user wants unique bins and this is unique then print
+ //or this the user wants shared bins and this bin is shared then print
+ if ((unique && uniqueOTU && sharedByAll) || (!unique && sharedByAll)) {
+
+ wroteSomething = true;
+ num++;
+
+ //output list of names
+ for (set<string>::iterator itNames = namesOfGroupsInThisBin.begin(); itNames != namesOfGroupsInThisBin.end(); itNames++) {
+ outNames << (*itNames) << endl;
+ }
+ }
+ }
+ outNames.close();
+
+ if (!wroteSomething) {
+ m->mothurRemove(outputFileNames);
+ string outputString = "\t" + toString(num) + " - No otus shared by groups";
+
+ string groupString = "";
+ for (int h = 0; h < Groups.size(); h++) {
+ groupString += " " + Groups[h];
+ }
+
+ outputString += groupString + ".";
+ m->mothurOut(outputString); m->mothurOutEndLine();
+ }else {
+ m->mothurOut("\t" + toString(num)); m->mothurOutEndLine();
+ outputNames.push_back(outputFileNames);
+ if (output != "accnos") { outputTypes["sharedseqs"].push_back(outputFileNames); }
+ else { outputTypes["accnos"].push_back(outputFileNames); }
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetSharedOTUCommand", "process");
+ exit(1);
+ }
+}
//**********************************************************************************************************************
#include "listvector.hpp"
#include "sequence.hpp"
#include "groupmap.h"
+#include "sharedrabundvector.h"
+#include "inputdata.h"
//**********************************************************************************************************************
class GetSharedOTUCommand : public Command {
GroupMap* groupMap;
set<string> labels;
- string fastafile, label, groups, listfile, groupfile, output, userGroups, outputDir, format;
+ string fastafile, label, groups, listfile, groupfile, sharedfile, output, userGroups, outputDir, format;
bool abort, allLines, unique;
vector<string> Groups;
map<string, string> groupFinder;
vector<string> outputNames;
int process(ListVector*);
+ int process(vector<SharedRAbundVector*>&);
+ int runShared();
};
//**********************************************************************************************************************
vector<double> ssWithinRandVector;
map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
double bValueRand = calcBValue(randomizedGroup, ssWithinRandVector);
- if(bValueRand > bValueOrig){ counter++; }
+ if(bValueRand >= bValueOrig){ counter++; }
}
double pValue = (double) counter / (double) iters;
CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq);
CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
CommandParameter pqual("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqual);
+ CommandParameter ppacbio("pacbio", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppacbio);
CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "sanger", "", "", "","",false,false,true); parameters.push_back(pformat);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n";
helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
+ helpString += "The pacbio parameter allows you to indicate .... When set to true, quality scores of 0 will results in a corresponding base of N. Default=F.\n";
helpString += "Example fastq.info(fastaq=test.fastaq).\n";
helpString += "Note: No spaces between parameter labels (i.e. fastq), '=' and yourFastQFile.\n";
return helpString;
fasta = m->isTrue(temp);
temp = validParameter.validFile(parameters, "qfile", false); if(temp == "not found"){ temp = "T"; }
- qual = m->isTrue(temp);
+ qual = m->isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "pacbio", false); if(temp == "not found"){ temp = "F"; }
+ pacbio = m->isTrue(temp);
+
format = validParameter.validFile(parameters, "format", false); if (format == "not found"){ format = "sanger"; }
if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } }
if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
- //print sequence info to files
- if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
-
- if (qual) {
- vector<int> qualScores = convertQual(quality);
+ vector<int> qualScores;
+ if (qual) {
+ qualScores = convertQual(quality);
outQual << ">" << name << endl;
for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
outQual << endl;
}
+
+ if (m->control_pressed) { break; }
+
+ if (pacbio) {
+ if (!qual) { qualScores = convertQual(quality); } //get scores if we didn't already
+ for (int i = 0; i < qualScores.size(); i++) {
+ if (qualScores[i] == 0){ sequence[i] = 'N'; }
+ }
+ }
+
+ //print sequence info to files
+ if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
+
}
in.close();
if (fasta) { outFasta.close(); }
if (qual) { outQual.close(); }
- if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
+ if (m->control_pressed) { outputTypes.clear(); outputNames.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
//set fasta file as new current fastafile
string current = "";
vector<string> outputNames;
string outputDir, fastaQFile, format;
- bool abort, fasta, qual;
+ bool abort, fasta, qual, pacbio;
vector<int> convertQual(string);
vector<char> convertTable;
void updateReverseMap(vector<vector<int> >&, int, int, int);
void setName(string n);
void setScores(vector<int> qs) { qScores = qs; seqLength = qScores.size(); }
-
+ vector<int> getScores() { return qScores; }
private:
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.");
m->mothurOutEndLine();
+ //set fasta file as new current fastafile
+ string current = "";
+ itTypes = outputTypes.find("errorseq");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
+ }
+
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
if (pos == string::npos) {
flowData.printFlows(trimFlowFile);
- if(fasta) { currSeq.printSequence(fastaFile); }
+ if(fasta) { currSeq.printSequence(fastaFile); }
if(allFiles){
ofstream output;
flowData.printFlows(trimFlowFile);
- if(pDataArray->fasta) { currSeq.printSequence(fastaFile); }
+ if(pDataArray->fasta) { currSeq.setAligned(currSeq.getUnaligned()); currSeq.printSequence(fastaFile); }
if(pDataArray->allFiles){
ofstream output;
try {
m = MothurOut::getInstance();
paired = false;
+ trashCode = "";
pdiffs = p;
bdiffs = b;
ldiffs = l;
sdiffs = s;
paired = true;
+ trashCode = "";
maxFBarcodeLength = 0;
for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
primers = pr;
revPrimer = r;
paired = false;
+ trashCode = "";
maxFBarcodeLength = 0;
for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
try {
string rawSequence = seq.getUnaligned();
+ trashCode = "";
for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
string oligo = it->first;
primerStart = 0; primerEnd = 0;
//if you don't want to allow for diffs
- if (pdiffs == 0) { return false; }
+ if (pdiffs == 0) { trashCode = "f"; return false; }
else { //try aligning and see if you can find it
//can you find the barcode
if (alignment != NULL) { delete alignment; }
- if(minDiff > pdiffs) { primerStart = 0; primerEnd = 0; return false; } //no good matches
- else if(minCount > 1) { primerStart = 0; primerEnd = 0; return false; } //can't tell the difference between multiple primers
- else{ return true; }
+ if(minDiff > pdiffs) { primerStart = 0; primerEnd = 0; trashCode = "f"; return false; } //no good matches
+ else if(minCount > 1) { primerStart = 0; primerEnd = 0; trashCode = "f"; return false; } //can't tell the difference between multiple primers
+ else{ trashCode = ""; return true; }
}
-
+
+ trashCode = "f";
primerStart = 0; primerEnd = 0;
return false;
//******************************************************************/
bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
try {
-
+ trashCode = "";
string rawSequence = seq.getUnaligned();
for(int i=0;i<revPrimer.size();i++){
}
}
+ trashCode = "r";
primerStart = 0; primerEnd = 0;
return false;
}
int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
try {
-
+ trashCode = "";
if (paired) { int success = stripPairedBarcode(seq, qual, group); return success; }
string rawSequence = seq.getUnaligned();
}
success = 0;
- break;
+ trashCode = "";
+ return success;
}
}
//if you found the barcode or if you don't want to allow for diffs
- if ((bdiffs == 0) || (success == 0)) { return success; }
+ if (bdiffs == 0) { trashCode = "b"; return success; }
else { //try aligning and see if you can find it
Alignment* alignment;
// int length = oligo.length();
if(rawSequence.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
- success = bdiffs + 10;
+ success = bdiffs + 10; trashCode = "b";
break;
}
}
- if(minDiff > bdiffs) { success = minDiff; } //no good matches
- else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
+ if(minDiff > bdiffs) { trashCode = "b"; success = minDiff; } //no good matches
+ else if(minCount > 1) { trashCode = "b"; success = bdiffs + 100; } //can't tell the difference between multiple barcodes
else{ //use the best match
group = minGroup;
seq.setUnaligned(rawSequence.substr(minPos));
qual.trimQScores(minPos, -1);
}
success = minDiff;
+ trashCode = "";
}
if (alignment != NULL) { delete alignment; }
//*******************************************************************/
int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& group){
try {
+ trashCode = "";
//look for forward barcode
string rawFSequence = forwardSeq.getUnaligned();
string rawRSequence = reverseSeq.getUnaligned();
if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
- break;
+ trashCode += "b";
+ break;
}
if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
+ trashCode += "d";
break;
}
- if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
- group = it->first;
- forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
- reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
- success = 0;
- break;
- }
+ if(compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) {
+ if (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length()))) {
+ group = it->first;
+ forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
+ reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+ success = 0; trashCode = "";
+ return success;
+ }
+ }else {
+ trashCode = "b";
+ if (!compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length()))) { trashCode += "d"; }
+ }
}
//if you found the barcode or if you don't want to allow for diffs
- if ((bdiffs == 0) || (success == 0)) { return success; }
+ if (bdiffs == 0) { return success; }
else { //try aligning and see if you can find it
+ trashCode = "";
Alignment* alignment;
if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
else{ alignment = NULL; }
}
//cout << minDiff << '\t' << minCount << '\t' << endl;
- if(minDiff > bdiffs) { success = minDiff; } //no good matches
- else{
+ if(minDiff > bdiffs) { trashCode = "b"; minFGroup.clear(); success = minDiff; } //no good matches
+ //else{
//check for reverse match
if (alignment != NULL) { delete alignment; }
cout << endl;
}
cout << endl;*/
- if(minDiff > bdiffs) { success = minDiff; } //no good matches
+ if(minDiff > bdiffs) { trashCode += "d"; success = minDiff; } //no good matches
else {
bool foundMatch = false;
vector<int> matches;
forwardSeq.setUnaligned(rawFSequence.substr(fStart));
reverseSeq.setUnaligned(rawRSequence.substr(rStart));
success = minDiff;
- }else { success = bdiffs + 100; }
+ }else { if (trashCode == "") { trashCode = "bd"; } success = bdiffs + 100; }
}
- }
+ //}
if (alignment != NULL) { delete alignment; }
}
+ //catchall for case I didn't think of
+ if ((trashCode == "") && (success > bdiffs)) { trashCode = "bd"; }
+
return success;
}
//*******************************************************************/
int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
try {
+ trashCode = "";
+
//look for forward barcode
string rawFSequence = forwardSeq.getUnaligned();
string rawRSequence = reverseSeq.getUnaligned();
if(rawFSequence.length() < foligo.length()){ //let's just assume that the barcodes are the same length
success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
+ trashCode = "b";
break;
}
if(rawRSequence.length() < roligo.length()){ //let's just assume that the barcodes are the same length
success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
+ trashCode += "d";
break;
}
- if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
- group = it->first;
- forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
- reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
- forwardQual.trimQScores(foligo.length(), -1);
- reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
- success = 0;
- break;
+ if(compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) {
+ if (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length()))) {
+ group = it->first;
+ forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
+ reverseSeq.setUnaligned(rawRSequence.substr(0,(rawRSequence.length()-roligo.length())));
+ forwardQual.trimQScores(foligo.length(), -1);
+ reverseQual.trimQScores(-1, rawRSequence.length()-roligo.length());
+ success = 0; trashCode = "";
+ return success;
+ }
+ }else {
+ trashCode = "b";
+ if (!compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length()))) { trashCode += "d";
+ }
}
}
//if you found the barcode or if you don't want to allow for diffs
- if ((bdiffs == 0) || (success == 0)) { return success; }
+ if (bdiffs == 0) { return success; }
else { //try aligning and see if you can find it
+ trashCode = "";
Alignment* alignment;
if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
else{ alignment = NULL; }
}
//cout << minDiff << '\t' << minCount << '\t' << endl;
- if(minDiff > bdiffs) { success = minDiff; } //no good matches
- else{
+ if(minDiff > bdiffs) { trashCode = "b"; minFGroup.clear(); success = minDiff; } //no good matches
+ //else{
//check for reverse match
if (alignment != NULL) { delete alignment; }
cout << endl;
}
cout << endl;*/
- if(minDiff > bdiffs) { success = minDiff; } //no good matches
+ if(minDiff > bdiffs) { trashCode = "d"; success = minDiff; } //no good matches
else {
bool foundMatch = false;
vector<int> matches;
forwardQual.trimQScores(fStart, -1);
reverseQual.trimQScores(rStart, -1);
success = minDiff;
- }else { success = bdiffs + 100; }
+ }else { if (trashCode == "") { trashCode = "bd"; } success = bdiffs + 100; }
}
- }
+ //}
if (alignment != NULL) { delete alignment; }
}
+ //catchall for case I didn't think of
+ if ((trashCode == "") && (success > bdiffs)) { trashCode = "bd"; }
+
return success;
}
//*******************************************************************/
int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
try {
+ trashCode = "";
+
//look for forward barcode
string rawSeq = seq.getUnaligned();
int success = bdiffs + 1; //guilty until proven innocent
//cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
+ trashCode = "b";
break;
}
if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
+ trashCode += "d";
break;
}
- if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
- group = it->first;
- string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
- seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
- if(qual.getName() != ""){
- qual.trimQScores(-1, rawSeq.length()-roligo.length());
- qual.trimQScores(foligo.length(), -1);
+ if(compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) { //find forward
+ if (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()))) { //find reverse
+ group = it->first;
+ string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
+ seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
+ if(qual.getName() != ""){
+ qual.trimQScores(-1, rawSeq.length()-roligo.length());
+ qual.trimQScores(foligo.length(), -1);
+ }
+ success = 0;
+ trashCode = "";
+ return success;
}
- success = 0;
- break;
+ }else {
+ trashCode = "b";
+ if (!compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()))) { trashCode += "d"; }
}
}
//cout << "success=" << success << endl;
//if you found the barcode or if you don't want to allow for diffs
- if ((bdiffs == 0) || (success == 0)) { return success; }
+ if (bdiffs == 0) { return success; }
else { //try aligning and see if you can find it
Alignment* alignment;
if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
}
//cout << minDiff << '\t' << minCount << '\t' << endl;
- if(minDiff > bdiffs) { success = minDiff; } //no good matches
- else{
+ if(minDiff > bdiffs) { trashCode = "b"; minFGroup.clear(); success = minDiff; } //no good matches
+ //else{
//check for reverse match
if (alignment != NULL) { delete alignment; }
cout << endl;
}
cout << endl;*/
- if(minDiff > bdiffs) { success = minDiff; } //no good matches
+ if(minDiff > bdiffs) { trashCode += "d"; success = minDiff; } //no good matches
else {
bool foundMatch = false;
vector<int> matches;
}
success = minDiff;
//cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
- }else { success = bdiffs + 100; }
+ }else { if (trashCode == "") { trashCode = "bd"; } success = bdiffs + 100; }
}
- }
+ //}
if (alignment != NULL) { delete alignment; }
}
+ //catchall for case I didn't think of
+ if ((trashCode == "") && (success > bdiffs)) { trashCode = "bd"; }
+
return success;
}
//*******************************************************************/
int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
try {
+ trashCode = "";
//look for forward
string rawSeq = seq.getUnaligned();
int success = pdiffs + 1; //guilty until proven innocent
//cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
+ trashCode = "p";
break;
}
if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
+ trashCode += "q";
break;
}
- if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
- group = it->first;
- if (!keepForward) {
+ if(compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) { //find forward
+ if (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()))) { //find reverse
+ group = it->first;
string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
if(qual.getName() != ""){
qual.trimQScores(-1, rawSeq.length()-roligo.length());
qual.trimQScores(foligo.length(), -1);
}
+ success = 0;
+ trashCode = "";
+ return success;
}
- success = 0;
- break;
+ }else {
+ trashCode = "b";
+ if (!compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()))) { trashCode += "d"; }
}
- }
+
+ }
//cout << "success=" << success << endl;
//if you found the barcode or if you don't want to allow for diffs
- if ((pdiffs == 0) || (success == 0)) { return success; }
+ if (pdiffs == 0) { return success; }
else { //try aligning and see if you can find it
Alignment* alignment;
if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
}
//cout << minDiff << '\t' << minCount << '\t' << endl;
- if(minDiff > pdiffs) { success = minDiff; } //no good matches
- else{
+ if(minDiff > pdiffs) { trashCode = "p"; minFGroup.clear(); success = minDiff; } //no good matches
+ //else{
//check for reverse match
if (alignment != NULL) { delete alignment; }
cout << endl;
}
cout << endl;*/
- if(minDiff > pdiffs) { success = minDiff; } //no good matches
+ if(minDiff > pdiffs) { trashCode += "q"; success = minDiff; } //no good matches
else {
bool foundMatch = false;
vector<int> matches;
}
success = minDiff;
//cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
- }else { success = pdiffs + 100; }
+ }else { if (trashCode == "") { trashCode = "pq"; } success = pdiffs + 100; }
}
- }
+ //}
if (alignment != NULL) { delete alignment; }
}
+ //catchall for case I didn't think of
+ if ((trashCode == "") && (success > pdiffs)) { trashCode = "pq"; }
+
return success;
}
bool findReverse(Sequence&, int&, int&);
string reverseOligo(string);
+ string getTrashCode() { return trashCode; }
private:
int pdiffs, bdiffs, ldiffs, sdiffs;
bool paired;
+ string trashCode;
map<string, int> barcodes;
map<string, int> primers;
//create reoriented primer and barcode pairs
map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
- cout << "primer " << (it->second).forward << '\t' << (it->second).reverse << '\t' << primerNameVector[it->first] << endl;
- cout << "rprimer " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
- oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
+ oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
rpairedPrimers[it->first] = tempPair;
+ //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
}
for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
- cout << "barcode " << (it->second).forward << '\t' << (it->second).reverse << '\t' << barcodeNameVector[it->first] << endl;
- cout << "rbarcode " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
- oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
+ oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
rpairedBarcodes[it->first] = tempPair;
+ //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
}
rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
}
Sequence currSeq(inFASTA); m->gobble(inFASTA);
//cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
+ Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
- QualityScores currQual;
+ QualityScores currQual; QualityScores savedQual;
if(qFileName != ""){
currQual = QualityScores(qFile); m->gobble(qFile);
+ savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
//cout << currQual.getName() << endl;
}
-
+
string origSeq = currSeq.getUnaligned();
if (origSeq != "") {
if(numFPrimers != 0){
success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
- if(success > pdiffs) { trashCode += 'f'; }
+ if(success > pdiffs) {
+ //if (pairedOligos) { trashCode += trimOligos->getTrashCode(); }
+ //else { trashCode += 'f'; }
+ trashCode += 'f';
+ }
else{ currentSeqsDiffs += success; }
}
int thisPrimerIndex = 0;
if(numBarcodes != 0){
- thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
+ thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
if(thisSuccess > bdiffs) { thisTrashCode += 'b'; }
else{ thisCurrentSeqsDiffs += thisSuccess; }
}
if(numFPrimers != 0){
- thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, keepforward);
- if(thisSuccess > pdiffs) { thisTrashCode += 'f'; }
+ thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, keepforward);
+ if(thisSuccess > pdiffs) {
+ //if (pairedOligos) { thisTrashCode += rtrimOligos->getTrashCode(); }
+ //else { thisTrashCode += 'f'; }
+ thisTrashCode += 'f';
+ }
else{ thisCurrentSeqsDiffs += thisSuccess; }
}
-
+
if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }
if (thisTrashCode == "") {
currentSeqsDiffs = thisCurrentSeqsDiffs;
barcodeIndex = thisBarcodeIndex;
primerIndex = thisPrimerIndex;
- currSeq.reverseComplement();
+ savedSeq.reverseComplement();
+ currSeq.setAligned(savedSeq.getAligned());
if(qFileName != ""){
- currQual.flipQScores();
+ savedQual.flipQScores();
+ currQual.setScores(savedQual.getScores());
}
}
}
// get rest of line in case there is a primer name
while (!inOligos.eof()) {
char c = inOligos.get();
- if (c == 10 || c == 13){ break; }
+ if (c == 10 || c == 13 || c == -1){ break; }
else if (c == 32 || c == 9){;} //space or tab
else { group += c; }
}
oligosPair newPrimer(oligo, roligo);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
//check for repeat barcodes
string tempPair = oligo+roligo;
string temp = "";
while (!inOligos.eof()) {
char c = inOligos.get();
- if (c == 10 || c == 13){ break; }
+ if (c == 10 || c == 13 || c == -1){ break; }
else if (c == 32 || c == 9){;} //space or tab
else { temp += c; }
}
if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
}
- oligosPair newPair(oligo, reverseOligo(reverseBarcode));
+ reverseBarcode = reverseOligo(reverseBarcode);
+ oligosPair newPair(oligo, reverseBarcode);
if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
TrimOligos* trimOligos = NULL;
int numBarcodes = pDataArray->barcodes.size();
- if (pDataArray->pairedOligos) { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes); numBarcodes = pDataArray->pairedBarcodes.size(); }
+ if (pDataArray->pairedOligos) { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes); numBarcodes = pDataArray->pairedBarcodes.size(); pDataArray->numFPrimers = pDataArray->pairedPrimers.size(); }
else { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer); }
TrimOligos* rtrimOligos = NULL;
for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
if (pDataArray->m->control_pressed) {
- delete trimOligos;
+ delete trimOligos; if (pDataArray->reorient) { delete rtrimOligos; }
inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
if ((pDataArray->createGroup) && (pDataArray->countfile == "")) { outGroupsFile.close(); }
if(pDataArray->qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
int currentSeqsDiffs = 0;
Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
+ Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
- QualityScores currQual;
+ QualityScores currQual; QualityScores savedQual;
if(pDataArray->qFileName != ""){
currQual = QualityScores(qFile); pDataArray->m->gobble(qFile);
+ savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
}
+
string origSeq = currSeq.getUnaligned();
if (origSeq != "") {
int thisPrimerIndex = 0;
if(numBarcodes != 0){
- thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
+ thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
if(thisSuccess > pDataArray->bdiffs) { thisTrashCode += 'b'; }
else{ thisCurrentSeqsDiffs += thisSuccess; }
}
if(pDataArray->numFPrimers != 0){
- thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, pDataArray->keepforward);
+ thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, pDataArray->keepforward);
if(thisSuccess > pDataArray->pdiffs) { thisTrashCode += 'f'; }
else{ thisCurrentSeqsDiffs += thisSuccess; }
}
currentSeqsDiffs = thisCurrentSeqsDiffs;
barcodeIndex = thisBarcodeIndex;
primerIndex = thisPrimerIndex;
+ savedSeq.reverseComplement();
+ currSeq.setAligned(savedSeq.getAligned());
+ if(pDataArray->qFileName != ""){
+ savedQual.flipQScores();
+ currQual.setScores(savedQual.getScores());
+ }
}
}
}
//report progress
- if((i) % 1000 == 0){ pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine(); }
+ if((pDataArray->count) % 1000 == 0){ pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); }
}
//report progress
if((pDataArray->count) % 1000 != 0){ pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); }
+ if (pDataArray->reorient) { delete rtrimOligos; }
delete trimOligos;
inFASTA.close();
trimFASTAFile.close();