exit(1);
}
}
+//**********************************************************************************************************************
+string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+ try {
+ string outputFileName = "";
+ map<string, vector<string> >::iterator it;
+
+ //is this a type this command creates
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+ else {
+ if (type == "qfile") { outputFileName = "qual"; }
+ else if (type == "fasta") { outputFileName = "fasta"; }
+ else if (type == "group") { outputFileName = "groups"; }
+ else if (type == "name") { outputFileName = "names"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TrimSeqsCommand", "getOutputFileNameTag");
+ exit(1);
+ }
+}
//**********************************************************************************************************************
vector<vector<string> > qualFileNames;
vector<vector<string> > nameFileNames;
- string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
+ string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("fasta");
outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
- string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
+ string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("fasta");
outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
- string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
- string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
+ string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("qfile");
+ string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("qfile");
if (qFileName != "") {
outputNames.push_back(trimQualFile);
outputTypes["qfile"].push_back(scrapQualFile);
}
- string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
- string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
+ string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim." + getOutputFileNameTag("name");
+ string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap." + getOutputFileNameTag("name");
if (nameFile != "") {
m->readNames(nameFile, nameMap);
if(oligoFile != ""){
createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
if (createGroup) {
- outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
+ outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
}
}
m->openInputFile(it->first, in);
ofstream out;
- string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
+ string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + getOutputFileNameTag("group");
outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
m->openOutputFile(thisGroupName, out);
int count = 0;
bool moreSeqs = 1;
- TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
+ TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer);
while (moreSeqs) {
if(success > bdiffs) { trashCode += 'b'; }
else{ currentSeqsDiffs += success; }
}
+
+ if(rbarcodes.size() != 0){
+ success = trimOligos.stripRBarcode(currSeq, currQual, barcodeIndex);
+ if(success > bdiffs) { trashCode += 'b'; }
+ else{ currentSeqsDiffs += success; }
+ }
if(numSpacers != 0){
success = trimOligos.stripSpacer(currSeq, currQual);
currQual.printQScores(trimQualFile);
}
+
if(nameFile != ""){
map<string, string>::iterator itName = nameMap.find(currSeq.getName());
if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
+ int numRedundants = 0;
if (nameFile != "") {
map<string, string>::iterator itName = nameMap.find(currSeq.getName());
if (itName != nameMap.end()) {
vector<string> thisSeqsNames;
m->splitAtChar(itName->second, thisSeqsNames, ',');
+ numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
}
}
map<string, int>::iterator it = groupCounts.find(thisGroup);
- if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
- else { groupCounts[it->first]++; }
+ if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; }
+ else { groupCounts[it->first] += (1 + numRedundants); }
}
}
tempPrimerQualFileNames,
tempNameFileNames,
lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
- pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer,
+ pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer,
primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
int startIndex = i * numSeqsPerProcessor;
if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
- cout << fastaFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
}
}
}
else if(type == "BARCODE"){
inOligos >> group;
+
+ //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
+ //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
+ string temp = "";
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ else { temp += c; }
+ }
+ //then this is illumina data with 4 columns
+ if (temp != "") {
+ string reverseBarcode = reverseOligo(group); //reverse barcode
+ group = temp;
+
+ //check for repeat barcodes
+ map<string, int>::iterator itBar = rbarcodes.find(reverseBarcode);
+ if (itBar != rbarcodes.end()) { m->mothurOut("barcode " + reverseBarcode + " is in your oligos file already."); m->mothurOutEndLine(); }
+
+ rbarcodes[reverseBarcode]=indexBarcode;
+ }
+
//check for repeat barcodes
map<string, int>::iterator itBar = barcodes.find(oligo);
if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }