exit(1);
}
}
+//**********************************************************************************************************************
+string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+ try {
+ string outputFileName = "";
+ map<string, vector<string> >::iterator it;
+
+ //is this a type this command creates
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+ else {
+ if (type == "qfile") { outputFileName = "qual"; }
+ else if (type == "fasta") { outputFileName = "fasta"; }
+ else if (type == "group") { outputFileName = "groups"; }
+ else if (type == "name") { outputFileName = "names"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TrimSeqsCommand", "getOutputFileNameTag");
+ exit(1);
+ }
+}
//**********************************************************************************************************************
numFPrimers = 0; //this needs to be initialized
numRPrimers = 0;
+ numSpacers = 0;
+ numLinkers = 0;
createGroup = false;
vector<vector<string> > fastaFileNames;
vector<vector<string> > qualFileNames;
vector<vector<string> > nameFileNames;
- string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
+ string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("fasta");
outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
- string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
+ string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("fasta");
outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
- string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
- string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
+ string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("qfile");
+ string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("qfile");
if (qFileName != "") {
outputNames.push_back(trimQualFile);
outputTypes["qfile"].push_back(scrapQualFile);
}
- string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
- string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
+ string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim." + getOutputFileNameTag("name");
+ string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap." + getOutputFileNameTag("name");
if (nameFile != "") {
m->readNames(nameFile, nameMap);
if(oligoFile != ""){
createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
if (createGroup) {
- outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
+ outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
}
}
+
+ //fills lines and qlines
+ setLines(fastaFile, qFileName);
- vector<unsigned long long> fastaFilePos;
- vector<unsigned long long> qFilePos;
+ if(processors == 1){
+ driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
+ }else{
+ createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
+ }
- setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
-
- for (int i = 0; i < (fastaFilePos.size()-1); i++) {
- lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
- if (qFileName != "") { qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)])); }
- }
- if(qFileName == "") { qLines = lines; } //files with duds
-
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- if(processors == 1){
- driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
- }else{
- createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
- }
- #else
- driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
- #endif
if (m->control_pressed) { return 0; }
m->openInputFile(it->first, in);
ofstream out;
- string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
+ string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + getOutputFileNameTag("group");
outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
m->openOutputFile(thisGroupName, out);
Sequence currSeq(in); m->gobble(in);
out << currSeq.getName() << '\t' << it->second << endl;
+
+ if (nameFile != "") {
+ map<string, string>::iterator itName = nameMap.find(currSeq.getName());
+ if (itName != nameMap.end()) {
+ vector<string> thisSeqsNames;
+ m->splitAtChar(itName->second, thisSeqsNames, ',');
+ for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
+ out << thisSeqsNames[k] << '\t' << it->second << endl;
+ }
+ }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
+ }
}
in.close();
out.close();
/**************************************************************************************/
-int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {
+int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {
try {
ifstream inFASTA;
m->openInputFile(filename, inFASTA);
- inFASTA.seekg(line->start);
+ inFASTA.seekg(line.start);
ifstream qFile;
if(qFileName != "") {
m->openInputFile(qFileName, qFile);
- qFile.seekg(qline->start);
+ qFile.seekg(qline.start);
}
int count = 0;
bool moreSeqs = 1;
- TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
+ TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer);
while (moreSeqs) {
if(numLinkers != 0){
success = trimOligos.stripLinker(currSeq, currQual);
- if(!success) { trashCode += 'k'; }
+ if(success > ldiffs) { trashCode += 'k'; }
+ else{ currentSeqsDiffs += success; }
+
}
if(barcodes.size() != 0){
if(success > bdiffs) { trashCode += 'b'; }
else{ currentSeqsDiffs += success; }
}
+
+ if(rbarcodes.size() != 0){
+ success = trimOligos.stripRBarcode(currSeq, currQual, barcodeIndex);
+ if(success > bdiffs) { trashCode += 'b'; }
+ else{ currentSeqsDiffs += success; }
+ }
if(numSpacers != 0){
success = trimOligos.stripSpacer(currSeq, currQual);
- if(!success) { trashCode += 's'; }
+ if(success > sdiffs) { trashCode += 's'; }
+ else{ currentSeqsDiffs += success; }
+
}
if(numFPrimers != 0){
currQual.printQScores(trimQualFile);
}
+
if(nameFile != ""){
map<string, string>::iterator itName = nameMap.find(currSeq.getName());
if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
+ int numRedundants = 0;
if (nameFile != "") {
map<string, string>::iterator itName = nameMap.find(currSeq.getName());
if (itName != nameMap.end()) {
vector<string> thisSeqsNames;
m->splitAtChar(itName->second, thisSeqsNames, ',');
+ numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
}
}
map<string, int>::iterator it = groupCounts.find(thisGroup);
- if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
- else { groupCounts[it->first]++; }
+ if (it == groupCounts.end()) { groupCounts[thisGroup] = 1 + numRedundants; }
+ else { groupCounts[it->first] += (1 + numRedundants); }
}
}
count++;
}
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
unsigned long long pos = inFASTA.tellg();
- if ((pos == -1) || (pos >= line->end)) { break; }
+ if ((pos == -1) || (pos >= line.end)) { break; }
#else
if (inFASTA.eof()) { break; }
int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- int process = 1;
+
+ int process = 1;
int exitCommand = 1;
processIDS.clear();
- //loop through and create all the processes you want
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ //loop through and create all the processes you want
while (process != processors) {
int pid = fork();
int temp = processIDS[i];
wait(&temp);
}
-
- //append files
+#else
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+ //Windows version shared memory, so be careful when passing variables through the trimData struct.
+ //Above fork() will clone, so memory is separate, but that's not the case with windows,
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ vector<trimData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for( int i=0; i<processors-1; i++){
+
+ string extension = "";
+ if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
+ vector<vector<string> > tempFASTAFileNames = fastaFileNames;
+ vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
+ vector<vector<string> > tempNameFileNames = nameFileNames;
+
+ if(allFiles){
+ ofstream temp;
+
+ for(int i=0;i<tempFASTAFileNames.size();i++){
+ for(int j=0;j<tempFASTAFileNames[i].size();j++){
+ if (tempFASTAFileNames[i][j] != "") {
+ tempFASTAFileNames[i][j] += extension;
+ m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
+
+ if(qFileName != ""){
+ tempPrimerQualFileNames[i][j] += extension;
+ m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
+ }
+ if(nameFile != ""){
+ tempNameFileNames[i][j] += extension;
+ m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
+ }
+ }
+ }
+ }
+ }
+
+
+ trimData* tempTrim = new trimData(filename,
+ qFileName, nameFile,
+ (trimFASTAFileName+extension),
+ (scrapFASTAFileName+extension),
+ (trimQualFileName+extension),
+ (scrapQualFileName+extension),
+ (trimNameFileName+extension),
+ (scrapNameFileName+extension),
+ (groupFile+extension),
+ tempFASTAFileNames,
+ tempPrimerQualFileNames,
+ tempNameFileNames,
+ lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
+ pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer,
+ primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
+ qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
+ minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
+ pDataArray.push_back(tempTrim);
+
+ hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
+ }
+
+ //parent do my part
+ ofstream temp;
+ m->openOutputFile(trimFASTAFileName, temp); temp.close();
+ m->openOutputFile(scrapFASTAFileName, temp); temp.close();
+ if(qFileName != ""){
+ m->openOutputFile(trimQualFileName, temp); temp.close();
+ m->openOutputFile(scrapQualFileName, temp); temp.close();
+ }
+ if (nameFile != "") {
+ m->openOutputFile(trimNameFileName, temp); temp.close();
+ m->openOutputFile(scrapNameFileName, temp); temp.close();
+ }
+
+ driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
+ processIDS.push_back(processors-1);
+
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
+ map<string, int>::iterator it2 = groupCounts.find(it->first);
+ if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
+ else { groupCounts[it->first] += it->second; }
+ }
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+#endif
+
+
+ //append files
for(int i=0;i<processIDS.size();i++){
m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
}
}
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
if(createGroup){
ifstream in;
string tempFile = filename + toString(processIDS[i]) + ".num.temp";
if (tempNum != 0) {
while (!in.eof()) {
in >> group >> tempNum; m->gobble(in);
-
+
map<string, int>::iterator it = groupCounts.find(group);
if (it == groupCounts.end()) { groupCounts[group] = tempNum; }
else { groupCounts[it->first] += tempNum; }
}
in.close(); m->mothurRemove(tempFile);
}
-
+ #endif
}
-
- return exitCommand;
-#endif
+
+ return exitCommand;
}
catch(exception& e) {
m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
/**************************************************************************************************/
-int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long long>& fastaFilePos, vector<unsigned long long>& qfileFilePos) {
+int TrimSeqsCommand::setLines(string filename, string qfilename) {
try {
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+
+ vector<unsigned long long> fastaFilePos;
+ vector<unsigned long long> qfileFilePos;
+
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
//set file positions for fasta file
fastaFilePos = m->divideFile(filename, processors);
- if (qfilename == "") { return processors; }
-
//get name of first sequence in each chunk
map<string, int> firstSeqNames;
for (int i = 0; i < (fastaFilePos.size()-1); i++) {
in.close();
}
-
- //seach for filePos of each first name in the qfile and save in qfileFilePos
- ifstream inQual;
- m->openInputFile(qfilename, inQual);
- string input;
- while(!inQual.eof()){
- input = m->getline(inQual);
-
- if (input.length() != 0) {
- if(input[0] == '>'){ //this is a sequence name line
- istringstream nameStream(input);
-
- string sname = ""; nameStream >> sname;
- sname = sname.substr(1);
-
- map<string, int>::iterator it = firstSeqNames.find(sname);
-
- if(it != firstSeqNames.end()) { //this is the start of a new chunk
- unsigned long long pos = inQual.tellg();
- qfileFilePos.push_back(pos - input.length() - 1);
- firstSeqNames.erase(it);
- }
- }
- }
-
- if (firstSeqNames.size() == 0) { break; }
- }
- inQual.close();
-
-
- if (firstSeqNames.size() != 0) {
- for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
- m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
- }
- qFileName = "";
- return processors;
- }
-
- //get last file position of qfile
- FILE * pFile;
- unsigned long long size;
-
- //get num bytes in file
- pFile = fopen (qfilename.c_str(),"rb");
- if (pFile==NULL) perror ("Error opening file");
- else{
- fseek (pFile, 0, SEEK_END);
- size=ftell (pFile);
- fclose (pFile);
- }
-
- qfileFilePos.push_back(size);
+ if(qfilename != "") {
+ //seach for filePos of each first name in the qfile and save in qfileFilePos
+ ifstream inQual;
+ m->openInputFile(qfilename, inQual);
+
+ string input;
+ while(!inQual.eof()){
+ input = m->getline(inQual);
+
+ if (input.length() != 0) {
+ if(input[0] == '>'){ //this is a sequence name line
+ istringstream nameStream(input);
+
+ string sname = ""; nameStream >> sname;
+ sname = sname.substr(1);
+
+ map<string, int>::iterator it = firstSeqNames.find(sname);
+
+ if(it != firstSeqNames.end()) { //this is the start of a new chunk
+ unsigned long long pos = inQual.tellg();
+ qfileFilePos.push_back(pos - input.length() - 1);
+ firstSeqNames.erase(it);
+ }
+ }
+ }
+
+ if (firstSeqNames.size() == 0) { break; }
+ }
+ inQual.close();
+
+
+ if (firstSeqNames.size() != 0) {
+ for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
+ m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
+ }
+ qFileName = "";
+ return processors;
+ }
+
+ //get last file position of qfile
+ FILE * pFile;
+ unsigned long long size;
+
+ //get num bytes in file
+ pFile = fopen (qfilename.c_str(),"rb");
+ if (pFile==NULL) perror ("Error opening file");
+ else{
+ fseek (pFile, 0, SEEK_END);
+ size=ftell (pFile);
+ fclose (pFile);
+ }
+
+ qfileFilePos.push_back(size);
+ }
+
+ for (int i = 0; i < (fastaFilePos.size()-1); i++) {
+ lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
+ if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); }
+ }
+ if(qfilename == "") { qLines = lines; } //files with duds
return processors;
#else
-
- fastaFilePos.push_back(0); qfileFilePos.push_back(0);
- fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
+
+ if (processors == 1) { //save time
+ //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
+ //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
+ lines.push_back(linePair(0, 1000));
+ if (qfilename != "") { qLines.push_back(linePair(0, 1000)); }
+ }else{
+ int numFastaSeqs = 0;
+ fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs);
+ if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
+
+ if (qfilename != "") {
+ int numQualSeqs = 0;
+ qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs);
+
+ if (numFastaSeqs != numQualSeqs) {
+ m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true;
+ }
+ }
+
+ //figure out how many sequences you have to process
+ int numSeqsPerProcessor = numFastaSeqs / processors;
+ for (int i = 0; i < processors; i++) {
+ int startIndex = i * numSeqsPerProcessor;
+ if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
+ lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
+ if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
+ }
+ }
+ if(qfilename == "") { qLines = lines; } //files with duds
return 1;
#endif
primerNameVector.push_back(group);
}
else if(type == "REVERSE"){
- Sequence oligoRC("reverse", oligo);
- oligoRC.reverseComplement();
- revPrimer.push_back(oligoRC.getUnaligned());
+ //Sequence oligoRC("reverse", oligo);
+ //oligoRC.reverseComplement();
+ string oligoRC = reverseOligo(oligo);
+ revPrimer.push_back(oligoRC);
}
else if(type == "BARCODE"){
inOligos >> group;
+
+ //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
+ //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
+ string temp = "";
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ else { temp += c; }
+ }
+ //then this is illumina data with 4 columns
+ if (temp != "") {
+ string reverseBarcode = reverseOligo(group); //reverse barcode
+ group = temp;
+
+ //check for repeat barcodes
+ map<string, int>::iterator itBar = rbarcodes.find(reverseBarcode);
+ if (itBar != rbarcodes.end()) { m->mothurOut("barcode " + reverseBarcode + " is in your oligos file already."); m->mothurOutEndLine(); }
+
+ rbarcodes[reverseBarcode]=indexBarcode;
+ }
+
//check for repeat barcodes
map<string, int>::iterator itBar = barcodes.find(oligo);
if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
}
}
+//********************************************************************/
+string TrimSeqsCommand::reverseOligo(string oligo){
+ try {
+ string reverse = "";
+
+ for(int i=oligo.length()-1;i>=0;i--){
+
+ if(oligo[i] == 'A') { reverse += 'T'; }
+ else if(oligo[i] == 'T'){ reverse += 'A'; }
+ else if(oligo[i] == 'U'){ reverse += 'A'; }
+
+ else if(oligo[i] == 'G'){ reverse += 'C'; }
+ else if(oligo[i] == 'C'){ reverse += 'G'; }
+
+ else if(oligo[i] == 'R'){ reverse += 'Y'; }
+ else if(oligo[i] == 'Y'){ reverse += 'R'; }
+
+ else if(oligo[i] == 'M'){ reverse += 'K'; }
+ else if(oligo[i] == 'K'){ reverse += 'M'; }
+
+ else if(oligo[i] == 'W'){ reverse += 'W'; }
+ else if(oligo[i] == 'S'){ reverse += 'S'; }
+
+ else if(oligo[i] == 'B'){ reverse += 'V'; }
+ else if(oligo[i] == 'V'){ reverse += 'B'; }
+
+ else if(oligo[i] == 'D'){ reverse += 'H'; }
+ else if(oligo[i] == 'H'){ reverse += 'D'; }
+
+ else { reverse += 'N'; }
+ }
+
+
+ return reverse;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
+ exit(1);
+ }
+}
//***************************************************************************************************************