}
}
- vector<unsigned long long> fastaFilePos;
- vector<unsigned long long> qFilePos;
+ //fills lines and qlines
+ setLines(fastaFile, qFileName);
- setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
-
- for (int i = 0; i < (fastaFilePos.size()-1); i++) {
- lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
- if (qFileName != "") { qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)])); }
- }
- if(qFileName == "") { qLines = lines; } //files with duds
-
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors == 1){
driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
}else{
createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
}
- #else
- driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
- #endif
+ //#else
+ // driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
+ //#endif
if (m->control_pressed) { return 0; }
/**************************************************************************************/
-int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {
+int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {
try {
ifstream inFASTA;
m->openInputFile(filename, inFASTA);
- inFASTA.seekg(line->start);
+ inFASTA.seekg(line.start);
ifstream qFile;
if(qFileName != "") {
m->openInputFile(qFileName, qFile);
- qFile.seekg(qline->start);
+ qFile.seekg(qline.start);
}
int count = 0;
if(numLinkers != 0){
success = trimOligos.stripLinker(currSeq, currQual);
- if(!success) { trashCode += 'k'; }
+ if(success > ldiffs) { trashCode += 'k'; }
+ else{ currentSeqsDiffs += success; }
+
}
if(barcodes.size() != 0){
if(numSpacers != 0){
success = trimOligos.stripSpacer(currSeq, currQual);
- if(!success) { trashCode += 's'; }
+ if(success > sdiffs) { trashCode += 's'; }
+ else{ currentSeqsDiffs += success; }
+
}
if(numFPrimers != 0){
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
unsigned long long pos = inFASTA.tellg();
- if ((pos == -1) || (pos >= line->end)) { break; }
+ if ((pos == -1) || (pos >= line.end)) { break; }
#else
if (inFASTA.eof()) { break; }
int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- int process = 1;
+
+ int process = 1;
int exitCommand = 1;
processIDS.clear();
- //loop through and create all the processes you want
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ //loop through and create all the processes you want
while (process != processors) {
int pid = fork();
int temp = processIDS[i];
wait(&temp);
}
-
- //append files
+#else
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+ //Windows version shared memory, so be careful when passing variables through the trimData struct.
+ //Above fork() will clone, so memory is separate, but that's not the case with windows,
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ vector<trimData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for( int i=0; i<processors-1; i++){
+
+ string extension = "";
+ if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
+ vector<vector<string> > tempFASTAFileNames = fastaFileNames;
+ vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
+ vector<vector<string> > tempNameFileNames = nameFileNames;
+
+ if(allFiles){
+ ofstream temp;
+
+ for(int i=0;i<tempFASTAFileNames.size();i++){
+ for(int j=0;j<tempFASTAFileNames[i].size();j++){
+ if (tempFASTAFileNames[i][j] != "") {
+ tempFASTAFileNames[i][j] += extension;
+ m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
+
+ if(qFileName != ""){
+ tempPrimerQualFileNames[i][j] += extension;
+ m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
+ }
+ if(nameFile != ""){
+ tempNameFileNames[i][j] += extension;
+ m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
+ }
+ }
+ }
+ }
+ }
+
+
+ trimData* tempTrim = new trimData(filename,
+ qFileName, nameFile,
+ (trimFASTAFileName+extension),
+ (scrapFASTAFileName+extension),
+ (trimQualFileName+extension),
+ (scrapQualFileName+extension),
+ (trimNameFileName+extension),
+ (scrapNameFileName+extension),
+ (groupFile+extension),
+ tempFASTAFileNames,
+ tempPrimerQualFileNames,
+ tempNameFileNames,
+ lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
+ pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer,
+ primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
+ qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
+ minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
+ pDataArray.push_back(tempTrim);
+
+ hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
+ }
+
+ //parent do my part
+ ofstream temp;
+ m->openOutputFile(trimFASTAFileName, temp); temp.close();
+ m->openOutputFile(scrapFASTAFileName, temp); temp.close();
+ if(qFileName != ""){
+ m->openOutputFile(trimQualFileName, temp); temp.close();
+ m->openOutputFile(scrapQualFileName, temp); temp.close();
+ }
+ if (nameFile != "") {
+ m->openOutputFile(trimNameFileName, temp); temp.close();
+ m->openOutputFile(scrapNameFileName, temp); temp.close();
+ }
+
+ driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
+ processIDS.push_back(processors-1);
+
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
+ map<string, int>::iterator it2 = groupCounts.find(it->first);
+ if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
+ else { groupCounts[it->first] += it->second; }
+ }
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+#endif
+
+
+ //append files
for(int i=0;i<processIDS.size();i++){
m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
}
}
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(createGroup){
ifstream in;
string tempFile = filename + toString(processIDS[i]) + ".num.temp";
if (tempNum != 0) {
while (!in.eof()) {
in >> group >> tempNum; m->gobble(in);
-
+
map<string, int>::iterator it = groupCounts.find(group);
if (it == groupCounts.end()) { groupCounts[group] = tempNum; }
else { groupCounts[it->first] += tempNum; }
}
in.close(); m->mothurRemove(tempFile);
}
-
+ #endif
}
-
- return exitCommand;
-#endif
+
+ return exitCommand;
}
catch(exception& e) {
m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
/**************************************************************************************************/
-int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long long>& fastaFilePos, vector<unsigned long long>& qfileFilePos) {
+int TrimSeqsCommand::setLines(string filename, string qfilename) {
try {
+
+ vector<unsigned long long> fastaFilePos;
+ vector<unsigned long long> qfileFilePos;
+
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
//set file positions for fasta file
fastaFilePos = m->divideFile(filename, processors);
}
qfileFilePos.push_back(size);
+
+ for (int i = 0; i < (fastaFilePos.size()-1); i++) {
+ lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
+ if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); }
+ }
+ if(qfilename == "") { qLines = lines; } //files with duds
return processors;
#else
-
- fastaFilePos.push_back(0); qfileFilePos.push_back(0);
- fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
+
+ if (processors == 1) { //save time
+ //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
+ //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
+ lines.push_back(linePair(0, 1000));
+ if (qfilename != "") { qLines.push_back(linePair(0, 1000)); }
+ }else{
+ int numFastaSeqs = 0;
+ fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs);
+
+ if (qfilename != "") {
+ int numQualSeqs = 0;
+ qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs);
+
+ if (numFastaSeqs != numQualSeqs) {
+ m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true;
+ }
+ }
+
+ //figure out how many sequences you have to process
+ int numSeqsPerProcessor = numFastaSeqs / processors;
+ for (int i = 0; i < processors; i++) {
+ int startIndex = i * numSeqsPerProcessor;
+ if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
+ lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
+ cout << fastaFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
+ if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
+ }
+
+ if(qfilename == "") { qLines = lines; } //files with duds
+ }
return 1;
#endif
#include "sequence.hpp"
#include "qualityscores.h"
#include "groupmap.h"
+#include "trimoligos.h"
+
class TrimSeqsCommand : public Command {
public:
private:
GroupMap* groupMap;
-
- struct linePair {
- unsigned long long start;
- unsigned long long end;
- linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
- };
-
+
+ struct linePair {
+ unsigned long long start;
+ unsigned long long end;
+ linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
+ linePair() {}
+ };
+
bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
bool keepFirstTrim(Sequence&, QualityScores&);
bool removeLastTrim(Sequence&, QualityScores&);
map<string, string> nameMap;
vector<int> processIDS; //processid
- vector<linePair*> lines;
- vector<linePair*> qLines;
+ vector<linePair> lines;
+ vector<linePair> qLines;
- int driverCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair*, linePair*);
+ int driverCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair, linePair);
int createProcessesCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >);
- int setLines(string, string, vector<unsigned long long>&, vector<unsigned long long>&);
+ int setLines(string, string);
+};
+
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct trimData {
+ unsigned long long start, end;
+ MothurOut* m;
+ string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, groupFileName, nameFile;
+ vector<vector<string> > fastaFileNames;
+ vector<vector<string> > qualFileNames;
+ vector<vector<string> > nameFileNames;
+ unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
+ bool flip, allFiles, qtrim, keepforward, createGroup;
+ int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
+ int qWindowSize, qWindowStep, keepFirst, removeLast, count;
+ double qRollAverage, qThreshold, qWindowAverage, qAverage;
+ vector<string> revPrimer;
+ map<string, int> barcodes;
+ map<string, int> primers;
+ vector<string> linker;
+ vector<string> spacer;
+ map<string, int> combos;
+ vector<string> primerNameVector;
+ vector<string> barcodeNameVector;
+ map<string, int> groupCounts;
+ map<string, string> nameMap;
+
+ trimData(){}
+ trimData(string fn, string qn, string nf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend, MothurOut* mout,
+ int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa,
+ vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
+ int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
+ int minL, int maxA, int maxH, int maxL, bool fli, map<string, string> nm) {
+ filename = fn;
+ qFileName = qn;
+ nameFile = nf;
+ trimFileName = tn;
+ scrapFileName = sn;
+ trimQFileName = tqn;
+ scrapQFileName = sqn;
+ trimNFileName = tnn;
+ scrapNFileName = snn;
+ groupFileName = gn;
+ fastaFileNames = ffn;
+ qualFileNames = qfn;
+ nameFileNames = nfn;
+ lineStart = lstart;
+ lineEnd = lend;
+ qlineStart = qstart;
+ qlineEnd = qend;
+ m = mout;
+
+ pdiffs = pd;
+ bdiffs = bd;
+ ldiffs = ld;
+ sdiffs = sd;
+ tdiffs = td;
+ barcodes = bar;
+ primers = pri; numFPrimers = primers.size();
+ revPrimer = revP; numRPrimers = revPrimer.size();
+ linker = li; numLinkers = linker.size();
+ spacer = spa; numSpacers = spacer.size();
+ primerNameVector = priNameVector;
+ barcodeNameVector = barNameVector;
+
+ createGroup = cGroup;
+ allFiles = aFiles;
+ keepforward = keepF;
+ keepFirst = keepfi;
+ removeLast = removeL;
+ qWindowStep = WindowStep;
+ qWindowSize = WindowSize;
+ qWindowAverage = WindowAverage;
+ qtrim = trim;
+ qThreshold = Threshold;
+ qAverage = Average;
+ qRollAverage = RollAverage;
+ minLength = minL;
+ maxAmbig = maxA;
+ maxHomoP = maxH;
+ maxLength = maxL;
+ flip = fli;
+ nameMap = nm;
+ count = 0;
+ }
};
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#else
+static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
+ trimData* pDataArray;
+ pDataArray = (trimData*)lpParam;
+
+ try {
+ ofstream trimFASTAFile;
+ pDataArray->m->openOutputFile(pDataArray->trimFileName, trimFASTAFile);
+
+ ofstream scrapFASTAFile;
+ pDataArray->m->openOutputFile(pDataArray->scrapFileName, scrapFASTAFile);
+
+ ofstream trimQualFile;
+ ofstream scrapQualFile;
+ if(pDataArray->qFileName != ""){
+ pDataArray->m->openOutputFile(pDataArray->trimQFileName, trimQualFile);
+ pDataArray->m->openOutputFile(pDataArray->scrapQFileName, scrapQualFile);
+ }
+
+ ofstream trimNameFile;
+ ofstream scrapNameFile;
+ if(pDataArray->nameFile != ""){
+ pDataArray->m->openOutputFile(pDataArray->trimNFileName, trimNameFile);
+ pDataArray->m->openOutputFile(pDataArray->scrapNFileName, scrapNameFile);
+ }
+
+
+ ofstream outGroupsFile;
+ if (pDataArray->createGroup){ pDataArray->m->openOutputFile(pDataArray->groupFileName, outGroupsFile); }
+ if(pDataArray->allFiles){
+ for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file
+ for (int j = 0; j < pDataArray->fastaFileNames[i].size(); j++) { //clears old file
+ if (pDataArray->fastaFileNames[i][j] != "") {
+ ofstream temp;
+ pDataArray->m->openOutputFile(pDataArray->fastaFileNames[i][j], temp); temp.close();
+ if(pDataArray->qFileName != ""){
+ pDataArray->m->openOutputFile(pDataArray->qualFileNames[i][j], temp); temp.close();
+ }
+
+ if(pDataArray->nameFile != ""){
+ pDataArray->m->openOutputFile(pDataArray->nameFileNames[i][j], temp); temp.close();
+ }
+ }
+ }
+ }
+ }
+
+ ifstream inFASTA;
+ pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
+ if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) {
+ inFASTA.seekg(0);
+ }else { //this accounts for the difference in line endings.
+ inFASTA.seekg(pDataArray->lineStart-1); pDataArray->m->gobble(inFASTA);
+ }
+
+ ifstream qFile;
+ if(pDataArray->qFileName != "") {
+ pDataArray->m->openInputFile(pDataArray->qFileName, qFile);
+ if ((pDataArray->qlineStart == 0) || (pDataArray->qlineStart == 1)) {
+ qFile.seekg(0);
+ }else { //this accounts for the difference in line endings.
+ qFile.seekg(pDataArray->qlineStart-1); pDataArray->m->gobble(qFile);
+ }
+ }
+
+
+ TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);
+
+ pDataArray->count = pDataArray->lineEnd;
+ for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
+
+ if (pDataArray->m->control_pressed) {
+ inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
+ if (pDataArray->createGroup) { outGroupsFile.close(); }
+ if(pDataArray->qFileName != ""){ qFile.close(); }
+ return 0;
+ }
+
+ int success = 1;
+ string trashCode = "";
+ int currentSeqsDiffs = 0;
+
+ Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
+
+ QualityScores currQual;
+ if(pDataArray->qFileName != ""){
+ currQual = QualityScores(qFile); pDataArray->m->gobble(qFile);
+ }
+
+ string origSeq = currSeq.getUnaligned();
+ if (origSeq != "") {
+
+ int barcodeIndex = 0;
+ int primerIndex = 0;
+
+ if(pDataArray->numLinkers != 0){
+ success = trimOligos.stripLinker(currSeq, currQual);
+ if(success > pDataArray->ldiffs) { trashCode += 'k'; }
+ else{ currentSeqsDiffs += success; }
+ }
+
+ if(pDataArray->barcodes.size() != 0){
+ success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
+ if(success > pDataArray->bdiffs) { trashCode += 'b'; }
+ else{ currentSeqsDiffs += success; }
+ }
+
+ if(pDataArray->numSpacers != 0){
+ success = trimOligos.stripSpacer(currSeq, currQual);
+ if(success > pDataArray->sdiffs) { trashCode += 's'; }
+ else{ currentSeqsDiffs += success; }
+
+ }
+
+ if(pDataArray->numFPrimers != 0){
+ success = trimOligos.stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward);
+ if(success > pDataArray->pdiffs) { trashCode += 'f'; }
+ else{ currentSeqsDiffs += success; }
+ }
+
+ if (currentSeqsDiffs > pDataArray->tdiffs) { trashCode += 't'; }
+
+ if(pDataArray->numRPrimers != 0){
+ success = trimOligos.stripReverse(currSeq, currQual);
+ if(!success) { trashCode += 'r'; }
+ }
+
+ if(pDataArray->keepFirst != 0){
+ //success = keepFirstTrim(currSeq, currQual);
+ success = 1;
+ if(currQual.getName() != ""){
+ currQual.trimQScores(-1, pDataArray->keepFirst);
+ }
+ currSeq.trim(pDataArray->keepFirst);
+ }
+
+ if(pDataArray->removeLast != 0){
+ //success = removeLastTrim(currSeq, currQual);
+ success = 0;
+ int length = currSeq.getNumBases() - pDataArray->removeLast;
+
+ if(length > 0){
+ if(currQual.getName() != ""){
+ currQual.trimQScores(-1, length);
+ }
+ currSeq.trim(length);
+ success = 1;
+ }
+ else{ success = 0; }
+
+ if(!success) { trashCode += 'l'; }
+ }
+
+
+ if(pDataArray->qFileName != ""){
+ int origLength = currSeq.getNumBases();
+
+ if(pDataArray->qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, pDataArray->qThreshold); }
+ else if(pDataArray->qAverage != 0) { success = currQual.cullQualAverage(currSeq, pDataArray->qAverage); }
+ else if(pDataArray->qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage); }
+ else if(pDataArray->qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage); }
+ else { success = 1; }
+
+ //you don't want to trim, if it fails above then scrap it
+ if ((!pDataArray->qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
+
+ if(!success) { trashCode += 'q'; }
+ }
+
+ if(pDataArray->minLength > 0 || pDataArray->maxLength > 0){
+ //success = cullLength(currSeq);
+ int length = currSeq.getNumBases();
+ success = 0; //guilty until proven innocent
+ if(length >= pDataArray->minLength && pDataArray->maxLength == 0) { success = 1; }
+ else if(length >= pDataArray->minLength && length <= pDataArray->maxLength) { success = 1; }
+ else { success = 0; }
+
+ if(!success) { trashCode += 'l'; }
+ }
+ if(pDataArray->maxHomoP > 0){
+ //success = cullHomoP(currSeq);
+ int longHomoP = currSeq.getLongHomoPolymer();
+ success = 0; //guilty until proven innocent
+ if(longHomoP <= pDataArray->maxHomoP){ success = 1; }
+ else { success = 0; }
+
+ if(!success) { trashCode += 'h'; }
+ }
+ if(pDataArray->maxAmbig != -1){
+ //success = cullAmbigs(currSeq);
+ int numNs = currSeq.getAmbigBases();
+ success = 0; //guilty until proven innocent
+ if(numNs <= pDataArray->maxAmbig) { success = 1; }
+ else { success = 0; }
+ if(!success) { trashCode += 'n'; }
+ }
+
+ if(pDataArray->flip){ // should go last
+ currSeq.reverseComplement();
+ if(pDataArray->qFileName != ""){
+ currQual.flipQScores();
+ }
+ }
+
+ if(trashCode.length() == 0){
+ currSeq.setAligned(currSeq.getUnaligned());
+ currSeq.printSequence(trimFASTAFile);
+
+ if(pDataArray->qFileName != ""){
+ currQual.printQScores(trimQualFile);
+ }
+
+ if(pDataArray->nameFile != ""){
+ map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
+ if (itName != pDataArray->nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
+ else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
+ }
+
+ if (pDataArray->createGroup) {
+ if(pDataArray->barcodes.size() != 0){
+ string thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
+ if (pDataArray->primers.size() != 0) {
+ if (pDataArray->primerNameVector[primerIndex] != "") {
+ if(thisGroup != "") {
+ thisGroup += "." + pDataArray->primerNameVector[primerIndex];
+ }else {
+ thisGroup = pDataArray->primerNameVector[primerIndex];
+ }
+ }
+ }
+
+ outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
+
+ if (pDataArray->nameFile != "") {
+ map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
+ if (itName != pDataArray->nameMap.end()) {
+ vector<string> thisSeqsNames;
+ pDataArray->m->splitAtChar(itName->second, thisSeqsNames, ',');
+ for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
+ outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
+ }
+ }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
+ }
+
+ map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
+ if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1; }
+ else { pDataArray->groupCounts[it->first]++; }
+
+ }
+ }
+
+ if(pDataArray->allFiles){
+ ofstream output;
+ pDataArray->m->openOutputFileAppend(pDataArray->fastaFileNames[barcodeIndex][primerIndex], output);
+ currSeq.printSequence(output);
+ output.close();
+
+ if(pDataArray->qFileName != ""){
+ pDataArray->m->openOutputFileAppend(pDataArray->qualFileNames[barcodeIndex][primerIndex], output);
+ currQual.printQScores(output);
+ output.close();
+ }
+
+ if(pDataArray->nameFile != ""){
+ map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
+ if (itName != pDataArray->nameMap.end()) {
+ pDataArray->m->openOutputFileAppend(pDataArray->nameFileNames[barcodeIndex][primerIndex], output);
+ output << itName->first << '\t' << itName->second << endl;
+ output.close();
+ }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
+ }
+ }
+ }
+ else{
+ if(pDataArray->nameFile != ""){ //needs to be before the currSeq name is changed
+ map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
+ if (itName != pDataArray->nameMap.end()) { scrapNameFile << itName->first << '\t' << itName->second << endl; }
+ else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
+ }
+ currSeq.setName(currSeq.getName() + '|' + trashCode);
+ currSeq.setUnaligned(origSeq);
+ currSeq.setAligned(origSeq);
+ currSeq.printSequence(scrapFASTAFile);
+ if(pDataArray->qFileName != ""){
+ currQual.printQScores(scrapQualFile);
+ }
+ }
+
+ }
+
+ //report progress
+ if((i) % 1000 == 0){ pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine(); }
+
+ }
+ //report progress
+ if((pDataArray->count) % 1000 != 0){ pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); }
+
+
+ inFASTA.close();
+ trimFASTAFile.close();
+ scrapFASTAFile.close();
+ if (pDataArray->createGroup) { outGroupsFile.close(); }
+ if(pDataArray->qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
+ if(pDataArray->nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "TrimSeqsCommand", "MyTrimThreadFunction");
+ exit(1);
+ }
+ }
+#endif
+
+
+/**************************************************************************************************/
#endif