// Groups the fastqRead records handled together for one read pair:
// forward, reverse, and two further reads named findex and rindex
// (presumably the forward/reverse index reads -- TODO confirm against callers).
struct pairFastqRead {
fastqRead forward;
fastqRead reverse;
+ fastqRead findex;
+ fastqRead rindex;
// Default constructor: every member is default-initialized.
pairFastqRead() {};
// Two-read constructor: copies f into forward and r into reverse;
// findex and rindex are left default-initialized.
pairFastqRead(fastqRead f, fastqRead r) : forward(f), reverse(r){};
// Four-read constructor: copies f, r, fi, ri into forward, reverse, findex, rindex respectively.
+ pairFastqRead(fastqRead f, fastqRead r, fastqRead fi, fastqRead ri) : forward(f), reverse(r), findex(fi), rindex(ri) {};
// Destructor body is empty; this struct adds no cleanup of its own.
~pairFastqRead() {};
};
/**************************************************************************************************/
// Emits this command's help text: fetches it with getHelpString() and
// passes it to m->mothurOut().
void help() {
    m->mothurOut(getHelpString());
}
private:
- bool abort, allFiles, createGroup, trimOverlap;
- string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, file, format;
+ bool abort, allFiles, trimOverlap, createFileGroup, createOligosGroup, makeCount, noneOk;
+ string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, findexfile, rindexfile, file, format;
float match, misMatch, gapOpen, gapExtend;
int processors, longestBase, insert, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, deltaq;
vector<string> outputNames;
map<string, int> groupCounts;
map<string, string> groupMap;
+ map<int, string> file2Group;
vector<int> convertQual(string);
fastqRead readFastq(ifstream&, bool&);
vector< vector< vector<string> > > preProcessData(unsigned long int&);
vector< vector<string> > readFileNames(string);
- vector< vector<string> > readFastqFiles(unsigned long int&, string, string);
+ vector< vector<string> > readFastqFiles(unsigned long int&, string, string, string, string);
vector< vector<string> > readFastaFiles(unsigned long int&, string, string);
//bool checkReads(fastqRead&, fastqRead&, string, string);
- int createProcesses(vector< vector<string> >, string, string, string, vector<vector<string> >);
- int driver(vector<string>, string, string, string, vector<vector<string> >, int);
+ int createProcesses(vector< vector<string> >, string, string, string, vector<vector<string> >, int);
+ int driver(vector<string>, string, string, string, vector<vector<string> >, int, string);
bool getOligos(vector<vector<string> >&, string);
string reverseOligo(string);
- vector<pairFastqRead> getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques);
+ vector<pairFastqRead> getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques, bool);
+ vector<pairFastqRead> mergeReads(vector<pairFastqRead> frReads, vector<pairFastqRead> friReads, map<string, pairFastqRead>& pairUniques);
};
/**************************************************************************************************/
string outputFasta;
string outputScrapFasta;
string outputMisMatches;
- string align;
+ string align, group;
vector<string> files;
vector<vector<string> > fastaFileNames;
MothurOut* m;
float match, misMatch, gapOpen, gapExtend;
int count, insert, threadID, pdiffs, bdiffs, tdiffs, deltaq;
- bool allFiles, createGroup, done, trimOverlap;
+ bool allFiles, createOligosGroup, createFileGroup, done, trimOverlap;
map<string, int> groupCounts;
map<string, string> groupMap;
vector<string> primerNameVector;
map<int, oligosPair> primers;
contigsData(){}
- contigsData(vector<string> f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, map<int, oligosPair> br, map<int, oligosPair> pr, vector<vector<string> > ffn, vector<string>bnv, vector<string> pnv, int pdf, int bdf, int tdf, bool cg, bool all, bool to, int tid) {
+ contigsData(string g, vector<string> f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, map<int, oligosPair> br, map<int, oligosPair> pr, vector<vector<string> > ffn, vector<string>bnv, vector<string> pnv, int pdf, int bdf, int tdf, bool cg, bool cfg, bool all, bool to, int tid) {
files = f;
outputFasta = of;
outputMisMatches = om;
gapExtend = gapE;
insert = thr;
align = al;
+ group = g;
count = 0;
outputScrapFasta = osf;
fastaFileNames = ffn;
tdiffs = tdf;
allFiles = all;
trimOverlap = to;
- createGroup = cg;
+ createOligosGroup = cg;
+ createFileGroup = cfg;
threadID = tid;
deltaq = delt;
done=false;
string thisfqualfile = pDataArray->files[1];
string thisrfastafile = pDataArray->files[2];
string thisrqualfile = pDataArray->files[3];
+ string thisfindexfile = pDataArray->files[4];
+ string thisrindexfile = pDataArray->files[5];
- if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
+ if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n[DEBUG]: findex = " + thisfindexfile + ".\n[DEBUG]: rindex = " + thisrindexfile + ".\n"); }
if(pDataArray->allFiles){
for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file
}
}
- ifstream inFFasta, inRFasta, inFQual, inRQual;
+ ifstream inFFasta, inRFasta, inFQual, inRQual, inFIndex, inRIndex;
ofstream outFasta, outMisMatch, outScrapFasta;
pDataArray->m->openInputFile(thisffastafile, inFFasta);
pDataArray->m->openInputFile(thisrfastafile, inRFasta);
pDataArray->m->openInputFile(thisfqualfile, inFQual);
pDataArray->m->openInputFile(thisrqualfile, inRQual);
}
+
+ if (thisfindexfile != "") { pDataArray->m->openInputFile(thisfindexfile, inFIndex); }
+ if (thisrindexfile != "") { pDataArray->m->openInputFile(thisrindexfile, inRIndex); }
+
pDataArray->m->openOutputFile(pDataArray->outputFasta, outFasta);
pDataArray->m->openOutputFile(pDataArray->outputMisMatches, outMisMatch);
pDataArray->m->openOutputFile(pDataArray->outputScrapFasta, outScrapFasta);
- if (pDataArray->threadID == 0) { outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n"; }
+ outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";
TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->primers, pDataArray->barcodes);
rQual = new QualityScores(inRQual); pDataArray->m->gobble(inRQual);
}
+ Sequence findexBarcode("findex", "NONE"); Sequence rindexBarcode("rindex", "NONE");
+ if (thisfindexfile != "") {
+ Sequence temp(inFIndex); pDataArray->m->gobble(inFIndex);
+ findexBarcode.setAligned(temp.getAligned());
+ }
+
+ if (thisrindexfile != "") {
+ Sequence temp(inRIndex); pDataArray->m->gobble(inRIndex);
+ rindexBarcode.setAligned(temp.getAligned());
+ }
+
int barcodeIndex = 0;
int primerIndex = 0;
if(pDataArray->barcodes.size() != 0){
if (thisfqualfile != "") {
- success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex);
+ if ((thisfindexfile != "") || (thisrindexfile != "")) {
+ success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex);
+ }else {
+ success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex);
+ }
}else {
success = trimOligos.stripBarcode(fSeq, rSeq, barcodeIndex);
}
if(trashCode.length() == 0){
bool ignore = false;
- if (pDataArray->createGroup) {
+ if (pDataArray->createOligosGroup) {
if(pDataArray->barcodes.size() != 0){
string thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
if (pDataArray->primers.size() != 0) {
else { pDataArray->groupCounts[it->first] ++; }
}else { ignore = true; }
}
+ }else if (pDataArray->createFileGroup) {
+ int pos = pDataArray->group.find("ignore");
+ if (pos == string::npos) {
+ pDataArray->groupMap[fSeq.getName()] = pDataArray->group;
+
+ map<string, int>::iterator it = pDataArray->groupCounts.find(pDataArray->group);
+ if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[pDataArray->group] = 1; }
+ else { pDataArray->groupCounts[it->first]++; }
+ }else { ignore = true; }
}
+
if(pDataArray->allFiles && !ignore){
ofstream output;