Sequence seq;
string names;
bool active;
+ int diffs;
seqPNode() {}
- seqPNode(int n, Sequence s, string nm) : numIdentical(n), seq(s), names(nm), active(1) {}
+ seqPNode(int n, Sequence s, string nm) : numIdentical(n), seq(s), names(nm), active(1), diffs(0) {} //new nodes start active with zero diffs
~seqPNode() {}
};
/************************************************************/
vector<string> setParameters();
string getCommandName() { return "pre.cluster"; }
string getCommandCategory() { return "Sequence Processing"; }
+ string getOutputFileNameTag(string, string);
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Pre.cluster"; }
string getDescription() { return "implements a pseudo-single linkage algorithm with the goal of removing sequences that are likely due to pyrosequencing errors"; }
//int readNamesFASTA();
int calcMisMatches(string, string);
void printData(string, string); //fasta filename, names file name
- int process();
+ int process(string);
int loadSeqs(map<string, string>&, vector<Sequence>&);
- int driverGroups(SequenceParser*, string, string, int, int, vector<string> groups);
- int createProcessesGroups(SequenceParser*, string, string, vector<string>);
+ int driverGroups(SequenceParser*, string, string, string, int, int, vector<string> groups);
+ int createProcessesGroups(SequenceParser*, string, string, string, vector<string>);
};
/**************************************************************************************************/
string fastafile;
string namefile;
string groupfile;
- string newFName, newNName;
+ string newFName, newNName, newMName;
MothurOut* m;
int start;
int end;
int diffs, threadID;
vector<string> groups;
+ vector<string> mapFileNames;
preClusterData(){}
- preClusterData(string f, string n, string g, string nff, string nnf, vector<string> gr, MothurOut* mout, int st, int en, int d, int tid) {
+ preClusterData(string f, string n, string g, string nff, string nnf, string nmf, vector<string> gr, MothurOut* mout, int st, int en, int d, int tid) {
fastafile = f;
namefile = n;
groupfile = g;
newFName = nff;
newNName = nnf;
+ newMName = nmf;
m = mout;
start = st;
end = en;
};
/**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined(__APPLE__) || defined(__MACH__) || defined(linux) || defined(__linux) || defined(__linux__) || defined(__unix__) || defined(__unix) //each macro is tested with its own defined()
#else
static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){
preClusterData* pDataArray;
////////////////////////////////////////////////////
//int count = process(); - same function below
+ ofstream out;
+ pDataArray->m->openOutputFile(pDataArray->newMName+pDataArray->groups[k]+".map", out);
+ pDataArray->mapFileNames.push_back(pDataArray->newMName+pDataArray->groups[k]+".map");
+
//sort seqs by number of identical seqs
sort(alignSeqs.begin(), alignSeqs.end(), comparePriority);
if (alignSeqs[i].active) { //this sequence has not been merged yet
+ string chunk = alignSeqs[i].seq.getName() + "\t" + toString(alignSeqs[i].numIdentical) + "\t" + toString(0) + "\t" + alignSeqs[i].seq.getAligned() + "\n";
+
//try to merge it with all smaller seqs
for (int j = i+1; j < numSeqs; j++) {
alignSeqs[j].active = 0;
alignSeqs[j].numIdentical = 0;
+ alignSeqs[j].diffs = mismatch;
count++;
+ chunk += alignSeqs[j].seq.getName() + "\t" + toString(alignSeqs[j].numIdentical) + "\t" + toString(mismatch) + "\t" + alignSeqs[j].seq.getAligned() + "\n";
}
}//end if j active
- }//end if i != j
+ }//end for loop j
//remove from active list
alignSeqs[i].active = 0;
+ out << "ideal_seq_" << (i+1) << '\t' << alignSeqs[i].numIdentical << endl << chunk << endl;
+
}//end if active i
if(i % 100 == 0) { pDataArray->m->mothurOut(toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); pDataArray->m->mothurOutEndLine(); }
}
-
+ out.close();
if(numSeqs % 100 != 0) { pDataArray->m->mothurOut(toString(numSeqs) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); pDataArray->m->mothurOutEndLine(); }
////////////////////////////////////////////////////