//create database
if(method == "kmer") { database = new KmerDB(tempFile, kmerSize); }
else if(method == "suffix") { database = new SuffixDB(numSeqs); }
- else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch); }
+ else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, ""); }
else if(method == "distance") { database = new DistanceDB(); }
else {
m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); m->mothurOutEndLine();
*/
#include "distancecommand.h"
-#include "ignoregaps.h"
-#include "eachgapdist.h"
-#include "eachgapignore.h"
-#include "onegapdist.h"
-#include "onegapignore.h"
//**********************************************************************************************************************
vector<string> DistanceCommand::setParameters(){
if ((column != "") && (oldfastafile != "") && (output != "column")) { m->mothurOut("You have provided column and oldfasta, indicating you want to append distances to your column file. Your output must be in column format to do so."); m->mothurOutEndLine(); abort=true; }
if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
-
- ValidCalculators validCalculator;
-
- if (m->isTrue(countends) == true) {
- for (int i=0; i<Estimators.size(); i++) {
- if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) {
- if (Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
- else if (Estimators[i] == "eachgap") { distCalculator = new eachGapDist(); }
- else if (Estimators[i] == "onegap") { distCalculator = new oneGapDist(); }
- }
- }
- }else {
- for (int i=0; i<Estimators.size(); i++) {
- if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) {
- if (Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
- else if (Estimators[i] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); }
- else if (Estimators[i] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); }
- }
- }
- }
}
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
//if you don't need to fork anything
if(processors == 1){
if (output != "square") { driver(0, numSeqs, outputFile, cutoff); }
createProcesses(outputFile);
}
- #else
+ //#else
//ifstream inFASTA;
- if (output != "square") { driver(0, numSeqs, outputFile, cutoff); }
- else { driver(0, numSeqs, outputFile, "square"); }
- #endif
+ //if (output != "square") { driver(0, numSeqs, outputFile, cutoff); }
+ //else { driver(0, numSeqs, outputFile, "square"); }
+ //#endif
#endif
- if (m->control_pressed) { outputTypes.clear(); delete distCalculator; m->mothurRemove(outputFile); return 0; }
+ if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); return 0; }
#ifdef USE_MPI
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
}
#endif
- if (m->control_pressed) { outputTypes.clear(); delete distCalculator; m->mothurRemove(outputFile); return 0; }
-
- delete distCalculator;
+ if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); return 0; }
//set phylip file as new current phylipfile
string current = "";
int temp = processIDS[i];
wait(&temp);
}
+#else
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+ //Windows version shared memory, so be careful when passing variables through the distanceData struct.
+ //Above fork() will clone, so memory is separate, but that's not the case with windows,
+ //that's why the distance calculator was moved inside of the driver to make separate copies.
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ vector<distanceData*> pDataArray; //[processors-1];
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor-1 worker threads.
+ for( int i=0; i<processors-1; i++ ){
+
+ // Allocate memory for thread data.
+ distanceData* tempDist = new distanceData(lines[i+1].start, lines[i+1].end, (filename + toString(i) + ".temp"), cutoff, alignDB, Estimators, m, output, numNewFasta, countends);
+ pDataArray.push_back(tempDist);
+ processIDS.push_back(i);
+
+ //MyDistThreadFunction is in header. It must be global or static to work with the threads.
+ //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+ hThreadArray[i] = CreateThread(NULL, 0, MyDistThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
+ }
+
+ //do your part
+ if (output != "square") { driver(lines[0].start, lines[0].end, filename, cutoff); }
+ else { driver(lines[0].start, lines[0].end, filename, "square"); }
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+#endif
//append and remove temp files
for (int i=0;i<processIDS.size();i++) {
m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filename);
m->mothurRemove((filename + toString(processIDS[i]) + ".temp"));
}
-#endif
+
}
catch(exception& e) {
m->errorOut(e, "DistanceCommand", "createProcesses");
exit(1);
}
}
-
/**************************************************************************************************/
/////// need to fix to work with calcs and sequencedb
int DistanceCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
try {
-
+ ValidCalculators validCalculator;
+ Dist* distCalculator;
+ if (m->isTrue(countends) == true) {
+ for (int i=0; i<Estimators.size(); i++) {
+ if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) {
+ if (Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
+ else if (Estimators[i] == "eachgap") { distCalculator = new eachGapDist(); }
+ else if (Estimators[i] == "onegap") { distCalculator = new oneGapDist(); }
+ }
+ }
+ }else {
+ for (int i=0; i<Estimators.size(); i++) {
+ if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) {
+ if (Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
+ else if (Estimators[i] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); }
+ else if (Estimators[i] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); }
+ }
+ }
+ }
+
int startTime = time(NULL);
//column file
}
for(int j=0;j<i;j++){
- if (m->control_pressed) { outFile.close(); return 0; }
+ if (m->control_pressed) { delete distCalculator; outFile.close(); return 0; }
//if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
//the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
outFile.close();
+ delete distCalculator;
return 1;
}
/////// need to fix to work with calcs and sequencedb
int DistanceCommand::driver(int startLine, int endLine, string dFileName, string square){
try {
-
+ ValidCalculators validCalculator;
+ Dist* distCalculator;
+ if (m->isTrue(countends) == true) {
+ for (int i=0; i<Estimators.size(); i++) {
+ if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) {
+ if (Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
+ else if (Estimators[i] == "eachgap") { distCalculator = new eachGapDist(); }
+ else if (Estimators[i] == "onegap") { distCalculator = new oneGapDist(); }
+ }
+ }
+ }else {
+ for (int i=0; i<Estimators.size(); i++) {
+ if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) {
+ if (Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
+ else if (Estimators[i] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); }
+ else if (Estimators[i] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); }
+ }
+ }
+ }
+
int startTime = time(NULL);
//column file
for(int j=0;j<alignDB.getNumSeqs();j++){
- if (m->control_pressed) { outFile.close(); return 0; }
+ if (m->control_pressed) { delete distCalculator; outFile.close(); return 0; }
distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
double dist = distCalculator->getDist();
m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
outFile.close();
+ delete distCalculator;
return 1;
}
/////// need to fix to work with calcs and sequencedb
int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
try {
+
+ ValidCalculators validCalculator;
+ Dist* distCalculator;
+ if (m->isTrue(countends) == true) {
+ for (int i=0; i<Estimators.size(); i++) {
+ if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) {
+ if (Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
+ else if (Estimators[i] == "eachgap") { distCalculator = new eachGapDist(); }
+ else if (Estimators[i] == "onegap") { distCalculator = new oneGapDist(); }
+ }
+ }
+ }else {
+ for (int i=0; i<Estimators.size(); i++) {
+ if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) {
+ if (Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
+ else if (Estimators[i] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); }
+ else if (Estimators[i] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); }
+ }
+ }
+ }
+
+
MPI_Status status;
int startTime = time(NULL);
for(int j=0;j<i;j++){
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { delete distCalculator; return 0; }
//if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
//the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
}
//m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
- cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
+ cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
+ delete distCalculator;
return 1;
}
catch(exception& e) {
/////// need to fix to work with calcs and sequencedb
int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size){
try {
+ ValidCalculators validCalculator;
+ Dist* distCalculator;
+ if (m->isTrue(countends) == true) {
+ for (int i=0; i<Estimators.size(); i++) {
+ if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) {
+ if (Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
+ else if (Estimators[i] == "eachgap") { distCalculator = new eachGapDist(); }
+ else if (Estimators[i] == "onegap") { distCalculator = new oneGapDist(); }
+ }
+ }
+ }else {
+ for (int i=0; i<Estimators.size(); i++) {
+ if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) {
+ if (Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
+ else if (Estimators[i] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); }
+ else if (Estimators[i] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); }
+ }
+ }
+ }
+
+
MPI_Status status;
MPI_File outMPI;
for(int j=0;j<i;j++){
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { delete distCalculator; return 0; }
distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
double dist = distCalculator->getDist();
//m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
MPI_File_close(&outMPI);
+ delete distCalculator;
return 1;
}
/////// need to fix to work with calcs and sequencedb
int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size, string square){
try {
+ ValidCalculators validCalculator;
+ Dist* distCalculator;
+ if (m->isTrue(countends) == true) {
+ for (int i=0; i<Estimators.size(); i++) {
+ if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) {
+ if (Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
+ else if (Estimators[i] == "eachgap") { distCalculator = new eachGapDist(); }
+ else if (Estimators[i] == "onegap") { distCalculator = new oneGapDist(); }
+ }
+ }
+ }else {
+ for (int i=0; i<Estimators.size(); i++) {
+ if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) {
+ if (Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
+ else if (Estimators[i] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); }
+ else if (Estimators[i] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); }
+ }
+ }
+ }
+
MPI_Status status;
MPI_File outMPI;
for(int j=0;j<alignDB.getNumSeqs();j++){
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { delete distCalculator; return 0; }
distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
double dist = distCalculator->getDist();
//m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
MPI_File_close(&outMPI);
-
+ delete distCalculator;
return 1;
}
catch(exception& e) {
#include "validcalculator.h"
#include "dist.h"
#include "sequencedb.h"
+#include "ignoregaps.h"
+#include "eachgapdist.h"
+#include "eachgapignore.h"
+#include "onegapdist.h"
+#include "onegapignore.h"
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+typedef struct distanceData {
+ int startLine;
+ int endLine;
+ string dFileName;
+ float cutoff;
+ SequenceDB alignDB;
+ vector<string> Estimators;
+ MothurOut* m;
+ string output;
+ int numNewFasta;
+ string countends;
+
+ distanceData(){}
+ distanceData(int s, int e, string dbname, float c, SequenceDB db, vector<string> Est, MothurOut* mout, string o, int num, string count) {
+ startLine = s;
+ endLine = e;
+ dFileName = dbname;
+ cutoff = c;
+ alignDB = db;
+ Estimators = Est;
+ m = mout;
+ output = o;
+ numNewFasta = num;
+ countends = count;
+
+ }
+};
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#else
+static DWORD WINAPI MyDistThreadFunction(LPVOID lpParam){
+ distanceData* pDataArray;
+ pDataArray = (distanceData*)lpParam;
+
+ try {
+ ValidCalculators validCalculator;
+ Dist* distCalculator;
+ if (pDataArray->m->isTrue(pDataArray->countends) == true) {
+ for (int i=0; i<pDataArray->Estimators.size(); i++) {
+ if (validCalculator.isValidCalculator("distance", pDataArray->Estimators[i]) == true) {
+ if (pDataArray->Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
+ else if (pDataArray->Estimators[i] == "eachgap") { distCalculator = new eachGapDist(); }
+ else if (pDataArray->Estimators[i] == "onegap") { distCalculator = new oneGapDist(); }
+ }
+ }
+ }else {
+ for (int i=0; i<pDataArray->Estimators.size(); i++) {
+ if (validCalculator.isValidCalculator("distance", pDataArray->Estimators[i]) == true) {
+ if (pDataArray->Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); }
+ else if (pDataArray->Estimators[i] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); }
+ else if (pDataArray->Estimators[i] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); }
+ }
+ }
+ }
+
+ int startTime = time(NULL);
+
+ //column file
+ ofstream outFile(pDataArray->dFileName.c_str(), ios::trunc);
+ outFile.setf(ios::fixed, ios::showpoint);
+ outFile << setprecision(4);
+
+
+ if (pDataArray->output != "square") {
+ if((pDataArray->output == "lt") && (pDataArray->startLine == 0)){ outFile << pDataArray->alignDB.getNumSeqs() << endl; }
+
+ for(int i=pDataArray->startLine;i<pDataArray->endLine;i++){
+ if(pDataArray->output == "lt") {
+ string name = pDataArray->alignDB.get(i).getName();
+ if (name.length() < 10) { //pad with spaces to make compatible
+ while (name.length() < 10) { name += " "; }
+ }
+ outFile << name << '\t';
+ }
+ for(int j=0;j<i;j++){
+
+ if (pDataArray->m->control_pressed) { delete distCalculator; outFile.close(); return 0; }
+
+ //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
+ //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
+ if ((i >= pDataArray->numNewFasta) && (j >= pDataArray->numNewFasta)) { break; }
+
+ distCalculator->calcDist(pDataArray->alignDB.get(i), pDataArray->alignDB.get(j));
+ double dist = distCalculator->getDist();
+
+ if(dist <= pDataArray->cutoff){
+ if (pDataArray->output == "column") { outFile << pDataArray->alignDB.get(i).getName() << ' ' << pDataArray->alignDB.get(j).getName() << ' ' << dist << endl; }
+ }
+ if (pDataArray->output == "lt") { outFile << dist << '\t'; }
+ }
+
+ if (pDataArray->output == "lt") { outFile << endl; }
+
+ if(i % 100 == 0){
+ pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+ }
+
+ }
+ pDataArray->m->mothurOut(toString(pDataArray->endLine-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+ }else{
+ if(pDataArray->startLine == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl; }
+
+ for(int i=pDataArray->startLine;i<pDataArray->endLine;i++){
+
+ string name = pDataArray->alignDB.get(i).getName();
+ //pad with spaces to make compatible
+ if (name.length() < 10) { while (name.length() < 10) { name += " "; } }
+
+ outFile << name << '\t';
+
+ for(int j=0;j<pDataArray->alignDB.getNumSeqs();j++){
+
+ if (pDataArray->m->control_pressed) { delete distCalculator; outFile.close(); return 0; }
+
+ distCalculator->calcDist(pDataArray->alignDB.get(i), pDataArray->alignDB.get(j));
+ double dist = distCalculator->getDist();
+
+ outFile << dist << '\t';
+ }
+
+ outFile << endl;
+
+ if(i % 100 == 0){
+ pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+ }
+
+ }
+ pDataArray->m->mothurOut(toString(pDataArray->endLine-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+ }
+
+ outFile.close();
+ delete distCalculator;
+
+ return 0;
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "DistanceCommand", "MyDistThreadFunction");
+ exit(1);
+ }
+}
+#endif
+
+/**************************************************************************************************/
class DistanceCommand : public Command {
public:
};
- Dist* distCalculator;
+ //Dist* distCalculator;
SequenceDB alignDB;
string countends, output, fastafile, calc, outputDir, oldfastafile, column, compress;
#endif
+/**************************************************************************************************/
+
//get set names
string setA = namesOfGroupCombos[c][0];
string setB = namesOfGroupCombos[c][1];
-
+
//get filename
string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
setACount++;
}
}
-
+
+ //for (int i = 0; i < subset.size(); i++) { cout << subset[i]->getGroup() << endl; }
+
if ((setACount == 0) || (setBCount == 0)) {
m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine();
outputNames.pop_back();
#include <windows.h>
#include <psapi.h>
#include <direct.h>
+ #include <tchar.h>
+
#endif
using namespace std;
*/
#include "seqsummarycommand.h"
-#include "sequence.hpp"
+
//**********************************************************************************************************************
vector<string> SeqSummaryCommand::setParameters(){
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
- vector<unsigned long int> positions = m->divideFile(fastafile, processors);
-
- for (int i = 0; i < (positions.size()-1); i++) {
- lines.push_back(new linePair(positions[i], positions[(i+1)]));
- }
-
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- if(processors == 1){
- numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
- }else{
- numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile);
-
- rename((summaryFile + toString(processIDS[0]) + ".temp").c_str(), summaryFile.c_str());
- //append files
- for(int i=1;i<processors;i++){
- m->appendFiles((summaryFile + toString(processIDS[i]) + ".temp"), summaryFile);
- m->mothurRemove((summaryFile + toString(processIDS[i]) + ".temp"));
- }
+ vector<unsigned long int> positions;
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ positions = m->divideFile(fastafile, processors);
+ for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); }
+ #else
+ positions = m->setFilePosFasta(fastafile, numSeqs);
+
+ //figure out how many sequences you have to process
+ int numSeqsPerProcessor = numSeqs / processors;
+ for (int i = 0; i < processors; i++) {
+ int startIndex = i * numSeqsPerProcessor;
+ if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; }
+ lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
}
-
- if (m->control_pressed) { return 0; }
- #else
+ #endif
+
+
+ if(processors == 1){
numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
- if (m->control_pressed) { return 0; }
- #endif
+ }else{
+ numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile);
+ }
+
+ if (m->control_pressed) { return 0; }
#endif
#ifdef USE_MPI
/**************************************************************************************************/
int SeqSummaryCommand::createProcessesCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename, string sumFile) {
try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- int process = 0;
+ int process = 1;
int num = 0;
processIDS.clear();
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+
//loop through and create all the processes you want
while (process != processors) {
int pid = fork();
}
}
+ //do your part
+ num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile, lines[0]);
+
//force parent to wait until all the processes are done
- for (int i=0;i<processors;i++) {
+ for (int i=0;i<processIDS.size();i++) {
int temp = processIDS[i];
wait(&temp);
}
in.close();
m->mothurRemove(tempFilename);
+
+ m->appendFiles((sumFile + toString(processIDS[i]) + ".temp"), sumFile);
+ m->mothurRemove((sumFile + toString(processIDS[i]) + ".temp"));
}
- return num;
+#else
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+ //Windows version shared memory, so be careful when passing variables through the seqSumData struct.
+ //Above fork() will clone, so memory is separate, but that's not the case with windows,
+ //Taking advantage of shared memory to allow both threads to add info to vectors.
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ vector<seqSumData*> pDataArray;
+ DWORD dwThreadIdArray[processors];
+ HANDLE hThreadArray[processors];
+
+ //Create processor worker threads.
+ for( int i=0; i<processors; i++ ){
+
+ //cout << i << '\t' << lines[i]->start << '\t' << lines[i]->end << endl;
+ // Allocate memory for thread data.
+ seqSumData* tempSum = new seqSumData(&startPosition, &endPosition, &seqLength, &ambigBases, &longHomoPolymer, filename, (sumFile + toString(i) + ".temp"), m, lines[i]->start, lines[i]->end, namefile, nameMap);
+ pDataArray.push_back(tempSum);
+ processIDS.push_back(i);
+
+ //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
+ //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+ hThreadArray[i] = CreateThread(NULL, 0, MySeqSumThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
+ }
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ num += pDataArray[i]->count;
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+ //rename((sumFile + toString(processIDS[0]) + ".temp").c_str(), sumFile.c_str());
+ //append files
+ for(int i=0;i<processIDS.size();i++){
+ m->appendFiles((sumFile + toString(processIDS[i]) + ".temp"), sumFile);
+ m->mothurRemove((sumFile + toString(processIDS[i]) + ".temp"));
+ }
#endif
+ return num;
}
catch(exception& e) {
m->errorOut(e, "SeqSummaryCommand", "createProcessesCreateSummary");
#include "mothur.h"
#include "command.hpp"
+#include "sequence.hpp"
+
+/**************************************************************************************************/
class SeqSummaryCommand : public Command {
public:
};
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+typedef struct seqSumData {
+ vector<int>* startPosition;
+ vector<int>* endPosition;
+ vector<int>* seqLength;
+ vector<int>* ambigBases;
+ vector<int>* longHomoPolymer;
+ string filename;
+ string sumFile;
+ unsigned long int start;
+ unsigned long int end;
+ int count;
+ MothurOut* m;
+ string namefile;
+ map<string, int> nameMap;
+
+
+ seqSumData(){}
+ seqSumData(vector<int>* s, vector<int>* e, vector<int>* l, vector<int>* a, vector<int>* h, string f, string sf, MothurOut* mout, unsigned long int st, unsigned long int en, string na, map<string, int> nam) {
+ startPosition = s;
+ endPosition = e;
+ seqLength = l;
+ ambigBases = a;
+ longHomoPolymer = h;
+ filename = f;
+ sumFile = sf;
+ m = mout;
+ start = st;
+ end = en;
+ namefile = na;
+ nameMap = nam;
+ count = 0;
+ }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#else
+static DWORD WINAPI MySeqSumThreadFunction(LPVOID lpParam){
+ seqSumData* pDataArray;
+ pDataArray = (seqSumData*)lpParam;
+
+ try {
+ ofstream outSummary;
+ pDataArray->m->openOutputFile(pDataArray->sumFile, outSummary);
+
+ ifstream in;
+ pDataArray->m->openInputFile(pDataArray->filename, in);
+
+ //print header if you are process 0
+ if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
+ outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer\tnumSeqs" << endl;
+ in.seekg(0);
+ }else { //this accounts for the difference in line endings.
+ in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
+ }
+
+ pDataArray->count = pDataArray->end;
+ for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
+
+ if (pDataArray->m->control_pressed) { in.close(); outSummary.close(); pDataArray->count = 1; return 1; }
+
+ Sequence current(in); pDataArray->m->gobble(in);
+
+ if (current.getName() != "") {
+
+ int num = 1;
+ if (pDataArray->namefile != "") {
+ //make sure this sequence is in the namefile, else error
+ map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
+
+ if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
+ else { num = it->second; }
+ }
+
+ //for each sequence this sequence represents
+ for (int i = 0; i < num; i++) {
+ pDataArray->startPosition->push_back(current.getStartPos());
+ pDataArray->endPosition->push_back(current.getEndPos());
+ pDataArray->seqLength->push_back(current.getNumBases());
+ pDataArray->ambigBases->push_back(current.getAmbigBases());
+ pDataArray->longHomoPolymer->push_back(current.getLongHomoPolymer());
+ }
+
+ outSummary << current.getName() << '\t';
+ outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
+ outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
+ outSummary << current.getLongHomoPolymer() << '\t' << num << endl;
+ }
+ }
+
+ in.close();
+ outSummary.close();
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "SeqSummaryCommand", "MySeqSumThreadFunction");
+ exit(1);
+ }
+}
#endif
+
+
+
+
+#endif
+
+/**************************************************************************************************/
+
+