+/**************************************************************************************/
+// Summarizes the sequences found in one chunk of a fasta file.
+//
+// Reads Sequence records from `filename`, starting at byte offset filePos->start.
+// For every record with a non-empty name it:
+//   - looks the name up in nameMap when a name or count file is in use, to find
+//     how many sequences the record represents (1 otherwise); a missing name is
+//     reported and sets m->control_pressed,
+//   - appends the record's start, end, length, ambiguous-base count and longest
+//     homopolymer to the five output vectors once per represented sequence,
+//   - writes one tab-delimited row to `sumFile`.
+// The column header is written only by the chunk whose start offset is 0.
+//
+// On the Unix-like builds reading stops once the stream position reaches
+// filePos->end (or tellg() fails); on other builds it stops at end of file.
+//
+// Returns the number of named records read, or 1 if m->control_pressed was
+// seen at the top of the loop.
+int SeqSummaryCommand::driverCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename, string sumFile, linePair* filePos) {
+	try {
+
+		ofstream outSummary;
+		m->openOutputFile(sumFile, outSummary);
+
+		//print header if you are process 0
+		if (filePos->start == 0) {
+			outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer\tnumSeqs" << '\n';
+		}
+
+		ifstream in;
+		m->openInputFile(filename, in);
+
+		in.seekg(filePos->start);
+
+		int count = 0;
+
+		while (true) {
+
+			if (m->control_pressed) { in.close(); outSummary.close(); return 1; }
+
+			Sequence current(in); m->gobble(in);
+
+			if (current.getName() != "") {
+
+				int num = 1;
+				if ((namefile != "") || (countfile != "")) {
+					//make sure this sequence is in the namefile, else error
+					map<string, int>::iterator it = nameMap.find(current.getName());
+
+					if (it == nameMap.end()) { m->mothurOut("[ERROR]: '" + current.getName() + "' is not in your name or count file, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
+					else { num = it->second; }
+				}
+
+				//for each sequence this sequence represents
+				for (int i = 0; i < num; i++) {
+					startPosition.push_back(current.getStartPos());
+					endPosition.push_back(current.getEndPos());
+					seqLength.push_back(current.getNumBases());
+					ambigBases.push_back(current.getAmbigBases());
+					longHomoPolymer.push_back(current.getLongHomoPolymer());
+				}
+
+				count++;
+
+				//'\n' instead of endl: same bytes, but no flush after every sequence
+				outSummary << current.getName() << '\t';
+				outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
+				outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
+				outSummary << current.getLongHomoPolymer() << '\t' << num << '\n';
+			}
+
+			#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+				//stop at the end of this chunk; -1 is what a failed tellg() converts to
+				unsigned long long pos = in.tellg();
+				if ((pos == -1) || (pos >= filePos->end)) { break; }
+			#else
+				if (in.eof()) { break; }
+			#endif
+		}
+
+		in.close();
+		outSummary.close();
+
+		return count;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SeqSummaryCommand", "driverCreateSummary");
+		exit(1);
+	}
+}
+#ifdef USE_MPI
+/**************************************************************************************/
+// MPI variant of the summary driver.
+//
+// Processes `num` consecutive records. Record k occupies the bytes of inMPI
+// between MPIPos[start+k] and MPIPos[start+k+1]. Each record is parsed as a
+// Sequence; for every record with a non-empty name the function:
+//   - looks the name up in nameMap when a name or count file is in use, to find
+//     how many sequences the record represents (1 otherwise); a missing name is
+//     printed to cout and sets m->control_pressed,
+//   - appends the record's start, end, length, ambiguous-base count and longest
+//     homopolymer to the five output vectors once per represented sequence,
+//   - writes one tab-delimited row to outMPI with MPI_File_write_shared.
+//
+// Always returns 0; it returns early if m->control_pressed is set at the top
+// of an iteration.
+int SeqSummaryCommand::MPICreateSummary(int start, int num, vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, MPI_File& inMPI, MPI_File& outMPI, vector<unsigned long long>& MPIPos) {
+	try {
+
+		//rank is queried here but not otherwise used in this function
+		int pid;
+		MPI_Status status;
+		MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+		for (int i = 0; i < num; i++) {
+
+			if (m->control_pressed) { return 0; }
+
+			//read next sequence
+			int length = MPIPos[start+i+1] - MPIPos[start+i];
+
+			//one extra zero-filled byte guarantees the data is null terminated,
+			//so building the string can never read past the allocation
+			vector<char> inBuf(length + 1, '\0');
+			MPI_File_read_at(inMPI, MPIPos[start+i], &inBuf[0], length, MPI_CHAR, &status);
+
+			string tempBuf(&inBuf[0]);
+			istringstream iss (tempBuf,istringstream::in);
+
+			Sequence current(iss);
+
+			if (current.getName() != "") {
+
+				//number of sequences this record stands for
+				int numReps = 1;
+				if ((namefile != "") || (countfile != "")) {
+					//make sure this sequence is in the namefile, else error
+					map<string, int>::iterator it = nameMap.find(current.getName());
+
+					if (it == nameMap.end()) { cout << "[ERROR]: " << current.getName() << " is not in your name or count file, please correct." << endl; m->control_pressed = true; }
+					else { numReps = it->second; }
+				}
+
+				//for each sequence this sequence represents
+				for (int j = 0; j < numReps; j++) {
+					startPosition.push_back(current.getStartPos());
+					endPosition.push_back(current.getEndPos());
+					seqLength.push_back(current.getNumBases());
+					ambigBases.push_back(current.getAmbigBases());
+					longHomoPolymer.push_back(current.getLongHomoPolymer());
+				}
+
+				string outputString = current.getName() + "\t" + toString(current.getStartPos()) + "\t" + toString(current.getEndPos()) + "\t";
+				outputString += toString(current.getNumBases()) + "\t" + toString(current.getAmbigBases()) + "\t" + toString(current.getLongHomoPolymer()) + "\t" + toString(numReps) + "\n";
+
+				//output to file; the row is copied into a writable buffer, as before,
+				//so the call also builds against MPI headers that take a non-const void*
+				length = outputString.length();
+				vector<char> outBuf(outputString.begin(), outputString.end());
+
+				MPI_File_write_shared(outMPI, &outBuf[0], length, MPI_CHAR, &status);
+			}
+		}
+
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "SeqSummaryCommand", "MPICreateSummary");
+		exit(1);
+	}
+}
+#endif
+/**************************************************************************************************
+ * Splits the summary work over `processors` workers and merges what they produce.
+ *
+ * Unix-like builds: forks until `process` reaches `processors`. Child k runs
+ * driverCreateSummary on lines[k], writing its rows to sumFile + pid + ".temp", then
+ * dumps its sequence count and the five stats vectors to fastafile + pid + ".num.temp"
+ * and exits. The parent handles lines[0] itself (rows go straight to sumFile), waits
+ * for the children, reads each ".num.temp" file back into the vectors, appends each
+ * child's rows to sumFile and removes the temp files.
+ *
+ * Other builds (Win32 thread API): starts processors-1 threads running
+ * MySeqSumThreadFunction on lines[0] .. lines[processors-2], handles
+ * lines[processors-1] on the calling thread, then merges every thread's seqSumData
+ * into the vectors and appends the numbered temp files to sumFile.
+ *
+ * Returns the sum of the counts reported by all workers.
+ *
+ * NOTE(review): on the fork path the `filename` parameter is never read; the member
+ * `fastafile` is passed to driverCreateSummary instead. Only the thread branch uses
+ * `filename`. Confirm callers always pass fastafile.
+ * NOTE(review): a failed fork() ends the program with exit status 0.
+ **************************************************************************************************/
+int SeqSummaryCommand::createProcessesCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename, string sumFile) {
+ try {
+ int process = 1; // index into lines[] for the next child; lines[0] is done by the parent
+ int num = 0; // running total of the counts returned by every worker
+ processIDS.clear();
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
+ //loop through and create all the processes you want
+ while (process != processors) {
+ int pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid); //remember child pids in spawn order; they name the temp files merged below
+ process++;
+ }else if (pid == 0){ // child process
+ num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile + toString(getpid()) + ".temp", lines[process]);
+
+ //hand results to the parent through a temp file: count, vector size, then one line per stats vector
+ ofstream out;
+ string tempFile = fastafile + toString(getpid()) + ".num.temp";
+ m->openOutputFile(tempFile, out);
+
+ out << num << endl;
+ out << startPosition.size() << endl;
+ for (int k = 0; k < startPosition.size(); k++) { out << startPosition[k] << '\t'; } out << endl;
+ for (int k = 0; k < endPosition.size(); k++) { out << endPosition[k] << '\t'; } out << endl;
+ for (int k = 0; k < seqLength.size(); k++) { out << seqLength[k] << '\t'; } out << endl;
+ for (int k = 0; k < ambigBases.size(); k++) { out << ambigBases[k] << '\t'; } out << endl;
+ for (int k = 0; k < longHomoPolymer.size(); k++) { out << longHomoPolymer[k] << '\t'; } out << endl;
+
+ out.close();
+
+ exit(0); // the child ends here and never returns to the caller
+ }else { // fork failed: signal the children already started, then quit
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
+ }
+
+ //parent summarizes the first chunk itself, writing straight to sumFile
+ num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile, lines[0]);
+
+ //force parent to wait until all the processes are done
+ for (int i=0;i<processIDS.size();i++) {
+ int temp = processIDS[i];
+ wait(&temp); // NOTE(review): wait() takes a status out-parameter, so the pid stored in temp does not select which child is reaped
+ }
+
+ //parent merges each child's count and stats vectors, then appends that child's rows to sumFile
+ for (int i = 0; i < processIDS.size(); i++) {
+ string tempFilename = fastafile + toString(processIDS[i]) + ".num.temp";
+ ifstream in;
+ m->openInputFile(tempFilename, in);
+
+ int temp, tempNum;
+ in >> tempNum; m->gobble(in); num += tempNum; // first value: the child's sequence count
+ in >> tempNum; m->gobble(in); // second value: how many entries each vector line holds
+ for (int k = 0; k < tempNum; k++) { in >> temp; startPosition.push_back(temp); } m->gobble(in);
+ for (int k = 0; k < tempNum; k++) { in >> temp; endPosition.push_back(temp); } m->gobble(in);
+ for (int k = 0; k < tempNum; k++) { in >> temp; seqLength.push_back(temp); } m->gobble(in);
+ for (int k = 0; k < tempNum; k++) { in >> temp; ambigBases.push_back(temp); } m->gobble(in);
+ for (int k = 0; k < tempNum; k++) { in >> temp; longHomoPolymer.push_back(temp); } m->gobble(in);
+
+ in.close();
+ m->mothurRemove(tempFilename);
+
+ m->appendFiles((sumFile + toString(processIDS[i]) + ".temp"), sumFile);
+ m->mothurRemove((sumFile + toString(processIDS[i]) + ".temp"));
+ }
+
+#else
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+ //Windows version uses threads, which share memory, so be careful when passing variables through the seqSumData struct.
+ //The fork() branch above clones the process, so memory is separate there, but that is not the case with threads.
+ //Each thread collects its results in its own seqSumData; they are merged into the caller's vectors below.
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ vector<seqSumData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ bool hasNameMap = false;
+ if ((namefile !="") || (countfile != "")) { hasNameMap = true; }
+
+ //Create processor worker threads.
+ for( int i=0; i<processors-1; i++ ){
+
+ string extension = "";
+ if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); } // thread 0 writes directly to sumFile; later threads get numbered temp files
+ // Allocate memory for thread data.
+ seqSumData* tempSum = new seqSumData(filename, (sumFile+extension), m, lines[i]->start, lines[i]->end, hasNameMap, nameMap);
+ pDataArray.push_back(tempSum);
+
+ //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
+ //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+ hThreadArray[i] = CreateThread(NULL, 0, MySeqSumThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
+ }
+
+ //calling thread handles the last chunk, writing its rows to a numbered temp file
+ num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, (sumFile+toString(processors-1)+".temp"), lines[processors-1]);
+ processIDS.push_back(processors-1);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Fold each thread's count and vectors into the totals, then close its handle and free its data.
+ for(int i=0; i < pDataArray.size(); i++){
+ num += pDataArray[i]->count;
+ for (int k = 0; k < pDataArray[i]->startPosition.size(); k++) { startPosition.push_back(pDataArray[i]->startPosition[k]); }
+ for (int k = 0; k < pDataArray[i]->endPosition.size(); k++) { endPosition.push_back(pDataArray[i]->endPosition[k]); }
+ for (int k = 0; k < pDataArray[i]->seqLength.size(); k++) { seqLength.push_back(pDataArray[i]->seqLength[k]); }
+ for (int k = 0; k < pDataArray[i]->ambigBases.size(); k++) { ambigBases.push_back(pDataArray[i]->ambigBases[k]); }
+ for (int k = 0; k < pDataArray[i]->longHomoPolymer.size(); k++) { longHomoPolymer.push_back(pDataArray[i]->longHomoPolymer[k]); }
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+ //append the numbered temp files to sumFile in processIDS order, removing each afterwards
+ for(int i=0;i<processIDS.size();i++){
+ m->appendFiles((sumFile + toString(processIDS[i]) + ".temp"), sumFile);
+ m->mothurRemove((sumFile + toString(processIDS[i]) + ".temp"));
+ }
+#endif
+ return num;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SeqSummaryCommand", "createProcessesCreateSummary");
+ exit(1);
+ }
+}
+/**********************************************************************************************************************/