- mothurOut("The align.seqs command reads a file containing sequences and creates an alignment file and a report file.\n");
- mothurOut("The align.seqs command parameters are template, candidate, search, ksize, align, match, mismatch, gapopen and gapextend.\n");
- mothurOut("The template and candidate parameters are required.\n");
- mothurOut("The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer and blast. The default is kmer.\n");
- mothurOut("The align parameter allows you to specify the alignment method to use. Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n");
- mothurOut("The ksize parameter allows you to specify the kmer size for finding most similar template to candidate. The default is 7.\n");
- mothurOut("The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n");
- mothurOut("The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n");
- mothurOut("The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -1.0.\n");
- mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -2.0.\n");
- mothurOut("The align.seqs command should be in the following format: \n");
- mothurOut("align.seqs(template=yourTemplateFile, candidate=yourCandidateFile, align=yourAlignmentMethod, search=yourSearchmethod, ksize=yourKmerSize, match=yourMatchBonus, mismatch=yourMismatchpenalty, gapopen=yourGapopenPenalty, gapextend=yourGapExtendPenalty) \n");
- mothurOut("Example align.seqs(candidate=candidate.fasta, template=core.filtered, align=kmer, search=gotoh, ksize=8, match=2.0, mismatch=3.0, gapopen=-2.0, gapextend=-1.0)\n");
- mothurOut("Note: No spaces between parameter labels (i.e. candidate), '=' and parameters (i.e.yourFastaFile).\n\n");
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, rand());
+
+ for (int s = 0; s < candidateFileNames.size(); s++) {
+ if (m->control_pressed) { outputTypes.clear(); return 0; }
+
+ m->mothurOut("Aligning sequences from " + candidateFileNames[s] + " ..." ); m->mothurOutEndLine();
+
+ if (outputDir == "") { outputDir += m->hasPath(candidateFileNames[s]); }
+ string alignFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + getOutputFileNameTag("fasta");
+ string reportFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + getOutputFileNameTag("alignreport");
+ string accnosFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + getOutputFileNameTag("accnos");
+ bool hasAccnos = true;
+
+ int numFastaSeqs = 0;
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+ int start = time(NULL);
+
+#ifdef USE_MPI
+ int pid, numSeqsPerProcessor;
+ int tag = 2001;
+ vector<unsigned long long> MPIPos;
+ MPIWroteAccnos = false;
+
+ MPI_Status status;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
+
+ MPI_File inMPI;
+ MPI_File outMPIAlign;
+ MPI_File outMPIReport;
+ MPI_File outMPIAccnos;
+
+ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
+ int inMode=MPI_MODE_RDONLY;
+
+ char outAlignFilename[1024];
+ strcpy(outAlignFilename, alignFileName.c_str());
+
+ char outReportFilename[1024];
+ strcpy(outReportFilename, reportFileName.c_str());
+
+ char outAccnosFilename[1024];
+ strcpy(outAccnosFilename, accnosFileName.c_str());
+
+ char inFileName[1024];
+ strcpy(inFileName, candidateFileNames[s].c_str());
+
+ MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+ MPI_File_open(MPI_COMM_WORLD, outAlignFilename, outMode, MPI_INFO_NULL, &outMPIAlign);
+ MPI_File_open(MPI_COMM_WORLD, outReportFilename, outMode, MPI_INFO_NULL, &outMPIReport);
+ MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); MPI_File_close(&outMPIAccnos); outputTypes.clear(); return 0; }
+
+ if (pid == 0) { //you are the root process
+
+ MPIPos = m->setFilePosFasta(candidateFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs
+
+ //send file positions to all processes
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
+
+ //figure out how many sequences you have to align
+ numSeqsPerProcessor = numFastaSeqs / processors;
+ int startIndex = pid * numSeqsPerProcessor;
+ if(pid == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor; }
+
+ //align your part
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos);
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); MPI_File_close(&outMPIAccnos); outputTypes.clear(); return 0; }
+
+ for (int i = 1; i < processors; i++) {
+ bool tempResult;
+ MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+ if (tempResult != 0) { MPIWroteAccnos = true; }
+ }
+ }else{ //you are a child process
+ MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ MPIPos.resize(numFastaSeqs+1);
+ MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+
+
+ //figure out how many sequences you have to align
+ numSeqsPerProcessor = numFastaSeqs / processors;
+ int startIndex = pid * numSeqsPerProcessor;
+ if(pid == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor; }
+
+
+ //align your part
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos);
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); MPI_File_close(&outMPIAccnos); outputTypes.clear(); return 0; }
+
+ MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
+ }
+
+ //close files
+ MPI_File_close(&inMPI);
+ MPI_File_close(&outMPIAlign);
+ MPI_File_close(&outMPIReport);
+ MPI_File_close(&outMPIAccnos);
+
+ //delete accnos file if blank
+ if (pid == 0) {
+ //delete accnos file if its blank else report to user
+ if (MPIWroteAccnos) {
+ m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
+ if (!flip) {
+ m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well.");
+ }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); }
+ m->mothurOutEndLine();
+ }else {
+ //MPI_Info info;
+ //MPI_File_delete(outAccnosFilename, info);
+ hasAccnos = false;
+ m->mothurRemove(accnosFileName);
+ }
+ }
+
+#else
+
+ vector<unsigned long long> positions;
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ positions = m->divideFile(candidateFileNames[s], processors);
+ for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); }
+ #else
+ if (processors == 1) {
+ lines.push_back(new linePair(0, 1000));
+ }else {
+ positions = m->setFilePosFasta(candidateFileNames[s], numFastaSeqs);
+ if (positions.size() < processors) { processors = positions.size(); }
+
+ //figure out how many sequences you have to process
+ int numSeqsPerProcessor = numFastaSeqs / processors;
+ for (int i = 0; i < processors; i++) {
+ int startIndex = i * numSeqsPerProcessor;
+ if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
+ lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
+ }
+ }
+ #endif
+
+ if(processors == 1){
+ numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
+ }else{
+ numFastaSeqs = createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
+ }
+
+ if (m->control_pressed) { m->mothurRemove(accnosFileName); m->mothurRemove(alignFileName); m->mothurRemove(reportFileName); outputTypes.clear(); return 0; }
+
+ //delete accnos file if its blank else report to user
+ if (m->isBlank(accnosFileName)) { m->mothurRemove(accnosFileName); hasAccnos = false; }
+ else {
+ m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
+ if (!flip) {
+ m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well.");
+ }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); }
+ m->mothurOutEndLine();
+ }
+
+#endif
+
+
+ #ifdef USE_MPI
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ if (pid == 0) { //only one process should output to screen
+ #endif
+
+ outputNames.push_back(alignFileName); outputTypes["fasta"].push_back(alignFileName);
+ outputNames.push_back(reportFileName); outputTypes["alignreport"].push_back(reportFileName);
+ if (hasAccnos) { outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); }
+
+ #ifdef USE_MPI
+ }
+ #endif
+
+ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences.");
+ m->mothurOutEndLine();
+ m->mothurOutEndLine();
+ }
+
+ //set align file as new current fastafile
+ string currentFasta = "";
+ itTypes = outputTypes.find("fasta");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
+ }
+
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
+ return 0;