14 If no genome is provided, seqname field is used to store the allele name.
20 Interval(int start, int end) {
32 seqname = gene_id = transcript_id = "";
36 Transcript(const std::string& transcript_id, const std::string& gene_id, const std::string& seqname,
37 const char& strand, const std::vector<Interval>& structure, const std::string& left) {
38 this->structure = structure;
39 this->strand = strand;
40 this->seqname = seqname;
41 this->gene_id = gene_id;
42 this->transcript_id = transcript_id;
44 //eliminate prefix spaces in string variable "left"
46 int len = left.length();
47 while (pos < len && left[pos] == ' ') ++pos;
48 this->left = left.substr(pos);
51 int s = structure.size();
52 for (int i = 0; i < s; i++) length += structure[i].end + 1 - structure[i].start;
55 bool operator< (const Transcript& o) const {
56 return gene_id < o.gene_id || (gene_id == o.gene_id && transcript_id < o.transcript_id) || (gene_id == o.gene_id && transcript_id == o.transcript_id && seqname < o.seqname);
59 const std::string& getTranscriptID() const { return transcript_id; }
61 const std::string& getGeneID() const { return gene_id; }
63 const std::string& getSeqName() const { return seqname; }
65 char getStrand() const { return strand; }
67 const std::string& getLeft() const { return left; }
69 int getLength() const { return length; }
71 const std::vector<Interval>& getStructure() const { return structure; }
73 void extractSeq (const std::string&, std::string&) const;
75 void read(std::ifstream&);
76 void write(std::ofstream&);
79 int length; // transcript length
80 std::vector<Interval> structure; // transcript structure , coordinate starts from 1
82 std::string seqname, gene_id, transcript_id; // follow GTF definition
86 //gseq : genomic sequence
87 void Transcript::extractSeq(const std::string& gseq, std::string& seq) const {
89 int s = structure.size();
90 size_t glen = gseq.length();
92 if (structure[0].start < 1 || (size_t)structure[s - 1].end > glen) {
93 fprintf(stderr, "Transcript %s is out of chromosome %s's boundary!\n", transcript_id.c_str(), seqname.c_str());
99 for (int i = 0; i < s; i++) {
100 seq += gseq.substr(structure[i].start - 1, structure[i].end - structure[i].start + 1); // gseq starts from 0!
104 for (int i = s - 1; i >= 0; i--) {
105 for (int j = structure[i].end; j >= structure[i].start; j--) {
106 seq += getOpp(gseq[j - 1]);
110 default: assert(false);
113 assert(seq.length() > 0);
116 void Transcript::read(std::ifstream& fin) {
120 getline(fin, transcript_id);
121 getline(fin, gene_id);
122 getline(fin, seqname);
124 assert(tmp.length() == 1 && (tmp[0] == '+' || tmp[0] == '-'));
128 for (int i = 0; i < s; i++) {
131 structure.push_back(Interval(start, end));
133 getline(fin, tmp); //get the end of this line
137 void Transcript::write(std::ofstream& fout) {
138 int s = structure.size();
140 fout<<transcript_id<<std::endl;
141 fout<<gene_id<<std::endl;
142 fout<<seqname<<std::endl;
143 fout<<strand<<" "<<length<<std::endl;
145 for (int i = 0; i < s; i++) fout<<" "<<structure[i].start<<" "<<structure[i].end;
147 fout<<left<<std::endl;
150 #endif /* TRANSCRIPT_H_ */