// Interval constructor taking a start and an end position as ints.
16 Interval(int start, int end) {
// Reset the three identifier strings to empty (chained assignment, evaluated right to left).
28 seqname = gene_id = transcript_id = "";
// Constructs a transcript from its identifiers, strand, interval structure and
// the extra text "left".
//   transcript_id, gene_id, seqname, strand : copied into the matching members
//   structure : copied into this->structure
//   left      : stored with its leading spaces removed
32 Transcript(const std::string& transcript_id, const std::string& gene_id, const std::string& seqname,
33 const char& strand, const std::vector<Interval>& structure, const std::string& left) {
34 this->structure = structure;
35 this->strand = strand;
36 this->seqname = seqname;
37 this->gene_id = gene_id;
38 this->transcript_id = transcript_id;
40 //eliminate prefix spaces in string variable "left"
// NOTE(review): "pos" is declared outside the lines shown here -- presumably initialised to 0; confirm.
42 int len = left.length();
// Advance pos past every leading ' ' (only the space character is skipped, not tabs or other whitespace).
43 while (pos < len && left[pos] == ' ') ++pos;
44 this->left = left.substr(pos);
// Accumulate the transcript length: each interval contributes end - start + 1
// positions, i.e. both endpoints are counted.
// NOTE(review): the initial value of "length" is set outside the lines shown here -- confirm it is zeroed first.
47 int s = structure.size();
48 for (int i = 0; i < s; i++) length += structure[i].end + 1 - structure[i].start;
// Strict ordering of transcripts: primarily by gene_id, with transcript_id as the tie-breaker.
// && binds tighter than ||, so the expression reads
//   gene_id < o.gene_id || (gene_id == o.gene_id && transcript_id < o.transcript_id).
51 bool operator< (const Transcript& o) const {
52 return gene_id < o.gene_id || gene_id == o.gene_id && transcript_id < o.transcript_id;
// Returns the transcript_id member by const reference (no copy is made).
55 const std::string& getTranscriptID() const { return transcript_id; }
// Returns the gene_id member by const reference (no copy is made).
57 const std::string& getGeneID() const { return gene_id; }
// Returns the seqname member by const reference (no copy is made).
59 const std::string& getSeqName() const { return seqname; }
// Returns the strand character by value.
61 char getStrand() const { return strand; }
// Returns the "left" text by const reference; the parameterised constructor stores it with leading spaces removed.
63 const std::string& getLeft() const { return left; }
// Returns the stored transcript length.
65 int getLength() const { return length; }
// Returns the interval list by const reference (no copy is made).
67 const std::vector<Interval>& getStructure() const { return structure; }
// Builds this transcript's sequence into the second argument from the genomic sequence passed as the first.
69 void extractSeq (const std::string&, std::string&) const;
// Loads this transcript's fields from an input file stream.
71 void read(std::ifstream&);
// Writes this transcript's fields to an output file stream.
72 void write(std::ofstream&);
75 int length; // transcript length; the parameterised constructor sums end - start + 1 over structure
76 std::vector<Interval> structure; // transcript structure; coordinates start from 1
78 std::string seqname, gene_id, transcript_id; // identifiers; follow the GTF definition
// Builds the sequence of this transcript into "seq" from the genomic sequence "gseq".
// Interval coordinates start from 1 and include both endpoints, while gseq is indexed from 0,
// hence the "- 1" adjustments below.
// NOTE(review): this member function is defined out of class in a header (see the include
// guard) without "inline"; including the header in more than one translation unit would
// produce duplicate definitions -- confirm how the header is used.
82 //gseq : genomic sequence
83 void Transcript::extractSeq(const std::string& gseq, std::string& seq) const {
85 int s = structure.size();
86 size_t glen = gseq.length();
// Bounds check: only the first interval's start and the last interval's end are tested.
// NOTE(review): this indexes structure[0] and structure[s - 1], so it presumes a non-empty
// structure, and presumably intervals sorted in ascending order -- confirm.
88 if (structure[0].start < 1 || (size_t)structure[s - 1].end > glen) {
89 fprintf(stderr, "Transcript %s is out of chromosome %s's boundary!\n", transcript_id.c_str(), seqname.c_str());
// Forward pass: append each interval's bases in order (presumably the '+' strand case;
// the enclosing switch is not visible in these lines).
95 for (int i = 0; i < s; i++) {
96 seq += gseq.substr(structure[i].start - 1, structure[i].end - structure[i].start + 1); // gseq starts from 0!
// Reverse pass: walk the intervals last-to-first and the positions high-to-low, appending
// getOpp() of each base (presumably the '-' strand reverse complement -- confirm against getOpp).
100 for (int i = s - 1; i >= 0; i--) {
101 for (int j = structure[i].end; j >= structure[i].start; j--) {
102 seq += getOpp(gseq[j - 1]);
// Any case not handled above is treated as a programming error.
106 default: assert(false);
// The result must be non-empty (checked only when asserts are enabled).
109 assert(seq.length() > 0);
// Reads one transcript record from fin into this object.
112 void Transcript::read(std::ifstream& fin) {
// Whitespace-separated fields first: transcript ID, gene ID, sequence name, strand token and length.
116 fin>>transcript_id>>gene_id>>seqname>>tmp>>length;
// The strand token must be exactly "+" or "-".
// NOTE(review): this input validation is an assert, so it disappears under NDEBUG, and no
// stream-failure check is visible in these lines.
117 assert(tmp.length() == 1 && (tmp[0] == '+' || tmp[0] == '-'));
// Append one Interval per (start, end) pair; s, start and end are obtained on lines not shown here.
121 for (int i = 0; i < s; i++) {
124 structure.push_back(Interval(start, end));
126 getline(fin, tmp); //get the end of this line
// Writes this transcript to fout.
130 void Transcript::write(std::ofstream& fout) {
131 int s = structure.size();
// Identifier fields, strand and length separated by single spaces (note the trailing " " after length).
133 fout<<transcript_id<<" "<<gene_id<<" "<<seqname<<" "<<strand<<" "<<length<<" ";
// Every interval is written as " start end".
135 for (int i = 0; i < s; i++) fout<<" "<<structure[i].start<<" "<<structure[i].end;
// Finally the "left" text; std::endl ends the line and flushes the stream.
137 fout<<left<<std::endl;
140 #endif /* TRANSCRIPT_H_ */