7 extends 'Org::Element';
9 use Time::HiRes qw(gettimeofday tv_interval);
11 our $VERSION = '0.23'; # VERSION
13 has tags => (is => 'rw');
14 has todo_states => (is => 'rw');
15 has done_states => (is => 'rw');
16 has priorities => (is => 'rw');
17 has drawer_names => (is => 'rw');
18 has properties => (is => 'rw');
19 has radio_targets => (is => 'rw');
21 has time_zone => (is => 'rw');
23 our $tags_re = qr/:(?:[A-Za-z0-9_@#%]+:)+/;
24 my $ls_re = qr/(?:(?<=[\015\012])|\A)/;
25 my $le_re = qr/(?:\R|\z)/;
26 our $arg_re = qr/(?: '(?<squote> [^']*)' |
30 our $args_re = qr/(?: $arg_re (?:[ \t]+ $arg_re)*)/x;
31 my $tstamp_re = qr/(?:\[\d{4}-\d{2}-\d{2} [ ] [^\n\]]*\])/x;
32 my $act_tstamp_re = qr/(?: <\d{4}-\d{2}-\d{2} [ ] [^\n>]* >)/x;
33 my $fn_name_re = qr/(?:[^ \t\n:\]]+)/x;
36 (?<link> \[\[(?<link_link> [^\]\n]+)\]
37 (?:\[(?<link_desc> (?:[^\]]|\R)+)\])?\]) |
38 (?<radio_target> <<<(?<rt_target> [^>\n]+)>>>) |
39 (?<target> <<(?<t_target> [^>\n]+)>>) |
41 # timestamp & time range
42 (?<trange> (?<trange_ts1> $tstamp_re)--
43 (?<trange_ts2> $tstamp_re)) |
44 (?<tstamp> $tstamp_re) |
45 (?<act_trange> (?<act_trange_ts1> $act_tstamp_re)--
46 (?<act_trange_ts2> $act_tstamp_re)) |
47 (?<act_tstamp> $act_tstamp_re) |
49 # footnote (num, name + def, name + inline definition)
50 (?<fn_num> \[(?<fn_num_num>\d+)\]) |
51 (?<fn_namedef> $ls_re \[fn:(?<fn_namedef_name> $fn_name_re)\]
52 [ \t]* (?<fn_namedef_def> [^ \t\n]+)) |
53 (?<fn_nameidef> \[fn:(?<fn_nameidef_name> $fn_name_re?):?
54 (?<fn_nameidef_def> ([^\n\]]+)?)\]) |
56 (?<markup_start> (?:(?<=\s)|\A)
59 (?<markup_end> (?<=\S)
61 # actually emacs doesn't allow ! after markup
62 (?:(?=[ \t\n:;"',.!?\)*-])|\z)) |
64 (?<plain_text> (?:[^\[<*/+=~_\n]+|.+?))
65 #(?<plain_text> .+?) # too dispersy
68 my $block_elems_re = # top level elements
69 qr/(?<block> $ls_re (?<block_begin_indent>[ \t]*)
70 \#\+BEGIN_(?<block_name>\w+)
71 (?:[ \t]+(?<block_raw_arg>[^\n]*))?\R
72 (?<block_content>(?:.|\R)*?)
73 \R(?<block_end_indent>[ \t]*)
74 \#\+END_\k<block_name> $le_re) |
75 (?<setting> $ls_re (?<setting_indent>[ \t]*) \#\+
76 (?<setting_name> \w+): [ \t]+
77 (?<setting_raw_arg> [^\n]+) $le_re) |
78 (?<fixedw> (?: $ls_re [ \t]* (?::[ ][^\n]* | :$) $le_re )+ ) |
79 (?<comment> $ls_re \#[^\n]*(?:\R\#[^\n]*)* (?:\R|\z)) |
80 (?<headline> $ls_re (?<h_bullet>\*+) [ \t]
82 (?:[ \t]+(?<h_tags> $tags_re))?[ \t]* $le_re) |
83 (?<li_header> $ls_re (?<li_indent>[ \t]*)
84 (?<li_bullet>[+*-]|\d+\.) [ \t]+
85 (?<li_checkbox> \[(?<li_cbstate> [ X-])\])?
86 (?: (?<li_dt> [^\n]+?) [ \t]+ ::)?) |
87 (?<table> (?: $ls_re [ \t]* \| [ \t]* \S[^\n]* $le_re)+) |
88 (?<drawer> $ls_re [ \t]* :(?<drawer_name> \w+): [ \t]*\R
89 (?<drawer_content>(?:.|\R)*?)
90 $ls_re [ \t]* :END:) |
91 (?<text> (?:[^#|:+*0-9\n-]+|\n|.)+?)
92 #(?<text> .+?) # too dispersy
98 $self->todo_states([]);
99 $self->done_states([]);
100 $self->priorities([]);
101 $self->properties({});
102 $self->drawer_names([qw/CLOCK LOGBOOK PROPERTIES/]);
104 $self->radio_targets([]);
109 if (!@{ $self->todo_states } && !@{ $self->done_states }) {
110 $self->todo_states(['TODO']);
111 $self->done_states(['DONE']);
113 if (!@{ $self->priorities }) {
114 $self->priorities([qw/A B C/]);
121 return [] unless defined($args) && length($args);
122 #$log->tracef("args = %s", $args);
124 while ($args =~ /$arg_re (?:\s+|\z)/xg) {
125 if (defined $+{squote}) {
126 push @args, $+{squote};
127 } elsif (defined $+{dquote}) {
128 push @args, $+{dquote};
130 push @args, $+{bare};
133 #$log->tracef("\\\@args = %s", \@args);
141 if (/\A[A-Za-z0-9_:-]+\z/) {
153 my ($self, $args) = @_;
154 $self->document($self) unless $self->document;
156 if (defined $args->{from_string}) {
158 # NOTE: parsing is done twice. first pass will set settings (e.g. custom
159 # todo keywords set by #+TODO), scan for radio targets. after that we
160 # scan again to build the elements tree.
162 $self->_init_pass1();
163 $self->_parse($args->{from_string}, 1);
164 $self->_init_pass2();
165 $self->_parse($args->{from_string}, 2);
169 # parse blocky elements: setting, blocks, headline, drawer
171 my ($self, $str, $pass) = @_;
172 $log->tracef('-> _parse(%s, pass=%d)', $str, $pass);
173 my $t0 = [gettimeofday];
176 my $last_headlines = [$self]; # [$doc, $last_hl_level1, $last_hl_lvl2, ...]
178 my $last_lists = []; # [last_List_obj_for_indent_level0, ...]
182 while ($str =~ /$block_elems_re/og) {
183 $parent = $last_listitem // $last_headline // $self;
184 #$log->tracef("TMP: parent=%s (%s)", ref($parent), $parent->_str);
186 next unless keys %m; # perlre bug?
187 #if ($log->is_trace) {
188 # # profiler shows that this is very heavy
189 # $log->tracef("match block element: %s", \%+);
192 if (defined $m{text}) {
193 push @text, $m{text};
197 $self->_add_text(join("", @text), $parent, $pass);
203 if ($m{block} && $pass == 2) {
205 require Org::Element::Block;
206 $el = Org::Element::Block->new(
208 document=>$self, parent=>$parent,
209 begin_indent=>$m{block_begin_indent},
210 end_indent=>$m{block_end_indent},
211 name=>$m{block_name}, args=>__parse_args($m{block_raw_arg}),
212 raw_content=>$m{block_content},
215 } elsif ($m{setting}) {
217 require Org::Element::Setting;
218 if ($m{setting_indent} &&
219 !(uc($m{setting_name}) ~~
220 @{Org::Element::Setting->indentable_settings})) {
221 push @text, $m{setting};
224 $el = Org::Element::Setting->new(
227 document=>$self, parent=>$parent,
228 indent => $m{setting_indent},
229 name=>$m{setting_name},
230 args=>__parse_args($m{setting_raw_arg}),
234 } elsif ($m{fixedw} && $pass == 2) {
236 require Org::Element::FixedWidthSection;
237 $el = Org::Element::FixedWidthSection->new(
240 document=>$self, parent=>$parent,
243 } elsif ($m{comment} && $pass == 2) {
245 require Org::Element::Comment;
246 $el = Org::Element::Comment->new(
248 document=>$self, parent=>$parent,
251 } elsif ($m{table} && $pass == 2) {
253 require Org::Element::Table;
254 $el = Org::Element::Table->new(
257 document=>$self, parent=>$parent,
260 } elsif ($m{drawer} && $pass == 2) {
262 require Org::Element::Drawer;
263 my $raw_content = $m{drawer_content};
264 $el = Org::Element::Drawer->new(
265 document=>$self, parent=>$parent,
266 name => uc($m{drawer_name}), pass => $pass,
268 $self->_add_text($raw_content, $el, $pass);
270 # for properties, we also parse property lines from raw drawer
271 # content. this is currently separate from normal Org text parsing,
272 # i'm not clear yet on how to do this canonically.
273 $el->_parse_properties($raw_content);
275 } elsif ($m{li_header} && $pass == 2) {
277 require Org::Element::List;
278 require Org::Element::ListItem;
280 my $level = length($m{li_indent});
281 my $bullet = $m{li_bullet};
282 my $indent = $m{li_indent};
284 my $cbstate = $m{li_cbstate};
285 my $type = defined($dt) ? 'D' :
286 $bullet =~ /^\d+\./ ? 'O' : 'U';
287 my $bstyle = $type eq 'O' ? '<N>.' : $bullet;
289 # parent for list is lesser-indented list (or last headline)
290 $parent = $last_headline // $self;
291 for (my $i=$level-1; $i>=0; $i--) {
292 if ($last_lists->[$i]) {
293 $parent = $last_lists->[$i];
298 my $list = $last_lists->[$level];
299 if (!$list || $list->type ne $type ||
300 $list->bullet_style ne $bstyle) {
301 $list = Org::Element::List->new(
302 document => $self, parent => $parent,
303 indent=>$indent, type=>$type, bullet_style=>$bstyle,
305 $last_lists->[$level] = $list;
306 $parent->children([]) if !$parent->children;
307 push @{ $parent->children }, $list;
309 $last_lists->[$level] = $list;
311 # parent for list item is list
314 $el = Org::Element::ListItem->new(
315 document=>$self, parent=>$list,
316 indent=>$indent, bullet=>$bullet);
317 $el->check_state($cbstate) if $cbstate;
318 $el->desc_term($self->_add_text_container($dt, $list, $pass))
321 splice @$last_lists, $level+1;
322 $last_listitem = $el;
324 } elsif ($m{headline} && $pass == 2) {
326 require Org::Element::Headline;
327 my $level = length $m{h_bullet};
329 # parent is upper-level headline
331 for (my $i=$level-1; $i>=0; $i--) {
332 $parent = $last_headlines->[$i] and last;
336 $el = Org::Element::Headline->new(
338 document=>$self, parent=>$parent,
341 $el->tags(__split_tags($m{h_tags})) if ($m{h_tags});
342 my $title = $m{h_title};
344 # recognize todo keyword. XXX cache re
345 my $todo_kw_re = "(?:".
346 join("|", map {quotemeta}
347 @{$self->todo_states}, @{$self->done_states}) . ")";
348 if ($title =~ s/^($todo_kw_re)(\s+|\W)/$2/) {
352 $el->todo_state($state);
353 $el->is_done($state ~~ @{ $self->done_states } ? 1:0);
355 # recognize priority. XXX cache re
357 join("|", map {quotemeta} @{$self->priorities}) . ")";
358 if ($title =~ s/\[#($prio_re)\]\s*//) {
359 $el->todo_priority($1);
363 $el->title($self->_add_text_container($title, $parent, $pass));
365 $last_headlines->[$el->level] = $el;
366 splice @$last_headlines, $el->level+1;
367 $last_headline = $el;
368 $last_listitem = undef;
372 # we haven't caught other matches to become element
373 die "BUG1: no element" unless $el || $pass != 2;
375 $parent->children([]) if !$parent->children;
376 push @{ $parent->children }, $el;
381 $self->_add_text(join("", @text), $parent, $pass);
385 $log->tracef('<- _parse(), elapsed time=%.3fs',
386 tv_interval($t0, [gettimeofday]));
389 sub _add_text_container {
390 require Org::Element::Text;
391 my ($self, $str, $parent, $pass) = @_;
392 my $container = Org::Element::Text->new(
393 document=>$self, parent=>$parent,
396 $self->_add_text($str, $container, $pass);
397 $container = $container->children->[0] if
398 $container->children && @{$container->children} == 1 &&
399 $container->children->[0]->isa('Org::Element::Text');
404 require Org::Element::Text;
405 my ($self, $str, $parent, $pass) = @_;
407 #$log->tracef("-> _add_text(%s, pass=%d)", $str, $pass);
410 while ($str =~ /$text_re/og) {
412 #if ($log->is_trace) {
413 # # profiler shows that this is very heavy
414 # $log->tracef("match text: %s", \%+);
418 if (defined $m{plain_text} && $pass == 2) {
419 push @plain_text, $m{plain_text};
423 $self->_add_plain_text(join("", @plain_text), $parent, $pass);
428 if ($m{link} && $pass == 2) {
429 require Org::Element::Link;
430 $el = Org::Element::Link->new(
431 document => $self, parent => $parent,
434 if (defined($m{link_desc}) && length($m{link_desc})) {
436 $self->_add_text_container($m{link_desc},
439 } elsif ($m{radio_target}) {
440 require Org::Element::RadioTarget;
441 $el = Org::Element::RadioTarget->new(
443 document => $self, parent => $parent,
444 target=>$m{rt_target},
446 } elsif ($m{target} && $pass == 2) {
447 require Org::Element::Target;
448 $el = Org::Element::Target->new(
449 document => $self, parent => $parent,
450 target=>$m{t_target},
452 } elsif ($m{fn_num} && $pass == 2) {
453 require Org::Element::Footnote;
454 $el = Org::Element::Footnote->new(
455 document => $self, parent => $parent,
456 name=>$m{fn_num_num}, is_ref=>1,
458 } elsif ($m{fn_namedef} && $pass == 2) {
459 require Org::Element::Footnote;
460 $el = Org::Element::Footnote->new(
461 document => $self, parent => $parent,
462 name=>$m{fn_namedef_name},
463 is_ref=>$m{fn_namedef_def} ? 0:1,
465 $el->def($self->_add_text_container($m{fn_namedef_def},
467 } elsif ($m{fn_nameidef} && $pass == 2) {
468 require Org::Element::Footnote;
469 $el = Org::Element::Footnote->new(
470 document => $self, parent => $parent,
471 name=>$m{fn_nameidef_name},
472 is_ref=>($m{fn_nameidef_def} ? 0:1) ||
473 !length($m{fn_nameidef_name}),
475 $el->def(length($m{fn_nameidef_def}) ?
476 $self->_add_text_container($m{fn_nameidef_def},
477 $parent, $pass) : undef);
478 } elsif ($m{trange} && $pass == 2) {
479 require Org::Element::TimeRange;
480 require Org::Element::Timestamp;
481 $el = Org::Element::TimeRange->new(
482 document => $self, parent => $parent,
484 my $opts = {allow_event_duration=>0, allow_repeater=>0};
485 $el->ts1(Org::Element::Timestamp->new(
486 document=>$self, parent=>$parent));
487 $el->ts1->_parse_timestamp($m{trange_ts1}, $opts);
488 $el->ts2(Org::Element::Timestamp->new(
489 document=>$self, parent=>$parent));
490 $el->ts2->_parse_timestamp($m{trange_ts2}, $opts);
491 $el->children([$el->ts1, $el->ts2]);
492 } elsif ($m{tstamp} && $pass == 2) {
493 require Org::Element::Timestamp;
494 $el = Org::Element::Timestamp->new(
495 document => $self, parent => $parent,
497 $el->_parse_timestamp($m{tstamp});
498 } elsif ($m{act_trange} && $pass == 2) {
499 require Org::Element::TimeRange;
500 require Org::Element::Timestamp;
501 $el = Org::Element::TimeRange->new(
502 document => $self, parent => $parent,
504 my $opts = {allow_event_duration=>0, allow_repeater=>0};
505 $el->ts1(Org::Element::Timestamp->new(
506 document=>$self, parent=>$parent));
507 $el->ts1->_parse_timestamp($m{act_trange_ts1}, $opts);
508 $el->ts2(Org::Element::Timestamp->new(
509 document=>$self, parent=>$parent));
510 $el->ts2->_parse_timestamp($m{act_trange_ts2}, $opts);
511 $el->children([$el->ts1, $el->ts2]);
512 } elsif ($m{act_tstamp} && $pass == 2) {
513 require Org::Element::Timestamp;
514 $el = Org::Element::Timestamp->new(
515 document => $self, parent => $parent,
517 $el->_parse_timestamp($m{act_tstamp});
518 } elsif ($m{markup_start} && $pass == 2) {
519 require Org::Element::Text;
520 $el = Org::Element::Text->new(
521 document => $self, parent => $parent,
522 style=>'', text=>$m{markup_start},
524 # temporary mark, we need to apply markup later
526 } elsif ($m{markup_end} && $pass == 2) {
527 require Org::Element::Text;
528 $el = Org::Element::Text->new(
529 document => $self, parent => $parent,
530 style=>'', text=>$m{markup_end},
532 # temporary mark, we need to apply markup later
535 die "BUG2: no element" unless $el || $pass != 2;
536 $parent->children([]) if !$parent->children;
537 push @{ $parent->children }, $el;
541 if (@plain_text && $pass == 2) {
542 $parent->children([]) if !$parent->children;
543 push @{$parent->children}, Org::Element::Text->new(
544 text => join("", @plain_text), style=>'',
545 document=>$self, parent=>$parent);
550 $self->_apply_markup($parent);
551 if (@{$self->radio_targets}) {
552 my $re = join "|", map {quotemeta} @{$self->radio_targets};
554 $self->_linkify_rt_recursive($re, $parent);
556 my $c = $parent->children // [];
559 #$log->tracef('<- _add_text()');
562 # to keep parser's regexes simple and fast, we detect markup in regex rather
563 # simplistically (as text element) and then apply some more filtering & applying
567 #$log->trace("-> _apply_markup()");
568 my ($self, $parent) = @_;
570 my $c = $parent->children or return;
573 #$log->tracef("text cluster = %s", [map {$_->as_string} @$c]);
574 # find a new mu_start
575 my $mu_start_index = -1;
577 for (my $i = $last_index; $i < @$c; $i++) {
578 next unless $c->[$i]->{_mu_start};
579 $mu_start_index = $i; $mu = $c->[$i]->text;
580 #$log->tracef("found mu_start at %d (%s)", $i, $mu);
583 unless ($mu_start_index >= 0) {
584 #$log->trace("no more mu_start found");
588 # check whether this is a valid markup (has text, has markup end, not
589 # interspersed with non-text, no more > 1 newlines)
590 my $mu_end_index = 0;
594 for (my $i=$mu_start_index+1; $i < @$c; $i++) {
595 if ($c->[$i]->isa('Org::Element::Text')) {
599 $has_unmarkable++; last;
601 if ($c->[$i]->{_mu_end} && $c->[$i]->text eq $mu) {
602 #$log->tracef("found mu_end at %d", $i);
603 $mu_end_index = $i; last;
605 my $text = $c->[$i]->as_string;
606 $newlines++ while $text =~ /\R/g;
607 last if $newlines > 1;
609 my $valid = $has_text && !$has_unmarkable
610 && $mu_end_index && $newlines <= 1;
611 #$log->tracef("mu candidate: start=%d, end=%s, ".
612 # "has_text=%s, has_unmarkable=%s, newlines=%d, valid=%s",
613 # $mu_start_index, $mu_end_index,
614 # $has_text, $has_unmarkable, $newlines, $valid
617 my $mu_el = Org::Element::Text->new(
618 document => $self, parent => $parent,
619 style=>$Org::Element::Text::mu2style{$mu}, text=>'',
621 my @c2 = splice @$c, $mu_start_index,
622 $mu_end_index-$mu_start_index+1, $mu_el;
623 #$log->tracef("grouping %s", [map {$_->text} @c2]);
624 $mu_el->children(\@c2);
628 $_->{parent} = $mu_el;
630 $self->_merge_text_elements(\@c2);
631 # squish if only one child
633 $mu_el->text($c2[0]->text);
634 $mu_el->children(undef);
637 undef $c->[$mu_start_index]->{_mu_start};
641 $self->_merge_text_elements($c);
642 #$log->trace("<- _apply_markup()");
645 sub _merge_text_elements {
646 my ($self, $els) = @_;
647 #$log->tracef("-> _merge_text_elements(%s)", [map {$_->as_string} @$els]);
648 return unless @$els >= 2;
653 next if $els->[$i]->children || !$els->[$i]->isa('Org::Element::Text');
654 my $istyle = $els->[$i]->style // "";
656 last if $i+1 >= @$els || $els->[$i+1]->children ||
657 !$els->[$i+1]->isa('Org::Element::Text');
658 last if ($els->[$i+1]->style // "") ne $istyle;
659 #$log->tracef("merging text[%d] '%s' with '%s'",
660 # $i, $els->[$i]->text, $els->[$i+1]->text);
661 $els->[$i]->{text} .= $els->[$i+1]->{text} // "";
662 splice @$els, $i+1, 1;
665 #$log->tracef("merge result = %s", [map {$_->as_string} @$els]);
666 #$log->trace("<- _merge_text_elements()");
669 sub _linkify_rt_recursive {
670 require Org::Element::Text;
671 require Org::Element::Link;
672 my ($self, $re, $parent) = @_;
673 my $c = $parent->children;
675 for (my $i=0; $i<@$c; $i++) {
677 if ($el->isa('Org::Element::Text')) {
678 my @split0 = split /\b($re)\b/, $el->text;
679 next unless @split0 > 1;
681 for my $s (@split0) {
683 push @split, Org::Element::Link->new(
684 document=>$self, parent=>$parent,
685 link=>$s, description=>undef,
686 from_radio_target=>1,
688 } elsif (length $s) {
689 push @split, Org::Element::Text->new(
690 document=>$self, parent=>$parent,
691 text=>$s, style=>$el->style,
695 splice @$c, $i, 1, @split;
697 $self->_linkify_rt_recursive($re, $el);
701 sub _add_plain_text {
702 require Org::Element::Text;
703 my ($self, $str, $parent, $pass) = @_;
704 my $el = Org::Element::Text->new(
705 document=>$self, parent=>$parent, style=>'', text=>$str);
706 $parent->children([]) if !$parent->children;
707 push @{ $parent->children }, $el;
711 [$_[0] =~ /:([^:]+)/g];
715 # ABSTRACT: Represent an Org document
722 Org::Document - Represent an Org document
732 # create a new Org document tree from string
733 my $org = Org::Document->new(from_string => <<EOF);
742 Derived from L<Org::Element>.
748 List of tags for this file, usually set via #+FILETAGS.
750 =head2 todo_states => ARRAY
752 List of known (action-requiring) todo states. Default is ['TODO'].
754 =head2 done_states => ARRAY
756 List of known done (non-action-requiring) states. Default is ['DONE'].
758 =head2 priorities => ARRAY
760 List of known priorities. Default is ['A', 'B', 'C'].
762 =head2 drawer_names => ARRAY
764 List of known drawer names. Default is [qw/CLOCK LOGBOOK PROPERTIES/].
766 =head2 properties => ARRAY
768 File-wide properties.
770 =head2 radio_targets => ARRAY
772 List of radio target text.
774 =head2 time_zone => ARRAY
776 If set, will be passed to DateTime->new() (e.g. by L<Org::Element::Timestamp>).
780 =for Pod::Coverage BUILD
782 =head2 new(from_string => ...)
784 Create object from string.
788 Steven Haryanto <stevenharyanto@gmail.com>
790 =head1 COPYRIGHT AND LICENSE
792 This software is copyright (c) 2012 by Steven Haryanto.
794 This is free software; you can redistribute it and/or modify it under
795 the same terms as the Perl 5 programming language system itself.