git.donarmstrong.com Git - bin.git/commitdiff
properly handle nature and vitae dates
author Don Armstrong <don@donarmstrong.com>
Fri, 5 Feb 2016 20:06:08 +0000 (12:06 -0800)
committer Don Armstrong <don@donarmstrong.com>
Fri, 5 Feb 2016 20:06:08 +0000 (12:06 -0800)
jobs_to_org

index 5d5d6036ea34e046b7111cc0e6925007852b24c4..83511bc42449202fb974a523facfb0bdf5c1fa1d 100755 (executable)
@@ -99,7 +99,7 @@ my %sites =
                 job_selector => [url_regex => qr{^/naturejobs/science/jobs/\d+-.+}],
                 university => [href => qr{^/naturejobs/science/employer-directory/\d+$}],
                 description => [class=>"job-description"],
-                date => [],
+                date => [content => qr/\d+\s+days\s+ago$/],
                 position => [class=>'job-title heading'],
                },
      vitae => {url => 'https://chroniclevitae.com/job_search?job_search%5Bdistance_from_zip%5D=10&job_search%5Bemployment_type%5D=Full-time&job_search%5Bposition_type%5D=63',
@@ -107,7 +107,7 @@ my %sites =
                job_selector => [url_regex => qr{/jobs/\d+-\d+$}],
                university => [href => qr{/institutions/\d+$}],
                description => [class => 'job-listing__content__description'],
-               date => [_tag => 'td', content => qr/\,\s+20\d{2}$/],
+               date => [_tag => 'td', sub {$_[0]->as_text() =~ qr/\,\s+20\d{2}$/}],
                position => [_tag => 'h1',
                             sub {defined $_[0]->parent()->attr('class') and
                                      $_[0]->parent()->attr('class') eq
@@ -135,7 +135,7 @@ sub get_jobs {
         my %seen;
         my @job_urls = grep { ! $seen{ $_->URI()->abs() }++ }
             $m->find_all_links(@{$s->{job_selector}});
-        for my $j_u (@job_urls) {
+        for my $j_u (sort @job_urls) {
             $m->get($j_u) or next;
             my $university = 'No university';
             eval {
@@ -145,6 +145,11 @@ sub get_jobs {
             eval {
                 $date = $m->tree->look_down(@{$s->{date}})->as_text() // $todays_date if
                     @{$s->{date}};
+                if ($date =~ /(\d+)\s+days ago$/) {
+                    $date = strftime('%Y-%m-%d %H:%M:%S',
+                                     localtime((DateTime->now() -
+                                      DateTime::Duration->new(days=>$1))->epoch));
+                }
             };
             my $description = 'unknown';
             eval {