From a7a59a70298a1745de274f84581352d8d64d6723 Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Fri, 5 Feb 2016 12:06:08 -0800 Subject: [PATCH] properly handle nature and vitae dates --- jobs_to_org | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/jobs_to_org b/jobs_to_org index 5d5d603..83511bc 100755 --- a/jobs_to_org +++ b/jobs_to_org @@ -99,7 +99,7 @@ my %sites = job_selector => [url_regex => qr{^/naturejobs/science/jobs/\d+-.+}], university => [href => qr{^/naturejobs/science/employer-directory/\d+$}], description => [class=>"job-description"], - date => [], + date => [content => qr/\d+\s+days\s+ago$/], position => [class=>'job-title heading'], }, vitae => {url => 'https://chroniclevitae.com/job_search?job_search%5Bdistance_from_zip%5D=10&job_search%5Bemployment_type%5D=Full-time&job_search%5Bposition_type%5D=63', @@ -107,7 +107,7 @@ my %sites = job_selector => [url_regex => qr{/jobs/\d+-\d+$}], university => [href => qr{/institutions/\d+$}], description => [class => 'job-listing__content__description'], - date => [_tag => 'td', content => qr/\,\s+20\d{2}$/], + date => [_tag => 'td', sub {$_[0]->as_text() =~ qr/\,\s+20\d{2}$/}], position => [_tag => 'h1', sub {defined $_[0]->parent()->attr('class') and $_[0]->parent()->attr('class') eq @@ -135,7 +135,7 @@ sub get_jobs { my %seen; my @job_urls = grep { ! $seen{ $_->URI()->abs() }++ } $m->find_all_links(@{$s->{job_selector}}); - for my $j_u (@job_urls) { + for my $j_u (sort @job_urls) { $m->get($j_u) or next; my $university = 'No university'; eval { @@ -145,6 +145,11 @@ sub get_jobs { eval { $date = $m->tree->look_down(@{$s->{date}})->as_text() // $todays_date if @{$s->{date}}; + if ($date =~ /(\d+)\s+days ago$/) { + $date = strftime('%Y-%m-%d %H:%M:%S', + localtime((DateTime->now() - + DateTime::Duration->new(days=>$1))->epoch)); + } }; my $description = 'unknown'; eval { -- 2.39.2