X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=jobs_to_org;fp=jobs_to_org;h=8adb8a456bb6125dd7e833d3aaac55a27bd8cf90;hb=c9a763e28d2ee634726b8fd2656770818cf7600e;hp=6a25a06aab30410ea7db8e392746b8683f32dc93;hpb=c72d1c1ad08debdd905929af959a242956833482;p=bin.git diff --git a/jobs_to_org b/jobs_to_org index 6a25a06..8adb8a4 100755 --- a/jobs_to_org +++ b/jobs_to_org @@ -55,6 +55,7 @@ use WWW::Mechanize::TreeBuilder; use vars qw($DEBUG); use Data::Printer; use Text::Wrap; +use POSIX qw(strftime); tie my $uuid, 'OSSP::uuid::tie'; $uuid= ["v1"]; @@ -93,13 +94,24 @@ my %sites = sub {scalar $_[0]->parent()->attr('class') eq 'bti-jd-details-action'}], position => [class=>"bti-jd-title",itemprop=>"title"], }, + nature => {url => 'http://www.nature.com/naturejobs/science/jobs?utf8=%E2%9C%93&q%5B%5D=professor&job_type%5B%5D=Assistant+Professor&job_type%5B%5D=Professor&order_by=created_on', + next_selector => [class=>"next_page",url_regex=>qr{^/naturejobs/science/jobs},], + job_selector => [url_regex => qr{^/naturejobs/science/jobs/\d+-.+}], + university => [href => qr{^/naturejobs/science/employer-directory/\d+$}], + description => [class=>"job-description"], + date => [], + position => [class=>'job-title heading'], + }, ); binmode STDOUT,":utf8"; get_jobs($options{site},$options{pages}); + sub get_jobs { my ($site,$pages) = @_; + my $todays_date = strftime('%Y-%m-%d %H:%M:%S',localtime()); + my $m = WWW::Mechanize->new(); WWW::Mechanize::TreeBuilder->meta->apply($m); if (not defined $sites{$site}) { @@ -113,10 +125,23 @@ sub get_jobs { $m->find_all_links(@{$s->{job_selector}}); for my $j_u (@job_urls) { $m->get($j_u); - my $university = $m->tree->look_down(@{$s->{university}})->as_text(); - my $date = $m->tree->look_down(@{$s->{date}})->as_text(); - my $description = $m->tree->look_down(@{$s->{description}})->as_text(); - my $position = $m->tree->look_down(@{$s->{position}})->as_text(); + my $university = 'No university'; + eval { + $university = $m->tree->look_down(@{$s->{university}})->as_text(); + }; + my $date = $todays_date; + eval { + $date = $m->tree->look_down(@{$s->{date}})->as_text() // $todays_date if + @{$s->{date}}; + }; + my $description = 'unknown'; + eval { + $description = $m->tree->look_down(@{$s->{description}})->as_text(); + }; + my $position = 'Unknown'; + eval { + $position = $m->tree->look_down(@{$s->{position}})->as_text(); + }; print format_job($university,$position,$j_u->URI->abs(),$description,$date); $m->back(); }