From c006ac2fb92fa2c5f281d47973474000e278060c Mon Sep 17 00:00:00 2001
From: Don Armstrong
Date: Mon, 28 Mar 2016 14:29:58 -0700
Subject: [PATCH] support higheredjobs in jobs_to_org

---
Review notes (this section is ignored by git-am):

 * Adds a "higheredjobs" entry to %sites and teaches get_jobs to follow a
   "next page" link located with look_down() (next_selector_tree) when a
   site has no follow_link()-style next_selector.
 * The active-right.gif test is now parenthesized.  The earlier form,
   `defined EXPR =~ /re/`, is parsed by Perl as defined(EXPR =~ /re/),
   which is true whether or not the pattern matches.
 * The next-link callback now returns false when the first child is plain
   text instead of calling ->attr on a non-reference.
 * The dot in the job_selector pattern is escaped (details\.cfm).
 * Indentation was reconstructed from a whitespace-collapsed copy; use
   `git apply --ignore-whitespace` if the context lines fail to match.  The
   post-image blob id on the index line predates the fixes above.

 jobs_to_org | 39 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/jobs_to_org b/jobs_to_org
index fcdfd35..03777c5 100755
--- a/jobs_to_org
+++ b/jobs_to_org
@@ -115,6 +115,36 @@ my %sites =
                                  'page-title page-title--two-col'},
                    ],
     },
+    higheredjobs => {url => 'https://www.higheredjobs.com/search/advanced_action.cfm?JobCat=113&JobCat=259&JobCat=99&JobCat=100&JobCat=108&JobCat=107&PosType=1&InstType=1&InstType=2&InstType=3&Keyword=&Remote=1&Remote=2&Region=&Submit=Search+Jobs',
+                     next_selector_tree => [class => 'js-click-submit', # look_down() criteria for the "next page" link
+                                            href => qr{advanced_action\.cfm},
+                                            sub {my @c = $_[0]->content_list();
+                                                 return 0 unless @c and ref $c[0]; # no children, or first child is plain text
+                                                 return (defined $c[0]->attr('href') and # NOTE(review): href is read from the first child, not the link itself -- confirm
+                                                         ($c[0]->attr('src') || '') =~ /active-right\.gif/); # parenthesized so the match itself is tested, not defined(match)
+                                             },
+                                           ],
+                     job_selector => [url_regex => qr{^details\.cfm\?JobCode=\d+}, # links to individual job pages
+                                     ],
+                     university => [class => qr/field-value/,
+                                    sub {my $p = $_[0]->parent();
+                                         my $c = $p->look_down(class => qr/field-label/,
+                                                              );
+                                         defined $c and $c->as_text() =~ qr/institution/i; # keep the value whose parent holds an "institution" label
+                                     }
+                                   ],
+                     date => [class => qr/field-value/,
+                              sub {my $p = $_[0]->parent();
+                                   my $c = $p->look_down(class => qr/field-label/,
+                                                        );
+                                   defined $c and $c->as_text() =~ qr/posted/i; # keep the value whose parent holds a "posted" label
+                               }
+                             ],
+                     position => [id => 'jobtitle-header',
+                                  _tag => 'h1',
+                                 ],
+                     description => [id => 'jobDesc'],
+    },
 );
 
 binmode STDOUT,":utf8";
@@ -163,7 +193,14 @@ sub get_jobs {
             print format_job($university,$position,$j_u->URI->abs(),$description,$date);
             $m->back();
         }
-        $m->follow_link(@{$s->{next_selector}}) or die "Unable to find next link";
+        if (exists $s->{next_selector}) { # site paginates through a follow_link() spec
+            $m->follow_link(@{$s->{next_selector}}) or die "Unable to find next link";
+        } elsif (exists $s->{next_selector_tree}) { # site paginates through a look_down() spec
+            my $link = $m->tree->look_down(@{$s->{next_selector_tree}}) or
+                die "Unable to find next link";
+            $m->get($link->attr('href')) or
+                die "Unable to get next page";
+        }
     }
 }
 
-- 
2.39.2