X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=jobs_to_org;h=d76392fa12acf57d5e45a50ba93f8c388025ccb5;hb=3d5241a316e3ff729b19b878b0841558120f75e9;hp=03777c568300eb17ae03aa29002db009d742e7b1;hpb=c006ac2fb92fa2c5f281d47973474000e278060c;p=bin.git diff --git a/jobs_to_org b/jobs_to_org index 03777c5..d76392f 100755 --- a/jobs_to_org +++ b/jobs_to_org @@ -118,10 +118,11 @@ my %sites = higheredjobs => {url => 'https://www.higheredjobs.com/search/advanced_action.cfm?JobCat=113&JobCat=259&JobCat=99&JobCat=100&JobCat=108&JobCat=107&PosType=1&InstType=1&InstType=2&InstType=3&Keyword=&Remote=1&Remote=2&Region=&Submit=Search+Jobs', next_selector_tree => [class => 'js-click-submit', href => qr{advanced_action\.cfm}, - sub {my @c = $_[0]->content_list(); - return 0 unless @c; - return (defined $c[0]->attr('href') and - defined $c[0]->attr('src') =~ /active-right\.gif/); + sub {return 1 + if defined + $_[0]->look_down(src => + qr/active-right/); + return 0; }, ], job_selector => [url_regex => qr{^details.cfm\?JobCode=\d+}, @@ -160,12 +161,25 @@ sub get_jobs { if (not defined $sites{$site}) { die "Unknown site $site"; } + print "* Jobs from $site\n"; my $s = $sites{$site}; $m->get($s->{url}); for (1..$pages) { my %seen; my @job_urls = grep { ! $seen{ $_->URI()->abs() }++ } $m->find_all_links(@{$s->{job_selector}}); + my $link; + if (exists $s->{next_selector}) { + ($link) = map {$_->URI()->abs()} + $m->find_all_links(@{$s->{next_selector}}); + } elsif (exists $s->{next_selector_tree}) { + $link = $m->tree->look_down(@{$s->{next_selector_tree}}); + if (not defined $link) { + $m->tree->dump; + } + die "Unable to find next link" unless defined $link; + $link = $link->attr('href'); + } for my $j_u (sort @job_urls) { $m->get($j_u) or next; my $university = 'No university'; @@ -191,16 +205,9 @@ sub get_jobs { $position = $m->tree->look_down(@{$s->{position}})->as_text(); }; print format_job($university,$position,$j_u->URI->abs(),$description,$date); - $m->back(); - } - if (exists $s->{next_selector}) { - $m->follow_link(@{$s->{next_selector}}) or die "Unable to find next link"; - } elsif (exists $s->{next_selector_tree}) { - my $link = $m->tree->look_down(@{$s->{next_selector_tree}}) or - die "Unable to find next link"; - $m->get($link->attr('href')) or - die "Unable to get next page"; + $m->back() or die "Unable to go back"; } + $m->get($link); } }