git.donarmstrong.com Git - bin.git/blobdiff - jobs_to_org
add reset usb bus command
[bin.git] / jobs_to_org
index be827d08514e87395dc447a6e828933fab3703c6..67a70424f2e445fb8502e9d407206102f1584e43 100755 (executable)
@@ -89,7 +89,7 @@ my %sites =
     (herc => {url => 'http://main.hercjobs.org/jobs/search?keywords=professor+AND+%28genomics+OR+bioinformatics+OR+biology+OR+informatics%29&discipline=academic-faculty&category=academic-faculty&category=allied-health&category=biological-biomedical-sciences&category=computer-information-sciences&category=education&category=interdisciplinary&category=mathematics-statistics&category=medical-research&category=physical-sciences&sort=DATE_POSTED+DESC',
               next_selector => [class => "bti-pagination-previous-link bti-pagination-prev-next",text => '>'],
               job_selector => [url_regex => qr{^\/jobs\/\d+/.+}],
-              university => [itemprop=>"hiringOrganization",itemtype=>"http://schema.org/Organization"],
+              university => [itemprop=>"hiringOrganization",itemtype=> qr{https?://schema.org/Organization}],
               description => [class=>"bti-jd-description",itemprop=>"description"],
               date => [class=>"bti-jd-detail-text",
                        sub {scalar $_[0]->parent()->attr('class') eq 'bti-jd-details-action'}],
@@ -118,10 +118,11 @@ my %sites =
      higheredjobs => {url => 'https://www.higheredjobs.com/search/advanced_action.cfm?JobCat=113&JobCat=259&JobCat=99&JobCat=100&JobCat=108&JobCat=107&PosType=1&InstType=1&InstType=2&InstType=3&Keyword=&Remote=1&Remote=2&Region=&Submit=Search+Jobs',
                       next_selector_tree => [class => 'js-click-submit',
                                              href => qr{advanced_action\.cfm},
-                                             sub {my @c = $_[0]->content_list();
-                                                  return 0 unless @c;
-                                                  return (defined $c[0]->attr('href') and
-                                                          defined $c[0]->attr('src') =~ /active-right\.gif/);
+                                             sub {return 1
+                                                      if defined
+                                                      $_[0]->look_down(src =>
+                                                                       qr/active-right/);
+                                                  return 0;
                                               },
                                             ],
                       job_selector => [url_regex => qr{^details.cfm\?JobCode=\d+},
@@ -167,6 +168,18 @@ sub get_jobs {
         my %seen;
         my @job_urls = grep { ! $seen{ $_->URI()->abs() }++ }
             $m->find_all_links(@{$s->{job_selector}});
+        my $link;
+        if (exists $s->{next_selector}) {
+            ($link) = map {$_->URI()->abs()}
+                $m->find_all_links(@{$s->{next_selector}});
+        } elsif (exists $s->{next_selector_tree}) {
+            $link = $m->tree->look_down(@{$s->{next_selector_tree}});
+            if (not defined $link) {
+                $m->tree->dump;
+            }
+            die "Unable to find next link" unless defined $link;
+            $link = $link->attr('href');
+        }
         for my $j_u (sort @job_urls) {
             $m->get($j_u) or next;
             my $university = 'No university';
@@ -192,16 +205,9 @@ sub get_jobs {
                 $position = $m->tree->look_down(@{$s->{position}})->as_text();
             };
             print format_job($university,$position,$j_u->URI->abs(),$description,$date);
-            $m->back();
-        }
-        if (exists $s->{next_selector}) {
-            $m->follow_link(@{$s->{next_selector}}) or die "Unable to find next link";
-        } elsif (exists $s->{next_selector_tree}) {
-            my $link = $m->tree->look_down(@{$s->{next_selector_tree}}) or
-                die "Unable to find next link";
-            $m->get($link->attr('href')) or
-                die "Unable to get next page";
+            $m->back() or die "Unable to go back";
         }
+        $m->get($link);
     }
 }