]> git.donarmstrong.com Git - bin.git/commitdiff
add jobs to org
authorDon Armstrong <don@donarmstrong.com>
Wed, 20 Jan 2016 19:07:15 +0000 (13:07 -0600)
committerDon Armstrong <don@donarmstrong.com>
Wed, 20 Jan 2016 19:07:15 +0000 (13:07 -0600)
jobs_to_org [new file with mode: 0755]

diff --git a/jobs_to_org b/jobs_to_org
new file mode 100755 (executable)
index 0000000..2468a78
--- /dev/null
@@ -0,0 +1,145 @@
+#!/usr/bin/perl
+# jobs_to_org scrapes job postings from a job board into org-mode TODO entries,
+# and is released under the terms of the GNU GPL version 3, or any
+# later version, at your option. See the file README and COPYING for
+# more information.
+# Copyright 2016 by Don Armstrong <don@donarmstrong.com>.
+
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+=head1 NAME
+
+jobs_to_org - scrape job postings into org-mode TODO entries
+
+=head1 SYNOPSIS
+
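+jobs_to_org [options]
+
+ Options:
+   --site, -s name of the job site to scrape (Default herc)
+   --pages, -p number of result pages to process (Default 1)
+   --debug, -d debugging level (Default 0)
+   --help, -h display this help
+   --man, -m display manual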
+
+=head1 OPTIONS
+
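+=over
+
+=item B<--site, -s>
+
+Name of the job site to scrape. (Default herc)
+
+=item B<--pages, -p>
+
+Number of search result pages to process. (Default 1)
+
+=item B<--debug, -d>
+
+Debug verbosity. (Default 0)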
+
+=item B<--help, -h>
+
+Display brief usage information.
+
+=item B<--man, -m>
+
+Display this manual.
+
+=back
+
+=head1 EXAMPLES
+
+jobs_to_org --site herc --pages 2
+
+=cut
+
+use OSSP::uuid;
+use WWW::Mechanize;
+use WWW::Mechanize::TreeBuilder;
+use vars qw($DEBUG);
+use Data::Printer;
+
+# Tie $uuid to OSSP::uuid::tie and select the "v1" mode by assigning an
+# array ref; format_job interpolates $uuid into the :ID: property of
+# every entry it produces.
+# NOTE(review): presumably each read of the tied scalar yields a freshly
+# generated UUID -- confirm against the OSSP::uuid documentation.
+tie my $uuid, 'OSSP::uuid::tie';
+$uuid= ["v1"];
+
+# Defaults for the command line options; GetOptions overwrites the
+# entries in place for every option given on the command line.
+my %options = (debug           => 0,
+               help            => 0,
+               man             => 0,
+               pages           => 1,
+               site            => 'herc',
+              );
+
+# GetOptions returns false when it sees an unknown or malformed option;
+# print the usage and exit instead of scraping with whatever was parsed.
+GetOptions(\%options,
+           'site|s=s',
+           'pages|p=i',
+           'debug|d+','help|h|?','man|m') or pod2usage(2);
+
+pod2usage() if $options{help};
+pod2usage({verbose=>2}) if $options{man};
+
+$DEBUG = $options{debug};
+
+# Collect every problem with the supplied options so that they can all
+# be reported in a single usage message.
+my @USAGE_ERRORS;
+if ($options{pages} < 1) {
+    # a page count below 1 would make the scrape loop a silent no-op
+    push @USAGE_ERRORS,"--pages must be at least 1";
+}
+
+pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
+
+# Per-site scraping configuration, keyed by the name given to --site.
+# get_jobs uses the entries as follows:
+#   url           - search results page fetched first
+#   next_selector - link criteria handed to follow_link to change page
+#   job_selector  - link criteria handed to find_all_links to collect
+#                   the job postings on a results page
+#   university, description, date, position
+#                 - look_down criteria locating each field on a job page
+my %sites =
+    (herc => {url => 'http://main.hercjobs.org/jobs/search?keywords=professor+AND+%28genomics+OR+bioinformatics+OR+biology+OR+informatics%29&discipline=academic-faculty&category=academic-faculty&category=allied-health&category=biological-biomedical-sciences&category=computer-information-sciences&category=education&category=interdisciplinary&category=mathematics-statistics&category=medical-research&category=physical-sciences&sort=DATE_POSTED+DESC',
+              # NOTE(review): the class below names a "previous" link even
+              # though it is used to advance through the results --
+              # confirm against the site's markup.
+              next_selector => [class => "bti-pagination-previous-link bti-pagination-prev-next"],
+              job_selector => [url_regex => qr{^\/jobs\/\d+/.+}],
+              university => [itemprop=>"hiringOrganization",itemtype=>"http://schema.org/Organization"],
+              description => [class=>"bti-jd-description",itemprop=>"description"],
+              # the code ref further restricts the match to elements whose
+              # parent carries the class 'bti-jd-details-action'
+              date => [class=>"bti-jd-detail-text",
+                       sub {scalar $_[0]->parent()->attr('class') eq 'bti-jd-details-action'}],
+              position => [class=>"bti-jd-title",itemprop=>"title"],
+             },
+    );
+
+# Switch STDOUT to the :utf8 layer, then scrape the selected site for
+# the requested number of result pages.
+binmode(STDOUT, ':utf8');
+get_jobs(@options{qw(site pages)});
+# get_jobs($site, $pages)
+#
+# Walks up to $pages pages of search results for the site named $site
+# (a key of %sites), visits every distinct job link found on each page
+# and prints one entry per job, as produced by format_job, to STDOUT.
+#
+# Dies if $site is not a key of %sites. A field that cannot be located
+# on a job page is reported with a warning and emitted as an empty
+# string instead of aborting the run. Paging stops early when the
+# current results page has no link matching the site's next_selector.
+sub get_jobs {
+    my ($site,$pages) = @_;
+
+    # reject an unknown site before setting up the user agent
+    if (not defined $sites{$site}) {
+        die "Unknown site $site";
+    }
+    my $s = $sites{$site};
+
+    my $m = WWW::Mechanize->new();
+    WWW::Mechanize::TreeBuilder->meta->apply($m);
+    $m->get($s->{url});
+    for my $page (1..$pages) {
+        # the same posting can be linked several times on one page
+        my %seen;
+        my @job_urls = grep { ! $seen{ $_->URI()->abs() }++ }
+            $m->find_all_links(@{$s->{job_selector}});
+        for my $j_u (@job_urls) {
+            $m->get($j_u);
+            my %field;
+            for my $name (qw(university position description date)) {
+                # look_down returns undef in scalar context when nothing
+                # matches; calling as_text() on that would die
+                my $element = $m->tree->look_down(@{$s->{$name}});
+                if (defined $element) {
+                    $field{$name} = $element->as_text();
+                }
+                else {
+                    warn "Could not find $name on ".$j_u->URI->abs()."\n";
+                    $field{$name} = '';
+                }
+            }
+            print format_job($field{university},$field{position},
+                             $j_u->URI->abs(),
+                             $field{description},$field{date});
+            $m->back();
+        }
+        # do not request a page whose results would never be read
+        last if $page >= $pages;
+        # follow_link dies under autocheck when no link matches, so
+        # check first and stop quietly once the results run out
+        last if not defined $m->find_link(@{$s->{next_selector}});
+        $m->follow_link(@{$s->{next_selector}});
+    }
+}
+
+# format_job($university, $position, $url, $text, $date)
+#
+# Returns the text of one org-mode TODO entry: a second-level heading
+# built from $university and $position, a property drawer whose :ID: is
+# taken from the file-level $uuid, a link to $url, $date in square
+# brackets, and the description $text.
+sub format_job {
+    my ($university,$position,$url,$text,$date) = @_;
+    # indent every continuation line of the description by three
+    # spaces; without /g only the first line break was handled
+    $text =~ s/\n\s*/\n   /g;
+    my $ret = <<"EOF";
+** TODO $university -- $position
+   :PROPERTIES:
+   :ID:       $uuid
+   :END:
+[[$url]]
+[$date]
+
+$text
+
+EOF
+    return $ret;
+}
+
+
+
+__END__