#!/usr/bin/perl
# jobs_to_org scrapes job postings from a job board and prints them as
# Org-mode TODO entries,
# and is released under the terms of the GNU GPL version 3, or any
# later version, at your option. See the file README and COPYING for
# more information.
# Copyright 2016 by Don Armstrong.
#
# Provenance (gitweb commitdiff this script was recovered from):
#   From: Don Armstrong
#   Date: Wed, 20 Jan 2016 19:07:15 +0000 (-0600)
#   Subject: add jobs to org
#   X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=e2449280b83e9bd012c5679bf5ba323be186b270;hp=fe833c870d0d127143f16106f6ecb682de3178f6;p=bin.git
#   diff --git a/jobs_to_org b/jobs_to_org (new file mode 100755, index 0000000..2468a78)


use warnings;
use strict;

use Getopt::Long;
use Pod::Usage;

=head1 NAME

jobs_to_org - print job postings from a job board as Org-mode TODO entries

=head1 SYNOPSIS

jobs_to_org [options]

 Options:
   --site, -s      site to scrape (Default herc)
   --pages, -p     number of result pages to process (Default 1)
   --debug, -d     debugging level (Default 0)
   --help, -h      display this help
   --man, -m       display manual

=head1 OPTIONS

=over

=item B<--site, -s>

Name of the configured site to scrape. (Default herc)

=item B<--pages, -p>

Number of search result pages to walk through. (Default 1)

=item B<--debug, -d>

Debug verbosity. (Default 0)

=item B<--help, -h>

Display brief usage information.

=item B<--man, -m>

Display this manual.

=back

=head1 EXAMPLES

jobs_to_org --site herc --pages 3 >> jobs.org

=cut

use OSSP::uuid;
use WWW::Mechanize;
use WWW::Mechanize::TreeBuilder;
use vars qw($DEBUG);
use Data::Printer;

# $uuid is tied to OSSP::uuid; storing ["v1"] selects version-1 UUIDs and
# each later read (e.g. interpolation in format_job) is expected to produce
# a new UUID -- see the OSSP::uuid tie interface.
tie my $uuid, 'OSSP::uuid::tie';
$uuid = ["v1"];

# Per-site scraping configuration.
#   url           - first search result page
#   next_selector - WWW::Mechanize find_link() criteria for the next page
#   job_selector  - WWW::Mechanize find_all_links() criteria for postings
#   university, description, date, position
#                 - HTML::Element look_down() criteria on a posting page
my %sites =
    (herc => {url => 'http://main.hercjobs.org/jobs/search?keywords=professor+AND+%28genomics+OR+bioinformatics+OR+biology+OR+informatics%29&discipline=academic-faculty&category=academic-faculty&category=allied-health&category=biological-biomedical-sciences&category=computer-information-sciences&category=education&category=interdisciplinary&category=mathematics-statistics&category=medical-research&category=physical-sciences&sort=DATE_POSTED+DESC',
              # NOTE(review): the class says "previous-link" although it is
              # used to advance to the next page -- confirm against the site.
              next_selector => [class => "bti-pagination-previous-link bti-pagination-prev-next"],
              job_selector => [url_regex => qr{^\/jobs\/\d+/.+}],
              university => [itemprop=>"hiringOrganization",itemtype=>"http://schema.org/Organization"],
              description => [class=>"bti-jd-description",itemprop=>"description"],
              date => [class=>"bti-jd-detail-text",
                       # Only accept the element whose parent carries the
                       # details-action class; tolerate a missing parent or
                       # a parent without a class attribute.
                       sub {
                           my $parent = $_[0]->parent();
                           return 0 if not defined $parent;
                           my $class = $parent->attr('class');
                           return (defined($class) &&
                                   $class eq 'bti-jd-details-action') ? 1 : 0;
                       }],
              position => [class=>"bti-jd-title",itemprop=>"title"],
             },
    );

my %options = (debug => 0,
               help  => 0,
               man   => 0,
               pages => 1,
               site  => 'herc',
              );

GetOptions(\%options,
           'site|s=s',
           'pages|p=i',
           'debug|d+','help|h|?','man|m');

pod2usage() if $options{help};
pod2usage({verbose=>2}) if $options{man};

$DEBUG = $options{debug};

my @USAGE_ERRORS;
if (not exists $sites{$options{site}}) {
    push @USAGE_ERRORS,
        "Unknown site '$options{site}'; known sites: ".
        join(', ',sort keys %sites);
}
if ($options{pages} < 1) {
    push @USAGE_ERRORS,"--pages must be at least 1";
}

pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;

binmode STDOUT,":encoding(UTF-8)";
get_jobs($options{site},$options{pages});

# get_jobs($site,$pages)
#
# Fetches the search results of the site named $site (a key of %sites),
# visits every distinct job link on up to $pages result pages and prints
# one Org entry per posting to STDOUT.
#
# Dies if $site is not configured; HTTP failures die through
# WWW::Mechanize's default autocheck behaviour.
sub get_jobs {
    my ($site,$pages) = @_;

    if (not defined $sites{$site}) {
        die "Unknown site $site";
    }
    my $s = $sites{$site};

    my $m = WWW::Mechanize->new();
    WWW::Mechanize::TreeBuilder->meta->apply($m);
    $m->get($s->{url});
    for my $page (1..$pages) {
        print STDERR "Processing result page $page\n" if $DEBUG;
        # The same posting can be linked several times on a page; visit
        # each absolute URL only once per page.
        my %seen;
        my @job_links = grep { ! $seen{ $_->URI()->abs() }++ }
            $m->find_all_links(@{$s->{job_selector}});
        for my $link (@job_links) {
            my $job_url = $link->URI->abs();
            print STDERR "Fetching $job_url\n" if $DEBUG;
            $m->get($link);
            my $university  = _field_text($m,$s,'university');
            my $date        = _field_text($m,$s,'date');
            my $description = _field_text($m,$s,'description');
            my $position    = _field_text($m,$s,'position');
            print format_job($university,$position,$job_url,$description,$date);
            # Return to the result page so the next-page link can be found.
            $m->back();
        }
        # Do not request a page that will never be processed.
        last if $page == $pages;
        # follow_link() dies under autocheck when there is no such link;
        # look the link up first so running out of pages ends the run
        # cleanly instead.
        my $next = $m->find_link(@{$s->{next_selector}});
        if (not defined $next) {
            print STDERR "No further result pages after page $page\n" if $DEBUG;
            last;
        }
        $m->get($next);
    }
    return;
}

# _field_text($m,$s,$field)
#
# Returns the text of the first element on the current page of $m that
# matches the look_down() criteria stored under $field in the site
# configuration $s, or the empty string when nothing matches (so a
# posting with a missing field does not abort the whole run).
sub _field_text {
    my ($m,$s,$field) = @_;
    my $node = $m->tree->look_down(@{$s->{$field}});
    if (not defined $node) {
        print STDERR "No '$field' element found\n" if $DEBUG;
        return '';
    }
    return $node->as_text();
}

# format_job($university,$position,$url,$text,$date)
#
# Returns one Org-mode entry (a level-two TODO heading with an ID
# property, a link to $url, $date in square brackets and the description
# $text) as a string.  Undefined arguments are treated as empty strings.
sub format_job {
    my ($university,$position,$url,$text,$date) = @_;
    for my $field ($university,$position,$url,$text,$date) {
        $field = '' if not defined $field;
    }
    # Drop trailing whitespace, then indent every continuation line of the
    # description (all of them, hence /g) so no line of the body starts in
    # column 0.
    $text =~ s/\s+\z//;
    $text =~ s/\n\s*/\n   /g;
    my $ret = <<"EOF";
** TODO $university -- $position
   :PROPERTIES:
   :ID:       $uuid
   :END:
[[$url]]
[$date]

$text

EOF
    return $ret;
}



__END__