#!/usr/bin/perl
-# SCRIPTNAME DOES_SOMETHING
+# jobs_to_org finds jobs on different job sites and turns them into Org-mode TODO entries
# and is released under the terms of the GNU GPL version 3, or any
# later version, at your option. See the file README and COPYING for
# more information.
=head1 NAME
-SCRIPTNAME - DOES_SOMETHING
+jobs_to_org - finds jobs on different job sites and turns them into Org-mode TODO entries
=head1 SYNOPSIS
-SCRIPTNAME [options]
+jobs_to_org [options]
Options:
--debug, -d debugging level (Default 0)
=head1 EXAMPLES
-SCRIPTNAME
+jobs_to_org
=cut
use WWW::Mechanize::TreeBuilder;
use vars qw($DEBUG);
use Data::Printer;
+use Text::Wrap;
+use POSIX qw(strftime);
tie my $uuid, 'OSSP::uuid::tie';
$uuid= ["v1"];
my %sites =
(herc => {url => 'http://main.hercjobs.org/jobs/search?keywords=professor+AND+%28genomics+OR+bioinformatics+OR+biology+OR+informatics%29&discipline=academic-faculty&category=academic-faculty&category=allied-health&category=biological-biomedical-sciences&category=computer-information-sciences&category=education&category=interdisciplinary&category=mathematics-statistics&category=medical-research&category=physical-sciences&sort=DATE_POSTED+DESC',
- next_selector => [class => "bti-pagination-previous-link bti-pagination-prev-next"],
+ next_selector => [class => "bti-pagination-previous-link bti-pagination-prev-next",text => '>'],
job_selector => [url_regex => qr{^\/jobs\/\d+/.+}],
university => [itemprop=>"hiringOrganization",itemtype=>"http://schema.org/Organization"],
description => [class=>"bti-jd-description",itemprop=>"description"],
sub {scalar $_[0]->parent()->attr('class') eq 'bti-jd-details-action'}],
position => [class=>"bti-jd-title",itemprop=>"title"],
},
+ nature => {url => 'http://www.nature.com/naturejobs/science/jobs?utf8=%E2%9C%93&q%5B%5D=professor&job_type%5B%5D=Assistant+Professor&job_type%5B%5D=Professor&order_by=created_on',
+ next_selector => [class=>"next_page",url_regex=>qr{^/naturejobs/science/jobs},],
+ job_selector => [url_regex => qr{^/naturejobs/science/jobs/\d+-.+}],
+ university => [href => qr{^/naturejobs/science/employer-directory/\d+$}],
+ description => [class=>"job-description"],
+ date => [],
+ position => [class=>'job-title heading'],
+ },
+ vitae => {url => 'https://chroniclevitae.com/job_search?job_search%5Bdistance_from_zip%5D=10&job_search%5Bemployment_type%5D=Full-time&job_search%5Bposition_type%5D=63',
+ next_selector => [text_regex => qr{Next}, url_regex=>qr{^/job_search}],
+ job_selector => [url_regex => qr{/jobs/\d+-\d+$}],
+ university => [href => qr{/institutions/\d+$}],
+ description => [class => 'job-listing__content__description'],
+ date => [_tag => 'td', content => qr/\,\s+20\d{2}$/],
+ position => [_tag => 'h1',
+ sub {scalar $_[0]->parent()->attr('class') eq 'page-title page-title--two-col'},
+ ],
+ },
);
binmode STDOUT,":utf8";
get_jobs($options{site},$options{pages});
+
sub get_jobs {
my ($site,$pages) = @_;
+ my $todays_date = strftime('%Y-%m-%d %H:%M:%S',localtime());
+
my $m = WWW::Mechanize->new();
WWW::Mechanize::TreeBuilder->meta->apply($m);
if (not defined $sites{$site}) {
$m->find_all_links(@{$s->{job_selector}});
for my $j_u (@job_urls) {
$m->get($j_u);
- my $university = $m->tree->look_down(@{$s->{university}})->as_text();
- my $date = $m->tree->look_down(@{$s->{date}})->as_text();
- my $description = $m->tree->look_down(@{$s->{description}})->as_text();
- my $position = $m->tree->look_down(@{$s->{position}})->as_text();
+ my $university = 'No university';
+ eval {
+ $university = $m->tree->look_down(@{$s->{university}})->as_text();
+ };
+ my $date = $todays_date;
+ eval {
+ $date = $m->tree->look_down(@{$s->{date}})->as_text() // $todays_date if
+ @{$s->{date}};
+ };
+ my $description = 'unknown';
+ eval {
+ $description = $m->tree->look_down(@{$s->{description}})->as_text();
+ };
+ my $position = 'Unknown';
+ eval {
+ $position = $m->tree->look_down(@{$s->{position}})->as_text();
+ };
print format_job($university,$position,$j_u->URI->abs(),$description,$date);
$m->back();
}
- $m->follow_link(@{$s->{next_selector}});
+ $m->follow_link(@{$s->{next_selector}}) or die "Unable to find next link";
}
}
sub format_job {
my ($university,$position,$url,$text,$date) = @_;
- $text =~ s/(\n)(\s*)/$1 /m;
+ $text = wrap(' ',' ',$text);
my $ret = <<"EOF";
** TODO $university -- $position
:PROPERTIES: