X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=jobs_to_org;fp=jobs_to_org;h=6a25a06aab30410ea7db8e392746b8683f32dc93;hb=c72d1c1ad08debdd905929af959a242956833482;hp=2468a78499d42bd9882288dda4949517d8dc5b45;hpb=e2449280b83e9bd012c5679bf5ba323be186b270;p=bin.git diff --git a/jobs_to_org b/jobs_to_org index 2468a78..6a25a06 100755 --- a/jobs_to_org +++ b/jobs_to_org @@ -54,6 +54,7 @@ use WWW::Mechanize; use WWW::Mechanize::TreeBuilder; use vars qw($DEBUG); use Data::Printer; +use Text::Wrap; tie my $uuid, 'OSSP::uuid::tie'; $uuid= ["v1"]; @@ -84,7 +85,7 @@ pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; my %sites = (herc => {url => 'http://main.hercjobs.org/jobs/search?keywords=professor+AND+%28genomics+OR+bioinformatics+OR+biology+OR+informatics%29&discipline=academic-faculty&category=academic-faculty&category=allied-health&category=biological-biomedical-sciences&category=computer-information-sciences&category=education&category=interdisciplinary&category=mathematics-statistics&category=medical-research&category=physical-sciences&sort=DATE_POSTED+DESC', - next_selector => [class => "bti-pagination-previous-link bti-pagination-prev-next"], + next_selector => [class => "bti-pagination-previous-link bti-pagination-prev-next",text => '>'], job_selector => [url_regex => qr{^\/jobs\/\d+/.+}], university => [itemprop=>"hiringOrganization",itemtype=>"http://schema.org/Organization"], description => [class=>"bti-jd-description",itemprop=>"description"], @@ -119,13 +120,13 @@ sub get_jobs { print format_job($university,$position,$j_u->URI->abs(),$description,$date); $m->back(); } - $m->follow_link(@{$s->{next_selector}}); + $m->follow_link(@{$s->{next_selector}}) or die "Unable to find next link"; } } sub format_job { my ($university,$position,$url,$text,$date) = @_; - $text =~ s/(\n)(\s*)/$1 /m; + $text = wrap(' ',' ',$text); my $ret = <<"EOF"; ** TODO $university -- $position :PROPERTIES: