From: Don Armstrong
Date: Fri, 5 Oct 2012 00:40:52 +0000 (+0000)
Subject: add pubmed search utility
X-Git-Url: https://git.donarmstrong.com/?p=bin.git;a=commitdiff_plain;h=27516645820eea8fc910745d2e33d50657141b9a

add pubmed search utility
---

# Review notes (free text between "---" and the first "diff --git" is not
# part of the commit).
#
# get_mangareader
#   - The "Begin reading" link lookup and the fetch of that link are
#     commented out, so the page loop starts from the chapter page.
#   - The page-URI pattern makes the slash and the page component after
#     the chapter optional.
#   - Images are located by an alt text matching " - Page \d+" instead of
#     "Loading... (media|img)".
#   - The current URI and the image URL are printed, and the image is
#     fetched after the "getting ..." message instead of before it.
#   - NOTE(review): $image is not checked for undef before url_abs().
# get_one_manga
#   - Installs an HTTP::Cookies jar on the Mechanize object and sets an
#     age_verified cookie for www.1000manga.com.
#   - Accepts "Begin Reading" as well as "Begin reading"; when no such
#     link is found it follows a "Read ... at ... 1000manga.com" link and
#     looks again.
#   - The chapter directory is created after the reader page was fetched.
#   - NOTE(review): neither $temp nor the second $link lookup is checked
#     for undef before url_abs() is called.
# git_pbuilder
#   - OPTIONS starts from the value in the environment (empty if unset)
#     and the etch workaround is appended to it.
#   - ${PDEBUILDOPTS} is inserted into the pdebuild command line.
#   - NOTE(review): as shown here the "-- --basepath" line does not end
#     in a backslash; confirm against the file.
# pubmed_search (new file)
#   - Runs an esearch on the pubmed database with the command line
#     arguments as search term, fetches the matching records with efetch
#     and prints PMID, title and abstract of each MedlineCitation.
#   - Missing PMID/title/abstract elements are printed as empty strings.
#   - All printed text is UTF-8 encoded; wrapping is done by Text::Wrap.
#   - NOTE(review): this hunk was edited by hand; the hunk length is
#     updated but the blob id on its "index" line is stale.
# twidge_update
#   - The text is piped to twidge on stdin instead of being passed as an
#     argument.
#
# NOTE(review): indentation and blank context lines in the hunks for
# existing files were reconstructed from the hunk line counts; regenerate
# the patch from a working tree before applying it.

diff --git a/get_mangareader b/get_mangareader
index 8789b8e..a1de0fe 100755
--- a/get_mangareader
+++ b/get_mangareader
@@ -125,13 +125,17 @@ for my $manga (@manga_to_get) {
             print $chapter_link->url(),qq(\n);
             mkdir("$manga/$chapter_long");
             mm_get($m,$chapter_link->url_abs());
-            my $link = $m->find_link(text_regex => qr{Begin reading});
-            mm_get($m,$link->url_abs());
-            while ($m->uri() =~ m{\Q$chapter\E/(\d\d[^\/]*)/?$}) {
-                my $image = $m->find_image(alt_regex => qr{Loading\.+\s+(media|img)});
+            # my $link = $m->find_link(text_regex => qr{Begin reading});
+            # print $m->content();
+            # exit 0;
+            # mm_get($m,$link->url_abs());
+            print $m->uri()."\n";
+            while ($m->uri() =~ m{\Q$chapter\E/?(\d\d[^\/]*)?/?$}) {
+                my $image = $m->find_image(alt_regex => qr{ - Page \d+});
+                print $image->url_abs()."\n";
                 my $next_link = $m->find_link(url_regex => qr{\Q$manga_escaped\E/\Q$chapter\E/(\d\d[^\/]*)});
-                mm_get($m,$image->url_abs());
                 print "getting ".$image->url_abs()."\n";
+                mm_get($m,$image->url_abs());
                 my ($page) = $image->url_abs =~ m/([^\/]+)$/;
                 $m->save_content("$manga/$chapter_long/$page");
                 last if not defined $next_link;
diff --git a/get_one_manga b/get_one_manga
index 6336726..8e9e8cc 100755
--- a/get_one_manga
+++ b/get_one_manga
@@ -51,6 +51,7 @@ Display this manual.
 
 use URI::Escape;
 use WWW::Mechanize;
+use HTTP::Cookies;
 use IO::Dir;
 use IO::File;
 use vars qw($DEBUG);
@@ -91,6 +92,10 @@ if (not @ARGV) {
 
 my $failure = 0;
 my $m = WWW::Mechanize->new();
+$m->cookie_jar(HTTP::Cookies->new());
+$m->cookie_jar()->set_cookie(1,'age_verified','42','/','www.1000manga.com');
+# use Data::Dumper;
+# print STDERR Dumper($m->cookie_jar());
 for my $manga (@manga_to_get) {
     # see if the manga exists
     mm_get($m,$options{onemanga}.'/'.$manga);
@@ -111,10 +116,19 @@ for my $manga (@manga_to_get) {
         my $chapter_long = $chapter =~ /\./ ? join('.',map {sprintf'%04d',$_} split /\./,$chapter) : sprintf('%04d',$chapter);
         if (! -d "$manga/$chapter_long") {
             print $chapter_link->url(),qq(\n);
-            mkdir("$manga/$chapter_long");
             mm_get($m,$chapter_link->url_abs());
-            my $link = $m->find_link(text_regex => qr{Begin reading});
+            my $link = $m->find_link(text_regex => qr{Begin [Rr]eading});
+            if (not defined $link) {
+                #print $m->content();
+                my $temp = $m->find_link(text_regex => qr{Read.*at.*1000manga\.com});
+                mm_get($m,$temp->url_abs());
+                #print $m->content();
+                $link = $m->find_link(text_regex => qr{Begin [Rr]eading});
+            }
             mm_get($m,$link->url_abs());
+            # print $link->url_abs();
+            # print $m->content();
+            mkdir("$manga/$chapter_long");
             while ($m->uri() =~ m{\Q$chapter\E/(\d\d[^\/]*)/?$}) {
                 my $image = $m->find_image(alt_regex => qr{Loading\.+\s+(media|img)});
                 my $next_link = $m->find_link(url_regex => qr{\Q$manga_escaped\E/\Q$chapter\E/(\d\d[^\/]*)});
diff --git a/git_pbuilder b/git_pbuilder
index 1be9c6a..abb1d3d 100755
--- a/git_pbuilder
+++ b/git_pbuilder
@@ -26,9 +26,9 @@ else
     BACKPORT="";
 fi;
 
-OPTIONS=""
+OPTIONS="${OPTIONS:-}"
 if [ "$DIST" = "etch" ]; then
-    OPTIONS="--debian-etch-workaround"
+    OPTIONS="$OPTIONS --debian-etch-workaround"
 fi;
 
 if [ -z "$BUILDRESULT" ]; then
@@ -37,5 +37,6 @@ fi;
 
 pdebuild --buildresult "$BUILDRESULT" \
     --debbuildopts "-i\.git -I.git $*" \
+    ${PDEBUILDOPTS} \
     -- --basepath ~/pbuilder/base_"${DIST}${BACKPORT}${ARCH}"
     ${OPTIONS} "$@"
diff --git a/pubmed_search b/pubmed_search
new file mode 100755
index 0000000..b5272e3
--- /dev/null
+++ b/pubmed_search
@@ -0,0 +1,135 @@
+#! /usr/bin/perl
+# This program is released
+# under the terms of the GPL version 2, or any later version, at your
+# option. See the file README and COPYING for more information.
+# Copyright 2011 by Don Armstrong <don@donarmstrong.com>.
+# $Id: perl_script 1825 2011-01-02 01:53:43Z don $
+
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+use Bio::DB::EUtilities;
+use XML::LibXML;
+
+use Encode qw(encode_utf8);
+use Term::ANSIColor qw(:constants);
+use Text::Wrap;
+
+
+=head1 NAME
+
+pubmed_search - search PubMed and print the PMID, title and abstract of each hit
+
+=head1 SYNOPSIS
+
+ pubmed_search [options] [searchterms]
+
+ Options:
+  --debug, -d          debugging level (Default 0)
+  --help, -h           display this help
+  --man, -m            display manual
+
+=head1 OPTIONS
+
+=over
+
+=item B<--debug, -d>
+
+Debug verbosity. (Default 0)
+
+=item B<--help, -h>
+
+Display brief usage information.
+
+=item B<--man, -m>
+
+Display this manual.
+
+=back
+
+=head1 EXAMPLES
+
+ pubmed_search lupus nephritis
+
+=cut
+
+
+use vars qw($DEBUG);
+
+my %options = (debug => 0,
+               help => 0,
+               man => 0,
+              );
+
+GetOptions(\%options,
+           'debug|d+','help|h|?','man|m');
+
+pod2usage() if $options{help};
+pod2usage({verbose=>2}) if $options{man};
+
+$DEBUG = $options{debug};
+
+my @USAGE_ERRORS;
+if (not @ARGV) {
+    push @USAGE_ERRORS,"You must pass something";
+}
+
+pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
+
+
+# Ask esearch for the PMIDs matching the search terms (at most 1000).
+my $search = Bio::DB::EUtilities->new(-eutil => 'esearch',
+                                      -email => 'don@donarmstrong.com',
+                                      -db => 'pubmed',
+                                      -term => join(' ',@ARGV),
+                                      -retmax => 1000,
+                                     );
+my @ids = $search->get_ids();
+print scalar(@ids)." results:\n";
+# Nothing matched: do not send an efetch request with an empty id list.
+exit 0 if not @ids;
+
+# Fetch the records for those PMIDs and parse the response as XML.
+my $efetch = Bio::DB::EUtilities->new(-eutil => 'efetch',
+                                      -email => 'don@donarmstrong.com',
+                                      -db => 'pubmed',
+                                      -id => \@ids
+                                     );
+my $xml = XML::LibXML->load_xml(string => $efetch->get_Response()->content());
+for my $article ($xml->findnodes('PubmedArticleSet/PubmedArticle/MedlineCitation')) {
+    my ($pmid) = $article->findnodes('./PMID');
+    my ($title) = $article->findnodes('./Article/ArticleTitle');
+    my ($abstract) = $article->findnodes('./Article/Abstract');
+
+    # findnodes() returns an empty list when an element is absent, which
+    # leaves the variable undefined; fall back to an empty string instead
+    # of calling textContent() on undef.
+    my $pmid_text = defined $pmid ? $pmid->textContent() : '';
+    my $title_text = defined $title ? $title->textContent() : '';
+    my $abstract_text = defined $abstract ? $abstract->textContent() : '';
+
+    # Drop leading whitespace on every line; Text::Wrap does the actual
+    # line wrapping below.
+    $abstract_text =~ s/^\s+//mg;
+
+    # Encode the PMID and title as well as the abstract so that non-ASCII
+    # text does not trigger "Wide character" warnings, and wrap before
+    # encoding so that the width is counted in characters, not bytes.
+    print BOLD GREEN;
+    print encode_utf8($pmid_text);
+    print ": ";
+    print RESET;
+    print BOLD CYAN;
+    print encode_utf8($title_text)."\n";
+    print RESET;
+    print BOLD MAGENTA;
+    print encode_utf8(wrap('','',$abstract_text));
+    print "\n\n";
+    print RESET;
+}
+
+__END__
diff --git a/twidge_update b/twidge_update
index cf2e8ea..e89a193 100755
--- a/twidge_update
+++ b/twidge_update
@@ -2,5 +2,5 @@
 # stolen from https://github.com/jgoerzen/twidge/wiki/HOWTOs
 echo "---------|---------|---------|---------|--------=|=--------|---------|---------|---------|--------=|=--------|---------|---------|---------|"
 read TXT
-twidge -c ${HOME}/.hide/twidge_twitter update "${TXT}"
-twidge -c ${HOME}/.hide/twidge_identica update "${TXT}"
+echo "${TXT}" |twidge -c ${HOME}/.hide/twidge_twitter update
+echo "${TXT}" |twidge -c ${HOME}/.hide/twidge_identica update