From: Don Armstrong <don@donarmstrong.com>
Date: Fri, 5 Oct 2012 00:40:52 +0000 (+0000)
Subject: add pubmed search utility
X-Git-Url: https://git.donarmstrong.com/?p=bin.git;a=commitdiff_plain;h=27516645820eea8fc910745d2e33d50657141b9a

add pubmed search utility
---

diff --git a/get_mangareader b/get_mangareader
index 8789b8e..a1de0fe 100755
--- a/get_mangareader
+++ b/get_mangareader
@@ -125,13 +125,17 @@ for my $manga (@manga_to_get) {
 	    print $chapter_link->url(),qq(\n);
 	    mkdir("$manga/$chapter_long");
 	    mm_get($m,$chapter_link->url_abs());
-	    my $link = $m->find_link(text_regex => qr{Begin reading});
-	    mm_get($m,$link->url_abs());
-	    while ($m->uri() =~ m{\Q$chapter\E/(\d\d[^\/]*)/?$}) {
-		my $image = $m->find_image(alt_regex => qr{Loading\.+\s+(media|img)});
+	    # my $link = $m->find_link(text_regex => qr{Begin reading});
+	    # print $m->content();
+	    # exit 0;
+	    # mm_get($m,$link->url_abs());
+	    print $m->uri()."\n";
+	    while ($m->uri() =~ m{\Q$chapter\E/?(\d\d[^\/]*)?/?$}) {
+		my $image = $m->find_image(alt_regex => qr{ - Page \d+});
+		print $image->url_abs()."\n";
 		my $next_link = $m->find_link(url_regex => qr{\Q$manga_escaped\E/\Q$chapter\E/(\d\d[^\/]*)});
-		mm_get($m,$image->url_abs());
 		print "getting ".$image->url_abs()."\n";
+		mm_get($m,$image->url_abs());
 		my ($page) = $image->url_abs =~ m/([^\/]+)$/;
 		$m->save_content("$manga/$chapter_long/$page");
 		last if not defined $next_link;
diff --git a/get_one_manga b/get_one_manga
index 6336726..8e9e8cc 100755
--- a/get_one_manga
+++ b/get_one_manga
@@ -51,6 +51,7 @@ Display this manual.
 
 use URI::Escape;
 use WWW::Mechanize;
+use HTTP::Cookies;
 use IO::Dir;
 use IO::File;
 use vars qw($DEBUG);
@@ -91,6 +92,10 @@ if (not @ARGV) {
 
 my $failure = 0;
 my $m = WWW::Mechanize->new();
+$m->cookie_jar(HTTP::Cookies->new());
+$m->cookie_jar()->set_cookie(1,'age_verified','42','/','www.1000manga.com');
+# use Data::Dumper;
+# print STDERR Dumper($m->cookie_jar());
 for my $manga (@manga_to_get) {
     # see if the manga exists
     mm_get($m,$options{onemanga}.'/'.$manga);
@@ -111,10 +116,19 @@ for my $manga (@manga_to_get) {
 	my $chapter_long = $chapter =~ /\./ ? join('.',map {sprintf'%04d',$_} split /\./,$chapter) : sprintf('%04d',$chapter);
 	if (! -d "$manga/$chapter_long") {
 	    print $chapter_link->url(),qq(\n);
-	    mkdir("$manga/$chapter_long");
 	    mm_get($m,$chapter_link->url_abs());
-	    my $link = $m->find_link(text_regex => qr{Begin reading});
+	    my $link = $m->find_link(text_regex => qr{Begin [Rr]eading});
+	    if (not defined $link) {
+		#print $m->content();
+		my $temp = $m->find_link(text_regex => qr{Read.*at.*1000manga\.com});
+		mm_get($m,$temp->url_abs());
+		#print $m->content();
+		$link = $m->find_link(text_regex => qr{Begin [Rr]eading});
+	    }
 	    mm_get($m,$link->url_abs());
+	    # print $link->url_abs();
+	    # print $m->content();
+	    mkdir("$manga/$chapter_long");
 	    while ($m->uri() =~ m{\Q$chapter\E/(\d\d[^\/]*)/?$}) {
 		my $image = $m->find_image(alt_regex => qr{Loading\.+\s+(media|img)});
 		my $next_link = $m->find_link(url_regex => qr{\Q$manga_escaped\E/\Q$chapter\E/(\d\d[^\/]*)});
diff --git a/git_pbuilder b/git_pbuilder
index 1be9c6a..abb1d3d 100755
--- a/git_pbuilder
+++ b/git_pbuilder
@@ -26,9 +26,9 @@ else
     BACKPORT="";
 fi;
 
-OPTIONS=""
+OPTIONS="${OPTIONS:-}"
 if [ "$DIST" = "etch" ]; then
-    OPTIONS="--debian-etch-workaround"
+    OPTIONS="$OPTIONS --debian-etch-workaround"
 fi;
 
 if [ -z "$BUILDRESULT" ]; then
@@ -37,5 +37,6 @@ fi;
 
 pdebuild --buildresult "$BUILDRESULT" \
     --debbuildopts "-i\.git -I.git $*" \
+    ${PDEBUILDOPTS} \
     -- --basepath ~/pbuilder/base_"${DIST}${BACKPORT}${ARCH}" ${OPTIONS} "$@"
 
diff --git a/pubmed_search b/pubmed_search
new file mode 100755
index 0000000..b5272e3
--- /dev/null
+++ b/pubmed_search
@@ -0,0 +1,144 @@
+#! /usr/bin/perl
+# pubmed_search searches PubMed from the command line, and is released
+# under the terms of the GPL version 2, or any later version, at your
+# option. See the file README and COPYING for more information.
+# Copyright 2011 by Don Armstrong <don@donarmstrong.com>.
+# $Id: perl_script 1825 2011-01-02 01:53:43Z don $
+
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+use Bio::DB::EUtilities;
+use XML::LibXML;
+
+use Encode qw(encode_utf8);
+use Term::ANSIColor qw(:constants);
+use Text::Wrap;
+
+
+=head1 NAME
+
+pubmed_search - search PubMed and print the PMID, title and abstract of each match
+
+=head1 SYNOPSIS
+
+ pubmed_search [options] searchterms
+
+ Options:
+  --debug, -d debugging level (Default 0)
+  --help, -h display this help
+  --man, -m display manual
+
+=head1 DESCRIPTION
+
+The search terms are joined with spaces and submitted to PubMed as a
+single query. For at most 1000 matching articles the PMID, the title
+and the abstract (when the record has one) are printed in color.
+
+=head1 OPTIONS
+
+=over
+
+=item B<--debug, -d>
+
+Debug verbosity. (Default 0)
+
+=item B<--help, -h>
+
+Display brief usage information.
+
+=item B<--man, -m>
+
+Display this manual.
+
+=back
+
+=head1 EXAMPLES
+
+ pubmed_search systemic lupus erythematosus
+
+=cut
+
+
+use vars qw($DEBUG);
+
+my %options = (debug           => 0,
+               help            => 0,
+               man             => 0,
+              );
+
+GetOptions(\%options,
+           'debug|d+','help|h|?','man|m');
+
+pod2usage() if $options{help};
+pod2usage({verbose=>2}) if $options{man};
+
+$DEBUG = $options{debug};
+
+my @USAGE_ERRORS;
+if (not @ARGV) {
+    push @USAGE_ERRORS,"You must provide at least one search term";
+}
+
+pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
+
+
+# Return the text content of an XML::LibXML node, or the empty string
+# when the node is missing (findnodes matched nothing).
+sub node_text {
+    my ($node) = @_;
+    return defined $node ? $node->textContent() : '';
+}
+
+# esearch: collect the PMIDs matching the query (at most 1000)
+my $search = Bio::DB::EUtilities->new(-eutil  => 'esearch',
+                                      -email  => 'don@donarmstrong.com',
+                                      -db     => 'pubmed',
+                                      -term   => join(' ',@ARGV),
+                                      -retmax => 1000,
+                                     );
+my @ids = $search->get_ids();
+print scalar(@ids)." results:\n";
+# nothing matched, so there is nothing to fetch; do not send an efetch
+# request with an empty id list
+exit 0 if not @ids;
+
+# efetch: retrieve the full records for those PMIDs
+my $efetch = Bio::DB::EUtilities->new(-eutil => 'efetch',
+                                      -email => 'don@donarmstrong.com',
+                                      -db    => 'pubmed',
+                                      -id    => \@ids,
+                                     );
+my $xml = XML::LibXML->load_xml(string => $efetch->get_Response()->content());
+for my $article ($xml->findnodes('PubmedArticleSet/PubmedArticle/MedlineCitation')) {
+    my ($pmid) = $article->findnodes('./PMID');
+    my ($title) = $article->findnodes('./Article/ArticleTitle');
+    my ($abstract) = $article->findnodes('./Article/Abstract');
+    # all text is encoded explicitly so that non-ASCII characters in
+    # titles and abstracts do not trigger "Wide character" warnings
+    print BOLD GREEN;
+    print encode_utf8(node_text($pmid));
+    print ": ";
+    print RESET;
+    print BOLD CYAN;
+    print encode_utf8(node_text($title))."\n";
+    print RESET;
+    # not every record has an abstract
+    my $abstract_text = node_text($abstract);
+    # strip the indentation and blank lines left over from the XML layout
+    $abstract_text =~ s/^\s*//mg;
+    if (length $abstract_text) {
+        print BOLD MAGENTA;
+        # wrap the decoded characters first and encode afterwards, so
+        # line widths are counted in characters rather than bytes
+        print encode_utf8(wrap('','',$abstract_text));
+        print RESET;
+    }
+    print "\n\n";
+}
+
+__END__
diff --git a/twidge_update b/twidge_update
index cf2e8ea..e89a193 100755
--- a/twidge_update
+++ b/twidge_update
@@ -2,5 +2,5 @@
 # stolen from https://github.com/jgoerzen/twidge/wiki/HOWTOs
 echo "---------|---------|---------|---------|--------=|=--------|---------|---------|---------|--------=|=--------|---------|---------|---------|"
 read TXT
-twidge -c ${HOME}/.hide/twidge_twitter update "${TXT}" 
-twidge -c ${HOME}/.hide/twidge_identica update "${TXT}"
+echo "${TXT}" |twidge -c ${HOME}/.hide/twidge_twitter update
+echo "${TXT}" |twidge -c ${HOME}/.hide/twidge_identica update