add pubmed search utility

author Don Armstrong <don@donarmstrong.com>

Fri, 5 Oct 2012 00:40:52 +0000 (00:40 +0000)

committer Don Armstrong <don@donarmstrong.com>

Fri, 5 Oct 2012 00:40:52 +0000 (00:40 +0000)
author Don Armstrong <don@donarmstrong.com>
Fri, 5 Oct 2012 00:40:52 +0000 (00:40 +0000)
committer Don Armstrong <don@donarmstrong.com>
Fri, 5 Oct 2012 00:40:52 +0000 (00:40 +0000)
diff --git a/get_mangareader b/get_mangareader

index 8789b8e8f4749bc6d972ee3d4f8cf694553bdba8..a1de0fe1ed762446a8e08392b96a20a5b69cbcb8 100755 (executable)
--- a/get_mangareader
+++ b/get_mangareader
@@ -125,13 +125,17 @@ for my $manga (@manga_to_get) {
             print $chapter_link->url(),qq(\n);
             mkdir("$manga/$chapter_long");
             mm_get($m,$chapter_link->url_abs());
-           my $link = $m->find_link(text_regex => qr{Begin reading});
-           mm_get($m,$link->url_abs());
-           while ($m->uri() =~ m{\Q$chapter\E/(\d\d[^\/]*)/?$}) {
-               my $image = $m->find_image(alt_regex => qr{Loading\.+\s+(media|img)});
+           # my $link = $m->find_link(text_regex => qr{Begin reading});
+           # print $m->content();
+           # exit 0;
+           # mm_get($m,$link->url_abs());
+           print $m->uri()."\n";
+           while ($m->uri() =~ m{\Q$chapter\E/?(\d\d[^\/]*)?/?$}) {
+               my $image = $m->find_image(alt_regex => qr{ - Page \d+});
+               print $image->url_abs()."\n";
                 my $next_link = $m->find_link(url_regex => qr{\Q$manga_escaped\E/\Q$chapter\E/(\d\d[^\/]*)});
-               mm_get($m,$image->url_abs());
                 print "getting ".$image->url_abs()."\n";
+               mm_get($m,$image->url_abs());
                 my ($page) = $image->url_abs =~ m/([^\/]+)$/;
                 $m->save_content("$manga/$chapter_long/$page");
                 last if not defined $next_link;
diff --git a/get_one_manga b/get_one_manga

index 6336726bff3ef6c493068899993be5325d991267..8e9e8cc0fea7f4182976cc9c5c6669c73f7bbe02 100755 (executable)
--- a/get_one_manga
+++ b/get_one_manga
@@ -51,6 +51,7 @@ Display this manual.
  
  use URI::Escape;
  use WWW::Mechanize;
+use HTTP::Cookies;
  use IO::Dir;
  use IO::File;
  use vars qw($DEBUG);
@@ -91,6 +92,10 @@ if (not @ARGV) {
  
  my $failure = 0;
  my $m = WWW::Mechanize->new();
+$m->cookie_jar(HTTP::Cookies->new());
+$m->cookie_jar()->set_cookie(1,'age_verified','42','/','www.1000manga.com');
+# use Data::Dumper;
+# print STDERR Dumper($m->cookie_jar());
  for my $manga (@manga_to_get) {
      # see if the manga exists
      mm_get($m,$options{onemanga}.'/'.$manga);
@@ -111,10 +116,19 @@ for my $manga (@manga_to_get) {
         my $chapter_long = $chapter =~ /\./ ? join('.',map {sprintf'%04d',$_} split /\./,$chapter) : sprintf('%04d',$chapter);
         if (! -d "$manga/$chapter_long") {
             print $chapter_link->url(),qq(\n);
-           mkdir("$manga/$chapter_long");
             mm_get($m,$chapter_link->url_abs());
-           my $link = $m->find_link(text_regex => qr{Begin reading});
+           my $link = $m->find_link(text_regex => qr{Begin [Rr]eading});
+           if (not defined $link) {
+               #print $m->content();
+               my $temp = $m->find_link(text_regex => qr{Read.*at.*1000manga\.com});
+               mm_get($m,$temp->url_abs());
+               #print $m->content();
+               $link = $m->find_link(text_regex => qr{Begin [Rr]eading});
+           }
             mm_get($m,$link->url_abs());
+           # print $link->url_abs();
+           # print $m->content();
+           mkdir("$manga/$chapter_long");
             while ($m->uri() =~ m{\Q$chapter\E/(\d\d[^\/]*)/?$}) {
                 my $image = $m->find_image(alt_regex => qr{Loading\.+\s+(media|img)});
                 my $next_link = $m->find_link(url_regex => qr{\Q$manga_escaped\E/\Q$chapter\E/(\d\d[^\/]*)});
diff --git a/git_pbuilder b/git_pbuilder

index 1be9c6a4b08768aee694494211ed0242b19a5111..abb1d3dfda44e094f139f1c72f95f6c90fe76059 100755 (executable)
--- a/git_pbuilder
+++ b/git_pbuilder
@@ -26,9 +26,9 @@ else
      BACKPORT="";
  fi;
  
-OPTIONS=""
+OPTIONS="${OPTIONS:-}"
  if [ "$DIST" = "etch" ]; then
-    OPTIONS="--debian-etch-workaround"
+    OPTIONS="$OPTIONS --debian-etch-workaround"
  fi;
  
  if [ -z "$BUILDRESULT" ]; then
@@ -37,5 +37,6 @@ fi;
  
  pdebuild --buildresult "$BUILDRESULT" \
      --debbuildopts "-i\.git -I.git $*" \
+    ${PDEBUILDOPTS} \
      -- --basepath ~/pbuilder/base_"${DIST}${BACKPORT}${ARCH}" ${OPTIONS} "$@"
  
diff --git a/pubmed_search b/pubmed_search

new file mode 100755 (executable)

index 0000000..b5272e3
--- /dev/null
+++ b/pubmed_search
@@ -0,0 +1,119 @@
+#! /usr/bin/perl
+# This program is released
+# under the terms of the GPL version 2, or any later version, at your
+# option. See the file README and COPYING for more information.
+# Copyright 2011 by Don Armstrong <don@donarmstrong.com>.
+# $Id: perl_script 1825 2011-01-02 01:53:43Z don $
+
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+use Bio::DB::EUtilities;
+
+use Encode qw(encode_utf8);
+use Term::ANSIColor qw(:constants);
+use Text::Wrap;
+
+
+=head1 NAME
+
+pubmed_search - search PubMed and print the PMID, title and abstract of each match
+
+=head1 SYNOPSIS
+
+ pubmed_search [options] [searchterms]
+
+ Options:
+  --debug, -d debugging level (Default 0)
+  --help, -h display this help
+  --man, -m display manual
+
+=head1 OPTIONS
+
+=over
+
+=item B<--debug, -d>
+
+Debug verbosity. (Default 0)
+
+=item B<--help, -h>
+
+Display brief usage information.
+
+=item B<--man, -m>
+
+Display this manual.
+
+=back
+
+=head1 EXAMPLES
+
+
+=cut
+
+
+# Parse the command line; the remaining @ARGV words form the search query.
+use vars qw($DEBUG);
+my %options = (debug           => 0,
+              help            => 0,
+              man             => 0,
+              );
+# GetOptions returns false on unknown or malformed options; bail out with
+# the usage text instead of silently searching with a mangled @ARGV.
+GetOptions(\%options,
+          'debug|d+','help|h|?','man|m') or pod2usage(2);
+
+pod2usage() if $options{help};
+pod2usage({verbose=>2}) if $options{man};
+
+$DEBUG = $options{debug};
+
+my @USAGE_ERRORS;
+if (not @ARGV) {
+    push @USAGE_ERRORS,"You must pass at least one search term";
+}
+pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
+
+
+# Search PubMed for the command-line terms, then print the PMID, title and
+# line-wrapped abstract of every hit, coloured with Term::ANSIColor constants.
+my $search = Bio::DB::EUtilities->new(-eutil => 'esearch',
+                                      -email => 'don@donarmstrong.com',
+                                      -db    => 'pubmed',
+                                      -term => join(' ',@ARGV),
+                                      -retmax => 1000,
+                                     );
+my @ids = $search->get_ids();
+print scalar(@ids)." results:\n";
+# Nothing matched: stop here rather than issue an efetch with no ids.
+exit 0 if not @ids;
+my $efetch = Bio::DB::EUtilities->new(-eutil => 'efetch',
+                                      -email => 'don@donarmstrong.com',
+                                      -db    => 'pubmed',
+                                      -id    => \@ids,
+                                     );
+use XML::LibXML;
+my $xml = XML::LibXML->load_xml(string => $efetch->get_Response()->content());
+for my $article ($xml->findnodes('PubmedArticleSet/PubmedArticle/MedlineCitation')) {
+    # Take the first node for each path; a record lacking one (commonly the
+    # abstract) yields '' instead of dying on an undefined node.
+    my ($pmid,$title,$abstract) = map {
+        my ($node) = $article->findnodes($_);
+        defined $node ? $node->textContent() : '';
+    } qw(./PMID ./Article/ArticleTitle ./Article/Abstract);
+    # Drop the leading whitespace the XML layout leaves on each line.
+    $abstract =~ s/^\s*//mg;
+    # Line breaking is left to Text::Wrap alone: a s/(.{,80})\s/$1\n/g pass
+    # only parses as a quantifier on perl >= 5.34 and matches literally before.
+    # Wrap the character string first and encode afterwards, so that widths
+    # are counted in characters and the title is UTF-8 encoded as well.
+    print BOLD, GREEN, encode_utf8($pmid), ": ", RESET;
+    print BOLD, CYAN, encode_utf8($title), "\n", RESET;
+    print BOLD, MAGENTA, encode_utf8(wrap('','',$abstract)), "\n\n", RESET;
+}
+
+__END__
diff --git a/twidge_update b/twidge_update

index cf2e8eaaf67423fbf24198b59c44110eaa297bd2..e89a1935eeaaf790d6ab8b3cf82b7c7beadbd194 100755 (executable)
--- a/twidge_update
+++ b/twidge_update
@@ -2,5 +2,5 @@
  # stolen from https://github.com/jgoerzen/twidge/wiki/HOWTOs
  echo "---------|---------|---------|---------|--------=|=--------|---------|---------|---------|--------=|=--------|---------|---------|---------|"
  read TXT
-twidge -c ${HOME}/.hide/twidge_twitter update "${TXT}" 
-twidge -c ${HOME}/.hide/twidge_identica update "${TXT}"
+echo "${TXT}" |twidge -c ${HOME}/.hide/twidge_twitter update
+echo "${TXT}" |twidge -c ${HOME}/.hide/twidge_identica update
author	Don Armstrong <don@donarmstrong.com>
	Fri, 5 Oct 2012 00:40:52 +0000 (00:40 +0000)
committer	Don Armstrong <don@donarmstrong.com>
	Fri, 5 Oct 2012 00:40:52 +0000 (00:40 +0000)
get_mangareader		patch \| blob \| history
get_one_manga		patch \| blob \| history
git_pbuilder		patch \| blob \| history
pubmed_search	[new file with mode: 0755]	patch \| blob
twidge_update		patch \| blob \| history