]> git.donarmstrong.com Git - bin.git/commitdiff
add pubmed search utility
authorDon Armstrong <don@donarmstrong.com>
Fri, 5 Oct 2012 00:40:52 +0000 (00:40 +0000)
committerDon Armstrong <don@donarmstrong.com>
Fri, 5 Oct 2012 00:40:52 +0000 (00:40 +0000)
get_mangareader
get_one_manga
git_pbuilder
pubmed_search [new file with mode: 0755]
twidge_update

index 8789b8e8f4749bc6d972ee3d4f8cf694553bdba8..a1de0fe1ed762446a8e08392b96a20a5b69cbcb8 100755 (executable)
@@ -125,13 +125,17 @@ for my $manga (@manga_to_get) {
            print $chapter_link->url(),qq(\n);
            mkdir("$manga/$chapter_long");
            mm_get($m,$chapter_link->url_abs());
-           my $link = $m->find_link(text_regex => qr{Begin reading});
-           mm_get($m,$link->url_abs());
-           while ($m->uri() =~ m{\Q$chapter\E/(\d\d[^\/]*)/?$}) {
-               my $image = $m->find_image(alt_regex => qr{Loading\.+\s+(media|img)});
+           # my $link = $m->find_link(text_regex => qr{Begin reading});
+           # print $m->content();
+           # exit 0;
+           # mm_get($m,$link->url_abs());
+           print $m->uri()."\n";
+           while ($m->uri() =~ m{\Q$chapter\E/?(\d\d[^\/]*)?/?$}) {
+               my $image = $m->find_image(alt_regex => qr{ - Page \d+});
+               print $image->url_abs()."\n";
                my $next_link = $m->find_link(url_regex => qr{\Q$manga_escaped\E/\Q$chapter\E/(\d\d[^\/]*)});
-               mm_get($m,$image->url_abs());
                print "getting ".$image->url_abs()."\n";
+               mm_get($m,$image->url_abs());
                my ($page) = $image->url_abs =~ m/([^\/]+)$/;
                $m->save_content("$manga/$chapter_long/$page");
                last if not defined $next_link;
index 6336726bff3ef6c493068899993be5325d991267..8e9e8cc0fea7f4182976cc9c5c6669c73f7bbe02 100755 (executable)
@@ -51,6 +51,7 @@ Display this manual.
 
 use URI::Escape;
 use WWW::Mechanize;
+use HTTP::Cookies;
 use IO::Dir;
 use IO::File;
 use vars qw($DEBUG);
@@ -91,6 +92,10 @@ if (not @ARGV) {
 
 my $failure = 0;
 my $m = WWW::Mechanize->new();
+$m->cookie_jar(HTTP::Cookies->new());
+$m->cookie_jar()->set_cookie(1,'age_verified','42','/','www.1000manga.com');
+# use Data::Dumper;
+# print STDERR Dumper($m->cookie_jar());
 for my $manga (@manga_to_get) {
     # see if the manga exists
     mm_get($m,$options{onemanga}.'/'.$manga);
@@ -111,10 +116,19 @@ for my $manga (@manga_to_get) {
        my $chapter_long = $chapter =~ /\./ ? join('.',map {sprintf'%04d',$_} split /\./,$chapter) : sprintf('%04d',$chapter);
        if (! -d "$manga/$chapter_long") {
            print $chapter_link->url(),qq(\n);
-           mkdir("$manga/$chapter_long");
            mm_get($m,$chapter_link->url_abs());
-           my $link = $m->find_link(text_regex => qr{Begin reading});
+           my $link = $m->find_link(text_regex => qr{Begin [Rr]eading});
+           if (not defined $link) {
+               #print $m->content();
+               my $temp = $m->find_link(text_regex => qr{Read.*at.*1000manga\.com});
+               mm_get($m,$temp->url_abs());
+               #print $m->content();
+               $link = $m->find_link(text_regex => qr{Begin [Rr]eading});
+           }
            mm_get($m,$link->url_abs());
+           # print $link->url_abs();
+           # print $m->content();
+           mkdir("$manga/$chapter_long");
            while ($m->uri() =~ m{\Q$chapter\E/(\d\d[^\/]*)/?$}) {
                my $image = $m->find_image(alt_regex => qr{Loading\.+\s+(media|img)});
                my $next_link = $m->find_link(url_regex => qr{\Q$manga_escaped\E/\Q$chapter\E/(\d\d[^\/]*)});
index 1be9c6a4b08768aee694494211ed0242b19a5111..abb1d3dfda44e094f139f1c72f95f6c90fe76059 100755 (executable)
@@ -26,9 +26,9 @@ else
     BACKPORT="";
 fi;
 
-OPTIONS=""
+OPTIONS="${OPTIONS:-}"
 if [ "$DIST" = "etch" ]; then
-    OPTIONS="--debian-etch-workaround"
+    OPTIONS="$OPTIONS --debian-etch-workaround"
 fi;
 
 if [ -z "$BUILDRESULT" ]; then
@@ -37,5 +37,6 @@ fi;
 
 pdebuild --buildresult "$BUILDRESULT" \
     --debbuildopts "-i\.git -I.git $*" \
+    ${PDEBUILDOPTS} \
     -- --basepath ~/pbuilder/base_"${DIST}${BACKPORT}${ARCH}" ${OPTIONS} "$@"
 
diff --git a/pubmed_search b/pubmed_search
new file mode 100755 (executable)
index 0000000..b5272e3
--- /dev/null
@@ -0,0 +1,119 @@
+#! /usr/bin/perl
+# pubmed_search searches PubMed from the command line, and is released
+# under the terms of the GPL version 2, or any later version, at your
+# option. See the file README and COPYING for more information.
+# Copyright 2011 by Don Armstrong <don@donarmstrong.com>.
+# $Id: perl_script 1825 2011-01-02 01:53:43Z don $
+
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+use Bio::DB::EUtilities;
+
+use Encode qw(encode_utf8);
+use Term::ANSIColor qw(:constants);
+use Text::Wrap;
+
+
+=head1 NAME
+
+pubmed_search - search PubMed and print the title and abstract of each result
+
+=head1 SYNOPSIS
+
+ pubmed_search [options] [searchterms]
+
+ Options:
+  --debug, -d debugging level (Default 0)
+  --help, -h display this help
+  --man, -m display manual
+
+=head1 OPTIONS
+
+=over
+
+=item B<--debug, -d>
+
+Debug verbosity. (Default 0)
+
+=item B<--help, -h>
+
+Display brief usage information.
+
+=item B<--man, -m>
+
+Display this manual.
+
+=back
+
+=head1 EXAMPLES
+
+
+=cut
+
+
+# Option handling: parse the command line, show the usage text or the
+# manual on request, and refuse to run without any search terms.
+our $DEBUG;
+
+# Defaults for every option GetOptions may fill in below.
+my %options = (debug           => 0,
+              help            => 0,
+              man             => 0,
+              );
+
+# --debug/-d may be repeated to raise the level; -? is an alias for --help.
+# GetOptions returns false (after warning) on unknown or malformed options;
+# stop with the usage text instead of silently running the search anyway.
+GetOptions(\%options,
+          'debug|d+','help|h|?','man|m') or pod2usage(2);
+
+pod2usage() if $options{help};
+pod2usage({verbose=>2}) if $options{man};
+
+$DEBUG = $options{debug};
+
+# Whatever is left in @ARGV after option parsing is used as the search query.
+my @USAGE_ERRORS;
+if (not @ARGV) {
+    push @USAGE_ERRORS,"You must pass something";
+}
+
+pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
+
+
+# Search PubMed for the terms given on the command line, then fetch the
+# matching records and print the PMID, title and abstract of each in colour.
+my $search = Bio::DB::EUtilities->new(-eutil => 'esearch',
+                                      -email => 'don@donarmstrong.com',
+                                      -db    => 'pubmed',
+                                      -term => join(' ',@ARGV),
+                                      -retmax => 1000,
+                                     );
+my @ids = $search->get_ids();
+print scalar(@ids)." results:\n";
+# No hits: there is nothing to fetch, so stop before issuing an efetch
+# request with an empty id list.
+exit 0 if not @ids;
+
+# Pull the full records for the hits as a single XML document.
+my $efetch = Bio::DB::EUtilities->new(-eutil => 'efetch',
+                                      -email => 'don@donarmstrong.com',
+                                      -db    => 'pubmed',
+                                      -id  => \@ids
+                                     );
+use XML::LibXML;
+my $xml = XML::LibXML->load_xml(string => $efetch->get_Response()->content());
+for my $article ($xml->findnodes('PubmedArticleSet/PubmedArticle/MedlineCitation')) {
+    my ($pmid) = $article->findnodes('./PMID');
+    my ($title) = $article->findnodes('./Article/ArticleTitle');
+    my ($abstract) = $article->findnodes('./Article/Abstract');
+    # textContent() returns character strings; encode everything that is
+    # printed so non-ASCII text does not raise "Wide character" warnings.
+    print BOLD GREEN;
+    print encode_utf8($pmid->textContent().": ");
+    print RESET;
+    print BOLD CYAN;
+    print encode_utf8($title->textContent()."\n");
+    print RESET;
+    print BOLD MAGENTA;
+    # A record without an <Abstract> element yields no node here; print an
+    # empty abstract rather than dying on an undefined value.
+    my $text = defined $abstract ? $abstract->textContent() : '';
+    # Strip leading whitespace from every line of the abstract.
+    $text =~ s/^\s*//mg;
+    # wrap() does all of the line breaking. Wrap the character string first
+    # and encode last, so line widths are measured in characters, not bytes.
+    print encode_utf8(wrap('','',$text));
+    print "\n\n";
+    print RESET;
+}
+
+__END__
index cf2e8eaaf67423fbf24198b59c44110eaa297bd2..e89a1935eeaaf790d6ab8b3cf82b7c7beadbd194 100755 (executable)
@@ -2,5 +2,5 @@
 # stolen from https://github.com/jgoerzen/twidge/wiki/HOWTOs
 echo "---------|---------|---------|---------|--------=|=--------|---------|---------|---------|--------=|=--------|---------|---------|---------|"
 read TXT
-twidge -c ${HOME}/.hide/twidge_twitter update "${TXT}" 
-twidge -c ${HOME}/.hide/twidge_identica update "${TXT}"
+echo "${TXT}" |twidge -c ${HOME}/.hide/twidge_twitter update
+echo "${TXT}" |twidge -c ${HOME}/.hide/twidge_identica update