#!/bin/tcsh -f
#(ie run the tshell on this but don't read the .cshrc or .tcshrc)
#
# nihcatalyst: download the NIH Catalyst index page, pick out the link to
# the current issue's PDF and hand it to `open`.
#
# usage: nihcatalyst
#
# Working files (index.html, newloc) are kept in /tmp/<user>.nihcatalyst
# so they can be inspected after a run.
#
# Exit status: 1 if the index page cannot be downloaded or no PDF link
# is found in it; otherwise whatever status the last command left.

echo version = 1.05 of nihcatalyst 2024 Mar 08
# 2024 Mar 08, 1.05: broken, complete rebuild.
# 2020 Jan 06, 1.04: failed to get index from
#    https://irp.nih.gov/catalyst/
#    but that page worked and brought me to
#    https://irp.nih.gov/catalyst/v28i1
#    which, obviously, is not going to work in general!
#    SOLUTION: set curlflags = '--location'
#    No, that doesn't give the location to this program.
#    so I used --verbose to get the new location
# 2020 Jan 06, 1.03: backup
# 2017 Nov 03, 1.02: got wrong document!
# 2017 Sep 22, 1.01: curl flags as in nihrecord
# 2017 Sep 12, 1.00: origin

echo "display current NIH Catalyst"

set base = 'https://irp.nih.gov/catalyst/'
set irp = https://irp.nih.gov

# location to put files:
set tmp = /tmp/`whoami`.nihcatalyst

# History of curl flag experiments:
# set curlflags = '--insecure' # no longer helps blank index?
# set curlflags = '' # removing --insecure did not help blank index
# set curlflags = '--user-agent mozilla' # testing ... fails
# set curlflags = '--insecure --user-agent mozilla' # testing ...
# set curlflags = '--verbose'
# verbose gave this info:
# HTTP/1.1 301 Moved Permanently
# Cache-Control: no-cache, must-revalidate
# Keep-Alive: timeout=5, max=74
# Content-Type: text/html; charset=UTF-8
# Expires: Sun, 19 Nov 1978 05:00:00 GMT
# Location: https://irp.nih.gov/catalyst/v28i1
# so at least I could get the new location from that!
#
# -L, --location
# (HTTP) If the server reports that the requested page has moved
# to a different location
# set curlflags = '--location'
# 2020 Jan 06: use verbose to find the new location:
# NOTE: the live code below passes its curl options directly; curlflags
# is only referenced by the archived code after the `exit`.
set curlflags = '--verbose'

# The script takes no arguments, so a plain invocation prints this
# summary and then carries on with the download.
if ($#argv == 0) then
  echo 'usage: nihcatalyst'
  echo "Opens NIH Catalyst page at $base"
  echo 'and attempts to get latest pdf.'
  echo "files are in $tmp"
endif

if (! -d "$tmp") then
  mkdir -p "$tmp"
endif
cd "$tmp"

echo "base: $base"

set i = index.html
# without --location I get a redirecting page
# --fail turns an HTTP error into a non-zero exit status instead of
# silently saving the server's error page as the index.
curl --fail -o "$i" --location "$base"
if ($status != 0) then
  echo "FAIL: curl could not fetch $base"
  exit 1
endif

# Debug output: directory listing, then head/tail of the index via heta.
ls
heta -a "$i"
echo
echo ---

# Split the pdf-bearing lines at every double quote so each attribute
# value lands on its own line, then keep the ones naming a FINAL pdf.
grep -i 'pdf' "$i" |\
  tr '"' '\n' |\
  grep pdf |\
  grep FINAL > newloc

# Read the candidates as a word list.  noglob keeps characters such as
# '?' or '*' in a link from being treated as filename patterns.
set noglob
set newloc = ( `cat newloc` )
unset noglob

if (${#newloc} == 0) then
  # Nothing matched: show the Catalyst page itself rather than handing
  # `open` a URL with an empty path.
  echo "FAIL: no FINAL pdf link found in $tmp/$i"
  open "$base"
  exit 1
endif

if (${#newloc} > 1) then
  echo "note: ${#newloc} candidate links found, using the first"
endif

set pdf = "$newloc[1]"
open "${irp}/${pdf}"

exit

# Everything below this point is never executed; it is kept for reference.
# ********************************************************************************
# ********************************************************************************
# ********************************************************************************
# ********************************************************************************
# ********************************************************************************

# 2024 Mar 08: below is the old code that broke today:

# wget -nc "$base"
set i = index.html
curl $curlflags $base >& $i

set j = nomindex.html
# clean off the idiotic ^Ms!
# NOTE(review): the tr arguments below appear to have held literal
# control characters that are now blanks - confirm before reviving.
cat $i |\
# tr -d ' ' |\
tr ' ' ' '|\
cat > $j

echo wtf1
heta -a $j
echo wtf2

echo =======
grep 'Location: http' $j
# set newlocation = `grep 'Location: http' $i|sed 's/Location: //' |tr -d ' '`
# set newlocation = "`grep 'Location: http' $i|sed 's/< Location: //'`"
set newlocation = `grep 'Location: http' $j|sed 's/< Location: //'`
echo "newlocation '$newlocation'"
# echo "$newlocation"

if ("$newlocation" == '') then
  echo "failed to get new location"
  exit
endif

curl $newlocation >& $i

if (`cat $i|wc -l` == 0) then
  heta -a $i
  wc $i
  echo "FAIL: unable to get $base by curl\!"
  exit
else
  echo "got $newlocation by curl\!"
endif

exit

if (0) then
# This is the original code to find the new location
set key = http
# grep -i "$key" $i
# that looks like:
#