#!/bin/tcsh -f
#(ie run the cshell on this but don't read the .cshrc)
# usfn (Un Space File Name): rename the entries of the current directory
# so that their names no longer contain spaces or other awkward characters.
# Run with -h for the usage text.  The first action is to report the version.

echo version = 1.66 of usfn 2023 Aug 03
# 2023 Aug 03, 1.66: handle stupid '/Users/schneidt/Desktop/\[EXTERNAL\]\ New\ recommendations\ available\ on\ ScienceDirect.eml'
# 2023 Aug 02, 1.65: clean up
# 2023 Aug 02, 1.64: handle files that begin with '- '
# 2023 Jul 06, 1.63: detect overwrite of pre-existing file
# 2023 Mar 13, 1.62: allow ',' for showpdf+aw
# 2023 Mar 11, 1.61: forcing clean name.  allow '='
#  set step8 = `echo "$step7" | tr -dc '[:alpha:][:digit:]_-.='`
# 2023 Mar 11, 1.60: force printable, can't handle "`" otherwise!! blast apart!!
# 2020 Aug 13, 1.59: clean up
# 2020 Aug 13, 1.58: remove 'MOVED' - unnecessary
# 2020 Aug 13, 1.57: remove 'original' since it duplicates 'processing' line
# 2020 Aug 13, 1.56: align names processing/original/final
# 2020 Feb 13, 1.55: improve documentation
# 2018 Jun 08, 1.54: remove multiple '_'?
# 2018 Mar 22, 1.53: instead of "failed to rename" only echo
#                    "usfn: files were already numbered"
# 2018 Mar 22, 1.52: fix broken quote
# 2018 Mar 21, 1.51: backup
# 2018 Mar 21, 1.50: fix all files
# 2018 Mar 20, 1.50: -n numbering mode for dl and dlr
# 2018 Jan 22, 1.49: backup
# 2018 Jan 22, 1.48: count number of changed cases
# 2018 Jan 02, 1.47: turn debugging off
# 2017 Nov 28, 1.46: consider rapid test for spaces in file names (egrep)
#                    problem: must list characters to remove.
# 2017 Nov 27, 1.45: remove '$'
# 2017 Nov 07, 1.44: still working on '~' ... sed failed.  do by hand!!
# 2017 Nov 07, 1.43: remove '~' from $thefiles list - "Unknown user"!
#                    ls -d -- * |tr -d '~' > $thefiles
# 2017 Sep 23, 1.42: remove all '"' from name! (must uu first!!)
# 2017 Aug 17, 1.41: clean up
# 2017 Aug 17, 1.40: failed for 'usfn *a' on file 'one a'
# 2017 Aug 10, 1.39: correct dates
# 2017 Aug 10, 1.38: define file type
#                    an argument of *.txt fails for files with spaces
#                    because the shell expands it FIRST and then the parts
#                    of the file name are not recognized.
#                    So the user can say 'txt', 'eml' or 'pdf' to define
#                    subset of files.
# 2017 Aug 10, 1.37: always use '_'; all arguments are file names
# 2017 Aug 10, 1.36: make first arguments define files
# 2017 Aug 02, 1.35: sheesh, get rid of '<>' from file names!!!
# 2017 Jun 14, 1.34: fix single back and forward quote problems
# 2017 Jun 01, 1.33: use uu to clear more unicode stuff
# 2017 May 12, 1.32: fix unicode dash (aa fails)
# 2016 Sep 05, 1.31: remove '!'!!!
# 2015 Dec 11, 1.30: remove ';'!!!
# 2015 Jul 30, 1.29: remove the dash of file names that begin with a dash!!
# 2015 Jun 26, 1.28: allow file name to begin with '-'!
# 2013 Oct 17, 1.27: remove non-printable characters
# 2013 Sep 11, 1.26: clean up
# 2013 Sep 11, 1.25: handle '* Nucl.-Acids-Res.-2013-Rutherford-nar-gkt580.pdf'?
# 2013 Aug 27, 1.24: clean up output
# 2013 Aug 26, 1.23: remove '/'; in Mac OS X it is actually ':' so remove those
# 2012 Nov 20, 1.22: remove '|'
# 2012 Jun 07, 1.21: update address
# 2009 May 29, 1.20: %20 becomes _
# 2009 Feb 19, 1.19: debug for Mac OSX
# 2009 Feb 19, 1.18: debug for Mac OSX
# 2009 Feb 19, 1.17: reverse underscores back to space with ' ' input
# 2008 Jun 27, 1.16: remove #!
# 2008 Apr 22, 1.15: spaces AND '?'
# 2008 Feb 16, 1.14: handle "&" inside a file name!
# 2007 mar 16, 1.13: possible way to speed this:  use `ls | grep ' '`
# 2005 May 22, 1.12: remove ls
# 2005 May 14, 1.11: Handle "[]" inside a file name! switch to tcsh
# 2005 Apr 15, 1.10: Handle "()" inside a file name!
# 2004 Jun 22, 1.09: Handle "-" inside a file name!
# 2003 May 17, 1.08: Handle "?" inside a file name!
# 2002 Oct 13, 1.07: handle quotes inside a file name!
# 2002 Oct 12, 1.06: fix test for existence - could be a directory or link!
# 2002 Oct 12, 1.05: debugging
# 2002 Oct 12, 1.04: show exact file name when debugging
# 2002 Jul  9, 1.03: spaces converted to ^G internally, underscores left alone
# 2002 Jul  9, 1.02: handles cases that have both space and underscore!
#                    underscore becomes tilde then spaces become underscore
# origin 2001Sep07.15:32:46

# Help handling.
# Help is requested only when one argument is exactly -h, -help or --help.
# Each argument is compared on its own (with an x prefix so that a leading
# dash can never be taken for a file test operator).  A substring search
# over the whole argument string would also fire on a file type that merely
# contains a dash followed by h.
set help = 0
foreach arg ($argv:q)
   if ("x$arg" == 'x-h' || "x$arg" == 'x-help' || "x$arg" == 'x--help') then
      set help = 1
   endif
end

# Print the usage text and stop; nothing is renamed when help is requested.
# NOTE(review): parts of the text below (the configurable character and the
# aa script) describe behavior that the rest of the script no longer shows;
# confirm and update the wording.
if ($help) then
  echo 'usage: usfn [-h] [-n] [file type]'
  echo 'Un Space File Name: usfn'
  echo 'Remove spaces from all file names in the current directory.'
  echo
  echo 'If the argument is -h or -help, give this help information.'
  echo
  echo 'If the first argument is -n then number the files.'
  echo 'This is a way to avoid dealing with weird characters in'
  echo 'file names for dl and dlr.'
  echo
  echo "If a file type is given, use files ending with that type;"
  echo "otherwise all files in the current directory are processed."
  echo "One could not use '*.txt' because spaced names"
  echo "are expanded BEFORE this program gets to the argument list\!"
  echo "So instead just give 'txt' to specify '*.txt'."
  echo
  echo 'The default character is underscore "_".'
  echo 'Characters converted are: "()+ ?#:".  Also "%20" is made to underscore.'
  echo
  echo 'If the character is a space, " ", then underscores are reverted'
  echo 'to spaces.  Thus one can undo the mess one made!'
  echo
  echo 'Purpose:  Mac and PC computer operating systems allow spaces'
  echo 'in names, as do Unix.  However, such names interfere with'
  echo 'file name continuation and other speed tricks.'
  echo 'This script converts the spaces to other characters.'
  echo ''
  echo "Note: a '/' in Mac OS X is actually a ':' which is removed."
  echo ''

  echo 'Also, call the (old) aa script to remove unicode characters.'
  echo 'Also, call the (new) uu script to remove unicode characters.'

  # 'Method: Spaces are converted to ^G internally'
  # '(a bell\!  Unlikely to be used\!)'
  echo ''

  echo ''
  echo 'Dependencies:  uu heta'
  echo 'The uu script may eventually replace all the conversion steps.'
  echo ''

  echo 'Thomas D. Schneider, Ph.D.'
  #echo 'Senior Investigator'
  #echo 'National Institutes of Health'
  #echo 'National Cancer Institute'
  #echo 'Center for Cancer Research'
  #echo 'RNA Biology Laboratory'
  #echo 'Biological Information Theory Group'
  #echo 'Frederick, Maryland  21702-1201'
  #echo 'schneidt@mail.nih.gov'
  #echo 'https://schneider.ncifcrf.gov (current link)'
  echo 'https://alum.mit.edu/www/toms (permanent link)'

  exit
endif

# Disabled code kept for reference: the if (0) guard means nothing between
# here and the matching endif is executed.  It is the earlier, single
# argument way of choosing the file type.
if (0) then
# original argument processing:
echo
if ($#argv == 1) then
   set filetype = "$argv"
   echo "Processing only: $filetype files"
else
   echo "Processing all files"
   set filetype = '*'
endif
# echo "files: '$files'"
endif

# swap these lines to turn on debugging:
set debugging = 1 # true
set debugging = 0 # false

# Argument processing.
#   -n            select numbering mode (sets numbering to 1)
#   anything else is taken as the file type; the last one given wins
# With no file type argument every file is processed and filetype is set
# to a star.
# The arguments are walked directly with the :q modifier instead of being
# passed through echo.  A lone -n handed to the builtin echo can be taken
# as an echo option and print nothing, which would leave numbering mode
# unselected.
@ n = 0
set all = 1 # default is to do all files
@ numbering = 0 # default is not to number
foreach arg ($argv:q)
   @ n = $n + 1
   if ($debugging) then
      echo "$n $arg"
   endif
   if ("$arg" == '-n') then
      @ numbering = 1
   else
      @ all = 0
      set filetype = "$arg"
      echo "Processing only: $filetype files"
   endif
end
if ($all) then
   echo "Processing all files"
   set filetype = '*'
endif

# Replacement character settings.
# The if (0) branch is disabled and never runs; it is the earlier scheme in
# which the caller could pick the replacement character.  Only the else
# branch is live: space is a blank and both unspace and unparen are an
# underscore.
if (0) then
   # This is the old method that allowed control of the characters.
   # Just forget it - everything becomes '_'
   set space = " "
   if ($#argv == 1) then
     set unspace = "$1"
   else
     set unspace = "_"
   endif
   echo "space is set to $unspace"
   #
   if ($#argv == 1) then
     set unparen = "$unspace"
   else
     # set unparen = "+"
     set unparen = "_"
   endif
   echo "parenthesis is set to $unparen"
   #
   # Reverse direction if space is used!!
   if ("$unspace" == ' ') then
     set space = "_"
     set unparen = " "
   endif
else
  set space = " "
  set unspace = "_"
  set unparen = "_"
endif

# Scratch file that holds the list of names to work on.
# NOTE(review): the name is predictable and lives in /tmp - consider mktemp.
set thefiles = /tmp/`whoami`.usfn

# Numbering mode (-n): rename the eml files of the current directory to
# 1.eml, 2.eml, ... in ls order, report whether anything changed, and exit.
# The listing before and after uses the chosen file type, while the rename
# itself is limited to names ending in .eml.
# NOTE(review): confirm that the eml restriction is intended when another
# file type is given.
if ("$numbering") then
   echo "Renumbering files"
   ls -d -- *.$filetype > $thefiles
   heta -d $thefiles
   @ n = 0
   set Q = '"'
   # grep -E -x only lets through names that end in a literal .eml.
   # The previous pattern also accepted any name that merely contained
   # the letters eml after some other character.
   foreach eml ("`ls |grep -E -x -- '.+\.eml'`")
      # an empty word shows up when there is nothing to rename
      if ("$eml" == "") continue
      @ n = $n + 1
      if ("$eml" != "${n}.eml") then
         if (-e "${n}.eml") then
            # Never clobber a file that already owns this number.
            # Without this check a second run over ten or more numbered
            # files destroys them, because ls sorts 10 before 2.
            echo "usfn: ${n}.eml already exists - not overwriting it with '$eml'"
         else
            echo "mv -- ${Q}${eml}${Q} ${n}.eml"
                  mv -- "$eml" ${n}.eml
         endif
      endif
   end
   # test our work: compare the listing before and after
   set testthefiles = /tmp/`whoami`-testthefiles
   ls -d -- *.$filetype > $testthefiles
   if (`diff $thefiles $testthefiles|wc -l` == 0) then
      set s = "usfn: files were already numbered"
      echo "$s"
   else
      set s = "renamed"
      echo "$s"
   endif
   heta -a $testthefiles
   exit
endif

# Build the list of names to process, one name per line, in $thefiles.
#
# ls -d reports a directory as itself rather than listing what is inside,
# and the -- marker lets names that begin with a dash through.
# A foreach word list is split at blanks, so a name with spaces would look
# like several files; the list is therefore written to a file here and read
# back later inside double quotes, which splits at line ends only.
# History: under plain csh the foreach failed for names holding square
# brackets, which is why the script runs under tcsh; an earlier variant that
# piped the name through tr failed on names holding a single quote.

# separator line printed between renamed entries
set showline = 'echo ----------------------------'

if ($#argv != 0) then
   # first try the argument as an extension ...
   ls -d -- *.$filetype > $thefiles
   set nfound = `cat $thefiles|wc -l`
   if ($nfound == 0) then
      # ... then as a plain name ending
      echo "no files ending in '*.$filetype' found - try *$filetype"
      ls -d -- *$filetype > $thefiles
      set nfound = `cat $thefiles|wc -l`
      if ($nfound == 0) then
         echo "Still no files found - failed"
         echo "Try putting single quotes around '*a' on command line."
         exit
      endif
   endif
else
   # no argument at all: take every entry of the current directory
   ls -d -- * > $thefiles
endif

# Names holding a tilde are refused outright; list them and stop.
set ntilde = `cat $thefiles|grep '~'|wc -l`
if ($ntilde > 0) then
   echo "***** FAIL: cannot handle file names with '~' in them *****"
   echo "***** sorry - do it by hand for:                      *****"
   grep '~' $thefiles
   exit
endif
# heta -d $thefiles

# Main pass: derive a cleaned name for every entry listed in $thefiles and
# rename the entry when the cleaned name differs from the current one.
# Reads space, unspace, unparen, debugging, showline and thefiles, which are
# set earlier in the script, and calls the external commands uu, filedate
# and say.  Ends by reporting how many names were changed.
@ files   = 0 # number of files
@ changes = 0 # number of changed file names
$showline
foreach original ("`cat $thefiles`")
   echo "processing '$original'"
   @ files = $files + 1

   # NOTE(review): the command substitutions below are left unquoted on
   # purpose, as in earlier versions; a quoted form is recorded in this
   # file as failing on names that hold a single quote.

   # step0: drop a leading dash that is followed by a double quote
   # NOTE(review): the change log for 1.64 speaks of names that begin with
   # a dash and a blank - confirm that this pattern is the intended one.
   set step0 = `echo "$original" |sed 's/^-"//'`
   # stepuu: pass the name through the external uu filter
   set stepuu = `echo "$step0" |uu`
   # stepnodq: delete every double quote
   set stepnodq = `echo "$stepuu" |tr -d '"'`
   # step1: single quote, colon, semicolon and bang become blanks,
   #        then the space character becomes the unspace character
   set step1 = `echo "$stepnodq"  |tr "':;!" " " | tr "$space" "$unspace" `
   # step2: a percent 20 sequence becomes an underscore
   set step2 = `echo "$step1"  | sed "s/\%20/_/g" `
   # step3: brackets and other shell punctuation become the unparen character
   set step3 = `echo "$step2"  |tr '()[]&?#$|*<>' " " | tr ' ' "$unparen" `
   # step4: strip up to two leading underscores
   set step4 = `echo "$step3" | sed 's/^_//'| sed 's/^_//'`
   # step5: delete everything that is not printable
   set step5 = `echo "$step4" | tr -dc '[:print:].'`
   # step6 and step7: squeeze every run of the replacement character, and of
   # underscores, down to one.  A single substitution pass left two behind
   # whenever four or more stood in a row.
   set step6 = `echo "$step5" | tr -s "$unparen"`
   set step7 = `echo "$step6" | tr -s '_'`
   # step8: keep only letters, digits and the characters _ . = , and dash.
   # The dash stands last in the set so that it is always literal; written
   # between underscore and dot it forms a reversed range, which some tr
   # implementations reject.
   set step8 = `echo "$step7" | tr -dc '[:alpha:][:digit:]_.=,-'`
   set final = `echo "$step8"`

   if ($debugging) then
      echo "original: '$original'"
      echo "final:    '$final'"
   endif

   # Nothing usable left: there is no name to rename to, so skip the entry
   # instead of handing an empty name to the file test and to mv.
   if ("$final" == "") then
      echo "usfn: no usable characters left in '$original' - skipping it"
      $showline
      continue
   endif

   if !("$final" == "$original") then

      echo "final:    " "'""$final""'"

      # test that the original file or directory EXISTS (-e):
      if (-e "$original") then
         # A regular file already owns the cleaned name.
         if (-f "$final") then
            say "final file already exists\!"
            echo "final file '$final' already exists\!"
            if (`diff "$original" "$final"| wc -l` == 0) then
               set s = "they are identical"
               echo "$s"
               say "$s"
               # use the file time for the stamp
               set t = `filedate "$original" |tr ':/' '-'`
               set s = "moving new one to slash tmp with timestamp $t"
               echo "$s"
               say "$s"
               # the -- marker protects names that start with a dash
               mv -- "$original" "/tmp/$original-$t"
            else
               set s = "they differ - STOPPING"
               echo "$s"
               say "$s"
               exit
            endif
            # NOTE(review): the run also stops after an identical duplicate
            # has been moved aside, so later entries are not processed and
            # no summary is printed - confirm that this is intended.
            exit
         endif
         # the -- marker allows files that start with a dash to be moved
         mv -- "$original" "$final"
         if !(-e "$final") then
            echo "FAILED TO MOVE $original to $final\!"
         endif
         $showline
         @ changes = $changes + 1
      else
         echo "*WARNING* cannot find '$original' to change it into $final\!"
         echo "original: '$original'"
         echo "final:    '$final'"
         $showline
      endif
   endif

end

echo "=============="
echo "$changes changes in $files files"

exit
********************************************************************************

