download.sh 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. #!/bin/bash
  #######################################
  # Print the help text for this script.
  # Globals:   $0 (read) - its last path component is shown as the script name
  # Arguments: none
  # Outputs:   multi-line usage text on stdout
  #######################################
  usage() {
    local script_name
    # Strip the directory part with parameter expansion instead of an unquoted
    # `basename $0`, which breaks when the script path contains whitespace.
    script_name=${0##*/}

    echo
    echo "Usage: ${script_name} <collection name>"
    echo
    echo "Copies nltk data to proper locations from local copy of repository."
    echo "Assumes script is in repo tools directory."
    echo
    echo "Clone the repo:"
    printf '\t%s\n' 'git clone git@github.com:<owner>/nltk_data.git'
    echo
    echo "Now switch branches to the one with the data on it (and this script):"
    printf '\t%s\n' 'git branch gh-pages remotes/origin/gh-pages'
    printf '\t%s\n' 'git checkout gh-pages'
    echo
    echo "Remember to use sudo if installing to /usr/share (default)"
    echo
    echo "set NLTK_DATA_DIR to target directory if different than /usr/share, e.g.:"
    printf '\t%s %s\n' 'NLTK_DATA_DIR=./local/dir' "${script_name} book"
    echo
  }
  # ---------------------------------------------------------------------------
  # Install one collection: for every <item ref="..."> listed in
  # <repo>/collections/<collection>.xml, copy the matching <ref>.zip found under
  # <repo>/packages into the data directory and unpack it there.
  #
  # Arguments:   $1 - collection name (basename of the XML file, no extension)
  # Environment: NLTK_DATA_DIR - target directory
  #              (default: /usr/share/nltk_data)
  # ---------------------------------------------------------------------------
  if [ "$#" -eq 0 ]; then
    usage
    exit 1
  fi

  collection=$1
  data_dir=${NLTK_DATA_DIR:-/usr/share/nltk_data}

  # Resolve the repository paths to absolute ones *before* changing directory,
  # so a relative $0 keeps working after the pushd below.
  script_dir="$(cd "$(dirname "$0")" && pwd)"
  repo_dir=$(readlink -f "$script_dir/..")
  package_dir=$repo_dir/packages
  collections_dir=$repo_dir/collections
  collection_xml=$collections_dir/$collection.xml

  if [ ! -f "$collection_xml" ]; then
    echo "error: collection file not found: $collection_xml" >&2
    exit 1
  fi

  # Prefer python3 and fall back to whatever `python` is; the embedded program
  # below is valid on both Python 2 and Python 3.
  python_bin=$(command -v python3 || command -v python) || {
    echo "error: no python interpreter found in PATH" >&2
    exit 1
  }

  mkdir -p "$data_dir" || exit 1
  # Abort if we cannot enter the target directory; otherwise the packages
  # would be unpacked into the caller's current directory.
  pushd "$data_dir" || exit 1

  # The XML path is passed as an argument rather than interpolated into the
  # Python source, so quotes or other special characters in it are harmless.
  "$python_bin" -c '
import sys
import xml.etree.ElementTree as etree

for item in etree.parse(sys.argv[1]).getroot().findall("item"):
    ref = item.get("ref")
    if ref:
        print(ref)
' "$collection_xml" |
  while IFS= read -r item; do
    [ -n "$item" ] || continue

    # Use the first match only: several hits would otherwise end up as one
    # multi-line value and corrupt the cp/unzip arguments below.
    package=$(find "$package_dir" -name "$item.zip" -print | head -n 1)
    if [ -z "$package" ]; then
      echo "warning: no package archive found for '$item'" >&2
      continue
    fi

    # The archive is installed under the name of the directory that holds it
    # inside the packages tree.
    target_dir=$(basename "$(dirname "$package")")
    target_file=$target_dir/$item.zip

    mkdir -p "$target_dir" &&
      cp "$package" "$target_file" &&
      unzip -u -d "$target_dir" "$target_file"
  done

  popd