build_pkg_index.py 667 B

1234567891011121314151617181920212223242526272829303132
  1. #!/usr/bin/env python
  2. """
  3. Build the corpus package index. Usage:
  4. build_pkg_index.py <path-to-packages> <base-url> <output-file>
  5. """
  6. xml_header = """<?xml version="1.0"?>
  7. <?xml-stylesheet href="index.xsl" type="text/xsl"?>
  8. """
  9. import sys
  10. from nltk.downloader import build_index
  11. from xml.etree import ElementTree
  12. if len(sys.argv) != 4:
  13. print("Usage: ")
  14. print("build_pkg_index.py <path-to-packages> <base-url> <output-file>")
  15. sys.exit(-1)
  16. ROOT, BASE_URL, OUT = sys.argv[1:]
  17. index = build_index(ROOT, BASE_URL)
  18. s = ElementTree.tostring(index)
  19. s = s.decode("utf8")
  20. out = open(OUT, 'w')
  21. out.write(xml_header)
  22. out.write(s)
  23. out.write('\n')
  24. out.close()