index.xml 64 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545
  1. <?xml version="1.0"?>
  2. <?xml-stylesheet href="index.xsl" type="text/xsl"?>
  3. <nltk_data>
  4. <packages>
  5. <package id="abc" name="Australian Broadcasting Commission 2006" webpage="http://www.abc.net.au/" author="Australian Broadcasting Commission" unzip="1" unzipped_size="4054966" size="1487851" checksum="ffb36b67ff24cbf7daaf171c897eb904" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/abc.zip" />
  6. <package id="alpino" name="Alpino Dutch Treebank" webpage="http://www.let.rug.nl/~vannoord/trees/" contact="Gertjan van Noord" license="Distributed with permission of Gertjan van Noord" unzip="1" unzipped_size="21604821" size="2797255" checksum="ae529a1c5f13d6074f5b0d68d8edb537" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/alpino.zip" />
  7. <package id="averaged_perceptron_tagger" name="Averaged Perceptron Tagger" languages="English" unzip="1" unzipped_size="6138625" size="2526731" checksum="05c91d607ee1043181233365b3f76978" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger.zip" />
  8. <package id="averaged_perceptron_tagger_eng" name="Averaged Perceptron Tagger (JSON)" languages="English" unzip="1" unzipped_size="5703817" size="1539115" checksum="729e2255f83045670374180de9bdb613" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger_eng.zip" />
  9. <package id="averaged_perceptron_tagger_ru" name="Averaged Perceptron Tagger (Russian)" webpage="http://www.ruscorpora.ru/en/" languages="Russian" unzip="1" unzipped_size="23247411" size="8628828" checksum="f7051368e4aff6718f8b38c1362dfdb1" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger_ru.zip" />
  10. <package id="averaged_perceptron_tagger_rus" name="Averaged Perceptron Tagger (Russian)" webpage="http://www.ruscorpora.ru/en/" languages="Russian" unzip="1" unzipped_size="30246815" size="5997187" checksum="073f704b73bf8d88037e464852e34420" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger_rus.zip" />
  11. <package id="basque_grammars" name="Grammars for Basque" author="Kepa Sarasola" languages="Spanish" unzip="1" unzipped_size="5550" size="4704" checksum="0e3518cb2aeb2600cb2841df7f035606" subdir="grammars" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/basque_grammars.zip" />
  12. <package id="bcp47" name="BCP-47 Language Tags" license="IETF Trust and Unicode Inc." copyright="Copyright (c) 2022 IETF Trust and Copyright (c) 1991-2022 Unicode" webpage="https://www.rfc-editor.org/rfc/rfc5646.html" unzip="0" unzipped_size="1433135" size="222952" checksum="8ef6c0dfa7661e3338dd99c495a7d9b6" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/bcp47.zip" />
  13. <package id="biocreative_ppi" name="BioCreAtIvE (Critical Assessment of Information Extraction Systems in Biology)" webpage="http://www.mitre.org/public/biocreative/" copyright="Public Domain (not copyrighted)" license="Public Domain" unzip="1" unzipped_size="1537086" size="223566" checksum="d3be36b53ab201372f1cd63ffc75e9a9" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/biocreative_ppi.zip" />
  14. <package id="bllip_wsj_no_aux" name="BLLIP Parser: WSJ Model" webpage="http://nlp.stanford.edu/~mcclosky/models/" unzip="1" unzipped_size="54298623" size="24516205" checksum="51d0c9c288b4f790bf255b5c9c3533ab" subdir="models" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/bllip_wsj_no_aux.zip" />
  15. <package id="book_grammars" name="Grammars from NLTK Book" author="Ewan Klein" languages="English" unzip="1" unzipped_size="21179" size="9103" checksum="2e6bc2e5d678fc5d14e4c0747c69083e" subdir="grammars" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/book_grammars.zip" />
  16. <package id="brown" name="Brown Corpus" author="W. N. Francis and H. Kucera" license="May be used for non-commercial purposes." webpage="http://www.hit.uib.no/icame/brown/bcm.html" unzip="1" unzipped_size="10117565" size="3314357" checksum="a0a8630959d3d937873b1265b0a05497" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown.zip" />
  17. <package id="brown_tei" name="Brown Corpus (TEI XML Version)" author="W. N. Francis and H. Kucera" license="May be used for non-commercial purposes." webpage="http://www.hit.uib.no/icame/brown/bcm.html" contact="Lou Burnard -- lou.burnard@oucs.ox.ac.uk" unzip="1" unzipped_size="56814689" size="8737738" checksum="3c7fe43ebf0a4c7ad3ebb63dab027e09" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown_tei.zip" />
  18. <package id="cess_cat" name="CESS-CAT Treebank" webpage="http://clic.ub.edu/cessece/" license="If you use these corpora for research, please cite thusly: CESS-Cat project (M. Antonia Mart&#237;, Mariona Taul&#233;, Llu&#237;s M&#225;rquez, Manuel Bertran (2007) &#8220;CESS-ECE: A Multilingual and Multilevel Annotated Corpus&#8221; in http://www.lsi.upc.edu/~mbertran/cess-ece/publications)." unzip="1" unzipped_size="33720460" size="5396688" checksum="e91ac59ec6e98e3b297e2d2eab83084d" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/cess_cat.zip" />
  19. <package id="cess_esp" name="CESS-ESP Treebank" webpage="http://clic.ub.edu/cessece/" license="If you use these corpora for research, please cite thusly: CESS-Cat project (M. Antonia Mart&#237;, Mariona Taul&#233;, Llu&#237;s M&#225;rquez, Manuel Bertran (2007) &#8220;CESS-ECE: A Multilingual and Multilevel Annotated Corpus&#8221; in http://www.lsi.upc.edu/~mbertran/cess-ece/publications)." unzip="1" unzipped_size="13233272" size="2220392" checksum="684432d4f6384b8f0bd19fee5dc15925" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/cess_esp.zip" />
  20. <package id="chat80" name="Chat-80 Data Files" copyright="Copyright (C) 1982 David Warren and Fernando Pereira" license="This program may be used, copied, altered or included in other programs only for academic purposes and provided that the authorship of the initial program is aknowledged. Use for commercial purposes without the previous written agreement of the authors is forbidden." author="David Warren and Fernando Pereira" webpage="http://www.cis.upenn.edu/~pereira/oldies.html" unzip="1" unzipped_size="63817" size="19209" checksum="6832873fe92996846ac5bb21c5d84eb8" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/chat80.zip" />
  21. <package id="city_database" name="City Database" note="A very small database of information about cities" unzip="1" unzipped_size="4096" size="1708" checksum="29cbf1aa02ad8abc72dd955fe74f882c" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/city_database.zip" />
  22. <package id="cmudict" name="The Carnegie Mellon Pronouncing Dictionary (0.6)" webpage="ftp://ftp.cs.cmu.edu/project/speech/dict/" copyright="Copyright 1998 Carnegie Mellon University" license="Use of this dictionary, for any research or commercial purpose, is completely unrestricted. If you use or redistribute this material, we would appreciate acknowlegement of its origin." unzip="1" unzipped_size="3824638" size="896069" checksum="58f743ff818b983b89ef9302b509fc41" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/cmudict.zip" />
  23. <package id="comparative_sentences" name="Comparative Sentence Dataset" copyright="Copyright (C) 2006 Nitin Jindal and Bing Liu" author="Nitin Jindal and Bing Liu" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" webpage="http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#datasets" unzip="1" unzipped_size="774200" size="279121" checksum="df2d005f455afb760fa37d7f565400f1" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/comparative_sentences.zip" />
  24. <package id="comtrans" name="ComTrans Corpus Sample" author="Reinhard Rapp" webpage="http://www.fask.uni-mainz.de/user/rapp/comtrans/" unzip="0" unzipped_size="35387522" size="11904518" checksum="8e1e34e2f052d8188fd877b2c821b42d" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/comtrans.zip" />
  25. <package id="conll2000" name="CONLL 2000 Chunking Corpus" webpage="http://www.cnts.ua.ac.be/conll2000/chunking/" contact="Erik Tjong Kim Sang (erikt@uia.ua.ac.be)" unzip="1" unzipped_size="3495903" size="756607" checksum="9529b285edd5fe47271da69df1052301" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/conll2000.zip" />
  26. <package id="conll2002" name="CONLL 2002 Named Entity Recognition Corpus" webpage="http://www.cnts.ua.ac.be/conll2002/ner/" unzip="1" unzipped_size="7785638" size="1867449" checksum="67bb4ca75fa81544d42a159524726e78" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/conll2002.zip" />
  27. <package id="conll2007" name="Dependency Treebanks from CoNLL 2007 (Catalan and Basque Subset)" webpage="http://nextens.uvt.nl/depparse-wiki/DataDownload" contact="Kepa Sarasola" copyright="Copyright (C) 2007 The University of the Basque Country" license="Creative Commons Attribution-NonCommercial-NoDerivativeWorks license" unzip="0" unzipped_size="6399295" size="1242958" checksum="b9015928e35c41f0695525289df5208f" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/conll2007.zip" />
  28. <package id="crubadan" name="Crubadan Corpus" copyright="Copyright (C) 2010 Kevin Scannell" author="Kevin Scannell" license="GPLv3" webpage="http://borel.slu.edu/crubadan/" unzip="1" unzipped_size="11256183" size="5288655" checksum="3cc831382dec41b8d9a06d93ef300352" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/crubadan.zip" />
  29. <package id="dependency_treebank" name="Dependency Parsed Treebank" sample="True" copyright="Copyright (C) 1995 University of Pennsylvania" license="This is a 10% fragment of Penn Treebank, (C) LDC 1995, which has been dependency parsed. It is made available under fair use for the purposes of illustrating NLTK tools for tokenizing, tagging, chunking and parsing. This data is for non-commercial use only." unzip="1" unzipped_size="1069540" size="457429" checksum="631e959acaa42eea718daf04c5cdfa76" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/dependency_treebank.zip" />
  30. <package id="dolch" name="Dolch Word List" webpage="https://en.wikipedia.org/wiki/Dolch_word_list" unzip="1" unzipped_size="1917" size="2116" checksum="6f9c042774b96366c93fd0f9a9adb697" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/dolch.zip" />
  31. <package id="europarl_raw" name="Sample European Parliament Proceedings Parallel Corpus" author="Philipp Koehn, University of Edinburgh" webpage="http://www.statmt.org/europarl" unzip="1" unzipped_size="41396100" size="12594977" checksum="7621d5675990b1decc012c823716ee76" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/europarl_raw.zip" />
  32. <package id="extended_omw" name="Extended Open Multilingual WordNet" copyright="Copyright (C) 2013 Francis Bond and Ryan Foster" license="CC by SA 3.0 Licence (for data from Wiktionary) and Unicode, Inc. Licence Agreement (for data from CLDR)" webpage="http://compling.hss.ntu.edu.sg/omw/summx.html" unzip="0" unzipped_size="36087752" size="11251284" checksum="8cc3931b20fdc2a2fe1ed9d42567d51b" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/extended_omw.zip" />
  33. <package id="floresta" name="Portuguese Treebank" license="Non-commercial use only" webpage="http://www.linguateca.pt/Floresta/" unzip="1" unzipped_size="16414136" size="1882021" checksum="de5f1df09949f080e0f616f0bc55967d" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/floresta.zip" />
  34. <package id="framenet_v15" name="FrameNet 1.5" author="Collin F. Baker" license="May be used for non-commercial purposes." webpage="http://framenet.icsi.berkeley.edu" unzip="1" unzipped_size="579133737" size="69337891" checksum="cf68365950b2f048bcb48619de81f50a" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/framenet_v15.zip" />
  35. <package id="framenet_v17" name="FrameNet 1.7" author="Collin F. Baker" license="Creative Commons Attribution 3.0 Unported License" webpage="http://framenet.icsi.berkeley.edu" unzip="1" unzipped_size="855026962" size="99207152" checksum="aaef1cfdcf37000cf2a5c562407fbddb" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/framenet_v17.zip" />
  36. <package id="gazetteers" name="Gazetteer Lists" license="GNU Free Documentation License; or public domain (depending on the file)" unzip="1" unzipped_size="12711" size="8265" checksum="1dd15c714a2be985c482a13d90e9caa4" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/gazetteers.zip" />
  37. <package id="genesis" name="Genesis Corpus" copyright="public domain" license="public domain" unzip="1" unzipped_size="1426122" size="473239" checksum="2a76432753c01fe179684e0ae3a4d023" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/genesis.zip" />
  38. <package id="gutenberg" name="Project Gutenberg Selections" webpage="https://gutenberg.org/" license="public domain" copyright="public domain" unzip="1" unzipped_size="11802669" size="4251829" checksum="48c9c8605cd70b0230687557ee543633" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/gutenberg.zip" />
  39. <package id="ieer" name="NIST IE-ER DATA SAMPLE" webpage="http://www.itl.nist.gov/iad/894.01/tests/ie-er/er_99/er_99.htm" unzip="1" unzipped_size="541349" size="166156" checksum="34157f569624bc8d642ef8da5722b14a" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/ieer.zip" />
  40. <package id="inaugural" name="C-Span Inaugural Address Corpus" copyright="public domain" license="public domain" unzip="1" unzipped_size="807436" size="346476" checksum="4e01cc9505ed7f9d04a330e67fb45509" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/inaugural.zip" />
  41. <package id="indian" name="Indian Language POS-Tagged Corpus" author="A Kumaran" license="Distributed with permission" unzip="1" unzipped_size="1091033" size="199187" checksum="599a684793935ecbcf8276133945037c" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/indian.zip" />
  42. <package id="jeita" name="JEITA Public Morphologically Tagged Corpus (in ChaSen format)" webpage="http://lilyx.net/pages/nltkjapanesecorpus.html" license="Freely re-distributable under the same license as the original JEITA corpus. Each document retains its own license from Aozora bunko and Project Sugita Genpaku." unzip="0" unzipped_size="134170650" size="16531215" checksum="96e30423d6887fad17fc44f2f30d920d" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/jeita.zip" />
  43. <package id="kimmo" name="PC-KIMMO Data Files" webpage="http://www.sil.org/pckimmo/" unzip="1" unzipped_size="814609" size="186958" checksum="68a8716e0233ad9c0ed0947952e4eb3e" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/kimmo.zip" />
  44. <package id="knbc" name="KNB Corpus (Annotated blog corpus)" webpage="http://lilyx.net/pages/nltkjapanesecorpus.html" license="Freely re-distributable under the same license as the original KNB Corpus." unzip="0" unzipped_size="23601139" size="8760788" checksum="992f8a3647f333e28a9958eba4bd67c7" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/knbc.zip" />
  45. <package id="large_grammars" name="Large context-free and feature-based grammars for parser comparison" webpage="http://www.informatics.sussex.ac.uk/research/groups/nlp/carroll/elsps.html" contact="John A. Carroll" license="See the individual grammar files" languages="English" unzip="1" unzipped_size="4115732" size="283747" checksum="135aa813bd721d59ae595d9d7f115dc8" subdir="grammars" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/large_grammars.zip" />
  46. <package id="lin_thesaurus" name="Lin's Dependency Thesaurus" author="Dekang Lin" webpage="http://webdocs.cs.ualberta.ca/~lindek/downloads.htm" license="Distributed with permission of Dekang Lin" unzip="1" unzipped_size="210421609" size="89154019" checksum="288cc15e4ed257c8598d6f7a30199db9" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/lin_thesaurus.zip" />
  47. <package id="mac_morpho" name="MAC-MORPHO: Brazilian Portuguese news text with part-of-speech tags" webpage="http://www.nilc.icmc.usp.br/lacioweb/" license="Distributed with permission of N&#250;cleo Interinstitucional de Ling&#252;&#237;stica Computacional (NILC), Universidade de S&#227;o Paulo (USP) in S&#227;o Carlos, Universidade Federal de S&#227;o Carlos (UFSCar), Universidade Estadual Paulista (UNESP) of Araraquara." unzip="1" unzipped_size="10941402" size="3013904" checksum="cf216ae5b37cca24866909f8594c5395" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/mac_morpho.zip" />
  48. <package id="machado" name="Machado de Assis -- Obra Completa" author="Machado de Assis" license="Public Domain" webpage="http://machado.mec.gov.br/" unzip="0" unzipped_size="14855338" size="6151774" checksum="d186f7d6715479a8bec48b8b8030858e" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/machado.zip" />
  49. <package id="masc_tagged" name="MASC Tagged Corpus" copyright="Copyright (C) 2014 American National Corpus" author="Nancy Ide" license="This data may be used for the purposes of linguistic education, research, and development, including commercial development." webpage="http://www.anc.org/" unzip="0" unzipped_size="4963879" size="1602143" checksum="a03d3ae8c6c2a1707885066e4d62582a" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/masc_tagged.zip" />
  50. <package id="maxent_ne_chunker" name="ACE Named Entity Chunker (Maximum entropy)" languages="English" unzip="1" unzipped_size="23604982" size="13404747" checksum="d577c2cd0fdae148b36d046b14eb48e6" subdir="chunkers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/chunkers/maxent_ne_chunker.zip" />
  51. <package id="maxent_ne_chunker_tab" name="ACE Named Entity Chunker (Maximum entropy)" languages="English" unzip="1" unzipped_size="14621652" size="5449208" checksum="a50bea2481daf9b26e01ea0128780b20" subdir="chunkers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/chunkers/maxent_ne_chunker_tab.zip" />
  52. <package id="maxent_treebank_pos_tagger" name="Treebank Part of Speech Tagger (Maximum entropy)" languages="English" unzip="1" unzipped_size="17961132" size="10156853" checksum="e3b8a5353056073e164c5b06d0cc1fa7" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/maxent_treebank_pos_tagger.zip" />
  53. <package id="moses_sample" name="Moses Sample Models" webpage="http://www.statmt.org/moses/?n=Moses.SampleData" unzip="1" unzipped_size="10985045" size="10961490" checksum="715531d058ec253bd0683d0df23ec868" subdir="models" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/moses_sample.zip" />
  54. <package id="movie_reviews" name="Sentiment Polarity Dataset Version 2.0" author="Bo Pang and Lillian Lee" copyright="Copyright (C) 2004 Bo Pang and Lillian Lee" webpage="http://www.cs.cornell.edu/people/pabo/movie-review-data/" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" unzip="1" unzipped_size="7790571" size="4004848" checksum="155de2b77c6834dd8eea7cbe88e93acb" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/movie_reviews.zip" />
  55. <package id="mte_teip5" name="MULTEXT-East 1984 annotated corpus 4.0" author="Erjavec, Toma&#382;; Barbu, Ana-Maria; Derzhanski, Ivan; Dimitrova, Ludmila; Garab&#237;k, Radovan; Ide, Nancy; Kaalep, Heiki-Jaan; Kotsyba, Natalia; Krstev, Cvetana; Oravecz, Csaba; Petkevi&#269;, Vladim&#237;r; Priest-Dorman, Greg; QasemiZadeh, Behrang; Radziszewski, Adam; Simov, Kiril; Tufi&#351;, Dan and Zdravkova, Katerina" license="Creative Commons - Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)" webpage="https://www.clarin.si/repository/xmlui/handle/11356/1043" unzip="1" unzipped_size="122461442" size="14800561" checksum="27aa12b3546cb241df8699506ab15128" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/mte_teip5.zip" />
  56. <package id="mwa_ppdb" name="The monolingual word aligner (Sultan et al. 2015) subset of the Paraphrase Database." webpage="http://www.cis.upenn.edu/~ccb/ppdb/" license="Creative Commons Attribution 3.0 Unported (CC-BY)" unzip="1" unzipped_size="3657054" size="1594711" checksum="e5836f76779020b225ad6114372b954a" subdir="misc" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/misc/mwa_ppdb.zip" />
  57. <package id="names" name="Names Corpus, Version 1.3 (1994-03-29)" copyright="Copyright (C) 1991 Mark Kantrowitz" author="Mark Kantrowitz and Bill Ross" license="You may use the lists of names for any purpose, so long as credit is given in any published work. You may also redistribute the list if you provide the recipients with a copy of this README file. The lists are not in the public domain (I retain the copyright on the lists) but are freely redistributable. If you have any additions to the lists of names, I would appreciate receiving them." webpage="http://www-2.cs.cmu.edu/afs/cs/project/ai-repository/ai/areas/nlp/corpora/names/" unzip="1" unzipped_size="56572" size="21326" checksum="93844d7c995ad28f40528c08a3430175" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/names.zip" />
  58. <package id="nombank.1.0" name="NomBank Corpus 1.0" contact="Adam Meyers" webpage="http://nlp.cs.nyu.edu/meyers/NomBank.html" license="Distributed with permission" unzip="0" unzipped_size="42315496" size="6728397" checksum="57afdc46230ea33208e4e277de24765b" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/nombank.1.0.zip" />
  59. <package id="nonbreaking_prefixes" name="Non-Breaking Prefixes (Moses Decoder)" webpage="https://github.com/moses-smt/mosesdecoder/tree/master/scripts/share/nonbreaking_prefixes" license="Gnu LGPL" unzip="1" unzipped_size="43361" size="25437" checksum="5e7d700390745114cd3a52160d6f2eac" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/nonbreaking_prefixes.zip" />
  60. <package id="nps_chat" name="NPS Chat" author="Craig Martell (cmartell@nps.edu)" webpage="http://faculty.nps.edu/cmartell/NPSChat.htm" license="This corpus is distributed solely for non-commercial, non-profit educational and research use. It is a derivative compilation work of multiple works whose copyrights are held by the respective original authors." unzip="1" unzipped_size="2578726" size="301366" checksum="72d1b905ba2be48d711690b012856c79" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/nps_chat.zip" />
  61. <package id="omw" name="Open Multilingual Wordnet" author="Francis Bond" license="Please consult the LICENSE files included with the individual Wordnets. Note that all permit redistribution." copyright="Please consult the copyright statements of the individual Wordnets" webpage="https://omwn.org/" unzip="0" unzipped_size="50269427" size="12110409" checksum="8e2adf0627365f0c51a05807737a5e5c" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/omw.zip" />
  62. <package id="omw-1.4" name="Open Multilingual Wordnet" author="Francis Bond" license="Please consult the LICENSE files included with the individual Wordnets. Note that all permit redistribution." copyright="Please consult the copyright statements of the individual Wordnets" webpage="https://omwn.org/" unzip="0" unzipped_size="96786003" size="26634772" checksum="e2acd8d3aa9c7c3dca4d8d4d169a29b8" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/omw-1.4.zip" />
  63. <package id="opinion_lexicon" name="Opinion Lexicon" author="Bing Liu" copyright="Copyright (C) 2011 Bing Liu" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" webpage="http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#datasets" unzip="1" unzipped_size="67865" size="24947" checksum="43a521f055063e001845b9d484a50173" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/opinion_lexicon.zip" />
  64. <package id="panlex_swadesh" name="PanLex Swadesh Corpora" author="Jonathan Pool (editor)" license="CC0 1.0 Universal" webpage="http://panlex.org/" unzip="0" unzipped_size="4418150" size="2861668" checksum="66dd080f09ac17db3d31bb4d667d0794" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/panlex_swadesh.zip" />
  65. <package id="paradigms" name="Paradigm Corpus" author="Cathy Bow, University of Melbourne" license="Distributed with the permission of the author" unzip="1" unzipped_size="361186" size="24902" checksum="745ee9036c5ca3226be24c97515f5707" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/paradigms.zip" />
  66. <package id="pe08" name="Cross-Framework and Cross-Domain Parser Evaluation Shared Task" version="Release 3 (20 April 2008)" webpage="http://www-tsujii.is.s.u-tokyo.ac.jp/pe08-st/" license="Distributed with permission" unzip="1" unzipped_size="296619" size="80735" checksum="e72135042dc48772acad309a6adbb6f0" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/pe08.zip" />
  67. <package id="perluniprops" name="perluniprops: Index of Unicode Version 7.0.0 character properties in Perl" webpage="http://perldoc.perl.org/perluniprops.html" license="" unzip="1" unzipped_size="136038" size="100266" checksum="721ecf418efbfefb183d0559a7ef9f2d" subdir="misc" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/misc/perluniprops.zip" />
  68. <package id="pil" name="The Patient Information Leaflet (PIL) Corpus" version="Version 2.0 (31 March 2006)" webpage="http://mcs.open.ac.uk/nlg/old_projects/pills/corpus/" license="Distributed with permission" unzip="1" unzipped_size="4170899" size="1510205" checksum="d07b2ca7b5b351a24f4db8ae8fbc9e98" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/pil.zip" />
  69. <package id="pl196x" name="Polish language of the XX century sixties" author="I. Kurcz, A. Lewicki, J. Sambor, K. Szafran, J. Woronczak" license="GNU General Public License" webpage="http://www.mimuw.edu.pl/polszczyzna/pl196x/index_en.htm" unzip="1" unzipped_size="58299303" size="7051453" checksum="bcbdcf0fc2420fac238ca17dc7bfe423" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/pl196x.zip" />
  70. <package id="porter_test" name="Porter Stemmer Test Files" unzip="1" unzipped_size="680060" size="200510" checksum="6af70bbc602aecd18aa0b9cfa7be2aa1" subdir="stemmers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/stemmers/porter_test.zip" />
  71. <package id="ppattach" name="Prepositional Phrase Attachment Corpus" author="Adwait Ratnaparkhi" webpage="ftp://ftp.cis.upenn.edu/pub/adwait/PPattachData/" copyright="(C) 1994 Adwait Ratnaparkhi" license="Distributed with the permission of the author." unzip="1" unzipped_size="3113650" size="781714" checksum="cce212b7ace8e64722ba2f41f802a5d0" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/ppattach.zip" />
  72. <package id="problem_reports" name="Problem Report Corpus" webpage="http://www.cs.cmu.edu/~marmalade/reports.html" author="Andrew Ko, Carnegie Mellon University" unzip="1" unzipped_size="3467763" size="1032942" checksum="8781ace4c0a181c5875cdbfc01e895fb" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/problem_reports.zip" />
  73. <package id="product_reviews_1" name="Product Reviews (5 Products)" author="Bing Liu" copyright="Copyright (C) 2004 Bing Liu" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" webpage="http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#datasets" unzip="1" unzipped_size="396548" size="141287" checksum="c13be66052027a4605ca456d7cda0917" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/product_reviews_1.zip" />
  74. <package id="product_reviews_2" name="Product Reviews (9 Products)" author="Bing Liu" copyright="Copyright (C) 2007 Bing Liu" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" webpage="http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#datasets" unzip="1" unzipped_size="438549" size="170698" checksum="522134e8b91086473299c3800c4adbae" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/product_reviews_2.zip" />
  75. <package id="propbank" name="Proposition Bank Corpus 1.0" contact="Martha Palmer" webpage="http://verbs.colorado.edu/~mpalmer/projects/ace.html" license="Distributed with permission" unzip="0" unzipped_size="18831005" size="5323498" checksum="2397782c6e6f46c9657f85db8a5421f6" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/propbank.zip" />
  76. <package id="pros_cons" name="Pros and Cons" author="Bing Liu" copyright="Copyright (C) 2008 Bing Liu" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" webpage="http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#datasets" unzip="1" unzipped_size="2921218" size="746276" checksum="c4c7e61fb4d57a2f6c95317194da0f17" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/pros_cons.zip" />
  77. <package id="ptb" name="Penn Treebank" copyright="Copyright (C) 1995 University of Pennsylvania" license="This is a stub for the full Penn Treebank Corpus version 3." unzip="1" unzipped_size="63036" size="6289" checksum="7b633a1b7770279eab00bc1108769c67" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/ptb.zip" />
  78. <package id="punkt" name="Punkt Tokenizer Models" author="Jan Strunk" languages="Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek, Italian, Malayalam, Norwegian, Polish, Portuguese, Russian, Slovene, Spanish, Swedish, Turkish" unzip="1" unzipped_size="37245719" size="13905355" checksum="8dd1d8760a0976f96e5c262decd75165" subdir="tokenizers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip" />
  79. <package id="qc" name="Experimental Data for Question Classification" author="Xin Li and Dan Roth, UIUC" webpage="http://l2r.cs.uiuc.edu/~cogcomp/Data/QA/QC/" unzip="1" unzipped_size="361090" size="125456" checksum="afd4145ac31cb8d7db715974b9b8b57a" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/qc.zip" />
  80. <package id="reuters" name="The Reuters-21578 benchmark corpus, ApteMod version" webpage="http://kdd.ics.uci.edu/databases/reuters21578/reuters21578.html" license="The copyright for the text of newswire articles and Reuters annotations in the Reuters-21578 collection resides with Reuters Ltd. Reuters Ltd. and Carnegie Group, Inc. have agreed to allow the free distribution of this data *for research purposes only*. If you publish results based on this data set, please acknowledge its use, refer to the data set by the name 'Reuters-21578, Distribution 1.0', and inform your readers of the current location of the data set." unzip="0" unzipped_size="9073648" size="6378691" checksum="c2acb24d5cccf8035e0fe8d29f440a68" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/reuters.zip" />
  81. <package id="rslp" name="RSLP Stemmer (Removedor de Sufixos da Lingua Portuguesa)" author="Viviane Moreira Orengo (vmorengo@inf.ufrgs.br) and Christian Huyck" languages="Portuguese" unzip="1" unzipped_size="7269" size="3805" checksum="648798996224694251834699fa6e55f7" subdir="stemmers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/stemmers/rslp.zip" />
  82. <package id="rte" name="PASCAL RTE Challenges 1, 2, and 3" webpage="http://www.pascal-network.org/Challenges/RTE/" unzip="1" unzipped_size="1279930" size="386303" checksum="ca21663daa326a3bb53001c3d82e62d6" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/rte.zip" />
  83. <package id="sample_grammars" name="Sample Grammars" author="" languages="English" unzip="1" unzipped_size="61718" size="20293" checksum="c4a2a01345d1e61c8febd8d498c5d2d6" subdir="grammars" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/sample_grammars.zip" />
  84. <package id="semcor" name="SemCor 3.0" author="Rada Mihalcea (rada@cs.unt.edu)" webpage="http://www.cse.unt.edu/~rada/downloads.html#semcor" license="You are granted permission to use, copy, modify and distribute this database for any purpose and without fee and royalty is hereby granted, provided that you agree to comply with the Princeton copyright notice and statements, including the disclaimer, and that the same appear on ALL copies of the database, including modifications that you make for internal use or for distribution. See semcor/README for more information." unzip="0" unzipped_size="37425596" size="4397021" checksum="46c095f0ab7090132567f87252af724f" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/semcor.zip" />
  85. <package id="senseval" name="SENSEVAL 2 Corpus: Sense Tagged Text" contact="Ted Pedersen (tpederse@umn.edu)" license="Distributed with permission." webpage="http://www.senseval.org/" unzip="1" unzipped_size="16463075" size="2151350" checksum="bfc6a33c62ddc2ec24b02701a2f364ff" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/senseval.zip" />
  86. <package id="sentence_polarity" name="Sentence Polarity Dataset v1.0" author="Bo Pang and Lillian Lee" copyright="Copyright (C) 2005 Bo Pang and Lillian Lee" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" webpage="http://www.cs.cornell.edu/People/pabo/people/pabo/movie-review-data" unzip="1" unzipped_size="1241127" size="490256" checksum="5cdc0cae7f558040d050c90eb2b72e97" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/sentence_polarity.zip" />
  87. <package id="sentiwordnet" name="SentiWordNet" copyright="Copyright (C) 2013 SentiWordNet Project" author="Stefano Baccianella, Andrea Esuli, and Fabrizio Sebastiani" license="Creative Commons Attribution ShareAlike 3.0 Unported license" webpage="http://sentiwordnet.isti.cnr.it/" unzip="1" unzipped_size="13591402" size="4686546" checksum="5043f00829b7db4dd5f21507e092b76a" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/sentiwordnet.zip" />
  88. <package id="shakespeare" name="Shakespeare XML Corpus Sample" license="public domain" copyright="public domain" webpage="http://www.andrew.cmu.edu/user/akj/shakespeare/" sample="True" unzip="1" unzipped_size="1727210" size="475458" checksum="2332b32a7d83d657092ba4667c2c84c3" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/shakespeare.zip" />
  89. <package id="sinica_treebank" name="Sinica Treebank Corpus Sample" webpage="http://rocling.iis.sinica.edu.tw/CKIP/engversion/treebank.htm" license="Distributed with the Natural Language Toolkit under the terms of the Creative Commons Attribution-NonCommercial-ShareAlike License [http://creativecommons.org/licenses/by-nc-sa/2.5/]." sample="True" unzip="1" unzipped_size="3293083" size="906706" checksum="979a905010d475a74475064211cd63c8" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/sinica_treebank.zip" />
  90. <package id="smultron" name="SMULTRON Corpus Sample" author="Sofia Gustafson-Capkova, Yvonne Samuelsson, and Martin Volk" webpage="http://www.ling.su.se/DaLi/research/smultron/index.htm" unzip="1" unzipped_size="1677647" size="166207" checksum="8743ff232d76aaf2ff8a10523503a659" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/smultron.zip" />
  91. <package id="snowball_data" name="Snowball Data" languages="Danish, Dutch, English, Finnish, French, German, Hungarian, Italian, Norwegian, Portuguese, Romanian, Russian, Spanish, Swedish, Turkish" webpage="https://github.com/snowballstem/snowball-data" unzip="0" unzipped_size="36360836" size="6785405" checksum="cba1cf17b887789e6df5f2c87c6e56fb" subdir="stemmers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/stemmers/snowball_data.zip" />
  92. <package id="spanish_grammars" name="Grammars for Spanish" author="Kepa Sarasola" languages="Spanish" unzip="1" unzipped_size="3980" size="4047" checksum="12f66b8e22beadd6ed202e95453465af" subdir="grammars" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/spanish_grammars.zip" />
  93. <package id="state_union" name="C-Span State of the Union Address Corpus" webpage="http://www.c-span.org/executive/stateoftheunion.asp" copyright="public domain" license="public domain" unzip="1" unzipped_size="2073917" size="808757" checksum="044f2d20c592b17a26ac0102111833c9" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/state_union.zip" />
  94. <package id="stopwords" name="Stopwords Corpus" webpage="ftp://ftp.cs.cornell.edu/pub/smart/english.stop and http://snowball.tartarus.org/ and others" unzip="1" unzipped_size="81407" size="34276" checksum="8726a900bca7083674536e2593686361" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip" />
  95. <package id="subjectivity" name="Subjectivity Dataset v1.0" author="Bo Pang and Lillian Lee" copyright="Copyright (C) 2004 Bo Pang and Lillian Lee" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" webpage="http://www.cs.cornell.edu/People/pabo/people/pabo/movie-review-data" unzip="1" unzipped_size="1303352" size="521628" checksum="a81a44513903ba6bb86f85aeff149561" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/subjectivity.zip" />
  96. <package id="swadesh" name="Swadesh Wordlists" webpage="http://en.wiktionary.org/wiki/Appendix:Swadesh_list" license="GNU Free Documentation License" unzip="1" unzipped_size="39998" size="22828" checksum="6612ccb71f327e85780dc7813dee40f6" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/swadesh.zip" />
  97. <package id="switchboard" name="Switchboard Corpus Sample" sample="True" license="Permission is granted for use of this material in accordance with the Open Content License [http://opencontent.org/opl.shtml]. This corpus contains transcripts and annotations for 36 calls from the Switchboard Corpus [http://www.ldc.upenn.edu/Catalog/LDC93S7.html]." unzip="1" unzipped_size="2541179" size="791161" checksum="878df010a9f2c2d0a6546a8365f10595" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/switchboard.zip" />
  98. <package id="tagsets" name="Help on Tagsets" author="UCREL, Lancaster University" languages="English" unzip="1" unzipped_size="79723" size="34531" checksum="e15834e0dd89b107925af6bb11a8eaa4" subdir="help" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/help/tagsets.zip" />
  99. <package id="tagsets_json" name="Help on Tagsets" author="UCREL, Lancaster University" languages="English" unzip="1" unzipped_size="33915" size="13239" checksum="d24a59844f8bead484652330ae7c9abd" subdir="help" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/help/tagsets_json.zip" />
  100. <package id="timit" name="TIMIT Corpus Sample" sample="True" license="This corpus sample is Copyright 1993 Linguistic Data Consortium, and is distributed under the terms of the Creative Commons Attribution, Non-Commercial, ShareAlike license. http://creativecommons.org/" webpage="http://www.ldc.upenn.edu/Catalog/CatalogEntry.jsp?catalogId=LDC93S1" unzip="1" unzipped_size="31932925" size="22251869" checksum="34c047c4749a811287f2c652104d7849" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/timit.zip" />
  101. <package id="toolbox" name="Toolbox Sample Files" unzip="1" unzipped_size="829593" size="250616" checksum="26657c1b8b5f5afdc3d5d754393a9216" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/toolbox.zip" />
  102. <package id="treebank" name="Penn Treebank Sample" sample="True" copyright="Copyright (C) 1995 University of Pennsylvania" license="This is a 10% fragment of Penn Treebank, (C) LDC 1995. It is made available under fair use for the purposes of illustrating NLTK tools for tokenizing, tagging, chunking and parsing. This data is for non-commercial use only." unzip="1" unzipped_size="5963497" size="1740034" checksum="78c24a97940c2504d0ad35dd3f8a560b" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/treebank.zip" />
  103. <package id="twitter_samples" name="Twitter Samples" copyright="Copyright (C) 2015 Twitter, Inc" license="Must be used subject to Twitter Developer Agreement (https://dev.twitter.com/overview/terms/agreement)" note="Sample of Tweets collected from the Twitter APIs, observing the 50k limit required by https://dev.twitter.com/overview/terms/policy#6._Be_a_Good_Partner_to_Twitter" unzip="1" unzipped_size="122350791" size="16007673" checksum="02fc79b5adc0357bc1e14747246fd3c1" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/twitter_samples.zip" />
  104. <package id="udhr" name="Universal Declaration of Human Rights Corpus" webpage="http://www.un.org/Overview/rights.html" license="public domain" copyright="public domain" unzip="1" unzipped_size="3261577" size="1170177" checksum="745b3a90feb25c95fc805ebbd1ef5258" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/udhr.zip" />
  105. <package id="udhr2" name="Universal Declaration of Human Rights Corpus (Unicode Version)" webpage="http://unicode.org/udhr/" license="public domain" copyright="public domain" unzip="1" unzipped_size="5677920" size="1653975" checksum="e604482d2dc8dd2580af7d97c1bf0a80" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/udhr2.zip" />
  106. <package id="unicode_samples" name="Unicode Samples" note="A very small corpus used to demonstrate unicode encoding in chapter 10 of the book" unzip="1" unzipped_size="643" size="1212" checksum="d46699450dd2287f5c115d8c1a0819f1" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/unicode_samples.zip" />
  107. <package id="universal_tagset" name="Mappings to the Universal Part-of-Speech Tagset" author="Slav Petrov" license="CC-BY-SA-4.0" webpage="https://github.com/slavpetrov/universal-pos-tags" unzip="1" unzipped_size="37147" size="19095" checksum="ba5a69f2148a8cea6fb5084585e20890" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/universal_tagset.zip" />
  108. <package id="universal_treebanks_v20" name="Universal Treebanks Version 2.0" license="Creative Commons Attribution-NonCommercial-ShareAlike 3.0 United States" webpage="https://code.google.com/p/uni-dep-tb/" unzip="0" unzipped_size="119113962" size="25908853" checksum="4acd3991768a727be019a8021fe376d2" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/universal_treebanks_v20.zip" />
  109. <package id="vader_lexicon" name="VADER Sentiment Lexicon" author="C.J. Hutto and Eric Gilbert" webpage="https://github.com/cjhutto/vaderSentiment" license="MIT License" unzip="0" unzipped_size="434147" size="90486" checksum="8b3824e2c39b655dd225fb266c8bea53" subdir="sentiment" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/sentiment/vader_lexicon.zip" />
  110. <package id="verbnet" name="VerbNet Lexicon, Version 2.1" version="2.1" author="Karin Kipper-Schuler" webpage="https://verbs.colorado.edu/verbnet/" license="Distributed with permission of the author." unzip="1" unzipped_size="2474526" size="323661" checksum="427dac60e4a94ae910248ccd9986a22a" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/verbnet.zip" />
  111. <package id="verbnet3" name="VerbNet Lexicon, Version 3.3" version="3.3" author="Karin Kipper-Schuler" webpage="https://verbs.colorado.edu/verbnet/" license="Distributed with permission of the author." unzip="1" unzipped_size="3723345" size="482025" checksum="60efc5ed90ab8a18ef4a436e4c39ffbf" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/verbnet3.zip" />
  112. <package id="webtext" name="Web Text Corpus" unzip="1" unzipped_size="1726918" size="646297" checksum="6c7680030aae5c997b1370f832545c6a" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/webtext.zip" />
  113. <package id="wmt15_eval" name="Evaluation data from WMT15" webpage="http://www.statmt.org/wmt15/" unzip="1" unzipped_size="1247631" size="383096" checksum="2067e40eaf94ccb632007b91073aa433" subdir="models" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/wmt15_eval.zip" />
  114. <package id="word2vec_sample" name="Word2Vec Sample" webpage="https://code.google.com/p/word2vec/" unzip="1" unzipped_size="138432415" size="49396025" checksum="d1d1a23377f9ab4c12d77c7a078318ac" subdir="models" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/models/word2vec_sample.zip" />
  115. <package id="wordnet" name="WordNet" version="3.0" license="Permission to use, copy, modify and distribute this software and database and its documentation for any purpose and without fee or royalty is hereby granted, provided that you agree to comply with the following copyright notice and statements, including the disclaimer, and that the same appear on ALL copies of the software, database and documentation, including modifications that you make for internal use or for distribution.... [see webpage for full license]" copyright="WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved." webpage="http://wordnet.princeton.edu/" unzip="0" unzipped_size="36353991" size="10775600" checksum="b3f38606f626e54c6f060548546f71f0" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet.zip" />
  116. <package id="wordnet2021" name="Open English Wordnet 2021" version="2021" license="This resource is derived from Princeton WordNet under the WordNet License and further developed under the Creative Commons Attribution 4.0 International License. You may share and adapt this resource providing attribution is given to both Princeton WordNet and the Open English WordNet team." copyright="Open English Wordnet 2021 Copyright 2021 by the Open English Wordnet team. WordNet 3.1 Copyright 2011 by Princeton University. All rights reserved." webpage="https://en-word.net/" unzip="0" unzipped_size="38408913" size="11332750" checksum="99da08a34df218457c3233d6a3dd31b8" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet2021.zip" />
  117. <package id="wordnet2022" name="Open English Wordnet 2022" version="2022" license="This resource is derived from Princeton WordNet under the WordNet License and further developed under the Creative Commons Attribution 4.0 International License. You may share and adapt this resource providing attribution is given to both Princeton WordNet and the Open English WordNet team." copyright="Open English Wordnet 2022 Copyright 2022 by the Open English Wordnet team. WordNet 3.1 Copyright 2011 by Princeton University. All rights reserved." webpage="https://en-word.net/" unzip="1" unzipped_size="38474234" size="11353460" checksum="0b17803d0d8b85ad25b5037af83d6a1c" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet2022.zip" />
  118. <package id="wordnet31" name="Wordnet 3.1" version="3.1" license="Permission to use, copy, modify and distribute this software and database and its documentation for any purpose and without fee or royalty is hereby granted, provided that you agree to comply with the following copyright notice and statements, including the disclaimer, and that the same appear on ALL copies of the software, database and documentation, including modifications that you make for internal use or for distribution.... [see webpage for full license]" copyright="WordNet 3.1 Copyright 2011 by Princeton University. All rights reserved." webpage="http://wordnet.princeton.edu/" unzip="0" unzipped_size="37411975" size="11058667" checksum="d3392d6facef35433ffcef838b47cae1" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet31.zip" />
  119. <package id="wordnet_ic" name="WordNet-InfoContent" version="3.0" webpage="http://wn-similarity.sourceforge.net" unzip="1" unzipped_size="34220359" size="12056682" checksum="25f0185b31693fa11ea898e4feda528c" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/wordnet_ic.zip" />
  120. <package id="words" name="Word Lists" webpage="http://en.wikipedia.org/wiki/Words_(Unix)" license="public domain" copyright="public domain" unzip="1" unzipped_size="2498552" size="757777" checksum="8594d9d5422e01d993dfbbc3f38d3ae5" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/words.zip" />
  121. <package id="ycoe" name="York-Toronto-Helsinki Parsed Corpus of Old English Prose" webpage="http://www.ota.ahds.ac.uk/" available="False" unzip="1" unzipped_size="277" size="477" checksum="6582cd98ca26c35d9c4eaaa4350ce8f3" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/ycoe.zip" />
  122. </packages>
  123. <collections>
  124. <collection id="all" name="All packages">
  125. <item ref="abc" />
  126. <item ref="alpino" />
  127. <item ref="averaged_perceptron_tagger" />
  128. <item ref="averaged_perceptron_tagger_eng" />
  129. <item ref="averaged_perceptron_tagger_ru" />
  130. <item ref="averaged_perceptron_tagger_rus" />
  131. <item ref="basque_grammars" />
  132. <item ref="bcp47" />
  133. <item ref="biocreative_ppi" />
  134. <item ref="bllip_wsj_no_aux" />
  135. <item ref="book_grammars" />
  136. <item ref="brown" />
  137. <item ref="brown_tei" />
  138. <item ref="cess_cat" />
  139. <item ref="cess_esp" />
  140. <item ref="chat80" />
  141. <item ref="city_database" />
  142. <item ref="cmudict" />
  143. <item ref="comparative_sentences" />
  144. <item ref="comtrans" />
  145. <item ref="conll2000" />
  146. <item ref="conll2002" />
  147. <item ref="conll2007" />
  148. <item ref="crubadan" />
  149. <item ref="dependency_treebank" />
  150. <item ref="dolch" />
  151. <item ref="europarl_raw" />
  152. <item ref="extended_omw" />
  153. <item ref="floresta" />
  154. <item ref="framenet_v15" />
  155. <item ref="framenet_v17" />
  156. <item ref="gazetteers" />
  157. <item ref="genesis" />
  158. <item ref="gutenberg" />
  159. <item ref="ieer" />
  160. <item ref="inaugural" />
  161. <item ref="indian" />
  162. <item ref="jeita" />
  163. <item ref="kimmo" />
  164. <item ref="knbc" />
  165. <item ref="large_grammars" />
  166. <item ref="lin_thesaurus" />
  167. <item ref="mac_morpho" />
  168. <item ref="machado" />
  169. <item ref="masc_tagged" />
  170. <item ref="maxent_ne_chunker" />
  171. <item ref="maxent_ne_chunker_tab" />
  172. <item ref="maxent_treebank_pos_tagger" />
  173. <item ref="moses_sample" />
  174. <item ref="movie_reviews" />
  175. <item ref="mte_teip5" />
  176. <item ref="mwa_ppdb" />
  177. <item ref="names" />
  178. <item ref="nombank.1.0" />
  179. <item ref="nonbreaking_prefixes" />
  180. <item ref="nps_chat" />
  181. <item ref="omw" />
  182. <item ref="omw-1.4" />
  183. <item ref="opinion_lexicon" />
  184. <item ref="panlex_swadesh" />
  185. <item ref="paradigms" />
  186. <item ref="pe08" />
  187. <item ref="perluniprops" />
  188. <item ref="pil" />
  189. <item ref="pl196x" />
  190. <item ref="porter_test" />
  191. <item ref="ppattach" />
  192. <item ref="problem_reports" />
  193. <item ref="product_reviews_1" />
  194. <item ref="product_reviews_2" />
  195. <item ref="propbank" />
  196. <item ref="pros_cons" />
  197. <item ref="ptb" />
  198. <item ref="punkt" />
  199. <item ref="qc" />
  200. <item ref="reuters" />
  201. <item ref="rslp" />
  202. <item ref="rte" />
  203. <item ref="sample_grammars" />
  204. <item ref="semcor" />
  205. <item ref="senseval" />
  206. <item ref="sentence_polarity" />
  207. <item ref="sentiwordnet" />
  208. <item ref="shakespeare" />
  209. <item ref="sinica_treebank" />
  210. <item ref="smultron" />
  211. <item ref="snowball_data" />
  212. <item ref="spanish_grammars" />
  213. <item ref="state_union" />
  214. <item ref="stopwords" />
  215. <item ref="subjectivity" />
  216. <item ref="swadesh" />
  217. <item ref="switchboard" />
  218. <item ref="tagsets" />
  219. <item ref="tagsets_json" />
  220. <item ref="timit" />
  221. <item ref="toolbox" />
  222. <item ref="treebank" />
  223. <item ref="twitter_samples" />
  224. <item ref="udhr" />
  225. <item ref="udhr2" />
  226. <item ref="unicode_samples" />
  227. <item ref="universal_tagset" />
  228. <item ref="universal_treebanks_v20" />
  229. <item ref="vader_lexicon" />
  230. <item ref="verbnet" />
  231. <item ref="verbnet3" />
  232. <item ref="webtext" />
  233. <item ref="wmt15_eval" />
  234. <item ref="word2vec_sample" />
  235. <item ref="wordnet" />
  236. <item ref="wordnet2021" />
  237. <item ref="wordnet2022" />
  238. <item ref="wordnet31" />
  239. <item ref="wordnet_ic" />
  240. <item ref="words" />
  241. <item ref="ycoe" />
  242. </collection>
  243. <collection id="all-corpora" name="All the corpora">
  244. <item ref="abc" />
  245. <item ref="alpino" />
  246. <item ref="bcp47" />
  247. <item ref="biocreative_ppi" />
  248. <item ref="brown" />
  249. <item ref="brown_tei" />
  250. <item ref="cess_cat" />
  251. <item ref="cess_esp" />
  252. <item ref="chat80" />
  253. <item ref="city_database" />
  254. <item ref="cmudict" />
  255. <item ref="comparative_sentences" />
  256. <item ref="comtrans" />
  257. <item ref="conll2000" />
  258. <item ref="conll2002" />
  259. <item ref="conll2007" />
  260. <item ref="crubadan" />
  261. <item ref="dependency_treebank" />
  262. <item ref="dolch" />
  263. <item ref="europarl_raw" />
  264. <item ref="extended_omw" />
  265. <item ref="floresta" />
  266. <item ref="framenet_v15" />
  267. <item ref="framenet_v17" />
  268. <item ref="gazetteers" />
  269. <item ref="genesis" />
  270. <item ref="gutenberg" />
  271. <item ref="ieer" />
  272. <item ref="inaugural" />
  273. <item ref="indian" />
  274. <item ref="jeita" />
  275. <item ref="kimmo" />
  276. <item ref="knbc" />
  277. <item ref="lin_thesaurus" />
  278. <item ref="mac_morpho" />
  279. <item ref="machado" />
  280. <item ref="masc_tagged" />
  281. <item ref="movie_reviews" />
  282. <item ref="mte_teip5" />
  283. <item ref="names" />
  284. <item ref="nombank.1.0" />
  285. <item ref="nonbreaking_prefixes" />
  286. <item ref="nps_chat" />
  287. <item ref="omw" />
  288. <item ref="omw-1.4" />
  289. <item ref="opinion_lexicon" />
  290. <item ref="panlex_swadesh" />
  291. <item ref="paradigms" />
  292. <item ref="pe08" />
  293. <item ref="pil" />
  294. <item ref="pl196x" />
  295. <item ref="ppattach" />
  296. <item ref="problem_reports" />
  297. <item ref="product_reviews_1" />
  298. <item ref="product_reviews_2" />
  299. <item ref="propbank" />
  300. <item ref="pros_cons" />
  301. <item ref="ptb" />
  302. <item ref="qc" />
  303. <item ref="reuters" />
  304. <item ref="rte" />
  305. <item ref="semcor" />
  306. <item ref="senseval" />
  307. <item ref="sentence_polarity" />
  308. <item ref="sentiwordnet" />
  309. <item ref="shakespeare" />
  310. <item ref="sinica_treebank" />
  311. <item ref="smultron" />
  312. <item ref="state_union" />
  313. <item ref="stopwords" />
  314. <item ref="subjectivity" />
  315. <item ref="swadesh" />
  316. <item ref="switchboard" />
  317. <item ref="timit" />
  318. <item ref="toolbox" />
  319. <item ref="treebank" />
  320. <item ref="twitter_samples" />
  321. <item ref="udhr" />
  322. <item ref="udhr2" />
  323. <item ref="unicode_samples" />
  324. <item ref="universal_treebanks_v20" />
  325. <item ref="verbnet" />
  326. <item ref="verbnet3" />
  327. <item ref="webtext" />
  328. <item ref="wordnet" />
  329. <item ref="wordnet2021" />
  330. <item ref="wordnet2022" />
  331. <item ref="wordnet31" />
  332. <item ref="wordnet_ic" />
  333. <item ref="words" />
  334. <item ref="ycoe" />
  335. </collection>
  336. <collection id="all-nltk" name="All packages available on nltk_data gh-pages branch">
  337. <item ref="abc" />
  338. <item ref="alpino" />
  339. <item ref="averaged_perceptron_tagger" />
  340. <item ref="averaged_perceptron_tagger_eng" />
  341. <item ref="averaged_perceptron_tagger_ru" />
  342. <item ref="averaged_perceptron_tagger_rus" />
  343. <item ref="basque_grammars" />
  344. <item ref="bcp47" />
  345. <item ref="biocreative_ppi" />
  346. <item ref="bllip_wsj_no_aux" />
  347. <item ref="book_grammars" />
  348. <item ref="brown" />
  349. <item ref="brown_tei" />
  350. <item ref="cess_cat" />
  351. <item ref="cess_esp" />
  352. <item ref="chat80" />
  353. <item ref="city_database" />
  354. <item ref="cmudict" />
  355. <item ref="comparative_sentences" />
  356. <item ref="comtrans" />
  357. <item ref="conll2000" />
  358. <item ref="conll2002" />
  359. <item ref="conll2007" />
  360. <item ref="crubadan" />
  361. <item ref="dependency_treebank" />
  362. <item ref="dolch" />
  363. <item ref="europarl_raw" />
  364. <item ref="extended_omw" />
  365. <item ref="floresta" />
  366. <item ref="framenet_v15" />
  367. <item ref="framenet_v17" />
  368. <item ref="gazetteers" />
  369. <item ref="genesis" />
  370. <item ref="gutenberg" />
  371. <item ref="ieer" />
  372. <item ref="inaugural" />
  373. <item ref="indian" />
  374. <item ref="jeita" />
  375. <item ref="kimmo" />
  376. <item ref="knbc" />
  377. <item ref="large_grammars" />
  378. <item ref="lin_thesaurus" />
  379. <item ref="mac_morpho" />
  380. <item ref="machado" />
  381. <item ref="masc_tagged" />
  382. <item ref="maxent_ne_chunker" />
  383. <item ref="maxent_ne_chunker_tab" />
  384. <item ref="maxent_treebank_pos_tagger" />
  385. <item ref="moses_sample" />
  386. <item ref="movie_reviews" />
  387. <item ref="mte_teip5" />
  388. <item ref="mwa_ppdb" />
  389. <item ref="names" />
  390. <item ref="nombank.1.0" />
  391. <item ref="nonbreaking_prefixes" />
  392. <item ref="nps_chat" />
  393. <item ref="omw" />
  394. <item ref="omw-1.4" />
  395. <item ref="opinion_lexicon" />
  396. <item ref="panlex_swadesh" />
  397. <item ref="paradigms" />
  398. <item ref="pe08" />
  399. <item ref="perluniprops" />
  400. <item ref="pil" />
  401. <item ref="pl196x" />
  402. <item ref="porter_test" />
  403. <item ref="ppattach" />
  404. <item ref="problem_reports" />
  405. <item ref="product_reviews_1" />
  406. <item ref="product_reviews_2" />
  407. <item ref="propbank" />
  408. <item ref="pros_cons" />
  409. <item ref="ptb" />
  410. <item ref="punkt" />
  411. <item ref="qc" />
  412. <item ref="reuters" />
  413. <item ref="rslp" />
  414. <item ref="rte" />
  415. <item ref="sample_grammars" />
  416. <item ref="semcor" />
  417. <item ref="senseval" />
  418. <item ref="sentence_polarity" />
  419. <item ref="sentiwordnet" />
  420. <item ref="shakespeare" />
  421. <item ref="sinica_treebank" />
  422. <item ref="smultron" />
  423. <item ref="snowball_data" />
  424. <item ref="spanish_grammars" />
  425. <item ref="state_union" />
  426. <item ref="stopwords" />
  427. <item ref="subjectivity" />
  428. <item ref="swadesh" />
  429. <item ref="switchboard" />
  430. <item ref="tagsets" />
  431. <item ref="tagsets_json" />
  432. <item ref="timit" />
  433. <item ref="toolbox" />
  434. <item ref="treebank" />
  435. <item ref="twitter_samples" />
  436. <item ref="udhr" />
  437. <item ref="udhr2" />
  438. <item ref="unicode_samples" />
  439. <item ref="universal_tagset" />
  440. <item ref="universal_treebanks_v20" />
  441. <item ref="vader_lexicon" />
  442. <item ref="verbnet" />
  443. <item ref="verbnet3" />
  444. <item ref="webtext" />
  445. <item ref="wmt15_eval" />
  446. <item ref="word2vec_sample" />
  447. <item ref="wordnet" />
  448. <item ref="wordnet2021" />
  449. <item ref="wordnet2022" />
  450. <item ref="wordnet31" />
  451. <item ref="wordnet_ic" />
  452. <item ref="words" />
  453. <item ref="ycoe" />
  454. </collection>
  <!-- Collection "book" (display name "Everything used in the NLTK Book").
       Each item refers to a package by id; for example ref="abc" matches the
       package element with id="abc" in the packages section of this file.
       The items are not in alphabetical order.
       NOTE(review): presumably item order is not significant to consumers; confirm before reordering. -->
  455. <collection id="book" name="Everything used in the NLTK Book">
  456. <item ref="abc" />
  457. <item ref="brown" />
  458. <item ref="chat80" />
  459. <item ref="cmudict" />
  460. <item ref="conll2000" />
  461. <item ref="conll2002" />
  462. <item ref="dependency_treebank" />
  463. <item ref="genesis" />
  464. <item ref="gutenberg" />
  465. <item ref="ieer" />
  466. <item ref="inaugural" />
  467. <item ref="movie_reviews" />
  468. <item ref="nps_chat" />
  469. <item ref="names" />
  470. <item ref="ppattach" />
  471. <item ref="reuters" />
  472. <item ref="senseval" />
  473. <item ref="state_union" />
  474. <item ref="stopwords" />
  475. <item ref="swadesh" />
  476. <item ref="timit" />
  477. <item ref="treebank" />
  478. <item ref="toolbox" />
  479. <item ref="udhr" />
  480. <item ref="udhr2" />
  481. <item ref="unicode_samples" />
  482. <item ref="webtext" />
  483. <item ref="wordnet" />
  484. <item ref="wordnet_ic" />
  485. <item ref="words" />
  486. <item ref="maxent_treebank_pos_tagger" />
  487. <item ref="maxent_ne_chunker" />
  488. <item ref="universal_tagset" />
  489. <item ref="punkt" />
  490. <item ref="book_grammars" />
  491. <item ref="city_database" />
  492. <item ref="tagsets" />
  493. <item ref="panlex_swadesh" />
  494. <item ref="averaged_perceptron_tagger" />
  495. </collection>
  <!-- Collection "popular" (display name "Popular packages").
       Each item refers to a package by its id via the ref attribute.
       NOTE(review): every ref here is assumed to match a package id declared
       elsewhere in this file; that section is not visible from this span, so verify. -->
  496. <collection id="popular" name="Popular packages">
  497. <item ref="cmudict" />
  498. <item ref="gazetteers" />
  499. <item ref="genesis" />
  500. <item ref="gutenberg" />
  501. <item ref="inaugural" />
  502. <item ref="movie_reviews" />
  503. <item ref="names" />
  504. <item ref="shakespeare" />
  505. <item ref="stopwords" />
  506. <item ref="treebank" />
  507. <item ref="twitter_samples" />
  508. <item ref="omw" />
  509. <item ref="omw-1.4" />
  510. <item ref="wordnet" />
  511. <item ref="wordnet2021" />
  512. <item ref="wordnet31" />
  513. <item ref="wordnet_ic" />
  514. <item ref="words" />
  515. <item ref="maxent_ne_chunker" />
  516. <item ref="punkt" />
  517. <item ref="snowball_data" />
  518. <item ref="averaged_perceptron_tagger" />
  519. </collection>
  <!-- Collection "tests" (display name "Packages for running tests").
       Each item refers to a package by its id via the ref attribute.
       NOTE(review): every ref here is assumed to match a package id declared
       elsewhere in this file; that section is not visible from this span, so verify. -->
  520. <collection id="tests" name="Packages for running tests">
  521. <item ref="averaged_perceptron_tagger" />
  522. <item ref="porter_test" />
  523. <item ref="twitter_samples" />
  524. <item ref="wmt15_eval" />
  525. <item ref="subjectivity" />
  526. <item ref="framenet_v17" />
  527. <item ref="product_reviews_1" />
  528. <item ref="product_reviews_2" />
  529. <item ref="vader_lexicon" />
  530. <item ref="crubadan" />
  531. <item ref="mte_teip5" />
  532. <item ref="sentence_polarity" />
  533. <item ref="universal_treebanks_v20" />
  534. <item ref="panlex_swadesh" />
  535. <item ref="nonbreaking_prefixes" />
  536. <item ref="perluniprops" />
  537. <item ref="pros_cons" />
  538. <item ref="opinion_lexicon" />
  539. <item ref="comparative_sentences" />
  540. </collection>
  <!-- Collection "third-party" (display name "Third-party data packages").
       It currently holds a single item, ref="dolch".
       NOTE(review): the package entry for "dolch" is not visible from this span; verify it exists. -->
  541. <collection id="third-party" name="Third-party data packages">
  542. <item ref="dolch" />
  543. </collection>
  544. </collections>
  545. </nltk_data>