__main__.py 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. # -*- coding: utf-8 -*-
  2. """
  3. @author:XuMing(xuming624@qq.com)
  4. @description:
  5. """
  6. import argparse
  7. import sys
  8. sys.path.append('..')
  9. from pycorrector import Corrector
  10. def main(**kwargs):
  11. """
  12. Cmd script of correct. Input text file, output corrected text file.
  13. :param kwargs: input, a text file object that will be read from. Should contain utf-8 sentence per line
  14. :param output: a text file object where parsed output will be written. Parsed output will be similar to CSV data
  15. :type input: text file object in read mode
  16. :type output: text file object in write mode
  17. :return:
  18. """
  19. m = Corrector()
  20. no_char = kwargs['no_char'] if 'no_char' in kwargs else False
  21. if no_char:
  22. m.enable_char_error(enable=False)
  23. print('disable char error detect.')
  24. detail = kwargs['detail'] if 'detail' in kwargs else False
  25. count = 0
  26. with open(kwargs['input'], 'r', encoding='utf-8') as fr, open(kwargs['output'], 'w', encoding='utf-8') as fw:
  27. for line in fr:
  28. line = line.strip()
  29. corrected_dict = m.correct(line)
  30. count += 1
  31. corrected_sent = corrected_dict.get('target', '')
  32. errors = corrected_dict.get('errors', '')
  33. r = corrected_sent
  34. if errors and detail:
  35. r = corrected_sent + '\t' + str(errors)
  36. fw.write(line + '\t' + r + '\n')
  37. print('{} lines in output'.format(count))
  38. def run():
  39. parser = argparse.ArgumentParser(description=__doc__)
  40. parser.add_argument('input', type=str,
  41. help='the input file path, file encode need utf-8.')
  42. parser.add_argument('-o', '--output', type=str, required=True,
  43. help='the output file path.')
  44. parser.add_argument('-n', '--no_char', action="store_true", help='disable char detect mode.')
  45. parser.add_argument('-d', '--detail', action="store_true", help='print detail info')
  46. args = parser.parse_args()
  47. print(args)
  48. main(**vars(args))
# Run the command-line interface only when this file is executed as a script;
# run() is not called when the module is merely imported.
if __name__ == '__main__':
    run()