gingo
/
corrector


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
							# -*- coding: utf-8 -*-
"""
@author:XuMing(xuming624@qq.com)
@description: 
"""

import argparse
import sys

sys.path.append('..')
from pycorrector import Corrector


def main(**kwargs):
    """
    Cmd script of correct. Input text file, output corrected text file.
    :param kwargs: input, a text file object that will be read from. Should contain utf-8 sentence per line
    :param output: a text file object where parsed output will be written. Parsed output will be similar to CSV data
    :type input: text file object in read mode
    :type output: text file object in write mode
    :return:
    """
    m = Corrector()
    no_char = kwargs['no_char'] if 'no_char' in kwargs else False
    if no_char:
        m.enable_char_error(enable=False)
        print('disable char error detect.')

    detail = kwargs['detail'] if 'detail' in kwargs else False
    count = 0
    with open(kwargs['input'], 'r', encoding='utf-8') as fr, open(kwargs['output'], 'w', encoding='utf-8') as fw:
        for line in fr:
            line = line.strip()
            corrected_dict = m.correct(line)
            count += 1
            corrected_sent = corrected_dict.get('target', '')
            errors = corrected_dict.get('errors', '')
            r = corrected_sent
            if errors and detail:
                r = corrected_sent + '\t' + str(errors)
            fw.write(line + '\t' + r + '\n')
        print('{} lines in output'.format(count))


def run():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('input', type=str,
                        help='the input file path, file encode need utf-8.')
    parser.add_argument('-o', '--output', type=str, required=True,
                        help='the output file path.')
    parser.add_argument('-n', '--no_char', action="store_true", help='disable char detect mode.')
    parser.add_argument('-d', '--detail', action="store_true", help='print detail info')
    args = parser.parse_args()
    print(args)
    main(**vars(args))


if __name__ == '__main__':
    run()