Skip to content

Commit

Permalink
Merge pull request #471 from treya-lin/dev
Browse files (browse the repository at this point in the history)
Fix issue #470: Improve how confusion words are located and/or replaced
  • Loading branch information
shibing624 authored Jan 31, 2024
2 parents 9fe431b + ba0a2b2 commit 2499e79
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 7 deletions.
9 changes: 5 additions & 4 deletions pycorrector/confusion_corrector.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,6 +5,7 @@
功能:1)补充纠错对,提升召回率;2)对误杀加白,提升准确率
"""
import os
import re
from typing import List

from loguru import logger
Expand Down Expand Up @@ -56,10 +57,10 @@ def correct(self, sentence: str):
details = []
# 自定义混淆集加入疑似错误词典
for err, truth in self.custom_confusion.items():
idx = sentence.find(err)
if idx > -1:
corrected_sentence = sentence[:idx] + truth + sentence[(idx + len(err)):]
details.append((err, truth, idx))
for i in re.finditer(err, sentence):
start,end = i.span()
corrected_sentence = corrected_sentence[:start] + truth + corrected_sentence[end:]
details.append((err, truth, start))
return {'source': sentence, 'target': corrected_sentence, 'errors': details}

def correct_batch(self, sentences: List[str]):
Expand Down
6 changes: 3 additions & 3 deletions pycorrector/detector.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,6 +4,7 @@
@description: error word detector
"""
import os
import re
from codecs import open

import numpy as np
Expand Down Expand Up @@ -396,9 +397,8 @@ def _detect(self, sentence, start_idx=0, **kwargs):
self.check_detector_initialized()
# 1. 自定义混淆集加入疑似错误词典
for confuse in self.custom_confusion:
idx = sentence.find(confuse)
if idx > -1:
maybe_err = [confuse, idx + start_idx, idx + len(confuse) + start_idx, ErrorType.confusion]
for i in re.finditer(confuse, sentence):
maybe_err = [confuse, i.span()[0] + start_idx, i.span()[1] + start_idx, ErrorType.confusion]
self._add_maybe_error_item(maybe_err, maybe_errors)

# 2. 专名错误检测
Expand Down

0 comments on commit 2499e79

Please sign in to comment.