update confusion

shibing624 · Feb 3, 2024 · eacef59
1 parent 2499e79
commit eacef59
Show file tree

Hide file tree

Showing 6 changed files with 10 additions and 9 deletions.
diff --git a/examples/kenlm/my_custom_confusion.txt b/examples/kenlm/my_custom_confusion.txt
@@ -11,4 +11,4 @@ iphonex iphoneX
 happt happen
 shylock shylock
 份额  份额
-天俺门 天安门
+天氨门 天安门
diff --git a/examples/kenlm/use_custom_confusion.py b/examples/kenlm/use_custom_confusion.py
@@ -19,7 +19,7 @@
         '上述承诺内容系本人真实意思表示',  # 正常
         '大家一哄而伞怎么回事',  # 成语
         '交通银行的份额没有减少',  # 误杀
-        '我爱北京天俺门',  # 漏召回
+        '北京天氨门，我爱北京天氨门',  # 漏召回
     ]
     m = Corrector()
     print(m.correct_batch(error_sentences))

diff --git a/examples/macbert/model_correction_pipeline_demo.py b/examples/macbert/model_correction_pipeline_demo.py
@@ -6,8 +6,7 @@
 import sys
 
 sys.path.append("../..")
-from pycorrector import MacBertCorrector
-from pycorrector import ConfusionCorrector
+from pycorrector import MacBertCorrector, ConfusionCorrector
 
 if __name__ == '__main__':
     error_sentences = [
@@ -30,11 +29,13 @@
         '因为爸爸在看录音机，所以我没得看',
         '不过在许多传统国家，女人向未得到平等',
         '我想喝小明同学。',  # 漏召回
+        '北京天氨门，我爱北京天氨门',  # 漏召回
     ]
 
     model1 = MacBertCorrector()
     # add confusion corrector for post process
-    confusion_dict = {"喝小明同学": "喝小茗同学", "老人让坐": "老人让座", "平净": "平静", "分知": "分支"}
+    confusion_dict = {"喝小明同学": "喝小茗同学", "老人让坐": "老人让座", "平净": "平静", "分知": "分支",
+                      "天氨门": "天安门"}
     model2 = ConfusionCorrector(custom_confusion_path_or_dict=confusion_dict)
     for line in error_sentences:
         r1 = model1.correct(line)

diff --git a/pycorrector/confusion_corrector.py b/pycorrector/confusion_corrector.py
@@ -58,7 +58,7 @@ def correct(self, sentence: str):
         # 自定义混淆集加入疑似错误词典
         for err, truth in self.custom_confusion.items():
             for i in re.finditer(err, sentence):
-                start,end = i.span()
+                start, end = i.span()
                 corrected_sentence = corrected_sentence[:start] + truth + corrected_sentence[end:]
                 details.append((err, truth, start))
         return {'source': sentence, 'target': corrected_sentence, 'errors': details}

diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -13,4 +13,5 @@ datasets
 tensorboardX
 paddlenlp
 paddlepaddle
-pytest
+pytest
+kenlm
diff --git a/requirements.txt b/requirements.txt
@@ -6,5 +6,4 @@ datasets
 numpy
 pandas
 six
-loguru
-kenlm
+loguru