From eacef5925d340534951337b7c3cd5d40075a356a Mon Sep 17 00:00:00 2001
From: shibing624 <shibing624@126.com>
Date: Sat, 3 Feb 2024 20:57:28 +0800
Subject: [PATCH] update confusion examples; move kenlm to dev requirements

---
 examples/kenlm/my_custom_confusion.txt             | 2 +-
 examples/kenlm/use_custom_confusion.py             | 2 +-
 examples/macbert/model_correction_pipeline_demo.py | 7 ++++---
 pycorrector/confusion_corrector.py                 | 2 +-
 requirements-dev.txt                               | 3 ++-
 requirements.txt                                   | 3 +--
 6 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/examples/kenlm/my_custom_confusion.txt b/examples/kenlm/my_custom_confusion.txt
index cf6188e3..b73939ea 100644
--- a/examples/kenlm/my_custom_confusion.txt
+++ b/examples/kenlm/my_custom_confusion.txt
@@ -11,4 +11,4 @@ iphonex iphoneX
 happt happen
 shylock shylock
 份额  份额
-天俺门 天安门
\ No newline at end of file
+天氨门 天安门
\ No newline at end of file
diff --git a/examples/kenlm/use_custom_confusion.py b/examples/kenlm/use_custom_confusion.py
index 29ce492d..61b05a2e 100644
--- a/examples/kenlm/use_custom_confusion.py
+++ b/examples/kenlm/use_custom_confusion.py
@@ -19,7 +19,7 @@
         '上述承诺内容系本人真实意思表示',  # 正常
         '大家一哄而伞怎么回事',  # 成语
         '交通银行的份额没有减少',  # 误杀
-        '我爱北京天俺门',  # 漏召回
+        '北京天氨门，我爱北京天氨门',  # 漏召回
     ]
     m = Corrector()
     print(m.correct_batch(error_sentences))
diff --git a/examples/macbert/model_correction_pipeline_demo.py b/examples/macbert/model_correction_pipeline_demo.py
index dde4c4c6..ab981a27 100644
--- a/examples/macbert/model_correction_pipeline_demo.py
+++ b/examples/macbert/model_correction_pipeline_demo.py
@@ -6,8 +6,7 @@
 import sys
 
 sys.path.append("../..")
-from pycorrector import MacBertCorrector
-from pycorrector import ConfusionCorrector
+from pycorrector import MacBertCorrector, ConfusionCorrector
 
 if __name__ == '__main__':
     error_sentences = [
@@ -30,11 +29,13 @@
         '因为爸爸在看录音机，所以我没得看',
         '不过在许多传统国家，女人向未得到平等',
         '我想喝小明同学。',  # 漏召回
+        '北京天氨门，我爱北京天氨门',  # 漏召回
     ]
 
     model1 = MacBertCorrector()
     # add confusion corrector for post process
-    confusion_dict = {"喝小明同学": "喝小茗同学", "老人让坐": "老人让座", "平净": "平静", "分知": "分支"}
+    confusion_dict = {"喝小明同学": "喝小茗同学", "老人让坐": "老人让座", "平净": "平静", "分知": "分支",
+                      "天氨门": "天安门"}
     model2 = ConfusionCorrector(custom_confusion_path_or_dict=confusion_dict)
     for line in error_sentences:
         r1 = model1.correct(line)
diff --git a/pycorrector/confusion_corrector.py b/pycorrector/confusion_corrector.py
index 1867b858..7c1e99e4 100644
--- a/pycorrector/confusion_corrector.py
+++ b/pycorrector/confusion_corrector.py
@@ -58,7 +58,7 @@ def correct(self, sentence: str):
         # 自定义混淆集加入疑似错误词典
         for err, truth in self.custom_confusion.items():
             for i in re.finditer(err, sentence):
-                start,end = i.span()
+                start, end = i.span()
                 corrected_sentence = corrected_sentence[:start] + truth + corrected_sentence[end:]
                 details.append((err, truth, start))
         return {'source': sentence, 'target': corrected_sentence, 'errors': details}
diff --git a/requirements-dev.txt b/requirements-dev.txt
index df4b98d4..0bb077ea 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -13,4 +13,5 @@ datasets
 tensorboardX
 paddlenlp
 paddlepaddle
-pytest
\ No newline at end of file
+pytest
+kenlm
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 4c7c46eb..5c9d9ecc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,5 +6,4 @@ datasets
 numpy
 pandas
 six
-loguru
-kenlm
\ No newline at end of file
+loguru
\ No newline at end of file