Skip to content

Commit

Permalink
update confusion
Browse files (browse the repository at this point in the history)
  • Loading branch information
shibing624 committed Feb 3, 2024
1 parent 2499e79 commit eacef59
Show file tree
Hide file tree
Showing 6 changed files with 10 additions and 9 deletions.
2 changes: 1 addition & 1 deletion examples/kenlm/my_custom_confusion.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ iphonex iphoneX
happt happen
shylock shylock
份额 份额
天俺门 天安门
天氨门 天安门
2 changes: 1 addition & 1 deletion examples/kenlm/use_custom_confusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
'上述承诺内容系本人真实意思表示', # 正常
'大家一哄而伞怎么回事', # 成语
'交通银行的份额没有减少', # 误杀
'我爱北京天俺门', # 漏召回
'北京天氨门,我爱北京天氨门', # 漏召回
]
m = Corrector()
print(m.correct_batch(error_sentences))
Expand Down
7 changes: 4 additions & 3 deletions examples/macbert/model_correction_pipeline_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
import sys

sys.path.append("../..")
from pycorrector import MacBertCorrector
from pycorrector import ConfusionCorrector
from pycorrector import MacBertCorrector, ConfusionCorrector

if __name__ == '__main__':
error_sentences = [
Expand All @@ -30,11 +29,13 @@
'因为爸爸在看录音机,所以我没得看',
'不过在许多传统国家,女人向未得到平等',
'我想喝小明同学。', # 漏召回
'北京天氨门,我爱北京天氨门', # 漏召回
]

model1 = MacBertCorrector()
# add confusion corrector for post process
confusion_dict = {"喝小明同学": "喝小茗同学", "老人让坐": "老人让座", "平净": "平静", "分知": "分支"}
confusion_dict = {"喝小明同学": "喝小茗同学", "老人让坐": "老人让座", "平净": "平静", "分知": "分支",
"天氨门": "天安门"}
model2 = ConfusionCorrector(custom_confusion_path_or_dict=confusion_dict)
for line in error_sentences:
r1 = model1.correct(line)
Expand Down
2 changes: 1 addition & 1 deletion pycorrector/confusion_corrector.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def correct(self, sentence: str):
# 自定义混淆集加入疑似错误词典
for err, truth in self.custom_confusion.items():
for i in re.finditer(err, sentence):
start,end = i.span()
start, end = i.span()
corrected_sentence = corrected_sentence[:start] + truth + corrected_sentence[end:]
details.append((err, truth, start))
return {'source': sentence, 'target': corrected_sentence, 'errors': details}
Expand Down
3 changes: 2 additions & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ datasets
tensorboardX
paddlenlp
paddlepaddle
pytest
pytest
kenlm
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,4 @@ datasets
numpy
pandas
six
loguru
kenlm
loguru

0 comments on commit eacef59

Please sign in to comment.