医疗名词中否定词的提取
本文最后更新于 2024-03-08,文章内容可能已经过时。
背景:
为了搭建疾病和症状相关的知识图谱
从电子病历中获得出入院信息进行NER识别之后得到名词实体列表
需要取出其中的症状。观察后发现实体词大致可分为以下几类,我们需要保留的是症状(异常词),而不是正常词
由于不能使用其他大模型,只能用目前开发的大模型,但准确率不高,所以需要写脚本辅助实现
| 示例 | 类别 |
| --- | --- |
| 左心功能低下 | 不带否定词的症状 |
| 生命体征正常 | 正常词 |
| ALT1107U/L | 医疗名称 |
| 无肝区明显不适 | 带否定词的正常词 |
| 头晕伴双侧下肢无力 | 带否定词的症状 |
实现:
if __name__ == "__main__":
    # Split NER entity lines into "normal", "abnormal" and "unsure" files.
    #
    # Pass 1 applies hand-written regex rules to every line of the input file.
    # Pass 2 takes the lines no rule matched, splits them at the first
    # negation character "无", and asks the helper functions is_symptom() /
    # is_position() (defined elsewhere in this file) for a verdict.
    #
    # NOTE(review): the published source had lost its indentation; the block
    # structure below was reconstructed from the statements themselves.
    # Confirm against the original script if exact progress output matters.

    # Imported locally so the script does not depend on an import that is
    # outside the visible part of the file.
    import re

    origin_file = "正式/ner_result.txt"
    normal_file = "正式/正常实体词.txt"
    abnormal_file = "正式/异常实体词.txt"
    unsure_file = "正式/不确定实体词.txt"

    normal_line = []    # lines classified as normal findings
    abnormal_line = []  # lines classified as symptoms / abnormal findings
    unsure_line = []    # lines that neither the rules nor the helpers decided
    unsure1_line = []   # lines left undecided by the regex pass
    count = 0           # running index used only in the progress output

    re_flags = re.M | re.I

    # Rules marking a line as normal.
    re_normal_list = [
        re.compile(r"^(?!.*诱因)(?=.*无)(?=.*明显).*$", re_flags),
        re.compile(r".*无.*障碍.*", re_flags),
    ]
    # Rules marking a line as abnormal; these are checked first, so they win
    # over the normal rules when both would match.
    re_abnormal_list = [
        re.compile(r".*无法.*", re_flags),
        re.compile(r".*无力.*", re_flags),
        re.compile(r".*无名指.*", re_flags),
        re.compile(r".*无.*好转", re_flags),
        re.compile(r"^(?!无明显诱因$).*无明显诱因.*", re_flags),
    ]

    # Pass 1: rule-based split. Lines keep their trailing newline so they can
    # be written back unchanged with writelines().
    with open(origin_file, "r", encoding="utf-8") as f:
        for line in f:
            if any(pattern.match(line) for pattern in re_abnormal_list):
                abnormal_line.append(line)
            elif any(pattern.match(line) for pattern in re_normal_list):
                normal_line.append(line)
            else:
                unsure1_line.append(line)

    # Pass 2: helper-based split of the remaining lines.
    # A set mirrors normal_line so the duplicate check is not a list scan.
    normal_seen = set(normal_line)
    for line in unsure1_line:
        # A line already recorded as normal is dropped entirely: it is not
        # re-evaluated and not added to any list again.
        if line in normal_seen:
            continue

        index = line.find("无")
        if index > 0:
            # Text on both sides of "无". Per the original author's note, the
            # right side is expected to be judged as symptom/disease and the
            # left side as body part; the helpers' exact rules are not
            # visible here.
            left_part = line[:index]
            right_part = line[index:]
            results = (is_symptom(right_part), is_position(left_part))
            # An abnormal verdict from either side takes priority.
            if "abnormal" in results:
                verdict = "abnormal"
            elif "normal" in results:
                verdict = "normal"
            else:
                verdict = None
            if verdict is not None:
                print(count, left_part, verdict)
                count += 1
        else:
            # index == 0: the line starts with "无", so the whole line is the
            # right-hand part and goes to is_symptom().
            # index == -1: no "无" at all, the whole line goes to
            # is_position().
            checker = is_symptom if index == 0 else is_position
            result = checker(line)
            verdict = result if result in ("normal", "abnormal") else None
            # Progress line for every helper verdict, decided or not.
            print(count, line.replace("\n", ""), result)
            count += 1

        if verdict == "abnormal":
            abnormal_line.append(line)
        elif verdict == "normal":
            normal_line.append(line)
            normal_seen.add(line)
        else:
            unsure_line.append(line)

    with open(normal_file, "w", encoding="utf-8") as f:
        f.writelines(normal_line)
    with open(abnormal_file, "w", encoding="utf-8") as f:
        f.writelines(abnormal_line)
    with open(unsure_file, "w", encoding="utf-8") as f:
        f.writelines(unsure_line)

    # Same exit status as before, without relying on the site-provided
    # exit() helper.
    raise SystemExit(0)
本文是原创文章,采用 CC BY-NC-ND 4.0 协议,完整转载请注明来自 周日
评论
匿名评论
隐私政策
你无需删除空行,直接评论以获取最佳展示效果