202 lines
6.4 KiB
Python
202 lines
6.4 KiB
Python
import re
|
||
from validity_check import check_atom
|
||
|
||
|
||
def parse_atom(atom):
    """
    Convert one atom token of a formula into a single-entry dictionary.

    A token is an element symbol optionally followed by a count of any
    number of digits, e.g. 'O', 'Cl', 'H2' or 'C12'.

    :param atom: string form of the atom
    :return: {symbol: count}; the count is 1 when the token has no digits
    :raises ValueError: if the token is not letters followed by optional
        digits (after check_atom has had the chance to reject it itself)
    """
    match = re.fullmatch(r'([A-Za-z]+)(\d*)', atom)
    if match is None:
        # Give the validator the first chance to reject the token in its own
        # way; if it lets the token through, it is still unparseable here.
        check_atom(atom)
        raise ValueError('无效的原子: {}'.format(atom))
    symbol, count = match.groups()
    check_atom(symbol)
    # Reading every trailing digit (not just the last one) keeps counts of
    # 10 or more attached to the right symbol.
    return {symbol: int(count) if count else 1}
|
||
|
||
|
||
def combine_same_atom(parsed_molecule):
    """
    Merge repeated elements of a parsed formula by summing their counts.

    Elements keep the order in which they first appear. The input dictionary
    and the atom dictionaries inside it are left unmodified; the result is
    built from fresh dictionaries.

    :param parsed_molecule: dictionary form of a formula, with the keys
        'atoms' (list of {symbol: count} dicts), 'coefficient' and
        'pretty_name'
    :return: a new dictionary of the same shape in which every element
        appears in exactly one entry of 'atoms'
    """
    merged = []
    # Maps each element symbol to its index in `merged`, so a repeated
    # element is found without rescanning the list.
    position = {}
    for atom in parsed_molecule['atoms']:
        for element, count in atom.items():
            if element in position:
                merged[position[element]][element] += count
            else:
                position[element] = len(merged)
                # Copy into a new dict so later additions never touch the
                # caller's data.
                merged.append({element: count})
    return {
        'atoms': merged,
        'coefficient': parsed_molecule['coefficient'],
        'pretty_name': parsed_molecule['pretty_name']
    }
|
||
|
||
|
||
def parse_atomic_clusters(atomic_clusters):
    """
    Convert the tokens of one parenthesised atom group into a dictionary.

    :param atomic_clusters: token list of the group as produced by
        parse_molecule, e.g. (ClO)2 -> ['(', 'Cl', 'O', ')', '2'];
        the trailing coefficient token is optional
    :return: {'atoms': [{symbol: count}, ...],
              'coefficient': group coefficient (1 when absent),
              'pretty_name': the group's content without parentheses}
    :raises ValueError: if the tokens do not start with '(', if the group is
        not closed by ')' (optionally followed by a numeric coefficient), or
        if a count appears with no atom before it
    """
    # Work on a copy so the caller's list is never modified.
    tokens = list(atomic_clusters)
    if not tokens or tokens[0] != '(':
        raise ValueError('括号不匹配')
    tokens = tokens[1:]

    # An all-digit last token is the coefficient of the whole group.
    if tokens and tokens[-1].isdigit():
        coefficient = int(tokens[-1])
        tokens = tokens[:-1]
    else:
        coefficient = 1

    # The closing parenthesis must be removed in both cases; otherwise it
    # would be handed to parse_atom as if it were an atom.
    if not tokens or tokens[-1] != ')':
        raise ValueError('无效的原子团系数')
    tokens = tokens[:-1]

    # Attach each numeric token to the atom that precedes it.
    merged = []
    for token in tokens:
        if token == '':
            continue
        if token.isdigit():
            if not merged:
                raise ValueError('系数错误')
            merged[-1] += token
        else:
            merged.append(token)

    return {
        'atoms': [parse_atom(token) for token in merged],
        'coefficient': coefficient,
        'pretty_name': ''.join(merged)
    }
|
||
|
||
|
||
def parse_molecule(molecule):
    """
    Convert a chemical formula into its dictionary form.

    Elements are told apart by letter case: each element symbol starts with
    an upper-case letter, optionally followed by lower-case letters. A
    formula may contain several elements and several parenthesised groups.
    The result has the shape:
    {
        'atoms': [ {symbol: count}, {symbol: count}, ... ],
        'coefficient': leading integer of the formula (1 when absent),
        'pretty_name': the formula text without the leading coefficient
    }

    A parenthesised group is taken to end at the first ')' after its '('.
    Each group is parsed on its own by parse_atomic_clusters, and its atom
    counts are multiplied by the group's coefficient before being merged
    with the other atoms.

    :param molecule: the formula, e.g. '2H2O' or 'Ca(OH)2'
    :return: dictionary form of the formula
    :raises ValueError: if the formula is empty, a parenthesis has no
        partner, or a count appears with no atom before it
    """
    if not molecule:
        raise ValueError('化学式不能为空')

    # All leading digits (not just the first one) form the coefficient.
    leading = re.match(r'\d+', molecule)
    if leading:
        coefficient = int(leading.group())
        molecule = molecule[leading.end():]
    else:
        coefficient = 1
    pretty_name = molecule

    # Split on element symbols and parentheses; the capture group makes
    # re.split keep those delimiters as tokens alongside the text between
    # them (the counts).
    tokens = [
        token
        for token in re.split(r'([A-Z][a-z]*|\(|\))', molecule)
        if token != ''
    ]

    plain_atoms = []  # atoms outside any parentheses, with counts attached
    groups = []       # one token list per parenthesised group
    pos = 0
    while pos < len(tokens):
        token = tokens[pos]
        if token == '(':
            try:
                close = tokens.index(')', pos + 1)
            except ValueError:
                raise ValueError('括号不匹配') from None
            group = tokens[pos:close + 1]
            pos = close + 1
            # A number right after ')' is the coefficient of the group.
            if pos < len(tokens) and tokens[pos].isdigit():
                group.append(tokens[pos])
                pos += 1
            groups.append(group)
            continue
        if token == ')':
            # A closing parenthesis that no '(' claimed.
            raise ValueError('括号不匹配')
        if token.isdigit():
            # A bare number is the count of the atom just before it.
            if not plain_atoms:
                raise ValueError('系数错误')
            plain_atoms[-1] += token
        else:
            plain_atoms.append(token)
        pos += 1

    atoms = [parse_atom(token) for token in plain_atoms]

    # Expand every group separately so formulas with more than one group
    # keep each group's own coefficient.
    for group in groups:
        parsed_group = parse_atomic_clusters(group)
        multiplier = parsed_group['coefficient']
        for atom in parsed_group['atoms']:
            atoms.append({
                element: count * multiplier
                for element, count in atom.items()
            })

    return combine_same_atom({
        'atoms': atoms,
        'coefficient': coefficient,
        'pretty_name': pretty_name
    })
|
||
|
||
|
||
def parse_equation(eq):
    """
    Convert a chemical equation into its dictionary form.

    The two sides may be separated by '=', '->' or '=>'; molecules on each
    side are separated by '+'. Spaces are ignored.

    :param eq: the equation, e.g. 'H2 + O2 = H2O' or 'H2 + O2 => H2O'
    :return: {'left': [parsed molecule, ...], 'right': [parsed molecule, ...]}
    :raises ValueError: if the equation does not consist of exactly two
        sides joined by one separator
    """
    eq = eq.replace(' ', '')
    # Match the two-character separators before the bare '=' so that '=>'
    # is consumed as a whole and no stray '>' is left on the right side.
    sides = re.split(r'=>|->|=', eq)
    if len(sides) != 2:
        raise ValueError('方程式格式错误')
    left, right = sides
    return {
        'left': [parse_molecule(molecule) for molecule in left.split('+')],
        'right': [parse_molecule(molecule) for molecule in right.split('+')]
    }
|
||
|
||
|
||
def format_molecule(molecule):
    """
    Render the dictionary form of a formula as a string.

    :param molecule: dictionary form of a formula; the keys 'coefficient'
        and 'pretty_name' are read
    :return: 'pretty_name' prefixed with the coefficient, or 'pretty_name'
        alone when the coefficient equals 1
    """
    count = molecule['coefficient']
    if count != 1:
        return str(count) + molecule['pretty_name']
    return molecule['pretty_name']
|
||
|
||
|
||
def format_equation(eq):
    """
    Render the dictionary form of an equation as a string, coefficients
    included.

    :param eq: dictionary form of an equation, with the keys 'left' and
        'right' each holding a list of parsed molecules
    :return: the molecules of each side joined by ' + ', and the two sides
        joined by ' => '
    """
    reactants, products = eq['left'], eq['right']
    lhs_parts = list(map(format_molecule, reactants))
    rhs_parts = list(map(format_molecule, products))
    return ' => '.join((' + '.join(lhs_parts), ' + '.join(rhs_parts)))
|
||
|
||
|
||
if __name__ == '__main__':
    # Manual check of the parser: read one formula from standard input and
    # print its parsed dictionary form.
    formula = input('测试parser:请输入分子式:')
    print('解析结果:', parse_molecule(formula))
|