# chemical_equation_balancer/parser.py
#
# 204 lines
# 6.5 KiB
# Python
# Raw Permalink Normal View History
#
# 2022-12-10 16:30:23 +08:00
import re
from validity_check import check_atom
def parse_atom(atom):
    """Convert one atom token of a formula into a single-entry dict.

    A token is an element symbol optionally followed by a decimal count,
    for example ``'O'``, ``'Cl'``, ``'H2'`` or ``'C12'``.  The count may
    have any number of digits and defaults to 1 when absent.  The symbol
    part is passed to ``check_atom`` (imported from ``validity_check``)
    for validation before the result is built.

    :param atom: string form of the atom
    :return: dict form of the atom, ``{symbol: count}``
    :raises ValueError: if the token is empty or consists of digits only
        and ``check_atom`` did not already reject it
    """
    # Split on ASCII digits only: str.isdigit() also accepts characters
    # such as superscripts that int() cannot convert.
    symbol = atom.rstrip('0123456789')
    count_text = atom[len(symbol):]

    if not symbol:
        if atom:
            # Digits-only token: give the validator the first chance to
            # report it in its own way.
            check_atom(atom)
        raise ValueError('无效的原子: ' + repr(atom))

    check_atom(symbol)
    return {symbol: int(count_text) if count_text else 1}
def combine_same_atom(parsed_molecule):
    """Merge repeated elements of a parsed formula by summing their counts.

    Elements keep the order of their first occurrence.  The input is not
    modified: the returned ``'atoms'`` list holds freshly built dicts.

    :param parsed_molecule: dict form of a formula with the keys
        ``'atoms'`` (list of ``{symbol: count}`` dicts), ``'coefficient'``
        and ``'pretty_name'``
    :return: a new dict of the same shape in which every element appears
        exactly once
    """
    totals = {}
    for atom in parsed_molecule['atoms']:
        for element, count in atom.items():
            totals[element] = totals.get(element, 0) + count

    return {
        # dicts preserve insertion order, so first-occurrence order holds.
        'atoms': [{element: count} for element, count in totals.items()],
        'coefficient': parsed_molecule['coefficient'],
        'pretty_name': parsed_molecule['pretty_name'],
    }
def parse_atomic_clusters(atomic_clusters):
    """Convert the token list of one parenthesised group into a dict.

    The tokens are those produced by ``parse_molecule``, for example
    ``(ClO)2`` arrives as ``['(', 'Cl', 'O', ')', '2']``.  The trailing
    count is optional; without it the group coefficient is 1.

    :param atomic_clusters: token list starting with ``'('`` and ending
        with ``')'`` or with ``')'`` followed by a count
    :return: ``{'atoms': [...], 'coefficient': int, 'pretty_name': str}``
        where the atom counts are NOT yet multiplied by the coefficient
    :raises ValueError: if the list is empty after the opening bracket,
        does not end with ``')'`` / ``')', count``, or a count appears
        before any atom inside the group
    """
    # Drop the opening bracket; copy so the caller's list is left alone.
    tokens = list(atomic_clusters[1:])
    if not tokens:
        raise ValueError('无效的原子团系数')

    # Strip the closing bracket and read the optional group coefficient.
    if tokens[-1].isdigit():
        coefficient = int(tokens[-1])
        if len(tokens) < 2 or tokens[-2] != ')':
            raise ValueError('括号不匹配')
        tokens = tokens[:-2]
    elif tokens[-1] == ')':
        coefficient = 1
        # The bracket has to go here too, otherwise it is parsed as an atom.
        tokens = tokens[:-1]
    else:
        raise ValueError('无效的原子团系数')

    # Attach every count token to the atom token that precedes it.
    merged = []
    for token in tokens:
        if token.isdigit():
            if not merged:
                # A count with nothing in front of it has no owner.
                raise ValueError('系数错误')
            merged[-1] += token
        else:
            merged.append(token)

    return {
        'atoms': [parse_atom(token) for token in merged],
        'coefficient': coefficient,
        'pretty_name': ''.join(merged),
    }
def parse_molecule(molecule):
    """Convert a chemical formula string into its dict form.

    Elements are told apart by case: every element symbol starts with an
    upper-case letter.  A formula may carry a leading integer coefficient
    (any number of digits) and any number of non-nested parenthesised
    groups, each optionally followed by a count.  The result is::

        {
            'atoms': [{symbol: count}, {symbol: count}, ...],
            'coefficient': leading coefficient (1 when absent),
            'pretty_name': the formula without its leading coefficient
        }

    Atoms outside groups are listed first, followed by atoms from groups;
    repeated elements are merged by ``combine_same_atom``.

    :param molecule: the formula, e.g. ``'2H2O'`` or ``'Ca(OH)2'``
    :return: dict form of the formula
    :raises ValueError: if the formula is empty, brackets do not match,
        or a count has no atom in front of it
    """
    if not molecule:
        raise ValueError('化学式不能为空')

    # Peel off the whole leading coefficient, not just its first digit.
    leading = re.match(r'[0-9]+', molecule)
    if leading:
        coefficient = int(leading.group())
        molecule = molecule[leading.end():]
    else:
        coefficient = 1
    pretty_name = molecule

    # Split on element symbols and brackets, keeping the separators.
    tokens = [
        token
        for token in re.split(r'([A-Z][a-z]*|\(|\))', molecule)
        if token != ''
    ]

    plain_tokens = []   # atoms outside any group, counts already attached
    group_atoms = []    # atoms from groups, already multiplied out
    previous_is_atom = False
    position = 0
    while position < len(tokens):
        token = tokens[position]

        if token == '(':
            # Each group is parsed on its own so several groups can coexist.
            try:
                closing = tokens.index(')', position + 1)
            except ValueError:
                raise ValueError('括号不匹配') from None
            group = tokens[position:closing + 1]
            position = closing + 1
            if position < len(tokens) and tokens[position].isdigit():
                group.append(tokens[position])
                position += 1

            parsed_group = parse_atomic_clusters(group)
            multiplier = parsed_group['coefficient']
            for each in parsed_group['atoms']:
                for element, quantity in each.items():
                    group_atoms.append({element: quantity * multiplier})
            previous_is_atom = False
            continue

        if token == ')':
            # A closing bracket that no opening bracket claimed.
            raise ValueError('括号不匹配')

        if token.isdigit():
            # A bare number is the count of the atom right before it.
            if not previous_is_atom:
                raise ValueError('系数错误')
            plain_tokens[-1] += token
            previous_is_atom = False
        else:
            plain_tokens.append(token)
            previous_is_atom = True
        position += 1

    atoms = [parse_atom(token) for token in plain_tokens]
    atoms.extend(group_atoms)

    parsed_atom = {
        'atoms': atoms,
        'coefficient': coefficient,
        'pretty_name': pretty_name,
    }
    return combine_same_atom(parsed_atom)
def parse_equation(eq):
    """Convert a chemical equation string into its dict form.

    The two sides may be separated by ``=``, ``->`` or ``=>`` (the form
    ``format_equation`` produces), and terms on each side by ``+``.  All
    whitespace is ignored.

    :param eq: the equation, e.g. ``'H2 + O2 => H2O'``
    :return: ``{'left': [...], 'right': [...]}`` where each entry is the
        result of ``parse_molecule`` for one term
    :raises ValueError: if the equation does not have exactly two sides
        or one of its terms is empty
    """
    equation = ''.join(eq.split())

    # '=>' and '->' are listed before '=' so the two-character arrows are
    # consumed whole instead of leaving a stray '>' on the right side.
    sides = re.split(r'=>|->|=', equation)
    if len(sides) != 2:
        raise ValueError('化学方程式错误')

    left_terms = sides[0].split('+')
    right_terms = sides[1].split('+')
    # Reject empty terms up front, before any molecule is parsed.
    if not all(left_terms) or not all(right_terms):
        raise ValueError('化学方程式错误')

    return {
        'left': [parse_molecule(term) for term in left_terms],
        'right': [parse_molecule(term) for term in right_terms],
    }
# 2022-12-10 21:38:17 +08:00
def format_molecule(molecule):
    """Render the dict form of a formula back into a string.

    :param molecule: dict form of a formula; the keys ``'coefficient'``
        and ``'pretty_name'`` are read
    :return: ``pretty_name`` prefixed with the coefficient, or just
        ``pretty_name`` when the coefficient equals 1
    """
    factor = molecule['coefficient']
    if factor == 1:
        return molecule['pretty_name']
    return str(factor) + molecule['pretty_name']
# 2022-12-10 21:38:17 +08:00
def format_equation(eq):
    """Render the dict form of an equation as a string with coefficients.

    :param eq: dict form of an equation with the keys ``'left'`` and
        ``'right'``, each a list of formula dicts
    :return: the terms of each side joined by ``' + '`` and the two sides
        joined by ``' => '``
    """
    reactants, products = eq['left'], eq['right']
    left_text = ' + '.join(map(format_molecule, reactants))
    right_text = ' + '.join(map(format_molecule, products))
    return left_text + ' => ' + right_text
if __name__ == '__main__':
    # Manual check of the parser: read one formula from stdin, parse it
    # and print the resulting dict.
    print('解析结果:', parse_molecule(input('测试parser请输入分子式')))