chemical_equation_balancer/parser.py

204 lines
6.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
from validity_check import check_atom
def parse_atom(atom):
    """
    Convert one atom token of a formula into a dict.

    The token is an element symbol optionally followed by a decimal count,
    e.g. ``'H'``, ``'Cl'``, ``'O2'`` or ``'C12'``. The whole run of trailing
    digits is taken as the count (so multi-digit subscripts work) and a
    missing count means 1. The symbol is handed to ``check_atom`` before the
    result is built.

    NOTE(review): ``check_atom`` comes from ``validity_check`` and is assumed
    to raise for unknown element symbols - confirm against that module.

    :param atom: string form of the atom
    :return: dict form of the atom, ``{'element symbol': count}``
    :raises ValueError: if the token contains no element symbol at all
        (it is empty or consists only of digits)
    """
    # Everything left after removing the trailing digits is the symbol.
    symbol = atom.rstrip('0123456789')
    subscript = atom[len(symbol):]
    if not symbol:
        raise ValueError('无效的原子')
    check_atom(symbol)
    return {symbol: int(subscript) if subscript else 1}
def combine_same_atom(parsed_molecule):
    """
    Merge atoms of the same element by summing their counts.

    Elements appear in the result in the order of their first occurrence.
    The input is left untouched: the returned ``'atoms'`` list holds newly
    created dicts, so the caller's atom dicts are never modified.

    :param parsed_molecule: dict form of a formula with the keys
        ``'atoms'`` (list of ``{'element symbol': count}`` dicts),
        ``'coefficient'`` and ``'pretty_name'``
    :return: dict form of the formula with one atom entry per element;
        ``'coefficient'`` and ``'pretty_name'`` are passed through unchanged
    """
    # A dict keeps insertion order, which preserves first-occurrence order
    # while giving constant-time lookup per element.
    totals = {}
    for atom in parsed_molecule['atoms']:
        for element, count in atom.items():
            totals[element] = totals.get(element, 0) + count
    return {
        'atoms': [{element: count} for element, count in totals.items()],
        'coefficient': parsed_molecule['coefficient'],
        'pretty_name': parsed_molecule['pretty_name']
    }
def parse_atomic_clusters(atomic_clusters):
    """
    Convert one parenthesised atom group into a dict.

    The input is the token list of the group, starting with ``'('`` and
    ending with ``')'`` optionally followed by the group coefficient,
    e.g. ``(ClO)2`` -> ``['(', 'Cl', 'O', ')', '2']``. The caller's list is
    not modified.

    :param atomic_clusters: token list of the group as produced by
        parse_molecule
    :return: dict with ``'atoms'`` (one ``{'element symbol': count}`` per
        atom inside the group, NOT yet multiplied by the group coefficient),
        ``'coefficient'`` (the group coefficient, 1 when absent) and
        ``'pretty_name'`` (the group's content without the parentheses)
    :raises ValueError: if the closing parenthesis is missing, the trailing
        token is neither ``')'`` nor a number, or a count appears with no
        atom in front of it
    """
    # Drop the opening parenthesis (slicing copies, the input stays intact).
    body = list(atomic_clusters[1:])
    if not body:
        raise ValueError('括号不匹配')
    # Drop the closing parenthesis and read the number after it, if any,
    # as the group coefficient.
    if body[-1].isdigit():
        if len(body) < 2 or body[-2] != ')':
            raise ValueError('括号不匹配')
        coefficient = int(body[-1])
        body = body[:-2]
    elif body[-1] == ')':
        coefficient = 1
        # The parenthesis must be removed here as well, otherwise it would
        # be treated as an atom further down.
        body = body[:-1]
    else:
        raise ValueError('无效的原子团系数')
    # Attach every count to the atom directly in front of it.
    merged = []
    for token in body:
        if token.isdigit():
            if not merged:
                # A count with nothing before it has no atom to belong to.
                raise ValueError('系数错误')
            merged[-1] += token
        else:
            merged.append(token)
    return {
        'atoms': [parse_atom(token) for token in merged],
        'coefficient': coefficient,
        'pretty_name': ''.join(merged)
    }
def parse_molecule(molecule):
    """
    Convert a formula string into a dict.

    Elements are told apart by case: every element symbol starts with an
    upper-case letter. A formula may carry a leading (possibly multi-digit)
    coefficient and may contain any number of non-nested parenthesised
    groups, each optionally followed by its own coefficient.

    The result has the form::

        {
            'atoms': [{'element symbol': count}, {'element symbol': count}, ...],
            'coefficient': leading coefficient (1 when absent),
            'pretty_name': the formula without its leading coefficient
        }

    Atoms outside parentheses come first in ``'atoms'``, followed by the
    atoms of the groups (already multiplied by the group coefficient);
    entries of the same element are then merged by combine_same_atom.

    :param molecule: the formula, e.g. ``'2H2O'`` or ``'Ca(OH)2'``
    :return: dict form of the formula
    :raises ValueError: if the formula is empty, parentheses are unbalanced
        or nested, or a count has no atom in front of it
    """
    if not molecule:
        raise ValueError('化学式不能为空')

    # Split off the whole leading digit run as the coefficient.
    leading = re.match(r'[0-9]+', molecule)
    if leading:
        coefficient = int(leading.group())
        molecule = molecule[leading.end():]
        if not molecule:
            raise ValueError('化学式不能为空')
    else:
        coefficient = 1
    pretty_name = molecule

    # Split on element symbols and parentheses; the capturing group keeps
    # the separators, everything in between (the counts) is kept as well.
    tokens = [t for t in re.split(r'([A-Z][a-z]*|\(|\))', molecule) if t != '']

    top_level = []  # atoms outside parentheses, counts already attached
    groups = []     # one token list per parenthesised group
    previous_is_atom = False
    position = 0
    while position < len(tokens):
        token = tokens[position]
        if token == '(':
            try:
                closing = tokens.index(')', position + 1)
            except ValueError:
                raise ValueError('括号不匹配') from None
            group = tokens[position:closing + 1]
            if '(' in group[1:]:
                raise ValueError('不支持嵌套括号')
            position = closing + 1
            # The number right after ')' is the group coefficient. It is
            # always passed explicitly so parse_atomic_clusters receives
            # the same token shape for every group.
            if position < len(tokens) and tokens[position].isdigit():
                group.append(tokens[position])
                position += 1
            else:
                group.append('1')
            groups.append(group)
            previous_is_atom = False
        elif token == ')':
            # A closing parenthesis that was not consumed by a group.
            raise ValueError('括号不匹配')
        elif token.isdigit():
            # A bare number is the count of the atom directly before it.
            if not previous_is_atom:
                raise ValueError('系数错误')
            top_level[-1] += token
            position += 1
        else:
            top_level.append(token)
            previous_is_atom = True
            position += 1

    atoms = [parse_atom(token) for token in top_level]
    # Expand every group: multiply its atoms by the group coefficient.
    for group in groups:
        cluster = parse_atomic_clusters(group)
        multiplier = cluster['coefficient']
        for cluster_atom in cluster['atoms']:
            for element, count in cluster_atom.items():
                atoms.append({element: count * multiplier})
    return combine_same_atom({
        'atoms': atoms,
        'coefficient': coefficient,
        'pretty_name': pretty_name
    })
def parse_equation(eq):
    """
    Convert a chemical equation string into a dict.

    The two sides may be separated by ``=``, ``->`` or ``=>`` (the form
    produced by format_equation); molecules on one side are separated by
    ``+``. All whitespace is ignored.

    :param eq: the chemical equation, e.g. ``'H2 + O2 => H2O'``
    :return: ``{'left': [...], 'right': [...]}`` where every list entry is
        the dict form of one molecule as returned by parse_molecule
    :raises ValueError: if the equation does not consist of exactly two
        sides or one of the molecules is empty
    """
    equation = ''.join(eq.split())
    # Normalise both arrow spellings to '='. The two-character forms are
    # replaced first, so an existing '=>' is not turned into '=>>'.
    equation = equation.replace('=>', '=').replace('->', '=')
    sides = equation.split('=')
    if len(sides) != 2:
        raise ValueError('化学方程式错误')
    left = sides[0].split('+')
    right = sides[1].split('+')
    # An empty entry means a missing side or a dangling '+'.
    if not all(left) or not all(right):
        raise ValueError('化学方程式错误')
    return {
        'left': [parse_molecule(molecule) for molecule in left],
        'right': [parse_molecule(molecule) for molecule in right]
    }
def format_molecule(molecule):
    """
    Render the dict form of a formula as a string.

    The coefficient is written in front of the formula unless it equals 1.

    :param molecule: dict form of the formula; the keys ``'coefficient'``
        and ``'pretty_name'`` are read
    :return: string form of the formula, e.g. ``'2H2O'``
    """
    count = molecule['coefficient']
    name = molecule['pretty_name']
    return name if count == 1 else str(count) + name
def format_equation(eq):
    """
    Render the dict form of a chemical equation as a string, coefficients
    included.

    Molecules on one side are joined with ``' + '`` and the two sides are
    joined with ``' => '``.

    :param eq: dict form of the equation with the keys ``'left'`` and
        ``'right'``, each a list of molecule dicts
    :return: string form of the equation
    """
    reactants, products = eq['left'], eq['right']
    lhs = ' + '.join(map(format_molecule, reactants))
    rhs = ' + '.join(map(format_molecule, products))
    return lhs + ' => ' + rhs
if __name__ == '__main__':
    # Manual smoke test: read one formula from stdin and print its parsed form.
    formula = input('测试parser请输入分子式')
    print('解析结果:', parse_molecule(formula))