159 lines
4.9 KiB
Python
159 lines
4.9 KiB
Python
import re
|
|
from validity_check import check_atom
|
|
|
|
|
|
def parse_atom(atom):
    """Convert one atom token of a chemical formula into a single-entry dict.

    The token is an element symbol optionally followed by a count, for
    example ``'H'``, ``'Cl'``, ``'O2'`` or ``'C12'``. The symbol (without the
    count) is handed to ``check_atom`` before the result is built.

    :param atom: string form of the atom
    :return: ``{symbol: count}``; the count is 1 when the token carries no
        trailing digits
    :raises ValueError: if the token has no symbol part (it is empty or
        consists of digits only)
    """
    # The whole trailing run of digits is the count, so a multi-digit count
    # such as 'C12' is read as 12 rather than being cut at the last character.
    symbol = atom.rstrip('0123456789')
    if not symbol:
        raise ValueError('无效的原子: {!r}'.format(atom))
    check_atom(symbol)
    digits = atom[len(symbol):]
    return {symbol: int(digits) if digits else 1}
|
|
|
|
|
|
def combine_same_atom(parsed_molecule):
    """Merge atoms of the same element, summing their counts.

    Elements keep the order of their first appearance. The input is left
    untouched: the returned ``'atoms'`` list holds newly created dicts, so
    the caller's atom dicts are never modified.

    :param parsed_molecule: dict form of a formula with the keys ``'atoms'``
        (list of ``{symbol: count}`` dicts), ``'coefficient'`` and
        ``'pretty_name'``
    :return: dict of the same shape with one ``{symbol: count}`` entry per
        distinct element; ``'coefficient'`` and ``'pretty_name'`` are passed
        through unchanged
    """
    # A plain dict preserves insertion order, which gives "first appearance"
    # ordering for free while summing in a single pass.
    totals = {}
    for atom in parsed_molecule['atoms']:
        for element, count in atom.items():
            totals[element] = totals.get(element, 0) + count
    return {
        'atoms': [{element: count} for element, count in totals.items()],
        'coefficient': parsed_molecule['coefficient'],
        'pretty_name': parsed_molecule['pretty_name'],
    }
|
|
|
|
|
|
def parse_atomic_clusters(atomic_clusters):
    """Convert a tokenised atomic cluster (parenthesised group) into a dict.

    :param atomic_clusters: token sequence for one cluster. The first token
        is the opening parenthesis (it is dropped without being inspected)
        and the last token is ``')'`` optionally followed by the cluster
        multiplier, e.g. ``(ClO)2`` -> ``['(', 'Cl', 'O', ')2']``
    :return: dict with ``'atoms'`` (one ``parse_atom`` result per inner
        token, counts not yet multiplied), ``'coefficient'`` (the multiplier,
        1 when none is written) and ``'pretty_name'`` (the inner tokens
        joined together)
    :raises ValueError: if the sequence is too short to hold both
        parentheses, or the last token is not ``')'`` followed by an
        optional whole number
    """
    if len(atomic_clusters) < 2 or not atomic_clusters[-1].startswith(')'):
        raise ValueError('无效的原子团系数')

    # Everything after ')' is the multiplier; reading the whole run keeps
    # multi-digit multipliers such as ')12' intact.
    multiplier = atomic_clusters[-1][1:]
    if not multiplier:
        coefficient = 1
    elif multiplier.isdecimal():
        coefficient = int(multiplier)
    else:
        raise ValueError('无效的原子团系数')

    # Strip both parenthesis tokens, including a bare ')' with no multiplier.
    members = atomic_clusters[1:-1]
    return {
        'atoms': [parse_atom(member) for member in members],
        'coefficient': coefficient,
        'pretty_name': ''.join(members),
    }
|
|
|
|
|
|
def parse_molecule(molecule):
    """Convert a chemical formula into its dict form.

    Elements are told apart by case: a symbol is one uppercase letter
    followed by any number of lowercase letters. A formula may start with a
    whole-number coefficient and may contain parenthesised groups (nested
    groups included), each optionally followed by a multiplier that applies
    to every atom in the group.

    The result has the shape::

        {
            'atoms': [{symbol: count}, {symbol: count}, ...],
            'coefficient': leading coefficient (1 when absent),
            'pretty_name': the formula without its leading coefficient
        }

    Atoms written outside parentheses are listed first, followed by atoms
    from parenthesised groups; the list is then passed through
    ``combine_same_atom`` so each element appears once.

    :param molecule: the formula, e.g. ``'2H2O'`` or ``'Fe2(SO4)3'``
    :return: dict form of the formula
    :raises ValueError: if the formula is empty, parentheses do not match,
        a group is empty, or a number has nothing to apply to
    """
    # Read the whole leading run of digits so '12H2O' has coefficient 12.
    leading = re.match(r'[0-9]+', molecule)
    if leading:
        coefficient = int(leading.group())
        molecule = molecule[leading.end():]
    else:
        coefficient = 1
    if not molecule:
        raise ValueError('化学式不能为空')

    top_level, grouped = _collect_formula_atoms(molecule)
    return combine_same_atom({
        'atoms': top_level + grouped,
        'coefficient': coefficient,
        'pretty_name': molecule,
    })


def _collect_formula_atoms(formula):
    """Tokenise *formula* and return ``(top_level, grouped)`` atom lists.

    ``top_level`` holds the atoms written outside any parentheses and
    ``grouped`` the atoms from parenthesised groups, both as
    ``{symbol: count}`` dicts with all counts and multipliers applied.
    """
    top_level = []
    grouped = []
    open_groups = []  # one list of atom dicts per '(' that is still open
    pending = None    # atom dicts that a directly following number multiplies

    # The four alternatives together cover every character, so anything that
    # is not a symbol, number or parenthesis still reaches parse_atom, which
    # passes it to check_atom.
    for token in re.findall(r'[A-Z][a-z]*|[0-9]+|[()]|[^A-Z0-9()]+', formula):
        if token == '(':
            open_groups.append([])
            pending = None
        elif token == ')':
            if not open_groups:
                raise ValueError('括号不匹配')
            members = open_groups.pop()
            if not members:
                raise ValueError('原子团不能为空')
            (open_groups[-1] if open_groups else grouped).extend(members)
            # The dicts are shared with the enclosing list, so multiplying
            # them in place later also updates the collected atoms.
            pending = members
        elif '0' <= token[0] <= '9':
            if pending is None:
                raise ValueError('系数错误')
            count = int(token)
            for entry in pending:
                for element in entry:
                    entry[element] *= count
            pending = None
        else:
            entry = parse_atom(token)
            (open_groups[-1] if open_groups else top_level).append(entry)
            pending = [entry]

    if open_groups:
        raise ValueError('括号不匹配')
    return top_level, grouped
|
|
|
|
|
|
def parse_equation(eq):
    """Convert a chemical equation into its dict form.

    The two sides may be separated by ``=``, ``==``, ``=>`` or ``->``; terms
    on each side are separated by ``+``. All whitespace is ignored. Every
    term is parsed with ``parse_molecule``.

    :param eq: the equation, e.g. ``'2H2 + O2 = 2H2O'``
    :return: ``{'left': [...], 'right': [...]}`` where each list holds the
        ``parse_molecule`` result of one term, in written order
    :raises ValueError: if the equation does not have exactly two sides or
        contains an empty term
    """
    eq = ''.join(eq.split())

    # Split on the separator directly instead of rewriting it first, so an
    # input that already uses '=>' is not turned into '=>>'.
    sides = re.split(r'->|=+>?', eq)
    if len(sides) != 2:
        raise ValueError('无效的化学方程式')

    parsed = {}
    for side, text in zip(('left', 'right'), sides):
        terms = text.split('+')
        if '' in terms:
            raise ValueError('无效的化学方程式')
        parsed[side] = [parse_molecule(term) for term in terms]
    return parsed
|