Source code for finchnmr.xml_parser
"""
Parse XML files.
Authors: David A. Sheen, Nathan A. Mahynski
"""
import xml
import xml.etree.ElementTree as ET
import pandas as pd
from typing import Any
# Mapping of peak attribute (column) name -> dtype string. It is handed to
# ``DataFrame.astype`` in ``parse_peak_file`` because XML attribute values are
# always read as strings and must be cast before numerical use.
types_dict: dict[str, str] = {
    "F1": "float64",
    "F2": "float64",
    "annotation": "str",
    "intensity": "float64",
    "type": "int",
}
def parse_peak_file(xml_file: str) -> "pd.DataFrame":
    """
    Parse the peaks in an XML file into a Pandas DataFrame.

    Every element two levels below the document root whose tag contains
    ``"Peak2D"`` contributes one row; its XML attributes become the columns.
    All other elements (e.g. the metadata header) are ignored.

    Parameters
    ----------
    xml_file : str
        Name of .xml file to parse.

    Returns
    -------
    dataframe : pd.DataFrame
        DataFrame of NMR features, indexed by consecutive integers starting
        at 0 in document order, with the columns named in ``types_dict`` cast
        to the dtypes given there. If the file contains no ``Peak2D``
        elements, an empty DataFrame with exactly those columns is returned.

    Raises
    ------
    KeyError
        If peaks are present but none of them carries one of the attributes
        named in ``types_dict`` (raised by ``DataFrame.astype``).
    """
    root = ET.parse(xml_file).getroot()

    # NMR features are labeled with the tag 'Peak2D'; there is also a metadata
    # header, which we ignore here. A substring test is used on the tag, as in
    # the original implementation, rather than an exact match.
    peak_attributes = (
        sub.attrib for child in root for sub in child if "Peak2D" in sub.tag
    )

    # Index the peaks by integer, in the order they appear in the file.
    peak_dict: dict[int, Any] = dict(enumerate(peak_attributes))

    if not peak_dict:
        # A frame built from zero records has no columns, and astype() raises
        # KeyError for any mapped column that is absent. Build the expected
        # columns explicitly so a peak-less file yields an empty, correctly
        # typed table instead of an opaque error.
        return pd.DataFrame(columns=list(types_dict)).astype(types_dict)

    return pd.DataFrame.from_dict(peak_dict, orient="index").astype(types_dict)