Source code for mdtraj.formats.hoomdxml

##############################################################################
# MDTraj: A Python Library for Loading, Saving, and Manipulating
#         Molecular Dynamics Trajectories.
# Copyright 2012-2013 Stanford University and the Authors
#
# Authors: Tim Moore
# Contributors:
#
# MDTraj is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 2.1
# of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with MDTraj. If not, see <http://www.gnu.org/licenses/>.
##############################################################################
from xml.etree import cElementTree

import numpy as np

from mdtraj.core.element import virtual_site
from mdtraj.formats.registry import FormatRegistry
from mdtraj.utils import import_

__all__ = ["load_hoomdxml"]


[docs] @FormatRegistry.register_loader(".hoomdxml") def load_hoomdxml(filename, top=None): """Load a single conformation from an HOOMD-Blue XML file. For more information on this file format, see: http://codeblue.umich.edu/hoomd-blue/doc/page_xml_file_format.html Notably, all node names and attributes are in all lower case. HOOMD-Blue does not contain residue and chain information explicitly. For this reason, chains will be found by looping over all the bonds and finding what is bonded to what. Each chain consisists of exactly one residue. Parameters ---------- filename : path-like The path on disk to the XML file top : None This argumet is ignored Returns ------- trajectory : md.Trajectory The resulting trajectory, as an md.Trajectory object, with corresponding Topology. Notes ----- This function requires the NetworkX python package. """ from mdtraj.core.topology import Topology from mdtraj.core.trajectory import Trajectory topology = Topology() tree = cElementTree.parse(filename) config = tree.getroot().find("configuration") position = config.find("position") bond = config.find("bond") atom_type = config.find("type") # MDTraj calls this "name" box = config.find("box") box.attrib = {key.lower(): val for key, val in box.attrib.items()} # be generous for case of box attributes lx = float(box.attrib["lx"]) ly = float(box.attrib["ly"]) lz = float(box.attrib["lz"]) try: xy = float(box.attrib["xy"]) xz = float(box.attrib["xz"]) yz = float(box.attrib["yz"]) except (ValueError, KeyError): xy = 0.0 xz = 0.0 yz = 0.0 unitcell_vectors = np.array( [ [ [lx, xy * ly, xz * lz], [0.0, ly, yz * lz], [0.0, 0.0, lz], ], ], ) positions, types = [], {} for pos in position.text.splitlines()[1:]: positions.append( ( float(pos.split()[0]), float(pos.split()[1]), float(pos.split()[2]), ), ) for idx, atom_name in enumerate(atom_type.text.splitlines()[1:]): types[idx] = str(atom_name.split()[0]) if len(types) != len(positions): raise ValueError("Different number of types and positions in xml file") # ignore the bond type if hasattr(bond, "text"): bonds = [(int(b.split()[1]), int(b.split()[2])) for b in bond.text.splitlines()[1:]] chains = _find_chains(bonds) else: chains = [] bonds = [] # Relate the first index in the bonded-group to mdtraj.Residue bonded_to_residue = {} for i, _ in enumerate(types): bonded_group = _in_chain(chains, i) if bonded_group is not None: if bonded_group[0] not in bonded_to_residue: t_chain = topology.add_chain() t_residue = topology.add_residue("A", t_chain) bonded_to_residue[bonded_group[0]] = t_residue topology.add_atom( types[i], virtual_site, bonded_to_residue[bonded_group[0]], ) if bonded_group is None: t_chain = topology.add_chain() t_residue = topology.add_residue("A", t_chain) topology.add_atom(types[i], virtual_site, t_residue) for bond in bonds: atom1, atom2 = bond[0], bond[1] topology.add_bond(topology.atom(atom1), topology.atom(atom2)) traj = Trajectory(xyz=np.array(positions), topology=topology) traj.unitcell_vectors = unitcell_vectors return traj
def _find_chains(bond_list): """Given a set of bonds, find unique molecules, with the assumption that there are no bonds between separate chains (i.e., only INTRAmolecular bonds), which also implies that each atom can be in exactly one chain. Parameters ---------- bond_list : list of (int, int) The list of bonds Returns _______ chains : list of list of int List of atoms in each chain Notes ----- This function requires the NetworkX python package. """ nx = import_("networkx") _chains = [] bond_list = np.asarray(bond_list) molecules = nx.Graph() molecules.add_nodes_from(set(bond_list.flatten())) molecules.add_edges_from(bond_list) return [sorted(x) for x in list(nx.connected_components(molecules))] def _in_chain(chains, atom_index): """Check if an item is in a list of lists""" for chain in chains: if atom_index in chain: return chain return None