From a83abdf0766516a22e727ab922a5056ccaadbcab Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 27 Feb 2026 09:02:27 +0800 Subject: [PATCH 1/3] chore: move LMDB imports into methods for performance Refactor imports and initialize msgpack_numpy in the LMDB class. --- dpdata/lmdb/format.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/dpdata/lmdb/format.py b/dpdata/lmdb/format.py index 9b518be6..b596ee87 100644 --- a/dpdata/lmdb/format.py +++ b/dpdata/lmdb/format.py @@ -2,15 +2,10 @@ import os -import lmdb -import msgpack -import msgpack_numpy as m import numpy as np from dpdata.format import Format -m.patch() - class LMDBError(Exception): """Base class for LMDB errors.""" @@ -62,6 +57,10 @@ class LMDBFormat(Format): >>> import dpdata >>> loaded_multi_systems = dpdata.MultiSystems.from_file("my_multi_system_db.lmdb", fmt="lmdb") """ + def __init__(self, *args, **kwargs) -> None: + import msgpack_numpy as m + + m.patch() def to_multi_systems( self, formulas, directory, map_size=1000000000, frame_idx_fmt="012d", **kwargs @@ -86,6 +85,8 @@ def to_multi_systems( tuple (self, formula) to be used by to_system """ + import msgpack + self._frame_idx_fmt = frame_idx_fmt self._global_frame_idx = 0 self._system_info = [] @@ -105,6 +106,7 @@ def to_multi_systems( self._txn = None def _dump_to_txn(self, data, txn, formula, dtypes): + import msgpack from dpdata.data_type import Axis nframes = data["coords"].shape[0] @@ -209,6 +211,8 @@ def from_multi_systems(self, file_name, map_size=1000000000, **kwargs): dict data dictionary for each system """ + import msgpack + import lmdb from dpdata.data_type import Axis, DataType from dpdata.system import LabeledSystem, System From 876a86682e34ebabb152ec896cf5cf4538e81db8 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 27 Feb 2026 09:04:23 +0800 Subject: [PATCH 2/3] Update format.py --- dpdata/lmdb/format.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dpdata/lmdb/format.py b/dpdata/lmdb/format.py index b596ee87..b85e1c01 100644 --- a/dpdata/lmdb/format.py +++ b/dpdata/lmdb/format.py @@ -85,6 +85,7 @@ def to_multi_systems( tuple (self, formula) to be used by to_system """ + import lmdb import msgpack self._frame_idx_fmt = frame_idx_fmt From dc22e4405e4ed037a77f48a5ceabb69657b31787 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 27 Feb 2026 01:10:19 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dpdata/lmdb/format.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dpdata/lmdb/format.py b/dpdata/lmdb/format.py index b85e1c01..2e7ae3c4 100644 --- a/dpdata/lmdb/format.py +++ b/dpdata/lmdb/format.py @@ -57,6 +57,7 @@ class LMDBFormat(Format): >>> import dpdata >>> loaded_multi_systems = dpdata.MultiSystems.from_file("my_multi_system_db.lmdb", fmt="lmdb") """ + def __init__(self, *args, **kwargs) -> None: import msgpack_numpy as m @@ -108,6 +109,7 @@ def to_multi_systems( def _dump_to_txn(self, data, txn, formula, dtypes): import msgpack + from dpdata.data_type import Axis nframes = data["coords"].shape[0] @@ -212,8 +214,9 @@ def from_multi_systems(self, file_name, map_size=1000000000, **kwargs): dict data dictionary for each system """ - import msgpack import lmdb + import msgpack + from dpdata.data_type import Axis, DataType from dpdata.system import LabeledSystem, System