Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# Changelog

## Version 0.4.0 - 0.4.3
## Version 0.4.0 - 0.4.4

- Classes extend `BiocObject` from biocutils. `metadata` is a named list.
- Update actions to run from 3.10-3.14
- Support empty compressed list objects of size `n`.
- Implement combine generic for compressed lists.
- The element metadata slot is now a `BiocFrame` (previously a plain dictionary).

## Version 0.3.0

Expand Down
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ python_requires = >=3.9
# For more information, check out https://semver.org/.
install_requires =
importlib-metadata; python_version<"3.8"
biocutils>=0.3.1
biocutils>=0.3.3
numpy
biocframe>=0.7.1
biocframe>=0.7.2


[options.packages.find]
Expand Down
54 changes: 41 additions & 13 deletions src/compressed_lists/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from warnings import warn

import biocutils as ut
from biocframe import BiocFrame
import numpy as np

from .partition import Partitioning
Expand All @@ -13,6 +14,31 @@
__license__ = "MIT"


def is_pandas(x: Any) -> bool:
    """Check if ``x`` is a :py:class:`~pandas.DataFrame`.

    The check is an ``isinstance`` test against the ``DataFrame`` class of the
    already-imported ``pandas`` module. Merely exposing a ``dtypes`` attribute
    (as e.g. a ``pandas.Series`` does) is not sufficient.

    Args:
        x:
            Any object.

    Returns:
        True if ``x`` is a :py:class:`~pandas.DataFrame`, otherwise False.
    """
    import sys

    # pandas is an optional dependency: look it up in the module cache instead
    # of importing it. If it was never imported, ``x`` cannot be a DataFrame.
    pandas_module = sys.modules.get("pandas")
    if pandas_module is None:
        return False

    # Guard against a partially initialised module that lacks ``DataFrame``.
    frame_type = getattr(pandas_module, "DataFrame", None)
    return frame_type is not None and isinstance(x, frame_type)


def _sanitize_frame(frame, num_rows: int):
    """Normalize an element-metadata value.

    Args:
        frame:
            The supplied element metadata. May be None.

        num_rows:
            Number of rows given to the empty ``BiocFrame`` that is created
            when ``frame`` is None.

    Returns:
        An empty ``BiocFrame`` with ``num_rows`` rows if ``frame`` is None;
        the result of ``BiocFrame.from_pandas`` if :py:func:`is_pandas`
        reports True; otherwise the value unchanged.
    """
    if frame is None:
        frame = BiocFrame({}, number_of_rows=num_rows)

    # The pandas check runs on the defaulted value as well, as before.
    return BiocFrame.from_pandas(frame) if is_pandas(frame) else frame


def _validate_data_and_partitions(unlist_data, partition):
if len(unlist_data) != partition.nobj():
raise ValueError(
Expand All @@ -33,7 +59,7 @@ def __init__(
unlist_data: Any,
partitioning: Partitioning,
element_type: Any = None,
element_metadata: Optional[dict] = None,
element_metadata: Optional[BiocFrame] = None,
metadata: Optional[Union[Dict[str, Any], ut.NamedList]] = None,
_validate: bool = True,
):
Expand Down Expand Up @@ -64,7 +90,7 @@ class for the type of elements.
self._unlist_data = unlist_data
self._partitioning = partitioning
self._element_type = element_type
self._element_metadata = element_metadata or {}
self._element_metadata = _sanitize_frame(element_metadata, len(partitioning))

if _validate:
_validate_data_and_partitions(self._unlist_data, self._partitioning)
Expand Down Expand Up @@ -93,6 +119,7 @@ def __deepcopy__(self, memo=None, _nil=[]):
element_type=_elem_type_copy,
element_metadata=_elem_metadata_copy,
metadata=_metadata_copy,
_validate=False,
)

def __copy__(self):
Expand All @@ -107,6 +134,7 @@ def __copy__(self):
element_type=self._element_type,
element_metadata=self._element_metadata,
metadata=self._metadata,
_validate=False,
)

def copy(self):
Expand Down Expand Up @@ -150,8 +178,7 @@ def __repr__(self) -> str:
_etype_name = self._element_type.__name__
output += ", element_type=" + _etype_name

if len(self._element_metadata) > 0:
output += ", element_metadata=" + ut.print_truncated_dict(self._element_metadata)
output += ", element_metadata=" + self._element_metadata.__repr__()

if len(self._metadata) > 0:
output += ", metadata=" + ut.print_truncated_dict(self._metadata)
Expand All @@ -178,7 +205,7 @@ def __str__(self) -> str:

output += f"partitioning: {ut.print_truncated_list(self._partitioning)}\n"

output += f"element_metadata({str(len(self._element_metadata))}): {ut.print_truncated_list(list(self._element_metadata.keys()), sep=' ', include_brackets=False, transform=lambda y: y)}\n"
output += f"element_metadata({str(len(self._element_metadata))} rows): {ut.print_truncated_list(list(self._element_metadata.get_column_names()), sep=' ', include_brackets=False, transform=lambda y: y)}\n"
output += f"metadata({str(len(self._metadata))}): {ut.print_truncated_list(list(self._metadata.keys()), sep=' ', include_brackets=False, transform=lambda y: y)}\n"

return output
Expand Down Expand Up @@ -303,14 +330,14 @@ def unlist_data(self, unlist_data: Any):
######>> element metadata <<#######
###################################

def get_element_metadata(self) -> dict:
def get_element_metadata(self) -> BiocFrame:
"""Get the element metadata.

Returns:
A :py:class:`~biocframe.BiocFrame` holding the metadata for each
element in this object.
"""
return self._element_metadata

def set_element_metadata(self, element_metadata: dict, in_place: bool = False) -> CompressedList:
def set_element_metadata(self, element_metadata: BiocFrame, in_place: bool = False) -> CompressedList:
"""Set new element metadata.

Args:
Expand All @@ -324,19 +351,20 @@ def set_element_metadata(self, element_metadata: dict, in_place: bool = False) -
A modified ``CompressedList`` object, either as a copy of the original
or as a reference to the (in-place-modified) original.
"""
if not isinstance(element_metadata, dict):
raise TypeError(f"`element_metadata` must be a dictionary, provided {type(element_metadata)}.")
if not isinstance(element_metadata, BiocFrame):
raise TypeError(f"`element_metadata` must be a BiocFrame, provided {type(element_metadata)}.")

output = self._define_output(in_place)
output._element_metadata = element_metadata
output._element_metadata = _sanitize_frame(element_metadata, len(self._partitioning))
return output

@property
def element_metadata(self) -> dict:
def element_metadata(self) -> BiocFrame:
"""Alias for :py:meth:`~get_element_metadata`."""
return self.get_element_metadata()

@element_metadata.setter
def element_metadata(self, element_metadata: dict):
def element_metadata(self, element_metadata: BiocFrame):
"""Alias for :py:attr:`~set_element_metadata` with ``in_place = True``.

As this mutates the original object, a warning is raised.
Expand Down Expand Up @@ -576,7 +604,7 @@ def extract_subset(self, indices: Sequence[int]) -> CompressedList:
new_data,
new_partitioning,
element_type=self._element_type,
element_metadata={k: v for k, v in self._element_metadata.items() if k in indices},
element_metadata=self._element_metadata[indices,],
metadata=self._metadata.copy(),
)

Expand Down
5 changes: 2 additions & 3 deletions src/compressed_lists/biocframe_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,7 @@ def __repr__(self) -> str:
else self._element_type
)

if len(self._element_metadata) > 0:
output += ", element_metadata=" + ut.print_truncated_dict(self._element_metadata)
output += ", element_metadata=" + self._element_metadata.__repr__()

if len(self._metadata) > 0:
output += ", metadata=" + ut.print_truncated_dict(self._metadata)
Expand All @@ -155,7 +154,7 @@ def __str__(self) -> str:

output += f"partitioning: {ut.print_truncated_list(self._partitioning)}\n"

output += f"element_metadata({str(len(self._element_metadata))}): {ut.print_truncated_list(list(self._element_metadata.keys()), sep=' ', include_brackets=False, transform=lambda y: y)}\n"
output += f"element_metadata({str(len(self._element_metadata))} rows): {ut.print_truncated_list(list(self._element_metadata.get_column_names()), sep=' ', include_brackets=False, transform=lambda y: y)}\n"
output += f"metadata({str(len(self._metadata))}): {ut.print_truncated_list(list(self._metadata.keys()), sep=' ', include_brackets=False, transform=lambda y: y)}\n"

return output
Expand Down
12 changes: 6 additions & 6 deletions tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

from compressed_lists import CompressedList
from biocframe import BiocFrame

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
Expand Down Expand Up @@ -95,14 +96,13 @@ def test_base_metadata(base_list):
base_list.metadata = meta
assert base_list.metadata == ut.NamedList.from_dict({"source": "test"})

el_meta = {"info": "details"}
el_meta = BiocFrame({"score": [1, 2, 3]})
cl_el_meta = base_list.set_element_metadata(el_meta, in_place=False)
assert base_list.element_metadata == {}
assert cl_el_meta.element_metadata == {"info": "details"}
assert len(base_list.element_metadata) == 3
assert cl_el_meta.element_metadata.get_column("score") == el_meta.get_column("score")

with pytest.warns(UserWarning, match="Setting property 'element_metadata'"):
base_list.element_metadata = el_meta
assert base_list.element_metadata == {"info": "details"}
with pytest.raises(Exception):
base_list.element_metadata = {"info": "details"}


def test_base_copying(base_list):
Expand Down
1 change: 0 additions & 1 deletion tests/test_comp_biocframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def test_bframe_typed_list_column():

def test_split_biocframe(frame_data):
frame_data.set_column("groups", [0, 0, 1], in_place=True)
print(frame_data)
clist = splitAsCompressedList(frame_data, groups_or_partitions=frame_data.get_column("groups"))

assert isinstance(clist, CompressedSplitBiocFrameList)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_comp_bool.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from biocutils.boolean_list import BooleanList
from biocutils import BooleanList

from compressed_lists import CompressedBooleanList

Expand Down
2 changes: 1 addition & 1 deletion tests/test_comp_float.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from biocutils.float_list import FloatList
from biocutils import FloatList

from compressed_lists import CompressedFloatList

Expand Down
2 changes: 1 addition & 1 deletion tests/test_comp_int.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy as np
import pytest
from biocutils.integer_list import IntegerList
from biocutils import IntegerList

from compressed_lists import CompressedIntegerList, Partitioning

Expand Down
2 changes: 1 addition & 1 deletion tests/test_comp_str.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from biocutils.string_list import StringList
from biocutils import StringList

from compressed_lists import CompressedStringList

Expand Down