Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion packages/gooddata-pandas/src/gooddata_pandas/data_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ def _extract_from_attributes_and_maybe_metrics(
col_to_attr_idx: dict[str, int],
col_to_metric_idx: dict[str, int],
index_to_attr_idx: Optional[dict[str, int]] = None,
result_page_len: Optional[int] = None,
) -> tuple[dict, dict]:
"""
Internal function that extracts data from execution response with attributes columns and
Expand All @@ -371,6 +372,8 @@ def _extract_from_attributes_and_maybe_metrics(
col_to_metric_idx (dict[str, int]): A mapping of pandas column names to metric dimension indices.
index_to_attr_idx (Optional[dict[str, int]]):
An optional mapping of pandas index names to attribute dimension indices.
result_page_len (Optional[int]): Optional page size for result pagination.
Defaults to _RESULT_PAGE_LEN (1000). Larger values can improve performance for large result sets.

Returns:
tuple: A tuple containing the following dictionaries:
Expand All @@ -379,7 +382,8 @@ def _extract_from_attributes_and_maybe_metrics(
"""
exec_def = execution.exec_def
offset = [0 for _ in exec_def.dimensions]
limit = [len(exec_def.metrics), _RESULT_PAGE_LEN] if exec_def.has_metrics() else [_RESULT_PAGE_LEN]
page_len = result_page_len if result_page_len is not None else _RESULT_PAGE_LEN
limit = [len(exec_def.metrics), page_len] if exec_def.has_metrics() else [page_len]
attribute_dim = 1 if exec_def.has_metrics() else 0
result = execution.read_result(limit=limit, offset=offset)
safe_index_to_attr_idx = index_to_attr_idx if index_to_attr_idx is not None else dict()
Expand Down Expand Up @@ -421,6 +425,7 @@ def compute_and_extract(
filter_by: Optional[Union[Filter, list[Filter]]] = None,
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
is_cancellable: bool = False,
result_page_len: Optional[int] = None,
) -> tuple[dict, dict]:
"""
Convenience function that computes and extracts data from the execution response.
Expand All @@ -435,6 +440,8 @@ def compute_and_extract(
submitted to the backend.
is_cancellable (bool, optional): Whether the execution of this definition should be cancelled when
the connection is interrupted.
result_page_len (Optional[int]): Optional page size for result pagination.
Defaults to 1000. Larger values can improve performance for large result sets.

Returns:
tuple: A tuple containing the following dictionaries:
Expand Down Expand Up @@ -472,4 +479,5 @@ def compute_and_extract(
col_to_attr_idx,
col_to_metric_idx,
index_to_attr_idx,
result_page_len=result_page_len,
)
22 changes: 21 additions & 1 deletion packages/gooddata-pandas/src/gooddata_pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def indexed(
filter_by: Optional[Union[Filter, list[Filter]]] = None,
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
is_cancellable: bool = False,
result_page_len: Optional[int] = None,
) -> pandas.DataFrame:
"""
Creates a data frame indexed by values of the label. The data frame columns will be created from either
Expand All @@ -90,6 +91,8 @@ def indexed(
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
submitted to the backend.
is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
result_page_len (Optional[int]): Optional page size for result pagination.
Defaults to 1000. Larger values can improve performance for large result sets.

Returns:
pandas.DataFrame: A DataFrame instance.
Expand All @@ -102,6 +105,7 @@ def indexed(
filter_by=filter_by,
on_execution_submitted=on_execution_submitted,
is_cancellable=is_cancellable,
result_page_len=result_page_len,
)

_idx = make_pandas_index(index)
Expand All @@ -114,6 +118,7 @@ def not_indexed(
filter_by: Optional[Union[Filter, list[Filter]]] = None,
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
is_cancellable: bool = False,
result_page_len: Optional[int] = None,
) -> pandas.DataFrame:
"""
Creates a data frame with columns created from metrics and/or labels.
Expand All @@ -125,6 +130,8 @@ def not_indexed(
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
submitted to the backend.
is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
result_page_len (Optional[int]): Optional page size for result pagination.
Defaults to 1000. Larger values can improve performance for large result sets.

Returns:
pandas.DataFrame: A DataFrame instance.
Expand All @@ -137,6 +144,7 @@ def not_indexed(
filter_by=filter_by,
on_execution_submitted=on_execution_submitted,
is_cancellable=is_cancellable,
result_page_len=result_page_len,
)

return pandas.DataFrame(data=data)
Expand All @@ -148,6 +156,7 @@ def for_items(
auto_index: bool = True,
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
is_cancellable: bool = False,
result_page_len: Optional[int] = None,
) -> pandas.DataFrame:
"""
Creates a data frame for named items. This is a convenience method that will create DataFrame with or
Expand All @@ -162,6 +171,8 @@ def for_items(
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
submitted to the backend.
is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
result_page_len (Optional[int]): Optional page size for result pagination.
Defaults to 1000. Larger values can improve performance for large result sets.

Returns:
pandas.DataFrame: A DataFrame instance.
Expand All @@ -184,14 +195,19 @@ def for_items(
if not auto_index or not has_measures or not has_attributes:
columns: ColumnsDef = {**resolved_attr_cols, **resolved_measure_cols}

return self.not_indexed(columns=columns, filter_by=filter_by)
return self.not_indexed(
columns=columns,
filter_by=filter_by,
result_page_len=result_page_len,
)

return self.indexed(
index_by=resolved_attr_cols,
columns=resolved_measure_cols,
filter_by=filter_by,
on_execution_submitted=on_execution_submitted,
is_cancellable=is_cancellable,
result_page_len=result_page_len,
)

def for_visualization(
Expand All @@ -200,6 +216,7 @@ def for_visualization(
auto_index: bool = True,
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
is_cancellable: bool = False,
result_page_len: Optional[int] = None,
) -> pandas.DataFrame:
"""
Creates a data frame with columns based on the content of the visualization with the provided identifier.
Expand All @@ -211,6 +228,8 @@ def for_visualization(
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
submitted to the backend.
is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
result_page_len (Optional[int]): Optional page size for result pagination.
Defaults to 1000. Larger values can improve performance for large result sets.

Returns:
pandas.DataFrame: A DataFrame instance.
Expand All @@ -231,6 +250,7 @@ def for_visualization(
auto_index=auto_index,
on_execution_submitted=on_execution_submitted,
is_cancellable=is_cancellable,
result_page_len=result_page_len,
)

def for_created_visualization(
Expand Down
9 changes: 9 additions & 0 deletions packages/gooddata-pandas/src/gooddata_pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def indexed(
filter_by: Optional[Union[Filter, list[Filter]]] = None,
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
is_cancellable: bool = False,
result_page_len: Optional[int] = None,
) -> pandas.Series:
"""Creates pandas Series from data points calculated from a single `data_by`.

Expand Down Expand Up @@ -68,6 +69,9 @@ def indexed(

is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.

result_page_len (Optional[int]): Optional page size for result pagination.
Defaults to 1000. Larger values can improve performance for large result sets.

Returns:
pandas.Series: pandas series instance
"""
Expand All @@ -80,6 +84,7 @@ def indexed(
filter_by=filter_by,
on_execution_submitted=on_execution_submitted,
is_cancellable=is_cancellable,
result_page_len=result_page_len,
)

_idx = make_pandas_index(index)
Expand All @@ -93,6 +98,7 @@ def not_indexed(
filter_by: Optional[Union[Filter, list[Filter]]] = None,
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
is_cancellable: bool = False,
result_page_len: Optional[int] = None,
) -> pandas.Series:
"""
Creates a pandas.Series from data points calculated from a single `data_by` without constructing an index.
Expand Down Expand Up @@ -122,6 +128,8 @@ def not_indexed(
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
submitted to the backend.
is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
result_page_len (Optional[int]): Optional page size for result pagination.
Defaults to 1000. Larger values can improve performance for large result sets.

Returns:
pandas.Series: The resulting pandas Series instance.
Expand All @@ -140,6 +148,7 @@ def not_indexed(
filter_by=filter_by,
on_execution_submitted=on_execution_submitted,
is_cancellable=is_cancellable,
result_page_len=result_page_len,
)

return pandas.Series(data=data["_series"])
Loading