Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
838d75f
fix: using polars unnest for all backend types.
Shamik-07 May 15, 2026
851e5b1
fix: update the expand dict print statement.
Shamik-07 May 15, 2026
0ea6758
tests: enabling the expand dict test and adding the necessary datafra…
Shamik-07 May 15, 2026
294e21e
tests: removing ibis skip in expand dict test.
Shamik-07 May 15, 2026
0d1e52f
Merge branch 'main' into fix/expand_dict_transformation
Shamik-07 May 19, 2026
24f672b
fix: removing additional polars dataframe creation and using the none…
Shamik-07 May 19, 2026
4e82164
fix: using polars as optional in test handlers.
Shamik-07 May 19, 2026
88ce223
fix: processing pandas backend separately to not cause arrow coercion…
Shamik-07 May 19, 2026
1f722b8
fix: mypy error.
Shamik-07 May 19, 2026
ff063e5
fix: changing the expand dict print code function for pandas to using…
Shamik-07 May 19, 2026
eb5ed5f
Merge branch 'main' into fix/expand_dict_transformation
Shamik-07 May 20, 2026
6e16445
Merge branch 'main' into fix/expand_dict_transformation
Shamik-07 May 21, 2026
de2ee2e
feat: unnesting only one level of pandas df.
Shamik-07 May 21, 2026
83126b2
refactor: modifying the pandas print module to expect one level of di…
Shamik-07 May 21, 2026
7eedc2d
tests: added a nested dict test for the expand dict handler.
Shamik-07 May 21, 2026
78cbe7d
tests: added a nested dict test for the expand dict handler.
Shamik-07 May 21, 2026
855cd5b
docs: adding comment for using max_level=0 in pd.json_normalize for e…
Shamik-07 May 21, 2026
803d52f
Merge branch 'main' into fix/expand_dict_transformation
Shamik-07 May 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion marimo/_plugins/ui/_impl/dataframes/transforms/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,24 @@ def handle_explode_columns(
def handle_expand_dict(
    df: DataFrame, transform: ExpandDictTransform
) -> DataFrame:
    """Expand a dict/struct column into one column per key.

    The column named by ``transform.column_id`` is removed from the frame
    and replaced by the columns obtained from expanding its values.

    Args:
        df: Frame that contains the dict/struct column.
        transform: Transform whose ``column_id`` names the column to expand.

    Returns:
        The expanded frame, passed through the ``undo`` callable returned
        by ``collect_and_preserve_type`` (presumably restoring the caller's
        original frame flavour -- confirm against that helper).
    """
    collected_df, undo = collect_and_preserve_type(df)
    native_df = collected_df.to_native()

    # Keep pandas handling fully pandas-native so mixed/object columns in
    # unrelated fields do not trigger Arrow coercion errors.
    if nw.dependencies.is_pandas_dataframe(native_df):
        import pandas as pd

        # ``pop`` mutates the frame it is called on, so operate on a copy
        # rather than on the caller's data.
        result_df = native_df.copy()
        expanded = pd.json_normalize(
            result_df.pop(transform.column_id),  # type: ignore[arg-type]
        )
        # Give the expanded columns the source frame's index so the
        # index-based join below lines rows up one-to-one.
        expanded.index = result_df.index
        return undo(nw.from_native(result_df.join(expanded)))

    # Every non-pandas frame is converted to polars and expanded with
    # ``unnest``.
    polars_df = collected_df.to_polars()
    unnested = polars_df.unnest(transform.column_id)
    return undo(nw.from_native(unnested))

@staticmethod
def handle_unique(df: DataFrame, transform: UniqueTransform) -> DataFrame:
Expand Down
9 changes: 6 additions & 3 deletions marimo/_plugins/ui/_impl/dataframes/transforms/print_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,11 @@ def generate_where_clause(df_name: str, where: FilterCondition) -> str:

elif transform.type == TransformType.EXPAND_DICT:
column_id = _as_literal(transform.column_id)
args = f"{df_name}.pop({column_id}).values.tolist()"
return f"{df_name}.join(pd.DataFrame({args}))"
return (
f"{df_name}.join("
f"pd.json_normalize({df_name}.pop({column_id})).set_axis({df_name}.index, axis=0)"
f")"
)

elif transform.type == TransformType.UNIQUE:
column_ids = transform.column_ids
Expand Down Expand Up @@ -465,7 +468,7 @@ def generate_where_clause_polars(where: FilterCondition) -> str:

elif transform.type == TransformType.EXPAND_DICT:
column_id = _as_literal(transform.column_id)
return f"{df_name}.hstack(pl.DataFrame({df_name}.select({column_id}).to_series().to_list())).drop({column_id})"
return f"{df_name}.unnest({column_id})"

elif transform.type == TransformType.UNIQUE:
column_ids = transform.column_ids
Expand Down
75 changes: 31 additions & 44 deletions tests/_plugins/ui/_impl/dataframes/test_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@

pytest.importorskip("ibis")
pd = pytest.importorskip("pandas")
pytest.importorskip("polars")
pytest.importorskip("pyarrow")
pytest.importorskip("polars")


def apply(df: DataFrameType, transform: Transform) -> DataFrameType:
Expand Down Expand Up @@ -86,7 +86,10 @@ def assert_frame_equal(a: DataFrameType, b: DataFrameType) -> None:


def assert_frame_equal_with_nans(
a: DataFrameType, b: DataFrameType, allow_nan_equals_zero: bool = False
a: DataFrameType,
b: DataFrameType,
allow_nan_equals_zero: bool = False,
allow_none_equals_nan: bool = False,
) -> None:
"""
Assert two dataframes are equal, treating NaNs in the same locations as equal.
Expand All @@ -97,6 +100,9 @@ def assert_frame_equal_with_nans(
allow_nan_equals_zero: If True, treat NaN and 0.0 as equivalent values.
This is useful for pivot operations where missing aggregations may
be filled with 0.0 or NaN depending on the backend.
allow_none_equals_nan: If True, treat None and NaN as equivalent
missing values. This is useful when different backends materialise
missing numeric values differently.
"""
import math

Expand Down Expand Up @@ -137,7 +143,25 @@ def assert_frame_equal_with_nans(
or val_b == 0.0
)
)
if not (val_a == val_b or both_nan or nan_or_zero_match):
# Useful for expand dict operations where None and nan are equal
none_nan_match = allow_none_equals_nan and (
(
val_a is None
and isinstance(val_b, float)
and math.isnan(val_b)
)
or (
val_b is None
and isinstance(val_a, float)
and math.isnan(val_a)
)
)
if not (
val_a == val_b
or both_nan
or nan_or_zero_match
or none_nan_match
):
raise AssertionError(
f"DataFrame values differ at column '{col}', row {idx}: {val_a} != {val_b}"
)
Expand Down Expand Up @@ -1733,18 +1757,15 @@ def test_explode_columns(df: DataFrameType) -> None:
assert nw_result.columns == ["A", "B", "C"]

@staticmethod
@pytest.mark.skip(
reason="Dict/struct expansion not supported uniformly across backends"
)
@pytest.mark.parametrize(
("df", "expected"),
list(
zip(
create_test_dataframes(
{"A": [{"foo": 1, "bar": "hello"}], "B": [1]}
{"A": [{"foo": 1, "bar": "hello"}, None], "B": [1, 2]},
),
create_test_dataframes(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The polars dataframe should already be created by `create_test_dataframes`, so we don't need to add the dataframe below.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

{"B": [1], "foo": [1], "bar": ["hello"]}
{"B": [1, 2], "foo": [1, None], "bar": ["hello", None]},
),
strict=False,
)
Expand All @@ -1760,9 +1781,10 @@ def test_expand_dict(df: DataFrameType, expected: DataFrameType) -> None:
nw_expected = collect_df(expected)
result_cols = sorted(nw_result.columns)
expected_cols = sorted(nw_expected.columns)
assert_frame_equal(
assert_frame_equal_with_nans(
nw_expected.select(expected_cols),
nw_result.select(result_cols),
allow_none_equals_nan=True,
)

@staticmethod
Expand Down Expand Up @@ -2341,41 +2363,6 @@ def test_filter_rows_nulls_pandas(
result = apply(df, in_transform)
assert_frame_equal_with_nans(result, expected)

@staticmethod
@pytest.mark.parametrize(
    ("df", "expected"),
    [
        *zip(
            create_test_dataframes(
                {"nulls": [1, 2, 3, None, "hello"]}, include=["pandas"]
            ),
            create_test_dataframes({"nulls": [None]}, include=["pandas"]),
            strict=False,
        )
    ],
)
def test_filter_rows_null_pandas_object(
    df: DataFrameType, expected: DataFrameType
) -> None:
    """Keep only rows whose ``nulls`` value is in ``[None]`` (pandas only).

    The input column mixes ints, a string and ``None``; the expected frame
    holds just the single ``None`` row.
    """
    # Membership test against a list holding only None.
    null_membership = FilterCondition(
        type="condition",
        column_id="nulls",
        operator="in",
        value=[None],
    )
    # Single-child "and" group wrapping the condition above.
    where_clause = FilterGroup(
        type="group",
        operator="and",
        children=[null_membership],
    )
    keep_nulls = FilterRowsTransform(
        type=TransformType.FILTER_ROWS,
        operation="keep_rows",
        where=where_clause,
    )

    filtered = apply(df, keep_nulls)
    assert_frame_equal_with_nans(filtered, expected)

@staticmethod
@pytest.mark.parametrize(
("df", "expected"),
Expand Down
Loading