-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Fix: expand dict transformation #9561
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Shamik-07
wants to merge
18
commits into
marimo-team:main
Choose a base branch
from
Shamik-07:fix/expand_dict_transformation
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+192
−48
Open
Changes from 11 commits
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
838d75f
fix: using polars unnest for all backend types.
Shamik-07 851e5b1
fix: update the expand dict print statement.
Shamik-07 0ea6758
tests: enabling the expand dict test and adding the necessary datafra…
Shamik-07 294e21e
tests: removing ibis skip in expand dict test.
Shamik-07 0d1e52f
Merge branch 'main' into fix/expand_dict_transformation
Shamik-07 24f672b
fix: removing additional polars dataframe creation and using the none…
Shamik-07 4e82164
fix: using polars as optional in test handlers.
Shamik-07 88ce223
fix: processing pandas backend separately to not cause arrow coercion…
Shamik-07 1f722b8
fix: mypy error.
Shamik-07 ff063e5
fix: changing the expand dict print code function for pandas to using…
Shamik-07 eb5ed5f
Merge branch 'main' into fix/expand_dict_transformation
Shamik-07 6e16445
Merge branch 'main' into fix/expand_dict_transformation
Shamik-07 de2ee2e
feat: unnesting only one level of pandas df.
Shamik-07 83126b2
refactor: modifying the pandas print module to expect one level of di…
Shamik-07 7eedc2d
tests: added a nested dict test for the expand dict handler.
Shamik-07 78cbe7d
tests: added a nested dict test for the expand dict handler.
Shamik-07 855cd5b
docs: adding comment for using max_level=0 in pd.json_normalize for e…
Shamik-07 803d52f
Merge branch 'main' into fix/expand_dict_transformation
Shamik-07 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,8 +47,8 @@ | |
|
|
||
| pytest.importorskip("ibis") | ||
| pd = pytest.importorskip("pandas") | ||
| pytest.importorskip("polars") | ||
| pytest.importorskip("pyarrow") | ||
| pytest.importorskip("polars") | ||
|
|
||
|
|
||
| def apply(df: DataFrameType, transform: Transform) -> DataFrameType: | ||
|
|
@@ -86,7 +86,10 @@ def assert_frame_equal(a: DataFrameType, b: DataFrameType) -> None: | |
|
|
||
|
|
||
| def assert_frame_equal_with_nans( | ||
| a: DataFrameType, b: DataFrameType, allow_nan_equals_zero: bool = False | ||
| a: DataFrameType, | ||
| b: DataFrameType, | ||
| allow_nan_equals_zero: bool = False, | ||
| allow_none_equals_nan: bool = False, | ||
| ) -> None: | ||
| """ | ||
| Assert two dataframes are equal, treating NaNs in the same locations as equal. | ||
|
|
@@ -97,6 +100,9 @@ def assert_frame_equal_with_nans( | |
| allow_nan_equals_zero: If True, treat NaN and 0.0 as equivalent values. | ||
| This is useful for pivot operations where missing aggregations may | ||
| be filled with 0.0 or NaN depending on the backend. | ||
| allow_none_equals_nan: If True, treat None and NaN as equivalent | ||
| missing values. This is useful when different backends materialise | ||
| missing numeric values differently. | ||
| """ | ||
| import math | ||
|
|
||
|
|
@@ -137,7 +143,25 @@ def assert_frame_equal_with_nans( | |
| or val_b == 0.0 | ||
| ) | ||
| ) | ||
| if not (val_a == val_b or both_nan or nan_or_zero_match): | ||
| # Useful for expand dict operations where None and nan are equal | ||
| none_nan_match = allow_none_equals_nan and ( | ||
| ( | ||
| val_a is None | ||
| and isinstance(val_b, float) | ||
| and math.isnan(val_b) | ||
| ) | ||
| or ( | ||
| val_b is None | ||
| and isinstance(val_a, float) | ||
| and math.isnan(val_a) | ||
| ) | ||
| ) | ||
| if not ( | ||
| val_a == val_b | ||
| or both_nan | ||
| or nan_or_zero_match | ||
| or none_nan_match | ||
| ): | ||
| raise AssertionError( | ||
| f"DataFrame values differ at column '{col}', row {idx}: {val_a} != {val_b}" | ||
| ) | ||
|
|
@@ -1733,18 +1757,15 @@ def test_explode_columns(df: DataFrameType) -> None: | |
| assert nw_result.columns == ["A", "B", "C"] | ||
|
|
||
| @staticmethod | ||
| @pytest.mark.skip( | ||
| reason="Dict/struct expansion not supported uniformly across backends" | ||
| ) | ||
| @pytest.mark.parametrize( | ||
| ("df", "expected"), | ||
| list( | ||
| zip( | ||
| create_test_dataframes( | ||
| {"A": [{"foo": 1, "bar": "hello"}], "B": [1]} | ||
| {"A": [{"foo": 1, "bar": "hello"}, None], "B": [1, 2]}, | ||
| ), | ||
| create_test_dataframes( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. polars should already be created in this
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed |
||
| {"B": [1], "foo": [1], "bar": ["hello"]} | ||
| {"B": [1, 2], "foo": [1, None], "bar": ["hello", None]}, | ||
| ), | ||
| strict=False, | ||
| ) | ||
|
|
@@ -1760,9 +1781,10 @@ def test_expand_dict(df: DataFrameType, expected: DataFrameType) -> None: | |
| nw_expected = collect_df(expected) | ||
| result_cols = sorted(nw_result.columns) | ||
| expected_cols = sorted(nw_expected.columns) | ||
| assert_frame_equal( | ||
| assert_frame_equal_with_nans( | ||
| nw_expected.select(expected_cols), | ||
| nw_result.select(result_cols), | ||
| allow_none_equals_nan=True, | ||
| ) | ||
|
|
||
| @staticmethod | ||
|
|
@@ -2341,41 +2363,6 @@ def test_filter_rows_nulls_pandas( | |
| result = apply(df, in_transform) | ||
| assert_frame_equal_with_nans(result, expected) | ||
|
|
||
| @staticmethod | ||
| @pytest.mark.parametrize( | ||
| ("df", "expected"), | ||
| list( | ||
| zip( | ||
| create_test_dataframes( | ||
| {"nulls": [1, 2, 3, None, "hello"]}, include=["pandas"] | ||
| ), | ||
| create_test_dataframes({"nulls": [None]}, include=["pandas"]), | ||
| strict=False, | ||
| ) | ||
| ), | ||
| ) | ||
| def test_filter_rows_null_pandas_object( | ||
| df: DataFrameType, expected: DataFrameType | ||
| ) -> None: | ||
| in_transform = FilterRowsTransform( | ||
| type=TransformType.FILTER_ROWS, | ||
| operation="keep_rows", | ||
| where=FilterGroup( | ||
| type="group", | ||
| operator="and", | ||
| children=[ | ||
| FilterCondition( | ||
| type="condition", | ||
| column_id="nulls", | ||
| operator="in", | ||
| value=[None], | ||
| ) | ||
| ], | ||
| ), | ||
| ) | ||
| result = apply(df, in_transform) | ||
| assert_frame_equal_with_nans(result, expected) | ||
|
|
||
| @staticmethod | ||
| @pytest.mark.parametrize( | ||
| ("df", "expected"), | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.