Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 0.0.3
* Fixed crash when a boolean column contains only one unique value (e.g., all True) and the table is rendered with the native backend.
* Fixed object columns containing decimal.Decimal values (e.g., from Amazon Redshift) being incorrectly treated as categorical instead of numerical.

# 0.0.2
* Introduced narwhals as a dependency, adding support for pandas 3, polars,
and pyarrow
Expand Down
2 changes: 1 addition & 1 deletion pysummaries/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@
'pandas_to_report_html', 'get_styles', 'Pandas2HTMLSummaryTable',
'PySummariesException']

__version__ = '0.0.2'
__version__ = '0.0.3'
2 changes: 2 additions & 0 deletions pysummaries/reportable/intermediate_representation.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ def extract_multibodyblocks(indexes, df, value_styles, styles, tabid):
#row labels
*rowgroup_labels, currow_label = rowlabel
if (rowgroup_labels != lastblock_index and lastblock_index is not None) or cnt==len(df):
if lastblock_index is None:
lastblock_index = rowgroup_labels
rowgroupstyle = styles.get("rowgrouplabel")
titles = [RowGroupLabel(x, tabid, level, y, rowgroup_cnts, style=rowgroupstyle) for level,(x,y) in enumerate(zip(lastblock_index, labels_paddings))]
# prune
Expand Down
5 changes: 5 additions & 0 deletions pysummaries/table_summary/table_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,11 @@ def calculate_table_summary(df, strata=None, show_overall=True, columns_labels=N
coltypes = detect_df_col_types(df)
if not isinstance(df, pd.DataFrame):
df = nw.from_native(df).to_pandas()
# Convert object columns classified as numerical (e.g. decimal.Decimal) to
# a numeric dtype so that pandas can compute stats on them.
for col_name, col_type in coltypes.items():
if col_type == "numerical" and df[col_name].dtype == object:
df[col_name] = pd.to_numeric(df[col_name], errors="coerce")
colnames = df.columns.to_list()
strat_cats = list()
if strata is not None:
Expand Down
4 changes: 3 additions & 1 deletion pysummaries/table_summary/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# #############################################################################
from decimal import Decimal

import narwhals as nw
import pandas as pd
import numpy as np
Expand Down Expand Up @@ -41,7 +43,7 @@ def _classify_object_col(df, col_name):
return "categorical"
if curtype == str:
return "categorical"
if np.issubdtype(type(col.iloc[0]), np.number) or isinstance(col.iloc[0], (int, float)):
if np.issubdtype(type(col.iloc[0]), np.number) or isinstance(col.iloc[0], (int, float, Decimal)):
return "numerical"
return "categorical"

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

setup(
name="pysummaries",
version='0.0.2',
version='0.0.3',
author="Otto Fajardo",
author_email="pleasecontactviagithub@notvalid.com",
description="Produce table summaries from pandas, polars or PyArrow dataframes",
Expand Down
71 changes: 71 additions & 0 deletions tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,77 @@ def test_simple_table_summary_gt(self):
# ---------------------------------------------------------------------------
# HTML content tests using BeautifulSoup
# ---------------------------------------------------------------------------
class TestEdgeCases(unittest.TestCase):
    """Regression tests for edge cases and previously fixed bugs."""

    @staticmethod
    def _decimal_columns():
        """Return fresh ``value``/``group`` columns where ``value`` holds Decimals."""
        from decimal import Decimal

        amounts = [Decimal(text) for text in ("12.5", "24.3", "6.1", "18.7")]
        return {"value": amounts, "group": ["A", "A", "B", "B"]}

    def _assert_numeric_summary(self, frame):
        """Summarise *frame* by ``group`` and check a numeric row label is present."""
        summary, _ = pysummaries.calculate_table_summary(
            frame,
            strata="group",
            columns_include=["value", "group"],
            show_overall=False,
        )
        # Numeric summaries have "Mean (SD)" as a row label
        self.assertIn("Mean (SD)", summary.index.get_level_values(-1))

    def test_bool_column_single_value(self):
        """A boolean column holding a single unique value must not crash.

        Regression test: when the summary DataFrame has a single row,
        cnt==len(df) on the first iteration while lastblock_index is still
        None, causing a TypeError in extract_multibodyblocks.
        """
        frame = pd.DataFrame({
            "flag": [True, True, True, True],
            "group": ["A", "A", "B", "B"],
        })
        summary = pysummaries.get_table_summary(
            frame,
            strata="group",
            columns_include=["flag", "group"],
            show_overall=False,
        )
        self.assertIsNotNone(summary)

    def test_decimal_column_treated_as_numeric(self):
        """A column of decimal.Decimal values is detected as numerical."""
        frame = pd.DataFrame(self._decimal_columns())
        detected = detect_df_col_types(frame)
        self.assertEqual(detected["value"], "numerical")

    def test_decimal_column_summary(self):
        """A pandas Decimal column yields numeric summaries (Mean, Median, etc.)."""
        self._assert_numeric_summary(pd.DataFrame(self._decimal_columns()))

    def test_decimal_column_polars(self):
        """A Polars Decimal column yields numeric summaries."""
        self._assert_numeric_summary(pl.DataFrame(self._decimal_columns()))

    def test_decimal_column_pyarrow(self):
        """A PyArrow Decimal column yields numeric summaries."""
        columns = self._decimal_columns()
        arrow_table = pa.table({
            "value": pa.array(columns["value"]),
            "group": columns["group"],
        })
        self._assert_numeric_summary(arrow_table)


class TestHTMLContent(unittest.TestCase):

def setUp(self):
Expand Down
Loading