突出显示python pandas to_latex中每行的最佳值



我使用 pandas 的 to_latex() 自动生成表格,我想突出显示(粗体、斜体等)每行的最佳值(最大值、最小值)。

有办法做到这一点吗?

我在他们的github中发现了这个问题,但没有提供答案,也没有找到后续的解决方案。

已经有一个 PR,将来会支持此功能。正如这篇评论中提到的,它预计将随 pandas 1.3.0 一起发布。与此同时,我在另一个问题中找到了一个解决方案,它侧重于突出显示每列的最大值:

from functools import partial
import pandas as pd
import numpy as np

def bold_formatter(x, value, num_decimals=2):
    r"""Format a number as LaTeX, in bold when (almost) identical to a given value.

    The number is rendered with a fixed number of decimals and wrapped in
    ``\num{...}``; when it matches ``value`` the result is additionally
    wrapped in ``{\bfseries ...}``.

    Args:
        x: Input number.
        value: Value to compare x with.
        num_decimals: Number of decimals to use for output format.

    Returns:
        String converted output.
    """
    formatted = f"{x:.{num_decimals}f}"
    # Consider values equal when their rounded results are equal; otherwise
    # it may look surprising in the table, where they seem identical.
    if round(x, num_decimals) == round(value, num_decimals):
        # Raw f-string: in a normal literal "\b" is a backspace and "\n" a
        # newline, which would corrupt the "\bfseries" and "\num" macros.
        return rf"{{\bfseries\num{{{formatted}}}}}"
    return rf"\num{{{formatted}}}"

# Example data: three rows, four numeric columns.
df = pd.DataFrame(
    np.array(
        [
            [1.123456, 2.123456, 3.123456, 4.123456],
            [11.123456, 22.123456, 33.123456, 44.123456],
            [111.123456, 222.123456, 333.123456, 444.123456],
        ]
    ),
    columns=["a", "b", "c", "d"],
)
# Header cells written to the LaTeX table. Raw strings keep the backslashes
# literal: in a normal literal the "\v" of "\volt" is a vertical tab, and
# "\s" / "\m" are invalid escape sequences.
col_names = [
    r"a in \si{\meter}",
    r"b in \si{\volt}",
    r"c in \si{\seconds}",
    "d",
]
# Columns to format with maximum condition and 2 floating decimals
max_columns_2f = ["a"]
# Columns to format with minimum condition and 2 floating decimals
min_columns_2f = ["b", "c"]
# Columns to format with minimum condition and 4 floating decimals
min_columns_4f = ["d"]
# One formatter per column, each bound to that column's best value so the
# matching cell is rendered in bold.
fmts_max_2f = {
    column: partial(bold_formatter, value=df[column].max(), num_decimals=2)
    for column in max_columns_2f
}
fmts_min_2f = {
    column: partial(bold_formatter, value=df[column].min(), num_decimals=2)
    for column in min_columns_2f
}
fmts_min_4f = {
    column: partial(bold_formatter, value=df[column].min(), num_decimals=4)
    for column in min_columns_4f
}
fmts = {**fmts_max_2f, **fmts_min_2f, **fmts_min_4f}
with open("test_table.tex", "w", encoding="utf-8") as fh:
    # escape=False so the LaTeX macros produced by the formatters and the
    # header strings are written verbatim instead of being escaped.
    df.to_latex(
        buf=fh,
        index=False,
        header=col_names,
        formatters=fmts,
        escape=False,
    )

遍历行以找到最大值的位置,并在这些特定位置应用格式。

import numpy as np
import pandas as pd
# generate a dataframe with 10 rows and 4 columns filled with random numbers
df = pd.DataFrame(data=np.random.rand(10, 4), index= [f"row_{i}" for i in range(10)], columns=[f"col_{i}" for i in range(4)])
# apply some formatting for all numbers (optional)
df_s = df.style.format("{:.2f}")
# loop through rows and find which column for each row has the highest value
for row in df.index:
col = df.loc[row].idxmax()
# redo formatting for a specific cell
df_s = df_s.format(lambda x: "\textbf{" + f'{x:.2f}' + "}", subset=(row, col))
print(df_s.to_latex())

结果:

\begin{tabular}{lrrrr}
 & col_0 & col_1 & col_2 & col_3 \\
row_0 & 0.56 & \textbf{0.74} & 0.48 & 0.70 \\
row_1 & 0.22 & 0.02 & 0.08 & \textbf{0.97} \\
row_2 & \textbf{0.80} & 0.26 & 0.39 & 0.30 \\
row_3 & \textbf{0.93} & 0.26 & 0.28 & 0.75 \\
row_4 & 0.39 & \textbf{0.45} & 0.10 & 0.30 \\
row_5 & 0.31 & \textbf{0.73} & 0.19 & 0.45 \\
row_6 & 0.23 & \textbf{0.61} & 0.31 & 0.21 \\
row_7 & 0.27 & 0.38 & 0.64 & \textbf{0.93} \\
row_8 & 0.15 & 0.09 & \textbf{0.48} & 0.44 \\
row_9 & \textbf{0.84} & 0.59 & 0.57 & 0.44 \\
\end{tabular}

您可以使用 pandas 的所有内置函数:

df = pd.DataFrame(data=[[1,2], [3,4]])
print(df.style.highlight_max(axis=0, props="font-weight:bold;").to_latex(convert_css=True))
\begin{tabular}{lrr}
 & 0 & 1 \\
0 & 1 & 2 \\
1 & \bfseries 3 & \bfseries 4 \\
\end{tabular}

或者指定您自己的 LaTeX(命令/选项)对:

# Same per-column highlight, but passing a LaTeX (command, option) pair
# directly as the props string instead of CSS.
bold_max = df.style.highlight_max(axis=0, props="textbf:--rwrap;")
print(bold_max.to_latex())
\begin{tabular}{lrr}
 & 0 & 1 \\
0 & 1 & 2 \\
1 & \textbf{3} & \textbf{4} \\
\end{tabular}

(使用 axis=1 获得每行中的最大值)

最新更新