Revert "ci/compare: Bring back nix stats comparison" (#403448)

Changed files
+4 -189
ci
-141
ci/eval/compare/cmp-stats.py
···
-
import json
-
import os
-
from scipy.stats import ttest_rel
-
import pandas as pd
-
import numpy as np
-
from pathlib import Path
-
-
# Define metrics of interest (can be expanded as needed)
-
# NOTE(review): not referenced by any function in the visible part of this
# file; presumably intended to filter metrics by key prefix -- TODO confirm.
METRIC_PREFIXES = ("nr", "gc")
-
-
def flatten_data(json_data: dict) -> dict:
    """
    Extract and flatten the numeric metrics from JSON data.

    The JSON data can be nested, e.g. an entry might look like this:

        "gc":{"cycles":13,"heapSize":5404549120,"totalBytes":9545876464}

    Flattened:

        "gc.cycles": 13
        "gc.heapSize": 5404549120
        ...

    Nesting of any depth is flattened by joining the keys with ".".
    Only numeric leaves (int/float) are kept, at every level, so that the
    result can be fed directly into numeric comparisons.

    Args:
        json_data (dict): JSON data containing metrics.
    Returns:
        dict: Flattened metrics with dotted metric names as keys.
    """
    flat_metrics = {}

    def _collect(node: dict, prefix=None) -> None:
        # Depth-first walk that preserves the insertion order of the input.
        for key, value in node.items():
            name = key if prefix is None else f"{prefix}.{key}"
            if isinstance(value, dict):
                _collect(value, name)
            elif isinstance(value, (int, float)):
                flat_metrics[name] = value
            # Non-numeric leaves (strings, lists, null) are not metrics.

    _collect(json_data)
    return flat_metrics
-
-
-
-
-
def load_all_metrics(directory: Path) -> dict:
    """
    Load all stats JSON files below the given directory and extract metrics.

    The directory is expected to contain one sub-directory per system, each
    holding one JSON file per evaluated chunk.

    Args:
        directory (Path): Directory containing the per-system directories.
    Returns:
        dict: Maps "<system dir name>/<chunk file name>" to the flattened
            metrics of that file.
    Raises:
        NotADirectoryError: If an entry of `directory` is not a directory.
    """
    metrics = {}
    for system_dir in directory.iterdir():
        # Explicit check instead of `assert`, which is stripped under -O.
        if not system_dir.is_dir():
            raise NotADirectoryError(f"Expected a directory: {system_dir}")

        for chunk_output in system_dir.iterdir():
            with chunk_output.open(encoding="utf-8") as f:
                data = json.load(f)
            # The key only has to match between the "before" and "after" runs.
            metrics[f"{system_dir.name}/{chunk_output.name}"] = flatten_data(data)

    return metrics
-
-
def dataframe_to_markdown(df: pd.DataFrame) -> str:
    """
    Render a DataFrame as a Markdown table.

    The output starts with a newline, followed by the header row, a
    separator row and one row per DataFrame row. Float cells that are NaN
    or exactly zero are rendered as "-"; other floats are shown with four
    decimal places and every non-float value is shown via `str`.

    Args:
        df (pd.DataFrame): Table to render; column labels must be strings.
    Returns:
        str: The Markdown table.
    """
    # TODO: define threshold for highlighting
    highlight = False

    def render_cell(value) -> str:
        if isinstance(value, float):
            # NaN (e.g. undefined p-value/t-stat) and "no change" share a symbol.
            if np.isnan(value) or value == 0:
                return "-"
            text = f"{value:.4f}"
        else:
            text = str(value)
        return f"**{text}**" if highlight else text

    table = [
        "\n| " + " | ".join(df.columns) + " |",  # header
        "| - " * len(df.columns) + "|",  # separator
    ]
    for _, record in df.iterrows():
        cells = [render_cell(value) for value in record]
        table.append("| " + " | ".join(cells) + " |")

    return "\n".join(table)
-
-
-
def perform_pairwise_tests(before_metrics: dict, after_metrics: dict) -> pd.DataFrame:
    """
    Compare every metric between two runs with a paired t-test.

    Samples are paired per file: a metric contributes one (before, after)
    pair for every file name present in both runs that reports the metric
    in both. Metrics with fewer than two pairs are omitted.

    Args:
        before_metrics (dict): Maps file name to its flattened metrics
            for the base revision.
        after_metrics (dict): Same mapping for the changed revision.
    Returns:
        pd.DataFrame: One row per metric with the columns "metric",
            "mean_before", "mean_after", "mean_diff", "mean_%_change",
            "p_value" and "t_stat", sorted by "p_value". Empty (but with
            these columns) when no metric has enough paired samples.
    """
    # Fixing the columns up front keeps the sort below valid for an empty result.
    columns = [
        "metric",
        "mean_before",
        "mean_after",
        "mean_diff",
        "mean_%_change",
        "p_value",
        "t_stat",
    ]
    common_files = sorted(set(before_metrics) & set(after_metrics))
    all_keys = sorted(
        {key for file_metrics in before_metrics.values() for key in file_metrics}
    )

    results = []
    for key in all_keys:
        pairs = [
            (before_metrics[fname][key], after_metrics[fname][key])
            for fname in common_files
            if key in before_metrics[fname] and key in after_metrics[fname]
        ]
        # A paired t-test needs at least two observations.
        if len(pairs) < 2:
            continue

        before_arr = np.array([before for before, _ in pairs])
        after_arr = np.array([after for _, after in pairs])

        diff = after_arr - before_arr
        # A zero baseline yields inf/NaN here; that is reported as-is, only
        # the runtime warning is suppressed.
        with np.errstate(divide="ignore", invalid="ignore"):
            pct_change = 100 * diff / before_arr
        t_stat, p_val = ttest_rel(after_arr, before_arr)

        results.append({
            "metric": key,
            "mean_before": np.mean(before_arr),
            "mean_after": np.mean(after_arr),
            "mean_diff": np.mean(diff),
            "mean_%_change": np.mean(pct_change),
            "p_value": p_val,
            "t_stat": t_stat,
        })

    return pd.DataFrame(results, columns=columns).sort_values("p_value")
-
-
-
if __name__ == "__main__":
    # Entry point: reads the result directories of both evaluations from the
    # environment and prints the comparison as a Markdown table on stdout.
    import sys

    before_dir = os.environ.get("BEFORE_DIR")
    after_dir = os.environ.get("AFTER_DIR")

    if not before_dir or not after_dir:
        # stdout carries the Markdown report, so diagnostics go to stderr.
        print(
            "Error: Environment variables 'BEFORE_DIR' and 'AFTER_DIR' must be set.",
            file=sys.stderr,
        )
        sys.exit(1)

    before_metrics = load_all_metrics(Path(before_dir) / "stats")
    after_metrics = load_all_metrics(Path(after_dir) / "stats")

    df1 = perform_pairwise_tests(before_metrics, after_metrics)
    markdown_table = dataframe_to_markdown(df1)
    print(markdown_table)
+4 -46
ci/eval/compare/default.nix
···
jq,
runCommand,
writeText,
-
python3,
...
}:
{
···
in
runCommand "compare"
{
-
nativeBuildInputs = [
-
jq
-
(python3.withPackages (
-
ps: with ps; [
-
numpy
-
pandas
-
scipy
-
]
-
))
-
-
];
+
nativeBuildInputs = [ jq ];
maintainers = builtins.toJSON maintainers;
passAsFile = [ "maintainers" ];
-
env = {
-
BEFORE_DIR = "${beforeResultDir}";
-
AFTER_DIR = "${afterResultDir}";
-
};
}
''
mkdir $out
cp ${changed-paths} $out/changed-paths.json
+
jq -r -f ${./generate-step-summary.jq} < ${changed-paths} > $out/step-summary.md
-
if jq -e '(.attrdiff.added | length == 0) and (.attrdiff.removed | length == 0)' "${changed-paths}" > /dev/null; then
-
# No attributes were added or removed between the two revisions,
-
# so the eval stats are comparable and we can generate a performance comparison
-
{
-
echo
-
echo "# Performance comparison"
-
echo
-
echo "This compares the performance of this branch against its pull request base branch (e.g., 'master')"
-
echo
-
echo "For further help please refer to: [ci/README.md](https://github.com/NixOS/nixpkgs/blob/master/ci/README.md)"
-
echo
-
} >> $out/step-summary.md
+
cp "$maintainersPath" "$out/maintainers.json"
-
python3 ${./cmp-stats.py} >> $out/step-summary.md
-
-
else
-
# Attributes were added or removed between the two revisions,
-
# so the eval stats are not comparable and the performance comparison is skipped
-
{
-
echo
-
echo "# Performance Comparison"
-
echo
-
echo "Performance stats were skipped because the package sets differ between the two revisions."
-
echo
-
echo "For further help please refer to: [ci/README.md](https://github.com/NixOS/nixpkgs/blob/master/ci/README.md)"
-
} >> $out/step-summary.md
-
fi
-
-
jq -r -f ${./generate-step-summary.jq} < ${changed-paths} >> $out/step-summary.md
-
-
cp "$maintainersPath" "$out/maintainers.json"
+
# TODO: Compare eval stats
''
-2
ci/eval/default.nix
···
nixVersions,
jq,
sta,
-
python3,
}:
let
···
runCommand
writeText
supportedSystems
-
python3
;
};