1import json
2import os
3from scipy.stats import ttest_rel
4import pandas as pd
5import numpy as np
6from pathlib import Path
7
8# Define metrics of interest (can be expanded as needed)
9METRIC_PREFIXES = ("nr", "gc")
10
def flatten_data(json_data: dict, _prefix: str = "") -> dict:
    """
    Extracts and flattens metrics from JSON data.
    This is needed because the JSON data can be nested.
    For example, the JSON data entry might look like this:

    "gc":{"cycles":13,"heapSize":5404549120,"totalBytes":9545876464}

    Flattened:

    "gc.cycles": 13
    "gc.heapSize": 5404549120
    ...

    Args:
        json_data (dict): JSON data containing metrics.
        _prefix (str): Internal recursion accumulator holding the dotted
            path of the enclosing keys; callers should not pass this.
    Returns:
        dict: Flattened metrics with keys as dotted metric names.
    """
    flat_metrics = {}
    for key, value in json_data.items():
        name = f"{_prefix}{key}"
        if isinstance(value, dict):
            # Recurse so arbitrarily deep nesting is flattened; previously
            # only one level was handled and deeper dicts leaked through
            # as raw dict values.
            flat_metrics.update(flatten_data(value, _prefix=f"{name}."))
        elif _prefix or isinstance(value, (int, float)):
            # Top level keeps only numeric scalars; nested levels keep any
            # non-dict leaf (matches the original single-level behavior).
            flat_metrics[name] = value
    return flat_metrics
38
39
40
41
def load_all_metrics(directory: Path) -> dict:
    """
    Loads all stats JSON files in the specified directory and extracts metrics.

    The directory is expected to contain one sub-directory per benchmarked
    system, each holding per-chunk JSON stats files.

    Args:
        directory (Path): Directory containing per-system sub-directories.
    Returns:
        dict: Dictionary keyed by "<system>/<chunk-file>" with the flattened
            metrics of that chunk as values.
    """
    metrics = {}
    for system_dir in directory.iterdir():
        # Skip stray files instead of asserting: asserts are stripped under
        # `python -O`, and a lone README shouldn't abort the comparison.
        if not system_dir.is_dir():
            continue

        for chunk_output in system_dir.iterdir():
            with chunk_output.open() as f:
                data = json.load(f)
            # Fix: the key previously contained a stray "$"
            # ("sysA/$chunk.json") from a shell/JS-template habit.
            metrics[f"{system_dir.name}/{chunk_output.name}"] = flatten_data(data)

    return metrics
61
def dataframe_to_markdown(df: pd.DataFrame) -> str:
    """
    Render *df* as a GitHub-flavored Markdown table string.

    Rows are sorted ascending by the first column. Float cells are printed
    with four decimals; NaN floats (e.g. an undefined p-value) and exact-zero
    floats (no change) are printed as "-".

    Args:
        df (pd.DataFrame): Table to render.
    Returns:
        str: Markdown table, starting with a leading newline.
    """
    ordered = df.sort_values(by=df.columns[0], ascending=True)

    def render_cell(val) -> str:
        # NaN (undefined stat) and zero floats (no change) get a dash.
        if isinstance(val, float) and (np.isnan(val) or val == 0):
            return "-"
        # TODO: define threshold for highlighting (bold) significant cells.
        return f"{val:.4f}" if isinstance(val, float) else str(val)

    lines = ['\n| ' + ' | '.join(ordered.columns) + ' |']
    lines.append("| - " * len(ordered.columns) + "|")  # Separator line
    for _, row in ordered.iterrows():
        lines.append('| ' + ' | '.join(render_cell(v) for v in row) + ' |')

    return '\n'.join(lines)
91
92
def perform_pairwise_tests(before_metrics: dict, after_metrics: dict) -> pd.DataFrame:
    """
    Runs a paired t-test per metric across files present in both runs.

    Args:
        before_metrics (dict): {filename: {metric: value}} for the baseline run.
        after_metrics (dict): {filename: {metric: value}} for the candidate run.
    Returns:
        pd.DataFrame: One row per metric (with at least two paired samples),
            sorted by p_value ascending. Empty (but with the expected
            columns) when no metric has enough pairs.
    """
    common_files = sorted(set(before_metrics) & set(after_metrics))
    all_keys = sorted({metric for file_metrics in before_metrics.values() for metric in file_metrics})

    results = []

    for key in all_keys:
        before_vals, after_vals = [], []

        # Only pair up files that report this metric on both sides.
        for fname in common_files:
            if key in before_metrics[fname] and key in after_metrics[fname]:
                before_vals.append(before_metrics[fname][key])
                after_vals.append(after_metrics[fname][key])

        # ttest_rel needs at least two pairs to be meaningful.
        if len(before_vals) >= 2:
            before_arr = np.array(before_vals)
            after_arr = np.array(after_vals)

            diff = after_arr - before_arr
            # A zero baseline value makes the %-change inf/nan; keep the
            # numeric result but suppress the RuntimeWarning spam.
            with np.errstate(divide="ignore", invalid="ignore"):
                pct_change = 100 * diff / before_arr
            t_stat, p_val = ttest_rel(after_arr, before_arr)

            results.append({
                "metric": key,
                "mean_before": np.mean(before_arr),
                "mean_after": np.mean(after_arr),
                "mean_diff": np.mean(diff),
                "mean_%_change": np.mean(pct_change),
                "p_value": p_val,
                "t_stat": t_stat
            })

    columns = ["metric", "mean_before", "mean_after", "mean_diff",
               "mean_%_change", "p_value", "t_stat"]
    if not results:
        # Guard: sort_values("p_value") raises KeyError on a frame built
        # from an empty list (it has no columns at all).
        return pd.DataFrame(columns=columns)

    return pd.DataFrame(results).sort_values("p_value")
127
128
if __name__ == "__main__":
    # Compare benchmark stats between a baseline ("before") checkout and the
    # current PR ("after") checkout, printing a Markdown summary table.
    before_dir = os.environ.get("BEFORE_DIR")
    after_dir = os.environ.get("AFTER_DIR")

    if not before_dir or not after_dir:
        print("Error: Environment variables 'BEFORE_DIR' and 'AFTER_DIR' must be set.")
        # raise SystemExit instead of exit(): exit() is a `site`-module
        # convenience and is not available when Python runs with -S.
        raise SystemExit(1)

    before_stats = Path(before_dir) / "stats"
    after_stats = Path(after_dir) / "stats"

    # This may happen if the pull request target does not include PR#399720 yet.
    if not before_stats.exists():
        print("⚠️ Skipping comparison: stats directory is missing in the target commit.")
        raise SystemExit(0)

    # This should never happen, but we're exiting gracefully anyways
    if not after_stats.exists():
        print("⚠️ Skipping comparison: stats directory missing in current PR evaluation.")
        raise SystemExit(0)

    before_metrics = load_all_metrics(before_stats)
    after_metrics = load_all_metrics(after_stats)
    comparison_df = perform_pairwise_tests(before_metrics, after_metrics)
    print(dataframe_to_markdown(comparison_df))