at 25.11-pre 5.2 kB view raw
import json
import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.stats import ttest_rel

# Metric name prefixes of interest (can be expanded as needed).
# NOTE(review): currently unused in this file — presumably consumed by a
# future filtering step or another module; kept for compatibility.
METRIC_PREFIXES = ("nr", "gc")


def flatten_data(json_data: dict) -> dict:
    """
    Extract and flatten metrics from JSON data.

    The JSON data can be one level nested, e.g.:

        "gc": {"cycles": 13, "heapSize": 5404549120, "totalBytes": 9545876464}

    which flattens to:

        "gc.cycles": 13
        "gc.heapSize": 5404549120
        ...

    Top-level numeric values are kept as-is; nested dict values are joined
    with a dot. Non-numeric, non-dict values are silently dropped.

    Args:
        json_data (dict): JSON data containing metrics.
    Returns:
        dict: Flattened metrics with keys as metric names.
    """
    flat_metrics = {}
    for key, value in json_data.items():
        if isinstance(value, (int, float)):
            flat_metrics[key] = value
        elif isinstance(value, dict):
            # Only one level of nesting is expected in the stats files.
            for sub_key, sub_value in value.items():
                flat_metrics[f"{key}.{sub_key}"] = sub_value
    return flat_metrics


def load_all_metrics(directory: Path) -> dict:
    """
    Load all stats JSON files in the specified directory and extract metrics.

    Expects the layout ``directory/<system>/<chunk>.json``; each resulting
    key is ``"<system>/<chunk>"``.

    Args:
        directory (Path): Directory containing per-system subdirectories
            of JSON stats files.
    Returns:
        dict: Dictionary with "<system>/<file>" keys and flattened metrics
            as values.
    """
    metrics = {}
    for system_dir in directory.iterdir():
        # Skip stray files (e.g. .DS_Store); previously this was an assert,
        # which is stripped under `python -O` and crashed otherwise.
        if not system_dir.is_dir():
            continue

        for chunk_output in system_dir.iterdir():
            with chunk_output.open() as f:
                data = json.load(f)
            # Bug fix: the key used to be built with a stray "$"
            # (f"...{system_dir.name}/${chunk_output.name}"), a JS
            # template-literal leftover that embedded a literal "$".
            metrics[f"{system_dir.name}/{chunk_output.name}"] = flatten_data(data)

    return metrics


def dataframe_to_markdown(df: pd.DataFrame) -> str:
    """
    Render a DataFrame as a Markdown table.

    Rows are sorted ascending by the first column. NaN cells and float
    zeros are rendered as "-"; other floats use 4 decimal places.

    Args:
        df (pd.DataFrame): Table to render (not modified; sorted copy used).
    Returns:
        str: Markdown table, starting with a leading newline.
    """
    df = df.sort_values(by=df.columns[0], ascending=True)
    markdown_lines = []

    # Header (column names as-is).
    markdown_lines.append('\n| ' + ' | '.join(df.columns) + ' |')
    markdown_lines.append("| - " * (len(df.columns)) + "|")  # Separator line

    for _, row in df.iterrows():
        # TODO: define threshold for highlighting significant rows.
        highlight = False

        row_values = []
        for val in row:
            if isinstance(val, float) and np.isnan(val):
                # NaN (e.g. p-value/t-stat on zero-variance diffs).
                row_values.append("-")
            elif isinstance(val, float) and val == 0:
                # No change (mean_diff == 0).
                row_values.append("-")
            else:
                cell = f"{val:.4f}" if isinstance(val, float) else str(val)
                row_values.append(f"**{cell}**" if highlight else cell)

        markdown_lines.append('| ' + ' | '.join(row_values) + ' |')

    return '\n'.join(markdown_lines)


def perform_pairwise_tests(before_metrics: dict, after_metrics: dict) -> pd.DataFrame:
    """
    Run a paired t-test per metric across files common to both runs.

    For each metric key seen in ``before_metrics``, pairs are formed from
    files present in both runs that report that metric; metrics with fewer
    than 2 pairs are skipped (ttest_rel needs at least 2 observations).

    Args:
        before_metrics (dict): "<file>" -> flattened metrics (baseline).
        after_metrics (dict): "<file>" -> flattened metrics (candidate).
    Returns:
        pd.DataFrame: One row per metric with means, diffs, %-change,
            p_value and t_stat, sorted by p_value ascending. Empty (with
            the same columns) when no metric has enough pairs.
    """
    common_files = sorted(set(before_metrics) & set(after_metrics))
    # Key universe is taken from the "before" run only, matching the
    # original behavior: metrics new in "after" are not reported.
    all_keys = sorted({
        metric_key
        for file_metrics in before_metrics.values()
        for metric_key in file_metrics.keys()
    })

    columns = [
        "metric", "mean_before", "mean_after",
        "mean_diff", "mean_%_change", "p_value", "t_stat",
    ]
    results = []

    for key in all_keys:
        before_vals, after_vals = [], []

        for fname in common_files:
            if key in before_metrics[fname] and key in after_metrics[fname]:
                before_vals.append(before_metrics[fname][key])
                after_vals.append(after_metrics[fname][key])

        if len(before_vals) >= 2:
            before_arr = np.array(before_vals)
            after_arr = np.array(after_vals)

            diff = after_arr - before_arr
            # Suppress divide-by-zero / invalid warnings for zero baselines;
            # the resulting inf/nan values are rendered as "-" downstream.
            with np.errstate(divide="ignore", invalid="ignore"):
                pct_change = 100 * diff / before_arr
            t_stat, p_val = ttest_rel(after_arr, before_arr)

            results.append({
                "metric": key,
                "mean_before": np.mean(before_arr),
                "mean_after": np.mean(after_arr),
                "mean_diff": np.mean(diff),
                "mean_%_change": np.mean(pct_change),
                "p_value": p_val,
                "t_stat": t_stat,
            })

    # Guard: sort_values("p_value") on an empty frame would raise KeyError.
    if not results:
        return pd.DataFrame(columns=columns)

    return pd.DataFrame(results).sort_values("p_value")


if __name__ == "__main__":
    before_dir = os.environ.get("BEFORE_DIR")
    after_dir = os.environ.get("AFTER_DIR")

    if not before_dir or not after_dir:
        print("Error: Environment variables 'BEFORE_DIR' and 'AFTER_DIR' must be set.")
        sys.exit(1)

    before_stats = Path(before_dir) / "stats"
    after_stats = Path(after_dir) / "stats"

    # This may happen if the pull request target does not include PR#399720 yet.
    if not before_stats.exists():
        print("⚠️ Skipping comparison: stats directory is missing in the target commit.")
        sys.exit(0)

    # This should never happen, but we're exiting gracefully anyways
    if not after_stats.exists():
        print("⚠️ Skipping comparison: stats directory missing in current PR evaluation.")
        sys.exit(0)

    before_metrics = load_all_metrics(before_stats)
    after_metrics = load_all_metrics(after_stats)
    df1 = perform_pairwise_tests(before_metrics, after_metrics)
    markdown_table = dataframe_to_markdown(df1)
    print(markdown_table)