commit 87d5e58aad0488dd8745f496fcdbc07bd58e11a8 · pyrox.dev/nixpkgs

-141

ci/eval/compare/cmp-stats.py

···

       1
       1
       -
       import json

     

       2
       2
       -
       import os

     

       3
       3
       -
       from scipy.stats import ttest_rel

     

       4
       4
       -
       import pandas as pd

     

       5
       5
       -
       import numpy as np

     

       6
       6
       -
       from pathlib import Path

     

       7
       7
       -
       

     

       8
       8
       -
       # Define metrics of interest (can be expanded as needed)

     

       9
       9
       -
       # NOTE(review): no function visible in this file references METRIC_PREFIXES,
       # so metrics are currently not filtered by prefix - confirm whether a
       # filter on these prefixes was intended.
       METRIC_PREFIXES = ("nr", "gc")

     

       10
       10
       -
       

     

       11
       11
       -
       def flatten_data(json_data: dict) -> dict:
           """
           Extracts and flattens the numeric metrics from JSON data.

           This is needed because the JSON data can be nested.
           For example, the JSON data entry might look like this:

           "gc":{"cycles":13,"heapSize":5404549120,"totalBytes":9545876464}

           Flattened:

           "gc.cycles": 13
           "gc.heapSize": 5404549120
           ...

           Dictionaries are flattened at any depth, joining the keys with ".".
           Values that are neither numbers nor dictionaries (strings, lists,
           null) are skipped at every level, so the result only contains values
           that can take part in numeric comparisons.

           Args:
               json_data (dict): JSON data containing metrics.
           Returns:
               dict: Flattened metrics with keys as metric names, in the order
                   in which they appear in the input.
           """
           flat_metrics = {}

           def _collect(prefix, node):
               # prefix is None at the top level so that top-level keys stay unprefixed.
               for key, value in node.items():
                   name = key if prefix is None else f"{prefix}.{key}"
                   if isinstance(value, dict):
                       _collect(name, value)
                   elif isinstance(value, (int, float)):
                       # bool is a subclass of int, so JSON true/false are kept
                       # exactly as the top-level check always did.
                       flat_metrics[name] = value

           _collect(None, json_data)
           return flat_metrics

     

       38
       38
       -
       

     

       39
       39
       -
       

     

       40
       40
       -
       

     

       41
       41
       -
       

     

       42
       42
       -
       def load_all_metrics(directory: Path) -> dict:
           """
           Loads all stats JSON files in the specified directory and extracts metrics.

           The directory is walked two levels deep: every entry of `directory`
           must itself be a directory (one per system, judging by the variable
           names), and every entry inside it is parsed as a JSON file.

           Args:
               directory (Path): Directory containing the per-system sub-directories.
           Returns:
               dict: Dictionary mapping "<sub-directory name>/<file name>" to the
                   flattened metrics of that file.
           Raises:
               NotADirectoryError: If an entry of `directory` is not a directory.
           """
           metrics = {}
           for system_dir in directory.iterdir():
               # Raise explicitly instead of using `assert`, which is stripped
               # when Python runs with -O.
               if not system_dir.is_dir():
                   raise NotADirectoryError(f"Expected a directory: {system_dir}")

               for chunk_output in system_dir.iterdir():
                   with chunk_output.open(encoding="utf-8") as f:
                       data = json.load(f)
                   # Plain "<system>/<file>" key; the former "${...}" placeholder
                   # left a literal "$" in every key.
                   metrics[f"{system_dir.name}/{chunk_output.name}"] = flatten_data(data)

           return metrics

     

       61
       61
       -
       

     

       62
       62
       -
       def dataframe_to_markdown(df: pd.DataFrame) -> str:
           """
           Renders a DataFrame as a Markdown table.

           The output starts with a newline, followed by the header row, a
           separator row and one row per DataFrame row. Float cells are printed
           with four decimals; float cells that are NaN or exactly zero are
           replaced by "-". All other cells are converted with str().

           Args:
               df (pd.DataFrame): Table to render; column labels must be strings.
           Returns:
               str: The Markdown table, lines joined by newlines.
           """
           column_names = df.columns
           lines = [
               '\n| ' + ' | '.join(column_names) + ' |',  # header row
               "| - " * len(column_names) + "|",  # separator row
           ]

           for _, record in df.iterrows():
               # TODO: define threshold for highlighting
               emphasize = False

               cells = []
               for cell in record:
                   is_float = isinstance(cell, float)
                   # NaN (e.g. undefined p-value/t-stat) and zero (no change)
                   # share the same placeholder.
                   if is_float and (np.isnan(cell) or cell == 0):
                       cells.append("-")
                       continue
                   text = f"{cell:.4f}" if is_float else str(cell)
                   cells.append(f"**{text}**" if emphasize else text)

               lines.append('| ' + ' | '.join(cells) + ' |')

           return '\n'.join(lines)

     

       90
       90
       -
       

     

       91
       91
       -
       

     

       92
       92
       -
       def perform_pairwise_tests(before_metrics: dict, after_metrics: dict) -> pd.DataFrame:
           """
           Runs a paired t-test per metric over the files present in both inputs.

           For every metric name found in `before_metrics`, the values of all
           files that exist in both inputs and contain that metric on both sides
           are paired up. Metrics with fewer than two such pairs are left out.

           Args:
               before_metrics (dict): Maps a file key to its flattened metrics (baseline).
               after_metrics (dict): Maps a file key to its flattened metrics (changed run).
           Returns:
               pd.DataFrame: One row per tested metric with the columns "metric",
                   "mean_before", "mean_after", "mean_diff", "mean_%_change",
                   "p_value" and "t_stat", sorted by "p_value". The frame is
                   empty, but still has these columns, when nothing could be tested.
           """
           columns = [
               "metric",
               "mean_before",
               "mean_after",
               "mean_diff",
               "mean_%_change",
               "p_value",
               "t_stat",
           ]
           common_files = sorted(set(before_metrics) & set(after_metrics))
           all_keys = sorted({key for file_metrics in before_metrics.values() for key in file_metrics})

           results = []

           for key in all_keys:
               pairs = [
                   (before_metrics[fname][key], after_metrics[fname][key])
                   for fname in common_files
                   if key in before_metrics[fname] and key in after_metrics[fname]
               ]

               # A paired test needs at least two observations.
               if len(pairs) < 2:
                   continue

               before_arr = np.array([before for before, _ in pairs])
               after_arr = np.array([after for _, after in pairs])

               diff = after_arr - before_arr
               # A zero baseline yields inf/NaN here; that result is kept, only
               # the floating-point warning is silenced.
               with np.errstate(divide="ignore", invalid="ignore"):
                   pct_change = 100 * diff / before_arr
               t_stat, p_val = ttest_rel(after_arr, before_arr)

               results.append({
                   "metric": key,
                   "mean_before": np.mean(before_arr),
                   "mean_after": np.mean(after_arr),
                   "mean_diff": np.mean(diff),
                   "mean_%_change": np.mean(pct_change),
                   "p_value": p_val,
                   "t_stat": t_stat,
               })

           # Passing the columns explicitly keeps "p_value" available for sorting
           # even when `results` is empty.
           return pd.DataFrame(results, columns=columns).sort_values("p_value")

     

       126
       126
       -
       

     

       127
       127
       -
       

     

       128
       128
       -
       if __name__ == "__main__":
           import sys

           # Both result directories are handed over through the environment.
           before_dir = os.environ.get("BEFORE_DIR")
           after_dir = os.environ.get("AFTER_DIR")

           if not before_dir or not after_dir:
               # stdout carries the Markdown table, so the diagnostic goes to
               # stderr. sys.exit is used because the bare exit() helper is only
               # installed by the site module.
               print(
                   "Error: Environment variables 'BEFORE_DIR' and 'AFTER_DIR' must be set.",
                   file=sys.stderr,
               )
               sys.exit(1)

           # Each result directory holds its stats files under "stats".
           before_metrics = load_all_metrics(Path(before_dir) / "stats")
           after_metrics = load_all_metrics(Path(after_dir) / "stats")

           df1 = perform_pairwise_tests(before_metrics, after_metrics)
           markdown_table = dataframe_to_markdown(df1)
           print(markdown_table)

+4 -46

ci/eval/compare/default.nix

···

       3
       3
        
         jq,

     

       4
       4
        
         runCommand,

     

       5
       5
        
         writeText,

     

       6
       6
       -
         python3,

     

       7
       6
        
         ...

     

       8
       7
        
       }:

     

       9
       8
        
       {

     
···

       126
       125
        
       in

     

       127
       126
        
       runCommand "compare"

     

       128
       127
        
         {

     

       129
       129
       -
           nativeBuildInputs = [

     

       130
       130
       -
             jq

     

       131
       131
       -
             (python3.withPackages (

     

       132
       132
       -
               ps: with ps; [

     

       133
       133
       -
                 numpy

     

       134
       134
       -
                 pandas

     

       135
       135
       -
                 scipy

     

       136
       136
       -
               ]

     

       137
       137
       -
             ))

     

       138
       138
       -
       

     

       139
       139
       -
           ];

     

       128
       128
       +
           nativeBuildInputs = [ jq ];

     

       140
       129
        
           maintainers = builtins.toJSON maintainers;

     

       141
       130
        
           passAsFile = [ "maintainers" ];

     

       142
       142
       -
           env = {

     

       143
       143
       -
             BEFORE_DIR = "${beforeResultDir}";

     

       144
       144
       -
             AFTER_DIR = "${afterResultDir}";

     

       145
       145
       -
           };

     

       146
       131
        
         }

     

       147
       132
        
         ''

     

       148
       133
        
           mkdir $out

     

       149
       134
        
       

     

       150
       135
        
           cp ${changed-paths} $out/changed-paths.json

     

       151
       136
        
       

     

       137
       137
       +
           jq -r -f ${./generate-step-summary.jq} < ${changed-paths} > $out/step-summary.md

     

       152
       138
        
       

     

       153
       153
       -
           if jq -e '(.attrdiff.added | length == 0) and (.attrdiff.removed | length == 0)' "${changed-paths}" > /dev/null; then

     

       154
       154
       -
             # Package chunks are the same in both revisions (nothing added or removed)

     

       155
       155
       -
             # so a performance comparison can be generated from them

     

       156
       156
       -
             {

     

       157
       157
       -
               echo

     

       158
       158
       -
               echo "# Performance comparison"

     

       159
       159
       -
               echo

     

       160
       160
       -
               echo "This compares the performance of this branch against its pull request base branch (e.g., 'master')"

     

       161
       161
       -
               echo

     

       162
       162
       -
               echo "For further help please refer to: [ci/README.md](https://github.com/NixOS/nixpkgs/blob/master/ci/README.md)"

     

       163
       163
       -
               echo

     

       164
       164
       -
             } >> $out/step-summary.md

     

       139
       139
       +
           cp "$maintainersPath" "$out/maintainers.json"

     

       165
       140
        
       

     

       166
       166
       -
             python3 ${./cmp-stats.py} >> $out/step-summary.md

     

       167
       167
       -
       

     

       168
       168
       -
           else

     

       169
       169
       -
             # Chunks have changed between revisions (attributes were added or removed)

     

       170
       170
       -
             # so a performance comparison cannot be generated

     

       171
       171
       -
             {

     

       172
       172
       -
               echo

     

       173
       173
       -
               echo "# Performance Comparison"

     

       174
       174
       -
               echo

     

       175
       175
       -
               echo "Performance stats were skipped because the package sets differ between the two revisions."

     

       176
       176
       -
               echo

     

       177
       177
       -
               echo "For further help please refer to: [ci/README.md](https://github.com/NixOS/nixpkgs/blob/master/ci/README.md)"

     

       178
       178
       -
             } >> $out/step-summary.md

     

       179
       179
       -
           fi

     

       180
       180
       -
       

     

       181
       181
       -
           jq -r -f ${./generate-step-summary.jq} < ${changed-paths} >> $out/step-summary.md

     

       182
       182
       -
       

     

       183
       183
       -
           cp "$maintainersPath" "$out/maintainers.json"

     

       141
       141
       +
           # TODO: Compare eval stats

     

       184
       142
        
         ''

-2

ci/eval/default.nix

···

       9
       9
        
         nixVersions,

     

       10
       10
        
         jq,

     

       11
       11
        
         sta,

     

       12
       12
       -
         python3,

     

       13
       12
        
       }:

     

       14
       13
        
       

     

       15
       14
        
       let

     
···

       271
       270
        
             runCommand

     

       272
       271
        
             writeText

     

       273
       272
        
             supportedSystems

     

       274
       274
       -
             python3

     

       275
       273
        
             ;

     

       276
       274
        
         };

     

       277
       275