commit fb1647ec6e02030b1ea0a2a84686f9cd49c02ace · pyrox.dev/nixpkgs

+55 -4

ci/eval/compare/cmp-stats.py

···

       8
        
       from pathlib import Path

     

       9
        
       from scipy.stats import ttest_rel

     

       10
        
       from tabulate import tabulate

     

       0
        
       
     

       11
        
       

     

       12
        
       

     

       13
        
       def flatten_data(json_data: dict) -> dict:

     
···

       86
        
           return metrics

     

       87
        
       

     

       88
        
       

     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       89
        
       def metric_sort_key(name: str) -> str:

     

       90
        
           if name in ("time.cpu", "time.gc", "time.gcFraction"):

     

       91
        
               return (1, name)

     
···

       99
        
               return (5, name)

     

       100
        
       

     

       101
        
       

     

       102
       -
       def dataframe_to_markdown(df: pd.DataFrame) -> str:

     

       103
        
           df = df.sort_values(

     

       104
        
               by=df.columns[0], ascending=True, key=lambda s: s.map(metric_sort_key)

     

       105
        
           )

     
···

       108
        
           headers = [str(column) for column in df.columns]

     

       109
        
           table = [

     

       110
        
               [

     

       111
       -
                   row["metric"],

     

       0
        
       
     

       112
        
                   # Check for no change and NaN in p_value/t_stat

     

       113
        
                   *[None if np.isnan(val) or np.allclose(val, 0) else val for val in row[1:]],

     

       114
        
               ]

     

       115
        
               for _, row in df.iterrows()

     

       116
        
           ]

     

       117
        
       

     

       118
       -
           return tabulate(table, headers, tablefmt="github", floatfmt=".4f", missingval="-")

     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       119
        
       

     

       120
        
       

     

       121
        
       def perform_pairwise_tests(before_metrics: dict, after_metrics: dict) -> pd.DataFrame:

     
···

       174
        
               description="Performance comparison of Nix evaluation statistics"

     

       175
        
           )

     

       176
        
           parser.add_argument(

     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       177
        
               "before", help="File or directory containing baseline (data before)"

     

       178
        
           )

     

       179
        
           parser.add_argument(

     
···

       191
        
           before_metrics = load_all_metrics(before_stats)

     

       192
        
           after_metrics = load_all_metrics(after_stats)

     

       193
        
           df1 = perform_pairwise_tests(before_metrics, after_metrics)

     

       194
       -
           markdown_table = dataframe_to_markdown(df1)

     

       195
        
           print(markdown_table)

     

       196
        
       

     

       197

···

       8
        
       from pathlib import Path

     

       9
        
       from scipy.stats import ttest_rel

     

       10
        
       from tabulate import tabulate

     

       11
       +
       from typing import Final

     

       12
        
       

     

       13
        
       

     

       14
        
       def flatten_data(json_data: dict) -> dict:

     
···

       87
        
           return metrics

     

       88
        
       

     

       89
        
       

     

       90
       +
       def metric_table_name(name: str, explain: bool) -> str:

     

       91
       +
           """

     

       92
       +
           Returns the name of the metric, plus a footnote to explain it if needed.

     

       93
       +
           """

     

       94
       +
           return f"{name}[^{name}]" if explain else name

     

       95
       +
       

     

       96
       +
       

     

       97
       +
       METRIC_EXPLANATION_FOOTNOTE: Final[str] = """

     

       98
       +
       

     

       99
       +
       [^time.cpu]: Number of seconds of CPU time accounted by the OS to the Nix evaluator process. On UNIX systems, this comes from [`getrusage(RUSAGE_SELF)`](https://man7.org/linux/man-pages/man2/getrusage.2.html).

     

       100
       +
       [^time.gc]: Number of seconds of CPU time accounted by the Boehm garbage collector to performing GC.

     

       101
       +
       [^time.gcFraction]: What fraction of the total CPU time is accounted towards performing GC.

     

       102
       +
       [^gc.cycles]: Number of times garbage collection has been performed.

     

       103
       +
       [^gc.heapSize]: Size in bytes of the garbage collector heap.

     

       104
       +
       [^gc.totalBytes]: Size in bytes of all allocations in the garbage collector.

     

       105
       +
       [^envs.bytes]: Size in bytes of all `Env` objects allocated by the Nix evaluator. These are almost exclusively created by [`nix-env`](https://nix.dev/manual/nix/stable/command-ref/nix-env.html).

     

       106
       +
       [^list.bytes]: Size in bytes of all [lists](https://nix.dev/manual/nix/stable/language/syntax.html#list-literal) allocated by the Nix evaluator.

     

       107
       +
       [^sets.bytes]: Size in bytes of all [attrsets](https://nix.dev/manual/nix/stable/language/syntax.html#attrs-literal) allocated by the Nix evaluator.

     

       108
       +
       [^symbols.bytes]: Size in bytes of all items in the Nix evaluator symbol table.

     

       109
       +
       [^values.bytes]: Size in bytes of all values allocated by the Nix evaluator.

     

       110
       +
       [^envs.number]: The count of all `Env` objects allocated.

     

       111
       +
       [^nrAvoided]: The number of thunks whose creation was avoided.

     

       112
       +
       [^nrExprs]: The number of expression objects ever created.

     

       113
       +
       [^nrFunctionCalls]: The number of function calls ever made.

     

       114
       +
       [^nrLookups]: The number of lookups into an attrset ever made.

     

       115
       +
       [^nrOpUpdateValuesCopied]: The number of attrset values copied in the process of merging attrsets.

     

       116
       +
       [^nrOpUpdates]: The number of attrset merge operations (`//`) performed.

     

       117
       +
       [^nrPrimOpCalls]: The number of function calls to primops (Nix builtins) ever made.

     

       118
       +
       [^nrThunks]: The number of [thunks](https://nix.dev/manual/nix/latest/language/evaluation.html#laziness) ever made. A thunk is a delayed computation, represented by an expression reference and a closure.

     

       119
       +
       [^sets.number]: The number of attrsets ever made.

     

       120
       +
       [^symbols.number]: The number of symbols ever added to the symbol table.

     

       121
       +
       [^values.number]: The number of values ever made.

     

       122
       +
       [^envs.elements]: The number of values contained within an `Env` object.

     

       123
       +
       [^list.concats]: The number of list concatenation operations (`++`) performed.

     

       124
       +
       [^list.elements]: The number of values contained within a list.

     

       125
       +
       [^sets.elements]: The number of values contained within an attrset.

     

       126
       +
       [^sizes.Attr]: Size in bytes of the `Attr` type.

     

       127
       +
       [^sizes.Bindings]: Size in bytes of the `Bindings` type.

     

       128
       +
       [^sizes.Env]: Size in bytes of the `Env` type.

     

       129
       +
       [^sizes.Value]: Size in bytes of the `Value` type.

     

       130
       +
       """

     

       131
       +
       

     

       132
       +
       

     

       133
        
       def metric_sort_key(name: str) -> str:

     

       134
        
           if name in ("time.cpu", "time.gc", "time.gcFraction"):

     

       135
        
               return (1, name)

     
···

       143
        
               return (5, name)

     

       144
        
       

     

       145
        
       

     

       146
       +
       def dataframe_to_markdown(df: pd.DataFrame, explain: bool) -> str:

     

       147
        
           df = df.sort_values(

     

       148
        
               by=df.columns[0], ascending=True, key=lambda s: s.map(metric_sort_key)

     

       149
        
           )

     
···

       152
        
           headers = [str(column) for column in df.columns]

     

       153
        
           table = [

     

       154
        
               [

     

       155
       +
                   # The metric acts as its own footnote name

     

       156
       +
                   metric_table_name(row["metric"], explain),

     

       157
        
                   # Check for no change and NaN in p_value/t_stat

     

       158
        
                   *[None if np.isnan(val) or np.allclose(val, 0) else val for val in row[1:]],

     

       159
        
               ]

     

       160
        
               for _, row in df.iterrows()

     

       161
        
           ]

     

       162
        
       

     

       163
       +
           result = tabulate(table, headers, tablefmt="github", floatfmt=".4f", missingval="-")

     

       164
       +
           if explain:

     

       165
       +
               result += METRIC_EXPLANATION_FOOTNOTE

     

       166
       +
           return result

     

       167
        
       

     

       168
        
       

     

       169
        
       def perform_pairwise_tests(before_metrics: dict, after_metrics: dict) -> pd.DataFrame:

     
···

       222
        
               description="Performance comparison of Nix evaluation statistics"

     

       223
        
           )

     

       224
        
           parser.add_argument(

     

       225
       +
               "--explain", action="store_true", help="Explain the evaluation statistics"

     

       226
       +
           )

     

       227
       +
           parser.add_argument(

     

       228
        
               "before", help="File or directory containing baseline (data before)"

     

       229
        
           )

     

       230
        
           parser.add_argument(

     
···

       242
        
           before_metrics = load_all_metrics(before_stats)

     

       243
        
           after_metrics = load_all_metrics(after_stats)

     

       244
        
           df1 = perform_pairwise_tests(before_metrics, after_metrics)

     

       245
       +
           markdown_table = dataframe_to_markdown(df1, explain=options.explain)

     

       246
        
           print(markdown_table)

     

       247
        
       

     

       248

+1 -1

ci/eval/compare/default.nix

···

       209
        
               echo

     

       210
        
             } >> $out/step-summary.md

     

       211
        
       

     

       212
       -
             cmp-stats ${combined}/before/stats ${combined}/after/stats >> $out/step-summary.md

     

       213
        
       

     

       214
        
           else

     

       215
        
             # Package chunks are the same in both revisions

···

       209
        
               echo

     

       210
        
             } >> $out/step-summary.md

     

       211
        
       

     

       212
       +
             cmp-stats --explain ${combined}/before/stats ${combined}/after/stats >> $out/step-summary.md

     

       213
        
       

     

       214
        
           else

     

       215
        
             # Package chunks are the same in both revisions