Merge pull request #205190 from NixOS/lib.path.relativeNormalise

lib.path.subpath.{isValid,normalise}: init

+1
.github/CODEOWNERS
···
/lib/cli.nix @edolstra @nbp @Profpatsch
/lib/debug.nix @edolstra @nbp @Profpatsch
/lib/asserts.nix @edolstra @nbp @Profpatsch
+
/lib/path.* @infinisil @fricklerhandwerk
# Nixpkgs Internals
/default.nix @nbp
+1
doc/doc-support/default.nix
···
{ name = "lists"; description = "list manipulation functions"; }
{ name = "debug"; description = "debugging functions"; }
{ name = "options"; description = "NixOS / nixpkgs option handling"; }
+
{ name = "path"; description = "path functions"; }
{ name = "filesystem"; description = "filesystem functions"; }
{ name = "sources"; description = "source filtering functions"; }
{ name = "cli"; description = "command-line serialization functions"; }
+5 -1
doc/doc-support/lib-function-docs.nix
···
installPhase = ''
function docgen {
# TODO: wrap lib.$1 in <literal>, make nixdoc not escape it
-
nixdoc -c "$1" -d "lib.$1: $2" -f "$1.nix" > "$out/$1.xml"
+
if [[ -e "../lib/$1.nix" ]]; then
+
nixdoc -c "$1" -d "lib.$1: $2" -f "$1.nix" > "$out/$1.xml"
+
else
+
nixdoc -c "$1" -d "lib.$1: $2" -f "$1/default.nix" > "$out/$1.xml"
+
fi
echo "<xi:include href='$1.xml' />" >> "$out/index.xml"
}
+9 -7
doc/doc-support/lib-function-locations.nix
···
let
revision = pkgs.lib.trivial.revisionWithDefault (nixpkgs.revision or "master");
-
libDefPos = set:
-
builtins.map
-
(name: {
-
name = name;
+
libDefPos = prefix: set:
+
builtins.concatMap
+
(name: [{
+
name = builtins.concatStringsSep "." (prefix ++ [name]);
location = builtins.unsafeGetAttrPos name set;
-
})
-
(builtins.attrNames set);
+
}] ++ nixpkgsLib.optionals
+
(builtins.length prefix == 0 && builtins.isAttrs set.${name})
+
(libDefPos (prefix ++ [name]) set.${name})
+
) (builtins.attrNames set);
libset = toplib:
builtins.map
(subsetname: {
subsetname = subsetname;
-
functions = libDefPos toplib.${subsetname};
+
functions = libDefPos [] toplib.${subsetname};
})
(builtins.map (x: x.name) libsets);
+2 -1
lib/default.nix
···
maintainers = import ../maintainers/maintainer-list.nix;
teams = callLibs ../maintainers/team-list.nix;
meta = callLibs ./meta.nix;
-
sources = callLibs ./sources.nix;
versions = callLibs ./versions.nix;
# module system
···
fetchers = callLibs ./fetchers.nix;
# Eval-time filesystem handling
+
path = callLibs ./path;
filesystem = callLibs ./filesystem.nix;
+
sources = callLibs ./sources.nix;
# back-compat aliases
platforms = self.systems.doubles;
+196
lib/path/README.md
···
+
# Path library
+
+
This document explains why the `lib.path` library is designed the way it is.
+
+
The purpose of this library is to process [filesystem paths]. It does not read files from the filesystem.
+
It exists to support the native Nix [path value type] with extra functionality.
+
+
[filesystem paths]: https://en.m.wikipedia.org/wiki/Path_(computing)
+
[path value type]: https://nixos.org/manual/nix/stable/language/values.html#type-path
+
+
As an extension of the path value type, it inherits the same intended use cases and limitations:
+
- Only use paths to access files at evaluation time, such as the local project source.
+
- Paths cannot point to derivations, so they are unfit to represent dependencies.
+
- A path implicitly imports the referenced files into the Nix store when interpolated to a string. Therefore paths are not suitable to access files at build- or run-time, as you risk importing the path from the evaluation system instead.
+
+
Overall, this library works with two types of paths:
+
- Absolute paths are represented with the Nix [path value type]. Nix automatically normalises these paths.
+
- Subpaths are represented with the [string value type] since path value types don't support relative paths. This library normalises these paths as safely as possible. Absolute paths in strings are not supported.
+
+
A subpath refers to a specific file or directory within an absolute base directory.
+
It is a stricter form of a relative path, notably [without support for `..` components][parents] since those could escape the base directory.
+
+
[string value type]: https://nixos.org/manual/nix/stable/language/values.html#type-string
+
+
This library is designed to be as safe and intuitive as possible, throwing errors when operations are attempted that would produce surprising results, and giving the expected result otherwise.
+
+
This library is designed to work well as a dependency for the `lib.filesystem` and `lib.sources` library components. Contrary to these library components, `lib.path` does not read any paths from the filesystem.
+
+
This library makes only these assumptions about paths and no others:
+
- `dirOf path` returns the path to the parent directory of `path`, unless `path` is the filesystem root, in which case `path` is returned.
+
- There can be multiple filesystem roots: `p == dirOf p` and `q == dirOf q` does not imply `p == q`.
+
- While there's only a single filesystem root in stable Nix, the [lazy trees feature](https://github.com/NixOS/nix/pull/6530) introduces [additional filesystem roots](https://github.com/NixOS/nix/pull/6530#discussion_r1041442173).
+
- `path + ("/" + string)` returns the path to the `string` subdirectory in `path`.
+
- If `string` contains no `/` characters, then `dirOf (path + ("/" + string)) == path`.
+
- If `string` contains no `/` characters, then `baseNameOf (path + ("/" + string)) == string`.
+
- `path1 == path2` returns `true` only if `path1` points to the same filesystem path as `path2`.
+
+
Notably we do not make the assumption that we can turn paths into strings using `toString path`.
+
+
## Design decisions
+
+
Each subsection here contains a decision along with arguments and counter-arguments for (+) and against (-) that decision.
+
+
### Leading dots for relative paths
+
[leading-dots]: #leading-dots-for-relative-paths
+
+
Observing: Since subpaths are a form of relative paths, they can have a leading `./` to indicate that they are relative. This is generally not necessary for tools though.
+
+
Considering: Paths should be as explicit, consistent and unambiguous as possible.
+
+
Decision: Returned subpaths should always have a leading `./`.
+
+
<details>
+
<summary>Arguments</summary>
+
+
- (+) In shells, just running `foo` as a command wouldn't execute the file `foo`, whereas `./foo` would execute the file. In contrast, `foo/bar` does execute that file without the need for `./`. This can lead to confusion about when a `./` needs to be prefixed. If a `./` is always included, this becomes a non-issue. This effectively then means that paths don't overlap with command names.
+
- (+) Prepending with `./` makes the subpaths always valid as relative Nix path expressions.
+
- (+) Using paths in command line arguments could give problems if not escaped properly, e.g. if a path was `--version`. This is not a problem with `./--version`. This effectively then means that paths don't overlap with GNU-style command line options.
+
- (-) `./` is not required to resolve relative paths, resolution always has an implicit `./` as prefix.
+
- (-) It's less noisy without the `./`, e.g. in error messages.
+
- (+) But similarly, it could be confusing whether something was even a path.
+
e.g. `foo` could be anything, but `./foo` is more clearly a path.
+
- (+) Makes it more uniform with absolute paths (those always start with `/`).
+
- (-) That is not relevant for practical purposes.
+
- (+) `find` also outputs results with `./`.
+
- (-) But only if you give it an argument of `.`. If you give it the argument `some-directory`, it won't prefix that.
+
- (-) `realpath --relative-to` doesn't prefix relative paths with `./`.
+
- (+) There is no need to return the same result as `realpath`.
+
+
</details>
+
+
### Representation of the current directory
+
[curdir]: #representation-of-the-current-directory
+
+
Observing: The subpath that produces the base directory can be represented with `.` or `./` or `./.`.
+
+
Considering: Paths should be as consistent and unambiguous as possible.
+
+
Decision: It should be `./.`.
+
+
<details>
+
<summary>Arguments</summary>
+
+
- (+) `./` would be inconsistent with [the decision to not persist trailing slashes][trailing-slashes].
+
- (-) `.` is how `realpath` normalises paths.
+
- (+) `.` can be interpreted as a shell command (it's a builtin for sourcing files in `bash` and `zsh`).
+
- (+) `.` would be the only path without a `/`. It could not be used as a Nix path expression, since those require at least one `/` to be parsed as such.
+
- (-) `./.` is rather long.
+
- (-) We don't require users to type this though, as it's only output by the library.
+
As inputs all three variants are supported for subpaths (and we can't do anything about absolute paths)
+
- (-) `builtins.dirOf "foo" == "."`, so `.` would be consistent with that.
+
- (+) `./.` is consistent with the [decision to have leading `./`][leading-dots].
+
- (+) `./.` is a valid Nix path expression, although this property does not hold for every relative path or subpath.
+
+
</details>
+
+
### Subpath representation
+
[relrepr]: #subpath-representation
+
+
Observing: Subpaths such as `foo/bar` can be represented in various ways:
+
- string: `"foo/bar"`
+
- list with all the components: `[ "foo" "bar" ]`
+
- attribute set: `{ type = "relative-path"; components = [ "foo" "bar" ]; }`
+
+
Considering: Paths should be as safe to use as possible. We should generate string outputs in the library and not encourage users to do that themselves.
+
+
Decision: Paths are represented as strings.
+
+
<details>
+
<summary>Arguments</summary>
+
+
- (+) It's simpler for the users of the library. One doesn't have to convert a path to a string before it can be used.
+
- (+) Naively converting the list representation to a string with `concatStringsSep "/"` would break for `[]`, requiring library users to be more careful.
+
- (+) It doesn't encourage people to do their own path processing and instead use the library.
+
With a list representation it would seem easy to just use `lib.lists.init` to get the parent directory, but then it breaks for `.`, which would be represented as `[ ]`.
+
- (+) `+` is convenient and doesn't work on lists and attribute sets.
+
- (-) Shouldn't use `+` anyways, we export safer functions for path manipulation.
+
+
</details>
+
+
### Parent directory
+
[parents]: #parent-directory
+
+
Observing: Relative paths can have `..` components, which refer to the parent directory.
+
+
Considering: Paths should be as safe and unambiguous as possible.
+
+
Decision: `..` path components in string paths are not supported, neither as inputs nor as outputs. Hence, string paths are called subpaths, rather than relative paths.
+
+
<details>
+
<summary>Arguments</summary>
+
+
- (+) If we wanted relative paths to behave according to the "physical" interpretation (as a directory tree with relations between nodes), it would require resolving symlinks, since e.g. `foo/..` would not be the same as `.` if `foo` is a symlink.
+
- (-) The "logical" interpretation is also valid (treating paths as a sequence of names), and is used by some software. It is simpler, and not using symlinks at all is safer.
+
- (+) Mixing both models can lead to surprises.
+
- (+) We can't resolve symlinks without filesystem access.
+
- (+) Nix also doesn't support reading symlinks at evaluation time.
+
- (-) We could just not handle such cases, e.g. `equals "foo" "foo/bar/.." == false`. The paths are different, we don't need to check whether the paths point to the same thing.
+
- (+) Assume we said `relativeTo /foo /bar == "../bar"`. If this is used like `/bar/../foo` in the end, and `bar` turns out to be a symlink to somewhere else, this won't be accurate.
+
- (-) We could decide to not support such ambiguous operations, or mark them as such, e.g. the normal `relativeTo` will error on such a case, but there could be `extendedRelativeTo` supporting that.
+
- (-) `..` components are a part of paths, so a path library should support them.
+
- (+) If we can convincingly argue that all such use cases are better done e.g. with runtime tools, the library not supporting it can nudge people towards using those.
+
- (-) We could allow "..", but only in the prefix.
+
- (+) Then we'd have to throw an error for doing `append /some/path "../foo"`, making it non-composable.
+
- (+) The same goes for returning paths with `..`: `relativeTo /foo /bar => "../bar"` would produce a non-composable path.
+
- (+) We argue that `..` is not needed at the Nix evaluation level, since we'd always start evaluation from the project root and don't go up from there.
+
- (+) `..` is supported in Nix paths, turning them into absolute paths.
+
- (-) This is ambiguous in the presence of symlinks.
+
- (+) If you need `..` for building or runtime, you can use build-/run-time tooling to create those (e.g. `realpath` with `--relative-to`), or use absolute paths instead.
+
This also gives you the ability to correctly handle symlinks.
+
+
</details>
+
+
### Trailing slashes
+
[trailing-slashes]: #trailing-slashes
+
+
Observing: Subpaths can contain trailing slashes, like `foo/`, indicating that the path points to a directory and not a file.
+
+
Considering: Paths should be as consistent as possible, there should only be a single normalisation for the same path.
+
+
Decision: All functions remove trailing slashes in their results.
+
+
<details>
+
<summary>Arguments</summary>
+
+
- (+) It allows normalisations to be unique, in that there's only a single normalisation for the same path. If trailing slashes were preserved, both `foo/bar` and `foo/bar/` would be valid but different normalisations for the same path.
+
- Comparison to other frameworks to figure out the least surprising behavior:
+
- (+) Nix itself doesn't support trailing slashes when parsing and doesn't preserve them when appending paths.
+
- (-) [Rust's std::path](https://doc.rust-lang.org/std/path/index.html) does preserve them during [construction](https://doc.rust-lang.org/std/path/struct.Path.html#method.new).
+
- (+) Doesn't preserve them when returning individual [components](https://doc.rust-lang.org/std/path/struct.Path.html#method.components).
+
- (+) Doesn't preserve them when [canonicalizing](https://doc.rust-lang.org/std/path/struct.Path.html#method.canonicalize).
+
- (+) [Python 3's pathlib](https://docs.python.org/3/library/pathlib.html#module-pathlib) doesn't preserve them during [construction](https://docs.python.org/3/library/pathlib.html#pathlib.PurePath).
+
- Notably it represents the individual components as a list internally.
+
- (-) [Haskell's filepath](https://hackage.haskell.org/package/filepath-1.4.100.0) has [explicit support](https://hackage.haskell.org/package/filepath-1.4.100.0/docs/System-FilePath.html#g:6) for handling trailing slashes.
+
- (-) Does preserve them for [normalisation](https://hackage.haskell.org/package/filepath-1.4.100.0/docs/System-FilePath.html#v:normalise).
+
- (-) [NodeJS's Path library](https://nodejs.org/api/path.html) preserves trailing slashes for [normalisation](https://nodejs.org/api/path.html#pathnormalizepath).
+
- (+) For [parsing a path](https://nodejs.org/api/path.html#pathparsepath) into its significant elements, trailing slashes are not preserved.
+
- (+) Nix's builtin function `dirOf` gives an unexpected result for paths with trailing slashes: `dirOf "foo/bar/" == "foo/bar"`.
+
Inconsistently, `baseNameOf` works correctly though: `baseNameOf "foo/bar/" == "bar"`.
+
- (-) We are writing a path library to improve handling of paths though, so we shouldn't use these functions and discourage their use.
+
- (-) Unexpected result when normalising intermediate paths, like `subpath.normalise ("foo" + "/") + "bar" == "./foobar"`.
+
- (+) This is not a practical use case though.
+
- (+) Don't use `+` to append paths, this library has a `join` function for that.
+
- (-) Users might use `+` out of habit though.
+
- (+) The `realpath` command also removes trailing slashes.
+
- (+) Even with a trailing slash, the path is the same, it's only an indication that it's a directory.
+
+
</details>
+
+
## Other implementations and references
+
+
- [Rust](https://doc.rust-lang.org/std/path/struct.Path.html)
+
- [Python](https://docs.python.org/3/library/pathlib.html)
+
- [Haskell](https://hackage.haskell.org/package/filepath-1.4.100.0/docs/System-FilePath.html)
+
- [Nodejs](https://nodejs.org/api/path.html)
+
- [POSIX.1-2017](https://pubs.opengroup.org/onlinepubs/9699919799/nframe.html)
+218
lib/path/default.nix
···
+
# Functions for working with paths, see ./README.md
+
{ lib }:
+
let
+
+
inherit (builtins)
+
isString
+
split
+
match
+
;
+
+
inherit (lib.lists)
+
length
+
head
+
last
+
genList
+
elemAt
+
;
+
+
inherit (lib.strings)
+
concatStringsSep
+
substring
+
;
+
+
inherit (lib.asserts)
+
assertMsg
+
;
+
+
# Explain why `value` cannot be used as a subpath, or evaluate to `null` when
# it can. The rules are checked in order, so only the first violated one is
# reported.
subpathInvalidReason = value:
  let
    # Only looked at once `value` is known to be a non-empty string
    startsWithSlash = substring 0 1 value == "/";
    # `match` has to cover the whole string, so this holds exactly when one of
    # the `/`-separated components is `..`
    hasParentComponent = match "(.*/)?\\.\\.(/.*)?" value != null;
  in
  if ! isString value then
    "The given value is of type ${builtins.typeOf value}, but a string was expected"
  else if value == "" then
    "The given string is empty"
  else if startsWithSlash then
    "The given string \"${value}\" starts with a `/`, representing an absolute path"
  # `..` components are not supported, see ./README.md#parent-directory
  else if hasParentComponent then
    "The given string \"${value}\" contains a `..` component, which is not allowed in subpaths"
  else null;
+
+
# Break a relative path string into the list of its components, normalising on
# the way: the result contains neither empty nor "." components.
# NOTE: nothing is validated here; ".." components are expected to have been
# rejected by the caller beforehand (see `subpathInvalidReason`)
splitRelPath = path:
  let
    # A single regex split does the heavy lifting, for efficiency. The regex
    # matches separators such as "/", "/./" or "/././", each with arbitrarily
    # many consecutive "/"s. This leaves only these special cases behind:
    # - A leading "./" produces a leading "." piece
    # - A trailing "/." or "/" produces a trailing "." or "" piece respectively
    #
    # No other "." or "" pieces can occur
    pieces = split "/+(\\./+)*" path;

    # For k (non-overlapping) matches, `split` returns 2 * k + 1 elements:
    # the k + 1 pieces interleaved with the k matches. Integer division
    # recovers the number of pieces from the list length:
    # floor( (2 * k + 1) / 2 ) + 1 == floor( k + 1/2 ) + 1 == k + 1
    pieceCount = length pieces / 2 + 1;

    # A leading "." is dropped, normalising "./foo" to "foo"
    dropFirst = if head pieces == "." then 1 else 0;
    # A trailing "." or "" is dropped, normalising "foo/" and "foo/." to
    # "foo". See ./README.md#trailing-slashes
    dropLast = if last pieces == "." || last pieces == "" then 1 else 0;

    # What remains after dropping is the length of the result
    resultLength = pieceCount - dropLast - dropFirst;

    # Look up the component for a result index: shift by the dropped leading
    # piece, then double the position because of the interleaved matches
    componentAt = index: elemAt pieces ((dropFirst + index) * 2);

  in
  # A lone "." needs special treatment: it is both the first and the last
  # piece, so it would get dropped twice, giving a resultLength of -1
  if path == "." then []

  # The result is generated directly rather than with a combination of
  # `filter`, `init` and `tail`, which avoids allocating intermediate lists
  else genList componentAt resultLength;
+
+
# Assemble a list of path components into a relative path string
joinRelPath = components:
  let
    # Without any components there is nothing to join, and the empty string
    # is not a valid relative path, so `.` stands in for it
    joined = if components == [] then "." else concatStringsSep "/" components;
  in
  # Results always start with `./` (./README.md#leading-dots-for-relative-paths)
  "./" + joined;
+
+
in /* No rec! Add dependencies on this file at the top. */ {
+
+
+
/* Whether a value is a valid subpath string.
+
+
- The value is a string
+
+
- The string is not empty
+
+
- The string doesn't start with a `/`
+
+
- The string doesn't contain any `..` path components
+
+
Type:
+
subpath.isValid :: String -> Bool
+
+
Example:
+
# Not a string
+
subpath.isValid null
+
=> false
+
+
# Empty string
+
subpath.isValid ""
+
=> false
+
+
# Absolute path
+
subpath.isValid "/foo"
+
=> false
+
+
# Contains a `..` path component
+
subpath.isValid "../foo"
+
=> false
+
+
# Valid subpath
+
subpath.isValid "foo/bar"
+
=> true
+
+
# Doesn't need to be normalised
+
subpath.isValid "./foo//bar/"
+
=> true
+
*/
+
subpath.isValid = value:
  # A value is valid exactly when no reason for it being invalid is found
  null == subpathInvalidReason value;
+
+
+
/* Normalise a subpath. Throw an error if the subpath isn't valid, see
+
`lib.path.subpath.isValid`
+
+
- Limit repeating `/` to a single one
+
+
- Remove redundant `.` components
+
+
- Remove trailing `/` and `/.`
+
+
- Add leading `./`
+
+
Laws:
+
+
- (Idempotency) Normalising multiple times gives the same result:
+
+
subpath.normalise (subpath.normalise p) == subpath.normalise p
+
+
- (Uniqueness) There's only a single normalisation for the paths that lead to the same file system node:
+
+
subpath.normalise p != subpath.normalise q -> $(realpath ${p}) != $(realpath ${q})
+
+
- Don't change the result when appended to a Nix path value:
+
+
base + ("/" + p) == base + ("/" + subpath.normalise p)
+
+
- Don't change the path according to `realpath`:
+
+
$(realpath ${p}) == $(realpath ${subpath.normalise p})
+
+
- Only error on invalid subpaths:
+
+
(builtins.tryEval (subpath.normalise p)).success == subpath.isValid p
+
+
Type:
+
subpath.normalise :: String -> String
+
+
Example:
+
# limit repeating `/` to a single one
+
subpath.normalise "foo//bar"
+
=> "./foo/bar"
+
+
# remove redundant `.` components
+
subpath.normalise "foo/./bar"
+
=> "./foo/bar"
+
+
# add leading `./`
+
subpath.normalise "foo/bar"
+
=> "./foo/bar"
+
+
# remove trailing `/`
+
subpath.normalise "foo/bar/"
+
=> "./foo/bar"
+
+
# remove trailing `/.`
+
subpath.normalise "foo/bar/."
+
=> "./foo/bar"
+
+
# Return the current directory as `./.`
+
subpath.normalise "."
+
=> "./."
+
+
# error on `..` path components
+
subpath.normalise "foo/../bar"
+
=> <error>
+
+
# error on empty string
+
subpath.normalise ""
+
=> <error>
+
+
# error on absolute path
+
subpath.normalise "/foo"
+
=> <error>
+
*/
+
subpath.normalise = path:
  let
    # `null` when `path` is a valid subpath, otherwise the text for the error
    invalidReason = subpathInvalidReason path;
  in
  assert assertMsg (invalidReason == null)
    "lib.path.subpath.normalise: Argument is not a valid subpath string: ${invalidReason}";
  # Splitting drops the redundant parts, joining adds the leading `./` back
  joinRelPath (splitRelPath path);
+
+
}
+34
lib/path/tests/default.nix
···
+
# Derivation that runs the unit tests and the property tests of lib.path
{
  nixpkgs ? ../../..,
  system ? builtins.currentSystem,
  pkgs ? import nixpkgs {
    inherit system;
    config = {};
    overlays = [];
  },
  # The lib directory under test, copied into the build as `lib`
  libpath ? ../..,
  # Random seed for the property tests, `null` leaves the choice to prop.sh
  seed ? null,
}:
let
  # `toString null` is the empty string, in which case no argument is passed
  seedArg = toString seed;
in
pkgs.runCommand "lib-path-tests" {
  nativeBuildInputs = [ pkgs.nix pkgs.jq pkgs.bc ];
} ''
  # Needed to make Nix evaluation work
  export NIX_STATE_DIR=$(mktemp -d)

  cp -r ${libpath} lib
  export TEST_LIB=$PWD/lib

  echo "Running unit tests lib/path/tests/unit.nix"
  nix-instantiate --eval lib/path/tests/unit.nix \
    --argstr libpath "$TEST_LIB"

  echo "Running property tests lib/path/tests/prop.sh"
  bash lib/path/tests/prop.sh ${seedArg}

  touch $out
''
+64
lib/path/tests/generate.awk
···
+
# Generate random path-like strings, separated by null characters.
+
#
+
# Invocation:
+
#
+
# awk -f ./generate.awk -v <variable>=<value> | tr '\0' '\n'
+
#
+
# Customizable variables (all default to 0):
+
# - seed: Deterministic random seed to use for generation
+
# - count: Number of paths to generate
+
# - extradotweight: Give extra weight to dots being generated
+
# - extraslashweight: Give extra weight to slashes being generated
+
# - extranullweight: Give extra weight to null being generated, making paths shorter
+
BEGIN {
  # The seed is passed in explicitly so that a run can be reproduced
  srand(seed)

  # Characters come from the codes [minascii, maxascii): control characters
  # below 32 are left out, and so is DEL (code 127), since the upper bound is
  # exclusive
  minascii = 32
  maxascii = 127
  upperascii = maxascii - minascii

  # The random value space is divided into consecutive ranges:
  #   [0, upperascii)           one slot for each character of the ascii range
  #   [upperascii, upperdot)    extra slots for ".", on top of its ascii slot
  #   [upperdot, upperslash)    extra slots for "/", on top of its ascii slot
  #   [upperslash, total)       slots for null, which ends the current string
  upperdot = upperascii + extradotweight
  upperslash = upperdot + extraslashweight
  # There is always at least one null slot, or strings would never end
  total = upperslash + 1 + extranullweight

  # atstart=1 marks the start of a new string. It is cleared by characters
  # from the ascii and dot ranges, but not by a "/" from the slash range
  atstart = 1
  while (count > 0) {

    # Random integer in [0, total)
    value = int(rand() * total)

    if (value >= upperslash) {
      # Null range. Empty strings are never generated
      if (atstart) continue
      printf "\x00"
      count--
      atstart = 1

    } else if (value >= upperdot) {
      # Slash range. At the start of a string a / is only generated in 10% of
      # cases, since that always gives an invalid subpath, which is not a
      # very interesting case
      if (atstart && rand() > 0.1) continue
      printf "/"

    } else if (value >= upperascii) {
      # Dot range
      printf "."
      atstart = 0

    } else {
      # Ascii range
      printf("%c", value + minascii)
      atstart = 0
    }
  }
}
+60
lib/path/tests/prop.nix
···
+
# Given a list of path-like strings, check some properties of the path library
+
# using those paths and return an attribute set of the following form:
+
#
+
# { <string> = <lib.path.subpath.normalise string>; }
+
#
+
# If `normalise` fails to evaluate, the attribute value is set to `""`.
+
# If not, the result is normalised a second time to check idempotency, and the first normalisation is used as the attribute value.
+
{
  # The path to the nixpkgs lib to use
  libpath,
  # A flat directory whose files each contain one randomly-generated
  # path-like value
  dir,
}:
let
  lib = import libpath;

  inherit (lib.asserts) assertMsg;
  inherit (lib.path.subpath) isValid normalise;

  # The contents of every file in `dir`, one string per file
  fileNames = builtins.attrNames (builtins.readDir dir);
  strings = map (fileName: builtins.readFile (dir + "/${fileName}")) fileNames;

  # Check the laws of lib.path.subpath.normalise for a single string.
  # Evaluates to the normalised string, or to "" if normalising failed
  normaliseAndCheck = str:
    let
      valid = isValid str;

      first = builtins.tryEval (normalise str);
      second = builtins.tryEval (normalise first.value);

      appendedOriginal = /. + ("/" + str);
      appendedNormalised = /. + ("/" + first.value);
    in
    # Law: normalisation errors on invalid subpaths and only on those
    assert assertMsg
      (valid -> first.success)
      "Even though string \"${str}\" is valid as a subpath, the normalisation for it failed";
    assert assertMsg
      (! valid -> ! first.success)
      "Even though string \"${str}\" is invalid as a subpath, the normalisation for it succeeded";

    # Law: normalisation is idempotent
    assert assertMsg
      (valid -> second.success)
      "For valid subpath \"${str}\", the normalisation \"${first.value}\" was not a valid subpath";
    assert assertMsg
      (valid -> first.value == second.value)
      "For valid subpath \"${str}\", normalising it once gives \"${first.value}\" but normalising it twice gives a different result: \"${second.value}\"";

    # Law: normalisation makes no difference when appending to an absolute
    # Nix path value
    assert assertMsg
      (valid -> appendedOriginal == appendedNormalised)
      "For valid subpath \"${str}\", appending to an absolute Nix path value gives \"${appendedOriginal}\", but appending the normalised result \"${first.value}\" gives a different value \"${appendedNormalised}\"";

    # Failure is encoded as the empty string
    if first.success then first.value else "";

in
lib.genAttrs strings normaliseAndCheck
+179
lib/path/tests/prop.sh
···
+
#!/usr/bin/env bash
+
+
# Property tests for the `lib.path` library
+
#
+
# It generates random path-like strings and runs the functions on
+
# them, checking that the expected laws of the functions hold
+
+
set -euo pipefail
+
shopt -s inherit_errexit
+
+
# https://stackoverflow.com/a/246128
+
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+
+
if test -z "${TEST_LIB:-}"; then
+
TEST_LIB=$SCRIPT_DIR/../..
+
fi
+
+
tmp="$(mktemp -d)"
+
# Remove the temporary directory "$tmp" together with everything in it
clean_up() { rm -rf "$tmp"; }
+
trap clean_up EXIT
+
mkdir -p "$tmp/work"
+
cd "$tmp/work"
+
+
# Defaulting to a random seed but the first argument can override this
+
seed=${1:-$RANDOM}
+
echo >&2 "Using seed $seed, use \`lib/path/tests/prop.sh $seed\` to reproduce this result"
+
+
# The number of random paths to generate. This specific number was chosen to
+
# be fast enough while still generating enough variety to detect bugs.
+
count=500
+
+
debug=0
+
# debug=1 # print some extra info
+
# debug=2 # print generated values
+
+
# Fine tuning parameters to balance the number of generated invalid paths
+
# to the variance in generated paths.
+
extradotweight=64 # Larger value: more dots
+
extraslashweight=64 # Larger value: more slashes
+
extranullweight=16 # Larger value: shorter strings
+
+
# Print the given arguments to stderr as the reason for a failed test case,
# then exit with status 1
die() {
  {
    echo "test case failed: " "$@"
  } >&2
  exit 1
}
+
+
if [[ "$debug" -ge 1 ]]; then
+
echo >&2 "Generating $count random path-like strings"
+
fi
+
+
# Read stream of null-terminated strings entry-by-entry into bash,
+
# write it to a file and the `strings` array.
+
declare -a strings=()
+
mkdir -p "$tmp/strings"
+
while IFS= read -r -d $'\0' str; do
+
echo -n "$str" > "$tmp/strings/${#strings[@]}"
+
strings+=("$str")
+
done < <(awk \
+
-f "$SCRIPT_DIR"/generate.awk \
+
-v seed="$seed" \
+
-v count="$count" \
+
-v extradotweight="$extradotweight" \
+
-v extraslashweight="$extraslashweight" \
+
-v extranullweight="$extranullweight")
+
+
if [[ "$debug" -ge 1 ]]; then
+
echo >&2 "Trying to normalise the generated path-like strings with Nix"
+
fi
+
+
# Precalculate all normalisations with a single Nix call. Calling Nix for each
+
# string individually would take way too long
+
nix-instantiate --eval --strict --json \
+
--argstr libpath "$TEST_LIB" \
+
--argstr dir "$tmp/strings" \
+
"$SCRIPT_DIR"/prop.nix \
+
>"$tmp/result.json"
+
+
# Uses some jq magic to turn the resulting attribute set into an associative
+
# bash array assignment
+
declare -A normalised_result="($(jq '
+
to_entries
+
| map("[\(.key | @sh)]=\(.value | @sh)")
+
| join(" \n")' -r < "$tmp/result.json"))"
+
+
# Looks up the precalculated normalisation result for a string and checks that
# the normalisation fails if and only if the string is an invalid subpath:
# - Valid subpath that got normalised: prints the result and returns 0
# - Invalid subpath whose normalisation failed: returns 2
# - Any disagreement: reports it using `die`, which exits with status 1
#
# This function is meant to be called in a command substitution, where `die`
# can only terminate the subshell. The caller therefore has to distinguish
# status 2 (invalid subpath, carry on) from all other non-zero statuses (a
# failed test case, abort).
normalise() {
  local str=$1
  local valid normalised
  # Uses the same check for validity as in the library implementation
  if [[ "$str" == "" || "$str" == /* || "$str" =~ ^(.*/)?\.\.(/.*)?$ ]]; then
    valid=
  else
    valid=1
  fi

  normalised=${normalised_result[$str]}
  # An empty string indicates failure, this is encoded in ./prop.nix
  if [[ -n "$normalised" ]]; then
    if [[ -n "$valid" ]]; then
      echo "$normalised"
    else
      die "For invalid subpath \"$str\", lib.path.subpath.normalise returned this result: \"$normalised\""
    fi
  else
    if [[ -n "$valid" ]]; then
      die "For valid subpath \"$str\", lib.path.subpath.normalise failed"
    else
      if [[ "$debug" -ge 2 ]]; then
        echo >&2 "String \"$str\" is not a valid subpath"
      fi
      # Invalid and it correctly failed. A status different from the one used
      # by `die` lets the caller tell this apart from a failed test case
      return 2
    fi
  fi
}

# Intermediate result populated by test_idempotency_realpath
# and used in test_normalise_uniqueness
#
# Contains a mapping from a normalised subpath to the realpath result it represents
declare -A norm_to_real

# For every generated string that is a valid subpath, checks that `realpath`
# gives the same result for the string and for its normalisation, and records
# that result in norm_to_real. Invalid subpaths are counted and skipped.
test_idempotency_realpath() {
  if [[ "$debug" -ge 1 ]]; then
    echo >&2 "Checking idempotency of each result and making sure the realpath result isn't changed"
  fi

  local status

  # Count invalid subpaths to display stats
  invalid=0
  for str in "${strings[@]}"; do
    status=0
    result=$(normalise "$str") || status=$?
    if [[ "$status" -eq 2 ]]; then
      ((invalid++)) || true
      continue
    elif [[ "$status" -ne 0 ]]; then
      # A check inside of `normalise` failed. The reason was already printed
      # from within the subshell, but only this shell can abort the script
      exit "$status"
    fi

    # Check the law that it doesn't change the result of a realpath
    mkdir -p -- "$str" "$result"
    real_orig=$(realpath -- "$str")
    real_norm=$(realpath -- "$result")

    if [[ "$real_orig" != "$real_norm" ]]; then
      die "realpath of the original string \"$str\" (\"$real_orig\") is not the same as realpath of the normalisation \"$result\" (\"$real_norm\")"
    fi

    if [[ "$debug" -ge 2 ]]; then
      echo >&2 "String \"$str\" gets normalised to \"$result\" and file path \"$real_orig\""
    fi
    norm_to_real["$result"]="$real_orig"
  done
  if [[ "$debug" -ge 1 ]]; then
    # Multiply before dividing: with scale=1 the quotient `100 / $count` would
    # be truncated to one decimal before it is used
    echo >&2 "$(bc <<< "scale=1; 100 * $invalid / $count")% of the total $count generated strings were invalid subpath strings, and were therefore ignored"
  fi
}
+
+
# Checks the uniqueness law on the contents of norm_to_real: two different
# normalisations must never represent the same realpath result
#
# Reads the globals norm_to_real and debug. Calls die on the first pair of
# normalisations that share a realpath
test_normalise_uniqueness() {
    local norm real first_norm
    # Reverse mapping from a realpath result to the first normalisation seen
    # for it. The keys of norm_to_real are distinct by construction, so any
    # realpath that shows up a second time belongs to a different
    # normalisation. This finds the same violations as comparing every pair,
    # in a single pass
    local -A real_to_norm=()

    if [[ "$debug" -ge 1 ]]; then
        echo >&2 "Checking for the uniqueness law"
    fi

    for norm in "${!norm_to_real[@]}"; do
        real=${norm_to_real["$norm"]}
        first_norm=${real_to_norm["$real"]-}
        if [[ -n "$first_norm" ]]; then
            die "Normalisations \"$first_norm\" and \"$norm\" are different, but the realpath of them is the same: \"$real\""
        fi
        real_to_norm["$real"]=$norm
    done
}
+
+
# Run the property tests in order: test_normalise_uniqueness reads the
# norm_to_real mapping that test_idempotency_realpath populates
test_idempotency_realpath
test_normalise_uniqueness

echo "tests ok" >&2
+125
lib/path/tests/unit.nix
···
+
# Unit tests for lib.path functions. Use `nix-build` in this directory to
# run these
{ libpath }:
let
  lib = import libpath;
  inherit (lib.path) subpath;

  # Test case: subpath.isValid applied to `value` returns `expected`
  isValidIs = value: expected: {
    expr = subpath.isValid value;
    inherit expected;
  };

  # Test case: subpath.normalise applied to `value` returns `expected`
  normalisesTo = value: expected: {
    expr = subpath.normalise value;
    inherit expected;
  };

  # Test case: evaluating subpath.normalise applied to `value` fails
  normaliseFails = value: {
    expr = (builtins.tryEval (subpath.normalise value)).success;
    expected = false;
  };

  cases = lib.runTests {
    # subpath.isValid
    testSubpathIsValidExample1 = isValidIs null false;
    testSubpathIsValidExample2 = isValidIs "" false;
    testSubpathIsValidExample3 = isValidIs "/foo" false;
    testSubpathIsValidExample4 = isValidIs "../foo" false;
    testSubpathIsValidExample5 = isValidIs "foo/bar" true;
    testSubpathIsValidExample6 = isValidIs "./foo//bar/" true;
    testSubpathIsValidTwoDotsEnd = isValidIs "foo/.." false;
    testSubpathIsValidTwoDotsMiddle = isValidIs "foo/../bar" false;
    testSubpathIsValidTwoDotsPrefix = isValidIs "..foo" true;
    testSubpathIsValidTwoDotsSuffix = isValidIs "foo.." true;
    testSubpathIsValidTwoDotsPrefixComponent = isValidIs "foo/..bar/baz" true;
    testSubpathIsValidTwoDotsSuffixComponent = isValidIs "foo/bar../baz" true;
    testSubpathIsValidThreeDots = isValidIs "..." true;
    testSubpathIsValidFourDots = isValidIs "...." true;
    testSubpathIsValidThreeDotsComponent = isValidIs "foo/.../bar" true;
    testSubpathIsValidFourDotsComponent = isValidIs "foo/..../bar" true;

    # subpath.normalise
    testSubpathNormaliseExample1 = normalisesTo "foo//bar" "./foo/bar";
    testSubpathNormaliseExample2 = normalisesTo "foo/./bar" "./foo/bar";
    testSubpathNormaliseExample3 = normalisesTo "foo/bar" "./foo/bar";
    testSubpathNormaliseExample4 = normalisesTo "foo/bar/" "./foo/bar";
    testSubpathNormaliseExample5 = normalisesTo "foo/bar/." "./foo/bar";
    testSubpathNormaliseExample6 = normalisesTo "." "./.";
    testSubpathNormaliseExample7 = normaliseFails "foo/../bar";
    testSubpathNormaliseExample8 = normaliseFails "";
    testSubpathNormaliseExample9 = normaliseFails "/foo";
    testSubpathNormaliseIsValidDots =
      normalisesTo "./foo/.bar/.../baz...qux" "./foo/.bar/.../baz...qux";
    testSubpathNormaliseWrongType = normaliseFails null;
    testSubpathNormaliseTwoDots = normaliseFails "..";
  };
in
  # lib.runTests returns the list of failed cases, so an empty list means success
  if cases == [] then "Unit tests successful"
  else throw "Path unit tests failed: ${lib.generators.toPretty {} cases}"
+3
lib/tests/release.nix
···
inherit pkgs;
lib = import ../.;
})
+
(import ../path/tests {
+
inherit pkgs;
+
})
];
} ''
datadir="${pkgs.nix}/share"
+9 -1
pkgs/tools/nix/nixdoc/default.nix
···
-
{ lib, stdenv, fetchFromGitHub, rustPlatform, darwin }:
+
{ lib, stdenv, fetchFromGitHub, fetchpatch, rustPlatform, darwin }:
rustPlatform.buildRustPackage rec {
pname = "nixdoc";
···
rev = "v${version}";
sha256 = "14d4dq06jdqazxvv7fq5872zy0capxyb0fdkp8qg06gxl1iw201s";
};
+
+
patches = [
+
# Support nested identifiers https://github.com/nix-community/nixdoc/pull/27
+
(fetchpatch {
+
url = "https://github.com/nix-community/nixdoc/pull/27/commits/ea542735bf675fe2ccd37edaffb9138d1a8c1b7e.patch";
+
sha256 = "1fmz44jv2r9qsnjxvkkjfb0safy69l4x4vx1g5gisrp8nwdn94rj";
+
})
+
];
buildInputs = lib.optionals stdenv.isDarwin [ darwin.Security ];