Merge pull request #195838 from 06kellyjac/semgrep

resolves https://github.com/NixOS/nixpkgs/issues/192149

Sandro a9e692b4 8d49bc25

Changed files
+224 -52
pkgs
+34 -20
pkgs/tools/security/semgrep/common.nix
···
{ lib, fetchFromGitHub, fetchzip, stdenv }:
rec {
-
version = "0.112.1";
+
version = "1.0.0";
src = fetchFromGitHub {
owner = "returntocorp";
repo = "semgrep";
rev = "v${version}";
-
sha256 = "sha256-SZtxZz4x6YUKw1uO5HQTU4lRY989SoCNsPQphJr+L0Y=";
+
sha256 = "sha256-4fNBpokHKCtMB3P0ot1TzcuzOs5hlyH8nIw+bCGqThA=";
};
# submodule dependencies
# these are fetched so we:
# 1. don't fetch the many submodules we don't need
# 2. avoid fetchSubmodules since it's prone to impurities
-
langsSrc = fetchFromGitHub {
-
owner = "returntocorp";
-
repo = "semgrep-langs";
-
rev = "91e288062eb794e8a5e6967d1009624237793491";
-
sha256 = "sha256-z2t2bTRyj5zu9h/GBg2YeRFimpJsd3dA7dK8VBaKzHo=";
-
};
-
-
interfacesSrc = fetchFromGitHub {
-
owner = "returntocorp";
-
repo = "semgrep-interfaces";
-
rev = "7bc457a32e088ef21adf1529fa0ddeea634b9131";
-
sha256 = "sha256-xN8Qm1/YLa49k9fZKDoPPmHASI2ipI3mkKlwEK2ajO4=";
+
submodules = {
+
"cli/src/semgrep/lang" = fetchFromGitHub {
+
owner = "returntocorp";
+
repo = "semgrep-langs";
+
rev = "65cb2ed80e31e01b122f893fef8428d14432da75";
+
sha256 = "sha256-HdPJdOlMM1l7vNSATkEu5KrCkpt2feEAH8LFDU84KUM=";
+
};
+
"cli/src/semgrep/semgrep_interfaces" = fetchFromGitHub {
+
owner = "returntocorp";
+
repo = "semgrep-interfaces";
+
rev = "c69e30a4cf39f11cab5378700f5e193e8282079e";
+
sha256 = "sha256-Wr3/TWx/LHiTFCoGY4sqdsn3dHvMsEIVYA3RGiv88xQ=";
+
};
};
# fetch pre-built semgrep-core since the ocaml build is complex and relies on
# the opam package manager at some point
-
coreRelease = if stdenv.isDarwin then fetchzip {
-
url = "https://github.com/returntocorp/semgrep/releases/download/v${version}/semgrep-v${version}-osx.zip";
-
sha256 = "sha256-JiOH39vMDL6r9WKuPO0CDkRwGZtzl/GIFoSegVddFpw=";
-
} else fetchzip {
-
url = "https://github.com/returntocorp/semgrep/releases/download/v${version}/semgrep-v${version}-ubuntu-16.04.tgz";
-
sha256 = "sha256-V6r+VQrgz8uVSbRa2AmW4lnLxovk63FL7LqVKD46RBw=";
+
core = rec {
+
data = {
+
x86_64-linux = {
+
suffix = "-ubuntu-16.04.tgz";
+
sha256 = "sha256-SsaAuhcDyO3nr6H2xOtdxzOoEQd6aIe0mlpehvDWzU0=";
+
};
+
x86_64-darwin = {
+
suffix = "-osx.zip";
+
sha256 = "sha256-DAcAB/q6XeljCp4mVljIJB4AUjUuzMSRMFzIuyjWMew=";
+
};
+
};
+
src = let
+
inherit (stdenv.hostPlatform) system;
+
selectSystemData = data: data.${system} or (throw "Unsupported system: ${system}");
+
inherit (selectSystemData data) suffix sha256;
+
in fetchzip {
+
url = "https://github.com/returntocorp/semgrep/releases/download/v${version}/semgrep-v${version}${suffix}";
+
inherit sha256;
+
};
};
meta = with lib; {
+50 -30
pkgs/tools/security/semgrep/default.nix
···
in
buildPythonApplication rec {
pname = "semgrep";
-
inherit (common) version;
-
src = "${common.src}/cli";
+
inherit (common) src version;
-
SEMGREP_CORE_BIN = "${semgrep-core}/bin/semgrep-core";
+
postPatch = (lib.concatStringsSep "\n" (lib.mapAttrsToList (
+
path: submodule: ''
+
# substitute ${path}
+
# remove git submodule placeholder
+
rm -r ${path}
+
# link submodule
+
ln -s ${submodule}/ ${path}
+
''
+
) common.submodules)) + ''
+
cd cli
+
'';
nativeBuildInputs = [ pythonRelaxDepsHook ];
+
# tell cli/setup.py to not copy semgrep-core into the result
+
# this means we can share a copy of semgrep-core and avoid an issue where it
+
# copies the binary but doesn't retain the executable bit
+
SEMGREP_SKIP_BIN = true;
+
pythonRelaxDeps = [
"attrs"
"boltons"
···
"typing-extensions"
];
-
postPatch = ''
-
# remove git submodule placeholders
-
rm -r ./src/semgrep/{lang,semgrep_interfaces}
-
# link submodule dependencies
-
ln -s ${common.langsSrc}/ ./src/semgrep/lang
-
ln -s ${common.interfacesSrc}/ ./src/semgrep/semgrep_interfaces
-
'';
+
propagatedBuildInputs = with pythonPackages; [
+
attrs
+
boltons
+
colorama
+
click
+
click-option-group
+
glom
+
requests
+
ruamel-yaml
+
tqdm
+
packaging
+
jsonschema
+
wcmatch
+
peewee
+
defusedxml
+
urllib3
+
typing-extensions
+
python-lsp-jsonrpc
+
tomli
+
];
doCheck = true;
checkInputs = [ git pytestCheckHook ] ++ (with pythonPackages; [
···
]);
disabledTests = [
# requires networking
-
"tests/unit/test_metric_manager.py"
+
"test_send"
+
# requires networking
+
"test_parse_exclude_rules_auto"
];
preCheck = ''
# tests need a home directory
···
--replace 'addopts = "--splitting-algorithm=least_duration"' ""
'';
-
propagatedBuildInputs = with pythonPackages; [
-
attrs
-
boltons
-
colorama
-
click
-
click-option-group
-
glom
-
requests
-
ruamel-yaml
-
tqdm
-
packaging
-
jsonschema
-
wcmatch
-
peewee
-
defusedxml
-
urllib3
-
typing-extensions
-
python-lsp-jsonrpc
-
];
+
# since we stop cli/setup.py from finding semgrep-core and copying it into
+
# the result we need to provide it on the PATH
+
preFixup = ''
+
makeWrapperArgs+=(--prefix PATH : ${lib.makeBinPath [ semgrep-core ]})
+
'';
+
+
passthru = {
+
inherit common;
+
updateScript = ./update.sh;
+
};
meta = common.meta // {
description = common.meta.description + " - cli";
+1 -2
pkgs/tools/security/semgrep/semgrep-core.nix
···
stdenvNoCC.mkDerivation rec {
pname = "semgrep-core";
inherit (common) version;
-
-
src = common.coreRelease;
+
inherit (common.core) src;
installPhase = ''
runHook preInstall
+139
pkgs/tools/security/semgrep/update.sh
···
+
#!/usr/bin/env nix-shell
+
#!nix-shell -i bash -p curl gnused jq git
+
# git is in the package list above because this script runs `git clone` and `git ls-tree`
+
+
set -euxo pipefail
+
+
# provide a github token so you don't get rate limited
+
# if you use gh cli you can use:
+
# `export GITHUB_TOKEN="$(cat ~/.config/gh/config.yml | yq '.hosts."github.com".oauth_token' -r)"`
+
# or just set your token by hand:
+
# `read -s -p "Enter your token: " GITHUB_TOKEN; export GITHUB_TOKEN`
+
# (we use read so it doesn't show in our shell history and in secret mode so the token you paste isn't visible)
+
if [ -z "${GITHUB_TOKEN:-}" ]; then
+
echo "no GITHUB_TOKEN provided - you could meet API request limiting" >&2
+
fi
+
+
ROOT="$(dirname "$(readlink -f "$0")")"
+
NIXPKGS_ROOT="$ROOT/../../../.."
+
NIX_DRV="$ROOT/default.nix"
+
+
COMMON_FILE="$ROOT/common.nix"
+
+
instantiateClean() {
+
nix-instantiate -A "$1" --eval --strict | cut -d\" -f2
+
}
+
+
# get latest version
+
# Query the GitHub API for the tag name of the latest semgrep release.
+
# The Authorization header is only passed when GITHUB_TOKEN is set and non-empty.
+
# -H and its "Accept: ..." value are joined by a line continuation; without the
+
# trailing backslash curl gets -H with no value and the header line is run as
+
# a separate command.
+
NEW_VERSION=$(
+
curl -s -H \
+
"Accept: application/vnd.github.v3+json" \
+
${GITHUB_TOKEN:+ -H "Authorization: bearer $GITHUB_TOKEN"} \
+
https://api.github.com/repos/returntocorp/semgrep/releases/latest \
+
| jq -r '.tag_name'
+
)
+
# trim v prefix
+
NEW_VERSION="${NEW_VERSION:1}"
+
OLD_VERSION="$(instantiateClean semgrep.common.version)"
+
+
if [[ "$OLD_VERSION" == "$NEW_VERSION" ]]; then
+
echo "Already up to date"
+
exit
+
fi
+
+
replace() {
+
sed -i "s@$1@$2@g" "$3"
+
}
+
+
fetchgithub() {
+
set +eo pipefail
+
nix-build -A "$1" 2>&1 >/dev/null | grep "got:" | cut -d':' -f2 | sed 's| ||g'
+
set -eo pipefail
+
}
+
+
fetchzip() {
+
set +eo pipefail
+
nix-build -E "with import $NIXPKGS_ROOT {}; fetchzip {url = \"$1\"; sha256 = lib.fakeSha256; }" 2>&1 >/dev/null | grep "got:" | cut -d':' -f2 | sed 's| ||g'
+
set -eo pipefail
+
}
+
+
replace "$OLD_VERSION" "$NEW_VERSION" "$COMMON_FILE"
+
+
echo "Updating src"
+
+
OLD_HASH="$(instantiateClean semgrep.common.src.outputHash)"
+
echo "Old hash $OLD_HASH"
+
TMP_HASH="sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="
+
replace "$OLD_HASH" "$TMP_HASH" "$COMMON_FILE"
+
NEW_HASH="$(fetchgithub semgrep.common.src)"
+
echo "New hash $NEW_HASH"
+
replace "$TMP_HASH" "$NEW_HASH" "$COMMON_FILE"
+
+
echo "Updated src"
+
+
# loop through platforms for core
+
nix-instantiate -E "with import $NIXPKGS_ROOT {}; builtins.attrNames semgrep.common.core.data" --eval --strict --json \
+
| jq '.[]' -r \
+
| while read -r PLATFORM; do
+
echo "Updating core for $PLATFORM"
+
# look the entry up by the platform read from the loop; the script has no
+
# positional parameters, so "$1" is unset here and aborts under `set -u`
+
SUFFIX=$(instantiateClean semgrep.common.core.data."$PLATFORM".suffix)
+
OLD_HASH=$(instantiateClean semgrep.common.core.data."$PLATFORM".sha256)
+
echo "Old hash $OLD_HASH"
+
+
# release archive URL for the new version, same shape as core.src in common.nix
+
NEW_URL="https://github.com/returntocorp/semgrep/releases/download/v$NEW_VERSION/semgrep-v$NEW_VERSION$SUFFIX"
+
NEW_HASH="$(fetchzip "$NEW_URL")"
+
echo "New hash $NEW_HASH"
+
+
replace "$OLD_HASH" "$NEW_HASH" "$COMMON_FILE"
+
+
echo "Updated core for $PLATFORM"
+
done
+
+
OLD_PWD=$PWD
+
TMPDIR="$(mktemp -d)"
+
# shallow clone to check submodule commits, don't actually need the submodules
+
git clone https://github.com/returntocorp/semgrep "$TMPDIR/semgrep" --depth 1 --branch "v$NEW_VERSION"
+
+
# Print the object id recorded at HEAD for path $1 in the semgrep checkout
+
# under $TMPDIR (for a submodule path this is the pinned commit).
+
# git -C runs the command inside the checkout without changing this shell's
+
# working directory, so nothing has to be saved or restored.
+
get_submodule_commit() {
+
git -C "$TMPDIR/semgrep" ls-tree --object-only HEAD "$1"
+
}
+
+
# loop through submodules
+
nix-instantiate -E "with import $NIXPKGS_ROOT {}; builtins.attrNames semgrep.passthru.common.submodules" --eval --strict --json \
+
| jq '.[]' -r \
+
| while read -r SUBMODULE; do
+
echo "Updating $SUBMODULE"
+
OLD_REV=$(instantiateClean semgrep.passthru.common.submodules."$SUBMODULE".rev)
+
echo "Old commit $OLD_REV"
+
OLD_HASH=$(instantiateClean semgrep.passthru.common.submodules."$SUBMODULE".outputHash)
+
echo "Old hash $OLD_HASH"
+
+
NEW_REV=$(get_submodule_commit "$SUBMODULE")
+
echo "New commit $NEW_REV"
+
+
if [[ "$OLD_REV" == "$NEW_REV" ]]; then
+
echo "$SUBMODULE already up to date"
+
continue
+
fi
+
+
# placeholder differs from the one used for src so the two cannot be confused;
+
# the real hash is read back from the failed build by fetchgithub
+
TMP_HASH="sha256-ABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="
+
replace "$OLD_REV" "$NEW_REV" "$COMMON_FILE"
+
replace "$OLD_HASH" "$TMP_HASH" "$COMMON_FILE"
+
NEW_HASH="$(fetchgithub semgrep.passthru.common.submodules."$SUBMODULE")"
+
echo "New hash $NEW_HASH"
+
replace "$TMP_HASH" "$NEW_HASH" "$COMMON_FILE"
+
+
echo "Updated $SUBMODULE"
+
done
+
+
rm -rf "$TMPDIR"
+
+
echo "Finished"
+