diff options
author | Philip Taron <philip.taron@gmail.com> | 2023-12-29 06:42:26 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-29 16:42:26 +0200 |
commit | 68eccbdc5b56f2a2450f9a8463f9934388cafabf (patch) | |
tree | 908e7d78b820013f27a3bfd7d3f8b0414717f0c1 /flake.nix | |
parent | 97bbca6e8522d18041fcde6c3d0907a52ce36446 (diff) |
flake.nix : rewrite (#4605)
* flake.lock: update to hotfix CUDA::cuda_driver
Required to support https://github.com/ggerganov/llama.cpp/pull/4606
* flake.nix: rewrite
1. Split into separate files per output.
2. Added overlays, so that this flake can be integrated into others.
The names in the overlay are `llama-cpp`, `llama-cpp-opencl`,
`llama-cpp-cuda`, and `llama-cpp-rocm` so that they fit into the
broader set of Nix packages from [nixpkgs](https://github.com/nixos/nixpkgs).
3. Use [callPackage](https://summer.nixos.org/blog/callpackage-a-tool-for-the-lazy/)
rather than `with pkgs;` so that there's dependency injection rather
than dependency lookup.
4. Add a description and meta information for each package.
The description includes a bit about what's trying to accelerate each one.
5. Use specific CUDA packages instead of cudatoolkit on the advice of SomeoneSerge.
6. Format with `serokell/nixfmt` for a consistent style.
7. Update `flake.lock` with the latest goods.
* flake.nix: use finalPackage instead of passing it manually
* nix: unclutter darwin support
* nix: pass most darwin frameworks unconditionally
...for simplicity
* *.nix: nixfmt
nix shell github:piegamesde/nixfmt/rfc101-style --command \
nixfmt flake.nix .devops/nix/*.nix
* flake.nix: add maintainers
* nix: move meta down to follow Nixpkgs style more closely
* nix: add missing meta attributes
nix: clarify the interpretation of meta.maintainers
nix: clarify the meaning of "broken" and "badPlatforms"
nix: passthru: expose the use* flags for inspection
E.g.:
```
❯ nix eval .#cuda.useCuda
true
```
* flake.nix: avoid re-evaluating nixpkgs too many times
* flake.nix: use flake-parts
* nix: migrate to pname+version
* flake.nix: overlay: expose both the namespace and the default attribute
* ci: add the (Nix) flakestry workflow
* nix: cmakeFlags: explicit OFF bools
* nix: cuda: reduce runtime closure
* nix: fewer rebuilds
* nix: respect config.cudaCapabilities
* nix: add the impure driver's location to the DT_RUNPATHs
* nix: clean sources more thoroughly
...this way outPaths change less frequently,
and so there are fewer rebuilds
* nix: explicit mpi support
* nix: explicit jetson support
* flake.nix: darwin: only expose the default
---------
Co-authored-by: Someone Serge <sergei.kozlukov@aalto.fi>
Diffstat (limited to 'flake.nix')
-rw-r--r-- | flake.nix | 226 |
1 files changed, 93 insertions, 133 deletions
@@ -1,139 +1,99 @@ { + description = "Port of Facebook's LLaMA model in C/C++"; + inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; - flake-utils.url = "github:numtide/flake-utils"; + flake-parts.url = "github:hercules-ci/flake-parts"; }; - outputs = { self, nixpkgs, flake-utils }: - flake-utils.lib.eachDefaultSystem (system: - let - name = "llama.cpp"; - src = ./.; - meta.mainProgram = "llama"; - inherit (pkgs.stdenv) isAarch32 isAarch64 isDarwin; - buildInputs = with pkgs; [ openmpi ]; - osSpecific = with pkgs; buildInputs ++ ( - if isAarch64 && isDarwin then - with pkgs.darwin.apple_sdk_11_0.frameworks; [ - Accelerate - MetalKit - ] - else if isAarch32 && isDarwin then - with pkgs.darwin.apple_sdk.frameworks; [ - Accelerate - CoreGraphics - CoreVideo - ] - else if isDarwin then - with pkgs.darwin.apple_sdk.frameworks; [ - Accelerate - CoreGraphics - CoreVideo - ] - else - with pkgs; [ openblas ] - ); - pkgs = import nixpkgs { inherit system; }; - nativeBuildInputs = with pkgs; [ cmake ninja pkg-config ]; - cudatoolkit_joined = with pkgs; symlinkJoin { - # HACK(Green-Sky): nix currently has issues with cmake findcudatoolkit - # see https://github.com/NixOS/nixpkgs/issues/224291 - # copied from jaxlib - name = "${cudaPackages.cudatoolkit.name}-merged"; - paths = [ - cudaPackages.cudatoolkit.lib - cudaPackages.cudatoolkit.out - ] ++ lib.optionals (lib.versionOlder cudaPackages.cudatoolkit.version "11") [ - # for some reason some of the required libs are in the targets/x86_64-linux - # directory; not sure why but this works around it - "${cudaPackages.cudatoolkit}/targets/${system}" - ]; - }; - llama-python = - pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece ]); - # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime - llama-python-extra = - pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece torchWithoutCuda transformers ]); - postPatch = '' - substituteInPlace ./ggml-metal.m \ - --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" - substituteInPlace ./*.py --replace '/usr/bin/env python' '${llama-python}/bin/python' - ''; - postInstall = '' - mv $out/bin/main $out/bin/llama - mv $out/bin/server $out/bin/llama-server - mkdir -p $out/include - cp ${src}/llama.h $out/include/ - ''; - cmakeFlags = [ "-DLLAMA_NATIVE=OFF" "-DLLAMA_BUILD_SERVER=ON" "-DBUILD_SHARED_LIBS=ON" "-DCMAKE_SKIP_BUILD_RPATH=ON" ]; - in + + # For inspection, use `nix flake show github:ggerganov/llama.cpp` or the nix repl: + # + # ```bash + # ❯ nix repl + # nix-repl> :lf github:ggerganov/llama.cpp + # Added 13 variables. + # nix-repl> outputs.apps.x86_64-linux.quantize + # { program = "/nix/store/00000000000000000000000000000000-llama.cpp/bin/quantize"; type = "app"; } + # ``` + outputs = + { self, flake-parts, ... }@inputs: + let + # We could include the git revisions in the package names but those would + # needlessly trigger rebuilds: + # llamaVersion = self.dirtyShortRev or self.shortRev; + + # Nix already uses cryptographic hashes for versioning, so we'll just fix + # the fake semver for now: + llamaVersion = "0.0.0"; + in + flake-parts.lib.mkFlake { inherit inputs; } + { - packages.default = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = osSpecific; - cmakeFlags = cmakeFlags - ++ (if isAarch64 && isDarwin then [ - "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1" - "-DLLAMA_METAL=ON" - ] else [ - "-DLLAMA_BLAS=ON" - "-DLLAMA_BLAS_VENDOR=OpenBLAS" - ]); - }; - packages.opencl = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = with pkgs; buildInputs ++ [ clblast ]; - cmakeFlags = cmakeFlags ++ [ - "-DLLAMA_CLBLAST=ON" - ]; - }; - packages.cuda = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = with pkgs; buildInputs ++ [ cudatoolkit_joined ]; - cmakeFlags = cmakeFlags ++ [ - "-DLLAMA_CUBLAS=ON" - ]; - }; - packages.rocm = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = with pkgs.rocmPackages; buildInputs ++ [ clr hipblas rocblas ]; - cmakeFlags = cmakeFlags ++ [ - "-DLLAMA_HIPBLAS=1" - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" - # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM - # in github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt - # and select the line that matches the current nixpkgs version of rocBLAS. - "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" - ]; - }; - apps.llama-server = { - type = "app"; - program = "${self.packages.${system}.default}/bin/llama-server"; - }; - apps.llama-embedding = { - type = "app"; - program = "${self.packages.${system}.default}/bin/embedding"; - }; - apps.llama = { - type = "app"; - program = "${self.packages.${system}.default}/bin/llama"; - }; - apps.quantize = { - type = "app"; - program = "${self.packages.${system}.default}/bin/quantize"; - }; - apps.train-text-from-scratch = { - type = "app"; - program = "${self.packages.${system}.default}/bin/train-text-from-scratch"; - }; - apps.default = self.apps.${system}.llama; - devShells.default = pkgs.mkShell { - buildInputs = [ llama-python ]; - packages = nativeBuildInputs ++ osSpecific; - }; - devShells.extra = pkgs.mkShell { - buildInputs = [ llama-python-extra ]; - packages = nativeBuildInputs ++ osSpecific; - }; - }); + + imports = [ + .devops/nix/nixpkgs-instances.nix + .devops/nix/apps.nix + .devops/nix/devshells.nix + .devops/nix/jetson-support.nix + ]; + + # An overlay can be used to have a more granular control over llama-cpp's + # dependencies and configuration, than that offered by the `.override` + # mechanism. Cf. https://nixos.org/manual/nixpkgs/stable/#chap-overlays. + # + # E.g. in a flake: + # ``` + # { nixpkgs, llama-cpp, ... }: + # let pkgs = import nixpkgs { + # overlays = [ (llama-cpp.overlays.default) ]; + # system = "aarch64-linux"; + # config.allowUnfree = true; + # config.cudaSupport = true; + # config.cudaCapabilities = [ "7.2" ]; + # config.cudaEnableForwardCompat = false; + # }; in { + # packages.aarch64-linux.llamaJetsonXavier = pkgs.llamaPackages.llama-cpp; + # } + # ``` + # + # Cf. https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format + flake.overlays.default = + (final: prev: { + llamaPackages = final.callPackage .devops/nix/scope.nix { inherit llamaVersion; }; + inherit (final.llamaPackages) llama-cpp; + }); + + systems = [ + "aarch64-darwin" + "aarch64-linux" + "x86_64-darwin" # x86_64-darwin isn't tested (and likely isn't relevant) + "x86_64-linux" + ]; + + perSystem = + { + config, + lib, + pkgs, + pkgsCuda, + pkgsRocm, + ... + }: + { + # We don't use the overlay here so as to avoid making too many instances of nixpkgs, + # cf. https://zimbatm.com/notes/1000-instances-of-nixpkgs + packages = + { + default = (pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp; + } + // lib.optionalAttrs pkgs.stdenv.isLinux { + opencl = config.packages.default.override { useOpenCL = true; }; + cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp; + rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp; + + mpi-cpu = config.packages.default.override { useMpi = true; }; + mpi-cuda = config.packages.default.override { useMpi = true; }; + }; + }; + }; } |