| |
| |
| package system |
|
|
| import ( |
| "os" |
| "path/filepath" |
| "runtime" |
| "strings" |
|
|
| "github.com/mudler/xlog" |
| ) |
|
|
// GPU vendor identifiers. They are compared against SystemState.GPUVendor and
// double as capability names and as substrings looked for in backend names.
const (
	Nvidia = "nvidia"
	AMD    = "amd"
	Intel  = "intel"
)

// Capability names that capability detection can return.
const (
	// defaultCapability is the fallback used when nothing more specific applies.
	defaultCapability = "default"
	nvidiaL4T         = "nvidia-l4t"
	darwinX86         = "darwin-x86"
	metal             = "metal"
	vulkan            = "vulkan"

	// CUDA-versioned variants of the NVIDIA capabilities.
	nvidiaCuda13    = "nvidia-cuda-13"
	nvidiaCuda12    = "nvidia-cuda-12"
	nvidiaL4TCuda12 = "nvidia-l4t-cuda-12"
	nvidiaL4TCuda13 = "nvidia-l4t-cuda-13"
)

// Knobs that let an operator force the capability instead of detecting it.
const (
	// capabilityEnv names the environment variable holding a forced capability.
	capabilityEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY"
	// capabilityRunFileEnv names the environment variable that overrides the
	// path of the capability run file.
	capabilityRunFileEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE"
	// defaultRunFile is the run file path used when no override is set.
	defaultRunFile = "/run/localai/capability"
)

// Substrings searched for in a backend's name and URI to classify it.
const (
	backendTokenDarwin = "darwin"
	backendTokenMLX    = "mlx"
	backendTokenMetal  = "metal"
	backendTokenL4T    = "l4t"
	backendTokenCUDA   = "cuda"
	backendTokenROCM   = "rocm"
	backendTokenHIP    = "hip"
	backendTokenSYCL   = "sycl"
)
|
|
// cuda13DirExists and cuda12DirExists record whether the versioned CUDA
// toolkit directories were present under /usr/local when the package was
// initialised. They are written once by init and only read afterwards.
var (
	cuda13DirExists bool
	cuda12DirExists bool
)

// init probes the filesystem once for the CUDA 13 and CUDA 12 toolkit
// directories so that capability detection does not stat on every call.
func init() {
	cuda13DirExists = isExistingDir(cudaToolkitDir("13"))
	cuda12DirExists = isExistingDir(cudaToolkitDir("12"))
}

// cudaToolkitDir returns the absolute path of the CUDA toolkit directory for
// the given major version, e.g. "/usr/local/cuda-13" for "13".
//
// The path is rooted explicitly: joining "usr", "local", ... without a leading
// separator yields a path relative to the process working directory.
func cudaToolkitDir(major string) string {
	return filepath.Join(string(filepath.Separator), "usr", "local", "cuda-"+major)
}

// isExistingDir reports whether path exists and is a directory. os.Stat
// follows symlinks, so a symlink pointing at a directory counts. Any stat
// error (missing path, permission denied, ...) is treated as "not present".
func isExistingDir(path string) bool {
	info, err := os.Stat(path)
	return err == nil && info.IsDir()
}
|
|
| func (s *SystemState) Capability(capMap map[string]string) string { |
| reportedCapability := s.getSystemCapabilities() |
|
|
| |
| if _, exists := capMap[reportedCapability]; exists { |
| xlog.Debug("Using reported capability", "reportedCapability", reportedCapability, "capMap", capMap) |
| return reportedCapability |
| } |
|
|
| xlog.Debug("The requested capability was not found, using default capability", "reportedCapability", reportedCapability, "capMap", capMap) |
| |
| return defaultCapability |
| } |
|
|
| func (s *SystemState) getSystemCapabilities() string { |
| capability := os.Getenv(capabilityEnv) |
| if capability != "" { |
| xlog.Info("Using forced capability from environment variable", "capability", capability, "env", capabilityEnv) |
| return capability |
| } |
|
|
| capabilityRunFile := defaultRunFile |
| capabilityRunFileEnv := os.Getenv(capabilityRunFileEnv) |
| if capabilityRunFileEnv != "" { |
| capabilityRunFile = capabilityRunFileEnv |
| } |
|
|
| |
| |
| |
| if _, err := os.Stat(capabilityRunFile); err == nil { |
| capability, err := os.ReadFile(capabilityRunFile) |
| if err == nil { |
| xlog.Info("Using forced capability run file", "capabilityRunFile", capabilityRunFile, "capability", string(capability), "env", capabilityRunFileEnv) |
| return strings.Trim(strings.TrimSpace(string(capability)), "\n") |
| } |
| } |
|
|
| |
| if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" { |
| xlog.Info("Using metal capability (arm64 on mac)", "env", capabilityEnv) |
| return metal |
| } |
|
|
| |
| if runtime.GOOS == "darwin" && runtime.GOARCH == "amd64" { |
| xlog.Info("Using darwin-x86 capability (amd64 on mac)", "env", capabilityEnv) |
| return darwinX86 |
| } |
|
|
| |
| if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" { |
| if s.GPUVendor == Nvidia { |
| xlog.Info("Using nvidia-l4t capability (arm64 on linux)", "env", capabilityEnv) |
| if cuda13DirExists { |
| return nvidiaL4TCuda13 |
| } |
| if cuda12DirExists { |
| return nvidiaL4TCuda12 |
| } |
| return nvidiaL4T |
| } |
| } |
|
|
| if cuda13DirExists { |
| return nvidiaCuda13 |
| } |
|
|
| if cuda12DirExists { |
| return nvidiaCuda12 |
| } |
|
|
| if s.GPUVendor == "" { |
| xlog.Info("Default capability (no GPU detected)", "env", capabilityEnv) |
| return defaultCapability |
| } |
|
|
| xlog.Info("Capability automatically detected", "capability", s.GPUVendor, "env", capabilityEnv) |
| |
| if s.VRAM <= 4*1024*1024*1024 { |
| xlog.Warn("VRAM is less than 4GB, defaulting to CPU", "env", capabilityEnv) |
| return defaultCapability |
| } |
|
|
| return s.GPUVendor |
| } |
|
|
| |
| |
| |
| |
| func (s *SystemState) BackendPreferenceTokens() []string { |
| capStr := strings.ToLower(s.getSystemCapabilities()) |
| switch { |
| case strings.HasPrefix(capStr, Nvidia): |
| return []string{backendTokenCUDA, vulkan, "cpu"} |
| case strings.HasPrefix(capStr, AMD): |
| return []string{backendTokenROCM, backendTokenHIP, vulkan, "cpu"} |
| case strings.HasPrefix(capStr, Intel): |
| return []string{backendTokenSYCL, Intel, "cpu"} |
| case strings.HasPrefix(capStr, metal): |
| return []string{backendTokenMetal, "cpu"} |
| case strings.HasPrefix(capStr, darwinX86): |
| return []string{"darwin-x86", "cpu"} |
| case strings.HasPrefix(capStr, vulkan): |
| return []string{vulkan, "cpu"} |
| default: |
| return []string{"cpu"} |
| } |
| } |
|
|
| |
| |
| func (s *SystemState) DetectedCapability() string { |
| return s.getSystemCapabilities() |
| } |
|
|
| |
| |
| |
| func (s *SystemState) IsBackendCompatible(name, uri string) bool { |
| combined := strings.ToLower(name + " " + uri) |
| capability := s.getSystemCapabilities() |
|
|
| |
| isDarwinBackend := strings.Contains(combined, backendTokenDarwin) || |
| strings.Contains(combined, backendTokenMLX) || |
| strings.Contains(combined, backendTokenMetal) |
| if isDarwinBackend { |
| |
| return capability == metal || capability == darwinX86 |
| } |
|
|
| |
| |
| |
| isL4TBackend := strings.Contains(combined, backendTokenL4T) |
| if isL4TBackend { |
| return strings.HasPrefix(capability, nvidiaL4T) |
| } |
|
|
| |
| isNvidiaBackend := strings.Contains(combined, backendTokenCUDA) || |
| strings.Contains(combined, Nvidia) |
| if isNvidiaBackend { |
| |
| return strings.HasPrefix(capability, Nvidia) |
| } |
|
|
| |
| isAMDBackend := strings.Contains(combined, backendTokenROCM) || |
| strings.Contains(combined, backendTokenHIP) || |
| strings.Contains(combined, AMD) |
| if isAMDBackend { |
| return capability == AMD |
| } |
|
|
| |
| isIntelBackend := strings.Contains(combined, backendTokenSYCL) || |
| strings.Contains(combined, Intel) |
| if isIntelBackend { |
| return capability == Intel |
| } |
|
|
| |
| return true |
| } |
|
|