AbdulElahGwaith
/

LocalAI

Model card Files Files and versions

LocalAI / .env

AbdulElahGwaith's picture

AbdulElahGwaith

Upload folder using huggingface_hub

0f07ba7 verified 3 months ago

history blame contribute delete

2.82 kB

	## Set number of threads.
	## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
	# LOCALAI_THREADS=14

	## Specify a different bind address (defaults to ":8080")
	# LOCALAI_ADDRESS=127.0.0.1:8080

	## Default models context size
	# LOCALAI_CONTEXT_SIZE=512
	#
	## Define galleries.
	## Models available to install will be visible in `/models/available`
	# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]

	## CORS settings
	# LOCALAI_CORS=true
	# LOCALAI_CORS_ALLOW_ORIGINS=*

	## Default path for models
	#
	# LOCALAI_MODELS_PATH=/models

	## Enable debug mode
	# LOCALAI_LOG_LEVEL=debug

	## Disables COMPEL (Diffusers)
	# COMPEL=0

	## Enable/Disable single backend (useful if only one GPU is available)
	# LOCALAI_SINGLE_ACTIVE_BACKEND=true

	# Forces shutdown of the backends if busy (only if LOCALAI_SINGLE_ACTIVE_BACKEND is set)
	# LOCALAI_FORCE_BACKEND_SHUTDOWN=true

	## Path where to store generated images
	# LOCALAI_IMAGE_PATH=/tmp/generated/images

	## Specify a default upload limit in MB (whisper)
	# LOCALAI_UPLOAD_LIMIT=15

	## List of external GRPC backends (note: on the container image, this variable is already set to use the extra backends available in extra/)
	# LOCALAI_EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py

	### Advanced settings ###
	### These are not used by LocalAI directly, but by other components in the stack ###
	##
	### Preload libraries
	# LD_PRELOAD=

	### Huggingface cache for models
	# HUGGINGFACE_HUB_CACHE=/usr/local/huggingface

	### Python backends GRPC max workers
	### Default number of workers for GRPC Python backends.
	### This actually controls whether a backend can process multiple requests or not.
	# PYTHON_GRPC_MAX_WORKERS=1

	### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
	# LLAMACPP_PARALLEL=1

	### Define a list of GRPC Servers for llama-cpp workers to distribute the load
	# https://github.com/ggerganov/llama.cpp/pull/6829
	# https://github.com/ggerganov/llama.cpp/blob/master/tools/rpc/README.md
	# LLAMACPP_GRPC_SERVERS=""

	### Enable to run parallel requests
	# LOCALAI_PARALLEL_REQUESTS=true

	# Enable to allow p2p mode
	# LOCALAI_P2P=true

	# Enable to use federated mode
	# LOCALAI_FEDERATED=true

	# Enable to start federation server
	# FEDERATED_SERVER=true

	# Define to use federation token
	# TOKEN=""

	### Watchdog settings
	###
	# Enables watchdog to kill backends that have been inactive for too long
	# LOCALAI_WATCHDOG_IDLE=true
	#
	# Time in duration format (e.g. 1h30m) after which a backend is considered idle
	# LOCALAI_WATCHDOG_IDLE_TIMEOUT=5m
	#
	# Enables watchdog to kill backends that have been busy for too long
	# LOCALAI_WATCHDOG_BUSY=true
	#
	# Time in duration format (e.g. 1h30m) after which a backend is considered busy
	# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m