Update README.md
Browse files
README.md
CHANGED
|
@@ -47,12 +47,12 @@ conda activate infinity_parser2
|
|
| 47 |
# Install PyTorch (CUDA). Find the proper version at https://pytorch.org/get-started/previous-versions based on your CUDA version.
|
| 48 |
pip install torch==2.10.0 torchvision==0.25.0 torchaudio==2.10.0 --index-url https://download.pytorch.org/whl/cu128
|
| 49 |
|
| 50 |
-
# Install FlashAttention (FlashAttention-2 is recommended by default)
|
| 51 |
-
#
|
| 52 |
-
# To speed up installation, download the appropriate wheel from the official releases (https://github.com/Dao-AILab/flash-attention/releases), then run:
|
| 53 |
-
# pip install /path/to/<wheel_filename>.whl
|
| 54 |
pip install flash-attn==2.8.3 --no-build-isolation
|
| 55 |
-
#
|
|
|
|
|
|
|
| 56 |
|
| 57 |
# Install vLLM
|
| 58 |
# NOTE: you may need to run the command below to resolve triton and numpy conflicts before installing vllm.
|
|
|
|
| 47 |
# Install PyTorch (CUDA). Find the version matching your CUDA version at https://pytorch.org/get-started/previous-versions.
|
| 48 |
pip install torch==2.10.0 torchvision==0.25.0 torchaudio==2.10.0 --index-url https://download.pytorch.org/whl/cu128
|
| 49 |
|
| 50 |
+
# Install FlashAttention (FlashAttention-2 is recommended by default)
|
| 51 |
+
# Standard install (compiles from source, ~10-30 min):
|
|
|
|
|
|
|
| 52 |
pip install flash-attn==2.8.3 --no-build-isolation
|
| 53 |
+
# Faster install: download the appropriate wheel from https://github.com/Dao-AILab/flash-attention/releases. Then run: pip install /path/to/<wheel_filename>.whl
|
| 54 |
+
# For Hopper GPUs (e.g. H100, H800), we recommend FlashAttention-3 instead. See: https://github.com/Dao-AILab/flash-attention
|
| 55 |
+
# NOTE: The code will prioritize detecting FlashAttention-3. If not found, it falls back to FlashAttention-2.
|
| 56 |
|
| 57 |
# Install vLLM
|
| 58 |
# NOTE: you may need to run the command below to resolve triton and numpy conflicts before installing vllm.
|