Update README.md
Browse files
README.md
CHANGED
|
@@ -47,12 +47,12 @@ conda activate infinity_parser2
|
|
| 47 |
# Install PyTorch (CUDA). Find the proper version at https://pytorch.org/get-started/previous-versions based on your CUDA version.
|
| 48 |
pip install torch==2.10.0 torchvision==0.25.0 torchaudio==2.10.0 --index-url https://download.pytorch.org/whl/cu128
|
| 49 |
|
| 50 |
-
# Install FlashAttention (FlashAttention-2 is recommended by default)
|
| 51 |
-
#
|
| 52 |
-
# To speed up installation, download the appropriate wheel from the official releases (https://github.com/Dao-AILab/flash-attention/releases), then run:
|
| 53 |
-
# pip install /path/to/<wheel_filename>.whl
|
| 54 |
pip install flash-attn==2.8.3 --no-build-isolation
|
| 55 |
-
#
|
|
|
|
|
|
|
| 56 |
|
| 57 |
# Install vLLM
|
| 58 |
# NOTE: you may need to run the command below to resolve triton and numpy conflicts before installing vllm.
|
|
|
|
| 47 |
# Install PyTorch (CUDA). Find the version matching your CUDA version at https://pytorch.org/get-started/previous-versions.
|
| 48 |
pip install torch==2.10.0 torchvision==0.25.0 torchaudio==2.10.0 --index-url https://download.pytorch.org/whl/cu128
|
| 49 |
|
| 50 |
+
# Install FlashAttention (FlashAttention-2 is recommended by default)
|
| 51 |
+
# Standard install (compiles from source, ~10-30 min):
|
|
|
|
|
|
|
| 52 |
pip install flash-attn==2.8.3 --no-build-isolation
|
| 53 |
+
# Faster install: download the appropriate wheel from https://github.com/Dao-AILab/flash-attention/releases. Then run: pip install /path/to/<wheel_filename>.whl
|
| 54 |
+
# For Hopper GPUs (e.g. H100, H800), we recommend FlashAttention-3 instead. See: https://github.com/Dao-AILab/flash-attention
|
| 55 |
+
# NOTE: The code will prioritize detecting FlashAttention-3. If not found, it falls back to FlashAttention-2.
|
| 56 |
|
| 57 |
# Install vLLM
|
| 58 |
# NOTE: you may need to run the command below to resolve triton and numpy conflicts before installing vllm.
|