diff --git a/bin/run_llama.sh b/bin/run_llama.sh
index cdd17d6c8..53b15693d 100755
--- a/bin/run_llama.sh
+++ b/bin/run_llama.sh
@@ -124,43 +124,44 @@ fi
 if [ "${DoBenchmark}" == "yes" ]; then
   echo "Running benchmark..."
   cd "${LLAMA_BUILD_DIR}" || exit
-  # Download model from HF (this will make it avail in local cache); bench call requires local model file
-  # Use -st (single-turn) to exit after the first response instead of blocking in
-  # conversation mode, which is auto-enabled for chat/instruction-tuned models.
-  # Redirect stdin from /dev/null as a safety net against interactive hangs.
-  ./bin/llama-cli -hf "${LLAMA_BENCH_HF_ID}" --prompt "/exit" -st < /dev/null
-
-  # Get cache directory from llama-cli
-  CacheListOutput=$(./bin/llama-cli --cache-list 2>&1)
+
+  # Get cache directory from llama-cli if supported by the local build.
+  CacheListOutput=$(./bin/llama-cli --cache-list 2>&1 || true)
   CacheDir=$(echo "${CacheListOutput}" | grep "model cache directory:" | sed 's/.*: //')
   : "${CacheDir:=${LLAMA_CACHE}}"
 
   # Find requested model by converting HF ID to filename pattern (user/model -> user_model)
   SearchPattern="${LLAMA_BENCH_HF_ID//\//_}"
-  LlamaModelPath=$(find "${CacheDir}" -maxdepth 1 -type f -name "${SearchPattern}*.gguf" 2>/dev/null | head -1)
+  LlamaModelPath=$(find "${CacheDir}" \( -type f -o -xtype f \) -name "${SearchPattern}*.gguf" 2>/dev/null | head -1)
 
   # Fallback: use all available .gguf files in cache
   if [ -z "${LlamaModelPath}" ]; then
     echo "Requested model not found, using all cached models"
-    mapfile -t ModelPaths < <(find "${CacheDir}" -maxdepth 1 -type f -name "*.gguf" 2>/dev/null)
+    mapfile -t ModelPaths < <(find "${CacheDir}" \( -type f -o -xtype f \) -name "*.gguf" 2>/dev/null)
   else
     ModelPaths=("${LlamaModelPath}")
   fi
 
-  if [ ${#ModelPaths[@]} -eq 0 ]; then
-    echo "ERROR: No model files found in cache directory: ${CacheDir}"
-    ls -la "${CacheDir}" 2>/dev/null || echo "Directory does not exist"
-    exit 1
-  fi
-
   # Marker for external scripts
   echo "LLAMA_BENCHMARK_BEGIN" | tee "${LLAMA_TESTS_LOG_LOCATION}/llama-bench.log"
 
-  # Run benchmark for each model
-  for LlamaModelPath in "${ModelPaths[@]}"; do
-    echo "Benchmarking: ${LlamaModelPath}"
-    ./bin/llama-bench -ngl 999 -fa 1 -ub 2048 -m "${LlamaModelPath}" 2>&1 | tee -a "${LLAMA_TESTS_LOG_LOCATION}/llama-bench.log"
-  done
+  if [ ${#ModelPaths[@]} -eq 0 ] && ./bin/llama-bench --help 2>&1 | grep -q -- "--hf-repo"; then
+    # Let llama-bench resolve/download the HF model directly. Using llama-cli as
+    # a prefetch step can hang in ROCm/KFD waits on cold cache.
+    ./bin/llama-bench -hf "${LLAMA_BENCH_HF_ID}" -ngl 999 -fa 1 -ub 2048 \
+      2>&1 | tee -a "${LLAMA_TESTS_LOG_LOCATION}/llama-bench.log"
+  else
+    if [ ${#ModelPaths[@]} -eq 0 ]; then
+      echo "ERROR: No model files found in cache directory: ${CacheDir}"
+      exit 1
+    fi
+
+    # Run benchmark for each model
+    for LlamaModelPath in "${ModelPaths[@]}"; do
+      echo "Benchmarking: ${LlamaModelPath}" | tee -a "${LLAMA_TESTS_LOG_LOCATION}/llama-bench.log"
+      ./bin/llama-bench -ngl 999 -fa 1 -ub 2048 -m "${LlamaModelPath}" 2>&1 | tee -a "${LLAMA_TESTS_LOG_LOCATION}/llama-bench.log"
+    done
+  fi
 fi
 
 popd || exit