-
-
Notifications
You must be signed in to change notification settings - Fork 4.1k
Expand file tree
/
Copy pathpatch-grpc-server.sh
More file actions
executable file
·68 lines (60 loc) · 2.33 KB
/
patch-grpc-server.sh
File metadata and controls
executable file
·68 lines (60 loc) · 2.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/bin/bash
# Patch the shared backend/cpp/llama-cpp/grpc-server.cpp *copy* used by the
# turboquant build:
#
# 1. Augment the kv_cache_types[] allow-list so `LoadModel` accepts the
# fork-specific `turbo2` / `turbo3` / `turbo4` cache types.
#
# Historical context: this script used to also paper over API gaps between the
# fork and upstream (flat vs nested `common_params_speculative`, missing
# `get_media_marker()`, `ctx_server.impl->model` vs `model_tgt`, and a
# LOCALAI_LEGACY_LLAMA_CPP_SPEC compile gate). As of TURBOQUANT_VERSION
# 4c1c3ac0 the fork has rebased past ggml-org/llama.cpp#21962, #22397 and
# #22838, so the shared grpc-server.cpp compiles unmodified against the fork.
# Only the fork-specific KV-cache enum entries remain.
#
# We patch the *copy* sitting in turboquant-<flavor>-build/, never the original
# under backend/cpp/llama-cpp/, so the stock llama-cpp build stays compiling
# against vanilla upstream.
#
# Idempotent: skips the insertion if its marker is already present (so re-runs
# of the same build dir don't double-insert).
set -euo pipefail

# Usage: patch-grpc-server.sh <grpc-server.cpp>
#
# Exit status:
#   0  the file was patched, or already contained the TurboQuant entries
#   2  wrong number of arguments, or the given path is not a regular file
#   *  any other non-zero status comes from awk (1 when the anchor is missing)
if [[ $# -ne 1 ]]; then
  echo "usage: $0 <grpc-server.cpp>" >&2
  exit 2
fi

SRC=$1

if [[ ! -f "$SRC" ]]; then
  echo "grpc-server.cpp not found at $SRC" >&2
  exit 2
fi

# GGML_TYPE_TURBO2_0 doubles as the idempotence marker: if it is already in
# the file, a previous run (or the file itself) has the entries.
if grep -qF -- 'GGML_TYPE_TURBO2_0' "$SRC"; then
  echo "==> $SRC already has TurboQuant cache types, skipping KV allow-list patch"
else
  echo "==> patching $SRC to allow turbo2/turbo3/turbo4 KV-cache types"

  tmp="$SRC.tmp"
  # If awk fails (e.g. the anchor is missing) `set -e` aborts the script
  # before the mv below; make sure the partial output does not linger next to
  # the source. The EXIT trap leaves the script's exit status untouched.
  trap 'rm -f -- "$tmp"' EXIT

  # Insert the three TURBO entries right after the first line that consists
  # solely of `GGML_TYPE_Q5_1,` (the kv_cache_types[] allow-list). The anchor
  # is matched regardless of its leading indentation, and that indentation is
  # reused for the inserted lines so they line up with the existing entries.
  # Using awk because the builder image does not ship python3, and GNU sed's
  # multi-line `a\` quoting is awkward.
  awk '
    !done && /^[ \t]*GGML_TYPE_Q5_1,$/ {
      # Everything before the identifier is the indentation of the anchor.
      indent = $0
      sub(/GGML_TYPE_Q5_1,$/, "", indent)
      print
      print indent "// turboquant fork extras - added by patch-grpc-server.sh"
      print indent "GGML_TYPE_TURBO2_0,"
      print indent "GGML_TYPE_TURBO3_0,"
      print indent "GGML_TYPE_TURBO4_0,"
      done = 1
      next
    }
    { print }
    END {
      # Fail loudly instead of silently producing an unpatched copy.
      if (!done) {
        print "patch-grpc-server.sh: anchor `GGML_TYPE_Q5_1,` not found" > "/dev/stderr"
        exit 1
      }
    }
  ' "$SRC" > "$tmp"

  mv -- "$tmp" "$SRC"
  trap - EXIT
  echo "==> KV allow-list patch OK"
fi

echo "==> all patches applied"