Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
231 commits
Select commit Hold shift + click to select a range
ac4cdde
vendor : update LibreSSL to 4.3.2 (#24397)
angt Jun 10, 2026
db94854
server : skip checkpoints beyond pos_next (#24411)
aldehir Jun 11, 2026
68f3066
vocab : refactor normalizer flags into options struct, add strip_acce…
o7si Jun 11, 2026
1bfbdb1
vocab : adopt leading TemplateProcessing special token as BOS (#24428)
o7si Jun 11, 2026
18ef86e
server: skip unused log lines on router mode (#24463)
ngxson Jun 11, 2026
1af154a
vulkan: use medium matmul tile on Asahi Linux (#24306)
xingjianll Jun 11, 2026
fdc3db9
vulkan: add fast path for contiguous buffer transfers (#23973)
winstonma Jun 11, 2026
17e59d6
ggml : bump version to 0.15.0 (ggml/1539)
ggerganov Jun 11, 2026
263cc04
sync : ggml
ggerganov Jun 11, 2026
4c65955
vulkan: ifdef eMesaHoneykrisp (build fix) (#24479)
jeffbolznv Jun 11, 2026
1593d56
docker : support specifying the GCC version for CUDA (#24447)
wencan Jun 11, 2026
ba1df05
opencl: add q5_0/q5_1 gemm and gemv kernels for Adreno (#24319)
shaofeiqi Jun 12, 2026
099ea76
[SYCL] Fix CI build & release for SYCL backend (#24387)
arthw Jun 12, 2026
85f99dc
ggml: support concat for scalar types at cuda backend (#24011)
zihaomu Jun 12, 2026
88a3927
spec: add EAGLE3 speculative decoding support (#18039)
ruixiang63 Jun 12, 2026
6471e3c
UI/jpeg exif orientation (#24196)
ServeurpersoCom Jun 12, 2026
70b54e1
vendor : update cpp-httplib to 0.47.0 (#24395)
angt Jun 12, 2026
e08c226
ggml : bump version to 0.15.1 (ggml/1541)
ggerganov Jun 12, 2026
f532be8
sync : ggml
ggerganov Jun 12, 2026
02182fc
fit : avoid including llama-ext.h in fit.h (#24506)
ggerganov Jun 12, 2026
f7ca93d
ui: PWA support (#23871)
allozaur Jun 12, 2026
3e7bd4f
vulkan: add pipeline barriers for memcpy read operations (#23770)
0cc4m Jun 12, 2026
ebc1077
server : fix reasoning budget WebUI precedence over model.ini (#24517)
ggerganov Jun 12, 2026
cd50446
ci : unbreak release (#24544)
CISC Jun 12, 2026
f58bad4
ci : unbreak release harder (#24545)
CISC Jun 12, 2026
e37abd6
mtmd: add batching API (#24384)
ngxson Jun 12, 2026
c34b922
fix sycl links in release notes (#24527)
muhammad-salem Jun 13, 2026
d8a24cc
fit : wrap llama_device_memory_data (#24522)
ggerganov Jun 13, 2026
57fe1f0
server: clean up static assets handling (#24550)
ngxson Jun 13, 2026
597b667
ui: keep original file name and path (#24568)
ngxson Jun 13, 2026
1a7718b
vulkan: support non-contig unary/glu ops (#24215)
jeffbolznv Jun 13, 2026
341babc
jinja : fix split and replace with empty first arg (#24574)
CISC Jun 13, 2026
e8067a8
ui: build-time gzip compression (#24571)
ngxson Jun 13, 2026
f05cf46
jinja : fix negative step slice with start/stop values (#24580)
CISC Jun 13, 2026
4988f6e
Add arch support for cohere2-MoE (#24260)
michaelw9999 Jun 13, 2026
53bd47e
ui : fix llama-ui-embed crash when no asset dir is given (#24597)
aldehir Jun 13, 2026
c2ba3e4
add sycl to check-release (#24583)
CISC Jun 14, 2026
4672211
ci : use CUDA label for cuda backend (#24594)
CISC Jun 14, 2026
8ed274e
Add cohere2moe to llama-vocab for TINY_AYA (#24601)
bartowski1182 Jun 14, 2026
6e14286
cli : fix not copying preserved tokens (#24258)
michaelw9999 Jun 14, 2026
acd79d6
jinja : add count/d/e filter aliases (#24606)
CISC Jun 14, 2026
1fd6dfe
ui : fix ui clipping in mobile due to incorrect height setup (#24605)
amoshydra Jun 14, 2026
fd5869f
UI/mobile keyboard and pwa popup fixes (#24610)
ServeurpersoCom Jun 14, 2026
20c5266
docker: specify registry to simplify Podman builds (#24607)
Minoru Jun 14, 2026
8edaca9
docs : fix typos in CUDA-FEDORA.md and grammars/README.md (#24459)
m-atharkhan Jun 14, 2026
aedb2a5
chat: add dedicated Cohere2MoE (North Code) parser (#24615)
pwilkin Jun 14, 2026
5f04dc7
ui: Add HEIC/HEIF image support (#24137)
NickM-27 Jun 14, 2026
ef8268f
fix(ui): render thinking/reasoning block content as markdown (#24611)
franitel Jun 14, 2026
dd4623a
convert : fix lora base model arch retrieval (#24621)
CISC Jun 14, 2026
6e9007a
ggml-webgpu: improve i-quants mul_mat performance and speed up prefil…
yomaytk Jun 15, 2026
3686e9d
CUDA: only support F32/F16 for GGML_OP_REPEAT (#24533)
leonardHONG Jun 15, 2026
2a6c391
UI/svg block rendering (#24080)
ServeurpersoCom Jun 15, 2026
a6dff71
chat: fix whitespace problems once and for all (#24624)
pwilkin Jun 15, 2026
272088b
metal : add repeat bf16 (#24638)
ggerganov Jun 15, 2026
c035ff4
[SYCL]: Remove per-allocation Level Zero runtime checks (#23399)
sanmai Jun 15, 2026
987fbd8
[SYCL] add to support pool_1d, move pool_1d/2d code to pool.cpp/hpp (…
arthw Jun 15, 2026
8872ab5
sycl : enhance set_rows to support q1_0, mxfp4, nvfp4 (#24564)
arthw Jun 15, 2026
72be44f
sycl : fix reorder function; add fp32/fp16 in build script (#24578)
arthw Jun 15, 2026
d8a3f52
sycl: fix soft_max_f32 max reduction (#24451)
someoneinjd Jun 15, 2026
e3bb1ad
SYCL: use native subgroup size for K-quant DMMV (#21700)
PMZFX Jun 15, 2026
6eab471
wasm : fix fallback symbol collision (#24639)
abetlen Jun 15, 2026
9dbc662
vulkan: support more CONCAT types (#24579)
jeffbolznv Jun 15, 2026
e3cab40
mtmd : add post-decode callback (#24645)
ggerganov Jun 15, 2026
0ae3f45
chat: fix an "oldie but goodie" grammar generator bug that surfaced d…
pwilkin Jun 15, 2026
581e8ec
chat: harden peg-native tool call parsing (#24329)
ServeurpersoCom Jun 15, 2026
a1eb756
docs: Add instructions to install `llama.cpp` from conda-forge (#22219)
jjerphan Jun 15, 2026
38d5463
chat: include full unparsed prompt in debug (#24650)
pwilkin Jun 15, 2026
e36a602
mtmd: fix miscounting n_tokens (#24656)
ngxson Jun 15, 2026
7dad2f1
chat : fix LFM2 tool-call parsing double-escaping (#24667)
tdakhran Jun 15, 2026
ad39cca
vulkan: add col2im_1d op (#24425)
ServeurpersoCom Jun 16, 2026
4196b47
sycl : Make GGML_SYCL_F16=ON the default (#23996)
malsbat Jun 16, 2026
fdd1098
[SYCL] Support OP EXPM1, support all UT cases of FLOOR, TRUNC, ROUND …
arthw Jun 16, 2026
ac79caa
sycl: support reordered Q4_K/Q5_K/Q6_K MoE MUL_MAT_ID (#24452)
newjordan Jun 16, 2026
e3a74b2
bench : add --offline (#24511)
angt Jun 16, 2026
635b65a
spec: add spec metrics mean acceptance length and acceptance rate per…
ruixiang63 Jun 16, 2026
d5fb104
vulkan: Support gated_delta_net with S_v=16 (#24581)
jeffbolznv Jun 16, 2026
32120c1
vulkan: prefer host-visible memory buffers on UMA devices (#22930)
winstonma Jun 16, 2026
a182490
spec: add backend sampling support for eagle3 (#24655)
ruixiang63 Jun 16, 2026
02810c7
Fix and restrict NVFP4 edge-cases in llama-graph (#24331)
ORippler Jun 16, 2026
c1304d7
ui: add source toggle to mermaid and svg blocks (#24652)
ServeurpersoCom Jun 16, 2026
74ade52
vendor : update BoringSSL to 0.20260616.0 (#24693)
cabelo Jun 16, 2026
9b260fc
sycl: Add optional USM system allocations (#22526)
ifdu Jun 17, 2026
ebbc1e5
SYCL: fix use-after-free bug with async memcpy in MoE prefill (#24676)
sanmai Jun 17, 2026
58728bd
sycl : Enable to support fp16 by OPs: SQR, SQRT, LOG, SIN, COS, CLAMP…
arthw Jun 17, 2026
890f1a2
openvino: OV 2026.2, context-shift, Q5_1 support, gemma4 dense/embedd…
wine99 Jun 17, 2026
cda6385
common: update logging to enforce max_capacity and optimize queue res…
max-krasnyansky Jun 17, 2026
5157172
opencl: optimize mul_mat_f16_f32_l4 for decode (#24504)
lhez Jun 17, 2026
bae36ef
UI : fix SSE transport detection and routing through CORS proxy. Assi…
hrpnr Jun 17, 2026
d5376cf
ci: fix vulkan docker images (#24595)
Kononnable Jun 17, 2026
ea21e03
Revert "cuda: reset cuda context after reading memory size (#23935)" …
0cc4m Jun 17, 2026
558e221
vulkan: record actual memory properties during buffer creation (#24326)
winstonma Jun 17, 2026
8086439
webui: export conversations as jsonl (#24688)
julien-c Jun 17, 2026
d1759e4
[SYCL] Add conv_3d (#24691)
arthw Jun 17, 2026
74a80dd
[SYCL] add dev2dev memcpy by SYCL API (#24476)
arthw Jun 17, 2026
1a2dea2
spec: fix segfault error on long prompts for eagle3 (#24707)
ruixiang63 Jun 17, 2026
b4024af
llama : skip main_gpu validation when no devices are available (#23405)
Dev-iL Jun 17, 2026
4b4d13a
server: (router) add model management API (#23976)
ngxson Jun 17, 2026
8d2e580
metal : add f16 and bf16 support for concat operator (#24724)
ggerganov Jun 17, 2026
0843245
metal : implement rope_back operator (#24725)
ggerganov Jun 17, 2026
2e88c49
ggml-cpu: Conditionally enable power11 backend based on compiler supp…
shalinib-ibm Jun 17, 2026
f3e1828
mtmd: llava_uhd should no longer use batch dim (#24732)
ngxson Jun 17, 2026
cae0a3b
metal : check for BF16 support in concat kernel (#24747)
ggerganov Jun 18, 2026
4a79037
ci : fix Windows x64 (OpenVINO) release link (#24731)
ravi9 Jun 18, 2026
0b73fc7
ui: Update code formatting command in pre-commit hook (#24685)
allozaur Jun 18, 2026
6f1034b
[SYCL] support OPs: conv_2d, conv_2d_dw, conv2d_transpose (#24600)
arthw Jun 18, 2026
32e806b
ci : fix check-release message parsing (#24751)
CISC Jun 18, 2026
6ec59dd
app : enable self-update only when built with llama-install.sh (#24754)
angt Jun 18, 2026
dd69db2
sycl : support MUL_MAT and OUT_PROD with Q1_0 (#24721)
arthw Jun 18, 2026
9724f66
[SYCL] rename GGML_SYCL_SUPPORT_LEVEL_ZERO (#24719)
arthw Jun 18, 2026
24bba7b
mtmd: refactor preprocessor, add mtmd_image_preproc_out (#24736)
ngxson Jun 18, 2026
968c438
server: fix router args not being forwarded to child instances (#24760)
ngxson Jun 18, 2026
552258c
server: (router) rework -hf preset repo (#24739)
ngxson Jun 18, 2026
1078621
server : return HTTP 400 on invalid grammar (#24144) (#24154)
Anuj-Attri Jun 18, 2026
2083217
ui: provide touch accessible model selection UI (#24604)
amoshydra Jun 18, 2026
0802307
server : add last-5-seconds generation speed display (#24291)
akx Jun 18, 2026
e1efd09
server: add "schema" and validation (#24150)
ngxson Jun 18, 2026
fe7c8b2
server: (router) fix stopping_thread potentially hang (#24728)
ngxson Jun 18, 2026
7b6c5a2
docs: fix export-lora --lora-scaled syntax [no release] (#24703)
kkkzbh Jun 18, 2026
d2c6795
hexagon: support for op-trace (fine-grain tracing of HVX/HMX/DMA eve…
max-krasnyansky Jun 18, 2026
060ce1b
mtmd: refactor llava-uhd overview image handling (always use ov_img_f…
ngxson Jun 18, 2026
32eddaf
cmake : fix ui build with read-only source (#24752)
o7si Jun 18, 2026
a6b3260
mtmd: add batching for mtmd-cli, add video tests (#24778)
ngxson Jun 18, 2026
40f3aaf
server: add "X-Accel-Buffering": "no" header to streaming endpoints (…
regunakyle Jun 18, 2026
3a3edc9
Ggml/cuda col2im 1d (#24417)
ServeurpersoCom Jun 18, 2026
db52540
mtmd: add batching support for internvl (#24775)
ngxson Jun 18, 2026
8141e73
ggml-cpu: support K tails in power10 Q8/Q4 MMA matmul (#24753)
shalinib-ibm Jun 19, 2026
80452d6
server : consolidate slot selection into get_available_slot (#24755)
ggerganov Jun 19, 2026
5bd21b8
pi : remove docs from system prompt (#24791)
ggerganov Jun 19, 2026
1868af1
ggml : bump version to 0.15.2 (ggml/1548)
ggerganov Jun 19, 2026
5fd2dc2
sync : ggml
ggerganov Jun 19, 2026
159d093
server: fix non-bound n_discard value (ctx shifting) (#24786)
ngxson Jun 19, 2026
b14e3fb
spec: support eagle3 for qwen3.5 & 3.6 (#24593)
ruixiang63 Jun 19, 2026
e2e7a9b
mtmd: several bug fixes (#24784)
ngxson Jun 19, 2026
38724ab
docker : build the UI (#24794)
aldehir Jun 19, 2026
8c2d6f6
server: add --agent arg, remove redundant webui naming compat (#24801)
ngxson Jun 19, 2026
0d2d9cc
vendor : update cpp-httplib to 0.48.0 (#24787)
cabelo Jun 19, 2026
fabde3b
arg: Add comment line support to --api-key-file (#23168)
kucharskim Jun 19, 2026
175147e
server: remove all internal mentions about "webui" (#24817)
ngxson Jun 19, 2026
e475fa2
mtmd, arg: fix utf8 handling on windows (#24779)
ngxson Jun 19, 2026
4b48a53
server : optimize get_token_probabilities (#24796)
angt Jun 19, 2026
2b686a9
server: refactor child --> router communication (#24821)
ngxson Jun 19, 2026
f449e05
ggml-webgpu: add adapter toggles for F16 on Vulkan + NVIDIA
yomaytk Jun 19, 2026
f4043fe
convert : more consistent handling of rope_parameters (#24833)
CISC Jun 20, 2026
37a77fb
ggml : optimize AMX (#24806)
angt Jun 20, 2026
796f41b
model : glm-dsa load DSA indexer tensors as optional (#24770)
davidrhodus Jun 20, 2026
67e9fd3
docker : prebuild web UI for s390x build [no release] (#24829)
aldehir Jun 20, 2026
e27f308
server: avoid forwarding auth headers in CORS proxy (#24373)
ItsMatti4 Jun 20, 2026
8452824
release: add missing link for win opencl adreno arm64 (#24809)
muhammad-salem Jun 20, 2026
75f460a
arg: try fixing test-args-parser randomly fails (#24826)
ngxson Jun 20, 2026
84de01a
llama : use LLM_KV for quantization_version & file_type (#24802)
angt Jun 20, 2026
4a80943
fix(hexagon): use padded stride for ssm-conv weights (#24470)
BiReRa Jun 20, 2026
c576070
common/json-schema-to-grammar : align spacing rules with parsers (#24…
aldehir Jun 20, 2026
063d9c1
common/peg : refactor until gbnf grammar generation (#24839)
aldehir Jun 21, 2026
d789527
spec : Support Step3.5/3.7 flash mtp3 (#24340)
forforever73 Jun 21, 2026
8a118ee
minor : clean-up whitespaces (#24862)
ggerganov Jun 21, 2026
d6d8995
server: real-time model load progress tracking via /models/sse (#24828)
ngxson Jun 21, 2026
bfa3219
server: add "verbose" field to schema (#24864)
ngxson Jun 21, 2026
2f89acc
mtmd: add load progress callback (#24865)
ngxson Jun 21, 2026
bf53382
jinja : implement call statement (#24847)
CISC Jun 21, 2026
0d135df
mtmd: fix mtmd_get_memory_usage (#24867)
ngxson Jun 21, 2026
bddfd2b
server: refactor batch construction (#24843)
ngxson Jun 21, 2026
7c082bc
server: fix report progress for loading spec models, add "stages" lis…
ngxson Jun 21, 2026
52b3df0
common/peg : implement ac parser for stricter grammar generation (#24…
aldehir Jun 21, 2026
0ef6f06
docs/android.md: Add dependency `libandroid-spawn` for building in te…
aafsmarak Jun 22, 2026
d0f9d2e
server: fix edit_file crash on append at end of file (line_start -1) …
ServeurpersoCom Jun 22, 2026
37957e8
sampling : remove unconditional softmax+sort in top-n-sigma sampler (…
TimNN Jun 22, 2026
f8cc15f
[SYCL] support bf16 on bin_bcast OP and unary OPs (#24838)
arthw Jun 22, 2026
099b579
ui: model status and load progress via /models/sse feed (#24878)
ServeurpersoCom Jun 22, 2026
6ee0f65
server: refactor/generalize input file schema (#24299)
ngxson Jun 22, 2026
721354f
server: (router) move model downloading to dedicated process (#24834)
ngxson Jun 22, 2026
9c0ac88
ui: Prioritize favorite models in model selection (#24766)
mahdiou Jun 22, 2026
dec5ca5
server : Add id to tool call responses api (#24882)
boondocklabs Jun 22, 2026
23ee879
opencl: q8_0 gemv precision improvement (#24923)
shawngu-quic Jun 23, 2026
73618f2
server: improve user message detection and create checkpoints at ever…
aldehir Jun 23, 2026
035cd8f
codeowners: add yomaytk to ggml-webgpu (#24930)
yomaytk Jun 23, 2026
7c90850
ggml-webgpu: improve MTP inference by using mat-vec path for small ba…
yomaytk Jun 23, 2026
a3900a6
model: Granite Speech Plus (#24818)
gabe-l-hart Jun 23, 2026
c926ad0
vulkan: link ggml-cpu when GGML_VULKAN_CHECK_RESULTS / RUN_TESTS are …
Detensable Jun 23, 2026
75ad0b2
server: fix remote preset handling, add test (#24938)
ngxson Jun 23, 2026
0eb874d
vulkan: make mul_mm ALIGNED a spec constant (#24689)
jeffbolznv Jun 23, 2026
c560636
vulkan: support CONV_3D (#24612)
jeffbolznv Jun 23, 2026
92e854a
vulkan: Support GET_ROWS_BACK (#24883)
jeffbolznv Jun 23, 2026
72a9269
vulkan: support all backend tests for SQR/SQRT/SIN/COS/CLAMP/LEAKY_RE…
jeffbolznv Jun 23, 2026
be4a6a6
server : check draft context creation error (#24922)
Kononnable Jun 23, 2026
ac4105d
vulkan: Apply bias before softmax in FA, to avoid overflow (#24909)
jeffbolznv Jun 24, 2026
88636e1
model : Add LFM2.5-ColBERT-350M and LFM2.5-Embedding-350M (#24913)
tdakhran Jun 24, 2026
ef9c13d
ui: New Logo + Navigation cleanup & Mobile UI/UX improvements (#24897)
allozaur Jun 24, 2026
00139b6
ui: loading bar below the model picker (#24931)
ServeurpersoCom Jun 24, 2026
1191758
vulkan: fail the build when a shader fails to compile (#24450)
liminfei-amd Jun 24, 2026
51eae8c
vulkan: allow reducing the graph submission batches to avoid timeouts…
wbruna Jun 24, 2026
fb40104
common: remove unused json-partial (#24968)
ngxson Jun 24, 2026
894bb27
mtmd: model: unlimited-ocr: converter + parity test (#24969)
sfallah Jun 24, 2026
8be759e
hexagon: MUL_MAT and MUL_MAT_ID rework : 32x32 tiled weight repack, k…
max-krasnyansky Jun 24, 2026
09cedfd
chat: harden caps check (#24973)
pwilkin Jun 25, 2026
fdb2c11
opencl: support non-contig rows in norm (#24965)
lhez Jun 25, 2026
9c10954
sycl : fix the failed UT cases of conv_3d (#24900)
arthw Jun 25, 2026
e9fb3b3
sycl : support --split-mode tensor (#24152)
Spruill-1 Jun 25, 2026
b3ce5ce
quant : fix quantizing moe with mtp (#24986)
CISC Jun 25, 2026
e12a012
build: include libmtmd in Apple XCFramework (#21935)
theabecaster Jun 25, 2026
fdbd6ab
tests : synchronize contexts at end of test-thread-safety (#24935)
krystophny Jun 25, 2026
3e61ea0
ui: fix always-show-sidebar-on-desktop setting after navigation refac…
ServeurpersoCom Jun 25, 2026
f728ada
ggml : address integer overflows in binary ops CUDA implementation (#…
fairydreaming Jun 25, 2026
683b04c
app : add the llama download subcommand (#24982)
angt Jun 25, 2026
e8ecce5
docs : Eagle3 qwen3 draft model support (#24977)
kashif Jun 25, 2026
60bc886
common: refactor model handling (#24980)
ngxson Jun 25, 2026
099bf06
misc: update lables (#24920)
ngxson Jun 25, 2026
e9d1b76
server: use status code 403 for disabled features (#24970)
ngxson Jun 25, 2026
c7cddef
misc: fix labeler (#25012)
ngxson Jun 25, 2026
1ec44d1
CUDA: Various fixes to `cpy.cu` (#25000)
ORippler Jun 25, 2026
9d5d882
model : Add label for LFM2.5-230M (#25008)
tdakhran Jun 25, 2026
beac530
xcframework : disable mtmd video on i/tv/visionos (#25018)
CISC Jun 25, 2026
cd81994
Merge remote-tracking branch 'upstream/master' into jimwu.sync-upstream
Jun 26, 2026
5d83b8b
Merge pull request #28 from ROCm/jimwu.sync-upstream
jimw567 Jun 26, 2026
13ee2f1
ci: add gfx11 ROCm build + on-hardware test workflow
Jun 10, 2026
cb547e3
ci: pin ROCm to last known-good nightly 7.14.0a20260608
Jun 10, 2026
c1709b0
ci: route gfx1150 test to linux-strix-gpu-rocm-2 runner
Jun 10, 2026
1b33419
ci: pull test model from pinned GitHub release, revert gfx1150 runner
Jun 11, 2026
445e589
ci: bundle libatomic + update test assertions to current llama.cpp ou…
Jun 11, 2026
6ba6660
ci: make gfx test verify substantive generation, not just offload
Jun 11, 2026
20562e1
ci(gfx11): make on-hardware test verify deterministic answer
Jun 11, 2026
d8c0a60
ci(gfx11): add nightly dated release, timeout test, skip gfx1150 test
Jun 11, 2026
9e37c63
ci(gfx11): add gfx110X (Hawk Point/Phoenix RDNA3) build + release target
Jun 11, 2026
aa90db9
ci: add gfx1153 build + release target
Jun 12, 2026
914a8e5
ci: retrigger CI
Jun 13, 2026
5afbaec
Add mmq device table for RDNA3.5
Jun 17, 2026
75f9c80
ci(gfx11): build one universal multi-arch ROCm package
Jun 16, 2026
91a2c4c
ci(gfx11): rename "universal" package to "multiarch"
Jun 17, 2026
5692e0b
ci(gfx11): use "multiarch" spelling in comments and scripts
Jun 17, 2026
b11df79
ci(gfx11): run editorconfig + model-naming linters on ubuntu-24.04
Jun 27, 2026
eb2440b
ci(gfx11): trigger editorconfig + model-naming linters on gfx11 branch
Jun 27, 2026
78372b6
ci(gfx11): run flake8 lint on ubuntu-24.04, trigger on gfx11
Jun 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
16 changes: 16 additions & 0 deletions .devops/cann.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,20 @@ ARG APP_REVISION=N/A
# BUILD STAGE
# Compile all binary files and libraries
# ==============================================================================
ARG NODE_VERSION=24

FROM docker.io/node:$NODE_VERSION AS web

ARG APP_VERSION

WORKDIR /app/tools/ui

COPY tools/ui/package.json tools/ui/package-lock.json ./
RUN npm ci

COPY tools/ui/ ./
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build

FROM ${CANN_BASE_IMAGE} AS build

# -- Install build dependencies --
Expand All @@ -26,6 +40,8 @@ WORKDIR /app
# -- Copy project files --
COPY . .

COPY --from=web /app/tools/ui/dist tools/ui/dist

# -- Set CANN environment variables (required for compilation) --
# Using ENV instead of `source` allows environment variables to persist across the entire image layer
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
Expand Down
20 changes: 18 additions & 2 deletions .devops/cpu.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,21 @@ ARG BUILD_DATE=N/A
ARG APP_VERSION=N/A
ARG APP_REVISION=N/A

FROM ubuntu:$UBUNTU_VERSION AS build
ARG NODE_VERSION=24

FROM docker.io/node:$NODE_VERSION AS web

ARG APP_VERSION

WORKDIR /app/tools/ui

COPY tools/ui/package.json tools/ui/package-lock.json ./
RUN npm ci

COPY tools/ui/ ./
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build

FROM docker.io/ubuntu:$UBUNTU_VERSION AS build

ARG TARGETARCH

Expand All @@ -16,6 +30,8 @@ WORKDIR /app

COPY . .

COPY --from=web /app/tools/ui/dist tools/ui/dist

RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
else \
Expand All @@ -37,7 +53,7 @@ RUN mkdir -p /app/full \
&& cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ubuntu:$UBUNTU_VERSION AS base
FROM docker.io/ubuntu:$UBUNTU_VERSION AS base

ARG BUILD_DATE=N/A
ARG APP_VERSION=N/A
Expand Down
26 changes: 22 additions & 4 deletions .devops/cuda.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,29 +1,47 @@
ARG UBUNTU_VERSION=24.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.8.1
ARG GCC_VERSION=14
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
ARG BASE_CUDA_DEV_CONTAINER=docker.io/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
ARG BASE_CUDA_RUN_CONTAINER=docker.io/nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

ARG BUILD_DATE=N/A
ARG APP_VERSION=N/A
ARG APP_REVISION=N/A

ARG NODE_VERSION=24

FROM docker.io/node:$NODE_VERSION AS web

ARG APP_VERSION

WORKDIR /app/tools/ui

COPY tools/ui/package.json tools/ui/package-lock.json ./
RUN npm ci

COPY tools/ui/ ./
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

ARG GCC_VERSION
# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

RUN apt-get update && \
apt-get install -y gcc-14 g++-14 build-essential cmake python3 python3-pip git libssl-dev libgomp1
apt-get install -y gcc-${GCC_VERSION} g++-${GCC_VERSION} build-essential cmake python3 python3-pip git libssl-dev libgomp1

ENV CC=gcc-14 CXX=g++-14 CUDAHOSTCXX=g++-14
ENV CC=gcc-${GCC_VERSION} CXX=g++-${GCC_VERSION} CUDAHOSTCXX=g++-${GCC_VERSION}

WORKDIR /app

COPY . .

COPY --from=web /app/tools/ui/dist tools/ui/dist

RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
fi && \
Expand Down
25 changes: 21 additions & 4 deletions .devops/intel.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,23 @@ ARG APP_REVISION=N/A

## Build Image

FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS build
ARG NODE_VERSION=24

ARG GGML_SYCL_F16=OFF
FROM docker.io/node:$NODE_VERSION AS web

ARG APP_VERSION

WORKDIR /app/tools/ui

COPY tools/ui/package.json tools/ui/package-lock.json ./
RUN npm ci

COPY tools/ui/ ./
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build

FROM docker.io/intel/deep-learning-essentials:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=ON
ARG LEVEL_ZERO_VERSION=1.28.2
ARG LEVEL_ZERO_UBUNTU_VERSION=u24.04
RUN apt-get update && \
Expand All @@ -22,9 +36,12 @@ WORKDIR /app

COPY . .

COPY --from=web /app/tools/ui/dist tools/ui/dist

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
echo "GGML_SYCL_F16 is set" \
&& export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
&& export OPT_SYCL_F16="-DGGML_SYCL_F16=ON" \
&& export SYCL_PROGRAM_COMPILE_OPTIONS="-cl-fp32-correctly-rounded-divide-sqrt"; \
fi && \
echo "Building with dynamic libs" && \
cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${OPT_SYCL_F16} && \
Expand All @@ -42,7 +59,7 @@ RUN mkdir -p /app/full \
&& cp requirements.txt /app/full \
&& cp .devops/tools.sh /app/full/tools.sh

FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS base
FROM docker.io/intel/deep-learning-essentials:$ONEAPI_VERSION AS base

ARG BUILD_DATE=N/A
ARG APP_VERSION=N/A
Expand Down
4 changes: 2 additions & 2 deletions .devops/llama-cli-cann.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ ARG BUILD_DATE=N/A
ARG APP_VERSION=N/A
ARG APP_REVISION=N/A

FROM ascendai/cann:$ASCEND_VERSION AS build
FROM docker.io/ascendai/cann:$ASCEND_VERSION AS build

WORKDIR /app

Expand All @@ -30,7 +30,7 @@ RUN echo "Building with static libs" && \
cmake --build build --config Release --target llama-completion

# TODO: use image with NNRT
FROM ascendai/cann:$ASCEND_VERSION AS runtime
FROM docker.io/ascendai/cann:$ASCEND_VERSION AS runtime

ARG BUILD_DATE=N/A
ARG APP_VERSION=N/A
Expand Down
20 changes: 18 additions & 2 deletions .devops/musa.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,28 @@ ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc4.3.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
ARG BASE_MUSA_DEV_CONTAINER=docker.io/mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64

ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64
ARG BASE_MUSA_RUN_CONTAINER=docker.io/mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64

ARG BUILD_DATE=N/A
ARG APP_VERSION=N/A
ARG APP_REVISION=N/A

ARG NODE_VERSION=24

FROM docker.io/node:$NODE_VERSION AS web

ARG APP_VERSION

WORKDIR /app/tools/ui

COPY tools/ui/package.json tools/ui/package-lock.json ./
RUN npm ci

COPY tools/ui/ ./
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

# MUSA architecture to build for (defaults to all supported archs)
Expand All @@ -29,6 +43,8 @@ WORKDIR /app

COPY . .

COPY --from=web /app/tools/ui/dist tools/ui/dist

RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
fi && \
Expand Down
Loading
Loading