│ ├── api_service/ # code for api services
│ ├── core/
│ │ : xllm core features folder
│ │ ├── distributed_runtime/ # code for distributed and pd serving
│ │ ├── framework/ # code for execution orchestration
│ │ ├── kernels/ # adaptation for NPU kernels
│ │ ├── layers/ # model layers impl
│ │ ├── platform/ # adaptation for various platforms
│ │ ├── runtime/ # code for worker and executor
│ │ ├── scheduler/ # code for batch and pd scheduler
│ ├── function_call/ # code for tool call parser
│ ├── models/ # models impl
│ ├── processors/ # code for vlm pre-processing
│ ├── proto/ # communication protocol
│ ├── pybind/ # code for Python bindings
│ └── server/ # xLLM server
├── examples/ # examples of calling xLLM
├── tools/ # code for NPU time generation
└── xllm.cpp # entrypoint of xLLM