text-generation-inference
15 строк · 463.0 Байт
# This fork of llm-awq only adds the correct stream to the kernel, which is
# needed to make CUDA graphs work.
# Pinned commit of the fork; `build-awq` checks out exactly this revision.
awq_commit := bd1dc2d5254345cc76ab71894651fb821275bdd4
4
# Fetch a fresh copy of the llm-awq fork. Any existing `llm-awq` directory is
# removed first so the clone never collides with a previous checkout.
# Declared phony because `awq` names an action, not a file this rule creates;
# without it a stray file or directory called `awq` would skip the recipe.
.PHONY: awq
awq:
	rm -rf llm-awq
	git clone https://github.com/huggingface/llm-awq
8
# Check out the pinned commit of the fork and build the kernels in place.
# Depends on `awq`, so the repository is re-cloned before every build.
# Each recipe line runs in its own shell, hence the `cd ... &&` prefix on
# both commands.
.PHONY: build-awq
build-awq: awq
	cd llm-awq/ && git fetch && git checkout $(awq_commit)
	cd llm-awq/awq/kernels && python setup.py build
12
# Install the built kernels using the `pip` and `python` found on PATH.
# A previously installed `awq_inference_engine` is removed first; the
# `|| true` is deliberate so the target still succeeds when the package is
# not installed yet.
.PHONY: install-awq
install-awq: build-awq
	pip uninstall awq_inference_engine -y || true
	cd llm-awq/awq/kernels && python setup.py install
16