# colossalai
# 204 lines · 7.1 KB
name: Build on PR

# Events that start this workflow.
on:
  pull_request:
    types:
      - synchronize
      - opened
      - reopened
      - ready_for_review
      - closed
      - edited
    branches:
      - 'main'
      - 'develop'
      - 'feature/**'
    # Order matters: each "!" pattern excludes Markdown files from the
    # directory pattern listed just before it.
    paths:
      # run command & env variables change
      - '.github/workflows/build_on_pr.yml'
      # source code change
      - 'colossalai/**'
      # ignore doc change
      - '!colossalai/**.md'
      # cuda extension change
      - 'op_builder/**'
      # ignore doc change
      - '!op_builder/**.md'
      # requirements change
      - 'requirements/**'
      # test change
      - 'tests/**'
      # ignore doc change
      - '!tests/**.md'
      # test config change
      - 'pytest.ini'
      # install command change
      - 'setup.py'
  # Ref creation and deletion events, with no further filters.
  create:
  delete:
23
jobs:
  # Works out which files the pull request changed. The build job uses the
  # outputs to decide whether to run at all and whether the cached CUDA
  # extension build may be reused.
  detect:
    name: Detect file change
    # Run only for non-draft pull requests against hpcaitech/ColossalAI, and
    # only for the actions that can bring in new commits or make the PR ready.
    if: |
      github.event_name == 'pull_request' &&
      (github.event.action == 'synchronize' || github.event.action == 'opened' || github.event.action == 'reopened' || github.event.action == 'ready_for_review') &&
      github.event.pull_request.draft == false &&
      github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
    outputs:
      # "Extenison" is misspelled, but the build job reads the output under
      # this exact name, so it is kept as-is.
      changedExtenisonFiles: ${{ steps.find-extension-change.outputs.all_changed_files }}
      anyExtensionFileChanged: ${{ steps.find-extension-change.outputs.any_changed }}
      changedLibraryFiles: ${{ steps.find-lib-change.outputs.all_changed_files }}
      anyLibraryFileChanged: ${{ steps.find-lib-change.outputs.any_changed }}
    runs-on: ubuntu-latest
    # One detection run per pull request (or ref); a newer run cancels the
    # one still in progress.
    concurrency:
      group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-detect-change
      cancel-in-progress: true
    steps:
      - uses: actions/checkout@v2
        with:
          # Full history is needed so that merge-base can be computed below.
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Locate base commit
        id: locate-base-sha
        run: |
          # Compare against the branch this PR actually targets. The workflow
          # also runs for PRs into develop and feature/**, so a hard-coded
          # origin/main would report changes that are not part of the PR.
          # Falls back to main if the base ref is not set.
          baseBranch="origin/${GITHUB_BASE_REF:-main}"
          # The checkout above is by commit SHA, so HEAD is the PR head commit.
          commonCommit=$(git merge-base "$baseBranch" HEAD)
          echo "$commonCommit"
          echo "baseSHA=$commonCommit" >> "$GITHUB_OUTPUT"

      # NOTE(review): third-party action referenced by a mutable tag;
      # consider pinning it to a reviewed commit SHA.
      - name: Find the changed extension-related files
        id: find-extension-change
        uses: tj-actions/changed-files@v35
        with:
          base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}
          files: |
            op_builder/**
            colossalai/kernel/**
            setup.py

      - name: Find the changed library-related files
        id: find-lib-change
        uses: tj-actions/changed-files@v35
        with:
          base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}
          files: |
            **/*.py
            **/*.h
            **/*.cpp
            **/*.cu
            **/*.txt

      - name: List changed files
        # File names come from the pull request and are untrusted. They are
        # passed through environment variables instead of being expanded into
        # the script text, so a crafted file name cannot inject shell commands.
        env:
          EXTENSION_FILES: ${{ steps.find-extension-change.outputs.all_changed_files }}
          LIBRARY_FILES: ${{ steps.find-lib-change.outputs.all_changed_files }}
        run: |
          for file in $EXTENSION_FILES; do
            echo "$file was changed"
          done
          for file in $LIBRARY_FILES; do
            echo "$file was changed"
          done

  # Builds TensorNVMe and Colossal-AI inside a GPU container, runs the unit
  # tests and uploads a small report artifact.
  build:
    name: Build and Test Colossal-AI
    needs: detect
    # Skipped when the pull request touched no library-related file.
    if: needs.detect.outputs.anyLibraryFileChanged == 'true'
    runs-on: [self-hosted, gpu]
    container:
      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
      options: --gpus all --rm -v /dev/shm -v /data/scratch/llama-tiny:/data/scratch/llama-tiny
    timeout-minutes: 60
    defaults:
      run:
        shell: bash
    # One test run per pull request (or ref); a newer run cancels the one
    # still in progress.
    concurrency:
      group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-run-test
      cancel-in-progress: true
    steps:
      - name: Checkout TensorNVMe
        uses: actions/checkout@v2
        with:
          repository: hpcaitech/TensorNVMe
          ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
          path: TensorNVMe

      - name: Restore TensorNVMe Cache
        run: |
          # Copy earlier build output back only if the cache exists and is
          # non-empty.
          if [ -d /github/home/tensornvme_cache ] && [ ! -z "$(ls -A /github/home/tensornvme_cache/)" ]; then
            cp -p -r /github/home/tensornvme_cache/* /__w/ColossalAI/ColossalAI/TensorNVMe
          fi

      - name: Install TensorNVMe
        run: |
          cd TensorNVMe
          # -y answers the confirmation prompt; there is nobody to type "y"
          # in CI.
          conda install -y cmake
          pip install -r requirements.txt
          pip install -v .

      - name: Store TensorNVMe Cache
        run: |
          cd TensorNVMe
          # The restore step tolerates a missing cache directory, so this step
          # must create it on first use before copying into it.
          mkdir -p /github/home/tensornvme_cache
          cp -p -r ./build /github/home/tensornvme_cache/
          cp -p -r ./cmake-build /github/home/tensornvme_cache/

      - name: Checkout Colossal-AI
        uses: actions/checkout@v2
        with:
          ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}

      - name: Restore Colossal-AI Cache
        # Reuse the cached extension build only when no extension-related
        # file changed in this pull request.
        if: needs.detect.outputs.anyExtensionFileChanged != 'true'
        run: |
          # -p flag is required to preserve the file timestamp to avoid ninja rebuild
          if [ -d /github/home/cuda_ext_cache ] && [ ! -z "$(ls -A /github/home/cuda_ext_cache/)" ]; then
            cp -p -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
          fi

      - name: Install Colossal-AI
        run: |
          BUILD_EXT=1 pip install -v -e .
          pip install -r requirements/requirements-test.txt

      - name: Store Colossal-AI Cache
        run: |
          # Create the cache directory on first use; the restore step allows
          # for it being absent.
          mkdir -p /github/home/cuda_ext_cache
          # -p flag is required to preserve the file timestamp to avoid ninja rebuild
          cp -p -r /__w/ColossalAI/ColossalAI/build /github/home/cuda_ext_cache/

      - name: Execute Unit Testing
        env:
          LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
          LLAMA_PATH: /data/scratch/llama-tiny
        run: |
          CURL_CA_BUNDLE="" PYTHONPATH=$PWD FAST_TEST=1 pytest \
          -m "not largedist" \
          --durations=0 \
          --ignore tests/test_analyzer \
          --ignore tests/test_auto_parallel \
          --ignore tests/test_fx \
          --ignore tests/test_autochunk \
          --ignore tests/test_gptq \
          --ignore tests/test_infer_ops \
          --ignore tests/test_legacy \
          --ignore tests/test_smoothquant \
          tests/

      - name: Collate artifact
        env:
          PR_NUMBER: ${{ github.event.number }}
          changedLibraryFiles: ${{ needs.detect.outputs.changedLibraryFiles }}
          anyLibraryFileChanged: ${{ needs.detect.outputs.anyLibraryFileChanged }}
          changedExtenisonFiles: ${{ needs.detect.outputs.changedExtenisonFiles }}
        run: |
          # -p: do not fail if a report directory is already present.
          mkdir -p report
          echo "$PR_NUMBER" > ./report/pr_number

          # generate coverage.xml if any
          # NOTE(review): the pytest command in this job does not pass --cov;
          # .coverage presumably comes from configuration outside this file -
          # confirm.
          if [ "$anyLibraryFileChanged" == "true" ] && [ -e .coverage ]; then
            # Join the changed files into the comma-separated list that
            # `coverage report --include` expects.
            allFiles=""
            for file in $changedLibraryFiles; do
              allFiles=${allFiles:+$allFiles,}$file
            done

            coverage report --data-file .coverage --include "$allFiles" > ./coverage.txt

            # The last line of the report ends with the total percentage.
            # The digit class must include 0, otherwise totals such as 80%
            # or 100% are reduced to a bare "%".
            covPercentage=$(tail -n 1 coverage.txt | grep -oE '[0-9]+%$')
            # Drop the trailing "%".
            covNum=${covPercentage::-1}
            mv coverage.txt ./report
            echo "$covNum" > ./report/cov_number
          else
            echo "No coverage report is generated"
          fi

      - name: Upload test coverage artifact
        # v3 of the artifact actions has been retired on github.com; v4 takes
        # the same name/path inputs.
        uses: actions/upload-artifact@v4
        with:
          name: report
          path: report/