pytorch

Форк
0
67 строк · 2.0 Кб
1
# Composite action: runs the runner health-check steps below and exports
# GPU_FLAG to GITHUB_ENV for later steps.
name: Setup XPU host

description: Set up XPU host for CI

runs:
  using: composite
  steps:
    - name: Clean all stopped docker containers
      if: always()
      shell: bash
      run: |
        # Prune all stopped containers.
        # If another runner is already pruning on this node, skip.
        # pgrep never reports itself, so there is no need to rely on grep
        # counting its own process, and testing it directly in `if` keeps a
        # "no match" exit status from aborting the script under errexit/pipefail.
        if ! pgrep -f "docker container prune" > /dev/null; then
          docker container prune -f
        fi

    - name: Runner health check system info
      if: always()
      shell: bash
      run: |
        # Dump OS and apt source information for the logs; a missing file is
        # tolerated. Finish by printing the user the runner executes as.
        for info_file in \
          /etc/os-release \
          /etc/apt/sources.list.d/oneAPI.list \
          /etc/apt/sources.list.d/intel-gpu-jammy.list; do
          cat "${info_file}" || true
        done
        whoami

    - name: Runner health check xpu-smi
      if: always()
      shell: bash
      # Runs `xpu-smi discovery`; the step fails if the command exits non-zero.
      run: xpu-smi discovery

    - name: Runner health check GPU count
      if: always()
      shell: bash
      run: |
        # Count the 'Device Name' lines printed by xpu-smi. grep -c exits 1 when
        # the count is 0 (and the pipeline also fails if xpu-smi itself fails),
        # which under `bash -eo pipefail` would abort the script right here,
        # before the diagnostic below is printed. `|| true` keeps it running so
        # the zero-GPU case is reported explicitly.
        ngpu=$(xpu-smi discovery | grep -c -E 'Device Name' || true)
        msg="Please file an issue on pytorch/pytorch reporting the faulty runner. Include a link to the runner logs so the runner can be identified"
        if [[ "${ngpu:-0}" -eq 0 ]]; then
          echo "Error: Failed to detect any GPUs on the runner"
          echo "$msg"
          exit 1
        fi

    # Delegates to the repository-local diskspace-cleanup action; `always()`
    # makes it run even when an earlier step has failed.
    - name: Runner diskspace health check
      if: always()
      uses: ./.github/actions/diskspace-cleanup

    - name: Runner health check disconnect on failure
      # Only runs when an earlier step has failed.
      if: failure()
      shell: bash
      # Kills every process named runsvc.sh (presumably the runner service
      # wrapper, so the faulty runner stops taking jobs - confirm).
      run: killall runsvc.sh

    - name: Preserve github env variables for use in docker
      shell: bash
      run: |
        # Append every environment entry starting with GITHUB, then every one
        # starting with CI, to a per-run file under /tmp.
        github_env_file="/tmp/github_env_${GITHUB_RUN_ID}"
        for prefix in GITHUB CI; do
          env | grep "^${prefix}" >> "${github_env_file}"
        done

    - name: XPU set GPU_FLAG
      shell: bash
      run: |
        # Add render group for container creation.
        # Match the group name exactly and stop at the first hit: a substring
        # search for "render" over whole /etc/group lines can also match other
        # group names or member lists and return several GIDs, which would
        # corrupt both the docker flags and the single-line GITHUB_ENV entry.
        render_gid=$(awk -F: '$1 == "render" { print $3; exit }' /etc/group)
        if [[ -z "${render_gid}" ]]; then
          # Fail with an explicit message rather than exporting an empty
          # `--group-add` argument.
          echo "Error: no 'render' group found in /etc/group"
          exit 1
        fi
        echo "GPU_FLAG=--device=/dev/mem --device=/dev/dri --group-add video --group-add ${render_gid}" >> "${GITHUB_ENV}"
Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.