deepspeed

Форк
0
/
amd-mi200.yml 
82 строки · 2.9 Кб
# Workflow that runs the test suite on a self-hosted AMD MI200 runner.
name: amd-mi200

# Triggers: manual dispatch from the Actions UI, plus a nightly schedule.
on:
  workflow_dispatch:
  schedule:
    # Every day at 00:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 0 * * *"

# One run per workflow + ref: a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Token scopes: read the repository contents; write access to issues is used
# by the step that opens an issue when the scheduled run fails.
permissions:
  contents: read
  issues: write

jobs:
  amd-tests:
    # The type of runner that the job will run on
    runs-on: [self-hosted, amd, mi200]

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v3

      # Repository-local action.
      # NOTE(review): presumably creates the Python venv and exports
      # TORCH_CACHE / PYTEST_OPTS used by later steps - confirm.
      - id: setup-venv
        uses: ./.github/workflows/setup-venv

      # Install torch/torchvision from the ROCm 5.6 wheel index and print the
      # resulting version and device availability for the job log.
      - name: Install pytorch
        run: |
          pip install -U --cache-dir "$TORCH_CACHE" torch torchvision --index-url https://download.pytorch.org/whl/rocm5.6
          python -c "import torch; print('torch:', torch.__version__, torch)"
          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

      # Install transformers from the tip of its default branch.
      - name: Install transformers
        run: |
          git clone https://github.com/huggingface/transformers
          cd transformers
          # if needed switch to the last known good SHA until transformers@master is fixed
          # git checkout 1cc453d33
          git rev-parse --short HEAD
          pip install .

      # Build apex into /blob/amd-apex/ and reuse that build across runs: the
      # commit it was built from is recorded in .venv_installed_version, and a
      # rebuild happens only when the checked-out commit differs.
      - name: Install (ROCm) apex
        run: |
          git clone https://github.com/ROCmSoftwarePlatform/apex.git
          cd apex
          git checkout torch_2.1_higher
          CURRENT_VER=$(git rev-parse HEAD)
          # The marker file is absent before the first successful build. Treat
          # that as "nothing installed" instead of letting the failed `cat`
          # abort the step under `bash -e`.
          INSTALLED_VER=$(cat /blob/amd-apex/.venv_installed_version 2>/dev/null || true)
          if [[ "$CURRENT_VER" != "$INSTALLED_VER" ]]; then
            pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \
              --config-settings="--global-option=--cpp_ext" \
              --config-settings="--global-option=--cuda_ext" \
              --target=/blob/amd-apex/ --upgrade .
            git rev-parse HEAD > /blob/amd-apex/.venv_installed_version
          fi
          # Make the out-of-venv apex build importable in all following steps.
          echo "PYTHONPATH=${PYTHONPATH}:/blob/amd-apex/" >> "$GITHUB_ENV"

      # Runs a set of commands using the runners shell
      - name: Install deepspeed
        run: |
          # Quoted so the shell never treats the extras list as a glob pattern.
          pip install '.[dev,1bit,autotuning]'
          #python -c "from deepspeed.env_report import cli_main; cli_main()"
          ds_report

      # Dump the installed packages into the job log.
      - name: Python environment
        run: |
          pip list

      # Runs a set of commands using the runners shell
      - name: Unit tests
        run: |
          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
          cd tests
          # First pass with 4 workers (-n 4), then the tests marked
          # 'sequential' without -n. $PYTEST_OPTS is left unquoted on purpose
          # so it can expand to several options.
          pytest $PYTEST_OPTS -n 4 --verbose unit/
          pytest $PYTEST_OPTS -m 'sequential' unit/

      # Only for scheduled runs: open (or update) an issue from the CI failure
      # template when any earlier step failed.
      - name: Open GitHub issue if nightly CI fails
        if: ${{ failure() && (github.event_name == 'schedule') }}
        uses: JasonEtco/create-an-issue@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
          update_existing: true

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.