slovnet

Форк
0
102 строки · 1.9 Кб
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": null,
6
   "metadata": {},
7
   "outputs": [],
8
   "source": [
9
    "%run main.py\n",
10
    "%load_ext autoreload\n",
11
    "%autoreload 2\n",
12
    "\n",
13
    "!mkdir -p {RAW_DIR}\n",
14
    "s3 = S3()"
15
   ]
16
  },
17
  {
18
   "cell_type": "code",
19
   "execution_count": null,
20
   "metadata": {},
21
   "outputs": [],
22
   "source": [
23
    "# !wget https://storage.yandexcloud.net/natasha-nerus/data/nerus_lenta.conllu.gz -P {RAW_DIR}"
24
   ]
25
  },
26
  {
27
   "cell_type": "code",
28
   "execution_count": null,
29
   "metadata": {},
30
   "outputs": [],
31
   "source": [
32
    "records = load_nerus(RAW_NERUS)\n",
33
    "records = log_progress(records, total=NERUS_TOTAL)\n",
34
    "\n",
35
    "sents = (\n",
36
    "    sent\n",
37
    "    for record in records\n",
38
    "    for sent in record.sents\n",
39
    ")\n",
40
    "markups = (adapt_markup(_.syntax) for _ in sents)\n",
41
    "items = (_.as_json for _ in markups)\n",
42
    "lines = list(format_jl(items))"
43
   ]
44
  },
45
  {
46
   "cell_type": "code",
47
   "execution_count": null,
48
   "metadata": {},
49
   "outputs": [],
50
   "source": [
51
    "seed(1)\n",
52
    "shuffle(lines)"
53
   ]
54
  },
55
  {
56
   "cell_type": "code",
57
   "execution_count": null,
58
   "metadata": {},
59
   "outputs": [],
60
   "source": [
61
    "# dump_gz_lines(log_progress(lines), NERUS)"
62
   ]
63
  },
64
  {
65
   "cell_type": "code",
66
   "execution_count": null,
67
   "metadata": {},
68
   "outputs": [],
69
   "source": [
70
    "# s3.upload(NERUS, S3_NERUS)"
71
   ]
72
  },
73
  {
74
   "cell_type": "code",
75
   "execution_count": null,
76
   "metadata": {},
77
   "outputs": [],
78
   "source": []
79
  }
80
 ],
81
 "metadata": {
82
  "kernelspec": {
83
   "display_name": "Python 3",
84
   "language": "python",
85
   "name": "python3"
86
  },
87
  "language_info": {
88
   "codemirror_mode": {
89
    "name": "ipython",
90
    "version": 3
91
   },
92
   "file_extension": ".py",
93
   "mimetype": "text/x-python",
94
   "name": "python",
95
   "nbconvert_exporter": "python",
96
   "pygments_lexer": "ipython3",
97
   "version": "3.6.9"
98
  }
99
 },
100
 "nbformat": 4,
101
 "nbformat_minor": 2
102
}
103

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.