Awesome-LLMOps

Форк
0
/
github-markdown-toc 
411 строк · 12.1 Кб
1
#!/usr/bin/env bash
2

3
#
# Steps:
#
#  1. Download the rendered html file for some README.md:
#       curl -s $1
#
#  2. Discard rows that do not contain the substring 'user-content-'
#     (github's markup):
#       awk '/user-content-/ { ...
#
#  3.1 Get the last character of each row like ' ... </span></a>sitemap.js</h1'.
#      It is the level of the current header:
#       substr($0, length($0), 1)
#
#  3.2 Take the level from 3.1 and insert the corresponding number of spaces before '*':
#       sprintf("%*s", (level-1)*'"$nb_spaces"', "")
#
#  4. Find the header's text and insert it inside "* [ ... ]":
#       substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)
#
#  5. Find the anchor and insert it inside "(...)":
#       substr($0, match($0, "href=\"[^\"]+?\" ")+6, RLENGTH-8)
#
25

26
# Version of this script; printed by --version and in the --help banner.
gh_toc_version="0.8.0"

# User-Agent sent with the HTTP requests made through curl/wget.
gh_user_agent="gh-md-toc v$gh_toc_version"
29

30
#
# Download a document (README.md rendered into html) by its url.
#
# $1 - url of the document.
#
# Writes the response body to stdout, using curl when it is available and
# wget otherwise. Exits with status 1 when neither tool is installed.
#
gh_toc_load() {
    local gh_url=$1

    if command -v curl >/dev/null 2>&1; then
        curl --user-agent "$gh_user_agent" -s "$gh_url"
    elif command -v wget >/dev/null 2>&1; then
        wget --user-agent="$gh_user_agent" -qO- "$gh_url"
    else
        # stdout of this function is piped into the TOC grabber, so the
        # diagnostic has to go to stderr to reach the user.
        echo "Please, install 'curl' or 'wget' and try again." >&2
        exit 1
    fi
}
46

47
#
# Converts local md file into html by GitHub
#
# -> curl -X POST --data '{"text": "Hello world github/linguist#1 **cool**, and #1!"}' https://api.github.com/markdown
# <p>Hello world github/linguist#1 <strong>cool</strong>, and #1!</p>'"
#
# $1 - path to the local markdown file.
# $2 - "yes" to drop everything up to and including the '<!--te-->' line
#      before rendering (only when the file contains such a line).
#
# The request carries an "Authorization: token ..." header when a token is
# available: $GH_TOC_TOKEN if it is non-empty, otherwise the contents of
# token.txt located next to this script.
#
# Prints the response body, or one of the sentinels:
#   XXNetworkErrorXX - curl exited with a non-zero status
#   XXRateLimitXX    - the response contains "API rate limit exceeded"
#
gh_toc_md2html() {
    local gh_file_md=$1
    local skip_header=$2

    local url=https://api.github.com/markdown/raw
    local token=""
    local token_file

    if [ -n "$GH_TOC_TOKEN" ]; then
        token=$GH_TOC_TOKEN
    else
        token_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/token.txt"
        if [ -f "$token_file" ]; then
            token="$(cat "$token_file")"
        fi
    fi

    local -a curl_args=(
        -s
        --user-agent "$gh_user_agent"
        -H "Content-Type:text/plain"
    )
    # Only add the header when there is a token to send.
    if [ -n "$token" ]; then
        curl_args+=(-H "Authorization: token ${token}")
    fi

    local gh_tmp_file_md=$gh_file_md
    if [ "$skip_header" = "yes" ]; then
        if grep -Fxq "<!--te-->" "$gh_file_md"; then
            # cut everything before the toc
            gh_tmp_file_md="${gh_file_md}~~"
            sed '1,/<!--te-->/d' "$gh_file_md" > "$gh_tmp_file_md"
        fi
    fi

    # Keep curl's status: the cleanup below would otherwise overwrite $?.
    local output curl_status
    output=$(curl "${curl_args[@]}" --data-binary @"$gh_tmp_file_md" "$url")
    curl_status=$?

    rm -f "${gh_file_md}~~"

    if [ "$curl_status" != "0" ]; then
        # Print the sentinel alone so that callers can compare against it.
        echo "XXNetworkErrorXX"
        return 0
    fi

    case "$output" in
        *"API rate limit exceeded"*)
            echo "XXRateLimitXX";;
        *)
            echo "${output}";;
    esac
}
98

99

100
#
# Is passed string url
#
# $1 - string to check.
#
# Prints "yes" when the string starts with "http://" or "https://",
# "no" otherwise (so a local file such as "http-notes.md" is not a url).
#
gh_is_url() {
    case "$1" in
        http://* | https://*)
            echo "yes";;
        *)
            echo "no";;
    esac
}
111

112
#
# TOC generator
#
# $1 - url or local path of the markdown document.
# $2 - total number of documents being processed; the "Table of Contents"
#      title is printed (and links are left without the document prefix)
#      only when it is "1".
# $3 - "yes" to insert the TOC into the local file between the
#      <!--ts--> and <!--te--> marker lines.
# $4 - "yes" to remove the backup copy and the separate TOC file after
#      the insertion.
# $5 - "yes" to leave the "Created by"/"Added by" footer out of the
#      inserted TOC.
# $6 - number of spaces used to indent.
# $7 - skip-header flag, forwarded to gh_toc_md2html for local files.
#
# Prints the TOC to stdout. Exits with status 1 on errors.
#
gh_toc(){
    local gh_src=$1
    local gh_src_copy=$1
    local gh_ttl_docs=$2
    local need_replace=$3
    local no_backup=$4
    local no_footer=$5
    local indent=$6
    local skip_header=$7

    if [ "$gh_src" = "" ]; then
        echo "Please, enter URL or local path for a README.md"
        exit 1
    fi

    # Show "TOC" string only if working with one document
    if [ "$gh_ttl_docs" = "1" ]; then
        echo "Table of Contents"
        echo "================="
        echo ""
        gh_src_copy=""
    fi

    if [ "$(gh_is_url "$gh_src")" == "yes" ]; then
        gh_toc_load "$gh_src" | gh_toc_grab "$gh_src_copy" "$indent"
        if [ "${PIPESTATUS[0]}" != "0" ]; then
            echo "Could not load remote document."
            echo "Please check your url or network connectivity"
            exit 1
        fi
        if [ "$need_replace" = "yes" ]; then
            echo
            echo "!! '$gh_src' is not a local file"
            echo "!! Can't insert the TOC into it."
            echo
        fi
    else
        local rawhtml
        rawhtml=$(gh_toc_md2html "$gh_src" "$skip_header")
        if [ "$rawhtml" == "XXNetworkErrorXX" ]; then
             echo "Parsing local markdown file requires access to github API"
             echo "Please make sure curl is installed and check your network connectivity"
             exit 1
        fi
        if [ "$rawhtml" == "XXRateLimitXX" ]; then
             echo "Parsing local markdown file requires access to github API"
             echo "Error: You exceeded the hourly limit. See: https://developer.github.com/v3/#rate-limiting"
             TOKEN_FILE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/token.txt"
             echo "or place GitHub auth token here: ${TOKEN_FILE}"
             exit 1
        fi
        local toc
        toc=$(echo "$rawhtml" | gh_toc_grab "$gh_src_copy" "$indent")
        echo "$toc"
        if [ "$need_replace" = "yes" ]; then
            # The path is quoted everywhere below so that files with spaces
            # in their names can be updated too.
            if grep -Fxq "<!--ts-->" "$gh_src" && grep -Fxq "<!--te-->" "$gh_src"; then
                echo "Found markers"
            else
                echo "You don't have <!--ts--> or <!--te--> in your file...exiting"
                exit 1
            fi
            local ts='<!--ts-->'
            local te='<!--te-->'
            local dt ext toc_path toc_createdby toc_footer
            dt=$(date +'%F_%H%M%S')
            ext=".orig.${dt}"
            toc_path="${gh_src}.toc.${dt}"
            toc_createdby="<!-- Created by https://github.com/ekalinin/github-markdown-toc -->"
            toc_footer="<!-- Added by: $(whoami), at: $(date) -->"
            # http://fahdshariff.blogspot.ru/2012/12/sed-mutli-line-replacement-between-two.html
            # clear old TOC (the original file is kept with the ${ext} suffix)
            sed "-i${ext}" "/${ts}/,/${te}/{//!d;}" "$gh_src"
            # create toc file
            echo "${toc}" > "${toc_path}"
            if [ "${no_footer}" != "yes" ]; then
                echo -e "\n${toc_createdby}\n${toc_footer}\n" >> "$toc_path"
            fi

            # insert toc file
            # (a sed without --version needs an explicit empty suffix for -i)
            if ! sed --version > /dev/null 2>&1; then
                sed -i "" "/${ts}/r ${toc_path}" "$gh_src"
            else
                sed -i "/${ts}/r ${toc_path}" "$gh_src"
            fi
            echo
            if [ "${no_backup}" = "yes" ]; then
                rm "${toc_path}" "${gh_src}${ext}"
            fi
            echo "!! TOC was added into: '$gh_src'"
            if [ -z "${no_backup}" ]; then
                echo "!! Origin version of the file: '${gh_src}${ext}'"
                echo "!! TOC added into a separate file: '${toc_path}'"
            fi
            echo
        fi
    fi
}
212

213
#
# Grabber of the TOC from rendered html
#
# Reads the rendered html from stdin and prints one markdown list item
# ("* [text](url#anchor)") per header found.
#
# $1 - a source url of the document, used as a prefix of every link.
#      It is needed if the TOC is generated for multiple documents.
# $2 - number of spaces used to indent per header level.
#
gh_toc_grab() {
    local -a grepcmd echoargs
    local href_re

    if [ "$(uname -s)" == "OS/390" ]; then
        grepcmd=(pcregrep -o)
        echoargs=()
        href_re='href="([^"]+)?"'
    else
        grepcmd=(grep -Eo)
        echoargs=(-e)
        href_re='href="[^"]+?"'
    fi

    # gh_url, indent and href_re are handed over with "awk -v" instead of
    # being pasted into the program text.
    local awkscript='{
        level = substr($0, length($0), 1)
        text = substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)
        href = substr($0, match($0, href_re)+6, RLENGTH-7)

        modified_href = ""
        split(href, chars, "")
        for (i = 1; i <= length(href); i++) {
            c = chars[i]
            if (c == "+") {
                res = " "
            } else if (c == "%") {
                res = "\\x"
            } else {
                res = c ""
            }
            modified_href = modified_href res
        }
        print sprintf("%*s", (level-1)*indent, "") "* [" text "](" gh_url modified_href ")"
    }'

    # if closed <h[1-6]> is on the new line, then move it on the prev line
    # for example:
    #   was: The command <code>foo1</code>
    #        </h1>
    #   became: The command <code>foo1</code></h1>
    sed -e ':a' -e 'N' -e '$!ba' -e 's/\n<\/h/<\/h/g' |

    # find strings that corresponds to template
    "${grepcmd[@]}" '<a.*id="user-content-[^"]*".*</h[1-6]' |

    # remove code tags
    sed -e 's/<code>//g' -e 's/<\/code>//g' |

    # remove g-emoji
    sed 's/<g-emoji[^>]*[^<]*<\/g-emoji> //g' |

    # now all rows are like:
    #   <a id="user-content-..." href="..."><span ...></span></a> ... </h1
    # format result line
    #   * $0 - whole string
    #   * last element of each row: "</hN" where N in (1,2,3,...)
    echo "${echoargs[@]}" "$(awk -v "gh_url=$1" -v "indent=$2" -v "href_re=$href_re" "$awkscript")"
}
284

285
        # perl -lpE 's/(\[[^\]]*\]\()(.*?)(\))/my ($pre, $in, $post)=($1, $2, $3) ; $in =~ s{\+}{ }g; $in =~ s{%}{\\x}g; $pre.$in.$post/ems')"
286

287
#
# Returns filename only from full path or url
#
# $1 - full path or url.
#
# Prints everything that follows the last "/" of $1.
#
gh_toc_get_filename() {
    local full_path=$1
    echo "${full_path##*/}"
}
293

294
#
# Prints the version of this script followed by details about the
# environment: OS, architecture, kernel, shell, and the first line of
# "<tool> --version" for every external tool used by the script.
#
show_version() {
    echo "$gh_toc_version"
    echo
    echo "os:     $(uname -s)"
    echo "arch:   $(uname -m)"
    echo "kernel: $(uname -r)"
    echo "shell:  $("$SHELL" --version)"
    echo
    local tool
    for tool in curl wget grep awk sed; do
        printf "%-5s: " "$tool"
        if type "$tool" &>/dev/null; then
            # stdin is closed for the tool so that an implementation which
            # does not know --version cannot block waiting for input.
            echo "$("$tool" --version </dev/null | head -n 1)"
        else
            echo "not installed"
        fi
    done
}
311

312
#
# Prints the usage message: a banner with the script name and version, the
# supported invocations and the list of options.
#
show_help() {
    # Script name without the directory part; unlike basename this also
    # works when $0 starts with a dash.
    local app_name=${0##*/}
    cat <<EOF
GitHub TOC generator ($app_name): $gh_toc_version

Usage:
  $app_name [options] src [src]   Create TOC for a README file (url or local path)
  $app_name -                     Create TOC for markdown from STDIN
  $app_name --help                Show help
  $app_name --version             Show version

Options:
  --indent <NUM>      Set indent size. Default: 3.
  --insert            Insert new TOC into original file. For local files only. Default: false.
                      See https://github.com/ekalinin/github-markdown-toc/issues/41 for details.
  --no-backup         Remove backup file. Set --insert as well. Default: false.
  --hide-footer       Do not write date & author of the last TOC update. Set --insert as well. Default: false.
  --skip-header       Hide entry of the topmost headlines. Default: false.
                      See https://github.com/ekalinin/github-markdown-toc/issues/125 for details.

EOF
}
332

333
#
# Options handlers
#
# Parses the command line and prints a TOC for every given document.
#
#   --help (or no arguments at all)  print the usage message
#   --version                        print version information
#   --indent <NUM>, --insert, --no-backup, --hide-footer, --skip-header
#                                    options; accepted in any order in
#                                    front of the sources
#   -                                read markdown from stdin
#   src [src ...]                    urls or local paths
#
# Returns 1 when --indent is given without a value.
#
gh_toc_app() {
    local need_replace="no"
    # Declared locally so that same-named variables of the calling
    # environment cannot switch these options on.
    local no_backup=""
    local no_footer=""
    local skip_header=""
    local indent=3

    if [ "$1" = '--help' ] || [ $# -eq 0 ] ; then
        show_help
        return
    fi

    if [ "$1" = '--version' ]; then
        show_version
        return
    fi

    while [ $# -gt 0 ]; do
        case "$1" in
            --indent)
                if [ $# -lt 2 ]; then
                    echo "Option --indent requires a value." >&2
                    return 1
                fi
                indent="$2"
                shift 2
                ;;
            --insert)
                need_replace="yes"
                shift
                ;;
            --no-backup)
                need_replace="yes"
                no_backup="yes"
                shift
                ;;
            --hide-footer)
                need_replace="yes"
                no_footer="yes"
                shift
                ;;
            --skip-header)
                skip_header="yes"
                shift
                ;;
            *)
                # first source: everything that follows is a document
                break
                ;;
        esac
    done

    if [ "$1" = "-" ]; then
        local tmp_dir=${TMPDIR:-/tmp}
        if [ ! -d "$tmp_dir" ]; then
            mkdir -p "$tmp_dir"
        fi
        local gh_tmp_md
        if [ "$(uname -s)" == "OS/390" ]; then
            local timestamp
            timestamp=$(date +%m%d%Y%H%M%S)
            gh_tmp_md="$tmp_dir/tmp.$timestamp"
        else
            gh_tmp_md=$(mktemp "$tmp_dir/tmp.XXXXXX") || return 1
        fi
        # Copy stdin verbatim: leading whitespace and backslashes are
        # significant in markdown.
        cat > "$gh_tmp_md"
        gh_toc_md2html "$gh_tmp_md" | gh_toc_grab "" "$indent"
        local rc=$?
        rm -f "$gh_tmp_md"
        return "$rc"
    fi

    local md
    for md in "$@"; do
        echo ""
        gh_toc "$md" "$#" "$need_replace" "$no_backup" "$no_footer" "$indent" "$skip_header"
    done

    echo ""
    echo "<!-- Created by https://github.com/ekalinin/github-markdown-toc -->"
}
407

408
#
# Entry point: hand all command-line arguments over to the options handler.
#
gh_toc_app "$@"
412

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.