#!/usr/bin/env bash
#
# Awesome-LLMOps
# 411 lines · 12.1 KB
#
# Steps:
#
#  1. Download the rendered HTML for some README.md:
#       curl -s $1
#
#  2. Discard rows that do not contain the substring 'user-content-'
#     (GitHub's markup):
#       awk '/user-content-/ { ...
#
#  3.1 Take the last character of each row, e.g.
#      ' ... </span></a>sitemap.js</h1'.
#      It is the level of the current header:
#       substr($0, length($0), 1)
#
#  3.2 Use the level from 3.1 to insert the corresponding number of spaces
#      before '*':
#       sprintf("%*s", (level-1)*'"$nb_spaces"', "")
#
#  4. Find the header's text and insert it inside "* [ ... ]":
#       substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)
#
#  5. Find the anchor and insert it inside "(...)":
#       substr($0, match($0, "href=\"[^\"]+?\"")+6, RLENGTH-7)
#

# Version of this tool; it is also embedded in the User-Agent string below.
gh_toc_version="0.8.0"

# User-Agent string passed to every curl/wget request made by this script.
gh_user_agent="gh-md-toc v$gh_toc_version"

#
# Download the HTML rendering of a README.md by its URL.
#
# Arguments: $1 - URL of the document
# Globals:   gh_user_agent (read)
# Outputs:   response body on stdout, diagnostics on stderr
# Returns:   exit status of curl/wget; exits with 1 when neither is installed
#
gh_toc_load() {
    local gh_url=$1

    if command -v curl >/dev/null 2>&1; then
        # --fail:     surface HTTP errors (404, 500, ...) through the exit
        #             status instead of handing an error page to the parser.
        # --location: follow redirects, which wget already does by default.
        curl --fail --location --user-agent "$gh_user_agent" -s "$gh_url"
    elif command -v wget >/dev/null 2>&1; then
        wget --user-agent="$gh_user_agent" -qO- "$gh_url"
    else
        # stdout of this function is piped into the TOC parser, so the
        # diagnostic has to go to stderr.
        echo "Please, install 'curl' or 'wget' and try again." >&2
        exit 1
    fi
}

#
# Convert a local markdown file into HTML through the GitHub API.
#
# Example of the API call:
#   -> curl -X POST --data '{"text": "Hello world github/linguist#1 **cool**, and #1!"}' https://api.github.com/markdown
#   <p>Hello world github/linguist#1 <strong>cool</strong>, and #1!</p>
#
# Arguments: $1 - path of the markdown file
#            $2 - "yes" to drop everything up to and including the first
#                 "<!--te-->" line before converting (only when the file
#                 contains such a line)
# Globals:   gh_user_agent (read), GH_TOC_TOKEN (read)
# Outputs:   the rendered HTML, or one of the sentinels "XXNetworkErrorXX"
#            (curl failed) / "XXRateLimitXX" (response mentions the API rate
#            limit)
# Returns:   1 when curl failed, 0 otherwise
#
gh_toc_md2html() {
    local gh_file_md=$1
    local skip_header=$2

    local url="https://api.github.com/markdown/raw"

    # The token comes from the environment or from token.txt next to the script.
    local token="" token_file
    if [ -n "$GH_TOC_TOKEN" ]; then
        token=$GH_TOC_TOKEN
    else
        token_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/token.txt"
        if [ -f "$token_file" ]; then
            token="$(cat "$token_file")"
        fi
    fi

    local -a curl_args
    curl_args=(-s --user-agent "$gh_user_agent" -H "Content-Type:text/plain")
    if [ -n "$token" ]; then
        # Only add the header when there is a token; no empty -H is sent.
        curl_args+=(-H "Authorization: token ${token}")
    fi

    local gh_tmp_file_md=$gh_file_md
    if [ "$skip_header" = "yes" ] && grep -Fxq "<!--te-->" "$gh_file_md"; then
        # cut everything before the toc
        gh_tmp_file_md="${gh_file_md}~~"
        sed '1,/<!--te-->/d' "$gh_file_md" > "$gh_tmp_file_md"
    fi

    local output curl_status
    output=$(curl "${curl_args[@]}" --data-binary @"$gh_tmp_file_md" "$url")
    # Capture the status now: the cleanup below would overwrite $?.
    curl_status=$?

    rm -f -- "${gh_file_md}~~"

    if [ "$curl_status" != "0" ]; then
        echo "XXNetworkErrorXX"
        return 1
    fi
    if [[ "$output" == *"API rate limit exceeded"* ]]; then
        echo "XXRateLimitXX"
    else
        echo "${output}"
    fi
}

#
# Tell whether the passed string is an http(s) URL.
#
# Arguments: $1 - string to classify
# Outputs:   "yes" when $1 starts with "http://" or "https://", else "no"
#
gh_is_url() {
    case "$1" in
        # Require the "://" part so that local files such as "http-notes.md"
        # are not mistaken for URLs.
        http://* | https://*)
            echo "yes" ;;
        *)
            echo "no" ;;
    esac
}

#
# TOC generator
#
# Arguments: $1 - URL or local path of the markdown document
#            $2 - total number of documents being processed; the
#                 "Table of Contents" title is printed only when it is 1
#            $3 - "yes" to insert the TOC into the (local) file between the
#                 <!--ts--> and <!--te--> marker lines
#            $4 - "yes" to remove the backup and the separate TOC file
#            $5 - "yes" to omit the "Created by / Added by" footer
#            $6 - number of spaces per indent level
#            $7 - "yes" to skip everything before the existing TOC
# Outputs:   the TOC and progress messages on stdout, fatal errors on stderr
# Returns:   exits with 1 on any fatal error
#
gh_toc(){
    # NOTE: the name gh_src is kept as is; helpers called from here can see
    # it through bash's dynamic scoping.
    local gh_src=$1
    local gh_src_copy=$1
    local gh_ttl_docs=$2
    local need_replace=$3
    local no_backup=$4
    local no_footer=$5
    local indent=$6
    local skip_header=$7

    if [ "$gh_src" = "" ]; then
        echo "Please, enter URL or local path for a README.md" >&2
        exit 1
    fi

    # Show "TOC" string only if working with one document
    if [ "$gh_ttl_docs" = "1" ]; then
        echo "Table of Contents"
        echo "================="
        echo ""
        # With a single document the links stay relative (no URL prefix).
        gh_src_copy=""
    fi

    if [ "$(gh_is_url "$gh_src")" == "yes" ]; then
        gh_toc_load "$gh_src" | gh_toc_grab "$gh_src_copy" "$indent"
        if [ "${PIPESTATUS[0]}" != "0" ]; then
            echo "Could not load remote document." >&2
            echo "Please check your url or network connectivity" >&2
            exit 1
        fi
        if [ "$need_replace" = "yes" ]; then
            echo
            echo "!! '$gh_src' is not a local file"
            echo "!! Can't insert the TOC into it."
            echo
        fi
        return 0
    fi

    # Local file from here on.
    if [ ! -r "$gh_src" ]; then
        echo "Could not read local file: '$gh_src'" >&2
        exit 1
    fi

    local rawhtml
    rawhtml=$(gh_toc_md2html "$gh_src" "$skip_header")
    if [ "$rawhtml" == "XXNetworkErrorXX" ]; then
        echo "Parsing local markdown file requires access to github API" >&2
        echo "Please make sure curl is installed and check your network connectivity" >&2
        exit 1
    fi
    if [ "$rawhtml" == "XXRateLimitXX" ]; then
        local token_file
        token_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/token.txt"
        echo "Parsing local markdown file requires access to github API" >&2
        echo "Error: You exceeded the hourly limit. See: https://developer.github.com/v3/#rate-limiting" >&2
        echo "or place GitHub auth token here: ${token_file}" >&2
        exit 1
    fi

    local toc
    toc=$(echo "$rawhtml" | gh_toc_grab "$gh_src_copy" "$indent")
    echo "$toc"

    if [ "$need_replace" != "yes" ]; then
        return 0
    fi

    if grep -Fxq "<!--ts-->" "$gh_src" && grep -Fxq "<!--te-->" "$gh_src"; then
        echo "Found markers"
    else
        echo "You don't have <!--ts--> or <!--te--> in your file...exiting" >&2
        exit 1
    fi

    local ts="<!--ts-->"
    local te="<!--te-->"
    local dt ext toc_path toc_footer
    dt=$(date +'%F_%H%M%S')
    ext=".orig.${dt}"
    toc_path="${gh_src}.toc.${dt}"
    local toc_createdby="<!-- Created by https://github.com/ekalinin/github-markdown-toc -->"
    toc_footer="<!-- Added by: $(whoami), at: $(date) -->"

    # http://fahdshariff.blogspot.ru/2012/12/sed-mutli-line-replacement-between-two.html
    # clear old TOC: delete the lines between the markers, keep the markers;
    # the untouched original is saved with the ${ext} suffix.
    sed -i"${ext}" "/${ts}/,/${te}/{//!d;}" "$gh_src"

    # create toc file
    echo "${toc}" > "${toc_path}"
    if [ "${no_footer}" != "yes" ]; then
        echo -e "\n${toc_createdby}\n${toc_footer}\n" >> "$toc_path"
    fi

    # insert toc file
    # "sed --version" succeeds on GNU sed only; other seds (e.g. BSD) need an
    # explicit, separate (empty) backup suffix after -i.
    if ! sed --version > /dev/null 2>&1; then
        sed -i "" "/${ts}/r ${toc_path}" "$gh_src"
    else
        sed -i "/${ts}/r ${toc_path}" "$gh_src"
    fi

    echo
    if [ "${no_backup}" = "yes" ]; then
        rm -- "${toc_path}" "${gh_src}${ext}"
    fi
    echo "!! TOC was added into: '$gh_src'"
    if [ -z "${no_backup}" ]; then
        echo "!! Origin version of the file: '${gh_src}${ext}'"
        echo "!! TOC added into a separate file: '${toc_path}'"
    fi
    echo
}

#
# Grabber of the TOC from rendered html (read from stdin)
#
# $1 - a source url of document.
#      It is needed if the TOC is generated for multiple documents;
#      it is prepended to every anchor.
# $2 - number of spaces used to indent (defaults to 3 when empty).
#
# NOTE(review): the parsing expects every heading line to look like
#   <a id="user-content-..." href="..."><span ...></span></a> text </hN>
# i.e. the anchor precedes the heading text -- confirm against the markup
# GitHub currently produces.
#
gh_toc_grab() {
    local gh_url=$1
    local indent=${2:-3}

    local -a grep_cmd echo_args
    if [ "$(uname -s)" == "OS/390" ]; then
        grep_cmd=(pcregrep -o)
        echo_args=()
    else
        grep_cmd=(grep -Eo)
        echo_args=(-e)
    fi

    # gh_url and indent are passed with "awk -v" instead of being spliced
    # into the program text.
    local awk_script='{
        # the row ends with "</hN": the last character is the header level
        level = substr($0, length($0), 1)
        text = substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)
        href = substr($0, match($0, "href=\"[^\"]*\"")+6, RLENGTH-7)

        # "+" becomes a space and "%" becomes "\x", so that the final echo
        # turns percent-encoded bytes back into raw bytes
        modified_href = ""
        n = length(href)
        for (i = 1; i <= n; i++) {
            c = substr(href, i, 1)
            if (c == "+") {
                c = " "
            } else if (c == "%") {
                c = "\\x"
            }
            modified_href = modified_href c
        }
        print sprintf("%*s", (level-1)*indent, "") "* [" text "](" gh_url modified_href ")"
    }'

    # if closed <h[1-6]> is on the new line, then move it on the prev line
    # for example:
    #   was:    The command <code>foo1</code>
    #           </h1>
    #   became: The command <code>foo1</code></h1>
    # ("$!N" rather than "N": N on the last line may discard the pattern
    # space on a strict POSIX sed.)
    sed -e ':a' -e '$!N' -e '$!ba' -e 's/\n<\/h/<\/h/g' |

    # find strings that corresponds to template
    "${grep_cmd[@]}" '<a.*id="user-content-[^"]*".*</h[1-6]' |

    # remove code tags
    sed -e 's/<code>//g' -e 's/<\/code>//g' |

    # remove g-emoji
    sed 's/<g-emoji[^>]*[^<]*<\/g-emoji> //g' |

    # now all rows are like:
    #   <a id="user-content-..." href="..."><span ...></span></a> ... </h1
    # format result line
    #   * $0 - whole string
    #   * last element of each row: "</hN" where N in (1,2,3,...)
    echo "${echo_args[@]}" "$(awk -v "gh_url=$gh_url" -v "indent=$indent" "$awk_script")"
}

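# Unused alternative kept for reference (a Perl one-liner for the same href
# post-processing: "+" -> " " and "%" -> "\x"):
# perl -lpE 's/(\[[^\]]*\]\()(.*?)(\))/my ($pre, $in, $post)=($1, $2, $3) ; $in =~ s{\+}{ }g; $in =~ s{%}{\\x}g; $pre.$in.$post/ems')"
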
#
# Returns filename only from full path or url
#
# Arguments: $1 - path or URL
# Outputs:   everything after the last "/" of $1 (all of $1 if it has none)
#
gh_toc_get_filename() {
    # printf instead of echo: a name such as "-n" or "-e" must be printed,
    # not interpreted as an echo option.
    printf '%s\n' "${1##*/}"
}

#
# Print the tool version followed by details about the environment:
# OS, architecture, kernel, the user's $SHELL and the first line of
# "--version" for each external tool the script relies on.
#
# Globals: gh_toc_version (read), SHELL (read)
#
show_version() {
    echo "$gh_toc_version"
    echo
    echo "os: $(uname -s)"
    echo "arch: $(uname -m)"
    echo "kernel: $(uname -r)"
    echo "shell: $("$SHELL" --version)"
    echo
    local tool
    for tool in curl wget grep awk sed; do
        printf "%-5s: " "$tool"
        # Test for the tool directly instead of executing the (empty) output
        # of a command substitution.
        if command -v "$tool" >/dev/null 2>&1; then
            echo "$("$tool" --version | head -n 1)"
        else
            echo "not installed"
        fi
    done
}

#
# Print the usage message.
#
# Globals: gh_toc_version (read)
#
show_help() {
    # ${0##*/} instead of basename: it also works when $0 starts with "-".
    local app_name=${0##*/}
    printf '%s\n' \
        "GitHub TOC generator ($app_name): $gh_toc_version" \
        "" \
        "Usage:" \
        " $app_name [options] src [src] Create TOC for a README file (url or local path)" \
        " $app_name - Create TOC for markdown from STDIN" \
        " $app_name --help Show help" \
        " $app_name --version Show version" \
        "" \
        "Options:" \
        " --indent <NUM> Set indent size. Default: 3." \
        " --insert Insert new TOC into original file. For local files only. Default: false." \
        " See https://github.com/ekalinin/github-markdown-toc/issues/41 for details." \
        " --no-backup Remove backup file. Set --insert as well. Default: false." \
        " --hide-footer Do not write date & author of the last TOC update. Set --insert as well. Default: false." \
        " --skip-header Hide entry of the topmost headlines. Default: false." \
        " See https://github.com/ekalinin/github-markdown-toc/issues/125 for details." \
        ""
}

#
# Options handlers
#
# Parses the command line and generates a TOC for every remaining argument
# (or for STDIN when the first non-option argument is "-").
#
# Arguments: "$@" - see show_help. "--help" / "--version" are recognised as
#            the first argument only; the other options may come in any
#            order before the sources.
# Returns:   1 when --indent has no value; otherwise the status of the last
#            command run
#
gh_toc_app() {
    local need_replace="no"
    local indent=3
    local no_backup="" no_footer="" skip_header=""

    if [ "$1" = '--help' ] || [ $# -eq 0 ]; then
        show_help
        return
    fi

    if [ "$1" = '--version' ]; then
        show_version
        return
    fi

    while [ $# -gt 0 ]; do
        case "$1" in
            --indent)
                if [ $# -lt 2 ]; then
                    echo "Option --indent requires a value" >&2
                    return 1
                fi
                indent="$2"
                shift 2
                ;;
            --insert)
                need_replace="yes"
                shift
                ;;
            --no-backup)
                need_replace="yes"
                no_backup="yes"
                shift
                ;;
            --hide-footer)
                need_replace="yes"
                no_footer="yes"
                shift
                ;;
            --skip-header)
                skip_header="yes"
                shift
                ;;
            *)
                # first source (or "-"): stop option parsing
                break
                ;;
        esac
    done

    if [ "$1" = "-" ]; then
        # STDIN mode: only the indent setting is used here.
        local tmp_dir=${TMPDIR:-/tmp}
        if [ ! -d "$tmp_dir" ]; then
            mkdir -p "$tmp_dir"
        fi
        local gh_tmp_md
        if [ "$(uname -s)" == "OS/390" ]; then
            gh_tmp_md="$tmp_dir/tmp.$(date +%m%d%Y%H%M%S)"
        else
            gh_tmp_md=$(mktemp "$tmp_dir/tmp.XXXXXX") || return 1
        fi
        # Copy STDIN verbatim: keeps leading whitespace, backslashes and a
        # final line without a trailing newline.
        cat > "$gh_tmp_md"
        gh_toc_md2html "$gh_tmp_md" | gh_toc_grab "" "$indent"
        rm -f -- "$gh_tmp_md"
        return
    fi

    local md
    for md in "$@"; do
        echo ""
        gh_toc "$md" "$#" "$need_replace" "$no_backup" "$no_footer" "$indent" "$skip_header"
    done

    echo ""
    echo "<!-- Created by https://github.com/ekalinin/github-markdown-toc -->"
}

#
# Entry point: hand all command-line arguments to the option handler.
#
gh_toc_app "$@"