Skip to content

Commit 5f3610b

Browse files
committed
Update TOC script to latest version
1 parent 8c0c872 commit 5f3610b

File tree

1 file changed

+178
-57
lines changed

1 file changed

+178
-57
lines changed

gh-md-toc

Lines changed: 178 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# substr($0, length($0), 1)
1515
#
1616
# 3.2 Get level from 3.1 and insert corresponding number of spaces before '*':
17-
# sprintf("%*s", substr($0, length($0), 1)*3, " ")
17+
# sprintf("%*s", (level-1)*'"$nb_spaces"', "")
1818
#
1919
# 4. Find head's text and insert it inside "* [ ... ]":
2020
# substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)
@@ -23,7 +23,7 @@
2323
# substr($0, match($0, "href=\"[^\"]+?\" ")+6, RLENGTH-8)
2424
#
2525

26-
gh_toc_version="0.6.2"
26+
gh_toc_version="0.9.0"
2727

2828
gh_user_agent="gh-md-toc v$gh_toc_version"
2929

@@ -47,10 +47,12 @@ gh_toc_load() {
4747
#
4848
# Converts local md file into html by GitHub
4949
#
50-
# curl -X POST --data '{"text": "Hello world github/linguist#1 **cool**, and #1!"}' https://api.github.com/markdown
50+
# -> curl -X POST --data '{"text": "Hello world github/linguist#1 **cool**, and #1!"}' https://api.github.com/markdown
5151
# <p>Hello world github/linguist#1 <strong>cool</strong>, and #1!</p>'"
5252
gh_toc_md2html() {
5353
local gh_file_md=$1
54+
local skip_header=$2
55+
5456
URL=https://api.github.com/markdown/raw
5557

5658
if [ ! -z "$GH_TOC_TOKEN" ]; then
@@ -62,14 +64,27 @@ gh_toc_md2html() {
6264
fi
6365
fi
6466
if [ ! -z "${TOKEN}" ]; then
65-
AUTHORIZATION="--header \"Authorization: token ${TOKEN}\""
67+
AUTHORIZATION="Authorization: token ${TOKEN}"
68+
fi
69+
70+
local gh_tmp_file_md=$gh_file_md
71+
if [ "$skip_header" = "yes" ]; then
72+
if grep -Fxq "<!--te-->" "$gh_src"; then
73+
# cut everything before the toc
74+
gh_tmp_file_md=$gh_file_md~~
75+
sed '1,/<!--te-->/d' $gh_file_md > $gh_tmp_file_md
76+
fi
6677
fi
6778

6879
# echo $URL 1>&2
69-
OUTPUT="$(curl -s --user-agent "$gh_user_agent" \
70-
--data-binary @"$gh_file_md" -H "Content-Type:text/plain" \
71-
${AUTHORIZATION} \
72-
$URL)"
80+
OUTPUT=$(curl -s \
81+
--user-agent "$gh_user_agent" \
82+
--data-binary @"$gh_tmp_file_md" \
83+
-H "Content-Type:text/plain" \
84+
-H "$AUTHORIZATION" \
85+
"$URL")
86+
87+
rm -f $gh_file_md~~
7388

7489
if [ "$?" != "0" ]; then
7590
echo "XXNetworkErrorXX"
@@ -103,6 +118,9 @@ gh_toc(){
103118
local gh_ttl_docs=$2
104119
local need_replace=$3
105120
local no_backup=$4
121+
local no_footer=$5
122+
local indent=$6
123+
local skip_header=$7
106124

107125
if [ "$gh_src" = "" ]; then
108126
echo "Please, enter URL or local path for a README.md"
@@ -121,7 +139,7 @@ gh_toc(){
121139
fi
122140

123141
if [ "$(gh_is_url "$gh_src")" == "yes" ]; then
124-
gh_toc_load "$gh_src" | gh_toc_grab "$gh_src_copy"
142+
gh_toc_load "$gh_src" | gh_toc_grab "$gh_src_copy" "$indent"
125143
if [ "${PIPESTATUS[0]}" != "0" ]; then
126144
echo "Could not load remote document."
127145
echo "Please check your url or network connectivity"
@@ -134,7 +152,7 @@ gh_toc(){
134152
echo
135153
fi
136154
else
137-
local rawhtml=$(gh_toc_md2html "$gh_src")
155+
local rawhtml=$(gh_toc_md2html "$gh_src" "$skip_header")
138156
if [ "$rawhtml" == "XXNetworkErrorXX" ]; then
139157
echo "Parsing local markdown file requires access to github API"
140158
echo "Please make sure curl is installed and check your network connectivity"
@@ -147,10 +165,10 @@ gh_toc(){
147165
echo "or place GitHub auth token here: ${TOKEN_FILE}"
148166
exit 1
149167
fi
150-
local toc=`echo "$rawhtml" | gh_toc_grab "$gh_src_copy"`
168+
local toc=`echo "$rawhtml" | gh_toc_grab "$gh_src_copy" "$indent"`
151169
echo "$toc"
152170
if [ "$need_replace" = "yes" ]; then
153-
if grep -Fxq "<!--ts-->" $gh_src && grep -Fxq "<!--te-->" $gh_src; then
171+
if grep -Fxq "<!--ts-->" "$gh_src" && grep -Fxq "<!--te-->" "$gh_src"; then
154172
echo "Found markers"
155173
else
156174
echo "You don't have <!--ts--> or <!--te--> in your file...exiting"
@@ -161,28 +179,32 @@ gh_toc(){
161179
local dt=`date +'%F_%H%M%S'`
162180
local ext=".orig.${dt}"
163181
local toc_path="${gh_src}.toc.${dt}"
182+
local toc_createdby="<!-- Created by https://github.com/ekalinin/github-markdown-toc -->"
164183
local toc_footer="<!-- Added by: `whoami`, at: `date` -->"
165184
# http://fahdshariff.blogspot.ru/2012/12/sed-mutli-line-replacement-between-two.html
166185
# clear old TOC
167186
sed -i${ext} "/${ts}/,/${te}/{//!d;}" "$gh_src"
168187
# create toc file
169188
echo "${toc}" > "${toc_path}"
170-
echo -e "\n${toc_footer}\n" >> "$toc_path"
189+
if [ "${no_footer}" != "yes" ]; then
190+
echo -e "\n${toc_createdby}\n${toc_footer}\n" >> "$toc_path"
191+
fi
192+
171193
# insert toc file
172-
if [[ "`uname`" == "Darwin" ]]; then
194+
if ! sed --version > /dev/null 2>&1; then
173195
sed -i "" "/${ts}/r ${toc_path}" "$gh_src"
174196
else
175197
sed -i "/${ts}/r ${toc_path}" "$gh_src"
176198
fi
177199
echo
178-
if [ $no_backup = "yes" ]; then
179-
rm ${toc_path} ${gh_src}${ext}
200+
if [ "${no_backup}" = "yes" ]; then
201+
rm "$toc_path" "$gh_src$ext"
180202
fi
181203
echo "!! TOC was added into: '$gh_src'"
182-
if [ -z $no_backup ]; then
204+
if [ -z "${no_backup}" ]; then
183205
echo "!! Origin version of the file: '${gh_src}${ext}'"
184206
echo "!! TOC added into a separate file: '${toc_path}'"
185-
fi
207+
fi
186208
echo
187209
fi
188210
fi
@@ -191,85 +213,171 @@ gh_toc(){
191213
#
192214
# Grabber of the TOC from rendered html
193215
#
194-
# $1 — a source url of document.
195-
# It's need if TOC is generated for multiple documents.
216+
# $1 - a source url of document.
217+
# It's needed if TOC is generated for multiple documents.
218+
# $2 - number of spaces used to indent.
196219
#
197220
gh_toc_grab() {
    # Reads GitHub-rendered HTML on stdin and prints one markdown TOC entry
    # ("<indent>* [text](url#anchor)") per heading on stdout.
    #
    #   $1 - source url of the document, prepended to every anchor
    #        (may be empty).
    #   $2 - number of spaces used per nesting level. Defaults to 3 when
    #        omitted or empty, so the awk program never receives an empty
    #        multiplier.
    local gh_url=$1
    local nb_spaces=${2:-3}

    # OS/390 has no "grep -o" and its echo does not take -e, so pick the
    # extractor and the echo flags per platform. Arrays keep the words
    # separate without relying on unquoted word splitting.
    local -a grep_cmd
    local -a echo_args
    if [ "$(uname -s)" == "OS/390" ]; then
        grep_cmd=(pcregrep -o)
        echo_args=()
    else
        grep_cmd=(grep -Eo)
        echo_args=(-e)
    fi

    # One awk program for every platform. Values from the shell arrive via
    # -v (gh_url, nb_spaces) instead of being spliced into the program text.
    #   * level - the N of the leading "<hN"
    #   * text  - heading text located between the heading link's opening
    #             tag and the "<span aria-hidden" that follows it
    #   * href  - value of the first href attribute
    # In href every "+" becomes a space and every "%" becomes "\x", so that
    # "echo -e" below turns %HH escapes back into the characters they encode.
    local awk_script='{
        level = substr($0, 3, 1)
        lefttext = substr($0, match($0, /(<a class="heading-link".*?>)(.*)(<span aria-hidden)/)+1, RLENGTH-18)
        text = substr(lefttext, match(lefttext, />.*/)+1)
        href = substr($0, match($0, /href="[^"]+?"/)+6, RLENGTH-7)

        modified_href = ""
        href_len = length(href)
        for (i = 1; i <= href_len; i++) {
            c = substr(href, i, 1)
            if (c == "+") {
                c = " "
            } else if (c == "%") {
                c = "\\x"
            }
            modified_href = modified_href c
        }

        print sprintf("%*s", (level-1)*nb_spaces, "") "* [" text "](" gh_url modified_href ")"
    }'

    # if closed <h[1-6]> is on the new line, then move it on the prev line
    # for example:
    #   was:    The command <code>foo1</code>
    #           </h1>
    #   became: The command <code>foo1</code></h1>
    sed -e ':a' -e 'N' -e '$!ba' -e 's/\n<\/h/<\/h/g' |

    # Sometimes a line can start with <span>. Fix that.
    sed -e ':a' -e 'N' -e '$!ba' -e 's/\n<span/<span/g' |

    # find strings that corresponds to template
    "${grep_cmd[@]}" '<h.*id="user-content-[^"]*".*</h[1-6]' |

    # remove code tags
    sed -e 's/<code>//g' -e 's/<\/code>//g' |

    # remove g-emoji
    sed 's/<g-emoji[^>]*[^<]*<\/g-emoji> //g' |

    # now all rows are like:
    #   <h1 id="user-content-..."><a href="..."> ... <span ...></span></a></h1
    # format result line; awk inside the substitution consumes the pipe.
    echo "${echo_args[@]}" "$(awk -v "gh_url=${gh_url}" -v "nb_spaces=${nb_spaces}" "$awk_script")"
}
220293

294+
# perl -lpE 's/(\[[^\]]*\]\()(.*?)(\))/my ($pre, $in, $post)=($1, $2, $3) ; $in =~ s{\+}{ }g; $in =~ s{%}{\\x}g; $pre.$in.$post/ems')"
295+
221296
#
222297
# Returns filename only from full path or url
223298
#
224299
gh_toc_get_filename() {
    # Drop the longest prefix ending in "/" so only the last path/url
    # component is left. printf is used instead of echo so that a component
    # such as "-n" or "-e" is printed verbatim rather than being taken as an
    # echo option.
    printf '%s\n' "${1##*/}"
}
227302

303+
#
# Prints the script version followed by details about the host environment:
# os, architecture, kernel, login shell, and the first line of "--version"
# output for each external tool listed in the loop below.
#
# Reads the global $gh_toc_version. Writes to stdout only.
#
show_version() {
    echo "$gh_toc_version"
    echo
    echo "os: $(uname -s)"
    echo "arch: $(uname -m)"
    echo "kernel: $(uname -r)"

    # $SHELL can be unset (cron, minimal containers); only query it when it
    # names something, otherwise "--version" itself would be run as a command.
    local shell_version=""
    if [ -n "${SHELL:-}" ]; then
        shell_version=$("$SHELL" --version)
    fi
    echo "shell: ${shell_version}"
    echo

    local tool
    for tool in curl wget grep awk sed; do
        printf "%-5s: " "$tool"
        # command -v reports availability through its exit status alone.
        if command -v "$tool" > /dev/null 2>&1; then
            echo "$("$tool" --version | head -n 1)"
        else
            echo "not installed"
        fi
    done
}
320+
321+
#
# Prints the usage summary and the list of supported options to stdout.
# The program name shown is the basename of $0; the version comes from the
# global $gh_toc_version.
#
show_help() {
    local prog=$(basename "$0")

    # One argument per output line; printf repeats the format for each.
    printf '%s\n' \
        "GitHub TOC generator ($prog): $gh_toc_version" \
        "" \
        "Usage:" \
        " $prog [options] src [src] Create TOC for a README file (url or local path)" \
        " $prog - Create TOC for markdown from STDIN" \
        " $prog --help Show help" \
        " $prog --version Show version" \
        "" \
        "Options:" \
        " --indent <NUM> Set indent size. Default: 3." \
        " --insert Insert new TOC into original file. For local files only. Default: false." \
        " See https://github.com/ekalinin/github-markdown-toc/issues/41 for details." \
        " --no-backup Remove backup file. Set --insert as well. Default: false." \
        " --hide-footer Do not write date & author of the last TOC update. Set --insert as well. Default: false." \
        " --skip-header Hide entry of the topmost headlines. Default: false." \
        " See https://github.com/ekalinin/github-markdown-toc/issues/125 for details." \
        ""
}
341+
228342
#
229343
# Options handlers
230344
#
231345
gh_toc_app() {
232346
local need_replace="no"
347+
local indent=3
233348

234349
if [ "$1" = '--help' ] || [ $# -eq 0 ] ; then
235-
local app_name=$(basename "$0")
236-
echo "GitHub TOC generator ($app_name): $gh_toc_version"
237-
echo ""
238-
echo "Usage:"
239-
echo " $app_name [--insert] src [src] Create TOC for a README file (url or local path)"
240-
echo " $app_name [--no-backup] src [src] Create TOC without backup, requires <!--ts--> / <!--te--> placeholders"
241-
echo " $app_name - Create TOC for markdown from STDIN"
242-
echo " $app_name --help Show help"
243-
echo " $app_name --version Show version"
350+
show_help
244351
return
245352
fi
246353

247354
if [ "$1" = '--version' ]; then
248-
echo "$gh_toc_version"
249-
echo
250-
echo "os: `lsb_release -d | cut -f 2`"
251-
echo "kernel: `cat /proc/version`"
252-
echo "shell: `$SHELL --version`"
253-
echo
254-
for tool in curl wget grep awk sed; do
255-
printf "%-5s: " $tool
256-
echo `$tool --version | head -n 1`
257-
done
355+
show_version
258356
return
259357
fi
260358

359+
if [ "$1" = '--indent' ]; then
360+
indent="$2"
361+
shift 2
362+
fi
363+
261364
if [ "$1" = "-" ]; then
262365
if [ -z "$TMPDIR" ]; then
263366
TMPDIR="/tmp"
264367
elif [ -n "$TMPDIR" -a ! -d "$TMPDIR" ]; then
265368
mkdir -p "$TMPDIR"
266369
fi
267370
local gh_tmp_md
268-
gh_tmp_md=$(mktemp $TMPDIR/tmp.XXXXXX)
371+
if [ `uname -s` == "OS/390" ]; then
372+
local timestamp=$(date +%m%d%Y%H%M%S)
373+
gh_tmp_md="$TMPDIR/tmp.$timestamp"
374+
else
375+
gh_tmp_md=$(mktemp $TMPDIR/tmp.XXXXXX)
376+
fi
269377
while read input; do
270378
echo "$input" >> "$gh_tmp_md"
271379
done
272-
gh_toc_md2html "$gh_tmp_md" | gh_toc_grab ""
380+
gh_toc_md2html "$gh_tmp_md" | gh_toc_grab "" "$indent"
273381
return
274382
fi
275383

@@ -283,14 +391,27 @@ gh_toc_app() {
283391
no_backup="yes"
284392
shift
285393
fi
394+
395+
if [ "$1" = '--hide-footer' ]; then
396+
need_replace="yes"
397+
no_footer="yes"
398+
shift
399+
fi
400+
401+
if [ "$1" = '--skip-header' ]; then
402+
skip_header="yes"
403+
shift
404+
fi
405+
406+
286407
for md in "$@"
287408
do
288409
echo ""
289-
gh_toc "$md" "$#" "$need_replace" "$no_backup"
410+
gh_toc "$md" "$#" "$need_replace" "$no_backup" "$no_footer" "$indent" "$skip_header"
290411
done
291412

292413
echo ""
293-
echo "Created by [gh-md-toc](https://github.com/ekalinin/github-markdown-toc)"
414+
echo "<!-- Created by https://github.com/ekalinin/github-markdown-toc -->"
294415
}
295416

296417
#

0 commit comments

Comments
 (0)