md2html.sh (7445B)
#!/bin/sh
# md2html.sh - Markdown to HTML converter
#
# usage:
#   ./md2html.sh index.md > index.html
#   cat index.md | ./md2html.sh > index.html
#
# Reads Markdown from the file named by the first argument (or from stdin when
# no argument is given) and writes HTML to stdout.
#
# Handled constructs: ATX (#) and Setext (===/---) headings, paragraphs,
# fenced code blocks (```), block quotes, unordered/ordered lists, horizontal
# rules, pipe tables, and inline images, links, bold, italic, strikethrough
# and code spans.
#
# The script is written for POSIX sh: there is no `local`, so the functions
# below communicate through the global variables documented on md2html().

# escape_html TEXT
# Print TEXT with the HTML-significant characters &, <, > and " replaced by
# entities. '&' is handled first so the entities produced by the later
# substitutions are not escaped a second time. In a sed replacement a bare '&'
# means "the matched text", hence the '\&'.
escape_html() {
  printf '%s' "$1" |
    sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g; s/"/\&quot;/g'
}

# inline_format TEXT
# Print TEXT as a single line of HTML (no trailing newline guaranteed):
# the text is HTML-escaped first, then inline Markdown is converted.
# Images are matched before links because an image is a link with a '!' prefix;
# '**'/'__' are matched before '*'/'_' so bold is not consumed as italic.
inline_format() {
  t=$(escape_html "$1")
  printf '%s' "$t" | sed \
    -e 's/!\[\([^]]*\)\](\([^)]*\))/<img alt="\1" src="\2">/g' \
    -e 's/\[\([^]]*\)\](\([^)]*\))/<a href="\2">\1<\/a>/g' \
    -e 's/\*\*\([^*][^*]*\)\*\*/<strong>\1<\/strong>/g' \
    -e 's/__\([^_][^_]*\)__/<strong>\1<\/strong>/g' \
    -e 's/~~\([^~][^~]*\)~~/<del>\1<\/del>/g' \
    -e 's/\*\([^*][^*]*\)\*/<em>\1<\/em>/g' \
    -e 's/_\([^_][^_]*\)_/<em>\1<\/em>/g' \
    -e 's/`\([^`]*\)`/<code>\1<\/code>/g'
}

# flush_lists
# Close whichever of <ul>/<ol> is currently open (globals in_ul, in_ol).
# Always returns 0.
flush_lists() {
  if [ "${in_ul:-0}" -eq 1 ]; then
    printf '%s\n' "</ul>"
    in_ul=0
  fi
  if [ "${in_ol:-0}" -eq 1 ]; then
    printf '%s\n' "</ol>"
    in_ol=0
  fi
}

# close_blockquote
# Close the current <blockquote>, if any (global in_blockquote).
# Always returns 0.
close_blockquote() {
  if [ "${in_blockquote:-0}" -eq 1 ]; then
    printf '%s\n' "</blockquote>"
    in_blockquote=0
  fi
}

# is_ordered_item LINE
# Succeed when LINE starts with one or more digits followed by ". ".
# A plain glob such as [0-9]*.\ * is not enough: it also matches prose like
# "2 apples. Yes", because '*' matches any characters, not just digits.
is_ordered_item() {
  _num=${1%%.*}                       # everything before the first '.'
  case "$_num" in
    '' | *[!0-9]*) return 1 ;;        # empty or contains a non-digit
  esac
  case "$1" in
    "$_num. "*) return 0 ;;
  esac
  return 1
}

# is_table_separator LINE
# Succeed when LINE is a table delimiter row: it contains at least one '|'
# and every non-empty cell looks like ---, :---, ---: or :---:.
# Empty cells (produced by leading/trailing pipes) are skipped, and at least
# one dash cell is required so that a blank line is never taken for a
# separator.
is_table_separator() {
  case "$1" in
    *'|'*) ;;
    *) return 1 ;;
  esac
  printf '%s\n' "$1" | awk -F'|' '
    {
      cells = 0
      for (i = 1; i <= NF; i++) {
        s = $i
        sub(/^[ \t]+/, "", s)
        sub(/[ \t]+$/, "", s)
        if (s == "") continue
        if (s !~ /^:?-+:?$/) exit 1
        cells++
      }
      exit (cells > 0 ? 0 : 1)
    }'
}

# emit_cells TAG ROW
# Split the table ROW on '|' and print each trimmed cell as <TAG>...</TAG>,
# all on one line with no trailing newline. One optional leading and one
# optional trailing pipe are removed first so "| a | b |" yields two cells,
# not four.
emit_cells() {
  printf '%s\n' "$2" |
    awk -F'|' '
      {
        sub(/^[ \t]*[|]/, "")         # editing $0 re-splits the fields
        sub(/[|][ \t]*$/, "")
        for (i = 1; i <= NF; i++) {
          s = $i
          sub(/^[ \t]+/, "", s)
          sub(/[ \t]+$/, "", s)
          print s
        }
      }' |
    while IFS= read -r cell; do
      printf '<%s>%s</%s>' "$1" "$(inline_format "$cell")" "$1"
    done
}

# md2html
# Convert Markdown on stdin to HTML on stdout. Always returns 0.
#
# Globals (all reset on entry):
#   in_code, in_ul, in_ol, in_blockquote  - 1 while that element is open
#   prev_line  - a plain-text line held back until the next line shows
#                whether it is a Setext heading or a paragraph
#   push, have_push - one-line pushback buffer; have_push is a separate flag
#                so that an empty line can be pushed back too
md2html() {
  in_code=0
  in_ul=0
  in_ol=0
  in_blockquote=0
  have_push=0
  push=''
  prev_line=''

  while :; do
    # Fetch the next line, preferring the pushback buffer.
    if [ "$have_push" -eq 1 ]; then
      line=$push
      push=''
      have_push=0
    elif ! IFS= read -r line; then
      # read fails at EOF but still stores a final line that lacks a
      # trailing newline; only stop when there is really nothing left.
      [ -n "$line" ] || break
    fi

    # Inside a fenced code block everything is literal until the closing
    # fence. This is checked before the blank-line rule so that blank lines
    # in code are preserved.
    if [ "$in_code" -eq 1 ]; then
      case "$line" in
        '```'*)
          printf '</code></pre>\n'
          in_code=0
          ;;
        *)
          escaped=$(escape_html "$line")
          printf '%s\n' "$escaped"
          ;;
      esac
      continue
    fi

    # Blank line: ends a pending paragraph and any block quote. Nothing is
    # printed for the blank line itself, and open lists stay open.
    if [ -z "$line" ]; then
      if [ -n "$prev_line" ]; then
        printf '%s\n' "<p>$(inline_format "$prev_line")</p>"
        prev_line=''
      fi
      close_blockquote
      continue
    fi

    # A held-back text line becomes a Setext heading when the current line
    # consists solely of two or more '=' (h1) or '-' (h2); otherwise it is
    # flushed as a paragraph and the current line is processed normally.
    if [ -n "$prev_line" ]; then
      # Trim leading/trailing blanks without spawning sed.
      trimmed=${line#"${line%%[![:blank:]]*}"}
      trimmed=${trimmed%"${trimmed##*[![:blank:]]}"}
      level=0
      case "$trimmed" in
        ==*) case "$trimmed" in *[!=]*) ;; *) level=1 ;; esac ;;
        --*) case "$trimmed" in *[!-]*) ;; *) level=2 ;; esac ;;
      esac
      if [ "$level" -ne 0 ]; then
        printf '%s\n' "<h${level}>$(inline_format "$prev_line")</h${level}>"
        prev_line=''
        continue
      fi
      printf '%s\n' "<p>$(inline_format "$prev_line")</p>"
      prev_line=''
    fi

    # Any line that does not start with '>' ends an open block quote.
    case "$line" in
      '>'*) ;;
      *) close_blockquote ;;
    esac

    # Opening code fence. No newline after the tags so the first code line
    # is not preceded by an empty line inside <pre>.
    case "$line" in
      '```'*)
        flush_lists
        close_blockquote
        printf '<pre><code>'
        in_code=1
        continue
        ;;
    esac

    # Table: a line containing '|' followed by a delimiter row.
    case "$line" in
      *'|'*)
        # Peek at the next line; have_sep records whether one exists.
        have_sep=1
        if ! IFS= read -r sep; then
          [ -n "$sep" ] || have_sep=0
        fi

        if [ "$have_sep" -eq 1 ] && is_table_separator "$sep"; then
          flush_lists
          close_blockquote

          printf '%s\n' "<table>"
          printf '%s\n' "<thead><tr>"
          emit_cells th "$line"
          printf '\n%s\n' "</tr></thead>"
          printf '%s\n' "<tbody>"

          # Body rows run until a blank line, EOF, or a line without '|'
          # (which is pushed back for normal processing).
          while IFS= read -r row || [ -n "$row" ]; do
            [ -n "$row" ] || break
            case "$row" in
              *'|'*) ;;
              *)
                push=$row
                have_push=1
                break
                ;;
            esac
            printf '%s\n' "<tr>"
            emit_cells td "$row"
            printf '\n%s\n' "</tr>"
          done

          printf '%s\n' "</tbody></table>"
          continue
        elif [ "$have_sep" -eq 1 ]; then
          # Not a table: hand the peeked line (even if empty) back.
          push=$sep
          have_push=1
        fi
        ;;
    esac

    # Horizontal rule.
    case "$line" in
      '---' | '***' | '___')
        flush_lists
        close_blockquote
        printf '%s\n' "<hr>"
        continue
        ;;
    esac

    # ATX headings: 1-6 '#' followed by a space. Longest prefix first.
    case "$line" in
      '###### '*) level=6 ;;
      '##### '*) level=5 ;;
      '#### '*) level=4 ;;
      '### '*) level=3 ;;
      '## '*) level=2 ;;
      '# '*) level=1 ;;
      *) level=0 ;;
    esac
    if [ "$level" -ne 0 ]; then
      flush_lists
      text=${line#*' '}               # drop the hashes and the first space
      printf '%s\n' "<h${level}>$(inline_format "$text")</h${level}>"
      continue
    fi

    # Block quote: each '>' line becomes its own paragraph.
    case "$line" in
      '>'*)
        flush_lists
        if [ "$in_blockquote" -eq 0 ]; then
          printf '%s\n' "<blockquote>"
          in_blockquote=1
        fi
        content=${line#'>'}
        content=${content#"${content%%[! ]*}"}   # strip spaces after '>'
        printf '%s\n' "<p>$(inline_format "$content")</p>"
        continue
        ;;
    esac

    # Unordered list item: '-', '+' or '*' followed by a space.
    case "$line" in
      [-+*]\ *)
        close_blockquote
        if [ "$in_ol" -eq 1 ]; then
          printf '%s\n' "</ol>"
          in_ol=0
        fi
        if [ "$in_ul" -eq 0 ]; then
          printf '%s\n' "<ul>"
          in_ul=1
        fi
        item=${line#? }
        printf '%s\n' "<li>$(inline_format "$item")</li>"
        continue
        ;;
    esac

    # Ordered list item: digits, '.', space.
    if is_ordered_item "$line"; then
      close_blockquote
      if [ "$in_ul" -eq 1 ]; then
        printf '%s\n' "</ul>"
        in_ul=0
      fi
      if [ "$in_ol" -eq 0 ]; then
        printf '%s\n' "<ol>"
        in_ol=1
      fi
      item=${line#*. }
      printf '%s\n' "<li>$(inline_format "$item")</li>"
      continue
    fi

    # Plain text: it ends any open list and is held back so the next line
    # can turn it into a Setext heading.
    flush_lists
    prev_line=$line
  done

  # End of input: flush whatever is still pending or open.
  if [ -n "$prev_line" ]; then
    printf '%s\n' "<p>$(inline_format "$prev_line")</p>"
    prev_line=''
  fi
  flush_lists
  close_blockquote
  if [ "$in_code" -eq 1 ]; then
    printf '</code></pre>\n'
    in_code=0
  fi
  return 0
}

# If a filename is given, read from it instead of stdin. The explicit
# `|| exit 1` matters for shells that do not abort on a failed exec
# redirection.
if [ $# -ge 1 ]; then
  exec <"$1" || exit 1
fi

md2html