md2html

Unnamed repository; edit this file 'description' to name the repository.
git clone https://codeberg.org/emmett1/md2html
Log | Files | Refs | README | LICENSE

md2html.sh (7445B)


      1 #!/bin/sh
      2 # md2html.sh - Markdown to HTML converter
      3 #
      4 # usage:
      5 #   ./md2html.sh index.md > index.html
      6 #   cat index.md | ./md2html.sh > index.html
      7 
      # escape_html TEXT
      # Print TEXT with the HTML-significant characters &, <, > and " replaced by
      # their entities.
      #
      # The double quote is escaped too: the result is interpolated into quoted
      # attribute values (alt="...", src="...", href="..."), where a raw quote
      # would terminate the value and let the input inject extra attributes.
      escape_html() {
      	# '&' must be rewritten first so the entities produced afterwards stay intact
      	printf '%s' "$1" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g; s/"/\&quot;/g'
      }
     12 
     # inline_format TEXT
     # Convert the inline Markdown in TEXT to HTML and print it on one line.
     # TEXT is HTML-escaped first; the substitutions then run in this order:
     # images, links, bold (** and __), strikethrough, italic (* and _), and
     # finally backtick code spans.
     inline_format() {
     	escaped=$(escape_html "$1")
     	printf '%s' "$escaped" | sed \
     	  -e 's|!\[\([^]]*\)\](\([^)]*\))|<img alt="\1" src="\2">|g' \
     	  -e 's|\[\([^]]*\)\](\([^)]*\))|<a href="\2">\1</a>|g' \
     	  -e 's|\*\*\([^*][^*]*\)\*\*|<strong>\1</strong>|g' \
     	  -e 's|__\([^_][^_]*\)__|<strong>\1</strong>|g' \
     	  -e 's|~~\([^~][^~]*\)~~|<del>\1</del>|g' \
     	  -e 's|\*\([^*][^*]*\)\*|<em>\1</em>|g' \
     	  -e 's|_\([^_][^_]*\)_|<em>\1</em>|g' \
     	  -e 's|`\([^`]*\)`|<code>\1</code>|g'
     }
     26 
     # flush_lists
     # Close whichever list is currently open.
     # Globals: in_ul, in_ol - read; reset to 0 once the closing tag is printed.
     #          An unset or empty flag is treated as 0.
     # Outputs: "</ul>" and/or "</ol>" on stdout, one per line.
     # Returns: 0 always. Plain `if` blocks are used instead of an `&&` chain
     #          so that "nothing to close" is not reported as a failure.
     flush_lists() {
     	if [ "${in_ul:-0}" -eq 1 ]; then
     		printf '%s\n' "</ul>"
     		in_ul=0
     	fi
     	if [ "${in_ol:-0}" -eq 1 ]; then
     		printf '%s\n' "</ol>"
     		in_ol=0
     	fi
     	return 0
     }
     31 
     # close_blockquote
     # Close the blockquote if one is open.
     # Globals: in_blockquote - read; reset to 0 once the closing tag is printed.
     #          An unset or empty flag is treated as 0.
     # Outputs: "</blockquote>" on stdout when a blockquote was open.
     # Returns: 0 always, so a call with nothing to close is not a failure.
     close_blockquote() {
     	if [ "${in_blockquote:-0}" -eq 1 ]; then
     		printf '%s\n' "</blockquote>"
     		in_blockquote=0
     	fi
     	return 0
     }
     35 
     # With a filename argument, read the Markdown from that file instead of stdin.
     case $# in
     	0) ;;
     	*) exec <"$1" ;;
     esac

     # Parser state: 1 while the corresponding block is open.
     in_code=0 in_ul=0 in_ol=0 in_blockquote=0
     # One-line pushback buffer, and the text line held back for the Setext check.
     push='' prev_line=''
     47 
     # split_table_row ROW
     # Print the cells of one pipe-delimited table row, one per line, with the
     # surrounding blanks trimmed.  A single leading and a single trailing pipe
     # are treated as row borders, so "| a | b |" and "a | b" both give two cells.
     split_table_row() {
     	printf '%s\n' "$1" | awk '{
     		row = $0
     		sub(/^[ \t]*[|]/, "", row)
     		sub(/[|][ \t]*$/, "", row)
     		n = split(row, cells, "|")
     		for (i = 1; i <= n; i++) {
     			cell = cells[i]
     			sub(/^[ \t]+/, "", cell)
     			sub(/[ \t]+$/, "", cell)
     			print cell
     		}
     	}'
     }

     # is_table_separator ROW
     # Succeed when ROW is a table delimiter row: at least one cell, and every
     # cell consists of dashes with an optional colon at either end.
     is_table_separator() {
     	printf '%s\n' "$1" | awk '{
     		row = $0
     		sub(/^[ \t]*[|]/, "", row)
     		sub(/[|][ \t]*$/, "", row)
     		n = split(row, cells, "|")
     		if (n == 0) exit 1
     		for (i = 1; i <= n; i++) {
     			cell = cells[i]
     			sub(/^[ \t]+/, "", cell)
     			sub(/[ \t]+$/, "", cell)
     			if (cell !~ /^:?-+:?$/) exit 1
     		}
     		exit 0
     	}'
     }

     # convert_markdown
     # Main loop: read Markdown lines from stdin and print the HTML for every
     # block that is complete.
     # Globals: in_code, in_ul, in_ol, in_blockquote, push, prev_line (read and
     #          written).  On return prev_line may still hold an unflushed text
     #          line and lists, blockquote or code block may still be open.
     # Pushback: the table look-ahead has to read one line too many; that line
     #          is stored in `push` and `have_push` is set to 1.  A separate flag
     #          is used so that a pushed-back blank line is not lost.
     convert_markdown() {
     	have_push=0
     	while :; do
     		if [ "$have_push" -eq 1 ]; then
     			line=$push
     			push=''
     			have_push=0
     		elif ! IFS= read -r line && [ -z "$line" ]; then
     			# read fails at EOF, but it still fills $line when the last
     			# line has no trailing newline; only stop when nothing was read
     			break
     		fi

     		# Inside a fenced code block every line is literal, blank lines
     		# included, until the closing fence.
     		if [ "$in_code" -eq 1 ]; then
     			case "$line" in
     				'```'*)
     					printf '</code></pre>\n'
     					in_code=0
     					;;
     				*)
     					escape_html "$line"
     					printf '\n'
     					;;
     			esac
     			continue
     		fi

     		# Blank line: ends the pending paragraph and any blockquote.
     		# Nothing is printed for the blank line itself.
     		if [ -z "$line" ]; then
     			if [ -n "$prev_line" ]; then
     				printf '%s\n' "<p>$(inline_format "$prev_line")</p>"
     				prev_line=''
     			fi
     			close_blockquote
     			continue
     		fi

     		# A held-back text line becomes a Setext heading when this line,
     		# once trimmed, consists only of two or more '=' (h1) or '-' (h2).
     		if [ -n "$prev_line" ]; then
     			trimmed=$(printf '%s' "$line" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//')
     			heading=''
     			case "$trimmed" in
     				*[!=]*) ;;
     				==*) heading=h1 ;;
     			esac
     			case "$trimmed" in
     				*[!-]*) ;;
     				--*) heading=h2 ;;
     			esac
     			if [ -n "$heading" ]; then
     				printf '%s\n' "<$heading>$(inline_format "$prev_line")</$heading>"
     				prev_line=''
     				continue
     			fi
     			# Not an underline: the held-back line was a plain paragraph
     			printf '%s\n' "<p>$(inline_format "$prev_line")</p>"
     			prev_line=''
     		fi

     		# Any line that is not itself quoted ends an open blockquote
     		case "$line" in
     			'>'*) ;;
     			*) close_blockquote ;;
     		esac

     		# Opening fence of a code block (no newline after the tags)
     		case "$line" in
     			'```'*)
     				flush_lists
     				close_blockquote
     				printf '<pre><code>'
     				in_code=1
     				continue
     				;;
     		esac

     		# Table: a line containing '|' followed by a delimiter row
     		case "$line" in
     			*'|'*)
     				# Look ahead one line; remember whether anything was read
     				if IFS= read -r sep || [ -n "$sep" ]; then
     					have_sep=1
     				else
     					have_sep=0
     				fi

     				if [ "$have_sep" -eq 1 ] && is_table_separator "$sep"; then
     					flush_lists
     					close_blockquote

     					printf '%s\n' "<table>"
     					printf '%s\n' "<thead><tr>"
     					split_table_row "$line" | while IFS= read -r cell; do
     						printf '<th>%s</th>' "$(inline_format "$cell")"
     					done
     					printf '\n%s\n' "</tr></thead>"
     					printf '%s\n' "<tbody>"

     					# Body rows run until a blank line, a line without
     					# '|' (pushed back for normal handling) or EOF
     					while IFS= read -r row || [ -n "$row" ]; do
     						[ -z "$row" ] && break
     						case "$row" in
     							*'|'*) ;;
     							*)
     								push=$row
     								have_push=1
     								break
     								;;
     						esac

     						printf '%s\n' "<tr>"
     						split_table_row "$row" | while IFS= read -r cell; do
     							printf '<td>%s</td>' "$(inline_format "$cell")"
     						done
     						printf '\n%s\n' "</tr>"
     					done

     					printf '%s\n' "</tbody></table>"
     					continue
     				elif [ "$have_sep" -eq 1 ]; then
     					# Not a table: hand the look-ahead line back
     					push=$sep
     					have_push=1
     				fi
     				;;
     		esac

     		# Horizontal rule
     		case "$line" in
     			'---'|'***'|'___')
     				flush_lists
     				close_blockquote
     				printf '%s\n' "<hr>"
     				continue
     				;;
     		esac

     		# ATX headings: one to six '#' followed by a space
     		level=0
     		case "$line" in
     			'###### '*) level=6 ;;
     			'##### '*)  level=5 ;;
     			'#### '*)   level=4 ;;
     			'### '*)    level=3 ;;
     			'## '*)     level=2 ;;
     			'# '*)      level=1 ;;
     		esac
     		if [ "$level" -gt 0 ]; then
     			flush_lists
     			# The first space is the one right after the '#' run
     			text=${line#*' '}
     			printf '%s\n' "<h$level>$(inline_format "$text")</h$level>"
     			continue
     		fi

     		# Blockquote: each quoted line becomes its own paragraph
     		case "$line" in
     			'>'*)
     				flush_lists
     				if [ "$in_blockquote" -eq 0 ]; then
     					printf '%s\n' "<blockquote>"
     					in_blockquote=1
     				fi
     				content=$(printf '%s' "$line" | sed 's/^> *//')
     				printf '%s\n' "<p>$(inline_format "$content")</p>"
     				continue
     				;;
     		esac

     		# Unordered list item: '-', '+' or '*' followed by a space
     		case "$line" in
     			[-+*]\ *)
     				close_blockquote
     				if [ "$in_ol" -eq 1 ]; then
     					printf '%s\n' "</ol>"
     					in_ol=0
     				fi
     				if [ "$in_ul" -eq 0 ]; then
     					printf '%s\n' "<ul>"
     					in_ul=1
     				fi
     				item=${line#? }
     				printf '%s\n' "<li>$(inline_format "$item")</li>"
     				continue
     				;;
     			*)
     				if [ "$in_ul" -eq 1 ]; then
     					printf '%s\n' "</ul>"
     					in_ul=0
     				fi
     				;;
     		esac

     		# Ordered list item: digits only, then ". ".  The glob alone would
     		# also accept prose like "3 apples. And pears", so the text before
     		# the first ". " is checked to be purely numeric.
     		is_ol=0
     		case "$line" in
     			[0-9]*.\ *)
     				case "${line%%. *}" in
     					*[!0-9]*) ;;
     					*) is_ol=1 ;;
     				esac
     				;;
     		esac
     		if [ "$is_ol" -eq 1 ]; then
     			close_blockquote
     			if [ "$in_ol" -eq 0 ]; then
     				printf '%s\n' "<ol>"
     				in_ol=1
     			fi
     			item=${line#*. }
     			printf '%s\n' "<li>$(inline_format "$item")</li>"
     			continue
     		fi
     		if [ "$in_ol" -eq 1 ]; then
     			printf '%s\n' "</ol>"
     			in_ol=0
     		fi

     		# Plain text: hold it back so the next line can turn it into a
     		# Setext heading; otherwise it is flushed as a paragraph.
     		prev_line=$line
     	done
     }

     convert_markdown
    288 
    # End of input: emit the paragraph that was still waiting for a possible
    # Setext underline, then close every block that is still open.
    case "$prev_line" in
    	'') ;;
    	*)
    		printf '%s\n' "<p>$(inline_format "$prev_line")</p>"
    		prev_line=''
    		;;
    esac

    flush_lists
    close_blockquote
    # An unterminated code fence still gets its closing tags
    if [ "$in_code" -eq 1 ]; then
    	printf '</code></pre>\n'
    fi

    exit 0
    299 exit 0